From 9e83d5104a70d8545bad61a77e166190d9447e1d Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Thu, 5 Mar 2026 08:15:41 -0800
Subject: workqueue: Add stall detector sample module

Add a sample module under samples/workqueue/stall_detector/ that
reproduces a workqueue stall caused by PF_WQ_WORKER misuse.  The
module queues two work items on the same per-CPU pool, then clears
PF_WQ_WORKER and sleeps in wait_event_idle(), hiding from the
concurrency manager and stalling the second work item indefinitely.

This is useful for testing the workqueue watchdog stall diagnostics.

Signed-off-by: Breno Leitao <leitao@debian.org>
Acked-by: Song Liu <song@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 samples/workqueue/stall_detector/Makefile   |  1 +
 samples/workqueue/stall_detector/wq_stall.c | 98 +++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 samples/workqueue/stall_detector/Makefile
 create mode 100644 samples/workqueue/stall_detector/wq_stall.c

(limited to 'samples')
diff --git a/samples/workqueue/stall_detector/Makefile b/samples/workqueue/stall_detector/Makefile
new file mode 100644
index 000000000000..8849e85e95bb
--- /dev/null
+++ b/samples/workqueue/stall_detector/Makefile
@@ -0,0 +1 @@
+obj-m += wq_stall.o
diff --git a/samples/workqueue/stall_detector/wq_stall.c b/samples/workqueue/stall_detector/wq_stall.c
new file mode 100644
index 000000000000..6f4a497b1881
--- /dev/null
+++ b/samples/workqueue/stall_detector/wq_stall.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * wq_stall - Test module for the workqueue stall detector.
+ *
+ * Deliberately creates a workqueue stall so the watchdog fires and
+ * prints diagnostic output.  Useful for verifying that the stall
+ * detector correctly identifies stuck workers and produces useful
+ * backtraces.
+ *
+ * The stall is triggered by clearing PF_WQ_WORKER before sleeping,
+ * which hides the worker from the concurrency manager.  A second
+ * work item queued on the same pool then sits in the worklist with
+ * no worker available to process it.
+ *
+ * After ~30s the workqueue watchdog fires:
+ *   BUG: workqueue lockup - pool cpus=N ...
+ *
+ * Build:
+ *	make -C <kernel tree> M=samples/workqueue/stall_detector modules
+ *
+ * Copyright (c) 2026 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2026 Breno Leitao <leitao@debian.org>
+ */
+
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+#include <linux/atomic.h>
+#include <linux/sched.h>
+
+static DECLARE_WAIT_QUEUE_HEAD(stall_wq_head);
+static atomic_t wake_condition = ATOMIC_INIT(0);
+static struct work_struct stall_work1;
+static struct work_struct stall_work2;
+
+static void stall_work2_fn(struct work_struct *work)
+{
+	pr_info("wq_stall: second work item finally ran\n");
+}
+
+static void stall_work1_fn(struct work_struct *work)
+{
+	pr_info("wq_stall: first work item running on cpu %d\n",
+		raw_smp_processor_id());
+
+	/*
+	 * Queue second item while we're still counted as running
+	 * (pool->nr_running > 0).  Since schedule_work() on a per-CPU
+	 * workqueue targets raw_smp_processor_id(), item 2 lands on the
+	 * same pool.  __queue_work -> kick_pool -> need_more_worker()
+	 * sees nr_running > 0 and does NOT wake a new worker.
+	 */
+	schedule_work(&stall_work2);
+
+	/*
+	 * Hide from the workqueue concurrency manager.  Without
+	 * PF_WQ_WORKER, schedule() won't call wq_worker_sleeping(),
+	 * so nr_running is never decremented and no replacement
+	 * worker is created.  Item 2 stays stuck in pool->worklist.
+	 */
+	current->flags &= ~PF_WQ_WORKER;
+
+	pr_info("wq_stall: entering wait_event_idle (PF_WQ_WORKER cleared)\n");
+	pr_info("wq_stall: expect 'BUG: workqueue lockup' in ~30-60s\n");
+	wait_event_idle(stall_wq_head, atomic_read(&wake_condition) != 0);
+
+	/* Restore so process_one_work() cleanup works correctly */
+	current->flags |= PF_WQ_WORKER;
+	pr_info("wq_stall: woke up, PF_WQ_WORKER restored\n");
+}
+
+static int __init wq_stall_init(void)
+{
+	pr_info("wq_stall: loading\n");
+
+	INIT_WORK(&stall_work1, stall_work1_fn);
+	INIT_WORK(&stall_work2, stall_work2_fn);
+	schedule_work(&stall_work1);
+
+	return 0;
+}
+
+static void __exit wq_stall_exit(void)
+{
+	pr_info("wq_stall: unloading\n");
+	atomic_set(&wake_condition, 1);
+	wake_up(&stall_wq_head);
+	flush_work(&stall_work1);
+	flush_work(&stall_work2);
+	pr_info("wq_stall: all work flushed, module unloaded\n");
+}
+
+module_init(wq_stall_init);
+module_exit(wq_stall_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Reproduce workqueue stall caused by PF_WQ_WORKER misuse");
+MODULE_AUTHOR("Breno Leitao <leitao@debian.org>");
-- 
cgit v1.2.3


From 4da879a0d3fd170a70994b73baa554c6913918b5 Mon Sep 17 00:00:00 2001
From: Gary Guo <gary@garyguo.net>
Date: Mon, 2 Mar 2026 16:42:36 +0000
Subject: rust: dma: use pointer projection infra for `dma_{read,write}` macro

Current `dma_read!`, `dma_write!` macros also use a custom
`addr_of!()`-based implementation for projecting pointers, which has
soundness issue as it relies on absence of `Deref` implementation on types.
It also has a soundness issue where it does not protect against unaligned
fields (when `#[repr(packed)]` is used) so it can generate misaligned
accesses.

This commit migrates them to use the general pointer projection
infrastructure, which handles these cases correctly.

As part of migration, the macro is updated to have an improved surface
syntax. The current macro have

    dma_read!(a.b.c[d].e.f)

to mean `a.b.c` is a DMA coherent allocation and it should project into it
with `[d].e.f` and do a read, which is confusing as it makes the indexing
operator integral to the macro (so it will break if you have an array of
`CoherentAllocation`, for example).

This also is problematic as we would like to generalize
`CoherentAllocation` from just slices to arbitrary types.

Make the macro expects `dma_read!(path.to.dma, .path.inside.dma)` as the
canonical syntax. The index operator is no longer special and is just one
type of projection (in additional to field projection). Similarly, make
`dma_write!(path.to.dma, .path.inside.dma, value)` become the canonical
syntax for writing.

Another issue of the current macro is that it is always fallible. This
makes sense with existing design of `CoherentAllocation`, but once we
support fixed size arrays with `CoherentAllocation`, it is desirable to
have the ability to perform infallible indexing as well, e.g. doing a `[0]`
index of `[Foo; 2]` is okay and can be checked at build-time, so forcing
falliblity is non-ideal. To capture this, the macro is changed to use
`[idx]` as infallible projection and `[idx]?` as fallible index projection
(those syntax are part of the general projection infra). A benefit of this
is that while individual indexing operation may fail, the overall
read/write operation is not fallible.

Fixes: ad2907b4e308 ("rust: add dma coherent allocator abstraction")
Reviewed-by: Benno Lossin <lossin@kernel.org>
Signed-off-by: Gary Guo <gary@garyguo.net>
Link: https://patch.msgid.link/20260302164239.284084-4-gary@kernel.org
[ Capitalize safety comments; slightly improve wording in doc-comments.
  - Danilo ]
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 drivers/gpu/nova-core/gsp.rs      |  14 ++---
 drivers/gpu/nova-core/gsp/boot.rs |   2 +-
 drivers/gpu/nova-core/gsp/cmdq.rs |  10 +++-
 rust/kernel/dma.rs                | 114 +++++++++++++++++---------------------
 samples/rust/rust_dma.rs          |  30 +++++-----
 5 files changed, 81 insertions(+), 89 deletions(-)

(limited to 'samples')

diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
index 174feaca0a6b..25cd48514c77 100644
--- a/drivers/gpu/nova-core/gsp.rs
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -143,14 +143,14 @@ impl Gsp {
                     // _kgspInitLibosLoggingStructures (allocates memory for buffers)
                     // kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array)
                     dma_write!(
-                        libos[0] = LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0)
-                    )?;
+                        libos, [0]?, LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0)
+                    );
                     dma_write!(
-                        libos[1] = LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0)
-                    )?;
-                    dma_write!(libos[2] = LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0))?;
-                    dma_write!(rmargs[0].inner = fw::GspArgumentsCached::new(cmdq))?;
-                    dma_write!(libos[3] = LibosMemoryRegionInitArgument::new("RMARGS", rmargs))?;
+                        libos, [1]?, LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0)
+                    );
+                    dma_write!(libos, [2]?, LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0));
+                    dma_write!(rmargs, [0]?.inner, fw::GspArgumentsCached::new(cmdq));
+                    dma_write!(libos, [3]?, LibosMemoryRegionInitArgument::new("RMARGS", rmargs));
                 },
             }))
         })
diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs
index be427fe26a58..94833f7996e8 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -157,7 +157,7 @@ impl super::Gsp {
 
         let wpr_meta =
             CoherentAllocation::<GspFwWprMeta>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?;
-        dma_write!(wpr_meta[0] = GspFwWprMeta::new(&gsp_fw, &fb_layout))?;
+        dma_write!(wpr_meta, [0]?, GspFwWprMeta::new(&gsp_fw, &fb_layout));
 
         self.cmdq
             .send_command(bar, commands::SetSystemInfo::new(pdev))?;
diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
index 46819a82a51a..ae54708c38eb 100644
--- a/drivers/gpu/nova-core/gsp/cmdq.rs
+++ b/drivers/gpu/nova-core/gsp/cmdq.rs
@@ -201,9 +201,13 @@ impl DmaGspMem {
 
         let gsp_mem =
             CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?;
-        dma_write!(gsp_mem[0].ptes = PteArray::new(gsp_mem.dma_handle())?)?;
-        dma_write!(gsp_mem[0].cpuq.tx = MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES))?;
-        dma_write!(gsp_mem[0].cpuq.rx = MsgqRxHeader::new())?;
+        dma_write!(gsp_mem, [0]?.ptes, PteArray::new(gsp_mem.dma_handle())?);
+        dma_write!(
+            gsp_mem,
+            [0]?.cpuq.tx,
+            MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES)
+        );
+        dma_write!(gsp_mem, [0]?.cpuq.rx, MsgqRxHeader::new());
 
         Ok(Self(gsp_mem))
     }
diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs
index 909d56fd5118..a396f8435739 100644
--- a/rust/kernel/dma.rs
+++ b/rust/kernel/dma.rs
@@ -461,6 +461,19 @@ impl<T: AsBytes + FromBytes> CoherentAllocation<T> {
         self.count * core::mem::size_of::<T>()
     }
 
+    /// Returns the raw pointer to the allocated region in the CPU's virtual address space.
+    #[inline]
+    pub fn as_ptr(&self) -> *const [T] {
+        core::ptr::slice_from_raw_parts(self.cpu_addr.as_ptr(), self.count)
+    }
+
+    /// Returns the raw pointer to the allocated region in the CPU's virtual address space as
+    /// a mutable pointer.
+    #[inline]
+    pub fn as_mut_ptr(&self) -> *mut [T] {
+        core::ptr::slice_from_raw_parts_mut(self.cpu_addr.as_ptr(), self.count)
+    }
+
     /// Returns the base address to the allocated region in the CPU's virtual address space.
     pub fn start_ptr(&self) -> *const T {
         self.cpu_addr.as_ptr()
@@ -581,23 +594,6 @@ impl<T: AsBytes + FromBytes> CoherentAllocation<T> {
         Ok(())
     }
 
-    /// Returns a pointer to an element from the region with bounds checking. `offset` is in
-    /// units of `T`, not the number of bytes.
-    ///
-    /// Public but hidden since it should only be used from [`dma_read`] and [`dma_write`] macros.
-    #[doc(hidden)]
-    pub fn item_from_index(&self, offset: usize) -> Result<*mut T> {
-        if offset >= self.count {
-            return Err(EINVAL);
-        }
-        // SAFETY:
-        // - The pointer is valid due to type invariant on `CoherentAllocation`
-        // and we've just checked that the range and index is within bounds.
-        // - `offset` can't overflow since it is smaller than `self.count` and we've checked
-        // that `self.count` won't overflow early in the constructor.
-        Ok(unsafe { self.cpu_addr.as_ptr().add(offset) })
-    }
-
     /// Reads the value of `field` and ensures that its type is [`FromBytes`].
     ///
     /// # Safety
@@ -670,6 +666,9 @@ unsafe impl<T: AsBytes + FromBytes + Send> Send for CoherentAllocation<T> {}
 
 /// Reads a field of an item from an allocated region of structs.
 ///
+/// The syntax is of the form `kernel::dma_read!(dma, proj)` where `dma` is an expression evaluating
+/// to a [`CoherentAllocation`] and `proj` is a [projection specification](kernel::ptr::project!).
+///
 /// # Examples
 ///
 /// ```
@@ -684,36 +683,29 @@ unsafe impl<T: AsBytes + FromBytes + Send> Send for CoherentAllocation<T> {}
 /// unsafe impl kernel::transmute::AsBytes for MyStruct{};
 ///
 /// # fn test(alloc: &kernel::dma::CoherentAllocation<MyStruct>) -> Result {
-/// let whole = kernel::dma_read!(alloc[2]);
-/// let field = kernel::dma_read!(alloc[1].field);
+/// let whole = kernel::dma_read!(alloc, [2]?);
+/// let field = kernel::dma_read!(alloc, [1]?.field);
 /// # Ok::<(), Error>(()) }
 /// ```
 #[macro_export]
 macro_rules! dma_read {
-    ($dma:expr, $idx: expr, $($field:tt)*) => {{
-        (|| -> ::core::result::Result<_, $crate::error::Error> {
-            let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?;
-            // SAFETY: `item_from_index` ensures that `item` is always a valid pointer and can be
-            // dereferenced. The compiler also further validates the expression on whether `field`
-            // is a member of `item` when expanded by the macro.
-            unsafe {
-                let ptr_field = ::core::ptr::addr_of!((*item) $($field)*);
-                ::core::result::Result::Ok(
-                    $crate::dma::CoherentAllocation::field_read(&$dma, ptr_field)
-                )
-            }
-        })()
+    ($dma:expr, $($proj:tt)*) => {{
+        let dma = &$dma;
+        let ptr = $crate::ptr::project!(
+            $crate::dma::CoherentAllocation::as_ptr(dma), $($proj)*
+        );
+        // SAFETY: The pointer created by the projection is within the DMA region.
+        unsafe { $crate::dma::CoherentAllocation::field_read(dma, ptr) }
     }};
-    ($dma:ident [ $idx:expr ] $($field:tt)* ) => {
-        $crate::dma_read!($dma, $idx, $($field)*)
-    };
-    ($($dma:ident).* [ $idx:expr ] $($field:tt)* ) => {
-        $crate::dma_read!($($dma).*, $idx, $($field)*)
-    };
 }
 
 /// Writes to a field of an item from an allocated region of structs.
 ///
+/// The syntax is of the form `kernel::dma_write!(dma, proj, val)` where `dma` is an expression
+/// evaluating to a [`CoherentAllocation`], `proj` is a
+/// [projection specification](kernel::ptr::project!), and `val` is the value to be written to the
+/// projected location.
+///
 /// # Examples
 ///
 /// ```
@@ -728,37 +720,31 @@ macro_rules! dma_read {
 /// unsafe impl kernel::transmute::AsBytes for MyStruct{};
 ///
 /// # fn test(alloc: &kernel::dma::CoherentAllocation<MyStruct>) -> Result {
-/// kernel::dma_write!(alloc[2].member = 0xf);
-/// kernel::dma_write!(alloc[1] = MyStruct { member: 0xf });
+/// kernel::dma_write!(alloc, [2]?.member, 0xf);
+/// kernel::dma_write!(alloc, [1]?, MyStruct { member: 0xf });
 /// # Ok::<(), Error>(()) }
 /// ```
 #[macro_export]
 macro_rules! dma_write {
-    ($dma:ident [ $idx:expr ] $($field:tt)*) => {{
-        $crate::dma_write!($dma, $idx, $($field)*)
-    }};
-    ($($dma:ident).* [ $idx:expr ] $($field:tt)* ) => {{
-        $crate::dma_write!($($dma).*, $idx, $($field)*)
+    (@parse [$dma:expr] [$($proj:tt)*] [, $val:expr]) => {{
+        let dma = &$dma;
+        let ptr = $crate::ptr::project!(
+            mut $crate::dma::CoherentAllocation::as_mut_ptr(dma), $($proj)*
+        );
+        let val = $val;
+        // SAFETY: The pointer created by the projection is within the DMA region.
+        unsafe { $crate::dma::CoherentAllocation::field_write(dma, ptr, val) }
     }};
-    ($dma:expr, $idx: expr, = $val:expr) => {
-        (|| -> ::core::result::Result<_, $crate::error::Error> {
-            let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?;
-            // SAFETY: `item_from_index` ensures that `item` is always a valid item.
-            unsafe { $crate::dma::CoherentAllocation::field_write(&$dma, item, $val) }
-            ::core::result::Result::Ok(())
-        })()
+    (@parse [$dma:expr] [$($proj:tt)*] [.$field:tt $($rest:tt)*]) => {
+        $crate::dma_write!(@parse [$dma] [$($proj)* .$field] [$($rest)*])
+    };
+    (@parse [$dma:expr] [$($proj:tt)*] [[$index:expr]? $($rest:tt)*]) => {
+        $crate::dma_write!(@parse [$dma] [$($proj)* [$index]?] [$($rest)*])
+    };
+    (@parse [$dma:expr] [$($proj:tt)*] [[$index:expr] $($rest:tt)*]) => {
+        $crate::dma_write!(@parse [$dma] [$($proj)* [$index]] [$($rest)*])
     };
-    ($dma:expr, $idx: expr, $(.$field:ident)* = $val:expr) => {
-        (|| -> ::core::result::Result<_, $crate::error::Error> {
-            let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?;
-            // SAFETY: `item_from_index` ensures that `item` is always a valid pointer and can be
-            // dereferenced. The compiler also further validates the expression on whether `field`
-            // is a member of `item` when expanded by the macro.
-            unsafe {
-                let ptr_field = ::core::ptr::addr_of_mut!((*item) $(.$field)*);
-                $crate::dma::CoherentAllocation::field_write(&$dma, ptr_field, $val)
-            }
-            ::core::result::Result::Ok(())
-        })()
+    ($dma:expr, $($rest:tt)*) => {
+        $crate::dma_write!(@parse [$dma] [] [$($rest)*])
     };
 }
diff --git a/samples/rust/rust_dma.rs b/samples/rust/rust_dma.rs
index 9c45851c876e..ce39b5545097 100644
--- a/samples/rust/rust_dma.rs
+++ b/samples/rust/rust_dma.rs
@@ -68,7 +68,7 @@ impl pci::Driver for DmaSampleDriver {
                 CoherentAllocation::alloc_coherent(pdev.as_ref(), TEST_VALUES.len(), GFP_KERNEL)?;
 
             for (i, value) in TEST_VALUES.into_iter().enumerate() {
-                kernel::dma_write!(ca[i] = MyStruct::new(value.0, value.1))?;
+                kernel::dma_write!(ca, [i]?, MyStruct::new(value.0, value.1));
             }
 
             let size = 4 * page::PAGE_SIZE;
@@ -85,24 +85,26 @@ impl pci::Driver for DmaSampleDriver {
     }
 }
 
+impl DmaSampleDriver {
+    fn check_dma(&self) -> Result {
+        for (i, value) in TEST_VALUES.into_iter().enumerate() {
+            let val0 = kernel::dma_read!(self.ca, [i]?.h);
+            let val1 = kernel::dma_read!(self.ca, [i]?.b);
+
+            assert_eq!(val0, value.0);
+            assert_eq!(val1, value.1);
+        }
+
+        Ok(())
+    }
+}
+
 #[pinned_drop]
 impl PinnedDrop for DmaSampleDriver {
     fn drop(self: Pin<&mut Self>) {
         dev_info!(self.pdev, "Unload DMA test driver.\n");
 
-        for (i, value) in TEST_VALUES.into_iter().enumerate() {
-            let val0 = kernel::dma_read!(self.ca[i].h);
-            let val1 = kernel::dma_read!(self.ca[i].b);
-            assert!(val0.is_ok());
-            assert!(val1.is_ok());
-
-            if let Ok(val0) = val0 {
-                assert_eq!(val0, value.0);
-            }
-            if let Ok(val1) = val1 {
-                assert_eq!(val1, value.1);
-            }
-        }
+        assert!(self.check_dma().is_ok());
 
         for (i, entry) in self.sgt.iter().enumerate() {
             dev_info!(
-- 
cgit v1.2.3