From 4da879a0d3fd170a70994b73baa554c6913918b5 Mon Sep 17 00:00:00 2001 From: Gary Guo Date: Mon, 2 Mar 2026 16:42:36 +0000 Subject: rust: dma: use pointer projection infra for `dma_{read,write}` macro Current `dma_read!`, `dma_write!` macros also use a custom `addr_of!()`-based implementation for projecting pointers, which has soundness issue as it relies on absence of `Deref` implementation on types. It also has a soundness issue where it does not protect against unaligned fields (when `#[repr(packed)]` is used) so it can generate misaligned accesses. This commit migrates them to use the general pointer projection infrastructure, which handles these cases correctly. As part of migration, the macro is updated to have an improved surface syntax. The current macro have dma_read!(a.b.c[d].e.f) to mean `a.b.c` is a DMA coherent allocation and it should project into it with `[d].e.f` and do a read, which is confusing as it makes the indexing operator integral to the macro (so it will break if you have an array of `CoherentAllocation`, for example). This also is problematic as we would like to generalize `CoherentAllocation` from just slices to arbitrary types. Make the macro expects `dma_read!(path.to.dma, .path.inside.dma)` as the canonical syntax. The index operator is no longer special and is just one type of projection (in additional to field projection). Similarly, make `dma_write!(path.to.dma, .path.inside.dma, value)` become the canonical syntax for writing. Another issue of the current macro is that it is always fallible. This makes sense with existing design of `CoherentAllocation`, but once we support fixed size arrays with `CoherentAllocation`, it is desirable to have the ability to perform infallible indexing as well, e.g. doing a `[0]` index of `[Foo; 2]` is okay and can be checked at build-time, so forcing falliblity is non-ideal. To capture this, the macro is changed to use `[idx]` as infallible projection and `[idx]?` as fallible index projection (those syntax are part of the general projection infra). A benefit of this is that while individual indexing operation may fail, the overall read/write operation is not fallible. Fixes: ad2907b4e308 ("rust: add dma coherent allocator abstraction") Reviewed-by: Benno Lossin Signed-off-by: Gary Guo Link: https://patch.msgid.link/20260302164239.284084-4-gary@kernel.org [ Capitalize safety comments; slightly improve wording in doc-comments. - Danilo ] Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/gsp.rs | 14 +++++++------- drivers/gpu/nova-core/gsp/boot.rs | 2 +- drivers/gpu/nova-core/gsp/cmdq.rs | 10 +++++++--- 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 174feaca0a6b..25cd48514c77 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -143,14 +143,14 @@ impl Gsp { // _kgspInitLibosLoggingStructures (allocates memory for buffers) // kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array) dma_write!( - libos[0] = LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0) - )?; + libos, [0]?, LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0) + ); dma_write!( - libos[1] = LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0) - )?; - dma_write!(libos[2] = LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0))?; - dma_write!(rmargs[0].inner = fw::GspArgumentsCached::new(cmdq))?; - dma_write!(libos[3] = LibosMemoryRegionInitArgument::new("RMARGS", rmargs))?; + libos, [1]?, LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0) + ); + dma_write!(libos, [2]?, LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0)); + dma_write!(rmargs, [0]?.inner, fw::GspArgumentsCached::new(cmdq)); + dma_write!(libos, [3]?, LibosMemoryRegionInitArgument::new("RMARGS", rmargs)); }, })) }) diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index be427fe26a58..94833f7996e8 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -157,7 +157,7 @@ impl super::Gsp { let wpr_meta = CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; - dma_write!(wpr_meta[0] = GspFwWprMeta::new(&gsp_fw, &fb_layout))?; + dma_write!(wpr_meta, [0]?, GspFwWprMeta::new(&gsp_fw, &fb_layout)); self.cmdq .send_command(bar, commands::SetSystemInfo::new(pdev))?; diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs index 46819a82a51a..ae54708c38eb 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -201,9 +201,13 @@ impl DmaGspMem { let gsp_mem = CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; - dma_write!(gsp_mem[0].ptes = PteArray::new(gsp_mem.dma_handle())?)?; - dma_write!(gsp_mem[0].cpuq.tx = MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES))?; - dma_write!(gsp_mem[0].cpuq.rx = MsgqRxHeader::new())?; + dma_write!(gsp_mem, [0]?.ptes, PteArray::new(gsp_mem.dma_handle())?); + dma_write!( + gsp_mem, + [0]?.cpuq.tx, + MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES) + ); + dma_write!(gsp_mem, [0]?.cpuq.rx, MsgqRxHeader::new()); Ok(Self(gsp_mem)) } -- cgit v1.2.3 From c7940c8bf215b9dc6211781c77ce80e76982a723 Mon Sep 17 00:00:00 2001 From: Tim Kovalenko Date: Mon, 9 Mar 2026 12:34:21 -0400 Subject: gpu: nova-core: fix stack overflow in GSP memory allocation The `Cmdq::new` function was allocating a `PteArray` struct on the stack and was causing a stack overflow with 8216 bytes. Modify the `PteArray` to calculate and write the Page Table Entries directly into the coherent DMA buffer one-by-one. This reduces the stack usage quite a lot. Reported-by: Gary Guo Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/509436-Nova/topic/.60Cmdq.3A.3Anew.60.20uses.20excessive.20stack.20size/near/570375549 Link: https://lore.kernel.org/rust-for-linux/CANiq72mAQxbRJZDnik3Qmd4phvFwPA01O2jwaaXRh_T+2=L-qA@mail.gmail.com/ Fixes: f38b4f105cfc ("gpu: nova-core: Create initial Gsp") Acked-by: Alexandre Courbot Signed-off-by: Tim Kovalenko Link: https://patch.msgid.link/20260309-drm-rust-next-v4-4-4ef485b19a4c@proton.me [ * Use PteArray::entry() in LogBuffer::new(), * Add TODO comment to use IoView projections once available, * Add PTE_ARRAY_SIZE constant to avoid duplication. - Danilo ] Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/gsp.rs | 32 +++++++++++++++++--------------- drivers/gpu/nova-core/gsp/cmdq.rs | 14 ++++++++++++-- 2 files changed, 29 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 25cd48514c77..c69adaa92bbe 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -47,16 +47,12 @@ struct PteArray([u64; NUM_ENTRIES]); unsafe impl AsBytes for PteArray {} impl PteArray { - /// Creates a new page table array mapping `NUM_PAGES` GSP pages starting at address `start`. - fn new(start: DmaAddress) -> Result { - let mut ptes = [0u64; NUM_PAGES]; - for (i, pte) in ptes.iter_mut().enumerate() { - *pte = start - .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) - .ok_or(EOVERFLOW)?; - } - - Ok(Self(ptes)) + /// Returns the page table entry for `index`, for a mapping starting at `start`. + // TODO: Replace with `IoView` projection once available. + fn entry(start: DmaAddress, index: usize) -> Result { + start + .checked_add(num::usize_as_u64(index) << GSP_PAGE_SHIFT) + .ok_or(EOVERFLOW) } } @@ -86,16 +82,22 @@ impl LogBuffer { NUM_PAGES * GSP_PAGE_SIZE, GFP_KERNEL | __GFP_ZERO, )?); - let ptes = PteArray::::new(obj.0.dma_handle())?; + + let start_addr = obj.0.dma_handle(); // SAFETY: `obj` has just been created and we are its sole user. - unsafe { - // Copy the self-mapping PTE at the expected location. + let pte_region = unsafe { obj.0 - .as_slice_mut(size_of::(), size_of_val(&ptes))? - .copy_from_slice(ptes.as_bytes()) + .as_slice_mut(size_of::(), NUM_PAGES * size_of::())? }; + // Write values one by one to avoid an on-stack instance of `PteArray`. + for (i, chunk) in pte_region.chunks_exact_mut(size_of::()).enumerate() { + let pte_value = PteArray::<0>::entry(start_addr, i)?; + + chunk.copy_from_slice(&pte_value.to_ne_bytes()); + } + Ok(obj) } } diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs index ae54708c38eb..9c5efd2daf45 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -159,7 +159,7 @@ struct Msgq { #[repr(C)] struct GspMem { /// Self-mapping page table entries. - ptes: PteArray<{ GSP_PAGE_SIZE / size_of::() }>, + ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>, /// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the /// write and read pointers that the CPU updates. /// @@ -172,6 +172,10 @@ struct GspMem { gspq: Msgq, } +impl GspMem { + const PTE_ARRAY_SIZE: usize = GSP_PAGE_SIZE / size_of::(); +} + // SAFETY: These structs don't meet the no-padding requirements of AsBytes but // that is not a problem because they are not used outside the kernel. unsafe impl AsBytes for GspMem {} @@ -201,7 +205,13 @@ impl DmaGspMem { let gsp_mem = CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; - dma_write!(gsp_mem, [0]?.ptes, PteArray::new(gsp_mem.dma_handle())?); + + let start = gsp_mem.dma_handle(); + // Write values one by one to avoid an on-stack instance of `PteArray`. + for i in 0..GspMem::PTE_ARRAY_SIZE { + dma_write!(gsp_mem, [0]?.ptes.0[i], PteArray::<0>::entry(start, i)?); + } + dma_write!( gsp_mem, [0]?.cpuq.tx, -- cgit v1.2.3 From 0073a17b466684413ac87cf8ff6c19560db44e7a Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 9 Mar 2026 23:53:24 +0100 Subject: gpu: nova-core: gsp: fix UB in DmaGspMem pointer accessors The DmaGspMem pointer accessor methods (gsp_write_ptr, gsp_read_ptr, cpu_read_ptr, cpu_write_ptr, advance_cpu_read_ptr, advance_cpu_write_ptr) dereference a raw pointer to DMA memory, creating an intermediate reference before calling volatile read/write methods. This is undefined behavior since DMA memory can be concurrently modified by the device. Fix this by moving the implementations into a gsp_mem module in fw.rs that uses the dma_read!() / dma_write!() macros, making the original methods on DmaGspMem thin forwarding wrappers. An alternative approach would have been to wrap the shared memory in Opaque, but that would have required even more unsafe code. Since the gsp_mem module lives in fw.rs (to access firmware-specific binding field names), GspMem, Msgq and their relevant fields are temporarily widened to pub(super). This will be reverted once IoView projections are available. Cc: Gary Guo Closes: https://lore.kernel.org/nouveau/DGUT14ILG35P.1UMNRKU93JUM1@kernel.org/ Fixes: 75f6b1de8133 ("gpu: nova-core: gsp: Add GSP command queue bindings and handling") Reviewed-by: Alexandre Courbot Link: https://patch.msgid.link/20260309225408.27714-1-dakr@kernel.org [ Use pub(super) where possible; replace bitwise-and with modulo operator analogous to [1]. - Danilo ] Link: https://lore.kernel.org/all/20260129-nova-core-cmdq1-v3-1-2ede85493a27@nvidia.com/ [1] Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/gsp/cmdq.rs | 71 ++++++--------------------- drivers/gpu/nova-core/gsp/fw.rs | 101 ++++++++++++++++++++++++++------------ 2 files changed, 84 insertions(+), 88 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs index 9c5efd2daf45..03a4f3599849 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -2,11 +2,7 @@ use core::{ cmp, - mem, - sync::atomic::{ - fence, - Ordering, // - }, // + mem, // }; use kernel::{ @@ -146,30 +142,32 @@ static_assert!(align_of::() == GSP_PAGE_SIZE); #[repr(C)] // There is no struct defined for this in the open-gpu-kernel-source headers. // Instead it is defined by code in `GspMsgQueuesInit()`. -struct Msgq { +// TODO: Revert to private once `IoView` projections replace the `gsp_mem` module. +pub(super) struct Msgq { /// Header for sending messages, including the write pointer. - tx: MsgqTxHeader, + pub(super) tx: MsgqTxHeader, /// Header for receiving messages, including the read pointer. - rx: MsgqRxHeader, + pub(super) rx: MsgqRxHeader, /// The message queue proper. msgq: MsgqData, } /// Structure shared between the driver and the GSP and containing the command and message queues. #[repr(C)] -struct GspMem { +// TODO: Revert to private once `IoView` projections replace the `gsp_mem` module. +pub(super) struct GspMem { /// Self-mapping page table entries. ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>, /// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the /// write and read pointers that the CPU updates. /// /// This member is read-only for the GSP. - cpuq: Msgq, + pub(super) cpuq: Msgq, /// GSP queue: the GSP writes messages here, and the driver reads them. It also contains the /// write and read pointers that the GSP updates. /// /// This member is read-only for the driver. - gspq: Msgq, + pub(super) gspq: Msgq, } impl GspMem { @@ -331,12 +329,7 @@ impl DmaGspMem { // // - The returned value is between `0` and `MSGQ_NUM_PAGES`. fn gsp_write_ptr(&self) -> u32 { - let gsp_mem = self.0.start_ptr(); - - // SAFETY: - // - The 'CoherentAllocation' contains at least one object. - // - By the invariants of `CoherentAllocation` the pointer is valid. - (unsafe { (*gsp_mem).gspq.tx.write_ptr() } % MSGQ_NUM_PAGES) + super::fw::gsp_mem::gsp_write_ptr(&self.0) } // Returns the index of the memory page the GSP will read the next command from. @@ -345,12 +338,7 @@ impl DmaGspMem { // // - The returned value is between `0` and `MSGQ_NUM_PAGES`. fn gsp_read_ptr(&self) -> u32 { - let gsp_mem = self.0.start_ptr(); - - // SAFETY: - // - The 'CoherentAllocation' contains at least one object. - // - By the invariants of `CoherentAllocation` the pointer is valid. - (unsafe { (*gsp_mem).gspq.rx.read_ptr() } % MSGQ_NUM_PAGES) + super::fw::gsp_mem::gsp_read_ptr(&self.0) } // Returns the index of the memory page the CPU can read the next message from. @@ -359,27 +347,12 @@ impl DmaGspMem { // // - The returned value is between `0` and `MSGQ_NUM_PAGES`. fn cpu_read_ptr(&self) -> u32 { - let gsp_mem = self.0.start_ptr(); - - // SAFETY: - // - The ['CoherentAllocation'] contains at least one object. - // - By the invariants of CoherentAllocation the pointer is valid. - (unsafe { (*gsp_mem).cpuq.rx.read_ptr() } % MSGQ_NUM_PAGES) + super::fw::gsp_mem::cpu_read_ptr(&self.0) } // Informs the GSP that it can send `elem_count` new pages into the message queue. fn advance_cpu_read_ptr(&mut self, elem_count: u32) { - let rptr = self.cpu_read_ptr().wrapping_add(elem_count) % MSGQ_NUM_PAGES; - - // Ensure read pointer is properly ordered. - fence(Ordering::SeqCst); - - let gsp_mem = self.0.start_ptr_mut(); - - // SAFETY: - // - The 'CoherentAllocation' contains at least one object. - // - By the invariants of `CoherentAllocation` the pointer is valid. - unsafe { (*gsp_mem).cpuq.rx.set_read_ptr(rptr) }; + super::fw::gsp_mem::advance_cpu_read_ptr(&self.0, elem_count) } // Returns the index of the memory page the CPU can write the next command to. @@ -388,26 +361,12 @@ impl DmaGspMem { // // - The returned value is between `0` and `MSGQ_NUM_PAGES`. fn cpu_write_ptr(&self) -> u32 { - let gsp_mem = self.0.start_ptr(); - - // SAFETY: - // - The 'CoherentAllocation' contains at least one object. - // - By the invariants of `CoherentAllocation` the pointer is valid. - (unsafe { (*gsp_mem).cpuq.tx.write_ptr() } % MSGQ_NUM_PAGES) + super::fw::gsp_mem::cpu_write_ptr(&self.0) } // Informs the GSP that it can process `elem_count` new pages from the command queue. fn advance_cpu_write_ptr(&mut self, elem_count: u32) { - let wptr = self.cpu_write_ptr().wrapping_add(elem_count) & MSGQ_NUM_PAGES; - let gsp_mem = self.0.start_ptr_mut(); - - // SAFETY: - // - The 'CoherentAllocation' contains at least one object. - // - By the invariants of `CoherentAllocation` the pointer is valid. - unsafe { (*gsp_mem).cpuq.tx.set_write_ptr(wptr) }; - - // Ensure all command data is visible before triggering the GSP read. - fence(Ordering::SeqCst); + super::fw::gsp_mem::advance_cpu_write_ptr(&self.0, elem_count) } } diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index 83ff91614e36..040b30ec3089 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -40,6 +40,75 @@ use crate::{ }, }; +// TODO: Replace with `IoView` projections once available; the `unwrap()` calls go away once we +// switch to the new `dma::Coherent` API. +pub(super) mod gsp_mem { + use core::sync::atomic::{ + fence, + Ordering, // + }; + + use kernel::{ + dma::CoherentAllocation, + dma_read, + dma_write, + prelude::*, // + }; + + use crate::gsp::cmdq::{ + GspMem, + MSGQ_NUM_PAGES, // + }; + + pub(in crate::gsp) fn gsp_write_ptr(qs: &CoherentAllocation) -> u32 { + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result { Ok(dma_read!(qs, [0]?.gspq.tx.0.writePtr) % MSGQ_NUM_PAGES) }().unwrap() + } + + pub(in crate::gsp) fn gsp_read_ptr(qs: &CoherentAllocation) -> u32 { + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result { Ok(dma_read!(qs, [0]?.gspq.rx.0.readPtr) % MSGQ_NUM_PAGES) }().unwrap() + } + + pub(in crate::gsp) fn cpu_read_ptr(qs: &CoherentAllocation) -> u32 { + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result { Ok(dma_read!(qs, [0]?.cpuq.rx.0.readPtr) % MSGQ_NUM_PAGES) }().unwrap() + } + + pub(in crate::gsp) fn advance_cpu_read_ptr(qs: &CoherentAllocation, count: u32) { + let rptr = cpu_read_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES; + + // Ensure read pointer is properly ordered. + fence(Ordering::SeqCst); + + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result { + dma_write!(qs, [0]?.cpuq.rx.0.readPtr, rptr); + Ok(()) + }() + .unwrap() + } + + pub(in crate::gsp) fn cpu_write_ptr(qs: &CoherentAllocation) -> u32 { + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result { Ok(dma_read!(qs, [0]?.cpuq.tx.0.writePtr) % MSGQ_NUM_PAGES) }().unwrap() + } + + pub(in crate::gsp) fn advance_cpu_write_ptr(qs: &CoherentAllocation, count: u32) { + let wptr = cpu_write_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES; + + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result { + dma_write!(qs, [0]?.cpuq.tx.0.writePtr, wptr); + Ok(()) + }() + .unwrap(); + + // Ensure all command data is visible before triggering the GSP read. + fence(Ordering::SeqCst); + } +} + /// Empty type to group methods related to heap parameters for running the GSP firmware. enum GspFwHeapParams {} @@ -708,22 +777,6 @@ impl MsgqTxHeader { entryOff: num::usize_into_u32::(), }) } - - /// Returns the value of the write pointer for this queue. - pub(crate) fn write_ptr(&self) -> u32 { - let ptr = core::ptr::from_ref(&self.0.writePtr); - - // SAFETY: `ptr` is a valid pointer to a `u32`. - unsafe { ptr.read_volatile() } - } - - /// Sets the value of the write pointer for this queue. - pub(crate) fn set_write_ptr(&mut self, val: u32) { - let ptr = core::ptr::from_mut(&mut self.0.writePtr); - - // SAFETY: `ptr` is a valid pointer to a `u32`. - unsafe { ptr.write_volatile(val) } - } } // SAFETY: Padding is explicit and does not contain uninitialized data. @@ -739,22 +792,6 @@ impl MsgqRxHeader { pub(crate) fn new() -> Self { Self(Default::default()) } - - /// Returns the value of the read pointer for this queue. - pub(crate) fn read_ptr(&self) -> u32 { - let ptr = core::ptr::from_ref(&self.0.readPtr); - - // SAFETY: `ptr` is a valid pointer to a `u32`. - unsafe { ptr.read_volatile() } - } - - /// Sets the value of the read pointer for this queue. - pub(crate) fn set_read_ptr(&mut self, val: u32) { - let ptr = core::ptr::from_mut(&mut self.0.readPtr); - - // SAFETY: `ptr` is a valid pointer to a `u32`. - unsafe { ptr.write_volatile(val) } - } } // SAFETY: Padding is explicit and does not contain uninitialized data. -- cgit v1.2.3