summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dave Airlie <airlied@redhat.com> 2026-06-05 02:09:38 +0300
committer Dave Airlie <airlied@redhat.com> 2026-06-05 02:13:12 +0300
commit 8205c61deb6e5c1cabaf02415337070f3ca1ea19 (patch)
tree abaa94c36aed7783aa6409024ebf401133cab6fa
parent 527b3f2a4853f94a856815ee4e4f44d14df58182 (diff)
parent 99676aed1fec109d62822e21a06760eb098dc5f4 (diff)
download linux-8205c61deb6e5c1cabaf02415337070f3ca1ea19.tar.xz
Merge tag 'drm-rust-next-2026-06-04' of https://gitlab.freedesktop.org/drm/rust/kernel into drm-next
DRM Rust changes for v7.2-rc1 - Driver Core (shared via signed tag dd-lifetimes-7.2-rc1): - Introduce Higher-Ranked Lifetime Types (HRT) for Rust device drivers, allowing driver structs to hold device resources like pci::Bar and IoMem directly with a lifetime tied to the binding scope, removing the need for Devres indirection and ARef<Device>. - Replace drvdata() with scoped registration data on the auxiliary bus, using the new ForLt trait to thread lifetimes through registrations. Remove drvdata() and driver_type. - DRM: - Add GPUVM immediate mode abstraction for Rust GPU drivers: - In immediate mode, GPU virtual address space state is updated during job execution (in the DMA fence signalling critical path), keeping the GPUVM and the GPU's address space always in sync. - Provide GpuVm, GpuVa, and GpuVmBo types for managing address spaces, virtual mappings, and GEM object backing respectively. - Provide split-merge map/unmap operations that handle partial overlaps with existing mappings. - drm_exec integration for dma_resv locking and GEM object validation based on the external/evicted object lists are not yet covered and planned as follow-up work. - Introduce DeviceContext type state for drm::Device, allowing drivers to restrict operations to contexts where the device is guaranteed to be registered (or not yet registered) with userspace. - Add FEAT_RENDER flag to the Driver trait for render node support. - Nova: - Hopper/Blackwell enablement: - Add GPU identification and architecture-based HAL selection for Hopper (GH100) and Blackwell (GB100, GB202). - Implement the FSP (Foundation Security Processor) boot path used by Hopper and Blackwell, including FSP falcon engine support, EMEM operations, MCTP/NVDM message infrastructure, and FSP Chain of Trust boot with GSP lockdown release. - Add support for 32-bit firmware images and auto-detection of firmware image format. 
- Add architecture-specific framebuffer, sysmem flush, PCI config mirror, DMA mask, and WPR/non-WPR heap sizing. - GSP boot and unload: - Refactor the GSP boot process into a chipset-specific HAL, keeping the SEC2 and FSP boot paths separated cleanly. - Implement proper driver unload: send UNLOADING_GUEST_DRIVER command, run Booter Unloader and FWSEC-SB upon unbinding, and run the unload bundle on Gsp::boot() failure. This removes the need for a manual GPU reset between driver unbind and re-probe. - GA100 support: - Add support for the GA100 GPU, including IFR header detection and skipping, correct fwsignature selection, conditional FRTS boot, and documentation of the IFR header layout. - VBIOS hardening and refactoring: - Harden VBIOS parsing with checked arithmetic, bounds-checked accesses, and FromBytes-based structure reads throughout the FWSEC and Falcon data paths. Simplify the overall VBIOS module structure. - HRT adoption: - Use lifetime-parameterized pci::Bar directly, replacing the Arc<Devres<Bar0>> indirection. Replace ARef<Device> with &'bound Device in SysmemFlush and the GSP sequencer. Separate the driver type from driver data. - Misc: - Rename module names to kebab-case (nova-drm, nova-core). - Require little-endian in Kconfig, making the existing assumption explicit. - Tyr: - Define comprehensive typed register blocks for GPU_CONTROL, JOB_CONTROL, MMU_CONTROL (including per-address-space registers), and DOORBELL_BLOCK using the kernel register!() macro. This replaces manual bit manipulation with typed register and field accessors. - Add shmem-backed GEM objects and set DMA mask based on GPU physical address width. - Adopt HRT: separate driver type from driver data, and use IoMem directly instead of Devres for register access during probe. - Move clock cleanup into a Drop implementation. Signed-off-by: Dave Airlie <airlied@redhat.com> From: "Danilo Krummrich" <dakr@kernel.org> Link: https://patch.msgid.link/DJ0IF39U9ETK.PCCUO7ZEQ4S0@kernel.org
-rw-r--r--Documentation/gpu/nova/core/vbios.rst65
-rw-r--r--MAINTAINERS2
-rw-r--r--drivers/base/base.h16
-rw-r--r--drivers/base/dd.c2
-rw-r--r--drivers/cpufreq/rcpufreq_dt.rs9
-rw-r--r--drivers/gpu/drm/Kconfig7
-rw-r--r--drivers/gpu/drm/drm_gem_shmem_helper.c32
-rw-r--r--drivers/gpu/drm/drm_gpuvm.c6
-rw-r--r--drivers/gpu/drm/nova/Kconfig3
-rw-r--r--drivers/gpu/drm/nova/Makefile3
-rw-r--r--drivers/gpu/drm/nova/driver.rs28
-rw-r--r--drivers/gpu/drm/nova/gem.rs15
-rw-r--r--drivers/gpu/drm/nova/nova.rs2
-rw-r--r--drivers/gpu/drm/tyr/Kconfig1
-rw-r--r--drivers/gpu/drm/tyr/driver.rs91
-rw-r--r--drivers/gpu/drm/tyr/gem.rs33
-rw-r--r--drivers/gpu/drm/tyr/gpu.rs180
-rw-r--r--drivers/gpu/drm/tyr/regs.rs1745
-rw-r--r--drivers/gpu/drm/tyr/tyr.rs4
-rw-r--r--drivers/gpu/nova-core/Kconfig3
-rw-r--r--drivers/gpu/nova-core/Makefile3
-rw-r--r--drivers/gpu/nova-core/driver.rs75
-rw-r--r--drivers/gpu/nova-core/falcon.rs49
-rw-r--r--drivers/gpu/nova-core/falcon/fsp.rs171
-rw-r--r--drivers/gpu/nova-core/falcon/gsp.rs23
-rw-r--r--drivers/gpu/nova-core/falcon/hal.rs34
-rw-r--r--drivers/gpu/nova-core/falcon/hal/ga102.rs22
-rw-r--r--drivers/gpu/nova-core/falcon/hal/tu102.rs14
-rw-r--r--drivers/gpu/nova-core/fb.rs66
-rw-r--r--drivers/gpu/nova-core/fb/hal.rs41
-rw-r--r--drivers/gpu/nova-core/fb/hal/ga100.rs29
-rw-r--r--drivers/gpu/nova-core/fb/hal/ga102.rs23
-rw-r--r--drivers/gpu/nova-core/fb/hal/gb100.rs122
-rw-r--r--drivers/gpu/nova-core/fb/hal/gb202.rs95
-rw-r--r--drivers/gpu/nova-core/fb/hal/gh100.rs50
-rw-r--r--drivers/gpu/nova-core/fb/hal/tu102.rs44
-rw-r--r--drivers/gpu/nova-core/firmware.rs197
-rw-r--r--drivers/gpu/nova-core/firmware/booter.rs34
-rw-r--r--drivers/gpu/nova-core/firmware/fsp.rs128
-rw-r--r--drivers/gpu/nova-core/firmware/fwsec.rs5
-rw-r--r--drivers/gpu/nova-core/firmware/fwsec/bootloader.rs2
-rw-r--r--drivers/gpu/nova-core/firmware/gsp.rs34
-rw-r--r--drivers/gpu/nova-core/fsp.rs320
-rw-r--r--drivers/gpu/nova-core/fsp/hal.rs32
-rw-r--r--drivers/gpu/nova-core/fsp/hal/gb100.rs23
-rw-r--r--drivers/gpu/nova-core/fsp/hal/gb202.rs27
-rw-r--r--drivers/gpu/nova-core/fsp/hal/gh100.rs32
-rw-r--r--drivers/gpu/nova-core/gfw.rs76
-rw-r--r--drivers/gpu/nova-core/gpu.rs119
-rw-r--r--drivers/gpu/nova-core/gpu/hal.rs39
-rw-r--r--drivers/gpu/nova-core/gpu/hal/gh100.rs34
-rw-r--r--drivers/gpu/nova-core/gpu/hal/tu102.rs100
-rw-r--r--drivers/gpu/nova-core/gsp.rs5
-rw-r--r--drivers/gpu/nova-core/gsp/boot.rs300
-rw-r--r--drivers/gpu/nova-core/gsp/cmdq.rs10
-rw-r--r--drivers/gpu/nova-core/gsp/commands.rs78
-rw-r--r--drivers/gpu/nova-core/gsp/fw.rs121
-rw-r--r--drivers/gpu/nova-core/gsp/fw/commands.rs60
-rw-r--r--drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs94
-rw-r--r--drivers/gpu/nova-core/gsp/hal.rs94
-rw-r--r--drivers/gpu/nova-core/gsp/hal/gh100.rs192
-rw-r--r--drivers/gpu/nova-core/gsp/hal/tu102.rs349
-rw-r--r--drivers/gpu/nova-core/gsp/sequencer.rs15
-rw-r--r--drivers/gpu/nova-core/mctp.rs88
-rw-r--r--drivers/gpu/nova-core/nova_core.rs9
-rw-r--r--drivers/gpu/nova-core/regs.rs136
-rw-r--r--drivers/gpu/nova-core/vbios.rs632
-rw-r--r--drivers/pwm/pwm_th1520.rs13
-rw-r--r--include/drm/drm_gem_shmem_helper.h1
-rw-r--r--include/linux/auxiliary_bus.h4
-rw-r--r--include/linux/device/driver.h4
-rw-r--r--rust/Makefile1
-rw-r--r--rust/bindings/bindings_helper.h1
-rw-r--r--rust/helpers/drm_gpuvm.c26
-rw-r--r--rust/helpers/helpers.c1
-rw-r--r--rust/kernel/alloc/kbox.rs46
-rw-r--r--rust/kernel/auxiliary.rs285
-rw-r--r--rust/kernel/cpufreq.rs9
-rw-r--r--rust/kernel/device.rs121
-rw-r--r--rust/kernel/devres.rs2
-rw-r--r--rust/kernel/dma.rs2
-rw-r--r--rust/kernel/driver.rs41
-rw-r--r--rust/kernel/drm/device.rs252
-rw-r--r--rust/kernel/drm/driver.rs49
-rw-r--r--rust/kernel/drm/gem/mod.rs72
-rw-r--r--rust/kernel/drm/gem/shmem.rs61
-rw-r--r--rust/kernel/drm/gpuvm/mod.rs328
-rw-r--r--rust/kernel/drm/gpuvm/sm_ops.rs429
-rw-r--r--rust/kernel/drm/gpuvm/va.rs168
-rw-r--r--rust/kernel/drm/gpuvm/vm_bo.rs249
-rw-r--r--rust/kernel/drm/mod.rs5
-rw-r--r--rust/kernel/i2c.rs61
-rw-r--r--rust/kernel/io/mem.rs121
-rw-r--r--rust/kernel/pci.rs51
-rw-r--r--rust/kernel/pci/id.rs2
-rw-r--r--rust/kernel/pci/io.rs54
-rw-r--r--rust/kernel/platform.rs52
-rw-r--r--rust/kernel/types.rs12
-rw-r--r--rust/kernel/types/for_lt.rs122
-rw-r--r--rust/kernel/usb.rs57
-rw-r--r--rust/macros/for_lt.rs248
-rw-r--r--rust/macros/lib.rs13
-rw-r--r--samples/rust/rust_debugfs.rs11
-rw-r--r--samples/rust/rust_dma.rs6
-rw-r--r--samples/rust/rust_driver_auxiliary.rs79
-rw-r--r--samples/rust/rust_driver_i2c.rs13
-rw-r--r--samples/rust/rust_driver_pci.rs90
-rw-r--r--samples/rust/rust_driver_platform.rs9
-rw-r--r--samples/rust/rust_driver_usb.rs15
-rw-r--r--samples/rust/rust_i2c_client.rs14
-rw-r--r--samples/rust/rust_soc.rs9
111 files changed, 7909 insertions, 1793 deletions
diff --git a/Documentation/gpu/nova/core/vbios.rst b/Documentation/gpu/nova/core/vbios.rst
index efd40087480c..9d3379ccfb30 100644
--- a/Documentation/gpu/nova/core/vbios.rst
+++ b/Documentation/gpu/nova/core/vbios.rst
@@ -46,12 +46,71 @@ region is only accessible to heavy-secure ucode.
are of type 0xE0 and can be identified as such. This could be subject to change
in future generations.
+IFR Header
+----------
+On Kepler and later GPUs, the ROM begins with an Init-from-ROM (IFR) header
+rather than a standard PCI ROM signature (0xAA55). The driver must parse the
+IFR header to find where the PCI ROM images actually start.
+
+Init-from-ROM (IFR) is a special GPU feature used for power management
+on some Nvidia GPUs. It references data in the VBIOS for its operation,
+but for drivers the important piece is a header that precedes the
+VBIOS PCI Expansion ROM.
+
+Most such GPUs do not need to parse the IFR header in order to find the
+VBIOS, but the Nvidia GA100 is the exception. GA100 lacks a display engine,
+so the PRAMIN method (which reads the VBIOS from VRAM via display hardware)
+is unavailable, forcing the driver to read the ROM directly via PROM.
+On other similar GPUs, either PRAMIN succeeds before PROM is tried, or the
+IFR hardware has already applied the ROM offset so that PROM reads
+transparently skip the IFR header.
+
+The driver should first check for the standard 0xAA55 signature at offset 0.
+If found, there is no IFR header and the PCI ROM images start at
+offset 0. If not found, check for the IFR signature and parse the header to
+determine the PCI ROM image offset.
+
+Fixed Header Format
+~~~~~~~~~~~~~~~~~~~
+
+The IFR header begins with four 32-bit words at fixed offsets::
+
+ Offset Name Fields
+ ------ ------- ------
+ 0x00 FIXED0 bits 31:0 - Signature (must be 0x4947564E, ASCII "NVGI")
+ 0x04 FIXED1 bit 31 - Reserved
+ bits 30:16 - FIXED_DATA_SIZE Fixed data size (offset to extended section)
+ bits 15:8 - VERSIONSW Software version
+ bits 7:0 - Reserved
+ 0x08 FIXED2 bit 31 - Reserved
+ bits 30:20 - Reserved (zero)
+ bits 19:0 - TOTAL_DATA_SIZE Total data size
+
+Finding the PCI ROM Image Offset
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The method to find this offset depends on `VERSIONSW`.
+
+- **Version 1 and 2**: Read `FIXED_DATA_SIZE` from `FIXED1` to get the extended
+ section offset. The PCI ROM image offset is the 32-bit value at `FIXED_DATA_SIZE + 4`.
+
+- **Version 3**: Read `TOTAL_DATA_SIZE` from `FIXED2`. The 32-bit value at that
+ offset is a flash status offset. Add 4096 to get the ROM directory offset,
+ `ROM_DIRECTORY_OFFSET`. The ROM directory must have signature 0x44524652
+ (ASCII "RFRD"). The PCI ROM image offset is the 32-bit value at
+ `ROM_DIRECTORY_OFFSET + 8`.
+
+The PCI ROM image offset must be 4-byte aligned. All offsets are relative to the
+start of ROM (BAR0 + 0x300000).
+
VBIOS ROM Layout
----------------
-The VBIOS layout is roughly a series of concatenated images laid out as follows::
+The VBIOS (PCI Expansion ROM) is a series of concatenated images laid out as
+follows. On GPUs with an IFR header, this layout begins at the image offset
+determined by parsing the IFR header. On older GPUs, it begins at offset 0::
+----------------------------------------------------------------------------+
- | VBIOS (Starting at ROM_OFFSET: 0x300000) |
+ | VBIOS (Starting at ROM_OFFSET: 0x300000 + IFR image offset) |
+----------------------------------------------------------------------------+
| +-----------------------------------------------+ |
| | PciAt Image (Type 0x00) | |
@@ -173,7 +232,7 @@ Falcon data in the VBIOS which contains the PMU lookup table. This lookup table
used to find the required Falcon ucode based on an application ID.
The location of the PMU lookup table is found by scanning the BIT (`BIOS Information Table`_)
-tokens for a token with the id `BIT_TOKEN_ID_FALCON_DATA` (0x70) which indicates the
+tokens for a token with the Falcon data token id (0x70) which indicates the
offset of the same from the start of the VBIOS image. Unfortunately, the offset
does not account for the EFI image located between the PciAt and FwSec images.
The `vbios.rs` code compensates for this with appropriate arithmetic.
diff --git a/MAINTAINERS b/MAINTAINERS
index ac87c217ab1f..4ae6919454c3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8902,6 +8902,8 @@ S: Supported
T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
F: drivers/gpu/drm/drm_gpuvm.c
F: include/drm/drm_gpuvm.h
+F: rust/helpers/drm_gpuvm.c
+F: rust/kernel/drm/gpuvm/
DRM LOG
M: Jocelyn Falempe <jfalempe@redhat.com>
diff --git a/drivers/base/base.h b/drivers/base/base.h
index 30b416588617..a19f4cda2c83 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -86,18 +86,6 @@ struct driver_private {
};
#define to_driver(obj) container_of(obj, struct driver_private, kobj)
-#ifdef CONFIG_RUST
-/**
- * struct driver_type - Representation of a Rust driver type.
- */
-struct driver_type {
- /**
- * @id: Representation of core::any::TypeId.
- */
- u8 id[16];
-} __packed;
-#endif
-
/**
* struct device_private - structure to hold the private to the driver core
* portions of the device structure.
@@ -115,7 +103,6 @@ struct driver_type {
* dev_err_probe() for later retrieval via debugfs
* @device: pointer back to the struct device that this structure is
* associated with.
- * @driver_type: The type of the bound Rust driver.
* @dead: This device is currently either in the process of or has been
* removed from the system. Any asynchronous events scheduled for this
* device should exit without taking any action.
@@ -132,9 +119,6 @@ struct device_private {
const struct device_driver *async_driver;
char *deferred_probe_reason;
struct device *device;
-#ifdef CONFIG_RUST
- struct driver_type driver_type;
-#endif
u8 dead:1;
};
#define to_device_private_parent(obj) \
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 1dc1e3528043..73801b40a416 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -595,9 +595,9 @@ static DEVICE_ATTR_RW(state_synced);
static void device_unbind_cleanup(struct device *dev)
{
- devres_release_all(dev);
if (dev->driver->p_cb.post_unbind_rust)
dev->driver->p_cb.post_unbind_rust(dev);
+ devres_release_all(dev);
arch_teardown_dma_ops(dev);
kfree(dev->dma_range_map);
dev->dma_range_map = NULL;
diff --git a/drivers/cpufreq/rcpufreq_dt.rs b/drivers/cpufreq/rcpufreq_dt.rs
index f17bf64c22e2..10106fa13095 100644
--- a/drivers/cpufreq/rcpufreq_dt.rs
+++ b/drivers/cpufreq/rcpufreq_dt.rs
@@ -201,12 +201,13 @@ kernel::of_device_table!(
impl platform::Driver for CPUFreqDTDriver {
type IdInfo = ();
+ type Data<'bound> = Self;
const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE);
- fn probe(
- pdev: &platform::Device<Core>,
- _id_info: Option<&Self::IdInfo>,
- ) -> impl PinInit<Self, Error> {
+ fn probe<'bound>(
+ pdev: &'bound platform::Device<Core<'_>>,
+ _id_info: Option<&'bound Self::IdInfo>,
+ ) -> impl PinInit<Self, Error> + 'bound {
cpufreq::Registration::<CPUFreqDTDriver>::new_foreign_owned(pdev.as_ref())?;
Ok(Self {})
}
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 8f5a8d3012e4..323422861e8f 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -218,6 +218,13 @@ config DRM_GPUVM
GPU-VM representation providing helpers to manage a GPUs virtual
address space
+config RUST_DRM_GPUVM
+ bool
+ depends on DRM
+ select DRM_GPUVM
+ help
+ Choose this if you need GPUVM functions in Rust
+
config DRM_GPUSVM
tristate
depends on DRM
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 545933c7f712..c989459eb215 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -159,6 +159,30 @@ struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t
EXPORT_SYMBOL_GPL(drm_gem_shmem_create);
/**
+ * __drm_gem_shmem_free_sgt_locked - Unpin and DMA unmap pages, and release the
+ * cached scatter/gather table for an shmem GEM object.
+ * @shmem: shmem GEM object
+ *
+ * If the passed shmem object has an active scatter/gather table for driver
+ * usage, this function will unmap it and release the memory associated with it.
+ * It is the responsibility of the caller to ensure it holds the dma_resv_lock
+ * for this object.
+ *
+ * Drivers should not need to call this function themselves, it is mainly
+ * intended for usage in the Rust shmem bindings.
+ */
+void __drm_gem_shmem_free_sgt_locked(struct drm_gem_shmem_object *shmem)
+{
+ dma_resv_assert_held(shmem->base.resv);
+
+ dma_unmap_sgtable(shmem->base.dev->dev, shmem->sgt, DMA_BIDIRECTIONAL, 0);
+ sg_free_table(shmem->sgt);
+ kfree(shmem->sgt);
+ shmem->sgt = NULL;
+}
+EXPORT_SYMBOL_GPL(__drm_gem_shmem_free_sgt_locked);
+
+/**
* drm_gem_shmem_release - Release resources associated with a shmem GEM object.
* @shmem: shmem GEM object
*
@@ -176,12 +200,8 @@ void drm_gem_shmem_release(struct drm_gem_shmem_object *shmem)
drm_WARN_ON(obj->dev, refcount_read(&shmem->vmap_use_count));
- if (shmem->sgt) {
- dma_unmap_sgtable(obj->dev->dev, shmem->sgt,
- DMA_BIDIRECTIONAL, 0);
- sg_free_table(shmem->sgt);
- kfree(shmem->sgt);
- }
+ if (shmem->sgt)
+ __drm_gem_shmem_free_sgt_locked(shmem);
if (shmem->pages)
drm_gem_shmem_put_pages_locked(shmem);
diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index 078cbc0235a4..c422c5af1f4b 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -25,6 +25,7 @@
*
*/
+#include <drm/drm_drv.h>
#include <drm/drm_gpuvm.h>
#include <drm/drm_print.h>
@@ -1117,6 +1118,7 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name,
gpuvm->drm = drm;
gpuvm->r_obj = r_obj;
+ drm_dev_get(drm);
drm_gem_object_get(r_obj);
drm_gpuvm_warn_check_overflow(gpuvm, start_offset, range);
@@ -1160,13 +1162,15 @@ static void
drm_gpuvm_free(struct kref *kref)
{
struct drm_gpuvm *gpuvm = container_of(kref, struct drm_gpuvm, kref);
+ struct drm_device *drm = gpuvm->drm;
drm_gpuvm_fini(gpuvm);
- if (drm_WARN_ON(gpuvm->drm, !gpuvm->ops->vm_free))
+ if (drm_WARN_ON(drm, !gpuvm->ops->vm_free))
return;
gpuvm->ops->vm_free(gpuvm);
+ drm_dev_put(drm);
}
/**
diff --git a/drivers/gpu/drm/nova/Kconfig b/drivers/gpu/drm/nova/Kconfig
index 3e637ad7b5ba..ba16c74401f8 100644
--- a/drivers/gpu/drm/nova/Kconfig
+++ b/drivers/gpu/drm/nova/Kconfig
@@ -4,6 +4,7 @@ config DRM_NOVA
depends on DRM=y
depends on PCI
depends on RUST
+ depends on !CPU_BIG_ENDIAN
select AUXILIARY_BUS
select NOVA_CORE
default n
@@ -13,4 +14,4 @@ config DRM_NOVA
This driver is work in progress and may not be functional.
- If M is selected, the module will be called nova.
+ If M is selected, the module will be called nova-drm.
diff --git a/drivers/gpu/drm/nova/Makefile b/drivers/gpu/drm/nova/Makefile
index 42019bff3173..f8527b2b7b4a 100644
--- a/drivers/gpu/drm/nova/Makefile
+++ b/drivers/gpu/drm/nova/Makefile
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_DRM_NOVA) += nova.o
+obj-$(CONFIG_DRM_NOVA) += nova-drm.o
+nova-drm-y := nova.o
diff --git a/drivers/gpu/drm/nova/driver.rs b/drivers/gpu/drm/nova/driver.rs
index b1af0a099551..48933d86ddda 100644
--- a/drivers/gpu/drm/nova/driver.rs
+++ b/drivers/gpu/drm/nova/driver.rs
@@ -15,13 +15,15 @@ use kernel::{
use crate::file::File;
use crate::gem::NovaObject;
-pub(crate) struct NovaDriver {
+pub(crate) struct NovaDriver;
+
+pub(crate) struct Nova {
#[expect(unused)]
- drm: ARef<drm::Device<Self>>,
+ drm: ARef<drm::Device<NovaDriver>>,
}
/// Convenience type alias for the DRM device type for this driver
-pub(crate) type NovaDevice = drm::Device<NovaDriver>;
+pub(crate) type NovaDevice<Ctx = drm::Registered> = drm::Device<NovaDriver, Ctx>;
#[pin_data]
pub(crate) struct NovaData {
@@ -32,11 +34,11 @@ const INFO: drm::DriverInfo = drm::DriverInfo {
major: 0,
minor: 0,
patchlevel: 0,
- name: c"nova",
- desc: c"Nvidia Graphics",
+ name: c"nova-drm",
+ desc: c"NVIDIA Graphics and Compute",
};
-const NOVA_CORE_MODULE_NAME: &CStr = c"NovaCore";
+const NOVA_CORE_MODULE_NAME: &CStr = c"nova-core";
const AUXILIARY_NAME: &CStr = c"nova-drm";
kernel::auxiliary_device_table!(
@@ -51,15 +53,19 @@ kernel::auxiliary_device_table!(
impl auxiliary::Driver for NovaDriver {
type IdInfo = ();
+ type Data<'bound> = Nova;
const ID_TABLE: auxiliary::IdTable<Self::IdInfo> = &AUX_TABLE;
- fn probe(adev: &auxiliary::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, Error> {
+ fn probe<'bound>(
+ adev: &'bound auxiliary::Device<Core<'_>>,
+ _info: &'bound Self::IdInfo,
+ ) -> impl PinInit<Self::Data<'bound>, Error> + 'bound {
let data = try_pin_init!(NovaData { adev: adev.into() });
- let drm = drm::Device::<Self>::new(adev.as_ref(), data)?;
- drm::Registration::new_foreign_owned(&drm, adev.as_ref(), 0)?;
+ let drm = drm::UnregisteredDevice::<Self>::new(adev.as_ref(), data)?;
+ let drm = drm::Registration::new_foreign_owned(drm, adev.as_ref(), 0)?;
- Ok(Self { drm })
+ Ok(Nova { drm: drm.into() })
}
}
@@ -67,7 +73,7 @@ impl auxiliary::Driver for NovaDriver {
impl drm::Driver for NovaDriver {
type Data = NovaData;
type File = File;
- type Object = gem::Object<NovaObject>;
+ type Object<Ctx: drm::DeviceContext> = gem::Object<NovaObject, Ctx>;
const INFO: drm::DriverInfo = INFO;
diff --git a/drivers/gpu/drm/nova/gem.rs b/drivers/gpu/drm/nova/gem.rs
index e073e174e257..9d8ff7de2c0f 100644
--- a/drivers/gpu/drm/nova/gem.rs
+++ b/drivers/gpu/drm/nova/gem.rs
@@ -2,7 +2,7 @@
use kernel::{
drm,
- drm::{gem, gem::BaseObject},
+ drm::{gem, gem::BaseObject, DeviceContext},
page,
prelude::*,
sync::aref::ARef,
@@ -21,20 +21,27 @@ impl gem::DriverObject for NovaObject {
type Driver = NovaDriver;
type Args = ();
- fn new(_dev: &NovaDevice, _size: usize, _args: Self::Args) -> impl PinInit<Self, Error> {
+ fn new<Ctx: DeviceContext>(
+ _dev: &NovaDevice<Ctx>,
+ _size: usize,
+ _args: Self::Args,
+ ) -> impl PinInit<Self, Error> {
try_pin_init!(NovaObject {})
}
}
impl NovaObject {
/// Create a new DRM GEM object.
- pub(crate) fn new(dev: &NovaDevice, size: usize) -> Result<ARef<gem::Object<Self>>> {
+ pub(crate) fn new<Ctx: DeviceContext>(
+ dev: &NovaDevice<Ctx>,
+ size: usize,
+ ) -> Result<ARef<gem::Object<Self, Ctx>>> {
if size == 0 {
return Err(EINVAL);
}
let aligned_size = page::page_align(size).ok_or(EINVAL)?;
- gem::Object::new(dev, aligned_size, ())
+ gem::Object::<Self, Ctx>::new(dev, aligned_size, ())
}
/// Look up a GEM object handle for a `File` and return an `ObjectRef` for it.
diff --git a/drivers/gpu/drm/nova/nova.rs b/drivers/gpu/drm/nova/nova.rs
index 8893e58ee0db..1fd454c7e0df 100644
--- a/drivers/gpu/drm/nova/nova.rs
+++ b/drivers/gpu/drm/nova/nova.rs
@@ -10,7 +10,7 @@ use crate::driver::NovaDriver;
kernel::module_auxiliary_driver! {
type: NovaDriver,
- name: "Nova",
+ name: "nova-drm",
authors: ["Danilo Krummrich"],
description: "Nova GPU driver",
license: "GPL v2",
diff --git a/drivers/gpu/drm/tyr/Kconfig b/drivers/gpu/drm/tyr/Kconfig
index e933e6478027..51a68ef8212c 100644
--- a/drivers/gpu/drm/tyr/Kconfig
+++ b/drivers/gpu/drm/tyr/Kconfig
@@ -8,6 +8,7 @@ config DRM_TYR
depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE
depends on COMMON_CLK
default n
+ select RUST_DRM_GEM_SHMEM_HELPER
help
Rust DRM driver for ARM Mali CSF-based GPUs.
diff --git a/drivers/gpu/drm/tyr/driver.rs b/drivers/gpu/drm/tyr/driver.rs
index 279710b36a10..d063bc664cc1 100644
--- a/drivers/gpu/drm/tyr/driver.rs
+++ b/drivers/gpu/drm/tyr/driver.rs
@@ -6,14 +6,19 @@ use kernel::{
OptionalClk, //
},
device::{
- Bound,
Core,
Device, //
},
- devres::Devres,
+ dma::{
+ Device as DmaDevice,
+ DmaMask, //
+ },
drm,
drm::ioctl,
- io::poll,
+ io::{
+ poll,
+ Io, //
+ },
new_mutex,
of,
platform,
@@ -23,7 +28,6 @@ use kernel::{
sizes::SZ_2M,
sync::{
aref::ARef,
- Arc,
Mutex, //
},
time, //
@@ -31,25 +35,27 @@ use kernel::{
use crate::{
file::TyrDrmFileData,
- gem::TyrObject,
+ gem::BoData,
gpu,
gpu::GpuInfo,
- regs, //
+ regs::gpu_control::*, //
};
-pub(crate) type IoMem = kernel::io::mem::IoMem<SZ_2M>;
+pub(crate) type IoMem<'a> = kernel::io::mem::IoMem<'a, SZ_2M>;
pub(crate) struct TyrDrmDriver;
/// Convenience type alias for the DRM device type for this driver.
-pub(crate) type TyrDrmDevice = drm::Device<TyrDrmDriver>;
+pub(crate) type TyrDrmDevice<Ctx = drm::Registered> = drm::Device<TyrDrmDriver, Ctx>;
+
+pub(crate) struct TyrPlatformDriver;
#[pin_data(PinnedDrop)]
pub(crate) struct TyrPlatformDriverData {
_device: ARef<TyrDrmDevice>,
}
-#[pin_data(PinnedDrop)]
+#[pin_data]
pub(crate) struct TyrDrmDeviceData {
pub(crate) pdev: ARef<platform::Device>,
@@ -65,12 +71,12 @@ pub(crate) struct TyrDrmDeviceData {
pub(crate) gpu_info: GpuInfo,
}
-fn issue_soft_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result {
- regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?;
+fn issue_soft_reset(dev: &Device, iomem: &IoMem<'_>) -> Result {
+ iomem.write_reg(GPU_COMMAND::reset(ResetMode::SoftReset));
poll::read_poll_timeout(
- || regs::GPU_IRQ_RAWSTAT.read(dev, iomem),
- |status| *status & regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED != 0,
+ || Ok(iomem.read(GPU_IRQ_RAWSTAT)),
+ |status| status.reset_completed(),
time::Delta::from_millis(1),
time::Delta::from_millis(100),
)
@@ -82,21 +88,22 @@ fn issue_soft_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result {
kernel::of_device_table!(
OF_TABLE,
MODULE_OF_TABLE,
- <TyrPlatformDriverData as platform::Driver>::IdInfo,
+ <TyrPlatformDriver as platform::Driver>::IdInfo,
[
(of::DeviceId::new(c"rockchip,rk3588-mali"), ()),
(of::DeviceId::new(c"arm,mali-valhall-csf"), ())
]
);
-impl platform::Driver for TyrPlatformDriverData {
+impl platform::Driver for TyrPlatformDriver {
type IdInfo = ();
+ type Data<'bound> = TyrPlatformDriverData;
const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE);
- fn probe(
- pdev: &platform::Device<Core>,
- _info: Option<&Self::IdInfo>,
- ) -> impl PinInit<Self, Error> {
+ fn probe<'bound>(
+ pdev: &'bound platform::Device<Core<'_>>,
+ _info: Option<&'bound Self::IdInfo>,
+ ) -> impl PinInit<Self::Data<'bound>, Error> + 'bound {
let core_clk = Clk::get(pdev.as_ref(), Some(c"core"))?;
let stacks_clk = OptionalClk::get(pdev.as_ref(), Some(c"stacks"))?;
let coregroup_clk = OptionalClk::get(pdev.as_ref(), Some(c"coregroup"))?;
@@ -109,13 +116,21 @@ impl platform::Driver for TyrPlatformDriverData {
let sram_regulator = Regulator::<regulator::Enabled>::get(pdev.as_ref(), c"sram")?;
let request = pdev.io_request_by_index(0).ok_or(ENODEV)?;
- let iomem = Arc::pin_init(request.iomap_sized::<SZ_2M>(), GFP_KERNEL)?;
+ let iomem = request.iomap_sized::<SZ_2M>()?;
issue_soft_reset(pdev.as_ref(), &iomem)?;
gpu::l2_power_on(pdev.as_ref(), &iomem)?;
- let gpu_info = GpuInfo::new(pdev.as_ref(), &iomem)?;
- gpu_info.log(pdev);
+ let gpu_info = GpuInfo::new(&iomem);
+ gpu_info.log(pdev.as_ref());
+
+ let pa_bits = MMU_FEATURES::from_raw(gpu_info.mmu_features)
+ .pa_bits()
+ .get();
+ // SAFETY: No concurrent DMA allocations or mappings can be made because
+ // the device is still being probed and therefore isn't being used by
+ // other threads of execution.
+ unsafe { pdev.dma_set_mask_and_coherent(DmaMask::try_new(pa_bits)?)? };
let platform: ARef<platform::Device> = pdev.into();
@@ -133,10 +148,12 @@ impl platform::Driver for TyrPlatformDriverData {
gpu_info,
});
- let ddev: ARef<TyrDrmDevice> = drm::Device::new(pdev.as_ref(), data)?;
- drm::driver::Registration::new_foreign_owned(&ddev, pdev.as_ref(), 0)?;
+ let tdev = drm::UnregisteredDevice::<TyrDrmDriver>::new(pdev.as_ref(), data)?;
+ let tdev = drm::driver::Registration::new_foreign_owned(tdev, pdev.as_ref(), 0)?;
- let driver = TyrPlatformDriverData { _device: ddev };
+ let driver = TyrPlatformDriverData {
+ _device: tdev.into(),
+ };
// We need this to be dev_info!() because dev_dbg!() does not work at
// all in Rust for now, and we need to see whether probe succeeded.
@@ -150,17 +167,6 @@ impl PinnedDrop for TyrPlatformDriverData {
fn drop(self: Pin<&mut Self>) {}
}
-#[pinned_drop]
-impl PinnedDrop for TyrDrmDeviceData {
- fn drop(self: Pin<&mut Self>) {
- // TODO: the type-state pattern for Clks will fix this.
- let clks = self.clks.lock();
- clks.core.disable_unprepare();
- clks.stacks.disable_unprepare();
- clks.coregroup.disable_unprepare();
- }
-}
-
// We need to retain the name "panthor" to achieve drop-in compatibility with
// the C driver in the userspace stack.
const INFO: drm::DriverInfo = drm::DriverInfo {
@@ -175,23 +181,30 @@ const INFO: drm::DriverInfo = drm::DriverInfo {
impl drm::Driver for TyrDrmDriver {
type Data = TyrDrmDeviceData;
type File = TyrDrmFileData;
- type Object = drm::gem::Object<TyrObject>;
+ type Object<R: drm::DeviceContext> = drm::gem::shmem::Object<BoData, R>;
const INFO: drm::DriverInfo = INFO;
+ const FEAT_RENDER: bool = true;
kernel::declare_drm_ioctls! {
(PANTHOR_DEV_QUERY, drm_panthor_dev_query, ioctl::RENDER_ALLOW, TyrDrmFileData::dev_query),
}
}
-#[pin_data]
struct Clocks {
core: Clk,
stacks: OptionalClk,
coregroup: OptionalClk,
}
-#[pin_data]
+impl Drop for Clocks {
+ fn drop(&mut self) {
+ self.core.disable_unprepare();
+ self.stacks.disable_unprepare();
+ self.coregroup.disable_unprepare();
+ }
+}
+
struct Regulators {
_mali: Regulator<regulator::Enabled>,
_sram: Regulator<regulator::Enabled>,
diff --git a/drivers/gpu/drm/tyr/gem.rs b/drivers/gpu/drm/tyr/gem.rs
index 5cc6eb0b5d3f..c6d4d6f9bae3 100644
--- a/drivers/gpu/drm/tyr/gem.rs
+++ b/drivers/gpu/drm/tyr/gem.rs
@@ -1,7 +1,14 @@
// SPDX-License-Identifier: GPL-2.0 or MIT
+//! GEM buffer object management for the Tyr driver.
+//!
+//! This module provides buffer object (BO) management functionality using
+//! DRM's GEM subsystem with shmem backing.
use kernel::{
- drm::gem,
+ drm::{
+ gem,
+ DeviceContext, //
+ },
prelude::*, //
};
@@ -10,15 +17,27 @@ use crate::driver::{
TyrDrmDriver, //
};
-/// GEM Object inner driver data
+/// Tyr's DriverObject type for GEM objects.
#[pin_data]
-pub(crate) struct TyrObject {}
+pub(crate) struct BoData {
+ flags: u32,
+}
+
+/// Provides a way to pass arguments when creating BoData
+/// as required by the gem::DriverObject trait.
+pub(crate) struct BoCreateArgs {
+ flags: u32,
+}
-impl gem::DriverObject for TyrObject {
+impl gem::DriverObject for BoData {
type Driver = TyrDrmDriver;
- type Args = ();
+ type Args = BoCreateArgs;
- fn new(_dev: &TyrDrmDevice, _size: usize, _args: ()) -> impl PinInit<Self, Error> {
- try_pin_init!(TyrObject {})
+ fn new<Ctx: DeviceContext>(
+ _dev: &TyrDrmDevice<Ctx>,
+ _size: usize,
+ args: BoCreateArgs,
+ ) -> impl PinInit<Self, Error> {
+ try_pin_init!(Self { flags: args.flags })
}
}
diff --git a/drivers/gpu/drm/tyr/gpu.rs b/drivers/gpu/drm/tyr/gpu.rs
index a88775160f98..592b8bb16eba 100644
--- a/drivers/gpu/drm/tyr/gpu.rs
+++ b/drivers/gpu/drm/tyr/gpu.rs
@@ -5,14 +5,15 @@ use core::ops::{
DerefMut, //
};
use kernel::{
- bits::genmask_u32,
device::{
Bound,
Device, //
},
- devres::Devres,
- io::poll,
- platform,
+ io::{
+ poll,
+ register::Array,
+ Io, //
+ },
prelude::*,
time::Delta,
transmute::AsBytes,
@@ -21,7 +22,10 @@ use kernel::{
use crate::{
driver::IoMem,
- regs, //
+ regs::{
+ gpu_control::*,
+ join_u64, //
+ }, //
};
/// Struct containing information that can be queried by userspace. This is read from
@@ -29,120 +33,93 @@ use crate::{
///
/// # Invariants
///
-/// - The layout of this struct identical to the C `struct drm_panthor_gpu_info`.
+/// - The layout of this struct is identical to the C `struct drm_panthor_gpu_info`.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub(crate) struct GpuInfo(pub(crate) uapi::drm_panthor_gpu_info);
impl GpuInfo {
- pub(crate) fn new(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result<Self> {
- let gpu_id = regs::GPU_ID.read(dev, iomem)?;
- let csf_id = regs::GPU_CSF_ID.read(dev, iomem)?;
- let gpu_rev = regs::GPU_REVID.read(dev, iomem)?;
- let core_features = regs::GPU_CORE_FEATURES.read(dev, iomem)?;
- let l2_features = regs::GPU_L2_FEATURES.read(dev, iomem)?;
- let tiler_features = regs::GPU_TILER_FEATURES.read(dev, iomem)?;
- let mem_features = regs::GPU_MEM_FEATURES.read(dev, iomem)?;
- let mmu_features = regs::GPU_MMU_FEATURES.read(dev, iomem)?;
- let thread_features = regs::GPU_THREAD_FEATURES.read(dev, iomem)?;
- let max_threads = regs::GPU_THREAD_MAX_THREADS.read(dev, iomem)?;
- let thread_max_workgroup_size = regs::GPU_THREAD_MAX_WORKGROUP_SIZE.read(dev, iomem)?;
- let thread_max_barrier_size = regs::GPU_THREAD_MAX_BARRIER_SIZE.read(dev, iomem)?;
- let coherency_features = regs::GPU_COHERENCY_FEATURES.read(dev, iomem)?;
-
- let texture_features = regs::GPU_TEXTURE_FEATURES0.read(dev, iomem)?;
-
- let as_present = regs::GPU_AS_PRESENT.read(dev, iomem)?;
-
- let shader_present = u64::from(regs::GPU_SHADER_PRESENT_LO.read(dev, iomem)?);
- let shader_present =
- shader_present | u64::from(regs::GPU_SHADER_PRESENT_HI.read(dev, iomem)?) << 32;
-
- let tiler_present = u64::from(regs::GPU_TILER_PRESENT_LO.read(dev, iomem)?);
- let tiler_present =
- tiler_present | u64::from(regs::GPU_TILER_PRESENT_HI.read(dev, iomem)?) << 32;
-
- let l2_present = u64::from(regs::GPU_L2_PRESENT_LO.read(dev, iomem)?);
- let l2_present = l2_present | u64::from(regs::GPU_L2_PRESENT_HI.read(dev, iomem)?) << 32;
-
- Ok(Self(uapi::drm_panthor_gpu_info {
- gpu_id,
- gpu_rev,
- csf_id,
- l2_features,
- tiler_features,
- mem_features,
- mmu_features,
- thread_features,
- max_threads,
- thread_max_workgroup_size,
- thread_max_barrier_size,
- coherency_features,
- // TODO: Add texture_features_{1,2,3}.
- texture_features: [texture_features, 0, 0, 0],
- as_present,
+ pub(crate) fn new(io: &IoMem<'_>) -> Self {
+ Self(uapi::drm_panthor_gpu_info {
+ gpu_id: io.read(GPU_ID).into_raw(),
+ gpu_rev: io.read(REVIDR).into_raw(),
+ csf_id: io.read(CSF_ID).into_raw(),
+ l2_features: io.read(L2_FEATURES).into_raw(),
+ tiler_features: io.read(TILER_FEATURES).into_raw(),
+ mem_features: io.read(MEM_FEATURES).into_raw(),
+ mmu_features: io.read(MMU_FEATURES).into_raw(),
+ thread_features: io.read(THREAD_FEATURES).into_raw(),
+ max_threads: io.read(THREAD_MAX_THREADS).into_raw(),
+ thread_max_workgroup_size: io.read(THREAD_MAX_WORKGROUP_SIZE).into_raw(),
+ thread_max_barrier_size: io.read(THREAD_MAX_BARRIER_SIZE).into_raw(),
+ coherency_features: io.read(COHERENCY_FEATURES).into_raw(),
+ texture_features: [
+ io.read(TEXTURE_FEATURES::at(0)).supported_formats().get(),
+ io.read(TEXTURE_FEATURES::at(1)).supported_formats().get(),
+ io.read(TEXTURE_FEATURES::at(2)).supported_formats().get(),
+ io.read(TEXTURE_FEATURES::at(3)).supported_formats().get(),
+ ],
+ as_present: io.read(AS_PRESENT).into_raw(),
selected_coherency: uapi::drm_panthor_gpu_coherency_DRM_PANTHOR_GPU_COHERENCY_NONE,
- shader_present,
- l2_present,
- tiler_present,
- core_features,
+ shader_present: join_u64(
+ io.read(SHADER_PRESENT_LO).into_raw(),
+ io.read(SHADER_PRESENT_HI).into_raw(),
+ ),
+ l2_present: join_u64(
+ io.read(L2_PRESENT_LO).into_raw(),
+ io.read(L2_PRESENT_HI).into_raw(),
+ ),
+ tiler_present: join_u64(
+ io.read(TILER_PRESENT_LO).into_raw(),
+ io.read(TILER_PRESENT_HI).into_raw(),
+ ),
+ core_features: io.read(CORE_FEATURES).into_raw(),
+ // Padding must be zero.
pad: 0,
+ // GPU_FEATURES register is not available; it was introduced in arch 11.x.
gpu_features: 0,
- }))
+ })
}
- pub(crate) fn log(&self, pdev: &platform::Device) {
- let gpu_id = GpuId::from(self.gpu_id);
+ pub(crate) fn log(&self, dev: &Device<Bound>) {
+ let gpu_id = GPU_ID::from_raw(self.gpu_id);
- let model_name = if let Some(model) = GPU_MODELS
- .iter()
- .find(|&f| f.arch_major == gpu_id.arch_major && f.prod_major == gpu_id.prod_major)
- {
+ let model_name = if let Some(model) = GPU_MODELS.iter().find(|&f| {
+ f.arch_major == gpu_id.arch_major().get() && f.prod_major == gpu_id.prod_major().get()
+ }) {
model.name
} else {
"unknown"
};
dev_info!(
- pdev,
- "mali-{} id 0x{:x} major 0x{:x} minor 0x{:x} status 0x{:x}",
+ dev,
+ "mali-{} GPU_ID 0x{:x} major 0x{:x} minor 0x{:x} status 0x{:x}",
model_name,
- self.gpu_id >> 16,
- gpu_id.ver_major,
- gpu_id.ver_minor,
- gpu_id.ver_status
+ gpu_id.into_raw(),
+ gpu_id.ver_major().get(),
+ gpu_id.ver_minor().get(),
+ gpu_id.ver_status().get()
);
dev_info!(
- pdev,
+ dev,
"Features: L2:{:#x} Tiler:{:#x} Mem:{:#x} MMU:{:#x} AS:{:#x}",
self.l2_features,
self.tiler_features,
self.mem_features,
self.mmu_features,
- self.as_present
+ self.as_present,
);
dev_info!(
- pdev,
+ dev,
"shader_present=0x{:016x} l2_present=0x{:016x} tiler_present=0x{:016x}",
self.shader_present,
self.l2_present,
- self.tiler_present
+ self.tiler_present,
);
}
-
- /// Returns the number of virtual address bits supported by the GPU.
- #[expect(dead_code)]
- pub(crate) fn va_bits(&self) -> u32 {
- self.mmu_features & genmask_u32(0..=7)
- }
-
- /// Returns the number of physical address bits supported by the GPU.
- #[expect(dead_code)]
- pub(crate) fn pa_bits(&self) -> u32 {
- (self.mmu_features >> 8) & genmask_u32(0..=7)
- }
}
impl Deref for GpuInfo {
@@ -182,38 +159,13 @@ const GPU_MODELS: [GpuModels; 1] = [GpuModels {
prod_major: 7,
}];
-#[allow(dead_code)]
-pub(crate) struct GpuId {
- pub(crate) arch_major: u32,
- pub(crate) arch_minor: u32,
- pub(crate) arch_rev: u32,
- pub(crate) prod_major: u32,
- pub(crate) ver_major: u32,
- pub(crate) ver_minor: u32,
- pub(crate) ver_status: u32,
-}
-
-impl From<u32> for GpuId {
- fn from(value: u32) -> Self {
- GpuId {
- arch_major: (value & genmask_u32(28..=31)) >> 28,
- arch_minor: (value & genmask_u32(24..=27)) >> 24,
- arch_rev: (value & genmask_u32(20..=23)) >> 20,
- prod_major: (value & genmask_u32(16..=19)) >> 16,
- ver_major: (value & genmask_u32(12..=15)) >> 12,
- ver_minor: (value & genmask_u32(4..=11)) >> 4,
- ver_status: value & genmask_u32(0..=3),
- }
- }
-}
-
/// Powers on the l2 block.
-pub(crate) fn l2_power_on(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result {
- regs::L2_PWRON_LO.write(dev, iomem, 1)?;
+pub(crate) fn l2_power_on(dev: &Device, io: &IoMem<'_>) -> Result {
+ io.write_reg(L2_PWRON_LO::zeroed().with_const_request::<1>());
poll::read_poll_timeout(
- || regs::L2_READY_LO.read(dev, iomem),
- |status| *status == 1,
+ || Ok(io.read(L2_READY_LO)),
+ |status| status.ready() == 1,
Delta::from_millis(1),
Delta::from_millis(100),
)
diff --git a/drivers/gpu/drm/tyr/regs.rs b/drivers/gpu/drm/tyr/regs.rs
index 611870c2e6af..562023e5df2f 100644
--- a/drivers/gpu/drm/tyr/regs.rs
+++ b/drivers/gpu/drm/tyr/regs.rs
@@ -1,5 +1,25 @@
// SPDX-License-Identifier: GPL-2.0 or MIT
+//! # Definitions
+//!
+//! - **CEU**: Command Execution Unit - A hardware component that executes commands (instructions)
+//! from the command stream.
+//! - **CS**: Command Stream - A sequence of instructions (commands) used to control a particular
+//! job or sequence of jobs. The instructions exist in one or more command buffers.
+//! - **CSF**: Command Stream Frontend - The interface and implementation for job submission
+//! exposed to the host CPU driver. This includes the global interface, as well as CSG and CS
+//! interfaces.
+//! - **CSG**: Command Stream Group - A group of related command streams. The CSF manages multiple
+//! CSGs, and each CSG contains multiple CSs.
+//! - **CSHW**: Command Stream Hardware - The hardware interpreting command streams, including the
+//! iterator control aspects. Implements the CSF in conjunction with the MCU.
+//! - **GLB**: Global - Prefix for global interface registers that control operations common to
+//! all CSs.
+//! - **JASID**: Job Address Space ID - Identifies the address space for a job.
+//! - **MCU**: Microcontroller Unit - Implements the CSF in conjunction with the command stream
+//! hardware.
+//! - **MMU**: Memory Management Unit - Handles address translation and memory access protection.
+
// We don't expect that all the registers and fields will be used, even in the
// future.
//
@@ -7,107 +27,1630 @@
// does.
#![allow(dead_code)]
-use kernel::{
- bits::bit_u32,
- device::{
- Bound,
- Device, //
- },
- devres::Devres,
- io::Io,
- prelude::*, //
-};
-
-use crate::driver::IoMem;
-
-/// Represents a register in the Register Set
-///
-/// TODO: Replace this with the Nova `register!()` macro when it is available.
-/// In particular, this will automatically give us 64bit register reads and
-/// writes.
-pub(crate) struct Register<const OFFSET: usize>;
-
-impl<const OFFSET: usize> Register<OFFSET> {
- #[inline]
- pub(crate) fn read(&self, dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result<u32> {
- let value = (*iomem).access(dev)?.read32(OFFSET);
- Ok(value)
- }
-
- #[inline]
- pub(crate) fn write(&self, dev: &Device<Bound>, iomem: &Devres<IoMem>, value: u32) -> Result {
- (*iomem).access(dev)?.write32(value, OFFSET);
- Ok(())
+/// Combine two 32-bit values into a single 64-bit value.
+pub(crate) fn join_u64(lo: u32, hi: u32) -> u64 {
+ (u64::from(lo)) | ((u64::from(hi)) << 32)
+}
+
+/// Read a logical 64-bit value from split 32-bit registers without tearing.
+pub(crate) fn read_u64_no_tearing(lo_read: impl Fn() -> u32, hi_read: impl Fn() -> u32) -> u64 {
+ loop {
+ let hi1 = hi_read();
+ let lo = lo_read();
+ let hi2 = hi_read();
+
+ if hi1 == hi2 {
+ return join_u64(lo, hi1);
+ }
}
}
-pub(crate) const GPU_ID: Register<0x0> = Register;
-pub(crate) const GPU_L2_FEATURES: Register<0x4> = Register;
-pub(crate) const GPU_CORE_FEATURES: Register<0x8> = Register;
-pub(crate) const GPU_CSF_ID: Register<0x1c> = Register;
-pub(crate) const GPU_REVID: Register<0x280> = Register;
-pub(crate) const GPU_TILER_FEATURES: Register<0xc> = Register;
-pub(crate) const GPU_MEM_FEATURES: Register<0x10> = Register;
-pub(crate) const GPU_MMU_FEATURES: Register<0x14> = Register;
-pub(crate) const GPU_AS_PRESENT: Register<0x18> = Register;
-pub(crate) const GPU_IRQ_RAWSTAT: Register<0x20> = Register;
-
-pub(crate) const GPU_IRQ_RAWSTAT_FAULT: u32 = bit_u32(0);
-pub(crate) const GPU_IRQ_RAWSTAT_PROTECTED_FAULT: u32 = bit_u32(1);
-pub(crate) const GPU_IRQ_RAWSTAT_RESET_COMPLETED: u32 = bit_u32(8);
-pub(crate) const GPU_IRQ_RAWSTAT_POWER_CHANGED_SINGLE: u32 = bit_u32(9);
-pub(crate) const GPU_IRQ_RAWSTAT_POWER_CHANGED_ALL: u32 = bit_u32(10);
-pub(crate) const GPU_IRQ_RAWSTAT_CLEAN_CACHES_COMPLETED: u32 = bit_u32(17);
-pub(crate) const GPU_IRQ_RAWSTAT_DOORBELL_STATUS: u32 = bit_u32(18);
-pub(crate) const GPU_IRQ_RAWSTAT_MCU_STATUS: u32 = bit_u32(19);
-
-pub(crate) const GPU_IRQ_CLEAR: Register<0x24> = Register;
-pub(crate) const GPU_IRQ_MASK: Register<0x28> = Register;
-pub(crate) const GPU_IRQ_STAT: Register<0x2c> = Register;
-pub(crate) const GPU_CMD: Register<0x30> = Register;
-pub(crate) const GPU_CMD_SOFT_RESET: u32 = 1 | (1 << 8);
-pub(crate) const GPU_CMD_HARD_RESET: u32 = 1 | (2 << 8);
-pub(crate) const GPU_THREAD_FEATURES: Register<0xac> = Register;
-pub(crate) const GPU_THREAD_MAX_THREADS: Register<0xa0> = Register;
-pub(crate) const GPU_THREAD_MAX_WORKGROUP_SIZE: Register<0xa4> = Register;
-pub(crate) const GPU_THREAD_MAX_BARRIER_SIZE: Register<0xa8> = Register;
-pub(crate) const GPU_TEXTURE_FEATURES0: Register<0xb0> = Register;
-pub(crate) const GPU_SHADER_PRESENT_LO: Register<0x100> = Register;
-pub(crate) const GPU_SHADER_PRESENT_HI: Register<0x104> = Register;
-pub(crate) const GPU_TILER_PRESENT_LO: Register<0x110> = Register;
-pub(crate) const GPU_TILER_PRESENT_HI: Register<0x114> = Register;
-pub(crate) const GPU_L2_PRESENT_LO: Register<0x120> = Register;
-pub(crate) const GPU_L2_PRESENT_HI: Register<0x124> = Register;
-pub(crate) const L2_READY_LO: Register<0x160> = Register;
-pub(crate) const L2_READY_HI: Register<0x164> = Register;
-pub(crate) const L2_PWRON_LO: Register<0x1a0> = Register;
-pub(crate) const L2_PWRON_HI: Register<0x1a4> = Register;
-pub(crate) const L2_PWRTRANS_LO: Register<0x220> = Register;
-pub(crate) const L2_PWRTRANS_HI: Register<0x204> = Register;
-pub(crate) const L2_PWRACTIVE_LO: Register<0x260> = Register;
-pub(crate) const L2_PWRACTIVE_HI: Register<0x264> = Register;
-
-pub(crate) const MCU_CONTROL: Register<0x700> = Register;
-pub(crate) const MCU_CONTROL_ENABLE: u32 = 1;
-pub(crate) const MCU_CONTROL_AUTO: u32 = 2;
-pub(crate) const MCU_CONTROL_DISABLE: u32 = 0;
-
-pub(crate) const MCU_STATUS: Register<0x704> = Register;
-pub(crate) const MCU_STATUS_DISABLED: u32 = 0;
-pub(crate) const MCU_STATUS_ENABLED: u32 = 1;
-pub(crate) const MCU_STATUS_HALT: u32 = 2;
-pub(crate) const MCU_STATUS_FATAL: u32 = 3;
-
-pub(crate) const GPU_COHERENCY_FEATURES: Register<0x300> = Register;
-
-pub(crate) const JOB_IRQ_RAWSTAT: Register<0x1000> = Register;
-pub(crate) const JOB_IRQ_CLEAR: Register<0x1004> = Register;
-pub(crate) const JOB_IRQ_MASK: Register<0x1008> = Register;
-pub(crate) const JOB_IRQ_STAT: Register<0x100c> = Register;
-
-pub(crate) const JOB_IRQ_GLOBAL_IF: u32 = bit_u32(31);
-
-pub(crate) const MMU_IRQ_RAWSTAT: Register<0x2000> = Register;
-pub(crate) const MMU_IRQ_CLEAR: Register<0x2004> = Register;
-pub(crate) const MMU_IRQ_MASK: Register<0x2008> = Register;
-pub(crate) const MMU_IRQ_STAT: Register<0x200c> = Register;
+/// These registers correspond to the GPU_CONTROL register page.
+/// They are involved in GPU configuration and control.
+pub(crate) mod gpu_control {
+ use core::convert::TryFrom;
+ use kernel::{
+ error::{
+ code::EINVAL,
+ Error, //
+ },
+ num::Bounded,
+ register,
+ uapi, //
+ };
+ use pin_init::Zeroable;
+
+ register! {
+ /// GPU identification register.
+ pub(crate) GPU_ID(u32) @ 0x0 {
+ /// Status of the GPU release.
+ 3:0 ver_status;
+ /// Minor release version number.
+ 11:4 ver_minor;
+ /// Major release version number.
+ 15:12 ver_major;
+ /// Product identifier.
+ 19:16 prod_major;
+ /// Architecture patch revision.
+ 23:20 arch_rev;
+ /// Architecture minor revision.
+ 27:24 arch_minor;
+ /// Architecture major revision.
+ 31:28 arch_major;
+ }
+
+ /// Level 2 cache features register.
+ pub(crate) L2_FEATURES(u32) @ 0x4 {
+ /// Cache line size.
+ 7:0 line_size;
+ /// Cache associativity.
+ 15:8 associativity;
+ /// Cache slice size.
+ 23:16 cache_size;
+ /// External bus width.
+ 31:24 bus_width;
+ }
+
+ /// Shader core features.
+ pub(crate) CORE_FEATURES(u32) @ 0x8 {
+ /// Shader core variant.
+ 7:0 core_variant;
+ }
+
+ /// Tiler features.
+ pub(crate) TILER_FEATURES(u32) @ 0xc {
+ /// Log of the tiler's bin size.
+ 5:0 bin_size;
+ /// Maximum number of active levels.
+ 11:8 max_levels;
+ }
+
+ /// Memory system features.
+ pub(crate) MEM_FEATURES(u32) @ 0x10 {
+ 0:0 coherent_core_group => bool;
+ 1:1 coherent_super_group => bool;
+ 11:8 l2_slices;
+ }
+
+ /// Memory management unit features.
+ pub(crate) MMU_FEATURES(u32) @ 0x14 {
+ /// Number of bits supported in virtual addresses.
+ 7:0 va_bits;
+ /// Number of bits supported in physical addresses.
+ 15:8 pa_bits;
+ }
+
+ /// Address spaces present.
+ pub(crate) AS_PRESENT(u32) @ 0x18 {
+ 31:0 present;
+ }
+
+ /// CSF version information.
+ pub(crate) CSF_ID(u32) @ 0x1c {
+ /// MCU revision ID.
+ 3:0 mcu_rev;
+ /// MCU minor revision number.
+ 9:4 mcu_minor;
+ /// MCU major revision number.
+ 15:10 mcu_major;
+ /// CSHW revision ID.
+ 19:16 cshw_rev;
+ /// CSHW minor revision number.
+ 25:20 cshw_minor;
+ /// CSHW major revision number.
+ 31:26 cshw_major;
+ }
+
+ /// IRQ sources raw status.
+ /// Writing to this register forces bits on, but does not clear them.
+ pub(crate) GPU_IRQ_RAWSTAT(u32) @ 0x20 {
+ /// A GPU fault has occurred, a 1-bit boolean flag.
+ 0:0 gpu_fault => bool;
+ /// A GPU protected fault has occurred, a 1-bit boolean flag.
+ 1:1 gpu_protected_fault => bool;
+ /// Reset has completed, a 1-bit boolean flag.
+ 8:8 reset_completed => bool;
+ /// Set when a single power domain has powered up or down, a 1-bit boolean flag.
+ 9:9 power_changed_single => bool;
+ /// Set when all pending power domain changes are completed, a 1-bit boolean flag.
+ 10:10 power_changed_all => bool;
+ /// Set when cache cleaning has completed, a 1-bit boolean flag.
+ 17:17 clean_caches_completed => bool;
+ /// Mirrors the doorbell interrupt line to the CPU, a 1-bit boolean flag.
+ 18:18 doorbell_mirror => bool;
+ /// MCU requires attention, a 1-bit boolean flag.
+ 19:19 mcu_status => bool;
+ }
+
+ /// IRQ sources to clear. Write only.
+ pub(crate) GPU_IRQ_CLEAR(u32) @ 0x24 {
+ /// Clear the GPU_FAULT interrupt, a 1-bit boolean flag.
+ 0:0 gpu_fault => bool;
+ /// Clear the GPU_PROTECTED_FAULT interrupt, a 1-bit boolean flag.
+ 1:1 gpu_protected_fault => bool;
+ /// Clear the RESET_COMPLETED interrupt, a 1-bit boolean flag.
+ 8:8 reset_completed => bool;
+ /// Clear the POWER_CHANGED_SINGLE interrupt, a 1-bit boolean flag.
+ 9:9 power_changed_single => bool;
+ /// Clear the POWER_CHANGED_ALL interrupt, a 1-bit boolean flag.
+ 10:10 power_changed_all => bool;
+ /// Clear the CLEAN_CACHES_COMPLETED interrupt, a 1-bit boolean flag.
+ 17:17 clean_caches_completed => bool;
+ /// Clear the MCU_STATUS interrupt, a 1-bit boolean flag.
+ 19:19 mcu_status => bool;
+ }
+
+ /// IRQ sources enabled.
+ pub(crate) GPU_IRQ_MASK(u32) @ 0x28 {
+ /// Enable the GPU_FAULT interrupt, a 1-bit boolean flag.
+ 0:0 gpu_fault => bool;
+ /// Enable the GPU_PROTECTED_FAULT interrupt, a 1-bit boolean flag.
+ 1:1 gpu_protected_fault => bool;
+ /// Enable the RESET_COMPLETED interrupt, a 1-bit boolean flag.
+ 8:8 reset_completed => bool;
+ /// Enable the POWER_CHANGED_SINGLE interrupt, a 1-bit boolean flag.
+ 9:9 power_changed_single => bool;
+ /// Enable the POWER_CHANGED_ALL interrupt, a 1-bit boolean flag.
+ 10:10 power_changed_all => bool;
+ /// Enable the CLEAN_CACHES_COMPLETED interrupt, a 1-bit boolean flag.
+ 17:17 clean_caches_completed => bool;
+ /// Enable the DOORBELL_MIRROR interrupt, a 1-bit boolean flag.
+ 18:18 doorbell_mirror => bool;
+ /// Enable the MCU_STATUS interrupt, a 1-bit boolean flag.
+ 19:19 mcu_status => bool;
+ }
+
+ /// IRQ status for enabled sources. Read only.
+ pub(crate) GPU_IRQ_STATUS(u32) @ 0x2c {
+ /// GPU_FAULT interrupt status, a 1-bit boolean flag.
+ 0:0 gpu_fault => bool;
+ /// GPU_PROTECTED_FAULT interrupt status, a 1-bit boolean flag.
+ 1:1 gpu_protected_fault => bool;
+ /// RESET_COMPLETED interrupt status, a 1-bit boolean flag.
+ 8:8 reset_completed => bool;
+ /// POWER_CHANGED_SINGLE interrupt status, a 1-bit boolean flag.
+ 9:9 power_changed_single => bool;
+ /// POWER_CHANGED_ALL interrupt status, a 1-bit boolean flag.
+ 10:10 power_changed_all => bool;
+ /// CLEAN_CACHES_COMPLETED interrupt status, a 1-bit boolean flag.
+ 17:17 clean_caches_completed => bool;
+ /// DOORBELL_MIRROR interrupt status, a 1-bit boolean flag.
+ 18:18 doorbell_mirror => bool;
+ /// MCU_STATUS interrupt status, a 1-bit boolean flag.
+ 19:19 mcu_status => bool;
+ }
+ }
+
+ /// Helpers for the GPU_COMMAND register.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum GpuCommand {
+ /// No operation. This is the default value.
+ Nop = 0,
+ /// Reset the GPU.
+ Reset = 1,
+ /// Flush caches.
+ FlushCaches = 4,
+ /// Clear GPU faults.
+ ClearFault = 7,
+ }
+
+ impl TryFrom<Bounded<u32, 8>> for GpuCommand {
+ type Error = Error;
+
+ fn try_from(val: Bounded<u32, 8>) -> Result<Self, Self::Error> {
+ match val.get() {
+ 0 => Ok(GpuCommand::Nop),
+ 1 => Ok(GpuCommand::Reset),
+ 4 => Ok(GpuCommand::FlushCaches),
+ 7 => Ok(GpuCommand::ClearFault),
+ _ => Err(EINVAL),
+ }
+ }
+ }
+
+ impl From<GpuCommand> for Bounded<u32, 8> {
+ fn from(cmd: GpuCommand) -> Self {
+ (cmd as u8).into()
+ }
+ }
+
+ /// Reset mode for [`GPU_COMMAND::reset()`].
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum ResetMode {
+ /// Stop all external bus interfaces, then reset the entire GPU.
+ SoftReset = 1,
+ /// Force a full GPU reset.
+ HardReset = 2,
+ }
+
+ impl TryFrom<Bounded<u32, 4>> for ResetMode {
+ type Error = Error;
+
+ fn try_from(val: Bounded<u32, 4>) -> Result<Self, Self::Error> {
+ match val.get() {
+ 1 => Ok(ResetMode::SoftReset),
+ 2 => Ok(ResetMode::HardReset),
+ _ => Err(EINVAL),
+ }
+ }
+ }
+
+ impl From<ResetMode> for Bounded<u32, 4> {
+ fn from(mode: ResetMode) -> Self {
+ Bounded::try_new(mode as u32).unwrap()
+ }
+ }
+
+ /// Cache flush mode for [`GPU_COMMAND::flush_caches()`].
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum FlushMode {
+ /// No flush.
+ None = 0,
+ /// Clean the caches.
+ Clean = 1,
+ /// Invalidate the caches.
+ Invalidate = 2,
+ /// Clean and invalidate the caches.
+ CleanInvalidate = 3,
+ }
+
+ impl TryFrom<Bounded<u32, 4>> for FlushMode {
+ type Error = Error;
+
+ fn try_from(val: Bounded<u32, 4>) -> Result<Self, Self::Error> {
+ match val.get() {
+ 0 => Ok(FlushMode::None),
+ 1 => Ok(FlushMode::Clean),
+ 2 => Ok(FlushMode::Invalidate),
+ 3 => Ok(FlushMode::CleanInvalidate),
+ _ => Err(EINVAL),
+ }
+ }
+ }
+
+ impl From<FlushMode> for Bounded<u32, 4> {
+ fn from(mode: FlushMode) -> Self {
+ Bounded::try_new(mode as u32).unwrap()
+ }
+ }
+
+ register! {
+ /// GPU command register.
+ ///
+ /// Use the constructor methods to create commands:
+ /// - [`GPU_COMMAND::nop()`]
+ /// - [`GPU_COMMAND::reset()`]
+ /// - [`GPU_COMMAND::flush_caches()`]
+ /// - [`GPU_COMMAND::clear_fault()`]
+ pub(crate) GPU_COMMAND (u32) @ 0x30 {
+ 7:0 command ?=> GpuCommand;
+ }
+ /// Internal alias for GPU_COMMAND in reset mode.
+ /// Use [`GPU_COMMAND::reset()`] instead.
+ GPU_COMMAND_RESET (u32) => GPU_COMMAND {
+ 7:0 command ?=> GpuCommand;
+ 11:8 reset_mode ?=> ResetMode;
+ }
+
+ /// Internal alias for GPU_COMMAND in cache flush mode.
+ /// Use [`GPU_COMMAND::flush_caches()`] instead.
+ GPU_COMMAND_FLUSH (u32) => GPU_COMMAND {
+ 7:0 command ?=> GpuCommand;
+ /// L2 cache flush mode.
+ 11:8 l2_flush ?=> FlushMode;
+ /// Shader core load/store cache flush mode.
+ 15:12 lsc_flush ?=> FlushMode;
+ /// Shader core other caches flush mode.
+ 19:16 other_flush ?=> FlushMode;
+ }
+ }
+
+ impl GPU_COMMAND {
+ /// Create a NOP command.
+ pub(crate) fn nop() -> Self {
+ Self::zeroed()
+ }
+
+ /// Create a reset command with the specified reset mode.
+ pub(crate) fn reset(mode: ResetMode) -> Self {
+ Self::from_raw(
+ GPU_COMMAND_RESET::zeroed()
+ .with_command(GpuCommand::Reset)
+ .with_reset_mode(mode)
+ .into_raw(),
+ )
+ }
+
+ /// Create a cache flush command with the specified flush modes.
+ pub(crate) fn flush_caches(l2: FlushMode, lsc: FlushMode, other: FlushMode) -> Self {
+ Self::from_raw(
+ GPU_COMMAND_FLUSH::zeroed()
+ .with_command(GpuCommand::FlushCaches)
+ .with_l2_flush(l2)
+ .with_lsc_flush(lsc)
+ .with_other_flush(other)
+ .into_raw(),
+ )
+ }
+
+ /// Create a clear fault command.
+ pub(crate) fn clear_fault() -> Self {
+ Self::zeroed().with_command(GpuCommand::ClearFault)
+ }
+ }
+
+ register! {
+ /// GPU status register. Read only.
+ pub(crate) GPU_STATUS(u32) @ 0x34 {
+ /// GPU active, a 1-bit boolean flag.
+ 0:0 gpu_active => bool;
+ /// Power manager active, a 1-bit boolean flag.
+ 1:1 pwr_active => bool;
+ /// Page fault active, a 1-bit boolean flag.
+ 4:4 page_fault => bool;
+ /// Protected mode active, a 1-bit boolean flag.
+ 7:7 protected_mode_active => bool;
+ /// Debug mode active, a 1-bit boolean flag.
+ 8:8 gpu_dbg_enabled => bool;
+ }
+ }
+
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum ExceptionType {
+ /// Exception type: No error.
+ Ok = 0x00,
+ /// Exception type: GPU external bus error.
+ GpuBusFault = 0x80,
+ /// Exception type: GPU shareability error.
+ GpuShareabilityFault = 0x88,
+ /// Exception type: System shareability error.
+ SystemShareabilityFault = 0x89,
+ /// Exception type: GPU cacheability error.
+ GpuCacheabilityFault = 0x8A,
+ }
+
+ impl TryFrom<Bounded<u32, 8>> for ExceptionType {
+ type Error = Error;
+
+ fn try_from(val: Bounded<u32, 8>) -> Result<Self, Self::Error> {
+ match val.get() {
+ 0x00 => Ok(ExceptionType::Ok),
+ 0x80 => Ok(ExceptionType::GpuBusFault),
+ 0x88 => Ok(ExceptionType::GpuShareabilityFault),
+ 0x89 => Ok(ExceptionType::SystemShareabilityFault),
+ 0x8A => Ok(ExceptionType::GpuCacheabilityFault),
+ _ => Err(EINVAL),
+ }
+ }
+ }
+
+ impl From<ExceptionType> for Bounded<u32, 8> {
+ fn from(exc: ExceptionType) -> Self {
+ (exc as u8).into()
+ }
+ }
+
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum AccessType {
+ /// Access type: An atomic (read/write) transaction.
+ Atomic = 0,
+ /// Access type: An execute transaction.
+ Execute = 1,
+ /// Access type: A read transaction.
+ Read = 2,
+ /// Access type: A write transaction.
+ Write = 3,
+ }
+
+ impl From<Bounded<u32, 2>> for AccessType {
+ fn from(val: Bounded<u32, 2>) -> Self {
+ match val.get() {
+ 0 => AccessType::Atomic,
+ 1 => AccessType::Execute,
+ 2 => AccessType::Read,
+ 3 => AccessType::Write,
+ _ => unreachable!(),
+ }
+ }
+ }
+
+ impl From<AccessType> for Bounded<u32, 2> {
+ fn from(access: AccessType) -> Self {
+ Bounded::try_new(access as u32).unwrap()
+ }
+ }
+
+ register! {
+ /// GPU fault status register. Read only.
+ pub(crate) GPU_FAULTSTATUS(u32) @ 0x3c {
+ /// Exception type.
+ 7:0 exception_type ?=> ExceptionType;
+ /// Access type.
+ 9:8 access_type => AccessType;
+ /// The GPU_FAULTADDRESS is valid, a 1-bit boolean flag.
+ 10:10 address_valid => bool;
+ /// The JASID field is valid, a 1-bit boolean flag.
+ 11:11 jasid_valid => bool;
+ /// JASID of the fault, if known.
+ 15:12 jasid;
+ /// ID of the source that triggered the fault.
+ 31:16 source_id;
+ }
+
+ /// GPU fault address. Read only.
+ /// Once a fault is reported, it must be manually cleared by issuing a
+ /// [`GPU_COMMAND::clear_fault()`] command to the [`GPU_COMMAND`] register. No further GPU
+ /// faults will be reported until the previous fault has been cleared.
+ pub(crate) GPU_FAULTADDRESS_LO(u32) @ 0x40 {
+ 31:0 pointer;
+ }
+
+ pub(crate) GPU_FAULTADDRESS_HI(u32) @ 0x44 {
+ 31:0 pointer;
+ }
+
+ /// Level 2 cache configuration.
+ pub(crate) L2_CONFIG(u32) @ 0x48 {
+ /// Requested cache size.
+ 23:16 cache_size;
+ /// Requested hash function index.
+ 31:24 hash_function;
+ }
+
+ /// Global time stamp offset.
+ pub(crate) TIMESTAMP_OFFSET_LO(u32) @ 0x88 {
+ 31:0 offset;
+ }
+
+ pub(crate) TIMESTAMP_OFFSET_HI(u32) @ 0x8c {
+ 31:0 offset;
+ }
+
+ /// GPU cycle counter. Read only.
+ pub(crate) CYCLE_COUNT_LO(u32) @ 0x90 {
+ 31:0 count;
+ }
+
+ pub(crate) CYCLE_COUNT_HI(u32) @ 0x94 {
+ 31:0 count;
+ }
+
+ /// Global time stamp. Read only.
+ pub(crate) TIMESTAMP_LO(u32) @ 0x98 {
+ 31:0 timestamp;
+ }
+
+ pub(crate) TIMESTAMP_HI(u32) @ 0x9c {
+ 31:0 timestamp;
+ }
+
+ /// Maximum number of threads per core. Read only constant.
+ pub(crate) THREAD_MAX_THREADS(u32) @ 0xa0 {
+ 31:0 threads;
+ }
+
+ /// Maximum number of threads per workgroup. Read only constant.
+ pub(crate) THREAD_MAX_WORKGROUP_SIZE(u32) @ 0xa4 {
+ 31:0 threads;
+ }
+
+ /// Maximum number of threads per barrier. Read only constant.
+ pub(crate) THREAD_MAX_BARRIER_SIZE(u32) @ 0xa8 {
+ 31:0 threads;
+ }
+
+ /// Thread features. Read only constant.
+ pub(crate) THREAD_FEATURES(u32) @ 0xac {
+ /// Total number of registers per core.
+ 21:0 max_registers;
+ /// Implementation technology type.
+ 23:22 implementation_technology;
+ /// Maximum number of compute tasks waiting.
+ 31:24 max_task_queue;
+ }
+
+ /// Support flags for compressed texture formats. Read only constant.
+ ///
+ /// A bitmap where each bit indicates support for a specific compressed texture format.
+ /// The bit position maps to an opaque format ID (`texture_features_key_t` in spec).
+ pub(crate) TEXTURE_FEATURES(u32)[4] @ 0xb0 {
+ 31:0 supported_formats;
+ }
+
+ /// Shader core present bitmap. Read only constant.
+ pub(crate) SHADER_PRESENT_LO(u32) @ 0x100 {
+ 31:0 value;
+ }
+
+ pub(crate) SHADER_PRESENT_HI(u32) @ 0x104 {
+ 31:0 value;
+ }
+
+ /// Tiler present bitmap. Read only constant.
+ pub(crate) TILER_PRESENT_LO(u32) @ 0x110 {
+ 31:0 present;
+ }
+
+ pub(crate) TILER_PRESENT_HI(u32) @ 0x114 {
+ 31:0 present;
+ }
+
+ /// L2 cache present bitmap. Read only constant.
+ pub(crate) L2_PRESENT_LO(u32) @ 0x120 {
+ 31:0 present;
+ }
+
+ pub(crate) L2_PRESENT_HI(u32) @ 0x124 {
+ 31:0 present;
+ }
+
+ /// Shader core ready bitmap. Read only.
+ pub(crate) SHADER_READY_LO(u32) @ 0x140 {
+ 31:0 ready;
+ }
+
+ pub(crate) SHADER_READY_HI(u32) @ 0x144 {
+ 31:0 ready;
+ }
+
+ /// Tiler ready bitmap. Read only.
+ pub(crate) TILER_READY_LO(u32) @ 0x150 {
+ 31:0 ready;
+ }
+
+ pub(crate) TILER_READY_HI(u32) @ 0x154 {
+ 31:0 ready;
+ }
+
+ /// L2 ready bitmap. Read only.
+ pub(crate) L2_READY_LO(u32) @ 0x160 {
+ 31:0 ready;
+ }
+
+ pub(crate) L2_READY_HI(u32) @ 0x164 {
+ 31:0 ready;
+ }
+
+ /// Shader core power up bitmap.
+ pub(crate) SHADER_PWRON_LO(u32) @ 0x180 {
+ 31:0 request;
+ }
+
+ pub(crate) SHADER_PWRON_HI(u32) @ 0x184 {
+ 31:0 request;
+ }
+
+ /// Tiler power up bitmap.
+ pub(crate) TILER_PWRON_LO(u32) @ 0x190 {
+ 31:0 request;
+ }
+
+ pub(crate) TILER_PWRON_HI(u32) @ 0x194 {
+ 31:0 request;
+ }
+
+ /// L2 power up bitmap.
+ pub(crate) L2_PWRON_LO(u32) @ 0x1a0 {
+ 31:0 request;
+ }
+
+ pub(crate) L2_PWRON_HI(u32) @ 0x1a4 {
+ 31:0 request;
+ }
+
+ /// Shader core power down bitmap.
+ pub(crate) SHADER_PWROFF_LO(u32) @ 0x1c0 {
+ 31:0 request;
+ }
+
+ pub(crate) SHADER_PWROFF_HI(u32) @ 0x1c4 {
+ 31:0 request;
+ }
+
+ /// Tiler power down bitmap.
+ pub(crate) TILER_PWROFF_LO(u32) @ 0x1d0 {
+ 31:0 request;
+ }
+
+ pub(crate) TILER_PWROFF_HI(u32) @ 0x1d4 {
+ 31:0 request;
+ }
+
+ /// L2 power down bitmap.
+ pub(crate) L2_PWROFF_LO(u32) @ 0x1e0 {
+ 31:0 request;
+ }
+
+ pub(crate) L2_PWROFF_HI(u32) @ 0x1e4 {
+ 31:0 request;
+ }
+
+ /// Shader core power transition bitmap. Read-only.
+ pub(crate) SHADER_PWRTRANS_LO(u32) @ 0x200 {
+ 31:0 changing;
+ }
+
+ pub(crate) SHADER_PWRTRANS_HI(u32) @ 0x204 {
+ 31:0 changing;
+ }
+
+ /// Tiler power transition bitmap. Read-only.
+ pub(crate) TILER_PWRTRANS_LO(u32) @ 0x210 {
+ 31:0 changing;
+ }
+
+ pub(crate) TILER_PWRTRANS_HI(u32) @ 0x214 {
+ 31:0 changing;
+ }
+
+ /// L2 power transition bitmap. Read-only.
+ pub(crate) L2_PWRTRANS_LO(u32) @ 0x220 {
+ 31:0 changing;
+ }
+
+ pub(crate) L2_PWRTRANS_HI(u32) @ 0x224 {
+ 31:0 changing;
+ }
+
+ /// Shader core active bitmap. Read-only.
+ pub(crate) SHADER_PWRACTIVE_LO(u32) @ 0x240 {
+ 31:0 active;
+ }
+
+ pub(crate) SHADER_PWRACTIVE_HI(u32) @ 0x244 {
+ 31:0 active;
+ }
+
+ /// Tiler active bitmap. Read-only.
+ pub(crate) TILER_PWRACTIVE_LO(u32) @ 0x250 {
+ 31:0 active;
+ }
+
+ pub(crate) TILER_PWRACTIVE_HI(u32) @ 0x254 {
+ 31:0 active;
+ }
+
+ /// L2 active bitmap. Read-only.
+ pub(crate) L2_PWRACTIVE_LO(u32) @ 0x260 {
+ 31:0 active;
+ }
+
+ pub(crate) L2_PWRACTIVE_HI(u32) @ 0x264 {
+ 31:0 active;
+ }
+
+ /// Revision ID. Read only constant.
+ pub(crate) REVIDR(u32) @ 0x280 {
+ 31:0 revision;
+ }
+
+ /// Coherency features present. Read only constant.
+ /// Supported protocols on the interconnect between the GPU and the
+ /// system into which it is integrated.
+ pub(crate) COHERENCY_FEATURES(u32) @ 0x300 {
+ /// ACE-Lite protocol supported, a 1-bit boolean flag.
+ 0:0 ace_lite => bool;
+ /// ACE protocol supported, a 1-bit boolean flag.
+ 1:1 ace => bool;
+ }
+ }
+
+ /// Coherency protocol selected through the `COHERENCY_ENABLE` register.
+ ///
+ /// Each discriminant is the matching `drm_panthor_gpu_coherency` uAPI constant narrowed to `u8`.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum CoherencyMode {
+ /// ACE-Lite coherency protocol.
+ AceLite = uapi::drm_panthor_gpu_coherency_DRM_PANTHOR_GPU_COHERENCY_ACE_LITE as u8,
+ /// ACE coherency protocol.
+ Ace = uapi::drm_panthor_gpu_coherency_DRM_PANTHOR_GPU_COHERENCY_ACE as u8,
+ /// No coherency protocol.
+ None = uapi::drm_panthor_gpu_coherency_DRM_PANTHOR_GPU_COHERENCY_NONE as u8,
+ }
+
+    /// Decodes a raw 32-bit register value into a [`CoherencyMode`].
+    ///
+    /// Only the raw values 0, 1 and 31 are recognised; any other value yields `EINVAL`.
+    impl TryFrom<Bounded<u32, 32>> for CoherencyMode {
+        type Error = Error;
+
+        fn try_from(val: Bounded<u32, 32>) -> Result<Self, Self::Error> {
+            // NOTE(review): the literals are assumed to equal the uAPI constants used as the
+            // enum discriminants -- confirm against the panthor uAPI header.
+            let mode = match val.get() {
+                0 => Self::AceLite,
+                1 => Self::Ace,
+                31 => Self::None,
+                _ => return Err(EINVAL),
+            };
+
+            Ok(mode)
+        }
+    }
+
+    /// Encodes a [`CoherencyMode`] as its raw register value.
+    impl From<CoherencyMode> for Bounded<u32, 32> {
+        fn from(mode: CoherencyMode) -> Self {
+            // The cast yields the enum discriminant as a `u8`, which is then widened.
+            let raw = mode as u8;
+            raw.into()
+        }
+    }
+
+ register! {
+ /// Coherency enable. An index of which coherency protocol should be used.
+ /// This register only selects the protocol for coherency messages on the
+ /// interconnect. It does not enable or disable the coherency controlled by the MMU.
+ pub(crate) COHERENCY_ENABLE(u32) @ 0x304 {
+ /// Selected protocol (bits 31:0), converted fallibly (`?=>`) into [`CoherencyMode`].
+ 31:0 l2_cache_protocol_select ?=> CoherencyMode;
+ }
+ }
+
+ /// MCU state requested through the `req` field of the `MCU_CONTROL` register.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum McuControlMode {
+ /// Disable the MCU.
+ Disable = 0,
+ /// Enable the MCU.
+ Enable = 1,
+ /// Enable the MCU to execute and automatically reboot after a fast reset.
+ Auto = 2,
+ }
+
+    /// Decodes a raw 2-bit value into a [`McuControlMode`].
+    ///
+    /// Raw values other than 0, 1 and 2 yield `EINVAL`.
+    impl TryFrom<Bounded<u32, 2>> for McuControlMode {
+        type Error = Error;
+
+        fn try_from(val: Bounded<u32, 2>) -> Result<Self, Self::Error> {
+            let mode = match val.get() {
+                0 => Self::Disable,
+                1 => Self::Enable,
+                2 => Self::Auto,
+                _ => return Err(EINVAL),
+            };
+
+            Ok(mode)
+        }
+    }
+
+    /// Encodes a [`McuControlMode`] as its raw 2-bit value.
+    impl From<McuControlMode> for Bounded<u32, 2> {
+        fn from(mode: McuControlMode) -> Self {
+            // Discriminants are 0..=2, so the `unwrap()` is not expected to fire.
+            let raw = mode as u32;
+            Bounded::try_new(raw).unwrap()
+        }
+    }
+
+ register! {
+ /// MCU control. Used to request a change of the MCU state.
+ pub(crate) MCU_CONTROL(u32) @ 0x700 {
+ /// Requested MCU state (bits 1:0); raw value 3 has no [`McuControlMode`] variant.
+ 1:0 req ?=> McuControlMode;
+ }
+ }
+
+ /// MCU state reported by the `value` field of the `MCU_STATUS` register.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum McuStatus {
+ /// MCU is disabled.
+ Disabled = 0,
+ /// MCU is enabled.
+ Enabled = 1,
+ /// The MCU has halted by itself in an orderly manner to enable the core group to be
+ /// powered down.
+ Halt = 2,
+ /// The MCU has encountered an error that prevents it from continuing.
+ Fatal = 3,
+ }
+
+    /// Decodes a raw 2-bit value into a [`McuStatus`]; all four raw values have a variant.
+    impl From<Bounded<u32, 2>> for McuStatus {
+        fn from(val: Bounded<u32, 2>) -> Self {
+            let raw = val.get();
+
+            match raw {
+                0 => Self::Disabled,
+                1 => Self::Enabled,
+                2 => Self::Halt,
+                3 => Self::Fatal,
+                // Presumably a `Bounded<u32, 2>` never holds more than two bits.
+                _ => unreachable!(),
+            }
+        }
+    }
+
+    /// Encodes a [`McuStatus`] as its raw 2-bit value.
+    impl From<McuStatus> for Bounded<u32, 2> {
+        fn from(status: McuStatus) -> Self {
+            // Discriminants are 0..=3, so the `unwrap()` is not expected to fire.
+            let raw = status as u32;
+            Bounded::try_new(raw).unwrap()
+        }
+    }
+
+ register! {
+ /// MCU status. Read only.
+ pub(crate) MCU_STATUS(u32) @ 0x704 {
+ /// Current state of the MCU (bits 1:0), converted infallibly (`=>`) into [`McuStatus`].
+ 1:0 value => McuStatus;
+ }
+ }
+}
+
+/// These registers correspond to the JOB_CONTROL register page.
+/// They are involved in communication between the firmware running on the MCU and the host.
+///
+/// All four registers share one layout: bits 30:0 are the CSG bits and bit 31 is the GLB bit.
+pub(crate) mod job_control {
+ use kernel::register;
+
+ register! {
+ /// Raw status of job interrupts.
+ ///
+ /// Write to this register to trigger these interrupts.
+ /// Writing a 1 to a bit forces that bit on.
+ pub(crate) JOB_IRQ_RAWSTAT(u32) @ 0x1000 {
+ /// CSG request. These bits indicate that CSGn requires attention from the host.
+ 30:0 csg;
+ /// GLB request. Indicates that the GLB interface requires attention from the host.
+ 31:31 glb => bool;
+ }
+
+ /// Clear job interrupts. Write only.
+ ///
+ /// Write a 1 to a bit to clear the corresponding bit in [`JOB_IRQ_RAWSTAT`].
+ pub(crate) JOB_IRQ_CLEAR(u32) @ 0x1004 {
+ /// Clear CSG request interrupts.
+ 30:0 csg;
+ /// Clear GLB request interrupt.
+ 31:31 glb => bool;
+ }
+
+ /// Mask for job interrupts.
+ ///
+ /// Set each bit to 1 to enable the corresponding interrupt source or to 0 to disable it.
+ pub(crate) JOB_IRQ_MASK(u32) @ 0x1008 {
+ /// Enable CSG request interrupts.
+ 30:0 csg;
+ /// Enable GLB request interrupt.
+ 31:31 glb => bool;
+ }
+
+ /// Active job interrupts. Read only.
+ ///
+ /// This register contains the result of ANDing together [`JOB_IRQ_RAWSTAT`] and
+ /// [`JOB_IRQ_MASK`].
+ pub(crate) JOB_IRQ_STATUS(u32) @ 0x100c {
+ /// CSG request interrupt status.
+ 30:0 csg;
+ /// GLB request interrupt status.
+ 31:31 glb => bool;
+ }
+ }
+}
+
+/// These registers correspond to the MMU_CONTROL register page.
+/// They are involved in MMU configuration and control.
+pub(crate) mod mmu_control {
+ use kernel::register;
+
+ register! {
+ /// IRQ sources raw status.
+ ///
+ /// This register contains the raw unmasked interrupt sources for MMU status and exception
+ /// handling.
+ ///
+ /// Writing to this register forces bits on.
+ /// Use [`IRQ_CLEAR`] to clear interrupts.
+ pub(crate) IRQ_RAWSTAT(u32) @ 0x2000 {
+ /// Page fault for address spaces (bits 15:0).
+ 15:0 page_fault;
+ /// Command completed in address spaces (bits 31:16).
+ 31:16 command_completed;
+ }
+
+ /// IRQ sources to clear.
+ ///
+ /// Write a 1 to a bit to clear the corresponding bit in [`IRQ_RAWSTAT`].
+ pub(crate) IRQ_CLEAR(u32) @ 0x2004 {
+ /// Clear the PAGE_FAULT interrupt.
+ 15:0 page_fault;
+ /// Clear the COMMAND_COMPLETED interrupt.
+ 31:16 command_completed;
+ }
+
+ /// IRQ sources enabled.
+ ///
+ /// Set each bit to 1 to enable the corresponding interrupt source, and to 0 to disable it.
+ pub(crate) IRQ_MASK(u32) @ 0x2008 {
+ /// Enable the PAGE_FAULT interrupt.
+ 15:0 page_fault;
+ /// Enable the COMMAND_COMPLETED interrupt.
+ 31:16 command_completed;
+ }
+
+ /// IRQ status for enabled sources. Read only.
+ ///
+ /// This register contains the result of ANDing together [`IRQ_RAWSTAT`] and [`IRQ_MASK`].
+ pub(crate) IRQ_STATUS(u32) @ 0x200c {
+ /// PAGE_FAULT interrupt status.
+ 15:0 page_fault;
+ /// COMMAND_COMPLETED interrupt status.
+ 31:16 command_completed;
+ }
+ }
+
+ /// Per-address space registers ASn [0..15] within the MMU_CONTROL page.
+ ///
+ /// This array contains 16 instances of the MMU_AS_CONTROL register page.
+ pub(crate) mod mmu_as_control {
+ use core::convert::TryFrom;
+
+ use kernel::{
+ error::{
+ code::EINVAL,
+ Error, //
+ },
+ num::Bounded,
+ register, //
+ };
+
+ /// Maximum number of hardware address space slots; also the length of every register array
+ /// in this module. The actual number of slots available is usually lower.
+ pub(crate) const MAX_AS: usize = 16;
+
+ /// Address space register stride. The elements in the array are spaced 64B (0x40) apart.
+ const STRIDE: usize = 0x40;
+
+ register! {
+ /// Translation table base address. A 64-bit pointer.
+ ///
+ /// This field contains the address of the top level of a translation table structure.
+ /// This must be 16-byte-aligned, so address bits [3:0] are assumed to be zero.
+ pub(crate) TRANSTAB(u64)[MAX_AS, stride = STRIDE] @ 0x2400 {
+ /// Base address of the translation table.
+ 63:0 base;
+ }
+
+ // TRANSTAB is a logical 64-bit register, but it is laid out in hardware as two
+ // 32-bit halves. Define it as separate low/high u32 registers so accesses match
+ // the MMIO register layout and do not rely on native 64-bit MMIO transactions.
+ //
+ // TRANSTAB_LO shares offset 0x2400 with TRANSTAB; TRANSTAB_HI sits 4 bytes above it.
+ pub(crate) TRANSTAB_LO(u32)[MAX_AS, stride = STRIDE] @ 0x2400 {
+ 31:0 value;
+ }
+
+ pub(crate) TRANSTAB_HI(u32)[MAX_AS, stride = STRIDE] @ 0x2404 {
+ 31:0 value;
+ }
+ }
+
+ /// Helpers for MEMATTR Register.
+ ///
+ /// Inner allocation policy select, used by the `alloc_sel` field of `MMU_MEMATTR_STAGE1`.
+ /// Only the raw values 2 and 3 have a variant.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum AllocPolicySelect {
+ /// Ignore ALLOC_R/ALLOC_W fields.
+ Impl = 2,
+ /// Use ALLOC_R/ALLOC_W fields for allocation policy.
+ Alloc = 3,
+ }
+
+        /// Decodes a raw 2-bit value into an [`AllocPolicySelect`].
+        ///
+        /// Raw values other than 2 and 3 yield `EINVAL`.
+        impl TryFrom<Bounded<u8, 2>> for AllocPolicySelect {
+            type Error = Error;
+
+            fn try_from(val: Bounded<u8, 2>) -> Result<Self, Self::Error> {
+                let select = match val.get() {
+                    2 => Self::Impl,
+                    3 => Self::Alloc,
+                    _ => return Err(EINVAL),
+                };
+
+                Ok(select)
+            }
+        }
+
+        /// Encodes an [`AllocPolicySelect`] as its raw 2-bit value.
+        impl From<AllocPolicySelect> for Bounded<u8, 2> {
+            fn from(val: AllocPolicySelect) -> Self {
+                // Discriminants are 2 or 3, so the `unwrap()` is not expected to fire.
+                let raw = val as u8;
+                Bounded::try_new(raw).unwrap()
+            }
+        }
+
+ /// Coherency policy for memory attributes. Indicates the shareability of cached accesses.
+ ///
+ /// The hardware spec defines different interpretations of these values depending on
+ /// whether TRANSCFG.MODE is set to IDENTITY or not. IDENTITY mode does not use translation
+ /// tables (all input addresses map to the same output address); it is deprecated and not
+ /// used by the driver. This enum assumes that TRANSCFG.MODE is not set to IDENTITY.
+ ///
+ /// Used by the `coherency` field of `MMU_MEMATTR_STAGE1`; raw value 3 has no variant here.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum Coherency {
+ /// Midgard inner domain coherency.
+ ///
+ /// Most flexible mode - can map non-coherent, internally coherent, and system/IO
+ /// coherent memory. Used for non-cacheable memory in MAIR conversion.
+ MidgardInnerDomain = 0,
+ /// CPU inner domain coherency.
+ ///
+ /// Can map non-coherent and system/IO coherent memory. Used for write-back
+ /// cacheable memory in MAIR conversion to maintain CPU-GPU cache coherency.
+ CpuInnerDomain = 1,
+ /// CPU inner domain with shader coherency.
+ ///
+ /// Can map internally coherent and system/IO coherent memory. Used for
+ /// GPU-internal shared buffers requiring shader coherency.
+ CpuInnerDomainShaderCoh = 2,
+ }
+
+        /// Decodes a raw 2-bit value into a [`Coherency`] policy.
+        ///
+        /// Raw values other than 0, 1 and 2 yield `EINVAL`.
+        impl TryFrom<Bounded<u8, 2>> for Coherency {
+            type Error = Error;
+
+            fn try_from(val: Bounded<u8, 2>) -> Result<Self, Self::Error> {
+                let policy = match val.get() {
+                    0 => Self::MidgardInnerDomain,
+                    1 => Self::CpuInnerDomain,
+                    2 => Self::CpuInnerDomainShaderCoh,
+                    _ => return Err(EINVAL),
+                };
+
+                Ok(policy)
+            }
+        }
+
+        /// Encodes a [`Coherency`] policy as its raw 2-bit value.
+        impl From<Coherency> for Bounded<u8, 2> {
+            fn from(val: Coherency) -> Self {
+                // Discriminants are 0..=2, so the `unwrap()` is not expected to fire.
+                let raw = val as u8;
+                Bounded::try_new(raw).unwrap()
+            }
+        }
+
+ /// Memory type, used by the `memory_type` field of `MMU_MEMATTR_STAGE1`.
+ ///
+ /// All four raw 2-bit values have a variant.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum MemoryType {
+ /// Normal memory (shared).
+ Shared = 0,
+ /// Normal memory, inner/outer non-cacheable.
+ NonCacheable = 1,
+ /// Normal memory, inner/outer write-back cacheable.
+ WriteBack = 2,
+ /// Triggers MEMORY_ATTRIBUTE_FAULT.
+ Fault = 3,
+ }
+
+        /// Decodes a raw 2-bit value into a [`MemoryType`]; all four raw values have a variant.
+        impl From<Bounded<u8, 2>> for MemoryType {
+            fn from(val: Bounded<u8, 2>) -> Self {
+                let raw = val.get();
+
+                match raw {
+                    0 => MemoryType::Shared,
+                    1 => MemoryType::NonCacheable,
+                    2 => MemoryType::WriteBack,
+                    3 => MemoryType::Fault,
+                    // Presumably a `Bounded<u8, 2>` never holds more than two bits.
+                    _ => unreachable!(),
+                }
+            }
+        }
+
+        /// Encodes a [`MemoryType`] as its raw 2-bit value.
+        impl From<MemoryType> for Bounded<u8, 2> {
+            fn from(val: MemoryType) -> Self {
+                // Discriminants are 0..=3, so the `unwrap()` is not expected to fire.
+                let raw = val as u8;
+                Bounded::try_new(raw).unwrap()
+            }
+        }
+
+ register! {
+ /// Stage 1 memory attributes (8-bit bitfield).
+ ///
+ /// This is not an actual register, but a bitfield definition used by the MEMATTR
+ /// register. Each of the 8 bytes in MEMATTR follows this layout.
+ // The `@ 0x0` offset is a placeholder, since this type is not an actual register.
+ MMU_MEMATTR_STAGE1(u8) @ 0x0 {
+ /// Inner cache write allocation policy (bit 0).
+ 0:0 alloc_w => bool;
+ /// Inner cache read allocation policy (bit 1).
+ 1:1 alloc_r => bool;
+ /// Inner allocation policy select (bits 3:2).
+ 3:2 alloc_sel ?=> AllocPolicySelect;
+ /// Coherency policy (bits 5:4).
+ 5:4 coherency ?=> Coherency;
+ /// Memory type (bits 7:6).
+ 7:6 memory_type => MemoryType;
+ }
+ }
+
+        /// Wraps one 8-bit MEMATTR attribute slot as an [`MMU_MEMATTR_STAGE1`] bitfield.
+        ///
+        /// This conversion always succeeds: the raw byte is stored as-is.
+        impl TryFrom<Bounded<u64, 8>> for MMU_MEMATTR_STAGE1 {
+            type Error = Error;
+
+            fn try_from(val: Bounded<u64, 8>) -> Result<Self, Self::Error> {
+                // Narrow the 8-bit bounded value down to the bitfield's `u8` storage.
+                let raw = val.get() as u8;
+
+                Ok(Self::from_raw(raw))
+            }
+        }
+
+        /// Encodes an [`MMU_MEMATTR_STAGE1`] bitfield as a raw 8-bit MEMATTR attribute slot.
+        impl From<MMU_MEMATTR_STAGE1> for Bounded<u64, 8> {
+            fn from(val: MMU_MEMATTR_STAGE1) -> Self {
+                // The raw bitfield value is widened losslessly before being bounded.
+                let raw = u64::from(val.into_raw());
+                Bounded::try_new(raw).unwrap()
+            }
+        }
+
+ register! {
+ /// Memory attributes.
+ ///
+ /// Each address space can configure up to 8 different memory attribute profiles.
+ /// Each attribute profile follows the MMU_MEMATTR_STAGE1 layout and occupies one byte,
+ /// with `attribute0` in the lowest byte and `attribute7` in the highest.
+ pub(crate) MEMATTR(u64)[MAX_AS, stride = STRIDE] @ 0x2408 {
+ 7:0 attribute0 ?=> MMU_MEMATTR_STAGE1;
+ 15:8 attribute1 ?=> MMU_MEMATTR_STAGE1;
+ 23:16 attribute2 ?=> MMU_MEMATTR_STAGE1;
+ 31:24 attribute3 ?=> MMU_MEMATTR_STAGE1;
+ 39:32 attribute4 ?=> MMU_MEMATTR_STAGE1;
+ 47:40 attribute5 ?=> MMU_MEMATTR_STAGE1;
+ 55:48 attribute6 ?=> MMU_MEMATTR_STAGE1;
+ 63:56 attribute7 ?=> MMU_MEMATTR_STAGE1;
+ }
+
+ // MEMATTR is a logical 64-bit register, but it is laid out in hardware as two
+ // 32-bit halves. Define it as separate low/high u32 registers so accesses match
+ // the MMIO register layout and do not rely on native 64-bit MMIO transactions.
+ //
+ // MEMATTR_LO shares offset 0x2408 with MEMATTR; MEMATTR_HI sits 4 bytes above it.
+ pub(crate) MEMATTR_LO(u32)[MAX_AS, stride = STRIDE] @ 0x2408 {
+ 31:0 value;
+ }
+
+ pub(crate) MEMATTR_HI(u32)[MAX_AS, stride = STRIDE] @ 0x240c {
+ 31:0 value;
+ }
+
+ /// Lock region address for each address space.
+ pub(crate) LOCKADDR(u64)[MAX_AS, stride = STRIDE] @ 0x2410 {
+ /// Lock region size (bits 5:0).
+ 5:0 size;
+ /// Lock region base address (bits 63:12).
+ 63:12 base;
+ }
+
+ // LOCKADDR is a logical 64-bit register, but it is laid out in hardware as two
+ // 32-bit halves. Define it as separate low/high u32 registers so accesses match
+ // the MMIO register layout and do not rely on native 64-bit MMIO transactions.
+ //
+ // LOCKADDR_LO shares offset 0x2410 with LOCKADDR; LOCKADDR_HI sits 4 bytes above it.
+ pub(crate) LOCKADDR_LO(u32)[MAX_AS, stride = STRIDE] @ 0x2410 {
+ 31:0 value;
+ }
+
+ pub(crate) LOCKADDR_HI(u32)[MAX_AS, stride = STRIDE] @ 0x2414 {
+ 31:0 value;
+ }
+ }
+
+ /// Commands written to the `command` field of the per-address-space `COMMAND` register.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum MmuCommand {
+ /// No operation, nothing happens.
+ Nop = 0,
+ /// Propagate settings to the MMU.
+ Update = 1,
+ /// Lock an address region.
+ Lock = 2,
+ /// Unlock an address region.
+ Unlock = 3,
+ /// Clean and invalidate the L2 cache, then unlock.
+ FlushPt = 4,
+ /// Clean and invalidate all caches, then unlock.
+ FlushMem = 5,
+ }
+
+        /// Decodes a raw 8-bit value into an [`MmuCommand`].
+        ///
+        /// Raw values above 5 yield `EINVAL`.
+        impl TryFrom<Bounded<u32, 8>> for MmuCommand {
+            type Error = Error;
+
+            fn try_from(val: Bounded<u32, 8>) -> Result<Self, Self::Error> {
+                let command = match val.get() {
+                    0 => Self::Nop,
+                    1 => Self::Update,
+                    2 => Self::Lock,
+                    3 => Self::Unlock,
+                    4 => Self::FlushPt,
+                    5 => Self::FlushMem,
+                    _ => return Err(EINVAL),
+                };
+
+                Ok(command)
+            }
+        }
+
+        /// Encodes an [`MmuCommand`] as its raw 8-bit value.
+        impl From<MmuCommand> for Bounded<u32, 8> {
+            fn from(cmd: MmuCommand) -> Self {
+                // The cast yields the enum discriminant as a `u8`, which is then widened.
+                let raw = cmd as u8;
+                raw.into()
+            }
+        }
+
+ register! {
+ /// MMU command register for each address space. Write only.
+ pub(crate) COMMAND(u32)[MAX_AS, stride = STRIDE] @ 0x2418 {
+ /// Command to issue (bits 7:0); see [`MmuCommand`] for the defined values.
+ 7:0 command ?=> MmuCommand;
+ }
+ }
+
+ /// MMU exception types reported in the `exception_type` field of the FAULTSTATUS register.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum MmuExceptionType {
+ /// No error.
+ Ok = 0x00,
+ /// Invalid translation table entry, level 0.
+ TranslationFault0 = 0xC0,
+ /// Invalid translation table entry, level 1.
+ TranslationFault1 = 0xC1,
+ /// Invalid translation table entry, level 2.
+ TranslationFault2 = 0xC2,
+ /// Invalid translation table entry, level 3.
+ TranslationFault3 = 0xC3,
+ /// Invalid block descriptor.
+ TranslationFault4 = 0xC4,
+ /// Page permission error, level 0.
+ PermissionFault0 = 0xC8,
+ /// Page permission error, level 1.
+ PermissionFault1 = 0xC9,
+ /// Page permission error, level 2.
+ PermissionFault2 = 0xCA,
+ /// Page permission error, level 3.
+ PermissionFault3 = 0xCB,
+ /// Access flag not set, level 1.
+ AccessFlag1 = 0xD9,
+ /// Access flag not set, level 2.
+ AccessFlag2 = 0xDA,
+ /// Access flag not set, level 3.
+ AccessFlag3 = 0xDB,
+ /// Virtual address out of range.
+ AddressSizeFaultIn = 0xE0,
+ /// Physical address out of range, level 0.
+ AddressSizeFaultOut0 = 0xE4,
+ /// Physical address out of range, level 1.
+ AddressSizeFaultOut1 = 0xE5,
+ /// Physical address out of range, level 2.
+ AddressSizeFaultOut2 = 0xE6,
+ /// Physical address out of range, level 3.
+ AddressSizeFaultOut3 = 0xE7,
+ /// Page attribute error, level 0.
+ MemoryAttributeFault0 = 0xE8,
+ /// Page attribute error, level 1.
+ MemoryAttributeFault1 = 0xE9,
+ /// Page attribute error, level 2.
+ MemoryAttributeFault2 = 0xEA,
+ /// Page attribute error, level 3.
+ MemoryAttributeFault3 = 0xEB,
+ }
+
+        /// Decodes a raw 8-bit exception code into an [`MmuExceptionType`].
+        ///
+        /// Codes without a matching variant yield `EINVAL`.
+        impl TryFrom<Bounded<u32, 8>> for MmuExceptionType {
+            type Error = Error;
+
+            fn try_from(val: Bounded<u32, 8>) -> Result<Self, Self::Error> {
+                let exception = match val.get() {
+                    0x00 => Self::Ok,
+                    0xC0 => Self::TranslationFault0,
+                    0xC1 => Self::TranslationFault1,
+                    0xC2 => Self::TranslationFault2,
+                    0xC3 => Self::TranslationFault3,
+                    0xC4 => Self::TranslationFault4,
+                    0xC8 => Self::PermissionFault0,
+                    0xC9 => Self::PermissionFault1,
+                    0xCA => Self::PermissionFault2,
+                    0xCB => Self::PermissionFault3,
+                    0xD9 => Self::AccessFlag1,
+                    0xDA => Self::AccessFlag2,
+                    0xDB => Self::AccessFlag3,
+                    0xE0 => Self::AddressSizeFaultIn,
+                    0xE4 => Self::AddressSizeFaultOut0,
+                    0xE5 => Self::AddressSizeFaultOut1,
+                    0xE6 => Self::AddressSizeFaultOut2,
+                    0xE7 => Self::AddressSizeFaultOut3,
+                    0xE8 => Self::MemoryAttributeFault0,
+                    0xE9 => Self::MemoryAttributeFault1,
+                    0xEA => Self::MemoryAttributeFault2,
+                    0xEB => Self::MemoryAttributeFault3,
+                    _ => return Err(EINVAL),
+                };
+
+                Ok(exception)
+            }
+        }
+
+        /// Encodes an [`MmuExceptionType`] as its raw 8-bit exception code.
+        impl From<MmuExceptionType> for Bounded<u32, 8> {
+            fn from(exc: MmuExceptionType) -> Self {
+                // The cast yields the enum discriminant as a `u8`, which is then widened.
+                let raw = exc as u8;
+                raw.into()
+            }
+        }
+
+ /// Access type for MMU faults, reported in the `access_type` field of FAULTSTATUS.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum MmuAccessType {
+ /// An atomic (read/write) transaction.
+ Atomic = 0,
+ /// An execute transaction.
+ Execute = 1,
+ /// A read transaction.
+ Read = 2,
+ /// A write transaction.
+ Write = 3,
+ }
+
+        /// Decodes a raw 2-bit value into an [`MmuAccessType`]; all four raw values have a
+        /// variant.
+        impl From<Bounded<u32, 2>> for MmuAccessType {
+            fn from(val: Bounded<u32, 2>) -> Self {
+                let raw = val.get();
+
+                match raw {
+                    0 => Self::Atomic,
+                    1 => Self::Execute,
+                    2 => Self::Read,
+                    3 => Self::Write,
+                    // Presumably a `Bounded<u32, 2>` never holds more than two bits.
+                    _ => unreachable!(),
+                }
+            }
+        }
+
+        /// Encodes an [`MmuAccessType`] as its raw 2-bit value.
+        impl From<MmuAccessType> for Bounded<u32, 2> {
+            fn from(access: MmuAccessType) -> Self {
+                // Discriminants are 0..=3, so the `unwrap()` is not expected to fire.
+                let raw = access as u32;
+                Bounded::try_new(raw).unwrap()
+            }
+        }
+
+ register! {
+ /// Fault status register for each address space. Read only.
+ pub(crate) FAULTSTATUS(u32)[MAX_AS, stride = STRIDE] @ 0x241c {
+ /// Exception type (bits 7:0).
+ 7:0 exception_type ?=> MmuExceptionType;
+ /// Access type (bits 9:8).
+ 9:8 access_type => MmuAccessType;
+ /// ID of the source that triggered the fault (bits 31:16).
+ 31:16 source_id;
+ }
+
+ /// Fault address for each address space. Read only.
+ ///
+ /// Exposed as two 32-bit halves: this register and `FAULTADDRESS_HI`, 4 bytes above it.
+ pub(crate) FAULTADDRESS_LO(u32)[MAX_AS, stride = STRIDE] @ 0x2420 {
+ 31:0 pointer;
+ }
+
+ pub(crate) FAULTADDRESS_HI(u32)[MAX_AS, stride = STRIDE] @ 0x2424 {
+ 31:0 pointer;
+ }
+
+ /// MMU status register for each address space. Read only.
+ pub(crate) STATUS(u32)[MAX_AS, stride = STRIDE] @ 0x2428 {
+ /// External address space command is active, a 1-bit boolean flag.
+ 0:0 active_ext => bool;
+ /// Internal address space command is active, a 1-bit boolean flag.
+ 1:1 active_int => bool;
+ }
+ }
+
+ /// Helpers for TRANSCFG register.
+ ///
+ /// Address space mode, used by the `mode` field of the TRANSCFG register.
+ /// Only the raw values 1, 2, 6 and 8 have a variant.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum AddressSpaceMode {
+ /// The MMU forces all memory access to fail with a decode fault.
+ Unmapped = 1,
+ /// All input addresses map to the same output address (deprecated).
+ Identity = 2,
+ /// Translation tables interpreted according to AArch64 4kB granule specification.
+ Aarch64_4K = 6,
+ /// Translation tables interpreted according to AArch64 64kB granule specification.
+ Aarch64_64K = 8,
+ }
+
+        /// Decodes a raw 4-bit value into an [`AddressSpaceMode`].
+        ///
+        /// Raw values other than 1, 2, 6 and 8 yield `EINVAL`.
+        impl TryFrom<Bounded<u64, 4>> for AddressSpaceMode {
+            type Error = Error;
+
+            fn try_from(val: Bounded<u64, 4>) -> Result<Self, Self::Error> {
+                let mode = match val.get() {
+                    1 => Self::Unmapped,
+                    2 => Self::Identity,
+                    6 => Self::Aarch64_4K,
+                    8 => Self::Aarch64_64K,
+                    _ => return Err(EINVAL),
+                };
+
+                Ok(mode)
+            }
+        }
+
+        /// Encodes an [`AddressSpaceMode`] as its raw 4-bit value.
+        impl From<AddressSpaceMode> for Bounded<u64, 4> {
+            fn from(mode: AddressSpaceMode) -> Self {
+                // The largest discriminant is 8, so the `unwrap()` is not expected to fire.
+                let raw = mode as u64;
+                Bounded::try_new(raw).unwrap()
+            }
+        }
+
+ /// Input address range restriction for the `ina_bits` field of the TRANSCFG register.
+ ///
+ /// Apart from `Reset`, raw value `n` selects a `55 - n`-bit VA range (7 => 48, 30 => 25).
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum InaBits {
+ /// Invalid VA range (reset value).
+ Reset = 0,
+ /// 48-bit VA range.
+ Bits48 = 7,
+ /// 47-bit VA range.
+ Bits47 = 8,
+ /// 46-bit VA range.
+ Bits46 = 9,
+ /// 45-bit VA range.
+ Bits45 = 10,
+ /// 44-bit VA range.
+ Bits44 = 11,
+ /// 43-bit VA range.
+ Bits43 = 12,
+ /// 42-bit VA range.
+ Bits42 = 13,
+ /// 41-bit VA range.
+ Bits41 = 14,
+ /// 40-bit VA range.
+ Bits40 = 15,
+ /// 39-bit VA range.
+ Bits39 = 16,
+ /// 38-bit VA range.
+ Bits38 = 17,
+ /// 37-bit VA range.
+ Bits37 = 18,
+ /// 36-bit VA range.
+ Bits36 = 19,
+ /// 35-bit VA range.
+ Bits35 = 20,
+ /// 34-bit VA range.
+ Bits34 = 21,
+ /// 33-bit VA range.
+ Bits33 = 22,
+ /// 32-bit VA range.
+ Bits32 = 23,
+ /// 31-bit VA range.
+ Bits31 = 24,
+ /// 30-bit VA range.
+ Bits30 = 25,
+ /// 29-bit VA range.
+ Bits29 = 26,
+ /// 28-bit VA range.
+ Bits28 = 27,
+ /// 27-bit VA range.
+ Bits27 = 28,
+ /// 26-bit VA range.
+ Bits26 = 29,
+ /// 25-bit VA range.
+ Bits25 = 30,
+ }
+
+        /// Decodes a raw 5-bit value into an [`InaBits`] restriction.
+        ///
+        /// Raw values 1..=6 and 31 have no variant and yield `EINVAL`.
+        impl TryFrom<Bounded<u64, 5>> for InaBits {
+            type Error = Error;
+
+            fn try_from(val: Bounded<u64, 5>) -> Result<Self, Self::Error> {
+                let bits = match val.get() {
+                    0 => Self::Reset,
+                    7 => Self::Bits48,
+                    8 => Self::Bits47,
+                    9 => Self::Bits46,
+                    10 => Self::Bits45,
+                    11 => Self::Bits44,
+                    12 => Self::Bits43,
+                    13 => Self::Bits42,
+                    14 => Self::Bits41,
+                    15 => Self::Bits40,
+                    16 => Self::Bits39,
+                    17 => Self::Bits38,
+                    18 => Self::Bits37,
+                    19 => Self::Bits36,
+                    20 => Self::Bits35,
+                    21 => Self::Bits34,
+                    22 => Self::Bits33,
+                    23 => Self::Bits32,
+                    24 => Self::Bits31,
+                    25 => Self::Bits30,
+                    26 => Self::Bits29,
+                    27 => Self::Bits28,
+                    28 => Self::Bits27,
+                    29 => Self::Bits26,
+                    30 => Self::Bits25,
+                    _ => return Err(EINVAL),
+                };
+
+                Ok(bits)
+            }
+        }
+
+        /// Encodes an [`InaBits`] restriction as its raw 5-bit value.
+        impl From<InaBits> for Bounded<u64, 5> {
+            fn from(bits: InaBits) -> Self {
+                // The largest discriminant is 30, so the `unwrap()` is not expected to fire.
+                let raw = bits as u64;
+                Bounded::try_new(raw).unwrap()
+            }
+        }
+
+ /// Translation table memory attributes for the `ptw_memattr` field of the TRANSCFG register.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ pub(crate) enum PtwMemattr {
+ /// Invalid (reset value, not valid for enabled address space).
+ Invalid = 0,
+ /// Normal memory, inner/outer non-cacheable.
+ NonCacheable = 1,
+ /// Normal memory, inner/outer write-back cacheable.
+ WriteBack = 2,
+ }
+
+        /// Decodes a raw 2-bit value into a [`PtwMemattr`].
+        ///
+        /// Raw value 3 has no variant and yields `EINVAL`.
+        impl TryFrom<Bounded<u64, 2>> for PtwMemattr {
+            type Error = Error;
+
+            fn try_from(val: Bounded<u64, 2>) -> Result<Self, Self::Error> {
+                let attr = match val.get() {
+                    0 => Self::Invalid,
+                    1 => Self::NonCacheable,
+                    2 => Self::WriteBack,
+                    _ => return Err(EINVAL),
+                };
+
+                Ok(attr)
+            }
+        }
+
+        /// Encodes a [`PtwMemattr`] as its raw 2-bit value.
+        impl From<PtwMemattr> for Bounded<u64, 2> {
+            fn from(attr: PtwMemattr) -> Self {
+                // Discriminants are 0..=2, so the `unwrap()` is not expected to fire.
+                let raw = attr as u64;
+                Bounded::try_new(raw).unwrap()
+            }
+        }
+
+ /// Translation table memory shareability for the `ptw_sh` field of the TRANSCFG register.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(u8)]
+ // Every variant name ends in "Shareable", which trips `clippy::enum_variant_names`.
+ #[allow(clippy::enum_variant_names)]
+ pub(crate) enum PtwShareability {
+ /// Non-shareable.
+ NonShareable = 0,
+ /// Outer shareable.
+ OuterShareable = 2,
+ /// Inner shareable.
+ InnerShareable = 3,
+ }
+
+        /// Decodes a raw 2-bit value into a [`PtwShareability`].
+        ///
+        /// Raw value 1 has no variant and yields `EINVAL`.
+        impl TryFrom<Bounded<u64, 2>> for PtwShareability {
+            type Error = Error;
+
+            fn try_from(val: Bounded<u64, 2>) -> Result<Self, Self::Error> {
+                let shareability = match val.get() {
+                    0 => Self::NonShareable,
+                    2 => Self::OuterShareable,
+                    3 => Self::InnerShareable,
+                    _ => return Err(EINVAL),
+                };
+
+                Ok(shareability)
+            }
+        }
+
+        /// Encodes a [`PtwShareability`] as its raw 2-bit value.
+        impl From<PtwShareability> for Bounded<u64, 2> {
+            fn from(sh: PtwShareability) -> Self {
+                // Discriminants are 0, 2 or 3, so the `unwrap()` is not expected to fire.
+                let raw = sh as u64;
+                Bounded::try_new(raw).unwrap()
+            }
+        }
+
+ register! {
+ /// Translation configuration and control for each address space.
+ pub(crate) TRANSCFG(u64)[MAX_AS, stride = STRIDE] @ 0x2430 {
+ /// Address space mode.
+ 3:0 mode ?=> AddressSpaceMode;
+ /// Address input restriction.
+ 10:6 ina_bits ?=> InaBits;
+ /// Address output restriction.
+ 18:14 outa_bits;
+ /// Translation table concatenation enable, a 1-bit boolean flag.
+ 22:22 sl_concat_en => bool;
+ /// Translation table memory attributes.
+ 25:24 ptw_memattr ?=> PtwMemattr;
+ /// Translation table memory shareability.
+ 29:28 ptw_sh ?=> PtwShareability;
+ /// Inner read allocation hint for translation table walks, a 1-bit boolean flag.
+ 30:30 r_allocate => bool;
+ /// Disable hierarchical access permissions, a 1-bit boolean flag.
+ 33:33 disable_hier_ap => bool;
+ /// Disable access fault checking, a 1-bit boolean flag.
+ 34:34 disable_af_fault => bool;
+ /// Disable execution on all writable pages, a 1-bit boolean flag.
+ 35:35 wxn => bool;
+ /// Enable execution on readable pages, a 1-bit boolean flag.
+ 36:36 xreadable => bool;
+ /// Page-based hardware attributes for translation table walks.
+ 63:60 ptw_pbha;
+ }
+
+ // TRANSCFG is a logical 64-bit register, but it is laid out in hardware as two
+ // 32-bit halves. Define it as separate low/high u32 registers so accesses match
+ // the MMIO register layout and do not rely on native 64-bit MMIO transactions.
+ //
+ // TRANSCFG_LO shares offset 0x2430 with TRANSCFG; TRANSCFG_HI sits 4 bytes above it.
+ pub(crate) TRANSCFG_LO(u32)[MAX_AS, stride = STRIDE] @ 0x2430 {
+ 31:0 value;
+ }
+
+ pub(crate) TRANSCFG_HI(u32)[MAX_AS, stride = STRIDE] @ 0x2434 {
+ 31:0 value;
+ }
+
+ /// Extra fault information for each address space. Read only.
+ ///
+ /// Exposed as two 32-bit halves: this register and `FAULTEXTRA_HI`, 4 bytes above it.
+ pub(crate) FAULTEXTRA_LO(u32)[MAX_AS, stride = STRIDE] @ 0x2438 {
+ 31:0 value;
+ }
+
+ pub(crate) FAULTEXTRA_HI(u32)[MAX_AS, stride = STRIDE] @ 0x243c {
+ 31:0 value;
+ }
+ }
+ }
+}
+
+/// This module corresponds to the DOORBELL_BLOCK_n[0-63] register pages.
+pub(crate) mod doorbell_block {
+ use kernel::register;
+
+ /// Number of doorbells available; also the length of the `DOORBELL` register array.
+ pub(crate) const NUM_DOORBELLS: usize = 64;
+
+ /// Doorbell block stride (64KiB).
+ ///
+ /// Each block occupies a full page, allowing it to be mapped
+ /// separately into a virtual address space.
+ const STRIDE: usize = 0x10000;
+
+ register! {
+ /// Doorbell request register, one per doorbell block starting at offset 0x80000.
+ /// Write-only.
+ pub(crate) DOORBELL(u32)[NUM_DOORBELLS, stride = STRIDE] @ 0x80000 {
+ /// Doorbell set. Writing 1 triggers the doorbell.
+ 0:0 ring => bool;
+ }
+ }
+}
diff --git a/drivers/gpu/drm/tyr/tyr.rs b/drivers/gpu/drm/tyr/tyr.rs
index 9432ddd6b5b8..95cda7b0962f 100644
--- a/drivers/gpu/drm/tyr/tyr.rs
+++ b/drivers/gpu/drm/tyr/tyr.rs
@@ -5,7 +5,7 @@
//! The name "Tyr" is inspired by Norse mythology, reflecting Arm's tradition of
//! naming their GPUs after Nordic mythological figures and places.
-use crate::driver::TyrPlatformDriverData;
+use crate::driver::TyrPlatformDriver;
mod driver;
mod file;
@@ -14,7 +14,7 @@ mod gpu;
mod regs;
kernel::module_platform_driver! {
- type: TyrPlatformDriverData,
+ type: TyrPlatformDriver,
name: "tyr",
authors: ["The Tyr driver authors"],
description: "Arm Mali Tyr DRM driver",
diff --git a/drivers/gpu/nova-core/Kconfig b/drivers/gpu/nova-core/Kconfig
index a4f2380654e2..f918f69e0599 100644
--- a/drivers/gpu/nova-core/Kconfig
+++ b/drivers/gpu/nova-core/Kconfig
@@ -3,6 +3,7 @@ config NOVA_CORE
depends on 64BIT
depends on PCI
depends on RUST
+ depends on !CPU_BIG_ENDIAN
select AUXILIARY_BUS
select RUST_FW_LOADER_ABSTRACTIONS
default n
@@ -13,4 +14,4 @@ config NOVA_CORE
This driver is work in progress and may not be functional.
- If M is selected, the module will be called nova_core.
+ If M is selected, the module will be called nova-core.
diff --git a/drivers/gpu/nova-core/Makefile b/drivers/gpu/nova-core/Makefile
index 2d78c50126e1..4ae544f808f4 100644
--- a/drivers/gpu/nova-core/Makefile
+++ b/drivers/gpu/nova-core/Makefile
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_NOVA_CORE) += nova_core.o
+obj-$(CONFIG_NOVA_CORE) += nova-core.o
+nova-core-y := nova_core.o
diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
index 84b0e1703150..5738d4ac521b 100644
--- a/drivers/gpu/nova-core/driver.rs
+++ b/drivers/gpu/nova-core/driver.rs
@@ -3,9 +3,6 @@
use kernel::{
auxiliary,
device::Core,
- devres::Devres,
- dma::Device,
- dma::DmaMask,
pci,
pci::{
Class,
@@ -14,13 +11,11 @@ use kernel::{
},
prelude::*,
sizes::SZ_16M,
- sync::{
- atomic::{
- Atomic,
- Relaxed, //
- },
- Arc,
+ sync::atomic::{
+ Atomic,
+ Relaxed, //
},
+ types::ForLt,
};
use crate::gpu::Gpu;
@@ -29,29 +24,24 @@ use crate::gpu::Gpu;
static AUXILIARY_ID_COUNTER: Atomic<u32> = Atomic::new(0);
#[pin_data]
-pub(crate) struct NovaCore {
+pub(crate) struct NovaCore<'bound> {
#[pin]
- pub(crate) gpu: Gpu,
- #[pin]
- _reg: Devres<auxiliary::Registration>,
+ pub(crate) gpu: Gpu<'bound>,
+ bar: pci::Bar<'bound, BAR0_SIZE>,
+ #[allow(clippy::type_complexity)]
+ _reg: auxiliary::Registration<'bound, ForLt!(())>,
}
-const BAR0_SIZE: usize = SZ_16M;
+pub(crate) struct NovaCoreDriver;
-// For now we only support Ampere which can use up to 47-bit DMA addresses.
-//
-// TODO: Add an abstraction for this to support newer GPUs which may support
-// larger DMA addresses. Limiting these GPUs to smaller address widths won't
-// have any adverse affects, unless installed on systems which require larger
-// DMA addresses. These systems should be quite rare.
-const GPU_DMA_BITS: u32 = 47;
+const BAR0_SIZE: usize = SZ_16M;
-pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
+pub(crate) type Bar0<'a> = &'a pci::Bar<'a, BAR0_SIZE>;
kernel::pci_device_table!(
PCI_TABLE,
MODULE_PCI_TABLE,
- <NovaCore as pci::Driver>::IdInfo,
+ <NovaCoreDriver as pci::Driver>::IdInfo,
[
// Modern NVIDIA GPUs will show up as either VGA or 3D controllers.
(
@@ -73,42 +63,39 @@ kernel::pci_device_table!(
]
);
-impl pci::Driver for NovaCore {
+impl pci::Driver for NovaCoreDriver {
type IdInfo = ();
+ type Data<'bound> = NovaCore<'bound>;
const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE;
- fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, Error> {
+ fn probe<'bound>(
+ pdev: &'bound pci::Device<Core<'_>>,
+ _info: &'bound Self::IdInfo,
+ ) -> impl PinInit<Self::Data<'bound>, Error> + 'bound {
pin_init::pin_init_scope(move || {
dev_dbg!(pdev, "Probe Nova Core GPU driver.\n");
pdev.enable_device_mem()?;
pdev.set_master();
- // SAFETY: No concurrent DMA allocations or mappings can be made because
- // the device is still being probed and therefore isn't being used by
- // other threads of execution.
- unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? };
-
- let bar = Arc::pin_init(
- pdev.iomap_region_sized::<BAR0_SIZE>(0, c"nova-core/bar0"),
- GFP_KERNEL,
- )?;
-
- Ok(try_pin_init!(Self {
- gpu <- Gpu::new(pdev, bar.clone(), bar.access(pdev.as_ref())?),
- _reg <- auxiliary::Registration::new(
+ Ok(try_pin_init!(NovaCore {
+ bar: pdev.iomap_region_sized::<BAR0_SIZE>(0, c"nova-core/bar0")?,
+ // TODO: Use `&bar` self-referential pin-init syntax once available.
+ //
+ // SAFETY: `bar` is initialized before this expression is evaluated
+ // (`try_pin_init!()` initializes fields in the order they are written in the
+ // initializer), lives at a pinned stable address, and is dropped after `gpu`
+ // (struct fields are dropped in declaration order, and `gpu` is declared first).
+ gpu <- Gpu::new(pdev, unsafe { &*core::ptr::from_ref(bar) }),
+ _reg: auxiliary::Registration::new(
pdev.as_ref(),
c"nova-drm",
// TODO[XARR]: Use XArray or perhaps IDA for proper ID allocation/recycling. For
// now, use a simple atomic counter that never recycles IDs.
AUXILIARY_ID_COUNTER.fetch_add(1, Relaxed),
- crate::MODULE_NAME
- ),
+ crate::MODULE_NAME,
+ (),
+ )?,
}))
})
}
-
- fn unbind(pdev: &pci::Device<Core>, this: Pin<&Self>) {
- this.gpu.unbind(pdev.as_ref());
- }
}
diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
index 33927af4134c..94c7696a6493 100644
--- a/drivers/gpu/nova-core/falcon.rs
+++ b/drivers/gpu/nova-core/falcon.rs
@@ -40,6 +40,7 @@ use crate::{
regs,
};
+pub(crate) mod fsp;
pub(crate) mod gsp;
mod hal;
pub(crate) mod sec2;
@@ -372,7 +373,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
}
/// Resets DMA-related registers.
- pub(crate) fn dma_reset(&self, bar: &Bar0) {
+ pub(crate) fn dma_reset(&self, bar: Bar0<'_>) {
bar.update(regs::NV_PFALCON_FBIF_CTL::of::<E>(), |v| {
v.with_allow_phys_no_ctx(true)
});
@@ -384,7 +385,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
}
/// Reset the controller, select the falcon core, and wait for memory scrubbing to complete.
- pub(crate) fn reset(&self, bar: &Bar0) -> Result {
+ pub(crate) fn reset(&self, bar: Bar0<'_>) -> Result {
self.hal.reset_eng(bar)?;
self.hal.select_core(self, bar)?;
self.hal.reset_wait_mem_scrubbing(bar)?;
@@ -403,7 +404,11 @@ impl<E: FalconEngine + 'static> Falcon<E> {
/// Write a slice to Falcon IMEM memory using programmed I/O (PIO).
///
/// Returns `EINVAL` if `img.len()` is not a multiple of 4.
- fn pio_wr_imem_slice(&self, bar: &Bar0, load_offsets: FalconPioImemLoadTarget<'_>) -> Result {
+ fn pio_wr_imem_slice(
+ &self,
+ bar: Bar0<'_>,
+ load_offsets: FalconPioImemLoadTarget<'_>,
+ ) -> Result {
// Rejecting misaligned images here allows us to avoid checking
// inside the loops.
if load_offsets.data.len() % 4 != 0 {
@@ -440,7 +445,11 @@ impl<E: FalconEngine + 'static> Falcon<E> {
/// Write a slice to Falcon DMEM memory using programmed I/O (PIO).
///
/// Returns `EINVAL` if `img.len()` is not a multiple of 4.
- fn pio_wr_dmem_slice(&self, bar: &Bar0, load_offsets: FalconPioDmemLoadTarget<'_>) -> Result {
+ fn pio_wr_dmem_slice(
+ &self,
+ bar: Bar0<'_>,
+ load_offsets: FalconPioDmemLoadTarget<'_>,
+ ) -> Result {
// Rejecting misaligned images here allows us to avoid checking
// inside the loops.
if load_offsets.data.len() % 4 != 0 {
@@ -468,7 +477,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
/// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it.
pub(crate) fn pio_load<F: FalconFirmware<Target = E> + FalconPioLoadable>(
&self,
- bar: &Bar0,
+ bar: Bar0<'_>,
fw: &F,
) -> Result {
bar.update(regs::NV_PFALCON_FBIF_CTL::of::<E>(), |v| {
@@ -488,7 +497,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
}
self.pio_wr_dmem_slice(bar, fw.dmem_load_params())?;
- self.hal.program_brom(self, bar, &fw.brom_params())?;
+ self.hal.program_brom(self, bar, &fw.brom_params());
bar.write(
WithBase::of::<E>(),
@@ -504,7 +513,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
/// `sec` is set if the loaded firmware is expected to run in secure mode.
fn dma_wr(
&self,
- bar: &Bar0,
+ bar: Bar0<'_>,
dma_obj: &Coherent<[u8]>,
target_mem: FalconMem,
load_offsets: FalconDmaLoadTarget,
@@ -611,7 +620,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
fn dma_load<F: FalconFirmware<Target = E> + FalconDmaLoadable>(
&self,
dev: &Device<device::Bound>,
- bar: &Bar0,
+ bar: Bar0<'_>,
fw: &F,
) -> Result {
// DMA object with firmware content as the source of the DMA engine.
@@ -647,7 +656,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
)?;
self.dma_wr(bar, &dma_obj, FalconMem::Dmem, fw.dmem_load_params())?;
- self.hal.program_brom(self, bar, &fw.brom_params())?;
+ self.hal.program_brom(self, bar, &fw.brom_params());
// Set `BootVec` to start of non-secure code.
bar.write(
@@ -659,7 +668,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
}
/// Wait until the falcon CPU is halted.
- pub(crate) fn wait_till_halted(&self, bar: &Bar0) -> Result<()> {
+ pub(crate) fn wait_till_halted(&self, bar: Bar0<'_>) -> Result<()> {
// TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds.
read_poll_timeout(
|| Ok(bar.read(regs::NV_PFALCON_FALCON_CPUCTL::of::<E>())),
@@ -672,7 +681,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
}
/// Start the falcon CPU.
- pub(crate) fn start(&self, bar: &Bar0) -> Result<()> {
+ pub(crate) fn start(&self, bar: Bar0<'_>) -> Result<()> {
match bar
.read(regs::NV_PFALCON_FALCON_CPUCTL::of::<E>())
.alias_en()
@@ -691,7 +700,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
}
/// Writes values to the mailbox registers if provided.
- pub(crate) fn write_mailboxes(&self, bar: &Bar0, mbox0: Option<u32>, mbox1: Option<u32>) {
+ pub(crate) fn write_mailboxes(&self, bar: Bar0<'_>, mbox0: Option<u32>, mbox1: Option<u32>) {
if let Some(mbox0) = mbox0 {
bar.write(
WithBase::of::<E>(),
@@ -708,19 +717,19 @@ impl<E: FalconEngine + 'static> Falcon<E> {
}
/// Reads the value from `mbox0` register.
- pub(crate) fn read_mailbox0(&self, bar: &Bar0) -> u32 {
+ pub(crate) fn read_mailbox0(&self, bar: Bar0<'_>) -> u32 {
bar.read(regs::NV_PFALCON_FALCON_MAILBOX0::of::<E>())
.value()
}
/// Reads the value from `mbox1` register.
- pub(crate) fn read_mailbox1(&self, bar: &Bar0) -> u32 {
+ pub(crate) fn read_mailbox1(&self, bar: Bar0<'_>) -> u32 {
bar.read(regs::NV_PFALCON_FALCON_MAILBOX1::of::<E>())
.value()
}
/// Reads values from both mailbox registers.
- pub(crate) fn read_mailboxes(&self, bar: &Bar0) -> (u32, u32) {
+ pub(crate) fn read_mailboxes(&self, bar: Bar0<'_>) -> (u32, u32) {
let mbox0 = self.read_mailbox0(bar);
let mbox1 = self.read_mailbox1(bar);
@@ -736,7 +745,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
/// the `MBOX0` and `MBOX1` registers.
pub(crate) fn boot(
&self,
- bar: &Bar0,
+ bar: Bar0<'_>,
mbox0: Option<u32>,
mbox1: Option<u32>,
) -> Result<(u32, u32)> {
@@ -750,7 +759,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
/// falcon instance. `engine_id_mask` and `ucode_id` are obtained from the firmware header.
pub(crate) fn signature_reg_fuse_version(
&self,
- bar: &Bar0,
+ bar: Bar0<'_>,
engine_id_mask: u16,
ucode_id: u8,
) -> Result<u32> {
@@ -761,7 +770,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
/// Check if the RISC-V core is active.
///
/// Returns `true` if the RISC-V core is active, `false` otherwise.
- pub(crate) fn is_riscv_active(&self, bar: &Bar0) -> bool {
+ pub(crate) fn is_riscv_active(&self, bar: Bar0<'_>) -> bool {
self.hal.is_riscv_active(bar)
}
@@ -770,7 +779,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
pub(crate) fn load<F: FalconFirmware<Target = E> + FalconDmaLoadable>(
&self,
dev: &Device<device::Bound>,
- bar: &Bar0,
+ bar: Bar0<'_>,
fw: &F,
) -> Result {
match self.hal.load_method() {
@@ -780,7 +789,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
}
/// Write the application version to the OS register.
- pub(crate) fn write_os_version(&self, bar: &Bar0, app_version: u32) {
+ pub(crate) fn write_os_version(&self, bar: Bar0<'_>, app_version: u32) {
bar.write(
WithBase::of::<E>(),
regs::NV_PFALCON_FALCON_OS::zeroed().with_value(app_version),
diff --git a/drivers/gpu/nova-core/falcon/fsp.rs b/drivers/gpu/nova-core/falcon/fsp.rs
new file mode 100644
index 000000000000..52cdb84ef0e8
--- /dev/null
+++ b/drivers/gpu/nova-core/falcon/fsp.rs
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+//! FSP (Foundation Security Processor) falcon engine for Hopper/Blackwell GPUs.
+//!
+//! The FSP falcon handles secure boot and Chain of Trust operations
+//! on Hopper and Blackwell architectures, replacing SEC2's role.
+
+use kernel::{
+ io::{
+ poll::read_poll_timeout,
+ register::{
+ Array,
+ RegisterBase,
+ WithBase, //
+ },
+ Io, //
+ },
+ prelude::*,
+ time::Delta,
+};
+
+use crate::{
+ driver::Bar0,
+ falcon::{
+ Falcon,
+ FalconEngine,
+ PFalcon2Base,
+ PFalconBase, //
+ },
+ num,
+ regs, //
+};
+
+/// FSP message timeout in milliseconds.
+const FSP_MSG_TIMEOUT_MS: i64 = 2000;
+
+/// Type specifying the `Fsp` falcon engine. Cannot be instantiated.
+pub(crate) struct Fsp(());
+
+impl RegisterBase<PFalconBase> for Fsp {
+ const BASE: usize = 0x8f2000;
+}
+
+impl RegisterBase<PFalcon2Base> for Fsp {
+ const BASE: usize = 0x8f3000;
+}
+
+impl FalconEngine for Fsp {}
+
+impl Falcon<Fsp> {
+ /// Writes `data` to FSP external memory (EMEM), starting at offset `0`.
+ ///
+ /// `data` is interpreted as little-endian 32-bit words. Returns `EINVAL`, before any register
+ /// is written, if the length of `data` is not a multiple of 4.
+ fn write_emem(&mut self, bar: Bar0<'_>, data: &[u8]) -> Result {
+ if data.len() % 4 != 0 {
+ return Err(EINVAL);
+ }
+
+ // Begin a write burst at offset `0`, auto-incrementing on each write.
+ bar.write(
+ WithBase::of::<Fsp>(),
+ regs::NV_PFALCON_FALCON_EMEMC::zeroed().with_aincw(true),
+ );
+
+ // The length check above guarantees that `chunks_exact()` leaves no remainder.
+ for chunk in data.chunks_exact(4) {
+ let value = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
+
+ // Write the next 32-bit `value`; hardware advances the offset.
+ bar.write(
+ WithBase::of::<Fsp>(),
+ regs::NV_PFALCON_FALCON_EMEMD::zeroed().with_data(value),
+ );
+ }
+
+ Ok(())
+ }
+
+ /// Reads FSP external memory (EMEM), starting at offset `0`, into `data`.
+ ///
+ /// Each 32-bit word read is stored into `data` in little-endian byte order. Returns `EINVAL`,
+ /// before any register is accessed, if the length of `data` is not a multiple of 4.
+ fn read_emem(&mut self, bar: Bar0<'_>, data: &mut [u8]) -> Result {
+ if data.len() % 4 != 0 {
+ return Err(EINVAL);
+ }
+
+ // Begin a read burst at offset `0`, auto-incrementing on each read.
+ bar.write(
+ WithBase::of::<Fsp>(),
+ regs::NV_PFALCON_FALCON_EMEMC::zeroed().with_aincr(true),
+ );
+
+ // The length check above guarantees that `chunks_exact_mut()` leaves no remainder.
+ for chunk in data.chunks_exact_mut(4) {
+ // Read the next 32-bit word; hardware advances the offset.
+ let value = bar.read(regs::NV_PFALCON_FALCON_EMEMD::of::<Fsp>()).data();
+ chunk.copy_from_slice(&value.to_le_bytes());
+ }
+
+ Ok(())
+ }
+
+ /// Poll FSP for incoming data.
+ ///
+ /// Returns the size of available data in bytes, or 0 if no data is available.
+ ///
+ /// The FSP message queue is not circular. Pointers are reset to 0 after each
+ /// message exchange, so `tail >= head` is always true when data is present.
+ fn poll_msgq(&self, bar: Bar0<'_>) -> u32 {
+ let head = bar.read(regs::NV_PFSP_MSGQ_HEAD::at(0)).val();
+ let tail = bar.read(regs::NV_PFSP_MSGQ_TAIL::at(0)).val();
+
+ if head == tail {
+ return 0;
+ }
+
+ // TAIL points at last DWORD written, so add 4 to get total size. Saturating arithmetic
+ // keeps the result from wrapping should the registers ever hold unexpected values.
+ tail.saturating_sub(head).saturating_add(4)
+ }
+
+ /// Writes `packet` to FSP EMEM and updates the queue pointers to notify FSP.
+ ///
+ /// Returns `EINVAL` if `packet` is empty, if its length is not a multiple of 4, or if the
+ /// resulting tail offset does not fit in a `u32`.
+ pub(crate) fn send_msg(&mut self, bar: Bar0<'_>, packet: &[u8]) -> Result {
+ if packet.is_empty() {
+ return Err(EINVAL);
+ }
+
+ self.write_emem(bar, packet)?;
+
+ // Update queue pointers. TAIL points at the last DWORD written. `packet.len() - 4` cannot
+ // underflow: `packet` is non-empty and `write_emem()` accepted its length as a multiple
+ // of 4, so it is at least 4 bytes long.
+ //
+ // NOTE(review): TAIL is written before HEAD; presumably the HEAD write is what notifies
+ // FSP, so the order should be kept -- confirm against the hardware documentation.
+ let tail_offset = u32::try_from(packet.len() - 4).map_err(|_| EINVAL)?;
+ bar.write(
+ Array::at(0),
+ regs::NV_PFSP_QUEUE_TAIL::zeroed().with_address(tail_offset),
+ );
+ bar.write(
+ Array::at(0),
+ regs::NV_PFSP_QUEUE_HEAD::zeroed().with_address(0),
+ );
+
+ Ok(())
+ }
+
+ /// Waits for the next FSP message, reads it from EMEM into a newly-allocated buffer and
+ /// resets the message queue pointers.
+ ///
+ /// The message queue is polled every 10 ms for up to `FSP_MSG_TIMEOUT_MS` milliseconds.
+ ///
+ /// Returns `ETIMEDOUT` if no message became available before the timeout, `EINVAL` if the
+ /// size reported by the queue pointers is not a multiple of 4, or the allocation error if the
+ /// buffer could not be allocated. On any error the queue pointers are left untouched.
+ pub(crate) fn recv_msg(&mut self, bar: Bar0<'_>) -> Result<KVec<u8>> {
+ let msg_size = read_poll_timeout(
+ || Ok(self.poll_msgq(bar)),
+ |&size| size > 0,
+ Delta::from_millis(10),
+ Delta::from_millis(FSP_MSG_TIMEOUT_MS),
+ )
+ .map(num::u32_as_usize)?;
+
+ // NOTE(review): `msg_size` comes straight from hardware registers and is not bounded
+ // here -- confirm whether it should be capped before allocating.
+ let mut buffer = KVec::<u8>::new();
+ buffer.resize(msg_size, 0, GFP_KERNEL)?;
+
+ self.read_emem(bar, &mut buffer)?;
+
+ // Reset message queue pointers after reading.
+ bar.write(Array::at(0), regs::NV_PFSP_MSGQ_TAIL::zeroed().with_val(0));
+ bar.write(Array::at(0), regs::NV_PFSP_MSGQ_HEAD::zeroed().with_val(0));
+
+ Ok(buffer)
+ }
+}
diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs
index df6d5a382c7a..d1f6f7fcffff 100644
--- a/drivers/gpu/nova-core/falcon/gsp.rs
+++ b/drivers/gpu/nova-core/falcon/gsp.rs
@@ -24,6 +24,10 @@ use crate::{
regs,
};
+/// Pattern returned by GSP register reads while the PRIV target mask still blocks CPU access.
+const GSP_TARGET_MASK_LOCKED_PATTERN: u32 = 0xbadf_4100;
+const GSP_TARGET_MASK_LOCKED_MASK: u32 = 0xffff_ff00;
+
/// Type specifying the `Gsp` falcon engine. Cannot be instantiated.
pub(crate) struct Gsp(());
@@ -40,7 +44,7 @@ impl FalconEngine for Gsp {}
impl Falcon<Gsp> {
/// Clears the SWGEN0 bit in the Falcon's IRQ status clear register to
/// allow GSP to signal CPU for processing new messages in message queue.
- pub(crate) fn clear_swgen0_intr(&self, bar: &Bar0) {
+ pub(crate) fn clear_swgen0_intr(&self, bar: Bar0<'_>) {
bar.write(
WithBase::of::<Gsp>(),
regs::NV_PFALCON_FALCON_IRQSCLR::zeroed().with_swgen0(true),
@@ -48,7 +52,7 @@ impl Falcon<Gsp> {
}
/// Checks if GSP reload/resume has completed during the boot process.
- pub(crate) fn check_reload_completed(&self, bar: &Bar0, timeout: Delta) -> Result<bool> {
+ pub(crate) fn check_reload_completed(&self, bar: Bar0<'_>, timeout: Delta) -> Result<bool> {
read_poll_timeout(
|| Ok(bar.read(regs::NV_PGC6_BSI_SECURE_SCRATCH_14)),
|val| val.boot_stage_3_handoff(),
@@ -57,4 +61,19 @@ impl Falcon<Gsp> {
)
.map(|_| true)
}
+
+ /// Returns whether the RISC-V branch privilege lockdown bit is set.
+ ///
+ /// The bit is the `riscv_br_priv_lockdown` field of the GSP falcon's
+ /// `NV_PFALCON_FALCON_HWCFG2` register, which is read on every call.
+ pub(crate) fn riscv_branch_privilege_lockdown(&self, bar: Bar0<'_>) -> bool {
+ bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::<Gsp>())
+ .riscv_br_priv_lockdown()
+ }
+
+ /// Returns whether GSP registers can be read by the CPU.
+ ///
+ /// Samples the raw value of the GSP falcon's `NV_PFALCON_FALCON_HWCFG2` register and returns
+ /// `false` if it is `0`, or if its bits selected by `GSP_TARGET_MASK_LOCKED_MASK` equal
+ /// `GSP_TARGET_MASK_LOCKED_PATTERN` (the value reads return while the PRIV target mask still
+ /// blocks CPU access). Returns `true` otherwise.
+ pub(crate) fn priv_target_mask_released(&self, bar: Bar0<'_>) -> bool {
+ let hwcfg2 = bar
+ .read(regs::NV_PFALCON_FALCON_HWCFG2::of::<Gsp>())
+ .into_raw();
+
+ // NOTE(review): an all-zero read is also treated as "not released" -- presumably the
+ // register is not accessible yet in that state; confirm.
+ hwcfg2 != 0 && (hwcfg2 & GSP_TARGET_MASK_LOCKED_MASK) != GSP_TARGET_MASK_LOCKED_PATTERN
+ }
}
diff --git a/drivers/gpu/nova-core/falcon/hal.rs b/drivers/gpu/nova-core/falcon/hal.rs
index a7e5ea8d0272..89b56823906b 100644
--- a/drivers/gpu/nova-core/falcon/hal.rs
+++ b/drivers/gpu/nova-core/falcon/hal.rs
@@ -9,7 +9,10 @@ use crate::{
FalconBromParams,
FalconEngine, //
},
- gpu::Chipset,
+ gpu::{
+ Architecture,
+ Chipset, //
+ },
};
mod ga102;
@@ -31,7 +34,7 @@ pub(crate) enum LoadMethod {
/// registers.
pub(crate) trait FalconHal<E: FalconEngine>: Send + Sync {
/// Activates the Falcon core if the engine is a RISC-V/falcon dual engine.
- fn select_core(&self, _falcon: &Falcon<E>, _bar: &Bar0) -> Result {
+ fn select_core(&self, _falcon: &Falcon<E>, _bar: Bar0<'_>) -> Result {
Ok(())
}
@@ -40,23 +43,23 @@ pub(crate) trait FalconHal<E: FalconEngine>: Send + Sync {
fn signature_reg_fuse_version(
&self,
falcon: &Falcon<E>,
- bar: &Bar0,
+ bar: Bar0<'_>,
engine_id_mask: u16,
ucode_id: u8,
) -> Result<u32>;
/// Program the boot ROM registers prior to starting a secure firmware.
- fn program_brom(&self, falcon: &Falcon<E>, bar: &Bar0, params: &FalconBromParams) -> Result;
+ fn program_brom(&self, falcon: &Falcon<E>, bar: Bar0<'_>, params: &FalconBromParams);
/// Check if the RISC-V core is active.
/// Returns `true` if the RISC-V core is active, `false` otherwise.
- fn is_riscv_active(&self, bar: &Bar0) -> bool;
+ fn is_riscv_active(&self, bar: Bar0<'_>) -> bool;
/// Wait for memory scrubbing to complete.
- fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result;
+ fn reset_wait_mem_scrubbing(&self, bar: Bar0<'_>) -> Result;
/// Reset the falcon engine.
- fn reset_eng(&self, bar: &Bar0) -> Result;
+ fn reset_eng(&self, bar: Bar0<'_>) -> Result;
/// Returns the method used to load data into the falcon's memory.
///
@@ -74,16 +77,21 @@ pub(crate) trait FalconHal<E: FalconEngine>: Send + Sync {
pub(super) fn falcon_hal<E: FalconEngine + 'static>(
chipset: Chipset,
) -> Result<KBox<dyn FalconHal<E>>> {
- use Chipset::*;
-
- let hal = match chipset {
- TU102 | TU104 | TU106 | TU116 | TU117 => {
+ let hal = match chipset.arch() {
+ Architecture::Turing => {
+ KBox::new(tu102::Tu102::<E>::new(), GFP_KERNEL)? as KBox<dyn FalconHal<E>>
+ }
+ // GA100 boots like Turing so use Turing HAL
+ Architecture::Ampere if chipset == Chipset::GA100 => {
KBox::new(tu102::Tu102::<E>::new(), GFP_KERNEL)? as KBox<dyn FalconHal<E>>
}
- GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 => {
+ Architecture::Ampere
+ | Architecture::Ada
+ | Architecture::Hopper
+ | Architecture::BlackwellGB10x
+ | Architecture::BlackwellGB20x => {
KBox::new(ga102::Ga102::<E>::new(), GFP_KERNEL)? as KBox<dyn FalconHal<E>>
}
- _ => return Err(ENOTSUPP),
};
Ok(hal)
diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs
index 8368a61ddeef..cf6ce47e6b25 100644
--- a/drivers/gpu/nova-core/falcon/hal/ga102.rs
+++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs
@@ -31,7 +31,7 @@ use crate::{
use super::FalconHal;
-fn select_core_ga102<E: FalconEngine>(bar: &Bar0) -> Result {
+fn select_core_ga102<E: FalconEngine>(bar: Bar0<'_>) -> Result {
let bcr_ctrl = bar.read(regs::NV_PRISCV_RISCV_BCR_CTRL::of::<E>());
if bcr_ctrl.core_select() != PeregrineCoreSelect::Falcon {
bar.write(
@@ -53,7 +53,7 @@ fn select_core_ga102<E: FalconEngine>(bar: &Bar0) -> Result {
fn signature_reg_fuse_version_ga102(
dev: &device::Device,
- bar: &Bar0,
+ bar: Bar0<'_>,
engine_id_mask: u16,
ucode_id: u8,
) -> Result<u32> {
@@ -86,7 +86,7 @@ fn signature_reg_fuse_version_ga102(
Ok(u16::BITS - reg_fuse_version.leading_zeros())
}
-fn program_brom_ga102<E: FalconEngine>(bar: &Bar0, params: &FalconBromParams) -> Result {
+fn program_brom_ga102<E: FalconEngine>(bar: Bar0<'_>, params: &FalconBromParams) {
bar.write(
WithBase::of::<E>().at(0),
regs::NV_PFALCON2_FALCON_BROM_PARAADDR::zeroed().with_value(params.pkc_data_offset),
@@ -104,8 +104,6 @@ fn program_brom_ga102<E: FalconEngine>(bar: &Bar0, params: &FalconBromParams) ->
WithBase::of::<E>(),
regs::NV_PFALCON2_FALCON_MOD_SEL::zeroed().with_algo(FalconModSelAlgo::Rsa3k),
);
-
- Ok(())
}
pub(super) struct Ga102<E: FalconEngine>(PhantomData<E>);
@@ -117,30 +115,30 @@ impl<E: FalconEngine> Ga102<E> {
}
impl<E: FalconEngine> FalconHal<E> for Ga102<E> {
- fn select_core(&self, _falcon: &Falcon<E>, bar: &Bar0) -> Result {
+ fn select_core(&self, _falcon: &Falcon<E>, bar: Bar0<'_>) -> Result {
select_core_ga102::<E>(bar)
}
fn signature_reg_fuse_version(
&self,
falcon: &Falcon<E>,
- bar: &Bar0,
+ bar: Bar0<'_>,
engine_id_mask: u16,
ucode_id: u8,
) -> Result<u32> {
signature_reg_fuse_version_ga102(&falcon.dev, bar, engine_id_mask, ucode_id)
}
- fn program_brom(&self, _falcon: &Falcon<E>, bar: &Bar0, params: &FalconBromParams) -> Result {
- program_brom_ga102::<E>(bar, params)
+ fn program_brom(&self, _falcon: &Falcon<E>, bar: Bar0<'_>, params: &FalconBromParams) {
+ program_brom_ga102::<E>(bar, params);
}
- fn is_riscv_active(&self, bar: &Bar0) -> bool {
+ fn is_riscv_active(&self, bar: Bar0<'_>) -> bool {
bar.read(regs::NV_PRISCV_RISCV_CPUCTL::of::<E>())
.active_stat()
}
- fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result {
+ fn reset_wait_mem_scrubbing(&self, bar: Bar0<'_>) -> Result {
// TIMEOUT: memory scrubbing should complete in less than 20ms.
read_poll_timeout(
|| Ok(bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::<E>())),
@@ -151,7 +149,7 @@ impl<E: FalconEngine> FalconHal<E> for Ga102<E> {
.map(|_| ())
}
- fn reset_eng(&self, bar: &Bar0) -> Result {
+ fn reset_eng(&self, bar: Bar0<'_>) -> Result {
let _ = bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::<E>());
// According to OpenRM's `kflcnPreResetWait_GA102` documentation, HW sometimes does not set
diff --git a/drivers/gpu/nova-core/falcon/hal/tu102.rs b/drivers/gpu/nova-core/falcon/hal/tu102.rs
index c7a90266cb44..3aaee3869312 100644
--- a/drivers/gpu/nova-core/falcon/hal/tu102.rs
+++ b/drivers/gpu/nova-core/falcon/hal/tu102.rs
@@ -34,30 +34,28 @@ impl<E: FalconEngine> Tu102<E> {
}
impl<E: FalconEngine> FalconHal<E> for Tu102<E> {
- fn select_core(&self, _falcon: &Falcon<E>, _bar: &Bar0) -> Result {
+ fn select_core(&self, _falcon: &Falcon<E>, _bar: Bar0<'_>) -> Result {
Ok(())
}
fn signature_reg_fuse_version(
&self,
_falcon: &Falcon<E>,
- _bar: &Bar0,
+ _bar: Bar0<'_>,
_engine_id_mask: u16,
_ucode_id: u8,
) -> Result<u32> {
Ok(0)
}
- fn program_brom(&self, _falcon: &Falcon<E>, _bar: &Bar0, _params: &FalconBromParams) -> Result {
- Ok(())
- }
+ fn program_brom(&self, _falcon: &Falcon<E>, _bar: Bar0<'_>, _params: &FalconBromParams) {}
- fn is_riscv_active(&self, bar: &Bar0) -> bool {
+ fn is_riscv_active(&self, bar: Bar0<'_>) -> bool {
bar.read(regs::NV_PRISCV_RISCV_CORE_SWITCH_RISCV_STATUS::of::<E>())
.active_stat()
}
- fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result {
+ fn reset_wait_mem_scrubbing(&self, bar: Bar0<'_>) -> Result {
// TIMEOUT: memory scrubbing should complete in less than 10ms.
read_poll_timeout(
|| Ok(bar.read(regs::NV_PFALCON_FALCON_DMACTL::of::<E>())),
@@ -68,7 +66,7 @@ impl<E: FalconEngine> FalconHal<E> for Tu102<E> {
.map(|_| ())
}
- fn reset_eng(&self, bar: &Bar0) -> Result {
+ fn reset_eng(&self, bar: Bar0<'_>) -> Result {
regs::NV_PFALCON_FALCON_ENGINE::reset_engine::<E>(bar);
self.reset_wait_mem_scrubbing(bar)?;
diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs
index bdd5eed760e1..725e428154cf 100644
--- a/drivers/gpu/nova-core/fb.rs
+++ b/drivers/gpu/nova-core/fb.rs
@@ -15,8 +15,7 @@ use kernel::{
Alignable,
Alignment, //
},
- sizes::*,
- sync::aref::ARef, //
+ sizes::*, //
};
use crate::{
@@ -24,11 +23,8 @@ use crate::{
firmware::gsp::GspFirmware,
gpu::Chipset,
gsp,
- num::{
- usize_as_u64,
- FromSafeCast, //
- },
- regs,
+ num::FromSafeCast,
+ regs, //
};
mod hal;
@@ -46,21 +42,20 @@ mod hal;
/// Because of this, the sysmem flush memory page must be registered as early as possible during
/// driver initialization, and before any falcon is reset.
///
-/// Users are responsible for manually calling [`Self::unregister`] before dropping this object,
-/// otherwise the GPU might still use it even after it has been freed.
-pub(crate) struct SysmemFlush {
+pub(crate) struct SysmemFlush<'sys> {
/// Chipset we are operating on.
chipset: Chipset,
- device: ARef<device::Device>,
+ device: &'sys device::Device,
+ bar: Bar0<'sys>,
/// Keep the page alive as long as we need it.
page: CoherentHandle,
}
-impl SysmemFlush {
+impl<'sys> SysmemFlush<'sys> {
/// Allocate a memory page and register it as the sysmem flush page.
pub(crate) fn register(
- dev: &device::Device<device::Bound>,
- bar: &Bar0,
+ dev: &'sys device::Device<device::Bound>,
+ bar: Bar0<'sys>,
chipset: Chipset,
) -> Result<Self> {
let page = CoherentHandle::alloc(dev, kernel::page::PAGE_SIZE, GFP_KERNEL)?;
@@ -69,20 +64,19 @@ impl SysmemFlush {
Ok(Self {
chipset,
- device: dev.into(),
+ device: dev,
+ bar,
page,
})
}
+}
- /// Unregister the managed sysmem flush page.
- ///
- /// In order to gracefully tear down the GPU, users must make sure to call this method before
- /// dropping the object.
- pub(crate) fn unregister(&self, bar: &Bar0) {
+impl Drop for SysmemFlush<'_> {
+ fn drop(&mut self) {
let hal = hal::fb_hal(self.chipset);
- if hal.read_sysmem_flush_page(bar) == self.page.dma_handle() {
- let _ = hal.write_sysmem_flush_page(bar, 0).inspect_err(|e| {
+ if hal.read_sysmem_flush_page(self.bar) == self.page.dma_handle() {
+ let _ = hal.write_sysmem_flush_page(self.bar, 0).inspect_err(|e| {
dev_warn!(
&self.device,
"failed to unregister sysmem flush page: {:?}\n",
@@ -127,8 +121,8 @@ impl fmt::Debug for FbRange {
if f.alternate() {
let size = self.len();
- if size < usize_as_u64(SZ_1M) {
- let size_kib = size / usize_as_u64(SZ_1K);
+ if size < u64::SZ_1M {
+ let size_kib = size / u64::SZ_1K;
f.write_fmt(fmt!(
"{:#x}..{:#x} ({} KiB)",
self.0.start,
@@ -136,7 +130,7 @@ impl fmt::Debug for FbRange {
size_kib
))
} else {
- let size_mib = size / usize_as_u64(SZ_1M);
+ let size_mib = size / u64::SZ_1M;
f.write_fmt(fmt!(
"{:#x}..{:#x} ({} MiB)",
self.0.start,
@@ -171,11 +165,13 @@ pub(crate) struct FbLayout {
pub(crate) wpr2: FbRange,
pub(crate) heap: FbRange,
pub(crate) vf_partition_count: u8,
+ /// PMU reserved memory size, in bytes.
+ pub(crate) pmu_reserved_size: u32,
}
impl FbLayout {
/// Computes the FB layout for `chipset` required to run the `gsp_fw` GSP firmware.
- pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw: &GspFirmware) -> Result<Self> {
+ pub(crate) fn new(chipset: Chipset, bar: Bar0<'_>, gsp_fw: &GspFirmware) -> Result<Self> {
let hal = hal::fb_hal(chipset);
let fb = {
@@ -186,7 +182,7 @@ impl FbLayout {
let vga_workspace = {
let vga_base = {
- const NV_PRAMIN_SIZE: u64 = usize_as_u64(SZ_1M);
+ const NV_PRAMIN_SIZE: u64 = u64::SZ_1M;
let base = fb.end - NV_PRAMIN_SIZE;
if hal.supports_display(bar) {
@@ -196,7 +192,7 @@ impl FbLayout {
{
Some(addr) => {
if addr < base {
- const VBIOS_WORKSPACE_SIZE: u64 = usize_as_u64(SZ_128K);
+ const VBIOS_WORKSPACE_SIZE: u64 = u64::SZ_128K;
// Point workspace address to end of framebuffer.
fb.end - VBIOS_WORKSPACE_SIZE
@@ -216,10 +212,10 @@ impl FbLayout {
let frts = {
const FRTS_DOWN_ALIGN: Alignment = Alignment::new::<SZ_128K>();
- const FRTS_SIZE: u64 = usize_as_u64(SZ_1M);
- let frts_base = vga_workspace.start.align_down(FRTS_DOWN_ALIGN) - FRTS_SIZE;
+ let frts_size: u64 = hal.frts_size();
+ let frts_base = vga_workspace.start.align_down(FRTS_DOWN_ALIGN) - frts_size;
- FbRange(frts_base..frts_base + FRTS_SIZE)
+ FbRange(frts_base..frts_base + frts_size)
};
let boot = {
@@ -241,7 +237,7 @@ impl FbLayout {
let wpr2_heap = {
const WPR2_HEAP_DOWN_ALIGN: Alignment = Alignment::new::<SZ_1M>();
let wpr2_heap_size =
- gsp::LibosParams::from_chipset(chipset).wpr_heap_size(chipset, fb.end);
+ gsp::LibosParams::from_chipset(chipset).wpr_heap_size(chipset, fb.end)?;
let wpr2_heap_addr = (elf.start - wpr2_heap_size).align_down(WPR2_HEAP_DOWN_ALIGN);
FbRange(wpr2_heap_addr..(elf.start).align_down(WPR2_HEAP_DOWN_ALIGN))
@@ -256,9 +252,8 @@ impl FbLayout {
};
let heap = {
- const HEAP_SIZE: u64 = usize_as_u64(SZ_1M);
-
- FbRange(wpr2.start - HEAP_SIZE..wpr2.start)
+ let heap_size = u64::from(hal.non_wpr_heap_size());
+ FbRange(wpr2.start - heap_size..wpr2.start)
};
Ok(Self {
@@ -271,6 +266,7 @@ impl FbLayout {
wpr2,
heap,
vf_partition_count: 0,
+ pmu_reserved_size: hal.pmu_reserved_size(),
})
}
}
diff --git a/drivers/gpu/nova-core/fb/hal.rs b/drivers/gpu/nova-core/fb/hal.rs
index aba0abd8ee00..714f0b51cd8f 100644
--- a/drivers/gpu/nova-core/fb/hal.rs
+++ b/drivers/gpu/nova-core/fb/hal.rs
@@ -1,41 +1,56 @@
// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
use kernel::prelude::*;
use crate::{
driver::Bar0,
- gpu::Chipset, //
+ gpu::{
+ Architecture,
+ Chipset, //
+ },
};
mod ga100;
mod ga102;
+mod gb100;
+mod gb202;
+mod gh100;
mod tu102;
pub(crate) trait FbHal {
/// Returns the address of the currently-registered sysmem flush page.
- fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64;
+ fn read_sysmem_flush_page(&self, bar: Bar0<'_>) -> u64;
/// Register `addr` as the address of the sysmem flush page.
///
/// This might fail if the address is too large for the receiving register.
- fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result;
+ fn write_sysmem_flush_page(&self, bar: Bar0<'_>, addr: u64) -> Result;
/// Returns `true` if display is supported.
- fn supports_display(&self, bar: &Bar0) -> bool;
+ fn supports_display(&self, bar: Bar0<'_>) -> bool;
/// Returns the VRAM size, in bytes.
- fn vidmem_size(&self, bar: &Bar0) -> u64;
+ fn vidmem_size(&self, bar: Bar0<'_>) -> u64;
+
+ /// Returns the amount of VRAM to reserve for the PMU.
+ fn pmu_reserved_size(&self) -> u32;
+
+ /// Returns the non-WPR heap size for this chipset, in bytes.
+ fn non_wpr_heap_size(&self) -> u32;
+
+ /// Returns the FRTS size, in bytes.
+ fn frts_size(&self) -> u64;
}
/// Returns the HAL corresponding to `chipset`.
pub(super) fn fb_hal(chipset: Chipset) -> &'static dyn FbHal {
- use Chipset::*;
-
- match chipset {
- TU102 | TU104 | TU106 | TU117 | TU116 => tu102::TU102_HAL,
- GA100 => ga100::GA100_HAL,
- GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 => {
- ga102::GA102_HAL
- }
+ match chipset.arch() {
+ Architecture::Turing => tu102::TU102_HAL,
+ Architecture::Ampere if chipset == Chipset::GA100 => ga100::GA100_HAL,
+ Architecture::Ampere | Architecture::Ada => ga102::GA102_HAL,
+ Architecture::Hopper => gh100::GH100_HAL,
+ Architecture::BlackwellGB10x => gb100::GB100_HAL,
+ Architecture::BlackwellGB20x => gb202::GB202_HAL,
}
}
diff --git a/drivers/gpu/nova-core/fb/hal/ga100.rs b/drivers/gpu/nova-core/fb/hal/ga100.rs
index 1c03783cddef..3cc1caf361c7 100644
--- a/drivers/gpu/nova-core/fb/hal/ga100.rs
+++ b/drivers/gpu/nova-core/fb/hal/ga100.rs
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
use kernel::{
io::Io,
@@ -16,13 +17,13 @@ use super::tu102::FLUSH_SYSMEM_ADDR_SHIFT;
struct Ga100;
-pub(super) fn read_sysmem_flush_page_ga100(bar: &Bar0) -> u64 {
+pub(super) fn read_sysmem_flush_page_ga100(bar: Bar0<'_>) -> u64 {
u64::from(bar.read(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT
| u64::from(bar.read(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI).adr_63_40())
<< FLUSH_SYSMEM_ADDR_SHIFT_HI
}
-pub(super) fn write_sysmem_flush_page_ga100(bar: &Bar0, addr: u64) {
+pub(super) fn write_sysmem_flush_page_ga100(bar: Bar0<'_>, addr: u64) {
bar.write_reg(
regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr_63_40(
Bounded::<u64, _>::from(addr)
@@ -39,7 +40,7 @@ pub(super) fn write_sysmem_flush_page_ga100(bar: &Bar0, addr: u64) {
);
}
-pub(super) fn display_enabled_ga100(bar: &Bar0) -> bool {
+pub(super) fn display_enabled_ga100(bar: Bar0<'_>) -> bool {
!bar.read(regs::ga100::NV_FUSE_STATUS_OPT_DISPLAY)
.display_disabled()
}
@@ -49,23 +50,37 @@ pub(super) fn display_enabled_ga100(bar: &Bar0) -> bool {
const FLUSH_SYSMEM_ADDR_SHIFT_HI: u32 = 40;
impl FbHal for Ga100 {
- fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
+ fn read_sysmem_flush_page(&self, bar: Bar0<'_>) -> u64 {
read_sysmem_flush_page_ga100(bar)
}
- fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
+ fn write_sysmem_flush_page(&self, bar: Bar0<'_>, addr: u64) -> Result {
write_sysmem_flush_page_ga100(bar, addr);
Ok(())
}
- fn supports_display(&self, bar: &Bar0) -> bool {
+ fn supports_display(&self, bar: Bar0<'_>) -> bool {
display_enabled_ga100(bar)
}
- fn vidmem_size(&self, bar: &Bar0) -> u64 {
+ fn vidmem_size(&self, bar: Bar0<'_>) -> u64 {
super::tu102::vidmem_size_gp102(bar)
}
+
+ fn pmu_reserved_size(&self) -> u32 {
+ super::tu102::pmu_reserved_size_tu102()
+ }
+
+ fn non_wpr_heap_size(&self) -> u32 {
+ super::tu102::non_wpr_heap_size_tu102()
+ }
+
+ // GA100 is a special case where its FRTS region exists, but is empty. We
+ // return a size of 0 because we still need to record where the region starts.
+ fn frts_size(&self) -> u64 {
+ 0
+ }
}
const GA100: Ga100 = Ga100;
diff --git a/drivers/gpu/nova-core/fb/hal/ga102.rs b/drivers/gpu/nova-core/fb/hal/ga102.rs
index 4b9f0f74d0e7..44a2cf8a00f1 100644
--- a/drivers/gpu/nova-core/fb/hal/ga102.rs
+++ b/drivers/gpu/nova-core/fb/hal/ga102.rs
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
use kernel::{
io::Io,
@@ -11,30 +12,42 @@ use crate::{
regs, //
};
-fn vidmem_size_ga102(bar: &Bar0) -> u64 {
+pub(super) fn vidmem_size_ga102(bar: Bar0<'_>) -> u64 {
bar.read(regs::NV_USABLE_FB_SIZE_IN_MB).usable_fb_size()
}
struct Ga102;
impl FbHal for Ga102 {
- fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
+ fn read_sysmem_flush_page(&self, bar: Bar0<'_>) -> u64 {
super::ga100::read_sysmem_flush_page_ga100(bar)
}
- fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
+ fn write_sysmem_flush_page(&self, bar: Bar0<'_>, addr: u64) -> Result {
super::ga100::write_sysmem_flush_page_ga100(bar, addr);
Ok(())
}
- fn supports_display(&self, bar: &Bar0) -> bool {
+ fn supports_display(&self, bar: Bar0<'_>) -> bool {
super::ga100::display_enabled_ga100(bar)
}
- fn vidmem_size(&self, bar: &Bar0) -> u64 {
+ fn vidmem_size(&self, bar: Bar0<'_>) -> u64 {
vidmem_size_ga102(bar)
}
+
+ fn pmu_reserved_size(&self) -> u32 {
+ super::tu102::pmu_reserved_size_tu102()
+ }
+
+ fn non_wpr_heap_size(&self) -> u32 {
+ super::tu102::non_wpr_heap_size_tu102()
+ }
+
+ fn frts_size(&self) -> u64 {
+ super::tu102::frts_size_tu102()
+ }
}
const GA102: Ga102 = Ga102;
diff --git a/drivers/gpu/nova-core/fb/hal/gb100.rs b/drivers/gpu/nova-core/fb/hal/gb100.rs
new file mode 100644
index 000000000000..6e0eba101ca1
--- /dev/null
+++ b/drivers/gpu/nova-core/fb/hal/gb100.rs
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+//! Blackwell GB10x framebuffer HAL.
+
+use kernel::{
+ io::{
+ register::{
+ RegisterBase,
+ WithBase, //
+ },
+ Io, //
+ },
+ num::Bounded,
+ prelude::*,
+ ptr::{
+ const_align_up,
+ Alignment, //
+ },
+ sizes::*, //
+};
+
+use crate::{
+ driver::Bar0,
+ fb::hal::FbHal,
+ num::usize_into_u32,
+ regs, //
+};
+
+/// Framebuffer HAL implementation for GB10x; a unit type with no state.
+struct Gb100;
+
+// Supplies the base that `regs::Hshub0Base`-relative registers are resolved against when they
+// are addressed with `::of::<Gb100>()` in this file.
+// NOTE(review): presumably the HSHUB0 block offset inside BAR0 -- confirm against `regs`.
+impl RegisterBase<regs::Hshub0Base> for Gb100 {
+ const BASE: usize = 0x0087_0000;
+}
+
+/// Reads the sysmem flush page address back from the primary GB10x HSHUB0 register pair.
+///
+/// The `adr` field of the LO register forms the low part of the result and the `adr` field of
+/// the HI register is placed above it, starting at bit 32. The EG register pair is not read.
+fn read_sysmem_flush_page_gb100(bar: Bar0<'_>) -> u64 {
+ let lo = u64::from(
+ bar.read(regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::of::<Gb100>())
+ .adr(),
+ );
+ let hi = u64::from(
+ bar.read(regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::of::<Gb100>())
+ .adr(),
+ );
+
+ lo | (hi << 32)
+}
+
+/// Write the sysmem flush page address through the GB10x HSHUB0 registers.
+///
+/// Both the primary and EG (egress) register pairs must be programmed to the same address,
+/// as required by hardware.
+///
+/// `addr` is limited to 52 bits by its type: its low 32 bits are written to the LO registers
+/// and the remaining 20 bits (`addr >> 32`) are written to the HI registers. Four register
+/// writes are issued in total, HI before LO for each pair.
+fn write_sysmem_flush_page_gb100(bar: Bar0<'_>, addr: Bounded<u64, 52>) {
+ // CAST: lower 32 bits. Hardware ignores bits 7:0.
+ let addr_lo = *addr as u32;
+ let addr_hi = addr.shr::<32, 20>().cast::<u32>();
+
+ // Write HI first. The hardware will trigger the flush on the LO write.
+
+ // Primary HSHUB pair.
+ bar.write(
+ regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::of::<Gb100>(),
+ regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr(addr_hi),
+ );
+ bar.write(
+ regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::of::<Gb100>(),
+ regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(addr_lo),
+ );
+
+ // EG (egress) pair -- must match the primary pair.
+ bar.write(
+ regs::NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_HI::of::<Gb100>(),
+ regs::NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr(addr_hi),
+ );
+ bar.write(
+ regs::NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_LO::of::<Gb100>(),
+ regs::NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(addr_lo),
+ );
+}
+
+/// Returns the PMU reserved size reported by the GB10x framebuffer HAL.
+///
+/// The value is `SZ_8M + SZ_16M + SZ_4K` rounded up to a 128 KiB boundary. The rounding and its
+/// `unwrap()` sit inside a const generic argument, so they are evaluated at compile time.
+pub(super) const fn pmu_reserved_size_gb100() -> u32 {
+ usize_into_u32::<{ const_align_up(SZ_8M + SZ_16M + SZ_4K, Alignment::new::<SZ_128K>()).unwrap() }>(
+ )
+}
+
+// GB10x framebuffer HAL: the flush-page accessors use the HSHUB0 helpers defined in this file,
+// while display detection, VRAM size and FRTS size reuse the GA100/GA102/TU102 helpers.
+impl FbHal for Gb100 {
+ // Reads the flush page address back from the primary HSHUB0 register pair.
+ fn read_sysmem_flush_page(&self, bar: Bar0<'_>) -> u64 {
+ read_sysmem_flush_page_gb100(bar)
+ }
+
+ // Fails with `EINVAL`, before any register is written, if `addr` does not fit in 52 bits.
+ fn write_sysmem_flush_page(&self, bar: Bar0<'_>, addr: u64) -> Result {
+ let addr = Bounded::<u64, 52>::try_new(addr).ok_or(EINVAL)?;
+
+ write_sysmem_flush_page_gb100(bar, addr);
+
+ Ok(())
+ }
+
+ // Same display-enabled check as GA100.
+ fn supports_display(&self, bar: Bar0<'_>) -> bool {
+ super::ga100::display_enabled_ga100(bar)
+ }
+
+ // Same VRAM size readout as GA102.
+ fn vidmem_size(&self, bar: Bar0<'_>) -> u64 {
+ super::ga102::vidmem_size_ga102(bar)
+ }
+
+ fn pmu_reserved_size(&self) -> u32 {
+ pmu_reserved_size_gb100()
+ }
+
+ fn non_wpr_heap_size(&self) -> u32 {
+ // Non-WPR heap for GB10x (see Open RM: kgspGetNonWprHeapSize, GB100/GB102).
+ u32::SZ_2M
+ }
+
+ // Same FRTS size as TU102.
+ fn frts_size(&self) -> u64 {
+ super::tu102::frts_size_tu102()
+ }
+}
+
+// Single `Gb100` instance, plus the `&dyn FbHal` handle to it that is visible to the parent
+// module.
+const GB100: Gb100 = Gb100;
+pub(super) const GB100_HAL: &dyn FbHal = &GB100;
diff --git a/drivers/gpu/nova-core/fb/hal/gb202.rs b/drivers/gpu/nova-core/fb/hal/gb202.rs
new file mode 100644
index 000000000000..038d1278c634
--- /dev/null
+++ b/drivers/gpu/nova-core/fb/hal/gb202.rs
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+//! Blackwell GB20x framebuffer HAL.
+
+use kernel::{
+ io::{
+ register::{
+ RegisterBase,
+ WithBase, //
+ },
+ Io, //
+ },
+ num::Bounded,
+ prelude::*,
+ sizes::SizeConstants, //
+};
+
+use crate::{
+ driver::Bar0,
+ fb::hal::FbHal,
+ regs, //
+};
+
+/// Framebuffer HAL implementation for GB20x; a unit type with no state.
+struct Gb202;
+
+// Supplies the base that `regs::Fbhub0Base`-relative registers are resolved against when they
+// are addressed with `::of::<Gb202>()` in this file.
+// NOTE(review): presumably the FBHUB0 block offset inside BAR0 -- confirm against `regs`.
+impl RegisterBase<regs::Fbhub0Base> for Gb202 {
+ const BASE: usize = 0x008a_0000;
+}
+
+/// Reads the sysmem flush page address back from the GB20x FBHUB0 register pair.
+///
+/// The `adr` field of the LO register forms the low part of the result and the `adr` field of
+/// the HI register is placed above it, starting at bit 32.
+fn read_sysmem_flush_page_gb202(bar: Bar0<'_>) -> u64 {
+ let lo = u64::from(
+ bar.read(regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::of::<Gb202>())
+ .adr(),
+ );
+ let hi = u64::from(
+ bar.read(regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::of::<Gb202>())
+ .adr(),
+ );
+
+ lo | (hi << 32)
+}
+
+/// Write the sysmem flush page address through the GB20x FBHUB0 registers.
+///
+/// `addr` is limited to 52 bits by its type: the bits above bit 31 (`addr >> 32`, 20 bits) are
+/// written to the HI register and the low 32 bits are written to the LO register.
+fn write_sysmem_flush_page_gb202(bar: Bar0<'_>, addr: Bounded<u64, 52>) {
+ // Write HI first. The hardware will trigger the flush on the LO write.
+ bar.write(
+ regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::of::<Gb202>(),
+ regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed()
+ .with_adr(addr.shr::<32, 20>().cast::<u32>()),
+ );
+ bar.write(
+ regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::of::<Gb202>(),
+ // CAST: lower 32 bits. Hardware ignores bits 7:0.
+ regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(*addr as u32),
+ );
+}
+
+// GB20x framebuffer HAL: the flush-page accessors use the FBHUB0 helpers defined in this file;
+// the remaining methods reuse GA100/GA102/GB100/TU102 helpers, except for the non-WPR heap size.
+impl FbHal for Gb202 {
+ // Reads the flush page address back from the FBHUB0 register pair.
+ fn read_sysmem_flush_page(&self, bar: Bar0<'_>) -> u64 {
+ read_sysmem_flush_page_gb202(bar)
+ }
+
+ // Fails with `EINVAL`, before any register is written, if `addr` does not fit in 52 bits.
+ fn write_sysmem_flush_page(&self, bar: Bar0<'_>, addr: u64) -> Result {
+ let addr = Bounded::<u64, 52>::try_new(addr).ok_or(EINVAL)?;
+
+ write_sysmem_flush_page_gb202(bar, addr);
+
+ Ok(())
+ }
+
+ // Same display-enabled check as GA100.
+ fn supports_display(&self, bar: Bar0<'_>) -> bool {
+ super::ga100::display_enabled_ga100(bar)
+ }
+
+ // Same VRAM size readout as GA102.
+ fn vidmem_size(&self, bar: Bar0<'_>) -> u64 {
+ super::ga102::vidmem_size_ga102(bar)
+ }
+
+ // Same PMU reserved size as GB10x.
+ fn pmu_reserved_size(&self) -> u32 {
+ super::gb100::pmu_reserved_size_gb100()
+ }
+
+ fn non_wpr_heap_size(&self) -> u32 {
+ // Non-WPR heap for GB20x (see Open RM: kgspGetNonWprHeapSize, GB202+).
+ u32::SZ_2M + u32::SZ_128K
+ }
+
+ // Same FRTS size as TU102.
+ fn frts_size(&self) -> u64 {
+ super::tu102::frts_size_tu102()
+ }
+}
+
+// Single `Gb202` instance, plus the `&dyn FbHal` handle to it that is visible to the parent
+// module.
+const GB202: Gb202 = Gb202;
+pub(super) const GB202_HAL: &dyn FbHal = &GB202;
diff --git a/drivers/gpu/nova-core/fb/hal/gh100.rs b/drivers/gpu/nova-core/fb/hal/gh100.rs
new file mode 100644
index 000000000000..5450c7254dad
--- /dev/null
+++ b/drivers/gpu/nova-core/fb/hal/gh100.rs
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+use kernel::{
+ prelude::*,
+ sizes::SizeConstants, //
+};
+
+use crate::{
+ driver::Bar0,
+ fb::hal::FbHal, //
+};
+
+/// Framebuffer HAL implementation for Hopper (GH100); a unit type with no state.
+struct Gh100;
+
+// Every method except `non_wpr_heap_size` forwards to a helper from the GA100, GA102 or TU102
+// HAL modules.
+impl FbHal for Gh100 {
+ fn read_sysmem_flush_page(&self, bar: Bar0<'_>) -> u64 {
+ super::ga100::read_sysmem_flush_page_ga100(bar)
+ }
+
+ // The GA100 helper returns nothing, so this implementation always reports success.
+ fn write_sysmem_flush_page(&self, bar: Bar0<'_>, addr: u64) -> Result {
+ super::ga100::write_sysmem_flush_page_ga100(bar, addr);
+
+ Ok(())
+ }
+
+ fn supports_display(&self, bar: Bar0<'_>) -> bool {
+ super::ga100::display_enabled_ga100(bar)
+ }
+
+ fn vidmem_size(&self, bar: Bar0<'_>) -> u64 {
+ super::ga102::vidmem_size_ga102(bar)
+ }
+
+ fn pmu_reserved_size(&self) -> u32 {
+ super::tu102::pmu_reserved_size_tu102()
+ }
+
+ fn non_wpr_heap_size(&self) -> u32 {
+ // Non-WPR heap for Hopper (see Open RM: kgspCalculateFbLayout_GH100).
+ u32::SZ_2M
+ }
+
+ fn frts_size(&self) -> u64 {
+ super::tu102::frts_size_tu102()
+ }
+}
+
+// Single `Gh100` instance, plus the `&dyn FbHal` handle to it that is visible to the parent
+// module.
+const GH100: Gh100 = Gh100;
+pub(super) const GH100_HAL: &dyn FbHal = &GH100;
diff --git a/drivers/gpu/nova-core/fb/hal/tu102.rs b/drivers/gpu/nova-core/fb/hal/tu102.rs
index 281bb796e198..f629e8e9d5d5 100644
--- a/drivers/gpu/nova-core/fb/hal/tu102.rs
+++ b/drivers/gpu/nova-core/fb/hal/tu102.rs
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
use kernel::{
io::Io,
- prelude::*, //
+ prelude::*,
+ sizes::*, //
};
use crate::{
@@ -15,11 +17,11 @@ use crate::{
/// to be used by HALs.
pub(super) const FLUSH_SYSMEM_ADDR_SHIFT: u32 = 8;
-pub(super) fn read_sysmem_flush_page_gm107(bar: &Bar0) -> u64 {
+pub(super) fn read_sysmem_flush_page_gm107(bar: Bar0<'_>) -> u64 {
u64::from(bar.read(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT
}
-pub(super) fn write_sysmem_flush_page_gm107(bar: &Bar0, addr: u64) -> Result {
+pub(super) fn write_sysmem_flush_page_gm107(bar: Bar0<'_>, addr: u64) -> Result {
// Check that the address doesn't overflow the receiving 32-bit register.
u32::try_from(addr >> FLUSH_SYSMEM_ADDR_SHIFT)
.map_err(|_| EINVAL)
@@ -28,34 +30,58 @@ pub(super) fn write_sysmem_flush_page_gm107(bar: &Bar0, addr: u64) -> Result {
})
}
-pub(super) fn display_enabled_gm107(bar: &Bar0) -> bool {
+pub(super) fn display_enabled_gm107(bar: Bar0<'_>) -> bool {
!bar.read(regs::gm107::NV_FUSE_STATUS_OPT_DISPLAY)
.display_disabled()
}
-pub(super) fn vidmem_size_gp102(bar: &Bar0) -> u64 {
+pub(super) fn vidmem_size_gp102(bar: Bar0<'_>) -> u64 {
bar.read(regs::NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE)
.usable_fb_size()
}
+/// PMU reserved size for TU102: always 0. Exposed to the other HAL modules so they can reuse it.
+pub(super) const fn pmu_reserved_size_tu102() -> u32 {
+ 0
+}
+
+/// Non-WPR heap size for TU102: `SZ_1M`. Exposed to the other HAL modules so they can reuse it.
+pub(super) const fn non_wpr_heap_size_tu102() -> u32 {
+ u32::SZ_1M
+}
+
+/// FRTS region size for TU102: `SZ_1M`. Exposed to the other HAL modules so they can reuse it.
+pub(super) const fn frts_size_tu102() -> u64 {
+ u64::SZ_1M
+}
+
struct Tu102;
impl FbHal for Tu102 {
- fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
+ fn read_sysmem_flush_page(&self, bar: Bar0<'_>) -> u64 {
read_sysmem_flush_page_gm107(bar)
}
- fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
+ fn write_sysmem_flush_page(&self, bar: Bar0<'_>, addr: u64) -> Result {
write_sysmem_flush_page_gm107(bar, addr)
}
- fn supports_display(&self, bar: &Bar0) -> bool {
+ fn supports_display(&self, bar: Bar0<'_>) -> bool {
display_enabled_gm107(bar)
}
- fn vidmem_size(&self, bar: &Bar0) -> u64 {
+ fn vidmem_size(&self, bar: Bar0<'_>) -> u64 {
vidmem_size_gp102(bar)
}
+
+ fn pmu_reserved_size(&self) -> u32 {
+ pmu_reserved_size_tu102()
+ }
+
+ fn non_wpr_heap_size(&self) -> u32 {
+ non_wpr_heap_size_tu102()
+ }
+
+ fn frts_size(&self) -> u64 {
+ frts_size_tu102()
+ }
}
const TU102: Tu102 = Tu102;
diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs
index 6c2ab69cb605..366d3b76360e 100644
--- a/drivers/gpu/nova-core/firmware.rs
+++ b/drivers/gpu/nova-core/firmware.rs
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//! Contains structures and functions dedicated to the parsing, building and patching of firmwares
//! to be loaded into a given execution unit.
@@ -27,6 +28,7 @@ use crate::{
};
pub(crate) mod booter;
+pub(crate) mod fsp;
pub(crate) mod fwsec;
pub(crate) mod gsp;
pub(crate) mod riscv;
@@ -388,13 +390,7 @@ impl<'a> BinFirmware<'a> {
// Extract header.
.and_then(BinHdr::from_bytes_copy)
// Validate header.
- .and_then(|hdr| {
- if hdr.bin_magic == BIN_MAGIC {
- Some(hdr)
- } else {
- None
- }
- })
+ .filter(|hdr| hdr.bin_magic == BIN_MAGIC)
.map(|hdr| Self { hdr, fw })
.ok_or(EINVAL)
}
@@ -436,10 +432,16 @@ impl<const N: usize> ModInfoBuilder<N> {
.make_entry_file(name, "bootloader")
.make_entry_file(name, "gsp");
- if chipset.needs_fwsec_bootloader() {
+ let this = if chipset.needs_fwsec_bootloader() {
this.make_entry_file(name, "gen_bootloader")
} else {
this
+ };
+
+ if chipset.uses_fsp() {
+ this.make_entry_file(name, "fmc")
+ } else {
+ this
}
}
@@ -473,17 +475,119 @@ mod elf {
transmute::FromBytes, //
};
+ /// Trait to abstract over ELF header differences.
+ ///
+ /// Accessors use types wide enough for both the 32-bit and the 64-bit header, so the section
+ /// lookup can be written once.
+ trait ElfHeader: FromBytes {
+ /// Number of section headers (`e_shnum`).
+ fn shnum(&self) -> u16;
+ /// Offset of the section header table within the image (`e_shoff`).
+ fn shoff(&self) -> u64;
+ /// Index of the section header describing the strings table (`e_shstrndx`).
+ fn shstrndx(&self) -> u16;
+ }
+
+ /// Trait to abstract over ELF section-header differences.
+ ///
+ /// Accessors use types wide enough for both the 32-bit and the 64-bit section header.
+ trait ElfSectionHeader: FromBytes {
+ /// Offset of the section's name within the strings table (`sh_name`).
+ fn name(&self) -> u32;
+ /// Offset of the section's data within the image (`sh_offset`).
+ fn offset(&self) -> u64;
+ /// Size of the section's data (`sh_size`).
+ fn size(&self) -> u64;
+ }
+
+ /// Trait describing a matching ELF header and section-header format.
+ ///
+ /// Implementors are type-level tags: they only select which header types the generic section
+ /// lookup parses.
+ trait ElfFormat {
+ /// File header type of this format.
+ type Header: ElfHeader;
+ /// Section header type of this format.
+ type SectionHeader: ElfSectionHeader;
+ }
+
/// Newtype to provide a [`FromBytes`] implementation.
#[repr(transparent)]
struct Elf64Hdr(bindings::elf64_hdr);
// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
unsafe impl FromBytes for Elf64Hdr {}
+ // The ELF64 header fields are returned as they are, without conversion.
+ impl ElfHeader for Elf64Hdr {
+ fn shnum(&self) -> u16 {
+ self.0.e_shnum
+ }
+
+ fn shoff(&self) -> u64 {
+ self.0.e_shoff
+ }
+
+ fn shstrndx(&self) -> u16 {
+ self.0.e_shstrndx
+ }
+ }
+
#[repr(transparent)]
struct Elf64SHdr(bindings::elf64_shdr);
// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
unsafe impl FromBytes for Elf64SHdr {}
+ // The ELF64 section-header fields are returned as they are, without conversion.
+ impl ElfSectionHeader for Elf64SHdr {
+ fn name(&self) -> u32 {
+ self.0.sh_name
+ }
+
+ fn offset(&self) -> u64 {
+ self.0.sh_offset
+ }
+
+ fn size(&self) -> u64 {
+ self.0.sh_size
+ }
+ }
+
+ /// Type-level tag selecting the ELF64 header and section-header wrappers.
+ struct Elf64Format;
+
+ impl ElfFormat for Elf64Format {
+ type Header = Elf64Hdr;
+ type SectionHeader = Elf64SHdr;
+ }
+
+ /// Newtype to provide [`FromBytes`] and [`ElfHeader`] implementations for ELF32.
+ #[repr(transparent)]
+ struct Elf32Hdr(bindings::elf32_hdr);
+ // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+ unsafe impl FromBytes for Elf32Hdr {}
+
+ impl ElfHeader for Elf32Hdr {
+ fn shnum(&self) -> u16 {
+ self.0.e_shnum
+ }
+
+ // The ELF32 field is narrower than the trait's return type; widening is lossless.
+ fn shoff(&self) -> u64 {
+ u64::from(self.0.e_shoff)
+ }
+
+ fn shstrndx(&self) -> u16 {
+ self.0.e_shstrndx
+ }
+ }
+
+ /// Newtype to provide [`FromBytes`] and [`ElfSectionHeader`] implementations for ELF32.
+ #[repr(transparent)]
+ struct Elf32SHdr(bindings::elf32_shdr);
+ // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+ unsafe impl FromBytes for Elf32SHdr {}
+
+ impl ElfSectionHeader for Elf32SHdr {
+ fn name(&self) -> u32 {
+ self.0.sh_name
+ }
+
+ // The ELF32 field is narrower than the trait's return type; widening is lossless.
+ fn offset(&self) -> u64 {
+ u64::from(self.0.sh_offset)
+ }
+
+ // The ELF32 field is narrower than the trait's return type; widening is lossless.
+ fn size(&self) -> u64 {
+ u64::from(self.0.sh_size)
+ }
+ }
+
+ /// Type-level tag selecting the ELF32 header and section-header wrappers.
+ struct Elf32Format;
+
+ impl ElfFormat for Elf32Format {
+ type Header = Elf32Hdr;
+ type SectionHeader = Elf32SHdr;
+ }
+
/// Returns a NUL-terminated string from the ELF image at `offset`.
fn elf_str(elf: &[u8], offset: u64) -> Option<&str> {
let idx = usize::try_from(offset).ok()?;
@@ -491,47 +595,74 @@ mod elf {
CStr::from_bytes_until_nul(bytes).ok()?.to_str().ok()
}
- /// Tries to extract section with name `name` from the ELF64 image `elf`, and returns it.
- pub(super) fn elf64_section<'a, 'b>(elf: &'a [u8], name: &'b str) -> Option<&'a [u8]> {
- let hdr = &elf
- .get(0..size_of::<bindings::elf64_hdr>())
- .and_then(Elf64Hdr::from_bytes)?
- .0;
+ fn elf_section_generic<'a, F>(elf: &'a [u8], name: &str) -> Option<&'a [u8]>
+ where
+ F: ElfFormat,
+ {
+ let hdr = F::Header::from_bytes(elf.get(0..size_of::<F::Header>())?)?;
- // Get all the section headers.
- let mut shdr = {
- let shdr_num = usize::from(hdr.e_shnum);
- let shdr_start = usize::try_from(hdr.e_shoff).ok()?;
- let shdr_end = shdr_num
- .checked_mul(size_of::<Elf64SHdr>())
- .and_then(|v| v.checked_add(shdr_start))?;
+ let shdr_num = usize::from(hdr.shnum());
+ let shdr_start = usize::try_from(hdr.shoff()).ok()?;
+ let shdr_end = shdr_num
+ .checked_mul(size_of::<F::SectionHeader>())
+ .and_then(|v| v.checked_add(shdr_start))?;
- elf.get(shdr_start..shdr_end)
- .map(|slice| slice.chunks_exact(size_of::<Elf64SHdr>()))?
- };
+ // Get all the section headers as an iterator over byte chunks.
+ let shdr_bytes = elf.get(shdr_start..shdr_end)?;
+ let mut shdr_iter = shdr_bytes.chunks_exact(size_of::<F::SectionHeader>());
// Get the strings table.
- let strhdr = shdr
+ let strhdr = shdr_iter
.clone()
- .nth(usize::from(hdr.e_shstrndx))
- .and_then(Elf64SHdr::from_bytes)?;
+ .nth(usize::from(hdr.shstrndx()))
+ .and_then(F::SectionHeader::from_bytes)?;
// Find the section which name matches `name` and return it.
- shdr.find_map(|sh| {
- let hdr = Elf64SHdr::from_bytes(sh)?;
- let name_offset = strhdr.0.sh_offset.checked_add(u64::from(hdr.0.sh_name))?;
+ shdr_iter.find_map(|sh_bytes| {
+ let sh = F::SectionHeader::from_bytes(sh_bytes)?;
+ let name_offset = strhdr.offset().checked_add(u64::from(sh.name()))?;
let section_name = elf_str(elf, name_offset)?;
if section_name != name {
return None;
}
- let start = usize::try_from(hdr.0.sh_offset).ok()?;
- let end = usize::try_from(hdr.0.sh_size)
+ let start = usize::try_from(sh.offset()).ok()?;
+ let end = usize::try_from(sh.size())
.ok()
- .and_then(|sh_size| start.checked_add(sh_size))?;
+ .and_then(|sz| start.checked_add(sz))?;
elf.get(start..end)
})
}
+
+ /// Extract the section with name `name` from the ELF64 image `elf`.
+ ///
+ /// Thin wrapper that runs the generic section lookup with the ELF64 header layout.
+ fn elf64_section<'a>(elf: &'a [u8], name: &str) -> Option<&'a [u8]> {
+ elf_section_generic::<Elf64Format>(elf, name)
+ }
+
+ /// Extract the section with name `name` from the ELF32 image `elf`.
+ ///
+ /// Thin wrapper that runs the generic section lookup with the ELF32 header layout.
+ fn elf32_section<'a>(elf: &'a [u8], name: &str) -> Option<&'a [u8]> {
+ elf_section_generic::<Elf32Format>(elf, name)
+ }
+
+ /// Automatically detects ELF32 vs ELF64 based on the ELF header.
+ ///
+ /// Returns the contents of the section called `name`. Returns `None` if `elf` does not start
+ /// with the ELF magic, is too short to hold the class byte, declares a class other than
+ /// 32-bit or 64-bit, or if the class-specific lookup itself returns `None`.
+ pub(super) fn elf_section<'a>(elf: &'a [u8], name: &str) -> Option<&'a [u8]> {
+ // ELF identification: a 4-byte magic followed by a class byte (32- vs 64-bit).
+ const ELFMAG: &[u8] = b"\x7fELF";
+ const SELFMAG: usize = ELFMAG.len();
+ const EI_CLASS: usize = 4;
+ const ELFCLASS32: u8 = 1;
+ const ELFCLASS64: u8 = 2;
+
+ // `get` also covers images shorter than the magic: the comparison then sees `None`.
+ if elf.get(0..SELFMAG) != Some(ELFMAG) {
+ return None;
+ }
+
+ match *elf.get(EI_CLASS)? {
+ ELFCLASS32 => elf32_section(elf, name),
+ ELFCLASS64 => elf64_section(elf, name),
+ _ => None,
+ }
+ }
}
diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs
index de2a4536b532..d9313ac361af 100644
--- a/drivers/gpu/nova-core/firmware/booter.rs
+++ b/drivers/gpu/nova-core/firmware/booter.rs
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//! Support for loading and patching the `Booter` firmware. `Booter` is a Heavy Secured firmware
//! running on [`Sec2`], that is used on Turing/Ampere to load the GSP firmware into the GSP falcon
@@ -8,6 +9,7 @@ use core::marker::PhantomData;
use kernel::{
device,
+ dma::Coherent,
prelude::*,
transmute::FromBytes, //
};
@@ -280,7 +282,6 @@ impl FirmwareObject<BooterFirmware, Unsigned> {
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) enum BooterKind {
Loader,
- #[expect(unused)]
Unloader,
}
@@ -293,7 +294,7 @@ impl BooterFirmware {
chipset: Chipset,
ver: &str,
falcon: &Falcon<<Self as FalconFirmware>::Target>,
- bar: &Bar0,
+ bar: Bar0<'_>,
) -> Result<Self> {
let fw_name = match kind {
BooterKind::Loader => "booter_load",
@@ -396,6 +397,35 @@ impl BooterFirmware {
ucode: ucode_signed,
})
}
+
+ /// Load and run the booter firmware on SEC2.
+ ///
+ /// Resets SEC2, loads this firmware image, then boots with the WPR metadata
+ /// address passed via the SEC2 mailboxes.
+ ///
+ /// Returns `ENODEV` if the first mailbox value returned by the boot is non-zero. Errors from
+ /// the reset, load and boot steps are propagated unchanged.
+ pub(crate) fn run<T>(
+ &self,
+ dev: &device::Device<device::Bound>,
+ bar: Bar0<'_>,
+ sec2_falcon: &Falcon<Sec2>,
+ wpr_meta: &Coherent<T>,
+ ) -> Result {
+ sec2_falcon.reset(bar)?;
+ sec2_falcon.load(dev, bar, self)?;
+ let wpr_handle = wpr_meta.dma_handle();
+ // The DMA handle is split across the two 32-bit mailbox arguments: low half first, high
+ // half second. The `as u32` casts truncate on purpose.
+ let (mbox0, mbox1) = sec2_falcon.boot(
+ bar,
+ Some(wpr_handle as u32),
+ Some((wpr_handle >> 32) as u32),
+ )?;
+ dev_dbg!(dev, "SEC2 MBOX0: {:#x}, MBOX1: {:#x}\n", mbox0, mbox1);
+
+ // NOTE(review): the message below says "Booter-load" regardless of which `BooterKind`
+ // this firmware was built for -- confirm the wording is intended for `Unloader` too.
+ if mbox0 != 0 {
+ dev_err!(dev, "Booter-load failed with error {:#x}\n", mbox0);
+ return Err(ENODEV);
+ }
+
+ Ok(())
+ }
}
impl FalconDmaLoadable for BooterFirmware {
diff --git a/drivers/gpu/nova-core/firmware/fsp.rs b/drivers/gpu/nova-core/firmware/fsp.rs
new file mode 100644
index 000000000000..6eaf1c684b9d
--- /dev/null
+++ b/drivers/gpu/nova-core/firmware/fsp.rs
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+//! FSP is a hardware unit that runs FMC firmware.
+
+use kernel::{
+ device,
+ dma::Coherent,
+ firmware::Firmware,
+ prelude::*, //
+};
+
+use crate::{
+ firmware::elf,
+ gpu::Chipset, //
+};
+
+/// Size of the FSP SHA-384 hash, in bytes.
+const FSP_HASH_SIZE: usize = 48;
+/// Maximum size of the FSP public key (RSA-3072), in bytes.
+///
+/// The FMC ELF `publickey` section may be shorter, so the remaining bytes are zero-padded.
+const FSP_PKEY_SIZE: usize = 384;
+/// Maximum size of the FSP signature (RSA-3072), in bytes.
+///
+/// The FMC ELF `signature` section may be shorter, so the remaining bytes are zero-padded.
+const FSP_SIG_SIZE: usize = 384;
+
+/// Structure to hold FMC signatures.
+///
+/// C representation is used because this type is used for communication with the FSP.
+#[derive(Debug, Clone, Copy, Zeroable)]
+#[repr(C)]
+pub(crate) struct FmcSignatures {
+ /// SHA-384 hash, copied from the FMC ELF `hash` section.
+ pub(crate) hash384: [u8; FSP_HASH_SIZE],
+ /// Public key from the FMC ELF `publickey` section, zero-padded to `FSP_PKEY_SIZE` bytes.
+ pub(crate) public_key: [u8; FSP_PKEY_SIZE],
+ /// Signature from the FMC ELF `signature` section, zero-padded to `FSP_SIG_SIZE` bytes.
+ pub(crate) signature: [u8; FSP_SIG_SIZE],
+}
+
+/// FMC firmware as needed by the FSP boot path: the image copied into a `Coherent` allocation,
+/// and the signatures extracted from the same ELF file.
+pub(crate) struct FspFirmware {
+ /// FMC firmware image data (only the "image" ELF section).
+ pub(crate) fmc_image: Coherent<[u8]>,
+ /// FMC firmware signatures.
+ pub(crate) fmc_sigs: KBox<FmcSignatures>,
+}
+
+impl FspFirmware {
+ /// Requests the "fmc" firmware file for `chipset`/`ver` and prepares it for the FSP.
+ ///
+ /// The `image` ELF section is copied into a `Coherent` allocation and the `hash`,
+ /// `publickey` and `signature` sections are extracted into an [`FmcSignatures`].
+ ///
+ /// Returns `EINVAL` if the `image` section is missing; errors from the firmware request, the
+ /// allocation and the signature extraction are propagated unchanged.
+ pub(crate) fn new(
+ dev: &device::Device<device::Bound>,
+ chipset: Chipset,
+ ver: &str,
+ ) -> Result<Self> {
+ let fw = super::request_firmware(dev, chipset, "fmc", ver)?;
+
+ // FSP expects only the "image" section, not the entire ELF file.
+ let fmc_image_data = elf::elf_section(fw.data(), "image").ok_or_else(|| {
+ dev_err!(dev, "FMC ELF file missing 'image' section\n");
+ EINVAL
+ })?;
+ let fmc_image = Coherent::from_slice(dev, fmc_image_data, GFP_KERNEL)?;
+
+ Ok(Self {
+ fmc_image,
+ fmc_sigs: Self::extract_fmc_signatures(&fw, dev)?,
+ })
+ }
+
+ /// Extract FMC firmware signatures for Chain of Trust verification.
+ ///
+ /// Extracts the `hash`, `publickey` and `signature` sections from the FMC ELF firmware (the
+ /// ELF class is detected by `elf::elf_section`).
+ /// Returns signatures in a heap-allocated structure to prevent stack overflow.
+ ///
+ /// Returns `EINVAL` if a section is missing, if a section is larger than its destination
+ /// field, or if the hash is not exactly `FSP_HASH_SIZE` bytes long.
+ fn extract_fmc_signatures(
+ fmc_fw: &Firmware,
+ dev: &device::Device,
+ ) -> Result<KBox<FmcSignatures>> {
+ // Looks up section `name` and rejects it if it is longer than `max_len`; both failure
+ // cases are logged and mapped to `EINVAL`.
+ let get_section = |name: &str, max_len: usize| {
+ elf::elf_section(fmc_fw.data(), name)
+ .ok_or(EINVAL)
+ .inspect_err(|_| dev_err!(dev, "FMC firmware missing '{}' section\n", name))
+ .and_then(|section| {
+ if section.len() > max_len {
+ dev_err!(
+ dev,
+ "FMC {} section size {} > maximum {}\n",
+ name,
+ section.len(),
+ max_len
+ );
+ Err(EINVAL)
+ } else {
+ Ok(section)
+ }
+ })
+ };
+
+ let hash_section = get_section("hash", FSP_HASH_SIZE)?;
+ let pkey_section = get_section("publickey", FSP_PKEY_SIZE)?;
+ let sig_section = get_section("signature", FSP_SIG_SIZE)?;
+
+ // The hash section is a SHA-384 output: it must be exactly FSP_HASH_SIZE bytes.
+ if hash_section.len() != FSP_HASH_SIZE {
+ dev_err!(
+ dev,
+ "FMC hash section size {} != expected {}\n",
+ hash_section.len(),
+ FSP_HASH_SIZE
+ );
+ return Err(EINVAL);
+ }
+
+ // Initialize the signatures in place to avoid building the large `FmcSignatures` on the
+ // stack, then fill each section from the firmware.
+ let signatures = KBox::init(
+ pin_init::init_zeroed::<FmcSignatures>().chain(|sigs| {
+ // PANIC: src and dst lengths are both FSP_HASH_SIZE (verified above).
+ sigs.hash384.copy_from_slice(hash_section);
+ // PANIC: dst is sliced to src.len(); src.len() <= FSP_PKEY_SIZE per `get_section`.
+ sigs.public_key[..pkey_section.len()].copy_from_slice(pkey_section);
+ // PANIC: dst is sliced to src.len(); src.len() <= FSP_SIG_SIZE per `get_section`.
+ sigs.signature[..sig_section.len()].copy_from_slice(sig_section);
+ Ok(())
+ }),
+ GFP_KERNEL,
+ )?;
+
+ Ok(signatures)
+ }
+}
diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs
index 8810cb49db67..199ae2adb664 100644
--- a/drivers/gpu/nova-core/firmware/fwsec.rs
+++ b/drivers/gpu/nova-core/firmware/fwsec.rs
@@ -144,7 +144,6 @@ pub(crate) enum FwsecCommand {
/// image into it.
Frts { frts_addr: u64, frts_size: u64 },
/// Asks [`FwsecFirmware`] to load pre-OS apps on the PMU.
- #[expect(dead_code)]
Sb,
}
@@ -322,7 +321,7 @@ impl FwsecFirmware {
pub(crate) fn new(
dev: &Device<device::Bound>,
falcon: &Falcon<Gsp>,
- bar: &Bar0,
+ bar: Bar0<'_>,
bios: &Vbios,
cmd: FwsecCommand,
) -> Result<Self> {
@@ -395,7 +394,7 @@ impl FwsecFirmware {
&self,
dev: &Device<device::Bound>,
falcon: &Falcon<Gsp>,
- bar: &Bar0,
+ bar: Bar0<'_>,
) -> Result<()> {
// Reset falcon, load the firmware, and run it.
falcon
diff --git a/drivers/gpu/nova-core/firmware/fwsec/bootloader.rs b/drivers/gpu/nova-core/firmware/fwsec/bootloader.rs
index bcb713a868e2..039920dc340b 100644
--- a/drivers/gpu/nova-core/firmware/fwsec/bootloader.rs
+++ b/drivers/gpu/nova-core/firmware/fwsec/bootloader.rs
@@ -280,7 +280,7 @@ impl FwsecFirmwareWithBl {
&self,
dev: &Device<device::Bound>,
falcon: &Falcon<Gsp>,
- bar: &Bar0,
+ bar: Bar0<'_>,
) -> Result<()> {
// Reset falcon, load the firmware, and run it.
falcon
diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs
index 2fcc255c3bc8..99a302bae567 100644
--- a/drivers/gpu/nova-core/firmware/gsp.rs
+++ b/drivers/gpu/nova-core/firmware/gsp.rs
@@ -63,6 +63,21 @@ pub(crate) struct GspFirmware {
}
impl GspFirmware {
+ /// Returns the name of the ELF section holding the GSP firmware signatures for `chipset`.
+ ///
+ /// TU116/TU117 and GA100 have their own sections; every other chipset uses the section of
+ /// its architecture. Match arm order matters: the guarded arms must precede the plain
+ /// `Turing`/`Ampere` arms.
+ fn find_gsp_sigs_section(chipset: Chipset) -> &'static str {
+ match chipset.arch() {
+ Architecture::Turing if matches!(chipset, Chipset::TU116 | Chipset::TU117) => {
+ ".fwsignature_tu11x"
+ }
+ Architecture::Turing => ".fwsignature_tu10x",
+ Architecture::Ampere if chipset == Chipset::GA100 => ".fwsignature_ga100",
+ Architecture::Ampere => ".fwsignature_ga10x",
+ Architecture::Ada => ".fwsignature_ad10x",
+ Architecture::Hopper => ".fwsignature_gh10x",
+ Architecture::BlackwellGB10x => ".fwsignature_gb10x",
+ Architecture::BlackwellGB20x => ".fwsignature_gb20x",
+ }
+ }
+
/// Loads the GSP firmware binaries, maps them into `dev`'s address-space, and creates the page
/// tables expected by the GSP bootloader to load it.
pub(crate) fn new<'a>(
@@ -73,7 +88,7 @@ impl GspFirmware {
pin_init::pin_init_scope(move || {
let firmware = super::request_firmware(dev, chipset, "gsp", ver)?;
- let fw_section = elf::elf64_section(firmware.data(), ".fwimage").ok_or(EINVAL)?;
+ let fw_section = elf::elf_section(firmware.data(), ".fwimage").ok_or(EINVAL)?;
let size = fw_section.len();
@@ -131,20 +146,9 @@ impl GspFirmware {
},
size,
signatures: {
- let sigs_section = match chipset.arch() {
- Architecture::Turing
- if matches!(chipset, Chipset::TU116 | Chipset::TU117) =>
- {
- ".fwsignature_tu11x"
- }
- Architecture::Turing => ".fwsignature_tu10x",
- // GA100 uses the same firmware as Turing
- Architecture::Ampere if chipset == Chipset::GA100 => ".fwsignature_tu10x",
- Architecture::Ampere => ".fwsignature_ga10x",
- Architecture::Ada => ".fwsignature_ad10x",
- };
-
- elf::elf64_section(firmware.data(), sigs_section)
+ let sigs_section = Self::find_gsp_sigs_section(chipset);
+
+ elf::elf_section(firmware.data(), sigs_section)
.ok_or(EINVAL)
.and_then(|data| Coherent::from_slice(dev, data, GFP_KERNEL))?
},
diff --git a/drivers/gpu/nova-core/fsp.rs b/drivers/gpu/nova-core/fsp.rs
new file mode 100644
index 000000000000..8fc243c66e35
--- /dev/null
+++ b/drivers/gpu/nova-core/fsp.rs
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+//! FSP (Foundation Security Processor) interface for Hopper/Blackwell GPUs.
+//!
+//! Hopper/Blackwell use a simplified firmware boot sequence: FMC, then FSP, then GSP.
+//! Unlike Turing/Ampere/Ada, there is no SEC2 (Security Engine 2) usage.
+//! FSP handles secure boot directly using FMC firmware and Chain of Trust.
+
+use kernel::{
+ device,
+ dma::Coherent,
+ io::poll::read_poll_timeout,
+ prelude::*,
+ ptr::{
+ Alignable,
+ Alignment, //
+ },
+ sizes::SZ_2M,
+ time::Delta,
+ transmute::{
+ AsBytes,
+ FromBytes, //
+ },
+};
+
+use crate::{
+ driver::Bar0,
+ falcon::{
+ fsp::Fsp as FspEngine,
+ Falcon, //
+ },
+ fb::FbLayout,
+ firmware::fsp::{
+ FmcSignatures,
+ FspFirmware, //
+ },
+ gpu::Chipset,
+ gsp::GspFmcBootParams,
+ mctp::{
+ MctpHeader,
+ NvdmHeader,
+ NvdmType, //
+ },
+ num,
+ regs, //
+};
+
+mod hal;
+
+/// FSP command response payload (`NVDM_PAYLOAD_COMMAND_RESPONSE`).
+#[repr(C, packed)]
+#[derive(Clone, Copy)]
+struct NvdmPayloadCommandResponse {
+ // Not inspected by the response handling in this file.
+ task_id: u32,
+ // Checked against the NVDM type of the message that was sent.
+ command_nvdm_type: u32,
+ // Any non-zero value is treated as a failure of the command.
+ error_code: u32,
+}
+
+/// Complete FSP response structure with MCTP and NVDM headers.
+///
+/// Parsed from the start of the buffer received from the FSP falcon. The struct is packed, so
+/// its fields are copied out before use rather than borrowed.
+#[repr(C, packed)]
+#[derive(Clone, Copy)]
+struct FspResponse {
+ mctp_header: MctpHeader,
+ nvdm_header: NvdmHeader,
+ response: NvdmPayloadCommandResponse,
+}
+
+// SAFETY: FspResponse is a packed C struct with only integral fields.
+unsafe impl FromBytes for FspResponse {}
+
+/// Trait implemented by types representing a message to send to FSP.
+///
+/// This provides [`Fsp::send_sync_fsp`] with the information it needs to send
+/// a given message, following the same pattern as GSP's `CommandToGsp`.
+///
+/// The message is sent as its raw bytes, hence the [`AsBytes`] bound.
+trait MessageToFsp: AsBytes {
+ /// NVDM type identifying this message to FSP.
+ ///
+ /// The reply's `command_nvdm_type` is expected to carry the same value.
+ const NVDM_TYPE: NvdmType;
+}
+
+/// NVDM (NVIDIA Data Model) CoT (Chain of Trust) payload, the main
+/// message body sent to FSP for Chain of Trust boot.
+#[repr(C, packed)]
+#[derive(Clone, Copy, Zeroable)]
+struct NvdmPayloadCot {
+ // Chain of Trust protocol version, taken from the chipset's FSP HAL.
+ version: u16,
+ // `size_of::<NvdmPayloadCot>()`.
+ size: u16,
+ // DMA handle of the FMC image.
+ gsp_fmc_sysmem_offset: u64,
+ // Left at zero by this driver for now.
+ frts_sysmem_offset: u64,
+ // Left at zero by this driver for now.
+ frts_sysmem_size: u32,
+ // Offset of the FRTS region, measured back from the end of the framebuffer; 0 on resume.
+ frts_vidmem_offset: u64,
+ // Length of the FRTS region; 0 on resume.
+ frts_vidmem_size: u32,
+ // Hash, public key and signature of the FMC image.
+ sigs: FmcSignatures,
+ // DMA handle of the FMC boot parameters.
+ gsp_boot_args_sysmem_offset: u64,
+}
+
+/// Complete FSP message structure with MCTP and NVDM headers.
+///
+/// This is the Chain of Trust request: both headers followed by the packed CoT payload.
+#[repr(C)]
+#[derive(Clone, Copy)]
+struct FspMessage {
+ mctp_header: MctpHeader,
+ nvdm_header: NvdmHeader,
+ cot: NvdmPayloadCot,
+}
+
+impl FspMessage {
+ /// Returns an in-place initializer for [`FspMessage`].
+ ///
+ /// The FRTS offset and size are taken from `fb_layout` on a normal boot and are both 0 when
+ /// `args.resume` is set.
+ ///
+ /// Fails with `EINVAL` if aligning the FRTS offset overflows, with `ENOTSUPP` if the chipset
+ /// has no FSP HAL, and with a conversion error if the FRTS length does not fit in a `u32`.
+ fn new<'a>(
+ fb_layout: &FbLayout,
+ fsp_fw: &'a FspFirmware,
+ args: &'a FmcBootArgs,
+ ) -> Result<impl Init<Self> + 'a> {
+ // frts_offset is relative to FB end: FRTS_location = FB_END - frts_offset
+ let frts_vidmem_offset = if !args.resume {
+ let frts_reserved_size = fb_layout.heap.len() + u64::from(fb_layout.pmu_reserved_size);
+
+ frts_reserved_size
+ .align_up(Alignment::new::<SZ_2M>())
+ .ok_or(EINVAL)?
+ } else {
+ 0
+ };
+
+ let frts_size: u32 = if !args.resume {
+ fb_layout.frts.len().try_into()?
+ } else {
+ 0
+ };
+
+ let version = hal::fsp_hal(args.chipset).ok_or(ENOTSUPP)?.cot_version();
+ let size = num::usize_into_u16::<{ core::mem::size_of::<NvdmPayloadCot>() }>();
+
+ Ok(init!(Self {
+ mctp_header: MctpHeader::single_packet(),
+ nvdm_header: NvdmHeader::new(NvdmType::Cot),
+ // The payload is packed, so we cannot use `init!`. Initialize it member-by-member using
+ // `chain`.
+ cot <- pin_init::init_zeroed(),
+ })
+ .chain(move |msg| {
+ msg.cot.version = version;
+ msg.cot.size = size;
+ msg.cot.gsp_fmc_sysmem_offset = fsp_fw.fmc_image.dma_handle();
+ msg.cot.frts_vidmem_offset = frts_vidmem_offset;
+ msg.cot.frts_vidmem_size = frts_size;
+ // frts_sysmem_* intentionally left at zero for now, but will be needed for e.g.
+ // systems without VRAM.
+ msg.cot.gsp_boot_args_sysmem_offset = args.fmc_boot_params.dma_handle();
+ msg.cot.sigs = *fsp_fw.fmc_sigs;
+
+ Ok(())
+ }))
+ }
+}
+
+// SAFETY: `FspMessage` is `#[repr(C)]` with no padding, so all of its
+// bytes are initialized.
+unsafe impl AsBytes for FspMessage {}
+
+// `FspMessage` carries the Chain of Trust payload, so it is tagged with the CoT NVDM type.
+impl MessageToFsp for FspMessage {
+ const NVDM_TYPE: NvdmType = NvdmType::Cot;
+}
+
+/// Bundled arguments for FMC boot via FSP Chain of Trust.
+pub(crate) struct FmcBootArgs {
+ // Chipset being booted; selects the FSP HAL that provides the CoT version.
+ chipset: Chipset,
+ // Boot parameters in a `Coherent` allocation; its DMA handle is placed in the CoT message.
+ fmc_boot_params: Coherent<GspFmcBootParams>,
+ // When set, the FRTS offset and size in the CoT message are left at 0.
+ resume: bool,
+}
+
+impl FmcBootArgs {
+ /// Builds FMC boot arguments, allocating the DMA-coherent boot parameter
+ /// structure that FSP will read.
+ ///
+ /// `wpr_meta_addr` and `libos_addr` are passed through to `GspFmcBootParams::new`;
+ /// `chipset` and `resume` are stored as they are. Fails if the coherent allocation fails.
+ pub(crate) fn new(
+ dev: &device::Device<device::Bound>,
+ chipset: Chipset,
+ wpr_meta_addr: u64,
+ libos_addr: u64,
+ resume: bool,
+ ) -> Result<Self> {
+ let init = GspFmcBootParams::new(wpr_meta_addr, libos_addr);
+
+ Ok(Self {
+ chipset,
+ fmc_boot_params: Coherent::<GspFmcBootParams>::init(dev, GFP_KERNEL, init)?,
+ resume,
+ })
+ }
+
+ /// DMA address of the FMC boot parameters, needed after boot for lockdown
+ /// release polling.
+ pub(crate) fn boot_params_dma_handle(&self) -> u64 {
+ self.fmc_boot_params.dma_handle()
+ }
+}
+
+/// FSP interface for Hopper/Blackwell GPUs.
+///
+/// An `Fsp` is produced by [`Fsp::wait_secure_boot`], which only returns once FSP secure boot
+/// has completed. It owns the FSP falcon and the FMC firmware, which are used for the subsequent
+/// Chain of Trust boot.
+pub(crate) struct Fsp {
+ // Falcon used to exchange messages with FSP.
+ falcon: Falcon<FspEngine>,
+ // FMC image and signatures referenced by the Chain of Trust message.
+ fsp_fw: FspFirmware,
+}
+
+impl Fsp {
+ /// Waits for FSP secure boot completion, then returns the [`Fsp`] interface.
+ ///
+ /// Polls the thermal scratch register until FSP signals boot completion or the timeout
+ /// elapses. Returning an [`Fsp`] only on success guarantees, at the API level, that the
+ /// interface is not used before secure boot has completed.
+ ///
+ /// Fails with `ENOTSUPP` if `chipset` has no FSP HAL; falcon creation and polling errors are
+ /// propagated, the latter after being logged.
+ pub(crate) fn wait_secure_boot(
+ dev: &device::Device<device::Bound>,
+ bar: Bar0<'_>,
+ chipset: Chipset,
+ fsp_fw: FspFirmware,
+ ) -> Result<Fsp> {
+ /// FSP secure boot completion timeout in milliseconds.
+ const FSP_SECURE_BOOT_TIMEOUT_MS: i64 = 5000;
+
+ let hal = hal::fsp_hal(chipset).ok_or(ENOTSUPP)?;
+ let falcon = Falcon::<FspEngine>::new(dev, chipset)?;
+
+ // NOTE(review): the two `Delta` arguments are presumably the sleep interval (10 ms) and
+ // the overall timeout -- confirm against `read_poll_timeout`'s signature.
+ read_poll_timeout(
+ || Ok(hal.fsp_boot_status(bar)),
+ |&status| status == regs::NV_THERM_I2CS_SCRATCH_FSP_BOOT_COMPLETE_STATUS_SUCCESS,
+ Delta::from_millis(10),
+ Delta::from_millis(FSP_SECURE_BOOT_TIMEOUT_MS),
+ )
+ .inspect_err(|e| {
+ dev_err!(dev, "FSP secure boot completion error: {:?}\n", e);
+ })?;
+
+ Ok(Fsp { falcon, fsp_fw })
+ }
+
+ /// Sends a message to FSP and waits for the response.
+ ///
+ /// The response is accepted only if it is a single-packet MCTP message, its NVDM header
+ /// validates as an FSP response, it echoes the NVDM type of `msg`, and its error code is 0.
+ /// Any other response is logged and reported as `EIO`; send and receive errors are
+ /// propagated unchanged.
+ fn send_sync_fsp<M>(&mut self, dev: &device::Device, bar: Bar0<'_>, msg: &M) -> Result
+ where
+ M: MessageToFsp,
+ {
+ self.falcon.send_msg(bar, msg.as_bytes())?;
+
+ let response_buf = self.falcon.recv_msg(bar).inspect_err(|e| {
+ dev_err!(dev, "FSP response error: {:?}\n", e);
+ })?;
+
+ // Only the leading `FspResponse` is interpreted; any trailing bytes are ignored.
+ let (response, _) = FspResponse::from_bytes_prefix(&response_buf[..]).ok_or_else(|| {
+ dev_err!(dev, "FSP response too small: {}\n", response_buf.len());
+ EIO
+ })?;
+
+ // Copy the fields out of the packed struct before using them.
+ let mctp_header = response.mctp_header;
+ let nvdm_header = response.nvdm_header;
+ let command_nvdm_type = response.response.command_nvdm_type;
+ let error_code = response.response.error_code;
+
+ if !mctp_header.is_single_packet() {
+ dev_err!(
+ dev,
+ "Unexpected MCTP header in FSP reply: {:x?}\n",
+ mctp_header,
+ );
+ return Err(EIO);
+ }
+
+ if !nvdm_header.validate(NvdmType::FspResponse) {
+ dev_err!(
+ dev,
+ "Unexpected NVDM header in FSP reply: {:x?}\n",
+ nvdm_header,
+ );
+ return Err(EIO);
+ }
+
+ if command_nvdm_type != u8::from(M::NVDM_TYPE).into() {
+ dev_err!(
+ dev,
+ "Expected NVDM type {:?} in reply, got {:#x}\n",
+ M::NVDM_TYPE,
+ command_nvdm_type
+ );
+ return Err(EIO);
+ }
+
+ if error_code != 0 {
+ dev_err!(
+ dev,
+ "NVDM command {:?} failed with error {:#x}\n",
+ M::NVDM_TYPE,
+ error_code
+ );
+ return Err(EIO);
+ }
+
+ Ok(())
+ }
+
+ /// Boots GSP FMC via FSP Chain of Trust.
+ ///
+ /// Builds the CoT message from the pre-configured [`FmcBootArgs`], sends it
+ /// to FSP, and waits for the response.
+ pub(crate) fn boot_fmc(
+ &mut self,
+ dev: &device::Device<device::Bound>,
+ bar: Bar0<'_>,
+ fb_layout: &FbLayout,
+ args: &FmcBootArgs,
+ ) -> Result {
+ dev_dbg!(dev, "Starting FSP boot sequence for {}\n", args.chipset);
+
+ // The message embeds the FMC signatures, so it is initialized directly on the heap.
+ let msg = KBox::init(FspMessage::new(fb_layout, &self.fsp_fw, args)?, GFP_KERNEL)?;
+
+ self.send_sync_fsp(dev, bar, &*msg)?;
+
+ dev_dbg!(dev, "FSP Chain of Trust completed successfully\n");
+ Ok(())
+ }
+}
diff --git a/drivers/gpu/nova-core/fsp/hal.rs b/drivers/gpu/nova-core/fsp/hal.rs
new file mode 100644
index 000000000000..b6f2624bb13d
--- /dev/null
+++ b/drivers/gpu/nova-core/fsp/hal.rs
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+use crate::{
+ driver::Bar0,
+ gpu::{
+ Architecture,
+ Chipset, //
+ },
+};
+
+mod gb100;
+mod gb202;
+mod gh100;
+
+pub(super) trait FspHal {
+ /// Returns the secure boot status from the architecture-specific `NV_THERM_I2CS_SCRATCH` register.
+ fn fsp_boot_status(&self, bar: Bar0<'_>) -> u32;
+
+ /// Returns the FSP Chain of Trust protocol version this chipset advertises.
+ fn cot_version(&self) -> u16;
+}
+
+/// Returns the FSP HAL, or `None` if the architecture doesn't support FSP.
+pub(super) fn fsp_hal(chipset: Chipset) -> Option<&'static dyn FspHal> {
+ match chipset.arch() {
+ Architecture::Turing | Architecture::Ampere | Architecture::Ada => None,
+ Architecture::Hopper => Some(gh100::GH100_HAL),
+ Architecture::BlackwellGB10x => Some(gb100::GB100_HAL),
+ Architecture::BlackwellGB20x => Some(gb202::GB202_HAL),
+ }
+}
diff --git a/drivers/gpu/nova-core/fsp/hal/gb100.rs b/drivers/gpu/nova-core/fsp/hal/gb100.rs
new file mode 100644
index 000000000000..42f5ecfc6400
--- /dev/null
+++ b/drivers/gpu/nova-core/fsp/hal/gb100.rs
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+use crate::{
+ driver::Bar0,
+ fsp::hal::FspHal, //
+};
+
+struct Gb100;
+
+impl FspHal for Gb100 {
+ fn fsp_boot_status(&self, bar: Bar0<'_>) -> u32 {
+ // GB10x shares Hopper's FSP secure boot status register.
+ super::gh100::fsp_boot_status_gh100(bar)
+ }
+
+ fn cot_version(&self) -> u16 {
+ 2
+ }
+}
+
+const GB100: Gb100 = Gb100;
+pub(super) const GB100_HAL: &dyn FspHal = &GB100;
diff --git a/drivers/gpu/nova-core/fsp/hal/gb202.rs b/drivers/gpu/nova-core/fsp/hal/gb202.rs
new file mode 100644
index 000000000000..1091b169a645
--- /dev/null
+++ b/drivers/gpu/nova-core/fsp/hal/gb202.rs
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+use kernel::io::Io;
+
+use crate::{
+ driver::Bar0,
+ fsp::hal::FspHal,
+ regs, //
+};
+
+struct Gb202;
+
+impl FspHal for Gb202 {
+ fn fsp_boot_status(&self, bar: Bar0<'_>) -> u32 {
+ bar.read(regs::gb202::NV_THERM_I2CS_SCRATCH_FSP_BOOT_COMPLETE)
+ .fsp_boot_complete()
+ .into()
+ }
+
+ fn cot_version(&self) -> u16 {
+ 2
+ }
+}
+
+const GB202: Gb202 = Gb202;
+pub(super) const GB202_HAL: &dyn FspHal = &GB202;
diff --git a/drivers/gpu/nova-core/fsp/hal/gh100.rs b/drivers/gpu/nova-core/fsp/hal/gh100.rs
new file mode 100644
index 000000000000..291acaf2845a
--- /dev/null
+++ b/drivers/gpu/nova-core/fsp/hal/gh100.rs
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+use kernel::io::Io;
+
+use crate::{
+ driver::Bar0,
+ fsp::hal::FspHal,
+ regs, //
+};
+
+struct Gh100;
+
+/// Reads the FSP secure boot status from the Hopper/GB10x thermal scratch register.
+pub(super) fn fsp_boot_status_gh100(bar: Bar0<'_>) -> u32 {
+ bar.read(regs::gh100::NV_THERM_I2CS_SCRATCH_FSP_BOOT_COMPLETE)
+ .fsp_boot_complete()
+ .into()
+}
+
+impl FspHal for Gh100 {
+ fn fsp_boot_status(&self, bar: Bar0<'_>) -> u32 {
+ fsp_boot_status_gh100(bar)
+ }
+
+ fn cot_version(&self) -> u16 {
+ 1
+ }
+}
+
+const GH100: Gh100 = Gh100;
+pub(super) const GH100_HAL: &dyn FspHal = &GH100;
diff --git a/drivers/gpu/nova-core/gfw.rs b/drivers/gpu/nova-core/gfw.rs
deleted file mode 100644
index fb75dd10a172..000000000000
--- a/drivers/gpu/nova-core/gfw.rs
+++ /dev/null
@@ -1,76 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-//! GPU Firmware (`GFW`) support, a.k.a `devinit`.
-//!
-//! Upon reset, the GPU runs some firmware code from the BIOS to setup its core parameters. Most of
-//! the GPU is considered unusable until this step is completed, so we must wait on it before
-//! performing driver initialization.
-//!
-//! A clarification about devinit terminology: devinit is a sequence of register read/writes after
-//! reset that performs tasks such as:
-//! 1. Programming VRAM memory controller timings.
-//! 2. Power sequencing.
-//! 3. Clock and PLL configuration.
-//! 4. Thermal management.
-//!
-//! devinit itself is a 'script' which is interpreted by an interpreter program typically running
-//! on the PMU microcontroller.
-//!
-//! Note that the devinit sequence also needs to run during suspend/resume.
-
-use kernel::{
- io::{
- poll::read_poll_timeout,
- Io, //
- },
- prelude::*,
- time::Delta, //
-};
-
-use crate::{
- driver::Bar0,
- regs, //
-};
-
-/// Wait for the `GFW` (GPU firmware) boot completion signal (`GFW_BOOT`), or a 4 seconds timeout.
-///
-/// Upon GPU reset, several microcontrollers (such as PMU, SEC2, GSP etc) run some firmware code to
-/// setup its core parameters. Most of the GPU is considered unusable until this step is completed,
-/// so it must be waited on very early during driver initialization.
-///
-/// The `GFW` code includes several components that need to execute before the driver loads. These
-/// components are located in the VBIOS ROM and executed in a sequence on these different
-/// microcontrollers. The devinit sequence typically runs on the PMU, and the FWSEC runs on the
-/// GSP.
-///
-/// This function waits for a signal indicating that core initialization is complete. Before this
-/// signal is received, little can be done with the GPU. This signal is set by the FWSEC running on
-/// the GSP in Heavy-secured mode.
-pub(crate) fn wait_gfw_boot_completion(bar: &Bar0) -> Result {
- // Before accessing the completion status in `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05`, we must
- // first check `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK`. This is because
- // `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05` becomes accessible only after the secure firmware
- // (FWSEC) lowers the privilege level to allow CPU (LS/Light-secured) access. We can only
- // safely read the status register from CPU (LS/Light-secured) once the mask indicates
- // that the privilege level has been lowered.
- //
- // TIMEOUT: arbitrarily large value. GFW starts running immediately after the GPU is put out of
- // reset, and should complete in less time than that.
- read_poll_timeout(
- || {
- Ok(
- // Check that FWSEC has lowered its protection level before reading the GFW_BOOT
- // status.
- bar.read(regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK)
- .read_protection_level0()
- && bar
- .read(regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT)
- .completed(),
- )
- },
- |&gfw_booted| gfw_booted,
- Delta::from_millis(1),
- Delta::from_secs(4),
- )
- .map(|_| ())
-}
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index 0f6fe9a1b955..b3c91731db45 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -1,14 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
+use core::ops::Range;
+
use kernel::{
device,
- devres::Devres,
+ dma::Device,
fmt,
io::Io,
num::Bounded,
pci,
- prelude::*,
- sync::Arc, //
+ prelude::*, //
};
use crate::{
@@ -20,11 +21,15 @@ use crate::{
Falcon, //
},
fb::SysmemFlush,
- gfw,
- gsp::Gsp,
+ gsp::{
+ self,
+ Gsp, //
+ },
regs,
};
+mod hal;
+
macro_rules! define_chipset {
({ $($variant:ident = $value:expr),* $(,)* }) =>
{
@@ -86,12 +91,23 @@ define_chipset!({
GA104 = 0x174,
GA106 = 0x176,
GA107 = 0x177,
+ // Hopper
+ GH100 = 0x180,
// Ada
AD102 = 0x192,
AD103 = 0x193,
AD104 = 0x194,
AD106 = 0x196,
AD107 = 0x197,
+ // Blackwell GB10x
+ GB100 = 0x1a0,
+ GB102 = 0x1a2,
+ // Blackwell GB20x
+ GB202 = 0x1b2,
+ GB203 = 0x1b3,
+ GB205 = 0x1b5,
+ GB206 = 0x1b6,
+ GB207 = 0x1b7,
});
impl Chipset {
@@ -103,9 +119,14 @@ impl Chipset {
Self::GA100 | Self::GA102 | Self::GA103 | Self::GA104 | Self::GA106 | Self::GA107 => {
Architecture::Ampere
}
+ Self::GH100 => Architecture::Hopper,
Self::AD102 | Self::AD103 | Self::AD104 | Self::AD106 | Self::AD107 => {
Architecture::Ada
}
+ Self::GB100 | Self::GB102 => Architecture::BlackwellGB10x,
+ Self::GB202 | Self::GB203 | Self::GB205 | Self::GB206 | Self::GB207 => {
+ Architecture::BlackwellGB20x
+ }
}
}
@@ -115,6 +136,20 @@ impl Chipset {
pub(crate) const fn needs_fwsec_bootloader(self) -> bool {
matches!(self.arch(), Architecture::Turing) || matches!(self, Self::GA100)
}
+
+ /// Returns `true` if this chipset boots via FSP (Hopper and later), which requires the FMC
+ /// firmware image.
+ pub(crate) const fn uses_fsp(self) -> bool {
+ matches!(
+ self.arch(),
+ Architecture::Hopper | Architecture::BlackwellGB10x | Architecture::BlackwellGB20x
+ )
+ }
+
+ /// Returns the address range of the PCI config mirror space.
+ pub(crate) fn pci_config_mirror_range(self) -> Range<u32> {
+ hal::gpu_hal(self).pci_config_mirror_range()
+ }
}
// TODO
@@ -137,10 +172,14 @@ bounded_enum! {
pub(crate) enum Architecture with TryFrom<Bounded<u32, 6>> {
Turing = 0x16,
Ampere = 0x17,
+ Hopper = 0x18,
Ada = 0x19,
+ BlackwellGB10x = 0x1a,
+ BlackwellGB20x = 0x1b,
}
}
+#[derive(Clone, Copy)]
pub(crate) struct Revision {
major: Bounded<u8, 4>,
minor: Bounded<u8, 4>,
@@ -162,13 +201,14 @@ impl fmt::Display for Revision {
}
/// Structure holding a basic description of the GPU: `Chipset` and `Revision`.
+#[derive(Clone, Copy)]
pub(crate) struct Spec {
chipset: Chipset,
revision: Revision,
}
impl Spec {
- fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
+ fn new(dev: &device::Device, bar: Bar0<'_>) -> Result<Spec> {
// Some brief notes about boot0 and boot42, in chronological order:
//
// NV04 through NV50:
@@ -223,14 +263,16 @@ impl fmt::Display for Spec {
}
/// Structure holding the resources required to operate the GPU.
-#[pin_data]
-pub(crate) struct Gpu {
+#[pin_data(PinnedDrop)]
+pub(crate) struct Gpu<'gpu> {
+ /// Device owning the GPU.
+ device: &'gpu device::Device<device::Bound>,
spec: Spec,
- /// MMIO mapping of PCI BAR 0
- bar: Arc<Devres<Bar0>>,
+ /// MMIO mapping of PCI BAR 0.
+ bar: Bar0<'gpu>,
/// System memory page required for flushing all pending GPU-side memory writes done through
/// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation).
- sysmem_flush: SysmemFlush,
+ sysmem_flush: SysmemFlush<'gpu>,
/// GSP falcon instance, used for GSP boot up and cleanup.
gsp_falcon: Falcon<GspFalcon>,
/// SEC2 falcon instance, used for GSP boot up and cleanup.
@@ -238,22 +280,31 @@ pub(crate) struct Gpu {
/// GSP runtime data. Temporarily an empty placeholder.
#[pin]
gsp: Gsp,
+ /// GSP unload firmware bundle, if any.
+ unload_bundle: Option<gsp::UnloadBundle>,
}
-impl Gpu {
- pub(crate) fn new<'a>(
- pdev: &'a pci::Device<device::Bound>,
- devres_bar: Arc<Devres<Bar0>>,
- bar: &'a Bar0,
- ) -> impl PinInit<Self, Error> + 'a {
+impl<'gpu> Gpu<'gpu> {
+ pub(crate) fn new(
+ pdev: &'gpu pci::Device<device::Core<'_>>,
+ bar: Bar0<'gpu>,
+ ) -> impl PinInit<Self, Error> + 'gpu {
try_pin_init!(Self {
+ device: pdev.as_ref(),
spec: Spec::new(pdev.as_ref(), bar).inspect(|spec| {
dev_info!(pdev,"NVIDIA ({})\n", spec);
})?,
// We must wait for GFW_BOOT completion before doing any significant setup on the GPU.
_: {
- gfw::wait_gfw_boot_completion(bar)
+ let hal = hal::gpu_hal(spec.chipset);
+ let dma_mask = hal.dma_mask();
+
+ // SAFETY: `Gpu` owns all DMA allocations for this device, and we are
+ // still constructing it, so no concurrent DMA allocations can exist.
+ unsafe { pdev.dma_set_mask_and_coherent(dma_mask)? };
+
+ hal.wait_gfw_boot_completion(bar)
.inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
},
@@ -269,20 +320,28 @@ impl Gpu {
gsp <- Gsp::new(pdev),
- _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? },
-
- bar: devres_bar,
+ // This member must be initialized last, so the `UnloadBundle` can never be dropped from
+ // outside of the constructed `Gpu`, ensuring that the unload sequence is properly run
+ // in case of failure.
+ unload_bundle: gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)?,
+ bar,
})
}
+}
- /// Called when the corresponding [`Device`](device::Device) is unbound.
- ///
- /// Note: This method must only be called from `Driver::unbind`.
- pub(crate) fn unbind(&self, dev: &device::Device<device::Core>) {
- kernel::warn_on!(self
- .bar
- .access(dev)
- .inspect(|bar| self.sysmem_flush.unregister(bar))
- .is_err());
+#[pinned_drop]
+impl PinnedDrop for Gpu<'_> {
+ fn drop(self: Pin<&mut Self>) {
+ let this = self.project();
+ let device = *this.device;
+ let bar = *this.bar;
+ let bundle = this.unload_bundle.take();
+
+ let _ = this
+ .gsp
+ .as_ref()
+ .get_ref()
+ .unload(device, bar, &*this.gsp_falcon, &*this.sec2_falcon, bundle)
+ .inspect_err(|e| dev_err!(device, "failed to unload GSP: {:?}\n", e));
}
}
diff --git a/drivers/gpu/nova-core/gpu/hal.rs b/drivers/gpu/nova-core/gpu/hal.rs
new file mode 100644
index 000000000000..3f25882d0e56
--- /dev/null
+++ b/drivers/gpu/nova-core/gpu/hal.rs
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use core::ops::Range;
+
+use kernel::{
+ dma::DmaMask,
+ prelude::*, //
+};
+
+use crate::{
+ driver::Bar0,
+ gpu::{
+ Architecture,
+ Chipset, //
+ },
+};
+
+mod gh100;
+mod tu102;
+
+pub(crate) trait GpuHal {
+ /// Waits for GFW_BOOT completion if required by this hardware family.
+ fn wait_gfw_boot_completion(&self, bar: Bar0<'_>) -> Result;
+
+ /// Returns the DMA mask for the current architecture.
+ fn dma_mask(&self) -> DmaMask;
+
+ /// Returns the address range of the PCI config mirror space.
+ fn pci_config_mirror_range(&self) -> Range<u32>;
+}
+
+pub(super) fn gpu_hal(chipset: Chipset) -> &'static dyn GpuHal {
+ match chipset.arch() {
+ Architecture::Turing | Architecture::Ampere | Architecture::Ada => tu102::TU102_HAL,
+ Architecture::Hopper | Architecture::BlackwellGB10x | Architecture::BlackwellGB20x => {
+ gh100::GH100_HAL
+ }
+ }
+}
diff --git a/drivers/gpu/nova-core/gpu/hal/gh100.rs b/drivers/gpu/nova-core/gpu/hal/gh100.rs
new file mode 100644
index 000000000000..e3f8ba0fab33
--- /dev/null
+++ b/drivers/gpu/nova-core/gpu/hal/gh100.rs
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use core::ops::Range;
+
+use kernel::{
+ dma::DmaMask,
+ prelude::*, //
+};
+
+use crate::driver::Bar0;
+
+use super::GpuHal;
+
+struct Gh100;
+
+impl GpuHal for Gh100 {
+ fn wait_gfw_boot_completion(&self, _bar: Bar0<'_>) -> Result {
+ Ok(())
+ }
+
+ fn dma_mask(&self) -> DmaMask {
+ DmaMask::new::<52>()
+ }
+
+ fn pci_config_mirror_range(&self) -> Range<u32> {
+ const PCI_CONFIG_MIRROR_START: u32 = 0x092000;
+ const PCI_CONFIG_MIRROR_SIZE: u32 = 0x001000;
+
+ PCI_CONFIG_MIRROR_START..PCI_CONFIG_MIRROR_START + PCI_CONFIG_MIRROR_SIZE
+ }
+}
+
+const GH100: Gh100 = Gh100;
+pub(super) const GH100_HAL: &dyn GpuHal = &GH100;
diff --git a/drivers/gpu/nova-core/gpu/hal/tu102.rs b/drivers/gpu/nova-core/gpu/hal/tu102.rs
new file mode 100644
index 000000000000..b0732e53edea
--- /dev/null
+++ b/drivers/gpu/nova-core/gpu/hal/tu102.rs
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! GPU Firmware (`GFW`) support, a.k.a `devinit`.
+//!
+//! Upon reset, the GPU runs some firmware code from the BIOS to setup its core parameters. Most of
+//! the GPU is considered unusable until this step is completed, so we must wait on it before
+//! performing driver initialization.
+//!
+//! A clarification about devinit terminology: devinit is a sequence of register read/writes after
+//! reset that performs tasks such as:
+//! 1. Programming VRAM memory controller timings.
+//! 2. Power sequencing.
+//! 3. Clock and PLL configuration.
+//! 4. Thermal management.
+//!
+//! devinit itself is a 'script' which is interpreted by an interpreter program typically running
+//! on the PMU microcontroller.
+//!
+//! Note that the devinit sequence also needs to run during suspend/resume.
+
+use core::ops::Range;
+
+use kernel::{
+ dma::DmaMask,
+ io::{
+ poll::read_poll_timeout,
+ Io, //
+ },
+ prelude::*,
+ time::Delta, //
+};
+
+use crate::{
+ driver::Bar0,
+ regs, //
+};
+
+use super::GpuHal;
+
+struct Tu102;
+
+impl GpuHal for Tu102 {
+ /// Wait for the `GFW` (GPU firmware) boot completion signal (`GFW_BOOT`), or a 4 seconds
+ /// timeout.
+ ///
+ /// Upon GPU reset, several microcontrollers (such as PMU, SEC2, GSP etc) run some firmware
+ /// code to setup its core parameters. Most of the GPU is considered unusable until this step
+ /// is completed, so it must be waited on very early during driver initialization.
+ ///
+ /// The `GFW` code includes several components that need to execute before the driver loads.
+ /// These components are located in the VBIOS ROM and executed in a sequence on these different
+ /// microcontrollers. The devinit sequence typically runs on the PMU, and the FWSEC runs on the
+ /// GSP.
+ ///
+ /// This function waits for a signal indicating that core initialization is complete. Before
+ /// this signal is received, little can be done with the GPU. This signal is set by the FWSEC
+ /// running on the GSP in Heavy-secured mode.
+ fn wait_gfw_boot_completion(&self, bar: Bar0<'_>) -> Result {
+ // Before accessing the completion status in `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05`, we must
+ // first check `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK`. This is because
+ // `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05` becomes accessible only after the secure firmware
+ // (FWSEC) lowers the privilege level to allow CPU (LS/Light-secured) access. We can only
+ // safely read the status register from CPU (LS/Light-secured) once the mask indicates
+ // that the privilege level has been lowered.
+ //
+ // TIMEOUT: arbitrarily large value. GFW starts running immediately after the GPU is put
+ // out of reset, and should complete in less time than that.
+ read_poll_timeout(
+ || {
+ Ok(
+ // Check that FWSEC has lowered its protection level before reading the
+ // GFW_BOOT status.
+ bar.read(regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK)
+ .read_protection_level0()
+ && bar
+ .read(regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT)
+ .completed(),
+ )
+ },
+ |&gfw_booted| gfw_booted,
+ Delta::from_millis(1),
+ Delta::from_secs(4),
+ )
+ .map(|_| ())
+ }
+
+ fn dma_mask(&self) -> DmaMask {
+ DmaMask::new::<47>()
+ }
+
+ fn pci_config_mirror_range(&self) -> Range<u32> {
+ const PCI_CONFIG_MIRROR_START: u32 = 0x088000;
+ const PCI_CONFIG_MIRROR_SIZE: u32 = 0x001000;
+
+ PCI_CONFIG_MIRROR_START..PCI_CONFIG_MIRROR_START + PCI_CONFIG_MIRROR_SIZE
+ }
+}
+
+const TU102: Tu102 = Tu102;
+pub(super) const TU102_HAL: &dyn GpuHal = &TU102;
diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
index ba5b7f990031..69175ca3315c 100644
--- a/drivers/gpu/nova-core/gsp.rs
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
mod boot;
+mod hal;
use kernel::{
debugfs,
@@ -24,6 +25,7 @@ mod fw;
mod sequencer;
pub(crate) use fw::{
+ GspFmcBootParams,
GspFwWprMeta,
LibosParams, //
};
@@ -184,3 +186,6 @@ impl Gsp {
})
}
}
+
+/// Opaque bundle required to unload the GSP. Created by [`Gsp::boot`], consumed by [`Gsp::unload`].
+pub(crate) struct UnloadBundle(KBox<dyn hal::UnloadBundle>);
diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs
index 18f356c9178e..8afb62d689cb 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -1,13 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
use kernel::{
+ bits,
device,
dma::Coherent,
io::poll::read_poll_timeout,
- io::Io,
pci,
prelude::*,
- time::Delta, //
+ time::Delta,
+ types::ScopeGuard, //
};
use crate::{
@@ -19,192 +21,119 @@ use crate::{
},
fb::FbLayout,
firmware::{
- booter::{
- BooterFirmware,
- BooterKind, //
- },
- fwsec::{
- bootloader::FwsecFirmwareWithBl,
- FwsecCommand,
- FwsecFirmware, //
- },
gsp::GspFirmware,
FIRMWARE_VERSION, //
},
gpu::Chipset,
gsp::{
+ cmdq::Cmdq,
commands,
- sequencer::{
- GspSequencer,
- GspSequencerParams, //
- },
GspFwWprMeta, //
},
- regs,
- vbios::Vbios,
};
-impl super::Gsp {
- /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly
- /// created the WPR2 region.
- fn run_fwsec_frts(
- dev: &device::Device<device::Bound>,
- chipset: Chipset,
- falcon: &Falcon<Gsp>,
- bar: &Bar0,
- bios: &Vbios,
- fb_layout: &FbLayout,
- ) -> Result<()> {
- // Check that the WPR2 region does not already exists - if it does, we cannot run
- // FWSEC-FRTS until the GPU is reset.
- if bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI).higher_bound() != 0 {
- dev_err!(
- dev,
- "WPR2 region already exists - GPU needs to be reset to proceed\n"
- );
- return Err(EBUSY);
- }
-
- // FWSEC-FRTS will create the WPR2 region.
- let fwsec_frts = FwsecFirmware::new(
- dev,
- falcon,
- bar,
- bios,
- FwsecCommand::Frts {
- frts_addr: fb_layout.frts.start,
- frts_size: fb_layout.frts.len(),
- },
- )?;
-
- if chipset.needs_fwsec_bootloader() {
- let fwsec_frts_bl = FwsecFirmwareWithBl::new(fwsec_frts, dev, chipset)?;
- // Load and run the bootloader, which will load FWSEC-FRTS and run it.
- fwsec_frts_bl.run(dev, falcon, bar)?;
- } else {
- // Load and run FWSEC-FRTS directly.
- fwsec_frts.run(dev, falcon, bar)?;
- }
-
- // SCRATCH_E contains the error code for FWSEC-FRTS.
- let frts_status = bar
- .read(regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR)
- .frts_err_code();
- if frts_status != 0 {
- dev_err!(
- dev,
- "FWSEC-FRTS returned with error code {:#x}\n",
- frts_status
- );
-
- return Err(EIO);
- }
-
- // Check that the WPR2 region has been created as we requested.
- let (wpr2_lo, wpr2_hi) = (
- bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO).lower_bound(),
- bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI).higher_bound(),
- );
+/// Arguments required to call [`Gsp::unload`](super::Gsp::unload).
+///
+/// Stored as their own type to avoid repeating a long and tedious list in [`BootUnloadGuard`].
+pub(super) struct BootUnloadArgs<'a> {
+ gsp: &'a super::Gsp,
+ dev: &'a device::Device<device::Bound>,
+ bar: Bar0<'a>,
+ gsp_falcon: &'a Falcon<Gsp>,
+ sec2_falcon: &'a Falcon<Sec2>,
+ unload_bundle: Option<super::UnloadBundle>,
+}
- match (wpr2_lo, wpr2_hi) {
- (_, 0) => {
- dev_err!(dev, "WPR2 region not created after running FWSEC-FRTS\n");
+/// Guard that calls [`Gsp::unload`](super::Gsp::unload) with a
+/// [`UnloadBundle`](super::UnloadBundle) when dropped.
+///
+/// Used to ensure the `UnloadBundle` is run during failure paths.
+pub(super) struct BootUnloadGuard<'a> {
+ guard: ScopeGuard<BootUnloadArgs<'a>, fn(BootUnloadArgs<'a>)>,
+}
- Err(EIO)
- }
- (wpr2_lo, _) if wpr2_lo != fb_layout.frts.start => {
- dev_err!(
+impl<'a> BootUnloadGuard<'a> {
+ /// Wraps `unload_bundle` into a guard that executes it when dropped.
+ pub(super) fn new(
+ gsp: &'a super::Gsp,
+ dev: &'a device::Device<device::Bound>,
+ bar: Bar0<'a>,
+ gsp_falcon: &'a Falcon<Gsp>,
+ sec2_falcon: &'a Falcon<Sec2>,
+ unload_bundle: Option<super::UnloadBundle>,
+ ) -> Self {
+ Self {
+ guard: ScopeGuard::new_with_data(
+ BootUnloadArgs {
+ gsp,
dev,
- "WPR2 region created at unexpected address {:#x}; expected {:#x}\n",
- wpr2_lo,
- fb_layout.frts.start,
- );
-
- Err(EIO)
- }
- (wpr2_lo, wpr2_hi) => {
- dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi);
- dev_dbg!(dev, "GPU instance built\n");
-
- Ok(())
- }
+ bar,
+ gsp_falcon,
+ sec2_falcon,
+ unload_bundle,
+ },
+ |args| {
+ let _ = super::Gsp::unload(
+ args.gsp,
+ args.dev,
+ args.bar,
+ args.gsp_falcon,
+ args.sec2_falcon,
+ args.unload_bundle,
+ );
+ },
+ ),
}
}
+ /// Disarms the guard and returns the [`UnloadBundle`](super::UnloadBundle) it contains.
+ pub(super) fn dismiss(self) -> Option<super::UnloadBundle> {
+ self.guard.dismiss().unload_bundle
+ }
+}
+
+impl super::Gsp {
/// Attempt to boot the GSP.
///
/// This is a GPU-dependent and complex procedure that involves loading firmware files from
/// user-space, patching them with signatures, and building firmware-specific intricate data
/// structures that the GSP will use at runtime.
///
- /// Upon return, the GSP is up and running, and its runtime object given as return value.
+ /// Upon return, the GSP is up and running, and its unload bundle (to be given as argument to
+ /// [`Self::unload`]) returned.
pub(crate) fn boot(
self: Pin<&mut Self>,
pdev: &pci::Device<device::Bound>,
- bar: &Bar0,
+ bar: Bar0<'_>,
chipset: Chipset,
gsp_falcon: &Falcon<Gsp>,
sec2_falcon: &Falcon<Sec2>,
- ) -> Result {
+ ) -> Result<Option<super::UnloadBundle>> {
let dev = pdev.as_ref();
-
- let bios = Vbios::new(dev, bar)?;
+ let hal = super::hal::gsp_hal(chipset);
let gsp_fw = KBox::pin_init(GspFirmware::new(dev, chipset, FIRMWARE_VERSION), GFP_KERNEL)?;
let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?;
dev_dbg!(dev, "{:#x?}\n", fb_layout);
- Self::run_fwsec_frts(dev, chipset, gsp_falcon, bar, &bios, &fb_layout)?;
+ let wpr_meta = Coherent::init(dev, GFP_KERNEL, GspFwWprMeta::new(&gsp_fw, &fb_layout))?;
- let booter_loader = BooterFirmware::new(
+ // Perform the chipset-specific boot sequence, and retrieve the unload bundle.
+ let unload_guard = hal.boot(
+ &self,
dev,
- BooterKind::Loader,
+ bar,
chipset,
- FIRMWARE_VERSION,
+ &fb_layout,
+ &wpr_meta,
+ gsp_falcon,
sec2_falcon,
- bar,
)?;
- let wpr_meta = Coherent::init(dev, GFP_KERNEL, GspFwWprMeta::new(&gsp_fw, &fb_layout))?;
-
- self.cmdq
- .send_command_no_wait(bar, commands::SetSystemInfo::new(pdev))?;
- self.cmdq
- .send_command_no_wait(bar, commands::SetRegistry::new())?;
-
- gsp_falcon.reset(bar)?;
- let libos_handle = self.libos.dma_handle();
- let (mbox0, mbox1) = gsp_falcon.boot(
- bar,
- Some(libos_handle as u32),
- Some((libos_handle >> 32) as u32),
- )?;
- dev_dbg!(pdev, "GSP MBOX0: {:#x}, MBOX1: {:#x}\n", mbox0, mbox1);
-
- dev_dbg!(
- pdev,
- "Using SEC2 to load and run the booter_load firmware...\n"
- );
-
- sec2_falcon.reset(bar)?;
- sec2_falcon.load(dev, bar, &booter_loader)?;
- let wpr_handle = wpr_meta.dma_handle();
- let (mbox0, mbox1) = sec2_falcon.boot(
- bar,
- Some(wpr_handle as u32),
- Some((wpr_handle >> 32) as u32),
- )?;
- dev_dbg!(pdev, "SEC2 MBOX0: {:#x}, MBOX1: {:#x}\n", mbox0, mbox1);
-
- if mbox0 != 0 {
- dev_err!(pdev, "Booter-load failed with error {:#x}\n", mbox0);
- return Err(ENODEV);
- }
-
gsp_falcon.write_os_version(bar, gsp_fw.bootloader.app_version);
- // Poll for RISC-V to become active before running sequencer
+ // Poll for RISC-V to become active before continuing.
read_poll_timeout(
|| Ok(gsp_falcon.is_riscv_active(bar)),
|val: &bool| *val,
@@ -214,27 +143,84 @@ impl super::Gsp {
dev_dbg!(pdev, "RISC-V active? {}\n", gsp_falcon.is_riscv_active(bar),);
- // Create and run the GSP sequencer.
- let seq_params = GspSequencerParams {
- bootloader_app_version: gsp_fw.bootloader.app_version,
- libos_dma_handle: libos_handle,
- gsp_falcon,
- sec2_falcon,
- dev: pdev.as_ref().into(),
- bar,
- };
- GspSequencer::run(&self.cmdq, seq_params)?;
+ self.cmdq
+ .send_command_no_wait(bar, commands::SetSystemInfo::new(pdev, chipset))?;
+ self.cmdq
+ .send_command_no_wait(bar, commands::SetRegistry::new())?;
+
+ hal.post_boot(&self, dev, bar, &gsp_fw, gsp_falcon, sec2_falcon)?;
// Wait until GSP is fully initialized.
commands::wait_gsp_init_done(&self.cmdq)?;
// Obtain and display basic GPU information.
- let info = commands::get_gsp_info(&self.cmdq, bar)?;
+ let info = self.cmdq.send_command(bar, commands::GetGspStaticInfo)?;
match info.gpu_name() {
Ok(name) => dev_info!(pdev, "GPU name: {}\n", name),
Err(e) => dev_warn!(pdev, "GPU name unavailable: {:?}\n", e),
}
- Ok(())
+ Ok(unload_guard.dismiss())
+ }
+
+ /// Shut down the GSP and wait until it is offline.
+ fn shutdown_gsp(
+ cmdq: &Cmdq,
+ bar: Bar0<'_>,
+ gsp_falcon: &Falcon<Gsp>,
+ mode: commands::PowerStateLevel,
+ ) -> Result {
+ // Command to shut the GSP down.
+ cmdq.send_command(bar, commands::UnloadingGuestDriver::new(mode))?;
+
+ // Wait until GSP signals it is suspended.
+ const LIBOS_INTERRUPT_PROCESSOR_SUSPENDED: u32 = bits::bit_u32(31);
+ read_poll_timeout(
+ || Ok(gsp_falcon.read_mailbox0(bar)),
+ |&mb0| mb0 & LIBOS_INTERRUPT_PROCESSOR_SUSPENDED != 0,
+ Delta::from_millis(10),
+ Delta::from_secs(5),
+ )
+ .map(|_| ())
+ }
+
+ /// Attempts to unload the GSP firmware.
+ ///
+ /// This stops all activity on the GSP.
+ pub(crate) fn unload(
+ &self,
+ dev: &device::Device<device::Bound>,
+ bar: Bar0<'_>,
+ gsp_falcon: &Falcon<Gsp>,
+ sec2_falcon: &Falcon<Sec2>,
+ unload_bundle: Option<super::UnloadBundle>,
+ ) -> Result {
+ // Shut down the GSP. Keep going even in case of error.
+ let mut res = Self::shutdown_gsp(
+ &self.cmdq,
+ bar,
+ gsp_falcon,
+ commands::PowerStateLevel::Level0,
+ )
+ .inspect_err(|e| dev_err!(dev, "GSP shutdown failed: {:?}\n", e));
+
+ // Run the unload bundle to reset the GSP so it can be booted again.
+ if let Some(unload_bundle) = unload_bundle {
+ res = res.and(
+ unload_bundle
+ .0
+ .run(dev, bar, gsp_falcon, sec2_falcon)
+ .inspect_err(|e| dev_err!(dev, "Unload bundle failed: {:?}\n", e)),
+ );
+ } else {
+ dev_warn!(
+ dev,
+ "Unload bundle is missing, GSP won't be properly reset.\n"
+ );
+
+ res = Err(EAGAIN);
+ }
+
+ res.inspect(|()| dev_info!(dev, "GSP successfully unloaded\n"))
}
}
diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
index 275da9b1ee0e..0bc5a95a9cd7 100644
--- a/drivers/gpu/nova-core/gsp/cmdq.rs
+++ b/drivers/gpu/nova-core/gsp/cmdq.rs
@@ -532,7 +532,7 @@ impl Cmdq {
}
/// Notifies the GSP that we have updated the command queue pointers.
- fn notify_gsp(bar: &Bar0) {
+ fn notify_gsp(bar: Bar0<'_>) {
bar.write_reg(regs::NV_PGSP_QUEUE_HEAD::zeroed().with_address(0u32));
}
@@ -552,7 +552,7 @@ impl Cmdq {
/// written to by its [`CommandToGsp::init_variable_payload`] method.
///
/// Error codes returned by the command and reply initializers are propagated as-is.
- pub(crate) fn send_command<M>(&self, bar: &Bar0, command: M) -> Result<M::Reply>
+ pub(crate) fn send_command<M>(&self, bar: Bar0<'_>, command: M) -> Result<M::Reply>
where
M: CommandToGsp,
M::Reply: MessageFromGsp,
@@ -580,7 +580,7 @@ impl Cmdq {
/// written to by its [`CommandToGsp::init_variable_payload`] method.
///
/// Error codes returned by the command initializers are propagated as-is.
- pub(crate) fn send_command_no_wait<M>(&self, bar: &Bar0, command: M) -> Result
+ pub(crate) fn send_command_no_wait<M>(&self, bar: Bar0<'_>, command: M) -> Result
where
M: CommandToGsp<Reply = NoReply>,
Error: From<M::InitError>,
@@ -624,7 +624,7 @@ impl CmdqInner {
/// written to by its [`CommandToGsp::init_variable_payload`] method.
///
/// Error codes returned by the command initializers are propagated as-is.
- fn send_single_command<M>(&mut self, bar: &Bar0, command: M) -> Result
+ fn send_single_command<M>(&mut self, bar: Bar0<'_>, command: M) -> Result
where
M: CommandToGsp,
// This allows all error types, including `Infallible`, to be used for `M::InitError`.
@@ -694,7 +694,7 @@ impl CmdqInner {
/// written to by its [`CommandToGsp::init_variable_payload`] method.
///
/// Error codes returned by the command initializers are propagated as-is.
- fn send_command<M>(&mut self, bar: &Bar0, command: M) -> Result
+ fn send_command<M>(&mut self, bar: Bar0<'_>, command: M) -> Result
where
M: CommandToGsp,
Error: From<M::InitError>,
diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs
index c89c7b57a751..f84de9f4f045 100644
--- a/drivers/gpu/nova-core/gsp/commands.rs
+++ b/drivers/gpu/nova-core/gsp/commands.rs
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
use core::{
array,
@@ -18,7 +19,7 @@ use kernel::{
};
use crate::{
- driver::Bar0,
+ gpu::Chipset,
gsp::{
cmdq::{
Cmdq,
@@ -27,7 +28,7 @@ use crate::{
NoReply, //
},
fw::{
- commands::*,
+ self,
MsgFunction, //
},
},
@@ -37,23 +38,24 @@ use crate::{
/// The `GspSetSystemInfo` command.
pub(crate) struct SetSystemInfo<'a> {
pdev: &'a pci::Device<device::Bound>,
+ chipset: Chipset,
}
impl<'a> SetSystemInfo<'a> {
/// Creates a new `GspSetSystemInfo` command using the parameters of `pdev`.
- pub(crate) fn new(pdev: &'a pci::Device<device::Bound>) -> Self {
- Self { pdev }
+ pub(crate) fn new(pdev: &'a pci::Device<device::Bound>, chipset: Chipset) -> Self {
+ Self { pdev, chipset }
}
}
impl<'a> CommandToGsp for SetSystemInfo<'a> {
const FUNCTION: MsgFunction = MsgFunction::GspSetSystemInfo;
- type Command = GspSetSystemInfo;
+ type Command = fw::commands::GspSetSystemInfo;
type Reply = NoReply;
type InitError = Error;
fn init(&self) -> impl Init<Self::Command, Self::InitError> {
- GspSetSystemInfo::init(self.pdev)
+ Self::Command::init(self.pdev, self.chipset)
}
}
@@ -100,12 +102,12 @@ impl SetRegistry {
impl CommandToGsp for SetRegistry {
const FUNCTION: MsgFunction = MsgFunction::SetRegistry;
- type Command = PackedRegistryTable;
+ type Command = fw::commands::PackedRegistryTable;
type Reply = NoReply;
type InitError = Infallible;
fn init(&self) -> impl Init<Self::Command, Self::InitError> {
- PackedRegistryTable::init(Self::NUM_ENTRIES as u32, self.variable_payload_len() as u32)
+ Self::Command::init(Self::NUM_ENTRIES as u32, self.variable_payload_len() as u32)
}
fn variable_payload_len(&self) -> usize {
@@ -113,22 +115,22 @@ impl CommandToGsp for SetRegistry {
for i in 0..Self::NUM_ENTRIES {
key_size += self.entries[i].key.len() + 1; // +1 for NULL terminator
}
- Self::NUM_ENTRIES * size_of::<PackedRegistryEntry>() + key_size
+ Self::NUM_ENTRIES * size_of::<fw::commands::PackedRegistryEntry>() + key_size
}
fn init_variable_payload(
&self,
dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>,
) -> Result {
- let string_data_start_offset =
- size_of::<PackedRegistryTable>() + Self::NUM_ENTRIES * size_of::<PackedRegistryEntry>();
+ let string_data_start_offset = size_of::<Self::Command>()
+ + Self::NUM_ENTRIES * size_of::<fw::commands::PackedRegistryEntry>();
// Array for string data.
let mut string_data = KVec::new();
for entry in self.entries.iter().take(Self::NUM_ENTRIES) {
dst.write_all(
- PackedRegistryEntry::new(
+ fw::commands::PackedRegistryEntry::new(
(string_data_start_offset + string_data.len()) as u32,
entry.value,
)
@@ -176,16 +178,16 @@ pub(crate) fn wait_gsp_init_done(cmdq: &Cmdq) -> Result {
}
/// The `GetGspStaticInfo` command.
-struct GetGspStaticInfo;
+pub(crate) struct GetGspStaticInfo;
impl CommandToGsp for GetGspStaticInfo {
const FUNCTION: MsgFunction = MsgFunction::GetGspStaticInfo;
- type Command = GspStaticConfigInfo;
+ type Command = fw::commands::GspStaticConfigInfo;
type Reply = GetGspStaticInfoReply;
type InitError = Infallible;
fn init(&self) -> impl Init<Self::Command, Self::InitError> {
- GspStaticConfigInfo::init_zeroed()
+ Self::Command::init_zeroed()
}
}
@@ -196,7 +198,7 @@ pub(crate) struct GetGspStaticInfoReply {
impl MessageFromGsp for GetGspStaticInfoReply {
const FUNCTION: MsgFunction = MsgFunction::GetGspStaticInfo;
- type Message = GspStaticConfigInfo;
+ type Message = fw::commands::GspStaticConfigInfo;
type InitError = Infallible;
fn read(
@@ -233,7 +235,45 @@ impl GetGspStaticInfoReply {
}
}
-/// Send the [`GetGspInfo`] command and awaits for its reply.
-pub(crate) fn get_gsp_info(cmdq: &Cmdq, bar: &Bar0) -> Result<GetGspStaticInfoReply> {
- cmdq.send_command(bar, GetGspStaticInfo)
+pub(crate) use fw::commands::PowerStateLevel;
+
+/// The `UnloadingGuestDriver` command, used to shut down the GSP.
+///
+/// Only used within the `gsp` module.
+pub(super) struct UnloadingGuestDriver {
+ level: PowerStateLevel,
+}
+
+impl UnloadingGuestDriver {
+ /// Creates a new `UnloadingGuestDriver` command for the given [`PowerStateLevel`].
+ pub(super) fn new(level: PowerStateLevel) -> Self {
+ Self { level }
+ }
+}
+
+impl CommandToGsp for UnloadingGuestDriver {
+ const FUNCTION: MsgFunction = MsgFunction::UnloadingGuestDriver;
+ type Command = fw::commands::UnloadingGuestDriver;
+ type Reply = UnloadingGuestDriverReply;
+ type InitError = Infallible;
+
+ fn init(&self) -> impl Init<Self::Command, Self::InitError> {
+ fw::commands::UnloadingGuestDriver::new(self.level)
+ }
+}
+
+/// The reply from the GSP to the [`UnloadingGuestDriver`] command.
+pub(super) struct UnloadingGuestDriverReply;
+
+impl MessageFromGsp for UnloadingGuestDriverReply {
+ const FUNCTION: MsgFunction = MsgFunction::UnloadingGuestDriver;
+ type InitError = Infallible;
+ type Message = ();
+
+ fn read(
+ _msg: &Self::Message,
+ _sbuffer: &mut SBufferIter<array::IntoIter<&[u8], 2>>,
+ ) -> Result<Self, Self::InitError> {
+ Ok(UnloadingGuestDriverReply)
+ }
}
diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
index 0c8a74f0e8ac..4db0cfa4dc4d 100644
--- a/drivers/gpu/nova-core/gsp/fw.rs
+++ b/drivers/gpu/nova-core/gsp/fw.rs
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
pub(crate) mod commands;
mod r570_144;
@@ -17,8 +18,8 @@ use kernel::{
KnownSize, //
},
sizes::{
- SZ_128K,
- SZ_1M, //
+ SizeConstants,
+ SZ_128K, //
},
transmute::{
AsBytes,
@@ -29,7 +30,10 @@ use kernel::{
use crate::{
fb::FbLayout,
firmware::gsp::GspFirmware,
- gpu::Chipset,
+ gpu::{
+ Architecture,
+ Chipset, //
+ },
gsp::{
cmdq::Cmdq, //
GSP_PAGE_SIZE,
@@ -106,11 +110,15 @@ const GSP_HEAP_ALIGNMENT: Alignment = Alignment::new::<{ 1 << 20 }>();
impl GspFwHeapParams {
/// Returns the amount of GSP-RM heap memory used during GSP-RM boot and initialization (up to
/// and including the first client subdevice allocation).
- fn base_rm_size(_chipset: Chipset) -> u64 {
- // TODO: this needs to be updated to return the correct value for Hopper+ once support for
- // them is added:
- // u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_GH100)
- u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_TU10X)
+ fn base_rm_size(chipset: Chipset) -> u64 {
+ match chipset.arch() {
+ Architecture::Turing | Architecture::Ampere | Architecture::Ada => {
+ u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_TU10X)
+ }
+ Architecture::Hopper | Architecture::BlackwellGB10x | Architecture::BlackwellGB20x => {
+ u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_GH100)
+ }
+ }
}
/// Returns the amount of heap memory required to support a single channel allocation.
@@ -122,13 +130,14 @@ impl GspFwHeapParams {
/// Returns the amount of memory to reserve for management purposes for a framebuffer of size
/// `fb_size`.
- fn management_overhead(fb_size: u64) -> u64 {
- let fb_size_gb = fb_size.div_ceil(u64::from_safe_cast(kernel::sizes::SZ_1G));
+ fn management_overhead(fb_size: u64) -> Result<u64> {
+ let fb_size_gb = fb_size.div_ceil(u64::SZ_1G);
u64::from(bindings::GSP_FW_HEAP_PARAM_SIZE_PER_GB_FB)
- .saturating_mul(fb_size_gb)
+ .checked_mul(fb_size_gb)
+ .ok_or(EINVAL)?
.align_up(GSP_HEAP_ALIGNMENT)
- .unwrap_or(u64::MAX)
+ .ok_or(EINVAL)
}
}
@@ -145,9 +154,8 @@ impl LibosParams {
const LIBOS2: LibosParams = LibosParams {
carveout_size: num::u32_as_u64(bindings::GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS2),
allowed_heap_size: num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MIN_MB)
- * num::usize_as_u64(SZ_1M)
- ..num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MAX_MB)
- * num::usize_as_u64(SZ_1M),
+ * u64::SZ_1M
+ ..num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MAX_MB) * u64::SZ_1M,
};
/// Version 3 of the GSP LIBOS (GA102+)
@@ -155,9 +163,9 @@ impl LibosParams {
carveout_size: num::u32_as_u64(bindings::GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS3_BAREMETAL),
allowed_heap_size: num::u32_as_u64(
bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MIN_MB,
- ) * num::usize_as_u64(SZ_1M)
+ ) * u64::SZ_1M
..num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MAX_MB)
- * num::usize_as_u64(SZ_1M),
+ * u64::SZ_1M,
};
/// Returns the libos parameters corresponding to `chipset`.
@@ -171,18 +179,19 @@ impl LibosParams {
/// Returns the amount of memory (in bytes) to allocate for the WPR heap for a framebuffer size
/// of `fb_size` (in bytes) for `chipset`.
- pub(crate) fn wpr_heap_size(&self, chipset: Chipset, fb_size: u64) -> u64 {
+ pub(crate) fn wpr_heap_size(&self, chipset: Chipset, fb_size: u64) -> Result<u64> {
// The WPR heap will contain the following:
// LIBOS carveout,
- self.carveout_size
+ Ok(self
+ .carveout_size
// RM boot working memory,
.saturating_add(GspFwHeapParams::base_rm_size(chipset))
// One RM client,
.saturating_add(GspFwHeapParams::client_alloc_size())
// Overhead for memory management.
- .saturating_add(GspFwHeapParams::management_overhead(fb_size))
+ .saturating_add(GspFwHeapParams::management_overhead(fb_size)?)
// Clamp to the supported heap sizes.
- .clamp(self.allowed_heap_size.start, self.allowed_heap_size.end - 1)
+ .clamp(self.allowed_heap_size.start, self.allowed_heap_size.end - 1))
}
}
@@ -246,6 +255,7 @@ impl GspFwWprMeta {
fbSize: fb_layout.fb.end - fb_layout.fb.start,
vgaWorkspaceOffset: fb_layout.vga_workspace.start,
vgaWorkspaceSize: fb_layout.vga_workspace.end - fb_layout.vga_workspace.start,
+ pmuReservedSize: fb_layout.pmu_reserved_size,
..Zeroable::init_zeroed()
});
@@ -278,6 +288,7 @@ pub(crate) enum MsgFunction {
Nop = bindings::NV_VGPU_MSG_FUNCTION_NOP,
SetGuestSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO,
SetRegistry = bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY,
+ UnloadingGuestDriver = bindings::NV_VGPU_MSG_FUNCTION_UNLOADING_GUEST_DRIVER,
// Event codes
GspInitDone = bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE,
@@ -322,6 +333,9 @@ impl TryFrom<u32> for MsgFunction {
Ok(MsgFunction::SetGuestSystemInfo)
}
bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY => Ok(MsgFunction::SetRegistry),
+ bindings::NV_VGPU_MSG_FUNCTION_UNLOADING_GUEST_DRIVER => {
+ Ok(MsgFunction::UnloadingGuestDriver)
+ }
// Event codes
bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE => Ok(MsgFunction::GspInitDone),
@@ -920,3 +934,68 @@ impl MessageQueueInitArguments {
})
}
}
+
+#[repr(u32)]
+pub(crate) enum GspDmaTarget {
+ #[expect(dead_code)]
+ LocalFb = bindings::GSP_DMA_TARGET_GSP_DMA_TARGET_LOCAL_FB,
+ CoherentSystem = bindings::GSP_DMA_TARGET_GSP_DMA_TARGET_COHERENT_SYSTEM,
+ NoncoherentSystem = bindings::GSP_DMA_TARGET_GSP_DMA_TARGET_NONCOHERENT_SYSTEM,
+}
+
+type GspAcrBootGspRmParams = bindings::GSP_ACR_BOOT_GSP_RM_PARAMS;
+
+impl GspAcrBootGspRmParams {
+ fn new(target: GspDmaTarget, wpr_meta_addr: u64) -> impl Init<Self> {
+ #[allow(non_snake_case)]
+ let params = init!(Self {
+ target: target as u32,
+ gspRmDescSize: num::usize_into_u32::<{ size_of::<GspFwWprMeta>() }>(),
+ gspRmDescOffset: wpr_meta_addr,
+ bIsGspRmBoot: 1,
+ wprCarveoutOffset: 0,
+ wprCarveoutSize: 0,
+ __bindgen_padding_0: Default::default(),
+ });
+
+ params
+ }
+}
+
+type GspRmParams = bindings::GSP_RM_PARAMS;
+
+impl GspRmParams {
+ fn new(target: GspDmaTarget, libos_addr: u64) -> impl Init<Self> {
+ #[allow(non_snake_case)]
+ let params = init!(Self {
+ target: target as u32,
+ bootArgsOffset: libos_addr,
+ __bindgen_padding_0: Default::default(),
+ });
+
+ params
+ }
+}
+
+pub(crate) type GspFmcBootParams = bindings::GSP_FMC_BOOT_PARAMS;
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for GspFmcBootParams {}
+// SAFETY: This struct only contains integer types for which all bit patterns are valid.
+unsafe impl FromBytes for GspFmcBootParams {}
+
+impl GspFmcBootParams {
+ pub(crate) fn new(wpr_meta_addr: u64, libos_addr: u64) -> impl Init<Self> {
+ #[allow(non_snake_case)]
+ let init = init!(Self {
+ // Blackwell FSP obtains WPR info from other sources, so
+ // wprCarveoutOffset and wprCarveoutSize are left zero.
+ bootGspRmParams <- GspAcrBootGspRmParams::new(GspDmaTarget::CoherentSystem,
+ wpr_meta_addr),
+ gspRmParams <- GspRmParams::new(GspDmaTarget::NoncoherentSystem, libos_addr),
+ ..Zeroable::init_zeroed()
+ });
+
+ init
+ }
+}
diff --git a/drivers/gpu/nova-core/gsp/fw/commands.rs b/drivers/gpu/nova-core/gsp/fw/commands.rs
index db46276430be..7bcc41fc7fa0 100644
--- a/drivers/gpu/nova-core/gsp/fw/commands.rs
+++ b/drivers/gpu/nova-core/gsp/fw/commands.rs
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
use kernel::{
device,
@@ -10,7 +11,10 @@ use kernel::{
}, //
};
-use crate::gsp::GSP_PAGE_SIZE;
+use crate::{
+ gpu::Chipset,
+ gsp::GSP_PAGE_SIZE, //
+};
use super::bindings;
@@ -24,8 +28,12 @@ static_assert!(size_of::<GspSetSystemInfo>() < GSP_PAGE_SIZE);
impl GspSetSystemInfo {
/// Returns an in-place initializer for the `GspSetSystemInfo` command.
#[allow(non_snake_case)]
- pub(crate) fn init<'a>(dev: &'a pci::Device<device::Bound>) -> impl Init<Self, Error> + 'a {
+ pub(crate) fn init<'a>(
+ dev: &'a pci::Device<device::Bound>,
+ chipset: Chipset,
+ ) -> impl Init<Self, Error> + 'a {
type InnerGspSystemInfo = bindings::GspSystemInfo;
+ let pci_config_mirror_range = chipset.pci_config_mirror_range();
let init_inner = try_init!(InnerGspSystemInfo {
gpuPhysAddr: dev.resource_start(0)?,
gpuPhysFbAddr: dev.resource_start(1)?,
@@ -35,8 +43,8 @@ impl GspSetSystemInfo {
// Using TASK_SIZE in r535_gsp_rpc_set_system_info() seems wrong because
// TASK_SIZE is per-task. That's probably a design issue in GSP-RM though.
maxUserVa: (1 << 47) - 4096,
- pciConfigMirrorBase: 0x088000,
- pciConfigMirrorSize: 0x001000,
+ pciConfigMirrorBase: pci_config_mirror_range.start,
+ pciConfigMirrorSize: pci_config_mirror_range.end - pci_config_mirror_range.start,
PCIDeviceID: (u32::from(dev.device_id()) << 16) | u32::from(dev.vendor_id().as_raw()),
PCISubDeviceID: (u32::from(dev.subsystem_device_id()) << 16)
@@ -129,3 +137,47 @@ unsafe impl AsBytes for GspStaticConfigInfo {}
// SAFETY: This struct only contains integer types for which all bit patterns
// are valid.
unsafe impl FromBytes for GspStaticConfigInfo {}
+
+/// Power level requested to the [`UnloadingGuestDriver`] command.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[repr(u32)]
+#[expect(unused)]
+pub(crate) enum PowerStateLevel {
+ /// Full unload.
+ Level0 = bindings::NV2080_CTRL_GPU_SET_POWER_STATE_GPU_LEVEL_0,
+ /// S3 (suspend to RAM).
+ Level3 = bindings::NV2080_CTRL_GPU_SET_POWER_STATE_GPU_LEVEL_3,
+ /// Hibernate (suspend to disk).
+ Level7 = bindings::NV2080_CTRL_GPU_SET_POWER_STATE_GPU_LEVEL_7,
+}
+
+impl PowerStateLevel {
+ /// Returns `true` if this state represents a power management transition, i.e. some GPU state
+ /// must survive it (as opposed to a full unload).
+ pub(crate) fn is_power_transition(self) -> bool {
+ self != PowerStateLevel::Level0
+ }
+}
+
+/// Payload of the `UnloadingGuestDriver` command and message.
+#[repr(transparent)]
+#[derive(Clone, Copy, Debug, Zeroable)]
+pub(crate) struct UnloadingGuestDriver(bindings::rpc_unloading_guest_driver_v1F_07);
+
+impl UnloadingGuestDriver {
+ pub(crate) fn new(level: PowerStateLevel) -> Self {
+ Self(bindings::rpc_unloading_guest_driver_v1F_07 {
+ bInPMTransition: u8::from(level.is_power_transition()),
+ bGc6Entering: 0,
+ newLevel: level as u32,
+ ..Zeroable::zeroed()
+ })
+ }
+}
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for UnloadingGuestDriver {}
+
+// SAFETY: This struct only contains integer types for which all bit patterns
+// are valid.
+unsafe impl FromBytes for UnloadingGuestDriver {}
diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs
index 334e8be5fde8..ea350f9b2cc4 100644
--- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs
+++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs
@@ -30,10 +30,14 @@ impl<T> ::core::fmt::Debug for __IncompleteArrayField<T> {
fmt.write_str("__IncompleteArrayField")
}
}
+pub const NV2080_CTRL_GPU_SET_POWER_STATE_GPU_LEVEL_0: u32 = 0;
+pub const NV2080_CTRL_GPU_SET_POWER_STATE_GPU_LEVEL_3: u32 = 3;
+pub const NV2080_CTRL_GPU_SET_POWER_STATE_GPU_LEVEL_7: u32 = 7;
pub const NV_VGPU_MSG_SIGNATURE_VALID: u32 = 1129337430;
pub const GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS2: u32 = 0;
pub const GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS3_BAREMETAL: u32 = 23068672;
pub const GSP_FW_HEAP_PARAM_BASE_RM_SIZE_TU10X: u32 = 8388608;
+pub const GSP_FW_HEAP_PARAM_BASE_RM_SIZE_GH100: u32 = 14680064;
pub const GSP_FW_HEAP_PARAM_SIZE_PER_GB_FB: u32 = 98304;
pub const GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE: u32 = 100663296;
pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MIN_MB: u32 = 64;
@@ -879,6 +883,96 @@ impl Default for GSP_MSG_QUEUE_ELEMENT {
}
}
}
+pub const GSP_DMA_TARGET_GSP_DMA_TARGET_LOCAL_FB: GSP_DMA_TARGET = 0;
+pub const GSP_DMA_TARGET_GSP_DMA_TARGET_COHERENT_SYSTEM: GSP_DMA_TARGET = 1;
+pub const GSP_DMA_TARGET_GSP_DMA_TARGET_NONCOHERENT_SYSTEM: GSP_DMA_TARGET = 2;
+pub const GSP_DMA_TARGET_GSP_DMA_TARGET_COUNT: GSP_DMA_TARGET = 3;
+pub type GSP_DMA_TARGET = ffi::c_uint;
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct GSP_FMC_INIT_PARAMS {
+ pub regkeys: u32_,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone, MaybeZeroable)]
+pub struct GSP_ACR_BOOT_GSP_RM_PARAMS {
+ pub target: GSP_DMA_TARGET,
+ pub gspRmDescSize: u32_,
+ pub gspRmDescOffset: u64_,
+ pub wprCarveoutOffset: u64_,
+ pub wprCarveoutSize: u32_,
+ pub bIsGspRmBoot: u8_,
+ pub __bindgen_padding_0: [u8; 3usize],
+}
+impl Default for GSP_ACR_BOOT_GSP_RM_PARAMS {
+ fn default() -> Self {
+ let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
+ unsafe {
+ ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+ s.assume_init()
+ }
+ }
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone, MaybeZeroable)]
+pub struct GSP_RM_PARAMS {
+ pub target: GSP_DMA_TARGET,
+ pub __bindgen_padding_0: [u8; 4usize],
+ pub bootArgsOffset: u64_,
+}
+impl Default for GSP_RM_PARAMS {
+ fn default() -> Self {
+ let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
+ unsafe {
+ ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+ s.assume_init()
+ }
+ }
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone, MaybeZeroable)]
+pub struct GSP_SPDM_PARAMS {
+ pub target: GSP_DMA_TARGET,
+ pub __bindgen_padding_0: [u8; 4usize],
+ pub payloadBufferOffset: u64_,
+ pub payloadBufferSize: u32_,
+ pub __bindgen_padding_1: [u8; 4usize],
+}
+impl Default for GSP_SPDM_PARAMS {
+ fn default() -> Self {
+ let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
+ unsafe {
+ ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+ s.assume_init()
+ }
+ }
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone, MaybeZeroable)]
+pub struct GSP_FMC_BOOT_PARAMS {
+ pub initParams: GSP_FMC_INIT_PARAMS,
+ pub __bindgen_padding_0: [u8; 4usize],
+ pub bootGspRmParams: GSP_ACR_BOOT_GSP_RM_PARAMS,
+ pub gspRmParams: GSP_RM_PARAMS,
+ pub gspSpdmParams: GSP_SPDM_PARAMS,
+}
+impl Default for GSP_FMC_BOOT_PARAMS {
+ fn default() -> Self {
+ let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
+ unsafe {
+ ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+ s.assume_init()
+ }
+ }
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct rpc_unloading_guest_driver_v1F_07 {
+ pub bInPMTransition: u8_,
+ pub bGc6Entering: u8_,
+ pub __bindgen_padding_0: [u8; 2usize],
+ pub newLevel: u32_,
+}
#[repr(C)]
#[derive(Debug, Default, MaybeZeroable)]
pub struct rpc_run_cpu_sequencer_v17_00 {
diff --git a/drivers/gpu/nova-core/gsp/hal.rs b/drivers/gpu/nova-core/gsp/hal.rs
new file mode 100644
index 000000000000..04f004856c60
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/hal.rs
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+mod gh100;
+mod tu102;
+
+use kernel::prelude::*;
+
+use kernel::{
+ device,
+ dma::Coherent, //
+};
+
+use crate::{
+ driver::Bar0,
+ falcon::{
+ gsp::Gsp as GspEngine,
+ sec2::Sec2,
+ Falcon, //
+ },
+ fb::FbLayout,
+ firmware::gsp::GspFirmware,
+ gpu::{
+ Architecture,
+ Chipset, //
+ },
+ gsp::{
+ boot::BootUnloadGuard,
+ Gsp,
+ GspFwWprMeta, //
+ },
+};
+
+/// Trait for types containing the resources and code required to fully reset the GSP.
+///
+/// The GSP unload code might run in a situation where we cannot load firmware dynamically (e.g.
+/// because we are in shutdown and the file system is not accessible anymore). Thus, the firmware
+/// required for unloading is prepared at load time, and stored here until it needs to be run.
+pub(super) trait UnloadBundle: Send {
+ /// Performs the steps required to properly reset the GSP after it has been stopped.
+ fn run(
+ &self,
+ dev: &device::Device<device::Bound>,
+ bar: Bar0<'_>,
+ gsp_falcon: &Falcon<GspEngine>,
+ sec2_falcon: &Falcon<Sec2>,
+ ) -> Result;
+}
+
+/// Trait implemented by GSP HALs.
+pub(super) trait GspHal: Send {
+ /// Performs the GSP boot process, loading and running the required firmwares as needed.
+ ///
+ /// Upon success, returns a guard that runs the GSP unload sequence if GSP boot does not
+ /// complete.
+ #[allow(clippy::too_many_arguments)]
+ fn boot<'a>(
+ &self,
+ gsp: &'a Gsp,
+ dev: &'a device::Device<device::Bound>,
+ bar: Bar0<'a>,
+ chipset: Chipset,
+ fb_layout: &FbLayout,
+ wpr_meta: &Coherent<GspFwWprMeta>,
+ gsp_falcon: &'a Falcon<GspEngine>,
+ sec2_falcon: &'a Falcon<Sec2>,
+ ) -> Result<BootUnloadGuard<'a>>;
+
+ /// Performs HAL-specific post-GSP boot tasks.
+ ///
+ /// This method is called by the GSP boot code after the GSP is confirmed to be running, and
+ /// after the initialization commands have been pushed onto its queue.
+ fn post_boot(
+ &self,
+ _gsp: &Gsp,
+ _dev: &device::Device<device::Bound>,
+ _bar: Bar0<'_>,
+ _gsp_fw: &GspFirmware,
+ _gsp_falcon: &Falcon<GspEngine>,
+ _sec2_falcon: &Falcon<Sec2>,
+ ) -> Result {
+ Ok(())
+ }
+}
+
+/// Returns the GSP HAL to be used for `chipset`.
+pub(super) fn gsp_hal(chipset: Chipset) -> &'static dyn GspHal {
+ match chipset.arch() {
+ Architecture::Turing | Architecture::Ampere | Architecture::Ada => tu102::TU102_HAL,
+ Architecture::Hopper | Architecture::BlackwellGB10x | Architecture::BlackwellGB20x => {
+ gh100::GH100_HAL
+ }
+ }
+}
diff --git a/drivers/gpu/nova-core/gsp/hal/gh100.rs b/drivers/gpu/nova-core/gsp/hal/gh100.rs
new file mode 100644
index 000000000000..98f5ce197d13
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/hal/gh100.rs
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+use kernel::prelude::*;
+
+use kernel::{
+ device,
+ dma::Coherent,
+ io::poll::read_poll_timeout,
+ time::Delta, //
+};
+
+use crate::{
+ driver::Bar0,
+ falcon::{
+ gsp::Gsp as GspEngine,
+ sec2::Sec2,
+ Falcon, //
+ },
+ fb::FbLayout,
+ firmware::{
+ fsp::FspFirmware,
+ FIRMWARE_VERSION, //
+ },
+ fsp::{
+ FmcBootArgs,
+ Fsp, //
+ },
+ gpu::Chipset,
+ gsp::{
+ boot::BootUnloadGuard,
+ hal::{
+ GspHal,
+ UnloadBundle, //
+ },
+ Gsp,
+ GspFwWprMeta, //
+ },
+};
+
+/// GSP falcon mailbox state, used to track lockdown release status.
+struct GspMbox {
+ mbox0: u32,
+ mbox1: u32,
+}
+
+impl GspMbox {
+ /// Reads both mailboxes from the GSP falcon.
+ fn read(gsp_falcon: &Falcon<GspEngine>, bar: Bar0<'_>) -> Self {
+ Self {
+ mbox0: gsp_falcon.read_mailbox0(bar),
+ mbox1: gsp_falcon.read_mailbox1(bar),
+ }
+ }
+
+ /// Combines mailbox0 and mailbox1 into a 64-bit address.
+ fn combined_addr(&self) -> u64 {
+ (u64::from(self.mbox1) << 32) | u64::from(self.mbox0)
+ }
+
+ /// Returns `true` if GSP lockdown has been released or a GSP-FMC error happened.
+ ///
+ /// Returns `true` both on successful lockdown release and on GSP-FMC-reported errors, since
+ /// either condition should stop the poll loop.
+ fn lockdown_released_or_error(
+ &self,
+ gsp_falcon: &Falcon<GspEngine>,
+ bar: Bar0<'_>,
+ fmc_boot_params_addr: u64,
+ ) -> bool {
+ // GSP-FMC normally clears the boot parameters address from the mailboxes early during
+ // boot. If the address is still there, keep polling rather than treating it as an error.
+ // Any other non-zero mailbox0 value is a GSP-FMC error code.
+ if self.mbox0 != 0 {
+ return self.combined_addr() != fmc_boot_params_addr;
+ }
+
+ !gsp_falcon.riscv_branch_privilege_lockdown(bar)
+ }
+}
+
+/// Waits for GSP lockdown to be released after FSP Chain of Trust.
+fn wait_for_gsp_lockdown_release(
+ dev: &device::Device<device::Bound>,
+ bar: Bar0<'_>,
+ gsp_falcon: &Falcon<GspEngine>,
+ fmc_boot_params_addr: u64,
+) -> Result {
+ dev_dbg!(dev, "Waiting for GSP lockdown release\n");
+
+ let mbox = read_poll_timeout(
+ || {
+ // While the PRIV target mask is still locked to FSP, GSP register and mailbox reads
+ // are not meaningful. Wait until HWCFG2 says the CPU can read them.
+ Ok(match gsp_falcon.priv_target_mask_released(bar) {
+ false => None,
+ true => Some(GspMbox::read(gsp_falcon, bar)),
+ })
+ },
+ |mbox| match mbox {
+ None => false,
+ Some(mbox) => mbox.lockdown_released_or_error(gsp_falcon, bar, fmc_boot_params_addr),
+ },
+ Delta::from_millis(10),
+ Delta::from_secs(30),
+ )
+ .inspect_err(|_| {
+ dev_err!(dev, "GSP lockdown release timeout\n");
+ })?
+ .ok_or(EIO)?;
+
+ // If polling stopped with a non-zero mailbox0, it was not the boot parameters address
+ // anymore and therefore represents a GSP-FMC error code.
+ if mbox.mbox0 != 0 {
+ dev_err!(dev, "GSP-FMC boot failed (mbox: {:#x})\n", mbox.mbox0);
+ return Err(EIO);
+ }
+
+ dev_dbg!(dev, "GSP lockdown released\n");
+ Ok(())
+}
+
+struct FspUnloadBundle;
+
+impl UnloadBundle for FspUnloadBundle {
+ fn run(
+ &self,
+ dev: &device::Device<device::Bound>,
+ bar: Bar0<'_>,
+ gsp_falcon: &Falcon<GspEngine>,
+ _sec2_falcon: &Falcon<Sec2>,
+ ) -> Result {
+ // GSP falcon does most of the work of resetting, so just wait for it to finish.
+ read_poll_timeout(
+ || Ok(gsp_falcon.is_riscv_active(bar)),
+ |&active| !active,
+ Delta::from_millis(10),
+ Delta::from_secs(5),
+ )
+ .map(|_| ())
+ .inspect_err(|_| dev_err!(dev, "GSP falcon failed to halt\n"))
+ }
+}
+
+struct Gh100;
+
+impl GspHal for Gh100 {
+ /// Boot GSP via FSP Chain of Trust (Hopper/Blackwell+ path).
+ ///
+ /// This path uses FSP to establish a chain of trust and boot GSP-FMC. FSP handles
+ /// the GSP boot internally - no manual GSP reset/boot is needed.
+ fn boot<'a>(
+ &self,
+ gsp: &'a Gsp,
+ dev: &'a device::Device<device::Bound>,
+ bar: Bar0<'a>,
+ chipset: Chipset,
+ fb_layout: &FbLayout,
+ wpr_meta: &Coherent<GspFwWprMeta>,
+ gsp_falcon: &'a Falcon<GspEngine>,
+ sec2_falcon: &'a Falcon<Sec2>,
+ ) -> Result<BootUnloadGuard<'a>> {
+ let fsp_fw = FspFirmware::new(dev, chipset, FIRMWARE_VERSION)?;
+
+ let unload_bundle = crate::gsp::UnloadBundle(
+ KBox::new(FspUnloadBundle, GFP_KERNEL)? as KBox<dyn UnloadBundle>
+ );
+
+ // Wrap the unload bundle into a drop guard so it is automatically run upon failure.
+ let unload_guard =
+ BootUnloadGuard::new(gsp, dev, bar, gsp_falcon, sec2_falcon, Some(unload_bundle));
+
+ let mut fsp = Fsp::wait_secure_boot(dev, bar, chipset, fsp_fw)?;
+
+ let args = FmcBootArgs::new(
+ dev,
+ chipset,
+ wpr_meta.dma_handle(),
+ gsp.libos.dma_handle(),
+ false,
+ )?;
+
+ fsp.boot_fmc(dev, bar, fb_layout, &args)?;
+
+ wait_for_gsp_lockdown_release(dev, bar, gsp_falcon, args.boot_params_dma_handle())?;
+
+ Ok(unload_guard)
+ }
+}
+
+const GH100: Gh100 = Gh100;
+pub(super) const GH100_HAL: &dyn GspHal = &GH100;
diff --git a/drivers/gpu/nova-core/gsp/hal/tu102.rs b/drivers/gpu/nova-core/gsp/hal/tu102.rs
new file mode 100644
index 000000000000..2f6301af7113
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/hal/tu102.rs
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+use kernel::prelude::*;
+
+use kernel::{
+ device,
+ dma::Coherent,
+ io::Io, //
+};
+
+use crate::{
+ driver::Bar0,
+ falcon::{
+ gsp::Gsp as GspEngine,
+ sec2::Sec2,
+ Falcon, //
+ },
+ fb::FbLayout,
+ firmware::{
+ booter::{
+ BooterFirmware,
+ BooterKind, //
+ },
+ fwsec::{
+ bootloader::FwsecFirmwareWithBl,
+ FwsecCommand,
+ FwsecFirmware, //
+ },
+ gsp::GspFirmware,
+ FIRMWARE_VERSION, //
+ },
+ gpu::Chipset,
+ gsp::{
+ boot::BootUnloadGuard,
+ hal::{
+ GspHal,
+ UnloadBundle, //
+ },
+ sequencer::{
+ GspSequencer,
+ GspSequencerParams, //
+ },
+ Gsp,
+ GspFwWprMeta, //
+ },
+ regs,
+ vbios::Vbios, //
+};
+
+// A ready-to-run FWSEC unload firmware.
+//
+// Since there are two variants of the prepared firmware (with and without a bootloader), this type
+// abstracts the difference.
+enum FwsecUnloadFirmware {
+ WithoutBl(FwsecFirmware),
+ WithBl(FwsecFirmwareWithBl),
+}
+
+impl FwsecUnloadFirmware {
+ /// Loads the FWSEC SB firmware, as well as its bootloader if `chipset` requires it.
+ fn new(
+ dev: &device::Device<device::Bound>,
+ bar: Bar0<'_>,
+ chipset: Chipset,
+ bios: &Vbios,
+ gsp_falcon: &Falcon<GspEngine>,
+ ) -> Result<Self> {
+ let fwsec_sb = FwsecFirmware::new(dev, gsp_falcon, bar, bios, FwsecCommand::Sb)?;
+
+ Ok(if chipset.needs_fwsec_bootloader() {
+ Self::WithBl(FwsecFirmwareWithBl::new(fwsec_sb, dev, chipset)?)
+ } else {
+ Self::WithoutBl(fwsec_sb)
+ })
+ }
+
+ /// Runs the FWSEC SB firmware.
+ fn run(
+ &self,
+ dev: &device::Device<device::Bound>,
+ bar: Bar0<'_>,
+ gsp_falcon: &Falcon<GspEngine>,
+ ) -> Result {
+ match self {
+ Self::WithoutBl(fw) => fw.run(dev, gsp_falcon, bar),
+ Self::WithBl(fw) => fw.run(dev, gsp_falcon, bar),
+ }
+ }
+}
+
+// Contains the firmware required to fully reset GSP on chipsets where the GSP is started using
+// FWSEC/Booter.
+struct Sec2UnloadBundle {
+ fwsec_sb: FwsecUnloadFirmware,
+ booter_unloader: BooterFirmware,
+}
+
+impl Sec2UnloadBundle {
+ /// Load and prepare the resources required to properly reset the GSP after it has been stopped.
+ fn build(
+ dev: &device::Device<device::Bound>,
+ bar: Bar0<'_>,
+ chipset: Chipset,
+ bios: &Vbios,
+ gsp_falcon: &Falcon<GspEngine>,
+ sec2_falcon: &Falcon<Sec2>,
+ ) -> Result<KBox<dyn UnloadBundle>> {
+ KBox::new(
+ Self {
+ fwsec_sb: FwsecUnloadFirmware::new(dev, bar, chipset, bios, gsp_falcon)?,
+ booter_unloader: BooterFirmware::new(
+ dev,
+ BooterKind::Unloader,
+ chipset,
+ FIRMWARE_VERSION,
+ sec2_falcon,
+ bar,
+ )?,
+ },
+ GFP_KERNEL,
+ )
+ .map(|b| b as KBox<dyn UnloadBundle>)
+ .map_err(Into::into)
+ }
+}
+
+impl UnloadBundle for Sec2UnloadBundle {
+ fn run(
+ &self,
+ dev: &device::Device<device::Bound>,
+ bar: Bar0<'_>,
+ gsp_falcon: &Falcon<GspEngine>,
+ sec2_falcon: &Falcon<Sec2>,
+ ) -> Result {
+ // Run FWSEC-SB to reset the GSP falcon to its pre-libos state.
+ self.fwsec_sb.run(dev, bar, gsp_falcon)?;
+
+ // Remove WPR2 region if set.
+ let wpr2_hi = bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI);
+ if wpr2_hi.is_wpr2_set() {
+ sec2_falcon.reset(bar)?;
+ sec2_falcon.load(dev, bar, &self.booter_unloader)?;
+
+ // Sentinel value to confirm that Booter Unloader has run.
+ const MAILBOX_SENTINEL: u32 = 0xff;
+ let (mbox0, _) =
+ sec2_falcon.boot(bar, Some(MAILBOX_SENTINEL), Some(MAILBOX_SENTINEL))?;
+ if mbox0 != 0 {
+ dev_err!(dev, "Booter Unloader returned error 0x{:x}\n", mbox0);
+ return Err(EINVAL);
+ }
+
+ // Confirm that the WPR2 region has been removed.
+ let wpr2_hi = bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI);
+ if wpr2_hi.is_wpr2_set() {
+ dev_err!(
+ dev,
+ "WPR2 region still set after Booter Unloader returned\n"
+ );
+ return Err(EBUSY);
+ }
+ }
+
+ Ok(())
+ }
+}
+
+/// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly
+/// created the WPR2 region.
+///
+/// The FRTS region requested from the firmware is `fb_layout.frts` (start address and length).
+/// Depending on `chipset.needs_fwsec_bootloader()`, FWSEC-FRTS is either run directly on `falcon`
+/// or wrapped into a bootloader image that is run instead.
+///
+/// # Errors
+///
+/// - `EBUSY` if a WPR2 region already exists before FWSEC-FRTS is run.
+/// - `EIO` if the FRTS error code read back from the scratch register is non-zero, if no WPR2
+///   region exists afterwards, or if its lower bound differs from `fb_layout.frts.start`.
+/// - Any error returned while building or running the firmware.
+fn run_fwsec_frts(
+ dev: &device::Device<device::Bound>,
+ chipset: Chipset,
+ falcon: &Falcon<GspEngine>,
+ bar: Bar0<'_>,
+ bios: &Vbios,
+ fb_layout: &FbLayout,
+) -> Result {
+ // Check that the WPR2 region does not already exist - if it does, we cannot run
+ // FWSEC-FRTS until the GPU is reset.
+ if bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI).higher_bound() != 0 {
+ dev_err!(
+ dev,
+ "WPR2 region already exists - GPU needs to be reset to proceed\n"
+ );
+ return Err(EBUSY);
+ }
+
+ // FWSEC-FRTS will create the WPR2 region.
+ let fwsec_frts = FwsecFirmware::new(
+ dev,
+ falcon,
+ bar,
+ bios,
+ FwsecCommand::Frts {
+ frts_addr: fb_layout.frts.start,
+ frts_size: fb_layout.frts.len(),
+ },
+ )?;
+
+ if chipset.needs_fwsec_bootloader() {
+ let fwsec_frts_bl = FwsecFirmwareWithBl::new(fwsec_frts, dev, chipset)?;
+ // Load and run the bootloader, which will load FWSEC-FRTS and run it.
+ fwsec_frts_bl.run(dev, falcon, bar)?;
+ } else {
+ // Load and run FWSEC-FRTS directly.
+ fwsec_frts.run(dev, falcon, bar)?;
+ }
+
+ // SCRATCH_E contains the error code for FWSEC-FRTS.
+ let frts_status = bar
+ .read(regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR)
+ .frts_err_code();
+ if frts_status != 0 {
+ dev_err!(
+ dev,
+ "FWSEC-FRTS returned with error code {:#x}\n",
+ frts_status
+ );
+
+ return Err(EIO);
+ }
+
+ // Check that the WPR2 region has been created as we requested.
+ let (wpr2_lo, wpr2_hi) = (
+ bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO).lower_bound(),
+ bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI).higher_bound(),
+ );
+
+ // The arms are tried in order: a zero higher bound means no region at all, then the lower
+ // bound is compared against the requested start; only the higher bound's presence is checked,
+ // not its value.
+ match (wpr2_lo, wpr2_hi) {
+ (_, 0) => {
+ dev_err!(dev, "WPR2 region not created after running FWSEC-FRTS\n");
+
+ Err(EIO)
+ }
+ (wpr2_lo, _) if wpr2_lo != fb_layout.frts.start => {
+ dev_err!(
+ dev,
+ "WPR2 region created at unexpected address {:#x}; expected {:#x}\n",
+ wpr2_lo,
+ fb_layout.frts.start,
+ );
+
+ Err(EIO)
+ }
+ (wpr2_lo, wpr2_hi) => {
+ dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi);
+ // NOTE(review): this message is emitted when FWSEC-FRTS succeeded; its wording does
+ // not describe anything done in this helper - confirm it is intended here.
+ dev_dbg!(dev, "GPU instance built\n");
+
+ Ok(())
+ }
+ }
+}
+
+/// GSP HAL implementation exposed through `TU102_HAL`.
+///
+/// The type carries no state; all inputs are passed to the trait methods.
+struct Tu102;
+
+impl GspHal for Tu102 {
+ /// Boots the GSP using FWSEC-FRTS and the SEC2 booter loader.
+ ///
+ /// The steps are, in order:
+ ///
+ /// 1. Parse the VBIOS.
+ /// 2. Try to build the SEC2 unload bundle. A failure here is only logged as a warning and
+ ///    boot continues without a bundle.
+ /// 3. Wrap the (optional) bundle into a [`BootUnloadGuard`].
+ /// 4. Run FWSEC-FRTS, unless `fb_layout.frts` is empty.
+ /// 5. Reset the GSP falcon and boot it with the libos DMA handle in its mailboxes.
+ /// 6. Load the booter loader firmware and run it on SEC2 with `wpr_meta`.
+ ///
+ /// On success the guard created in step 3 is returned to the caller. Any error from steps 1
+ /// and 4-6 is propagated with `?`.
+ fn boot<'a>(
+ &self,
+ gsp: &'a Gsp,
+ dev: &'a device::Device<device::Bound>,
+ bar: Bar0<'a>,
+ chipset: Chipset,
+ fb_layout: &FbLayout,
+ wpr_meta: &Coherent<GspFwWprMeta>,
+ gsp_falcon: &'a Falcon<GspEngine>,
+ sec2_falcon: &'a Falcon<Sec2>,
+ ) -> Result<BootUnloadGuard<'a>> {
+ let bios = Vbios::new(dev, bar)?;
+
+ // Try and prepare the unload bundle.
+ //
+ // If the unload bundle creation fails, the GPU will need to be reset before the driver can
+ // be probed again.
+ let unload_bundle =
+ Sec2UnloadBundle::build(dev, bar, chipset, &bios, gsp_falcon, sec2_falcon)
+ .inspect_err(|e| {
+ dev_warn!(dev, "Failed to prepare unload firmware: {:?}\n", e);
+ dev_warn!(dev, "The GSP won't be able to unload properly on unbind.\n");
+ dev_warn!(
+ dev,
+ "The GPU will need to be reset before the driver can bind again.\n"
+ );
+ })
+ // The error has been reported above; drop it and carry on with `None`.
+ .ok()
+ .map(crate::gsp::UnloadBundle);
+
+ // Wrap the unload bundle into a drop guard so it is automatically run upon failure.
+ let unload_guard =
+ BootUnloadGuard::new(gsp, dev, bar, gsp_falcon, sec2_falcon, unload_bundle);
+
+ // FWSEC-FRTS is not executed on chips where the FRTS region size is 0 (e.g. GA100).
+ if !fb_layout.frts.is_empty() {
+ run_fwsec_frts(dev, chipset, gsp_falcon, bar, &bios, fb_layout)?;
+ }
+
+ gsp_falcon.reset(bar)?;
+ // The libos DMA handle is split across the two boot mailbox values: low 32 bits first,
+ // high 32 bits second.
+ let libos_handle = gsp.libos.dma_handle();
+ let (mbox0, mbox1) = gsp_falcon.boot(
+ bar,
+ Some(libos_handle as u32),
+ Some((libos_handle >> 32) as u32),
+ )?;
+ dev_dbg!(dev, "GSP MBOX0: {:#x}, MBOX1: {:#x}\n", mbox0, mbox1);
+
+ dev_dbg!(
+ dev,
+ "Using SEC2 to load and run the booter_load firmware...\n"
+ );
+
+ BooterFirmware::new(
+ dev,
+ BooterKind::Loader,
+ chipset,
+ FIRMWARE_VERSION,
+ sec2_falcon,
+ bar,
+ )?
+ .run(dev, bar, sec2_falcon, wpr_meta)?;
+
+ Ok(unload_guard)
+ }
+
+ /// Runs the GSP sequencer on `gsp.cmdq`.
+ ///
+ /// The sequencer parameters are assembled from the bootloader application version of
+ /// `gsp_fw`, the libos DMA handle of `gsp`, both falcons, `dev` and `bar`. Any error returned
+ /// by the sequencer is propagated.
+ fn post_boot(
+ &self,
+ gsp: &Gsp,
+ dev: &device::Device<device::Bound>,
+ bar: Bar0<'_>,
+ gsp_fw: &GspFirmware,
+ gsp_falcon: &Falcon<GspEngine>,
+ sec2_falcon: &Falcon<Sec2>,
+ ) -> Result {
+ // Create and run the GSP sequencer.
+ let seq_params = GspSequencerParams {
+ bootloader_app_version: gsp_fw.bootloader.app_version,
+ libos_dma_handle: gsp.libos.dma_handle(),
+ gsp_falcon,
+ sec2_falcon,
+ dev,
+ bar,
+ };
+ GspSequencer::run(&gsp.cmdq, seq_params)?;
+
+ Ok(())
+ }
+}
+
+/// The single `Tu102` value that `TU102_HAL` refers to.
+const TU102: Tu102 = Tu102;
+/// `Tu102` exposed to the parent module as a `GspHal` trait object.
+pub(super) const TU102_HAL: &dyn GspHal = &TU102;
diff --git a/drivers/gpu/nova-core/gsp/sequencer.rs b/drivers/gpu/nova-core/gsp/sequencer.rs
index 474e4c8021db..e0850d21adca 100644
--- a/drivers/gpu/nova-core/gsp/sequencer.rs
+++ b/drivers/gpu/nova-core/gsp/sequencer.rs
@@ -11,7 +11,6 @@ use kernel::{
Io, //
},
prelude::*,
- sync::aref::ARef,
time::{
delay::fsleep,
Delta, //
@@ -132,7 +131,7 @@ pub(crate) struct GspSequencer<'a> {
/// Sequencer information with command data.
seq_info: GspSequence,
/// `Bar0` for register access.
- bar: &'a Bar0,
+ bar: Bar0<'a>,
/// SEC2 falcon for core operations.
sec2_falcon: &'a Falcon<Sec2>,
/// GSP falcon for core operations.
@@ -142,7 +141,7 @@ pub(crate) struct GspSequencer<'a> {
/// Bootloader application version.
bootloader_app_version: u32,
/// Device for logging.
- dev: ARef<device::Device>,
+ dev: &'a device::Device,
}
impl fw::RegWritePayload {
@@ -281,7 +280,7 @@ pub(crate) struct GspSeqIter<'a> {
/// Number of commands processed so far.
cmds_processed: u32,
/// Device for logging.
- dev: ARef<device::Device>,
+ dev: &'a device::Device,
}
impl<'a> Iterator for GspSeqIter<'a> {
@@ -309,7 +308,7 @@ impl<'a> Iterator for GspSeqIter<'a> {
self.cmd_data.len() - offset
};
buffer[..copy_len].copy_from_slice(&self.cmd_data[offset..offset + copy_len]);
- let cmd_result = GspSeqCmd::new(&buffer, &self.dev);
+ let cmd_result = GspSeqCmd::new(&buffer, self.dev);
cmd_result.map_or_else(
|_err| {
@@ -334,7 +333,7 @@ impl<'a> GspSequencer<'a> {
current_offset: 0,
total_cmds: self.seq_info.cmd_index,
cmds_processed: 0,
- dev: self.dev.clone(),
+ dev: self.dev,
}
}
}
@@ -350,9 +349,9 @@ pub(crate) struct GspSequencerParams<'a> {
/// SEC2 falcon for core operations.
pub(crate) sec2_falcon: &'a Falcon<Sec2>,
/// Device for logging.
- pub(crate) dev: ARef<device::Device>,
+ pub(crate) dev: &'a device::Device,
/// BAR0 for register access.
- pub(crate) bar: &'a Bar0,
+ pub(crate) bar: Bar0<'a>,
}
impl<'a> GspSequencer<'a> {
diff --git a/drivers/gpu/nova-core/mctp.rs b/drivers/gpu/nova-core/mctp.rs
new file mode 100644
index 000000000000..482786e07bc7
--- /dev/null
+++ b/drivers/gpu/nova-core/mctp.rs
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+//! MCTP/NVDM protocol types for NVIDIA GPU firmware communication.
+//!
+//! MCTP (Management Component Transport Protocol) carries NVDM (NVIDIA
+//! Data Model) messages between the kernel driver and GPU firmware processors
+//! such as FSP and GSP.
+
+use kernel::pci::Vendor;
+
+/// NVDM message type identifiers carried over MCTP.
+///
+/// The enum is `#[repr(u8)]` and every discriminant is the raw byte value of the type. `Cot` is
+/// the `Default` variant.
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
+#[repr(u8)]
+pub(crate) enum NvdmType {
+ #[default]
+ /// Chain of Trust boot message.
+ Cot = 0x14,
+ /// FSP command response.
+ FspResponse = 0x15,
+}
+
+impl TryFrom<u8> for NvdmType {
+    type Error = u8;
+
+    /// Decodes a raw NVDM type byte.
+    ///
+    /// Yields the variant whose raw value equals `value`. A byte that matches no variant is
+    /// handed back unchanged as the error.
+    fn try_from(value: u8) -> Result<Self, Self::Error> {
+        [Self::Cot, Self::FspResponse]
+            .into_iter()
+            .find(|&candidate| u8::from(candidate) == value)
+            .ok_or(value)
+    }
+}
+
+impl From<NvdmType> for u8 {
+ /// Returns the raw `u8` discriminant of `value`.
+ fn from(value: NvdmType) -> Self {
+ value as u8
+ }
+}
+
+// Bits 27:24 and 15:0 of the header word have no field declared here.
+bitfield! {
+ pub(crate) struct MctpHeader(u32), "MCTP transport header for NVIDIA firmware messages." {
+ 31:31 som as bool, "Start-of-message bit.";
+ 30:30 eom as bool, "End-of-message bit.";
+ 29:28 seq as u8, "Packet sequence number.";
+ 23:16 seid as u8, "Source endpoint ID.";
+ }
+}
+
+impl MctpHeader {
+    /// Creates the header of a message that fits into one packet.
+    ///
+    /// Starting from the default header, the start-of-message and end-of-message bits are both
+    /// set; every other field keeps its default value.
+    pub(crate) fn single_packet() -> Self {
+        let header = Self::default();
+        let header = header.set_som(true);
+
+        header.set_eom(true)
+    }
+
+    /// Tells whether the message is fully contained in this packet, i.e. both the
+    /// start-of-message and the end-of-message bits are set.
+    pub(crate) fn is_single_packet(self) -> bool {
+        if !self.som() {
+            return false;
+        }
+
+        self.eom()
+    }
+}
+
+/// MCTP message type for PCI vendor-defined messages.
+///
+/// Stored in, and compared against, the `msg_type` field of `NvdmHeader`.
+const MSG_TYPE_VENDOR_PCI: u8 = 0x7e;
+
+// `nvdm_type` is converted with `?=>`: its getter yields a `Result`, which
+// `NvdmHeader::validate` matches against `Ok(..)`. Bit 7 has no field declared here.
+bitfield! {
+ pub(crate) struct NvdmHeader(u32), "NVIDIA Vendor-Defined Message header over MCTP." {
+ 31:24 nvdm_type as u8 ?=> NvdmType, "NVDM message type.";
+ 23:8 vendor_id as u16, "PCI vendor ID.";
+ 6:0 msg_type as u8, "MCTP vendor-defined message type.";
+ }
+}
+
+impl NvdmHeader {
+    /// Creates a header carrying `nvdm_type`, marked as a PCI vendor-defined message with the
+    /// NVIDIA vendor ID.
+    pub(crate) fn new(nvdm_type: NvdmType) -> Self {
+        let header = Self::default().set_msg_type(MSG_TYPE_VENDOR_PCI);
+        let header = header.set_vendor_id(Vendor::NVIDIA.as_raw());
+
+        header.set_nvdm_type(nvdm_type)
+    }
+
+    /// Checks that this header is a PCI vendor-defined message from NVIDIA whose NVDM type
+    /// decodes successfully and equals `expected_type`.
+    pub(crate) fn validate(self, expected_type: NvdmType) -> bool {
+        if self.msg_type() != MSG_TYPE_VENDOR_PCI {
+            return false;
+        }
+
+        if self.vendor_id() != Vendor::NVIDIA.as_raw() {
+            return false;
+        }
+
+        match self.nvdm_type() {
+            Ok(actual_type) => actual_type == expected_type,
+            Err(_) => false,
+        }
+    }
+}
diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs
index 04a1fa6b25f8..9f0199f7b38c 100644
--- a/drivers/gpu/nova-core/nova_core.rs
+++ b/drivers/gpu/nova-core/nova_core.rs
@@ -17,9 +17,10 @@ mod driver;
mod falcon;
mod fb;
mod firmware;
-mod gfw;
+mod fsp;
mod gpu;
mod gsp;
+mod mctp;
#[macro_use]
mod num;
mod regs;
@@ -47,13 +48,13 @@ struct NovaCoreModule {
// Fields are dropped in declaration order, so `_driver` is dropped first,
// then `_debugfs_guard` clears `DEBUGFS_ROOT`.
#[pin]
- _driver: Registration<pci::Adapter<driver::NovaCore>>,
+ _driver: Registration<pci::Adapter<driver::NovaCoreDriver>>,
_debugfs_guard: DebugfsRootGuard,
}
impl InPlaceModule for NovaCoreModule {
fn init(module: &'static kernel::ThisModule) -> impl PinInit<Self, Error> {
- let dir = debugfs::Dir::new(kernel::c_str!("nova_core"));
+ let dir = debugfs::Dir::new(kernel::c_str!("nova-core"));
// SAFETY: We are the only driver code running during init, so there
// cannot be any concurrent access to `DEBUGFS_ROOT`.
@@ -68,7 +69,7 @@ impl InPlaceModule for NovaCoreModule {
module! {
type: NovaCoreModule,
- name: "NovaCore",
+ name: "nova-core",
authors: ["Danilo Krummrich"],
description: "Nova Core GPU driver",
license: "GPL v2",
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index 2f171a4ff9ba..0f49c1ab83ad 100644
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
use kernel::{
io::{
@@ -7,6 +8,7 @@ use kernel::{
Io, //
},
prelude::*,
+ sizes::SizeConstants,
time, //
};
@@ -30,7 +32,6 @@ use crate::{
Architecture,
Chipset, //
},
- num::FromSafeCast,
};
// PMC
@@ -147,11 +148,54 @@ register! {
}
}
+/// Base of the GB10x HSHUB0 register window (`NV_HSHUB0_PRIV_BASE` in Open RM).
+///
+/// The base is provided by the GB10x framebuffer HAL.
+pub(crate) struct Hshub0Base(());
+
+/// Base of the GB20x FBHUB0 register window (`NV_FBHUB0_PRI_BASE` in Open RM).
+///
+/// The base is provided by the GB20x framebuffer HAL.
+pub(crate) struct Fbhub0Base(());
+
+register! {
+ // GB10x sysmem flush registers, relative to the HSHUB0 base. GB10x routes sysmembar
+ // through a primary and an EG (egress) pair that must both be programmed to the same
+ // address. Hardware ignores bits 7:0 of each LO register. The boot path uses a fixed
+ // HSHUB0 base, so the multiple runtime-discovered HSHUB bases are not needed here.
+ pub(crate) NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ Hshub0Base + 0x00000e50 {
+ 31:0 adr => u32;
+ }
+
+ pub(crate) NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ Hshub0Base + 0x00000e54 {
+ 19:0 adr;
+ }
+
+ pub(crate) NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ Hshub0Base + 0x000006c0 {
+ 31:0 adr => u32;
+ }
+
+ pub(crate) NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ Hshub0Base + 0x000006c4 {
+ 19:0 adr;
+ }
+
+ // GB20x sysmem flush registers, relative to the FBHUB0 base. Unlike the older
+ // NV_PFB_NISO_FLUSH_SYSMEM_ADDR registers which encode the address with an 8-bit
+ // right-shift, these take the raw address split into lower and upper halves. Hardware
+ // ignores bits 7:0 of the LO register.
+ pub(crate) NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ Fbhub0Base + 0x00001d58 {
+ 31:0 adr => u32;
+ }
+
+ pub(crate) NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ Fbhub0Base + 0x00001d5c {
+ 19:0 adr;
+ }
+}
+
impl NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE {
/// Returns the usable framebuffer size, in bytes.
pub(crate) fn usable_fb_size(self) -> u64 {
- let size = (u64::from(self.lower_mag()) << u64::from(self.lower_scale()))
- * u64::from_safe_cast(kernel::sizes::SZ_1M);
+ let size = (u64::from(self.lower_mag()) << u64::from(self.lower_scale())) * u64::SZ_1M;
if self.ecc_mode_enabled() {
// Remove the amount of memory reserved for ECC (one per 16 units).
@@ -176,6 +220,11 @@ impl NV_PFB_PRI_MMU_WPR2_ADDR_HI {
pub(crate) fn higher_bound(self) -> u64 {
u64::from(self.hi_val()) << 12
}
+
+ /// Returns whether the WPR2 region is currently set.
+ pub(crate) fn is_wpr2_set(self) -> bool {
+ self.hi_val() != 0
+ }
}
// PGSP
@@ -241,7 +290,7 @@ impl NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT {
impl NV_USABLE_FB_SIZE_IN_MB {
/// Returns the usable framebuffer size, in bytes.
pub(crate) fn usable_fb_size(self) -> u64 {
- u64::from(self.value()) * u64::from_safe_cast(kernel::sizes::SZ_1M)
+ u64::from(self.value()) * u64::SZ_1M
}
}
@@ -314,6 +363,8 @@ register! {
pub(crate) NV_PFALCON_FALCON_HWCFG2(u32) @ PFalconBase + 0x000000f4 {
/// Signal indicating that reset is completed (GA102+).
31:31 reset_ready => bool;
+ /// RISC-V branch privilege lockdown bit.
+ 13:13 riscv_br_priv_lockdown => bool;
/// Set to 0 after memory scrubbing is completed.
12:12 mem_scrubbing => bool;
10:10 riscv => bool;
@@ -426,6 +477,24 @@ register! {
pub(crate) NV_PFALCON_FBIF_CTL(u32) @ PFalconBase + 0x00000624 {
7:7 allow_phys_no_ctx => bool;
}
+
+ // Falcon EMEM PIO registers (used by FSP on Hopper/Blackwell).
+ // These provide the falcon external memory communication interface.
+
+ pub(crate) NV_PFALCON_FALCON_EMEMC(u32) @ PFalconBase + 0x00000ac0 {
+ /// EMEM byte offset (4-byte aligned) within the block.
+ 7:2 offs;
+ /// EMEM block to access.
+ 15:8 blk;
+ /// Auto-increment the offset after each write.
+ 24:24 aincw => bool;
+ /// Auto-increment the offset after each read.
+ 25:25 aincr => bool;
+ }
+
+ pub(crate) NV_PFALCON_FALCON_EMEMD(u32) @ PFalconBase + 0x00000ac4 {
+ 31:0 data => u32;
+ }
}
impl NV_PFALCON_FALCON_DMACTL {
@@ -449,7 +518,7 @@ impl NV_PFALCON_FALCON_DMATRFCMD {
impl NV_PFALCON_FALCON_ENGINE {
/// Resets the falcon
- pub(crate) fn reset_engine<E: FalconEngine>(bar: &Bar0) {
+ pub(crate) fn reset_engine<E: FalconEngine>(bar: Bar0<'_>) {
bar.update(Self::of::<E>(), |r| r.with_reset(true));
// TIMEOUT: falcon engine should not take more than 10us to reset.
@@ -512,6 +581,27 @@ register! {
}
}
+// FSP (Foundation Security Processor) queue registers for Hopper/Blackwell Chain of Trust.
+// These registers manage falcon EMEM communication queues.
+//
+// NOTE(review): each HEAD array starts only 4 bytes before its TAIL array (0x008f2c00 vs
+// 0x008f2c04, 0x008f2c80 vs 0x008f2c84) and each is declared with 8 `u32` entries. If array
+// elements are laid out with a 4-byte stride, `HEAD[i + 1]` aliases `TAIL[i]`. Confirm whether
+// the pairs are meant to be interleaved with an explicit 8-byte stride, or whether only index 0
+// is ever used.
+
+register! {
+ pub(crate) NV_PFSP_QUEUE_HEAD(u32)[8] @ 0x008f2c00 {
+ 31:0 address => u32;
+ }
+
+ pub(crate) NV_PFSP_QUEUE_TAIL(u32)[8] @ 0x008f2c04 {
+ 31:0 address => u32;
+ }
+
+ pub(crate) NV_PFSP_MSGQ_HEAD(u32)[8] @ 0x008f2c80 {
+ 31:0 val => u32;
+ }
+
+ pub(crate) NV_PFSP_MSGQ_TAIL(u32)[8] @ 0x008f2c84 {
+ 31:0 val => u32;
+ }
+}
+
// The modules below provide registers that are not identical on all supported chips. They should
// only be used in HAL modules.
@@ -538,3 +628,39 @@ pub(crate) mod ga100 {
}
}
}
+
+/// NOTE(review): presumably the value of the `fsp_boot_complete` field (see the
+/// `NV_THERM_I2CS_SCRATCH_FSP_BOOT_COMPLETE` aliases in `gh100` and `gb202`) once FSP has booted
+/// successfully. The comparison itself is not visible here - confirm against the user of this
+/// constant.
+pub(crate) const NV_THERM_I2CS_SCRATCH_FSP_BOOT_COMPLETE_STATUS_SUCCESS: u32 = 0xff;
+
+/// Registers as laid out for the `gh100` HAL.
+///
+/// Declares the same register names as the `gb202` module, with `NV_THERM_I2CS_SCRATCH` located
+/// at `0x000200bc` here.
+pub(crate) mod gh100 {
+ use kernel::io::register;
+
+ // PTHERM
+
+ register! {
+ pub(crate) NV_THERM_I2CS_SCRATCH(u32) @ 0x000200bc {
+ 31:0 data;
+ }
+
+ // Alias to `NV_THERM_I2CS_SCRATCH` when used to check for FSP boot completion.
+ pub(crate) NV_THERM_I2CS_SCRATCH_FSP_BOOT_COMPLETE(u32) => NV_THERM_I2CS_SCRATCH {
+ 31:0 fsp_boot_complete;
+ }
+ }
+}
+
+/// Registers as laid out for the `gb202` HAL.
+///
+/// Declares the same register names as the `gh100` module, with `NV_THERM_I2CS_SCRATCH` located
+/// at `0x00ad00bc` here.
+pub(crate) mod gb202 {
+ use kernel::io::register;
+
+ // PTHERM
+
+ register! {
+ pub(crate) NV_THERM_I2CS_SCRATCH(u32) @ 0x00ad00bc {
+ 31:0 data;
+ }
+
+ // Alias to `NV_THERM_I2CS_SCRATCH` when used to check for FSP boot completion.
+ pub(crate) NV_THERM_I2CS_SCRATCH_FSP_BOOT_COMPLETE(u32) => NV_THERM_I2CS_SCRATCH {
+ 31:0 fsp_boot_complete;
+ }
+ }
+}
diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs
index ebda28e596c5..fd168c5da78c 100644
--- a/drivers/gpu/nova-core/vbios.rs
+++ b/drivers/gpu/nova-core/vbios.rs
@@ -2,8 +2,6 @@
//! VBIOS extraction and parsing.
-use core::convert::TryFrom;
-
use kernel::{
device,
io::Io,
@@ -12,6 +10,8 @@ use kernel::{
Alignable,
Alignment, //
},
+ register,
+ sizes::SZ_4K,
sync::aref::ARef,
transmute::FromBytes,
};
@@ -27,16 +27,6 @@ use crate::{
num::FromSafeCast,
};
-/// The offset of the VBIOS ROM in the BAR0 space.
-const ROM_OFFSET: usize = 0x300000;
-/// The maximum length of the VBIOS ROM to scan into.
-const BIOS_MAX_SCAN_LEN: usize = 0x100000;
-/// The size to read ahead when parsing initial BIOS image headers.
-const BIOS_READ_AHEAD_SIZE: usize = 1024;
-/// The bit in the last image indicator byte for the PCI Data Structure that
-/// indicates the last image. Bit 0-6 are reserved, bit 7 is last image bit.
-const LAST_IMAGE_BIT_MASK: u8 = 0x80;
-
/// BIOS Image Type from PCI Data Structure code_type field.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
@@ -65,24 +55,16 @@ impl TryFrom<u8> for BiosImageType {
}
}
-// PMU lookup table entry types. Used to locate PMU table entries
-// in the Fwsec image, corresponding to falcon ucodes.
-#[expect(dead_code)]
-const FALCON_UCODE_ENTRY_APPID_FIRMWARE_SEC_LIC: u8 = 0x05;
-#[expect(dead_code)]
-const FALCON_UCODE_ENTRY_APPID_FWSEC_DBG: u8 = 0x45;
-const FALCON_UCODE_ENTRY_APPID_FWSEC_PROD: u8 = 0x85;
-
/// Vbios Reader for constructing the VBIOS data.
struct VbiosIterator<'a> {
dev: &'a device::Device,
- bar0: &'a Bar0,
+ bar0: Bar0<'a>,
/// VBIOS data vector: As BIOS images are scanned, they are added to this vector for reference
/// or copying into other data structures. It is the entire scanned contents of the VBIOS which
/// progressively extends. It is used so that we do not re-read any contents that are already
/// read as we use the cumulative length read so far, and re-read any gaps as we extend the
/// length.
- data: KVec<u8>,
+ data: KVVec<u8>,
/// Current offset of the [`Iterator`].
current_offset: usize,
/// Indicate whether the last image has been found.
@@ -90,20 +72,111 @@ struct VbiosIterator<'a> {
}
impl<'a> VbiosIterator<'a> {
- fn new(dev: &'a device::Device, bar0: &'a Bar0) -> Result<Self> {
+ /// The offset of the VBIOS ROM in the BAR0 space.
+ const ROM_OFFSET: usize = 0x300000;
+ /// The maximum length of the VBIOS ROM to scan into.
+ const BIOS_MAX_SCAN_LEN: usize = 0x100000;
+ /// The size to read ahead when parsing initial BIOS image headers.
+ const BIOS_READ_AHEAD_SIZE: usize = 1024;
+
+ /// Return the byte offset where the PCI Expansion ROM images begin in the GPU's ROM.
+ ///
+ /// The GPU's ROM may begin with an Init-from-ROM (IFR) header that precedes the PCI Expansion
+ /// ROM images (VBIOS). When present, the PROM shadow method must parse this header to determine
+ /// the offset where the PCI ROM images actually begin, and adjust all subsequent reads
+ /// accordingly.
+ ///
+ /// On most GPUs this is not needed because the IFR microcode has already applied the ROM offset
+ /// so that PROM reads transparently skip the header. On GA100, for some reason, the IFR offset
+ /// is not applied to PROM reads. Therefore, the search for the PCI Expansion ROM images must
+ /// skip the IFR header, if found.
+ ///
+ /// Returns `Ok(0)` when the first word of the ROM window is not the IFR signature.
+ ///
+ /// # Errors
+ ///
+ /// - `EINVAL` if the IFR header version is not 1, 2 or 3, or if a version 3 header does not
+ ///   lead to a ROM directory with the expected signature.
+ /// - Any error returned by the `try_read32` accesses into the ROM window.
+ fn rom_offset(dev: &device::Device, bar0: Bar0<'_>) -> Result<usize> {
+ // IFR Header in VBIOS.
+ //
+ // The three fixed header words sit at the very start of the ROM window (`ROM_OFFSET`).
+ register! {
+ NV_PBUS_IFR_FMT_FIXED0(u32) @ 0x300000 {
+ 31:0 signature;
+ }
+ }
+
+ register! {
+ NV_PBUS_IFR_FMT_FIXED1(u32) @ 0x300004 {
+ 30:16 fixed_data_size;
+ 15:8 version => u8;
+ }
+ }
+
+ register! {
+ NV_PBUS_IFR_FMT_FIXED2(u32) @ 0x300008 {
+ 19:0 total_data_size;
+ }
+ }
+
+ /// IFR signature.
+ const NV_PBUS_IFR_FMT_FIXED0_SIGNATURE_VALUE: u32 = u32::from_le_bytes(*b"NVGI");
+ /// ROM directory signature.
+ const NV_ROM_DIRECTORY_IDENTIFIER: u32 = u32::from_le_bytes(*b"RFRD");
+ /// Offset of the NV_PMGR_ROM_ADDR_OFFSET register in IFR Extended section.
+ const IFR_SW_EXT_ROM_ADDR_OFFSET: usize = 4;
+ /// Size of Redundant Firmware Flash Status section.
+ const RFW_FLASH_STATUS_SIZE: usize = SZ_4K;
+ /// Offset in the ROM Directory of the PCI Option ROM offset.
+ const PCI_OPTION_ROM_OFFSET: usize = 8;
+
+ let signature = bar0.read(NV_PBUS_IFR_FMT_FIXED0).signature();
+
+ if signature == NV_PBUS_IFR_FMT_FIXED0_SIGNATURE_VALUE {
+ let fixed1 = bar0.read(NV_PBUS_IFR_FMT_FIXED1);
+
+ match fixed1.version() {
+ 1 | 2 => {
+ // The image offset is the 32-bit word found `IFR_SW_EXT_ROM_ADDR_OFFSET` bytes
+ // past the fixed data.
+ let fixed_data_size = usize::from(fixed1.fixed_data_size());
+ let pmgr_rom_addr_offset = fixed_data_size + IFR_SW_EXT_ROM_ADDR_OFFSET;
+ bar0.try_read32(Self::ROM_OFFSET + pmgr_rom_addr_offset)
+ .map(usize::from_safe_cast)
+ }
+ 3 => {
+ // The word at `total_data_size` gives the flash status offset; the ROM directory
+ // follows the flash status section and holds the image offset.
+ let fixed2 = bar0.read(NV_PBUS_IFR_FMT_FIXED2);
+ let total_data_size = usize::from(fixed2.total_data_size());
+ let flash_status_offset =
+ usize::from_safe_cast(bar0.try_read32(Self::ROM_OFFSET + total_data_size)?);
+ let dir_offset = flash_status_offset + RFW_FLASH_STATUS_SIZE;
+ let dir_sig = bar0.try_read32(Self::ROM_OFFSET + dir_offset)?;
+ if dir_sig != NV_ROM_DIRECTORY_IDENTIFIER {
+ dev_err!(dev, "could not find IFR ROM directory\n");
+ return Err(EINVAL);
+ }
+ bar0.try_read32(Self::ROM_OFFSET + dir_offset + PCI_OPTION_ROM_OFFSET)
+ .map(usize::from_safe_cast)
+ }
+ _ => {
+ dev_err!(dev, "unsupported IFR header version {}\n", fixed1.version());
+ Err(EINVAL)
+ }
+ }
+ } else {
+ // No IFR header: the images start at the beginning of the ROM window.
+ Ok(0)
+ }
+ }
+
+ fn new(dev: &'a device::Device, bar0: Bar0<'a>) -> Result<Self> {
Ok(Self {
dev,
bar0,
- data: KVec::new(),
- current_offset: 0,
+ data: KVVec::new(),
+ current_offset: Self::rom_offset(dev, bar0)?,
last_found: false,
})
}
/// Read bytes from the ROM at the current end of the data vector.
fn read_more(&mut self, len: usize) -> Result {
- let current_len = self.data.len();
- let start = ROM_OFFSET + current_len;
+ let start = self.data.len();
+ let end = start + len;
+
+ if end > Self::BIOS_MAX_SCAN_LEN {
+ dev_err!(self.dev, "Error: exceeded BIOS scan limit.\n");
+ return Err(EINVAL);
+ }
// Ensure length is a multiple of 4 for 32-bit reads
if len % core::mem::size_of::<u32>() != 0 {
@@ -117,9 +190,9 @@ impl<'a> VbiosIterator<'a> {
self.data.reserve(len, GFP_KERNEL)?;
// Read ROM data bytes and push directly to `data`.
- for addr in (start..start + len).step_by(core::mem::size_of::<u32>()) {
+ for addr in (start..end).step_by(core::mem::size_of::<u32>()) {
// Read 32-bit word from the VBIOS ROM
- let word = self.bar0.try_read32(addr)?;
+ let word = self.bar0.try_read32(Self::ROM_OFFSET + addr)?;
// Convert the `u32` to a 4 byte array and push each byte.
word.to_ne_bytes()
@@ -132,17 +205,9 @@ impl<'a> VbiosIterator<'a> {
/// Read bytes at a specific offset, filling any gap.
fn read_more_at_offset(&mut self, offset: usize, len: usize) -> Result {
- if offset > BIOS_MAX_SCAN_LEN {
- dev_err!(self.dev, "Error: exceeded BIOS scan limit.\n");
- return Err(EINVAL);
- }
-
- // If `offset` is beyond current data size, fill the gap first.
- let current_len = self.data.len();
- let gap_bytes = offset.saturating_sub(current_len);
+ let end = offset.checked_add(len).ok_or(EINVAL)?;
- // Now read the requested bytes at the offset.
- self.read_more(gap_bytes + len)
+ self.read_more(end.saturating_sub(self.data.len()))
}
/// Read a BIOS image at a specific offset and create a [`BiosImage`] from it.
@@ -155,8 +220,8 @@ impl<'a> VbiosIterator<'a> {
len: usize,
context: &str,
) -> Result<BiosImage> {
- let data_len = self.data.len();
- if offset + len > data_len {
+ let end = offset.checked_add(len).ok_or(EINVAL)?;
+ if end > self.data.len() {
self.read_more_at_offset(offset, len).inspect_err(|e| {
dev_err!(
self.dev,
@@ -167,7 +232,7 @@ impl<'a> VbiosIterator<'a> {
})?;
}
- BiosImage::new(self.dev, &self.data[offset..offset + len]).inspect_err(|err| {
+ BiosImage::new(self.dev, &self.data[offset..end]).inspect_err(|err| {
dev_err!(
self.dev,
"Failed to {} at offset {:#x}: {:?}\n",
@@ -189,7 +254,7 @@ impl<'a> Iterator for VbiosIterator<'a> {
return None;
}
- if self.current_offset > BIOS_MAX_SCAN_LEN {
+ if self.current_offset >= Self::BIOS_MAX_SCAN_LEN {
dev_err!(self.dev, "Error: exceeded BIOS scan limit, stopping scan\n");
return None;
}
@@ -197,7 +262,7 @@ impl<'a> Iterator for VbiosIterator<'a> {
// Parse image headers first to get image size.
let image_size = match self.read_bios_image_at_offset(
self.current_offset,
- BIOS_READ_AHEAD_SIZE,
+ Self::BIOS_READ_AHEAD_SIZE,
"parse initial BIOS image headers",
) {
Ok(image) => image.image_size_bytes(),
@@ -232,11 +297,10 @@ impl Vbios {
/// Probe for VBIOS extraction.
///
/// Once the VBIOS object is built, `bar0` is not read for [`Vbios`] purposes anymore.
- pub(crate) fn new(dev: &device::Device, bar0: &Bar0) -> Result<Vbios> {
+ pub(crate) fn new(dev: &device::Device, bar0: Bar0<'_>) -> Result<Vbios> {
// Images to extract from iteration
let mut pci_at_image: Option<PciAtBiosImage> = None;
- let mut first_fwsec_image: Option<FwSecBiosBuilder> = None;
- let mut second_fwsec_image: Option<FwSecBiosBuilder> = None;
+ let mut fwsec_section: Option<KVVec<u8>> = None;
// Parse all VBIOS images in the ROM
for image_result in VbiosIterator::new(dev, bar0)? {
@@ -250,24 +314,22 @@ impl Vbios {
image.is_last()
);
+ // Once we have found the first FWSEC image, grab all data after that as the FWSEC
+ // section. This is indexed as one logical block to build the final FWSEC image.
+ if let Some(data) = fwsec_section.as_mut() {
+ data.extend_from_slice(&image.data, GFP_KERNEL)?;
+ continue;
+ }
+
// Convert to a specific image type
match BiosImageType::try_from(image.pcir.code_type) {
Ok(BiosImageType::PciAt) => {
- pci_at_image = Some(PciAtBiosImage::try_from(image)?);
- }
- Ok(BiosImageType::FwSec) => {
- let fwsec = FwSecBiosBuilder {
- base: image,
- falcon_data_offset: None,
- pmu_lookup_table: None,
- falcon_ucode_offset: None,
- };
- if first_fwsec_image.is_none() {
- first_fwsec_image = Some(fwsec);
- } else {
- second_fwsec_image = Some(fwsec);
+ // Silently ignore any extra PCI-AT images.
+ if pci_at_image.is_none() {
+ pci_at_image = Some(PciAtBiosImage::try_from(image)?);
}
}
+ Ok(BiosImageType::FwSec) => fwsec_section = Some(image.data),
_ => {
// Ignore other image types or unknown types
}
@@ -275,22 +337,18 @@ impl Vbios {
}
// Using all the images, setup the falcon data pointer in Fwsec.
- if let (Some(mut second), Some(first), Some(pci_at)) =
- (second_fwsec_image, first_fwsec_image, pci_at_image)
- {
- second
- .setup_falcon_data(&pci_at, &first)
- .inspect_err(|e| dev_err!(dev, "Falcon data setup failed: {:?}\n", e))?;
- Ok(Vbios {
- fwsec_image: second.build()?,
- })
- } else {
+ let (Some(pci_at), Some(fwsec_section)) = (pci_at_image, fwsec_section) else {
dev_err!(
dev,
"Missing required images for falcon data setup, skipping\n"
);
- Err(EINVAL)
- }
+ return Err(EINVAL);
+ };
+
+ let fwsec_image = FwSecBiosImage::new(dev, pci_at, fwsec_section)
+ .inspect_err(|e| dev_err!(dev, "Falcon data setup failed: {:?}\n", e))?;
+
+ Ok(Vbios { fwsec_image })
}
pub(crate) fn fwsec_image(&self) -> &FwSecBiosImage {
@@ -332,6 +390,9 @@ struct PcirStruct {
unsafe impl FromBytes for PcirStruct {}
impl PcirStruct {
+ /// The bit in `last_image` that indicates the last image.
+ const LAST_IMAGE_BIT_MASK: u8 = 0x80;
+
fn new(dev: &device::Device, data: &[u8]) -> Result<Self> {
let (pcir, _) = PcirStruct::from_bytes_copy_prefix(data).ok_or(EINVAL)?;
@@ -355,7 +416,7 @@ impl PcirStruct {
/// Check if this is the last image in the ROM.
fn is_last(&self) -> bool {
- self.last_image & LAST_IMAGE_BIT_MASK != 0
+ self.last_image & Self::LAST_IMAGE_BIT_MASK != 0
}
/// Calculate image size in bytes from 512-byte blocks.
@@ -406,7 +467,7 @@ impl BitHeader {
/// BIT Token Entry: Records in the BIT table followed by the BIT header.
#[derive(Debug, Clone, Copy)]
-#[expect(dead_code)]
+#[repr(C)]
struct BitToken {
/// 00h: Token identifier
id: u8,
@@ -418,39 +479,38 @@ struct BitToken {
data_offset: u16,
}
-// Define the token ID for the Falcon data
-const BIT_TOKEN_ID_FALCON_DATA: u8 = 0x70;
+// SAFETY: all bit patterns are valid for `BitToken`.
+unsafe impl FromBytes for BitToken {}
impl BitToken {
+ /// BIT token ID for Falcon data.
+ const ID_FALCON_DATA: u8 = 0x70;
+
/// Find a BIT token entry by BIT ID in a PciAtBiosImage
fn from_id(image: &PciAtBiosImage, token_id: u8) -> Result<Self> {
let header = &image.bit_header;
+ let entry_size = usize::from(header.token_size);
// Offset to the first token entry
let tokens_start = image.bit_offset + usize::from(header.header_size);
for i in 0..usize::from(header.token_entries) {
- let entry_offset = tokens_start + (i * usize::from(header.token_size));
-
- // Make sure we don't go out of bounds
- if entry_offset + usize::from(header.token_size) > image.base.data.len() {
- return Err(EINVAL);
- }
+ let entry_offset = i
+ .checked_mul(entry_size)
+ .and_then(|offset| tokens_start.checked_add(offset))
+ .ok_or(EINVAL)?;
+ let entry = image
+ .base
+ .data
+ .get(entry_offset..)
+ .and_then(|data| data.get(..entry_size))
+ .ok_or(EINVAL)?;
+
+ let (token, _) = BitToken::from_bytes_copy_prefix(entry).ok_or(EINVAL)?;
// Check if this token has the requested ID
- if image.base.data[entry_offset] == token_id {
- return Ok(BitToken {
- id: image.base.data[entry_offset],
- data_version: image.base.data[entry_offset + 1],
- data_size: u16::from_le_bytes([
- image.base.data[entry_offset + 2],
- image.base.data[entry_offset + 3],
- ]),
- data_offset: u16::from_le_bytes([
- image.base.data[entry_offset + 4],
- image.base.data[entry_offset + 5],
- ]),
- });
+ if token.id == token_id {
+ return Ok(token);
}
}
@@ -461,67 +521,38 @@ impl BitToken {
/// PCI ROM Expansion Header as defined in PCI Firmware Specification.
///
-/// This is header is at the beginning of every image in the set of images in the ROM. It contains
-/// a pointer to the PCI Data Structure which describes the image. For "NBSI" images (NoteBook
-/// System Information), the ROM header deviates from the standard and contains an offset to the
-/// NBSI image however we do not yet parse that in this module and keep it for future reference.
+/// This header is at the beginning of every image in the set of images in the ROM. It contains a
+/// pointer to the PCI Data Structure which describes the image.
#[derive(Debug, Clone, Copy)]
-#[expect(dead_code)]
+#[repr(C)]
struct PciRomHeader {
/// 00h: Signature (0xAA55)
signature: u16,
- /// 02h: Reserved bytes for processor architecture unique data (20 bytes)
- reserved: [u8; 20],
- /// 16h: NBSI Data Offset (NBSI-specific, offset from header to NBSI image)
- nbsi_data_offset: Option<u16>,
+ /// 02h: Reserved bytes for processor architecture unique data (22 bytes)
+ reserved: [u8; 22],
/// 18h: Pointer to PCI Data Structure (offset from start of ROM image)
pci_data_struct_offset: u16,
- /// 1Ah: Size of block (this is NBSI-specific)
- size_of_block: Option<u32>,
}
+// SAFETY: all bit patterns are valid for `PciRomHeader`.
+unsafe impl FromBytes for PciRomHeader {}
+
impl PciRomHeader {
fn new(dev: &device::Device, data: &[u8]) -> Result<Self> {
- if data.len() < 26 {
- // Need at least 26 bytes to read pciDataStrucPtr and sizeOfBlock.
- return Err(EINVAL);
- }
-
- let signature = u16::from_le_bytes([data[0], data[1]]);
+ let (rom_header, _) = PciRomHeader::from_bytes_copy_prefix(data)
+ .ok_or(EINVAL)
+ .inspect_err(|_| dev_err!(dev, "Not enough data for ROM header\n"))?;
// Check for valid ROM signatures.
- match signature {
- 0xAA55 | 0xBB77 | 0x4E56 => {}
+ match rom_header.signature {
+ 0xAA55 | 0x4E56 => {}
_ => {
- dev_err!(dev, "ROM signature unknown {:#x}\n", signature);
+ dev_err!(dev, "ROM signature unknown {:#x}\n", rom_header.signature);
return Err(EINVAL);
}
}
- // Read the pointer to the PCI Data Structure at offset 0x18.
- let pci_data_struct_ptr = u16::from_le_bytes([data[24], data[25]]);
-
- // Try to read optional fields if enough data.
- let mut size_of_block = None;
- let mut nbsi_data_offset = None;
-
- if data.len() >= 30 {
- // Read size_of_block at offset 0x1A.
- size_of_block = Some(u32::from_le_bytes([data[26], data[27], data[28], data[29]]));
- }
-
- // For NBSI images, try to read the nbsiDataOffset at offset 0x16.
- if data.len() >= 24 {
- nbsi_data_offset = Some(u16::from_le_bytes([data[22], data[23]]));
- }
-
- Ok(PciRomHeader {
- signature,
- reserved: [0u8; 20],
- pci_data_struct_offset: pci_data_struct_ptr,
- size_of_block,
- nbsi_data_offset,
- })
+ Ok(rom_header)
}
}
@@ -550,6 +581,9 @@ struct NpdeStruct {
unsafe impl FromBytes for NpdeStruct {}
impl NpdeStruct {
+ /// The bit in `last_image` that indicates the last image.
+ const LAST_IMAGE_BIT_MASK: u8 = 0x80;
+
fn new(dev: &device::Device, data: &[u8]) -> Option<Self> {
let (npde, _) = NpdeStruct::from_bytes_copy_prefix(data)?;
@@ -573,7 +607,7 @@ impl NpdeStruct {
/// Check if this is the last image in the ROM.
fn is_last(&self) -> bool {
- self.last_image & LAST_IMAGE_BIT_MASK != 0
+ self.last_image & Self::LAST_IMAGE_BIT_MASK != 0
}
/// Calculate image size in bytes from 512-byte blocks.
@@ -613,39 +647,14 @@ struct PciAtBiosImage {
bit_offset: usize,
}
-#[expect(dead_code)]
-struct EfiBiosImage {
- base: BiosImage,
- // EFI-specific fields can be added here in the future.
-}
-
-#[expect(dead_code)]
-struct NbsiBiosImage {
- base: BiosImage,
- // NBSI-specific fields can be added here in the future.
-}
-
-struct FwSecBiosBuilder {
- base: BiosImage,
- /// These are temporary fields that are used during the construction of the
- /// [`FwSecBiosBuilder`].
- ///
- /// Once FwSecBiosBuilder is constructed, the `falcon_ucode_offset` will be copied into a new
- /// [`FwSecBiosImage`].
- ///
- /// The offset of the Falcon data from the start of Fwsec image.
- falcon_data_offset: Option<usize>,
- /// The [`PmuLookupTable`] starts at the offset of the falcon data pointer.
- pmu_lookup_table: Option<PmuLookupTable>,
- /// The offset of the Falcon ucode.
- falcon_ucode_offset: Option<usize>,
-}
-
/// The [`FwSecBiosImage`] structure contains the PMU table and the Falcon Ucode.
///
/// The PMU table contains voltage/frequency tables as well as a pointer to the Falcon Ucode.
pub(crate) struct FwSecBiosImage {
- base: BiosImage,
+ /// Used for logging.
+ dev: ARef<device::Device>,
+ /// FWSEC data.
+ data: KVVec<u8>,
/// The offset of the Falcon ucode.
falcon_ucode_offset: usize,
}
@@ -653,18 +662,13 @@ pub(crate) struct FwSecBiosImage {
/// BIOS Image structure containing various headers and reference fields to all BIOS images.
///
/// A BiosImage struct is embedded into all image types and implements common operations.
-#[expect(dead_code)]
struct BiosImage {
- /// Used for logging.
- dev: ARef<device::Device>,
- /// PCI ROM Expansion Header
- rom_header: PciRomHeader,
/// PCI Data Structure
pcir: PcirStruct,
/// NVIDIA PCI Data Extension (optional)
npde: Option<NpdeStruct>,
/// Image data (includes ROM header and PCIR)
- data: KVec<u8>,
+ data: KVVec<u8>,
}
impl BiosImage {
@@ -702,15 +706,8 @@ impl BiosImage {
/// Creates a new BiosImage from raw byte data.
fn new(dev: &device::Device, data: &[u8]) -> Result<Self> {
- // Ensure we have enough data for the ROM header.
- if data.len() < 26 {
- dev_err!(dev, "Not enough data for ROM header\n");
- return Err(EINVAL);
- }
-
// Parse the ROM header.
- let rom_header = PciRomHeader::new(dev, &data[0..26])
- .inspect_err(|e| dev_err!(dev, "Failed to create PciRomHeader: {:?}\n", e))?;
+ let rom_header = PciRomHeader::new(dev, data)?;
// Get the PCI Data Structure using the pointer from the ROM header.
let pcir_offset = usize::from(rom_header.pci_data_struct_offset);
@@ -737,12 +734,10 @@ impl BiosImage {
let npde = NpdeStruct::find_in_data(dev, data, &rom_header, &pcir);
// Create a copy of the data.
- let mut data_copy = KVec::new();
+ let mut data_copy = KVVec::new();
data_copy.extend_from_slice(data, GFP_KERNEL)?;
Ok(BiosImage {
- dev: dev.into(),
- rom_header,
pcir,
npde,
data: data_copy,
@@ -773,33 +768,29 @@ impl PciAtBiosImage {
BitToken::from_id(self, token_id)
}
- /// Find the Falcon data pointer structure in the [`PciAtBiosImage`].
+ /// Find the Falcon data offset from the start of the FWSEC region.
///
- /// This is just a 4 byte structure that contains a pointer to the Falcon data in the FWSEC
- /// image.
- fn falcon_data_ptr(&self) -> Result<u32> {
- let token = self.get_bit_token(BIT_TOKEN_ID_FALCON_DATA)?;
-
- // Make sure we don't go out of bounds
- if usize::from(token.data_offset) + 4 > self.base.data.len() {
- return Err(EINVAL);
- }
-
- // read the 4 bytes at the offset specified in the token
+ /// The BIT table contains a 4-byte pointer to the Falcon data. Testing shows this pointer
+ /// treats the PCI-AT and FWSEC images as logically contiguous even when an EFI image sits in
+ /// between them, so subtract the PCI-AT image size here to convert it to a FWSEC-relative
+ /// offset.
+ fn falcon_data_offset(&self, dev: &device::Device) -> Result<usize> {
+ let token = self.get_bit_token(BitToken::ID_FALCON_DATA)?;
let offset = usize::from(token.data_offset);
- let bytes: [u8; 4] = self.base.data[offset..offset + 4].try_into().map_err(|_| {
- dev_err!(self.base.dev, "Failed to convert data slice to array\n");
- EINVAL
- })?;
-
- let data_ptr = u32::from_le_bytes(bytes);
- if (usize::from_safe_cast(data_ptr)) < self.base.data.len() {
- dev_err!(self.base.dev, "Falcon data pointer out of bounds\n");
- return Err(EINVAL);
- }
+ // Read the 4-byte falcon data pointer at the offset specified in the token.
+ let data = &self.base.data;
+ let (ptr, _) = data
+ .get(offset..)
+ .and_then(u32::from_bytes_copy_prefix)
+ .ok_or(EINVAL)?;
- Ok(data_ptr)
+ usize::from_safe_cast(ptr)
+ .checked_sub(data.len())
+ .ok_or(EINVAL)
+ .inspect_err(|_| {
+ dev_err!(dev, "Falcon data pointer out of bounds\n");
+ })
}
}
@@ -828,18 +819,18 @@ struct PmuLookupTableEntry {
data: u32,
}
-impl PmuLookupTableEntry {
- fn new(data: &[u8]) -> Result<Self> {
- if data.len() < core::mem::size_of::<Self>() {
- return Err(EINVAL);
- }
+// SAFETY: all bit patterns are valid for `PmuLookupTableEntry`.
+unsafe impl FromBytes for PmuLookupTableEntry {}
- Ok(PmuLookupTableEntry {
- application_id: data[0],
- target_id: data[1],
- data: u32::from_le_bytes(data[2..6].try_into().map_err(|_| EINVAL)?),
- })
- }
+impl PmuLookupTableEntry {
+ /// PMU lookup table application ID for firmware security license ucode.
+ #[expect(dead_code)]
+ const APPID_FIRMWARE_SEC_LIC: u8 = 0x05;
+ /// PMU lookup table application ID for debug FWSEC ucode.
+ #[expect(dead_code)]
+ const APPID_FWSEC_DBG: u8 = 0x45;
+ /// PMU lookup table application ID for production FWSEC ucode.
+ const APPID_FWSEC_PROD: u8 = 0x85;
}
#[repr(C)]
@@ -859,8 +850,7 @@ unsafe impl FromBytes for PmuLookupTableHeader {}
/// The table of entries is pointed to by the falcon data pointer in the BIT table, and is used to
/// locate the Falcon Ucode.
struct PmuLookupTable {
- header: PmuLookupTableHeader,
- table_data: KVec<u8>,
+ entries: KVVec<PmuLookupTableEntry>,
}
impl PmuLookupTable {
@@ -871,144 +861,70 @@ impl PmuLookupTable {
let entry_len = usize::from(header.entry_len);
let entry_count = usize::from(header.entry_count);
- let required_bytes = header_len + (entry_count * entry_len);
-
- if data.len() < required_bytes {
- dev_err!(dev, "PmuLookupTable data length less than required\n");
- return Err(EINVAL);
- }
-
- // Create a copy of only the table data
- let table_data = {
- let mut ret = KVec::new();
- ret.extend_from_slice(&data[header_len..required_bytes], GFP_KERNEL)?;
- ret
- };
-
- Ok(PmuLookupTable { header, table_data })
- }
+ let data = data
+ .get(header_len..header_len + entry_count * entry_len)
+ .ok_or(EINVAL)
+ .inspect_err(|_| {
+ dev_err!(dev, "PmuLookupTable data length less than required\n");
+ })?;
- fn lookup_index(&self, idx: u8) -> Result<PmuLookupTableEntry> {
- if idx >= self.header.entry_count {
- return Err(EINVAL);
+ let mut entries = KVVec::with_capacity(entry_count, GFP_KERNEL)?;
+ for i in 0..entry_count {
+ let (entry, _) = PmuLookupTableEntry::from_bytes_copy_prefix(&data[i * entry_len..])
+ .ok_or(EINVAL)?;
+ entries.push(entry, GFP_KERNEL)?;
}
- let index = (usize::from(idx)) * usize::from(self.header.entry_len);
- PmuLookupTableEntry::new(&self.table_data[index..])
+ Ok(PmuLookupTable { entries })
}
// Find the first entry whose application ID matches `entry_type`.
- fn find_entry_by_type(&self, entry_type: u8) -> Result<PmuLookupTableEntry> {
- for i in 0..self.header.entry_count {
- let entry = self.lookup_index(i)?;
- if entry.application_id == entry_type {
- return Ok(entry);
- }
- }
-
- Err(EINVAL)
+ fn find_entry_by_type(&self, entry_type: u8) -> Result<&PmuLookupTableEntry> {
+ self.entries
+ .iter()
+ .find(|entry| entry.application_id == entry_type)
+ .ok_or(EINVAL)
}
}
-impl FwSecBiosBuilder {
- fn setup_falcon_data(
- &mut self,
- pci_at_image: &PciAtBiosImage,
- first_fwsec: &FwSecBiosBuilder,
- ) -> Result {
- let mut offset = usize::from_safe_cast(pci_at_image.falcon_data_ptr()?);
- let mut pmu_in_first_fwsec = false;
-
- // The falcon data pointer assumes that the PciAt and FWSEC images
- // are contiguous in memory. However, testing shows the EFI image sits in
- // between them. So calculate the offset from the end of the PciAt image
- // rather than the start of it. Compensate.
- offset -= pci_at_image.base.data.len();
-
- // The offset is now from the start of the first Fwsec image, however
- // the offset points to a location in the second Fwsec image. Since
- // the fwsec images are contiguous, subtract the length of the first Fwsec
- // image from the offset to get the offset to the start of the second
- // Fwsec image.
- if offset < first_fwsec.base.data.len() {
- pmu_in_first_fwsec = true;
- } else {
- offset -= first_fwsec.base.data.len();
- }
-
- self.falcon_data_offset = Some(offset);
-
- if pmu_in_first_fwsec {
- self.pmu_lookup_table = Some(PmuLookupTable::new(
- &self.base.dev,
- &first_fwsec.base.data[offset..],
- )?);
- } else {
- self.pmu_lookup_table = Some(PmuLookupTable::new(
- &self.base.dev,
- &self.base.data[offset..],
- )?);
- }
-
- match self
- .pmu_lookup_table
- .as_ref()
- .ok_or(EINVAL)?
- .find_entry_by_type(FALCON_UCODE_ENTRY_APPID_FWSEC_PROD)
- {
- Ok(entry) => {
- let mut ucode_offset = usize::from_safe_cast(entry.data);
- ucode_offset -= pci_at_image.base.data.len();
- if ucode_offset < first_fwsec.base.data.len() {
- dev_err!(self.base.dev, "Falcon Ucode offset not in second Fwsec.\n");
- return Err(EINVAL);
- }
- ucode_offset -= first_fwsec.base.data.len();
- self.falcon_ucode_offset = Some(ucode_offset);
- }
- Err(e) => {
- dev_err!(
- self.base.dev,
- "PmuLookupTableEntry not found, error: {:?}\n",
- e
- );
- return Err(EINVAL);
- }
- }
- Ok(())
- }
-
- /// Build the final FwSecBiosImage from this builder
- fn build(self) -> Result<FwSecBiosImage> {
- let ret = FwSecBiosImage {
- base: self.base,
- falcon_ucode_offset: self.falcon_ucode_offset.ok_or(EINVAL)?,
- };
+impl FwSecBiosImage {
+ /// Build the final `FwSecBiosImage` from the PCI-AT and FWSEC BIOS images.
+ fn new(
+ dev: &device::Device,
+ pci_at_image: PciAtBiosImage,
+ data: KVVec<u8>,
+ ) -> Result<FwSecBiosImage> {
+ let offset = pci_at_image.falcon_data_offset(dev)?;
+
+ let pmu_lookup_data = data.get(offset..).ok_or(EINVAL)?;
+ let pmu_lookup_table = PmuLookupTable::new(dev, pmu_lookup_data)?;
+
+ let entry = pmu_lookup_table
+ .find_entry_by_type(PmuLookupTableEntry::APPID_FWSEC_PROD)
+ .inspect_err(|e| {
+ dev_err!(dev, "PmuLookupTableEntry not found, error: {:?}\n", e);
+ })?;
- if cfg!(debug_assertions) {
- // Print the desc header for debugging
- let desc = ret.header()?;
- dev_dbg!(ret.base.dev, "PmuLookupTableEntry desc: {:#?}\n", desc);
- }
+ let falcon_ucode_offset = usize::from_safe_cast(entry.data)
+ .checked_sub(pci_at_image.base.data.len())
+ .ok_or(EINVAL)
+ .inspect_err(|_| {
+ dev_err!(dev, "Falcon Ucode offset not in Fwsec.\n");
+ })?;
- Ok(ret)
+ Ok(FwSecBiosImage {
+ dev: dev.into(),
+ data,
+ falcon_ucode_offset,
+ })
}
-}
-impl FwSecBiosImage {
/// Get the FwSec header ([`FalconUCodeDesc`]).
pub(crate) fn header(&self) -> Result<FalconUCodeDesc> {
- // Get the falcon ucode offset that was found in setup_falcon_data.
- let falcon_ucode_offset = self.falcon_ucode_offset;
+ let data = self.data.get(self.falcon_ucode_offset..).ok_or(EINVAL)?;
- // Read the first 4 bytes to get the version.
- let hdr_bytes: [u8; 4] = self.base.data[falcon_ucode_offset..falcon_ucode_offset + 4]
- .try_into()
- .map_err(|_| EINVAL)?;
- let hdr = u32::from_le_bytes(hdr_bytes);
- let ver = (hdr & 0xff00) >> 8;
-
- let data = self.base.data.get(falcon_ucode_offset..).ok_or(EINVAL)?;
+ // Read the version byte from the header.
+ let ver = data.get(1).copied().ok_or(EINVAL)?;
match ver {
2 => {
let v2 = FalconUCodeDescV2::from_bytes_copy_prefix(data)
@@ -1023,7 +939,7 @@ impl FwSecBiosImage {
Ok(FalconUCodeDesc::V3(v3))
}
_ => {
- dev_err!(self.base.dev, "invalid fwsec firmware version: {:?}\n", ver);
+ dev_err!(self.dev, "invalid fwsec firmware version: {:?}\n", ver);
Err(EINVAL)
}
}
@@ -1031,20 +947,21 @@ impl FwSecBiosImage {
/// Get the ucode data as a byte slice
pub(crate) fn ucode(&self, desc: &FalconUCodeDesc) -> Result<&[u8]> {
- let falcon_ucode_offset = self.falcon_ucode_offset;
+ let size = usize::from_safe_cast(
+ desc.imem_load_size()
+ .checked_add(desc.dmem_load_size())
+ .ok_or(ERANGE)?,
+ );
// The ucode data follows the descriptor.
- let ucode_data_offset = falcon_ucode_offset + desc.size();
- let size = usize::from_safe_cast(desc.imem_load_size() + desc.dmem_load_size());
-
- // Get the data slice, checking bounds in a single operation.
- self.base
- .data
- .get(ucode_data_offset..ucode_data_offset + size)
+ self.data
+ .get(self.falcon_ucode_offset..)
+ .and_then(|data| data.get(desc.size()..))
+ .and_then(|data| data.get(..size))
.ok_or(ERANGE)
.inspect_err(|_| {
dev_err!(
- self.base.dev,
+ self.dev,
"fwsec ucode data not contained within BIOS bounds\n"
)
})
@@ -1062,9 +979,9 @@ impl FwSecBiosImage {
let sigs_size = sigs_count * core::mem::size_of::<Bcrt30Rsa3kSignature>();
// Make sure the data is within bounds.
- if sigs_data_offset + sigs_size > self.base.data.len() {
+ if sigs_data_offset + sigs_size > self.data.len() {
dev_err!(
- self.base.dev,
+ self.dev,
"fwsec signatures data not contained within BIOS bounds\n"
);
return Err(ERANGE);
@@ -1074,8 +991,7 @@ impl FwSecBiosImage {
// sizeof::<Bcrt30Rsa3kSignature>()` is within the bounds of `data`.
Ok(unsafe {
core::slice::from_raw_parts(
- self.base
- .data
+ self.data
.as_ptr()
.add(sigs_data_offset)
.cast::<Bcrt30Rsa3kSignature>(),
diff --git a/drivers/pwm/pwm_th1520.rs b/drivers/pwm/pwm_th1520.rs
index ddd44a5ce497..48808cd80737 100644
--- a/drivers/pwm/pwm_th1520.rs
+++ b/drivers/pwm/pwm_th1520.rs
@@ -92,7 +92,7 @@ struct Th1520WfHw {
#[pin_data(PinnedDrop)]
struct Th1520PwmDriverData {
#[pin]
- iomem: devres::Devres<IoMem<TH1520_PWM_REG_SIZE>>,
+ iomem: devres::Devres<IoMem<'static, TH1520_PWM_REG_SIZE>>,
clk: Clk,
}
@@ -316,12 +316,13 @@ kernel::of_device_table!(
impl platform::Driver for Th1520PwmPlatformDriver {
type IdInfo = ();
+ type Data<'bound> = Self;
const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE);
- fn probe(
- pdev: &platform::Device<Core>,
- _id_info: Option<&Self::IdInfo>,
- ) -> impl PinInit<Self, Error> {
+ fn probe<'bound>(
+ pdev: &'bound platform::Device<Core<'_>>,
+ _id_info: Option<&'bound Self::IdInfo>,
+ ) -> impl PinInit<Self, Error> + 'bound {
let dev = pdev.as_ref();
let request = pdev.io_request_by_index(0).ok_or(ENODEV)?;
@@ -351,7 +352,7 @@ impl platform::Driver for Th1520PwmPlatformDriver {
dev,
TH1520_MAX_PWM_NUM,
try_pin_init!(Th1520PwmDriverData {
- iomem <- request.iomap_sized::<TH1520_PWM_REG_SIZE>(),
+ iomem <- request.iomap_sized::<TH1520_PWM_REG_SIZE>()?.into_devres(),
clk <- clk,
}),
)?;
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index 5ccdae21b94a..b2c23af628e1 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -111,6 +111,7 @@ int drm_gem_shmem_init(struct drm_device *dev, struct drm_gem_shmem_object *shme
struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size);
void drm_gem_shmem_release(struct drm_gem_shmem_object *shmem);
void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem);
+void __drm_gem_shmem_free_sgt_locked(struct drm_gem_shmem_object *shmem);
void drm_gem_shmem_put_pages_locked(struct drm_gem_shmem_object *shmem);
int drm_gem_shmem_pin(struct drm_gem_shmem_object *shmem);
diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h
index bc09b55e3682..4e1ad8ccbcdd 100644
--- a/include/linux/auxiliary_bus.h
+++ b/include/linux/auxiliary_bus.h
@@ -62,6 +62,9 @@
* @sysfs.irqs: irqs xarray contains irq indices which are used by the device,
* @sysfs.lock: Synchronize irq sysfs creation,
* @sysfs.irq_dir_exists: whether "irqs" directory exists,
+ * @registration_data_rust: private data owned by the registering (parent)
+ * driver; valid for as long as the device is
+ * registered with the driver core,
*
* An auxiliary_device represents a part of its parent device's functionality.
* It is given a name that, combined with the registering driver's
@@ -148,6 +151,7 @@ struct auxiliary_device {
struct mutex lock; /* Synchronize irq sysfs creation */
bool irq_dir_exists;
} sysfs;
+ void *registration_data_rust;
};
/**
diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h
index bbc67ec513ed..38e9a4679447 100644
--- a/include/linux/device/driver.h
+++ b/include/linux/device/driver.h
@@ -123,8 +123,8 @@ struct device_driver {
struct driver_private *p;
struct {
/*
- * Called after remove() and after all devres entries have been
- * processed. This is a Rust only callback.
+ * Called after remove() but before devres entries are released.
+ * This is a Rust only callback.
*/
void (*post_unbind_rust)(struct device *dev);
} p_cb;
diff --git a/rust/Makefile b/rust/Makefile
index b9e9f512cec3..9b23d76030ca 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -110,6 +110,7 @@ syn-cfgs := \
feature="parsing" \
feature="printing" \
feature="proc-macro" \
+ feature="visit" \
feature="visit-mut"
syn-flags := \
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index 446dbeaf0866..1124785e210b 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -35,6 +35,7 @@
#include <drm/drm_file.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_gpuvm.h>
#include <drm/drm_ioctl.h>
#include <kunit/test.h>
#include <linux/auxiliary_bus.h>
diff --git a/rust/helpers/drm_gpuvm.c b/rust/helpers/drm_gpuvm.c
new file mode 100644
index 000000000000..4130b6325213
--- /dev/null
+++ b/rust/helpers/drm_gpuvm.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+#ifdef CONFIG_RUST_DRM_GPUVM
+
+#include <drm/drm_gpuvm.h>
+
+__rust_helper
+struct drm_gpuvm_bo *rust_helper_drm_gpuvm_bo_get(struct drm_gpuvm_bo *vm_bo)
+{
+ return drm_gpuvm_bo_get(vm_bo);
+}
+
+__rust_helper
+struct drm_gpuvm *rust_helper_drm_gpuvm_get(struct drm_gpuvm *obj)
+{
+ return drm_gpuvm_get(obj);
+}
+
+__rust_helper
+bool rust_helper_drm_gpuvm_is_extobj(struct drm_gpuvm *gpuvm,
+ struct drm_gem_object *obj)
+{
+ return drm_gpuvm_is_extobj(gpuvm, obj);
+}
+
+#endif // CONFIG_RUST_DRM_GPUVM
diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c
index 625921e27dfb..4488a87223b9 100644
--- a/rust/helpers/helpers.c
+++ b/rust/helpers/helpers.c
@@ -59,6 +59,7 @@
#include "dma.c"
#include "dma-resv.c"
#include "drm.c"
+#include "drm_gpuvm.c"
#include "err.c"
#include "irq.c"
#include "fs.c"
diff --git a/rust/kernel/alloc/kbox.rs b/rust/kernel/alloc/kbox.rs
index bd6da02c7ab8..2f8c16473c2c 100644
--- a/rust/kernel/alloc/kbox.rs
+++ b/rust/kernel/alloc/kbox.rs
@@ -19,6 +19,7 @@ use crate::ffi::c_void;
use crate::fmt;
use crate::init::InPlaceInit;
use crate::page::AsPageIter;
+use crate::prelude::*;
use crate::types::ForeignOwnable;
use pin_init::{InPlaceWrite, Init, PinInit, ZeroableOption};
@@ -256,6 +257,27 @@ where
Ok(Box(ptr.cast(), PhantomData))
}
+ /// Creates a new zero-initialized `Box<T, A>`.
+ ///
+ /// New memory is allocated with `A` and the [`__GFP_ZERO`] flag. The allocation may fail, in
+ /// which case an error is returned. For ZSTs no memory is allocated.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let b = KBox::<[u8; 128]>::zeroed(GFP_KERNEL)?;
+ /// assert_eq!(*b, [0; 128]);
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn zeroed(flags: Flags) -> Result<Self, AllocError>
+ where
+ T: Zeroable,
+ {
+ // SAFETY: `__GFP_ZERO` guarantees the memory is zeroed; `T: Zeroable` guarantees that
+ // all-zeroes is a valid bit pattern for `T`.
+ Ok(unsafe { Self::new_uninit(flags | __GFP_ZERO)?.assume_init() })
+ }
+
/// Constructs a new `Pin<Box<T, A>>`. If `T` does not implement [`Unpin`], then `x` will be
/// pinned in memory and can't be moved.
#[inline]
@@ -455,7 +477,7 @@ where
// SAFETY: The pointer returned by `into_foreign` comes from a well aligned
// pointer to `T` allocated by `A`.
-unsafe impl<T: 'static, A> ForeignOwnable for Box<T, A>
+unsafe impl<T, A> ForeignOwnable for Box<T, A>
where
A: Allocator,
{
@@ -465,8 +487,14 @@ where
core::mem::align_of::<T>()
};
- type Borrowed<'a> = &'a T;
- type BorrowedMut<'a> = &'a mut T;
+ type Borrowed<'a>
+ = &'a T
+ where
+ Self: 'a;
+ type BorrowedMut<'a>
+ = &'a mut T
+ where
+ Self: 'a;
fn into_foreign(self) -> *mut c_void {
Box::into_raw(self).cast()
@@ -494,13 +522,19 @@ where
// SAFETY: The pointer returned by `into_foreign` comes from a well aligned
// pointer to `T` allocated by `A`.
-unsafe impl<T: 'static, A> ForeignOwnable for Pin<Box<T, A>>
+unsafe impl<T, A> ForeignOwnable for Pin<Box<T, A>>
where
A: Allocator,
{
const FOREIGN_ALIGN: usize = <Box<T, A> as ForeignOwnable>::FOREIGN_ALIGN;
- type Borrowed<'a> = Pin<&'a T>;
- type BorrowedMut<'a> = Pin<&'a mut T>;
+ type Borrowed<'a>
+ = Pin<&'a T>
+ where
+ Self: 'a;
+ type BorrowedMut<'a>
+ = Pin<&'a mut T>
+ where
+ Self: 'a;
fn into_foreign(self) -> *mut c_void {
// SAFETY: We are still treating the box as pinned.
diff --git a/rust/kernel/auxiliary.rs b/rust/kernel/auxiliary.rs
index 93c0db1f6655..c42928d5a239 100644
--- a/rust/kernel/auxiliary.rs
+++ b/rust/kernel/auxiliary.rs
@@ -12,19 +12,25 @@ use crate::{
RawDeviceId,
RawDeviceIdIndex, //
},
- devres::Devres,
+
driver,
error::{
from_result,
to_result, //
},
prelude::*,
- types::Opaque,
+ types::{
+ ForLt,
+ ForeignOwnable,
+ Opaque, //
+ },
ThisModule, //
};
use core::{
+ any::TypeId,
marker::PhantomData,
mem::offset_of,
+ pin::Pin,
ptr::{
addr_of_mut,
NonNull, //
@@ -36,18 +42,18 @@ pub struct Adapter<T: Driver>(T);
// SAFETY:
// - `bindings::auxiliary_driver` is a C type declared as `repr(C)`.
-// - `T` is the type of the driver's device private data.
+// - `T::Data` is the type of the driver's device private data.
// - `struct auxiliary_driver` embeds a `struct device_driver`.
// - `DEVICE_DRIVER_OFFSET` is the correct byte offset to the embedded `struct device_driver`.
-unsafe impl<T: Driver + 'static> driver::DriverLayout for Adapter<T> {
+unsafe impl<T: Driver> driver::DriverLayout for Adapter<T> {
type DriverType = bindings::auxiliary_driver;
- type DriverData = T;
+ type DriverData<'bound> = T::Data<'bound>;
const DEVICE_DRIVER_OFFSET: usize = core::mem::offset_of!(Self::DriverType, driver);
}
// SAFETY: A call to `unregister` for a given instance of `DriverType` is guaranteed to be valid if
// a preceding call to `register` has been successful.
-unsafe impl<T: Driver + 'static> driver::RegistrationOps for Adapter<T> {
+unsafe impl<T: Driver> driver::RegistrationOps for Adapter<T> {
unsafe fn register(
adrv: &Opaque<Self::DriverType>,
name: &'static CStr,
@@ -73,7 +79,7 @@ unsafe impl<T: Driver + 'static> driver::RegistrationOps for Adapter<T> {
}
}
-impl<T: Driver + 'static> Adapter<T> {
+impl<T: Driver> Adapter<T> {
extern "C" fn probe_callback(
adev: *mut bindings::auxiliary_device,
id: *const bindings::auxiliary_device_id,
@@ -82,7 +88,7 @@ impl<T: Driver + 'static> Adapter<T> {
// `struct auxiliary_device`.
//
// INVARIANT: `adev` is valid for the duration of `probe_callback()`.
- let adev = unsafe { &*adev.cast::<Device<device::CoreInternal>>() };
+ let adev = unsafe { &*adev.cast::<Device<device::CoreInternal<'_>>>() };
// SAFETY: `DeviceId` is a `#[repr(transparent)]` wrapper of `struct auxiliary_device_id`
// and does not add additional invariants, so it's safe to transmute.
@@ -102,12 +108,12 @@ impl<T: Driver + 'static> Adapter<T> {
// `struct auxiliary_device`.
//
// INVARIANT: `adev` is valid for the duration of `remove_callback()`.
- let adev = unsafe { &*adev.cast::<Device<device::CoreInternal>>() };
+ let adev = unsafe { &*adev.cast::<Device<device::CoreInternal<'_>>>() };
// SAFETY: `remove_callback` is only ever called after a successful call to
// `probe_callback`, hence it's guaranteed that `Device::set_drvdata()` has been called
- // and stored a `Pin<KBox<T>>`.
- let data = unsafe { adev.as_ref().drvdata_borrow::<T>() };
+ // and stored a `Pin<KBox<T::Data<'_>>>`.
+ let data = unsafe { adev.as_ref().drvdata_borrow::<T::Data<'_>>() };
T::unbind(adev, data);
}
@@ -197,13 +203,19 @@ pub trait Driver {
/// type IdInfo: 'static = ();
type IdInfo: 'static;
+ /// The type of the driver's bus device private data.
+ type Data<'bound>: Send + 'bound;
+
/// The table of device ids supported by the driver.
const ID_TABLE: IdTable<Self::IdInfo>;
/// Auxiliary driver probe.
///
/// Called when an auxiliary device matches a corresponding driver.
- fn probe(dev: &Device<device::Core>, id_info: &Self::IdInfo) -> impl PinInit<Self, Error>;
+ fn probe<'bound>(
+ dev: &'bound Device<device::Core<'_>>,
+ id_info: &'bound Self::IdInfo,
+ ) -> impl PinInit<Self::Data<'bound>, Error> + 'bound;
/// Auxiliary driver unbind.
///
@@ -214,8 +226,8 @@ pub trait Driver {
/// `&Device<Core>` or `&Device<Bound>` reference. For instance, drivers may try to perform I/O
/// operations to gracefully tear down the device.
///
- /// Otherwise, release operations for driver resources should be performed in `Self::drop`.
- fn unbind(dev: &Device<device::Core>, this: Pin<&Self>) {
+ /// Otherwise, release operations for driver resources should be performed in `Drop`.
+ fn unbind<'bound>(dev: &'bound Device<device::Core<'_>>, this: Pin<&Self::Data<'bound>>) {
let _ = (dev, this);
}
}
@@ -257,6 +269,49 @@ impl Device<device::Bound> {
// SAFETY: A bound auxiliary device always has a bound parent device.
unsafe { parent.as_bound() }
}
+
+ /// Returns a pinned reference to the registration data set by the registering (parent) driver.
+ ///
+ /// `F` is the [`ForLt`](trait@ForLt) encoding of the data type. The returned
+ /// reference has its lifetime shortened from `'static` to `&self`'s borrow lifetime via
+ /// [`ForLt::cast_ref`].
+ ///
+ /// Returns [`EINVAL`] if `F` does not match the type used by the parent driver when calling
+ /// [`Registration::new()`].
+ ///
+ /// Returns [`ENOENT`] if no registration data has been set, e.g. when the device was
+ /// registered by a C driver.
+ pub fn registration_data<F: ForLt + 'static>(&self) -> Result<Pin<&F::Of<'_>>> {
+ // SAFETY: By the type invariant, `self.as_raw()` is a valid `struct auxiliary_device`.
+ let ptr = unsafe { (*self.as_raw()).registration_data_rust };
+ if ptr.is_null() {
+ dev_warn!(
+ self.as_ref(),
+ "No registration data set; parent is not a Rust driver.\n"
+ );
+ return Err(ENOENT);
+ }
+
+ // SAFETY: `ptr` is non-null and was set via `into_foreign()` in `Registration::new()`;
+ // `RegistrationData` is `#[repr(C)]` with `type_id` at offset 0, so reading a `TypeId`
+ // at the start of the allocation is valid regardless of `F`.
+ let type_id = unsafe { ptr.cast::<TypeId>().read() };
+ if type_id != TypeId::of::<F>() {
+ return Err(EINVAL);
+ }
+
+ // SAFETY: The `TypeId` check above confirms that the stored type matches
+ // `F::Of<'static>`; `ptr` remains valid until `Registration::drop()` calls
+ // `from_foreign()`.
+ let wrapper = unsafe { Pin::<KBox<RegistrationData<F::Of<'static>>>>::borrow(ptr) };
+
+ // SAFETY: `data` is a structurally pinned field of `RegistrationData`.
+ let pinned: Pin<&F::Of<'_>> = unsafe { wrapper.map_unchecked(|w| &w.data) };
+
+ // SAFETY: The data was pinned when stored; `cast_ref` only shortens
+ // the lifetime, so the pinning guarantee is preserved.
+ Ok(unsafe { Pin::new_unchecked(F::cast_ref(pinned.get_ref())) })
+ }
}
impl Device {
@@ -326,87 +381,173 @@ unsafe impl Send for Device {}
// (i.e. `Device<Normal>) are thread safe.
unsafe impl Sync for Device {}
+// SAFETY: Same as `Device<Normal>` -- the underlying `struct auxiliary_device` is the same;
+// `Bound` is a zero-sized type-state marker that does not affect thread safety.
+unsafe impl Sync for Device<device::Bound> {}
+
+/// Wrapper that stores a [`TypeId`] alongside the registration data for runtime type checking.
+#[repr(C)]
+#[pin_data]
+struct RegistrationData<T> {
+ type_id: TypeId,
+ #[pin]
+ data: T,
+}
+
/// The registration of an auxiliary device.
///
/// This type represents the registration of a [`struct auxiliary_device`]. When its parent device
/// is unbound, the corresponding auxiliary device will be unregistered from the system.
///
+/// The type parameter `F` is a [`ForLt`](trait@ForLt) encoding of the registration
+/// data type. For non-lifetime-parameterized types, use [`ForLt!(T)`](macro@ForLt).
+/// The data can be accessed by the auxiliary driver through [`Device::registration_data()`].
+///
/// # Invariants
///
-/// `self.0` always holds a valid pointer to an initialized and registered
-/// [`struct auxiliary_device`].
-pub struct Registration(NonNull<bindings::auxiliary_device>);
+/// `self.adev` always holds a valid pointer to an initialized and registered
+/// [`struct auxiliary_device`] whose `registration_data_rust` field points to a
+/// valid `Pin<KBox<RegistrationData<F::Of<'static>>>>`.
+pub struct Registration<'a, F: ForLt + 'static> {
+ adev: NonNull<bindings::auxiliary_device>,
+ _phantom: PhantomData<F::Of<'a>>,
+}
-impl Registration {
- /// Create and register a new auxiliary device.
- pub fn new<'a>(
+impl<'a, F: ForLt> Registration<'a, F>
+where
+ for<'b> F::Of<'b>: Send + Sync,
+{
+ /// Create and register a new auxiliary device with the given registration data.
+ ///
+ /// The `data` is owned by the registration and can be accessed through the auxiliary device
+ /// via [`Device::registration_data()`].
+ ///
+ /// # Safety
+ ///
+ /// The caller must not `mem::forget()` the returned [`Registration`] or otherwise prevent its
+ /// [`Drop`] implementation from running, since the registration data may contain borrowed
+ /// references that become invalid after `'a` ends.
+ ///
+ /// If the registration data is `'static`, use the safe [`Registration::new()`] instead.
+ pub unsafe fn new_with_lt<E>(
parent: &'a device::Device<device::Bound>,
- name: &'a CStr,
+ name: &CStr,
id: u32,
- modname: &'a CStr,
- ) -> impl PinInit<Devres<Self>, Error> + 'a {
- pin_init::pin_init_scope(move || {
- let boxed = KBox::new(Opaque::<bindings::auxiliary_device>::zeroed(), GFP_KERNEL)?;
- let adev = boxed.get();
-
- // SAFETY: It's safe to set the fields of `struct auxiliary_device` on initialization.
- unsafe {
- (*adev).dev.parent = parent.as_raw();
- (*adev).dev.release = Some(Device::release);
- (*adev).name = name.as_char_ptr();
- (*adev).id = id;
- }
-
- // SAFETY: `adev` is guaranteed to be a valid pointer to a `struct auxiliary_device`,
- // which has not been initialized yet.
- unsafe { bindings::auxiliary_device_init(adev) };
-
- // Now that `adev` is initialized, leak the `Box`; the corresponding memory will be
- // freed by `Device::release` when the last reference to the `struct auxiliary_device`
- // is dropped.
- let _ = KBox::into_raw(boxed);
-
- // SAFETY:
- // - `adev` is guaranteed to be a valid pointer to a `struct auxiliary_device`, which
- // has been initialized,
- // - `modname.as_char_ptr()` is a NULL terminated string.
- let ret = unsafe { bindings::__auxiliary_device_add(adev, modname.as_char_ptr()) };
- if ret != 0 {
- // SAFETY: `adev` is guaranteed to be a valid pointer to a
- // `struct auxiliary_device`, which has been initialized.
- unsafe { bindings::auxiliary_device_uninit(adev) };
-
- return Err(Error::from_errno(ret));
- }
-
- // INVARIANT: The device will remain registered until `auxiliary_device_delete()` is
- // called, which happens in `Self::drop()`.
- Ok(Devres::new(
- parent,
- // SAFETY: `adev` is guaranteed to be non-null, since the `KBox` was allocated
- // successfully.
- Self(unsafe { NonNull::new_unchecked(adev) }),
- ))
+ modname: &CStr,
+ data: impl PinInit<F::Of<'a>, E>,
+ ) -> Result<Self>
+ where
+ Error: From<E>,
+ {
+ let data = KBox::pin_init::<Error>(
+ try_pin_init!(RegistrationData {
+ type_id: TypeId::of::<F>(),
+ data <- data,
+ }),
+ GFP_KERNEL,
+ )?;
+
+ // SAFETY: `'a` is invariant (via `Registration`'s `PhantomData`). Lifetimes do not
+ // affect layout, so RegistrationData<F::Of<'a>> and RegistrationData<F::Of<'static>>
+ // have identical representation.
+ let data: Pin<KBox<RegistrationData<F::Of<'static>>>> =
+ unsafe { core::mem::transmute(data) };
+
+ let boxed: KBox<Opaque<bindings::auxiliary_device>> = KBox::zeroed(GFP_KERNEL)?;
+ let adev = boxed.get();
+
+ // SAFETY: It's safe to set the fields of `struct auxiliary_device` on initialization.
+ unsafe {
+ (*adev).dev.parent = parent.as_raw();
+ (*adev).dev.release = Some(Device::release);
+ (*adev).name = name.as_char_ptr();
+ (*adev).id = id;
+ (*adev).registration_data_rust = data.into_foreign();
+ }
+
+ // SAFETY: `adev` is guaranteed to be a valid pointer to a `struct auxiliary_device`,
+ // which has not been initialized yet.
+ unsafe { bindings::auxiliary_device_init(adev) };
+
+ // Now that `adev` is initialized, leak the `Box`; the corresponding memory will be
+ // freed by `Device::release` when the last reference to the `struct auxiliary_device`
+ // is dropped.
+ let _ = KBox::into_raw(boxed);
+
+ // SAFETY:
+ // - `adev` is guaranteed to be a valid pointer to a `struct auxiliary_device`, which
+ // has been initialized,
+ // - `modname.as_char_ptr()` is a NULL terminated string.
+ let ret = unsafe { bindings::__auxiliary_device_add(adev, modname.as_char_ptr()) };
+ if ret != 0 {
+ // SAFETY: `registration_data_rust` was set above via `into_foreign()`.
+ drop(unsafe {
+ Pin::<KBox<RegistrationData<F::Of<'static>>>>::from_foreign(
+ (*adev).registration_data_rust,
+ )
+ });
+
+ // SAFETY: `adev` is guaranteed to be a valid pointer to a
+ // `struct auxiliary_device`, which has been initialized.
+ unsafe { bindings::auxiliary_device_uninit(adev) };
+
+ return Err(Error::from_errno(ret));
+ }
+
+ // INVARIANT: The device will remain registered until `auxiliary_device_delete()` is
+ // called, which happens in `Self::drop()`.
+ Ok(Self {
+ // SAFETY: `adev` is guaranteed to be non-null, since the `KBox` was allocated
+ // successfully.
+ adev: unsafe { NonNull::new_unchecked(adev) },
+ _phantom: PhantomData,
})
}
+
+ /// Create and register a new auxiliary device with `'static` registration data.
+ ///
+ /// Safe variant of [`Registration::new_with_lt()`] for registration data that does not contain
+ /// borrowed references.
+ pub fn new<E>(
+ parent: &'a device::Device<device::Bound>,
+ name: &CStr,
+ id: u32,
+ modname: &CStr,
+ data: impl PinInit<F::Of<'a>, E>,
+ ) -> Result<Self>
+ where
+ F::Of<'a>: 'static,
+ Error: From<E>,
+ {
+ // SAFETY: `F::Of<'a>: 'static` guarantees the data contains no borrowed references,
+ // so forgetting the `Registration` cannot cause use-after-free.
+ unsafe { Self::new_with_lt(parent, name, id, modname, data) }
+ }
}
-impl Drop for Registration {
+impl<F: ForLt> Drop for Registration<'_, F> {
fn drop(&mut self) {
- // SAFETY: By the type invariant of `Self`, `self.0.as_ptr()` is a valid registered
+ // SAFETY: By the type invariant of `Self`, `self.adev.as_ptr()` is a valid registered
// `struct auxiliary_device`.
- unsafe { bindings::auxiliary_device_delete(self.0.as_ptr()) };
+ unsafe { bindings::auxiliary_device_delete(self.adev.as_ptr()) };
+
+ // SAFETY: `registration_data_rust` was set in `new_with_lt()` via `into_foreign()`.
+ drop(unsafe {
+ Pin::<KBox<RegistrationData<F::Of<'static>>>>::from_foreign(
+ (*self.adev.as_ptr()).registration_data_rust,
+ )
+ });
// This drops the reference we acquired through `auxiliary_device_init()`.
//
- // SAFETY: By the type invariant of `Self`, `self.0.as_ptr()` is a valid registered
+ // SAFETY: By the type invariant of `Self`, `self.adev.as_ptr()` is a valid registered
// `struct auxiliary_device`.
- unsafe { bindings::auxiliary_device_uninit(self.0.as_ptr()) };
+ unsafe { bindings::auxiliary_device_uninit(self.adev.as_ptr()) };
}
}
// SAFETY: A `Registration` of a `struct auxiliary_device` can be released from any thread.
-unsafe impl Send for Registration {}
+unsafe impl<F: ForLt> Send for Registration<'_, F> where for<'a> F::Of<'a>: Send {}
// SAFETY: `Registration` does not expose any methods or fields that need synchronization.
-unsafe impl Sync for Registration {}
+unsafe impl<F: ForLt> Sync for Registration<'_, F> where for<'a> F::Of<'a>: Send {}
diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs
index d8d26870bea2..d94c6cdbc45a 100644
--- a/rust/kernel/cpufreq.rs
+++ b/rust/kernel/cpufreq.rs
@@ -888,12 +888,13 @@ pub trait Driver {
///
/// impl platform::Driver for SampleDriver {
/// type IdInfo = ();
+/// type Data<'bound> = Self;
/// const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = None;
///
-/// fn probe(
-/// pdev: &platform::Device<Core>,
-/// _id_info: Option<&Self::IdInfo>,
-/// ) -> impl PinInit<Self, Error> {
+/// fn probe<'bound>(
+/// pdev: &'bound platform::Device<Core<'_>>,
+/// _id_info: Option<&'bound Self::IdInfo>,
+/// ) -> impl PinInit<Self, Error> + 'bound {
/// cpufreq::Registration::<SampleDriver>::new_foreign_owned(pdev.as_ref())?;
/// Ok(Self {})
/// }
diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs
index 6d5396a43ebe..645afc49a27d 100644
--- a/rust/kernel/device.rs
+++ b/rust/kernel/device.rs
@@ -15,16 +15,12 @@ use crate::{
}, //
};
use core::{
- any::TypeId,
marker::PhantomData,
ptr, //
};
pub mod property;
-// Assert that we can `read()` / `write()` a `TypeId` instance from / into `struct driver_type`.
-static_assert!(core::mem::size_of::<bindings::driver_type>() >= core::mem::size_of::<TypeId>());
-
/// The core representation of a device in the kernel's driver model.
///
/// This structure represents the Rust abstraction for a C `struct device`. A [`Device`] can either
@@ -205,30 +201,13 @@ impl Device {
}
}
-impl Device<CoreInternal> {
- fn set_type_id<T: 'static>(&self) {
- // SAFETY: By the type invariants, `self.as_raw()` is a valid pointer to a `struct device`.
- let private = unsafe { (*self.as_raw()).p };
-
- // SAFETY: For a bound device (implied by the `CoreInternal` device context), `private` is
- // guaranteed to be a valid pointer to a `struct device_private`.
- let driver_type = unsafe { &raw mut (*private).driver_type };
-
- // SAFETY: `driver_type` is valid for (unaligned) writes of a `TypeId`.
- unsafe {
- driver_type
- .cast::<TypeId>()
- .write_unaligned(TypeId::of::<T>())
- };
- }
-
+impl<'a> Device<CoreInternal<'a>> {
/// Store a pointer to the bound driver's private data.
- pub fn set_drvdata<T: 'static>(&self, data: impl PinInit<T, Error>) -> Result {
+ pub fn set_drvdata<T>(&self, data: impl PinInit<T, Error>) -> Result {
let data = KBox::pin_init(data, GFP_KERNEL)?;
// SAFETY: By the type invariants, `self.as_raw()` is a valid pointer to a `struct device`.
unsafe { bindings::dev_set_drvdata(self.as_raw(), data.into_foreign().cast()) };
- self.set_type_id::<T>();
Ok(())
}
@@ -239,7 +218,7 @@ impl Device<CoreInternal> {
///
/// - The type `T` must match the type of the `ForeignOwnable` previously stored by
/// [`Device::set_drvdata`].
- pub(crate) unsafe fn drvdata_obtain<T: 'static>(&self) -> Option<Pin<KBox<T>>> {
+ pub(crate) unsafe fn drvdata_obtain<T>(&self) -> Option<Pin<KBox<T>>> {
// SAFETY: By the type invariants, `self.as_raw()` is a valid pointer to a `struct device`.
let ptr = unsafe { bindings::dev_get_drvdata(self.as_raw()) };
@@ -265,7 +244,7 @@ impl Device<CoreInternal> {
/// device is fully unbound.
/// - The type `T` must match the type of the `ForeignOwnable` previously stored by
/// [`Device::set_drvdata`].
- pub unsafe fn drvdata_borrow<T: 'static>(&self) -> Pin<&T> {
+ pub unsafe fn drvdata_borrow<T>(&self) -> Pin<&T> {
// SAFETY: `drvdata_unchecked()` has the exact same safety requirements as the ones
// required by this method.
unsafe { self.drvdata_unchecked() }
@@ -281,7 +260,7 @@ impl Device<Bound> {
/// the device is fully unbound.
/// - The type `T` must match the type of the `ForeignOwnable` previously stored by
/// [`Device::set_drvdata`].
- unsafe fn drvdata_unchecked<T: 'static>(&self) -> Pin<&T> {
+ unsafe fn drvdata_unchecked<T>(&self) -> Pin<&T> {
// SAFETY: By the type invariants, `self.as_raw()` is a valid pointer to a `struct device`.
let ptr = unsafe { bindings::dev_get_drvdata(self.as_raw()) };
@@ -292,45 +271,6 @@ impl Device<Bound> {
// in `into_foreign()`.
unsafe { Pin::<KBox<T>>::borrow(ptr.cast()) }
}
-
- fn match_type_id<T: 'static>(&self) -> Result {
- // SAFETY: By the type invariants, `self.as_raw()` is a valid pointer to a `struct device`.
- let private = unsafe { (*self.as_raw()).p };
-
- // SAFETY: For a bound device, `private` is guaranteed to be a valid pointer to a
- // `struct device_private`.
- let driver_type = unsafe { &raw mut (*private).driver_type };
-
- // SAFETY:
- // - `driver_type` is valid for (unaligned) reads of a `TypeId`.
- // - A bound device guarantees that `driver_type` contains a valid `TypeId` value.
- let type_id = unsafe { driver_type.cast::<TypeId>().read_unaligned() };
-
- if type_id != TypeId::of::<T>() {
- return Err(EINVAL);
- }
-
- Ok(())
- }
-
- /// Access a driver's private data.
- ///
- /// Returns a pinned reference to the driver's private data or [`EINVAL`] if it doesn't match
- /// the asserted type `T`.
- pub fn drvdata<T: 'static>(&self) -> Result<Pin<&T>> {
- // SAFETY: By the type invariants, `self.as_raw()` is a valid pointer to a `struct device`.
- if unsafe { bindings::dev_get_drvdata(self.as_raw()) }.is_null() {
- return Err(ENOENT);
- }
-
- self.match_type_id::<T>()?;
-
- // SAFETY:
- // - The above check of `dev_get_drvdata()` guarantees that we are called after
- // `set_drvdata()`.
- // - We've just checked that the type of the driver's private data is in fact `T`.
- Ok(unsafe { self.drvdata_unchecked() })
- }
}
impl<Ctx: DeviceContext> Device<Ctx> {
@@ -527,6 +467,10 @@ unsafe impl Send for Device {}
// synchronization in `struct device`.
unsafe impl Sync for Device {}
+// SAFETY: Same as `Device<Normal>` -- the underlying `struct device` is the same; `Bound` is a
+// zero-sized type-state marker that does not affect thread safety.
+unsafe impl Sync for Device<Bound> {}
+
/// Marker trait for the context or scope of a bus specific device.
///
/// [`DeviceContext`] is a marker trait for types representing the context of a bus specific
@@ -567,7 +511,7 @@ pub struct Normal;
/// callback it appears in. It is intended to be used for synchronization purposes. Bus device
/// implementations can implement methods for [`Device<Core>`], such that they can only be called
/// from bus callbacks.
-pub struct Core;
+pub struct Core<'a>(PhantomData<&'a ()>);
/// Semantically the same as [`Core`], but reserved for internal usage of the corresponding bus
/// abstraction.
@@ -578,7 +522,7 @@ pub struct Core;
///
/// This context mainly exists to share generic [`Device`] infrastructure that should only be called
/// from bus callbacks with bus abstractions, but without making them accessible for drivers.
-pub struct CoreInternal;
+pub struct CoreInternal<'a>(PhantomData<&'a ()>);
/// The [`Bound`] context is the [`DeviceContext`] of a bus specific device when it is guaranteed to
/// be bound to a driver.
@@ -602,14 +546,14 @@ mod private {
pub trait Sealed {}
impl Sealed for super::Bound {}
- impl Sealed for super::Core {}
- impl Sealed for super::CoreInternal {}
+ impl<'a> Sealed for super::Core<'a> {}
+ impl<'a> Sealed for super::CoreInternal<'a> {}
impl Sealed for super::Normal {}
}
impl DeviceContext for Bound {}
-impl DeviceContext for Core {}
-impl DeviceContext for CoreInternal {}
+impl<'a> DeviceContext for Core<'a> {}
+impl<'a> DeviceContext for CoreInternal<'a> {}
impl DeviceContext for Normal {}
impl<Ctx: DeviceContext> AsRef<Device<Ctx>> for Device<Ctx> {
@@ -659,6 +603,22 @@ pub unsafe trait AsBusDevice<Ctx: DeviceContext>: AsRef<Device<Ctx>> {
#[doc(hidden)]
#[macro_export]
macro_rules! __impl_device_context_deref {
+ (unsafe { $device:ident, <$lt:lifetime> $src:ty => $dst:ty }) => {
+ impl<$lt> ::core::ops::Deref for $device<$src> {
+ type Target = $device<$dst>;
+
+ fn deref(&self) -> &Self::Target {
+ let ptr: *const Self = self;
+
+ // CAST: `$device<$src>` and `$device<$dst>` transparently wrap the same type by the
+ // safety requirement of the macro.
+ let ptr = ptr.cast::<Self::Target>();
+
+ // SAFETY: `ptr` was derived from `&self`.
+ unsafe { &*ptr }
+ }
+ }
+ };
(unsafe { $device:ident, $src:ty => $dst:ty }) => {
impl ::core::ops::Deref for $device<$src> {
type Target = $device<$dst>;
@@ -691,14 +651,14 @@ macro_rules! impl_device_context_deref {
// `__impl_device_context_deref!`.
::kernel::__impl_device_context_deref!(unsafe {
$device,
- $crate::device::CoreInternal => $crate::device::Core
+ <'a> $crate::device::CoreInternal<'a> => $crate::device::Core<'a>
});
// SAFETY: This macro has the exact same safety requirement as
// `__impl_device_context_deref!`.
::kernel::__impl_device_context_deref!(unsafe {
$device,
- $crate::device::Core => $crate::device::Bound
+ <'a> $crate::device::Core<'a> => $crate::device::Bound
});
// SAFETY: This macro has the exact same safety requirement as
@@ -713,6 +673,13 @@ macro_rules! impl_device_context_deref {
#[doc(hidden)]
#[macro_export]
macro_rules! __impl_device_context_into_aref {
+ (<$lt:lifetime> $src:ty, $device:tt) => {
+ impl<$lt> ::core::convert::From<&$device<$src>> for $crate::sync::aref::ARef<$device> {
+ fn from(dev: &$device<$src>) -> Self {
+ (&**dev).into()
+ }
+ }
+ };
($src:ty, $device:tt) => {
impl ::core::convert::From<&$device<$src>> for $crate::sync::aref::ARef<$device> {
fn from(dev: &$device<$src>) -> Self {
@@ -727,8 +694,12 @@ macro_rules! __impl_device_context_into_aref {
#[macro_export]
macro_rules! impl_device_context_into_aref {
($device:tt) => {
- ::kernel::__impl_device_context_into_aref!($crate::device::CoreInternal, $device);
- ::kernel::__impl_device_context_into_aref!($crate::device::Core, $device);
+ ::kernel::__impl_device_context_into_aref!(
+ <'a> $crate::device::CoreInternal<'a>, $device
+ );
+ ::kernel::__impl_device_context_into_aref!(
+ <'a> $crate::device::Core<'a>, $device
+ );
::kernel::__impl_device_context_into_aref!($crate::device::Bound, $device);
};
}
diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs
index 9e5f93aed20c..82cbd8b969fb 100644
--- a/rust/kernel/devres.rs
+++ b/rust/kernel/devres.rs
@@ -304,7 +304,7 @@ impl<T: Send> Devres<T> {
/// pci, //
/// };
///
- /// fn from_core(dev: &pci::Device<Core>, devres: Devres<pci::Bar<0x4>>) -> Result {
+ /// fn from_core(dev: &pci::Device<Core<'_>>, devres: Devres<pci::Bar<'_, 0x4>>) -> Result {
/// let bar = devres.access(dev.as_ref())?;
///
/// let _ = bar.read32(0x0);
diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs
index 4995ee5dc689..8f97916e0688 100644
--- a/rust/kernel/dma.rs
+++ b/rust/kernel/dma.rs
@@ -47,7 +47,7 @@ pub type DmaAddress = bindings::dma_addr_t;
/// where the underlying bus is DMA capable, such as:
#[cfg_attr(CONFIG_PCI, doc = "* [`pci::Device`](kernel::pci::Device)")]
/// * [`platform::Device`](::kernel::platform::Device)
-pub trait Device: AsRef<device::Device<Core>> {
+pub trait Device<'a>: AsRef<device::Device<Core<'a>>> {
/// Set up the device's DMA streaming addressing capabilities.
///
/// This method is usually called once from `probe()` as soon as the device capabilities are
diff --git a/rust/kernel/driver.rs b/rust/kernel/driver.rs
index 36de8098754d..03c0dd713f4c 100644
--- a/rust/kernel/driver.rs
+++ b/rust/kernel/driver.rs
@@ -13,10 +13,13 @@
//! The main driver interface is defined by a bus specific driver trait. For instance:
//!
//! ```ignore
-//! pub trait Driver: Send {
+//! pub trait Driver {
//! /// The type holding information about each device ID supported by the driver.
//! type IdInfo: 'static;
//!
+//! /// The type of the driver's bus device private data.
+//! type Data<'bound>: Send + 'bound;
+//!
//! /// The table of OF device ids supported by the driver.
//! const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = None;
//!
@@ -24,10 +27,16 @@
//! const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = None;
//!
//! /// Driver probe.
-//! fn probe(dev: &Device<device::Core>, id_info: &Self::IdInfo) -> impl PinInit<Self, Error>;
+//! fn probe<'bound>(
+//! dev: &'bound Device<device::Core<'_>>,
+//! id_info: &'bound Self::IdInfo,
+//! ) -> impl PinInit<Self::Data<'bound>, Error> + 'bound;
//!
//! /// Driver unbind (optional).
-//! fn unbind(dev: &Device<device::Core>, this: Pin<&Self>) {
+//! fn unbind<'bound>(
+//! dev: &'bound Device<device::Core<'_>>,
+//! this: Pin<&Self::Data<'bound>>,
+//! ) {
//! let _ = (dev, this);
//! }
//! }
@@ -42,8 +51,9 @@
)]
#![cfg_attr(CONFIG_PCI, doc = "* [`pci::Driver`](kernel::pci::Driver)")]
//!
-//! The `probe()` callback should return a `impl PinInit<Self, Error>`, i.e. the driver's private
-//! data. The bus abstraction should store the pointer in the corresponding bus device. The generic
+//! The `probe()` callback should return a
+//! `impl PinInit<Self::Data<'bound>, Error>`, i.e. the driver's private data. The bus
+//! abstraction should store the pointer in the corresponding bus device. The generic
//! [`Device`] infrastructure provides common helpers for this purpose on its
//! [`Device<CoreInternal>`] implementation.
//!
@@ -118,8 +128,8 @@ pub unsafe trait DriverLayout {
/// The specific driver type embedding a `struct device_driver`.
type DriverType: Default;
- /// The type of the driver's device private data.
- type DriverData;
+ /// The type of the driver's bus device private data.
+ type DriverData<'bound>;
/// Byte offset of the embedded `struct device_driver` within `DriverType`.
///
@@ -181,20 +191,20 @@ unsafe impl<T: RegistrationOps> Sync for Registration<T> {}
// any thread, so `Registration` is `Send`.
unsafe impl<T: RegistrationOps> Send for Registration<T> {}
-impl<T: RegistrationOps + 'static> Registration<T> {
+impl<T: RegistrationOps> Registration<T> {
extern "C" fn post_unbind_callback(dev: *mut bindings::device) {
// SAFETY: The driver core only ever calls the post unbind callback with a valid pointer to
// a `struct device`.
//
// INVARIANT: `dev` is valid for the duration of the `post_unbind_callback()`.
- let dev = unsafe { &*dev.cast::<device::Device<device::CoreInternal>>() };
+ let dev = unsafe { &*dev.cast::<device::Device<device::CoreInternal<'_>>>() };
- // `remove()` and all devres callbacks have been completed at this point, hence drop the
- // driver's device private data.
+ // `remove()` has been completed at this point; devres resources are still valid and will
+ // be released after the driver's bus device private data is dropped.
//
// SAFETY: By the safety requirements of the `Driver` trait, `T::DriverData` is the
- // driver's device private data type.
- drop(unsafe { dev.drvdata_obtain::<T::DriverData>() });
+ // driver's bus device private data type.
+ drop(unsafe { dev.drvdata_obtain::<T::DriverData<'_>>() });
}
/// Attach generic `struct device_driver` callbacks.
@@ -215,7 +225,10 @@ impl<T: RegistrationOps + 'static> Registration<T> {
}
/// Creates a new instance of the registration object.
- pub fn new(name: &'static CStr, module: &'static ThisModule) -> impl PinInit<Self, Error> {
+ pub fn new(name: &'static CStr, module: &'static ThisModule) -> impl PinInit<Self, Error>
+ where
+ T: 'static,
+ {
try_pin_init!(Self {
reg <- Opaque::try_ffi_init(|ptr: *mut T::DriverType| {
// SAFETY: `try_ffi_init` guarantees that `ptr` is valid for write.
diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs
index 403fc35353c7..477cf771fb10 100644
--- a/rust/kernel/drm/device.rs
+++ b/rust/kernel/drm/device.rs
@@ -6,10 +6,12 @@
use crate::{
alloc::allocator::Kmalloc,
- bindings, device,
+ bindings,
+ device,
drm::{
self,
- driver::AllocImpl, //
+ driver::AllocImpl,
+ private::Sealed, //
},
error::from_err_ptr,
prelude::*,
@@ -17,16 +19,20 @@ use crate::{
ARef,
AlwaysRefCounted, //
},
- types::Opaque,
+ types::{
+ NotThreadSafe,
+ Opaque, //
+ },
workqueue::{
HasDelayedWork,
HasWork,
Work,
WorkItem, //
- },
+ }, //
};
use core::{
alloc::Layout,
+ marker::PhantomData,
mem,
ops::Deref,
ptr::{
@@ -66,36 +72,122 @@ macro_rules! drm_legacy_fields {
}
}
-/// A typed DRM device with a specific `drm::Driver` implementation.
+/// A trait implemented by all possible contexts a [`Device`] can be used in.
+///
+/// Setting up a new [`Device`] is a multi-stage process. Each step of the process that a user
+/// interacts with in Rust has a respective [`DeviceContext`] typestate. For example,
+/// `Device<T, Registered>` would be a [`Device`] that reached the [`Registered`] [`DeviceContext`].
+///
+/// Each stage of this process is described below:
+///
+/// ```text
+/// 1 2 3
+/// +--------------+ +------------------+ +-----------------------+
+/// |Device created| → |Device initialized| → |Registered w/ userspace|
+/// +--------------+ +------------------+ +-----------------------+
+/// (Uninit) (Registered)
+/// ```
+///
+/// 1. The [`Device`] is in the [`Uninit`] context and is not guaranteed to be initialized or
+/// registered with userspace. Only a limited subset of DRM core functionality is available.
+/// 2. The [`Device`] is guaranteed to be fully initialized, but is not guaranteed to be registered
+/// with userspace. All DRM core functionality which doesn't interact with userspace is
+/// available. We currently don't have a context for representing this.
+/// 3. The [`Device`] is guaranteed to be fully initialized, and is guaranteed to have been
+/// registered with userspace at some point - thus putting it in the [`Registered`] context.
+///
+/// An important caveat of [`DeviceContext`] which must be kept in mind: when used as a typestate
+/// for a reference type, it can only guarantee that a [`Device`] reached a particular stage in the
+/// initialization process _at the time the reference was taken_. No guarantee is made in regards to
+/// what stage of the process the [`Device`] is currently in. This means for instance that a
+/// `&Device<T, Uninit>` may actually be registered with userspace, it just wasn't known to be
+/// registered at the time the reference was taken.
+pub trait DeviceContext: Sealed + Send + Sync {}
+
+/// The [`DeviceContext`] of a [`Device`] that was registered with userspace at some point.
///
-/// The device is always reference-counted.
+/// This represents a [`Device`] which is guaranteed to have been registered with userspace at
+/// some point in time. Such a DRM device is guaranteed to have been fully-initialized.
+///
+/// Note: A device in this context is not guaranteed to remain registered with userspace for its
+/// entire lifetime, as this is impossible to guarantee at compile-time.
///
/// # Invariants
///
-/// `self.dev` is a valid instance of a `struct device`.
-#[repr(C)]
-pub struct Device<T: drm::Driver> {
- dev: Opaque<bindings::drm_device>,
- data: T::Data,
+/// A [`Device`] in this [`DeviceContext`] is guaranteed to have been registered with userspace
+/// at some point in time.
+pub struct Registered;
+
+impl Sealed for Registered {}
+impl DeviceContext for Registered {}
+
+/// The [`DeviceContext`] of a [`Device`] that may be unregistered and partly uninitialized.
+///
+/// A [`Device`] in this context is only guaranteed to be partly initialized, and may or may not
+/// be registered with userspace. Thus operations which depend on the [`Device`] being fully
+/// initialized, or which depend on the [`Device`] being registered with userspace are not
+/// available through this [`DeviceContext`].
+///
+/// A [`Device`] in this context can be used to create a
+/// [`Registration`](drm::driver::Registration).
+pub struct Uninit;
+
+impl Sealed for Uninit {}
+impl DeviceContext for Uninit {}
+
+/// A [`Device`] which is known at compile-time to be unregistered with userspace.
+///
+/// This type allows performing operations which are only safe to do before userspace registration,
+/// and can be used to create a [`Registration`](drm::driver::Registration) once the driver is ready
+/// to register the device with userspace.
+///
+/// Since DRM device initialization must be single-threaded, this object is not thread-safe.
+///
+/// # Invariants
+///
+/// The device in `self.0` is guaranteed to be a newly created [`Device`] that remains
+/// unregistered with userspace until this type is dropped.
+pub struct UnregisteredDevice<T: drm::Driver>(ARef<Device<T, Uninit>>, NotThreadSafe);
+
+impl<T: drm::Driver> Deref for UnregisteredDevice<T> {
+ type Target = Device<T, Uninit>;
+
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
}
-impl<T: drm::Driver> Device<T> {
+impl<T: drm::Driver> UnregisteredDevice<T> {
+ const fn compute_features() -> u32 {
+ let mut features = drm::driver::FEAT_GEM;
+
+ if T::FEAT_RENDER {
+ features |= drm::driver::FEAT_RENDER;
+ }
+
+ features
+ }
+
const VTABLE: bindings::drm_driver = drm_legacy_fields! {
load: None,
open: Some(drm::File::<T::File>::open_callback),
postclose: Some(drm::File::<T::File>::postclose_callback),
unload: None,
- release: Some(Self::release),
+ release: Some(Device::<T>::release),
master_set: None,
master_drop: None,
debugfs_init: None,
- gem_create_object: T::Object::ALLOC_OPS.gem_create_object,
- prime_handle_to_fd: T::Object::ALLOC_OPS.prime_handle_to_fd,
- prime_fd_to_handle: T::Object::ALLOC_OPS.prime_fd_to_handle,
- gem_prime_import: T::Object::ALLOC_OPS.gem_prime_import,
- gem_prime_import_sg_table: T::Object::ALLOC_OPS.gem_prime_import_sg_table,
- dumb_create: T::Object::ALLOC_OPS.dumb_create,
- dumb_map_offset: T::Object::ALLOC_OPS.dumb_map_offset,
+
+ // Ignore the Uninit DeviceContext below. It is only provided because it is required by the
+ // compiler, and it is not actually used by these functions.
+ gem_create_object: T::Object::<Uninit>::ALLOC_OPS.gem_create_object,
+ prime_handle_to_fd: T::Object::<Uninit>::ALLOC_OPS.prime_handle_to_fd,
+ prime_fd_to_handle: T::Object::<Uninit>::ALLOC_OPS.prime_fd_to_handle,
+ gem_prime_import: T::Object::<Uninit>::ALLOC_OPS.gem_prime_import,
+ gem_prime_import_sg_table: T::Object::<Uninit>::ALLOC_OPS.gem_prime_import_sg_table,
+ dumb_create: T::Object::<Uninit>::ALLOC_OPS.dumb_create,
+ dumb_map_offset: T::Object::<Uninit>::ALLOC_OPS.dumb_map_offset,
+
show_fdinfo: None,
fbdev_probe: None,
@@ -105,7 +197,7 @@ impl<T: drm::Driver> Device<T> {
name: crate::str::as_char_ptr_in_const_context(T::INFO.name).cast_mut(),
desc: crate::str::as_char_ptr_in_const_context(T::INFO.desc).cast_mut(),
- driver_features: drm::driver::FEAT_GEM,
+ driver_features: Self::compute_features(),
ioctls: T::IOCTLS.as_ptr(),
num_ioctls: T::IOCTLS.len() as i32,
fops: &Self::GEM_FOPS,
@@ -113,11 +205,13 @@ impl<T: drm::Driver> Device<T> {
const GEM_FOPS: bindings::file_operations = drm::gem::create_fops();
- /// Create a new `drm::Device` for a `drm::Driver`.
- pub fn new(dev: &device::Device, data: impl PinInit<T::Data, Error>) -> Result<ARef<Self>> {
+ /// Create a new `UnregisteredDevice` for a `drm::Driver`.
+ ///
+ /// This can be used to create a [`Registration`](kernel::drm::Registration).
+ pub fn new(dev: &device::Device, data: impl PinInit<T::Data, Error>) -> Result<Self> {
// `__drm_dev_alloc` uses `kmalloc()` to allocate memory, hence ensure a `kmalloc()`
// compatible `Layout`.
- let layout = Kmalloc::aligned_layout(Layout::new::<Self>());
+ let layout = Kmalloc::aligned_layout(Layout::new::<Device<T, Uninit>>());
// Use a temporary vtable without a `release` callback until `data` is initialized, so
// init failure can release the DRM device without dropping uninitialized fields.
@@ -129,12 +223,12 @@ impl<T: drm::Driver> Device<T> {
// SAFETY:
// - `alloc_vtable` reference remains valid until no longer used,
// - `dev` is valid by its type invarants,
- let raw_drm: *mut Self = unsafe {
+ let raw_drm: *mut Device<T, Uninit> = unsafe {
bindings::__drm_dev_alloc(
dev.as_raw(),
&alloc_vtable,
layout.size(),
- mem::offset_of!(Self, dev),
+ mem::offset_of!(Device<T, Uninit>, dev),
)
}
.cast();
@@ -142,7 +236,7 @@ impl<T: drm::Driver> Device<T> {
// SAFETY: `raw_drm` is a valid pointer to `Self`, given that `__drm_dev_alloc` was
// successful.
- let drm_dev = unsafe { Self::into_drm_device(raw_drm) };
+ let drm_dev = unsafe { Device::into_drm_device(raw_drm) };
// SAFETY: `raw_drm` is a valid pointer to `Self`.
let raw_data = unsafe { ptr::addr_of_mut!((*raw_drm.as_ptr()).data) };
@@ -161,9 +255,39 @@ impl<T: drm::Driver> Device<T> {
// SAFETY: The reference count is one, and now we take ownership of that reference as a
// `drm::Device`.
- Ok(unsafe { ARef::from_raw(raw_drm) })
+ // INVARIANT: We just created the device above, but have yet to call `drm_dev_register`.
+ // `Self` cannot be copied or sent to another thread - ensuring that `drm_dev_register`
+ // won't be called during its lifetime and that the device is unregistered.
+ Ok(Self(unsafe { ARef::from_raw(raw_drm) }, NotThreadSafe))
}
+}
+/// A typed DRM device with a specific [`drm::Driver`] implementation and [`DeviceContext`].
+///
+/// Since DRM devices can be used before being fully initialized and registered with userspace, `C`
+/// represents the furthest [`DeviceContext`] we can guarantee that this [`Device`] has reached.
+///
+/// Keep in mind: this means that an unregistered device can still have the registration state
+/// [`Registered`] as long as it was registered with userspace once in the past, and that the
+/// behavior of such a device is still well-defined. Additionally, a device with the registration
+/// state [`Uninit`] simply does not have a guaranteed registration state at compile time, and could
+/// be either registered or unregistered. Since there is no way to guarantee a long-lived reference
+/// to an unregistered device would remain unregistered, we do not provide a [`DeviceContext`] for
+/// this.
+///
+/// # Invariants
+///
+/// * `self.dev` is a valid instance of a `struct drm_device`.
+/// * The data layout of `Self` remains the same across all implementations of `C`.
+/// * Any invariants for `C` also apply.
+#[repr(C)]
+pub struct Device<T: drm::Driver, C: DeviceContext = Registered> {
+ dev: Opaque<bindings::drm_device>,
+ data: T::Data,
+ _ctx: PhantomData<C>,
+}
+
+impl<T: drm::Driver, C: DeviceContext> Device<T, C> {
pub(crate) fn as_raw(&self) -> *mut bindings::drm_device {
self.dev.get()
}
@@ -189,13 +313,13 @@ impl<T: drm::Driver> Device<T> {
///
/// # Safety
///
- /// Callers must ensure that `ptr` is valid, non-null, and has a non-zero reference count,
- /// i.e. it must be ensured that the reference count of the C `struct drm_device` `ptr` points
- /// to can't drop to zero, for the duration of this function call and the entire duration when
- /// the returned reference exists.
- ///
- /// Additionally, callers must ensure that the `struct device`, `ptr` is pointing to, is
- /// embedded in `Self`.
+ /// * Callers must ensure that `ptr` is valid, non-null, and has a non-zero reference count,
+ /// i.e. it must be ensured that the reference count of the C `struct drm_device` `ptr` points
+ /// to can't drop to zero, for the duration of this function call and the entire duration when
+ /// the returned reference exists.
+ /// * Additionally, callers must ensure that the `struct drm_device`, `ptr` is pointing to, is
+ /// embedded in `Self`.
+ /// * Callers promise that any type invariants of `C` will be upheld.
#[doc(hidden)]
pub unsafe fn from_raw<'a>(ptr: *const bindings::drm_device) -> &'a Self {
// SAFETY: By the safety requirements of this function `ptr` is a valid pointer to a
@@ -215,9 +339,20 @@ impl<T: drm::Driver> Device<T> {
// - `this` is valid for dropping.
unsafe { core::ptr::drop_in_place(this) };
}
+
+ /// Change the [`DeviceContext`] for a [`Device`].
+ ///
+ /// # Safety
+ ///
+ /// The caller promises that `self` fulfills all of the guarantees provided by the given
+ /// [`DeviceContext`].
+ pub(crate) unsafe fn assume_ctx<NewCtx: DeviceContext>(&self) -> &Device<T, NewCtx> {
+ // SAFETY: The data layout is identical via our type invariants.
+ unsafe { mem::transmute(self) }
+ }
}
-impl<T: drm::Driver> Deref for Device<T> {
+impl<T: drm::Driver, C: DeviceContext> Deref for Device<T, C> {
type Target = T::Data;
fn deref(&self) -> &Self::Target {
@@ -227,7 +362,7 @@ impl<T: drm::Driver> Deref for Device<T> {
// SAFETY: DRM device objects are always reference counted and the get/put functions
// satisfy the requirements.
-unsafe impl<T: drm::Driver> AlwaysRefCounted for Device<T> {
+unsafe impl<T: drm::Driver, C: DeviceContext> AlwaysRefCounted for Device<T, C> {
fn inc_ref(&self) {
// SAFETY: The existence of a shared reference guarantees that the refcount is non-zero.
unsafe { bindings::drm_dev_get(self.as_raw()) };
@@ -242,7 +377,7 @@ unsafe impl<T: drm::Driver> AlwaysRefCounted for Device<T> {
}
}
-impl<T: drm::Driver> AsRef<device::Device> for Device<T> {
+impl<T: drm::Driver, C: DeviceContext> AsRef<device::Device> for Device<T, C> {
fn as_ref(&self) -> &device::Device {
// SAFETY: `bindings::drm_device::dev` is valid as long as the DRM device itself is valid,
// which is guaranteed by the type invariant.
@@ -251,21 +386,22 @@ impl<T: drm::Driver> AsRef<device::Device> for Device<T> {
}
// SAFETY: A `drm::Device` can be released from any thread.
-unsafe impl<T: drm::Driver> Send for Device<T> {}
+unsafe impl<T: drm::Driver, C: DeviceContext> Send for Device<T, C> {}
// SAFETY: A `drm::Device` can be shared among threads because all immutable methods are protected
// by the synchronization in `struct drm_device`.
-unsafe impl<T: drm::Driver> Sync for Device<T> {}
+unsafe impl<T: drm::Driver, C: DeviceContext> Sync for Device<T, C> {}
-impl<T, const ID: u64> WorkItem<ID> for Device<T>
+impl<T, C, const ID: u64> WorkItem<ID> for Device<T, C>
where
T: drm::Driver,
- T::Data: WorkItem<ID, Pointer = ARef<Device<T>>>,
- T::Data: HasWork<Device<T>, ID>,
+ T::Data: WorkItem<ID, Pointer = ARef<Self>>,
+ T::Data: HasWork<Self, ID>,
+ C: DeviceContext,
{
- type Pointer = ARef<Device<T>>;
+ type Pointer = ARef<Self>;
- fn run(ptr: ARef<Device<T>>) {
+ fn run(ptr: ARef<Self>) {
T::Data::run(ptr);
}
}
@@ -277,40 +413,42 @@ where
// stored inline in `drm::Device`, so the `container_of` call is valid.
//
// - The two methods are true inverses of each other: given `ptr: *mut
-// Device<T>`, `raw_get_work` will return a `*mut Work<Device<T>, ID>` through
-// `T::Data::raw_get_work` and given a `ptr: *mut Work<Device<T>, ID>`,
-// `work_container_of` will return a `*mut Device<T>` through `container_of`.
-unsafe impl<T, const ID: u64> HasWork<Device<T>, ID> for Device<T>
+// Device<T, C>`, `raw_get_work` will return a `*mut Work<Device<T, C>, ID>` through
+// `T::Data::raw_get_work` and given a `ptr: *mut Work<Device<T, C>, ID>`,
+// `work_container_of` will return a `*mut Device<T, C>` through `container_of`.
+unsafe impl<T, C, const ID: u64> HasWork<Self, ID> for Device<T, C>
where
T: drm::Driver,
- T::Data: HasWork<Device<T>, ID>,
+ T::Data: HasWork<Self, ID>,
+ C: DeviceContext,
{
- unsafe fn raw_get_work(ptr: *mut Self) -> *mut Work<Device<T>, ID> {
- // SAFETY: The caller promises that `ptr` points to a valid `Device<T>`.
+ unsafe fn raw_get_work(ptr: *mut Self) -> *mut Work<Self, ID> {
+ // SAFETY: The caller promises that `ptr` points to a valid `Device<T, C>`.
let data_ptr = unsafe { &raw mut (*ptr).data };
// SAFETY: `data_ptr` is a valid pointer to `T::Data`.
unsafe { T::Data::raw_get_work(data_ptr) }
}
- unsafe fn work_container_of(ptr: *mut Work<Device<T>, ID>) -> *mut Self {
+ unsafe fn work_container_of(ptr: *mut Work<Self, ID>) -> *mut Self {
// SAFETY: The caller promises that `ptr` points at a `Work` field in
// `T::Data`.
let data_ptr = unsafe { T::Data::work_container_of(ptr) };
- // SAFETY: `T::Data` is stored as the `data` field in `Device<T>`.
+ // SAFETY: `T::Data` is stored as the `data` field in `Device<T, C>`.
unsafe { crate::container_of!(data_ptr, Self, data) }
}
}
// SAFETY: Our `HasWork<T, ID>` implementation returns a `work_struct` that is
// stored in the `work` field of a `delayed_work` with the same access rules as
-// the `work_struct` owing to the bound on `T::Data: HasDelayedWork<Device<T>,
+// the `work_struct` owing to the bound on `T::Data: HasDelayedWork<Device<T, C>,
// ID>`, which requires that `T::Data::raw_get_work` return a `work_struct` that
// is inside a `delayed_work`.
-unsafe impl<T, const ID: u64> HasDelayedWork<Device<T>, ID> for Device<T>
+unsafe impl<T, C, const ID: u64> HasDelayedWork<Self, ID> for Device<T, C>
where
T: drm::Driver,
- T::Data: HasDelayedWork<Device<T>, ID>,
+ T::Data: HasDelayedWork<Self, ID>,
+ C: DeviceContext,
{
}
diff --git a/rust/kernel/drm/driver.rs b/rust/kernel/drm/driver.rs
index 5233bdebc9fc..25f7e233884d 100644
--- a/rust/kernel/drm/driver.rs
+++ b/rust/kernel/drm/driver.rs
@@ -13,9 +13,15 @@ use crate::{
prelude::*,
sync::aref::ARef, //
};
+use core::{
+ mem,
+ ptr::NonNull, //
+};
/// Driver use the GEM memory manager. This should be set for all modern drivers.
pub(crate) const FEAT_GEM: u32 = bindings::drm_driver_feature_DRIVER_GEM;
+/// Driver supports render nodes, i.e.: /dev/dri/renderDXX devices.
+pub(crate) const FEAT_RENDER: u32 = bindings::drm_driver_feature_DRIVER_RENDER;
/// Information data for a DRM Driver.
pub struct DriverInfo {
@@ -105,7 +111,7 @@ pub trait Driver {
type Data: Sync + Send;
/// The type used to manage memory for this driver.
- type Object: AllocImpl;
+ type Object<Ctx: drm::DeviceContext>: AllocImpl;
/// The type used to represent a DRM File (client)
type File: drm::file::DriverFile;
@@ -115,6 +121,16 @@ pub trait Driver {
/// IOCTL list. See `kernel::drm::ioctl::declare_drm_ioctls!{}`.
const IOCTLS: &'static [drm::ioctl::DrmIoctlDescriptor];
+
+ /// Sets the `DRIVER_RENDER` feature for this driver.
+ ///
+ /// When enabled, the driver exposes `/dev/dri/renderDXX` render nodes to
+ /// userspace. The render node is an alternate low-privilege way to access
+ /// the driver, which is enforced on a per-ioctl level. Userspace processes
+ /// that open the render node can only invoke ioctls explicitly listed as
+ /// usable from the render node (i.e. marked DRM_RENDER_ALLOW), whereas
+ /// userspace processes using the master node can invoke any ioctl.
+ const FEAT_RENDER: bool = false;
}
/// The registration type of a `drm::Device`.
@@ -123,21 +139,31 @@ pub trait Driver {
pub struct Registration<T: Driver>(ARef<drm::Device<T>>);
impl<T: Driver> Registration<T> {
- fn new(drm: &drm::Device<T>, flags: usize) -> Result<Self> {
+ fn new(drm: drm::UnregisteredDevice<T>, flags: usize) -> Result<Self> {
// SAFETY: `drm.as_raw()` is valid by the invariants of `drm::Device`.
to_result(unsafe { bindings::drm_dev_register(drm.as_raw(), flags) })?;
- Ok(Self(drm.into()))
+ // SAFETY: We just called `drm_dev_register` above
+ let new = NonNull::from(unsafe { drm.assume_ctx() });
+
+ // Leak the ARef from UnregisteredDevice in preparation for transferring its ownership.
+ mem::forget(drm);
+
+ // SAFETY: `drm`'s `Drop` implementation was never run, ensuring that there remains at least
+ // one reference to the device - which we take ownership over here.
+ let new = unsafe { ARef::from_raw(new) };
+
+ Ok(Self(new))
}
- /// Registers a new [`Device`](drm::Device) with userspace.
+ /// Registers a new [`UnregisteredDevice`](drm::UnregisteredDevice) with userspace.
///
/// Ownership of the [`Registration`] object is passed to [`devres::register`].
- pub fn new_foreign_owned(
- drm: &drm::Device<T>,
- dev: &device::Device<device::Bound>,
+ pub fn new_foreign_owned<'a>(
+ drm: drm::UnregisteredDevice<T>,
+ dev: &'a device::Device<device::Bound>,
flags: usize,
- ) -> Result
+ ) -> Result<&'a drm::Device<T>>
where
T: 'static,
{
@@ -146,8 +172,13 @@ impl<T: Driver> Registration<T> {
}
let reg = Registration::<T>::new(drm, flags)?;
+ let drm = NonNull::from(reg.device());
+
+ devres::register(dev, reg, GFP_KERNEL)?;
- devres::register(dev, reg, GFP_KERNEL)
+ // SAFETY: Since `reg` was passed to devres::register(), the device now owns the lifetime
+ // of the DRM registration - ensuring that this reference lives for at least as long as 'a.
+ Ok(unsafe { drm.as_ref() })
}
/// Returns a reference to the `Device` instance for this registration.
diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs
index 01b5bd47a333..c8b66d816871 100644
--- a/rust/kernel/drm/gem/mod.rs
+++ b/rust/kernel/drm/gem/mod.rs
@@ -8,6 +8,10 @@ use crate::{
bindings,
drm::{
self,
+ device::{
+ DeviceContext,
+ Registered, //
+ },
driver::{
AllocImpl,
AllocOps, //
@@ -22,6 +26,7 @@ use crate::{
types::Opaque,
};
use core::{
+ marker::PhantomData,
ops::Deref,
ptr::NonNull, //
};
@@ -73,6 +78,12 @@ pub(crate) use impl_aref_for_gem_obj;
/// [`DriverFile`]: drm::file::DriverFile
pub type DriverFile<T> = drm::File<<<T as DriverObject>::Driver as drm::Driver>::File>;
+/// A type alias for retrieving the current [`AllocImpl`] for a given [`DriverObject`].
+///
+/// [`Driver`]: drm::Driver
+pub type DriverAllocImpl<T, Ctx = Registered> =
+ <<T as DriverObject>::Driver as drm::Driver>::Object<Ctx>;
+
/// GEM object functions, which must be implemented by drivers.
pub trait DriverObject: Sync + Send + Sized {
/// Parent `Driver` for this object.
@@ -82,19 +93,19 @@ pub trait DriverObject: Sync + Send + Sized {
type Args;
/// Create a new driver data object for a GEM object of a given size.
- fn new(
- dev: &drm::Device<Self::Driver>,
+ fn new<Ctx: DeviceContext>(
+ dev: &drm::Device<Self::Driver, Ctx>,
size: usize,
args: Self::Args,
) -> impl PinInit<Self, Error>;
/// Open a new handle to an existing object, associated with a File.
- fn open(_obj: &<Self::Driver as drm::Driver>::Object, _file: &DriverFile<Self>) -> Result {
+ fn open(_obj: &DriverAllocImpl<Self>, _file: &DriverFile<Self>) -> Result {
Ok(())
}
/// Close a handle to an existing object, associated with a File.
- fn close(_obj: &<Self::Driver as drm::Driver>::Object, _file: &DriverFile<Self>) {}
+ fn close(_obj: &DriverAllocImpl<Self>, _file: &DriverFile<Self>) {}
}
/// Trait that represents a GEM object subtype
@@ -120,9 +131,12 @@ extern "C" fn open_callback<T: DriverObject>(
// SAFETY: `open_callback` is only ever called with a valid pointer to a `struct drm_file`.
let file = unsafe { DriverFile::<T>::from_raw(raw_file) };
- // SAFETY: `open_callback` is specified in the AllocOps structure for `DriverObject<T>`,
- // ensuring that `raw_obj` is contained within a `DriverObject<T>`
- let obj = unsafe { <<T::Driver as drm::Driver>::Object as IntoGEMObject>::from_raw(raw_obj) };
+ // SAFETY:
+ // * `open_callback` is specified in the AllocOps structure for `DriverObject`, ensuring that
+ // `raw_obj` is contained within a `DriverAllocImpl<T>`
+ // * It is only possible for `open_callback` to be called after device registration, ensuring
+ // that the object's device is in the `Registered` state.
+ let obj: &DriverAllocImpl<T> = unsafe { IntoGEMObject::from_raw(raw_obj) };
match T::open(obj, file) {
Err(e) => e.to_errno(),
@@ -139,12 +153,12 @@ extern "C" fn close_callback<T: DriverObject>(
// SAFETY: `close_callback` is specified in the AllocOps structure for `Object<T>`, ensuring
// that `raw_obj` is indeed contained within a `Object<T>`.
- let obj = unsafe { <<T::Driver as drm::Driver>::Object as IntoGEMObject>::from_raw(raw_obj) };
+ let obj: &DriverAllocImpl<T> = unsafe { IntoGEMObject::from_raw(raw_obj) };
T::close(obj, file);
}
-impl<T: DriverObject> IntoGEMObject for Object<T> {
+impl<T: DriverObject, Ctx: DeviceContext> IntoGEMObject for Object<T, Ctx> {
fn as_raw(&self) -> *mut bindings::drm_gem_object {
self.obj.get()
}
@@ -152,7 +166,7 @@ impl<T: DriverObject> IntoGEMObject for Object<T> {
unsafe fn from_raw<'a>(self_ptr: *mut bindings::drm_gem_object) -> &'a Self {
// SAFETY: `obj` is guaranteed to be in an `Object<T>` via the safety contract of this
// function
- unsafe { &*crate::container_of!(Opaque::cast_from(self_ptr), Object<T>, obj) }
+ unsafe { &*crate::container_of!(Opaque::cast_from(self_ptr), Object<T, Ctx>, obj) }
}
}
@@ -169,7 +183,7 @@ pub trait BaseObject: IntoGEMObject {
fn create_handle<D, F>(&self, file: &drm::File<F>) -> Result<u32>
where
Self: AllocImpl<Driver = D>,
- D: drm::Driver<Object = Self, File = F>,
+ D: drm::Driver<Object<Registered> = Self, File = F>,
F: drm::file::DriverFile<Driver = D>,
{
let mut handle: u32 = 0;
@@ -184,7 +198,7 @@ pub trait BaseObject: IntoGEMObject {
fn lookup_handle<D, F>(file: &drm::File<F>, handle: u32) -> Result<ARef<Self>>
where
Self: AllocImpl<Driver = D>,
- D: drm::Driver<Object = Self, File = F>,
+ D: drm::Driver<Object<Registered> = Self, File = F>,
F: drm::file::DriverFile<Driver = D>,
{
// SAFETY: The arguments are all valid per the type invariants.
@@ -236,16 +250,18 @@ impl<T: IntoGEMObject> BaseObjectPrivate for T {}
///
/// # Invariants
///
-/// - `self.obj` is a valid instance of a `struct drm_gem_object`.
+/// * `self.obj` is a valid instance of a `struct drm_gem_object`.
+/// * Any type invariants of `Ctx` apply to the parent DRM device for this GEM object.
#[repr(C)]
#[pin_data]
-pub struct Object<T: DriverObject + Send + Sync> {
+pub struct Object<T: DriverObject + Send + Sync, Ctx: DeviceContext = Registered> {
obj: Opaque<bindings::drm_gem_object>,
#[pin]
data: T,
+ _ctx: PhantomData<Ctx>,
}
-impl<T: DriverObject> Object<T> {
+impl<T: DriverObject, Ctx: DeviceContext> Object<T, Ctx> {
const OBJECT_FUNCS: bindings::drm_gem_object_funcs = bindings::drm_gem_object_funcs {
free: Some(Self::free_callback),
open: Some(open_callback::<T>),
@@ -265,11 +281,16 @@ impl<T: DriverObject> Object<T> {
};
/// Create a new GEM object.
- pub fn new(dev: &drm::Device<T::Driver>, size: usize, args: T::Args) -> Result<ARef<Self>> {
+ pub fn new(
+ dev: &drm::Device<T::Driver, Ctx>,
+ size: usize,
+ args: T::Args,
+ ) -> Result<ARef<Self>> {
let obj: Pin<KBox<Self>> = KBox::pin_init(
try_pin_init!(Self {
obj: Opaque::new(bindings::drm_gem_object::default()),
data <- T::new(dev, size, args),
+ _ctx: PhantomData,
}),
GFP_KERNEL,
)?;
@@ -277,6 +298,8 @@ impl<T: DriverObject> Object<T> {
// SAFETY: `obj.as_raw()` is guaranteed to be valid by the initialization above.
unsafe { (*obj.as_raw()).funcs = &Self::OBJECT_FUNCS };
+ // INVARIANT: `dev` and the GEM object are in the same state at the moment, and upgrading
+ // the typestate in `dev` will not carry over to the GEM object.
if let Err(err) =
// SAFETY: The arguments are all valid per the type invariants.
to_result(unsafe {
@@ -300,13 +323,15 @@ impl<T: DriverObject> Object<T> {
}
/// Returns the `Device` that owns this GEM object.
- pub fn dev(&self) -> &drm::Device<T::Driver> {
+ pub fn dev(&self) -> &drm::Device<T::Driver, Ctx> {
// SAFETY:
// - `struct drm_gem_object.dev` is initialized and valid for as long as the GEM
// object lives.
// - The device we used for creating the gem object is passed as &drm::Device<T::Driver> to
// Object::<T>::new(), so we know that `T::Driver` is the right generic parameter to use
// here.
+ // - Any type invariants of `Ctx` are upheld by using the same `Ctx` for the `Device` we
+ // return.
unsafe { drm::Device::from_raw((*self.as_raw()).dev) }
}
@@ -331,11 +356,16 @@ impl<T: DriverObject> Object<T> {
}
}
-impl_aref_for_gem_obj!(impl<T> for Object<T> where T: DriverObject);
+impl_aref_for_gem_obj! {
+ impl<T, C> for Object<T, C>
+ where
+ T: DriverObject,
+ C: DeviceContext
+}
-impl<T: DriverObject> super::private::Sealed for Object<T> {}
+impl<T: DriverObject, Ctx: DeviceContext> super::private::Sealed for Object<T, Ctx> {}
-impl<T: DriverObject> Deref for Object<T> {
+impl<T: DriverObject, Ctx: DeviceContext> Deref for Object<T, Ctx> {
type Target = T;
fn deref(&self) -> &Self::Target {
@@ -343,7 +373,7 @@ impl<T: DriverObject> Deref for Object<T> {
}
}
-impl<T: DriverObject> AllocImpl for Object<T> {
+impl<T: DriverObject, Ctx: DeviceContext> AllocImpl for Object<T, Ctx> {
type Driver = T::Driver;
const ALLOC_OPS: AllocOps = AllocOps {
diff --git a/rust/kernel/drm/gem/shmem.rs b/rust/kernel/drm/gem/shmem.rs
index e1b648920d2f..34af402899a0 100644
--- a/rust/kernel/drm/gem/shmem.rs
+++ b/rust/kernel/drm/gem/shmem.rs
@@ -12,10 +12,12 @@
use crate::{
container_of,
drm::{
- device,
driver,
gem,
- private::Sealed, //
+ private::Sealed,
+ Device,
+ DeviceContext,
+ Registered, //
},
error::to_result,
prelude::*,
@@ -23,11 +25,12 @@ use crate::{
types::Opaque, //
};
use core::{
+ marker::PhantomData,
ops::{
Deref,
DerefMut, //
},
- ptr::NonNull,
+ ptr::NonNull, //
};
use gem::{
BaseObjectPrivate,
@@ -40,42 +43,49 @@ use gem::{
/// This is used with [`Object::new()`] to control various properties that can only be set when
/// initially creating a shmem-backed GEM object.
#[derive(Default)]
-pub struct ObjectConfig<'a, T: DriverObject> {
+pub struct ObjectConfig<'a, T: DriverObject, C: DeviceContext = Registered> {
/// Whether to set the write-combine map flag.
pub map_wc: bool,
/// Reuse the DMA reservation from another GEM object.
///
/// The newly created [`Object`] will hold an owned refcount to `parent_resv_obj` if specified.
- pub parent_resv_obj: Option<&'a Object<T>>,
+ pub parent_resv_obj: Option<&'a Object<T, C>>,
}
/// A shmem-backed GEM object.
///
/// # Invariants
///
-/// `obj` contains a valid initialized `struct drm_gem_shmem_object` for the lifetime of this
-/// object.
+/// - `obj` contains a valid initialized `struct drm_gem_shmem_object` for the lifetime of this
+/// object.
+/// - Any type invariants of `C` apply to the parent DRM device for this GEM object.
#[repr(C)]
#[pin_data]
-pub struct Object<T: DriverObject> {
+pub struct Object<T: DriverObject, C: DeviceContext = Registered> {
#[pin]
obj: Opaque<bindings::drm_gem_shmem_object>,
/// Parent object that owns this object's DMA reservation object.
- parent_resv_obj: Option<ARef<Object<T>>>,
+ parent_resv_obj: Option<ARef<Object<T, C>>>,
#[pin]
inner: T,
+ _ctx: PhantomData<C>,
}
-super::impl_aref_for_gem_obj!(impl<T> for Object<T> where T: DriverObject);
+super::impl_aref_for_gem_obj! {
+ impl<T, C> for Object<T, C>
+ where
+ T: DriverObject,
+ C: DeviceContext
+}
// SAFETY: All GEM objects are thread-safe.
-unsafe impl<T: DriverObject> Send for Object<T> {}
+unsafe impl<T: DriverObject, C: DeviceContext> Send for Object<T, C> {}
// SAFETY: All GEM objects are thread-safe.
-unsafe impl<T: DriverObject> Sync for Object<T> {}
+unsafe impl<T: DriverObject, C: DeviceContext> Sync for Object<T, C> {}
-impl<T: DriverObject> Object<T> {
+impl<T: DriverObject, C: DeviceContext> Object<T, C> {
/// `drm_gem_object_funcs` vtable suitable for GEM shmem objects.
const VTABLE: bindings::drm_gem_object_funcs = bindings::drm_gem_object_funcs {
free: Some(Self::free_callback),
@@ -106,9 +116,9 @@ impl<T: DriverObject> Object<T> {
///
/// Additional config options can be specified using `config`.
pub fn new(
- dev: &device::Device<T::Driver>,
+ dev: &Device<T::Driver, C>,
size: usize,
- config: ObjectConfig<'_, T>,
+ config: ObjectConfig<'_, T, C>,
args: T::Args,
) -> Result<ARef<Self>> {
let new: Pin<KBox<Self>> = KBox::try_pin_init(
@@ -116,6 +126,7 @@ impl<T: DriverObject> Object<T> {
obj <- Opaque::init_zeroed(),
parent_resv_obj: config.parent_resv_obj.map(|p| p.into()),
inner <- T::new(dev, size, args),
+ _ctx: PhantomData::<C>,
}),
GFP_KERNEL,
)?;
@@ -148,9 +159,9 @@ impl<T: DriverObject> Object<T> {
}
/// Returns the `Device` that owns this GEM object.
- pub fn dev(&self) -> &device::Device<T::Driver> {
+ pub fn dev(&self) -> &Device<T::Driver, C> {
// SAFETY: `dev` will have been initialized in `Self::new()` by `drm_gem_shmem_init()`.
- unsafe { device::Device::from_raw((*self.as_raw()).dev) }
+ unsafe { Device::from_raw((*self.as_raw()).dev) }
}
extern "C" fn free_callback(obj: *mut bindings::drm_gem_object) {
@@ -168,7 +179,7 @@ impl<T: DriverObject> Object<T> {
// SAFETY:
// - We verified above that `obj` is valid, which makes `this` valid
// - This function is set in AllocOps, so we know that `this` is contained within a
- // `Object<T>`
+ // `Object<T, C>`
let this = unsafe { container_of!(Opaque::cast_from(this), Self, obj) }.cast_mut();
// SAFETY: We're recovering the Kbox<> we created in gem_create_object()
@@ -176,7 +187,7 @@ impl<T: DriverObject> Object<T> {
}
}
-impl<T: DriverObject> Deref for Object<T> {
+impl<T: DriverObject, C: DeviceContext> Deref for Object<T, C> {
type Target = T;
fn deref(&self) -> &Self::Target {
@@ -184,15 +195,15 @@ impl<T: DriverObject> Deref for Object<T> {
}
}
-impl<T: DriverObject> DerefMut for Object<T> {
+impl<T: DriverObject, C: DeviceContext> DerefMut for Object<T, C> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.inner
}
}
-impl<T: DriverObject> Sealed for Object<T> {}
+impl<T: DriverObject, C: DeviceContext> Sealed for Object<T, C> {}
-impl<T: DriverObject> gem::IntoGEMObject for Object<T> {
+impl<T: DriverObject, C: DeviceContext> gem::IntoGEMObject for Object<T, C> {
fn as_raw(&self) -> *mut bindings::drm_gem_object {
// SAFETY:
// - Our immutable reference is proof that this is safe to dereference.
@@ -200,18 +211,18 @@ impl<T: DriverObject> gem::IntoGEMObject for Object<T> {
unsafe { &raw mut (*self.obj.get()).base }
}
- unsafe fn from_raw<'a>(obj: *mut bindings::drm_gem_object) -> &'a Object<T> {
+ unsafe fn from_raw<'a>(obj: *mut bindings::drm_gem_object) -> &'a Self {
// SAFETY: The safety contract of from_gem_obj() guarantees that `obj` is contained within
// `Self`
unsafe {
let obj = Opaque::cast_from(container_of!(obj, bindings::drm_gem_shmem_object, base));
- &*container_of!(obj, Object<T>, obj)
+ &*container_of!(obj, Self, obj)
}
}
}
-impl<T: DriverObject> driver::AllocImpl for Object<T> {
+impl<T: DriverObject, C: DeviceContext> driver::AllocImpl for Object<T, C> {
type Driver = T::Driver;
const ALLOC_OPS: driver::AllocOps = driver::AllocOps {
diff --git a/rust/kernel/drm/gpuvm/mod.rs b/rust/kernel/drm/gpuvm/mod.rs
new file mode 100644
index 000000000000..ae58f6f667c1
--- /dev/null
+++ b/rust/kernel/drm/gpuvm/mod.rs
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+#![cfg(CONFIG_RUST_DRM_GPUVM)]
+
+//! DRM GPUVM in immediate mode
+//!
+//! Rust abstractions for using GPUVM in immediate mode. This is when the GPUVM state is updated
+//! during `run_job()`, i.e., in the DMA fence signalling critical path, to ensure that the GPUVM
+//! and the GPU's virtual address space have the same state at all times.
+//!
+//! C header: [`include/drm/drm_gpuvm.h`](srctree/include/drm/drm_gpuvm.h)
+
+use kernel::{
+ alloc::{
+ AllocError,
+ Flags as AllocFlags, //
+ },
+ bindings,
+ drm,
+ drm::gem::IntoGEMObject,
+ error::to_result,
+ prelude::*,
+ sync::aref::{
+ ARef,
+ AlwaysRefCounted, //
+ },
+ types::Opaque, //
+};
+
+use core::{
+ cell::UnsafeCell,
+ marker::PhantomData,
+ mem::{
+ ManuallyDrop,
+ MaybeUninit, //
+ },
+ ops::{
+ Deref,
+ DerefMut,
+ Range, //
+ },
+ ptr::{
+ self,
+ NonNull, //
+ }, //
+};
+
+mod sm_ops;
+pub use self::sm_ops::*;
+
+mod vm_bo;
+pub use self::vm_bo::*;
+
+mod va;
+pub use self::va::*;
+
+/// A DRM GPU VA manager.
+///
+/// This object is refcounted, but the locations of mapped ranges may only be accessed or changed
+/// via the special unique handle [`UniqueRefGpuVm`].
+///
+/// # Invariants
+///
+/// * Stored in an allocation managed by the refcount in `self.vm`.
+/// * Access to `data` and the gpuvm interval tree is controlled via the [`UniqueRefGpuVm`] type.
+/// * Does not contain any sparse [`GpuVa<T>`] instances.
+#[pin_data]
+pub struct GpuVm<T: DriverGpuVm> {
+ #[pin]
+ vm: Opaque<bindings::drm_gpuvm>,
+ /// Accessed only through the [`UniqueRefGpuVm`] reference.
+ data: UnsafeCell<T>,
+}
+
+// SAFETY: The GPUVM api does not assume that it is tied to a specific thread. The destructor will
+// drop the `data` field, which is okay because it is guaranteed `Send` by the `DriverGpuVm` trait.
+unsafe impl<T: DriverGpuVm> Send for GpuVm<T> {}
+// SAFETY: The GPUVM api is designed to allow &self methods to be called in parallel.
+unsafe impl<T: DriverGpuVm> Sync for GpuVm<T> {}
+
+// SAFETY: By type invariants, the allocation is managed by the refcount in `self.vm`.
+unsafe impl<T: DriverGpuVm> AlwaysRefCounted for GpuVm<T> {
+ fn inc_ref(&self) {
+ // SAFETY: By type invariants, the allocation is managed by the refcount in `self.vm`.
+ unsafe { bindings::drm_gpuvm_get(self.vm.get()) };
+ }
+
+ unsafe fn dec_ref(obj: NonNull<Self>) {
+ // SAFETY: By type invariants, the allocation is managed by the refcount in `self.vm`.
+ unsafe { bindings::drm_gpuvm_put((*obj.as_ptr()).vm.get()) };
+ }
+}
+
+impl<T: DriverGpuVm> PartialEq for GpuVm<T> {
+ #[inline]
+ fn eq(&self, other: &Self) -> bool {
+ core::ptr::eq(self.as_raw(), other.as_raw())
+ }
+}
+impl<T: DriverGpuVm> Eq for GpuVm<T> {}
+
+impl<T: DriverGpuVm> GpuVm<T> {
+ const fn vtable() -> &'static bindings::drm_gpuvm_ops {
+ &bindings::drm_gpuvm_ops {
+ vm_free: Some(Self::vm_free),
+ op_alloc: None,
+ op_free: None,
+ vm_bo_alloc: GpuVmBo::<T>::ALLOC_FN,
+ vm_bo_free: GpuVmBo::<T>::FREE_FN,
+ vm_bo_validate: None,
+ sm_step_map: Some(Self::sm_step_map),
+ sm_step_unmap: Some(Self::sm_step_unmap),
+ sm_step_remap: Some(Self::sm_step_remap),
+ }
+ }
+
+ /// Creates a GPUVM instance.
+ #[expect(clippy::new_ret_no_self)]
+ pub fn new<E>(
+ name: &'static CStr,
+ dev: &drm::Device<T::Driver>,
+ r_obj: &T::Object,
+ range: Range<u64>,
+ reserve_range: Range<u64>,
+ data: T,
+ ) -> Result<UniqueRefGpuVm<T>, E>
+ where
+ E: From<AllocError>,
+ E: From<core::convert::Infallible>,
+ {
+ let obj = KBox::try_pin_init::<E>(
+ try_pin_init!(Self {
+ data: UnsafeCell::new(data),
+ vm <- Opaque::ffi_init(|vm| {
+ // SAFETY: These arguments are valid. `vm` is valid until refcount drops to
+ // zero. The `vm` is zeroed before calling this method by `__GFP_ZERO` flag
+ // below.
+ unsafe {
+ bindings::drm_gpuvm_init(
+ vm,
+ name.as_char_ptr(),
+ bindings::drm_gpuvm_flags_DRM_GPUVM_IMMEDIATE_MODE
+ | bindings::drm_gpuvm_flags_DRM_GPUVM_RESV_PROTECTED,
+ dev.as_raw(),
+ r_obj.as_raw(),
+ range.start,
+ range.end - range.start,
+ reserve_range.start,
+ reserve_range.end - reserve_range.start,
+ const { Self::vtable() },
+ )
+ }
+ }),
+ }? E),
+ GFP_KERNEL | __GFP_ZERO,
+ )?;
+ // SAFETY: This transfers the initial refcount to the ARef.
+ let aref = unsafe {
+ ARef::from_raw(NonNull::new_unchecked(KBox::into_raw(
+ Pin::into_inner_unchecked(obj),
+ )))
+ };
+ // INVARIANT: This reference is unique.
+ Ok(UniqueRefGpuVm(aref))
+ }
+
+ /// Access this [`GpuVm`] from a raw pointer.
+ ///
+ /// # Safety
+ ///
+ /// The pointer must reference the `struct drm_gpuvm` in a valid [`GpuVm<T>`] that remains
+ /// valid for at least `'a`.
+ #[inline]
+ pub unsafe fn from_raw<'a>(ptr: *mut bindings::drm_gpuvm) -> &'a Self {
+ // SAFETY: Caller passes a pointer to the `drm_gpuvm` in a `GpuVm<T>`. Caller ensures the
+ // pointer is valid for 'a.
+ unsafe { &*kernel::container_of!(Opaque::cast_from(ptr), Self, vm) }
+ }
+
+ /// Returns a raw pointer to the embedded `struct drm_gpuvm`.
+ #[inline]
+ pub fn as_raw(&self) -> *mut bindings::drm_gpuvm {
+ self.vm.get()
+ }
+
+ /// The start of the VA space.
+ #[inline]
+ pub fn va_start(&self) -> u64 {
+ // SAFETY: The `mm_start` field is immutable.
+ unsafe { (*self.as_raw()).mm_start }
+ }
+
+ /// The length of the GPU's virtual address space.
+ #[inline]
+ pub fn va_length(&self) -> u64 {
+ // SAFETY: The `mm_range` field is immutable.
+ unsafe { (*self.as_raw()).mm_range }
+ }
+
+ /// Returns the range of the GPU virtual address space.
+ #[inline]
+ pub fn va_range(&self) -> Range<u64> {
+ let start = self.va_start();
+ // OVERFLOW: This reconstructs the Range<u64> passed to the constructor, so it won't fail.
+ let end = start + self.va_length();
+ Range { start, end }
+ }
+
+ /// Get or create the [`GpuVmBo`] for this gem object.
+ #[inline]
+ pub fn obtain(
+ &self,
+ obj: &T::Object,
+ data: impl PinInit<T::VmBoData>,
+ ) -> Result<ARef<GpuVmBo<T>>, AllocError> {
+ Ok(GpuVmBoAlloc::new(self, obj, data)?.obtain())
+ }
+
+ /// Clean up buffer objects that are no longer used.
+ #[inline]
+ pub fn deferred_cleanup(&self) {
+ // SAFETY: This GPUVM uses immediate mode.
+ unsafe { bindings::drm_gpuvm_bo_deferred_cleanup(self.as_raw()) }
+ }
+
+ /// Check if this GEM object is an external object for this GPUVM.
+ #[inline]
+ pub fn is_extobj(&self, obj: &T::Object) -> bool {
+ // SAFETY: We may call this with any GPUVM and GEM object.
+ unsafe { bindings::drm_gpuvm_is_extobj(self.as_raw(), obj.as_raw()) }
+ }
+
+ /// Free this GPUVM.
+ ///
+ /// # Safety
+ ///
+ /// Called when refcount hits zero.
+ unsafe extern "C" fn vm_free(me: *mut bindings::drm_gpuvm) {
+ // SAFETY: Caller passes a pointer to the `drm_gpuvm` in a `GpuVm<T>`.
+ let me = unsafe { kernel::container_of!(Opaque::cast_from(me), Self, vm).cast_mut() };
+ // SAFETY: By type invariants we can free it when refcount hits zero.
+ drop(unsafe { KBox::from_raw(me) })
+ }
+
+ #[inline]
+ fn raw_resv(&self) -> *mut bindings::dma_resv {
+ // SAFETY: `r_obj` is immutable and valid for duration of GPUVM.
+ unsafe { (*(*self.as_raw()).r_obj).resv }
+ }
+}
+
+/// The manager for a GPUVM.
+pub trait DriverGpuVm: Sized + Send {
+ /// Parent `Driver` for this object.
+ type Driver: drm::Driver<Object = Self::Object>;
+
+ /// The kind of GEM object stored in this GPUVM.
+ type Object: IntoGEMObject;
+
+ /// Data stored with each [`struct drm_gpuva`](struct@GpuVa).
+ type VaData;
+
+ /// Data stored with each [`struct drm_gpuvm_bo`](struct@GpuVmBo).
+ type VmBoData;
+
+ /// The private data passed to callbacks.
+ type SmContext<'ctx>;
+
+ /// Indicates that a new mapping should be created.
+ fn sm_step_map<'op, 'ctx>(
+ &mut self,
+ op: OpMap<'op, Self>,
+ context: &mut Self::SmContext<'ctx>,
+ ) -> Result<OpMapped<'op, Self>, Error>;
+
+ /// Indicates that an existing mapping should be removed.
+ fn sm_step_unmap<'op, 'ctx>(
+ &mut self,
+ op: OpUnmap<'op, Self>,
+ context: &mut Self::SmContext<'ctx>,
+ ) -> Result<OpUnmapped<'op, Self>, Error>;
+
+ /// Indicates that an existing mapping should be split up.
+ fn sm_step_remap<'op, 'ctx>(
+ &mut self,
+ op: OpRemap<'op, Self>,
+ context: &mut Self::SmContext<'ctx>,
+ ) -> Result<OpRemapped<'op, Self>, Error>;
+}
+
+/// The core of the DRM GPU VA manager.
+///
+/// This object is a unique reference to the VM that can access the interval tree and the Rust
+/// `data` field.
+///
+/// # Invariants
+///
+/// Each `GpuVm` instance has at most one `UniqueRefGpuVm` reference.
+pub struct UniqueRefGpuVm<T: DriverGpuVm>(ARef<GpuVm<T>>);
+
+// SAFETY: The GPUVM api is designed to allow &self methods to be called in parallel, and
+// concurrent access to `data` is safe due to the `T: Sync` requirement.
+unsafe impl<T: DriverGpuVm + Sync> Sync for UniqueRefGpuVm<T> {}
+
+impl<T: DriverGpuVm> UniqueRefGpuVm<T> {
+ /// Access the data owned by this `UniqueRefGpuVm` immutably.
+ #[inline]
+ pub fn data_ref(&self) -> &T {
+ // SAFETY: By the type invariants we may access `data`.
+ unsafe { &*self.0.data.get() }
+ }
+
+ /// Access the data owned by this `UniqueRefGpuVm` mutably.
+ #[inline]
+ pub fn data(&mut self) -> &mut T {
+ // SAFETY: By the type invariants we may access `data`.
+ unsafe { &mut *self.0.data.get() }
+ }
+}
+
+impl<T: DriverGpuVm> Deref for UniqueRefGpuVm<T> {
+ type Target = GpuVm<T>;
+
+ #[inline]
+ fn deref(&self) -> &GpuVm<T> {
+ &self.0
+ }
+}
diff --git a/rust/kernel/drm/gpuvm/sm_ops.rs b/rust/kernel/drm/gpuvm/sm_ops.rs
new file mode 100644
index 000000000000..69a8e5ab2821
--- /dev/null
+++ b/rust/kernel/drm/gpuvm/sm_ops.rs
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+use super::*;
+
+/// The actual data that gets threaded through the callbacks.
+struct SmData<'a, 'ctx, T: DriverGpuVm> {
+ gpuvm: &'a mut UniqueRefGpuVm<T>,
+ user_context: &'a mut T::SmContext<'ctx>,
+}
+
+/// Adds an extra field to `SmData` for `sm_map()` callbacks.
+///
+/// # Invariants
+///
+/// `self.vm_bo.gpuvm() == self.sm_data.gpuvm`.
+#[repr(C)]
+struct SmMapData<'a, 'ctx, T: DriverGpuVm> {
+ sm_data: SmData<'a, 'ctx, T>,
+ vm_bo: &'a GpuVmBo<T>,
+}
+
+/// The argument for [`UniqueRefGpuVm::sm_map`].
+pub struct OpMapRequest<'a, 'ctx, T: DriverGpuVm> {
+ /// Address in GPU virtual address space.
+ pub addr: u64,
+ /// Length of mapping to create.
+ pub range: u64,
+ /// Offset in GEM object.
+ pub gem_offset: u64,
+ /// The GEM object to map.
+ pub vm_bo: &'a GpuVmBo<T>,
+ /// The user-provided context type.
+ pub context: &'a mut T::SmContext<'ctx>,
+}
+
+impl<'a, 'ctx, T: DriverGpuVm> OpMapRequest<'a, 'ctx, T> {
+ fn raw_request(&self) -> bindings::drm_gpuvm_map_req {
+ bindings::drm_gpuvm_map_req {
+ map: bindings::drm_gpuva_op_map {
+ va: bindings::drm_gpuva_op_map__bindgen_ty_1 {
+ addr: self.addr,
+ range: self.range,
+ },
+ gem: bindings::drm_gpuva_op_map__bindgen_ty_2 {
+ offset: self.gem_offset,
+ obj: self.vm_bo.obj().as_raw(),
+ },
+ },
+ }
+ }
+}
+
+/// Represents an `sm_step_map` operation that has not yet been completed.
+pub struct OpMap<'op, T: DriverGpuVm> {
+ op: &'op bindings::drm_gpuva_op_map,
+ // Since these abstractions are designed for immediate mode, the VM BO needs to be
+ // pre-allocated, so we always have it available when we reach this point.
+ vm_bo: &'op GpuVmBo<T>,
+ // This ensures that 'op is invariant, so that `OpMap<'long, T>` does not
+ // coerce to `OpMap<'short, T>`. This ensures that the user can't return
+ // the wrong `OpMapped` value.
+ _invariant: PhantomData<*mut &'op mut T>,
+}
+
+impl<'op, T: DriverGpuVm> OpMap<'op, T> {
+ /// The base address of the new mapping.
+ pub fn addr(&self) -> u64 {
+ self.op.va.addr
+ }
+
+ /// The length of the new mapping.
+ pub fn length(&self) -> u64 {
+ self.op.va.range
+ }
+
+ /// The offset within the [`drm_gem_object`](DriverGpuVm::Object).
+ pub fn gem_offset(&self) -> u64 {
+ self.op.gem.offset
+ }
+
+ /// The [`drm_gem_object`](DriverGpuVm::Object) to map.
+ pub fn obj(&self) -> &T::Object {
+ // SAFETY: The `obj` pointer is guaranteed to be valid.
+ unsafe { <T::Object as IntoGEMObject>::from_raw(self.op.gem.obj) }
+ }
+
+ /// The [`GpuVmBo`] that the new VA will be associated with.
+ pub fn vm_bo(&self) -> &GpuVmBo<T> {
+ self.vm_bo
+ }
+
+ /// Use the pre-allocated VA to carry out this map operation.
+ pub fn insert(self, va: GpuVaAlloc<T>, va_data: impl PinInit<T::VaData>) -> OpMapped<'op, T> {
+ let va = va.prepare(va_data);
+ // SAFETY: By the type invariants we may access the interval tree.
+ unsafe { bindings::drm_gpuva_map(self.vm_bo.gpuvm().as_raw(), va, self.op) };
+
+ let _gpuva_guard = self.vm_bo().lock_gpuva();
+ // SAFETY: The va is prepared for insertion, and we hold the GEM lock.
+ unsafe { bindings::drm_gpuva_link(va, self.vm_bo.as_raw()) };
+
+ OpMapped {
+ _invariant: self._invariant,
+ }
+ }
+}
+
+/// Represents a completed [`OpMap`] operation.
+pub struct OpMapped<'op, T> {
+ _invariant: PhantomData<*mut &'op mut T>,
+}
+
+/// Represents an `sm_step_unmap` operation that has not yet been completed.
+pub struct OpUnmap<'op, T: DriverGpuVm> {
+ op: &'op bindings::drm_gpuva_op_unmap,
+ // This ensures that 'op is invariant, so that `OpUnmap<'long, T>` does not
+ // coerce to `OpUnmap<'short, T>`. This ensures that the user can't return the
+ // wrong `OpUnmapped` value.
+ _invariant: PhantomData<*mut &'op mut T>,
+}
+
+impl<'op, T: DriverGpuVm> OpUnmap<'op, T> {
+ /// Indicates whether this [`GpuVa`] is physically contiguous with the
+ /// original mapping request.
+ ///
+ /// Optionally, if `keep` is set, drivers may keep the actual page table
+ /// mappings for this `drm_gpuva`, adding only the missing page table
+ /// entries and updating the `drm_gpuvm` accordingly.
+ pub fn keep(&self) -> bool {
+ self.op.keep
+ }
+
+ /// The range being unmapped.
+ pub fn va(&self) -> &GpuVa<T> {
+ // SAFETY: This is a valid va. It's not the `kernel_alloc_node` because you can't unmap it,
+ // and it's not sparse by the `GpuVm<T>` type invariants.
+ unsafe { GpuVa::<T>::from_raw(self.op.va) }
+ }
+
+ /// Remove the VA.
+ pub fn remove(self) -> (OpUnmapped<'op, T>, GpuVaRemoved<T>) {
+ // SAFETY: The op references a valid drm_gpuva in the GPUVM.
+ unsafe { bindings::drm_gpuva_unmap(self.op) };
+ // SAFETY: The va is no longer in the interval tree so we may unlink it.
+ unsafe { bindings::drm_gpuva_unlink_defer(self.op.va) };
+
+ // SAFETY: We just removed this va from the `GpuVm<T>`.
+ let va = unsafe { GpuVaRemoved::from_raw(self.op.va) };
+
+ (
+ OpUnmapped {
+ _invariant: self._invariant,
+ },
+ va,
+ )
+ }
+}
+
+/// Represents a completed [`OpUnmap`] operation.
+pub struct OpUnmapped<'op, T> {
+ _invariant: PhantomData<*mut &'op mut T>,
+}
+
+/// Represents an `sm_step_remap` operation that has not yet been completed.
+pub struct OpRemap<'op, T: DriverGpuVm> {
+ op: &'op bindings::drm_gpuva_op_remap,
+ // This ensures that 'op is invariant, so that `OpRemap<'long, T>` does not
+ // coerce to `OpRemap<'short, T>`. This ensures that the user can't return the
+ // wrong `OpRemapped` value.
+ _invariant: PhantomData<*mut &'op mut T>,
+}
+
+impl<'op, T: DriverGpuVm> OpRemap<'op, T> {
+ /// The preceding part of a split mapping.
+ #[inline]
+ pub fn prev(&self) -> Option<&OpRemapMapData> {
+ // SAFETY: We checked for null, so the pointer must be valid.
+ NonNull::new(self.op.prev).map(|ptr| unsafe { OpRemapMapData::from_raw(ptr) })
+ }
+
+ /// The subsequent part of a split mapping.
+ #[inline]
+ pub fn next(&self) -> Option<&OpRemapMapData> {
+ // SAFETY: We checked for null, so the pointer must be valid.
+ NonNull::new(self.op.next).map(|ptr| unsafe { OpRemapMapData::from_raw(ptr) })
+ }
+
+ /// Indicates whether the `drm_gpuva` being removed is physically contiguous with the original
+ /// mapping request.
+ ///
+ /// Optionally, if `keep` is set, drivers may keep the actual page table mappings for this
+ /// `drm_gpuva`, adding only the missing page table entries and updating the `drm_gpuvm`
+ /// accordingly.
+ #[inline]
+ pub fn keep(&self) -> bool {
+ // SAFETY: The unmap pointer is always valid.
+ unsafe { (*self.op.unmap).keep }
+ }
+
+ /// The range being unmapped.
+ #[inline]
+ pub fn va_to_unmap(&self) -> &GpuVa<T> {
+ // SAFETY: This is a valid va. It's not the `kernel_alloc_node` because you can't unmap it,
+ // and it's not sparse by the `GpuVm<T>` type invariants.
+ unsafe { GpuVa::<T>::from_raw((*self.op.unmap).va) }
+ }
+
+ /// The [`drm_gem_object`](DriverGpuVm::Object) whose VA is being remapped.
+ #[inline]
+ pub fn obj(&self) -> &T::Object {
+ self.va_to_unmap().obj()
+ }
+
+ /// The [`GpuVmBo`] that is being remapped.
+ #[inline]
+ pub fn vm_bo(&self) -> &GpuVmBo<T> {
+ self.va_to_unmap().vm_bo()
+ }
+
+ /// Update the GPUVM to perform the remapping.
+ pub fn remap(
+ self,
+ va_alloc: [GpuVaAlloc<T>; 2],
+ prev_data: impl PinInit<T::VaData>,
+ next_data: impl PinInit<T::VaData>,
+ ) -> (OpRemapped<'op, T>, OpRemapRet<T>) {
+ let [va1, va2] = va_alloc;
+
+ let mut unused_va = None;
+ let mut prev_ptr = ptr::null_mut();
+ let mut next_ptr = ptr::null_mut();
+ if self.prev().is_some() {
+ prev_ptr = va1.prepare(prev_data);
+ } else {
+ unused_va = Some(va1);
+ }
+ if self.next().is_some() {
+ next_ptr = va2.prepare(next_data);
+ } else {
+ unused_va = Some(va2);
+ }
+
+ // SAFETY: Each pointer is non-null exactly when the matching `prev`/`next` part exists.
+ unsafe { bindings::drm_gpuva_remap(prev_ptr, next_ptr, self.op) };
+
+ let gpuva_guard = self.vm_bo().lock_gpuva();
+ if !prev_ptr.is_null() {
+ // SAFETY: The prev_ptr is a valid drm_gpuva prepared for insertion. The vm_bo is still
+ // valid as the not-yet-unlinked gpuva holds a refcount on the vm_bo.
+ unsafe { bindings::drm_gpuva_link(prev_ptr, self.vm_bo().as_raw()) };
+ }
+ if !next_ptr.is_null() {
+ // SAFETY: The next_ptr is a valid drm_gpuva prepared for insertion. The vm_bo is still
+ // valid as the not-yet-unlinked gpuva holds a refcount on the vm_bo.
+ unsafe { bindings::drm_gpuva_link(next_ptr, self.vm_bo().as_raw()) };
+ }
+ drop(gpuva_guard);
+
+ // SAFETY: The va is no longer in the interval tree so we may unlink it.
+ unsafe { bindings::drm_gpuva_unlink_defer((*self.op.unmap).va) };
+
+ (
+ OpRemapped {
+ _invariant: self._invariant,
+ },
+ OpRemapRet {
+ // SAFETY: We just removed this va from the `GpuVm<T>`.
+ unmapped_va: unsafe { GpuVaRemoved::from_raw((*self.op.unmap).va) },
+ unused_va,
+ },
+ )
+ }
+}
+
+/// Part of an [`OpRemap`] that represents a new mapping.
+#[repr(transparent)]
+pub struct OpRemapMapData(bindings::drm_gpuva_op_map);
+
+impl OpRemapMapData {
+ /// # Safety
+ /// Must reference a valid `drm_gpuva_op_map` for the duration of `'a`.
+ unsafe fn from_raw<'a>(ptr: NonNull<bindings::drm_gpuva_op_map>) -> &'a Self {
+ // SAFETY: ok per safety requirements
+ unsafe { ptr.cast().as_ref() }
+ }
+
+ /// The base address of the new mapping.
+ pub fn addr(&self) -> u64 {
+ self.0.va.addr
+ }
+
+ /// The length of the new mapping.
+ pub fn length(&self) -> u64 {
+ self.0.va.range
+ }
+
+ /// The offset within the [`drm_gem_object`](DriverGpuVm::Object).
+ pub fn gem_offset(&self) -> u64 {
+ self.0.gem.offset
+ }
+}
+
+/// Struct containing objects removed or not used by [`OpRemap::remap`].
+pub struct OpRemapRet<T: DriverGpuVm> {
+ /// The `drm_gpuva` that was removed.
+ pub unmapped_va: GpuVaRemoved<T>,
+ /// If the remap did not split the region into two pieces, then the unused `drm_gpuva` is
+ /// returned here.
+ pub unused_va: Option<GpuVaAlloc<T>>,
+}
+
+/// Represents a completed [`OpRemap`] operation.
+pub struct OpRemapped<'op, T> {
+ _invariant: PhantomData<*mut &'op mut T>,
+}
+
+impl<T: DriverGpuVm> UniqueRefGpuVm<T> {
+ /// Create a mapping, removing or remapping anything that overlaps.
+ ///
+ /// Internally calls the [`DriverGpuVm`] callbacks similar to [`Self::sm_unmap`], except that
+ /// the [`DriverGpuVm::sm_step_map`] is called once to create the requested mapping.
+ #[inline]
+ pub fn sm_map(&mut self, req: OpMapRequest<'_, '_, T>) -> Result {
+ if req.vm_bo.gpuvm() != &**self {
+ return Err(EINVAL);
+ }
+
+ let gpuvm = self.as_raw();
+ let raw_req = req.raw_request();
+ // INVARIANT: Checked above that `vm_bo.gpuvm() == self`.
+ let mut p = SmMapData {
+ sm_data: SmData {
+ gpuvm: self,
+ user_context: req.context,
+ },
+ vm_bo: req.vm_bo,
+ };
+ // SAFETY:
+ // * raw_request() creates a valid request.
+ // * The private data is valid to be interpreted as both SmData and SmMapData since the
+ // first field of the `#[repr(C)]` SmMapData is SmData.
+ to_result(unsafe {
+ bindings::drm_gpuvm_sm_map(gpuvm, (&raw mut p).cast(), &raw const raw_req)
+ })
+ }
+
+ /// Remove any mappings in the given region.
+ ///
+ /// Internally calls [`DriverGpuVm::sm_step_unmap`] for ranges entirely contained within the
+ /// given range, and [`DriverGpuVm::sm_step_remap`] for ranges that overlap with the range.
+ #[inline]
+ pub fn sm_unmap(&mut self, addr: u64, length: u64, context: &mut T::SmContext<'_>) -> Result {
+ let gpuvm = self.as_raw();
+ let mut p = SmData {
+ gpuvm: self,
+ user_context: context,
+ };
+ // SAFETY:
+ // * `gpuvm` is the raw pointer returned by `self.as_raw()`; no request struct is involved here.
+ // * The private data is a valid SmData.
+ to_result(unsafe { bindings::drm_gpuvm_sm_unmap(gpuvm, (&raw mut p).cast(), addr, length) })
+ }
+}
+
+impl<T: DriverGpuVm> GpuVm<T> {
+ /// # Safety
+ /// Must be called from `sm_map` with a pointer to `SmMapData`.
+ pub(super) unsafe extern "C" fn sm_step_map(
+ op: *mut bindings::drm_gpuva_op,
+ p: *mut c_void,
+ ) -> c_int {
+ // SAFETY: If we reach `sm_step_map` then we were called from `sm_map` which always passes
+ // an `SmMapData` as private data.
+ let p = unsafe { &mut *p.cast::<SmMapData<'_, '_, T>>() };
+ let op = OpMap {
+ // SAFETY: sm_step_map is called with a map operation.
+ op: unsafe { &(*op).__bindgen_anon_1.map },
+ vm_bo: p.vm_bo,
+ _invariant: PhantomData,
+ };
+ match p
+ .sm_data
+ .gpuvm
+ .data()
+ .sm_step_map(op, p.sm_data.user_context)
+ {
+ Ok(OpMapped { .. }) => 0,
+ Err(err) => err.to_errno(),
+ }
+ }
+
+ /// # Safety
+ /// Must be called from `sm_map` or `sm_unmap` with a pointer to `SmMapData` or `SmData`.
+ pub(super) unsafe extern "C" fn sm_step_unmap(
+ op: *mut bindings::drm_gpuva_op,
+ p: *mut c_void,
+ ) -> c_int {
+ // SAFETY: The caller provides a pointer that can be treated as `SmData`.
+ let p = unsafe { &mut *p.cast::<SmData<'_, '_, T>>() };
+ let op = OpUnmap {
+ // SAFETY: sm_step_unmap is called with an unmap operation.
+ op: unsafe { &(*op).__bindgen_anon_1.unmap },
+ _invariant: PhantomData,
+ };
+ match p.gpuvm.data().sm_step_unmap(op, p.user_context) {
+ Ok(OpUnmapped { .. }) => 0,
+ Err(err) => err.to_errno(),
+ }
+ }
+
+ /// # Safety
+ /// Must be called from `sm_map` or `sm_unmap` with a pointer to `SmMapData` or `SmData`.
+ pub(super) unsafe extern "C" fn sm_step_remap(
+ op: *mut bindings::drm_gpuva_op,
+ p: *mut c_void,
+ ) -> c_int {
+ // SAFETY: The caller provides a pointer that can be treated as `SmData`.
+ let p = unsafe { &mut *p.cast::<SmData<'_, '_, T>>() };
+ let op = OpRemap {
+ // SAFETY: sm_step_remap is called with a remap operation.
+ op: unsafe { &(*op).__bindgen_anon_1.remap },
+ _invariant: PhantomData,
+ };
+ match p.gpuvm.data().sm_step_remap(op, p.user_context) {
+ Ok(OpRemapped { .. }) => 0,
+ Err(err) => err.to_errno(),
+ }
+ }
+}
diff --git a/rust/kernel/drm/gpuvm/va.rs b/rust/kernel/drm/gpuvm/va.rs
new file mode 100644
index 000000000000..0b09fe44ab39
--- /dev/null
+++ b/rust/kernel/drm/gpuvm/va.rs
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+use super::*;
+
+/// Represents that a range of a GEM object is mapped in this [`GpuVm`] instance.
+///
+/// Does not assume that GEM lock is held.
+///
+/// # Invariants
+///
+/// * This is a valid `drm_gpuva` object that is resident in a [`GpuVm<T>`] instance.
+/// * It is associated with a [`GpuVmBo<T>`]. Or in other words, it's not a
+/// `gpuvm->kernel_alloc_node` and `DRM_GPUVA_SPARSE` is not set.
+/// * The associated [`GpuVmBo<T>`] is part of the GEM list.
+#[repr(C)]
+#[pin_data]
+pub struct GpuVa<T: DriverGpuVm> {
+ #[pin]
+ inner: Opaque<bindings::drm_gpuva>,
+ #[pin]
+ data: T::VaData,
+}
+
+impl<T: DriverGpuVm> PartialEq for GpuVa<T> {
+ #[inline]
+ fn eq(&self, other: &Self) -> bool {
+ core::ptr::eq(self.as_raw(), other.as_raw())
+ }
+}
+impl<T: DriverGpuVm> Eq for GpuVa<T> {}
+
+impl<T: DriverGpuVm> GpuVa<T> {
+ /// Access this [`GpuVa`] from a raw pointer.
+ ///
+ /// # Safety
+ ///
+ /// * For the duration of `'a`, the pointer must reference a valid `drm_gpuva` associated with
+ /// a [`GpuVm<T>`].
+ /// * It must be associated with a [`GpuVmBo<T>`].
+ /// * The associated [`GpuVmBo<T>`] is part of the GEM list.
+ #[inline]
+ pub unsafe fn from_raw<'a>(ptr: *mut bindings::drm_gpuva) -> &'a Self {
+ // CAST: `drm_gpuva` is first field and `repr(C)`.
+ // SAFETY: The safety requirements match the invariants of `GpuVa`.
+ unsafe { &*ptr.cast() }
+ }
+
+ /// Returns a raw pointer to underlying C value.
+ #[inline]
+ pub fn as_raw(&self) -> *mut bindings::drm_gpuva {
+ self.inner.get()
+ }
+
+ /// Returns the address of this mapping in the GPU virtual address space.
+ #[inline]
+ pub fn addr(&self) -> u64 {
+ // SAFETY: The `va.addr` field of `drm_gpuva` is immutable.
+ unsafe { (*self.as_raw()).va.addr }
+ }
+
+ /// Returns the length of this mapping.
+ #[inline]
+ pub fn length(&self) -> u64 {
+ // SAFETY: The `va.range` field of `drm_gpuva` is immutable.
+ unsafe { (*self.as_raw()).va.range }
+ }
+
+ /// Returns `addr..addr+length`.
+ #[inline]
+ pub fn range(&self) -> Range<u64> {
+ let addr = self.addr();
+ addr..addr + self.length()
+ }
+
+ /// Returns the offset within the GEM object.
+ #[inline]
+ pub fn gem_offset(&self) -> u64 {
+ // SAFETY: The `gem.offset` field of `drm_gpuva` is immutable.
+ unsafe { (*self.as_raw()).gem.offset }
+ }
+
+ /// Returns the GEM object.
+ #[inline]
+ pub fn obj(&self) -> &T::Object {
+ // SAFETY: The `gem.obj` field of `drm_gpuva` is immutable. We know that it's not null
+ // because this VA is associated with a `GpuVmBo<T>`.
+ unsafe { <T::Object as IntoGEMObject>::from_raw((*self.as_raw()).gem.obj) }
+ }
+
+ /// Returns the underlying [`GpuVmBo`] object that backs this [`GpuVa`].
+ #[inline]
+ pub fn vm_bo(&self) -> &GpuVmBo<T> {
+ // SAFETY: The `vm_bo` field of `drm_gpuva` is immutable. We know that it's not null
+ // because this VA is associated with a `GpuVmBo<T>`. The BO is in the GEM list by the type
+ // invariants.
+ unsafe { GpuVmBo::from_raw((*self.as_raw()).vm_bo) }
+ }
+}
+
+/// A pre-allocated [`GpuVa`] object.
+///
+/// # Invariants
+///
+/// The memory is zeroed.
+pub struct GpuVaAlloc<T: DriverGpuVm>(KBox<MaybeUninit<GpuVa<T>>>);
+
+impl<T: DriverGpuVm> GpuVaAlloc<T> {
+ /// Pre-allocate a [`GpuVa`] object.
+ pub fn new(flags: AllocFlags) -> Result<GpuVaAlloc<T>, AllocError> {
+ // INVARIANTS: Memory allocated with __GFP_ZERO.
+ Ok(GpuVaAlloc(KBox::new_uninit(flags | __GFP_ZERO)?))
+ }
+
+ /// Prepare this `drm_gpuva` for insertion into the GPUVM.
+ #[must_use]
+ pub(super) fn prepare(mut self, va_data: impl PinInit<T::VaData>) -> *mut bindings::drm_gpuva {
+ let va_ptr = MaybeUninit::as_mut_ptr(&mut self.0);
+ // SAFETY: `va_ptr` points into the `KBox` allocation, so `data` is a valid pinned location.
+ let Ok(()) = unsafe { va_data.__pinned_init(&raw mut (*va_ptr).data) };
+ KBox::into_raw(self.0).cast()
+ }
+}
+
+/// A [`GpuVa`] object that has been removed.
+///
+/// # Invariants
+///
+/// The `drm_gpuva` is not resident in the [`GpuVm`].
+pub struct GpuVaRemoved<T: DriverGpuVm>(KBox<GpuVa<T>>);
+
+impl<T: DriverGpuVm> GpuVaRemoved<T> {
+ /// Convert a raw pointer into a [`GpuVaRemoved`].
+ ///
+ /// # Safety
+ ///
+ /// * Must have been removed from a [`GpuVm<T>`].
+ /// * It must not be a `gpuvm->kernel_alloc_node` va.
+ pub(super) unsafe fn from_raw(ptr: *mut bindings::drm_gpuva) -> Self {
+ // SAFETY: Since it used to be a VA in a `GpuVm<T>` and it's not a kernel_alloc_node, this
+ // pointer references a `GpuVa<T>` with a valid `T::VaData`. Since it has been removed, we
+ // can take ownership of the allocation.
+ GpuVaRemoved(unsafe { KBox::from_raw(ptr.cast()) })
+ }
+
+ /// Take ownership of the VA data.
+ pub fn into_inner(self) -> T::VaData
+ where
+ T::VaData: Unpin,
+ {
+ KBox::into_inner(self.0).data
+ }
+}
+
+impl<T: DriverGpuVm> Deref for GpuVaRemoved<T> {
+ type Target = T::VaData;
+ fn deref(&self) -> &T::VaData {
+ &self.0.data
+ }
+}
+
+impl<T: DriverGpuVm> DerefMut for GpuVaRemoved<T>
+where
+ T::VaData: Unpin,
+{
+ fn deref_mut(&mut self) -> &mut T::VaData {
+ &mut self.0.data
+ }
+}
diff --git a/rust/kernel/drm/gpuvm/vm_bo.rs b/rust/kernel/drm/gpuvm/vm_bo.rs
new file mode 100644
index 000000000000..c064ac63897b
--- /dev/null
+++ b/rust/kernel/drm/gpuvm/vm_bo.rs
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+use super::*;
+
+/// Represents that a given GEM object has at least one mapping on this [`GpuVm`] instance.
+///
+/// Does not assume that GEM lock is held.
+///
+/// # Invariants
+///
+/// * Allocated with `kmalloc` and refcounted via `inner`.
+/// * Is present in the gem list.
+#[repr(C)]
+#[pin_data]
+pub struct GpuVmBo<T: DriverGpuVm> {
+ #[pin]
+ inner: Opaque<bindings::drm_gpuvm_bo>,
+ #[pin]
+ data: T::VmBoData,
+}
+
+// SAFETY: By type invariants, the allocation is managed by the refcount in `self.inner`.
+unsafe impl<T: DriverGpuVm> AlwaysRefCounted for GpuVmBo<T> {
+ fn inc_ref(&self) {
+ // SAFETY: By type invariants, the allocation is managed by the refcount in `self.inner`.
+ unsafe { bindings::drm_gpuvm_bo_get(self.inner.get()) };
+ }
+
+ unsafe fn dec_ref(obj: NonNull<Self>) {
+ // CAST: `drm_gpuvm_bo` is first field of repr(C) struct.
+ // SAFETY: By type invariants, the allocation is managed by the refcount in `self.inner`.
+ // This GPUVM instance uses immediate mode, so we may put the refcount using the deferred
+ // mechanism.
+ unsafe { bindings::drm_gpuvm_bo_put_deferred(obj.as_ptr().cast()) };
+ }
+}
+
+impl<T: DriverGpuVm> PartialEq for GpuVmBo<T> {
+ #[inline]
+ fn eq(&self, other: &Self) -> bool {
+ core::ptr::eq(self.as_raw(), other.as_raw())
+ }
+}
+impl<T: DriverGpuVm> Eq for GpuVmBo<T> {}
+
+impl<T: DriverGpuVm> GpuVmBo<T> {
+ /// The function pointer for allocating a GpuVmBo stored in the gpuvm vtable.
+ ///
+ /// Allocation is always implemented according to [`Self::vm_bo_alloc`], but it is set to
+ /// `None` if the default gpuvm behavior is the same as `vm_bo_alloc`.
+ ///
+ /// This may be `Some` even if `FREE_FN` is `None`, or vice-versa.
+ pub(super) const ALLOC_FN: Option<unsafe extern "C" fn() -> *mut bindings::drm_gpuvm_bo> = {
+ use core::alloc::Layout;
+ let base = Layout::new::<bindings::drm_gpuvm_bo>();
+ let rust = Layout::new::<Self>();
+ assert!(base.size() <= rust.size());
+ if base.size() != rust.size() || base.align() != rust.align() {
+ Some(Self::vm_bo_alloc)
+ } else {
+ // This causes GPUVM to allocate a `GpuVmBo<T>` with `kzalloc(sizeof(drm_gpuvm_bo))`.
+ None
+ }
+ };
+
+ /// The function pointer for freeing a GpuVmBo stored in the gpuvm vtable.
+ ///
+ /// Freeing is always implemented according to [`Self::vm_bo_free`], but it is set to `None` if
+ /// the default gpuvm behavior is the same as `vm_bo_free`.
+ ///
+ /// This may be `Some` even if `ALLOC_FN` is `None`, or vice-versa.
+ pub(super) const FREE_FN: Option<unsafe extern "C" fn(*mut bindings::drm_gpuvm_bo)> = {
+ if core::mem::needs_drop::<Self>() {
+ Some(Self::vm_bo_free)
+ } else {
+ // This causes GPUVM to free a `GpuVmBo<T>` with `kfree`.
+ None
+ }
+ };
+
+ /// Custom function for allocating a `drm_gpuvm_bo`.
+ ///
+ /// # Safety
+ ///
+ /// Always safe to call.
+ unsafe extern "C" fn vm_bo_alloc() -> *mut bindings::drm_gpuvm_bo {
+ let raw_ptr = KBox::<Self>::new_uninit(GFP_KERNEL | __GFP_ZERO)
+ .map(KBox::into_raw)
+ .unwrap_or(ptr::null_mut());
+
+ // CAST: `drm_gpuvm_bo` is first field of `Self`.
+ raw_ptr.cast()
+ }
+
+ /// Custom function for freeing a `drm_gpuvm_bo`.
+ ///
+ /// # Safety
+ ///
+ /// The pointer must have been allocated with [`GpuVmBo::ALLOC_FN`], and must not be used after
+ /// this call.
+ unsafe extern "C" fn vm_bo_free(ptr: *mut bindings::drm_gpuvm_bo) {
+ // CAST: `drm_gpuvm_bo` is first field of `Self`.
+ // SAFETY:
+ // * The ptr was allocated from kmalloc with the layout of `GpuVmBo<T>`.
+ // * `ptr->inner` has no destructor.
+ // * `ptr->data` contains a valid `T::VmBoData` that we can drop.
+ drop(unsafe { KBox::<Self>::from_raw(ptr.cast()) });
+ }
+
+ /// Access this [`GpuVmBo`] from a raw pointer.
+ ///
+ /// # Safety
+ ///
+ /// For the duration of `'a`, the pointer must reference a valid `drm_gpuvm_bo` associated with
+ /// a [`GpuVm<T>`]. The BO must also be present in the GEM list.
+ #[inline]
+ pub(crate) unsafe fn from_raw<'a>(ptr: *mut bindings::drm_gpuvm_bo) -> &'a Self {
+ // SAFETY: `drm_gpuvm_bo` is first field and `repr(C)`.
+ unsafe { &*ptr.cast() }
+ }
+
+ /// Returns a raw pointer to underlying C value.
+ #[inline]
+ pub fn as_raw(&self) -> *mut bindings::drm_gpuvm_bo {
+ self.inner.get()
+ }
+
+ /// The [`GpuVm`] that this GEM object is mapped in.
+ #[inline]
+ pub fn gpuvm(&self) -> &GpuVm<T> {
+ // SAFETY: The `vm` pointer is guaranteed to be valid.
+ unsafe { GpuVm::<T>::from_raw((*self.inner.get()).vm) }
+ }
+
+ /// The [`drm_gem_object`](DriverGpuVm::Object) for these mappings.
+ #[inline]
+ pub fn obj(&self) -> &T::Object {
+ // SAFETY: The `obj` pointer is guaranteed to be valid.
+ unsafe { <T::Object as IntoGEMObject>::from_raw((*self.inner.get()).obj) }
+ }
+
+ /// The driver data with this buffer object.
+ #[inline]
+ pub fn data(&self) -> &T::VmBoData {
+ &self.data
+ }
+
+ pub(super) fn lock_gpuva(&self) -> crate::sync::MutexGuard<'_, ()> {
+ // SAFETY: The GEM object is valid.
+ let ptr = unsafe { &raw mut (*self.obj().as_raw()).gpuva.lock };
+ // SAFETY: The GEM object is valid, so the mutex is properly initialized.
+ let mutex = unsafe { crate::sync::Mutex::from_raw(ptr) };
+ mutex.lock()
+ }
+}
+
+/// A pre-allocated [`GpuVmBo`] object.
+///
+/// # Invariants
+///
+/// Points at a `drm_gpuvm_bo` that contains a valid `T::VmBoData`, has a refcount of one, and is
+/// absent from any gem, extobj, or evict lists.
+pub(super) struct GpuVmBoAlloc<T: DriverGpuVm>(NonNull<GpuVmBo<T>>);
+
+impl<T: DriverGpuVm> GpuVmBoAlloc<T> {
+ /// Create a new pre-allocated [`GpuVmBo`].
+ ///
+ /// It's intentional that the initializer is infallible because `drm_gpuvm_bo_put` will call
+ /// drop on the data, so we don't have a way to free it when the data is missing.
+ #[inline]
+ pub(super) fn new(
+ gpuvm: &GpuVm<T>,
+ gem: &T::Object,
+ value: impl PinInit<T::VmBoData>,
+ ) -> Result<GpuVmBoAlloc<T>, AllocError> {
+ // CAST: `GpuVmBo::vm_bo_alloc` ensures that this memory was allocated with the layout
+ // of `GpuVmBo<T>`. The type is repr(C), so `container_of` is not required.
+ // SAFETY: The provided gpuvm and gem ptrs are valid for the duration of this call.
+ let raw_ptr = unsafe {
+ bindings::drm_gpuvm_bo_create(gpuvm.as_raw(), gem.as_raw()).cast::<GpuVmBo<T>>()
+ };
+ let ptr = NonNull::new(raw_ptr).ok_or(AllocError)?;
+ // SAFETY: `ptr->data` is a valid pinned location.
+ let Ok(()) = unsafe { value.__pinned_init(&raw mut (*raw_ptr).data) };
+ // INVARIANTS: We just created the vm_bo so it's absent from lists, and the data is valid
+ // as we just initialized it.
+ Ok(GpuVmBoAlloc(ptr))
+ }
+
+ /// Returns a raw pointer to underlying C value.
+ #[inline]
+ pub(super) fn as_raw(&self) -> *mut bindings::drm_gpuvm_bo {
+ // SAFETY: The pointer references a valid `drm_gpuvm_bo`.
+ unsafe { (*self.0.as_ptr()).inner.get() }
+ }
+
+ /// Look up whether there is an existing [`GpuVmBo`] for this gem object.
+ ///
+ /// The caller should not hold the GEM mutex or DMA resv lock.
+ #[inline]
+ pub(super) fn obtain(self) -> ARef<GpuVmBo<T>> {
+ let me = ManuallyDrop::new(self);
+ // SAFETY: Valid `drm_gpuvm_bo` not already in the lists. We do not access `me` after this
+ // call.
+ let ptr = unsafe { bindings::drm_gpuvm_bo_obtain_prealloc(me.as_raw()) };
+
+ // SAFETY: `drm_gpuvm_bo_obtain_prealloc` always returns a non-null ptr
+ let nonnull = unsafe { NonNull::new_unchecked(ptr.cast()) };
+
+ // INVARIANTS: `drm_gpuvm_bo_obtain_prealloc` ensures that the bo is in the GEM list.
+ // SAFETY: We received one refcount from `drm_gpuvm_bo_obtain_prealloc`.
+ let ret = unsafe { ARef::<GpuVmBo<T>>::from_raw(nonnull) };
+
+ // Ensure that external objects are in the extobj list.
+ //
+ // Note that we must call `extobj_add` even if `ptr != me` to avoid a race condition where
+ // we could end up using the extobj before the thread with `ptr == me` calls extobj_add.
+ if ret.gpuvm().is_extobj(ret.obj()) {
+ let resv_lock = ret.gpuvm().raw_resv();
+ // TODO: Use a proper lock guard here once a dma_resv lock abstraction exists.
+ // SAFETY: The GPUVM is still alive, so its resv lock is too.
+ unsafe { bindings::dma_resv_lock(resv_lock, ptr::null_mut()) };
+ // SAFETY: We hold the GPUVM's resv lock.
+ unsafe { bindings::drm_gpuvm_bo_extobj_add(ptr) };
+ // SAFETY: We took the lock, so we can unlock it.
+ unsafe { bindings::dma_resv_unlock(resv_lock) };
+ }
+
+ ret
+ }
+}
+
+impl<T: DriverGpuVm> Deref for GpuVmBoAlloc<T> {
+ type Target = GpuVmBo<T>;
+ #[inline]
+ fn deref(&self) -> &GpuVmBo<T> {
+ // SAFETY: By the type invariants we may deref while `Self` exists.
+ unsafe { self.0.as_ref() }
+ }
+}
+
+impl<T: DriverGpuVm> Drop for GpuVmBoAlloc<T> {
+ #[inline]
+ fn drop(&mut self) {
+ // TODO: Call drm_gpuvm_bo_destroy_not_in_lists() directly.
+ // SAFETY: It's safe to perform a deferred put in any context.
+ unsafe { bindings::drm_gpuvm_bo_put_deferred(self.as_raw()) };
+ }
+}
diff --git a/rust/kernel/drm/mod.rs b/rust/kernel/drm/mod.rs
index 1b82b6945edf..a66e7166f66b 100644
--- a/rust/kernel/drm/mod.rs
+++ b/rust/kernel/drm/mod.rs
@@ -6,9 +6,14 @@ pub mod device;
pub mod driver;
pub mod file;
pub mod gem;
+pub mod gpuvm;
pub mod ioctl;
pub use self::device::Device;
+pub use self::device::DeviceContext;
+pub use self::device::Registered;
+pub use self::device::Uninit;
+pub use self::device::UnregisteredDevice;
pub use self::driver::Driver;
pub use self::driver::DriverInfo;
pub use self::driver::Registration;
diff --git a/rust/kernel/i2c.rs b/rust/kernel/i2c.rs
index 7b908f0c5a58..6094d32652e3 100644
--- a/rust/kernel/i2c.rs
+++ b/rust/kernel/i2c.rs
@@ -93,18 +93,18 @@ pub struct Adapter<T: Driver>(T);
// SAFETY:
// - `bindings::i2c_driver` is a C type declared as `repr(C)`.
-// - `T` is the type of the driver's device private data.
+// - `T::Data` is the type of the driver's device private data.
// - `struct i2c_driver` embeds a `struct device_driver`.
// - `DEVICE_DRIVER_OFFSET` is the correct byte offset to the embedded `struct device_driver`.
-unsafe impl<T: Driver + 'static> driver::DriverLayout for Adapter<T> {
+unsafe impl<T: Driver> driver::DriverLayout for Adapter<T> {
type DriverType = bindings::i2c_driver;
- type DriverData = T;
+ type DriverData<'bound> = T::Data<'bound>;
const DEVICE_DRIVER_OFFSET: usize = core::mem::offset_of!(Self::DriverType, driver);
}
// SAFETY: A call to `unregister` for a given instance of `DriverType` is guaranteed to be valid if
// a preceding call to `register` has been successful.
-unsafe impl<T: Driver + 'static> driver::RegistrationOps for Adapter<T> {
+unsafe impl<T: Driver> driver::RegistrationOps for Adapter<T> {
unsafe fn register(
idrv: &Opaque<Self::DriverType>,
name: &'static CStr,
@@ -151,13 +151,13 @@ unsafe impl<T: Driver + 'static> driver::RegistrationOps for Adapter<T> {
}
}
-impl<T: Driver + 'static> Adapter<T> {
+impl<T: Driver> Adapter<T> {
extern "C" fn probe_callback(idev: *mut bindings::i2c_client) -> kernel::ffi::c_int {
// SAFETY: The I2C bus only ever calls the probe callback with a valid pointer to a
// `struct i2c_client`.
//
// INVARIANT: `idev` is valid for the duration of `probe_callback()`.
- let idev = unsafe { &*idev.cast::<I2cClient<device::CoreInternal>>() };
+ let idev = unsafe { &*idev.cast::<I2cClient<device::CoreInternal<'_>>>() };
let info =
Self::i2c_id_info(idev).or_else(|| <Self as driver::Adapter>::id_info(idev.as_ref()));
@@ -172,24 +172,24 @@ impl<T: Driver + 'static> Adapter<T> {
extern "C" fn remove_callback(idev: *mut bindings::i2c_client) {
// SAFETY: `idev` is a valid pointer to a `struct i2c_client`.
- let idev = unsafe { &*idev.cast::<I2cClient<device::CoreInternal>>() };
+ let idev = unsafe { &*idev.cast::<I2cClient<device::CoreInternal<'_>>>() };
// SAFETY: `remove_callback` is only ever called after a successful call to
// `probe_callback`, hence it's guaranteed that `I2cClient::set_drvdata()` has been called
- // and stored a `Pin<KBox<T>>`.
- let data = unsafe { idev.as_ref().drvdata_borrow::<T>() };
+ // and stored a `Pin<KBox<T::Data<'_>>>`.
+ let data = unsafe { idev.as_ref().drvdata_borrow::<T::Data<'_>>() };
T::unbind(idev, data);
}
extern "C" fn shutdown_callback(idev: *mut bindings::i2c_client) {
// SAFETY: `shutdown_callback` is only ever called for a valid `idev`
- let idev = unsafe { &*idev.cast::<I2cClient<device::CoreInternal>>() };
+ let idev = unsafe { &*idev.cast::<I2cClient<device::CoreInternal<'_>>>() };
// SAFETY: `shutdown_callback` is only ever called after a successful call to
// `probe_callback`, hence it's guaranteed that `Device::set_drvdata()` has been called
- // and stored a `Pin<KBox<T>>`.
- let data = unsafe { idev.as_ref().drvdata_borrow::<T>() };
+ // and stored a `Pin<KBox<T::Data<'_>>>`.
+ let data = unsafe { idev.as_ref().drvdata_borrow::<T::Data<'_>>() };
T::shutdown(idev, data);
}
@@ -222,7 +222,7 @@ impl<T: Driver + 'static> Adapter<T> {
}
}
-impl<T: Driver + 'static> driver::Adapter for Adapter<T> {
+impl<T: Driver> driver::Adapter for Adapter<T> {
type IdInfo = T::IdInfo;
fn of_id_table() -> Option<of::IdTable<Self::IdInfo>> {
@@ -294,22 +294,26 @@ macro_rules! module_i2c_driver {
///
/// impl i2c::Driver for MyDriver {
/// type IdInfo = ();
+/// type Data<'bound> = Self;
/// const I2C_ID_TABLE: Option<i2c::IdTable<Self::IdInfo>> = Some(&I2C_TABLE);
/// const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE);
/// const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = Some(&ACPI_TABLE);
///
-/// fn probe(
-/// _idev: &i2c::I2cClient<Core>,
-/// _id_info: Option<&Self::IdInfo>,
-/// ) -> impl PinInit<Self, Error> {
+/// fn probe<'bound>(
+/// _idev: &'bound i2c::I2cClient<Core<'_>>,
+/// _id_info: Option<&'bound Self::IdInfo>,
+/// ) -> impl PinInit<Self::Data<'bound>, Error> + 'bound {
/// Err(ENODEV)
/// }
///
-/// fn shutdown(_idev: &i2c::I2cClient<Core>, this: Pin<&Self>) {
+/// fn shutdown<'bound>(
+/// _idev: &'bound i2c::I2cClient<Core<'_>>,
+/// this: Pin<&Self::Data<'bound>>,
+/// ) {
/// }
/// }
///```
-pub trait Driver: Send {
+pub trait Driver {
/// The type holding information about each device id supported by the driver.
// TODO: Use `associated_type_defaults` once stabilized:
//
@@ -318,6 +322,9 @@ pub trait Driver: Send {
// ```
type IdInfo: 'static;
+ /// The type of the driver's bus device private data.
+ type Data<'bound>: Send + 'bound;
+
/// The table of device ids supported by the driver.
const I2C_ID_TABLE: Option<IdTable<Self::IdInfo>> = None;
@@ -331,10 +338,10 @@ pub trait Driver: Send {
///
/// Called when a new i2c client is added or discovered.
/// Implementers should attempt to initialize the client here.
- fn probe(
- dev: &I2cClient<device::Core>,
- id_info: Option<&Self::IdInfo>,
- ) -> impl PinInit<Self, Error>;
+ fn probe<'bound>(
+ dev: &'bound I2cClient<device::Core<'_>>,
+ id_info: Option<&'bound Self::IdInfo>,
+ ) -> impl PinInit<Self::Data<'bound>, Error> + 'bound;
/// I2C driver shutdown.
///
@@ -346,8 +353,8 @@ pub trait Driver: Send {
///
/// This callback is distinct from final resource cleanup, as the driver instance remains valid
/// after it returns. Any deallocation or teardown of driver-owned resources should instead be
- /// handled in `Self::drop`.
- fn shutdown(dev: &I2cClient<device::Core>, this: Pin<&Self>) {
+ /// handled in `Drop`.
+ fn shutdown<'bound>(dev: &'bound I2cClient<device::Core<'_>>, this: Pin<&Self::Data<'bound>>) {
let _ = (dev, this);
}
@@ -360,8 +367,8 @@ pub trait Driver: Send {
/// `&Device<Core>` or `&Device<Bound>` reference. For instance, drivers may try to perform I/O
/// operations to gracefully tear down the device.
///
- /// Otherwise, release operations for driver resources should be performed in `Self::drop`.
- fn unbind(dev: &I2cClient<device::Core>, this: Pin<&Self>) {
+ /// Otherwise, release operations for driver resources should be performed in `Drop`.
+ fn unbind<'bound>(dev: &'bound I2cClient<device::Core<'_>>, this: Pin<&Self::Data<'bound>>) {
let _ = (dev, this);
}
}
diff --git a/rust/kernel/io/mem.rs b/rust/kernel/io/mem.rs
index 7dc78d547f7a..fc2a3e24f8d5 100644
--- a/rust/kernel/io/mem.rs
+++ b/rust/kernel/io/mem.rs
@@ -62,33 +62,31 @@ impl<'a> IoRequest<'a> {
///
/// impl platform::Driver for SampleDriver {
/// # type IdInfo = ();
+ /// # type Data<'bound> = Self;
///
- /// fn probe(
- /// pdev: &platform::Device<Core>,
- /// info: Option<&Self::IdInfo>,
- /// ) -> impl PinInit<Self, Error> {
+ /// fn probe<'bound>(
+ /// pdev: &'bound platform::Device<Core<'_>>,
+ /// info: Option<&'bound Self::IdInfo>,
+ /// ) -> impl PinInit<Self, Error> + 'bound {
/// let offset = 0; // Some offset.
///
/// // If the size is known at compile time, use [`Self::iomap_sized`].
/// //
/// // No runtime checks will apply when reading and writing.
/// let request = pdev.io_request_by_index(0).ok_or(ENODEV)?;
- /// let iomem = request.iomap_sized::<42>();
- /// let iomem = KBox::pin_init(iomem, GFP_KERNEL)?;
- ///
- /// let io = iomem.access(pdev.as_ref())?;
+ /// let iomem = request.iomap_sized::<42>()?;
///
/// // Read and write a 32-bit value at `offset`.
- /// let data = io.read32(offset);
+ /// let data = iomem.read32(offset);
///
- /// io.write32(data, offset);
+ /// iomem.write32(data, offset);
///
/// # Ok(SampleDriver)
/// }
/// }
/// ```
- pub fn iomap_sized<const SIZE: usize>(self) -> impl PinInit<Devres<IoMem<SIZE>>, Error> + 'a {
- IoMem::new(self)
+ pub fn iomap_sized<const SIZE: usize>(self) -> Result<IoMem<'a, SIZE>> {
+ IoMem::ioremap(self.device, self.resource)
}
/// Same as [`Self::iomap_sized`] but with exclusive access to the
@@ -97,10 +95,8 @@ impl<'a> IoRequest<'a> {
/// This uses the [`ioremap()`] C API.
///
/// [`ioremap()`]: https://docs.kernel.org/driver-api/device-io.html#getting-access-to-the-device
- pub fn iomap_exclusive_sized<const SIZE: usize>(
- self,
- ) -> impl PinInit<Devres<ExclusiveIoMem<SIZE>>, Error> + 'a {
- ExclusiveIoMem::new(self)
+ pub fn iomap_exclusive_sized<const SIZE: usize>(self) -> Result<ExclusiveIoMem<'a, SIZE>> {
+ ExclusiveIoMem::ioremap(self.device, self.resource)
}
/// Maps an [`IoRequest`] where the size is not known at compile time,
@@ -126,11 +122,12 @@ impl<'a> IoRequest<'a> {
///
/// impl platform::Driver for SampleDriver {
/// # type IdInfo = ();
+ /// # type Data<'bound> = Self;
///
- /// fn probe(
- /// pdev: &platform::Device<Core>,
- /// info: Option<&Self::IdInfo>,
- /// ) -> impl PinInit<Self, Error> {
+ /// fn probe<'bound>(
+ /// pdev: &'bound platform::Device<Core<'_>>,
+ /// info: Option<&'bound Self::IdInfo>,
+ /// ) -> impl PinInit<Self, Error> + 'bound {
/// let offset = 0; // Some offset.
///
/// // Unlike [`Self::iomap_sized`], here the size of the memory region
@@ -138,27 +135,24 @@ impl<'a> IoRequest<'a> {
/// // family of functions should be used, leading to runtime checks on every
/// // access.
/// let request = pdev.io_request_by_index(0).ok_or(ENODEV)?;
- /// let iomem = request.iomap();
- /// let iomem = KBox::pin_init(iomem, GFP_KERNEL)?;
- ///
- /// let io = iomem.access(pdev.as_ref())?;
+ /// let iomem = request.iomap()?;
///
- /// let data = io.try_read32(offset)?;
+ /// let data = iomem.try_read32(offset)?;
///
- /// io.try_write32(data, offset)?;
+ /// iomem.try_write32(data, offset)?;
///
/// # Ok(SampleDriver)
/// }
/// }
/// ```
- pub fn iomap(self) -> impl PinInit<Devres<IoMem<0>>, Error> + 'a {
- Self::iomap_sized::<0>(self)
+ pub fn iomap(self) -> Result<IoMem<'a>> {
+ self.iomap_sized::<0>()
}
/// Same as [`Self::iomap`] but with exclusive access to the underlying
/// region.
- pub fn iomap_exclusive(self) -> impl PinInit<Devres<ExclusiveIoMem<0>>, Error> + 'a {
- Self::iomap_exclusive_sized::<0>(self)
+ pub fn iomap_exclusive(self) -> Result<ExclusiveIoMem<'a, 0>> {
+ self.iomap_exclusive_sized::<0>()
}
}
@@ -167,9 +161,9 @@ impl<'a> IoRequest<'a> {
/// # Invariants
///
/// - [`ExclusiveIoMem`] has exclusive access to the underlying [`IoMem`].
-pub struct ExclusiveIoMem<const SIZE: usize> {
+pub struct ExclusiveIoMem<'a, const SIZE: usize> {
/// The underlying `IoMem` instance.
- iomem: IoMem<SIZE>,
+ iomem: IoMem<'a, SIZE>,
/// The region abstraction. This represents exclusive access to the
/// range represented by the underlying `iomem`.
@@ -178,9 +172,9 @@ pub struct ExclusiveIoMem<const SIZE: usize> {
_region: Region,
}
-impl<const SIZE: usize> ExclusiveIoMem<SIZE> {
+impl<'a, const SIZE: usize> ExclusiveIoMem<'a, SIZE> {
/// Creates a new `ExclusiveIoMem` instance.
- fn ioremap(resource: &Resource) -> Result<Self> {
+ fn ioremap(dev: &'a Device<Bound>, resource: &Resource) -> Result<Self> {
let start = resource.start();
let size = resource.size();
let name = resource.name().unwrap_or_default();
@@ -194,26 +188,29 @@ impl<const SIZE: usize> ExclusiveIoMem<SIZE> {
)
.ok_or(EBUSY)?;
- let iomem = IoMem::ioremap(resource)?;
+ let iomem = IoMem::ioremap(dev, resource)?;
- let iomem = ExclusiveIoMem {
+ Ok(ExclusiveIoMem {
iomem,
_region: region,
- };
-
- Ok(iomem)
+ })
}
- /// Creates a new `ExclusiveIoMem` instance from a previously acquired [`IoRequest`].
- pub fn new<'a>(io_request: IoRequest<'a>) -> impl PinInit<Devres<Self>, Error> + 'a {
- let dev = io_request.device;
- let res = io_request.resource;
-
- Devres::new(dev, Self::ioremap(res))
+ /// Consume the `ExclusiveIoMem` and register it as a device-managed resource.
+ ///
+ /// The returned `Devres<ExclusiveIoMem<'static, SIZE>>` can outlive the original lifetime
+ /// `'a`. Access to the I/O memory is revoked when the device is unbound.
+ pub fn into_devres(self) -> Result<Devres<ExclusiveIoMem<'static, SIZE>>> {
+ // SAFETY: Casting to `'static` is sound because `Devres` guarantees the
+ // `ExclusiveIoMem` does not actually outlive the device -- access is revoked and the
+ // resource is released when the device is unbound.
+ let iomem: ExclusiveIoMem<'static, SIZE> = unsafe { core::mem::transmute(self) };
+ let dev = iomem.iomem.dev;
+ Devres::new(dev, iomem)
}
}
-impl<const SIZE: usize> Deref for ExclusiveIoMem<SIZE> {
+impl<const SIZE: usize> Deref for ExclusiveIoMem<'_, SIZE> {
type Target = Mmio<SIZE>;
fn deref(&self) -> &Self::Target {
@@ -230,12 +227,13 @@ impl<const SIZE: usize> Deref for ExclusiveIoMem<SIZE> {
///
/// [`IoMem`] always holds an [`MmioRaw`] instance that holds a valid pointer to the
/// start of the I/O memory mapped region.
-pub struct IoMem<const SIZE: usize = 0> {
+pub struct IoMem<'a, const SIZE: usize = 0> {
+ dev: &'a Device<Bound>,
io: MmioRaw<SIZE>,
}
-impl<const SIZE: usize> IoMem<SIZE> {
- fn ioremap(resource: &Resource) -> Result<Self> {
+impl<'a, const SIZE: usize> IoMem<'a, SIZE> {
+ fn ioremap(dev: &'a Device<Bound>, resource: &Resource) -> Result<Self> {
// Note: Some ioremap() implementations use types that depend on the CPU
// word width rather than the bus address width.
//
@@ -267,28 +265,33 @@ impl<const SIZE: usize> IoMem<SIZE> {
}
let io = MmioRaw::new(addr as usize, size)?;
- let io = IoMem { io };
- Ok(io)
+ Ok(IoMem { dev, io })
}
- /// Creates a new `IoMem` instance from a previously acquired [`IoRequest`].
- pub fn new<'a>(io_request: IoRequest<'a>) -> impl PinInit<Devres<Self>, Error> + 'a {
- let dev = io_request.device;
- let res = io_request.resource;
-
- Devres::new(dev, Self::ioremap(res))
+ /// Consume the `IoMem` and register it as a device-managed resource.
+ ///
+ /// The returned `Devres<IoMem<'static, SIZE>>` can outlive the original
+ /// lifetime `'a`. Access to the I/O memory is revoked when the device
+ /// is unbound.
+ pub fn into_devres(self) -> Result<Devres<IoMem<'static, SIZE>>> {
+ // SAFETY: Casting to `'static` is sound because `Devres` guarantees the `IoMem` does not
+ // actually outlive the device -- access is revoked and the resource is released when the
+ // device is unbound.
+ let iomem: IoMem<'static, SIZE> = unsafe { core::mem::transmute(self) };
+ let dev = iomem.dev;
+ Devres::new(dev, iomem)
}
}
-impl<const SIZE: usize> Drop for IoMem<SIZE> {
+impl<const SIZE: usize> Drop for IoMem<'_, SIZE> {
fn drop(&mut self) {
// SAFETY: Safe as by the invariant of `Io`.
unsafe { bindings::iounmap(self.io.addr() as *mut c_void) }
}
}
-impl<const SIZE: usize> Deref for IoMem<SIZE> {
+impl<const SIZE: usize> Deref for IoMem<'_, SIZE> {
type Target = Mmio<SIZE>;
fn deref(&self) -> &Self::Target {
diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs
index af74ddff6114..5071cae6543f 100644
--- a/rust/kernel/pci.rs
+++ b/rust/kernel/pci.rs
@@ -59,18 +59,18 @@ pub struct Adapter<T: Driver>(T);
// SAFETY:
// - `bindings::pci_driver` is a C type declared as `repr(C)`.
-// - `T` is the type of the driver's device private data.
+// - `T::Data` is the type of the driver's device private data.
// - `struct pci_driver` embeds a `struct device_driver`.
// - `DEVICE_DRIVER_OFFSET` is the correct byte offset to the embedded `struct device_driver`.
-unsafe impl<T: Driver + 'static> driver::DriverLayout for Adapter<T> {
+unsafe impl<T: Driver> driver::DriverLayout for Adapter<T> {
type DriverType = bindings::pci_driver;
- type DriverData = T;
+ type DriverData<'bound> = T::Data<'bound>;
const DEVICE_DRIVER_OFFSET: usize = core::mem::offset_of!(Self::DriverType, driver);
}
// SAFETY: A call to `unregister` for a given instance of `DriverType` is guaranteed to be valid if
// a preceding call to `register` has been successful.
-unsafe impl<T: Driver + 'static> driver::RegistrationOps for Adapter<T> {
+unsafe impl<T: Driver> driver::RegistrationOps for Adapter<T> {
unsafe fn register(
pdrv: &Opaque<Self::DriverType>,
name: &'static CStr,
@@ -96,7 +96,7 @@ unsafe impl<T: Driver + 'static> driver::RegistrationOps for Adapter<T> {
}
}
-impl<T: Driver + 'static> Adapter<T> {
+impl<T: Driver> Adapter<T> {
extern "C" fn probe_callback(
pdev: *mut bindings::pci_dev,
id: *const bindings::pci_device_id,
@@ -105,7 +105,7 @@ impl<T: Driver + 'static> Adapter<T> {
// `struct pci_dev`.
//
// INVARIANT: `pdev` is valid for the duration of `probe_callback()`.
- let pdev = unsafe { &*pdev.cast::<Device<device::CoreInternal>>() };
+ let pdev = unsafe { &*pdev.cast::<Device<device::CoreInternal<'_>>>() };
// SAFETY: `DeviceId` is a `#[repr(transparent)]` wrapper of `struct pci_device_id` and
// does not add additional invariants, so it's safe to transmute.
@@ -125,12 +125,12 @@ impl<T: Driver + 'static> Adapter<T> {
// `struct pci_dev`.
//
// INVARIANT: `pdev` is valid for the duration of `remove_callback()`.
- let pdev = unsafe { &*pdev.cast::<Device<device::CoreInternal>>() };
+ let pdev = unsafe { &*pdev.cast::<Device<device::CoreInternal<'_>>>() };
// SAFETY: `remove_callback` is only ever called after a successful call to
// `probe_callback`, hence it's guaranteed that `Device::set_drvdata()` has been called
- // and stored a `Pin<KBox<T>>`.
- let data = unsafe { pdev.as_ref().drvdata_borrow::<T>() };
+ // and stored a `Pin<KBox<T::Data<'_>>>`.
+ let data = unsafe { pdev.as_ref().drvdata_borrow::<T::Data<'_>>() };
T::unbind(pdev, data);
}
@@ -279,19 +279,20 @@ macro_rules! pci_device_table {
///
/// impl pci::Driver for MyDriver {
/// type IdInfo = ();
+/// type Data<'bound> = Self;
/// const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE;
///
-/// fn probe(
-/// _pdev: &pci::Device<Core>,
-/// _id_info: &Self::IdInfo,
-/// ) -> impl PinInit<Self, Error> {
+/// fn probe<'bound>(
+/// _pdev: &'bound pci::Device<Core<'_>>,
+/// _id_info: &'bound Self::IdInfo,
+/// ) -> impl PinInit<Self::Data<'bound>, Error> + 'bound {
/// Err(ENODEV)
/// }
/// }
///```
/// Drivers must implement this trait in order to get a PCI driver registered. Please refer to the
/// `Adapter` documentation for an example.
-pub trait Driver: Send {
+pub trait Driver {
/// The type holding information about each device id supported by the driver.
// TODO: Use `associated_type_defaults` once stabilized:
//
@@ -300,6 +301,9 @@ pub trait Driver: Send {
// ```
type IdInfo: 'static;
+ /// The type of the driver's bus device private data.
+ type Data<'bound>: Send + 'bound;
+
/// The table of device ids supported by the driver.
const ID_TABLE: IdTable<Self::IdInfo>;
@@ -307,7 +311,10 @@ pub trait Driver: Send {
///
/// Called when a new pci device is added or discovered. Implementers should
/// attempt to initialize the device here.
- fn probe(dev: &Device<device::Core>, id_info: &Self::IdInfo) -> impl PinInit<Self, Error>;
+ fn probe<'bound>(
+ dev: &'bound Device<device::Core<'_>>,
+ id_info: &'bound Self::IdInfo,
+ ) -> impl PinInit<Self::Data<'bound>, Error> + 'bound;
/// PCI driver unbind.
///
@@ -318,8 +325,8 @@ pub trait Driver: Send {
/// `&Device<Core>` or `&Device<Bound>` reference. For instance, drivers may try to perform I/O
/// operations to gracefully tear down the device.
///
- /// Otherwise, release operations for driver resources should be performed in `Self::drop`.
- fn unbind(dev: &Device<device::Core>, this: Pin<&Self>) {
+ /// Otherwise, release operations for driver resources should be performed in `Drop`.
+ fn unbind<'bound>(dev: &'bound Device<device::Core<'_>>, this: Pin<&Self::Data<'bound>>) {
let _ = (dev, this);
}
}
@@ -354,7 +361,7 @@ impl Device {
///
/// ```
/// # use kernel::{device::Core, pci::{self, Vendor}, prelude::*};
- /// fn log_device_info(pdev: &pci::Device<Core>) -> Result {
+ /// fn log_device_info(pdev: &pci::Device<Core<'_>>) -> Result {
/// // Get an instance of `Vendor`.
/// let vendor = pdev.vendor_id();
/// dev_info!(
@@ -445,7 +452,7 @@ impl Device {
}
}
-impl Device<device::Core> {
+impl<'a> Device<device::Core<'a>> {
/// Enable memory resources for this device.
pub fn enable_device_mem(&self) -> Result {
// SAFETY: `self.as_raw` is guaranteed to be a pointer to a valid `struct pci_dev`.
@@ -471,7 +478,7 @@ unsafe impl<Ctx: device::DeviceContext> device::AsBusDevice<Ctx> for Device<Ctx>
kernel::impl_device_context_deref!(unsafe { Device });
kernel::impl_device_context_into_aref!(Device);
-impl crate::dma::Device for Device<device::Core> {}
+impl<'a> crate::dma::Device<'a> for Device<device::Core<'a>> {}
// SAFETY: Instances of `Device` are always reference-counted.
unsafe impl crate::sync::aref::AlwaysRefCounted for Device {
@@ -523,3 +530,7 @@ unsafe impl Send for Device {}
// SAFETY: `Device` can be shared among threads because all methods of `Device`
// (i.e. `Device<Normal>`) are thread safe.
unsafe impl Sync for Device {}
+
+// SAFETY: Same as `Device<Normal>` -- the underlying `struct pci_dev` is the same;
+// `Bound` is a zero-sized type-state marker that does not affect thread safety.
+unsafe impl Sync for Device<device::Bound> {}
diff --git a/rust/kernel/pci/id.rs b/rust/kernel/pci/id.rs
index 50005d176561..dbaf301666e7 100644
--- a/rust/kernel/pci/id.rs
+++ b/rust/kernel/pci/id.rs
@@ -19,7 +19,7 @@ use crate::{
///
/// ```
/// # use kernel::{device::Core, pci::{self, Class}, prelude::*};
-/// fn probe_device(pdev: &pci::Device<Core>) -> Result {
+/// fn probe_device(pdev: &pci::Device<Core<'_>>) -> Result {
/// let pci_class = pdev.pci_class();
/// dev_info!(
/// pdev,
diff --git a/rust/kernel/pci/io.rs b/rust/kernel/pci/io.rs
index ae78676c927f..0461e01aaa20 100644
--- a/rust/kernel/pci/io.rs
+++ b/rust/kernel/pci/io.rs
@@ -14,8 +14,7 @@ use crate::{
Mmio,
MmioRaw, //
},
- prelude::*,
- sync::aref::ARef, //
+ prelude::*, //
};
use core::{
marker::PhantomData,
@@ -146,14 +145,18 @@ impl<'a, S: ConfigSpaceKind> IoKnownSize for ConfigSpace<'a, S> {
///
/// `Bar` always holds an `MmioRaw` instance that holds a valid pointer to the start of the I/O
/// memory mapped PCI BAR and its size.
-pub struct Bar<const SIZE: usize = 0> {
- pdev: ARef<Device>,
+pub struct Bar<'a, const SIZE: usize = 0> {
+ pdev: &'a Device<device::Bound>,
io: MmioRaw<SIZE>,
num: i32,
}
-impl<const SIZE: usize> Bar<SIZE> {
- pub(super) fn new(pdev: &Device, num: u32, name: &CStr) -> Result<Self> {
+impl<'a, const SIZE: usize> Bar<'a, SIZE> {
+ pub(super) fn new(
+ pdev: &'a Device<device::Bound>,
+ num: u32,
+ name: &'static CStr,
+ ) -> Result<Self> {
let len = pdev.resource_len(num)?;
if len == 0 {
return Err(ENOMEM);
@@ -196,11 +199,7 @@ impl<const SIZE: usize> Bar<SIZE> {
}
};
- Ok(Bar {
- pdev: pdev.into(),
- io,
- num,
- })
+ Ok(Bar { pdev, io, num })
}
/// # Safety
@@ -219,11 +218,24 @@ impl<const SIZE: usize> Bar<SIZE> {
fn release(&self) {
// SAFETY: The safety requirements are guaranteed by the type invariant of `self.pdev`.
- unsafe { Self::do_release(&self.pdev, self.io.addr(), self.num) };
+ unsafe { Self::do_release(self.pdev, self.io.addr(), self.num) };
+ }
+
+ /// Consume the `Bar` and register it as a device-managed resource.
+ ///
+ /// The returned `Devres<Bar<'static, SIZE>>` can outlive the original lifetime `'a`. Access
+ /// to the BAR is revoked when the device is unbound.
+ pub fn into_devres(self) -> Result<Devres<Bar<'static, SIZE>>> {
+ // SAFETY: Casting to `'static` is sound because `Devres` guarantees the `Bar` does not
+ // actually outlive the device -- access is revoked and the resource is released when the
+ // device is unbound.
+ let bar: Bar<'static, SIZE> = unsafe { core::mem::transmute(self) };
+ let pdev = bar.pdev;
+ Devres::new(pdev.as_ref(), bar)
}
}
-impl Bar {
+impl Bar<'_> {
#[inline]
pub(super) fn index_is_valid(index: u32) -> bool {
// A `struct pci_dev` owns an array of resources with at most `PCI_NUM_RESOURCES` entries.
@@ -231,13 +243,13 @@ impl Bar {
}
}
-impl<const SIZE: usize> Drop for Bar<SIZE> {
+impl<const SIZE: usize> Drop for Bar<'_, SIZE> {
fn drop(&mut self) {
self.release();
}
}
-impl<const SIZE: usize> Deref for Bar<SIZE> {
+impl<const SIZE: usize> Deref for Bar<'_, SIZE> {
type Target = Mmio<SIZE>;
fn deref(&self) -> &Self::Target {
@@ -252,17 +264,13 @@ impl Device<device::Bound> {
pub fn iomap_region_sized<'a, const SIZE: usize>(
&'a self,
bar: u32,
- name: &'a CStr,
- ) -> impl PinInit<Devres<Bar<SIZE>>, Error> + 'a {
- Devres::new(self.as_ref(), Bar::<SIZE>::new(self, bar, name))
+ name: &'static CStr,
+ ) -> Result<Bar<'a, SIZE>> {
+ Bar::new(self, bar, name)
}
/// Maps an entire PCI BAR after performing a region-request on it.
- pub fn iomap_region<'a>(
- &'a self,
- bar: u32,
- name: &'a CStr,
- ) -> impl PinInit<Devres<Bar>, Error> + 'a {
+ pub fn iomap_region<'a>(&'a self, bar: u32, name: &'static CStr) -> Result<Bar<'a>> {
self.iomap_region_sized::<0>(bar, name)
}
diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs
index 8917d4ee499f..d8d48f60b0b9 100644
--- a/rust/kernel/platform.rs
+++ b/rust/kernel/platform.rs
@@ -45,18 +45,18 @@ pub struct Adapter<T: Driver>(T);
// SAFETY:
// - `bindings::platform_driver` is a C type declared as `repr(C)`.
-// - `T` is the type of the driver's device private data.
+// - `T::Data` is the type of the driver's device private data.
// - `struct platform_driver` embeds a `struct device_driver`.
// - `DEVICE_DRIVER_OFFSET` is the correct byte offset to the embedded `struct device_driver`.
-unsafe impl<T: Driver + 'static> driver::DriverLayout for Adapter<T> {
+unsafe impl<T: Driver> driver::DriverLayout for Adapter<T> {
type DriverType = bindings::platform_driver;
- type DriverData = T;
+ type DriverData<'bound> = T::Data<'bound>;
const DEVICE_DRIVER_OFFSET: usize = core::mem::offset_of!(Self::DriverType, driver);
}
// SAFETY: A call to `unregister` for a given instance of `DriverType` is guaranteed to be valid if
// a preceding call to `register` has been successful.
-unsafe impl<T: Driver + 'static> driver::RegistrationOps for Adapter<T> {
+unsafe impl<T: Driver> driver::RegistrationOps for Adapter<T> {
unsafe fn register(
pdrv: &Opaque<Self::DriverType>,
name: &'static CStr,
@@ -91,13 +91,13 @@ unsafe impl<T: Driver + 'static> driver::RegistrationOps for Adapter<T> {
}
}
-impl<T: Driver + 'static> Adapter<T> {
+impl<T: Driver> Adapter<T> {
extern "C" fn probe_callback(pdev: *mut bindings::platform_device) -> kernel::ffi::c_int {
// SAFETY: The platform bus only ever calls the probe callback with a valid pointer to a
// `struct platform_device`.
//
// INVARIANT: `pdev` is valid for the duration of `probe_callback()`.
- let pdev = unsafe { &*pdev.cast::<Device<device::CoreInternal>>() };
+ let pdev = unsafe { &*pdev.cast::<Device<device::CoreInternal<'_>>>() };
let info = <Self as driver::Adapter>::id_info(pdev.as_ref());
from_result(|| {
@@ -113,18 +113,18 @@ impl<T: Driver + 'static> Adapter<T> {
// `struct platform_device`.
//
// INVARIANT: `pdev` is valid for the duration of `remove_callback()`.
- let pdev = unsafe { &*pdev.cast::<Device<device::CoreInternal>>() };
+ let pdev = unsafe { &*pdev.cast::<Device<device::CoreInternal<'_>>>() };
// SAFETY: `remove_callback` is only ever called after a successful call to
// `probe_callback`, hence it's guaranteed that `Device::set_drvdata()` has been called
- // and stored a `Pin<KBox<T>>`.
- let data = unsafe { pdev.as_ref().drvdata_borrow::<T>() };
+ // and stored a `Pin<KBox<T::Data<'_>>>`.
+ let data = unsafe { pdev.as_ref().drvdata_borrow::<T::Data<'_>>() };
T::unbind(pdev, data);
}
}
-impl<T: Driver + 'static> driver::Adapter for Adapter<T> {
+impl<T: Driver> driver::Adapter for Adapter<T> {
type IdInfo = T::IdInfo;
fn of_id_table() -> Option<of::IdTable<Self::IdInfo>> {
@@ -192,18 +192,19 @@ macro_rules! module_platform_driver {
///
/// impl platform::Driver for MyDriver {
/// type IdInfo = ();
+/// type Data<'bound> = Self;
/// const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE);
/// const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = Some(&ACPI_TABLE);
///
-/// fn probe(
-/// _pdev: &platform::Device<Core>,
-/// _id_info: Option<&Self::IdInfo>,
-/// ) -> impl PinInit<Self, Error> {
+/// fn probe<'bound>(
+/// _pdev: &'bound platform::Device<Core<'_>>,
+/// _id_info: Option<&'bound Self::IdInfo>,
+/// ) -> impl PinInit<Self::Data<'bound>, Error> + 'bound {
/// Err(ENODEV)
/// }
/// }
///```
-pub trait Driver: Send {
+pub trait Driver {
/// The type holding driver private data about each device id supported by the driver.
// TODO: Use associated_type_defaults once stabilized:
//
@@ -212,6 +213,9 @@ pub trait Driver: Send {
// ```
type IdInfo: 'static;
+ /// The type of the driver's bus device private data.
+ type Data<'bound>: Send + 'bound;
+
/// The table of OF device ids supported by the driver.
const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = None;
@@ -222,10 +226,10 @@ pub trait Driver: Send {
///
/// Called when a new platform device is added or discovered.
/// Implementers should attempt to initialize the device here.
- fn probe(
- dev: &Device<device::Core>,
- id_info: Option<&Self::IdInfo>,
- ) -> impl PinInit<Self, Error>;
+ fn probe<'bound>(
+ dev: &'bound Device<device::Core<'_>>,
+ id_info: Option<&'bound Self::IdInfo>,
+ ) -> impl PinInit<Self::Data<'bound>, Error> + 'bound;
/// Platform driver unbind.
///
@@ -236,8 +240,8 @@ pub trait Driver: Send {
/// `&Device<Core>` or `&Device<Bound>` reference. For instance, drivers may try to perform I/O
/// operations to gracefully tear down the device.
///
- /// Otherwise, release operations for driver resources should be performed in `Self::drop`.
- fn unbind(dev: &Device<device::Core>, this: Pin<&Self>) {
+ /// Otherwise, release operations for driver resources should be performed in `Drop`.
+ fn unbind<'bound>(dev: &'bound Device<device::Core<'_>>, this: Pin<&Self::Data<'bound>>) {
let _ = (dev, this);
}
}
@@ -509,7 +513,7 @@ impl Device<Bound> {
kernel::impl_device_context_deref!(unsafe { Device });
kernel::impl_device_context_into_aref!(Device);
-impl crate::dma::Device for Device<device::Core> {}
+impl<'a> crate::dma::Device<'a> for Device<device::Core<'a>> {}
// SAFETY: Instances of `Device` are always reference-counted.
unsafe impl crate::sync::aref::AlwaysRefCounted for Device {
@@ -561,3 +565,7 @@ unsafe impl Send for Device {}
// SAFETY: `Device` can be shared among threads because all methods of `Device`
// (i.e. `Device<Normal>`) are thread safe.
unsafe impl Sync for Device {}
+
+// SAFETY: Same as `Device<Normal>` -- the underlying `struct platform_device` is the same;
+// `Bound` is a zero-sized type-state marker that does not affect thread safety.
+unsafe impl Sync for Device<device::Bound> {}
diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
index 4329d3c2c2e5..ac316fd7b538 100644
--- a/rust/kernel/types.rs
+++ b/rust/kernel/types.rs
@@ -11,6 +11,10 @@ use core::{
};
use pin_init::{PinInit, Wrapper, Zeroable};
+#[doc(hidden)]
+pub mod for_lt;
+pub use for_lt::ForLt;
+
/// Used to transfer ownership to and from foreign (non-Rust) languages.
///
/// Ownership is transferred from Rust to a foreign language by calling [`Self::into_foreign`] and
@@ -27,10 +31,14 @@ pub unsafe trait ForeignOwnable: Sized {
const FOREIGN_ALIGN: usize;
/// Type used to immutably borrow a value that is currently foreign-owned.
- type Borrowed<'a>;
+ type Borrowed<'a>
+ where
+ Self: 'a;
/// Type used to mutably borrow a value that is currently foreign-owned.
- type BorrowedMut<'a>;
+ type BorrowedMut<'a>
+ where
+ Self: 'a;
/// Converts a Rust-owned object to a foreign-owned one.
///
diff --git a/rust/kernel/types/for_lt.rs b/rust/kernel/types/for_lt.rs
new file mode 100644
index 000000000000..d44323c28e8d
--- /dev/null
+++ b/rust/kernel/types/for_lt.rs
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+//! Provide implementation and test of the `ForLt` trait and macro.
+//!
+//! This module is hidden; users should just use the `ForLt!` macro directly.
+
+use core::marker::PhantomData;
+
+/// Representation of types generic over a lifetime.
+///
+/// The type must be covariant over the generic lifetime, i.e. the lifetime parameter
+/// can be soundly shortened.
+///
+/// The lifetime involved must be covariant.
+///
+/// # Macro
+///
+/// It is not recommended to implement this trait directly. The `ForLt!` macro is provided to
+/// obtain a type that implements this trait.
+///
+/// The full syntax is
+///
+/// ```
+/// # use kernel::types::ForLt;
+/// # fn expect_lt<F: ForLt>() {}
+/// # struct TypeThatUse<'a>(&'a ());
+/// # expect_lt::<
+/// ForLt!(for<'a> TypeThatUse<'a>)
+/// # >();
+/// ```
+///
+/// which gives a type so that `<ForLt!(for<'a> TypeThatUse<'a>) as ForLt>::Of<'b>`
+/// is `TypeThatUse<'b>`.
+///
+/// You may also use a short-hand syntax which works similar to lifetime elision.
+/// The macro also accepts types that do not involve a lifetime at all.
+///
+/// ```
+/// # use kernel::types::ForLt;
+/// # fn expect_lt<F: ForLt>() {}
+/// # struct TypeThatUse<'a>(&'a ());
+/// # expect_lt::<
+/// ForLt!(TypeThatUse<'_>) // Equivalent to `ForLt!(for<'a> TypeThatUse<'a>)`.
+/// # >();
+/// # expect_lt::<
+/// ForLt!(&u32) // Equivalent to `ForLt!(for<'a> &'a u32)`.
+/// # >();
+/// # expect_lt::<
+/// ForLt!(u32) // Equivalent to `ForLt!(for<'a> u32)`.
+/// # >();
+/// ```
+///
+/// The macro will attempt to prove that the type is indeed covariant over the lifetime supplied.
+/// When it cannot be syntactically proven, it will emit checks to ask the Rust compiler to prove
+/// it.
+///
+/// ```ignore,compile_fail
+/// # use kernel::types::ForLt;
+/// # fn expect_lt<F: ForLt>() {}
+/// # expect_lt::<
+/// ForLt!(fn(&u32)) // Contravariant, will fail compilation.
+/// # >();
+/// ```
+///
+/// There is a limitation if the type refers to generic parameters; if the macro cannot prove the
+/// covariance syntactically, the emitted checks will fail the compilation as it needs to refer to
+/// the generic parameter but is in a separate item.
+///
+/// ```
+/// # use kernel::types::ForLt;
+/// fn expect_lt<F: ForLt>() {}
+/// # #[allow(clippy::unnecessary_safety_comment, reason = "false positive")]
+/// fn generic_fn<T: 'static>() {
+/// // Syntactically proven by the macro
+/// expect_lt::<ForLt!(&T)>();
+/// // Syntactically proven by the macro
+/// expect_lt::<ForLt!(&KBox<T>)>();
+/// // Cannot be syntactically proven, need to check covariance of `KBox`
+/// // expect_lt::<ForLt!(&KBox<&T>)>();
+/// }
+/// ```
+///
+/// # Safety
+///
+/// `Self::Of<'a>` must be covariant over the lifetime `'a`.
+pub unsafe trait ForLt {
+ /// The type parameterized by the lifetime.
+ type Of<'a>: 'a;
+
+ /// Cast a reference to a shorter lifetime.
+ #[inline(always)]
+ fn cast_ref<'r, 'short: 'r, 'long: 'short>(long: &'r Self::Of<'long>) -> &'r Self::Of<'short> {
+ // SAFETY: This is sound as this trait guarantees covariance.
+ unsafe { core::mem::transmute(long) }
+ }
+}
+pub use macros::ForLt;
+
+/// This is intended to be an "unsafe-to-refer-to" type.
+///
+/// Must only be used by the `ForLt!` macro.
+///
+/// `T` is the magic `dyn for<'a> WithLt<'a, Of = TypeThatUse<'a>>` generated by the macro.
+///
+/// `WF` is a type that the macro can use to assert some specific type is well-formed.
+///
+/// `N` is to provide the macro a place to emit arbitrary items, in case it needs to prove
+/// additional properties.
+#[doc(hidden)]
+pub struct UnsafeForLtImpl<T: ?Sized, WF, const N: usize>(PhantomData<(WF, T)>);
+
+// This is a helper trait that lets the implementation of `ForLt` use HRTBs.
+#[doc(hidden)]
+pub trait WithLt<'a> {
+ type Of: 'a;
+}
+
+// SAFETY: In `ForLt!` macro, a covariance proof is generated when naming `UnsafeForLtImpl`
+// and it will fail to evaluate if the type is not covariant.
+unsafe impl<T: ?Sized + for<'a> WithLt<'a>, WF> ForLt for UnsafeForLtImpl<T, WF, 0> {
+ type Of<'a> = <T as WithLt<'a>>::Of;
+}
diff --git a/rust/kernel/usb.rs b/rust/kernel/usb.rs
index 9c17a672cd27..7aff0c82d0af 100644
--- a/rust/kernel/usb.rs
+++ b/rust/kernel/usb.rs
@@ -36,18 +36,18 @@ pub struct Adapter<T: Driver>(T);
// SAFETY:
// - `bindings::usb_driver` is a C type declared as `repr(C)`.
-// - `T` is the type of the driver's device private data.
+// - `T::Data` is the type of the driver's device private data.
// - `struct usb_driver` embeds a `struct device_driver`.
// - `DEVICE_DRIVER_OFFSET` is the correct byte offset to the embedded `struct device_driver`.
-unsafe impl<T: Driver + 'static> driver::DriverLayout for Adapter<T> {
+unsafe impl<T: Driver> driver::DriverLayout for Adapter<T> {
type DriverType = bindings::usb_driver;
- type DriverData = T;
+ type DriverData<'bound> = T::Data<'bound>;
const DEVICE_DRIVER_OFFSET: usize = core::mem::offset_of!(Self::DriverType, driver);
}
// SAFETY: A call to `unregister` for a given instance of `DriverType` is guaranteed to be valid if
// a preceding call to `register` has been successful.
-unsafe impl<T: Driver + 'static> driver::RegistrationOps for Adapter<T> {
+unsafe impl<T: Driver> driver::RegistrationOps for Adapter<T> {
unsafe fn register(
udrv: &Opaque<Self::DriverType>,
name: &'static CStr,
@@ -73,7 +73,7 @@ unsafe impl<T: Driver + 'static> driver::RegistrationOps for Adapter<T> {
}
}
-impl<T: Driver + 'static> Adapter<T> {
+impl<T: Driver> Adapter<T> {
extern "C" fn probe_callback(
intf: *mut bindings::usb_interface,
id: *const bindings::usb_device_id,
@@ -82,7 +82,7 @@ impl<T: Driver + 'static> Adapter<T> {
// `struct usb_interface` and `struct usb_device_id`.
//
// INVARIANT: `intf` is valid for the duration of `probe_callback()`.
- let intf = unsafe { &*intf.cast::<Interface<device::CoreInternal>>() };
+ let intf = unsafe { &*intf.cast::<Interface<device::CoreInternal<'_>>>() };
from_result(|| {
// SAFETY: `DeviceId` is a `#[repr(transparent)]` wrapper of `struct usb_device_id` and
@@ -92,7 +92,7 @@ impl<T: Driver + 'static> Adapter<T> {
let info = T::ID_TABLE.info(id.index());
let data = T::probe(intf, id, info);
- let dev: &device::Device<device::CoreInternal> = intf.as_ref();
+ let dev: &device::Device<device::CoreInternal<'_>> = intf.as_ref();
dev.set_drvdata(data)?;
Ok(0)
})
@@ -103,14 +103,14 @@ impl<T: Driver + 'static> Adapter<T> {
// `struct usb_interface`.
//
// INVARIANT: `intf` is valid for the duration of `disconnect_callback()`.
- let intf = unsafe { &*intf.cast::<Interface<device::CoreInternal>>() };
+ let intf = unsafe { &*intf.cast::<Interface<device::CoreInternal<'_>>>() };
- let dev: &device::Device<device::CoreInternal> = intf.as_ref();
+ let dev: &device::Device<device::CoreInternal<'_>> = intf.as_ref();
// SAFETY: `disconnect_callback` is only ever called after a successful call to
// `probe_callback`, hence it's guaranteed that `Device::set_drvdata()` has been called
- // and stored a `Pin<KBox<T>>`.
- let data = unsafe { dev.drvdata_borrow::<T>() };
+ // and stored a `Pin<KBox<T::Data<'_>>>`.
+ let data = unsafe { dev.drvdata_borrow::<T::Data<'_>>() };
T::disconnect(intf, data);
}
@@ -287,23 +287,31 @@ macro_rules! usb_device_table {
///
/// impl usb::Driver for MyDriver {
/// type IdInfo = ();
+/// type Data<'bound> = Self;
/// const ID_TABLE: usb::IdTable<Self::IdInfo> = &USB_TABLE;
///
-/// fn probe(
-/// _interface: &usb::Interface<Core>,
+/// fn probe<'bound>(
+/// _interface: &'bound usb::Interface<Core<'_>>,
/// _id: &usb::DeviceId,
-/// _info: &Self::IdInfo,
-/// ) -> impl PinInit<Self, Error> {
+/// _info: &'bound Self::IdInfo,
+/// ) -> impl PinInit<Self::Data<'bound>, Error> + 'bound {
/// Err(ENODEV)
/// }
///
-/// fn disconnect(_interface: &usb::Interface<Core>, _data: Pin<&Self>) {}
+/// fn disconnect<'bound>(
+/// _interface: &'bound usb::Interface<Core<'_>>,
+/// _data: Pin<&Self::Data<'bound>>,
+/// ) {
+/// }
/// }
///```
pub trait Driver {
/// The type holding information about each one of the device ids supported by the driver.
type IdInfo: 'static;
+ /// The type of the driver's bus device private data.
+ type Data<'bound>: Send + 'bound;
+
/// The table of device ids supported by the driver.
const ID_TABLE: IdTable<Self::IdInfo>;
@@ -311,16 +319,19 @@ pub trait Driver {
///
/// Called when a new USB interface is bound to this driver.
/// Implementers should attempt to initialize the interface here.
- fn probe(
- interface: &Interface<device::Core>,
+ fn probe<'bound>(
+ interface: &'bound Interface<device::Core<'_>>,
id: &DeviceId,
- id_info: &Self::IdInfo,
- ) -> impl PinInit<Self, Error>;
+ id_info: &'bound Self::IdInfo,
+ ) -> impl PinInit<Self::Data<'bound>, Error> + 'bound;
/// USB driver disconnect.
///
/// Called when the USB interface is about to be unbound from this driver.
- fn disconnect(interface: &Interface<device::Core>, data: Pin<&Self>);
+ fn disconnect<'bound>(
+ interface: &'bound Interface<device::Core<'_>>,
+ data: Pin<&Self::Data<'bound>>,
+ );
}
/// A USB interface.
@@ -464,6 +475,10 @@ unsafe impl Send for Device {}
// allow any mutation through a shared reference.
unsafe impl Sync for Device {}
+// SAFETY: Same as `Device<Normal>` -- the underlying `struct usb_device` is the same;
+// `Bound` is a zero-sized type-state marker that does not affect thread safety.
+unsafe impl Sync for Device<device::Bound> {}
+
/// Declares a kernel module that exposes a single USB driver.
///
/// # Examples
diff --git a/rust/macros/for_lt.rs b/rust/macros/for_lt.rs
new file mode 100644
index 000000000000..364d4113cd10
--- /dev/null
+++ b/rust/macros/for_lt.rs
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+use proc_macro2::{
+ Span,
+ TokenStream, //
+};
+use quote::{
+ format_ident,
+ quote, //
+};
+use syn::{
+ parse::{
+ Parse,
+ ParseStream, //
+ },
+ visit::Visit,
+ visit_mut::VisitMut,
+ Lifetime,
+ Result,
+ Token,
+ Type, //
+};
+
+pub(crate) enum HigherRankedType {
+ Explicit {
+ _for_token: Token![for],
+ _lt_token: Token![<],
+ lifetime: Lifetime,
+ _gt_token: Token![>],
+ ty: Type,
+ },
+ Implicit {
+ ty: Type,
+ },
+}
+
+impl Parse for HigherRankedType {
+ fn parse(input: ParseStream<'_>) -> Result<Self> {
+ if input.peek(Token![for]) {
+ Ok(Self::Explicit {
+ _for_token: input.parse()?,
+ _lt_token: input.parse()?,
+ lifetime: input.parse()?,
+ _gt_token: input.parse()?,
+ ty: input.parse()?,
+ })
+ } else {
+ Ok(Self::Implicit { ty: input.parse()? })
+ }
+ }
+}
+
+trait TypeExt {
+ fn expand_elided_lifetime(&self, explicit_lt: &Lifetime) -> Type;
+ fn replace_lifetime(&self, src: &Lifetime, dst: &Lifetime) -> Type;
+ fn has_lifetime(&self, lt: &Lifetime) -> bool;
+}
+
+impl TypeExt for Type {
+ fn expand_elided_lifetime(&self, explicit_lt: &Lifetime) -> Type {
+ struct ElidedLifetimeExpander<'a>(&'a Lifetime);
+
+ impl VisitMut for ElidedLifetimeExpander<'_> {
+ fn visit_lifetime_mut(&mut self, lifetime: &mut Lifetime) {
+ // Expand explicit `'_`
+ if lifetime.ident == "_" {
+ *lifetime = self.0.clone();
+ }
+ }
+
+ fn visit_type_reference_mut(&mut self, reference: &mut syn::TypeReference) {
+ syn::visit_mut::visit_type_reference_mut(self, reference);
+
+ if reference.lifetime.is_none() {
+ reference.lifetime = Some(self.0.clone());
+ }
+ }
+ }
+
+ let mut ret = self.clone();
+ ElidedLifetimeExpander(explicit_lt).visit_type_mut(&mut ret);
+ ret
+ }
+
+ fn replace_lifetime(&self, src: &Lifetime, dst: &Lifetime) -> Type {
+ struct LifetimeReplacer<'a>(&'a Lifetime, &'a Lifetime);
+
+ impl VisitMut for LifetimeReplacer<'_> {
+ fn visit_lifetime_mut(&mut self, lifetime: &mut Lifetime) {
+ if lifetime.ident == self.0.ident {
+ *lifetime = self.1.clone();
+ }
+ }
+ }
+
+ let mut ret = self.clone();
+ LifetimeReplacer(src, dst).visit_type_mut(&mut ret);
+ ret
+ }
+
+ fn has_lifetime(&self, lt: &Lifetime) -> bool {
+ struct HasLifetime<'a>(&'a Lifetime, bool);
+
+ impl Visit<'_> for HasLifetime<'_> {
+ fn visit_lifetime(&mut self, lifetime: &Lifetime) {
+ if lifetime.ident == self.0.ident {
+ self.1 = true;
+ }
+ }
+
+ // Macro invocations are opaque; conservatively assume they may
+ // reference the lifetime.
+ fn visit_macro(&mut self, _: &syn::Macro) {
+ self.1 = true;
+ }
+ }
+
+ let mut visitor = HasLifetime(lt, false);
+ visitor.visit_type(self);
+ visitor.1
+ }
+}
+
+struct Prover<'a>(&'a Lifetime, Vec<&'a Type>);
+
+impl<'a> Prover<'a> {
+ /// Prove that `ty` is covariant over `'lt`.
+ ///
+ /// This also needs to prove that it'll be well-formed for any instance of `'lt`.
+ /// It can be assumed that `ty` will be well-formed if `'lt` is substituted with `'static`.
+ fn prove(&mut self, ty: &'a Type) {
+ match ty {
+ Type::Paren(ty) => self.prove(&ty.elem),
+ Type::Group(ty) => self.prove(&ty.elem),
+
+ // No lifetime involved.
+ Type::Never(_) => {}
+
+ // `[T; N]` and `[T]` are covariant over `T`.
+ Type::Array(ty) => self.prove(&ty.elem),
+ Type::Slice(ty) => self.prove(&ty.elem),
+
+ Type::Tuple(ty) => {
+ for elem in &ty.elems {
+ self.prove(elem);
+ }
+ }
+
+ // `*const T` is covariant over `T`.
+ Type::Ptr(ty) if ty.const_token.is_some() => self.prove(&ty.elem),
+
+ // `&T` is covariant over `T` and its lifetime.
+ //
+ // Note that if we encounter `&'other_lt T`, then we still need to make sure the type
+ // is well-formed if `T` involves `&'lt`, so we defer to the compiler.
+ //
+ // This is to block cases like `ForLt!(for<'a> &'static &'a u32)`, as the presence of
+ // the type implies `'a: 'static`, which would be unsound.
+ Type::Reference(ty)
+ if ty.mutability.is_none() && ty.lifetime.as_ref() == Some(self.0) =>
+ {
+ self.prove(&ty.elem)
+ }
+
+ // `&[mut] T` is covariant over its lifetime.
+ // In case we have `&[mut] NoLifetime`, we don't need to do additional checks.
+ Type::Reference(ty) if !ty.elem.has_lifetime(self.0) => (),
+
+ // No mention of the lifetime at all, so no compiler check is needed.
+ ty if !ty.has_lifetime(self.0) => (),
+
+ // Otherwise, we need to emit checks so that the compiler can determine if the types
+ // are actually covariant.
+ ty => self.1.push(ty),
+ }
+ }
+}
+
+pub(crate) fn for_lt(input: HigherRankedType) -> TokenStream {
+ let (ty, lifetime) = match input {
+ HigherRankedType::Explicit { lifetime, ty, .. } => (ty, lifetime),
+ HigherRankedType::Implicit { ty } => {
+ // If there's no explicit `for<'a>` binder, inject a synthetic `'__elided` lifetime
+ // and expand elided sites.
+ let lifetime = Lifetime {
+ apostrophe: Span::mixed_site(),
+ ident: format_ident!("__elided", span = Span::mixed_site()),
+ };
+ (ty.expand_elided_lifetime(&lifetime), lifetime)
+ }
+ };
+
+ let mut prover = Prover(&lifetime, Vec::new());
+ prover.prove(&ty);
+
+ let mut proof = Vec::new();
+
+ // Emit proofs for every type that requires additional compiler help in proving covariance.
+ for (idx, required_proof) in prover.1.into_iter().enumerate() {
+ // Insert a proof that the type is well-formed.
+ //
+ // This is intended to work around a Rust compiler soundness bug related to HRTB.
+ // https://github.com/rust-lang/rust/issues/152489
+ //
+ // This needs to be a struct instead of fn to avoid the implied WF bounds.
+ let wf_proof_name = format_ident!("ProveWf{idx}");
+ proof.push(quote!(
+ struct #wf_proof_name<#lifetime>(
+ ::core::marker::PhantomData<&#lifetime ()>, #required_proof
+ );
+ ));
+
+ // Insert a proof that the type is covariant.
+ let cov_proof_name = format_ident!("prove_covariant_{idx}");
+ proof.push(quote!(
+ fn #cov_proof_name<'__short, '__long: '__short>(
+ long: #wf_proof_name<'__long>
+ ) -> #wf_proof_name<'__short> {
+ long
+ }
+ ));
+ }
+
+ // Make sure that the type is well-formed when substituting the lifetime with `'static`.
+ //
+ // Currently the Rust compiler doesn't check this; see the `ProveWf` comment above.
+ //
+ // We prefer to use this way of proving WF-ness as it can work when generics are involved.
+ let ty_static = ty.replace_lifetime(
+ &lifetime,
+ &Lifetime {
+ apostrophe: Span::mixed_site(),
+ ident: format_ident!("static"),
+ },
+ );
+
+ quote!(
+ ::kernel::types::for_lt::UnsafeForLtImpl::<
+ dyn for<#lifetime> ::kernel::types::for_lt::WithLt<#lifetime, Of = #ty>,
+ #ty_static,
+ {
+ #(#proof)*
+
+ 0
+ }
+ >
+ )
+}
diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs
index 2cfd59e0f9e7..4a48fabbc268 100644
--- a/rust/macros/lib.rs
+++ b/rust/macros/lib.rs
@@ -17,6 +17,7 @@
mod concat_idents;
mod export;
mod fmt;
+mod for_lt;
mod helpers;
mod kunit;
mod module;
@@ -489,3 +490,15 @@ pub fn kunit_tests(attr: TokenStream, input: TokenStream) -> TokenStream {
.unwrap_or_else(|e| e.into_compile_error())
.into()
}
+
+/// Obtain a type that implements [`ForLt`] for the given higher-ranked type.
+///
+/// Please refer to the documentation of the [`ForLt`] trait.
+///
+/// [`ForLt`]: trait.ForLt.html
+#[proc_macro]
+// The macro shares the name with the trait.
+#[allow(non_snake_case)]
+pub fn ForLt(input: TokenStream) -> TokenStream {
+ for_lt::for_lt(parse_macro_input!(input)).into()
+}
diff --git a/samples/rust/rust_debugfs.rs b/samples/rust/rust_debugfs.rs
index 0963efe19f93..1f59e08aaa4b 100644
--- a/samples/rust/rust_debugfs.rs
+++ b/samples/rust/rust_debugfs.rs
@@ -117,13 +117,14 @@ kernel::acpi_device_table!(
impl platform::Driver for RustDebugFs {
type IdInfo = ();
+ type Data<'bound> = Self;
const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = None;
const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = Some(&ACPI_TABLE);
- fn probe(
- pdev: &platform::Device<Core>,
- _info: Option<&Self::IdInfo>,
- ) -> impl PinInit<Self, Error> {
+ fn probe<'bound>(
+ pdev: &'bound platform::Device<Core<'_>>,
+ _info: Option<&'bound Self::IdInfo>,
+ ) -> impl PinInit<Self, Error> + 'bound {
RustDebugFs::new(pdev).pin_chain(|this| {
this.counter.store(91, Relaxed);
{
@@ -146,7 +147,7 @@ impl RustDebugFs {
dir.read_write_file(c"pair", new_mutex!(Inner { x: 3, y: 10 }))
}
- fn new(pdev: &platform::Device<Core>) -> impl PinInit<Self, Error> + '_ {
+ fn new<'a>(pdev: &'a platform::Device<Core<'_>>) -> impl PinInit<Self, Error> + 'a {
let debugfs = Dir::new(c"sample_debugfs");
let dev = pdev.as_ref();
diff --git a/samples/rust/rust_dma.rs b/samples/rust/rust_dma.rs
index 129bb4b39c04..c4d2d36602af 100644
--- a/samples/rust/rust_dma.rs
+++ b/samples/rust/rust_dma.rs
@@ -58,9 +58,13 @@ kernel::pci_device_table!(
impl pci::Driver for DmaSampleDriver {
type IdInfo = ();
+ type Data<'bound> = Self;
const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE;
- fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, Error> {
+ fn probe<'bound>(
+ pdev: &'bound pci::Device<Core<'_>>,
+ _info: &'bound Self::IdInfo,
+ ) -> impl PinInit<Self, Error> + 'bound {
pin_init::pin_init_scope(move || {
dev_info!(pdev, "Probe DMA test driver.\n");
diff --git a/samples/rust/rust_driver_auxiliary.rs b/samples/rust/rust_driver_auxiliary.rs
index 5c5a5105a3ff..2c1351040e45 100644
--- a/samples/rust/rust_driver_auxiliary.rs
+++ b/samples/rust/rust_driver_auxiliary.rs
@@ -10,15 +10,13 @@ use kernel::{
Bound,
Core, //
},
- devres::Devres,
driver,
pci,
prelude::*,
+ types::ForLt,
InPlaceModule, //
};
-use core::any::TypeId;
-
const MODULE_NAME: &CStr = <LocalModule as kernel::ModuleMetadata>::NAME;
const AUXILIARY_NAME: &CStr = c"auxiliary";
@@ -33,10 +31,14 @@ kernel::auxiliary_device_table!(
impl auxiliary::Driver for AuxiliaryDriver {
type IdInfo = ();
+ type Data<'bound> = Self;
const ID_TABLE: auxiliary::IdTable<Self::IdInfo> = &AUX_TABLE;
- fn probe(adev: &auxiliary::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, Error> {
+ fn probe<'bound>(
+ adev: &'bound auxiliary::Device<Core<'_>>,
+ _info: &'bound Self::IdInfo,
+ ) -> impl PinInit<Self, Error> + 'bound {
dev_info!(
adev,
"Probing auxiliary driver for auxiliary device with id={}\n",
@@ -49,13 +51,17 @@ impl auxiliary::Driver for AuxiliaryDriver {
}
}
-#[pin_data]
-struct ParentDriver {
- private: TypeId,
- #[pin]
- _reg0: Devres<auxiliary::Registration>,
- #[pin]
- _reg1: Devres<auxiliary::Registration>,
+struct Data<'bound> {
+ index: u32,
+ parent: &'bound pci::Device<Bound>,
+}
+
+struct ParentDriver;
+
+#[allow(clippy::type_complexity)]
+struct ParentData<'bound> {
+ _reg0: auxiliary::Registration<'bound, ForLt!(Data<'_>)>,
+ _reg1: auxiliary::Registration<'bound, ForLt!(Data<'_>)>,
}
kernel::pci_device_table!(
@@ -67,26 +73,53 @@ kernel::pci_device_table!(
impl pci::Driver for ParentDriver {
type IdInfo = ();
+ type Data<'bound> = ParentData<'bound>;
const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE;
- fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, Error> {
- try_pin_init!(Self {
- private: TypeId::of::<Self>(),
- _reg0 <- auxiliary::Registration::new(pdev.as_ref(), AUXILIARY_NAME, 0, MODULE_NAME),
- _reg1 <- auxiliary::Registration::new(pdev.as_ref(), AUXILIARY_NAME, 1, MODULE_NAME),
+ fn probe<'bound>(
+ pdev: &'bound pci::Device<Core<'_>>,
+ _info: &'bound Self::IdInfo,
+ ) -> impl PinInit<Self::Data<'bound>, Error> + 'bound {
+ Ok(ParentData {
+ // SAFETY: `ParentData` is the driver's private data, which is dropped when the
+ // device is unbound; i.e. `mem::forget()` is never called on it.
+ _reg0: unsafe {
+ auxiliary::Registration::new_with_lt(
+ pdev.as_ref(),
+ AUXILIARY_NAME,
+ 0,
+ MODULE_NAME,
+ Data {
+ index: 0,
+ parent: pdev,
+ },
+ )?
+ },
+ // SAFETY: See `_reg0` above.
+ _reg1: unsafe {
+ auxiliary::Registration::new_with_lt(
+ pdev.as_ref(),
+ AUXILIARY_NAME,
+ 1,
+ MODULE_NAME,
+ Data {
+ index: 1,
+ parent: pdev,
+ },
+ )?
+ },
})
}
}
impl ParentDriver {
fn connect(adev: &auxiliary::Device<Bound>) -> Result {
- let dev = adev.parent();
- let pdev: &pci::Device<Bound> = dev.try_into()?;
- let drvdata = dev.drvdata::<Self>()?;
+ let data = adev.registration_data::<ForLt!(Data<'_>)>()?;
+ let pdev = data.parent;
dev_info!(
- dev,
+ pdev,
"Connect auxiliary {} with parent: VendorID={}, DeviceID={:#x}\n",
adev.id(),
pdev.vendor_id(),
@@ -94,9 +127,9 @@ impl ParentDriver {
);
dev_info!(
- dev,
- "We have access to the private data of {:?}.\n",
- drvdata.private
+ pdev,
+ "Connected to auxiliary device with index {}.\n",
+ data.index
);
Ok(())
diff --git a/samples/rust/rust_driver_i2c.rs b/samples/rust/rust_driver_i2c.rs
index 6be79f9e9fb5..ead8263a7d48 100644
--- a/samples/rust/rust_driver_i2c.rs
+++ b/samples/rust/rust_driver_i2c.rs
@@ -35,15 +35,16 @@ kernel::of_device_table! {
impl i2c::Driver for SampleDriver {
type IdInfo = u32;
+ type Data<'bound> = Self;
const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = Some(&ACPI_TABLE);
const I2C_ID_TABLE: Option<i2c::IdTable<Self::IdInfo>> = Some(&I2C_TABLE);
const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE);
- fn probe(
- idev: &i2c::I2cClient<Core>,
- info: Option<&Self::IdInfo>,
- ) -> impl PinInit<Self, Error> {
+ fn probe<'bound>(
+ idev: &'bound i2c::I2cClient<Core<'_>>,
+ info: Option<&'bound Self::IdInfo>,
+ ) -> impl PinInit<Self, Error> + 'bound {
let dev = idev.as_ref();
dev_info!(dev, "Probe Rust I2C driver sample.\n");
@@ -55,11 +56,11 @@ impl i2c::Driver for SampleDriver {
Ok(Self)
}
- fn shutdown(idev: &i2c::I2cClient<Core>, _this: Pin<&Self>) {
+ fn shutdown<'bound>(idev: &'bound i2c::I2cClient<Core<'_>>, _this: Pin<&Self>) {
dev_info!(idev.as_ref(), "Shutdown Rust I2C driver sample.\n");
}
- fn unbind(idev: &i2c::I2cClient<Core>, _this: Pin<&Self>) {
+ fn unbind<'bound>(idev: &'bound i2c::I2cClient<Core<'_>>, _this: Pin<&Self>) {
dev_info!(idev.as_ref(), "Unbind Rust I2C driver sample.\n");
}
}
diff --git a/samples/rust/rust_driver_pci.rs b/samples/rust/rust_driver_pci.rs
index 47d3e84fab63..1aa8197d8698 100644
--- a/samples/rust/rust_driver_pci.rs
+++ b/samples/rust/rust_driver_pci.rs
@@ -9,7 +9,6 @@ use kernel::{
Bound,
Core, //
},
- devres::Devres,
io::{
register,
register::Array,
@@ -17,8 +16,7 @@ use kernel::{
},
num::Bounded,
pci,
- prelude::*,
- sync::aref::ARef, //
+ prelude::*, //
};
mod regs {
@@ -45,7 +43,7 @@ mod regs {
pub(super) const END: usize = 0x10;
}
-type Bar0 = pci::Bar<{ regs::END }>;
+type Bar0<'bound> = pci::Bar<'bound, { regs::END }>;
#[derive(Copy, Clone, Debug)]
struct TestIndex(u8);
@@ -66,14 +64,14 @@ impl TestIndex {
const NO_EVENTFD: Self = Self(0);
}
-#[pin_data(PinnedDrop)]
-struct SampleDriver {
- pdev: ARef<pci::Device>,
- #[pin]
- bar: Devres<Bar0>,
+struct SampleDriverData<'bound> {
+ pdev: &'bound pci::Device,
+ bar: Bar0<'bound>,
index: TestIndex,
}
+struct SampleDriver;
+
kernel::pci_device_table!(
PCI_TABLE,
MODULE_PCI_TABLE,
@@ -84,8 +82,8 @@ kernel::pci_device_table!(
)]
);
-impl SampleDriver {
- fn testdev(index: &TestIndex, bar: &Bar0) -> Result<u32> {
+impl SampleDriverData<'_> {
+ fn testdev(index: &TestIndex, bar: &Bar0<'_>) -> Result<u32> {
// Select the test.
bar.write_reg(regs::TEST::zeroed().with_index(*index));
@@ -140,51 +138,49 @@ impl SampleDriver {
impl pci::Driver for SampleDriver {
type IdInfo = TestIndex;
+ type Data<'bound> = SampleDriverData<'bound>;
const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE;
- fn probe(pdev: &pci::Device<Core>, info: &Self::IdInfo) -> impl PinInit<Self, Error> {
- pin_init::pin_init_scope(move || {
- let vendor = pdev.vendor_id();
- dev_dbg!(
- pdev,
- "Probe Rust PCI driver sample (PCI ID: {}, 0x{:x}).\n",
- vendor,
- pdev.device_id()
- );
-
- pdev.enable_device_mem()?;
- pdev.set_master();
-
- Ok(try_pin_init!(Self {
- bar <- pdev.iomap_region_sized::<{ regs::END }>(0, c"rust_driver_pci"),
- index: *info,
- _: {
- let bar = bar.access(pdev.as_ref())?;
-
- dev_info!(
- pdev,
- "pci-testdev data-match count: {}\n",
- Self::testdev(info, bar)?
- );
- Self::config_space(pdev);
- },
- pdev: pdev.into(),
- }))
+ fn probe<'bound>(
+ pdev: &'bound pci::Device<Core<'_>>,
+ info: &'bound Self::IdInfo,
+ ) -> impl PinInit<Self::Data<'bound>, Error> + 'bound {
+ let vendor = pdev.vendor_id();
+ dev_dbg!(
+ pdev,
+ "Probe Rust PCI driver sample (PCI ID: {}, 0x{:x}).\n",
+ vendor,
+ pdev.device_id()
+ );
+
+ pdev.enable_device_mem()?;
+ pdev.set_master();
+
+ let bar = pdev.iomap_region_sized::<{ regs::END }>(0, c"rust_driver_pci")?;
+
+ dev_info!(
+ pdev,
+ "pci-testdev data-match count: {}\n",
+ SampleDriverData::testdev(info, &bar)?
+ );
+ SampleDriverData::config_space(pdev);
+
+ Ok(SampleDriverData {
+ pdev,
+ bar,
+ index: *info,
})
}
- fn unbind(pdev: &pci::Device<Core>, this: Pin<&Self>) {
- if let Ok(bar) = this.bar.access(pdev.as_ref()) {
- // Reset pci-testdev by writing a new test index.
- bar.write_reg(regs::TEST::zeroed().with_index(this.index));
- }
+ fn unbind<'bound>(_pdev: &'bound pci::Device<Core<'_>>, this: Pin<&Self::Data<'bound>>) {
+ this.bar
+ .write_reg(regs::TEST::zeroed().with_index(this.index));
}
}
-#[pinned_drop]
-impl PinnedDrop for SampleDriver {
- fn drop(self: Pin<&mut Self>) {
+impl Drop for SampleDriverData<'_> {
+ fn drop(&mut self) {
dev_dbg!(self.pdev, "Remove Rust PCI driver sample.\n");
}
}
diff --git a/samples/rust/rust_driver_platform.rs b/samples/rust/rust_driver_platform.rs
index f2229d176fb9..ec0d6cac4f57 100644
--- a/samples/rust/rust_driver_platform.rs
+++ b/samples/rust/rust_driver_platform.rs
@@ -101,13 +101,14 @@ kernel::acpi_device_table!(
impl platform::Driver for SampleDriver {
type IdInfo = Info;
+ type Data<'bound> = Self;
const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE);
const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = Some(&ACPI_TABLE);
- fn probe(
- pdev: &platform::Device<Core>,
- info: Option<&Self::IdInfo>,
- ) -> impl PinInit<Self, Error> {
+ fn probe<'bound>(
+ pdev: &'bound platform::Device<Core<'_>>,
+ info: Option<&'bound Self::IdInfo>,
+ ) -> impl PinInit<Self, Error> + 'bound {
let dev = pdev.as_ref();
dev_dbg!(dev, "Probe Rust Platform driver sample.\n");
diff --git a/samples/rust/rust_driver_usb.rs b/samples/rust/rust_driver_usb.rs
index ab72e99e1274..02bd5085f9bc 100644
--- a/samples/rust/rust_driver_usb.rs
+++ b/samples/rust/rust_driver_usb.rs
@@ -26,21 +26,22 @@ kernel::usb_device_table!(
impl usb::Driver for SampleDriver {
type IdInfo = ();
+ type Data<'bound> = Self;
const ID_TABLE: usb::IdTable<Self::IdInfo> = &USB_TABLE;
- fn probe(
- intf: &usb::Interface<Core>,
+ fn probe<'bound>(
+ intf: &'bound usb::Interface<Core<'_>>,
_id: &usb::DeviceId,
- _info: &Self::IdInfo,
- ) -> impl PinInit<Self, Error> {
- let dev: &device::Device<Core> = intf.as_ref();
+ _info: &'bound Self::IdInfo,
+ ) -> impl PinInit<Self, Error> + 'bound {
+ let dev: &device::Device<Core<'_>> = intf.as_ref();
dev_info!(dev, "Rust USB driver sample probed\n");
Ok(Self { _intf: intf.into() })
}
- fn disconnect(intf: &usb::Interface<Core>, _data: Pin<&Self>) {
- let dev: &device::Device<Core> = intf.as_ref();
+ fn disconnect<'bound>(intf: &'bound usb::Interface<Core<'_>>, _data: Pin<&Self>) {
+ let dev: &device::Device<Core<'_>> = intf.as_ref();
dev_info!(dev, "Rust USB driver sample disconnected\n");
}
}
diff --git a/samples/rust/rust_i2c_client.rs b/samples/rust/rust_i2c_client.rs
index 8d2c12e535b0..2d876f4e3ee0 100644
--- a/samples/rust/rust_i2c_client.rs
+++ b/samples/rust/rust_i2c_client.rs
@@ -106,13 +106,14 @@ const BOARD_INFO: i2c::I2cBoardInfo =
impl platform::Driver for SampleDriver {
type IdInfo = ();
+ type Data<'bound> = Self;
const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE);
const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = Some(&ACPI_TABLE);
- fn probe(
- pdev: &platform::Device<device::Core>,
- _info: Option<&Self::IdInfo>,
- ) -> impl PinInit<Self, Error> {
+ fn probe<'bound>(
+ pdev: &'bound platform::Device<device::Core<'_>>,
+ _info: Option<&'bound Self::IdInfo>,
+ ) -> impl PinInit<Self, Error> + 'bound {
dev_info!(
pdev.as_ref(),
"Probe Rust I2C Client registration sample.\n"
@@ -129,7 +130,10 @@ impl platform::Driver for SampleDriver {
})
}
- fn unbind(pdev: &platform::Device<device::Core>, _this: Pin<&Self>) {
+ fn unbind<'bound>(
+ pdev: &'bound platform::Device<device::Core<'_>>,
+ _this: Pin<&Self::Data<'bound>>,
+ ) {
dev_info!(
pdev.as_ref(),
"Unbind Rust I2C Client registration sample.\n"
diff --git a/samples/rust/rust_soc.rs b/samples/rust/rust_soc.rs
index 8079c1c48416..808d58200eb6 100644
--- a/samples/rust/rust_soc.rs
+++ b/samples/rust/rust_soc.rs
@@ -37,13 +37,14 @@ kernel::acpi_device_table!(
impl platform::Driver for SampleSocDriver {
type IdInfo = ();
+ type Data<'bound> = Self;
const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE);
const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = Some(&ACPI_TABLE);
- fn probe(
- pdev: &platform::Device<Core>,
- _info: Option<&Self::IdInfo>,
- ) -> impl PinInit<Self, Error> {
+ fn probe<'bound>(
+ pdev: &'bound platform::Device<Core<'_>>,
+ _info: Option<&'bound Self::IdInfo>,
+ ) -> impl PinInit<Self, Error> + 'bound {
dev_dbg!(pdev, "Probe Rust SoC driver sample.\n");
let pdev = pdev.into();