diff options
Diffstat (limited to 'drivers/gpu/nova-core')
30 files changed, 5097 insertions, 177 deletions
diff --git a/drivers/gpu/nova-core/Kconfig b/drivers/gpu/nova-core/Kconfig index 8726d80d6ba4..20d3e6d0d796 100644 --- a/drivers/gpu/nova-core/Kconfig +++ b/drivers/gpu/nova-core/Kconfig @@ -1,5 +1,6 @@ config NOVA_CORE tristate "Nova Core GPU driver" + depends on 64BIT depends on PCI depends on RUST depends on RUST_FW_LOADER_ABSTRACTIONS diff --git a/drivers/gpu/nova-core/dma.rs b/drivers/gpu/nova-core/dma.rs new file mode 100644 index 000000000000..94f44bcfd748 --- /dev/null +++ b/drivers/gpu/nova-core/dma.rs @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Simple DMA object wrapper. + +use core::ops::{Deref, DerefMut}; + +use kernel::device; +use kernel::dma::CoherentAllocation; +use kernel::page::PAGE_SIZE; +use kernel::prelude::*; + +pub(crate) struct DmaObject { + dma: CoherentAllocation<u8>, +} + +impl DmaObject { + pub(crate) fn new(dev: &device::Device<device::Bound>, len: usize) -> Result<Self> { + let len = core::alloc::Layout::from_size_align(len, PAGE_SIZE) + .map_err(|_| EINVAL)? + .pad_to_align() + .size(); + let dma = CoherentAllocation::alloc_coherent(dev, len, GFP_KERNEL | __GFP_ZERO)?; + + Ok(Self { dma }) + } + + pub(crate) fn from_data(dev: &device::Device<device::Bound>, data: &[u8]) -> Result<Self> { + Self::new(dev, data.len()).map(|mut dma_obj| { + // TODO[COHA]: replace with `CoherentAllocation::write()` once available. + // SAFETY: + // - `dma_obj`'s size is at least `data.len()`. + // - We have just created this object and there is no other user at this stage. + unsafe { + core::ptr::copy_nonoverlapping( + data.as_ptr(), + dma_obj.dma.start_ptr_mut(), + data.len(), + ); + } + + dma_obj + }) + } +} + +impl Deref for DmaObject { + type Target = CoherentAllocation<u8>; + + fn deref(&self) -> &Self::Target { + &self.dma + } +} + +impl DerefMut for DmaObject { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.dma + } +} diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index 8c86101c26cb..edc72052e27a 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -1,6 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::{auxiliary, bindings, c_str, device::Core, pci, prelude::*}; +use kernel::{ + auxiliary, c_str, + device::Core, + pci, + pci::{Class, ClassMask, Vendor}, + prelude::*, + sizes::SZ_16M, + sync::Arc, +}; use crate::gpu::Gpu; @@ -11,17 +19,32 @@ pub(crate) struct NovaCore { _reg: auxiliary::Registration, } -const BAR0_SIZE: usize = 8; +const BAR0_SIZE: usize = SZ_16M; pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>; kernel::pci_device_table!( PCI_TABLE, MODULE_PCI_TABLE, <NovaCore as pci::Driver>::IdInfo, - [( - pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_NVIDIA, bindings::PCI_ANY_ID as _), - () - )] + [ + // Modern NVIDIA GPUs will show up as either VGA or 3D controllers. + ( + pci::DeviceId::from_class_and_vendor( + Class::DISPLAY_VGA, + ClassMask::ClassSubclass, + Vendor::NVIDIA + ), + () + ), + ( + pci::DeviceId::from_class_and_vendor( + Class::DISPLAY_3D, + ClassMask::ClassSubclass, + Vendor::NVIDIA + ), + () + ), + ] ); impl pci::Driver for NovaCore { @@ -34,15 +57,23 @@ impl pci::Driver for NovaCore { pdev.enable_device_mem()?; pdev.set_master(); - let bar = pdev.iomap_region_sized::<BAR0_SIZE>(0, c_str!("nova-core/bar0"))?; + let devres_bar = Arc::pin_init( + pdev.iomap_region_sized::<BAR0_SIZE>(0, c_str!("nova-core/bar0")), + GFP_KERNEL, + )?; + + // Used to provided a `&Bar0` to `Gpu::new` without tying it to the lifetime of + // `devres_bar`. + let bar_clone = Arc::clone(&devres_bar); + let bar = bar_clone.access(pdev.as_ref())?; let this = KBox::pin_init( try_pin_init!(Self { - gpu <- Gpu::new(pdev, bar)?, + gpu <- Gpu::new(pdev, devres_bar, bar), _reg: auxiliary::Registration::new( pdev.as_ref(), c_str!("nova-drm"), - 0, // TODO: Once it lands, use XArray; for now we don't use the ID. + 0, // TODO[XARR]: Once it lands, use XArray; for now we don't use the ID. crate::MODULE_NAME )?, }), @@ -51,4 +82,8 @@ impl pci::Driver for NovaCore { Ok(this) } + + fn unbind(pdev: &pci::Device<Core>, this: Pin<&Self>) { + this.gpu.unbind(pdev.as_ref()); + } } diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs new file mode 100644 index 000000000000..37e6298195e4 --- /dev/null +++ b/drivers/gpu/nova-core/falcon.rs @@ -0,0 +1,613 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Falcon microprocessor base support + +use core::ops::Deref; +use hal::FalconHal; +use kernel::device; +use kernel::dma::DmaAddress; +use kernel::prelude::*; +use kernel::sync::aref::ARef; +use kernel::time::Delta; + +use crate::dma::DmaObject; +use crate::driver::Bar0; +use crate::gpu::Chipset; +use crate::regs; +use crate::regs::macros::RegisterBase; +use crate::util; + +pub(crate) mod gsp; +mod hal; +pub(crate) mod sec2; + +// TODO[FPRI]: Replace with `ToPrimitive`. +macro_rules! impl_from_enum_to_u32 { + ($enum_type:ty) => { + impl From<$enum_type> for u32 { + fn from(value: $enum_type) -> Self { + value as u32 + } + } + }; +} + +/// Revision number of a falcon core, used in the [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] +/// register. +#[repr(u8)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub(crate) enum FalconCoreRev { + #[default] + Rev1 = 1, + Rev2 = 2, + Rev3 = 3, + Rev4 = 4, + Rev5 = 5, + Rev6 = 6, + Rev7 = 7, +} +impl_from_enum_to_u32!(FalconCoreRev); + +// TODO[FPRI]: replace with `FromPrimitive`. +impl TryFrom<u8> for FalconCoreRev { + type Error = Error; + + fn try_from(value: u8) -> Result<Self> { + use FalconCoreRev::*; + + let rev = match value { + 1 => Rev1, + 2 => Rev2, + 3 => Rev3, + 4 => Rev4, + 5 => Rev5, + 6 => Rev6, + 7 => Rev7, + _ => return Err(EINVAL), + }; + + Ok(rev) + } +} + +/// Revision subversion number of a falcon core, used in the +/// [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] register. +#[repr(u8)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub(crate) enum FalconCoreRevSubversion { + #[default] + Subversion0 = 0, + Subversion1 = 1, + Subversion2 = 2, + Subversion3 = 3, +} +impl_from_enum_to_u32!(FalconCoreRevSubversion); + +// TODO[FPRI]: replace with `FromPrimitive`. +impl TryFrom<u8> for FalconCoreRevSubversion { + type Error = Error; + + fn try_from(value: u8) -> Result<Self> { + use FalconCoreRevSubversion::*; + + let sub_version = match value & 0b11 { + 0 => Subversion0, + 1 => Subversion1, + 2 => Subversion2, + 3 => Subversion3, + _ => return Err(EINVAL), + }; + + Ok(sub_version) + } +} + +/// Security model of a falcon core, used in the [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] +/// register. +#[repr(u8)] +#[derive(Debug, Default, Copy, Clone)] +/// Security mode of the Falcon microprocessor. +/// +/// See `falcon.rst` for more details. +pub(crate) enum FalconSecurityModel { + /// Non-Secure: runs unsigned code without privileges. + #[default] + None = 0, + /// Light-Secured (LS): Runs signed code with some privileges. + /// Entry into this mode is only possible from 'Heavy-secure' mode, which verifies the code's + /// signature. + /// + /// Also known as Low-Secure, Privilege Level 2 or PL2. + Light = 2, + /// Heavy-Secured (HS): Runs signed code with full privileges. + /// The code's signature is verified by the Falcon Boot ROM (BROM). + /// + /// Also known as High-Secure, Privilege Level 3 or PL3. + Heavy = 3, +} +impl_from_enum_to_u32!(FalconSecurityModel); + +// TODO[FPRI]: replace with `FromPrimitive`. +impl TryFrom<u8> for FalconSecurityModel { + type Error = Error; + + fn try_from(value: u8) -> Result<Self> { + use FalconSecurityModel::*; + + let sec_model = match value { + 0 => None, + 2 => Light, + 3 => Heavy, + _ => return Err(EINVAL), + }; + + Ok(sec_model) + } +} + +/// Signing algorithm for a given firmware, used in the [`crate::regs::NV_PFALCON2_FALCON_MOD_SEL`] +/// register. It is passed to the Falcon Boot ROM (BROM) as a parameter. +#[repr(u8)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] +pub(crate) enum FalconModSelAlgo { + /// AES. + #[expect(dead_code)] + Aes = 0, + /// RSA3K. + #[default] + Rsa3k = 1, +} +impl_from_enum_to_u32!(FalconModSelAlgo); + +// TODO[FPRI]: replace with `FromPrimitive`. +impl TryFrom<u8> for FalconModSelAlgo { + type Error = Error; + + fn try_from(value: u8) -> Result<Self> { + match value { + 1 => Ok(FalconModSelAlgo::Rsa3k), + _ => Err(EINVAL), + } + } +} + +/// Valid values for the `size` field of the [`crate::regs::NV_PFALCON_FALCON_DMATRFCMD`] register. +#[repr(u8)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] +pub(crate) enum DmaTrfCmdSize { + /// 256 bytes transfer. + #[default] + Size256B = 0x6, +} +impl_from_enum_to_u32!(DmaTrfCmdSize); + +// TODO[FPRI]: replace with `FromPrimitive`. +impl TryFrom<u8> for DmaTrfCmdSize { + type Error = Error; + + fn try_from(value: u8) -> Result<Self> { + match value { + 0x6 => Ok(Self::Size256B), + _ => Err(EINVAL), + } + } +} + +/// Currently active core on a dual falcon/riscv (Peregrine) controller. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub(crate) enum PeregrineCoreSelect { + /// Falcon core is active. + #[default] + Falcon = 0, + /// RISC-V core is active. + Riscv = 1, +} +impl_from_enum_to_u32!(PeregrineCoreSelect); + +impl From<bool> for PeregrineCoreSelect { + fn from(value: bool) -> Self { + match value { + false => PeregrineCoreSelect::Falcon, + true => PeregrineCoreSelect::Riscv, + } + } +} + +/// Different types of memory present in a falcon core. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum FalconMem { + /// Instruction Memory. + Imem, + /// Data Memory. + Dmem, +} + +/// Defines the Framebuffer Interface (FBIF) aperture type. +/// This determines the memory type for external memory access during a DMA transfer, which is +/// performed by the Falcon's Framebuffer DMA (FBDMA) engine. See falcon.rst for more details. +#[derive(Debug, Clone, Default)] +pub(crate) enum FalconFbifTarget { + /// VRAM. + #[default] + /// Local Framebuffer (GPU's VRAM memory). + LocalFb = 0, + /// Coherent system memory (System DRAM). + CoherentSysmem = 1, + /// Non-coherent system memory (System DRAM). + NoncoherentSysmem = 2, +} +impl_from_enum_to_u32!(FalconFbifTarget); + +// TODO[FPRI]: replace with `FromPrimitive`. +impl TryFrom<u8> for FalconFbifTarget { + type Error = Error; + + fn try_from(value: u8) -> Result<Self> { + let res = match value { + 0 => Self::LocalFb, + 1 => Self::CoherentSysmem, + 2 => Self::NoncoherentSysmem, + _ => return Err(EINVAL), + }; + + Ok(res) + } +} + +/// Type of memory addresses to use. +#[derive(Debug, Clone, Default)] +pub(crate) enum FalconFbifMemType { + /// Virtual memory addresses. + #[default] + Virtual = 0, + /// Physical memory addresses. + Physical = 1, +} +impl_from_enum_to_u32!(FalconFbifMemType); + +/// Conversion from a single-bit register field. +impl From<bool> for FalconFbifMemType { + fn from(value: bool) -> Self { + match value { + false => Self::Virtual, + true => Self::Physical, + } + } +} + +/// Type used to represent the `PFALCON` registers address base for a given falcon engine. +pub(crate) struct PFalconBase(()); + +/// Type used to represent the `PFALCON2` registers address base for a given falcon engine. +pub(crate) struct PFalcon2Base(()); + +/// Trait defining the parameters of a given Falcon engine. +/// +/// Each engine provides one base for `PFALCON` and `PFALCON2` registers. The `ID` constant is used +/// to identify a given Falcon instance with register I/O methods. +pub(crate) trait FalconEngine: + Send + Sync + RegisterBase<PFalconBase> + RegisterBase<PFalcon2Base> + Sized +{ + /// Singleton of the engine, used to identify it with register I/O methods. + const ID: Self; +} + +/// Represents a portion of the firmware to be loaded into a particular memory (e.g. IMEM or DMEM). +#[derive(Debug, Clone)] +pub(crate) struct FalconLoadTarget { + /// Offset from the start of the source object to copy from. + pub(crate) src_start: u32, + /// Offset from the start of the destination memory to copy into. + pub(crate) dst_start: u32, + /// Number of bytes to copy. + pub(crate) len: u32, +} + +/// Parameters for the falcon boot ROM. +#[derive(Debug, Clone)] +pub(crate) struct FalconBromParams { + /// Offset in `DMEM`` of the firmware's signature. + pub(crate) pkc_data_offset: u32, + /// Mask of engines valid for this firmware. + pub(crate) engine_id_mask: u16, + /// ID of the ucode used to infer a fuse register to validate the signature. + pub(crate) ucode_id: u8, +} + +/// Trait for providing load parameters of falcon firmwares. +pub(crate) trait FalconLoadParams { + /// Returns the load parameters for `IMEM`. + fn imem_load_params(&self) -> FalconLoadTarget; + + /// Returns the load parameters for `DMEM`. + fn dmem_load_params(&self) -> FalconLoadTarget; + + /// Returns the parameters to write into the BROM registers. + fn brom_params(&self) -> FalconBromParams; + + /// Returns the start address of the firmware. + fn boot_addr(&self) -> u32; +} + +/// Trait for a falcon firmware. +/// +/// A falcon firmware can be loaded on a given engine, and is presented in the form of a DMA +/// object. +pub(crate) trait FalconFirmware: FalconLoadParams + Deref<Target = DmaObject> { + /// Engine on which this firmware is to be loaded. + type Target: FalconEngine; +} + +/// Contains the base parameters common to all Falcon instances. +pub(crate) struct Falcon<E: FalconEngine> { + hal: KBox<dyn FalconHal<E>>, + dev: ARef<device::Device>, +} + +impl<E: FalconEngine + 'static> Falcon<E> { + /// Create a new falcon instance. + /// + /// `need_riscv` is set to `true` if the caller expects the falcon to be a dual falcon/riscv + /// controller. + pub(crate) fn new( + dev: &device::Device, + chipset: Chipset, + bar: &Bar0, + need_riscv: bool, + ) -> Result<Self> { + let hwcfg1 = regs::NV_PFALCON_FALCON_HWCFG1::read(bar, &E::ID); + // Check that the revision and security model contain valid values. + let _ = hwcfg1.core_rev()?; + let _ = hwcfg1.security_model()?; + + if need_riscv { + let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); + if !hwcfg2.riscv() { + dev_err!( + dev, + "riscv support requested on a controller that does not support it\n" + ); + return Err(EINVAL); + } + } + + Ok(Self { + hal: hal::falcon_hal(chipset)?, + dev: dev.into(), + }) + } + + /// Wait for memory scrubbing to complete. + fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { + // TIMEOUT: memory scrubbing should complete in less than 20ms. + util::wait_on(Delta::from_millis(20), || { + if regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID).mem_scrubbing_done() { + Some(()) + } else { + None + } + }) + } + + /// Reset the falcon engine. + fn reset_eng(&self, bar: &Bar0) -> Result { + let _ = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); + + // According to OpenRM's `kflcnPreResetWait_GA102` documentation, HW sometimes does not set + // RESET_READY so a non-failing timeout is used. + let _ = util::wait_on(Delta::from_micros(150), || { + let r = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); + if r.reset_ready() { + Some(()) + } else { + None + } + }); + + regs::NV_PFALCON_FALCON_ENGINE::alter(bar, &E::ID, |v| v.set_reset(true)); + + // TODO[DLAY]: replace with udelay() or equivalent once available. + // TIMEOUT: falcon engine should not take more than 10us to reset. + let _: Result = util::wait_on(Delta::from_micros(10), || None); + + regs::NV_PFALCON_FALCON_ENGINE::alter(bar, &E::ID, |v| v.set_reset(false)); + + self.reset_wait_mem_scrubbing(bar)?; + + Ok(()) + } + + /// Reset the controller, select the falcon core, and wait for memory scrubbing to complete. + pub(crate) fn reset(&self, bar: &Bar0) -> Result { + self.reset_eng(bar)?; + self.hal.select_core(self, bar)?; + self.reset_wait_mem_scrubbing(bar)?; + + regs::NV_PFALCON_FALCON_RM::default() + .set_value(regs::NV_PMC_BOOT_0::read(bar).into()) + .write(bar, &E::ID); + + Ok(()) + } + + /// Perform a DMA write according to `load_offsets` from `dma_handle` into the falcon's + /// `target_mem`. + /// + /// `sec` is set if the loaded firmware is expected to run in secure mode. + fn dma_wr<F: FalconFirmware<Target = E>>( + &self, + bar: &Bar0, + fw: &F, + target_mem: FalconMem, + load_offsets: FalconLoadTarget, + sec: bool, + ) -> Result { + const DMA_LEN: u32 = 256; + + // For IMEM, we want to use the start offset as a virtual address tag for each page, since + // code addresses in the firmware (and the boot vector) are virtual. + // + // For DMEM we can fold the start offset into the DMA handle. + let (src_start, dma_start) = match target_mem { + FalconMem::Imem => (load_offsets.src_start, fw.dma_handle()), + FalconMem::Dmem => ( + 0, + fw.dma_handle_with_offset(load_offsets.src_start as usize)?, + ), + }; + if dma_start % DmaAddress::from(DMA_LEN) > 0 { + dev_err!( + self.dev, + "DMA transfer start addresses must be a multiple of {}", + DMA_LEN + ); + return Err(EINVAL); + } + + // DMA transfers can only be done in units of 256 bytes. Compute how many such transfers we + // need to perform. + let num_transfers = load_offsets.len.div_ceil(DMA_LEN); + + // Check that the area we are about to transfer is within the bounds of the DMA object. + // Upper limit of transfer is `(num_transfers * DMA_LEN) + load_offsets.src_start`. + match num_transfers + .checked_mul(DMA_LEN) + .and_then(|size| size.checked_add(load_offsets.src_start)) + { + None => { + dev_err!(self.dev, "DMA transfer length overflow"); + return Err(EOVERFLOW); + } + Some(upper_bound) if upper_bound as usize > fw.size() => { + dev_err!(self.dev, "DMA transfer goes beyond range of DMA object"); + return Err(EINVAL); + } + Some(_) => (), + }; + + // Set up the base source DMA address. + + regs::NV_PFALCON_FALCON_DMATRFBASE::default() + .set_base((dma_start >> 8) as u32) + .write(bar, &E::ID); + regs::NV_PFALCON_FALCON_DMATRFBASE1::default() + .set_base((dma_start >> 40) as u16) + .write(bar, &E::ID); + + let cmd = regs::NV_PFALCON_FALCON_DMATRFCMD::default() + .set_size(DmaTrfCmdSize::Size256B) + .set_imem(target_mem == FalconMem::Imem) + .set_sec(if sec { 1 } else { 0 }); + + for pos in (0..num_transfers).map(|i| i * DMA_LEN) { + // Perform a transfer of size `DMA_LEN`. + regs::NV_PFALCON_FALCON_DMATRFMOFFS::default() + .set_offs(load_offsets.dst_start + pos) + .write(bar, &E::ID); + regs::NV_PFALCON_FALCON_DMATRFFBOFFS::default() + .set_offs(src_start + pos) + .write(bar, &E::ID); + cmd.write(bar, &E::ID); + + // Wait for the transfer to complete. + // TIMEOUT: arbitrarily large value, no DMA transfer to the falcon's small memories + // should ever take that long. + util::wait_on(Delta::from_secs(2), || { + let r = regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, &E::ID); + if r.idle() { + Some(()) + } else { + None + } + })?; + } + + Ok(()) + } + + /// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. + pub(crate) fn dma_load<F: FalconFirmware<Target = E>>(&self, bar: &Bar0, fw: &F) -> Result { + regs::NV_PFALCON_FBIF_CTL::alter(bar, &E::ID, |v| v.set_allow_phys_no_ctx(true)); + regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); + regs::NV_PFALCON_FBIF_TRANSCFG::alter(bar, &E::ID, 0, |v| { + v.set_target(FalconFbifTarget::CoherentSysmem) + .set_mem_type(FalconFbifMemType::Physical) + }); + + self.dma_wr(bar, fw, FalconMem::Imem, fw.imem_load_params(), true)?; + self.dma_wr(bar, fw, FalconMem::Dmem, fw.dmem_load_params(), true)?; + + self.hal.program_brom(self, bar, &fw.brom_params())?; + + // Set `BootVec` to start of non-secure code. + regs::NV_PFALCON_FALCON_BOOTVEC::default() + .set_value(fw.boot_addr()) + .write(bar, &E::ID); + + Ok(()) + } + + /// Runs the loaded firmware and waits for its completion. + /// + /// `mbox0` and `mbox1` are optional parameters to write into the `MBOX0` and `MBOX1` registers + /// prior to running. + /// + /// Wait up to two seconds for the firmware to complete, and return its exit status read from + /// the `MBOX0` and `MBOX1` registers. + pub(crate) fn boot( + &self, + bar: &Bar0, + mbox0: Option<u32>, + mbox1: Option<u32>, + ) -> Result<(u32, u32)> { + if let Some(mbox0) = mbox0 { + regs::NV_PFALCON_FALCON_MAILBOX0::default() + .set_value(mbox0) + .write(bar, &E::ID); + } + + if let Some(mbox1) = mbox1 { + regs::NV_PFALCON_FALCON_MAILBOX1::default() + .set_value(mbox1) + .write(bar, &E::ID); + } + + match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID).alias_en() { + true => regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::default() + .set_startcpu(true) + .write(bar, &E::ID), + false => regs::NV_PFALCON_FALCON_CPUCTL::default() + .set_startcpu(true) + .write(bar, &E::ID), + } + + // TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds. + util::wait_on(Delta::from_secs(2), || { + let r = regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID); + if r.halted() { + Some(()) + } else { + None + } + })?; + + let (mbox0, mbox1) = ( + regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, &E::ID).value(), + regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, &E::ID).value(), + ); + + Ok((mbox0, mbox1)) + } + + /// Returns the fused version of the signature to use in order to run a HS firmware on this + /// falcon instance. `engine_id_mask` and `ucode_id` are obtained from the firmware header. + pub(crate) fn signature_reg_fuse_version( + &self, + bar: &Bar0, + engine_id_mask: u16, + ucode_id: u8, + ) -> Result<u32> { + self.hal + .signature_reg_fuse_version(self, bar, engine_id_mask, ucode_id) + } +} diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs new file mode 100644 index 000000000000..f17599cb49fa --- /dev/null +++ b/drivers/gpu/nova-core/falcon/gsp.rs @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 + +use crate::{ + driver::Bar0, + falcon::{Falcon, FalconEngine, PFalcon2Base, PFalconBase}, + regs::{self, macros::RegisterBase}, +}; + +/// Type specifying the `Gsp` falcon engine. Cannot be instantiated. +pub(crate) struct Gsp(()); + +impl RegisterBase<PFalconBase> for Gsp { + const BASE: usize = 0x00110000; +} + +impl RegisterBase<PFalcon2Base> for Gsp { + const BASE: usize = 0x00111000; +} + +impl FalconEngine for Gsp { + const ID: Self = Gsp(()); +} + +impl Falcon<Gsp> { + /// Clears the SWGEN0 bit in the Falcon's IRQ status clear register to + /// allow GSP to signal CPU for processing new messages in message queue. + pub(crate) fn clear_swgen0_intr(&self, bar: &Bar0) { + regs::NV_PFALCON_FALCON_IRQSCLR::default() + .set_swgen0(true) + .write(bar, &Gsp::ID); + } +} diff --git a/drivers/gpu/nova-core/falcon/hal.rs b/drivers/gpu/nova-core/falcon/hal.rs new file mode 100644 index 000000000000..bba288455617 --- /dev/null +++ b/drivers/gpu/nova-core/falcon/hal.rs @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::falcon::{Falcon, FalconBromParams, FalconEngine}; +use crate::gpu::Chipset; + +mod ga102; + +/// Hardware Abstraction Layer for Falcon cores. +/// +/// Implements chipset-specific low-level operations. The trait is generic against [`FalconEngine`] +/// so its `BASE` parameter can be used in order to avoid runtime bound checks when accessing +/// registers. +pub(crate) trait FalconHal<E: FalconEngine>: Send + Sync { + /// Activates the Falcon core if the engine is a risvc/falcon dual engine. + fn select_core(&self, _falcon: &Falcon<E>, _bar: &Bar0) -> Result { + Ok(()) + } + + /// Returns the fused version of the signature to use in order to run a HS firmware on this + /// falcon instance. `engine_id_mask` and `ucode_id` are obtained from the firmware header. + fn signature_reg_fuse_version( + &self, + falcon: &Falcon<E>, + bar: &Bar0, + engine_id_mask: u16, + ucode_id: u8, + ) -> Result<u32>; + + /// Program the boot ROM registers prior to starting a secure firmware. + fn program_brom(&self, falcon: &Falcon<E>, bar: &Bar0, params: &FalconBromParams) -> Result; +} + +/// Returns a boxed falcon HAL adequate for `chipset`. +/// +/// We use a heap-allocated trait object instead of a statically defined one because the +/// generic `FalconEngine` argument makes it difficult to define all the combinations +/// statically. +pub(super) fn falcon_hal<E: FalconEngine + 'static>( + chipset: Chipset, +) -> Result<KBox<dyn FalconHal<E>>> { + use Chipset::*; + + let hal = match chipset { + GA102 | GA103 | GA104 | GA106 | GA107 => { + KBox::new(ga102::Ga102::<E>::new(), GFP_KERNEL)? as KBox<dyn FalconHal<E>> + } + _ => return Err(ENOTSUPP), + }; + + Ok(hal) +} diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs new file mode 100644 index 000000000000..0b1cbe7853b3 --- /dev/null +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::marker::PhantomData; + +use kernel::device; +use kernel::prelude::*; +use kernel::time::Delta; + +use crate::driver::Bar0; +use crate::falcon::{ + Falcon, FalconBromParams, FalconEngine, FalconModSelAlgo, PeregrineCoreSelect, +}; +use crate::regs; +use crate::util; + +use super::FalconHal; + +fn select_core_ga102<E: FalconEngine>(bar: &Bar0) -> Result { + let bcr_ctrl = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID); + if bcr_ctrl.core_select() != PeregrineCoreSelect::Falcon { + regs::NV_PRISCV_RISCV_BCR_CTRL::default() + .set_core_select(PeregrineCoreSelect::Falcon) + .write(bar, &E::ID); + + // TIMEOUT: falcon core should take less than 10ms to report being enabled. + util::wait_on(Delta::from_millis(10), || { + let r = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID); + if r.valid() { + Some(()) + } else { + None + } + })?; + } + + Ok(()) +} + +fn signature_reg_fuse_version_ga102( + dev: &device::Device, + bar: &Bar0, + engine_id_mask: u16, + ucode_id: u8, +) -> Result<u32> { + const NV_FUSE_OPT_FPF_SIZE: u8 = regs::NV_FUSE_OPT_FPF_SIZE as u8; + + // Each engine has 16 ucode version registers numbered from 1 to 16. + let ucode_idx = match ucode_id { + 1..=NV_FUSE_OPT_FPF_SIZE => (ucode_id - 1) as usize, + _ => { + dev_err!(dev, "invalid ucode id {:#x}", ucode_id); + return Err(EINVAL); + } + }; + + // `ucode_idx` is guaranteed to be in the range [0..15], making the `read` calls provable valid + // at build-time. + let reg_fuse_version = if engine_id_mask & 0x0001 != 0 { + regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::read(bar, ucode_idx).data() + } else if engine_id_mask & 0x0004 != 0 { + regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::read(bar, ucode_idx).data() + } else if engine_id_mask & 0x0400 != 0 { + regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::read(bar, ucode_idx).data() + } else { + dev_err!(dev, "unexpected engine_id_mask {:#x}", engine_id_mask); + return Err(EINVAL); + }; + + // TODO[NUMM]: replace with `last_set_bit` once it lands. + Ok(u16::BITS - reg_fuse_version.leading_zeros()) +} + +fn program_brom_ga102<E: FalconEngine>(bar: &Bar0, params: &FalconBromParams) -> Result { + regs::NV_PFALCON2_FALCON_BROM_PARAADDR::default() + .set_value(params.pkc_data_offset) + .write(bar, &E::ID, 0); + regs::NV_PFALCON2_FALCON_BROM_ENGIDMASK::default() + .set_value(u32::from(params.engine_id_mask)) + .write(bar, &E::ID); + regs::NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID::default() + .set_ucode_id(params.ucode_id) + .write(bar, &E::ID); + regs::NV_PFALCON2_FALCON_MOD_SEL::default() + .set_algo(FalconModSelAlgo::Rsa3k) + .write(bar, &E::ID); + + Ok(()) +} + +pub(super) struct Ga102<E: FalconEngine>(PhantomData<E>); + +impl<E: FalconEngine> Ga102<E> { + pub(super) fn new() -> Self { + Self(PhantomData) + } +} + +impl<E: FalconEngine> FalconHal<E> for Ga102<E> { + fn select_core(&self, _falcon: &Falcon<E>, bar: &Bar0) -> Result { + select_core_ga102::<E>(bar) + } + + fn signature_reg_fuse_version( + &self, + falcon: &Falcon<E>, + bar: &Bar0, + engine_id_mask: u16, + ucode_id: u8, + ) -> Result<u32> { + signature_reg_fuse_version_ga102(&falcon.dev, bar, engine_id_mask, ucode_id) + } + + fn program_brom(&self, _falcon: &Falcon<E>, bar: &Bar0, params: &FalconBromParams) -> Result { + program_brom_ga102::<E>(bar, params) + } +} diff --git a/drivers/gpu/nova-core/falcon/sec2.rs b/drivers/gpu/nova-core/falcon/sec2.rs new file mode 100644 index 000000000000..815786c8480d --- /dev/null +++ b/drivers/gpu/nova-core/falcon/sec2.rs @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +use crate::falcon::{FalconEngine, PFalcon2Base, PFalconBase}; +use crate::regs::macros::RegisterBase; + +/// Type specifying the `Sec2` falcon engine. Cannot be instantiated. +pub(crate) struct Sec2(()); + +impl RegisterBase<PFalconBase> for Sec2 { + const BASE: usize = 0x00840000; +} + +impl RegisterBase<PFalcon2Base> for Sec2 { + const BASE: usize = 0x00841000; +} + +impl FalconEngine for Sec2 { + const ID: Self = Sec2(()); +} diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs new file mode 100644 index 000000000000..27d9edab8347 --- /dev/null +++ b/drivers/gpu/nova-core/fb.rs @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::ops::Range; + +use kernel::prelude::*; +use kernel::ptr::{Alignable, Alignment}; +use kernel::sizes::*; +use kernel::sync::aref::ARef; +use kernel::{dev_warn, device}; + +use crate::dma::DmaObject; +use crate::driver::Bar0; +use crate::gpu::Chipset; +use crate::regs; + +mod hal; + +/// Type holding the sysmem flush memory page, a page of memory to be written into the +/// `NV_PFB_NISO_FLUSH_SYSMEM_ADDR*` registers and used to maintain memory coherency. +/// +/// A system memory page is required for `sysmembar`, which is a GPU-initiated hardware +/// memory-barrier operation that flushes all pending GPU-side memory writes that were done through +/// PCIE to system memory. It is required for falcons to be reset as the reset operation involves a +/// reset handshake. When the falcon acknowledges a reset, it writes into system memory. To ensure +/// this write is visible to the host and prevent driver timeouts, the falcon must perform a +/// sysmembar operation to flush its writes. +/// +/// Because of this, the sysmem flush memory page must be registered as early as possible during +/// driver initialization, and before any falcon is reset. +/// +/// Users are responsible for manually calling [`Self::unregister`] before dropping this object, +/// otherwise the GPU might still use it even after it has been freed. +pub(crate) struct SysmemFlush { + /// Chipset we are operating on. + chipset: Chipset, + device: ARef<device::Device>, + /// Keep the page alive as long as we need it. + page: DmaObject, +} + +impl SysmemFlush { + /// Allocate a memory page and register it as the sysmem flush page. + pub(crate) fn register( + dev: &device::Device<device::Bound>, + bar: &Bar0, + chipset: Chipset, + ) -> Result<Self> { + let page = DmaObject::new(dev, kernel::page::PAGE_SIZE)?; + + hal::fb_hal(chipset).write_sysmem_flush_page(bar, page.dma_handle())?; + + Ok(Self { + chipset, + device: dev.into(), + page, + }) + } + + /// Unregister the managed sysmem flush page. + /// + /// In order to gracefully tear down the GPU, users must make sure to call this method before + /// dropping the object. + pub(crate) fn unregister(&self, bar: &Bar0) { + let hal = hal::fb_hal(self.chipset); + + if hal.read_sysmem_flush_page(bar) == self.page.dma_handle() { + let _ = hal.write_sysmem_flush_page(bar, 0).inspect_err(|e| { + dev_warn!( + &self.device, + "failed to unregister sysmem flush page: {:?}", + e + ) + }); + } else { + // Another page has been registered after us for some reason - warn as this is a bug. + dev_warn!( + &self.device, + "attempt to unregister a sysmem flush page that is not active\n" + ); + } + } +} + +/// Layout of the GPU framebuffer memory. +/// +/// Contains ranges of GPU memory reserved for a given purpose during the GSP boot process. +#[derive(Debug)] +#[expect(dead_code)] +pub(crate) struct FbLayout { + pub(crate) fb: Range<u64>, + pub(crate) vga_workspace: Range<u64>, + pub(crate) frts: Range<u64>, +} + +impl FbLayout { + /// Computes the FB layout. + pub(crate) fn new(chipset: Chipset, bar: &Bar0) -> Result<Self> { + let hal = hal::fb_hal(chipset); + + let fb = { + let fb_size = hal.vidmem_size(bar); + + 0..fb_size + }; + + let vga_workspace = { + let vga_base = { + const NV_PRAMIN_SIZE: u64 = SZ_1M as u64; + let base = fb.end - NV_PRAMIN_SIZE; + + if hal.supports_display(bar) { + match regs::NV_PDISP_VGA_WORKSPACE_BASE::read(bar).vga_workspace_addr() { + Some(addr) => { + if addr < base { + const VBIOS_WORKSPACE_SIZE: u64 = SZ_128K as u64; + + // Point workspace address to end of framebuffer. + fb.end - VBIOS_WORKSPACE_SIZE + } else { + addr + } + } + None => base, + } + } else { + base + } + }; + + vga_base..fb.end + }; + + let frts = { + const FRTS_DOWN_ALIGN: Alignment = Alignment::new::<SZ_128K>(); + const FRTS_SIZE: u64 = SZ_1M as u64; + let frts_base = vga_workspace.start.align_down(FRTS_DOWN_ALIGN) - FRTS_SIZE; + + frts_base..frts_base + FRTS_SIZE + }; + + Ok(Self { + fb, + vga_workspace, + frts, + }) + } +} diff --git a/drivers/gpu/nova-core/fb/hal.rs b/drivers/gpu/nova-core/fb/hal.rs new file mode 100644 index 000000000000..2f914948bb9a --- /dev/null +++ b/drivers/gpu/nova-core/fb/hal.rs @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::gpu::Chipset; + +mod ga100; +mod ga102; +mod tu102; + +pub(crate) trait FbHal { + /// Returns the address of the currently-registered sysmem flush page. + fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64; + + /// Register `addr` as the address of the sysmem flush page. + /// + /// This might fail if the address is too large for the receiving register. + fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result; + + /// Returns `true` is display is supported. + fn supports_display(&self, bar: &Bar0) -> bool; + + /// Returns the VRAM size, in bytes. + fn vidmem_size(&self, bar: &Bar0) -> u64; +} + +/// Returns the HAL corresponding to `chipset`. +pub(super) fn fb_hal(chipset: Chipset) -> &'static dyn FbHal { + use Chipset::*; + + match chipset { + TU102 | TU104 | TU106 | TU117 | TU116 => tu102::TU102_HAL, + GA100 => ga100::GA100_HAL, + GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 => { + ga102::GA102_HAL + } + } +} diff --git a/drivers/gpu/nova-core/fb/hal/ga100.rs b/drivers/gpu/nova-core/fb/hal/ga100.rs new file mode 100644 index 000000000000..871c42bf033a --- /dev/null +++ b/drivers/gpu/nova-core/fb/hal/ga100.rs @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 + +struct Ga100; + +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::fb::hal::FbHal; +use crate::regs; + +use super::tu102::FLUSH_SYSMEM_ADDR_SHIFT; + +pub(super) fn read_sysmem_flush_page_ga100(bar: &Bar0) -> u64 { + u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT + | u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::read(bar).adr_63_40()) + << FLUSH_SYSMEM_ADDR_SHIFT_HI +} + +pub(super) fn write_sysmem_flush_page_ga100(bar: &Bar0, addr: u64) { + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::default() + .set_adr_63_40((addr >> FLUSH_SYSMEM_ADDR_SHIFT_HI) as u32) + .write(bar); + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::default() + .set_adr_39_08((addr >> FLUSH_SYSMEM_ADDR_SHIFT) as u32) + .write(bar); +} + +pub(super) fn display_enabled_ga100(bar: &Bar0) -> bool { + !regs::ga100::NV_FUSE_STATUS_OPT_DISPLAY::read(bar).display_disabled() +} + +/// Shift applied to the sysmem address before it is written into +/// `NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI`, +const FLUSH_SYSMEM_ADDR_SHIFT_HI: u32 = 40; + +impl FbHal for Ga100 { + fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 { + read_sysmem_flush_page_ga100(bar) + } + + fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result { + write_sysmem_flush_page_ga100(bar, addr); + + Ok(()) + } + + fn supports_display(&self, bar: &Bar0) -> bool { + display_enabled_ga100(bar) + } + + fn vidmem_size(&self, bar: &Bar0) -> u64 { + super::tu102::vidmem_size_gp102(bar) + } +} + +const GA100: Ga100 = Ga100; +pub(super) const GA100_HAL: &dyn FbHal = &GA100; diff --git a/drivers/gpu/nova-core/fb/hal/ga102.rs b/drivers/gpu/nova-core/fb/hal/ga102.rs new file mode 100644 index 000000000000..a73b77e39715 --- /dev/null +++ b/drivers/gpu/nova-core/fb/hal/ga102.rs @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::fb::hal::FbHal; +use crate::regs; + +fn vidmem_size_ga102(bar: &Bar0) -> u64 { + regs::NV_USABLE_FB_SIZE_IN_MB::read(bar).usable_fb_size() +} + +struct Ga102; + +impl FbHal for Ga102 { + fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 { + super::ga100::read_sysmem_flush_page_ga100(bar) + } + + fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result { + super::ga100::write_sysmem_flush_page_ga100(bar, addr); + + Ok(()) + } + + fn supports_display(&self, bar: &Bar0) -> bool { + super::ga100::display_enabled_ga100(bar) + } + + fn vidmem_size(&self, bar: &Bar0) -> u64 { + vidmem_size_ga102(bar) + } +} + +const GA102: Ga102 = Ga102; +pub(super) const GA102_HAL: &dyn FbHal = &GA102; diff --git a/drivers/gpu/nova-core/fb/hal/tu102.rs b/drivers/gpu/nova-core/fb/hal/tu102.rs new file mode 100644 index 000000000000..b022c781caf4 --- /dev/null +++ b/drivers/gpu/nova-core/fb/hal/tu102.rs @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 + +use crate::driver::Bar0; +use crate::fb::hal::FbHal; +use crate::regs; +use kernel::prelude::*; + +/// Shift applied to the sysmem address before it is written into `NV_PFB_NISO_FLUSH_SYSMEM_ADDR`, +/// to be used by HALs. +pub(super) const FLUSH_SYSMEM_ADDR_SHIFT: u32 = 8; + +pub(super) fn read_sysmem_flush_page_gm107(bar: &Bar0) -> u64 { + u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT +} + +pub(super) fn write_sysmem_flush_page_gm107(bar: &Bar0, addr: u64) -> Result { + // Check that the address doesn't overflow the receiving 32-bit register. + if addr >> (u32::BITS + FLUSH_SYSMEM_ADDR_SHIFT) == 0 { + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::default() + .set_adr_39_08((addr >> FLUSH_SYSMEM_ADDR_SHIFT) as u32) + .write(bar); + + Ok(()) + } else { + Err(EINVAL) + } +} + +pub(super) fn display_enabled_gm107(bar: &Bar0) -> bool { + !regs::gm107::NV_FUSE_STATUS_OPT_DISPLAY::read(bar).display_disabled() +} + +pub(super) fn vidmem_size_gp102(bar: &Bar0) -> u64 { + regs::NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE::read(bar).usable_fb_size() +} + +struct Tu102; + +impl FbHal for Tu102 { + fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 { + read_sysmem_flush_page_gm107(bar) + } + + fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result { + write_sysmem_flush_page_gm107(bar, addr) + } + + fn supports_display(&self, bar: &Bar0) -> bool { + display_enabled_gm107(bar) + } + + fn vidmem_size(&self, bar: &Bar0) -> u64 { + vidmem_size_gp102(bar) + } +} + +const TU102: Tu102 = Tu102; +pub(super) const TU102_HAL: &dyn FbHal = &TU102; diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 4b8a38358a4f..4179a74a2342 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -3,41 +3,197 @@ //! Contains structures and functions dedicated to the parsing, building and patching of firmwares //! to be loaded into a given execution unit. +use core::marker::PhantomData; +use core::mem::size_of; + use kernel::device; use kernel::firmware; use kernel::prelude::*; use kernel::str::CString; +use kernel::transmute::FromBytes; +use crate::dma::DmaObject; +use crate::falcon::FalconFirmware; use crate::gpu; -use crate::gpu::Chipset; -pub(crate) const FIRMWARE_VERSION: &str = "535.113.01"; +pub(crate) mod booter; +pub(crate) mod fwsec; +pub(crate) mod gsp; +pub(crate) mod riscv; + +pub(crate) const FIRMWARE_VERSION: &str = "570.144"; + +/// Requests the GPU firmware `name` suitable for `chipset`, with version `ver`. +fn request_firmware( + dev: &device::Device, + chipset: gpu::Chipset, + name: &str, + ver: &str, +) -> Result<firmware::Firmware> { + let chip_name = chipset.name(); + + CString::try_from_fmt(fmt!("nvidia/{chip_name}/gsp/{name}-{ver}.bin")) + .and_then(|path| firmware::Firmware::request(&path, dev)) +} -/// Structure encapsulating the firmware blobs required for the GPU to operate. -#[expect(dead_code)] -pub(crate) struct Firmware { - booter_load: firmware::Firmware, - booter_unload: firmware::Firmware, - bootloader: firmware::Firmware, - gsp: firmware::Firmware, +/// Structure used to describe some firmwares, notably FWSEC-FRTS. +#[repr(C)] +#[derive(Debug, Clone)] +pub(crate) struct FalconUCodeDescV3 { + /// Header defined by `NV_BIT_FALCON_UCODE_DESC_HEADER_VDESC*` in OpenRM. + hdr: u32, + /// Stored size of the ucode after the header. + stored_size: u32, + /// Offset in `DMEM` at which the signature is expected to be found. + pub(crate) pkc_data_offset: u32, + /// Offset after the code segment at which the app headers are located. + pub(crate) interface_offset: u32, + /// Base address at which to load the code segment into `IMEM`. + pub(crate) imem_phys_base: u32, + /// Size in bytes of the code to copy into `IMEM`. + pub(crate) imem_load_size: u32, + /// Virtual `IMEM` address (i.e. `tag`) at which the code should start. + pub(crate) imem_virt_base: u32, + /// Base address at which to load the data segment into `DMEM`. + pub(crate) dmem_phys_base: u32, + /// Size in bytes of the data to copy into `DMEM`. + pub(crate) dmem_load_size: u32, + /// Mask of the falcon engines on which this firmware can run. + pub(crate) engine_id_mask: u16, + /// ID of the ucode used to infer a fuse register to validate the signature. + pub(crate) ucode_id: u8, + /// Number of signatures in this firmware. + pub(crate) signature_count: u8, + /// Versions of the signatures, used to infer a valid signature to use. + pub(crate) signature_versions: u16, + _reserved: u16, } -impl Firmware { - pub(crate) fn new(dev: &device::Device, chipset: Chipset, ver: &str) -> Result<Firmware> { - let mut chip_name = CString::try_from_fmt(fmt!("{}", chipset))?; - chip_name.make_ascii_lowercase(); +impl FalconUCodeDescV3 { + /// Returns the size in bytes of the header. + pub(crate) fn size(&self) -> usize { + const HDR_SIZE_SHIFT: u32 = 16; + const HDR_SIZE_MASK: u32 = 0xffff0000; + + ((self.hdr & HDR_SIZE_MASK) >> HDR_SIZE_SHIFT) as usize + } +} + +/// Trait implemented by types defining the signed state of a firmware. +trait SignedState {} + +/// Type indicating that the firmware must be signed before it can be used. +struct Unsigned; +impl SignedState for Unsigned {} + +/// Type indicating that the firmware is signed and ready to be loaded. +struct Signed; +impl SignedState for Signed {} + +/// A [`DmaObject`] containing a specific microcode ready to be loaded into a falcon. +/// +/// This is module-local and meant for sub-modules to use internally. +/// +/// After construction, a firmware is [`Unsigned`], and must generally be patched with a signature +/// before it can be loaded (with an exception for development hardware). The +/// [`Self::patch_signature`] and [`Self::no_patch_signature`] methods are used to transition the +/// firmware to its [`Signed`] state. +struct FirmwareDmaObject<F: FalconFirmware, S: SignedState>(DmaObject, PhantomData<(F, S)>); + +/// Trait for signatures to be patched directly into a given firmware. +/// +/// This is module-local and meant for sub-modules to use internally. +trait FirmwareSignature<F: FalconFirmware>: AsRef<[u8]> {} + +impl<F: FalconFirmware> FirmwareDmaObject<F, Unsigned> { + /// Patches the firmware at offset `sig_base_img` with `signature`. + fn patch_signature<S: FirmwareSignature<F>>( + mut self, + signature: &S, + sig_base_img: usize, + ) -> Result<FirmwareDmaObject<F, Signed>> { + let signature_bytes = signature.as_ref(); + if sig_base_img + signature_bytes.len() > self.0.size() { + return Err(EINVAL); + } + + // SAFETY: We are the only user of this object, so there cannot be any race. + let dst = unsafe { self.0.start_ptr_mut().add(sig_base_img) }; - let request = |name_| { - CString::try_from_fmt(fmt!("nvidia/{}/gsp/{}-{}.bin", &*chip_name, name_, ver)) - .and_then(|path| firmware::Firmware::request(&path, dev)) + // SAFETY: `signature` and `dst` are valid, properly aligned, and do not overlap. + unsafe { + core::ptr::copy_nonoverlapping(signature_bytes.as_ptr(), dst, signature_bytes.len()) }; - Ok(Firmware { - booter_load: request("booter_load")?, - booter_unload: request("booter_unload")?, - bootloader: request("bootloader")?, - gsp: request("gsp")?, - }) + Ok(FirmwareDmaObject(self.0, PhantomData)) + } + + /// Mark the firmware as signed without patching it. + /// + /// This method is used to explicitly confirm that we do not need to sign the firmware, while + /// allowing us to continue as if it was. This is typically only needed for development + /// hardware. + fn no_patch_signature(self) -> FirmwareDmaObject<F, Signed> { + FirmwareDmaObject(self.0, PhantomData) + } +} + +/// Header common to most firmware files. +#[repr(C)] +#[derive(Debug, Clone)] +struct BinHdr { + /// Magic number, must be `0x10de`. + bin_magic: u32, + /// Version of the header. + bin_ver: u32, + /// Size in bytes of the binary (to be ignored). + bin_size: u32, + /// Offset of the start of the application-specific header. + header_offset: u32, + /// Offset of the start of the data payload. + data_offset: u32, + /// Size in bytes of the data payload. + data_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for BinHdr {} + +// A firmware blob starting with a `BinHdr`. +struct BinFirmware<'a> { + hdr: BinHdr, + fw: &'a [u8], +} + +impl<'a> BinFirmware<'a> { + /// Interpret `fw` as a firmware image starting with a [`BinHdr`], and returns the + /// corresponding [`BinFirmware`] that can be used to extract its payload. + fn new(fw: &'a firmware::Firmware) -> Result<Self> { + const BIN_MAGIC: u32 = 0x10de; + let fw = fw.data(); + + fw.get(0..size_of::<BinHdr>()) + // Extract header. + .and_then(BinHdr::from_bytes_copy) + // Validate header. + .and_then(|hdr| { + if hdr.bin_magic == BIN_MAGIC { + Some(hdr) + } else { + None + } + }) + .map(|hdr| Self { hdr, fw }) + .ok_or(EINVAL) + } + + /// Returns the data payload of the firmware, or `None` if the data range is out of bounds of + /// the firmware image. + fn data(&self) -> Option<&[u8]> { + let fw_start = self.hdr.data_offset as usize; + let fw_size = self.hdr.data_size as usize; + + self.fw.get(fw_start..fw_start + fw_size) } } @@ -71,8 +227,8 @@ impl<const N: usize> ModInfoBuilder<N> { let mut this = Self(firmware::ModInfoBuilder::new(module_name)); let mut i = 0; - while i < gpu::Chipset::NAMES.len() { - this = this.make_entry_chipset(gpu::Chipset::NAMES[i]); + while i < gpu::Chipset::ALL.len() { + this = this.make_entry_chipset(gpu::Chipset::ALL[i].name()); i += 1; } diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs new file mode 100644 index 000000000000..b4ff1b17e4a0 --- /dev/null +++ b/drivers/gpu/nova-core/firmware/booter.rs @@ -0,0 +1,375 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Support for loading and patching the `Booter` firmware. `Booter` is a Heavy Secured firmware +//! running on [`Sec2`], that is used on Turing/Ampere to load the GSP firmware into the GSP falcon +//! (and optionally unload it through a separate firmware image). + +use core::marker::PhantomData; +use core::mem::size_of; +use core::ops::Deref; + +use kernel::device; +use kernel::prelude::*; +use kernel::transmute::FromBytes; + +use crate::dma::DmaObject; +use crate::driver::Bar0; +use crate::falcon::sec2::Sec2; +use crate::falcon::{Falcon, FalconBromParams, FalconFirmware, FalconLoadParams, FalconLoadTarget}; +use crate::firmware::{BinFirmware, FirmwareDmaObject, FirmwareSignature, Signed, Unsigned}; +use crate::gpu::Chipset; + +/// Local convenience function to return a copy of `S` by reinterpreting the bytes starting at +/// `offset` in `slice`. +fn frombytes_at<S: FromBytes + Sized>(slice: &[u8], offset: usize) -> Result<S> { + slice + .get(offset..offset + size_of::<S>()) + .and_then(S::from_bytes_copy) + .ok_or(EINVAL) +} + +/// Heavy-Secured firmware header. +/// +/// Such firmwares have an application-specific payload that needs to be patched with a given +/// signature. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsHeaderV2 { + /// Offset to the start of the signatures. + sig_prod_offset: u32, + /// Size in bytes of the signatures. + sig_prod_size: u32, + /// Offset to a `u32` containing the location at which to patch the signature in the microcode + /// image. + patch_loc_offset: u32, + /// Offset to a `u32` containing the index of the signature to patch. + patch_sig_offset: u32, + /// Start offset to the signature metadata. + meta_data_offset: u32, + /// Size in bytes of the signature metadata. + meta_data_size: u32, + /// Offset to a `u32` containing the number of signatures in the signatures section. + num_sig_offset: u32, + /// Offset of the application-specific header. + header_offset: u32, + /// Size in bytes of the application-specific header. + header_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsHeaderV2 {} + +/// Heavy-Secured Firmware image container. +/// +/// This provides convenient access to the fields of [`HsHeaderV2`] that are actually indices to +/// read from in the firmware data. +struct HsFirmwareV2<'a> { + hdr: HsHeaderV2, + fw: &'a [u8], +} + +impl<'a> HsFirmwareV2<'a> { + /// Interprets the header of `bin_fw` as a [`HsHeaderV2`] and returns an instance of + /// `HsFirmwareV2` for further parsing. + /// + /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. + fn new(bin_fw: &BinFirmware<'a>) -> Result<Self> { + frombytes_at::<HsHeaderV2>(bin_fw.fw, bin_fw.hdr.header_offset as usize) + .map(|hdr| Self { hdr, fw: bin_fw.fw }) + } + + /// Returns the location at which the signatures should be patched in the microcode image. + /// + /// Fails if the offset of the patch location is outside the bounds of the firmware + /// image. + fn patch_location(&self) -> Result<u32> { + frombytes_at::<u32>(self.fw, self.hdr.patch_loc_offset as usize) + } + + /// Returns an iterator to the signatures of the firmware. The iterator can be empty if the + /// firmware is unsigned. + /// + /// Fails if the pointed signatures are outside the bounds of the firmware image. + fn signatures_iter(&'a self) -> Result<impl Iterator<Item = BooterSignature<'a>>> { + let num_sig = frombytes_at::<u32>(self.fw, self.hdr.num_sig_offset as usize)?; + let iter = match self.hdr.sig_prod_size.checked_div(num_sig) { + // If there are no signatures, return an iterator that will yield zero elements. + None => (&[] as &[u8]).chunks_exact(1), + Some(sig_size) => { + let patch_sig = frombytes_at::<u32>(self.fw, self.hdr.patch_sig_offset as usize)?; + let signatures_start = (self.hdr.sig_prod_offset + patch_sig) as usize; + + self.fw + // Get signatures range. + .get(signatures_start..signatures_start + self.hdr.sig_prod_size as usize) + .ok_or(EINVAL)? + .chunks_exact(sig_size as usize) + } + }; + + // Map the byte slices into signatures. + Ok(iter.map(BooterSignature)) + } +} + +/// Signature parameters, as defined in the firmware. +#[repr(C)] +struct HsSignatureParams { + /// Fuse version to use. + fuse_ver: u32, + /// Mask of engine IDs this firmware applies to. + engine_id_mask: u32, + /// ID of the microcode. + ucode_id: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsSignatureParams {} + +impl HsSignatureParams { + /// Returns the signature parameters contained in `hs_fw`. + /// + /// Fails if the meta data parameter of `hs_fw` is outside the bounds of the firmware image, or + /// if its size doesn't match that of [`HsSignatureParams`]. + fn new(hs_fw: &HsFirmwareV2<'_>) -> Result<Self> { + let start = hs_fw.hdr.meta_data_offset as usize; + let end = start + .checked_add(hs_fw.hdr.meta_data_size as usize) + .ok_or(EINVAL)?; + + hs_fw + .fw + .get(start..end) + .and_then(Self::from_bytes_copy) + .ok_or(EINVAL) + } +} + +/// Header for code and data load offsets. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsLoadHeaderV2 { + // Offset at which the code starts. + os_code_offset: u32, + // Total size of the code, for all apps. + os_code_size: u32, + // Offset at which the data starts. + os_data_offset: u32, + // Size of the data. + os_data_size: u32, + // Number of apps following this header. Each app is described by a [`HsLoadHeaderV2App`]. + num_apps: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsLoadHeaderV2 {} + +impl HsLoadHeaderV2 { + /// Returns the load header contained in `hs_fw`. + /// + /// Fails if the header pointed at by `hs_fw` is not within the bounds of the firmware image. + fn new(hs_fw: &HsFirmwareV2<'_>) -> Result<Self> { + frombytes_at::<Self>(hs_fw.fw, hs_fw.hdr.header_offset as usize) + } +} + +/// Header for app code loader. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsLoadHeaderV2App { + /// Offset at which to load the app code. + offset: u32, + /// Length in bytes of the app code. + len: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsLoadHeaderV2App {} + +impl HsLoadHeaderV2App { + /// Returns the [`HsLoadHeaderV2App`] for app `idx` of `hs_fw`. + /// + /// Fails if `idx` is larger than the number of apps declared in `hs_fw`, or if the header is + /// not within the bounds of the firmware image. + fn new(hs_fw: &HsFirmwareV2<'_>, idx: u32) -> Result<Self> { + let load_hdr = HsLoadHeaderV2::new(hs_fw)?; + if idx >= load_hdr.num_apps { + Err(EINVAL) + } else { + frombytes_at::<Self>( + hs_fw.fw, + (hs_fw.hdr.header_offset as usize) + // Skip the load header... + .checked_add(size_of::<HsLoadHeaderV2>()) + // ... and jump to app header `idx`. + .and_then(|offset| { + offset.checked_add((idx as usize).checked_mul(size_of::<Self>())?) + }) + .ok_or(EINVAL)?, + ) + } + } +} + +/// Signature for Booter firmware. Their size is encoded into the header and not known a compile +/// time, so we just wrap a byte slices on which we can implement [`FirmwareSignature`]. +struct BooterSignature<'a>(&'a [u8]); + +impl<'a> AsRef<[u8]> for BooterSignature<'a> { + fn as_ref(&self) -> &[u8] { + self.0 + } +} + +impl<'a> FirmwareSignature<BooterFirmware> for BooterSignature<'a> {} + +/// The `Booter` loader firmware, responsible for loading the GSP. +pub(crate) struct BooterFirmware { + // Load parameters for `IMEM` falcon memory. + imem_load_target: FalconLoadTarget, + // Load parameters for `DMEM` falcon memory. + dmem_load_target: FalconLoadTarget, + // BROM falcon parameters. + brom_params: FalconBromParams, + // Device-mapped firmware image. + ucode: FirmwareDmaObject<Self, Signed>, +} + +impl FirmwareDmaObject<BooterFirmware, Unsigned> { + fn new_booter(dev: &device::Device<device::Bound>, data: &[u8]) -> Result<Self> { + DmaObject::from_data(dev, data).map(|ucode| Self(ucode, PhantomData)) + } +} + +#[derive(Copy, Clone, Debug, PartialEq)] +pub(crate) enum BooterKind { + Loader, + #[expect(unused)] + Unloader, +} + +impl BooterFirmware { + /// Parses the Booter firmware contained in `fw`, and patches the correct signature so it is + /// ready to be loaded and run on `falcon`. + pub(crate) fn new( + dev: &device::Device<device::Bound>, + kind: BooterKind, + chipset: Chipset, + ver: &str, + falcon: &Falcon<<Self as FalconFirmware>::Target>, + bar: &Bar0, + ) -> Result<Self> { + let fw_name = match kind { + BooterKind::Loader => "booter_load", + BooterKind::Unloader => "booter_unload", + }; + let fw = super::request_firmware(dev, chipset, fw_name, ver)?; + let bin_fw = BinFirmware::new(&fw)?; + + // The binary firmware embeds a Heavy-Secured firmware. + let hs_fw = HsFirmwareV2::new(&bin_fw)?; + + // The Heavy-Secured firmware embeds a firmware load descriptor. + let load_hdr = HsLoadHeaderV2::new(&hs_fw)?; + + // Offset in `ucode` where to patch the signature. + let patch_loc = hs_fw.patch_location()?; + + let sig_params = HsSignatureParams::new(&hs_fw)?; + let brom_params = FalconBromParams { + // `load_hdr.os_data_offset` is an absolute index, but `pkc_data_offset` is from the + // signature patch location. + pkc_data_offset: patch_loc + .checked_sub(load_hdr.os_data_offset) + .ok_or(EINVAL)?, + engine_id_mask: u16::try_from(sig_params.engine_id_mask).map_err(|_| EINVAL)?, + ucode_id: u8::try_from(sig_params.ucode_id).map_err(|_| EINVAL)?, + }; + let app0 = HsLoadHeaderV2App::new(&hs_fw, 0)?; + + // Object containing the firmware microcode to be signature-patched. + let ucode = bin_fw + .data() + .ok_or(EINVAL) + .and_then(|data| FirmwareDmaObject::<Self, _>::new_booter(dev, data))?; + + let ucode_signed = { + let mut signatures = hs_fw.signatures_iter()?.peekable(); + + if signatures.peek().is_none() { + // If there are no signatures, then the firmware is unsigned. + ucode.no_patch_signature() + } else { + // Obtain the version from the fuse register, and extract the corresponding + // signature. + let reg_fuse_version = falcon.signature_reg_fuse_version( + bar, + brom_params.engine_id_mask, + brom_params.ucode_id, + )?; + + // `0` means the last signature should be used. + const FUSE_VERSION_USE_LAST_SIG: u32 = 0; + let signature = match reg_fuse_version { + FUSE_VERSION_USE_LAST_SIG => signatures.last(), + // Otherwise hardware fuse version needs to be subtracted to obtain the index. + reg_fuse_version => { + let Some(idx) = sig_params.fuse_ver.checked_sub(reg_fuse_version) else { + dev_err!(dev, "invalid fuse version for Booter firmware\n"); + return Err(EINVAL); + }; + signatures.nth(idx as usize) + } + } + .ok_or(EINVAL)?; + + ucode.patch_signature(&signature, patch_loc as usize)? + } + }; + + Ok(Self { + imem_load_target: FalconLoadTarget { + src_start: app0.offset, + dst_start: 0, + len: app0.len, + }, + dmem_load_target: FalconLoadTarget { + src_start: load_hdr.os_data_offset, + dst_start: 0, + len: load_hdr.os_data_size, + }, + brom_params, + ucode: ucode_signed, + }) + } +} + +impl FalconLoadParams for BooterFirmware { + fn imem_load_params(&self) -> FalconLoadTarget { + self.imem_load_target.clone() + } + + fn dmem_load_params(&self) -> FalconLoadTarget { + self.dmem_load_target.clone() + } + + fn brom_params(&self) -> FalconBromParams { + self.brom_params.clone() + } + + fn boot_addr(&self) -> u32 { + self.imem_load_target.src_start + } +} + +impl Deref for BooterFirmware { + type Target = DmaObject; + + fn deref(&self) -> &Self::Target { + &self.ucode.0 + } +} + +impl FalconFirmware for BooterFirmware { + type Target = Sec2; +} diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs new file mode 100644 index 000000000000..8edbb5c0572c --- /dev/null +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -0,0 +1,416 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! FWSEC is a High Secure firmware that is extracted from the BIOS and performs the first step of +//! the GSP startup by creating the WPR2 memory region and copying critical areas of the VBIOS into +//! it after authenticating them, ensuring they haven't been tampered with. It runs on the GSP +//! falcon. +//! +//! Before being run, it needs to be patched in two areas: +//! +//! - The command to be run, as this firmware can perform several tasks ; +//! - The ucode signature, so the GSP falcon can run FWSEC in HS mode. + +use core::marker::PhantomData; +use core::mem::{align_of, size_of}; +use core::ops::Deref; + +use kernel::device::{self, Device}; +use kernel::prelude::*; +use kernel::transmute::FromBytes; + +use crate::dma::DmaObject; +use crate::driver::Bar0; +use crate::falcon::gsp::Gsp; +use crate::falcon::{Falcon, FalconBromParams, FalconFirmware, FalconLoadParams, FalconLoadTarget}; +use crate::firmware::{FalconUCodeDescV3, FirmwareDmaObject, FirmwareSignature, Signed, Unsigned}; +use crate::vbios::Vbios; + +const NVFW_FALCON_APPIF_ID_DMEMMAPPER: u32 = 0x4; + +#[repr(C)] +#[derive(Debug)] +struct FalconAppifHdrV1 { + version: u8, + header_size: u8, + entry_size: u8, + entry_count: u8, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for FalconAppifHdrV1 {} + +#[repr(C, packed)] +#[derive(Debug)] +struct FalconAppifV1 { + id: u32, + dmem_base: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for FalconAppifV1 {} + +#[derive(Debug)] +#[repr(C, packed)] +struct FalconAppifDmemmapperV3 { + signature: u32, + version: u16, + size: u16, + cmd_in_buffer_offset: u32, + cmd_in_buffer_size: u32, + cmd_out_buffer_offset: u32, + cmd_out_buffer_size: u32, + nvf_img_data_buffer_offset: u32, + nvf_img_data_buffer_size: u32, + printf_buffer_hdr: u32, + ucode_build_time_stamp: u32, + ucode_signature: u32, + init_cmd: u32, + ucode_feature: u32, + ucode_cmd_mask0: u32, + ucode_cmd_mask1: u32, + multi_tgt_tbl: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for FalconAppifDmemmapperV3 {} + +#[derive(Debug)] +#[repr(C, packed)] +struct ReadVbios { + ver: u32, + hdr: u32, + addr: u64, + size: u32, + flags: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for ReadVbios {} + +#[derive(Debug)] +#[repr(C, packed)] +struct FrtsRegion { + ver: u32, + hdr: u32, + addr: u32, + size: u32, + ftype: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for FrtsRegion {} + +const NVFW_FRTS_CMD_REGION_TYPE_FB: u32 = 2; + +#[repr(C, packed)] +struct FrtsCmd { + read_vbios: ReadVbios, + frts_region: FrtsRegion, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for FrtsCmd {} + +const NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS: u32 = 0x15; +const NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB: u32 = 0x19; + +/// Command for the [`FwsecFirmware`] to execute. +pub(crate) enum FwsecCommand { + /// Asks [`FwsecFirmware`] to carve out the WPR2 area and place a verified copy of the VBIOS + /// image into it. + Frts { frts_addr: u64, frts_size: u64 }, + /// Asks [`FwsecFirmware`] to load pre-OS apps on the PMU. + #[expect(dead_code)] + Sb, +} + +/// Size of the signatures used in FWSEC. +const BCRT30_RSA3K_SIG_SIZE: usize = 384; + +/// A single signature that can be patched into a FWSEC image. +#[repr(transparent)] +pub(crate) struct Bcrt30Rsa3kSignature([u8; BCRT30_RSA3K_SIG_SIZE]); + +/// SAFETY: A signature is just an array of bytes. +unsafe impl FromBytes for Bcrt30Rsa3kSignature {} + +impl From<[u8; BCRT30_RSA3K_SIG_SIZE]> for Bcrt30Rsa3kSignature { + fn from(sig: [u8; BCRT30_RSA3K_SIG_SIZE]) -> Self { + Self(sig) + } +} + +impl AsRef<[u8]> for Bcrt30Rsa3kSignature { + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +impl FirmwareSignature<FwsecFirmware> for Bcrt30Rsa3kSignature {} + +/// Reinterpret the area starting from `offset` in `fw` as an instance of `T` (which must implement +/// [`FromBytes`]) and return a reference to it. +/// +/// # Safety +/// +/// Callers must ensure that the region of memory returned is not written for as long as the +/// returned reference is alive. +/// +/// TODO[TRSM][COHA]: Remove this and `transmute_mut` once `CoherentAllocation::as_slice` is +/// available and we have a way to transmute objects implementing FromBytes, e.g.: +/// https://lore.kernel.org/lkml/20250330234039.29814-1-christiansantoslima21@gmail.com/ +unsafe fn transmute<'a, 'b, T: Sized + FromBytes>( + fw: &'a DmaObject, + offset: usize, +) -> Result<&'b T> { + if offset + size_of::<T>() > fw.size() { + return Err(EINVAL); + } + if (fw.start_ptr() as usize + offset) % align_of::<T>() != 0 { + return Err(EINVAL); + } + + // SAFETY: we have checked that the pointer is properly aligned that its pointed memory is + // large enough the contains an instance of `T`, which implements `FromBytes`. + Ok(unsafe { &*(fw.start_ptr().add(offset).cast::<T>()) }) +} + +/// Reinterpret the area starting from `offset` in `fw` as a mutable instance of `T` (which must +/// implement [`FromBytes`]) and return a reference to it. +/// +/// # Safety +/// +/// Callers must ensure that the region of memory returned is not read or written for as long as +/// the returned reference is alive. +unsafe fn transmute_mut<'a, 'b, T: Sized + FromBytes>( + fw: &'a mut DmaObject, + offset: usize, +) -> Result<&'b mut T> { + if offset + size_of::<T>() > fw.size() { + return Err(EINVAL); + } + if (fw.start_ptr_mut() as usize + offset) % align_of::<T>() != 0 { + return Err(EINVAL); + } + + // SAFETY: we have checked that the pointer is properly aligned that its pointed memory is + // large enough the contains an instance of `T`, which implements `FromBytes`. + Ok(unsafe { &mut *(fw.start_ptr_mut().add(offset).cast::<T>()) }) +} + +/// The FWSEC microcode, extracted from the BIOS and to be run on the GSP falcon. +/// +/// It is responsible for e.g. carving out the WPR2 region as the first step of the GSP bootflow. +pub(crate) struct FwsecFirmware { + /// Descriptor of the firmware. + desc: FalconUCodeDescV3, + /// GPU-accessible DMA object containing the firmware. + ucode: FirmwareDmaObject<Self, Signed>, +} + +impl FalconLoadParams for FwsecFirmware { + fn imem_load_params(&self) -> FalconLoadTarget { + FalconLoadTarget { + src_start: 0, + dst_start: self.desc.imem_phys_base, + len: self.desc.imem_load_size, + } + } + + fn dmem_load_params(&self) -> FalconLoadTarget { + FalconLoadTarget { + src_start: self.desc.imem_load_size, + dst_start: self.desc.dmem_phys_base, + len: self.desc.dmem_load_size, + } + } + + fn brom_params(&self) -> FalconBromParams { + FalconBromParams { + pkc_data_offset: self.desc.pkc_data_offset, + engine_id_mask: self.desc.engine_id_mask, + ucode_id: self.desc.ucode_id, + } + } + + fn boot_addr(&self) -> u32 { + 0 + } +} + +impl Deref for FwsecFirmware { + type Target = DmaObject; + + fn deref(&self) -> &Self::Target { + &self.ucode.0 + } +} + +impl FalconFirmware for FwsecFirmware { + type Target = Gsp; +} + +impl FirmwareDmaObject<FwsecFirmware, Unsigned> { + fn new_fwsec(dev: &Device<device::Bound>, bios: &Vbios, cmd: FwsecCommand) -> Result<Self> { + let desc = bios.fwsec_image().header()?; + let ucode = bios.fwsec_image().ucode(desc)?; + let mut dma_object = DmaObject::from_data(dev, ucode)?; + + let hdr_offset = (desc.imem_load_size + desc.interface_offset) as usize; + // SAFETY: we have exclusive access to `dma_object`. + let hdr: &FalconAppifHdrV1 = unsafe { transmute(&dma_object, hdr_offset) }?; + + if hdr.version != 1 { + return Err(EINVAL); + } + + // Find the DMEM mapper section in the firmware. + for i in 0..hdr.entry_count as usize { + let app: &FalconAppifV1 = + // SAFETY: we have exclusive access to `dma_object`. + unsafe { + transmute( + &dma_object, + hdr_offset + hdr.header_size as usize + i * hdr.entry_size as usize + ) + }?; + + if app.id != NVFW_FALCON_APPIF_ID_DMEMMAPPER { + continue; + } + + // SAFETY: we have exclusive access to `dma_object`. + let dmem_mapper: &mut FalconAppifDmemmapperV3 = unsafe { + transmute_mut( + &mut dma_object, + (desc.imem_load_size + app.dmem_base) as usize, + ) + }?; + + // SAFETY: we have exclusive access to `dma_object`. + let frts_cmd: &mut FrtsCmd = unsafe { + transmute_mut( + &mut dma_object, + (desc.imem_load_size + dmem_mapper.cmd_in_buffer_offset) as usize, + ) + }?; + + frts_cmd.read_vbios = ReadVbios { + ver: 1, + hdr: size_of::<ReadVbios>() as u32, + addr: 0, + size: 0, + flags: 2, + }; + + dmem_mapper.init_cmd = match cmd { + FwsecCommand::Frts { + frts_addr, + frts_size, + } => { + frts_cmd.frts_region = FrtsRegion { + ver: 1, + hdr: size_of::<FrtsRegion>() as u32, + addr: (frts_addr >> 12) as u32, + size: (frts_size >> 12) as u32, + ftype: NVFW_FRTS_CMD_REGION_TYPE_FB, + }; + + NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS + } + FwsecCommand::Sb => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB, + }; + + // Return early as we found and patched the DMEMMAPPER region. + return Ok(Self(dma_object, PhantomData)); + } + + Err(ENOTSUPP) + } +} + +impl FwsecFirmware { + /// Extract the Fwsec firmware from `bios` and patch it to run on `falcon` with the `cmd` + /// command. + pub(crate) fn new( + dev: &Device<device::Bound>, + falcon: &Falcon<Gsp>, + bar: &Bar0, + bios: &Vbios, + cmd: FwsecCommand, + ) -> Result<Self> { + let ucode_dma = FirmwareDmaObject::<Self, _>::new_fwsec(dev, bios, cmd)?; + + // Patch signature if needed. + let desc = bios.fwsec_image().header()?; + let ucode_signed = if desc.signature_count != 0 { + let sig_base_img = (desc.imem_load_size + desc.pkc_data_offset) as usize; + let desc_sig_versions = u32::from(desc.signature_versions); + let reg_fuse_version = + falcon.signature_reg_fuse_version(bar, desc.engine_id_mask, desc.ucode_id)?; + dev_dbg!( + dev, + "desc_sig_versions: {:#x}, reg_fuse_version: {}\n", + desc_sig_versions, + reg_fuse_version + ); + let signature_idx = { + let reg_fuse_version_bit = 1 << reg_fuse_version; + + // Check if the fuse version is supported by the firmware. + if desc_sig_versions & reg_fuse_version_bit == 0 { + dev_err!( + dev, + "no matching signature: {:#x} {:#x}\n", + reg_fuse_version_bit, + desc_sig_versions, + ); + return Err(EINVAL); + } + + // `desc_sig_versions` has one bit set per included signature. Thus, the index of + // the signature to patch is the number of bits in `desc_sig_versions` set to `1` + // before `reg_fuse_version_bit`. + + // Mask of the bits of `desc_sig_versions` to preserve. + let reg_fuse_version_mask = reg_fuse_version_bit.wrapping_sub(1); + + (desc_sig_versions & reg_fuse_version_mask).count_ones() as usize + }; + + dev_dbg!(dev, "patching signature with index {}\n", signature_idx); + let signature = bios + .fwsec_image() + .sigs(desc) + .and_then(|sigs| sigs.get(signature_idx).ok_or(EINVAL))?; + + ucode_dma.patch_signature(signature, sig_base_img)? + } else { + ucode_dma.no_patch_signature() + }; + + Ok(FwsecFirmware { + desc: desc.clone(), + ucode: ucode_signed, + }) + } + + /// Loads the FWSEC firmware into `falcon` and execute it. + pub(crate) fn run( + &self, + dev: &Device<device::Bound>, + falcon: &Falcon<Gsp>, + bar: &Bar0, + ) -> Result<()> { + // Reset falcon, load the firmware, and run it. + falcon + .reset(bar) + .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?; + falcon + .dma_load(bar, self) + .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; + let (mbox0, _) = falcon + .boot(bar, Some(0), None) + .inspect_err(|e| dev_err!(dev, "Failed to boot FWSEC firmware: {:?}\n", e))?; + if mbox0 != 0 { + dev_err!(dev, "FWSEC firmware returned error {}\n", mbox0); + Err(EIO) + } else { + Ok(()) + } + } +} diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs new file mode 100644 index 000000000000..9b70095434c6 --- /dev/null +++ b/drivers/gpu/nova-core/firmware/gsp.rs @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::mem::size_of_val; + +use kernel::device; +use kernel::dma::{DataDirection, DmaAddress}; +use kernel::kvec; +use kernel::prelude::*; +use kernel::scatterlist::{Owned, SGTable}; + +use crate::dma::DmaObject; +use crate::firmware::riscv::RiscvFirmware; +use crate::gpu::{Architecture, Chipset}; +use crate::gsp::GSP_PAGE_SIZE; + +/// Ad-hoc and temporary module to extract sections from ELF images. +/// +/// Some firmware images are currently packaged as ELF files, where sections names are used as keys +/// to specific and related bits of data. Future firmware versions are scheduled to move away from +/// that scheme before nova-core becomes stable, which means this module will eventually be +/// removed. +mod elf { + use core::mem::size_of; + + use kernel::bindings; + use kernel::str::CStr; + use kernel::transmute::FromBytes; + + /// Newtype to provide a [`FromBytes`] implementation. + #[repr(transparent)] + struct Elf64Hdr(bindings::elf64_hdr); + // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. + unsafe impl FromBytes for Elf64Hdr {} + + #[repr(transparent)] + struct Elf64SHdr(bindings::elf64_shdr); + // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. + unsafe impl FromBytes for Elf64SHdr {} + + /// Tries to extract section with name `name` from the ELF64 image `elf`, and returns it. + pub(super) fn elf64_section<'a, 'b>(elf: &'a [u8], name: &'b str) -> Option<&'a [u8]> { + let hdr = &elf + .get(0..size_of::<bindings::elf64_hdr>()) + .and_then(Elf64Hdr::from_bytes)? + .0; + + // Get all the section headers. + let mut shdr = { + let shdr_num = usize::from(hdr.e_shnum); + let shdr_start = usize::try_from(hdr.e_shoff).ok()?; + let shdr_end = shdr_num + .checked_mul(size_of::<Elf64SHdr>()) + .and_then(|v| v.checked_add(shdr_start))?; + + elf.get(shdr_start..shdr_end) + .map(|slice| slice.chunks_exact(size_of::<Elf64SHdr>()))? + }; + + // Get the strings table. + let strhdr = shdr + .clone() + .nth(usize::from(hdr.e_shstrndx)) + .and_then(Elf64SHdr::from_bytes)?; + + // Find the section which name matches `name` and return it. + shdr.find(|&sh| { + let Some(hdr) = Elf64SHdr::from_bytes(sh) else { + return false; + }; + + let Some(name_idx) = strhdr + .0 + .sh_offset + .checked_add(u64::from(hdr.0.sh_name)) + .and_then(|idx| usize::try_from(idx).ok()) + else { + return false; + }; + + // Get the start of the name. + elf.get(name_idx..) + // Stop at the first `0`. + .and_then(|nstr| nstr.get(0..=nstr.iter().position(|b| *b == 0)?)) + // Convert into CStr. This should never fail because of the line above. + .and_then(|nstr| CStr::from_bytes_with_nul(nstr).ok()) + // Convert into str. + .and_then(|c_str| c_str.to_str().ok()) + // Check that the name matches. + .map(|str| str == name) + .unwrap_or(false) + }) + // Return the slice containing the section. + .and_then(|sh| { + let hdr = Elf64SHdr::from_bytes(sh)?; + let start = usize::try_from(hdr.0.sh_offset).ok()?; + let end = usize::try_from(hdr.0.sh_size) + .ok() + .and_then(|sh_size| start.checked_add(sh_size))?; + + elf.get(start..end) + }) + } +} + +/// GSP firmware with 3-level radix page tables for the GSP bootloader. +/// +/// The bootloader expects firmware to be mapped starting at address 0 in GSP's virtual address +/// space: +/// +/// ```text +/// Level 0: 1 page, 1 entry -> points to first level 1 page +/// Level 1: Multiple pages/entries -> each entry points to a level 2 page +/// Level 2: Multiple pages/entries -> each entry points to a firmware page +/// ``` +/// +/// Each page is 4KB, each entry is 8 bytes (64-bit DMA address). +/// Also known as "Radix3" firmware. +#[pin_data] +pub(crate) struct GspFirmware { + /// The GSP firmware inside a [`VVec`], device-mapped via a SG table. + #[pin] + fw: SGTable<Owned<VVec<u8>>>, + /// Level 2 page table whose entries contain DMA addresses of firmware pages. + #[pin] + level2: SGTable<Owned<VVec<u8>>>, + /// Level 1 page table whose entries contain DMA addresses of level 2 pages. + #[pin] + level1: SGTable<Owned<VVec<u8>>>, + /// Level 0 page table (single 4KB page) with one entry: DMA address of first level 1 page. + level0: DmaObject, + /// Size in bytes of the firmware contained in [`Self::fw`]. + size: usize, + /// Device-mapped GSP signatures matching the GPU's [`Chipset`]. + signatures: DmaObject, + /// GSP bootloader, verifies the GSP firmware before loading and running it. + bootloader: RiscvFirmware, +} + +impl GspFirmware { + /// Loads the GSP firmware binaries, map them into `dev`'s address-space, and creates the page + /// tables expected by the GSP bootloader to load it. + pub(crate) fn new<'a, 'b>( + dev: &'a device::Device<device::Bound>, + chipset: Chipset, + ver: &'b str, + ) -> Result<impl PinInit<Self, Error> + 'a> { + let fw = super::request_firmware(dev, chipset, "gsp", ver)?; + + let fw_section = elf::elf64_section(fw.data(), ".fwimage").ok_or(EINVAL)?; + + let sigs_section = match chipset.arch() { + Architecture::Ampere => ".fwsignature_ga10x", + _ => return Err(ENOTSUPP), + }; + let signatures = elf::elf64_section(fw.data(), sigs_section) + .ok_or(EINVAL) + .and_then(|data| DmaObject::from_data(dev, data))?; + + let size = fw_section.len(); + + // Move the firmware into a vmalloc'd vector and map it into the device address + // space. + let fw_vvec = VVec::with_capacity(fw_section.len(), GFP_KERNEL) + .and_then(|mut v| { + v.extend_from_slice(fw_section, GFP_KERNEL)?; + Ok(v) + }) + .map_err(|_| ENOMEM)?; + + let bl = super::request_firmware(dev, chipset, "bootloader", ver)?; + let bootloader = RiscvFirmware::new(dev, &bl)?; + + Ok(try_pin_init!(Self { + fw <- SGTable::new(dev, fw_vvec, DataDirection::ToDevice, GFP_KERNEL), + level2 <- { + // Allocate the level 2 page table, map the firmware onto it, and map it into the + // device address space. + VVec::<u8>::with_capacity( + fw.iter().count() * core::mem::size_of::<u64>(), + GFP_KERNEL, + ) + .map_err(|_| ENOMEM) + .and_then(|level2| map_into_lvl(&fw, level2)) + .map(|level2| SGTable::new(dev, level2, DataDirection::ToDevice, GFP_KERNEL))? + }, + level1 <- { + // Allocate the level 1 page table, map the level 2 page table onto it, and map it + // into the device address space. + VVec::<u8>::with_capacity( + level2.iter().count() * core::mem::size_of::<u64>(), + GFP_KERNEL, + ) + .map_err(|_| ENOMEM) + .and_then(|level1| map_into_lvl(&level2, level1)) + .map(|level1| SGTable::new(dev, level1, DataDirection::ToDevice, GFP_KERNEL))? + }, + level0: { + // Allocate the level 0 page table as a device-visible DMA object, and map the + // level 1 page table onto it. + + // Level 0 page table data. + let mut level0_data = kvec![0u8; GSP_PAGE_SIZE]?; + + // Fill level 1 page entry. + #[allow(clippy::useless_conversion)] + let level1_entry = u64::from(level1.iter().next().unwrap().dma_address()); + let dst = &mut level0_data[..size_of_val(&level1_entry)]; + dst.copy_from_slice(&level1_entry.to_le_bytes()); + + // Turn the level0 page table into a [`DmaObject`]. + DmaObject::from_data(dev, &level0_data)? + }, + size, + signatures, + bootloader, + })) + } + + #[expect(unused)] + /// Returns the DMA handle of the radix3 level 0 page table. + pub(crate) fn radix3_dma_handle(&self) -> DmaAddress { + self.level0.dma_handle() + } +} + +/// Build a page table from a scatter-gather list. +/// +/// Takes each DMA-mapped region from `sg_table` and writes page table entries +/// for all 4KB pages within that region. For example, a 16KB SG entry becomes +/// 4 consecutive page table entries. +fn map_into_lvl(sg_table: &SGTable<Owned<VVec<u8>>>, mut dst: VVec<u8>) -> Result<VVec<u8>> { + for sg_entry in sg_table.iter() { + // Number of pages we need to map. + let num_pages = (sg_entry.dma_len() as usize).div_ceil(GSP_PAGE_SIZE); + + for i in 0..num_pages { + let entry = sg_entry.dma_address() + (i as u64 * GSP_PAGE_SIZE as u64); + dst.extend_from_slice(&entry.to_le_bytes(), GFP_KERNEL)?; + } + } + + Ok(dst) +} diff --git a/drivers/gpu/nova-core/firmware/riscv.rs b/drivers/gpu/nova-core/firmware/riscv.rs new file mode 100644 index 000000000000..afb08f5bc4ba --- /dev/null +++ b/drivers/gpu/nova-core/firmware/riscv.rs @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Support for firmware binaries designed to run on a RISC-V core. Such firmwares files have a +//! dedicated header. + +use core::mem::size_of; + +use kernel::device; +use kernel::firmware::Firmware; +use kernel::prelude::*; +use kernel::transmute::FromBytes; + +use crate::dma::DmaObject; +use crate::firmware::BinFirmware; + +/// Descriptor for microcode running on a RISC-V core. +#[repr(C)] +#[derive(Debug)] +struct RmRiscvUCodeDesc { + version: u32, + bootloader_offset: u32, + bootloader_size: u32, + bootloader_param_offset: u32, + bootloader_param_size: u32, + riscv_elf_offset: u32, + riscv_elf_size: u32, + app_version: u32, + manifest_offset: u32, + manifest_size: u32, + monitor_data_offset: u32, + monitor_data_size: u32, + monitor_code_offset: u32, + monitor_code_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for RmRiscvUCodeDesc {} + +impl RmRiscvUCodeDesc { + /// Interprets the header of `bin_fw` as a [`RmRiscvUCodeDesc`] and returns it. + /// + /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. + fn new(bin_fw: &BinFirmware<'_>) -> Result<Self> { + let offset = bin_fw.hdr.header_offset as usize; + + bin_fw + .fw + .get(offset..offset + size_of::<Self>()) + .and_then(Self::from_bytes_copy) + .ok_or(EINVAL) + } +} + +/// A parsed firmware for a RISC-V core, ready to be loaded and run. +#[expect(unused)] +pub(crate) struct RiscvFirmware { + /// Offset at which the code starts in the firmware image. + code_offset: u32, + /// Offset at which the data starts in the firmware image. + data_offset: u32, + /// Offset at which the manifest starts in the firmware image. + manifest_offset: u32, + /// Application version. + app_version: u32, + /// Device-mapped firmware image. + ucode: DmaObject, +} + +impl RiscvFirmware { + /// Parses the RISC-V firmware image contained in `fw`. + pub(crate) fn new(dev: &device::Device<device::Bound>, fw: &Firmware) -> Result<Self> { + let bin_fw = BinFirmware::new(fw)?; + + let riscv_desc = RmRiscvUCodeDesc::new(&bin_fw)?; + + let ucode = { + let start = bin_fw.hdr.data_offset as usize; + let len = bin_fw.hdr.data_size as usize; + + DmaObject::from_data(dev, fw.data().get(start..start + len).ok_or(EINVAL)?)? + }; + + Ok(Self { + ucode, + code_offset: riscv_desc.monitor_code_offset, + data_offset: riscv_desc.monitor_data_offset, + manifest_offset: riscv_desc.manifest_offset, + app_version: riscv_desc.app_version, + }) + } +} diff --git a/drivers/gpu/nova-core/gfw.rs b/drivers/gpu/nova-core/gfw.rs new file mode 100644 index 000000000000..8ac1ed187199 --- /dev/null +++ b/drivers/gpu/nova-core/gfw.rs @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! GPU Firmware (`GFW`) support, a.k.a `devinit`. +//! +//! Upon reset, the GPU runs some firmware code from the BIOS to setup its core parameters. Most of +//! the GPU is considered unusable until this step is completed, so we must wait on it before +//! performing driver initialization. +//! +//! A clarification about devinit terminology: devinit is a sequence of register read/writes after +//! reset that performs tasks such as: +//! 1. Programming VRAM memory controller timings. +//! 2. Power sequencing. +//! 3. Clock and PLL configuration. +//! 4. Thermal management. +//! +//! devinit itself is a 'script' which is interpreted by an interpreter program typically running +//! on the PMU microcontroller. +//! +//! Note that the devinit sequence also needs to run during suspend/resume. + +use kernel::bindings; +use kernel::prelude::*; +use kernel::time::Delta; + +use crate::driver::Bar0; +use crate::regs; +use crate::util; + +/// Wait for the `GFW` (GPU firmware) boot completion signal (`GFW_BOOT`), or a 4 seconds timeout. +/// +/// Upon GPU reset, several microcontrollers (such as PMU, SEC2, GSP etc) run some firmware code to +/// setup its core parameters. Most of the GPU is considered unusable until this step is completed, +/// so it must be waited on very early during driver initialization. +/// +/// The `GFW` code includes several components that need to execute before the driver loads. These +/// components are located in the VBIOS ROM and executed in a sequence on these different +/// microcontrollers. The devinit sequence typically runs on the PMU, and the FWSEC runs on the +/// GSP. +/// +/// This function waits for a signal indicating that core initialization is complete. Before this +/// signal is received, little can be done with the GPU. This signal is set by the FWSEC running on +/// the GSP in Heavy-secured mode. +pub(crate) fn wait_gfw_boot_completion(bar: &Bar0) -> Result { + // Before accessing the completion status in `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05`, we must + // first check `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK`. This is because + // `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05` becomes accessible only after the secure firmware + // (FWSEC) lowers the privilege level to allow CPU (LS/Light-secured) access. We can only + // safely read the status register from CPU (LS/Light-secured) once the mask indicates + // that the privilege level has been lowered. + // + // TIMEOUT: arbitrarily large value. GFW starts running immediately after the GPU is put out of + // reset, and should complete in less time than that. + util::wait_on(Delta::from_secs(4), || { + // Check that FWSEC has lowered its protection level before reading the GFW_BOOT status. + let gfw_booted = regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK::read(bar) + .read_protection_level0() + && regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT::read(bar).completed(); + + if gfw_booted { + Some(()) + } else { + // TODO[DLAY]: replace with [1] once it merges. + // [1] https://lore.kernel.org/rust-for-linux/20250423192857.199712-6-fujita.tomonori@gmail.com/ + // + // SAFETY: `msleep()` is safe to call with any parameter. + unsafe { bindings::msleep(1) }; + + None + } + }) +} diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 60b86f370284..af20e2daea24 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -1,12 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::{device, devres::Devres, error::code::*, pci, prelude::*}; +use kernel::{device, devres::Devres, error::code::*, fmt, pci, prelude::*, sync::Arc}; use crate::driver::Bar0; -use crate::firmware::{Firmware, FIRMWARE_VERSION}; +use crate::falcon::{gsp::Gsp as GspFalcon, sec2::Sec2 as Sec2Falcon, Falcon}; +use crate::fb::SysmemFlush; +use crate::gfw; +use crate::gsp::Gsp; use crate::regs; -use crate::util; -use core::fmt; macro_rules! define_chipset { ({ $($variant:ident = $value:expr),* $(,)* }) => @@ -22,16 +23,26 @@ macro_rules! define_chipset { $( Chipset::$variant, )* ]; - pub(crate) const NAMES: [&'static str; Self::ALL.len()] = [ - $( util::const_bytes_to_str( - util::to_lowercase_bytes::<{ stringify!($variant).len() }>( - stringify!($variant) - ).as_slice() - ), )* - ]; + ::kernel::macros::paste!( + /// Returns the name of this chipset, in lowercase. + /// + /// # Examples + /// + /// ``` + /// let chipset = Chipset::GA102; + /// assert_eq!(chipset.name(), "ga102"); + /// ``` + pub(crate) const fn name(&self) -> &'static str { + match *self { + $( + Chipset::$variant => stringify!([<$variant:lower>]), + )* + } + } + ); } - // TODO replace with something like derive(FromPrimitive) + // TODO[FPRI]: replace with something like derive(FromPrimitive) impl TryFrom<u32> for Chipset { type Error = kernel::error::Error; @@ -161,31 +172,70 @@ impl Spec { pub(crate) struct Gpu { spec: Spec, /// MMIO mapping of PCI BAR 0 - bar: Devres<Bar0>, - fw: Firmware, + bar: Arc<Devres<Bar0>>, + /// System memory page required for flushing all pending GPU-side memory writes done through + /// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation). + sysmem_flush: SysmemFlush, + /// GSP falcon instance, used for GSP boot up and cleanup. + gsp_falcon: Falcon<GspFalcon>, + /// SEC2 falcon instance, used for GSP boot up and cleanup. + sec2_falcon: Falcon<Sec2Falcon>, + /// GSP runtime data. Temporarily an empty placeholder. + #[pin] + gsp: Gsp, } impl Gpu { - pub(crate) fn new( - pdev: &pci::Device<device::Bound>, - devres_bar: Devres<Bar0>, - ) -> Result<impl PinInit<Self>> { - let bar = devres_bar.access(pdev.as_ref())?; - let spec = Spec::new(bar)?; - let fw = Firmware::new(pdev.as_ref(), spec.chipset, FIRMWARE_VERSION)?; - - dev_info!( - pdev.as_ref(), - "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", - spec.chipset, - spec.chipset.arch(), - spec.revision - ); - - Ok(pin_init!(Self { - spec, + pub(crate) fn new<'a>( + pdev: &'a pci::Device<device::Bound>, + devres_bar: Arc<Devres<Bar0>>, + bar: &'a Bar0, + ) -> impl PinInit<Self, Error> + 'a { + try_pin_init!(Self { + spec: Spec::new(bar).inspect(|spec| { + dev_info!( + pdev.as_ref(), + "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", + spec.chipset, + spec.chipset.arch(), + spec.revision + ); + })?, + + // We must wait for GFW_BOOT completion before doing any significant setup on the GPU. + _: { + gfw::wait_gfw_boot_completion(bar) + .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?; + }, + + sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?, + + gsp_falcon: Falcon::new( + pdev.as_ref(), + spec.chipset, + bar, + spec.chipset > Chipset::GA100, + ) + .inspect(|falcon| falcon.clear_swgen0_intr(bar))?, + + sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset, bar, true)?, + + gsp <- Gsp::new(), + + _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? }, + bar: devres_bar, - fw - })) + }) + } + + /// Called when the corresponding [`Device`](device::Device) is unbound. + /// + /// Note: This method must only be called from `Driver::unbind`. + pub(crate) fn unbind(&self, dev: &device::Device<device::Core>) { + kernel::warn_on!(self + .bar + .access(dev) + .inspect(|bar| self.sysmem_flush.unregister(bar)) + .is_err()); } } diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs new file mode 100644 index 000000000000..64e472e7a9d3 --- /dev/null +++ b/drivers/gpu/nova-core/gsp.rs @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 + +mod boot; + +use kernel::prelude::*; + +mod fw; + +pub(crate) const GSP_PAGE_SHIFT: usize = 12; +pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT; + +/// GSP runtime data. +/// +/// This is an empty pinned placeholder for now. +#[pin_data] +pub(crate) struct Gsp {} + +impl Gsp { + pub(crate) fn new() -> impl PinInit<Self> { + pin_init!(Self {}) + } +} diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs new file mode 100644 index 000000000000..2800f3aee37d --- /dev/null +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::device; +use kernel::pci; +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon}; +use crate::fb::FbLayout; +use crate::firmware::{ + booter::{BooterFirmware, BooterKind}, + fwsec::{FwsecCommand, FwsecFirmware}, + gsp::GspFirmware, + FIRMWARE_VERSION, +}; +use crate::gpu::Chipset; +use crate::regs; +use crate::vbios::Vbios; + +impl super::Gsp { + /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly + /// created the WPR2 region. + fn run_fwsec_frts( + dev: &device::Device<device::Bound>, + falcon: &Falcon<Gsp>, + bar: &Bar0, + bios: &Vbios, + fb_layout: &FbLayout, + ) -> Result<()> { + // Check that the WPR2 region does not already exists - if it does, we cannot run + // FWSEC-FRTS until the GPU is reset. + if regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound() != 0 { + dev_err!( + dev, + "WPR2 region already exists - GPU needs to be reset to proceed\n" + ); + return Err(EBUSY); + } + + let fwsec_frts = FwsecFirmware::new( + dev, + falcon, + bar, + bios, + FwsecCommand::Frts { + frts_addr: fb_layout.frts.start, + frts_size: fb_layout.frts.end - fb_layout.frts.start, + }, + )?; + + // Run FWSEC-FRTS to create the WPR2 region. + fwsec_frts.run(dev, falcon, bar)?; + + // SCRATCH_E contains the error code for FWSEC-FRTS. + let frts_status = regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR::read(bar).frts_err_code(); + if frts_status != 0 { + dev_err!( + dev, + "FWSEC-FRTS returned with error code {:#x}", + frts_status + ); + + return Err(EIO); + } + + // Check that the WPR2 region has been created as we requested. + let (wpr2_lo, wpr2_hi) = ( + regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO::read(bar).lower_bound(), + regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound(), + ); + + match (wpr2_lo, wpr2_hi) { + (_, 0) => { + dev_err!(dev, "WPR2 region not created after running FWSEC-FRTS\n"); + + Err(EIO) + } + (wpr2_lo, _) if wpr2_lo != fb_layout.frts.start => { + dev_err!( + dev, + "WPR2 region created at unexpected address {:#x}; expected {:#x}\n", + wpr2_lo, + fb_layout.frts.start, + ); + + Err(EIO) + } + (wpr2_lo, wpr2_hi) => { + dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi); + dev_dbg!(dev, "GPU instance built\n"); + + Ok(()) + } + } + } + + /// Attempt to boot the GSP. + /// + /// This is a GPU-dependent and complex procedure that involves loading firmware files from + /// user-space, patching them with signatures, and building firmware-specific intricate data + /// structures that the GSP will use at runtime. + /// + /// Upon return, the GSP is up and running, and its runtime object given as return value. + pub(crate) fn boot( + self: Pin<&mut Self>, + pdev: &pci::Device<device::Bound>, + bar: &Bar0, + chipset: Chipset, + gsp_falcon: &Falcon<Gsp>, + sec2_falcon: &Falcon<Sec2>, + ) -> Result { + let dev = pdev.as_ref(); + + let bios = Vbios::new(dev, bar)?; + + let _gsp_fw = KBox::pin_init( + GspFirmware::new(dev, chipset, FIRMWARE_VERSION)?, + GFP_KERNEL, + )?; + + let fb_layout = FbLayout::new(chipset, bar)?; + dev_dbg!(dev, "{:#x?}\n", fb_layout); + + Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?; + + let _booter_loader = BooterFirmware::new( + dev, + BooterKind::Loader, + chipset, + FIRMWARE_VERSION, + sec2_falcon, + bar, + )?; + + Ok(()) + } +} diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs new file mode 100644 index 000000000000..34226dd00982 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 + +mod r570_144; + +// Alias to avoid repeating the version number with every use. +#[expect(unused)] +use r570_144 as bindings; diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144.rs b/drivers/gpu/nova-core/gsp/fw/r570_144.rs new file mode 100644 index 000000000000..35cb0370a7c9 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw/r570_144.rs @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Firmware bindings. +//! +//! Imports the generated bindings by `bindgen`. +//! +//! This module may not be directly used. Please abstract or re-export the needed symbols in the +//! parent module instead. + +#![cfg_attr(test, allow(deref_nullptr))] +#![cfg_attr(test, allow(unaligned_references))] +#![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] +#![allow( + dead_code, + unused_imports, + clippy::all, + clippy::undocumented_unsafe_blocks, + clippy::ptr_as_ptr, + clippy::ref_as_ptr, + missing_docs, + non_camel_case_types, + non_upper_case_globals, + non_snake_case, + improper_ctypes, + unreachable_pub, + unsafe_op_in_unsafe_fn +)] +use kernel::ffi; +include!("r570_144/bindings.rs"); diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs new file mode 100644 index 000000000000..cec594032515 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -0,0 +1 @@ +// SPDX-License-Identifier: GPL-2.0 diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index 618632f0abcc..fffcaee2249f 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -2,18 +2,24 @@ //! Nova Core GPU Driver +mod dma; mod driver; +mod falcon; +mod fb; mod firmware; +mod gfw; mod gpu; +mod gsp; mod regs; mod util; +mod vbios; pub(crate) const MODULE_NAME: &kernel::str::CStr = <LocalModule as kernel::ModuleMetadata>::NAME; kernel::module_pci_driver! { type: driver::NovaCore, name: "NovaCore", - author: "Danilo Krummrich", + authors: ["Danilo Krummrich"], description: "Nova Core GPU driver", license: "GPL v2", firmware: [], diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 5a1273230306..206dab2e1335 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -5,12 +5,16 @@ #![allow(non_camel_case_types)] #[macro_use] -mod macros; +pub(crate) mod macros; +use crate::falcon::{ + DmaTrfCmdSize, FalconCoreRev, FalconCoreRevSubversion, FalconFbifMemType, FalconFbifTarget, + FalconModSelAlgo, FalconSecurityModel, PFalcon2Base, PFalconBase, PeregrineCoreSelect, +}; use crate::gpu::{Architecture, Chipset}; use kernel::prelude::*; -/* PMC */ +// PMC register!(NV_PMC_BOOT_0 @ 0x00000000, "Basic revision information about the GPU" { 3:0 minor_revision as u8, "Minor revision of the chip"; @@ -24,7 +28,7 @@ impl NV_PMC_BOOT_0 { /// Combines `architecture_0` and `architecture_1` to obtain the architecture of the chip. pub(crate) fn architecture(self) -> Result<Architecture> { Architecture::try_from( - self.architecture_0() | (self.architecture_1() << Self::ARCHITECTURE_0.len()), + self.architecture_0() | (self.architecture_1() << Self::ARCHITECTURE_0_RANGE.len()), ) } @@ -32,8 +36,311 @@ impl NV_PMC_BOOT_0 { pub(crate) fn chipset(self) -> Result<Chipset> { self.architecture() .map(|arch| { - ((arch as u32) << Self::IMPLEMENTATION.len()) | self.implementation() as u32 + ((arch as u32) << Self::IMPLEMENTATION_RANGE.len()) + | u32::from(self.implementation()) }) .and_then(Chipset::try_from) } } + +// PBUS + +register!(NV_PBUS_SW_SCRATCH @ 0x00001400[64] {}); + +register!(NV_PBUS_SW_SCRATCH_0E_FRTS_ERR => NV_PBUS_SW_SCRATCH[0xe], + "scratch register 0xe used as FRTS firmware error code" { + 31:16 frts_err_code as u16; +}); + +// PFB + +// The following two registers together hold the physical system memory address that is used by the +// GPU to perform sysmembar operations (see `fb::SysmemFlush`). + +register!(NV_PFB_NISO_FLUSH_SYSMEM_ADDR @ 0x00100c10 { + 31:0 adr_39_08 as u32; +}); + +register!(NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI @ 0x00100c40 { + 23:0 adr_63_40 as u32; +}); + +register!(NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE @ 0x00100ce0 { + 3:0 lower_scale as u8; + 9:4 lower_mag as u8; + 30:30 ecc_mode_enabled as bool; +}); + +impl NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE { + /// Returns the usable framebuffer size, in bytes. + pub(crate) fn usable_fb_size(self) -> u64 { + let size = (u64::from(self.lower_mag()) << u64::from(self.lower_scale())) + * kernel::sizes::SZ_1M as u64; + + if self.ecc_mode_enabled() { + // Remove the amount of memory reserved for ECC (one per 16 units). + size / 16 * 15 + } else { + size + } + } +} + +register!(NV_PFB_PRI_MMU_WPR2_ADDR_LO@0x001fa824 { + 31:4 lo_val as u32, "Bits 12..40 of the lower (inclusive) bound of the WPR2 region"; +}); + +impl NV_PFB_PRI_MMU_WPR2_ADDR_LO { + /// Returns the lower (inclusive) bound of the WPR2 region. + pub(crate) fn lower_bound(self) -> u64 { + u64::from(self.lo_val()) << 12 + } +} + +register!(NV_PFB_PRI_MMU_WPR2_ADDR_HI@0x001fa828 { + 31:4 hi_val as u32, "Bits 12..40 of the higher (exclusive) bound of the WPR2 region"; +}); + +impl NV_PFB_PRI_MMU_WPR2_ADDR_HI { + /// Returns the higher (exclusive) bound of the WPR2 region. + /// + /// A value of zero means the WPR2 region is not set. + pub(crate) fn higher_bound(self) -> u64 { + u64::from(self.hi_val()) << 12 + } +} + +// PGC6 register space. +// +// `GC6` is a GPU low-power state where VRAM is in self-refresh and the GPU is powered down (except +// for power rails needed to keep self-refresh working and important registers and hardware +// blocks). +// +// These scratch registers remain powered on even in a low-power state and have a designated group +// number. + +// Privilege level mask register. It dictates whether the host CPU has privilege to access the +// `PGC6_AON_SECURE_SCRATCH_GROUP_05` register (which it needs to read GFW_BOOT). +register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128, + "Privilege level mask register" { + 0:0 read_protection_level0 as bool, "Set after FWSEC lowers its protection level"; +}); + +// OpenRM defines this as a register array, but doesn't specify its size and only uses its first +// element. Be conservative until we know the actual size or need to use more registers. +register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05 @ 0x00118234[1] {}); + +register!( + NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05[0], + "Scratch group 05 register 0 used as GFW boot progress indicator" { + 7:0 progress as u8, "Progress of GFW boot (0xff means completed)"; + } +); + +impl NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT { + /// Returns `true` if GFW boot is completed. + pub(crate) fn completed(self) -> bool { + self.progress() == 0xff + } +} + +register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_42 @ 0x001183a4 { + 31:0 value as u32; +}); + +register!( + NV_USABLE_FB_SIZE_IN_MB => NV_PGC6_AON_SECURE_SCRATCH_GROUP_42, + "Scratch group 42 register used as framebuffer size" { + 31:0 value as u32, "Usable framebuffer size, in megabytes"; + } +); + +impl NV_USABLE_FB_SIZE_IN_MB { + /// Returns the usable framebuffer size, in bytes. + pub(crate) fn usable_fb_size(self) -> u64 { + u64::from(self.value()) * kernel::sizes::SZ_1M as u64 + } +} + +// PDISP + +register!(NV_PDISP_VGA_WORKSPACE_BASE @ 0x00625f04 { + 3:3 status_valid as bool, "Set if the `addr` field is valid"; + 31:8 addr as u32, "VGA workspace base address divided by 0x10000"; +}); + +impl NV_PDISP_VGA_WORKSPACE_BASE { + /// Returns the base address of the VGA workspace, or `None` if none exists. + pub(crate) fn vga_workspace_addr(self) -> Option<u64> { + if self.status_valid() { + Some(u64::from(self.addr()) << 16) + } else { + None + } + } +} + +// FUSE + +pub(crate) const NV_FUSE_OPT_FPF_SIZE: usize = 16; + +register!(NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION @ 0x00824100[NV_FUSE_OPT_FPF_SIZE] { + 15:0 data as u16; +}); + +register!(NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION @ 0x00824140[NV_FUSE_OPT_FPF_SIZE] { + 15:0 data as u16; +}); + +register!(NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION @ 0x008241c0[NV_FUSE_OPT_FPF_SIZE] { + 15:0 data as u16; +}); + +// PFALCON + +register!(NV_PFALCON_FALCON_IRQSCLR @ PFalconBase[0x00000004] { + 4:4 halt as bool; + 6:6 swgen0 as bool; +}); + +register!(NV_PFALCON_FALCON_MAILBOX0 @ PFalconBase[0x00000040] { + 31:0 value as u32; +}); + +register!(NV_PFALCON_FALCON_MAILBOX1 @ PFalconBase[0x00000044] { + 31:0 value as u32; +}); + +register!(NV_PFALCON_FALCON_RM @ PFalconBase[0x00000084] { + 31:0 value as u32; +}); + +register!(NV_PFALCON_FALCON_HWCFG2 @ PFalconBase[0x000000f4] { + 10:10 riscv as bool; + 12:12 mem_scrubbing as bool, "Set to 0 after memory scrubbing is completed"; + 31:31 reset_ready as bool, "Signal indicating that reset is completed (GA102+)"; +}); + +impl NV_PFALCON_FALCON_HWCFG2 { + /// Returns `true` if memory scrubbing is completed. + pub(crate) fn mem_scrubbing_done(self) -> bool { + !self.mem_scrubbing() + } +} + +register!(NV_PFALCON_FALCON_CPUCTL @ PFalconBase[0x00000100] { + 1:1 startcpu as bool; + 4:4 halted as bool; + 6:6 alias_en as bool; +}); + +register!(NV_PFALCON_FALCON_BOOTVEC @ PFalconBase[0x00000104] { + 31:0 value as u32; +}); + +register!(NV_PFALCON_FALCON_DMACTL @ PFalconBase[0x0000010c] { + 0:0 require_ctx as bool; + 1:1 dmem_scrubbing as bool; + 2:2 imem_scrubbing as bool; + 6:3 dmaq_num as u8; + 7:7 secure_stat as bool; +}); + +register!(NV_PFALCON_FALCON_DMATRFBASE @ PFalconBase[0x00000110] { + 31:0 base as u32; +}); + +register!(NV_PFALCON_FALCON_DMATRFMOFFS @ PFalconBase[0x00000114] { + 23:0 offs as u32; +}); + +register!(NV_PFALCON_FALCON_DMATRFCMD @ PFalconBase[0x00000118] { + 0:0 full as bool; + 1:1 idle as bool; + 3:2 sec as u8; + 4:4 imem as bool; + 5:5 is_write as bool; + 10:8 size as u8 ?=> DmaTrfCmdSize; + 14:12 ctxdma as u8; + 16:16 set_dmtag as u8; +}); + +register!(NV_PFALCON_FALCON_DMATRFFBOFFS @ PFalconBase[0x0000011c] { + 31:0 offs as u32; +}); + +register!(NV_PFALCON_FALCON_DMATRFBASE1 @ PFalconBase[0x00000128] { + 8:0 base as u16; +}); + +register!(NV_PFALCON_FALCON_HWCFG1 @ PFalconBase[0x0000012c] { + 3:0 core_rev as u8 ?=> FalconCoreRev, "Core revision"; + 5:4 security_model as u8 ?=> FalconSecurityModel, "Security model"; + 7:6 core_rev_subversion as u8 ?=> FalconCoreRevSubversion, "Core revision subversion"; +}); + +register!(NV_PFALCON_FALCON_CPUCTL_ALIAS @ PFalconBase[0x00000130] { + 1:1 startcpu as bool; +}); + +// Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the falcon +// instance. +register!(NV_PFALCON_FALCON_ENGINE @ PFalconBase[0x000003c0] { + 0:0 reset as bool; +}); + +register!(NV_PFALCON_FBIF_TRANSCFG @ PFalconBase[0x00000600[8]] { + 1:0 target as u8 ?=> FalconFbifTarget; + 2:2 mem_type as bool => FalconFbifMemType; +}); + +register!(NV_PFALCON_FBIF_CTL @ PFalconBase[0x00000624] { + 7:7 allow_phys_no_ctx as bool; +}); + +/* PFALCON2 */ + +register!(NV_PFALCON2_FALCON_MOD_SEL @ PFalcon2Base[0x00000180] { + 7:0 algo as u8 ?=> FalconModSelAlgo; +}); + +register!(NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID @ PFalcon2Base[0x00000198] { + 7:0 ucode_id as u8; +}); + +register!(NV_PFALCON2_FALCON_BROM_ENGIDMASK @ PFalcon2Base[0x0000019c] { + 31:0 value as u32; +}); + +// OpenRM defines this as a register array, but doesn't specify its size and only uses its first +// element. Be conservative until we know the actual size or need to use more registers. +register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ PFalcon2Base[0x00000210[1]] { + 31:0 value as u32; +}); + +// PRISCV + +register!(NV_PRISCV_RISCV_BCR_CTRL @ PFalconBase[0x00001668] { + 0:0 valid as bool; + 4:4 core_select as bool => PeregrineCoreSelect; + 8:8 br_fetch as bool; +}); + +// The modules below provide registers that are not identical on all supported chips. They should +// only be used in HAL modules. + +pub(crate) mod gm107 { + // FUSE + + register!(NV_FUSE_STATUS_OPT_DISPLAY @ 0x00021c04 { + 0:0 display_disabled as bool; + }); +} + +pub(crate) mod ga100 { + // FUSE + + register!(NV_FUSE_STATUS_OPT_DISPLAY @ 0x00820c04 { + 0:0 display_disabled as bool; + }); +} diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 7ecc70efb3cd..8058e1696df9 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -1,17 +1,27 @@ // SPDX-License-Identifier: GPL-2.0 -//! Macro to define register layout and accessors. +//! `register!` macro to define register layout and accessors. //! //! A single register typically includes several fields, which are accessed through a combination //! of bit-shift and mask operations that introduce a class of potential mistakes, notably because //! not all possible field values are necessarily valid. //! -//! The macro in this module allow to define, using an intruitive and readable syntax, a dedicated -//! type for each register with its own field accessors that can return an error is a field's value -//! is invalid. +//! The `register!` macro in this module provides an intuitive and readable syntax for defining a +//! dedicated type for each register. Each such type comes with its own field accessors that can +//! return an error if a field's value is invalid. -/// Defines a dedicated type for a register with an absolute offset, alongside with getter and -/// setter methods for its fields and methods to read and write it from an `Io` region. +/// Trait providing a base address to be added to the offset of a relative register to obtain +/// its actual offset. +/// +/// The `T` generic argument is used to distinguish which base to use, in case a type provides +/// several bases. It is given to the `register!` macro to restrict the use of the register to +/// implementors of this particular variant. +pub(crate) trait RegisterBase<T> { + const BASE: usize; +} + +/// Defines a dedicated type for a register with an absolute offset, including getter and setter +/// methods for its fields and methods to read and write it from an `Io` region. /// /// Example: /// @@ -24,7 +34,7 @@ /// ``` /// /// This defines a `BOOT_0` type which can be read or written from offset `0x100` of an `Io` -/// region. It is composed of 3 fields, for instance `minor_revision` is made of the 4 less +/// region. It is composed of 3 fields, for instance `minor_revision` is made of the 4 least /// significant bits of the register. Each field can be accessed and modified using accessor /// methods: /// @@ -33,88 +43,345 @@ /// let boot0 = BOOT_0::read(&bar); /// pr_info!("chip revision: {}.{}", boot0.major_revision(), boot0.minor_revision()); /// -/// // `Chipset::try_from` will be called with the value of the field and returns an error if the -/// // value is invalid. +/// // `Chipset::try_from` is called with the value of the `chipset` field and returns an +/// // error if it is invalid. /// let chipset = boot0.chipset()?; /// /// // Update some fields and write the value back. /// boot0.set_major_revision(3).set_minor_revision(10).write(&bar); /// -/// // Or just read and update the register in a single step: +/// // Or, just read and update the register in a single step: /// BOOT_0::alter(&bar, |r| r.set_major_revision(3).set_minor_revision(10)); /// ``` /// -/// Fields can be defined as follows: +/// Fields are defined as follows: /// -/// - `as <type>` simply returns the field value casted as the requested integer type, typically -/// `u32`, `u16`, `u8` or `bool`. Note that `bool` fields must have a range of 1 bit. +/// - `as <type>` simply returns the field value casted to <type>, typically `u32`, `u16`, `u8` or +/// `bool`. Note that `bool` fields must have a range of 1 bit. /// - `as <type> => <into_type>` calls `<into_type>`'s `From::<<type>>` implementation and returns /// the result. /// - `as <type> ?=> <try_into_type>` calls `<try_into_type>`'s `TryFrom::<<type>>` implementation -/// and returns the result. This is useful on fields for which not all values are value. +/// and returns the result. This is useful with fields for which not all values are valid. /// /// The documentation strings are optional. If present, they will be added to the type's /// definition, or the field getter and setter methods they are attached to. /// -/// Putting a `+` before the address of the register makes it relative to a base: the `read` and -/// `write` methods take a `base` argument that is added to the specified address before access, -/// and `try_read` and `try_write` methods are also created, allowing access with offsets unknown -/// at compile-time: +/// It is also possible to create a alias register by using the `=> ALIAS` syntax. This is useful +/// for cases where a register's interpretation depends on the context: +/// +/// ```no_run +/// register!(SCRATCH @ 0x00000200, "Scratch register" { +/// 31:0 value as u32, "Raw value"; +/// }); +/// +/// register!(SCRATCH_BOOT_STATUS => SCRATCH, "Boot status of the firmware" { +/// 0:0 completed as bool, "Whether the firmware has completed booting"; +/// }); +/// ``` +/// +/// In this example, `SCRATCH_0_BOOT_STATUS` uses the same I/O address as `SCRATCH`, while also +/// providing its own `completed` field. +/// +/// ## Relative registers +/// +/// A register can be defined as being accessible from a fixed offset of a provided base. For +/// instance, imagine the following I/O space: +/// +/// ```text +/// +-----------------------------+ +/// | ... | +/// | | +/// 0x100--->+------------CPU0-------------+ +/// | | +/// 0x110--->+-----------------------------+ +/// | CPU_CTL | +/// +-----------------------------+ +/// | ... | +/// | | +/// | | +/// 0x200--->+------------CPU1-------------+ +/// | | +/// 0x210--->+-----------------------------+ +/// | CPU_CTL | +/// +-----------------------------+ +/// | ... | +/// +-----------------------------+ +/// ``` +/// +/// `CPU0` and `CPU1` both have a `CPU_CTL` register that starts at offset `0x10` of their I/O +/// space segment. Since both instances of `CPU_CTL` share the same layout, we don't want to define +/// them twice and would prefer a way to select which one to use from a single definition +/// +/// This can be done using the `Base[Offset]` syntax when specifying the register's address. +/// +/// `Base` is an arbitrary type (typically a ZST) to be used as a generic parameter of the +/// [`RegisterBase`] trait to provide the base as a constant, i.e. each type providing a base for +/// this register needs to implement `RegisterBase<Base>`. Here is the above example translated +/// into code: +/// +/// ```no_run +/// // Type used to identify the base. +/// pub(crate) struct CpuCtlBase; +/// +/// // ZST describing `CPU0`. +/// struct Cpu0; +/// impl RegisterBase<CpuCtlBase> for Cpu0 { +/// const BASE: usize = 0x100; +/// } +/// // Singleton of `CPU0` used to identify it. +/// const CPU0: Cpu0 = Cpu0; +/// +/// // ZST describing `CPU1`. +/// struct Cpu1; +/// impl RegisterBase<CpuCtlBase> for Cpu1 { +/// const BASE: usize = 0x200; +/// } +/// // Singleton of `CPU1` used to identify it. +/// const CPU1: Cpu1 = Cpu1; +/// +/// // This makes `CPU_CTL` accessible from all implementors of `RegisterBase<CpuCtlBase>`. +/// register!(CPU_CTL @ CpuCtlBase[0x10], "CPU core control" { +/// 0:0 start as bool, "Start the CPU core"; +/// }); +/// +/// // The `read`, `write` and `alter` methods of relative registers take an extra `base` argument +/// // that is used to resolve its final address by adding its `BASE` to the offset of the +/// // register. +/// +/// // Start `CPU0`. +/// CPU_CTL::alter(bar, &CPU0, |r| r.set_start(true)); +/// +/// // Start `CPU1`. +/// CPU_CTL::alter(bar, &CPU1, |r| r.set_start(true)); +/// +/// // Aliases can also be defined for relative register. +/// register!(CPU_CTL_ALIAS => CpuCtlBase[CPU_CTL], "Alias to CPU core control" { +/// 1:1 alias_start as bool, "Start the aliased CPU core"; +/// }); +/// +/// // Start the aliased `CPU0`. +/// CPU_CTL_ALIAS::alter(bar, &CPU0, |r| r.set_alias_start(true)); +/// ``` +/// +/// ## Arrays of registers +/// +/// Some I/O areas contain consecutive values that can be interpreted in the same way. These areas +/// can be defined as an array of identical registers, allowing them to be accessed by index with +/// compile-time or runtime bound checking. Simply define their address as `Address[Size]`, and add +/// an `idx` parameter to their `read`, `write` and `alter` methods: /// /// ```no_run -/// register!(CPU_CTL @ +0x0000010, "CPU core control" { -/// 0:0 start as bool, "Start the CPU core"; +/// # fn no_run() -> Result<(), Error> { +/// # fn get_scratch_idx() -> usize { +/// # 0x15 +/// # } +/// // Array of 64 consecutive registers with the same layout starting at offset `0x80`. +/// register!(SCRATCH @ 0x00000080[64], "Scratch registers" { +/// 31:0 value as u32; /// }); /// -/// // Flip the `start` switch for the CPU core which base address is at `CPU_BASE`. -/// let cpuctl = CPU_CTL::read(&bar, CPU_BASE); -/// pr_info!("CPU CTL: {:#x}", cpuctl); -/// cpuctl.set_start(true).write(&bar, CPU_BASE); +/// // Read scratch register 0, i.e. I/O address `0x80`. +/// let scratch_0 = SCRATCH::read(bar, 0).value(); +/// // Read scratch register 15, i.e. I/O address `0x80 + (15 * 4)`. +/// let scratch_15 = SCRATCH::read(bar, 15).value(); +/// +/// // This is out of bounds and won't build. +/// // let scratch_128 = SCRATCH::read(bar, 128).value(); +/// +/// // Runtime-obtained array index. +/// let scratch_idx = get_scratch_idx(); +/// // Access on a runtime index returns an error if it is out-of-bounds. +/// let some_scratch = SCRATCH::try_read(bar, scratch_idx)?.value(); +/// +/// // Alias to a particular register in an array. +/// // Here `SCRATCH[8]` is used to convey the firmware exit code. +/// register!(FIRMWARE_STATUS => SCRATCH[8], "Firmware exit status code" { +/// 7:0 status as u8; +/// }); +/// +/// let status = FIRMWARE_STATUS::read(bar).status(); +/// +/// // Non-contiguous register arrays can be defined by adding a stride parameter. +/// // Here, each of the 16 registers of the array are separated by 8 bytes, meaning that the +/// // registers of the two declarations below are interleaved. +/// register!(SCRATCH_INTERLEAVED_0 @ 0x000000c0[16 ; 8], "Scratch registers bank 0" { +/// 31:0 value as u32; +/// }); +/// register!(SCRATCH_INTERLEAVED_1 @ 0x000000c4[16 ; 8], "Scratch registers bank 1" { +/// 31:0 value as u32; +/// }); +/// # Ok(()) +/// # } +/// ``` +/// +/// ## Relative arrays of registers +/// +/// Combining the two features described in the sections above, arrays of registers accessible from +/// a base can also be defined: +/// +/// ```no_run +/// # fn no_run() -> Result<(), Error> { +/// # fn get_scratch_idx() -> usize { +/// # 0x15 +/// # } +/// // Type used as parameter of `RegisterBase` to specify the base. +/// pub(crate) struct CpuCtlBase; +/// +/// // ZST describing `CPU0`. +/// struct Cpu0; +/// impl RegisterBase<CpuCtlBase> for Cpu0 { +/// const BASE: usize = 0x100; +/// } +/// // Singleton of `CPU0` used to identify it. +/// const CPU0: Cpu0 = Cpu0; +/// +/// // ZST describing `CPU1`. +/// struct Cpu1; +/// impl RegisterBase<CpuCtlBase> for Cpu1 { +/// const BASE: usize = 0x200; +/// } +/// // Singleton of `CPU1` used to identify it. +/// const CPU1: Cpu1 = Cpu1; +/// +/// // 64 per-cpu scratch registers, arranged as an contiguous array. +/// register!(CPU_SCRATCH @ CpuCtlBase[0x00000080[64]], "Per-CPU scratch registers" { +/// 31:0 value as u32; +/// }); +/// +/// let cpu0_scratch_0 = CPU_SCRATCH::read(bar, &Cpu0, 0).value(); +/// let cpu1_scratch_15 = CPU_SCRATCH::read(bar, &Cpu1, 15).value(); +/// +/// // This won't build. +/// // let cpu0_scratch_128 = CPU_SCRATCH::read(bar, &Cpu0, 128).value(); +/// +/// // Runtime-obtained array index. +/// let scratch_idx = get_scratch_idx(); +/// // Access on a runtime value returns an error if it is out-of-bounds. +/// let cpu0_some_scratch = CPU_SCRATCH::try_read(bar, &Cpu0, scratch_idx)?.value(); +/// +/// // `SCRATCH[8]` is used to convey the firmware exit code. +/// register!(CPU_FIRMWARE_STATUS => CpuCtlBase[CPU_SCRATCH[8]], +/// "Per-CPU firmware exit status code" { +/// 7:0 status as u8; +/// }); +/// +/// let cpu0_status = CPU_FIRMWARE_STATUS::read(bar, &Cpu0).status(); +/// +/// // Non-contiguous register arrays can be defined by adding a stride parameter. +/// // Here, each of the 16 registers of the array are separated by 8 bytes, meaning that the +/// // registers of the two declarations below are interleaved. +/// register!(CPU_SCRATCH_INTERLEAVED_0 @ CpuCtlBase[0x00000d00[16 ; 8]], +/// "Scratch registers bank 0" { +/// 31:0 value as u32; +/// }); +/// register!(CPU_SCRATCH_INTERLEAVED_1 @ CpuCtlBase[0x00000d04[16 ; 8]], +/// "Scratch registers bank 1" { +/// 31:0 value as u32; +/// }); +/// # Ok(()) +/// # } /// ``` macro_rules! register { // Creates a register at a fixed offset of the MMIO space. + ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_fixed $name @ $offset); + }; + + // Creates an alias register of fixed offset register `alias` with its own fields. + ($name:ident => $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_fixed $name @ $alias::OFFSET); + }; + + // Creates a register at a relative offset from a base address provider. + ($name:ident @ $base:ty [ $offset:literal ] $(, $comment:literal)? { $($fields:tt)* } ) => { + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative $name @ $base [ $offset ]); + }; + + // Creates an alias register of relative offset register `alias` with its own fields. + ($name:ident => $base:ty [ $alias:ident ] $(, $comment:literal)? { $($fields:tt)* }) => { + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative $name @ $base [ $alias::OFFSET ]); + }; + + // Creates an array of registers at a fixed offset of the MMIO space. ( - $name:ident @ $offset:literal $(, $comment:literal)? { + $name:ident @ $offset:literal [ $size:expr ; $stride:expr ] $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@common $name $(, $comment)?); - register!(@field_accessors $name { $($fields)* }); - register!(@io $name @ $offset); + static_assert!(::core::mem::size_of::<u32>() <= $stride); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_array $name @ $offset [ $size ; $stride ]); }; - // Creates a register at a relative offset from a base address. + // Shortcut for contiguous array of registers (stride == size of element). ( - $name:ident @ + $offset:literal $(, $comment:literal)? { + $name:ident @ $offset:literal [ $size:expr ] $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@common $name $(, $comment)?); - register!(@field_accessors $name { $($fields)* }); - register!(@io$name @ + $offset); + register!($name @ $offset [ $size ; ::core::mem::size_of::<u32>() ] $(, $comment)? { + $($fields)* + } ); }; - // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, `BitOr`, - // and conversion to regular `u32`). - (@common $name:ident $(, $comment:literal)?) => { + // Creates an array of registers at a relative offset from a base address provider. + ( + $name:ident @ $base:ty [ $offset:literal [ $size:expr ; $stride:expr ] ] + $(, $comment:literal)? { $($fields:tt)* } + ) => { + static_assert!(::core::mem::size_of::<u32>() <= $stride); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative_array $name @ $base [ $offset [ $size ; $stride ] ]); + }; + + // Shortcut for contiguous array of relative registers (stride == size of element). + ( + $name:ident @ $base:ty [ $offset:literal [ $size:expr ] ] $(, $comment:literal)? { + $($fields:tt)* + } + ) => { + register!($name @ $base [ $offset [ $size ; ::core::mem::size_of::<u32>() ] ] + $(, $comment)? { $($fields)* } ); + }; + + // Creates an alias of register `idx` of relative array of registers `alias` with its own + // fields. + ( + $name:ident => $base:ty [ $alias:ident [ $idx:expr ] ] $(, $comment:literal)? { + $($fields:tt)* + } + ) => { + static_assert!($idx < $alias::SIZE); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative $name @ $base [ $alias::OFFSET + $idx * $alias::STRIDE ] ); + }; + + // Creates an alias of register `idx` of array of registers `alias` with its own fields. + // This rule belongs to the (non-relative) register arrays set, but needs to be put last + // to avoid it being interpreted in place of the relative register array alias rule. + ($name:ident => $alias:ident [ $idx:expr ] $(, $comment:literal)? { $($fields:tt)* }) => { + static_assert!($idx < $alias::SIZE); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_fixed $name @ $alias::OFFSET + $idx * $alias::STRIDE ); + }; + + // All rules below are helpers. + + // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, + // `Default`, `BitOr`, and conversion to the value type) and field accessor methods. + (@core $name:ident $(, $comment:literal)? { $($fields:tt)* }) => { $( #[doc=$comment] )? #[repr(transparent)] - #[derive(Clone, Copy, Default)] + #[derive(Clone, Copy)] pub(crate) struct $name(u32); - // TODO: display the raw hex value, then the value of all the fields. This requires - // matching the fields, which will complexify the syntax considerably... - impl ::core::fmt::Debug for $name { - fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { - f.debug_tuple(stringify!($name)) - .field(&format_args!("0x{0:x}", &self.0)) - .finish() - } - } - - impl core::ops::BitOr for $name { + impl ::core::ops::BitOr for $name { type Output = Self; fn bitor(self, rhs: Self) -> Self::Output { @@ -127,6 +394,34 @@ macro_rules! register { reg.0 } } + + register!(@fields_dispatcher $name { $($fields)* }); + }; + + // Captures the fields and passes them to all the implementers that require field information. + // + // Used to simplify the matching rules for implementers, so they don't need to match the entire + // complex fields rule even though they only make use of part of it. + (@fields_dispatcher $name:ident { + $($hi:tt:$lo:tt $field:ident as $type:tt + $(?=> $try_into_type:ty)? + $(=> $into_type:ty)? + $(, $comment:literal)? + ; + )* + } + ) => { + register!(@field_accessors $name { + $( + $hi:$lo $field as $type + $(?=> $try_into_type)? + $(=> $into_type)? + $(, $comment)? + ; + )* + }); + register!(@debug $name { $($field;)* }); + register!(@default $name { $($field;)* }); }; // Defines all the field getter/methods methods for `$name`. @@ -161,7 +456,7 @@ macro_rules! register { (@check_field_bounds $hi:tt:$lo:tt $field:ident as bool) => { #[allow(clippy::eq_op)] const _: () = { - kernel::build_assert!( + ::kernel::build_assert!( $hi == $lo, concat!("boolean field `", stringify!($field), "` covers more than one bit") ); @@ -172,7 +467,7 @@ macro_rules! register { (@check_field_bounds $hi:tt:$lo:tt $field:ident as $type:tt) => { #[allow(clippy::eq_op)] const _: () = { - kernel::build_assert!( + ::kernel::build_assert!( $hi >= $lo, concat!("field `", stringify!($field), "`'s MSB is smaller than its LSB") ); @@ -185,7 +480,7 @@ macro_rules! register { $(, $comment:literal)?; ) => { register!( - @leaf_accessor $name $hi:$lo $field as bool + @leaf_accessor $name $hi:$lo $field { |f| <$into_type>::from(if f != 0 { true } else { false }) } $into_type => $into_type $(, $comment)?; ); @@ -203,7 +498,7 @@ macro_rules! register { @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt ?=> $try_into_type:ty $(, $comment:literal)?; ) => { - register!(@leaf_accessor $name $hi:$lo $field as $type + register!(@leaf_accessor $name $hi:$lo $field { |f| <$try_into_type>::try_from(f as $type) } $try_into_type => ::core::result::Result< $try_into_type, @@ -217,11 +512,11 @@ macro_rules! register { @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt => $into_type:ty $(, $comment:literal)?; ) => { - register!(@leaf_accessor $name $hi:$lo $field as $type + register!(@leaf_accessor $name $hi:$lo $field { |f| <$into_type>::from(f as $type) } $into_type => $into_type $(, $comment)?;); }; - // Shortcut for fields defined as non-`bool` without the `=>` or `?=>` syntax. + // Shortcut for non-boolean fields defined without the `=>` or `?=>` syntax. ( @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt $(, $comment:literal)?; @@ -231,11 +526,11 @@ macro_rules! register { // Generates the accessor methods for a single field. ( - @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:ty + @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident { $process:expr } $to_type:ty => $res_type:ty $(, $comment:literal)?; ) => { - kernel::macros::paste!( - const [<$field:upper>]: ::core::ops::RangeInclusive<u8> = $lo..=$hi; + ::kernel::macros::paste!( + const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive<u8> = $lo..=$hi; const [<$field:upper _MASK>]: u32 = ((((1 << $hi) - 1) << 1) + 1) - ((1 << $lo) - 1); const [<$field:upper _SHIFT>]: u32 = Self::[<$field:upper _MASK>].trailing_zeros(); ); @@ -244,9 +539,9 @@ macro_rules! register { #[doc="Returns the value of this field:"] #[doc=$comment] )? - #[inline] + #[inline(always)] pub(crate) fn $field(self) -> $res_type { - kernel::macros::paste!( + ::kernel::macros::paste!( const MASK: u32 = $name::[<$field:upper _MASK>]; const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; ); @@ -255,16 +550,16 @@ macro_rules! register { $process(field) } - kernel::macros::paste!( + ::kernel::macros::paste!( $( #[doc="Sets the value of this field:"] #[doc=$comment] )? - #[inline] + #[inline(always)] pub(crate) fn [<set_ $field>](mut self, value: $to_type) -> Self { const MASK: u32 = $name::[<$field:upper _MASK>]; const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; - let value = ((value as u32) << SHIFT) & MASK; + let value = (u32::from(value) << SHIFT) & MASK; self.0 = (self.0 & !MASK) | value; self @@ -272,25 +567,64 @@ macro_rules! register { ); }; - // Creates the IO accessors for a fixed offset register. - (@io $name:ident @ $offset:literal) => { + // Generates the `Debug` implementation for `$name`. + (@debug $name:ident { $($field:ident;)* }) => { + impl ::kernel::fmt::Debug for $name { + fn fmt(&self, f: &mut ::kernel::fmt::Formatter<'_>) -> ::kernel::fmt::Result { + f.debug_struct(stringify!($name)) + .field("<raw>", &::kernel::prelude::fmt!("{:#x}", &self.0)) + $( + .field(stringify!($field), &self.$field()) + )* + .finish() + } + } + }; + + // Generates the `Default` implementation for `$name`. + (@default $name:ident { $($field:ident;)* }) => { + /// Returns a value for the register where all fields are set to their default value. + impl ::core::default::Default for $name { + fn default() -> Self { + #[allow(unused_mut)] + let mut value = Self(Default::default()); + + ::kernel::macros::paste!( + $( + value.[<set_ $field>](Default::default()); + )* + ); + + value + } + } + }; + + // Generates the IO accessors for a fixed offset register. + (@io_fixed $name:ident @ $offset:expr) => { #[allow(dead_code)] impl $name { - #[inline] + pub(crate) const OFFSET: usize = $offset; + + /// Read the register from its address in `io`. + #[inline(always)] pub(crate) fn read<const SIZE: usize, T>(io: &T) -> Self where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { Self(io.read32($offset)) } - #[inline] + /// Write the value contained in `self` to the register address in `io`. + #[inline(always)] pub(crate) fn write<const SIZE: usize, T>(self, io: &T) where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { io.write32(self.0, $offset) } - #[inline] + /// Read the register from its address in `io` and run `f` on its value to obtain a new + /// value to write back. + #[inline(always)] pub(crate) fn alter<const SIZE: usize, T, F>( io: &T, f: F, @@ -304,76 +638,322 @@ macro_rules! register { } }; - // Create the IO accessors for a relative offset register. - (@io $name:ident @ + $offset:literal) => { + // Generates the IO accessors for a relative offset register. + (@io_relative $name:ident @ $base:ty [ $offset:expr ]) => { #[allow(dead_code)] impl $name { - #[inline] + pub(crate) const OFFSET: usize = $offset; + + /// Read the register from `io`, using the base address provided by `base` and adding + /// the register's offset to it. + #[inline(always)] + pub(crate) fn read<const SIZE: usize, T, B>( + io: &T, + #[allow(unused_variables)] + base: &B, + ) -> Self where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + const OFFSET: usize = $name::OFFSET; + + let value = io.read32( + <B as crate::regs::macros::RegisterBase<$base>>::BASE + OFFSET + ); + + Self(value) + } + + /// Write the value contained in `self` to `io`, using the base address provided by + /// `base` and adding the register's offset to it. + #[inline(always)] + pub(crate) fn write<const SIZE: usize, T, B>( + self, + io: &T, + #[allow(unused_variables)] + base: &B, + ) where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + const OFFSET: usize = $name::OFFSET; + + io.write32( + self.0, + <B as crate::regs::macros::RegisterBase<$base>>::BASE + OFFSET + ); + } + + /// Read the register from `io`, using the base address provided by `base` and adding + /// the register's offset to it, then run `f` on its value to obtain a new value to + /// write back. + #[inline(always)] + pub(crate) fn alter<const SIZE: usize, T, B, F>( + io: &T, + base: &B, + f: F, + ) where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + F: ::core::ops::FnOnce(Self) -> Self, + { + let reg = f(Self::read(io, base)); + reg.write(io, base); + } + } + }; + + // Generates the IO accessors for an array of registers. + (@io_array $name:ident @ $offset:literal [ $size:expr ; $stride:expr ]) => { + #[allow(dead_code)] + impl $name { + pub(crate) const OFFSET: usize = $offset; + pub(crate) const SIZE: usize = $size; + pub(crate) const STRIDE: usize = $stride; + + /// Read the array register at index `idx` from its address in `io`. + #[inline(always)] pub(crate) fn read<const SIZE: usize, T>( io: &T, - base: usize, + idx: usize, ) -> Self where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { - Self(io.read32(base + $offset)) + build_assert!(idx < Self::SIZE); + + let offset = Self::OFFSET + (idx * Self::STRIDE); + let value = io.read32(offset); + + Self(value) } - #[inline] + /// Write the value contained in `self` to the array register with index `idx` in `io`. + #[inline(always)] pub(crate) fn write<const SIZE: usize, T>( self, io: &T, - base: usize, + idx: usize ) where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { - io.write32(self.0, base + $offset) + build_assert!(idx < Self::SIZE); + + let offset = Self::OFFSET + (idx * Self::STRIDE); + + io.write32(self.0, offset); } - #[inline] + /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a + /// new value to write back. + #[inline(always)] pub(crate) fn alter<const SIZE: usize, T, F>( io: &T, - base: usize, + idx: usize, f: F, ) where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, F: ::core::ops::FnOnce(Self) -> Self, { - let reg = f(Self::read(io, base)); - reg.write(io, base); + let reg = f(Self::read(io, idx)); + reg.write(io, idx); } - #[inline] + /// Read the array register at index `idx` from its address in `io`. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] pub(crate) fn try_read<const SIZE: usize, T>( io: &T, - base: usize, + idx: usize, ) -> ::kernel::error::Result<Self> where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { - io.try_read32(base + $offset).map(Self) + if idx < Self::SIZE { + Ok(Self::read(io, idx)) + } else { + Err(EINVAL) + } } - #[inline] + /// Write the value contained in `self` to the array register with index `idx` in `io`. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] pub(crate) fn try_write<const SIZE: usize, T>( self, io: &T, - base: usize, - ) -> ::kernel::error::Result<()> where + idx: usize, + ) -> ::kernel::error::Result where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { - io.try_write32(self.0, base + $offset) + if idx < Self::SIZE { + Ok(self.write(io, idx)) + } else { + Err(EINVAL) + } } - #[inline] + /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a + /// new value to write back. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] pub(crate) fn try_alter<const SIZE: usize, T, F>( io: &T, - base: usize, + idx: usize, + f: F, + ) -> ::kernel::error::Result where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + F: ::core::ops::FnOnce(Self) -> Self, + { + if idx < Self::SIZE { + Ok(Self::alter(io, idx, f)) + } else { + Err(EINVAL) + } + } + } + }; + + // Generates the IO accessors for an array of relative registers. + ( + @io_relative_array $name:ident @ $base:ty + [ $offset:literal [ $size:expr ; $stride:expr ] ] + ) => { + #[allow(dead_code)] + impl $name { + pub(crate) const OFFSET: usize = $offset; + pub(crate) const SIZE: usize = $size; + pub(crate) const STRIDE: usize = $stride; + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it. + #[inline(always)] + pub(crate) fn read<const SIZE: usize, T, B>( + io: &T, + #[allow(unused_variables)] + base: &B, + idx: usize, + ) -> Self where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + build_assert!(idx < Self::SIZE); + + let offset = <B as crate::regs::macros::RegisterBase<$base>>::BASE + + Self::OFFSET + (idx * Self::STRIDE); + let value = io.read32(offset); + + Self(value) + } + + /// Write the value contained in `self` to `io`, using the base address provided by + /// `base` and adding the offset of array register `idx` to it. + #[inline(always)] + pub(crate) fn write<const SIZE: usize, T, B>( + self, + io: &T, + #[allow(unused_variables)] + base: &B, + idx: usize + ) where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + build_assert!(idx < Self::SIZE); + + let offset = <B as crate::regs::macros::RegisterBase<$base>>::BASE + + Self::OFFSET + (idx * Self::STRIDE); + + io.write32(self.0, offset); + } + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it, then run `f` on its value to + /// obtain a new value to write back. + #[inline(always)] + pub(crate) fn alter<const SIZE: usize, T, B, F>( + io: &T, + base: &B, + idx: usize, + f: F, + ) where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + F: ::core::ops::FnOnce(Self) -> Self, + { + let reg = f(Self::read(io, base, idx)); + reg.write(io, base, idx); + } + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_read<const SIZE: usize, T, B>( + io: &T, + base: &B, + idx: usize, + ) -> ::kernel::error::Result<Self> where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + if idx < Self::SIZE { + Ok(Self::read(io, base, idx)) + } else { + Err(EINVAL) + } + } + + /// Write the value contained in `self` to `io`, using the base address provided by + /// `base` and adding the offset of array register `idx` to it. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_write<const SIZE: usize, T, B>( + self, + io: &T, + base: &B, + idx: usize, + ) -> ::kernel::error::Result where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + if idx < Self::SIZE { + Ok(self.write(io, base, idx)) + } else { + Err(EINVAL) + } + } + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it, then run `f` on its value to + /// obtain a new value to write back. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_alter<const SIZE: usize, T, B, F>( + io: &T, + base: &B, + idx: usize, f: F, - ) -> ::kernel::error::Result<()> where + ) -> ::kernel::error::Result where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, F: ::core::ops::FnOnce(Self) -> Self, { - let reg = f(Self::try_read(io, base)?); - reg.try_write(io, base) + if idx < Self::SIZE { + Ok(Self::alter(io, base, idx, f)) + } else { + Err(EINVAL) + } } } }; diff --git a/drivers/gpu/nova-core/util.rs b/drivers/gpu/nova-core/util.rs index 332a64cfc6a9..bf35f00cb732 100644 --- a/drivers/gpu/nova-core/util.rs +++ b/drivers/gpu/nova-core/util.rs @@ -1,21 +1,27 @@ // SPDX-License-Identifier: GPL-2.0 -pub(crate) const fn to_lowercase_bytes<const N: usize>(s: &str) -> [u8; N] { - let src = s.as_bytes(); - let mut dst = [0; N]; - let mut i = 0; +use kernel::prelude::*; +use kernel::time::{Delta, Instant, Monotonic}; - while i < src.len() && i < N { - dst[i] = (src[i] as char).to_ascii_lowercase() as u8; - i += 1; - } +/// Wait until `cond` is true or `timeout` elapsed. +/// +/// When `cond` evaluates to `Some`, its return value is returned. +/// +/// `Err(ETIMEDOUT)` is returned if `timeout` has been reached without `cond` evaluating to +/// `Some`. +/// +/// TODO[DLAY]: replace with `read_poll_timeout` once it is available. +/// (https://lore.kernel.org/lkml/20250220070611.214262-8-fujita.tomonori@gmail.com/) +pub(crate) fn wait_on<R, F: Fn() -> Option<R>>(timeout: Delta, cond: F) -> Result<R> { + let start_time = Instant::<Monotonic>::now(); - dst -} + loop { + if let Some(ret) = cond() { + return Ok(ret); + } -pub(crate) const fn const_bytes_to_str(bytes: &[u8]) -> &str { - match core::str::from_utf8(bytes) { - Ok(string) => string, - Err(_) => kernel::build_error!("Bytes are not valid UTF-8."), + if start_time.elapsed().as_nanos() > timeout.as_nanos() { + return Err(ETIMEDOUT); + } } } diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs new file mode 100644 index 000000000000..71fbe71b84db --- /dev/null +++ b/drivers/gpu/nova-core/vbios.rs @@ -0,0 +1,1158 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! VBIOS extraction and parsing. + +use crate::driver::Bar0; +use crate::firmware::fwsec::Bcrt30Rsa3kSignature; +use crate::firmware::FalconUCodeDescV3; +use core::convert::TryFrom; +use kernel::device; +use kernel::error::Result; +use kernel::prelude::*; +use kernel::ptr::{Alignable, Alignment}; +use kernel::types::ARef; + +/// The offset of the VBIOS ROM in the BAR0 space. +const ROM_OFFSET: usize = 0x300000; +/// The maximum length of the VBIOS ROM to scan into. +const BIOS_MAX_SCAN_LEN: usize = 0x100000; +/// The size to read ahead when parsing initial BIOS image headers. +const BIOS_READ_AHEAD_SIZE: usize = 1024; +/// The bit in the last image indicator byte for the PCI Data Structure that +/// indicates the last image. Bit 0-6 are reserved, bit 7 is last image bit. +const LAST_IMAGE_BIT_MASK: u8 = 0x80; + +// PMU lookup table entry types. Used to locate PMU table entries +// in the Fwsec image, corresponding to falcon ucodes. +#[expect(dead_code)] +const FALCON_UCODE_ENTRY_APPID_FIRMWARE_SEC_LIC: u8 = 0x05; +#[expect(dead_code)] +const FALCON_UCODE_ENTRY_APPID_FWSEC_DBG: u8 = 0x45; +const FALCON_UCODE_ENTRY_APPID_FWSEC_PROD: u8 = 0x85; + +/// Vbios Reader for constructing the VBIOS data. +struct VbiosIterator<'a> { + dev: &'a device::Device, + bar0: &'a Bar0, + /// VBIOS data vector: As BIOS images are scanned, they are added to this vector for reference + /// or copying into other data structures. It is the entire scanned contents of the VBIOS which + /// progressively extends. It is used so that we do not re-read any contents that are already + /// read as we use the cumulative length read so far, and re-read any gaps as we extend the + /// length. + data: KVec<u8>, + /// Current offset of the [`Iterator`]. + current_offset: usize, + /// Indicate whether the last image has been found. + last_found: bool, +} + +impl<'a> VbiosIterator<'a> { + fn new(dev: &'a device::Device, bar0: &'a Bar0) -> Result<Self> { + Ok(Self { + dev, + bar0, + data: KVec::new(), + current_offset: 0, + last_found: false, + }) + } + + /// Read bytes from the ROM at the current end of the data vector. + fn read_more(&mut self, len: usize) -> Result { + let current_len = self.data.len(); + let start = ROM_OFFSET + current_len; + + // Ensure length is a multiple of 4 for 32-bit reads + if len % core::mem::size_of::<u32>() != 0 { + dev_err!( + self.dev, + "VBIOS read length {} is not a multiple of 4\n", + len + ); + return Err(EINVAL); + } + + self.data.reserve(len, GFP_KERNEL)?; + // Read ROM data bytes and push directly to `data`. + for addr in (start..start + len).step_by(core::mem::size_of::<u32>()) { + // Read 32-bit word from the VBIOS ROM + let word = self.bar0.try_read32(addr)?; + + // Convert the `u32` to a 4 byte array and push each byte. + word.to_ne_bytes() + .iter() + .try_for_each(|&b| self.data.push(b, GFP_KERNEL))?; + } + + Ok(()) + } + + /// Read bytes at a specific offset, filling any gap. + fn read_more_at_offset(&mut self, offset: usize, len: usize) -> Result { + if offset > BIOS_MAX_SCAN_LEN { + dev_err!(self.dev, "Error: exceeded BIOS scan limit.\n"); + return Err(EINVAL); + } + + // If `offset` is beyond current data size, fill the gap first. + let current_len = self.data.len(); + let gap_bytes = offset.saturating_sub(current_len); + + // Now read the requested bytes at the offset. + self.read_more(gap_bytes + len) + } + + /// Read a BIOS image at a specific offset and create a [`BiosImage`] from it. + /// + /// `self.data` is extended as needed and a new [`BiosImage`] is returned. + /// `context` is a string describing the operation for error reporting. + fn read_bios_image_at_offset( + &mut self, + offset: usize, + len: usize, + context: &str, + ) -> Result<BiosImage> { + let data_len = self.data.len(); + if offset + len > data_len { + self.read_more_at_offset(offset, len).inspect_err(|e| { + dev_err!( + self.dev, + "Failed to read more at offset {:#x}: {:?}\n", + offset, + e + ) + })?; + } + + BiosImage::new(self.dev, &self.data[offset..offset + len]).inspect_err(|err| { + dev_err!( + self.dev, + "Failed to {} at offset {:#x}: {:?}\n", + context, + offset, + err + ) + }) + } +} + +impl<'a> Iterator for VbiosIterator<'a> { + type Item = Result<BiosImage>; + + /// Iterate over all VBIOS images until the last image is detected or offset + /// exceeds scan limit. + fn next(&mut self) -> Option<Self::Item> { + if self.last_found { + return None; + } + + if self.current_offset > BIOS_MAX_SCAN_LEN { + dev_err!(self.dev, "Error: exceeded BIOS scan limit, stopping scan\n"); + return None; + } + + // Parse image headers first to get image size. + let image_size = match self.read_bios_image_at_offset( + self.current_offset, + BIOS_READ_AHEAD_SIZE, + "parse initial BIOS image headers", + ) { + Ok(image) => image.image_size_bytes(), + Err(e) => return Some(Err(e)), + }; + + // Now create a new `BiosImage` with the full image data. + let full_image = match self.read_bios_image_at_offset( + self.current_offset, + image_size, + "parse full BIOS image", + ) { + Ok(image) => image, + Err(e) => return Some(Err(e)), + }; + + self.last_found = full_image.is_last(); + + // Advance to next image (aligned to 512 bytes). + self.current_offset += image_size; + self.current_offset = self.current_offset.align_up(Alignment::new::<512>())?; + + Some(Ok(full_image)) + } +} + +pub(crate) struct Vbios { + fwsec_image: FwSecBiosImage, +} + +impl Vbios { + /// Probe for VBIOS extraction. + /// + /// Once the VBIOS object is built, `bar0` is not read for [`Vbios`] purposes anymore. + pub(crate) fn new(dev: &device::Device, bar0: &Bar0) -> Result<Vbios> { + // Images to extract from iteration + let mut pci_at_image: Option<PciAtBiosImage> = None; + let mut first_fwsec_image: Option<FwSecBiosBuilder> = None; + let mut second_fwsec_image: Option<FwSecBiosBuilder> = None; + + // Parse all VBIOS images in the ROM + for image_result in VbiosIterator::new(dev, bar0)? { + let full_image = image_result?; + + dev_dbg!( + dev, + "Found BIOS image: size: {:#x}, type: {}, last: {}\n", + full_image.image_size_bytes(), + full_image.image_type_str(), + full_image.is_last() + ); + + // Get references to images we will need after the loop, in order to + // setup the falcon data offset. + match full_image { + BiosImage::PciAt(image) => { + pci_at_image = Some(image); + } + BiosImage::FwSec(image) => { + if first_fwsec_image.is_none() { + first_fwsec_image = Some(image); + } else { + second_fwsec_image = Some(image); + } + } + // For now we don't need to handle these + BiosImage::Efi(_image) => {} + BiosImage::Nbsi(_image) => {} + } + } + + // Using all the images, setup the falcon data pointer in Fwsec. + if let (Some(mut second), Some(first), Some(pci_at)) = + (second_fwsec_image, first_fwsec_image, pci_at_image) + { + second + .setup_falcon_data(&pci_at, &first) + .inspect_err(|e| dev_err!(dev, "Falcon data setup failed: {:?}\n", e))?; + Ok(Vbios { + fwsec_image: second.build()?, + }) + } else { + dev_err!( + dev, + "Missing required images for falcon data setup, skipping\n" + ); + Err(EINVAL) + } + } + + pub(crate) fn fwsec_image(&self) -> &FwSecBiosImage { + &self.fwsec_image + } +} + +/// PCI Data Structure as defined in PCI Firmware Specification +#[derive(Debug, Clone)] +#[repr(C)] +struct PcirStruct { + /// PCI Data Structure signature ("PCIR" or "NPDS") + signature: [u8; 4], + /// PCI Vendor ID (e.g., 0x10DE for NVIDIA) + vendor_id: u16, + /// PCI Device ID + device_id: u16, + /// Device List Pointer + device_list_ptr: u16, + /// PCI Data Structure Length + pci_data_struct_len: u16, + /// PCI Data Structure Revision + pci_data_struct_rev: u8, + /// Class code (3 bytes, 0x03 for display controller) + class_code: [u8; 3], + /// Size of this image in 512-byte blocks + image_len: u16, + /// Revision Level of the Vendor's ROM + vendor_rom_rev: u16, + /// ROM image type (0x00 = PC-AT compatible, 0x03 = EFI, 0x70 = NBSI) + code_type: u8, + /// Last image indicator (0x00 = Not last image, 0x80 = Last image) + last_image: u8, + /// Maximum Run-time Image Length (units of 512 bytes) + max_runtime_image_len: u16, +} + +impl PcirStruct { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { + if data.len() < core::mem::size_of::<PcirStruct>() { + dev_err!(dev, "Not enough data for PcirStruct\n"); + return Err(EINVAL); + } + + let mut signature = [0u8; 4]; + signature.copy_from_slice(&data[0..4]); + + // Signature should be "PCIR" (0x52494350) or "NPDS" (0x5344504e). + if &signature != b"PCIR" && &signature != b"NPDS" { + dev_err!(dev, "Invalid signature for PcirStruct: {:?}\n", signature); + return Err(EINVAL); + } + + let mut class_code = [0u8; 3]; + class_code.copy_from_slice(&data[13..16]); + + let image_len = u16::from_le_bytes([data[16], data[17]]); + if image_len == 0 { + dev_err!(dev, "Invalid image length: 0\n"); + return Err(EINVAL); + } + + Ok(PcirStruct { + signature, + vendor_id: u16::from_le_bytes([data[4], data[5]]), + device_id: u16::from_le_bytes([data[6], data[7]]), + device_list_ptr: u16::from_le_bytes([data[8], data[9]]), + pci_data_struct_len: u16::from_le_bytes([data[10], data[11]]), + pci_data_struct_rev: data[12], + class_code, + image_len, + vendor_rom_rev: u16::from_le_bytes([data[18], data[19]]), + code_type: data[20], + last_image: data[21], + max_runtime_image_len: u16::from_le_bytes([data[22], data[23]]), + }) + } + + /// Check if this is the last image in the ROM. + fn is_last(&self) -> bool { + self.last_image & LAST_IMAGE_BIT_MASK != 0 + } + + /// Calculate image size in bytes from 512-byte blocks. + fn image_size_bytes(&self) -> usize { + self.image_len as usize * 512 + } +} + +/// BIOS Information Table (BIT) Header. +/// +/// This is the head of the BIT table, that is used to locate the Falcon data. The BIT table (with +/// its header) is in the [`PciAtBiosImage`] and the falcon data it is pointing to is in the +/// [`FwSecBiosImage`]. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +struct BitHeader { + /// 0h: BIT Header Identifier (BMP=0x7FFF/BIT=0xB8FF) + id: u16, + /// 2h: BIT Header Signature ("BIT\0") + signature: [u8; 4], + /// 6h: Binary Coded Decimal Version, ex: 0x0100 is 1.00. + bcd_version: u16, + /// 8h: Size of BIT Header (in bytes) + header_size: u8, + /// 9h: Size of BIT Tokens (in bytes) + token_size: u8, + /// 10h: Number of token entries that follow + token_entries: u8, + /// 11h: BIT Header Checksum + checksum: u8, +} + +impl BitHeader { + fn new(data: &[u8]) -> Result<Self> { + if data.len() < core::mem::size_of::<Self>() { + return Err(EINVAL); + } + + let mut signature = [0u8; 4]; + signature.copy_from_slice(&data[2..6]); + + // Check header ID and signature + let id = u16::from_le_bytes([data[0], data[1]]); + if id != 0xB8FF || &signature != b"BIT\0" { + return Err(EINVAL); + } + + Ok(BitHeader { + id, + signature, + bcd_version: u16::from_le_bytes([data[6], data[7]]), + header_size: data[8], + token_size: data[9], + token_entries: data[10], + checksum: data[11], + }) + } +} + +/// BIT Token Entry: Records in the BIT table followed by the BIT header. +#[derive(Debug, Clone, Copy)] +#[expect(dead_code)] +struct BitToken { + /// 00h: Token identifier + id: u8, + /// 01h: Version of the token data + data_version: u8, + /// 02h: Size of token data in bytes + data_size: u16, + /// 04h: Offset to the token data + data_offset: u16, +} + +// Define the token ID for the Falcon data +const BIT_TOKEN_ID_FALCON_DATA: u8 = 0x70; + +impl BitToken { + /// Find a BIT token entry by BIT ID in a PciAtBiosImage + fn from_id(image: &PciAtBiosImage, token_id: u8) -> Result<Self> { + let header = &image.bit_header; + + // Offset to the first token entry + let tokens_start = image.bit_offset + header.header_size as usize; + + for i in 0..header.token_entries as usize { + let entry_offset = tokens_start + (i * header.token_size as usize); + + // Make sure we don't go out of bounds + if entry_offset + header.token_size as usize > image.base.data.len() { + return Err(EINVAL); + } + + // Check if this token has the requested ID + if image.base.data[entry_offset] == token_id { + return Ok(BitToken { + id: image.base.data[entry_offset], + data_version: image.base.data[entry_offset + 1], + data_size: u16::from_le_bytes([ + image.base.data[entry_offset + 2], + image.base.data[entry_offset + 3], + ]), + data_offset: u16::from_le_bytes([ + image.base.data[entry_offset + 4], + image.base.data[entry_offset + 5], + ]), + }); + } + } + + // Token not found + Err(ENOENT) + } +} + +/// PCI ROM Expansion Header as defined in PCI Firmware Specification. +/// +/// This is header is at the beginning of every image in the set of images in the ROM. It contains +/// a pointer to the PCI Data Structure which describes the image. For "NBSI" images (NoteBook +/// System Information), the ROM header deviates from the standard and contains an offset to the +/// NBSI image however we do not yet parse that in this module and keep it for future reference. +#[derive(Debug, Clone, Copy)] +#[expect(dead_code)] +struct PciRomHeader { + /// 00h: Signature (0xAA55) + signature: u16, + /// 02h: Reserved bytes for processor architecture unique data (20 bytes) + reserved: [u8; 20], + /// 16h: NBSI Data Offset (NBSI-specific, offset from header to NBSI image) + nbsi_data_offset: Option<u16>, + /// 18h: Pointer to PCI Data Structure (offset from start of ROM image) + pci_data_struct_offset: u16, + /// 1Ah: Size of block (this is NBSI-specific) + size_of_block: Option<u32>, +} + +impl PciRomHeader { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { + if data.len() < 26 { + // Need at least 26 bytes to read pciDataStrucPtr and sizeOfBlock. + return Err(EINVAL); + } + + let signature = u16::from_le_bytes([data[0], data[1]]); + + // Check for valid ROM signatures. + match signature { + 0xAA55 | 0xBB77 | 0x4E56 => {} + _ => { + dev_err!(dev, "ROM signature unknown {:#x}\n", signature); + return Err(EINVAL); + } + } + + // Read the pointer to the PCI Data Structure at offset 0x18. + let pci_data_struct_ptr = u16::from_le_bytes([data[24], data[25]]); + + // Try to read optional fields if enough data. + let mut size_of_block = None; + let mut nbsi_data_offset = None; + + if data.len() >= 30 { + // Read size_of_block at offset 0x1A. + size_of_block = Some( + u32::from(data[29]) << 24 + | u32::from(data[28]) << 16 + | u32::from(data[27]) << 8 + | u32::from(data[26]), + ); + } + + // For NBSI images, try to read the nbsiDataOffset at offset 0x16. + if data.len() >= 24 { + nbsi_data_offset = Some(u16::from_le_bytes([data[22], data[23]])); + } + + Ok(PciRomHeader { + signature, + reserved: [0u8; 20], + pci_data_struct_offset: pci_data_struct_ptr, + size_of_block, + nbsi_data_offset, + }) + } +} + +/// NVIDIA PCI Data Extension Structure. +/// +/// This is similar to the PCI Data Structure, but is Nvidia-specific and is placed right after the +/// PCI Data Structure. It contains some fields that are redundant with the PCI Data Structure, but +/// are needed for traversing the BIOS images. It is expected to be present in all BIOS images +/// except for NBSI images. +#[derive(Debug, Clone)] +#[repr(C)] +struct NpdeStruct { + /// 00h: Signature ("NPDE") + signature: [u8; 4], + /// 04h: NVIDIA PCI Data Extension Revision + npci_data_ext_rev: u16, + /// 06h: NVIDIA PCI Data Extension Length + npci_data_ext_len: u16, + /// 08h: Sub-image Length (in 512-byte units) + subimage_len: u16, + /// 0Ah: Last image indicator flag + last_image: u8, +} + +impl NpdeStruct { + fn new(dev: &device::Device, data: &[u8]) -> Option<Self> { + if data.len() < core::mem::size_of::<Self>() { + dev_dbg!(dev, "Not enough data for NpdeStruct\n"); + return None; + } + + let mut signature = [0u8; 4]; + signature.copy_from_slice(&data[0..4]); + + // Signature should be "NPDE" (0x4544504E). + if &signature != b"NPDE" { + dev_dbg!(dev, "Invalid signature for NpdeStruct: {:?}\n", signature); + return None; + } + + let subimage_len = u16::from_le_bytes([data[8], data[9]]); + if subimage_len == 0 { + dev_dbg!(dev, "Invalid subimage length: 0\n"); + return None; + } + + Some(NpdeStruct { + signature, + npci_data_ext_rev: u16::from_le_bytes([data[4], data[5]]), + npci_data_ext_len: u16::from_le_bytes([data[6], data[7]]), + subimage_len, + last_image: data[10], + }) + } + + /// Check if this is the last image in the ROM. + fn is_last(&self) -> bool { + self.last_image & LAST_IMAGE_BIT_MASK != 0 + } + + /// Calculate image size in bytes from 512-byte blocks. + fn image_size_bytes(&self) -> usize { + self.subimage_len as usize * 512 + } + + /// Try to find NPDE in the data, the NPDE is right after the PCIR. + fn find_in_data( + dev: &device::Device, + data: &[u8], + rom_header: &PciRomHeader, + pcir: &PcirStruct, + ) -> Option<Self> { + // Calculate the offset where NPDE might be located + // NPDE should be right after the PCIR structure, aligned to 16 bytes + let pcir_offset = rom_header.pci_data_struct_offset as usize; + let npde_start = (pcir_offset + pcir.pci_data_struct_len as usize + 0x0F) & !0x0F; + + // Check if we have enough data + if npde_start + core::mem::size_of::<Self>() > data.len() { + dev_dbg!(dev, "Not enough data for NPDE\n"); + return None; + } + + // Try to create NPDE from the data + NpdeStruct::new(dev, &data[npde_start..]) + } +} + +// Use a macro to implement BiosImage enum and methods. This avoids having to +// repeat each enum type when implementing functions like base() in BiosImage. +macro_rules! bios_image { + ( + $($variant:ident: $class:ident),* $(,)? + ) => { + // BiosImage enum with variants for each image type + enum BiosImage { + $($variant($class)),* + } + + impl BiosImage { + /// Get a reference to the common BIOS image data regardless of type + fn base(&self) -> &BiosImageBase { + match self { + $(Self::$variant(img) => &img.base),* + } + } + + /// Returns a string representing the type of BIOS image + fn image_type_str(&self) -> &'static str { + match self { + $(Self::$variant(_) => stringify!($variant)),* + } + } + } + } +} + +impl BiosImage { + /// Check if this is the last image. + fn is_last(&self) -> bool { + let base = self.base(); + + // For NBSI images (type == 0x70), return true as they're + // considered the last image + if matches!(self, Self::Nbsi(_)) { + return true; + } + + // For other image types, check the NPDE first if available + if let Some(ref npde) = base.npde { + return npde.is_last(); + } + + // Otherwise, fall back to checking the PCIR last_image flag + base.pcir.is_last() + } + + /// Get the image size in bytes. + fn image_size_bytes(&self) -> usize { + let base = self.base(); + + // Prefer NPDE image size if available + if let Some(ref npde) = base.npde { + return npde.image_size_bytes(); + } + + // Otherwise, fall back to the PCIR image size + base.pcir.image_size_bytes() + } + + /// Create a [`BiosImageBase`] from a byte slice and convert it to a [`BiosImage`] which + /// triggers the constructor of the specific BiosImage enum variant. + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { + let base = BiosImageBase::new(dev, data)?; + let image = base.into_image().inspect_err(|e| { + dev_err!(dev, "Failed to create BiosImage: {:?}\n", e); + })?; + + Ok(image) + } +} + +bios_image! { + PciAt: PciAtBiosImage, // PCI-AT compatible BIOS image + Efi: EfiBiosImage, // EFI (Extensible Firmware Interface) + Nbsi: NbsiBiosImage, // NBSI (Nvidia Bios System Interface) + FwSec: FwSecBiosBuilder, // FWSEC (Firmware Security) +} + +/// The PciAt BIOS image is typically the first BIOS image type found in the BIOS image chain. +/// +/// It contains the BIT header and the BIT tokens. +struct PciAtBiosImage { + base: BiosImageBase, + bit_header: BitHeader, + bit_offset: usize, +} + +struct EfiBiosImage { + base: BiosImageBase, + // EFI-specific fields can be added here in the future. +} + +struct NbsiBiosImage { + base: BiosImageBase, + // NBSI-specific fields can be added here in the future. +} + +struct FwSecBiosBuilder { + base: BiosImageBase, + /// These are temporary fields that are used during the construction of the + /// [`FwSecBiosBuilder`]. + /// + /// Once FwSecBiosBuilder is constructed, the `falcon_ucode_offset` will be copied into a new + /// [`FwSecBiosImage`]. + /// + /// The offset of the Falcon data from the start of Fwsec image. + falcon_data_offset: Option<usize>, + /// The [`PmuLookupTable`] starts at the offset of the falcon data pointer. + pmu_lookup_table: Option<PmuLookupTable>, + /// The offset of the Falcon ucode. + falcon_ucode_offset: Option<usize>, +} + +/// The [`FwSecBiosImage`] structure contains the PMU table and the Falcon Ucode. +/// +/// The PMU table contains voltage/frequency tables as well as a pointer to the Falcon Ucode. +pub(crate) struct FwSecBiosImage { + base: BiosImageBase, + /// The offset of the Falcon ucode. + falcon_ucode_offset: usize, +} + +// Convert from BiosImageBase to BiosImage +impl TryFrom<BiosImageBase> for BiosImage { + type Error = Error; + + fn try_from(base: BiosImageBase) -> Result<Self> { + match base.pcir.code_type { + 0x00 => Ok(BiosImage::PciAt(base.try_into()?)), + 0x03 => Ok(BiosImage::Efi(EfiBiosImage { base })), + 0x70 => Ok(BiosImage::Nbsi(NbsiBiosImage { base })), + 0xE0 => Ok(BiosImage::FwSec(FwSecBiosBuilder { + base, + falcon_data_offset: None, + pmu_lookup_table: None, + falcon_ucode_offset: None, + })), + _ => Err(EINVAL), + } + } +} + +/// BIOS Image structure containing various headers and reference fields to all BIOS images. +/// +/// Each BiosImage type has a BiosImageBase type along with other image-specific fields. Note that +/// Rust favors composition of types over inheritance. +#[expect(dead_code)] +struct BiosImageBase { + /// Used for logging. + dev: ARef<device::Device>, + /// PCI ROM Expansion Header + rom_header: PciRomHeader, + /// PCI Data Structure + pcir: PcirStruct, + /// NVIDIA PCI Data Extension (optional) + npde: Option<NpdeStruct>, + /// Image data (includes ROM header and PCIR) + data: KVec<u8>, +} + +impl BiosImageBase { + fn into_image(self) -> Result<BiosImage> { + BiosImage::try_from(self) + } + + /// Creates a new BiosImageBase from raw byte data. + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { + // Ensure we have enough data for the ROM header. + if data.len() < 26 { + dev_err!(dev, "Not enough data for ROM header\n"); + return Err(EINVAL); + } + + // Parse the ROM header. + let rom_header = PciRomHeader::new(dev, &data[0..26]) + .inspect_err(|e| dev_err!(dev, "Failed to create PciRomHeader: {:?}\n", e))?; + + // Get the PCI Data Structure using the pointer from the ROM header. + let pcir_offset = rom_header.pci_data_struct_offset as usize; + let pcir_data = data + .get(pcir_offset..pcir_offset + core::mem::size_of::<PcirStruct>()) + .ok_or(EINVAL) + .inspect_err(|_| { + dev_err!( + dev, + "PCIR offset {:#x} out of bounds (data length: {})\n", + pcir_offset, + data.len() + ); + dev_err!( + dev, + "Consider reading more data for construction of BiosImage\n" + ); + })?; + + let pcir = PcirStruct::new(dev, pcir_data) + .inspect_err(|e| dev_err!(dev, "Failed to create PcirStruct: {:?}\n", e))?; + + // Look for NPDE structure if this is not an NBSI image (type != 0x70). + let npde = NpdeStruct::find_in_data(dev, data, &rom_header, &pcir); + + // Create a copy of the data. + let mut data_copy = KVec::new(); + data_copy.extend_from_slice(data, GFP_KERNEL)?; + + Ok(BiosImageBase { + dev: dev.into(), + rom_header, + pcir, + npde, + data: data_copy, + }) + } +} + +impl PciAtBiosImage { + /// Find a byte pattern in a slice. + fn find_byte_pattern(haystack: &[u8], needle: &[u8]) -> Result<usize> { + haystack + .windows(needle.len()) + .position(|window| window == needle) + .ok_or(EINVAL) + } + + /// Find the BIT header in the [`PciAtBiosImage`]. + fn find_bit_header(data: &[u8]) -> Result<(BitHeader, usize)> { + let bit_pattern = [0xff, 0xb8, b'B', b'I', b'T', 0x00]; + let bit_offset = Self::find_byte_pattern(data, &bit_pattern)?; + let bit_header = BitHeader::new(&data[bit_offset..])?; + + Ok((bit_header, bit_offset)) + } + + /// Get a BIT token entry from the BIT table in the [`PciAtBiosImage`] + fn get_bit_token(&self, token_id: u8) -> Result<BitToken> { + BitToken::from_id(self, token_id) + } + + /// Find the Falcon data pointer structure in the [`PciAtBiosImage`]. + /// + /// This is just a 4 byte structure that contains a pointer to the Falcon data in the FWSEC + /// image. + fn falcon_data_ptr(&self) -> Result<u32> { + let token = self.get_bit_token(BIT_TOKEN_ID_FALCON_DATA)?; + + // Make sure we don't go out of bounds + if token.data_offset as usize + 4 > self.base.data.len() { + return Err(EINVAL); + } + + // read the 4 bytes at the offset specified in the token + let offset = token.data_offset as usize; + let bytes: [u8; 4] = self.base.data[offset..offset + 4].try_into().map_err(|_| { + dev_err!(self.base.dev, "Failed to convert data slice to array"); + EINVAL + })?; + + let data_ptr = u32::from_le_bytes(bytes); + + if (data_ptr as usize) < self.base.data.len() { + dev_err!(self.base.dev, "Falcon data pointer out of bounds\n"); + return Err(EINVAL); + } + + Ok(data_ptr) + } +} + +impl TryFrom<BiosImageBase> for PciAtBiosImage { + type Error = Error; + + fn try_from(base: BiosImageBase) -> Result<Self> { + let data_slice = &base.data; + let (bit_header, bit_offset) = PciAtBiosImage::find_bit_header(data_slice)?; + + Ok(PciAtBiosImage { + base, + bit_header, + bit_offset, + }) + } +} + +/// The [`PmuLookupTableEntry`] structure is a single entry in the [`PmuLookupTable`]. +/// +/// See the [`PmuLookupTable`] description for more information. +#[repr(C, packed)] +struct PmuLookupTableEntry { + application_id: u8, + target_id: u8, + data: u32, +} + +impl PmuLookupTableEntry { + fn new(data: &[u8]) -> Result<Self> { + if data.len() < core::mem::size_of::<Self>() { + return Err(EINVAL); + } + + Ok(PmuLookupTableEntry { + application_id: data[0], + target_id: data[1], + data: u32::from_le_bytes(data[2..6].try_into().map_err(|_| EINVAL)?), + }) + } +} + +/// The [`PmuLookupTableEntry`] structure is used to find the [`PmuLookupTableEntry`] for a given +/// application ID. +/// +/// The table of entries is pointed to by the falcon data pointer in the BIT table, and is used to +/// locate the Falcon Ucode. +#[expect(dead_code)] +struct PmuLookupTable { + version: u8, + header_len: u8, + entry_len: u8, + entry_count: u8, + table_data: KVec<u8>, +} + +impl PmuLookupTable { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { + if data.len() < 4 { + return Err(EINVAL); + } + + let header_len = data[1] as usize; + let entry_len = data[2] as usize; + let entry_count = data[3] as usize; + + let required_bytes = header_len + (entry_count * entry_len); + + if data.len() < required_bytes { + dev_err!(dev, "PmuLookupTable data length less than required\n"); + return Err(EINVAL); + } + + // Create a copy of only the table data + let table_data = { + let mut ret = KVec::new(); + ret.extend_from_slice(&data[header_len..required_bytes], GFP_KERNEL)?; + ret + }; + + // Debug logging of entries (dumps the table data to dmesg) + for i in (header_len..required_bytes).step_by(entry_len) { + dev_dbg!(dev, "PMU entry: {:02x?}\n", &data[i..][..entry_len]); + } + + Ok(PmuLookupTable { + version: data[0], + header_len: header_len as u8, + entry_len: entry_len as u8, + entry_count: entry_count as u8, + table_data, + }) + } + + fn lookup_index(&self, idx: u8) -> Result<PmuLookupTableEntry> { + if idx >= self.entry_count { + return Err(EINVAL); + } + + let index = (idx as usize) * self.entry_len as usize; + PmuLookupTableEntry::new(&self.table_data[index..]) + } + + // find entry by type value + fn find_entry_by_type(&self, entry_type: u8) -> Result<PmuLookupTableEntry> { + for i in 0..self.entry_count { + let entry = self.lookup_index(i)?; + if entry.application_id == entry_type { + return Ok(entry); + } + } + + Err(EINVAL) + } +} + +impl FwSecBiosBuilder { + fn setup_falcon_data( + &mut self, + pci_at_image: &PciAtBiosImage, + first_fwsec: &FwSecBiosBuilder, + ) -> Result { + let mut offset = pci_at_image.falcon_data_ptr()? as usize; + let mut pmu_in_first_fwsec = false; + + // The falcon data pointer assumes that the PciAt and FWSEC images + // are contiguous in memory. However, testing shows the EFI image sits in + // between them. So calculate the offset from the end of the PciAt image + // rather than the start of it. Compensate. + offset -= pci_at_image.base.data.len(); + + // The offset is now from the start of the first Fwsec image, however + // the offset points to a location in the second Fwsec image. Since + // the fwsec images are contiguous, subtract the length of the first Fwsec + // image from the offset to get the offset to the start of the second + // Fwsec image. + if offset < first_fwsec.base.data.len() { + pmu_in_first_fwsec = true; + } else { + offset -= first_fwsec.base.data.len(); + } + + self.falcon_data_offset = Some(offset); + + if pmu_in_first_fwsec { + self.pmu_lookup_table = Some(PmuLookupTable::new( + &self.base.dev, + &first_fwsec.base.data[offset..], + )?); + } else { + self.pmu_lookup_table = Some(PmuLookupTable::new( + &self.base.dev, + &self.base.data[offset..], + )?); + } + + match self + .pmu_lookup_table + .as_ref() + .ok_or(EINVAL)? + .find_entry_by_type(FALCON_UCODE_ENTRY_APPID_FWSEC_PROD) + { + Ok(entry) => { + let mut ucode_offset = entry.data as usize; + ucode_offset -= pci_at_image.base.data.len(); + if ucode_offset < first_fwsec.base.data.len() { + dev_err!(self.base.dev, "Falcon Ucode offset not in second Fwsec.\n"); + return Err(EINVAL); + } + ucode_offset -= first_fwsec.base.data.len(); + self.falcon_ucode_offset = Some(ucode_offset); + } + Err(e) => { + dev_err!( + self.base.dev, + "PmuLookupTableEntry not found, error: {:?}\n", + e + ); + return Err(EINVAL); + } + } + Ok(()) + } + + /// Build the final FwSecBiosImage from this builder + fn build(self) -> Result<FwSecBiosImage> { + let ret = FwSecBiosImage { + base: self.base, + falcon_ucode_offset: self.falcon_ucode_offset.ok_or(EINVAL)?, + }; + + if cfg!(debug_assertions) { + // Print the desc header for debugging + let desc = ret.header()?; + dev_dbg!(ret.base.dev, "PmuLookupTableEntry desc: {:#?}\n", desc); + } + + Ok(ret) + } +} + +impl FwSecBiosImage { + /// Get the FwSec header ([`FalconUCodeDescV3`]). + pub(crate) fn header(&self) -> Result<&FalconUCodeDescV3> { + // Get the falcon ucode offset that was found in setup_falcon_data. + let falcon_ucode_offset = self.falcon_ucode_offset; + + // Make sure the offset is within the data bounds. + if falcon_ucode_offset + core::mem::size_of::<FalconUCodeDescV3>() > self.base.data.len() { + dev_err!( + self.base.dev, + "fwsec-frts header not contained within BIOS bounds\n" + ); + return Err(ERANGE); + } + + // Read the first 4 bytes to get the version. + let hdr_bytes: [u8; 4] = self.base.data[falcon_ucode_offset..falcon_ucode_offset + 4] + .try_into() + .map_err(|_| EINVAL)?; + let hdr = u32::from_le_bytes(hdr_bytes); + let ver = (hdr & 0xff00) >> 8; + + if ver != 3 { + dev_err!(self.base.dev, "invalid fwsec firmware version: {:?}\n", ver); + return Err(EINVAL); + } + + // Return a reference to the FalconUCodeDescV3 structure. + // + // SAFETY: We have checked that `falcon_ucode_offset + size_of::<FalconUCodeDescV3>` is + // within the bounds of `data`. Also, this data vector is from ROM, and the `data` field + // in `BiosImageBase` is immutable after construction. + Ok(unsafe { + &*(self + .base + .data + .as_ptr() + .add(falcon_ucode_offset) + .cast::<FalconUCodeDescV3>()) + }) + } + + /// Get the ucode data as a byte slice + pub(crate) fn ucode(&self, desc: &FalconUCodeDescV3) -> Result<&[u8]> { + let falcon_ucode_offset = self.falcon_ucode_offset; + + // The ucode data follows the descriptor. + let ucode_data_offset = falcon_ucode_offset + desc.size(); + let size = (desc.imem_load_size + desc.dmem_load_size) as usize; + + // Get the data slice, checking bounds in a single operation. + self.base + .data + .get(ucode_data_offset..ucode_data_offset + size) + .ok_or(ERANGE) + .inspect_err(|_| { + dev_err!( + self.base.dev, + "fwsec ucode data not contained within BIOS bounds\n" + ) + }) + } + + /// Get the signatures as a byte slice + pub(crate) fn sigs(&self, desc: &FalconUCodeDescV3) -> Result<&[Bcrt30Rsa3kSignature]> { + // The signatures data follows the descriptor. + let sigs_data_offset = self.falcon_ucode_offset + core::mem::size_of::<FalconUCodeDescV3>(); + let sigs_size = + desc.signature_count as usize * core::mem::size_of::<Bcrt30Rsa3kSignature>(); + + // Make sure the data is within bounds. + if sigs_data_offset + sigs_size > self.base.data.len() { + dev_err!( + self.base.dev, + "fwsec signatures data not contained within BIOS bounds\n" + ); + return Err(ERANGE); + } + + // SAFETY: we checked that `data + sigs_data_offset + (signature_count * + // sizeof::<Bcrt30Rsa3kSignature>()` is within the bounds of `data`. + Ok(unsafe { + core::slice::from_raw_parts( + self.base + .data + .as_ptr() + .add(sigs_data_offset) + .cast::<Bcrt30Rsa3kSignature>(), + desc.signature_count as usize, + ) + }) + } +} |