From 5bb9ed6cdfeb75883652fd0ed3e3885083a92b4b Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 8 Apr 2025 09:22:38 +0000 Subject: mm: rust: add abstraction for struct mm_struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "Rust support for mm_struct, vm_area_struct, and mmap", v16. This updates the vm_area_struct support to use the approach we discussed at LPC where there are several different Rust wrappers for vm_area_struct depending on the kind of access you have to the vma. Each case allows a different set of operations on the vma. This includes an MM MAINTAINERS entry as proposed by Lorenzo: https://lore.kernel.org/all/33e64b12-aa07-4e78-933a-b07c37ff1d84@lucifer.local/ This patch (of 9): These abstractions allow you to reference a `struct mm_struct` using both mmgrab and mmget refcounts. This is done using two Rust types: * Mm - represents an mm_struct where you don't know anything about the value of mm_users. * MmWithUser - represents an mm_struct where you know at compile time that mm_users is non-zero. This allows us to encode in the type system whether a method requires that mm_users is non-zero or not. For instance, you can always call `mmget_not_zero` but you can only call `mmap_read_lock` when mm_users is non-zero. The struct is called Mm to keep consistency with the C side. The ability to obtain `current->mm` is added later in this series. The mm module is defined to only exist when CONFIG_MMU is set. This avoids various errors due to missing types and functions when CONFIG_MMU is disabled. More fine-grained cfgs can be considered in the future. See the thread at [1] for more info. Link: https://lkml.kernel.org/r/20250408-vma-v16-9-d8b446e885d9@google.com Link: https://lkml.kernel.org/r/20250408-vma-v16-1-d8b446e885d9@google.com Link: https://lore.kernel.org/all/202503091916.QousmtcY-lkp@intel.com/ Signed-off-by: Alice Ryhl Acked-by: Lorenzo Stoakes Acked-by: Liam R. Howlett Acked-by: Balbir Singh Reviewed-by: Andreas Hindborg Reviewed-by: Gary Guo Cc: Alex Gaynor Cc: Arnd Bergmann Cc: Benno Lossin Cc: Björn Roy Baron Cc: Boqun Feng Cc: Greg Kroah-Hartman Cc: Jann Horn Cc: John Hubbard Cc: Matthew Wilcox (Oracle) Cc: Miguel Ojeda Cc: Suren Baghdasaryan Cc: Trevor Gross Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- rust/helpers/mm.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 rust/helpers/mm.c (limited to 'rust/helpers/mm.c') diff --git a/rust/helpers/mm.c b/rust/helpers/mm.c new file mode 100644 index 000000000000..7201747a5d31 --- /dev/null +++ b/rust/helpers/mm.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +void rust_helper_mmgrab(struct mm_struct *mm) +{ + mmgrab(mm); +} + +void rust_helper_mmdrop(struct mm_struct *mm) +{ + mmdrop(mm); +} + +void rust_helper_mmget(struct mm_struct *mm) +{ + mmget(mm); +} + +bool rust_helper_mmget_not_zero(struct mm_struct *mm) +{ + return mmget_not_zero(mm); +} + +void rust_helper_mmap_read_lock(struct mm_struct *mm) +{ + mmap_read_lock(mm); +} + +bool rust_helper_mmap_read_trylock(struct mm_struct *mm) +{ + return mmap_read_trylock(mm); +} + +void rust_helper_mmap_read_unlock(struct mm_struct *mm) +{ + mmap_read_unlock(mm); +} -- cgit v1.2.3 From 040f404b731207935ed644b14bcc2bb8b8488d00 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 8 Apr 2025 09:22:39 +0000 Subject: mm: rust: add vm_area_struct methods that require read access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a type called VmaRef which is used when referencing a vma that you have read access to. Here, read access means that you hold either the mmap read lock or the vma read lock (or stronger). Additionally, a vma_lookup method is added to the mmap read guard, which enables you to obtain a &VmaRef in safe Rust code. This patch only provides a way to lock the mmap read lock, but a follow-up patch also provides a way to just lock the vma read lock. Link: https://lkml.kernel.org/r/20250408-vma-v16-2-d8b446e885d9@google.com Signed-off-by: Alice Ryhl Acked-by: Lorenzo Stoakes Acked-by: Liam R. Howlett Reviewed-by: Jann Horn Reviewed-by: Andreas Hindborg Reviewed-by: Gary Guo Cc: Alex Gaynor Cc: Arnd Bergmann Cc: Balbir Singh Cc: Benno Lossin Cc: Björn Roy Baron Cc: Boqun Feng Cc: Greg Kroah-Hartman Cc: John Hubbard Cc: Matthew Wilcox (Oracle) Cc: Miguel Ojeda Cc: Suren Baghdasaryan Cc: Trevor Gross Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- rust/helpers/mm.c | 6 ++ rust/kernel/mm.rs | 23 ++++++ rust/kernel/mm/virt.rs | 210 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 239 insertions(+) create mode 100644 rust/kernel/mm/virt.rs (limited to 'rust/helpers/mm.c') diff --git a/rust/helpers/mm.c b/rust/helpers/mm.c index 7201747a5d31..7b72eb065a3e 100644 --- a/rust/helpers/mm.c +++ b/rust/helpers/mm.c @@ -37,3 +37,9 @@ void rust_helper_mmap_read_unlock(struct mm_struct *mm) { mmap_read_unlock(mm); } + +struct vm_area_struct *rust_helper_vma_lookup(struct mm_struct *mm, + unsigned long addr) +{ + return vma_lookup(mm, addr); +} diff --git a/rust/kernel/mm.rs b/rust/kernel/mm.rs index eda7a479cff7..f1689ccb3740 100644 --- a/rust/kernel/mm.rs +++ b/rust/kernel/mm.rs @@ -18,6 +18,8 @@ use crate::{ }; use core::{ops::Deref, ptr::NonNull}; +pub mod virt; + /// A wrapper for the kernel's `struct mm_struct`. /// /// This represents the address space of a userspace process, so each process has one `Mm` @@ -201,6 +203,27 @@ pub struct MmapReadGuard<'a> { _nts: NotThreadSafe, } +impl<'a> MmapReadGuard<'a> { + /// Look up a vma at the given address. + #[inline] + pub fn vma_lookup(&self, vma_addr: usize) -> Option<&virt::VmaRef> { + // SAFETY: By the type invariants we hold the mmap read guard, so we can safely call this + // method. Any value is okay for `vma_addr`. + let vma = unsafe { bindings::vma_lookup(self.mm.as_raw(), vma_addr) }; + + if vma.is_null() { + None + } else { + // SAFETY: We just checked that a vma was found, so the pointer references a valid vma. + // + // Furthermore, the returned vma is still under the protection of the read lock guard + // and can be used while the mmap read lock is still held. That the vma is not used + // after the MmapReadGuard gets dropped is enforced by the borrow-checker. + unsafe { Some(virt::VmaRef::from_raw(vma)) } + } + } +} + impl Drop for MmapReadGuard<'_> { #[inline] fn drop(&mut self) { diff --git a/rust/kernel/mm/virt.rs b/rust/kernel/mm/virt.rs new file mode 100644 index 000000000000..a66be649f0b8 --- /dev/null +++ b/rust/kernel/mm/virt.rs @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Virtual memory. +//! +//! This module deals with managing a single VMA in the address space of a userspace process. Each +//! VMA corresponds to a region of memory that the userspace process can access, and the VMA lets +//! you control what happens when userspace reads or writes to that region of memory. +//! +//! The module has several different Rust types that all correspond to the C type called +//! `vm_area_struct`. The different structs represent what kind of access you have to the VMA, e.g. +//! [`VmaRef`] is used when you hold the mmap or vma read lock. Using the appropriate struct +//! ensures that you can't, for example, accidentally call a function that requires holding the +//! write lock when you only hold the read lock. + +use crate::{bindings, mm::MmWithUser, types::Opaque}; + +/// A wrapper for the kernel's `struct vm_area_struct` with read access. +/// +/// It represents an area of virtual memory. +/// +/// # Invariants +/// +/// The caller must hold the mmap read lock or the vma read lock. +#[repr(transparent)] +pub struct VmaRef { + vma: Opaque, +} + +// Methods you can call when holding the mmap or vma read lock (or stronger). They must be usable +// no matter what the vma flags are. +impl VmaRef { + /// Access a virtual memory area given a raw pointer. + /// + /// # Safety + /// + /// Callers must ensure that `vma` is valid for the duration of 'a, and that the mmap or vma + /// read lock (or stronger) is held for at least the duration of 'a. + #[inline] + pub unsafe fn from_raw<'a>(vma: *const bindings::vm_area_struct) -> &'a Self { + // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a. + unsafe { &*vma.cast() } + } + + /// Returns a raw pointer to this area. + #[inline] + pub fn as_ptr(&self) -> *mut bindings::vm_area_struct { + self.vma.get() + } + + /// Access the underlying `mm_struct`. + #[inline] + pub fn mm(&self) -> &MmWithUser { + // SAFETY: By the type invariants, this `vm_area_struct` is valid and we hold the mmap/vma + // read lock or stronger. This implies that the underlying mm has a non-zero value of + // `mm_users`. + unsafe { MmWithUser::from_raw((*self.as_ptr()).vm_mm) } + } + + /// Returns the flags associated with the virtual memory area. + /// + /// The possible flags are a combination of the constants in [`flags`]. + #[inline] + pub fn flags(&self) -> vm_flags_t { + // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this + // access is not a data race. + unsafe { (*self.as_ptr()).__bindgen_anon_2.vm_flags } + } + + /// Returns the (inclusive) start address of the virtual memory area. + #[inline] + pub fn start(&self) -> usize { + // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this + // access is not a data race. + unsafe { (*self.as_ptr()).__bindgen_anon_1.__bindgen_anon_1.vm_start } + } + + /// Returns the (exclusive) end address of the virtual memory area. + #[inline] + pub fn end(&self) -> usize { + // SAFETY: By the type invariants, the caller holds at least the mmap read lock, so this + // access is not a data race. + unsafe { (*self.as_ptr()).__bindgen_anon_1.__bindgen_anon_1.vm_end } + } + + /// Zap pages in the given page range. + /// + /// This clears page table mappings for the range at the leaf level, leaving all other page + /// tables intact, and freeing any memory referenced by the VMA in this range. That is, + /// anonymous memory is completely freed, file-backed memory has its reference count on page + /// cache folio's dropped, any dirty data will still be written back to disk as usual. + /// + /// It may seem odd that we clear at the leaf level, this is however a product of the page + /// table structure used to map physical memory into a virtual address space - each virtual + /// address actually consists of a bitmap of array indices into page tables, which form a + /// hierarchical page table level structure. + /// + /// As a result, each page table level maps a multiple of page table levels below, and thus + /// span ever increasing ranges of pages. At the leaf or PTE level, we map the actual physical + /// memory. + /// + /// It is here where a zap operates, as it the only place we can be certain of clearing without + /// impacting any other virtual mappings. It is an implementation detail as to whether the + /// kernel goes further in freeing unused page tables, but for the purposes of this operation + /// we must only assume that the leaf level is cleared. + #[inline] + pub fn zap_page_range_single(&self, address: usize, size: usize) { + let (end, did_overflow) = address.overflowing_add(size); + if did_overflow || address < self.start() || self.end() < end { + // TODO: call WARN_ONCE once Rust version of it is added + return; + } + + // SAFETY: By the type invariants, the caller has read access to this VMA, which is + // sufficient for this method call. This method has no requirements on the vma flags. The + // address range is checked to be within the vma. + unsafe { + bindings::zap_page_range_single(self.as_ptr(), address, size, core::ptr::null_mut()) + }; + } +} + +/// The integer type used for vma flags. +#[doc(inline)] +pub use bindings::vm_flags_t; + +/// All possible flags for [`VmaRef`]. +pub mod flags { + use super::vm_flags_t; + use crate::bindings; + + /// No flags are set. + pub const NONE: vm_flags_t = bindings::VM_NONE as _; + + /// Mapping allows reads. + pub const READ: vm_flags_t = bindings::VM_READ as _; + + /// Mapping allows writes. + pub const WRITE: vm_flags_t = bindings::VM_WRITE as _; + + /// Mapping allows execution. + pub const EXEC: vm_flags_t = bindings::VM_EXEC as _; + + /// Mapping is shared. + pub const SHARED: vm_flags_t = bindings::VM_SHARED as _; + + /// Mapping may be updated to allow reads. + pub const MAYREAD: vm_flags_t = bindings::VM_MAYREAD as _; + + /// Mapping may be updated to allow writes. + pub const MAYWRITE: vm_flags_t = bindings::VM_MAYWRITE as _; + + /// Mapping may be updated to allow execution. + pub const MAYEXEC: vm_flags_t = bindings::VM_MAYEXEC as _; + + /// Mapping may be updated to be shared. + pub const MAYSHARE: vm_flags_t = bindings::VM_MAYSHARE as _; + + /// Page-ranges managed without `struct page`, just pure PFN. + pub const PFNMAP: vm_flags_t = bindings::VM_PFNMAP as _; + + /// Memory mapped I/O or similar. + pub const IO: vm_flags_t = bindings::VM_IO as _; + + /// Do not copy this vma on fork. + pub const DONTCOPY: vm_flags_t = bindings::VM_DONTCOPY as _; + + /// Cannot expand with mremap(). + pub const DONTEXPAND: vm_flags_t = bindings::VM_DONTEXPAND as _; + + /// Lock the pages covered when they are faulted in. + pub const LOCKONFAULT: vm_flags_t = bindings::VM_LOCKONFAULT as _; + + /// Is a VM accounted object. + pub const ACCOUNT: vm_flags_t = bindings::VM_ACCOUNT as _; + + /// Should the VM suppress accounting. + pub const NORESERVE: vm_flags_t = bindings::VM_NORESERVE as _; + + /// Huge TLB Page VM. + pub const HUGETLB: vm_flags_t = bindings::VM_HUGETLB as _; + + /// Synchronous page faults. (DAX-specific) + pub const SYNC: vm_flags_t = bindings::VM_SYNC as _; + + /// Architecture-specific flag. + pub const ARCH_1: vm_flags_t = bindings::VM_ARCH_1 as _; + + /// Wipe VMA contents in child on fork. + pub const WIPEONFORK: vm_flags_t = bindings::VM_WIPEONFORK as _; + + /// Do not include in the core dump. + pub const DONTDUMP: vm_flags_t = bindings::VM_DONTDUMP as _; + + /// Not soft dirty clean area. + pub const SOFTDIRTY: vm_flags_t = bindings::VM_SOFTDIRTY as _; + + /// Can contain `struct page` and pure PFN pages. + pub const MIXEDMAP: vm_flags_t = bindings::VM_MIXEDMAP as _; + + /// MADV_HUGEPAGE marked this vma. + pub const HUGEPAGE: vm_flags_t = bindings::VM_HUGEPAGE as _; + + /// MADV_NOHUGEPAGE marked this vma. + pub const NOHUGEPAGE: vm_flags_t = bindings::VM_NOHUGEPAGE as _; + + /// KSM may merge identical pages. + pub const MERGEABLE: vm_flags_t = bindings::VM_MERGEABLE as _; +} -- cgit v1.2.3 From 3105f8f391ce00153c553bfe89efbb30b120d858 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 8 Apr 2025 09:22:41 +0000 Subject: mm: rust: add lock_vma_under_rcu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the binder driver always uses the mmap lock to make changes to its vma. Because the mmap lock is global to the process, this can involve significant contention. However, the kernel has a feature called per-vma locks, which can significantly reduce contention. For example, you can take a vma lock in parallel with an mmap write lock. This is important because contention on the mmap lock has been a long-term recurring challenge for the Binder driver. This patch introduces support for using `lock_vma_under_rcu` from Rust. The Rust Binder driver will be able to use this to reduce contention on the mmap lock. Link: https://lkml.kernel.org/r/20250408-vma-v16-4-d8b446e885d9@google.com Signed-off-by: Alice Ryhl Acked-by: Lorenzo Stoakes Acked-by: Liam R. Howlett Reviewed-by: Jann Horn Reviewed-by: Andreas Hindborg Reviewed-by: Gary Guo Cc: Alex Gaynor Cc: Arnd Bergmann Cc: Balbir Singh Cc: Benno Lossin Cc: Björn Roy Baron Cc: Boqun Feng Cc: Greg Kroah-Hartman Cc: John Hubbard Cc: Matthew Wilcox (Oracle) Cc: Miguel Ojeda Cc: Suren Baghdasaryan Cc: Trevor Gross Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- rust/helpers/mm.c | 5 +++++ rust/kernel/mm.rs | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) (limited to 'rust/helpers/mm.c') diff --git a/rust/helpers/mm.c b/rust/helpers/mm.c index 7b72eb065a3e..81b510c96fd2 100644 --- a/rust/helpers/mm.c +++ b/rust/helpers/mm.c @@ -43,3 +43,8 @@ struct vm_area_struct *rust_helper_vma_lookup(struct mm_struct *mm, { return vma_lookup(mm, addr); } + +void rust_helper_vma_end_read(struct vm_area_struct *vma) +{ + vma_end_read(vma); +} diff --git a/rust/kernel/mm.rs b/rust/kernel/mm.rs index f1689ccb3740..c160fb52603f 100644 --- a/rust/kernel/mm.rs +++ b/rust/kernel/mm.rs @@ -19,6 +19,7 @@ use crate::{ use core::{ops::Deref, ptr::NonNull}; pub mod virt; +use virt::VmaRef; /// A wrapper for the kernel's `struct mm_struct`. /// @@ -161,6 +162,36 @@ impl MmWithUser { unsafe { &*ptr.cast() } } + /// Attempt to access a vma using the vma read lock. + /// + /// This is an optimistic trylock operation, so it may fail if there is contention. In that + /// case, you should fall back to taking the mmap read lock. + /// + /// When per-vma locks are disabled, this always returns `None`. + #[inline] + pub fn lock_vma_under_rcu(&self, vma_addr: usize) -> Option> { + #[cfg(CONFIG_PER_VMA_LOCK)] + { + // SAFETY: Calling `bindings::lock_vma_under_rcu` is always okay given an mm where + // `mm_users` is non-zero. + let vma = unsafe { bindings::lock_vma_under_rcu(self.as_raw(), vma_addr) }; + if !vma.is_null() { + return Some(VmaReadGuard { + // SAFETY: If `lock_vma_under_rcu` returns a non-null ptr, then it points at a + // valid vma. The vma is stable for as long as the vma read lock is held. + vma: unsafe { VmaRef::from_raw(vma) }, + _nts: NotThreadSafe, + }); + } + } + + // Silence warnings about unused variables. + #[cfg(not(CONFIG_PER_VMA_LOCK))] + let _ = vma_addr; + + None + } + /// Lock the mmap read lock. #[inline] pub fn mmap_read_lock(&self) -> MmapReadGuard<'_> { @@ -231,3 +262,32 @@ impl Drop for MmapReadGuard<'_> { unsafe { bindings::mmap_read_unlock(self.mm.as_raw()) }; } } + +/// A guard for the vma read lock. +/// +/// # Invariants +/// +/// This `VmaReadGuard` guard owns the vma read lock. +pub struct VmaReadGuard<'a> { + vma: &'a VmaRef, + // `vma_end_read` must be called on the same thread as where the lock was taken + _nts: NotThreadSafe, +} + +// Make all `VmaRef` methods available on `VmaReadGuard`. +impl Deref for VmaReadGuard<'_> { + type Target = VmaRef; + + #[inline] + fn deref(&self) -> &VmaRef { + self.vma + } +} + +impl Drop for VmaReadGuard<'_> { + #[inline] + fn drop(&mut self) { + // SAFETY: We hold the read lock by the type invariants. + unsafe { bindings::vma_end_read(self.vma.as_ptr()) }; + } +} -- cgit v1.2.3