From 68aa2fdbf57f769e552f472ddb762aba028a207e Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 10 Nov 2025 22:21:20 +0000 Subject: mm: introduce leaf entry type and use to simplify leaf entry logic The kernel maintains leaf page table entries which contain either: The kernel maintains leaf page table entries which contain either: - Nothing ('none' entries) - Present entries* - Everything else that will cause a fault which the kernel handles * Present entries are either entries the hardware can navigate without page fault or special cases like NUMA hint protnone or PMD with cleared present bit which contain hardware-valid entries modulo the present bit. In the 'everything else' group we include swap entries, but we also include a number of other things such as migration entries, device private entries and marker entries. Unfortunately this 'everything else' group expresses everything through a swp_entry_t type, and these entries are referred to swap entries even though they may well not contain a... swap entry. This is compounded by the rather mind-boggling concept of a non-swap swap entry (checked via non_swap_entry()) and the means by which we twist and turn to satisfy this. This patch lays the foundation for reducing this confusion. We refer to 'everything else' as a 'software-define leaf entry' or 'softleaf'. for short And in fact we scoop up the 'none' entries into this concept also so we are left with: - Present entries. - Softleaf entries (which may be empty). This allows for radical simplification across the board - one can simply convert any leaf page table entry to a leaf entry via softleaf_from_pte(). If the entry is present, we return an empty leaf entry, so it is assumed the caller is aware that they must differentiate between the two categories of page table entries, checking for the former via pte_present(). As a result, we can eliminate a number of places where we would otherwise need to use predicates to see if we can proceed with leaf page table entry conversion and instead just go ahead and do it unconditionally. We do so where we can, adjusting surrounding logic as necessary to integrate the new softleaf_t logic as far as seems reasonable at this stage. We typedef swp_entry_t to softleaf_t for the time being until the conversion can be complete, meaning everything remains compatible regardless of which type is used. We will eventually remove swp_entry_t when the conversion is complete. We introduce a new header file to keep things clear - leafops.h - this imports swapops.h so can direct replace swapops imports without issue, and we do so in all the files that require it. Additionally, add new leafops.h file to core mm maintainers entry. Link: https://lkml.kernel.org/r/c879383aac77d96a03e4d38f7daba893cd35fc76.1762812360.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Zi Yan Reviewed-by: Vlastimil Babka Cc: Alexander Gordeev Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Axel Rasmussen Cc: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Byungchul Park Cc: Chengming Zhou Cc: Chris Li Cc: Christian Borntraeger Cc: Christian Brauner Cc: Claudio Imbrenda Cc: David Hildenbrand Cc: Dev Jain Cc: Gerald Schaefer Cc: Gregory Price Cc: Heiko Carstens Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Janosch Frank Cc: Jason Gunthorpe Cc: Joshua Hahn Cc: Kairui Song Cc: Kemeng Shi Cc: Lance Yang Cc: Leon Romanovsky Cc: Liam Howlett Cc: Mathew Brost Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Michal Hocko Cc: Mike Rapoport Cc: Muchun Song Cc: Naoya Horiguchi Cc: Nhat Pham Cc: Nico Pache Cc: Oscar Salvador Cc: Pasha Tatashin Cc: Peter Xu Cc: Rakie Kim Cc: Rik van Riel Cc: Ryan Roberts Cc: SeongJae Park Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Wei Xu Cc: xu xin Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 26 +++++++++++++------------- fs/userfaultfd.c | 6 +++--- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index db16ed91c269..5a1e897b0973 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -1231,11 +1231,11 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, if (pte_present(ptent)) { folio = page_folio(pte_page(ptent)); present = true; - } else if (is_swap_pte(ptent)) { - swp_entry_t swpent = pte_to_swp_entry(ptent); + } else { + const softleaf_t entry = softleaf_from_pte(ptent); - if (is_pfn_swap_entry(swpent)) - folio = pfn_swap_entry_folio(swpent); + if (softleaf_has_pfn(entry)) + folio = softleaf_to_folio(entry); } if (folio) { @@ -1956,9 +1956,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, flags |= PM_SWAP; if (is_pfn_swap_entry(entry)) page = pfn_swap_entry_to_page(entry); - if (pte_marker_entry_uffd_wp(entry)) + if (softleaf_is_uffd_wp_marker(entry)) flags |= PM_UFFD_WP; - if (is_guard_swp_entry(entry)) + if (softleaf_is_guard_marker(entry)) flags |= PM_GUARD_REGION; } @@ -2331,18 +2331,18 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p, if (pte_soft_dirty(pte)) categories |= PAGE_IS_SOFT_DIRTY; } else if (is_swap_pte(pte)) { - swp_entry_t swp; + softleaf_t entry; categories |= PAGE_IS_SWAPPED; if (!pte_swp_uffd_wp_any(pte)) categories |= PAGE_IS_WRITTEN; - swp = pte_to_swp_entry(pte); - if (is_guard_swp_entry(swp)) + entry = softleaf_from_pte(pte); + if (softleaf_is_guard_marker(entry)) categories |= PAGE_IS_GUARD; else if ((p->masks_of_interest & PAGE_IS_FILE) && - is_pfn_swap_entry(swp) && - !folio_test_anon(pfn_swap_entry_folio(swp))) + softleaf_has_pfn(entry) && + !folio_test_anon(softleaf_to_folio(entry))) categories |= PAGE_IS_FILE; if (pte_swp_soft_dirty(pte)) @@ -2467,7 +2467,7 @@ static void make_uffd_wp_huge_pte(struct vm_area_struct *vma, { unsigned long psize; - if (is_hugetlb_entry_hwpoisoned(ptent) || is_pte_marker(ptent)) + if (is_hugetlb_entry_hwpoisoned(ptent) || pte_is_marker(ptent)) return; psize = huge_page_size(hstate_vma(vma)); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 94c4d68f0818..3f539aabc3b3 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include @@ -251,7 +251,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, if (huge_pte_none(pte)) return true; /* UFFD PTE markers require userspace to resolve the fault. */ - if (is_uffd_pte_marker(pte)) + if (pte_is_uffd_marker(pte)) return true; /* * If VMA has UFFD WP faults enabled and WP fault, wait for userspace to @@ -337,7 +337,7 @@ again: if (pte_none(ptent)) goto out; /* UFFD PTE markers require userspace to resolve the fault. */ - if (is_uffd_pte_marker(ptent)) + if (pte_is_uffd_marker(ptent)) goto out; /* * If VMA has UFFD WP faults enabled and WP fault, wait for userspace to -- cgit v1.2.3