diff options
Diffstat (limited to 'include/linux/page-flags.h')
-rw-r--r-- | include/linux/page-flags.h | 485 |
1 files changed, 288 insertions, 197 deletions
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a77f3a7d21d1..4e8c6d0511c5 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -30,16 +30,11 @@ * - Pages falling into physical memory gaps - not IORESOURCE_SYSRAM. Trying * to read/write these pages might end badly. Don't touch! * - The zero page(s) - * - Pages not added to the page allocator when onlining a section because - * they were excluded via the online_page_callback() or because they are - * PG_hwpoison. * - Pages allocated in the context of kexec/kdump (loaded kernel image, * control pages, vmcoreinfo) * - MMIO/DMA pages. Some architectures don't allow to ioremap pages that are * not marked PG_reserved (as they might be in use by somebody else who does * not respect the caching strategy). - * - Pages part of an offline section (struct pages of offline sections should - * not be trusted as they will be initialized when first onlined). * - MCA pages on ia64 * - Pages holding CPU notes for POWER Firmware Assisted Dump * - Device memory (e.g. PMEM, DAX, HMM) @@ -71,8 +66,6 @@ * PG_referenced, PG_reclaim are used for page reclaim for anonymous and * file-backed pagecache (see mm/vmscan.c). * - * PG_error is set to indicate that an I/O error occurred on this page. - * * PG_arch_1 is an architecture specific page state bit. The generic code * guarantees that this bit is cleared for a page when it first is entered into * the page cache. @@ -108,23 +101,18 @@ enum pageflags { PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_active, PG_workingset, - PG_error, - PG_slab, - PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/ + PG_owner_priv_1, /* Owner use. If pagecache, fs may use */ + PG_owner_2, /* Owner use. If pagecache, fs may use */ PG_arch_1, PG_reserved, PG_private, /* If pagecache, has fs-private data */ PG_private_2, /* If pagecache, has fs aux data */ - PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ PG_swapbacked, /* Page is backed by RAM/swap */ PG_unevictable, /* Page is "unevictable" */ #ifdef CONFIG_MMU PG_mlocked, /* Page is vma mlocked */ #endif -#ifdef CONFIG_ARCH_USES_PG_UNCACHED - PG_uncached, /* Page has been mapped as uncached */ -#endif #ifdef CONFIG_MEMORY_FAILURE PG_hwpoison, /* hardware poisoned page. Don't touch */ #endif @@ -132,14 +120,21 @@ enum pageflags { PG_young, PG_idle, #endif -#ifdef CONFIG_ARCH_USES_PG_ARCH_X +#ifdef CONFIG_ARCH_USES_PG_ARCH_2 PG_arch_2, +#endif +#ifdef CONFIG_ARCH_USES_PG_ARCH_3 PG_arch_3, #endif __NR_PAGEFLAGS, PG_readahead = PG_reclaim, + /* Anonymous memory (and shmem) */ + PG_swapcache = PG_owner_priv_1, /* Swap page: swp_entry_t in private */ + /* Some filesystems */ + PG_checked = PG_owner_priv_1, + /* * Depending on the way an anonymous folio can be mapped into a page * table (e.g., single PMD/PUD/CONT of the head page vs. PTE-mapped @@ -147,13 +142,13 @@ enum pageflags { * tail pages of an anonymous folio. For now, we only expect it to be * set on tail pages for PTE-mapped THP. */ - PG_anon_exclusive = PG_mappedtodisk, + PG_anon_exclusive = PG_owner_2, - /* Filesystems */ - PG_checked = PG_owner_priv_1, - - /* SwapBacked */ - PG_swapcache = PG_owner_priv_1, /* Swap page: swp_entry_t in private */ + /* + * Set if all buffer heads in the folio are mapped. + * Filesystems which do not use BHs can use it for their own purpose. + */ + PG_mappedtodisk = PG_owner_2, /* Two page bits are conscripted by FS-Cache to maintain local caching * state. These bits are set on pages belonging to the netfs's inodes @@ -189,8 +184,9 @@ enum pageflags { */ /* At least one page in this folio has the hwpoison flag set */ - PG_has_hwpoisoned = PG_error, + PG_has_hwpoisoned = PG_active, PG_large_rmappable = PG_workingset, /* anon or file-backed */ + PG_partially_mapped = PG_reclaim, /* was identified to be partially mapped */ }; #define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1) @@ -236,12 +232,12 @@ static inline const struct page *page_fixed_fake_head(const struct page *page) } #endif -static __always_inline int page_is_fake_head(struct page *page) +static __always_inline int page_is_fake_head(const struct page *page) { return page_fixed_fake_head(page) != page; } -static inline unsigned long _compound_head(const struct page *page) +static __always_inline unsigned long _compound_head(const struct page *page) { unsigned long head = READ_ONCE(page->compound_head); @@ -280,12 +276,12 @@ static inline unsigned long _compound_head(const struct page *page) */ #define folio_page(folio, n) nth_page(&(folio)->page, n) -static __always_inline int PageTail(struct page *page) +static __always_inline int PageTail(const struct page *page) { return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page); } -static __always_inline int PageCompound(struct page *page) +static __always_inline int PageCompound(const struct page *page) { return test_bit(PG_head, &page->flags) || READ_ONCE(page->compound_head) & 1; @@ -305,11 +301,21 @@ static inline void page_init_poison(struct page *page, size_t size) } #endif +static const unsigned long *const_folio_flags(const struct folio *folio, + unsigned n) +{ + const struct page *page = &folio->page; + + VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); + VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); + return &page[n].flags; +} + static unsigned long *folio_flags(struct folio *folio, unsigned n) { struct page *page = &folio->page; - VM_BUG_ON_PGFLAGS(PageTail(page), page); + VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); return &page[n].flags; } @@ -327,9 +333,6 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n) * for compound page all operations related to the page flag applied to * head page. * - * PF_ONLY_HEAD: - * for compound page, callers only ever operate on the head page. - * * PF_NO_TAIL: * modifications of the page flag must be done on small or head pages, * checks can be done on tail pages too. @@ -345,9 +348,6 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n) page; }) #define PF_ANY(page, enforce) PF_POISONED_CHECK(page) #define PF_HEAD(page, enforce) PF_POISONED_CHECK(compound_head(page)) -#define PF_ONLY_HEAD(page, enforce) ({ \ - VM_BUG_ON_PGFLAGS(PageTail(page), page); \ - PF_POISONED_CHECK(page); }) #define PF_NO_TAIL(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \ PF_POISONED_CHECK(compound_head(page)); }) @@ -361,59 +361,81 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n) /* Which page is the flag stored in */ #define FOLIO_PF_ANY 0 #define FOLIO_PF_HEAD 0 -#define FOLIO_PF_ONLY_HEAD 0 #define FOLIO_PF_NO_TAIL 0 #define FOLIO_PF_NO_COMPOUND 0 #define FOLIO_PF_SECOND 1 +#define FOLIO_HEAD_PAGE 0 +#define FOLIO_SECOND_PAGE 1 + /* * Macros to create function definitions for page flags */ +#define FOLIO_TEST_FLAG(name, page) \ +static __always_inline bool folio_test_##name(const struct folio *folio) \ +{ return test_bit(PG_##name, const_folio_flags(folio, page)); } + +#define FOLIO_SET_FLAG(name, page) \ +static __always_inline void folio_set_##name(struct folio *folio) \ +{ set_bit(PG_##name, folio_flags(folio, page)); } + +#define FOLIO_CLEAR_FLAG(name, page) \ +static __always_inline void folio_clear_##name(struct folio *folio) \ +{ clear_bit(PG_##name, folio_flags(folio, page)); } + +#define __FOLIO_SET_FLAG(name, page) \ +static __always_inline void __folio_set_##name(struct folio *folio) \ +{ __set_bit(PG_##name, folio_flags(folio, page)); } + +#define __FOLIO_CLEAR_FLAG(name, page) \ +static __always_inline void __folio_clear_##name(struct folio *folio) \ +{ __clear_bit(PG_##name, folio_flags(folio, page)); } + +#define FOLIO_TEST_SET_FLAG(name, page) \ +static __always_inline bool folio_test_set_##name(struct folio *folio) \ +{ return test_and_set_bit(PG_##name, folio_flags(folio, page)); } + +#define FOLIO_TEST_CLEAR_FLAG(name, page) \ +static __always_inline bool folio_test_clear_##name(struct folio *folio) \ +{ return test_and_clear_bit(PG_##name, folio_flags(folio, page)); } + +#define FOLIO_FLAG(name, page) \ +FOLIO_TEST_FLAG(name, page) \ +FOLIO_SET_FLAG(name, page) \ +FOLIO_CLEAR_FLAG(name, page) + #define TESTPAGEFLAG(uname, lname, policy) \ -static __always_inline bool folio_test_##lname(struct folio *folio) \ -{ return test_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ -static __always_inline int Page##uname(struct page *page) \ +FOLIO_TEST_FLAG(lname, FOLIO_##policy) \ +static __always_inline int Page##uname(const struct page *page) \ { return test_bit(PG_##lname, &policy(page, 0)->flags); } #define SETPAGEFLAG(uname, lname, policy) \ -static __always_inline \ -void folio_set_##lname(struct folio *folio) \ -{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void SetPage##uname(struct page *page) \ { set_bit(PG_##lname, &policy(page, 1)->flags); } #define CLEARPAGEFLAG(uname, lname, policy) \ -static __always_inline \ -void folio_clear_##lname(struct folio *folio) \ -{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void ClearPage##uname(struct page *page) \ { clear_bit(PG_##lname, &policy(page, 1)->flags); } #define __SETPAGEFLAG(uname, lname, policy) \ -static __always_inline \ -void __folio_set_##lname(struct folio *folio) \ -{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +__FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void __SetPage##uname(struct page *page) \ { __set_bit(PG_##lname, &policy(page, 1)->flags); } #define __CLEARPAGEFLAG(uname, lname, policy) \ -static __always_inline \ -void __folio_clear_##lname(struct folio *folio) \ -{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +__FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void __ClearPage##uname(struct page *page) \ { __clear_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTSETFLAG(uname, lname, policy) \ -static __always_inline \ -bool folio_test_set_##lname(struct folio *folio) \ -{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestSetPage##uname(struct page *page) \ { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTCLEARFLAG(uname, lname, policy) \ -static __always_inline \ -bool folio_test_clear_##lname(struct folio *folio) \ -{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestClearPage##uname(struct page *page) \ { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } @@ -485,20 +507,19 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname) __PAGEFLAG(Locked, locked, PF_NO_TAIL) -PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) -PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) -PAGEFLAG(Referenced, referenced, PF_HEAD) - TESTCLEARFLAG(Referenced, referenced, PF_HEAD) - __SETPAGEFLAG(Referenced, referenced, PF_HEAD) +FOLIO_FLAG(waiters, FOLIO_HEAD_PAGE) +FOLIO_FLAG(referenced, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(referenced, FOLIO_HEAD_PAGE) + __FOLIO_SET_FLAG(referenced, FOLIO_HEAD_PAGE) PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD) __CLEARPAGEFLAG(Dirty, dirty, PF_HEAD) PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD) TESTCLEARFLAG(LRU, lru, PF_HEAD) -PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD) - TESTCLEARFLAG(Active, active, PF_HEAD) +FOLIO_FLAG(active, FOLIO_HEAD_PAGE) + __FOLIO_CLEAR_FLAG(active, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(active, FOLIO_HEAD_PAGE) PAGEFLAG(Workingset, workingset, PF_HEAD) TESTCLEARFLAG(Workingset, workingset, PF_HEAD) -__PAGEFLAG(Slab, slab, PF_NO_TAIL) PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */ /* Xen */ @@ -512,9 +533,9 @@ PAGEFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND) PAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) __CLEARPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) __SETPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) -PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) - __CLEARPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) - __SETPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) +FOLIO_FLAG(swapbacked, FOLIO_HEAD_PAGE) + __FOLIO_CLEAR_FLAG(swapbacked, FOLIO_HEAD_PAGE) + __FOLIO_SET_FLAG(swapbacked, FOLIO_HEAD_PAGE) /* * Private page markings that may be used by the filesystem that owns the page @@ -523,8 +544,9 @@ PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) */ PAGEFLAG(Private, private, PF_ANY) PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY) -PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY) - TESTCLEARFLAG(OwnerPriv1, owner_priv_1, PF_ANY) + +/* owner_2 can be set on tail pages for anon memory */ +FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE) /* * Only test-and-set exist for PG_writeback. The unconditional operators are @@ -537,8 +559,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL) -PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND) - TESTCLEARFLAG(Readahead, readahead, PF_NO_COMPOUND) +FOLIO_FLAG(readahead, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(readahead, FOLIO_HEAD_PAGE) #ifdef CONFIG_HIGHMEM /* @@ -551,62 +573,63 @@ PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND) PAGEFLAG_FALSE(HighMem, highmem) #endif +/* Does kmap_local_folio() only allow access to one page of the folio? */ +#ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +#define folio_test_partial_kmap(f) true +#else +#define folio_test_partial_kmap(f) folio_test_highmem(f) +#endif + #ifdef CONFIG_SWAP -static __always_inline bool folio_test_swapcache(struct folio *folio) +static __always_inline bool folio_test_swapcache(const struct folio *folio) { return folio_test_swapbacked(folio) && - test_bit(PG_swapcache, folio_flags(folio, 0)); + test_bit(PG_swapcache, const_folio_flags(folio, 0)); } -static __always_inline bool PageSwapCache(struct page *page) -{ - return folio_test_swapcache(page_folio(page)); -} - -SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) -CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) +FOLIO_SET_FLAG(swapcache, FOLIO_HEAD_PAGE) +FOLIO_CLEAR_FLAG(swapcache, FOLIO_HEAD_PAGE) #else -PAGEFLAG_FALSE(SwapCache, swapcache) +FOLIO_FLAG_FALSE(swapcache) #endif -PAGEFLAG(Unevictable, unevictable, PF_HEAD) - __CLEARPAGEFLAG(Unevictable, unevictable, PF_HEAD) - TESTCLEARFLAG(Unevictable, unevictable, PF_HEAD) +FOLIO_FLAG(unevictable, FOLIO_HEAD_PAGE) + __FOLIO_CLEAR_FLAG(unevictable, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(unevictable, FOLIO_HEAD_PAGE) #ifdef CONFIG_MMU -PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) - __CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) - TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL) -#else -PAGEFLAG_FALSE(Mlocked, mlocked) __CLEARPAGEFLAG_NOOP(Mlocked, mlocked) - TESTSCFLAG_FALSE(Mlocked, mlocked) -#endif - -#ifdef CONFIG_ARCH_USES_PG_UNCACHED -PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND) +FOLIO_FLAG(mlocked, FOLIO_HEAD_PAGE) + __FOLIO_CLEAR_FLAG(mlocked, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(mlocked, FOLIO_HEAD_PAGE) + FOLIO_TEST_SET_FLAG(mlocked, FOLIO_HEAD_PAGE) #else -PAGEFLAG_FALSE(Uncached, uncached) +FOLIO_FLAG_FALSE(mlocked) + __FOLIO_CLEAR_FLAG_NOOP(mlocked) + FOLIO_TEST_CLEAR_FLAG_FALSE(mlocked) + FOLIO_TEST_SET_FLAG_FALSE(mlocked) #endif #ifdef CONFIG_MEMORY_FAILURE PAGEFLAG(HWPoison, hwpoison, PF_ANY) TESTSCFLAG(HWPoison, hwpoison, PF_ANY) #define __PG_HWPOISON (1UL << PG_hwpoison) -#define MAGIC_HWPOISON 0x48575053U /* HWPS */ -extern void SetPageHWPoisonTakenOff(struct page *page); -extern void ClearPageHWPoisonTakenOff(struct page *page); -extern bool take_page_off_buddy(struct page *page); -extern bool put_page_back_buddy(struct page *page); #else PAGEFLAG_FALSE(HWPoison, hwpoison) #define __PG_HWPOISON 0 #endif -#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) -TESTPAGEFLAG(Young, young, PF_ANY) -SETPAGEFLAG(Young, young, PF_ANY) -TESTCLEARFLAG(Young, young, PF_ANY) -PAGEFLAG(Idle, idle, PF_ANY) +#ifdef CONFIG_PAGE_IDLE_FLAG +#ifdef CONFIG_64BIT +FOLIO_TEST_FLAG(young, FOLIO_HEAD_PAGE) +FOLIO_SET_FLAG(young, FOLIO_HEAD_PAGE) +FOLIO_TEST_CLEAR_FLAG(young, FOLIO_HEAD_PAGE) +FOLIO_FLAG(idle, FOLIO_HEAD_PAGE) +#endif +/* See page_idle.h for !64BIT workaround */ +#else /* !CONFIG_PAGE_IDLE_FLAG */ +FOLIO_FLAG_FALSE(young) +FOLIO_TEST_CLEAR_FLAG_FALSE(young) +FOLIO_FLAG_FALSE(idle) #endif /* @@ -624,27 +647,28 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) #endif /* - * On an anonymous page mapped into a user virtual memory area, - * page->mapping points to its anon_vma, not to a struct address_space; + * On an anonymous folio mapped into a user virtual memory area, + * folio->mapping points to its anon_vma, not to a struct address_space; * with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h. * * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled, * the PAGE_MAPPING_MOVABLE bit may be set along with the PAGE_MAPPING_ANON - * bit; and then page->mapping points, not to an anon_vma, but to a private + * bit; and then folio->mapping points, not to an anon_vma, but to a private * structure which KSM associates with that merged page. See ksm.h. * * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for non-lru movable - * page and then page->mapping points to a struct movable_operations. + * page and then folio->mapping points to a struct movable_operations. * - * Please note that, confusingly, "page_mapping" refers to the inode - * address_space which maps the page from disk; whereas "page_mapped" - * refers to user virtual address space into which the page is mapped. + * Please note that, confusingly, "folio_mapping" refers to the inode + * address_space which maps the folio from disk; whereas "folio_mapped" + * refers to user virtual address space into which the folio is mapped. * * For slab pages, since slab reuses the bits in struct page to store its - * internal states, the page->mapping does not exist as such, nor do these - * flags below. So in order to avoid testing non-existent bits, please - * make sure that PageSlab(page) actually evaluates to false before calling - * the following functions (e.g., PageAnon). See mm/slab.h. + * internal states, the folio->mapping does not exist as such, nor do + * these flags below. So in order to avoid testing non-existent bits, + * please make sure that folio_test_slab(folio) actually evaluates to + * false before calling the following functions (e.g., folio_test_anon). + * See mm/slab.h. */ #define PAGE_MAPPING_ANON 0x1 #define PAGE_MAPPING_MOVABLE 0x2 @@ -657,22 +681,22 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) */ #define PAGE_MAPPING_DAX_SHARED ((void *)0x1) -static __always_inline bool folio_mapping_flags(struct folio *folio) +static __always_inline bool folio_mapping_flags(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0; } -static __always_inline int PageMappingFlags(struct page *page) +static __always_inline bool PageMappingFlags(const struct page *page) { return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0; } -static __always_inline bool folio_test_anon(struct folio *folio) +static __always_inline bool folio_test_anon(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0; } -static __always_inline bool PageAnon(struct page *page) +static __always_inline bool PageAnon(const struct page *page) { return folio_test_anon(page_folio(page)); } @@ -683,7 +707,7 @@ static __always_inline bool __folio_test_movable(const struct folio *folio) PAGE_MAPPING_MOVABLE; } -static __always_inline int __PageMovable(struct page *page) +static __always_inline bool __PageMovable(const struct page *page) { return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_MOVABLE; @@ -696,13 +720,13 @@ static __always_inline int __PageMovable(struct page *page) * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any * anon_vma, but to that page's node of the stable tree. */ -static __always_inline bool folio_test_ksm(struct folio *folio) +static __always_inline bool folio_test_ksm(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_KSM; } -static __always_inline bool PageKsm(struct page *page) +static __always_inline bool PageKsm(const struct page *page) { return folio_test_ksm(page_folio(page)); } @@ -710,7 +734,26 @@ static __always_inline bool PageKsm(struct page *page) TESTPAGEFLAG_FALSE(Ksm, ksm) #endif -u64 stable_page_flags(struct page *page); +u64 stable_page_flags(const struct page *page); + +/** + * folio_xor_flags_has_waiters - Change some folio flags. + * @folio: The folio. + * @mask: Bits set in this word will be changed. + * + * This must only be used for flags which are changed with the folio + * lock held. For example, it is unsafe to use for PG_dirty as that + * can be set without the folio lock held. It can also only be used + * on flags which are in the range 0-6 as some of the implementations + * only affect those bits. + * + * Return: Whether there are tasks waiting on the folio. + */ +static inline bool folio_xor_flags_has_waiters(struct folio *folio, + unsigned long mask) +{ + return xor_unlock_is_negative_byte(mask, folio_flags(folio, 0)); +} /** * folio_test_uptodate - Is this folio up to date? @@ -722,9 +765,9 @@ u64 stable_page_flags(struct page *page); * some of the bytes in it may be; see the is_partially_uptodate() * address_space operation. */ -static inline bool folio_test_uptodate(struct folio *folio) +static inline bool folio_test_uptodate(const struct folio *folio) { - bool ret = test_bit(PG_uptodate, folio_flags(folio, 0)); + bool ret = test_bit(PG_uptodate, const_folio_flags(folio, 0)); /* * Must ensure that the data we read out of the folio is loaded * _after_ we've loaded folio->flags to check the uptodate bit. @@ -739,7 +782,7 @@ static inline bool folio_test_uptodate(struct folio *folio) return ret; } -static inline int PageUptodate(struct page *page) +static inline bool PageUptodate(const struct page *page) { return folio_test_uptodate(page_folio(page)); } @@ -773,25 +816,20 @@ static __always_inline void SetPageUptodate(struct page *page) CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL) -bool __folio_start_writeback(struct folio *folio, bool keep_write); -bool set_page_writeback(struct page *page); +void __folio_start_writeback(struct folio *folio, bool keep_write); +void set_page_writeback(struct page *page); #define folio_start_writeback(folio) \ __folio_start_writeback(folio, false) #define folio_start_writeback_keepwrite(folio) \ __folio_start_writeback(folio, true) -static inline bool test_set_page_writeback(struct page *page) -{ - return set_page_writeback(page); -} - -static __always_inline bool folio_test_head(struct folio *folio) +static __always_inline bool folio_test_head(const struct folio *folio) { - return test_bit(PG_head, folio_flags(folio, FOLIO_PF_ANY)); + return test_bit(PG_head, const_folio_flags(folio, FOLIO_PF_ANY)); } -static __always_inline int PageHead(struct page *page) +static __always_inline int PageHead(const struct page *page) { PF_POISONED_CHECK(page); return test_bit(PG_head, &page->flags) && !page_is_fake_head(page); @@ -807,7 +845,7 @@ CLEARPAGEFLAG(Head, head, PF_ANY) * * Return: True if the folio is larger than one page. */ -static inline bool folio_test_large(struct folio *folio) +static inline bool folio_test_large(const struct folio *folio) { return folio_test_head(folio); } @@ -828,9 +866,11 @@ static inline void ClearPageCompound(struct page *page) BUG_ON(!PageHead(page)); ClearPageHead(page); } -PAGEFLAG(LargeRmappable, large_rmappable, PF_SECOND) +FOLIO_FLAG(large_rmappable, FOLIO_SECOND_PAGE) +FOLIO_FLAG(partially_mapped, FOLIO_SECOND_PAGE) #else -TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable) +FOLIO_FLAG_FALSE(large_rmappable) +FOLIO_FLAG_FALSE(partially_mapped) #endif #define PG_head_mask ((1UL << PG_head)) @@ -844,7 +884,7 @@ TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable) * hugetlbfs pages, but not normal pages. PageTransHuge() can only be * called only in the core VM paths where hugetlbfs pages can't exist. */ -static inline int PageTransHuge(struct page *page) +static inline int PageTransHuge(const struct page *page) { VM_BUG_ON_PAGE(PageTail(page), page); return PageHead(page); @@ -855,7 +895,7 @@ static inline int PageTransHuge(struct page *page) * and hugetlbfs pages, so it should only be called when it's known * that hugetlbfs pages aren't involved. */ -static inline int PageTransCompound(struct page *page) +static inline int PageTransCompound(const struct page *page) { return PageCompound(page); } @@ -865,7 +905,7 @@ static inline int PageTransCompound(struct page *page) * and hugetlbfs pages, so it should only be called when it's known * that hugetlbfs pages aren't involved. */ -static inline int PageTransTail(struct page *page) +static inline int PageTransTail(const struct page *page) { return PageTail(page); } @@ -891,69 +931,82 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #endif /* - * For pages that are never mapped to userspace (and aren't PageSlab), - * page_type may be used. Because it is initialised to -1, we invert the - * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and - * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and - * low bits so that an underflow or overflow of _mapcount won't be - * mistaken for a page type value. + * For pages that do not use mapcount, page_type may be used. + * The low 24 bits of pagetype may be used for your own purposes, as long + * as you are careful to not affect the top 8 bits. The low bits of + * pagetype will be overwritten when you clear the page_type from the page. */ +enum pagetype { + /* 0x00-0x7f are positive numbers, ie mapcount */ + /* Reserve 0x80-0xef for mapcount overflow. */ + PGTY_buddy = 0xf0, + PGTY_offline = 0xf1, + PGTY_table = 0xf2, + PGTY_guard = 0xf3, + PGTY_hugetlb = 0xf4, + PGTY_slab = 0xf5, + PGTY_zsmalloc = 0xf6, + PGTY_unaccepted = 0xf7, + + PGTY_mapcount_underflow = 0xff +}; -#define PAGE_TYPE_BASE 0xf0000000 -/* Reserve 0x0000007f to catch underflows of _mapcount */ -#define PAGE_MAPCOUNT_RESERVE -128 -#define PG_buddy 0x00000080 -#define PG_offline 0x00000100 -#define PG_table 0x00000200 -#define PG_guard 0x00000400 -#define PG_hugetlb 0x00000800 - -#define PageType(page, flag) \ - ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) -#define folio_test_type(folio, flag) \ - ((folio->page.page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) - -static inline int page_type_has_type(unsigned int page_type) +static inline bool page_type_has_type(int page_type) { - return (int)page_type < PAGE_MAPCOUNT_RESERVE; + return page_type < (PGTY_mapcount_underflow << 24); } -static inline int page_has_type(struct page *page) +/* This takes a mapcount which is one more than page->_mapcount */ +static inline bool page_mapcount_is_type(unsigned int mapcount) { - return page_type_has_type(page->page_type); + return page_type_has_type(mapcount - 1); +} + +static inline bool page_has_type(const struct page *page) +{ + return page_mapcount_is_type(data_race(page->page_type)); } #define FOLIO_TYPE_OPS(lname, fname) \ -static __always_inline bool folio_test_##fname(const struct folio *folio)\ +static __always_inline bool folio_test_##fname(const struct folio *folio) \ { \ - return folio_test_type(folio, PG_##lname); \ + return data_race(folio->page.page_type >> 24) == PGTY_##lname; \ } \ static __always_inline void __folio_set_##fname(struct folio *folio) \ { \ - VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ - folio->page.page_type &= ~PG_##lname; \ + if (folio_test_##fname(folio)) \ + return; \ + VM_BUG_ON_FOLIO(data_race(folio->page.page_type) != UINT_MAX, \ + folio); \ + folio->page.page_type = (unsigned int)PGTY_##lname << 24; \ } \ static __always_inline void __folio_clear_##fname(struct folio *folio) \ { \ + if (folio->page.page_type == UINT_MAX) \ + return; \ VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ - folio->page.page_type |= PG_##lname; \ + folio->page.page_type = UINT_MAX; \ } #define PAGE_TYPE_OPS(uname, lname, fname) \ FOLIO_TYPE_OPS(lname, fname) \ static __always_inline int Page##uname(const struct page *page) \ { \ - return PageType(page, PG_##lname); \ + return data_race(page->page_type >> 24) == PGTY_##lname; \ } \ static __always_inline void __SetPage##uname(struct page *page) \ { \ - VM_BUG_ON_PAGE(!PageType(page, 0), page); \ - page->page_type &= ~PG_##lname; \ + if (Page##uname(page)) \ + return; \ + VM_BUG_ON_PAGE(data_race(page->page_type) != UINT_MAX, page); \ + page->page_type = (unsigned int)PGTY_##lname << 24; \ } \ static __always_inline void __ClearPage##uname(struct page *page) \ { \ + if (page->page_type == UINT_MAX) \ + return; \ VM_BUG_ON_PAGE(!Page##uname(page), page); \ - page->page_type |= PG_##lname; \ + page->page_type = UINT_MAX; \ } /* @@ -969,15 +1022,22 @@ PAGE_TYPE_OPS(Buddy, buddy, buddy) * The content of these pages is effectively stale. Such pages should not * be touched (read/write/dump/save) except by their owner. * + * When a memory block gets onlined, all pages are initialized with a + * refcount of 1 and PageOffline(). generic_online_page() will + * take care of clearing PageOffline(). + * * If a driver wants to allow to offline unmovable PageOffline() pages without * putting them back to the buddy, it can do so via the memory notifier by * decrementing the reference count in MEM_GOING_OFFLINE and incrementing the * reference count in MEM_CANCEL_OFFLINE. When offlining, the PageOffline() - * pages (now with a reference count of zero) are treated like free pages, - * allowing the containing memory block to get offlined. A driver that + * pages (now with a reference count of zero) are treated like free (unmanaged) + * pages, allowing the containing memory block to get offlined. A driver that * relies on this feature is aware that re-onlining the memory block will - * require to re-set the pages PageOffline() and not giving them to the - * buddy via online_page_callback_t. + * require not giving them to the buddy via generic_online_page(). + * + * Memory offlining code will not adjust the managed page count for any + * PageOffline() pages, treating them like they were never exposed to the + * buddy using generic_online_page(). * * There are drivers that mark a page PageOffline() and expect there won't be * any further access to page content. PFN walkers that read content of random @@ -1001,12 +1061,35 @@ PAGE_TYPE_OPS(Table, table, pgtable) */ PAGE_TYPE_OPS(Guard, guard, guard) +FOLIO_TYPE_OPS(slab, slab) + +/** + * PageSlab - Determine if the page belongs to the slab allocator + * @page: The page to test. + * + * Context: Any context. + * Return: True for slab pages, false for any other kind of page. + */ +static inline bool PageSlab(const struct page *page) +{ + return folio_test_slab(page_folio(page)); +} + #ifdef CONFIG_HUGETLB_PAGE FOLIO_TYPE_OPS(hugetlb, hugetlb) #else FOLIO_TEST_FLAG_FALSE(hugetlb) #endif +PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc) + +/* + * Mark pages that has to be accepted before touched for the first time. + * + * Serialized with zone lock. + */ +PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted) + /** * PageHuge - Determine if the page belongs to hugetlbfs * @page: The page to test. @@ -1025,21 +1108,35 @@ static inline bool PageHuge(const struct page *page) * best effort only and inherently racy: there is no way to synchronize with * failing hardware. */ -static inline bool is_page_hwpoison(struct page *page) +static inline bool is_page_hwpoison(const struct page *page) { + const struct folio *folio; + if (PageHWPoison(page)) return true; - return PageHuge(page) && PageHWPoison(compound_head(page)); + folio = page_folio(page); + return folio_test_hugetlb(folio) && PageHWPoison(&folio->page); } -extern bool is_free_buddy_page(struct page *page); +static inline bool folio_contain_hwpoisoned_page(struct folio *folio) +{ + return folio_test_hwpoison(folio) || + (folio_test_large(folio) && folio_test_has_hwpoisoned(folio)); +} + +bool is_free_buddy_page(const struct page *page); PAGEFLAG(Isolated, isolated, PF_ANY); -static __always_inline int PageAnonExclusive(struct page *page) +static __always_inline int PageAnonExclusive(const struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page), page); - VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); + /* + * HugeTLB stores this information on the head page; THP keeps it per + * page + */ + if (PageHuge(page)) + page = compound_head(page); return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } @@ -1078,7 +1175,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) (1UL << PG_lru | 1UL << PG_locked | \ 1UL << PG_private | 1UL << PG_private_2 | \ 1UL << PG_writeback | 1UL << PG_reserved | \ - 1UL << PG_slab | 1UL << PG_active | \ + 1UL << PG_active | \ 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK) /* @@ -1098,30 +1195,24 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) */ #define PAGE_FLAGS_SECOND \ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ - 1UL << PG_large_rmappable) + 1UL << PG_large_rmappable | 1UL << PG_partially_mapped) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) /** - * page_has_private - Determine if page has private stuff - * @page: The page to be checked + * folio_has_private - Determine if folio has private stuff + * @folio: The folio to be checked * - * Determine if a page has private stuff, indicating that release routines + * Determine if a folio has private stuff, indicating that release routines * should be invoked upon it. */ -static inline int page_has_private(struct page *page) -{ - return !!(page->flags & PAGE_FLAGS_PRIVATE); -} - -static inline bool folio_has_private(struct folio *folio) +static inline int folio_has_private(const struct folio *folio) { - return page_has_private(&folio->page); + return !!(folio->flags & PAGE_FLAGS_PRIVATE); } #undef PF_ANY #undef PF_HEAD -#undef PF_ONLY_HEAD #undef PF_NO_TAIL #undef PF_NO_COMPOUND #undef PF_SECOND |