diff options
Diffstat (limited to 'include/linux/mmzone.h')
-rw-r--r-- | include/linux/mmzone.h | 168 |
1 files changed, 164 insertions, 4 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9540b41894da..283913d42d7b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -37,6 +37,22 @@ #define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1) +/* Defines the order for the number of pages that have a migrate type. */ +#ifndef CONFIG_PAGE_BLOCK_ORDER +#define PAGE_BLOCK_ORDER MAX_PAGE_ORDER +#else +#define PAGE_BLOCK_ORDER CONFIG_PAGE_BLOCK_ORDER +#endif /* CONFIG_PAGE_BLOCK_ORDER */ + +/* + * The MAX_PAGE_ORDER, which defines the max order of pages to be allocated + * by the buddy allocator, has to be larger or equal to the PAGE_BLOCK_ORDER, + * which defines the order for the number of pages that can have a migrate type + */ +#if (PAGE_BLOCK_ORDER > MAX_PAGE_ORDER) +#error MAX_PAGE_ORDER must be >= PAGE_BLOCK_ORDER +#endif + /* * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed * costly to service. That is between allocation orders which should @@ -138,6 +154,7 @@ enum numa_stat_item { enum zone_stat_item { /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, + NR_FREE_PAGES_BLOCKS, NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */ NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE, NR_ZONE_ACTIVE_ANON, @@ -147,7 +164,6 @@ enum zone_stat_item { NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ /* Second 128 byte cacheline */ - NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) NR_ZSPAGES, /* allocated in zsmalloc */ #endif @@ -220,9 +236,11 @@ enum node_stat_item { PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, + PGDEMOTE_PROACTIVE, #ifdef CONFIG_HUGETLB_PAGE NR_HUGETLB, #endif + NR_BALLOON_PAGES, NR_VM_NODE_STAT_ITEMS }; @@ -964,6 +982,9 @@ struct zone { #ifdef CONFIG_UNACCEPTED_MEMORY /* Pages to be accepted. All pages on the list are MAX_PAGE_ORDER */ struct list_head unaccepted_pages; + + /* To be called once the last page in the zone is accepted */ + struct work_struct unaccepted_cleanup; #endif /* zone flags, see below */ @@ -972,6 +993,9 @@ struct zone { /* Primarily protects free_area */ spinlock_t lock; + /* Pages to be freed when next trylock succeeds */ + struct llist_head trylock_free_pages; + /* Write-intensive fields used by compaction and vmstats. */ CACHELINE_PADDING(_pad2_); @@ -1158,6 +1182,12 @@ static inline bool is_zone_device_page(const struct page *page) return page_zonenum(page) == ZONE_DEVICE; } +static inline struct dev_pagemap *page_pgmap(const struct page *page) +{ + VM_WARN_ON_ONCE_PAGE(!is_zone_device_page(page), page); + return page_folio(page)->pgmap; +} + /* * Consecutive zone device pages should not be merged into the same sgl * or bvec segment with other types of pages or if they belong to different @@ -1173,7 +1203,7 @@ static inline bool zone_device_pages_have_same_pgmap(const struct page *a, return false; if (!is_zone_device_page(a)) return true; - return a->pgmap == b->pgmap; + return page_pgmap(a) == page_pgmap(b); } extern void memmap_init_zone_device(struct zone *, unsigned long, @@ -1188,6 +1218,10 @@ static inline bool zone_device_pages_have_same_pgmap(const struct page *a, { return true; } +static inline struct dev_pagemap *page_pgmap(const struct page *page) +{ + return NULL; +} #endif static inline bool folio_is_zone_device(const struct folio *folio) @@ -1483,8 +1517,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx, unsigned int alloc_flags); -bool zone_watermark_ok_safe(struct zone *z, unsigned int order, - unsigned long mark, int highest_zoneidx); /* * Memory initialization context, use to differentiate memory added by * the platform statically or via memory hotplug interface. @@ -1934,6 +1966,9 @@ enum { #ifdef CONFIG_ZONE_DEVICE SECTION_TAINT_ZONE_DEVICE_BIT, #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT + SECTION_IS_VMEMMAP_PREINIT_BIT, +#endif SECTION_MAP_LAST_BIT, }; @@ -1944,6 +1979,9 @@ enum { #ifdef CONFIG_ZONE_DEVICE #define SECTION_TAINT_ZONE_DEVICE BIT(SECTION_TAINT_ZONE_DEVICE_BIT) #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT +#define SECTION_IS_VMEMMAP_PREINIT BIT(SECTION_IS_VMEMMAP_PREINIT_BIT) +#endif #define SECTION_MAP_MASK (~(BIT(SECTION_MAP_LAST_BIT) - 1)) #define SECTION_NID_SHIFT SECTION_MAP_LAST_BIT @@ -1998,6 +2036,30 @@ static inline int online_device_section(struct mem_section *section) } #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT +static inline int preinited_vmemmap_section(struct mem_section *section) +{ + return (section && + (section->section_mem_map & SECTION_IS_VMEMMAP_PREINIT)); +} + +void sparse_vmemmap_init_nid_early(int nid); +void sparse_vmemmap_init_nid_late(int nid); + +#else +static inline int preinited_vmemmap_section(struct mem_section *section) +{ + return 0; +} +static inline void sparse_vmemmap_init_nid_early(int nid) +{ +} + +static inline void sparse_vmemmap_init_nid_late(int nid) +{ +} +#endif + static inline int online_section_nr(unsigned long nr) { return online_section(__nr_to_section(nr)); @@ -2028,13 +2090,42 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) return usage ? test_bit(idx, usage->subsection_map) : 0; } + +static inline bool pfn_section_first_valid(struct mem_section *ms, unsigned long *pfn) +{ + struct mem_section_usage *usage = READ_ONCE(ms->usage); + int idx = subsection_map_index(*pfn); + unsigned long bit; + + if (!usage) + return false; + + if (test_bit(idx, usage->subsection_map)) + return true; + + /* Find the next subsection that exists */ + bit = find_next_bit(usage->subsection_map, SUBSECTIONS_PER_SECTION, idx); + if (bit == SUBSECTIONS_PER_SECTION) + return false; + + *pfn = (*pfn & PAGE_SECTION_MASK) + (bit * PAGES_PER_SUBSECTION); + return true; +} #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { return 1; } + +static inline bool pfn_section_first_valid(struct mem_section *ms, unsigned long *pfn) +{ + return true; +} #endif +void sparse_init_early_section(int nid, struct page *map, unsigned long pnum, + unsigned long flags); + #ifndef CONFIG_HAVE_ARCH_PFN_VALID /** * pfn_valid - check if there is a valid memory map entry for a PFN @@ -2078,6 +2169,58 @@ static inline int pfn_valid(unsigned long pfn) return ret; } + +/* Returns end_pfn or higher if no valid PFN remaining in range */ +static inline unsigned long first_valid_pfn(unsigned long pfn, unsigned long end_pfn) +{ + unsigned long nr = pfn_to_section_nr(pfn); + + rcu_read_lock_sched(); + + while (nr <= __highest_present_section_nr && pfn < end_pfn) { + struct mem_section *ms = __pfn_to_section(pfn); + + if (valid_section(ms) && + (early_section(ms) || pfn_section_first_valid(ms, &pfn))) { + rcu_read_unlock_sched(); + return pfn; + } + + /* Nothing left in this section? Skip to next section */ + nr++; + pfn = section_nr_to_pfn(nr); + } + + rcu_read_unlock_sched(); + return end_pfn; +} + +static inline unsigned long next_valid_pfn(unsigned long pfn, unsigned long end_pfn) +{ + pfn++; + + if (pfn >= end_pfn) + return end_pfn; + + /* + * Either every PFN within the section (or subsection for VMEMMAP) is + * valid, or none of them are. So there's no point repeating the check + * for every PFN; only call first_valid_pfn() again when crossing a + * (sub)section boundary (i.e. !(pfn & ~PAGE_{SUB,}SECTION_MASK)). + */ + if (pfn & ~(IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP) ? + PAGE_SUBSECTION_MASK : PAGE_SECTION_MASK)) + return pfn; + + return first_valid_pfn(pfn, end_pfn); +} + + +#define for_each_valid_pfn(_pfn, _start_pfn, _end_pfn) \ + for ((_pfn) = first_valid_pfn((_start_pfn), (_end_pfn)); \ + (_pfn) < (_end_pfn); \ + (_pfn) = next_valid_pfn((_pfn), (_end_pfn))) + #endif static inline int pfn_in_present_section(unsigned long pfn) @@ -2097,6 +2240,11 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr) return -1; } +#define for_each_present_section_nr(start, section_nr) \ + for (section_nr = next_present_section_nr(start - 1); \ + section_nr != -1; \ + section_nr = next_present_section_nr(section_nr)) + /* * These are _only_ used during initialisation, therefore they * can use __initdata ... They could have names to indicate @@ -2116,10 +2264,22 @@ void sparse_init(void); #else #define sparse_init() do {} while (0) #define sparse_index_init(_sec, _nid) do {} while (0) +#define sparse_vmemmap_init_nid_early(_nid, _use) do {} while (0) +#define sparse_vmemmap_init_nid_late(_nid) do {} while (0) #define pfn_in_present_section pfn_valid #define subsection_map_init(_pfn, _nr_pages) do {} while (0) #endif /* CONFIG_SPARSEMEM */ +/* + * Fallback case for when the architecture provides its own pfn_valid() but + * not a corresponding for_each_valid_pfn(). + */ +#ifndef for_each_valid_pfn +#define for_each_valid_pfn(_pfn, _start_pfn, _end_pfn) \ + for ((_pfn) = (_start_pfn); (_pfn) < (_end_pfn); (_pfn)++) \ + if (pfn_valid(_pfn)) +#endif + #endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ #endif /* _LINUX_MMZONE_H */ |