diff options
Diffstat (limited to 'include/linux/mmzone.h')
-rw-r--r-- | include/linux/mmzone.h | 170 |
1 files changed, 102 insertions, 68 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 19425e988bdc..f2e4e90621ec 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -93,7 +93,7 @@ struct free_area { struct pglist_data; /* - * zone->lock and zone->lru_lock are two of the hottest locks in the kernel. + * zone->lock and the zone lru_lock are two of the hottest locks in the kernel. * So add a wild amount of padding here to ensure that they fall into separate * cachelines. There are very few zone structures in the machine, so space * consumption is not a concern here. @@ -110,36 +110,20 @@ struct zone_padding { enum zone_stat_item { /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, - NR_ALLOC_BATCH, - NR_LRU_BASE, - NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ - NR_ACTIVE_ANON, /* " " " " " */ - NR_INACTIVE_FILE, /* " " " " " */ - NR_ACTIVE_FILE, /* " " " " " */ - NR_UNEVICTABLE, /* " " " " " */ + NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */ + NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE, + NR_ZONE_ACTIVE_ANON, + NR_ZONE_INACTIVE_FILE, + NR_ZONE_ACTIVE_FILE, + NR_ZONE_UNEVICTABLE, + NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ - NR_ANON_PAGES, /* Mapped anonymous pages */ - NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. - only modified from process context */ - NR_FILE_PAGES, - NR_FILE_DIRTY, - NR_WRITEBACK, NR_SLAB_RECLAIMABLE, NR_SLAB_UNRECLAIMABLE, NR_PAGETABLE, /* used for pagetables */ - NR_KERNEL_STACK, + NR_KERNEL_STACK_KB, /* measured in KiB */ /* Second 128 byte cacheline */ - NR_UNSTABLE_NFS, /* NFS unstable pages */ NR_BOUNCE, - NR_VMSCAN_WRITE, - NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ - NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ - NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ - NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ - NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ - NR_DIRTIED, /* page dirtyings since bootup */ - NR_WRITTEN, /* page writings since bootup */ - NR_PAGES_SCANNED, /* pages scanned since last reclaim */ #if IS_ENABLED(CONFIG_ZSMALLOC) NR_ZSPAGES, /* allocated in zsmalloc */ #endif @@ -151,14 +135,40 @@ enum zone_stat_item { NUMA_LOCAL, /* allocation from local node */ NUMA_OTHER, /* allocation from other node */ #endif + NR_FREE_CMA_PAGES, + NR_VM_ZONE_STAT_ITEMS }; + +enum node_stat_item { + NR_LRU_BASE, + NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ + NR_ACTIVE_ANON, /* " " " " " */ + NR_INACTIVE_FILE, /* " " " " " */ + NR_ACTIVE_FILE, /* " " " " " */ + NR_UNEVICTABLE, /* " " " " " */ + NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ + NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ + NR_PAGES_SCANNED, /* pages scanned since last reclaim */ WORKINGSET_REFAULT, WORKINGSET_ACTIVATE, WORKINGSET_NODERECLAIM, - NR_ANON_THPS, + NR_ANON_MAPPED, /* Mapped anonymous pages */ + NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. + only modified from process context */ + NR_FILE_PAGES, + NR_FILE_DIRTY, + NR_WRITEBACK, + NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ + NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ NR_SHMEM_THPS, NR_SHMEM_PMDMAPPED, - NR_FREE_CMA_PAGES, - NR_VM_ZONE_STAT_ITEMS }; + NR_ANON_THPS, + NR_UNSTABLE_NFS, /* NFS unstable pages */ + NR_VMSCAN_WRITE, + NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ + NR_DIRTIED, /* page dirtyings since bootup */ + NR_WRITTEN, /* page writings since bootup */ + NR_VM_NODE_STAT_ITEMS +}; /* * We do arithmetic on the LRU lists in various places in the code, @@ -215,7 +225,7 @@ struct lruvec { /* Evictions & activations on the inactive file list */ atomic_long_t inactive_age; #ifdef CONFIG_MEMCG - struct zone *zone; + struct pglist_data *pgdat; #endif }; @@ -267,6 +277,11 @@ struct per_cpu_pageset { #endif }; +struct per_cpu_nodestat { + s8 stat_threshold; + s8 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS]; +}; + #endif /* !__GENERATING_BOUNDS.H */ enum zone_type { @@ -348,22 +363,9 @@ struct zone { #ifdef CONFIG_NUMA int node; #endif - - /* - * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on - * this zone's LRU. Maintained by the pageout code. - */ - unsigned int inactive_ratio; - struct pglist_data *zone_pgdat; struct per_cpu_pageset __percpu *pageset; - /* - * This is a per-zone reserve of pages that are not available - * to userspace allocations. - */ - unsigned long totalreserve_pages; - #ifndef CONFIG_SPARSEMEM /* * Flags for a pageblock_nr_pages block. See pageblock-flags.h. @@ -372,14 +374,6 @@ struct zone { unsigned long *pageblock_flags; #endif /* CONFIG_SPARSEMEM */ -#ifdef CONFIG_NUMA - /* - * zone reclaim becomes active if more unmapped pages exist. - */ - unsigned long min_unmapped_pages; - unsigned long min_slab_pages; -#endif /* CONFIG_NUMA */ - /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ unsigned long zone_start_pfn; @@ -472,24 +466,21 @@ struct zone { unsigned long wait_table_hash_nr_entries; unsigned long wait_table_bits; + /* Write-intensive fields used from the page allocator */ ZONE_PADDING(_pad1_) + /* free areas of different sizes */ struct free_area free_area[MAX_ORDER]; /* zone flags, see below */ unsigned long flags; - /* Write-intensive fields used from the page allocator */ + /* Primarily protects free_area */ spinlock_t lock; + /* Write-intensive fields used by compaction and vmstats. */ ZONE_PADDING(_pad2_) - /* Write-intensive fields used by page reclaim */ - - /* Fields commonly accessed by the page reclaim scanner */ - spinlock_t lru_lock; - struct lruvec lruvec; - /* * When free pages are below this point, additional steps are taken * when reading the number of free pages to avoid per-cpu counter @@ -527,19 +518,18 @@ struct zone { atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; } ____cacheline_internodealigned_in_smp; -enum zone_flags { - ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */ - ZONE_CONGESTED, /* zone has many dirty pages backed by +enum pgdat_flags { + PGDAT_CONGESTED, /* pgdat has many dirty pages backed by * a congested BDI */ - ZONE_DIRTY, /* reclaim scanning has recently found + PGDAT_DIRTY, /* reclaim scanning has recently found * many dirty file pages at the tail * of the LRU. */ - ZONE_WRITEBACK, /* reclaim scanning has recently found + PGDAT_WRITEBACK, /* reclaim scanning has recently found * many pages under writeback */ - ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */ + PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */ }; static inline unsigned long zone_end_pfn(const struct zone *zone) @@ -663,8 +653,9 @@ typedef struct pglist_data { wait_queue_head_t pfmemalloc_wait; struct task_struct *kswapd; /* Protected by mem_hotplug_begin/end() */ - int kswapd_max_order; - enum zone_type classzone_idx; + int kswapd_order; + enum zone_type kswapd_classzone_idx; + #ifdef CONFIG_COMPACTION int kcompactd_max_order; enum zone_type kcompactd_classzone_idx; @@ -681,6 +672,23 @@ typedef struct pglist_data { /* Number of pages migrated during the rate limiting time interval */ unsigned long numabalancing_migrate_nr_pages; #endif + /* + * This is a per-node reserve of pages that are not available + * to userspace allocations. + */ + unsigned long totalreserve_pages; + +#ifdef CONFIG_NUMA + /* + * zone reclaim becomes active if more unmapped pages exist. + */ + unsigned long min_unmapped_pages; + unsigned long min_slab_pages; +#endif /* CONFIG_NUMA */ + + /* Write-intensive fields used by page reclaim */ + ZONE_PADDING(_pad1_) + spinlock_t lru_lock; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* @@ -695,6 +703,23 @@ typedef struct pglist_data { struct list_head split_queue; unsigned long split_queue_len; #endif + + /* Fields commonly accessed by the page reclaim scanner */ + struct lruvec lruvec; + + /* + * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on + * this node's LRU. Maintained by the pageout code. + */ + unsigned int inactive_ratio; + + unsigned long flags; + + ZONE_PADDING(_pad2_) + + /* Per-node vmstats */ + struct per_cpu_nodestat __percpu *per_cpu_nodestats; + atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS]; } pg_data_t; #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) @@ -708,6 +733,15 @@ typedef struct pglist_data { #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid)) +static inline spinlock_t *zone_lru_lock(struct zone *zone) +{ + return &zone->zone_pgdat->lru_lock; +} + +static inline struct lruvec *node_lruvec(struct pglist_data *pgdat) +{ + return &pgdat->lruvec; +} static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) { @@ -760,12 +794,12 @@ extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, extern void lruvec_init(struct lruvec *lruvec); -static inline struct zone *lruvec_zone(struct lruvec *lruvec) +static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec) { #ifdef CONFIG_MEMCG - return lruvec->zone; + return lruvec->pgdat; #else - return container_of(lruvec, struct zone, lruvec); + return container_of(lruvec, struct pglist_data, lruvec); #endif } |