From 30aba6656f61ed44cba445a3c0d38b296fa9e8f5 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Thu, 23 Aug 2018 17:00:35 -0700 Subject: namei: allow restricted O_CREAT of FIFOs and regular files Disallows open of FIFOs or regular files not owned by the user in world writable sticky directories, unless the owner is the same as that of the directory or the file is opened without the O_CREAT flag. The purpose is to make data spoofing attacks harder. This protection can be turned on and off separately for FIFOs and regular files via sysctl, just like the symlinks/hardlinks protection. This patch is based on Openwall's "HARDEN_FIFO" feature by Solar Designer. This is a brief list of old vulnerabilities that could have been prevented by this feature, some of them even allow for privilege escalation: CVE-2000-1134 CVE-2007-3852 CVE-2008-0525 CVE-2009-0416 CVE-2011-4834 CVE-2015-1838 CVE-2015-7442 CVE-2016-7489 This list is not meant to be complete. It's difficult to track down all vulnerabilities of this kind because they were often reported without any mention of this particular attack vector. In fact, before hardlinks/symlinks restrictions, fifos/regular files weren't the favorite vehicle to exploit them. [s.mesoraca16@gmail.com: fix bug reported by Dan Carpenter] Link: https://lkml.kernel.org/r/20180426081456.GA7060@mwanda Link: http://lkml.kernel.org/r/1524829819-11275-1-git-send-email-s.mesoraca16@gmail.com [keescook@chromium.org: drop pr_warn_ratelimited() in favor of audit changes in the future] [keescook@chromium.org: adjust commit subjet] Link: http://lkml.kernel.org/r/20180416175918.GA13494@beast Signed-off-by: Salvatore Mesoraca Signed-off-by: Kees Cook Suggested-by: Solar Designer Suggested-by: Kees Cook Cc: Al Viro Cc: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e5710541183b..33322702c910 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -74,6 +74,8 @@ extern struct inodes_stat_t inodes_stat; extern int leases_enable, lease_break_time; extern int sysctl_protected_symlinks; extern int sysctl_protected_hardlinks; +extern int sysctl_protected_fifos; +extern int sysctl_protected_regular; typedef __kernel_rwf_t rwf_t; -- cgit v1.2.3 From d4ae9916ea2947341180d2b538f48875ff393a86 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 23 Aug 2018 17:00:42 -0700 Subject: mm: soft-offline: close the race against page allocation A process can be killed with SIGBUS(BUS_MCEERR_AR) when it tries to allocate a page that was just freed on the way of soft-offline. This is undesirable because soft-offline (which is about corrected error) is less aggressive than hard-offline (which is about uncorrected error), and we can make soft-offline fail and keep using the page for good reason like "system is busy." Two main changes of this patch are: - setting migrate type of the target page to MIGRATE_ISOLATE. As done in free_unref_page_commit(), this makes kernel bypass pcplist when freeing the page. So we can assume that the page is in freelist just after put_page() returns, - setting PG_hwpoison on free page under zone->lock which protects freelists, so this allows us to avoid setting PG_hwpoison on a page that is decided to be allocated soon. [akpm@linux-foundation.org: tweak set_hwpoison_free_buddy_page() comment] Link: http://lkml.kernel.org/r/1531452366-11661-3-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Reported-by: Xishi Qiu Tested-by: Mike Kravetz Cc: Michal Hocko Cc: Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 5 +++++ include/linux/swapops.h | 10 ---------- mm/memory-failure.c | 26 +++++++++++++++++++++----- mm/migrate.c | 2 +- mm/page_alloc.c | 30 ++++++++++++++++++++++++++++++ 5 files changed, 57 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 901943e4754b..74bee8cecf4c 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -369,8 +369,13 @@ PAGEFLAG_FALSE(Uncached) PAGEFLAG(HWPoison, hwpoison, PF_ANY) TESTSCFLAG(HWPoison, hwpoison, PF_ANY) #define __PG_HWPOISON (1UL << PG_hwpoison) +extern bool set_hwpoison_free_buddy_page(struct page *page); #else PAGEFLAG_FALSE(HWPoison) +static inline bool set_hwpoison_free_buddy_page(struct page *page) +{ + return 0; +} #define __PG_HWPOISON 0 #endif diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 1d3877c39a00..568a3553d918 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -340,11 +340,6 @@ static inline int is_hwpoison_entry(swp_entry_t entry) return swp_type(entry) == SWP_HWPOISON; } -static inline bool test_set_page_hwpoison(struct page *page) -{ - return TestSetPageHWPoison(page); -} - static inline void num_poisoned_pages_inc(void) { atomic_long_inc(&num_poisoned_pages); @@ -367,11 +362,6 @@ static inline int is_hwpoison_entry(swp_entry_t swp) return 0; } -static inline bool test_set_page_hwpoison(struct page *page) -{ - return false; -} - static inline void num_poisoned_pages_inc(void) { } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 49dc32c61137..192d0bbfc9ea 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -57,6 +57,7 @@ #include #include #include +#include #include "internal.h" #include "ras/ras_event.h" @@ -1697,6 +1698,7 @@ static int __soft_offline_page(struct page *page, int flags) static int soft_offline_in_use_page(struct page *page, int flags) { int ret; + int mt; struct page *hpage = compound_head(page); if (!PageHuge(page) && PageTransHuge(hpage)) { @@ -1715,23 +1717,37 @@ static int soft_offline_in_use_page(struct page *page, int flags) put_hwpoison_page(hpage); } + /* + * Setting MIGRATE_ISOLATE here ensures that the page will be linked + * to free list immediately (not via pcplist) when released after + * successful page migration. Otherwise we can't guarantee that the + * page is really free after put_page() returns, so + * set_hwpoison_free_buddy_page() highly likely fails. + */ + mt = get_pageblock_migratetype(page); + set_pageblock_migratetype(page, MIGRATE_ISOLATE); if (PageHuge(page)) ret = soft_offline_huge_page(page, flags); else ret = __soft_offline_page(page, flags); - + set_pageblock_migratetype(page, mt); return ret; } -static void soft_offline_free_page(struct page *page) +static int soft_offline_free_page(struct page *page) { int rc = 0; struct page *head = compound_head(page); if (PageHuge(head)) rc = dissolve_free_huge_page(page); - if (!rc && !TestSetPageHWPoison(page)) - num_poisoned_pages_inc(); + if (!rc) { + if (set_hwpoison_free_buddy_page(page)) + num_poisoned_pages_inc(); + else + rc = -EBUSY; + } + return rc; } /** @@ -1775,7 +1791,7 @@ int soft_offline_page(struct page *page, int flags) if (ret > 0) ret = soft_offline_in_use_page(page, flags); else if (ret == 0) - soft_offline_free_page(page); + ret = soft_offline_free_page(page); return ret; } diff --git a/mm/migrate.c b/mm/migrate.c index 91a99457127c..d6a2e89b086a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1212,7 +1212,7 @@ out: * intentionally. Although it's rather weird, * it's how HWPoison flag works at the moment. */ - if (!test_set_page_hwpoison(page)) + if (set_hwpoison_free_buddy_page(page)) num_poisoned_pages_inc(); } } else { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c677c1506d73..e75865d58ba7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8096,3 +8096,33 @@ bool is_free_buddy_page(struct page *page) return order < MAX_ORDER; } + +#ifdef CONFIG_MEMORY_FAILURE +/* + * Set PG_hwpoison flag if a given page is confirmed to be a free page. This + * test is performed under the zone lock to prevent a race against page + * allocation. + */ +bool set_hwpoison_free_buddy_page(struct page *page) +{ + struct zone *zone = page_zone(page); + unsigned long pfn = page_to_pfn(page); + unsigned long flags; + unsigned int order; + bool hwpoisoned = false; + + spin_lock_irqsave(&zone->lock, flags); + for (order = 0; order < MAX_ORDER; order++) { + struct page *page_head = page - (pfn & ((1 << order) - 1)); + + if (PageBuddy(page_head) && page_order(page_head) >= order) { + if (!TestSetPageHWPoison(page)) + hwpoisoned = true; + break; + } + } + spin_unlock_irqrestore(&zone->lock, flags); + + return hwpoisoned; +} +#endif -- cgit v1.2.3 From 263fade51f7bdd5ad7ebbfe82113a44c5ea4c36c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 23 Aug 2018 17:01:15 -0700 Subject: docs/mm: make GFP flags descriptions usable as kernel-doc This patch adds DOC: headings for GFP flag descriptions and adjusts the formatting to fit sphinx expectations of paragraphs. Link: http://lkml.kernel.org/r/1532626360-16650-7-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Reviewed-by: Andrew Morton Cc: Jonathan Corbet Cc: Matthew Wilcox Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 291 +++++++++++++++++++++++++++------------------------- 1 file changed, 154 insertions(+), 137 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index a6afcec53795..24bcc5eec6b4 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -59,29 +59,32 @@ struct vm_area_struct; #define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ #define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) -/* +/** + * DOC: Page mobility and placement hints + * * Page mobility and placement hints + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * These flags provide hints about how mobile the page is. Pages with similar * mobility are placed within the same pageblocks to minimise problems due * to external fragmentation. * - * __GFP_MOVABLE (also a zone modifier) indicates that the page can be - * moved by page migration during memory compaction or can be reclaimed. + * %__GFP_MOVABLE (also a zone modifier) indicates that the page can be + * moved by page migration during memory compaction or can be reclaimed. * - * __GFP_RECLAIMABLE is used for slab allocations that specify - * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers. + * %__GFP_RECLAIMABLE is used for slab allocations that specify + * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers. * - * __GFP_WRITE indicates the caller intends to dirty the page. Where possible, - * these pages will be spread between local zones to avoid all the dirty - * pages being in one zone (fair zone allocation policy). + * %__GFP_WRITE indicates the caller intends to dirty the page. Where possible, + * these pages will be spread between local zones to avoid all the dirty + * pages being in one zone (fair zone allocation policy). * - * __GFP_HARDWALL enforces the cpuset memory allocation policy. + * %__GFP_HARDWALL enforces the cpuset memory allocation policy. * - * __GFP_THISNODE forces the allocation to be satisified from the requested - * node with no fallbacks or placement policy enforcements. + * %__GFP_THISNODE forces the allocation to be satisified from the requested + * node with no fallbacks or placement policy enforcements. * - * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg. + * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. */ #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) @@ -89,54 +92,60 @@ struct vm_area_struct; #define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) #define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT) -/* +/** + * DOC: Watermark modifiers + * * Watermark modifiers -- controls access to emergency reserves + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * - * __GFP_HIGH indicates that the caller is high-priority and that granting - * the request is necessary before the system can make forward progress. - * For example, creating an IO context to clean pages. + * %__GFP_HIGH indicates that the caller is high-priority and that granting + * the request is necessary before the system can make forward progress. + * For example, creating an IO context to clean pages. * - * __GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is - * high priority. Users are typically interrupt handlers. This may be - * used in conjunction with __GFP_HIGH + * %__GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is + * high priority. Users are typically interrupt handlers. This may be + * used in conjunction with %__GFP_HIGH * - * __GFP_MEMALLOC allows access to all memory. This should only be used when - * the caller guarantees the allocation will allow more memory to be freed - * very shortly e.g. process exiting or swapping. Users either should - * be the MM or co-ordinating closely with the VM (e.g. swap over NFS). + * %__GFP_MEMALLOC allows access to all memory. This should only be used when + * the caller guarantees the allocation will allow more memory to be freed + * very shortly e.g. process exiting or swapping. Users either should + * be the MM or co-ordinating closely with the VM (e.g. swap over NFS). * - * __GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. - * This takes precedence over the __GFP_MEMALLOC flag if both are set. + * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. + * This takes precedence over the %__GFP_MEMALLOC flag if both are set. */ #define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) #define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) #define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) #define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) -/* +/** + * DOC: Reclaim modifiers + * * Reclaim modifiers + * ~~~~~~~~~~~~~~~~~ * - * __GFP_IO can start physical IO. + * %__GFP_IO can start physical IO. * - * __GFP_FS can call down to the low-level FS. Clearing the flag avoids the - * allocator recursing into the filesystem which might already be holding - * locks. + * %__GFP_FS can call down to the low-level FS. Clearing the flag avoids the + * allocator recursing into the filesystem which might already be holding + * locks. * - * __GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim. - * This flag can be cleared to avoid unnecessary delays when a fallback - * option is available. + * %__GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim. + * This flag can be cleared to avoid unnecessary delays when a fallback + * option is available. * - * __GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when - * the low watermark is reached and have it reclaim pages until the high - * watermark is reached. A caller may wish to clear this flag when fallback - * options are available and the reclaim is likely to disrupt the system. The - * canonical example is THP allocation where a fallback is cheap but - * reclaim/compaction may cause indirect stalls. + * %__GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when + * the low watermark is reached and have it reclaim pages until the high + * watermark is reached. A caller may wish to clear this flag when fallback + * options are available and the reclaim is likely to disrupt the system. The + * canonical example is THP allocation where a fallback is cheap but + * reclaim/compaction may cause indirect stalls. * - * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. + * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. * * The default allocator behavior depends on the request size. We have a concept - * of so called costly allocations (with order > PAGE_ALLOC_COSTLY_ORDER). + * of so called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER). * !costly allocations are too essential to fail so they are implicitly * non-failing by default (with some exceptions like OOM victims might fail so * the caller still has to check for failures) while costly requests try to be @@ -144,40 +153,40 @@ struct vm_area_struct; * The following three modifiers might be used to override some of these * implicit rules * - * __GFP_NORETRY: The VM implementation will try only very lightweight - * memory direct reclaim to get some memory under memory pressure (thus - * it can sleep). It will avoid disruptive actions like OOM killer. The - * caller must handle the failure which is quite likely to happen under - * heavy memory pressure. The flag is suitable when failure can easily be - * handled at small cost, such as reduced throughput - * - * __GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim - * procedures that have previously failed if there is some indication - * that progress has been made else where. It can wait for other - * tasks to attempt high level approaches to freeing memory such as - * compaction (which removes fragmentation) and page-out. - * There is still a definite limit to the number of retries, but it is - * a larger limit than with __GFP_NORETRY. - * Allocations with this flag may fail, but only when there is - * genuinely little unused memory. While these allocations do not - * directly trigger the OOM killer, their failure indicates that - * the system is likely to need to use the OOM killer soon. The - * caller must handle failure, but can reasonably do so by failing - * a higher-level request, or completing it only in a much less - * efficient manner. - * If the allocation does fail, and the caller is in a position to - * free some non-essential memory, doing so could benefit the system - * as a whole. - * - * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller - * cannot handle allocation failures. The allocation could block - * indefinitely but will never return with failure. Testing for - * failure is pointless. - * New users should be evaluated carefully (and the flag should be - * used only when there is no reasonable failure policy) but it is - * definitely preferable to use the flag rather than opencode endless - * loop around allocator. - * Using this flag for costly allocations is _highly_ discouraged. + * %__GFP_NORETRY: The VM implementation will try only very lightweight + * memory direct reclaim to get some memory under memory pressure (thus + * it can sleep). It will avoid disruptive actions like OOM killer. The + * caller must handle the failure which is quite likely to happen under + * heavy memory pressure. The flag is suitable when failure can easily be + * handled at small cost, such as reduced throughput + * + * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim + * procedures that have previously failed if there is some indication + * that progress has been made else where. It can wait for other + * tasks to attempt high level approaches to freeing memory such as + * compaction (which removes fragmentation) and page-out. + * There is still a definite limit to the number of retries, but it is + * a larger limit than with %__GFP_NORETRY. + * Allocations with this flag may fail, but only when there is + * genuinely little unused memory. While these allocations do not + * directly trigger the OOM killer, their failure indicates that + * the system is likely to need to use the OOM killer soon. The + * caller must handle failure, but can reasonably do so by failing + * a higher-level request, or completing it only in a much less + * efficient manner. + * If the allocation does fail, and the caller is in a position to + * free some non-essential memory, doing so could benefit the system + * as a whole. + * + * %__GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller + * cannot handle allocation failures. The allocation could block + * indefinitely but will never return with failure. Testing for + * failure is pointless. + * New users should be evaluated carefully (and the flag should be + * used only when there is no reasonable failure policy) but it is + * definitely preferable to use the flag rather than opencode endless + * loop around allocator. + * Using this flag for costly allocations is _highly_ discouraged. */ #define __GFP_IO ((__force gfp_t)___GFP_IO) #define __GFP_FS ((__force gfp_t)___GFP_FS) @@ -188,14 +197,17 @@ struct vm_area_struct; #define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) #define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) -/* +/** + * DOC: Action modifiers + * * Action modifiers + * ~~~~~~~~~~~~~~~~ * - * __GFP_NOWARN suppresses allocation failure reports. + * %__GFP_NOWARN suppresses allocation failure reports. * - * __GFP_COMP address compound page metadata. + * %__GFP_COMP address compound page metadata. * - * __GFP_ZERO returns a zeroed page on success. + * %__GFP_ZERO returns a zeroed page on success. */ #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) #define __GFP_COMP ((__force gfp_t)___GFP_COMP) @@ -208,66 +220,71 @@ struct vm_area_struct; #define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) -/* +/** + * DOC: Useful GFP flag combinations + * + * Useful GFP flag combinations + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * * Useful GFP flag combinations that are commonly used. It is recommended * that subsystems start with one of these combinations and then set/clear - * __GFP_FOO flags as necessary. - * - * GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower - * watermark is applied to allow access to "atomic reserves" - * - * GFP_KERNEL is typical for kernel-internal allocations. The caller requires - * ZONE_NORMAL or a lower zone for direct access but can direct reclaim. - * - * GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is - * accounted to kmemcg. - * - * GFP_NOWAIT is for kernel allocations that should not stall for direct - * reclaim, start physical IO or use any filesystem callback. - * - * GFP_NOIO will use direct reclaim to discard clean pages or slab pages - * that do not require the starting of any physical IO. - * Please try to avoid using this flag directly and instead use - * memalloc_noio_{save,restore} to mark the whole scope which cannot - * perform any IO with a short explanation why. All allocation requests - * will inherit GFP_NOIO implicitly. - * - * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. - * Please try to avoid using this flag directly and instead use - * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't - * recurse into the FS layer with a short explanation why. All allocation - * requests will inherit GFP_NOFS implicitly. - * - * GFP_USER is for userspace allocations that also need to be directly - * accessibly by the kernel or hardware. It is typically used by hardware - * for buffers that are mapped to userspace (e.g. graphics) that hardware - * still must DMA to. cpuset limits are enforced for these allocations. - * - * GFP_DMA exists for historical reasons and should be avoided where possible. - * The flags indicates that the caller requires that the lowest zone be - * used (ZONE_DMA or 16M on x86-64). Ideally, this would be removed but - * it would require careful auditing as some users really require it and - * others use the flag to avoid lowmem reserves in ZONE_DMA and treat the - * lowest zone as a type of emergency reserve. - * - * GFP_DMA32 is similar to GFP_DMA except that the caller requires a 32-bit - * address. - * - * GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, - * do not need to be directly accessible by the kernel but that cannot - * move once in use. An example may be a hardware allocation that maps - * data directly into userspace but has no addressing limitations. - * - * GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not - * need direct access to but can use kmap() when access is required. They - * are expected to be movable via page reclaim or page migration. Typically, - * pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE. - * - * GFP_TRANSHUGE and GFP_TRANSHUGE_LIGHT are used for THP allocations. They are - * compound allocations that will generally fail quickly if memory is not - * available and will not wake kswapd/kcompactd on failure. The _LIGHT - * version does not attempt reclaim/compaction at all and is by default used - * in page fault path, while the non-light is used by khugepaged. + * %__GFP_FOO flags as necessary. + * + * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower + * watermark is applied to allow access to "atomic reserves" + * + * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires + * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim. + * + * %GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is + * accounted to kmemcg. + * + * %GFP_NOWAIT is for kernel allocations that should not stall for direct + * reclaim, start physical IO or use any filesystem callback. + * + * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages + * that do not require the starting of any physical IO. + * Please try to avoid using this flag directly and instead use + * memalloc_noio_{save,restore} to mark the whole scope which cannot + * perform any IO with a short explanation why. All allocation requests + * will inherit GFP_NOIO implicitly. + * + * %GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. + * Please try to avoid using this flag directly and instead use + * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't + * recurse into the FS layer with a short explanation why. All allocation + * requests will inherit GFP_NOFS implicitly. + * + * %GFP_USER is for userspace allocations that also need to be directly + * accessibly by the kernel or hardware. It is typically used by hardware + * for buffers that are mapped to userspace (e.g. graphics) that hardware + * still must DMA to. cpuset limits are enforced for these allocations. + * + * %GFP_DMA exists for historical reasons and should be avoided where possible. + * The flags indicates that the caller requires that the lowest zone be + * used (%ZONE_DMA or 16M on x86-64). Ideally, this would be removed but + * it would require careful auditing as some users really require it and + * others use the flag to avoid lowmem reserves in %ZONE_DMA and treat the + * lowest zone as a type of emergency reserve. + * + * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit + * address. + * + * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, + * do not need to be directly accessible by the kernel but that cannot + * move once in use. An example may be a hardware allocation that maps + * data directly into userspace but has no addressing limitations. + * + * %GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not + * need direct access to but can use kmap() when access is required. They + * are expected to be movable via page reclaim or page migration. Typically, + * pages on the LRU would also be allocated with %GFP_HIGHUSER_MOVABLE. + * + * %GFP_TRANSHUGE and %GFP_TRANSHUGE_LIGHT are used for THP allocations. They + * are compound allocations that will generally fail quickly if memory is not + * available and will not wake kswapd/kcompactd on failure. The _LIGHT + * version does not attempt reclaim/compaction at all and is by default used + * in page fault path, while the non-light is used by khugepaged. */ #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) -- cgit v1.2.3 From 2b7403035459c75e193c6b04a293e518a4212de0 Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Thu, 23 Aug 2018 17:01:36 -0700 Subject: mm: Change return type int to vm_fault_t for fault handlers Use new return type vm_fault_t for fault handler. For now, this is just documenting that the function returns a VM_FAULT value rather than an errno. Once all instances are converted, vm_fault_t will become a distinct type. Ref-> commit 1c8f422059ae ("mm: change return type to vm_fault_t") The aim is to change the return type of finish_fault() and handle_mm_fault() to vm_fault_t type. As part of that clean up return type of all other recursively called functions have been changed to vm_fault_t type. The places from where handle_mm_fault() is getting invoked will be change to vm_fault_t type but in a separate patch. vmf_error() is the newly introduce inline function in 4.17-rc6. [akpm@linux-foundation.org: don't shadow outer local `ret' in __do_huge_pmd_anonymous_page()] Link: http://lkml.kernel.org/r/20180604171727.GA20279@jordon-HP-15-Notebook-PC Signed-off-by: Souptick Joarder Reviewed-by: Matthew Wilcox Reviewed-by: Andrew Morton Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 6 +-- include/linux/huge_mm.h | 9 +++-- include/linux/hugetlb.h | 2 +- include/linux/mm.h | 14 +++---- include/linux/oom.h | 2 +- include/linux/swapops.h | 5 ++- include/linux/userfaultfd_k.h | 5 ++- kernel/memremap.c | 2 +- mm/gup.c | 4 +- mm/huge_memory.c | 31 +++++++-------- mm/hugetlb.c | 29 +++++++------- mm/internal.h | 2 +- mm/khugepaged.c | 3 +- mm/memory.c | 90 ++++++++++++++++++++++--------------------- mm/shmem.c | 5 ++- 15 files changed, 106 insertions(+), 103 deletions(-) (limited to 'include/linux') diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f649023b19b5..bfa0ec69f924 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -340,17 +340,15 @@ out: * fatal_signal_pending()s, and the mmap_sem must be released before * returning it. */ -int handle_userfault(struct vm_fault *vmf, unsigned long reason) +vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) { struct mm_struct *mm = vmf->vma->vm_mm; struct userfaultfd_ctx *ctx; struct userfaultfd_wait_queue uwq; - int ret; + vm_fault_t ret = VM_FAULT_SIGBUS; bool must_wait, return_to_userland; long blocking_state; - ret = VM_FAULT_SIGBUS; - /* * We don't do userfault handling for the final child pid update. * diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index a8a126259bc4..27e3e32135a8 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -6,7 +6,7 @@ #include /* only for vma_is_dax() */ -extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf); +extern vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf); extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, struct vm_area_struct *vma); @@ -23,7 +23,7 @@ static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud) } #endif -extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd); +extern vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd); extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, @@ -216,7 +216,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud, int flags); -extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd); +extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd); extern struct page *huge_zero_page; @@ -321,7 +321,8 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, return NULL; } -static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd) +static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, + pmd_t orig_pmd) { return 0; } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c39d9170a8a0..6b68e345f0ca 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -105,7 +105,7 @@ void hugetlb_report_meminfo(struct seq_file *); int hugetlb_report_node_meminfo(int, char *); void hugetlb_show_meminfo(void); unsigned long hugetlb_total_pages(void); -int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, +vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, struct vm_area_struct *dst_vma, diff --git a/include/linux/mm.h b/include/linux/mm.h index a9e733b5fb76..8fcc36660de6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -728,10 +728,10 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) return pte; } -int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, +vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, struct page *page); -int finish_fault(struct vm_fault *vmf); -int finish_mkwrite_fault(struct vm_fault *vmf); +vm_fault_t finish_fault(struct vm_fault *vmf); +vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); #endif /* @@ -1403,8 +1403,8 @@ int generic_error_remove_page(struct address_space *mapping, struct page *page); int invalidate_inode_page(struct page *page); #ifdef CONFIG_MMU -extern int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, - unsigned int flags); +extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, + unsigned long address, unsigned int flags); extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); @@ -1413,7 +1413,7 @@ void unmap_mapping_pages(struct address_space *mapping, void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows); #else -static inline int handle_mm_fault(struct vm_area_struct *vma, +static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags) { /* should never happen if there's no MMU */ @@ -2563,7 +2563,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_COW 0x4000 /* internal GUP flag */ #define FOLL_ANON 0x8000 /* don't do file mappings */ -static inline int vm_fault_to_errno(int vm_fault, int foll_flags) +static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) { if (vm_fault & VM_FAULT_OOM) return -ENOMEM; diff --git a/include/linux/oom.h b/include/linux/oom.h index 92f70e4c6252..69864a547663 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -88,7 +88,7 @@ static inline bool mm_is_oom_victim(struct mm_struct *mm) * * Return 0 when the PF is safe VM_FAULT_SIGBUS otherwise. */ -static inline int check_stable_address_space(struct mm_struct *mm) +static inline vm_fault_t check_stable_address_space(struct mm_struct *mm) { if (unlikely(test_bit(MMF_UNSTABLE, &mm->flags))) return VM_FAULT_SIGBUS; diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 568a3553d918..22af9d8a84ae 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -4,6 +4,7 @@ #include #include +#include /* * swapcache pages are stored in the swapper_space radix tree. We want to @@ -134,7 +135,7 @@ static inline struct page *device_private_entry_to_page(swp_entry_t entry) return pfn_to_page(swp_offset(entry)); } -int device_private_entry_fault(struct vm_area_struct *vma, +vm_fault_t device_private_entry_fault(struct vm_area_struct *vma, unsigned long addr, swp_entry_t entry, unsigned int flags, @@ -169,7 +170,7 @@ static inline struct page *device_private_entry_to_page(swp_entry_t entry) return NULL; } -static inline int device_private_entry_fault(struct vm_area_struct *vma, +static inline vm_fault_t device_private_entry_fault(struct vm_area_struct *vma, unsigned long addr, swp_entry_t entry, unsigned int flags, diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index e091f0a11b11..37c9eba75c98 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -28,7 +28,7 @@ #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS) -extern int handle_userfault(struct vm_fault *vmf, unsigned long reason); +extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, @@ -77,7 +77,8 @@ extern void userfaultfd_unmap_complete(struct mm_struct *mm, #else /* CONFIG_USERFAULTFD */ /* mm helpers */ -static inline int handle_userfault(struct vm_fault *vmf, unsigned long reason) +static inline vm_fault_t handle_userfault(struct vm_fault *vmf, + unsigned long reason) { return VM_FAULT_SIGBUS; } diff --git a/kernel/memremap.c b/kernel/memremap.c index 1f87ea6b6545..d57d58f77409 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -43,7 +43,7 @@ static unsigned long order_at(struct resource *res, unsigned long pgoff) pgoff += 1UL << order, order = order_at((res), pgoff)) #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) -int device_private_entry_fault(struct vm_area_struct *vma, +vm_fault_t device_private_entry_fault(struct vm_area_struct *vma, unsigned long addr, swp_entry_t entry, unsigned int flags, diff --git a/mm/gup.c b/mm/gup.c index fc5f98069f4e..1abc8b4afff6 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -497,7 +497,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, unsigned long address, unsigned int *flags, int *nonblocking) { unsigned int fault_flags = 0; - int ret; + vm_fault_t ret; /* mlock all present pages, but do not fault in new pages */ if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK) @@ -818,7 +818,7 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, bool *unlocked) { struct vm_area_struct *vma; - int ret, major = 0; + vm_fault_t ret, major = 0; if (unlocked) fault_flags |= FAULT_FLAG_ALLOW_RETRY; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 78427af91de9..08b544383d74 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -541,14 +541,14 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, } EXPORT_SYMBOL_GPL(thp_get_unmapped_area); -static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, - gfp_t gfp) +static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, + struct page *page, gfp_t gfp) { struct vm_area_struct *vma = vmf->vma; struct mem_cgroup *memcg; pgtable_t pgtable; unsigned long haddr = vmf->address & HPAGE_PMD_MASK; - int ret = 0; + vm_fault_t ret = 0; VM_BUG_ON_PAGE(!PageCompound(page), page); @@ -584,15 +584,15 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, /* Deliver the page fault to userland */ if (userfaultfd_missing(vma)) { - int ret; + vm_fault_t ret2; spin_unlock(vmf->ptl); mem_cgroup_cancel_charge(page, memcg, true); put_page(page); pte_free(vma->vm_mm, pgtable); - ret = handle_userfault(vmf, VM_UFFD_MISSING); - VM_BUG_ON(ret & VM_FAULT_FALLBACK); - return ret; + ret2 = handle_userfault(vmf, VM_UFFD_MISSING); + VM_BUG_ON(ret2 & VM_FAULT_FALLBACK); + return ret2; } entry = mk_huge_pmd(page, vma->vm_page_prot); @@ -663,7 +663,7 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, return true; } -int do_huge_pmd_anonymous_page(struct vm_fault *vmf) +vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; gfp_t gfp; @@ -682,7 +682,7 @@ int do_huge_pmd_anonymous_page(struct vm_fault *vmf) pgtable_t pgtable; struct page *zero_page; bool set; - int ret; + vm_fault_t ret; pgtable = pte_alloc_one(vma->vm_mm, haddr); if (unlikely(!pgtable)) return VM_FAULT_OOM; @@ -1118,15 +1118,16 @@ unlock: spin_unlock(vmf->ptl); } -static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd, - struct page *page) +static vm_fault_t do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, + pmd_t orig_pmd, struct page *page) { struct vm_area_struct *vma = vmf->vma; unsigned long haddr = vmf->address & HPAGE_PMD_MASK; struct mem_cgroup *memcg; pgtable_t pgtable; pmd_t _pmd; - int ret = 0, i; + int i; + vm_fault_t ret = 0; struct page **pages; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ @@ -1236,7 +1237,7 @@ out_free_pages: goto out; } -int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd) +vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd) { struct vm_area_struct *vma = vmf->vma; struct page *page = NULL, *new_page; @@ -1245,7 +1246,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd) unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ gfp_t huge_gfp; /* for allocation and charge */ - int ret = 0; + vm_fault_t ret = 0; vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd); VM_BUG_ON_VMA(!vma->anon_vma, vma); @@ -1457,7 +1458,7 @@ out: } /* NUMA hinting page fault entry point for trans huge pmds */ -int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd) +vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd) { struct vm_area_struct *vma = vmf->vma; struct anon_vma *anon_vma = NULL; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9f1c853f67b5..3c21775f196b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3501,14 +3501,15 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, * cannot race with other handlers or page migration. * Keep the pte_same checks anyway to make transition from the mutex easier. */ -static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, +static vm_fault_t hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *ptep, struct page *pagecache_page, spinlock_t *ptl) { pte_t pte; struct hstate *h = hstate_vma(vma); struct page *old_page, *new_page; - int ret = 0, outside_reserve = 0; + int outside_reserve = 0; + vm_fault_t ret = 0; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ unsigned long haddr = address & huge_page_mask(h); @@ -3572,8 +3573,7 @@ retry_avoidcopy: return 0; } - ret = (PTR_ERR(new_page) == -ENOMEM) ? - VM_FAULT_OOM : VM_FAULT_SIGBUS; + ret = vmf_error(PTR_ERR(new_page)); goto out_release_old; } @@ -3676,12 +3676,13 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping, return 0; } -static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, - struct address_space *mapping, pgoff_t idx, - unsigned long address, pte_t *ptep, unsigned int flags) +static vm_fault_t hugetlb_no_page(struct mm_struct *mm, + struct vm_area_struct *vma, + struct address_space *mapping, pgoff_t idx, + unsigned long address, pte_t *ptep, unsigned int flags) { struct hstate *h = hstate_vma(vma); - int ret = VM_FAULT_SIGBUS; + vm_fault_t ret = VM_FAULT_SIGBUS; int anon_rmap = 0; unsigned long size; struct page *page; @@ -3744,11 +3745,7 @@ retry: page = alloc_huge_page(vma, haddr, 0); if (IS_ERR(page)) { - ret = PTR_ERR(page); - if (ret == -ENOMEM) - ret = VM_FAULT_OOM; - else - ret = VM_FAULT_SIGBUS; + ret = vmf_error(PTR_ERR(page)); goto out; } clear_huge_page(page, address, pages_per_huge_page(h)); @@ -3872,12 +3869,12 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm, } #endif -int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, +vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags) { pte_t *ptep, entry; spinlock_t *ptl; - int ret; + vm_fault_t ret; u32 hash; pgoff_t idx; struct page *page = NULL; @@ -4207,7 +4204,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, if (absent || is_swap_pte(huge_ptep_get(pte)) || ((flags & FOLL_WRITE) && !huge_pte_write(huge_ptep_get(pte)))) { - int ret; + vm_fault_t ret; unsigned int fault_flags = 0; if (pte) diff --git a/mm/internal.h b/mm/internal.h index dab088cb6937..87256ae1bef8 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -38,7 +38,7 @@ void page_writeback_init(void); -int do_swap_page(struct vm_fault *vmf); +vm_fault_t do_swap_page(struct vm_fault *vmf); void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 961cbe9062a5..a31d740e6cd1 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -880,7 +880,8 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int referenced) { - int swapped_in = 0, ret = 0; + int swapped_in = 0; + vm_fault_t ret = 0; struct vm_fault vmf = { .vma = vma, .address = address, diff --git a/mm/memory.c b/mm/memory.c index 19f47d7b9b86..42ebdc33268e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2384,9 +2384,9 @@ static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma) * * We do this without the lock held, so that it can sleep if it needs to. */ -static int do_page_mkwrite(struct vm_fault *vmf) +static vm_fault_t do_page_mkwrite(struct vm_fault *vmf) { - int ret; + vm_fault_t ret; struct page *page = vmf->page; unsigned int old_flags = vmf->flags; @@ -2490,7 +2490,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf) * held to the old page, as well as updating the rmap. * - In any case, unlock the PTL and drop the reference we took to the old page. */ -static int wp_page_copy(struct vm_fault *vmf) +static vm_fault_t wp_page_copy(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct mm_struct *mm = vma->vm_mm; @@ -2638,7 +2638,7 @@ oom: * The function expects the page to be locked or other protection against * concurrent faults / writeback (such as DAX radix tree locks). */ -int finish_mkwrite_fault(struct vm_fault *vmf) +vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf) { WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, @@ -2659,12 +2659,12 @@ int finish_mkwrite_fault(struct vm_fault *vmf) * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED * mapping */ -static int wp_pfn_shared(struct vm_fault *vmf) +static vm_fault_t wp_pfn_shared(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { - int ret; + vm_fault_t ret; pte_unmap_unlock(vmf->pte, vmf->ptl); vmf->flags |= FAULT_FLAG_MKWRITE; @@ -2677,7 +2677,7 @@ static int wp_pfn_shared(struct vm_fault *vmf) return VM_FAULT_WRITE; } -static int wp_page_shared(struct vm_fault *vmf) +static vm_fault_t wp_page_shared(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; @@ -2685,7 +2685,7 @@ static int wp_page_shared(struct vm_fault *vmf) get_page(vmf->page); if (vma->vm_ops && vma->vm_ops->page_mkwrite) { - int tmp; + vm_fault_t tmp; pte_unmap_unlock(vmf->pte, vmf->ptl); tmp = do_page_mkwrite(vmf); @@ -2728,7 +2728,7 @@ static int wp_page_shared(struct vm_fault *vmf) * but allow concurrent faults), with pte both mapped and locked. * We return with mmap_sem still held, but pte unmapped and unlocked. */ -static int do_wp_page(struct vm_fault *vmf) +static vm_fault_t do_wp_page(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; @@ -2904,7 +2904,7 @@ EXPORT_SYMBOL(unmap_mapping_range); * We return with the mmap_sem locked or unlocked in the same cases * as does filemap_fault(). */ -int do_swap_page(struct vm_fault *vmf) +vm_fault_t do_swap_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *page = NULL, *swapcache; @@ -2913,7 +2913,7 @@ int do_swap_page(struct vm_fault *vmf) pte_t pte; int locked; int exclusive = 0; - int ret = 0; + vm_fault_t ret = 0; if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) goto out; @@ -3124,12 +3124,12 @@ out_release: * but allow concurrent faults), and pte mapped but not yet locked. * We return with mmap_sem still held, but pte unmapped and unlocked. */ -static int do_anonymous_page(struct vm_fault *vmf) +static vm_fault_t do_anonymous_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct mem_cgroup *memcg; struct page *page; - int ret = 0; + vm_fault_t ret = 0; pte_t entry; /* File mapping without ->vm_ops ? */ @@ -3239,10 +3239,10 @@ oom: * released depending on flags and vma->vm_ops->fault() return value. * See filemap_fault() and __lock_page_retry(). */ -static int __do_fault(struct vm_fault *vmf) +static vm_fault_t __do_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - int ret; + vm_fault_t ret; ret = vma->vm_ops->fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | @@ -3276,7 +3276,7 @@ static int pmd_devmap_trans_unstable(pmd_t *pmd) return pmd_devmap(*pmd) || pmd_trans_unstable(pmd); } -static int pte_alloc_one_map(struct vm_fault *vmf) +static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; @@ -3352,13 +3352,14 @@ static void deposit_prealloc_pte(struct vm_fault *vmf) vmf->prealloc_pte = NULL; } -static int do_set_pmd(struct vm_fault *vmf, struct page *page) +static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) { struct vm_area_struct *vma = vmf->vma; bool write = vmf->flags & FAULT_FLAG_WRITE; unsigned long haddr = vmf->address & HPAGE_PMD_MASK; pmd_t entry; - int i, ret; + int i; + vm_fault_t ret; if (!transhuge_vma_suitable(vma, haddr)) return VM_FAULT_FALLBACK; @@ -3408,7 +3409,7 @@ out: return ret; } #else -static int do_set_pmd(struct vm_fault *vmf, struct page *page) +static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) { BUILD_BUG(); return 0; @@ -3429,13 +3430,13 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page) * Target users are page handler itself and implementations of * vm_ops->map_pages. */ -int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, +vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, struct page *page) { struct vm_area_struct *vma = vmf->vma; bool write = vmf->flags & FAULT_FLAG_WRITE; pte_t entry; - int ret; + vm_fault_t ret; if (pmd_none(*vmf->pmd) && PageTransCompound(page) && IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) { @@ -3494,10 +3495,10 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, * The function expects the page to be locked and on success it consumes a * reference of a page being mapped (for the PTE which maps it). */ -int finish_fault(struct vm_fault *vmf) +vm_fault_t finish_fault(struct vm_fault *vmf) { struct page *page; - int ret = 0; + vm_fault_t ret = 0; /* Did we COW the page? */ if ((vmf->flags & FAULT_FLAG_WRITE) && @@ -3583,12 +3584,13 @@ late_initcall(fault_around_debugfs); * (and therefore to page order). This way it's easier to guarantee * that we don't cross page table boundaries. */ -static int do_fault_around(struct vm_fault *vmf) +static vm_fault_t do_fault_around(struct vm_fault *vmf) { unsigned long address = vmf->address, nr_pages, mask; pgoff_t start_pgoff = vmf->pgoff; pgoff_t end_pgoff; - int off, ret = 0; + int off; + vm_fault_t ret = 0; nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT; mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; @@ -3638,10 +3640,10 @@ out: return ret; } -static int do_read_fault(struct vm_fault *vmf) +static vm_fault_t do_read_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - int ret = 0; + vm_fault_t ret = 0; /* * Let's call ->map_pages() first and use ->fault() as fallback @@ -3665,10 +3667,10 @@ static int do_read_fault(struct vm_fault *vmf) return ret; } -static int do_cow_fault(struct vm_fault *vmf) +static vm_fault_t do_cow_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - int ret; + vm_fault_t ret; if (unlikely(anon_vma_prepare(vma))) return VM_FAULT_OOM; @@ -3704,10 +3706,10 @@ uncharge_out: return ret; } -static int do_shared_fault(struct vm_fault *vmf) +static vm_fault_t do_shared_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - int ret, tmp; + vm_fault_t ret, tmp; ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) @@ -3745,10 +3747,10 @@ static int do_shared_fault(struct vm_fault *vmf) * The mmap_sem may have been released depending on flags and our * return value. See filemap_fault() and __lock_page_or_retry(). */ -static int do_fault(struct vm_fault *vmf) +static vm_fault_t do_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - int ret; + vm_fault_t ret; /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ if (!vma->vm_ops->fault) @@ -3783,7 +3785,7 @@ static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, return mpol_misplaced(page, vma, addr); } -static int do_numa_page(struct vm_fault *vmf) +static vm_fault_t do_numa_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *page = NULL; @@ -3873,7 +3875,7 @@ out: return 0; } -static inline int create_huge_pmd(struct vm_fault *vmf) +static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) { if (vma_is_anonymous(vmf->vma)) return do_huge_pmd_anonymous_page(vmf); @@ -3883,7 +3885,7 @@ static inline int create_huge_pmd(struct vm_fault *vmf) } /* `inline' is required to avoid gcc 4.1.2 build error */ -static inline int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd) +static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd) { if (vma_is_anonymous(vmf->vma)) return do_huge_pmd_wp_page(vmf, orig_pmd); @@ -3902,7 +3904,7 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma) return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE); } -static int create_huge_pud(struct vm_fault *vmf) +static vm_fault_t create_huge_pud(struct vm_fault *vmf) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* No support for anonymous transparent PUD pages yet */ @@ -3914,7 +3916,7 @@ static int create_huge_pud(struct vm_fault *vmf) return VM_FAULT_FALLBACK; } -static int wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) +static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* No support for anonymous transparent PUD pages yet */ @@ -3941,7 +3943,7 @@ static int wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) * The mmap_sem may have been released depending on flags and our return value. * See filemap_fault() and __lock_page_or_retry(). */ -static int handle_pte_fault(struct vm_fault *vmf) +static vm_fault_t handle_pte_fault(struct vm_fault *vmf) { pte_t entry; @@ -4029,8 +4031,8 @@ unlock: * The mmap_sem may have been released depending on flags and our * return value. See filemap_fault() and __lock_page_or_retry(). */ -static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, - unsigned int flags) +static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, + unsigned long address, unsigned int flags) { struct vm_fault vmf = { .vma = vma, @@ -4043,7 +4045,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; p4d_t *p4d; - int ret; + vm_fault_t ret; pgd = pgd_offset(mm, address); p4d = p4d_alloc(mm, pgd, address); @@ -4118,10 +4120,10 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, * The mmap_sem may have been released depending on flags and our * return value. See filemap_fault() and __lock_page_or_retry(). */ -int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, +vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags) { - int ret; + vm_fault_t ret; __set_current_state(TASK_RUNNING); diff --git a/mm/shmem.c b/mm/shmem.c index fb04baacc9fa..0376c124b043 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -124,7 +124,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp, gfp_t gfp, struct vm_area_struct *vma, - struct vm_fault *vmf, int *fault_type); + struct vm_fault *vmf, vm_fault_t *fault_type); int shmem_getpage(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp) @@ -1620,7 +1620,8 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, */ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp, gfp_t gfp, - struct vm_area_struct *vma, struct vm_fault *vmf, int *fault_type) + struct vm_area_struct *vma, struct vm_fault *vmf, + vm_fault_t *fault_type) { struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); -- cgit v1.2.3