diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-24 18:53:22 +0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-24 18:53:22 +0400 |
commit | db16826367fefcb0ddb93d76b66adc52eb4e6339 (patch) | |
tree | 626224c1eb1eb79c522714591f208b4fdbdcd9d4 /include | |
parent | cd6045138ed1bb5d8773e940d51c34318eef3ef2 (diff) | |
parent | 465fdd97cbe16ef8727221857e96ef62dd352017 (diff) | |
download | linux-db16826367fefcb0ddb93d76b66adc52eb4e6339.tar.xz |
Merge branch 'hwpoison' of git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-mce-2.6
* 'hwpoison' of git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-mce-2.6: (21 commits)
HWPOISON: Enable error_remove_page on btrfs
HWPOISON: Add simple debugfs interface to inject hwpoison on arbitary PFNs
HWPOISON: Add madvise() based injector for hardware poisoned pages v4
HWPOISON: Enable error_remove_page for NFS
HWPOISON: Enable .remove_error_page for migration aware file systems
HWPOISON: The high level memory error handler in the VM v7
HWPOISON: Add PR_MCE_KILL prctl to control early kill behaviour per process
HWPOISON: shmem: call set_page_dirty() with locked page
HWPOISON: Define a new error_remove_page address space op for async truncation
HWPOISON: Add invalidate_inode_page
HWPOISON: Refactor truncate to allow direct truncating of page v2
HWPOISON: check and isolate corrupted free pages v2
HWPOISON: Handle hardware poisoned pages in try_to_unmap
HWPOISON: Use bitmask/action code for try_to_unmap behaviour
HWPOISON: x86: Add VM_FAULT_HWPOISON handling to x86 page fault handler v2
HWPOISON: Add poison check to page fault handling
HWPOISON: Add basic support for poisoned pages in fault handler v3
HWPOISON: Add new SIGBUS error codes for hardware poison signals
HWPOISON: Add support for poison swap entries v2
HWPOISON: Export some rmap vma locking to outside world
...
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-generic/mman-common.h | 1 | ||||
-rw-r--r-- | include/asm-generic/siginfo.h | 8 | ||||
-rw-r--r-- | include/linux/fs.h | 1 | ||||
-rw-r--r-- | include/linux/mm.h | 15 | ||||
-rw-r--r-- | include/linux/page-flags.h | 17 | ||||
-rw-r--r-- | include/linux/prctl.h | 2 | ||||
-rw-r--r-- | include/linux/rmap.h | 21 | ||||
-rw-r--r-- | include/linux/sched.h | 2 | ||||
-rw-r--r-- | include/linux/swap.h | 34 | ||||
-rw-r--r-- | include/linux/swapops.h | 38 |
10 files changed, 129 insertions, 10 deletions
diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h index dd63bd38864b..5ee13b2fd223 100644 --- a/include/asm-generic/mman-common.h +++ b/include/asm-generic/mman-common.h @@ -34,6 +34,7 @@ #define MADV_REMOVE 9 /* remove these pages & resources */ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ +#define MADV_HWPOISON 100 /* poison a page for testing */ #define MADV_MERGEABLE 12 /* KSM may merge identical pages */ #define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h index c840719a8c59..942d30b5aab1 100644 --- a/include/asm-generic/siginfo.h +++ b/include/asm-generic/siginfo.h @@ -82,6 +82,7 @@ typedef struct siginfo { #ifdef __ARCH_SI_TRAPNO int _trapno; /* TRAP # which caused the signal */ #endif + short _addr_lsb; /* LSB of the reported address */ } _sigfault; /* SIGPOLL */ @@ -112,6 +113,7 @@ typedef struct siginfo { #ifdef __ARCH_SI_TRAPNO #define si_trapno _sifields._sigfault._trapno #endif +#define si_addr_lsb _sifields._sigfault._addr_lsb #define si_band _sifields._sigpoll._band #define si_fd _sifields._sigpoll._fd @@ -192,7 +194,11 @@ typedef struct siginfo { #define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */ #define BUS_ADRERR (__SI_FAULT|2) /* non-existant physical address */ #define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */ -#define NSIGBUS 3 +/* hardware memory error consumed on a machine check: action required */ +#define BUS_MCEERR_AR (__SI_FAULT|4) +/* hardware memory error detected in process but not consumed: action optional*/ +#define BUS_MCEERR_AO (__SI_FAULT|5) +#define NSIGBUS 5 /* * SIGTRAP si_codes diff --git a/include/linux/fs.h b/include/linux/fs.h index 33ed6644abd0..78e95b8b66d4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -595,6 +595,7 @@ struct address_space_operations { int (*launder_page) (struct page *); int (*is_partially_uptodate) (struct page *, read_descriptor_t *, unsigned long); + int (*error_remove_page)(struct address_space *, struct page *); }; /* diff --git a/include/linux/mm.h b/include/linux/mm.h index 87218ae84e36..6953a5a53e44 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -695,11 +695,12 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_SIGBUS 0x0002 #define VM_FAULT_MAJOR 0x0004 #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ +#define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned page */ #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ -#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) +#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON) /* * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. @@ -794,6 +795,11 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, extern int vmtruncate(struct inode * inode, loff_t offset); extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); +int truncate_inode_page(struct address_space *mapping, struct page *page); +int generic_error_remove_page(struct address_space *mapping, struct page *page); + +int invalidate_inode_page(struct page *page); + #ifdef CONFIG_MMU extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); @@ -1308,5 +1314,12 @@ void vmemmap_populate_print_last(void); extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, size_t size); extern void refund_locked_memory(struct mm_struct *mm, size_t size); + +extern void memory_failure(unsigned long pfn, int trapno); +extern int __memory_failure(unsigned long pfn, int trapno, int ref); +extern int sysctl_memory_failure_early_kill; +extern int sysctl_memory_failure_recovery; +extern atomic_long_t mce_bad_pages; + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 13de789f0a5c..6b202b173955 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -51,6 +51,9 @@ * PG_buddy is set to indicate that the page is free and in the buddy system * (see mm/page_alloc.c). * + * PG_hwpoison indicates that a page got corrupted in hardware and contains + * data with incorrect ECC bits that triggered a machine check. Accessing is + * not safe since it may cause another machine check. Don't touch! */ /* @@ -102,6 +105,9 @@ enum pageflags { #ifdef CONFIG_ARCH_USES_PG_UNCACHED PG_uncached, /* Page has been mapped as uncached */ #endif +#ifdef CONFIG_MEMORY_FAILURE + PG_hwpoison, /* hardware poisoned page. Don't touch */ +#endif __NR_PAGEFLAGS, /* Filesystems */ @@ -269,6 +275,15 @@ PAGEFLAG(Uncached, uncached) PAGEFLAG_FALSE(Uncached) #endif +#ifdef CONFIG_MEMORY_FAILURE +PAGEFLAG(HWPoison, hwpoison) +TESTSETFLAG(HWPoison, hwpoison) +#define __PG_HWPOISON (1UL << PG_hwpoison) +#else +PAGEFLAG_FALSE(HWPoison) +#define __PG_HWPOISON 0 +#endif + static inline int PageUptodate(struct page *page) { int ret = test_bit(PG_uptodate, &(page)->flags); @@ -393,7 +408,7 @@ static inline void __ClearPageTail(struct page *page) 1 << PG_private | 1 << PG_private_2 | \ 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - 1 << PG_unevictable | __PG_MLOCKED) + 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON) /* * Flags checked when a page is prepped for return by the page allocator. diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 07bff666e65b..931150566ade 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -88,4 +88,6 @@ #define PR_TASK_PERF_EVENTS_DISABLE 31 #define PR_TASK_PERF_EVENTS_ENABLE 32 +#define PR_MCE_KILL 33 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 477841d29fce..cb0ba7032609 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -81,7 +81,19 @@ static inline void page_dup_rmap(struct page *page) */ int page_referenced(struct page *, int is_locked, struct mem_cgroup *cnt, unsigned long *vm_flags); -int try_to_unmap(struct page *, int ignore_refs); +enum ttu_flags { + TTU_UNMAP = 0, /* unmap mode */ + TTU_MIGRATION = 1, /* migration mode */ + TTU_MUNLOCK = 2, /* munlock mode */ + TTU_ACTION_MASK = 0xff, + + TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */ + TTU_IGNORE_ACCESS = (1 << 9), /* don't age */ + TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */ +}; +#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) + +int try_to_unmap(struct page *, enum ttu_flags flags); /* * Called from mm/filemap_xip.c to unmap empty zero page @@ -108,6 +120,13 @@ int page_mkclean(struct page *); */ int try_to_munlock(struct page *); +/* + * Called by memory-failure.c to kill processes. + */ +struct anon_vma *page_lock_anon_vma(struct page *page); +void page_unlock_anon_vma(struct anon_vma *anon_vma); +int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); + #else /* !CONFIG_MMU */ #define anon_vma_init() do {} while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8a16f6d11dcd..75e6e60bf583 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1734,6 +1734,7 @@ extern cputime_t task_gtime(struct task_struct *p); #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ +#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* dumped core */ #define PF_SIGNALED 0x00000400 /* killed by a signal */ @@ -1753,6 +1754,7 @@ extern cputime_t task_gtime(struct task_struct *p); #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ +#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 82232dbea3f7..4ec90019c1a4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -34,16 +34,38 @@ static inline int current_is_kswapd(void) * the type/offset into the pte as 5/27 as well. */ #define MAX_SWAPFILES_SHIFT 5 -#ifndef CONFIG_MIGRATION -#define MAX_SWAPFILES (1 << MAX_SWAPFILES_SHIFT) + +/* + * Use some of the swap files numbers for other purposes. This + * is a convenient way to hook into the VM to trigger special + * actions on faults. + */ + +/* + * NUMA node memory migration support + */ +#ifdef CONFIG_MIGRATION +#define SWP_MIGRATION_NUM 2 +#define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM) +#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1) #else -/* Use last two entries for page migration swap entries */ -#define MAX_SWAPFILES ((1 << MAX_SWAPFILES_SHIFT)-2) -#define SWP_MIGRATION_READ MAX_SWAPFILES -#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + 1) +#define SWP_MIGRATION_NUM 0 #endif /* + * Handling of hardware poisoned pages with memory corruption. + */ +#ifdef CONFIG_MEMORY_FAILURE +#define SWP_HWPOISON_NUM 1 +#define SWP_HWPOISON MAX_SWAPFILES +#else +#define SWP_HWPOISON_NUM 0 +#endif + +#define MAX_SWAPFILES \ + ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM) + +/* * Magic header for a swap area. The first part of the union is * what the swap magic looks like for the old (limited to 128MB) * swap area format, the second part of the union adds - in the diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 6ec39ab27b4b..cd42e30b7c6e 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -131,3 +131,41 @@ static inline int is_write_migration_entry(swp_entry_t entry) #endif +#ifdef CONFIG_MEMORY_FAILURE +/* + * Support for hardware poisoned pages + */ +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + BUG_ON(!PageLocked(page)); + return swp_entry(SWP_HWPOISON, page_to_pfn(page)); +} + +static inline int is_hwpoison_entry(swp_entry_t entry) +{ + return swp_type(entry) == SWP_HWPOISON; +} +#else + +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + return swp_entry(0, 0); +} + +static inline int is_hwpoison_entry(swp_entry_t swp) +{ + return 0; +} +#endif + +#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) +static inline int non_swap_entry(swp_entry_t entry) +{ + return swp_type(entry) >= MAX_SWAPFILES; +} +#else +static inline int non_swap_entry(swp_entry_t entry) +{ + return 0; +} +#endif |