author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-09 20:30:07 +0300
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-09 20:30:07 +0300
commit | fbf4432ff71b7a25bef993a5312906946d27f446 (patch)
tree | cf3e0024af4b8f9376eff75743f1fa1526e40900 /include
parent | c054be10ffdbd5507a1fd738067d76acfb4808fd (diff)
parent | 0cfb6aee70bddbef6ec796b255f588ce0e126766 (diff)
download | linux-fbf4432ff71b7a25bef993a5312906946d27f446.tar.xz
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
- most of the rest of MM
- a small number of misc things
- lib/ updates
- checkpatch
- autofs updates
- ipc/ updates
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (126 commits)
ipc: optimize semget/shmget/msgget for lots of keys
ipc/sem: play nicer with large nsops allocations
ipc/sem: drop sem_checkid helper
ipc: convert kern_ipc_perm.refcount from atomic_t to refcount_t
ipc: convert sem_undo_list.refcnt from atomic_t to refcount_t
ipc: convert ipc_namespace.count from atomic_t to refcount_t
kcov: support compat processes
sh: defconfig: cleanup from old Kconfig options
mn10300: defconfig: cleanup from old Kconfig options
m32r: defconfig: cleanup from old Kconfig options
drivers/pps: use surrounding "if PPS" to remove numerous dependency checks
drivers/pps: aesthetic tweaks to PPS-related content
cpumask: make cpumask_next() out-of-line
kmod: move #ifdef CONFIG_MODULES wrapper to Makefile
kmod: split off umh headers into its own file
MAINTAINERS: clarify kmod is just a kernel module loader
kmod: split out umh code into its own file
test_kmod: flip INT checks to be consistent
test_kmod: remove paranoid UINT_MAX check on uint range processing
vfat: deduplicate hex2bin()
...
Diffstat (limited to 'include')
45 files changed, 1430 insertions, 176 deletions
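Several patches in this series convert IPC reference counters from atomic_t to refcount_t (see the linux/ipc.h and linux/ipc_namespace.h hunks below). A minimal sketch of that pattern, using a hypothetical struct foo and helpers rather than the converted IPC structures:

```c
#include <linux/refcount.h>
#include <linux/slab.h>

struct foo {
	refcount_t refcount;
	/* ... payload ... */
};

static struct foo *foo_alloc(void)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (f)
		refcount_set(&f->refcount, 1);	/* initial reference */
	return f;
}

static void foo_get(struct foo *f)
{
	refcount_inc(&f->refcount);	/* saturates instead of wrapping on overflow */
}

static void foo_put(struct foo *f)
{
	if (refcount_dec_and_test(&f->refcount))
		kfree(f);
}
```

Unlike atomic_t, refcount_t saturates and warns on overflow and on increment-from-zero, which is the motivation for these conversions.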
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 4d7bb98f4134..8e0243036564 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -630,7 +630,24 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, #define arch_start_context_switch(prev) do {} while (0) #endif -#ifndef CONFIG_HAVE_ARCH_SOFT_DIRTY +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION +static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) +{ + return pmd; +} + +static inline int pmd_swp_soft_dirty(pmd_t pmd) +{ + return 0; +} + +static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) +{ + return pmd; +} +#endif +#else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */ static inline int pte_soft_dirty(pte_t pte) { return 0; @@ -675,6 +692,21 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) { return pte; } + +static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) +{ + return pmd; +} + +static inline int pmd_swp_soft_dirty(pmd_t pmd) +{ + return 0; +} + +static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) +{ + return pmd; +} #endif #ifndef __HAVE_PFNMAP_TRACKING @@ -846,7 +878,23 @@ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) #ifdef CONFIG_TRANSPARENT_HUGEPAGE barrier(); #endif - if (pmd_none(pmdval) || pmd_trans_huge(pmdval)) + /* + * !pmd_present() checks for pmd migration entries + * + * The complete check uses is_pmd_migration_entry() in linux/swapops.h + * But using that requires moving current function and pmd_trans_unstable() + * to linux/swapops.h to resovle dependency, which is too much code move. + * + * !pmd_present() is equivalent to is_pmd_migration_entry() currently, + * because !pmd_present() pages can only be under migration not swapped + * out. + * + * pmd_none() is preseved for future condition checks on pmd migration + * entries and not confusing with this function name, although it is + * redundant with !pmd_present(). + */ + if (pmd_none(pmdval) || pmd_trans_huge(pmdval) || + (IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION) && !pmd_present(pmdval))) return 1; if (unlikely(pmd_bad(pmdval))) { pmd_clear_bad(pmd); diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index 49b292e98fec..8d10fc97801c 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -172,7 +172,7 @@ struct drm_mm { * according to the (increasing) start address of the memory node. */ struct drm_mm_node head_node; /* Keep an interval_tree for fast lookup of drm_mm_nodes by address. */ - struct rb_root interval_tree; + struct rb_root_cached interval_tree; struct rb_root holes_size; struct rb_root holes_addr; diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 5797ca6fdfe2..700cf5f67118 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -361,6 +361,38 @@ static inline int bitmap_parse(const char *buf, unsigned int buflen, } /* + * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap. + * + * Linux bitmaps are internally arrays of unsigned longs, i.e. 32-bit + * integers in 32-bit environment, and 64-bit integers in 64-bit one. + * + * There are four combinations of endianness and length of the word in linux + * ABIs: LE64, BE64, LE32 and BE32. + * + * On 64-bit kernels 64-bit LE and BE numbers are naturally ordered in + * bitmaps and therefore don't require any special handling. + * + * On 32-bit kernels 32-bit LE ABI orders lo word of 64-bit number in memory + * prior to hi, and 32-bit BE orders hi word prior to lo. 
The bitmap on the + * other hand is represented as an array of 32-bit words and the position of + * bit N may therefore be calculated as: word #(N/32) and bit #(N%32) in that + * word. For example, bit #42 is located at 10th position of 2nd word. + * It matches 32-bit LE ABI, and we can simply let the compiler store 64-bit + * values in memory as it usually does. But for BE we need to swap hi and lo + * words manually. + * + * With all that, the macro BITMAP_FROM_U64() does explicit reordering of hi and + * lo parts of u64. For LE32 it does nothing, and for BE environment it swaps + * hi and lo words, as is expected by bitmap. + */ +#if __BITS_PER_LONG == 64 +#define BITMAP_FROM_U64(n) (n) +#else +#define BITMAP_FROM_U64(n) ((unsigned long) ((u64)(n) & ULONG_MAX)), \ + ((unsigned long) ((u64)(n) >> 32)) +#endif + +/* * bitmap_from_u64 - Check and swap words within u64. * @mask: source bitmap * @dst: destination bitmap diff --git a/include/linux/bitops.h b/include/linux/bitops.h index a83c822c35c2..8fbe259b197c 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -19,10 +19,11 @@ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ #define GENMASK(h, l) \ - (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) + (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) #define GENMASK_ULL(h, l) \ - (((~0ULL) << (l)) & (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) + (((~0ULL) - (1ULL << (l)) + 1) & \ + (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h index 392041475c72..ffd215988392 100644 --- a/include/linux/byteorder/big_endian.h +++ b/include/linux/byteorder/big_endian.h @@ -3,5 +3,9 @@ #include <uapi/linux/byteorder/big_endian.h> +#ifndef CONFIG_CPU_BIG_ENDIAN +#warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN +#endif + #include <linux/byteorder/generic.h> #endif /* _LINUX_BYTEORDER_BIG_ENDIAN_H */ diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h index 08057377aa23..ba910bb9aad0 100644 --- a/include/linux/byteorder/little_endian.h +++ b/include/linux/byteorder/little_endian.h @@ -3,5 +3,9 @@ #include <uapi/linux/byteorder/little_endian.h> +#ifdef CONFIG_CPU_BIG_ENDIAN +#warning inconsistent configuration, CONFIG_CPU_BIG_ENDIAN is set +#endif + #include <linux/byteorder/generic.h> #endif /* _LINUX_BYTEORDER_LITTLE_ENDIAN_H */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 4bf4479a3a80..cd415b733c2a 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -32,15 +32,15 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; #define cpumask_pr_args(maskp) nr_cpu_ids, cpumask_bits(maskp) #if NR_CPUS == 1 -#define nr_cpu_ids 1 +#define nr_cpu_ids 1U #else -extern int nr_cpu_ids; +extern unsigned int nr_cpu_ids; #endif #ifdef CONFIG_CPUMASK_OFFSTACK /* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, * not all bits may be allocated. */ -#define nr_cpumask_bits ((unsigned int)nr_cpu_ids) +#define nr_cpumask_bits nr_cpu_ids #else #define nr_cpumask_bits ((unsigned int)NR_CPUS) #endif @@ -178,20 +178,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); } -/** - * cpumask_next - get the next cpu in a cpumask - * @n: the cpu prior to the place to search (ie. 
return will be > @n) - * @srcp: the cpumask pointer - * - * Returns >= nr_cpu_ids if no further cpus set. - */ -static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) -{ - /* -1 is a legal arg here. */ - if (n != -1) - cpumask_check(n); - return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); -} +unsigned int cpumask_next(int n, const struct cpumask *srcp); /** * cpumask_next_zero - get the next unset cpu in a cpumask diff --git a/include/linux/fs.h b/include/linux/fs.h index 509434aaf5a4..2d0e6748e46e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -392,7 +392,7 @@ struct address_space { struct radix_tree_root page_tree; /* radix tree of all pages */ spinlock_t tree_lock; /* and lock protecting it */ atomic_t i_mmap_writable;/* count VM_SHARED mappings */ - struct rb_root i_mmap; /* tree of private and shared mappings */ + struct rb_root_cached i_mmap; /* tree of private and shared mappings */ struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */ /* Protected by tree_lock together with the radix tree */ unsigned long nrpages; /* number of total pages */ @@ -487,7 +487,7 @@ static inline void i_mmap_unlock_read(struct address_space *mapping) */ static inline int mapping_mapped(struct address_space *mapping) { - return !RB_EMPTY_ROOT(&mapping->i_mmap); + return !RB_EMPTY_ROOT(&mapping->i_mmap.rb_root); } /* @@ -3043,8 +3043,7 @@ static inline int vfs_lstat(const char __user *name, struct kstat *stat) static inline int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { - return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, - stat, STATX_BASIC_STATS); + return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); } static inline int vfs_fstat(int fd, struct kstat *stat) { diff --git a/include/linux/hmm.h b/include/linux/hmm.h new file mode 100644 index 000000000000..96e69979f84d --- /dev/null +++ b/include/linux/hmm.h @@ -0,0 +1,520 @@ +/* + * Copyright 2013 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Authors: Jérôme Glisse <jglisse@redhat.com> + */ +/* + * Heterogeneous Memory Management (HMM) + * + * See Documentation/vm/hmm.txt for reasons and overview of what HMM is and it + * is for. Here we focus on the HMM API description, with some explanation of + * the underlying implementation. + * + * Short description: HMM provides a set of helpers to share a virtual address + * space between CPU and a device, so that the device can access any valid + * address of the process (while still obeying memory protection). HMM also + * provides helpers to migrate process memory to device memory, and back. Each + * set of functionality (address space mirroring, and migration to and from + * device memory) can be used independently of the other. + * + * + * HMM address space mirroring API: + * + * Use HMM address space mirroring if you want to mirror range of the CPU page + * table of a process into a device page table. Here, "mirror" means "keep + * synchronized". 
Prerequisites: the device must provide the ability to write- + * protect its page tables (at PAGE_SIZE granularity), and must be able to + * recover from the resulting potential page faults. + * + * HMM guarantees that at any point in time, a given virtual address points to + * either the same memory in both CPU and device page tables (that is: CPU and + * device page tables each point to the same pages), or that one page table (CPU + * or device) points to no entry, while the other still points to the old page + * for the address. The latter case happens when the CPU page table update + * happens first, and then the update is mirrored over to the device page table. + * This does not cause any issue, because the CPU page table cannot start + * pointing to a new page until the device page table is invalidated. + * + * HMM uses mmu_notifiers to monitor the CPU page tables, and forwards any + * updates to each device driver that has registered a mirror. It also provides + * some API calls to help with taking a snapshot of the CPU page table, and to + * synchronize with any updates that might happen concurrently. + * + * + * HMM migration to and from device memory: + * + * HMM provides a set of helpers to hotplug device memory as ZONE_DEVICE, with + * a new MEMORY_DEVICE_PRIVATE type. This provides a struct page for each page + * of the device memory, and allows the device driver to manage its memory + * using those struct pages. Having struct pages for device memory makes + * migration easier. Because that memory is not addressable by the CPU it must + * never be pinned to the device; in other words, any CPU page fault can always + * cause the device memory to be migrated (copied/moved) back to regular memory. + * + * A new migrate helper (migrate_vma()) has been added (see mm/migrate.c) that + * allows use of a device DMA engine to perform the copy operation between + * regular system memory and device memory. + */ +#ifndef LINUX_HMM_H +#define LINUX_HMM_H + +#include <linux/kconfig.h> + +#if IS_ENABLED(CONFIG_HMM) + +#include <linux/device.h> +#include <linux/migrate.h> +#include <linux/memremap.h> +#include <linux/completion.h> + +struct hmm; + +/* + * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page + * + * Flags: + * HMM_PFN_VALID: pfn is valid + * HMM_PFN_READ: CPU page table has read permission set + * HMM_PFN_WRITE: CPU page table has write permission set + * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory + * HMM_PFN_EMPTY: corresponding CPU page table entry is pte_none() + * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the + * result of vm_insert_pfn() or vm_insert_page(). Therefore, it should not + * be mirrored by a device, because the entry will never have HMM_PFN_VALID + * set and the pfn value is undefined. 
+ * HMM_PFN_DEVICE_UNADDRESSABLE: unaddressable device memory (ZONE_DEVICE) + */ +typedef unsigned long hmm_pfn_t; + +#define HMM_PFN_VALID (1 << 0) +#define HMM_PFN_READ (1 << 1) +#define HMM_PFN_WRITE (1 << 2) +#define HMM_PFN_ERROR (1 << 3) +#define HMM_PFN_EMPTY (1 << 4) +#define HMM_PFN_SPECIAL (1 << 5) +#define HMM_PFN_DEVICE_UNADDRESSABLE (1 << 6) +#define HMM_PFN_SHIFT 7 + +/* + * hmm_pfn_t_to_page() - return struct page pointed to by a valid hmm_pfn_t + * @pfn: hmm_pfn_t to convert to struct page + * Returns: struct page pointer if pfn is a valid hmm_pfn_t, NULL otherwise + * + * If the hmm_pfn_t is valid (ie valid flag set) then return the struct page + * matching the pfn value stored in the hmm_pfn_t. Otherwise return NULL. + */ +static inline struct page *hmm_pfn_t_to_page(hmm_pfn_t pfn) +{ + if (!(pfn & HMM_PFN_VALID)) + return NULL; + return pfn_to_page(pfn >> HMM_PFN_SHIFT); +} + +/* + * hmm_pfn_t_to_pfn() - return pfn value store in a hmm_pfn_t + * @pfn: hmm_pfn_t to extract pfn from + * Returns: pfn value if hmm_pfn_t is valid, -1UL otherwise + */ +static inline unsigned long hmm_pfn_t_to_pfn(hmm_pfn_t pfn) +{ + if (!(pfn & HMM_PFN_VALID)) + return -1UL; + return (pfn >> HMM_PFN_SHIFT); +} + +/* + * hmm_pfn_t_from_page() - create a valid hmm_pfn_t value from struct page + * @page: struct page pointer for which to create the hmm_pfn_t + * Returns: valid hmm_pfn_t for the page + */ +static inline hmm_pfn_t hmm_pfn_t_from_page(struct page *page) +{ + return (page_to_pfn(page) << HMM_PFN_SHIFT) | HMM_PFN_VALID; +} + +/* + * hmm_pfn_t_from_pfn() - create a valid hmm_pfn_t value from pfn + * @pfn: pfn value for which to create the hmm_pfn_t + * Returns: valid hmm_pfn_t for the pfn + */ +static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn) +{ + return (pfn << HMM_PFN_SHIFT) | HMM_PFN_VALID; +} + + +#if IS_ENABLED(CONFIG_HMM_MIRROR) +/* + * Mirroring: how to synchronize device page table with CPU page table. + * + * A device driver that is participating in HMM mirroring must always + * synchronize with CPU page table updates. For this, device drivers can either + * directly use mmu_notifier APIs or they can use the hmm_mirror API. Device + * drivers can decide to register one mirror per device per process, or just + * one mirror per process for a group of devices. The pattern is: + * + * int device_bind_address_space(..., struct mm_struct *mm, ...) + * { + * struct device_address_space *das; + * + * // Device driver specific initialization, and allocation of das + * // which contains an hmm_mirror struct as one of its fields. + * ... + * + * ret = hmm_mirror_register(&das->mirror, mm, &device_mirror_ops); + * if (ret) { + * // Cleanup on error + * return ret; + * } + * + * // Other device driver specific initialization + * ... + * } + * + * Once an hmm_mirror is registered for an address space, the device driver + * will get callbacks through sync_cpu_device_pagetables() operation (see + * hmm_mirror_ops struct). + * + * Device driver must not free the struct containing the hmm_mirror struct + * before calling hmm_mirror_unregister(). The expected usage is to do that when + * the device driver is unbinding from an address space. + * + * + * void device_unbind_address_space(struct device_address_space *das) + * { + * // Device driver specific cleanup + * ... + * + * hmm_mirror_unregister(&das->mirror); + * + * // Other device driver specific cleanup, and now das can be freed + * ... 
+ * } + */ + +struct hmm_mirror; + +/* + * enum hmm_update_type - type of update + * @HMM_UPDATE_INVALIDATE: invalidate range (no indication as to why) + */ +enum hmm_update_type { + HMM_UPDATE_INVALIDATE, +}; + +/* + * struct hmm_mirror_ops - HMM mirror device operations callback + * + * @update: callback to update range on a device + */ +struct hmm_mirror_ops { + /* sync_cpu_device_pagetables() - synchronize page tables + * + * @mirror: pointer to struct hmm_mirror + * @update_type: type of update that occurred to the CPU page table + * @start: virtual start address of the range to update + * @end: virtual end address of the range to update + * + * This callback ultimately originates from mmu_notifiers when the CPU + * page table is updated. The device driver must update its page table + * in response to this callback. The update argument tells what action + * to perform. + * + * The device driver must not return from this callback until the device + * page tables are completely updated (TLBs flushed, etc); this is a + * synchronous call. + */ + void (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror, + enum hmm_update_type update_type, + unsigned long start, + unsigned long end); +}; + +/* + * struct hmm_mirror - mirror struct for a device driver + * + * @hmm: pointer to struct hmm (which is unique per mm_struct) + * @ops: device driver callback for HMM mirror operations + * @list: for list of mirrors of a given mm + * + * Each address space (mm_struct) being mirrored by a device must register one + * instance of an hmm_mirror struct with HMM. HMM will track the list of all + * mirrors for each mm_struct. + */ +struct hmm_mirror { + struct hmm *hmm; + const struct hmm_mirror_ops *ops; + struct list_head list; +}; + +int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm); +void hmm_mirror_unregister(struct hmm_mirror *mirror); + + +/* + * struct hmm_range - track invalidation lock on virtual address range + * + * @list: all range lock are on a list + * @start: range virtual start address (inclusive) + * @end: range virtual end address (exclusive) + * @pfns: array of pfns (big enough for the range) + * @valid: pfns array did not change since it has been fill by an HMM function + */ +struct hmm_range { + struct list_head list; + unsigned long start; + unsigned long end; + hmm_pfn_t *pfns; + bool valid; +}; + +/* + * To snapshot the CPU page table, call hmm_vma_get_pfns(), then take a device + * driver lock that serializes device page table updates, then call + * hmm_vma_range_done(), to check if the snapshot is still valid. The same + * device driver page table update lock must also be used in the + * hmm_mirror_ops.sync_cpu_device_pagetables() callback, so that CPU page + * table invalidation serializes on it. + * + * YOU MUST CALL hmm_vma_range_done() ONCE AND ONLY ONCE EACH TIME YOU CALL + * hmm_vma_get_pfns() WITHOUT ERROR ! + * + * IF YOU DO NOT FOLLOW THE ABOVE RULE THE SNAPSHOT CONTENT MIGHT BE INVALID ! + */ +int hmm_vma_get_pfns(struct vm_area_struct *vma, + struct hmm_range *range, + unsigned long start, + unsigned long end, + hmm_pfn_t *pfns); +bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range); + + +/* + * Fault memory on behalf of device driver. Unlike handle_mm_fault(), this will + * not migrate any device memory back to system memory. The hmm_pfn_t array will + * be updated with the fault result and current snapshot of the CPU page table + * for the range. 
+ * + * The mmap_sem must be taken in read mode before entering and it might be + * dropped by the function if the block argument is false. In that case, the + * function returns -EAGAIN. + * + * Return value does not reflect if the fault was successful for every single + * address or not. Therefore, the caller must to inspect the hmm_pfn_t array to + * determine fault status for each address. + * + * Trying to fault inside an invalid vma will result in -EINVAL. + * + * See the function description in mm/hmm.c for further documentation. + */ +int hmm_vma_fault(struct vm_area_struct *vma, + struct hmm_range *range, + unsigned long start, + unsigned long end, + hmm_pfn_t *pfns, + bool write, + bool block); +#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ + + +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) +struct hmm_devmem; + +struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma, + unsigned long addr); + +/* + * struct hmm_devmem_ops - callback for ZONE_DEVICE memory events + * + * @free: call when refcount on page reach 1 and thus is no longer use + * @fault: call when there is a page fault to unaddressable memory + * + * Both callback happens from page_free() and page_fault() callback of struct + * dev_pagemap respectively. See include/linux/memremap.h for more details on + * those. + * + * The hmm_devmem_ops callback are just here to provide a coherent and + * uniq API to device driver and device driver should not register their + * own page_free() or page_fault() but rely on the hmm_devmem_ops call- + * back. + */ +struct hmm_devmem_ops { + /* + * free() - free a device page + * @devmem: device memory structure (see struct hmm_devmem) + * @page: pointer to struct page being freed + * + * Call back occurs whenever a device page refcount reach 1 which + * means that no one is holding any reference on the page anymore + * (ZONE_DEVICE page have an elevated refcount of 1 as default so + * that they are not release to the general page allocator). + * + * Note that callback has exclusive ownership of the page (as no + * one is holding any reference). + */ + void (*free)(struct hmm_devmem *devmem, struct page *page); + /* + * fault() - CPU page fault or get user page (GUP) + * @devmem: device memory structure (see struct hmm_devmem) + * @vma: virtual memory area containing the virtual address + * @addr: virtual address that faulted or for which there is a GUP + * @page: pointer to struct page backing virtual address (unreliable) + * @flags: FAULT_FLAG_* (see include/linux/mm.h) + * @pmdp: page middle directory + * Returns: VM_FAULT_MINOR/MAJOR on success or one of VM_FAULT_ERROR + * on error + * + * The callback occurs whenever there is a CPU page fault or GUP on a + * virtual address. This means that the device driver must migrate the + * page back to regular memory (CPU accessible). + * + * The device driver is free to migrate more than one page from the + * fault() callback as an optimization. However if device decide to + * migrate more than one page it must always priotirize the faulting + * address over the others. + * + * The struct page pointer is only given as an hint to allow quick + * lookup of internal device driver data. A concurrent migration + * might have already free that page and the virtual address might + * not longer be back by it. So it should not be modified by the + * callback. + * + * Note that mmap semaphore is held in read mode at least when this + * callback occurs, hence the vma is valid upon callback entry. 
+ */ + int (*fault)(struct hmm_devmem *devmem, + struct vm_area_struct *vma, + unsigned long addr, + const struct page *page, + unsigned int flags, + pmd_t *pmdp); +}; + +/* + * struct hmm_devmem - track device memory + * + * @completion: completion object for device memory + * @pfn_first: first pfn for this resource (set by hmm_devmem_add()) + * @pfn_last: last pfn for this resource (set by hmm_devmem_add()) + * @resource: IO resource reserved for this chunk of memory + * @pagemap: device page map for that chunk + * @device: device to bind resource to + * @ops: memory operations callback + * @ref: per CPU refcount + * + * This an helper structure for device drivers that do not wish to implement + * the gory details related to hotplugging new memoy and allocating struct + * pages. + * + * Device drivers can directly use ZONE_DEVICE memory on their own if they + * wish to do so. + */ +struct hmm_devmem { + struct completion completion; + unsigned long pfn_first; + unsigned long pfn_last; + struct resource *resource; + struct device *device; + struct dev_pagemap pagemap; + const struct hmm_devmem_ops *ops; + struct percpu_ref ref; +}; + +/* + * To add (hotplug) device memory, HMM assumes that there is no real resource + * that reserves a range in the physical address space (this is intended to be + * use by unaddressable device memory). It will reserve a physical range big + * enough and allocate struct page for it. + * + * The device driver can wrap the hmm_devmem struct inside a private device + * driver struct. The device driver must call hmm_devmem_remove() before the + * device goes away and before freeing the hmm_devmem struct memory. + */ +struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, + struct device *device, + unsigned long size); +struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops, + struct device *device, + struct resource *res); +void hmm_devmem_remove(struct hmm_devmem *devmem); + +/* + * hmm_devmem_page_set_drvdata - set per-page driver data field + * + * @page: pointer to struct page + * @data: driver data value to set + * + * Because page can not be on lru we have an unsigned long that driver can use + * to store a per page field. This just a simple helper to do that. + */ +static inline void hmm_devmem_page_set_drvdata(struct page *page, + unsigned long data) +{ + unsigned long *drvdata = (unsigned long *)&page->pgmap; + + drvdata[1] = data; +} + +/* + * hmm_devmem_page_get_drvdata - get per page driver data field + * + * @page: pointer to struct page + * Return: driver data value + */ +static inline unsigned long hmm_devmem_page_get_drvdata(struct page *page) +{ + unsigned long *drvdata = (unsigned long *)&page->pgmap; + + return drvdata[1]; +} + + +/* + * struct hmm_device - fake device to hang device memory onto + * + * @device: device struct + * @minor: device minor number + */ +struct hmm_device { + struct device device; + unsigned int minor; +}; + +/* + * A device driver that wants to handle multiple devices memory through a + * single fake device can use hmm_device to do so. This is purely a helper and + * it is not strictly needed, in order to make use of any HMM functionality. + */ +struct hmm_device *hmm_device_new(void *drvdata); +void hmm_device_put(struct hmm_device *hmm_device); +#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ +#endif /* IS_ENABLED(CONFIG_HMM) */ + +/* Below are for HMM internal use only! Not to be used by device driver! 
*/ +#if IS_ENABLED(CONFIG_HMM_MIRROR) +void hmm_mm_destroy(struct mm_struct *mm); + +static inline void hmm_mm_init(struct mm_struct *mm) +{ + mm->hmm = NULL; +} +#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */ +static inline void hmm_mm_destroy(struct mm_struct *mm) {} +static inline void hmm_mm_init(struct mm_struct *mm) {} +#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ + + +#else /* IS_ENABLED(CONFIG_HMM) */ +static inline void hmm_mm_destroy(struct mm_struct *mm) {} +static inline void hmm_mm_init(struct mm_struct *mm) {} +#endif /* LINUX_HMM_H */ diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ee696347f928..14bc21c2ee7f 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -147,7 +147,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, #define split_huge_pmd(__vma, __pmd, __address) \ do { \ pmd_t *____pmd = (__pmd); \ - if (pmd_trans_huge(*____pmd) \ + if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd) \ || pmd_devmap(*____pmd)) \ __split_huge_pmd(__vma, __pmd, __address, \ false, NULL); \ @@ -178,12 +178,18 @@ extern spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma); extern spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma); + +static inline int is_swap_pmd(pmd_t pmd) +{ + return !pmd_none(pmd) && !pmd_present(pmd); +} + /* mmap_sem must be held on entry */ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma); - if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) + if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) return __pmd_trans_huge_lock(pmd, vma); else return NULL; @@ -233,6 +239,11 @@ void mm_put_huge_zero_page(struct mm_struct *mm); #define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot)) +static inline bool thp_migration_supported(void) +{ + return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); +} + #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) @@ -294,6 +305,10 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, long adjust_next) { } +static inline int is_swap_pmd(pmd_t pmd) +{ + return 0; +} static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { @@ -336,6 +351,11 @@ static inline struct page *follow_devmap_pud(struct vm_area_struct *vma, { return NULL; } + +static inline bool thp_migration_supported(void) +{ + return false; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 0e849715e5be..3c07ace5b431 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -175,9 +175,8 @@ extern struct cred init_cred; #ifdef CONFIG_RT_MUTEXES # define INIT_RT_MUTEXES(tsk) \ - .pi_waiters = RB_ROOT, \ - .pi_top_task = NULL, \ - .pi_waiters_leftmost = NULL, + .pi_waiters = RB_ROOT_CACHED, \ + .pi_top_task = NULL, #else # define INIT_RT_MUTEXES(tsk) #endif diff --git a/include/linux/interval_tree.h b/include/linux/interval_tree.h index 724556aa3c95..202ee1283f4b 100644 --- a/include/linux/interval_tree.h +++ b/include/linux/interval_tree.h @@ -11,13 +11,15 @@ struct interval_tree_node { }; extern void -interval_tree_insert(struct interval_tree_node *node, struct rb_root *root); +interval_tree_insert(struct interval_tree_node *node, + struct rb_root_cached *root); extern void -interval_tree_remove(struct interval_tree_node *node, struct rb_root 
*root); +interval_tree_remove(struct interval_tree_node *node, + struct rb_root_cached *root); extern struct interval_tree_node * -interval_tree_iter_first(struct rb_root *root, +interval_tree_iter_first(struct rb_root_cached *root, unsigned long start, unsigned long last); extern struct interval_tree_node * diff --git a/include/linux/interval_tree_generic.h b/include/linux/interval_tree_generic.h index 58370e1862ad..1f97ce26cccc 100644 --- a/include/linux/interval_tree_generic.h +++ b/include/linux/interval_tree_generic.h @@ -33,7 +33,7 @@ * ITSTATIC: 'static' or empty * ITPREFIX: prefix to use for the inline tree definitions * - * Note - before using this, please consider if non-generic version + * Note - before using this, please consider if generic version * (interval_tree.h) would work for you... */ @@ -65,11 +65,13 @@ RB_DECLARE_CALLBACKS(static, ITPREFIX ## _augment, ITSTRUCT, ITRB, \ \ /* Insert / remove interval nodes from the tree */ \ \ -ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, struct rb_root *root) \ +ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, \ + struct rb_root_cached *root) \ { \ - struct rb_node **link = &root->rb_node, *rb_parent = NULL; \ + struct rb_node **link = &root->rb_root.rb_node, *rb_parent = NULL; \ ITTYPE start = ITSTART(node), last = ITLAST(node); \ ITSTRUCT *parent; \ + bool leftmost = true; \ \ while (*link) { \ rb_parent = *link; \ @@ -78,18 +80,22 @@ ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, struct rb_root *root) \ parent->ITSUBTREE = last; \ if (start < ITSTART(parent)) \ link = &parent->ITRB.rb_left; \ - else \ + else { \ link = &parent->ITRB.rb_right; \ + leftmost = false; \ + } \ } \ \ node->ITSUBTREE = last; \ rb_link_node(&node->ITRB, rb_parent, link); \ - rb_insert_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \ + rb_insert_augmented_cached(&node->ITRB, root, \ + leftmost, &ITPREFIX ## _augment); \ } \ \ -ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, struct rb_root *root) \ +ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, \ + struct rb_root_cached *root) \ { \ - rb_erase_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \ + rb_erase_augmented_cached(&node->ITRB, root, &ITPREFIX ## _augment); \ } \ \ /* \ @@ -140,15 +146,35 @@ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ } \ \ ITSTATIC ITSTRUCT * \ -ITPREFIX ## _iter_first(struct rb_root *root, ITTYPE start, ITTYPE last) \ +ITPREFIX ## _iter_first(struct rb_root_cached *root, \ + ITTYPE start, ITTYPE last) \ { \ - ITSTRUCT *node; \ + ITSTRUCT *node, *leftmost; \ \ - if (!root->rb_node) \ + if (!root->rb_root.rb_node) \ return NULL; \ - node = rb_entry(root->rb_node, ITSTRUCT, ITRB); \ + \ + /* \ + * Fastpath range intersection/overlap between A: [a0, a1] and \ + * B: [b0, b1] is given by: \ + * \ + * a0 <= b1 && b0 <= a1 \ + * \ + * ... where A holds the lock range and B holds the smallest \ + * 'start' and largest 'last' in the tree. For the later, we \ + * rely on the root node, which by augmented interval tree \ + * property, holds the largest value in its last-in-subtree. \ + * This allows mitigating some of the tree walk overhead for \ + * for non-intersecting ranges, maintained and consulted in O(1). 
\ + */ \ + node = rb_entry(root->rb_root.rb_node, ITSTRUCT, ITRB); \ if (node->ITSUBTREE < start) \ return NULL; \ + \ + leftmost = rb_entry(root->rb_leftmost, ITSTRUCT, ITRB); \ + if (ITSTART(leftmost) > last) \ + return NULL; \ + \ return ITPREFIX ## _subtree_search(node, start, last); \ } \ \ diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 6230064d7f95..f5cf32e80041 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -130,6 +130,8 @@ enum { IORES_DESC_ACPI_NV_STORAGE = 3, IORES_DESC_PERSISTENT_MEMORY = 4, IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, + IORES_DESC_DEVICE_PRIVATE_MEMORY = 6, + IORES_DESC_DEVICE_PUBLIC_MEMORY = 7, }; /* helpers to define resources */ diff --git a/include/linux/ipc.h b/include/linux/ipc.h index fadd579d577d..92a2ccff80c5 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -3,7 +3,9 @@ #include <linux/spinlock.h> #include <linux/uidgid.h> +#include <linux/rhashtable.h> #include <uapi/linux/ipc.h> +#include <linux/refcount.h> #define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */ @@ -21,8 +23,10 @@ struct kern_ipc_perm { unsigned long seq; void *security; + struct rhash_head khtnode; + struct rcu_head rcu; - atomic_t refcount; + refcount_t refcount; } ____cacheline_aligned_in_smp __randomize_layout; #endif /* _LINUX_IPC_H */ diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 65327ee0936b..83f0bf7a587d 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -7,19 +7,23 @@ #include <linux/notifier.h> #include <linux/nsproxy.h> #include <linux/ns_common.h> +#include <linux/refcount.h> +#include <linux/rhashtable.h> struct user_namespace; struct ipc_ids { int in_use; unsigned short seq; + bool tables_initialized; struct rw_semaphore rwsem; struct idr ipcs_idr; int next_id; + struct rhashtable key_ht; }; struct ipc_namespace { - atomic_t count; + refcount_t count; struct ipc_ids ids[3]; int sem_ctls[4]; @@ -118,7 +122,7 @@ extern struct ipc_namespace *copy_ipcs(unsigned long flags, static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { if (ns) - atomic_inc(&ns->count); + refcount_inc(&ns->count); return ns; } diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 6607225d0ea4..0ad4c3044cf9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -78,8 +78,11 @@ #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP -#define DIV_ROUND_UP_ULL(ll,d) \ - ({ unsigned long long _tmp = (ll)+(d)-1; do_div(_tmp, d); _tmp; }) + +#define DIV_ROUND_DOWN_ULL(ll, d) \ + ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; }) + +#define DIV_ROUND_UP_ULL(ll, d) DIV_ROUND_DOWN_ULL((ll) + (d) - 1, (d)) #if BITS_PER_LONG == 32 # define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d) diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 655082c88fd9..40c89ad4bea6 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -19,6 +19,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include <linux/umh.h> #include <linux/gfp.h> #include <linux/stddef.h> #include <linux/errno.h> @@ -44,63 +45,4 @@ static inline int request_module_nowait(const char *name, ...) { return -ENOSYS; #define try_then_request_module(x, mod...) 
(x) #endif - -struct cred; -struct file; - -#define UMH_NO_WAIT 0 /* don't wait at all */ -#define UMH_WAIT_EXEC 1 /* wait for the exec, but not the process */ -#define UMH_WAIT_PROC 2 /* wait for the process to complete */ -#define UMH_KILLABLE 4 /* wait for EXEC/PROC killable */ - -struct subprocess_info { - struct work_struct work; - struct completion *complete; - const char *path; - char **argv; - char **envp; - int wait; - int retval; - int (*init)(struct subprocess_info *info, struct cred *new); - void (*cleanup)(struct subprocess_info *info); - void *data; -} __randomize_layout; - -extern int -call_usermodehelper(const char *path, char **argv, char **envp, int wait); - -extern struct subprocess_info * -call_usermodehelper_setup(const char *path, char **argv, char **envp, - gfp_t gfp_mask, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *), void *data); - -extern int -call_usermodehelper_exec(struct subprocess_info *info, int wait); - -extern struct ctl_table usermodehelper_table[]; - -enum umh_disable_depth { - UMH_ENABLED = 0, - UMH_FREEZING, - UMH_DISABLED, -}; - -extern int __usermodehelper_disable(enum umh_disable_depth depth); -extern void __usermodehelper_set_disable_depth(enum umh_disable_depth depth); - -static inline int usermodehelper_disable(void) -{ - return __usermodehelper_disable(UMH_DISABLED); -} - -static inline void usermodehelper_enable(void) -{ - __usermodehelper_set_disable_depth(UMH_ENABLED); -} - -extern int usermodehelper_read_trylock(void); -extern long usermodehelper_read_lock_wait(long timeout); -extern void usermodehelper_read_unlock(void); - #endif /* __LINUX_KMOD_H__ */ diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 5e6e4cc36ff4..0995e1a2b458 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -133,6 +133,17 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn, extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, bool want_memblock); +#ifndef CONFIG_ARCH_HAS_ADD_PAGES +static inline int add_pages(int nid, unsigned long start_pfn, + unsigned long nr_pages, bool want_memblock) +{ + return __add_pages(nid, start_pfn, nr_pages, want_memblock); +} +#else /* ARCH_HAS_ADD_PAGES */ +int add_pages(int nid, unsigned long start_pfn, + unsigned long nr_pages, bool want_memblock); +#endif /* ARCH_HAS_ADD_PAGES */ + #ifdef CONFIG_NUMA extern int memory_add_physaddr_to_nid(u64 start); #else diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 93416196ba64..79f8ba7c3894 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -4,6 +4,8 @@ #include <linux/ioport.h> #include <linux/percpu-refcount.h> +#include <asm/pgtable.h> + struct resource; struct device; @@ -35,24 +37,107 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) } #endif +/* + * Specialize ZONE_DEVICE memory into multiple types each having differents + * usage. + * + * MEMORY_DEVICE_HOST: + * Persistent device memory (pmem): struct page might be allocated in different + * memory and architecture might want to perform special actions. It is similar + * to regular memory, in that the CPU can access it transparently. However, + * it is likely to have different bandwidth and latency than regular memory. + * See Documentation/nvdimm/nvdimm.txt for more information. 
+ * + * MEMORY_DEVICE_PRIVATE: + * Device memory that is not directly addressable by the CPU: CPU can neither + * read nor write private memory. In this case, we do still have struct pages + * backing the device memory. Doing so simplifies the implementation, but it is + * important to remember that there are certain points at which the struct page + * must be treated as an opaque object, rather than a "normal" struct page. + * + * A more complete discussion of unaddressable memory may be found in + * include/linux/hmm.h and Documentation/vm/hmm.txt. + * + * MEMORY_DEVICE_PUBLIC: + * Device memory that is cache coherent from device and CPU point of view. This + * is use on platform that have an advance system bus (like CAPI or CCIX). A + * driver can hotplug the device memory using ZONE_DEVICE and with that memory + * type. Any page of a process can be migrated to such memory. However no one + * should be allow to pin such memory so that it can always be evicted. + */ +enum memory_type { + MEMORY_DEVICE_HOST = 0, + MEMORY_DEVICE_PRIVATE, + MEMORY_DEVICE_PUBLIC, +}; + +/* + * For MEMORY_DEVICE_PRIVATE we use ZONE_DEVICE and extend it with two + * callbacks: + * page_fault() + * page_free() + * + * Additional notes about MEMORY_DEVICE_PRIVATE may be found in + * include/linux/hmm.h and Documentation/vm/hmm.txt. There is also a brief + * explanation in include/linux/memory_hotplug.h. + * + * The page_fault() callback must migrate page back, from device memory to + * system memory, so that the CPU can access it. This might fail for various + * reasons (device issues, device have been unplugged, ...). When such error + * conditions happen, the page_fault() callback must return VM_FAULT_SIGBUS and + * set the CPU page table entry to "poisoned". + * + * Note that because memory cgroup charges are transferred to the device memory, + * this should never fail due to memory restrictions. However, allocation + * of a regular system page might still fail because we are out of memory. If + * that happens, the page_fault() callback must return VM_FAULT_OOM. + * + * The page_fault() callback can also try to migrate back multiple pages in one + * chunk, as an optimization. It must, however, prioritize the faulting address + * over all the others. + * + * + * The page_free() callback is called once the page refcount reaches 1 + * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug. + * This allows the device driver to implement its own memory management.) + * + * For MEMORY_DEVICE_PUBLIC only the page_free() callback matter. 
+ */ +typedef int (*dev_page_fault_t)(struct vm_area_struct *vma, + unsigned long addr, + const struct page *page, + unsigned int flags, + pmd_t *pmdp); +typedef void (*dev_page_free_t)(struct page *page, void *data); + /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings + * @page_fault: callback when CPU fault on an unaddressable device page + * @page_free: free page callback when page refcount reaches 1 * @altmap: pre-allocated/reserved memory for vmemmap allocations * @res: physical address range covered by @ref * @ref: reference count that pins the devm_memremap_pages() mapping * @dev: host device of the mapping for debug + * @data: private data pointer for page_free() + * @type: memory type: see MEMORY_* in memory_hotplug.h */ struct dev_pagemap { + dev_page_fault_t page_fault; + dev_page_free_t page_free; struct vmem_altmap *altmap; const struct resource *res; struct percpu_ref *ref; struct device *dev; + void *data; + enum memory_type type; }; #ifdef CONFIG_ZONE_DEVICE void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, struct vmem_altmap *altmap); struct dev_pagemap *find_dev_pagemap(resource_size_t phys); + +static inline bool is_zone_device_page(const struct page *page); #else static inline void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, @@ -73,6 +158,20 @@ static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys) } #endif +#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC) +static inline bool is_device_private_page(const struct page *page) +{ + return is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_PRIVATE; +} + +static inline bool is_device_public_page(const struct page *page) +{ + return is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_PUBLIC; +} +#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ + /** * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn * @pfn: page frame number to lookup page_map diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 3e0d405dc842..643c7ae7d7b4 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -35,15 +35,28 @@ static inline struct page *new_page_nodemask(struct page *page, int preferred_nid, nodemask_t *nodemask) { gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL; + unsigned int order = 0; + struct page *new_page = NULL; if (PageHuge(page)) return alloc_huge_page_nodemask(page_hstate(compound_head(page)), preferred_nid, nodemask); + if (thp_migration_supported() && PageTransHuge(page)) { + order = HPAGE_PMD_ORDER; + gfp_mask |= GFP_TRANSHUGE; + } + if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) gfp_mask |= __GFP_HIGHMEM; - return __alloc_pages_nodemask(gfp_mask, 0, preferred_nid, nodemask); + new_page = __alloc_pages_nodemask(gfp_mask, order, + preferred_nid, nodemask); + + if (new_page && PageTransHuge(page)) + prep_transhuge_page(new_page); + + return new_page; } #ifdef CONFIG_MIGRATION @@ -59,6 +72,7 @@ extern void putback_movable_page(struct page *page); extern int migrate_prep(void); extern int migrate_prep_local(void); +extern void migrate_page_states(struct page *newpage, struct page *page); extern void migrate_page_copy(struct page *newpage, struct page *page); extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); @@ -79,6 +93,10 @@ static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) 
static inline int migrate_prep(void) { return -ENOSYS; } static inline int migrate_prep_local(void) { return -ENOSYS; } +static inline void migrate_page_states(struct page *newpage, struct page *page) +{ +} + static inline void migrate_page_copy(struct page *newpage, struct page *page) {} @@ -138,4 +156,136 @@ static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm, } #endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE*/ + +#ifdef CONFIG_MIGRATION + +/* + * Watch out for PAE architecture, which has an unsigned long, and might not + * have enough bits to store all physical address and flags. So far we have + * enough room for all our flags. + */ +#define MIGRATE_PFN_VALID (1UL << 0) +#define MIGRATE_PFN_MIGRATE (1UL << 1) +#define MIGRATE_PFN_LOCKED (1UL << 2) +#define MIGRATE_PFN_WRITE (1UL << 3) +#define MIGRATE_PFN_DEVICE (1UL << 4) +#define MIGRATE_PFN_ERROR (1UL << 5) +#define MIGRATE_PFN_SHIFT 6 + +static inline struct page *migrate_pfn_to_page(unsigned long mpfn) +{ + if (!(mpfn & MIGRATE_PFN_VALID)) + return NULL; + return pfn_to_page(mpfn >> MIGRATE_PFN_SHIFT); +} + +static inline unsigned long migrate_pfn(unsigned long pfn) +{ + return (pfn << MIGRATE_PFN_SHIFT) | MIGRATE_PFN_VALID; +} + +/* + * struct migrate_vma_ops - migrate operation callback + * + * @alloc_and_copy: alloc destination memory and copy source memory to it + * @finalize_and_map: allow caller to map the successfully migrated pages + * + * + * The alloc_and_copy() callback happens once all source pages have been locked, + * unmapped and checked (checked whether pinned or not). All pages that can be + * migrated will have an entry in the src array set with the pfn value of the + * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set (other + * flags might be set but should be ignored by the callback). + * + * The alloc_and_copy() callback can then allocate destination memory and copy + * source memory to it for all those entries (ie with MIGRATE_PFN_VALID and + * MIGRATE_PFN_MIGRATE flag set). Once these are allocated and copied, the + * callback must update each corresponding entry in the dst array with the pfn + * value of the destination page and with the MIGRATE_PFN_VALID and + * MIGRATE_PFN_LOCKED flags set (destination pages must have their struct pages + * locked, via lock_page()). + * + * At this point the alloc_and_copy() callback is done and returns. + * + * Note that the callback does not have to migrate all the pages that are + * marked with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration + * from device memory to system memory (ie the MIGRATE_PFN_DEVICE flag is also + * set in the src array entry). If the device driver cannot migrate a device + * page back to system memory, then it must set the corresponding dst array + * entry to MIGRATE_PFN_ERROR. This will trigger a SIGBUS if CPU tries to + * access any of the virtual addresses originally backed by this page. Because + * a SIGBUS is such a severe result for the userspace process, the device + * driver should avoid setting MIGRATE_PFN_ERROR unless it is really in an + * unrecoverable state. + * + * For empty entry inside CPU page table (pte_none() or pmd_none() is true) we + * do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array thus + * allowing device driver to allocate device memory for those unback virtual + * address. For this the device driver simply have to allocate device memory + * and properly set the destination entry like for regular migration. 
Note that + * this can still fails and thus inside the device driver must check if the + * migration was successful for those entry inside the finalize_and_map() + * callback just like for regular migration. + * + * THE alloc_and_copy() CALLBACK MUST NOT CHANGE ANY OF THE SRC ARRAY ENTRIES + * OR BAD THINGS WILL HAPPEN ! + * + * + * The finalize_and_map() callback happens after struct page migration from + * source to destination (destination struct pages are the struct pages for the + * memory allocated by the alloc_and_copy() callback). Migration can fail, and + * thus the finalize_and_map() allows the driver to inspect which pages were + * successfully migrated, and which were not. Successfully migrated pages will + * have the MIGRATE_PFN_MIGRATE flag set for their src array entry. + * + * It is safe to update device page table from within the finalize_and_map() + * callback because both destination and source page are still locked, and the + * mmap_sem is held in read mode (hence no one can unmap the range being + * migrated). + * + * Once callback is done cleaning up things and updating its page table (if it + * chose to do so, this is not an obligation) then it returns. At this point, + * the HMM core will finish up the final steps, and the migration is complete. + * + * THE finalize_and_map() CALLBACK MUST NOT CHANGE ANY OF THE SRC OR DST ARRAY + * ENTRIES OR BAD THINGS WILL HAPPEN ! + */ +struct migrate_vma_ops { + void (*alloc_and_copy)(struct vm_area_struct *vma, + const unsigned long *src, + unsigned long *dst, + unsigned long start, + unsigned long end, + void *private); + void (*finalize_and_map)(struct vm_area_struct *vma, + const unsigned long *src, + const unsigned long *dst, + unsigned long start, + unsigned long end, + void *private); +}; + +#if defined(CONFIG_MIGRATE_VMA_HELPER) +int migrate_vma(const struct migrate_vma_ops *ops, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + unsigned long *src, + unsigned long *dst, + void *private); +#else +static inline int migrate_vma(const struct migrate_vma_ops *ops, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + unsigned long *src, + unsigned long *dst, + void *private) +{ + return -EINVAL; +} +#endif /* IS_ENABLED(CONFIG_MIGRATE_VMA_HELPER) */ + +#endif /* CONFIG_MIGRATION */ + #endif /* _LINUX_MIGRATE_H */ diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h index ebf3d89a3919..bdf66af9b937 100644 --- a/include/linux/migrate_mode.h +++ b/include/linux/migrate_mode.h @@ -6,11 +6,16 @@ * on most operations but not ->writepage as the potential stall time * is too significant * MIGRATE_SYNC will block when migrating pages + * MIGRATE_SYNC_NO_COPY will block when migrating pages but will not copy pages + * with the CPU. Instead, page copy happens outside the migratepage() + * callback and is likely using a DMA engine. See migrate_vma() and HMM + * (mm/hmm.c) for users of this mode. 
*/ enum migrate_mode { MIGRATE_ASYNC, MIGRATE_SYNC_LIGHT, MIGRATE_SYNC, + MIGRATE_SYNC_NO_COPY, }; #endif /* MIGRATE_MODE_H_INCLUDED */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 39db8e54c5d5..f8c10d336e42 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -23,6 +23,7 @@ #include <linux/page_ext.h> #include <linux/err.h> #include <linux/page_ref.h> +#include <linux/memremap.h> struct mempolicy; struct anon_vma; @@ -799,6 +800,28 @@ static inline bool is_zone_device_page(const struct page *page) } #endif +#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC) +void put_zone_device_private_or_public_page(struct page *page); +DECLARE_STATIC_KEY_FALSE(device_private_key); +#define IS_HMM_ENABLED static_branch_unlikely(&device_private_key) +static inline bool is_device_private_page(const struct page *page); +static inline bool is_device_public_page(const struct page *page); +#else /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ +static inline void put_zone_device_private_or_public_page(struct page *page) +{ +} +#define IS_HMM_ENABLED 0 +static inline bool is_device_private_page(const struct page *page) +{ + return false; +} +static inline bool is_device_public_page(const struct page *page) +{ + return false; +} +#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ + + static inline void get_page(struct page *page) { page = compound_head(page); @@ -814,6 +837,18 @@ static inline void put_page(struct page *page) { page = compound_head(page); + /* + * For private device pages we need to catch refcount transition from + * 2 to 1, when refcount reach one it means the private device page is + * free and we need to inform the device driver through callback. See + * include/linux/memremap.h and HMM for details. + */ + if (IS_HMM_ENABLED && unlikely(is_device_private_page(page) || + unlikely(is_device_public_page(page)))) { + put_zone_device_private_or_public_page(page); + return; + } + if (put_page_testzero(page)) __put_page(page); } @@ -1199,8 +1234,10 @@ struct zap_details { pgoff_t last_index; /* Highest page->index to unmap */ }; -struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, - pte_t pte); +struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, + pte_t pte, bool with_public_device); +#define vm_normal_page(vma, addr, pte) _vm_normal_page(vma, addr, pte, false) + struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd); @@ -1997,13 +2034,13 @@ extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); /* interval_tree.c */ void vma_interval_tree_insert(struct vm_area_struct *node, - struct rb_root *root); + struct rb_root_cached *root); void vma_interval_tree_insert_after(struct vm_area_struct *node, struct vm_area_struct *prev, - struct rb_root *root); + struct rb_root_cached *root); void vma_interval_tree_remove(struct vm_area_struct *node, - struct rb_root *root); -struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root, + struct rb_root_cached *root); +struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root_cached *root, unsigned long start, unsigned long last); struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node, unsigned long start, unsigned long last); @@ -2013,11 +2050,12 @@ struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node, vma; vma = vma_interval_tree_iter_next(vma, start, last)) void anon_vma_interval_tree_insert(struct anon_vma_chain *node, - struct rb_root 
*root); + struct rb_root_cached *root); void anon_vma_interval_tree_remove(struct anon_vma_chain *node, - struct rb_root *root); -struct anon_vma_chain *anon_vma_interval_tree_iter_first( - struct rb_root *root, unsigned long start, unsigned long last); + struct rb_root_cached *root); +struct anon_vma_chain * +anon_vma_interval_tree_iter_first(struct rb_root_cached *root, + unsigned long start, unsigned long last); struct anon_vma_chain *anon_vma_interval_tree_iter_next( struct anon_vma_chain *node, unsigned long start, unsigned long last); #ifdef CONFIG_DEBUG_VM_RB diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f45ad815b7d7..46f4ecf5479a 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -23,6 +23,7 @@ struct address_space; struct mem_cgroup; +struct hmm; /* * Each physical page in the system has a struct page associated with @@ -503,6 +504,11 @@ struct mm_struct { atomic_long_t hugetlb_usage; #endif struct work_struct async_put_work; + +#if IS_ENABLED(CONFIG_HMM) + /* HMM needs to track a few things per mm */ + struct hmm *hmm; +#endif } __randomize_layout; extern struct mm_struct init_mm; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e7e92c8f4883..356a814e7c8e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -114,6 +114,20 @@ struct zone_padding { #define ZONE_PADDING(name) #endif +#ifdef CONFIG_NUMA +enum numa_stat_item { + NUMA_HIT, /* allocated in intended node */ + NUMA_MISS, /* allocated in non intended node */ + NUMA_FOREIGN, /* was intended here, hit elsewhere */ + NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */ + NUMA_LOCAL, /* allocation from local node */ + NUMA_OTHER, /* allocation from other node */ + NR_VM_NUMA_STAT_ITEMS +}; +#else +#define NR_VM_NUMA_STAT_ITEMS 0 +#endif + enum zone_stat_item { /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, @@ -132,14 +146,6 @@ enum zone_stat_item { #if IS_ENABLED(CONFIG_ZSMALLOC) NR_ZSPAGES, /* allocated in zsmalloc */ #endif -#ifdef CONFIG_NUMA - NUMA_HIT, /* allocated in intended node */ - NUMA_MISS, /* allocated in non intended node */ - NUMA_FOREIGN, /* was intended here, hit elsewhere */ - NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */ - NUMA_LOCAL, /* allocation from local node */ - NUMA_OTHER, /* allocation from other node */ -#endif NR_FREE_CMA_PAGES, NR_VM_ZONE_STAT_ITEMS }; @@ -276,6 +282,7 @@ struct per_cpu_pageset { struct per_cpu_pages pcp; #ifdef CONFIG_NUMA s8 expire; + u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS]; #endif #ifdef CONFIG_SMP s8 stat_threshold; @@ -496,6 +503,7 @@ struct zone { ZONE_PADDING(_pad3_) /* Zone statistics */ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; + atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS]; } ____cacheline_internodealigned_in_smp; enum pgdat_flags { diff --git a/include/linux/pps-gpio.h b/include/linux/pps-gpio.h index 0035abe41b9a..56f35dd3d01d 100644 --- a/include/linux/pps-gpio.h +++ b/include/linux/pps-gpio.h @@ -29,4 +29,4 @@ struct pps_gpio_platform_data { const char *gpio_label; }; -#endif +#endif /* _PPS_GPIO_H */ diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h index 35ac903956c7..80a980cc8d95 100644 --- a/include/linux/pps_kernel.h +++ b/include/linux/pps_kernel.h @@ -22,7 +22,6 @@ #define LINUX_PPS_KERNEL_H #include <linux/pps.h> - #include <linux/cdev.h> #include <linux/device.h> #include <linux/time.h> @@ -35,9 +34,9 @@ struct pps_device; /* The specific PPS source info */ struct pps_source_info { - char 
name[PPS_MAX_NAME_LEN]; /* simbolic name */ + char name[PPS_MAX_NAME_LEN]; /* symbolic name */ char path[PPS_MAX_NAME_LEN]; /* path of connected device */ - int mode; /* PPS's allowed mode */ + int mode; /* PPS allowed mode */ void (*echo)(struct pps_device *pps, int event, void *data); /* PPS echo function */ @@ -57,10 +56,10 @@ struct pps_event_time { struct pps_device { struct pps_source_info info; /* PSS source info */ - struct pps_kparams params; /* PPS's current params */ + struct pps_kparams params; /* PPS current params */ - __u32 assert_sequence; /* PPS' assert event seq # */ - __u32 clear_sequence; /* PPS' clear event seq # */ + __u32 assert_sequence; /* PPS assert event seq # */ + __u32 clear_sequence; /* PPS clear event seq # */ struct pps_ktime assert_tu; struct pps_ktime clear_tu; int current_mode; /* PPS mode at event time */ @@ -69,7 +68,7 @@ struct pps_device { wait_queue_head_t queue; /* PPS event queue */ unsigned int id; /* PPS source unique ID */ - void const *lookup_cookie; /* pps_lookup_dev only */ + void const *lookup_cookie; /* For pps_lookup_dev() only */ struct cdev cdev; struct device *dev; struct fasync_struct *async_queue; /* fasync method */ @@ -101,7 +100,7 @@ extern struct pps_device *pps_register_source( extern void pps_unregister_source(struct pps_device *pps); extern void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event, void *data); -/* Look up a pps device by magic cookie */ +/* Look up a pps_device by magic cookie */ struct pps_device *pps_lookup_dev(void const *cookie); static inline void timespec_to_pps_ktime(struct pps_ktime *kt, @@ -132,4 +131,3 @@ static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec64 delta } #endif /* LINUX_PPS_KERNEL_H */ - diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 2d2bf592d9db..76124dd4e36d 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -28,13 +28,7 @@ extern struct proc_dir_entry *proc_create_data(const char *, umode_t, const struct file_operations *, void *); -static inline struct proc_dir_entry *proc_create( - const char *name, umode_t mode, struct proc_dir_entry *parent, - const struct file_operations *proc_fops) -{ - return proc_create_data(name, mode, parent, proc_fops, NULL); -} - +struct proc_dir_entry *proc_create(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct file_operations *proc_fops); extern void proc_set_size(struct proc_dir_entry *, loff_t); extern void proc_set_user(struct proc_dir_entry *, kuid_t, kgid_t); extern void *PDE_DATA(const struct inode *); diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index e585018498d5..d574361943ea 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -44,10 +44,25 @@ struct rb_root { struct rb_node *rb_node; }; +/* + * Leftmost-cached rbtrees. + * + * We do not cache the rightmost node based on footprint + * size vs number of potential users that could benefit + * from O(1) rb_last(). Just not worth it, users that want + * this feature can always implement the logic explicitly. + * Furthermore, users that want to cache both pointers may + * find it a bit asymmetric, but that's ok. 
+ */ +struct rb_root_cached { + struct rb_root rb_root; + struct rb_node *rb_leftmost; +}; #define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) #define RB_ROOT (struct rb_root) { NULL, } +#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL } #define rb_entry(ptr, type, member) container_of(ptr, type, member) #define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL) @@ -69,6 +84,12 @@ extern struct rb_node *rb_prev(const struct rb_node *); extern struct rb_node *rb_first(const struct rb_root *); extern struct rb_node *rb_last(const struct rb_root *); +extern void rb_insert_color_cached(struct rb_node *, + struct rb_root_cached *, bool); +extern void rb_erase_cached(struct rb_node *node, struct rb_root_cached *); +/* Same as rb_first(), but O(1) */ +#define rb_first_cached(root) (root)->rb_leftmost + /* Postorder iteration - always visit the parent after its children */ extern struct rb_node *rb_first_postorder(const struct rb_root *); extern struct rb_node *rb_next_postorder(const struct rb_node *); diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 9702b6e183bc..6bfd2b581f75 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -41,7 +41,9 @@ struct rb_augment_callbacks { void (*rotate)(struct rb_node *old, struct rb_node *new); }; -extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, +extern void __rb_insert_augmented(struct rb_node *node, + struct rb_root *root, + bool newleft, struct rb_node **leftmost, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); /* * Fixup the rbtree and update the augmented information when rebalancing. @@ -57,7 +59,16 @@ static inline void rb_insert_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { - __rb_insert_augmented(node, root, augment->rotate); + __rb_insert_augmented(node, root, false, NULL, augment->rotate); +} + +static inline void +rb_insert_augmented_cached(struct rb_node *node, + struct rb_root_cached *root, bool newleft, + const struct rb_augment_callbacks *augment) +{ + __rb_insert_augmented(node, &root->rb_root, + newleft, &root->rb_leftmost, augment->rotate); } #define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \ @@ -150,6 +161,7 @@ extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, static __always_inline struct rb_node * __rb_erase_augmented(struct rb_node *node, struct rb_root *root, + struct rb_node **leftmost, const struct rb_augment_callbacks *augment) { struct rb_node *child = node->rb_right; @@ -157,6 +169,9 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, struct rb_node *parent, *rebalance; unsigned long pc; + if (leftmost && node == *leftmost) + *leftmost = rb_next(node); + if (!tmp) { /* * Case 1: node to erase has no more than 1 child (easy!) 
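As a minimal usage sketch of the leftmost-cached rbtree API declared above (the struct item type, its key field, and the helper names below are invented for illustration; rb_link_node(), rb_entry(), rb_insert_color_cached(), rb_erase_cached() and rb_first_cached() are the interfaces from include/linux/rbtree.h), a caller might do:

#include <linux/types.h>
#include <linux/rbtree.h>

struct item {
	struct rb_node node;
	unsigned long key;
};

static struct rb_root_cached item_tree = RB_ROOT_CACHED;

static void item_insert(struct item *new)
{
	struct rb_node **link = &item_tree.rb_root.rb_node;
	struct rb_node *parent = NULL;
	bool leftmost = true;

	while (*link) {
		struct item *cur = rb_entry(*link, struct item, node);

		parent = *link;
		if (new->key < cur->key) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = false;	/* new node is not the smallest key */
		}
	}

	rb_link_node(&new->node, parent, link);
	/* The extra arguments keep rb_leftmost up to date during rebalancing. */
	rb_insert_color_cached(&new->node, &item_tree, leftmost);
}

static struct item *item_first(void)
{
	struct rb_node *n = rb_first_cached(&item_tree);	/* O(1), no tree walk */

	return n ? rb_entry(n, struct item, node) : NULL;
}

static void item_remove(struct item *it)
{
	rb_erase_cached(&it->node, &item_tree);	/* also fixes up rb_leftmost */
}

The bool passed to rb_insert_color_cached() is the caller's statement that the new node became the leftmost one, which is why the descent loop tracks it.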
@@ -256,9 +271,21 @@ static __always_inline void rb_erase_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { - struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); + struct rb_node *rebalance = __rb_erase_augmented(node, root, + NULL, augment); if (rebalance) __rb_erase_color(rebalance, root, augment->rotate); } +static __always_inline void +rb_erase_augmented_cached(struct rb_node *node, struct rb_root_cached *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *rebalance = __rb_erase_augmented(node, &root->rb_root, + &root->rb_leftmost, + augment); + if (rebalance) + __rb_erase_color(rebalance, &root->rb_root, augment->rotate); +} + #endif /* _LINUX_RBTREE_AUGMENTED_H */ diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 7d56a7ea2b2e..361c08e35dbc 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -127,7 +127,7 @@ struct rhashtable; * @head_offset: Offset of rhash_head in struct to be hashed * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking - * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) + * @locks_mul: Number of bucket locks to allocate per cpu (default: 32) * @automatic_shrinking: Enable automatic shrinking of tables * @nulls_base: Base value to generate nulls marker * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 43ef2c30cb0f..733d3d8181e2 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -55,7 +55,9 @@ struct anon_vma { * is serialized by a system wide lock only visible to * mm_take_all_locks() (mm_all_locks_mutex). */ - struct rb_root rb_root; /* Interval tree of private "related" vmas */ + + /* Interval tree of private "related" vmas */ + struct rb_root_cached rb_root; }; /* @@ -93,8 +95,9 @@ enum ttu_flags { TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible * and caller guarantees they will * do a final flush if necessary */ - TTU_RMAP_LOCKED = 0x80 /* do not grab rmap lock: + TTU_RMAP_LOCKED = 0x80, /* do not grab rmap lock: * caller holds it */ + TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */ }; #ifdef CONFIG_MMU diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 44fd002f7cd5..53fcbe9de7fd 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -22,18 +22,17 @@ extern int max_lock_depth; /* for sysctl */ * The rt_mutex structure * * @wait_lock: spinlock to protect the structure - * @waiters: rbtree root to enqueue waiters in priority order - * @waiters_leftmost: top waiter + * @waiters: rbtree root to enqueue waiters in priority order; + * caches top-waiter (leftmost node). 
* @owner: the mutex owner */ struct rt_mutex { raw_spinlock_t wait_lock; - struct rb_root waiters; - struct rb_node *waiters_leftmost; + struct rb_root_cached waiters; struct task_struct *owner; #ifdef CONFIG_DEBUG_RT_MUTEXES int save_state; - const char *name, *file; + const char *name, *file; int line; void *magic; #endif @@ -84,7 +83,7 @@ do { \ #define __RT_MUTEX_INITIALIZER(mutexname) \ { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ - , .waiters = RB_ROOT \ + , .waiters = RB_ROOT_CACHED \ , .owner = NULL \ __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)} diff --git a/include/linux/sched.h b/include/linux/sched.h index 68b38335d33c..92fb8dd5a9e4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -812,8 +812,7 @@ struct task_struct { #ifdef CONFIG_RT_MUTEXES /* PI waiters blocked on a rt_mutex held by this task: */ - struct rb_root pi_waiters; - struct rb_node *pi_waiters_leftmost; + struct rb_root_cached pi_waiters; /* Updated under owner's pi_lock and rq lock */ struct task_struct *pi_top_task; /* Deadlock detection and priority inheritance handling: */ diff --git a/include/linux/string.h b/include/linux/string.h index a467e617eeb0..c8bdafffd2f0 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -99,6 +99,36 @@ extern __kernel_size_t strcspn(const char *,const char *); #ifndef __HAVE_ARCH_MEMSET extern void * memset(void *,int,__kernel_size_t); #endif + +#ifndef __HAVE_ARCH_MEMSET16 +extern void *memset16(uint16_t *, uint16_t, __kernel_size_t); +#endif + +#ifndef __HAVE_ARCH_MEMSET32 +extern void *memset32(uint32_t *, uint32_t, __kernel_size_t); +#endif + +#ifndef __HAVE_ARCH_MEMSET64 +extern void *memset64(uint64_t *, uint64_t, __kernel_size_t); +#endif + +static inline void *memset_l(unsigned long *p, unsigned long v, + __kernel_size_t n) +{ + if (BITS_PER_LONG == 32) + return memset32((uint32_t *)p, v, n); + else + return memset64((uint64_t *)p, v, n); +} + +static inline void *memset_p(void **p, void *v, __kernel_size_t n) +{ + if (BITS_PER_LONG == 32) + return memset32((uint32_t *)p, (uintptr_t)v, n); + else + return memset64((uint64_t *)p, (uintptr_t)v, n); +} + #ifndef __HAVE_ARCH_MEMCPY extern void * memcpy(void *,const void *,__kernel_size_t); #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index 8bf3487fb204..8a807292037f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -51,6 +51,23 @@ static inline int current_is_kswapd(void) */ /* + * Unaddressable device memory support. See include/linux/hmm.h and + * Documentation/vm/hmm.txt. Short description is we need struct pages for + * device memory that is unaddressable (inaccessible) by CPU, so that we can + * migrate part of a process memory to device memory. + * + * When a page is migrated from CPU to device, we set the CPU page table entry + * to a special SWP_DEVICE_* entry. + */ +#ifdef CONFIG_DEVICE_PRIVATE +#define SWP_DEVICE_NUM 2 +#define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM) +#define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1) +#else +#define SWP_DEVICE_NUM 0 +#endif + +/* * NUMA node memory migration support */ #ifdef CONFIG_MIGRATION @@ -72,7 +89,8 @@ static inline int current_is_kswapd(void) #endif #define MAX_SWAPFILES \ - ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM) + ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \ + SWP_MIGRATION_NUM - SWP_HWPOISON_NUM) /* * Magic header for a swap area. 
The first part of the union is @@ -469,8 +487,8 @@ static inline void show_swap_cache_info(void) { } -#define free_swap_and_cache(swp) is_migration_entry(swp) -#define swapcache_prepare(swp) is_migration_entry(swp) +#define free_swap_and_cache(e) ({(is_migration_entry(e) || is_device_private_entry(e));}) +#define swapcache_prepare(e) ({(is_migration_entry(e) || is_device_private_entry(e));}) static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask) { diff --git a/include/linux/swapops.h b/include/linux/swapops.h index c5ff7b217ee6..291c4b534658 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -100,10 +100,79 @@ static inline void *swp_to_radix_entry(swp_entry_t entry) return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY); } +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) +static inline swp_entry_t make_device_private_entry(struct page *page, bool write) +{ + return swp_entry(write ? SWP_DEVICE_WRITE : SWP_DEVICE_READ, + page_to_pfn(page)); +} + +static inline bool is_device_private_entry(swp_entry_t entry) +{ + int type = swp_type(entry); + return type == SWP_DEVICE_READ || type == SWP_DEVICE_WRITE; +} + +static inline void make_device_private_entry_read(swp_entry_t *entry) +{ + *entry = swp_entry(SWP_DEVICE_READ, swp_offset(*entry)); +} + +static inline bool is_write_device_private_entry(swp_entry_t entry) +{ + return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); +} + +static inline struct page *device_private_entry_to_page(swp_entry_t entry) +{ + return pfn_to_page(swp_offset(entry)); +} + +int device_private_entry_fault(struct vm_area_struct *vma, + unsigned long addr, + swp_entry_t entry, + unsigned int flags, + pmd_t *pmdp); +#else /* CONFIG_DEVICE_PRIVATE */ +static inline swp_entry_t make_device_private_entry(struct page *page, bool write) +{ + return swp_entry(0, 0); +} + +static inline void make_device_private_entry_read(swp_entry_t *entry) +{ +} + +static inline bool is_device_private_entry(swp_entry_t entry) +{ + return false; +} + +static inline bool is_write_device_private_entry(swp_entry_t entry) +{ + return false; +} + +static inline struct page *device_private_entry_to_page(swp_entry_t entry) +{ + return NULL; +} + +static inline int device_private_entry_fault(struct vm_area_struct *vma, + unsigned long addr, + swp_entry_t entry, + unsigned int flags, + pmd_t *pmdp) +{ + return VM_FAULT_SIGBUS; +} +#endif /* CONFIG_DEVICE_PRIVATE */ + #ifdef CONFIG_MIGRATION static inline swp_entry_t make_migration_entry(struct page *page, int write) { - BUG_ON(!PageLocked(page)); + BUG_ON(!PageLocked(compound_head(page))); + return swp_entry(write ? 
SWP_MIGRATION_WRITE : SWP_MIGRATION_READ, page_to_pfn(page)); } @@ -126,7 +195,7 @@ static inline struct page *migration_entry_to_page(swp_entry_t entry) * Any use of migration entries may only occur while the * corresponding page is locked */ - BUG_ON(!PageLocked(p)); + BUG_ON(!PageLocked(compound_head(p))); return p; } @@ -148,7 +217,11 @@ static inline int is_migration_entry(swp_entry_t swp) { return 0; } -#define migration_entry_to_page(swp) NULL +static inline struct page *migration_entry_to_page(swp_entry_t entry) +{ + return NULL; +} + static inline void make_migration_entry_read(swp_entry_t *entryp) { } static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, spinlock_t *ptl) { } @@ -163,6 +236,70 @@ static inline int is_write_migration_entry(swp_entry_t entry) #endif +struct page_vma_mapped_walk; + +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +extern void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, + struct page *page); + +extern void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, + struct page *new); + +extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd); + +static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) +{ + swp_entry_t arch_entry; + + if (pmd_swp_soft_dirty(pmd)) + pmd = pmd_swp_clear_soft_dirty(pmd); + arch_entry = __pmd_to_swp_entry(pmd); + return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); +} + +static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) +{ + swp_entry_t arch_entry; + + arch_entry = __swp_entry(swp_type(entry), swp_offset(entry)); + return __swp_entry_to_pmd(arch_entry); +} + +static inline int is_pmd_migration_entry(pmd_t pmd) +{ + return !pmd_present(pmd) && is_migration_entry(pmd_to_swp_entry(pmd)); +} +#else +static inline void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, + struct page *page) +{ + BUILD_BUG(); +} + +static inline void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, + struct page *new) +{ + BUILD_BUG(); +} + +static inline void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { } + +static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) +{ + return swp_entry(0, 0); +} + +static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) +{ + return __pmd(0); +} + +static inline int is_pmd_migration_entry(pmd_t pmd) +{ + return 0; +} +#endif + #ifdef CONFIG_MEMORY_FAILURE extern atomic_long_t num_poisoned_pages __read_mostly; diff --git a/include/linux/umh.h b/include/linux/umh.h new file mode 100644 index 000000000000..244aff638220 --- /dev/null +++ b/include/linux/umh.h @@ -0,0 +1,69 @@ +#ifndef __LINUX_UMH_H__ +#define __LINUX_UMH_H__ + +#include <linux/gfp.h> +#include <linux/stddef.h> +#include <linux/errno.h> +#include <linux/compiler.h> +#include <linux/workqueue.h> +#include <linux/sysctl.h> + +struct cred; +struct file; + +#define UMH_NO_WAIT 0 /* don't wait at all */ +#define UMH_WAIT_EXEC 1 /* wait for the exec, but not the process */ +#define UMH_WAIT_PROC 2 /* wait for the process to complete */ +#define UMH_KILLABLE 4 /* wait for EXEC/PROC killable */ + +struct subprocess_info { + struct work_struct work; + struct completion *complete; + const char *path; + char **argv; + char **envp; + int wait; + int retval; + int (*init)(struct subprocess_info *info, struct cred *new); + void (*cleanup)(struct subprocess_info *info); + void *data; +} __randomize_layout; + +extern int +call_usermodehelper(const char *path, char **argv, char **envp, int wait); + +extern struct subprocess_info * +call_usermodehelper_setup(const char *path, char 
**argv, char **envp, + gfp_t gfp_mask, + int (*init)(struct subprocess_info *info, struct cred *new), + void (*cleanup)(struct subprocess_info *), void *data); + +extern int +call_usermodehelper_exec(struct subprocess_info *info, int wait); + +extern struct ctl_table usermodehelper_table[]; + +enum umh_disable_depth { + UMH_ENABLED = 0, + UMH_FREEZING, + UMH_DISABLED, +}; + +extern int __usermodehelper_disable(enum umh_disable_depth depth); +extern void __usermodehelper_set_disable_depth(enum umh_disable_depth depth); + +static inline int usermodehelper_disable(void) +{ + return __usermodehelper_disable(UMH_DISABLED); +} + +static inline void usermodehelper_enable(void) +{ + __usermodehelper_set_disable_depth(UMH_ENABLED); +} + +extern int usermodehelper_read_trylock(void); +extern long usermodehelper_read_lock_wait(long timeout); +extern void usermodehelper_read_unlock(void); + +#endif /* __LINUX_UMH_H__ */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 97e11ab573f0..ade7cb5f1359 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -107,8 +107,37 @@ static inline void vm_events_fold_cpu(int cpu) * Zone and node-based page accounting with per cpu differentials. */ extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS]; +extern atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS]; extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS]; +#ifdef CONFIG_NUMA +static inline void zone_numa_state_add(long x, struct zone *zone, + enum numa_stat_item item) +{ + atomic_long_add(x, &zone->vm_numa_stat[item]); + atomic_long_add(x, &vm_numa_stat[item]); +} + +static inline unsigned long global_numa_state(enum numa_stat_item item) +{ + long x = atomic_long_read(&vm_numa_stat[item]); + + return x; +} + +static inline unsigned long zone_numa_state_snapshot(struct zone *zone, + enum numa_stat_item item) +{ + long x = atomic_long_read(&zone->vm_numa_stat[item]); + int cpu; + + for_each_online_cpu(cpu) + x += per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]; + + return x; +} +#endif /* CONFIG_NUMA */ + static inline void zone_page_state_add(long x, struct zone *zone, enum zone_stat_item item) { @@ -194,8 +223,10 @@ static inline unsigned long node_page_state_snapshot(pg_data_t *pgdat, #ifdef CONFIG_NUMA +extern void __inc_numa_state(struct zone *zone, enum numa_stat_item item); extern unsigned long sum_zone_node_page_state(int node, - enum zone_stat_item item); + enum zone_stat_item item); +extern unsigned long sum_zone_numa_state(int node, enum numa_stat_item item); extern unsigned long node_page_state(struct pglist_data *pgdat, enum node_stat_item item); #else diff --git a/include/linux/vt_buffer.h b/include/linux/vt_buffer.h index f38c10ba3ff5..30b6e0d2a942 100644 --- a/include/linux/vt_buffer.h +++ b/include/linux/vt_buffer.h @@ -13,6 +13,7 @@ #ifndef _LINUX_VT_BUFFER_H_ #define _LINUX_VT_BUFFER_H_ +#include <linux/string.h> #if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_MDA_CONSOLE) #include <asm/vga.h> @@ -26,24 +27,33 @@ #ifndef VT_BUF_HAVE_MEMSETW static inline void scr_memsetw(u16 *s, u16 c, unsigned int count) { +#ifdef VT_BUF_HAVE_RW count /= 2; while (count--) scr_writew(c, s++); +#else + memset16(s, c, count / 2); +#endif } #endif #ifndef VT_BUF_HAVE_MEMCPYW static inline void scr_memcpyw(u16 *d, const u16 *s, unsigned int count) { +#ifdef VT_BUF_HAVE_RW count /= 2; while (count--) scr_writew(scr_readw(s++), d++); +#else + memcpy(d, s, count); +#endif } #endif #ifndef VT_BUF_HAVE_MEMMOVEW static inline void scr_memmovew(u16 *d, const u16 *s, 
unsigned int count) { +#ifdef VT_BUF_HAVE_RW if (d < s) scr_memcpyw(d, s, count); else { @@ -53,6 +63,9 @@ static inline void scr_memmovew(u16 *d, const u16 *s, unsigned int count) while (count--) scr_writew(scr_readw(--s), --d); } +#else + memmove(d, s, count); +#endif } #endif diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index fb67554aabd6..5eb7f5bc8248 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -111,22 +111,25 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt, void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bound); -void rbt_ib_umem_insert(struct umem_odp_node *node, struct rb_root *root); -void rbt_ib_umem_remove(struct umem_odp_node *node, struct rb_root *root); +void rbt_ib_umem_insert(struct umem_odp_node *node, + struct rb_root_cached *root); +void rbt_ib_umem_remove(struct umem_odp_node *node, + struct rb_root_cached *root); typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end, void *cookie); /* * Call the callback on each ib_umem in the range. Returns the logical or of * the return values of the functions called. */ -int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end, +int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, + u64 start, u64 end, umem_call_back cb, void *cookie); /* * Find first region intersecting with address range. * Return NULL if not found */ -struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root, +struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length); static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e6df68048517..bdb1279a415b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1457,7 +1457,7 @@ struct ib_ucontext { struct pid *tgid; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - struct rb_root umem_tree; + struct rb_root_cached umem_tree; /* * Protects .umem_rbroot and tree, as well as odp_mrs_count and * mmu notifiers registration. diff --git a/include/uapi/linux/auto_dev-ioctl.h b/include/uapi/linux/auto_dev-ioctl.h index 744b3d060968..5558db8e6646 100644 --- a/include/uapi/linux/auto_dev-ioctl.h +++ b/include/uapi/linux/auto_dev-ioctl.h @@ -16,7 +16,7 @@ #define AUTOFS_DEVICE_NAME "autofs" #define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1 -#define AUTOFS_DEV_IOCTL_VERSION_MINOR 0 +#define AUTOFS_DEV_IOCTL_VERSION_MINOR 1 #define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl) diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h index 7c6da423d54e..9453e9a07c9d 100644 --- a/include/uapi/linux/auto_fs4.h +++ b/include/uapi/linux/auto_fs4.h @@ -155,8 +155,6 @@ enum { }; #define AUTOFS_IOC_EXPIRE_MULTI _IOW(AUTOFS_IOCTL, AUTOFS_IOC_EXPIRE_MULTI_CMD, int) -#define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI -#define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI #define AUTOFS_IOC_PROTOSUBVER _IOR(AUTOFS_IOCTL, AUTOFS_IOC_PROTOSUBVER_CMD, int) #define AUTOFS_IOC_ASKUMOUNT _IOR(AUTOFS_IOCTL, AUTOFS_IOC_ASKUMOUNT_CMD, int) diff --git a/include/uapi/linux/pps.h b/include/uapi/linux/pps.h index c1cb3825a8bc..c29d6b791c08 100644 --- a/include/uapi/linux/pps.h +++ b/include/uapi/linux/pps.h @@ -95,8 +95,8 @@ struct pps_kparams { #define PPS_CAPTURECLEAR 0x02 /* capture clear events */ #define PPS_CAPTUREBOTH 0x03 /* capture assert and clear events */ -#define PPS_OFFSETASSERT 0x10 /* apply compensation for assert ev. 
*/ -#define PPS_OFFSETCLEAR 0x20 /* apply compensation for clear ev. */ +#define PPS_OFFSETASSERT 0x10 /* apply compensation for assert event */ +#define PPS_OFFSETCLEAR 0x20 /* apply compensation for clear event */ #define PPS_CANWAIT 0x100 /* can we wait for an event? */ #define PPS_CANPOLL 0x200 /* bit reserved for future use */
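To close, a small hypothetical sketch of the memset16()/memset_l() helpers declared in include/linux/string.h above, in the spirit of the scr_memsetw() rework in include/linux/vt_buffer.h; the buffer sizes and fill values are made up for illustration:

#include <linux/types.h>
#include <linux/string.h>
#include <linux/slab.h>

static int fill_buffers(void)
{
	size_t nr_cells = 80 * 25;	/* a screen-sized array of u16 cells */
	u16 *cells;
	unsigned long *words;

	cells = kmalloc_array(nr_cells, sizeof(*cells), GFP_KERNEL);
	if (!cells)
		return -ENOMEM;

	/* memset16() takes a count of 16-bit elements, not bytes. */
	memset16(cells, 0x0720, nr_cells);

	words = kmalloc_array(128, sizeof(*words), GFP_KERNEL);
	if (words) {
		/* memset_l() resolves to memset32() or memset64() via BITS_PER_LONG. */
		memset_l(words, 0x5a5a5a5aUL, 128);
	}

	kfree(words);	/* kfree(NULL) is a no-op */
	kfree(cells);
	return 0;
}

The element-count convention is why scr_memsetw() above divides its byte count by two before calling memset16().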