diff options
| author | Thomas Gleixner <tglx@linutronix.de> | 2017-03-17 22:34:30 +0300 |
|---|---|---|
| committer | Thomas Gleixner <tglx@linutronix.de> | 2017-03-17 22:34:30 +0300 |
| commit | 79a21d572cf66968a2272fdf9711f835518256d9 (patch) | |
| tree | 5fe3e4692fb8375faf8e1aeea1c2eae38c342250 /include | |
| parent | d1eb98143c56f24fef125f5bbed49ae0b52fb7d6 (diff) | |
| parent | 822f5845f710e57d7e2df1fd1ee00d6e19d334fe (diff) | |
| download | linux-79a21d572cf66968a2272fdf9711f835518256d9.tar.xz | |
Merge tag 'efi-urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi into efi/urgent
Pull a single UEFI fix from Ard:
- Reduce the severity of the notice that appears when the ESRT table points
to memory that is not covered by the memory map. It is scaring our users
and interfering with their nice splash screens. Note that the ESRT may still
be perfectly usable, and is currently (to my knowledge) not widely used to
begin with.
Diffstat (limited to 'include')
140 files changed, 4151 insertions, 3243 deletions
diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h index 5bdab6bffd23..928fd66b1271 100644 --- a/include/asm-generic/4level-fixup.h +++ b/include/asm-generic/4level-fixup.h @@ -15,7 +15,6 @@ ((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \ NULL: pmd_offset(pud, address)) -#define pud_alloc(mm, pgd, address) (pgd) #define pud_offset(pgd, start) (pgd) #define pud_none(pud) 0 #define pud_bad(pud) 0 @@ -35,4 +34,6 @@ #undef pud_addr_end #define pud_addr_end(addr, end) (end) +#include <asm-generic/5level-fixup.h> + #endif diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h new file mode 100644 index 000000000000..b5ca82dc4175 --- /dev/null +++ b/include/asm-generic/5level-fixup.h @@ -0,0 +1,41 @@ +#ifndef _5LEVEL_FIXUP_H +#define _5LEVEL_FIXUP_H + +#define __ARCH_HAS_5LEVEL_HACK +#define __PAGETABLE_P4D_FOLDED + +#define P4D_SHIFT PGDIR_SHIFT +#define P4D_SIZE PGDIR_SIZE +#define P4D_MASK PGDIR_MASK +#define PTRS_PER_P4D 1 + +#define p4d_t pgd_t + +#define pud_alloc(mm, p4d, address) \ + ((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \ + NULL : pud_offset(p4d, address)) + +#define p4d_alloc(mm, pgd, address) (pgd) +#define p4d_offset(pgd, start) (pgd) +#define p4d_none(p4d) 0 +#define p4d_bad(p4d) 0 +#define p4d_present(p4d) 1 +#define p4d_ERROR(p4d) do { } while (0) +#define p4d_clear(p4d) pgd_clear(p4d) +#define p4d_val(p4d) pgd_val(p4d) +#define p4d_populate(mm, p4d, pud) pgd_populate(mm, p4d, pud) +#define p4d_page(p4d) pgd_page(p4d) +#define p4d_page_vaddr(p4d) pgd_page_vaddr(p4d) + +#define __p4d(x) __pgd(x) +#define set_p4d(p4dp, p4d) set_pgd(p4dp, p4d) + +#undef p4d_free_tlb +#define p4d_free_tlb(tlb, x, addr) do { } while (0) +#define p4d_free(mm, x) do { } while (0) +#define __p4d_free_tlb(tlb, x, addr) do { } while (0) + +#undef p4d_addr_end +#define p4d_addr_end(addr, end) (end) + +#endif diff --git a/include/asm-generic/pgtable-nop4d-hack.h b/include/asm-generic/pgtable-nop4d-hack.h new file mode 100644 index 000000000000..752fb7511750 --- /dev/null +++ b/include/asm-generic/pgtable-nop4d-hack.h @@ -0,0 +1,62 @@ +#ifndef _PGTABLE_NOP4D_HACK_H +#define _PGTABLE_NOP4D_HACK_H + +#ifndef __ASSEMBLY__ +#include <asm-generic/5level-fixup.h> + +#define __PAGETABLE_PUD_FOLDED + +/* + * Having the pud type consist of a pgd gets the size right, and allows + * us to conceptually access the pgd entry that this pud is folded into + * without casting. + */ +typedef struct { pgd_t pgd; } pud_t; + +#define PUD_SHIFT PGDIR_SHIFT +#define PTRS_PER_PUD 1 +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the pud is never bad, and a pud always exists (as it's folded + * into the pgd entry) + */ +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } +static inline void pgd_clear(pgd_t *pgd) { } +#define pud_ERROR(pud) (pgd_ERROR((pud).pgd)) + +#define pgd_populate(mm, pgd, pud) do { } while (0) +/* + * (puds are folded into pgds so this doesn't get actually called, + * but the define is needed for a generic inline function.) + */ +#define set_pgd(pgdptr, pgdval) set_pud((pud_t *)(pgdptr), (pud_t) { pgdval }) + +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) +{ + return (pud_t *)pgd; +} + +#define pud_val(x) (pgd_val((x).pgd)) +#define __pud(x) ((pud_t) { __pgd(x) }) + +#define pgd_page(pgd) (pud_page((pud_t){ pgd })) +#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd })) + +/* + * allocating and freeing a pud is trivial: the 1-entry pud is + * inside the pgd, so has no extra memory associated with it. + */ +#define pud_alloc_one(mm, address) NULL +#define pud_free(mm, x) do { } while (0) +#define __pud_free_tlb(tlb, x, a) do { } while (0) + +#undef pud_addr_end +#define pud_addr_end(addr, end) (end) + +#endif /* __ASSEMBLY__ */ +#endif /* _PGTABLE_NOP4D_HACK_H */ diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h new file mode 100644 index 000000000000..de364ecb8df6 --- /dev/null +++ b/include/asm-generic/pgtable-nop4d.h @@ -0,0 +1,56 @@ +#ifndef _PGTABLE_NOP4D_H +#define _PGTABLE_NOP4D_H + +#ifndef __ASSEMBLY__ + +#define __PAGETABLE_P4D_FOLDED + +typedef struct { pgd_t pgd; } p4d_t; + +#define P4D_SHIFT PGDIR_SHIFT +#define PTRS_PER_P4D 1 +#define P4D_SIZE (1UL << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE-1)) + +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the p4d is never bad, and a p4d always exists (as it's folded + * into the pgd entry) + */ +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } +static inline void pgd_clear(pgd_t *pgd) { } +#define p4d_ERROR(p4d) (pgd_ERROR((p4d).pgd)) + +#define pgd_populate(mm, pgd, p4d) do { } while (0) +/* + * (p4ds are folded into pgds so this doesn't get actually called, + * but the define is needed for a generic inline function.) + */ +#define set_pgd(pgdptr, pgdval) set_p4d((p4d_t *)(pgdptr), (p4d_t) { pgdval }) + +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +{ + return (p4d_t *)pgd; +} + +#define p4d_val(x) (pgd_val((x).pgd)) +#define __p4d(x) ((p4d_t) { __pgd(x) }) + +#define pgd_page(pgd) (p4d_page((p4d_t){ pgd })) +#define pgd_page_vaddr(pgd) (p4d_page_vaddr((p4d_t){ pgd })) + +/* + * allocating and freeing a p4d is trivial: the 1-entry p4d is + * inside the pgd, so has no extra memory associated with it. + */ +#define p4d_alloc_one(mm, address) NULL +#define p4d_free(mm, x) do { } while (0) +#define __p4d_free_tlb(tlb, x, a) do { } while (0) + +#undef p4d_addr_end +#define p4d_addr_end(addr, end) (end) + +#endif /* __ASSEMBLY__ */ +#endif /* _PGTABLE_NOP4D_H */ diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index 810431d8351b..c2b9b96d6268 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -3,52 +3,57 @@ #ifndef __ASSEMBLY__ +#ifdef __ARCH_USE_5LEVEL_HACK +#include <asm-generic/pgtable-nop4d-hack.h> +#else +#include <asm-generic/pgtable-nop4d.h> + #define __PAGETABLE_PUD_FOLDED /* - * Having the pud type consist of a pgd gets the size right, and allows - * us to conceptually access the pgd entry that this pud is folded into + * Having the pud type consist of a p4d gets the size right, and allows + * us to conceptually access the p4d entry that this pud is folded into * without casting. */ -typedef struct { pgd_t pgd; } pud_t; +typedef struct { p4d_t p4d; } pud_t; -#define PUD_SHIFT PGDIR_SHIFT +#define PUD_SHIFT P4D_SHIFT #define PTRS_PER_PUD 1 #define PUD_SIZE (1UL << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) /* - * The "pgd_xxx()" functions here are trivial for a folded two-level + * The "p4d_xxx()" functions here are trivial for a folded two-level * setup: the pud is never bad, and a pud always exists (as it's folded - * into the pgd entry) + * into the p4d entry) */ -static inline int pgd_none(pgd_t pgd) { return 0; } -static inline int pgd_bad(pgd_t pgd) { return 0; } -static inline int pgd_present(pgd_t pgd) { return 1; } -static inline void pgd_clear(pgd_t *pgd) { } -#define pud_ERROR(pud) (pgd_ERROR((pud).pgd)) +static inline int p4d_none(p4d_t p4d) { return 0; } +static inline int p4d_bad(p4d_t p4d) { return 0; } +static inline int p4d_present(p4d_t p4d) { return 1; } +static inline void p4d_clear(p4d_t *p4d) { } +#define pud_ERROR(pud) (p4d_ERROR((pud).p4d)) -#define pgd_populate(mm, pgd, pud) do { } while (0) +#define p4d_populate(mm, p4d, pud) do { } while (0) /* - * (puds are folded into pgds so this doesn't get actually called, + * (puds are folded into p4ds so this doesn't get actually called, * but the define is needed for a generic inline function.) */ -#define set_pgd(pgdptr, pgdval) set_pud((pud_t *)(pgdptr), (pud_t) { pgdval }) +#define set_p4d(p4dptr, p4dval) set_pud((pud_t *)(p4dptr), (pud_t) { p4dval }) -static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address) +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) { - return (pud_t *)pgd; + return (pud_t *)p4d; } -#define pud_val(x) (pgd_val((x).pgd)) -#define __pud(x) ((pud_t) { __pgd(x) } ) +#define pud_val(x) (p4d_val((x).p4d)) +#define __pud(x) ((pud_t) { __p4d(x) }) -#define pgd_page(pgd) (pud_page((pud_t){ pgd })) -#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd })) +#define p4d_page(p4d) (pud_page((pud_t){ p4d })) +#define p4d_page_vaddr(p4d) (pud_page_vaddr((pud_t){ p4d })) /* * allocating and freeing a pud is trivial: the 1-entry pud is - * inside the pgd, so has no extra memory associated with it. + * inside the p4d, so has no extra memory associated with it. */ #define pud_alloc_one(mm, address) NULL #define pud_free(mm, x) do { } while (0) @@ -58,4 +63,5 @@ static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address) #define pud_addr_end(addr, end) (end) #endif /* __ASSEMBLY__ */ +#endif /* !__ARCH_USE_5LEVEL_HACK */ #endif /* _PGTABLE_NOPUD_H */ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index f4ca23b158b3..1fad160f35de 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -10,9 +10,9 @@ #include <linux/bug.h> #include <linux/errno.h> -#if 4 - defined(__PAGETABLE_PUD_FOLDED) - defined(__PAGETABLE_PMD_FOLDED) != \ - CONFIG_PGTABLE_LEVELS -#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{PUD,PMD}_FOLDED +#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \ + defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS +#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED #endif /* @@ -424,6 +424,13 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) (__boundary - 1 < (end) - 1)? __boundary: (end); \ }) +#ifndef p4d_addr_end +#define p4d_addr_end(addr, end) \ +({ unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK; \ + (__boundary - 1 < (end) - 1)? __boundary: (end); \ +}) +#endif + #ifndef pud_addr_end #define pud_addr_end(addr, end) \ ({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \ @@ -444,6 +451,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) * Do the tests inline, but report and clear the bad entry in mm/memory.c. */ void pgd_clear_bad(pgd_t *); +void p4d_clear_bad(p4d_t *); void pud_clear_bad(pud_t *); void pmd_clear_bad(pmd_t *); @@ -458,6 +466,17 @@ static inline int pgd_none_or_clear_bad(pgd_t *pgd) return 0; } +static inline int p4d_none_or_clear_bad(p4d_t *p4d) +{ + if (p4d_none(*p4d)) + return 1; + if (unlikely(p4d_bad(*p4d))) { + p4d_clear_bad(p4d); + return 1; + } + return 0; +} + static inline int pud_none_or_clear_bad(pud_t *pud) { if (pud_none(*pud)) @@ -844,11 +863,30 @@ static inline int pmd_protnone(pmd_t pmd) #endif /* CONFIG_MMU */ #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP + +#ifndef __PAGETABLE_P4D_FOLDED +int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot); +int p4d_clear_huge(p4d_t *p4d); +#else +static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) +{ + return 0; +} +static inline int p4d_clear_huge(p4d_t *p4d) +{ + return 0; +} +#endif /* !__PAGETABLE_P4D_FOLDED */ + int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ +static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) +{ + return 0; +} static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { return 0; @@ -857,6 +895,10 @@ static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) { return 0; } +static inline int p4d_clear_huge(p4d_t *p4d) +{ + return 0; +} static inline int pud_clear_huge(pud_t *pud) { return 0; diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 4329bc6ef04b..8afa4335e5b2 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -270,6 +270,12 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, __pte_free_tlb(tlb, ptep, address); \ } while (0) +#define pmd_free_tlb(tlb, pmdp, address) \ + do { \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __pmd_free_tlb(tlb, pmdp, address); \ + } while (0) + #ifndef __ARCH_HAS_4LEVEL_HACK #define pud_free_tlb(tlb, pudp, address) \ do { \ @@ -278,11 +284,13 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, } while (0) #endif -#define pmd_free_tlb(tlb, pmdp, address) \ +#ifndef __ARCH_HAS_5LEVEL_HACK +#define p4d_free_tlb(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ - __pmd_free_tlb(tlb, pmdp, address); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __p4d_free_tlb(tlb, pudp, address); \ } while (0) +#endif #define tlb_migrate_finish(mm) do {} while (0) diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index ebe4ded0c55d..436c4c2683c7 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -360,13 +360,18 @@ static inline struct crypto_alg *crypto_get_attr_alg(struct rtattr **tb, return crypto_attr_alg(tb[1], type, mask); } +static inline int crypto_requires_off(u32 type, u32 mask, u32 off) +{ + return (type ^ off) & mask & off; +} + /* * Returns CRYPTO_ALG_ASYNC if type/mask requires the use of sync algorithms. * Otherwise returns zero. */ static inline int crypto_requires_sync(u32 type, u32 mask) { - return (type ^ CRYPTO_ALG_ASYNC) & mask & CRYPTO_ALG_ASYNC; + return crypto_requires_off(type, mask, CRYPTO_ALG_ASYNC); } noinline unsigned long __crypto_memneq(const void *a, const void *b, size_t size); diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index d81b0ba9921f..2ef16bf25826 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -40,6 +40,7 @@ #include <linux/bug.h> #include <linux/rbtree.h> #include <linux/kernel.h> +#include <linux/mm_types.h> #include <linux/list.h> #include <linux/spinlock.h> #ifdef CONFIG_DRM_DEBUG_MM diff --git a/include/drm/drm_os_linux.h b/include/drm/drm_os_linux.h index 86ab99bc0ac5..35e1482ba8a1 100644 --- a/include/drm/drm_os_linux.h +++ b/include/drm/drm_os_linux.h @@ -4,6 +4,7 @@ */ #include <linux/interrupt.h> /* For task queue support */ +#include <linux/sched/signal.h> #include <linux/delay.h> #ifndef readq diff --git a/include/dt-bindings/clock/gxbb-clkc.h b/include/dt-bindings/clock/gxbb-clkc.h index baade6f429d0..692846c7941b 100644 --- a/include/dt-bindings/clock/gxbb-clkc.h +++ b/include/dt-bindings/clock/gxbb-clkc.h @@ -14,15 +14,21 @@ #define CLKID_MPLL2 15 #define CLKID_SPI 34 #define CLKID_I2C 22 +#define CLKID_SAR_ADC 23 #define CLKID_ETH 36 #define CLKID_USB0 50 #define CLKID_USB1 51 #define CLKID_USB 55 +#define CLKID_HDMI_PCLK 63 #define CLKID_USB1_DDR_BRIDGE 64 #define CLKID_USB0_DDR_BRIDGE 65 +#define CLKID_SANA 69 +#define CLKID_GCLK_VENCI_INT0 77 #define CLKID_AO_I2C 93 #define CLKID_SD_EMMC_A 94 #define CLKID_SD_EMMC_B 95 #define CLKID_SD_EMMC_C 96 +#define CLKID_SAR_ADC_CLK 97 +#define CLKID_SAR_ADC_SEL 98 #endif /* __GXBB_CLKC_H */ diff --git a/include/dt-bindings/pinctrl/samsung.h b/include/dt-bindings/pinctrl/samsung.h index e0ebb20ffdd3..b7aa3646208b 100644 --- a/include/dt-bindings/pinctrl/samsung.h +++ b/include/dt-bindings/pinctrl/samsung.h @@ -68,4 +68,12 @@ #define EXYNOS_PIN_FUNC_6 6 #define EXYNOS_PIN_FUNC_F 0xf +/* Drive strengths for Exynos7 FSYS1 block */ +#define EXYNOS7_FSYS1_PIN_DRV_LV1 0 +#define EXYNOS7_FSYS1_PIN_DRV_LV2 4 +#define EXYNOS7_FSYS1_PIN_DRV_LV3 2 +#define EXYNOS7_FSYS1_PIN_DRV_LV4 6 +#define EXYNOS7_FSYS1_PIN_DRV_LV5 1 +#define EXYNOS7_FSYS1_PIN_DRV_LV6 5 + #endif /* __DT_BINDINGS_PINCTRL_SAMSUNG_H__ */ diff --git a/include/dt-bindings/sound/cs42l42.h b/include/dt-bindings/sound/cs42l42.h index 399a123aed58..db69d84ed7d1 100644 --- a/include/dt-bindings/sound/cs42l42.h +++ b/include/dt-bindings/sound/cs42l42.h @@ -20,7 +20,7 @@ #define CS42L42_HPOUT_LOAD_1NF 0 #define CS42L42_HPOUT_LOAD_10NF 1 -/* HPOUT Clamp to GND Overide */ +/* HPOUT Clamp to GND Override */ #define CS42L42_HPOUT_CLAMP_EN 0 #define CS42L42_HPOUT_CLAMP_DIS 1 diff --git a/include/keys/user-type.h b/include/keys/user-type.h index c56fef40f53e..e098cbe27db5 100644 --- a/include/keys/user-type.h +++ b/include/keys/user-type.h @@ -48,9 +48,14 @@ extern void user_describe(const struct key *user, struct seq_file *m); extern long user_read(const struct key *key, char __user *buffer, size_t buflen); -static inline const struct user_key_payload *user_key_payload(const struct key *key) +static inline const struct user_key_payload *user_key_payload_rcu(const struct key *key) { - return (struct user_key_payload *)rcu_dereference_key(key); + return (struct user_key_payload *)dereference_key_rcu(key); +} + +static inline struct user_key_payload *user_key_payload_locked(const struct key *key) +{ + return (struct user_key_payload *)dereference_key_locked((struct key *)key); } #endif /* CONFIG_KEYS */ diff --git a/include/linux/average.h b/include/linux/average.h index d04aa58280de..7ddaf340d2ac 100644 --- a/include/linux/average.h +++ b/include/linux/average.h @@ -1,45 +1,66 @@ #ifndef _LINUX_AVERAGE_H #define _LINUX_AVERAGE_H -/* Exponentially weighted moving average (EWMA) */ +/* + * Exponentially weighted moving average (EWMA) + * + * This implements a fixed-precision EWMA algorithm, with both the + * precision and fall-off coefficient determined at compile-time + * and built into the generated helper funtions. + * + * The first argument to the macro is the name that will be used + * for the struct and helper functions. + * + * The second argument, the precision, expresses how many bits are + * used for the fractional part of the fixed-precision values. + * + * The third argument, the weight reciprocal, determines how the + * new values will be weighed vs. the old state, new values will + * get weight 1/weight_rcp and old values 1-1/weight_rcp. Note + * that this parameter must be a power of two for efficiency. + */ -#define DECLARE_EWMA(name, _factor, _weight) \ +#define DECLARE_EWMA(name, _precision, _weight_rcp) \ struct ewma_##name { \ unsigned long internal; \ }; \ static inline void ewma_##name##_init(struct ewma_##name *e) \ { \ - BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ + BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ + /* \ + * Even if you want to feed it just 0/1 you should have \ + * some bits for the non-fractional part... \ + */ \ + BUILD_BUG_ON((_precision) > 30); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ e->internal = 0; \ } \ static inline unsigned long \ ewma_##name##_read(struct ewma_##name *e) \ { \ - BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ - return e->internal >> ilog2(_factor); \ + BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ + BUILD_BUG_ON((_precision) > 30); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ + return e->internal >> (_precision); \ } \ static inline void ewma_##name##_add(struct ewma_##name *e, \ unsigned long val) \ { \ unsigned long internal = ACCESS_ONCE(e->internal); \ - unsigned long weight = ilog2(_weight); \ - unsigned long factor = ilog2(_factor); \ + unsigned long weight_rcp = ilog2(_weight_rcp); \ + unsigned long precision = _precision; \ \ - BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ + BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ + BUILD_BUG_ON((_precision) > 30); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ \ ACCESS_ONCE(e->internal) = internal ? \ - (((internal << weight) - internal) + \ - (val << factor)) >> weight : \ - (val << factor); \ + (((internal << weight_rcp) - internal) + \ + (val << precision)) >> weight_rcp : \ + (val << precision); \ } #endif /* _LINUX_AVERAGE_H */ diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 1303b570b18c..05488da3aee9 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -6,6 +6,8 @@ #include <asm/exec.h> #include <uapi/linux/binfmts.h> +struct filename; + #define CORENAME_MAX_SIZE 128 /* @@ -123,4 +125,12 @@ extern void install_exec_creds(struct linux_binprm *bprm); extern void set_binfmt(struct linux_binfmt *new); extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t); +extern int do_execve(struct filename *, + const char __user * const __user *, + const char __user * const __user *); +extern int do_execveat(int, struct filename *, + const char __user * const __user *, + const char __user * const __user *, + int); + #endif /* _LINUX_BINFMTS_H */ diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h new file mode 100644 index 000000000000..b1ef6e14744f --- /dev/null +++ b/include/linux/blk-mq-virtio.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_BLK_MQ_VIRTIO_H +#define _LINUX_BLK_MQ_VIRTIO_H + +struct blk_mq_tag_set; +struct virtio_device; + +int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, + struct virtio_device *vdev, int first_vec); + +#endif /* _LINUX_BLK_MQ_VIRTIO_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 001d30d727c5..b296a9006117 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -245,6 +245,9 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_mq_freeze_queue_start(struct request_queue *q); +void blk_mq_freeze_queue_wait(struct request_queue *q); +int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, + unsigned long timeout); int blk_mq_reinit_tagset(struct blk_mq_tag_set *set); int blk_mq_map_queues(struct blk_mq_tag_set *set); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aecca0e7d9ca..5a7da607ca04 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -2,6 +2,7 @@ #define _LINUX_BLKDEV_H #include <linux/sched.h> +#include <linux/sched/clock.h> #ifdef CONFIG_BLOCK @@ -434,7 +435,6 @@ struct request_queue { struct delayed_work delay_work; struct backing_dev_info *backing_dev_info; - struct disk_devt *disk_devt; /* * The queue owner gets to use this for whatever they like. diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 1816c5e26581..88cd5dc8e238 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -48,6 +48,7 @@ struct ceph_options { unsigned long mount_timeout; /* jiffies */ unsigned long osd_idle_ttl; /* jiffies */ unsigned long osd_keepalive_timeout; /* jiffies */ + unsigned long osd_request_timeout; /* jiffies */ /* * any type that can't be simply compared or doesn't need need @@ -68,6 +69,7 @@ struct ceph_options { #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) +#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0 /* no timeout */ #define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000) #define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000) diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 2ea0c282f3dc..c125b5d9e13c 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -189,6 +189,7 @@ struct ceph_osd_request { /* internal */ unsigned long r_stamp; /* jiffies, send or check time */ + unsigned long r_start_stamp; /* jiffies */ int r_attempts; struct ceph_eversion r_replay_version; /* aka reassert_version */ u32 r_last_force_resend; diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 3c02404cfce9..6a3f850cabab 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -531,8 +531,8 @@ extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups * @tsk: target task * - * Called from threadgroup_change_begin() and allows cgroup operations to - * synchronize against threadgroup changes using a percpu_rw_semaphore. + * Allows cgroup operations to synchronize against threadgroup changes + * using a percpu_rw_semaphore. */ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) { @@ -543,8 +543,7 @@ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups * @tsk: target task * - * Called from threadgroup_change_end(). Counterpart of - * cgroup_threadcgroup_change_begin(). + * Counterpart of cgroup_threadcgroup_change_begin(). */ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) { @@ -555,7 +554,11 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) #define CGROUP_SUBSYS_COUNT 0 -static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {} +static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) +{ + might_sleep(); +} + static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} #endif /* CONFIG_CGROUPS */ diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 21f9c74496e7..f92081234afd 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -30,6 +30,8 @@ struct cpu { extern void boot_cpu_init(void); extern void boot_cpu_state_init(void); +extern void cpu_init(void); +extern void trap_init(void); extern int register_cpu(struct cpu *cpu, int num); extern struct device *get_cpu_device(unsigned cpu); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index bb790c4db0c5..62d240e962f0 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -26,7 +26,6 @@ enum cpuhp_state { CPUHP_ARM_OMAP_WAKE_DEAD, CPUHP_IRQ_POLL_DEAD, CPUHP_BLOCK_SOFTIRQ_DEAD, - CPUHP_VIRT_SCSI_DEAD, CPUHP_ACPI_CPUDRV_DEAD, CPUHP_S390_PFAULT_DEAD, CPUHP_BLK_MQ_DEAD, diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index bfc204e70338..611fce58d670 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -9,6 +9,8 @@ */ #include <linux/sched.h> +#include <linux/sched/topology.h> +#include <linux/sched/task.h> #include <linux/cpumask.h> #include <linux/nodemask.h> #include <linux/mm.h> diff --git a/include/linux/cputime.h b/include/linux/cputime.h deleted file mode 100644 index a691dc4ddc13..000000000000 --- a/include/linux/cputime.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __LINUX_CPUTIME_H -#define __LINUX_CPUTIME_H - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#include <asm/cputime.h> - -#ifndef cputime_to_nsecs -# define cputime_to_nsecs(__ct) \ - (cputime_to_usecs(__ct) * NSEC_PER_USEC) -#endif - -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -#endif /* __LINUX_CPUTIME_H */ diff --git a/include/linux/cred.h b/include/linux/cred.h index f0e70a1bb3ac..b03e7d049a64 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -18,8 +18,9 @@ #include <linux/selinux.h> #include <linux/atomic.h> #include <linux/uidgid.h> +#include <linux/sched.h> +#include <linux/sched/user.h> -struct user_struct; struct cred; struct inode; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 591b6c16f9c1..d2e38dc6172c 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -11,6 +11,7 @@ #include <linux/rcupdate.h> #include <linux/lockref.h> #include <linux/stringhash.h> +#include <linux/wait.h> struct path; struct vfsmount; diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 00e60f79a9cc..4178d2493547 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -18,8 +18,6 @@ #define _LINUX_DELAYACCT_H #include <uapi/linux/taskstats.h> -#include <linux/sched.h> -#include <linux/slab.h> /* * Per-task flags relevant to delay accounting @@ -30,7 +28,43 @@ #define DELAYACCT_PF_BLKIO 0x00000002 /* I am waiting on IO */ #ifdef CONFIG_TASK_DELAY_ACCT +struct task_delay_info { + spinlock_t lock; + unsigned int flags; /* Private per-task flags */ + + /* For each stat XXX, add following, aligned appropriately + * + * struct timespec XXX_start, XXX_end; + * u64 XXX_delay; + * u32 XXX_count; + * + * Atomicity of updates to XXX_delay, XXX_count protected by + * single lock above (split into XXX_lock if contention is an issue). + */ + + /* + * XXX_count is incremented on every XXX operation, the delay + * associated with the operation is added to XXX_delay. + * XXX_delay contains the accumulated delay time in nanoseconds. + */ + u64 blkio_start; /* Shared by blkio, swapin */ + u64 blkio_delay; /* wait for sync block io completion */ + u64 swapin_delay; /* wait for swapin block io completion */ + u32 blkio_count; /* total count of the number of sync block */ + /* io operations performed */ + u32 swapin_count; /* total count of the number of swapin block */ + /* io operations performed */ + + u64 freepages_start; + u64 freepages_delay; /* wait for memory reclaim */ + u32 freepages_count; /* total count of memory reclaim */ +}; +#endif +#include <linux/sched.h> +#include <linux/slab.h> + +#ifdef CONFIG_TASK_DELAY_ACCT extern int delayacct_on; /* Delay accounting turned on/off */ extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e9bc9292bd3a..e8ffba1052d3 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -26,7 +26,7 @@ #include <linux/msi.h> #include <linux/irqreturn.h> #include <linux/rwsem.h> -#include <linux/rcupdate.h> +#include <linux/rculist.h> struct acpi_dmar_header; diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 698d51a0eea3..c8240a12c42d 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -3,6 +3,8 @@ #include <linux/user.h> #include <linux/bug.h> +#include <linux/sched/task_stack.h> + #include <asm/elf.h> #include <uapi/linux/elfcore.h> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f4956d8601c..728d4e0292aa 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -61,6 +61,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, #endif /* CONFIG_FAULT_INJECTION */ +struct kmem_cache; + #ifdef CONFIG_FAILSLAB extern bool should_failslab(struct kmem_cache *s, gfp_t gfpflags); #else diff --git a/include/linux/fs.h b/include/linux/fs.h index c64f2cb7d364..7251f7bb45e8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1567,6 +1567,9 @@ extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); extern int vfs_whiteout(struct inode *, struct dentry *); +extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, + int open_flag); + /* * VFS file helper functions. */ @@ -1706,7 +1709,7 @@ struct inode_operations { int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); + int (*getattr) (const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); @@ -1718,6 +1721,29 @@ struct inode_operations { int (*set_acl)(struct inode *, struct posix_acl *, int); } ____cacheline_aligned; +static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio, + struct iov_iter *iter) +{ + return file->f_op->read_iter(kio, iter); +} + +static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio, + struct iov_iter *iter) +{ + return file->f_op->write_iter(kio, iter); +} + +static inline int call_mmap(struct file *file, struct vm_area_struct *vma) +{ + return file->f_op->mmap(file, vma); +} + +static inline int call_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + return file->f_op->fsync(file, start, end, datasync); +} + ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, @@ -1744,19 +1770,6 @@ extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); -static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len) -{ - int ret; - - sb_start_write(file_inode(file_out)->i_sb); - ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); - sb_end_write(file_inode(file_out)->i_sb); - - return ret; -} - struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); @@ -2568,6 +2581,19 @@ static inline void file_end_write(struct file *file) __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); } +static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 len) +{ + int ret; + + file_start_write(file_out); + ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); + file_end_write(file_out); + + return ret; +} + /* * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. @@ -2652,7 +2678,7 @@ static const char * const kernel_read_file_str[] = { static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) { - if (id < 0 || id >= READING_MAX_ID) + if ((unsigned)id >= READING_MAX_ID) return kernel_read_file_str[READING_UNKNOWN]; return kernel_read_file_str[id]; @@ -2876,8 +2902,8 @@ extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); extern void generic_fillattr(struct inode *, struct kstat *); -int vfs_getattr_nosec(struct path *path, struct kstat *stat); -extern int vfs_getattr(struct path *, struct kstat *); +extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); +extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); void inode_add_bytes(struct inode *inode, loff_t bytes); void __inode_sub_bytes(struct inode *inode, loff_t bytes); @@ -2890,10 +2916,29 @@ extern const struct inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); -extern int vfs_stat(const char __user *, struct kstat *); -extern int vfs_lstat(const char __user *, struct kstat *); -extern int vfs_fstat(unsigned int, struct kstat *); -extern int vfs_fstatat(int , const char __user *, struct kstat *, int); +extern int vfs_statx(int, const char __user *, int, struct kstat *, u32); +extern int vfs_statx_fd(unsigned int, struct kstat *, u32, unsigned int); + +static inline int vfs_stat(const char __user *filename, struct kstat *stat) +{ + return vfs_statx(AT_FDCWD, filename, 0, stat, STATX_BASIC_STATS); +} +static inline int vfs_lstat(const char __user *name, struct kstat *stat) +{ + return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW, + stat, STATX_BASIC_STATS); +} +static inline int vfs_fstatat(int dfd, const char __user *filename, + struct kstat *stat, int flags) +{ + return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); +} +static inline int vfs_fstat(int fd, struct kstat *stat) +{ + return vfs_statx_fd(fd, stat, STATX_BASIC_STATS, 0); +} + + extern const char *vfs_get_link(struct dentry *, struct delayed_call *); extern int vfs_readlink(struct dentry *, char __user *, int); @@ -2923,7 +2968,7 @@ extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, struct dir_context *); extern int simple_setattr(struct dentry *, struct iattr *); -extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int simple_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int simple_statfs(struct dentry *, struct kstatfs *); extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index a999d281a2f1..76f39754e7b0 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -167,13 +167,6 @@ struct blk_integrity { }; #endif /* CONFIG_BLK_DEV_INTEGRITY */ -struct disk_devt { - atomic_t count; - void (*release)(struct disk_devt *disk_devt); -}; - -void put_disk_devt(struct disk_devt *disk_devt); -void get_disk_devt(struct disk_devt *disk_devt); struct gendisk { /* major, first_minor and minors are input parameters only, @@ -183,7 +176,6 @@ struct gendisk { int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ - struct disk_devt *disk_devt; char disk_name[DISK_NAME_LEN]; /* name of major driver */ char *(*devnode)(struct gendisk *gd, umode_t *mode); diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index e52b427223ba..249e579ecd4c 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -19,7 +19,6 @@ #include <linux/ktime.h> #include <linux/init.h> #include <linux/list.h> -#include <linux/wait.h> #include <linux/percpu.h> #include <linux/timer.h> #include <linux/timerqueue.h> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 503099d8aada..b857fc8cc2ec 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -122,7 +122,7 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags); int pmd_huge(pmd_t pmd); -int pud_huge(pud_t pmd); +int pud_huge(pud_t pud); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot); @@ -197,6 +197,9 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb, #ifndef pgd_huge #define pgd_huge(x) 0 #endif +#ifndef p4d_huge +#define p4d_huge(x) 0 +#endif #ifndef pgd_write static inline int pgd_write(pgd_t pgd) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index bed8fbb45f31..6b183521c616 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -30,6 +30,7 @@ #include <linux/device.h> /* for struct device */ #include <linux/sched.h> /* for completion */ #include <linux/mutex.h> +#include <linux/rtmutex.h> #include <linux/irqdomain.h> /* for Host Notify IRQ */ #include <linux/of.h> /* for struct device_node */ #include <linux/swab.h> /* for swab16 */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 3a85d61f7614..91d9049f0039 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -12,8 +12,10 @@ #include <linux/securebits.h> #include <linux/seqlock.h> #include <linux/rbtree.h> +#include <linux/sched/autogroup.h> #include <net/net_namespace.h> #include <linux/sched/rt.h> +#include <linux/mm_types.h> #include <asm/thread_info.h> @@ -149,8 +151,6 @@ extern struct group_info init_groups; extern struct cred init_cred; -extern struct task_group root_task_group; - #ifdef CONFIG_CGROUP_SCHED # define INIT_CGROUP_SCHED(tsk) \ .sched_task_group = &root_task_group, diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 672cfef72fc8..97cbca19430d 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -373,6 +373,8 @@ #define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT) #define ICC_IGRPEN1_EL1_SHIFT 0 #define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT) +#define ICC_SRE_EL1_DIB (1U << 2) +#define ICC_SRE_EL1_DFB (1U << 1) #define ICC_SRE_EL1_SRE (1U << 0) /* diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 188eced6813e..9f3616085423 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -524,6 +524,10 @@ static inline struct irq_domain *irq_find_matching_fwnode( { return NULL; } +static inline bool irq_domain_check_msi_remap(void) +{ + return false; +} #endif /* !CONFIG_IRQ_DOMAIN */ #endif /* _LINUX_IRQDOMAIN_H */ diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 8e06d758ee48..2afd74b9d844 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -90,6 +90,13 @@ extern bool static_key_initialized; struct static_key { atomic_t enabled; /* + * Note: + * To make anonymous unions work with old compilers, the static + * initialization of them requires brackets. This creates a dependency + * on the order of the struct with the initializers. If any fields + * are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need + * to be modified. + * * bit 0 => 1 if key is initially true * 0 if initially false * bit 1 => 1 if points to struct static_key_mod @@ -166,10 +173,10 @@ extern void static_key_disable(struct static_key *key); */ #define STATIC_KEY_INIT_TRUE \ { .enabled = { 1 }, \ - .entries = (void *)JUMP_TYPE_TRUE } + { .entries = (void *)JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ { .enabled = { 0 }, \ - .entries = (void *)JUMP_TYPE_FALSE } + { .entries = (void *)JUMP_TYPE_FALSE } } #else /* !HAVE_JUMP_LABEL */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index c908b25bf5a5..1c823bef4c15 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -1,7 +1,6 @@ #ifndef _LINUX_KASAN_H #define _LINUX_KASAN_H -#include <linux/sched.h> #include <linux/types.h> struct kmem_cache; @@ -19,6 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE]; extern pte_t kasan_zero_pte[PTRS_PER_PTE]; extern pmd_t kasan_zero_pmd[PTRS_PER_PMD]; extern pud_t kasan_zero_pud[PTRS_PER_PUD]; +extern p4d_t kasan_zero_p4d[PTRS_PER_P4D]; void kasan_populate_zero_shadow(const void *shadow_start, const void *shadow_end); @@ -30,16 +30,10 @@ static inline void *kasan_mem_to_shadow(const void *addr) } /* Enable reporting bugs after kasan_disable_current() */ -static inline void kasan_enable_current(void) -{ - current->kasan_depth++; -} +extern void kasan_enable_current(void); /* Disable reporting bugs for current task */ -static inline void kasan_disable_current(void) -{ - current->kasan_depth--; -} +extern void kasan_disable_current(void); void kasan_unpoison_shadow(const void *address, size_t size); diff --git a/include/linux/key.h b/include/linux/key.h index 722914798f37..e45212f2777e 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -354,7 +354,10 @@ static inline bool key_is_instantiated(const struct key *key) !test_bit(KEY_FLAG_NEGATIVE, &key->flags); } -#define rcu_dereference_key(KEY) \ +#define dereference_key_rcu(KEY) \ + (rcu_dereference((KEY)->payload.rcu_data0)) + +#define dereference_key_locked(KEY) \ (rcu_dereference_protected((KEY)->payload.rcu_data0, \ rwsem_is_locked(&((struct key *)(KEY))->sem))) diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 1e032a1ddb3e..5d9a400af509 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -1,7 +1,8 @@ #ifndef _LINUX_KHUGEPAGED_H #define _LINUX_KHUGEPAGED_H -#include <linux/sched.h> /* MMF_VM_HUGEPAGE */ +#include <linux/sched/coredump.h> /* MMF_VM_HUGEPAGE */ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern struct attribute_group khugepaged_attr_group; diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 481c8c4627ca..e1cfda4bee58 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -12,6 +12,7 @@ #include <linux/pagemap.h> #include <linux/rmap.h> #include <linux/sched.h> +#include <linux/sched/coredump.h> struct stable_node; struct mem_cgroup; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8d69d5150748..2c14ad9809da 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -26,6 +26,7 @@ #include <linux/context_tracking.h> #include <linux/irqbypass.h> #include <linux/swait.h> +#include <linux/refcount.h> #include <asm/signal.h> #include <linux/kvm.h> @@ -401,7 +402,7 @@ struct kvm { #endif struct kvm_vm_stat stat; struct kvm_arch arch; - atomic_t users_count; + refcount_t users_count; #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; spinlock_t ring_lock; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 8458c5351e56..77e7af32543f 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -70,6 +70,8 @@ struct nd_cmd_desc { struct nd_interleave_set { u64 cookie; + /* compatibility with initial buggy Linux implementation */ + u64 altcookie; }; struct nd_mapping_desc { diff --git a/include/linux/log2.h b/include/linux/log2.h index ef3d4f67118c..c373295f359f 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -16,12 +16,6 @@ #include <linux/bitops.h> /* - * deal with unrepresentable constant logarithms - */ -extern __attribute__((const, noreturn)) -int ____ilog2_NaN(void); - -/* * non-constant log of base 2 calculators * - the arch may override these in asm/bitops.h if they can be implemented * more efficiently than using fls() and fls64() @@ -85,7 +79,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n) < 1 ? ____ilog2_NaN() : \ + (n) < 2 ? 0 : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ @@ -148,10 +142,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ - (n) & (1ULL << 1) ? 1 : \ - (n) & (1ULL << 0) ? 0 : \ - ____ilog2_NaN() \ - ) : \ + 1 ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index e965e5090d96..a858bcb6220b 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -109,7 +109,7 @@ static inline void mlx4_u64_to_mac(u8 *addr, u64 mac) int i; for (i = ETH_ALEN; i > 0; i--) { - addr[i - 1] = mac && 0xFF; + addr[i - 1] = mac & 0xFF; mac >>= 8; } } diff --git a/include/linux/mm.h b/include/linux/mm.h index 0d65dd72c0f4..5f01c88f0800 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1560,14 +1560,24 @@ static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, return ptep; } +#ifdef __PAGETABLE_P4D_FOLDED +static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, + unsigned long address) +{ + return 0; +} +#else +int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); +#endif + #ifdef __PAGETABLE_PUD_FOLDED -static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, +static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { return 0; } #else -int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); +int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address); #endif #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU) @@ -1619,11 +1629,22 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); * Remove it when 4level-fixup.h has been removed. */ #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) -static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) + +#ifndef __ARCH_HAS_5LEVEL_HACK +static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd, + unsigned long address) +{ + return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ? + NULL : p4d_offset(pgd, address); +} + +static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d, + unsigned long address) { - return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? - NULL: pud_offset(pgd, address); + return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ? + NULL : pud_offset(p4d, address); } +#endif /* !__ARCH_HAS_5LEVEL_HACK */ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) { @@ -2385,7 +2406,8 @@ void sparse_mem_maps_populate_node(struct page **map_map, struct page *sparse_mem_map_populate(unsigned long pnum, int nid); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); -pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); +p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); +pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); void *vmemmap_alloc_block(unsigned long size, int node); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4f6d440ad785..f60f45fe226f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1,9 +1,9 @@ #ifndef _LINUX_MM_TYPES_H #define _LINUX_MM_TYPES_H +#include <linux/mm_types_task.h> + #include <linux/auxvec.h> -#include <linux/types.h> -#include <linux/threads.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/rbtree.h> @@ -13,7 +13,7 @@ #include <linux/uprobes.h> #include <linux/page-flags-layout.h> #include <linux/workqueue.h> -#include <asm/page.h> + #include <asm/mmu.h> #ifndef AT_VECTOR_SIZE_ARCH @@ -24,11 +24,6 @@ struct address_space; struct mem_cgroup; -#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) -#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ - IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) -#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) - /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -231,17 +226,6 @@ struct page { #endif ; -struct page_frag { - struct page *page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 offset; - __u32 size; -#else - __u16 offset; - __u16 size; -#endif -}; - #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) @@ -371,27 +355,6 @@ struct core_state { struct completion startup; }; -enum { - MM_FILEPAGES, /* Resident file mapping pages */ - MM_ANONPAGES, /* Resident anonymous pages */ - MM_SWAPENTS, /* Anonymous swap entries */ - MM_SHMEMPAGES, /* Resident shared memory pages */ - NR_MM_COUNTERS -}; - -#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU) -#define SPLIT_RSS_COUNTING -/* per-thread cached information, */ -struct task_rss_stat { - int events; /* for synchronization threshold */ - int count[NR_MM_COUNTERS]; -}; -#endif /* USE_SPLIT_PTE_PTLOCKS */ - -struct mm_rss_stat { - atomic_long_t count[NR_MM_COUNTERS]; -}; - struct kioctx_table; struct mm_struct { struct vm_area_struct *mmap; /* list of VMAs */ @@ -534,6 +497,8 @@ struct mm_struct { struct work_struct async_put_work; }; +extern struct mm_struct init_mm; + static inline void mm_init_cpumask(struct mm_struct *mm) { #ifdef CONFIG_CPUMASK_OFFSTACK diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h new file mode 100644 index 000000000000..136dfdf63ba1 --- /dev/null +++ b/include/linux/mm_types_task.h @@ -0,0 +1,87 @@ +#ifndef _LINUX_MM_TYPES_TASK_H +#define _LINUX_MM_TYPES_TASK_H + +/* + * Here are the definitions of the MM data types that are embedded in 'struct task_struct'. + * + * (These are defined separately to decouple sched.h from mm_types.h as much as possible.) + */ + +#include <linux/types.h> +#include <linux/threads.h> +#include <linux/atomic.h> +#include <linux/cpumask.h> + +#include <asm/page.h> + +#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) +#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ + IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) +#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) + +/* + * The per task VMA cache array: + */ +#define VMACACHE_BITS 2 +#define VMACACHE_SIZE (1U << VMACACHE_BITS) +#define VMACACHE_MASK (VMACACHE_SIZE - 1) + +struct vmacache { + u32 seqnum; + struct vm_area_struct *vmas[VMACACHE_SIZE]; +}; + +enum { + MM_FILEPAGES, /* Resident file mapping pages */ + MM_ANONPAGES, /* Resident anonymous pages */ + MM_SWAPENTS, /* Anonymous swap entries */ + MM_SHMEMPAGES, /* Resident shared memory pages */ + NR_MM_COUNTERS +}; + +#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU) +#define SPLIT_RSS_COUNTING +/* per-thread cached information, */ +struct task_rss_stat { + int events; /* for synchronization threshold */ + int count[NR_MM_COUNTERS]; +}; +#endif /* USE_SPLIT_PTE_PTLOCKS */ + +struct mm_rss_stat { + atomic_long_t count[NR_MM_COUNTERS]; +}; + +struct page_frag { + struct page *page; +#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) + __u32 offset; + __u32 size; +#else + __u16 offset; + __u16 size; +#endif +}; + +/* Track pages that require TLB flushes */ +struct tlbflush_unmap_batch { +#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + /* + * Each bit set is a CPU that potentially has a TLB entry for one of + * the PFNs being flushed. See set_tlb_ubc_flush_pending(). + */ + struct cpumask cpumask; + + /* True if any bit in cpumask is set */ + bool flush_required; + + /* + * If true then the PTE was dirty when unmapped. The entry must be + * flushed before IO is initiated or a stale TLB entry potentially + * allows an update without redirtying the page. + */ + bool writable; +#endif +}; + +#endif /* _LINUX_MM_TYPES_TASK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f40f0ab3847a..97456b2539e4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -330,6 +330,7 @@ struct napi_struct { enum { NAPI_STATE_SCHED, /* Poll is scheduled */ + NAPI_STATE_MISSED, /* reschedule a napi */ NAPI_STATE_DISABLE, /* Disable pending */ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ @@ -338,12 +339,13 @@ enum { }; enum { - NAPIF_STATE_SCHED = (1UL << NAPI_STATE_SCHED), - NAPIF_STATE_DISABLE = (1UL << NAPI_STATE_DISABLE), - NAPIF_STATE_NPSVC = (1UL << NAPI_STATE_NPSVC), - NAPIF_STATE_HASHED = (1UL << NAPI_STATE_HASHED), - NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL), - NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL), + NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED), + NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED), + NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE), + NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC), + NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED), + NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), + NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), }; enum gro_result { @@ -414,20 +416,7 @@ static inline bool napi_disable_pending(struct napi_struct *n) return test_bit(NAPI_STATE_DISABLE, &n->state); } -/** - * napi_schedule_prep - check if NAPI can be scheduled - * @n: NAPI context - * - * Test if NAPI routine is already running, and if not mark - * it as running. This is used as a condition variable to - * insure only one NAPI poll instance runs. We also make - * sure there is no pending NAPI disable. - */ -static inline bool napi_schedule_prep(struct napi_struct *n) -{ - return !napi_disable_pending(n) && - !test_and_set_bit(NAPI_STATE_SCHED, &n->state); -} +bool napi_schedule_prep(struct napi_struct *n); /** * napi_schedule - schedule NAPI poll diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f1da8c8dd473..287f34161086 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -335,7 +335,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr); -extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int nfs_getattr(const struct path *, struct kstat *, u32, unsigned int); extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); extern void nfs_access_set_mask(struct nfs_access_entry *, u32); extern int nfs_permission(struct inode *, int); diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 0a3fadc32693..aa3cd0878270 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -7,6 +7,43 @@ #include <linux/sched.h> #include <asm/irq.h> +#ifdef CONFIG_LOCKUP_DETECTOR +extern void touch_softlockup_watchdog_sched(void); +extern void touch_softlockup_watchdog(void); +extern void touch_softlockup_watchdog_sync(void); +extern void touch_all_softlockup_watchdogs(void); +extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); +extern unsigned int softlockup_panic; +extern unsigned int hardlockup_panic; +void lockup_detector_init(void); +#else +static inline void touch_softlockup_watchdog_sched(void) +{ +} +static inline void touch_softlockup_watchdog(void) +{ +} +static inline void touch_softlockup_watchdog_sync(void) +{ +} +static inline void touch_all_softlockup_watchdogs(void) +{ +} +static inline void lockup_detector_init(void) +{ +} +#endif + +#ifdef CONFIG_DETECT_HUNG_TASK +void reset_hung_task_detector(void); +#else +static inline void reset_hung_task_detector(void) +{ +} +#endif + /* * The run state of the lockup detectors is controlled by the content of the * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - diff --git a/include/linux/oom.h b/include/linux/oom.h index b4e36e92bc87..8a266e2be5a6 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -2,7 +2,7 @@ #define __INCLUDE_LINUX_OOM_H -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/types.h> #include <linux/nodemask.h> #include <uapi/linux/oom.h> diff --git a/include/linux/pci.h b/include/linux/pci.h index 282ed32244ce..eb3da1a04e6c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1323,6 +1323,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec); +int pci_irq_get_node(struct pci_dev *pdev, int vec); #else static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } @@ -1370,6 +1371,11 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, { return cpu_possible_mask; } + +static inline int pci_irq_get_node(struct pci_dev *pdev, int vec) +{ + return first_online_node; +} #endif static inline int diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index a5f98d53d732..9b7dd59fe28d 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -1,6 +1,8 @@ #ifndef _LINUX_PERF_REGS_H #define _LINUX_PERF_REGS_H +#include <linux/sched/task_stack.h> + struct perf_regs { __u64 abi; struct pt_regs *regs; diff --git a/include/linux/pid.h b/include/linux/pid.h index 298ead5512e5..4d179316e431 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -1,7 +1,7 @@ #ifndef _LINUX_PID_H #define _LINUX_PID_H -#include <linux/rcupdate.h> +#include <linux/rculist.h> enum pid_type { diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index d4d34791e463..032b55909145 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -146,8 +146,6 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier); int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier); -int dev_pm_qos_add_global_notifier(struct notifier_block *notifier); -int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier); void dev_pm_qos_constraints_init(struct device *dev); void dev_pm_qos_constraints_destroy(struct device *dev); int dev_pm_qos_add_ancestor_request(struct device *dev, @@ -172,6 +170,12 @@ static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return dev->power.qos->flags_req->data.flr.flags; } + +static inline s32 dev_pm_qos_raw_read_value(struct device *dev) +{ + return IS_ERR_OR_NULL(dev->power.qos) ? + 0 : pm_qos_read_value(&dev->power.qos->resume_latency); +} #else static inline enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask) @@ -199,12 +203,6 @@ static inline int dev_pm_qos_add_notifier(struct device *dev, static inline int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier) { return 0; } -static inline int dev_pm_qos_add_global_notifier( - struct notifier_block *notifier) - { return 0; } -static inline int dev_pm_qos_remove_global_notifier( - struct notifier_block *notifier) - { return 0; } static inline void dev_pm_qos_constraints_init(struct device *dev) { dev->power.power_state = PMSG_ON; @@ -236,6 +234,7 @@ static inline void dev_pm_qos_hide_latency_tolerance(struct device *dev) {} static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev) { return 0; } static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return 0; } +static inline s32 dev_pm_qos_raw_read_value(struct device *dev) { return 0; } #endif #endif diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 7eeceac52dea..cae461224948 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -55,6 +55,27 @@ /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 +#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) + +/* + * Disable preemption until the scheduler is running -- use an unconditional + * value so that it also works on !PREEMPT_COUNT kernels. + * + * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count(). + */ +#define INIT_PREEMPT_COUNT PREEMPT_OFFSET + +/* + * Initial preempt_count value; reflects the preempt_count schedule invariant + * which states that during context switches: + * + * preempt_count() == 2*PREEMPT_DISABLE_OFFSET + * + * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels. + * Note: See finish_task_switch(). + */ +#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) + /* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */ #include <asm/preempt.h> diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index e0e539321ab9..422bc2e4cb6a 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -3,6 +3,7 @@ #include <linux/compiler.h> /* For unlikely. */ #include <linux/sched.h> /* For struct task_struct. */ +#include <linux/sched/signal.h> /* For send_sig(), same_thread_group(), etc. */ #include <linux/err.h> /* for IS_ERR_VALUE */ #include <linux/bug.h> /* For BUG_ON. */ #include <linux/pid_namespace.h> /* For task_active_pid_ns. */ diff --git a/include/linux/purgatory.h b/include/linux/purgatory.h new file mode 100644 index 000000000000..d60d4e278609 --- /dev/null +++ b/include/linux/purgatory.h @@ -0,0 +1,23 @@ +#ifndef _LINUX_PURGATORY_H +#define _LINUX_PURGATORY_H + +#include <linux/types.h> +#include <crypto/sha.h> +#include <uapi/linux/kexec.h> + +struct kexec_sha_region { + unsigned long start; + unsigned long len; +}; + +/* + * These forward declarations serve two purposes: + * + * 1) Make sparse happy when checking arch/purgatory + * 2) Document that these are required to be global so the symbol + * lookup in kexec works + */ +extern struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX]; +extern u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE]; + +#endif diff --git a/include/linux/random.h b/include/linux/random.h index 7bd2403e4fef..ed5c3838780d 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -37,14 +37,26 @@ extern void get_random_bytes(void *buf, int nbytes); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void del_random_ready_callback(struct random_ready_callback *rdy); extern void get_random_bytes_arch(void *buf, int nbytes); -extern int random_int_secret_init(void); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; #endif -unsigned int get_random_int(void); -unsigned long get_random_long(void); +u32 get_random_u32(void); +u64 get_random_u64(void); +static inline unsigned int get_random_int(void) +{ + return get_random_u32(); +} +static inline unsigned long get_random_long(void) +{ +#if BITS_PER_LONG == 64 + return get_random_u64(); +#else + return get_random_u32(); +#endif +} + unsigned long randomize_page(unsigned long start, unsigned long range); u32 prandom_u32(void); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6ade6a52d9d4..de88b33c0974 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -40,7 +40,6 @@ #include <linux/cpumask.h> #include <linux/seqlock.h> #include <linux/lockdep.h> -#include <linux/completion.h> #include <linux/debugobjects.h> #include <linux/bug.h> #include <linux/compiler.h> @@ -226,45 +225,6 @@ void call_rcu_sched(struct rcu_head *head, void synchronize_sched(void); -/* - * Structure allowing asynchronous waiting on RCU. - */ -struct rcu_synchronize { - struct rcu_head head; - struct completion completion; -}; -void wakeme_after_rcu(struct rcu_head *head); - -void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, - struct rcu_synchronize *rs_array); - -#define _wait_rcu_gp(checktiny, ...) \ -do { \ - call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ - struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ - __wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \ - __crcu_array, __rs_array); \ -} while (0) - -#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) - -/** - * synchronize_rcu_mult - Wait concurrently for multiple grace periods - * @...: List of call_rcu() functions for the flavors to wait on. - * - * This macro waits concurrently for multiple flavors of RCU grace periods. - * For example, synchronize_rcu_mult(call_rcu, call_rcu_bh) would wait - * on concurrent RCU and RCU-bh grace periods. Waiting on a give SRCU - * domain requires you to write a wrapper function for that SRCU domain's - * call_srcu() function, supplying the corresponding srcu_struct. - * - * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU - * or RCU-bh, given that anywhere synchronize_rcu_mult() can be called - * is automatically a grace period. - */ -#define synchronize_rcu_mult(...) \ - _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) - /** * call_rcu_tasks() - Queue an RCU for invocation task-based grace period * @head: structure to be used for queueing the RCU updates. diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h new file mode 100644 index 000000000000..e774b4f5f220 --- /dev/null +++ b/include/linux/rcupdate_wait.h @@ -0,0 +1,50 @@ +#ifndef _LINUX_SCHED_RCUPDATE_WAIT_H +#define _LINUX_SCHED_RCUPDATE_WAIT_H + +/* + * RCU synchronization types and methods: + */ + +#include <linux/rcupdate.h> +#include <linux/completion.h> + +/* + * Structure allowing asynchronous waiting on RCU. + */ +struct rcu_synchronize { + struct rcu_head head; + struct completion completion; +}; +void wakeme_after_rcu(struct rcu_head *head); + +void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, + struct rcu_synchronize *rs_array); + +#define _wait_rcu_gp(checktiny, ...) \ +do { \ + call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ + struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ + __wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \ + __crcu_array, __rs_array); \ +} while (0) + +#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) + +/** + * synchronize_rcu_mult - Wait concurrently for multiple grace periods + * @...: List of call_rcu() functions for the flavors to wait on. + * + * This macro waits concurrently for multiple flavors of RCU grace periods. + * For example, synchronize_rcu_mult(call_rcu, call_rcu_bh) would wait + * on concurrent RCU and RCU-bh grace periods. Waiting on a give SRCU + * domain requires you to write a wrapper function for that SRCU domain's + * call_srcu() function, supplying the corresponding srcu_struct. + * + * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU + * or RCU-bh, given that anywhere synchronize_rcu_mult() can be called + * is automatically a grace period. + */ +#define synchronize_rcu_mult(...) \ + _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) + +#endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 4f9b2fa2173d..b452953e21c8 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -53,15 +53,8 @@ static inline void cond_synchronize_sched(unsigned long oldstate) might_sleep(); } -static inline void rcu_barrier_bh(void) -{ - wait_rcu_gp(call_rcu_bh); -} - -static inline void rcu_barrier_sched(void) -{ - wait_rcu_gp(call_rcu_sched); -} +extern void rcu_barrier_bh(void); +extern void rcu_barrier_sched(void); static inline void synchronize_rcu_expedited(void) { diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index ad3e5158e586..c9f795e9a2ee 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -65,7 +65,7 @@ struct regulator_state { int uV; /* suspend voltage */ unsigned int mode; /* suspend regulator operating mode */ int enabled; /* is regulator enabled in this suspend state */ - int disabled; /* is the regulator disbled in this suspend state */ + int disabled; /* is the regulator disabled in this suspend state */ }; /** diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f2e12a845910..092292b6675e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -25,7 +25,7 @@ #include <linux/list_nulls.h> #include <linux/workqueue.h> #include <linux/mutex.h> -#include <linux/rcupdate.h> +#include <linux/rculist.h> /* * The end of the chain is marked with a special nulls marks which has diff --git a/include/linux/sched.h b/include/linux/sched.h index 4a28deb5f210..d67eee84fd43 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1,197 +1,57 @@ #ifndef _LINUX_SCHED_H #define _LINUX_SCHED_H -#include <uapi/linux/sched.h> - -#include <linux/sched/prio.h> - - -struct sched_param { - int sched_priority; -}; - -#include <asm/param.h> /* for HZ */ +/* + * Define 'struct task_struct' and provide the main scheduler + * APIs (schedule(), wakeup variants, etc.) + */ -#include <linux/capability.h> -#include <linux/threads.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/timex.h> -#include <linux/jiffies.h> -#include <linux/plist.h> -#include <linux/rbtree.h> -#include <linux/thread_info.h> -#include <linux/cpumask.h> -#include <linux/errno.h> -#include <linux/nodemask.h> -#include <linux/mm_types.h> -#include <linux/preempt.h> +#include <uapi/linux/sched.h> -#include <asm/page.h> -#include <asm/ptrace.h> +#include <asm/current.h> -#include <linux/smp.h> +#include <linux/pid.h> #include <linux/sem.h> #include <linux/shm.h> -#include <linux/signal.h> -#include <linux/compiler.h> -#include <linux/completion.h> -#include <linux/pid.h> -#include <linux/percpu.h> -#include <linux/topology.h> +#include <linux/kcov.h> +#include <linux/mutex.h> +#include <linux/plist.h> +#include <linux/hrtimer.h> #include <linux/seccomp.h> +#include <linux/nodemask.h> #include <linux/rcupdate.h> -#include <linux/rculist.h> -#include <linux/rtmutex.h> - -#include <linux/time.h> -#include <linux/param.h> #include <linux/resource.h> -#include <linux/timer.h> -#include <linux/hrtimer.h> -#include <linux/kcov.h> -#include <linux/task_io_accounting.h> #include <linux/latencytop.h> -#include <linux/cred.h> -#include <linux/llist.h> -#include <linux/uidgid.h> -#include <linux/gfp.h> -#include <linux/magic.h> -#include <linux/cgroup-defs.h> - -#include <asm/processor.h> - -#define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */ - -/* - * Extended scheduling parameters data structure. - * - * This is needed because the original struct sched_param can not be - * altered without introducing ABI issues with legacy applications - * (e.g., in sched_getparam()). - * - * However, the possibility of specifying more than just a priority for - * the tasks may be useful for a wide variety of application fields, e.g., - * multimedia, streaming, automation and control, and many others. - * - * This variant (sched_attr) is meant at describing a so-called - * sporadic time-constrained task. In such model a task is specified by: - * - the activation period or minimum instance inter-arrival time; - * - the maximum (or average, depending on the actual scheduling - * discipline) computation time of all instances, a.k.a. runtime; - * - the deadline (relative to the actual activation time) of each - * instance. - * Very briefly, a periodic (sporadic) task asks for the execution of - * some specific computation --which is typically called an instance-- - * (at most) every period. Moreover, each instance typically lasts no more - * than the runtime and must be completed by time instant t equal to - * the instance activation time + the deadline. - * - * This is reflected by the actual fields of the sched_attr structure: - * - * @size size of the structure, for fwd/bwd compat. - * - * @sched_policy task's scheduling policy - * @sched_flags for customizing the scheduler behaviour - * @sched_nice task's nice value (SCHED_NORMAL/BATCH) - * @sched_priority task's static priority (SCHED_FIFO/RR) - * @sched_deadline representative of the task's deadline - * @sched_runtime representative of the task's runtime - * @sched_period representative of the task's period - * - * Given this task model, there are a multiplicity of scheduling algorithms - * and policies, that can be used to ensure all the tasks will make their - * timing constraints. - * - * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the - * only user of this new interface. More information about the algorithm - * available in the scheduling class file or in Documentation/. - */ -struct sched_attr { - u32 size; - - u32 sched_policy; - u64 sched_flags; - - /* SCHED_NORMAL, SCHED_BATCH */ - s32 sched_nice; - - /* SCHED_FIFO, SCHED_RR */ - u32 sched_priority; - - /* SCHED_DEADLINE */ - u64 sched_runtime; - u64 sched_deadline; - u64 sched_period; -}; +#include <linux/sched/prio.h> +#include <linux/signal_types.h> +#include <linux/mm_types_task.h> +#include <linux/task_io_accounting.h> -struct futex_pi_state; -struct robust_list_head; +/* task_struct member predeclarations (sorted alphabetically): */ +struct audit_context; +struct backing_dev_info; struct bio_list; -struct fs_struct; -struct perf_event_context; struct blk_plug; -struct filename; +struct cfs_rq; +struct fs_struct; +struct futex_pi_state; +struct io_context; +struct mempolicy; struct nameidata; - -#define VMACACHE_BITS 2 -#define VMACACHE_SIZE (1U << VMACACHE_BITS) -#define VMACACHE_MASK (VMACACHE_SIZE - 1) - -/* - * These are the constant used to fake the fixed-point load-average - * counting. Some notes: - * - 11 bit fractions expand to 22 bits by the multiplies: this gives - * a load-average precision of 10 bits integer + 11 bits fractional - * - if you want to count load-averages more often, you need more - * precision, or rounding will get you. With 2-second counting freq, - * the EXP_n values would be 1981, 2034 and 2043 if still using only - * 11 bit fractions. - */ -extern unsigned long avenrun[]; /* Load averages */ -extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); - -#define FSHIFT 11 /* nr of bits of precision */ -#define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */ -#define LOAD_FREQ (5*HZ+1) /* 5 sec intervals */ -#define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */ -#define EXP_5 2014 /* 1/exp(5sec/5min) */ -#define EXP_15 2037 /* 1/exp(5sec/15min) */ - -#define CALC_LOAD(load,exp,n) \ - load *= exp; \ - load += n*(FIXED_1-exp); \ - load >>= FSHIFT; - -extern unsigned long total_forks; -extern int nr_threads; -DECLARE_PER_CPU(unsigned long, process_counts); -extern int nr_processes(void); -extern unsigned long nr_running(void); -extern bool single_task_running(void); -extern unsigned long nr_iowait(void); -extern unsigned long nr_iowait_cpu(int cpu); -extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); - -extern void calc_global_load(unsigned long ticks); - -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void cpu_load_update_nohz_start(void); -extern void cpu_load_update_nohz_stop(void); -#else -static inline void cpu_load_update_nohz_start(void) { } -static inline void cpu_load_update_nohz_stop(void) { } -#endif - -extern void dump_cpu_task(int cpu); - +struct nsproxy; +struct perf_event_context; +struct pid_namespace; +struct pipe_inode_info; +struct rcu_node; +struct reclaim_state; +struct robust_list_head; +struct sched_attr; +struct sched_param; struct seq_file; -struct cfs_rq; +struct sighand_struct; +struct signal_struct; +struct task_delay_info; struct task_group; -#ifdef CONFIG_SCHED_DEBUG -extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); -extern void proc_sched_set_task(struct task_struct *p); -#endif /* * Task state bitmask. NOTE! These bits are also @@ -203,53 +63,53 @@ extern void proc_sched_set_task(struct task_struct *p); * modifying one set can't modify the other one by * mistake. */ -#define TASK_RUNNING 0 -#define TASK_INTERRUPTIBLE 1 -#define TASK_UNINTERRUPTIBLE 2 -#define __TASK_STOPPED 4 -#define __TASK_TRACED 8 -/* in tsk->exit_state */ -#define EXIT_DEAD 16 -#define EXIT_ZOMBIE 32 -#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) -/* in tsk->state again */ -#define TASK_DEAD 64 -#define TASK_WAKEKILL 128 -#define TASK_WAKING 256 -#define TASK_PARKED 512 -#define TASK_NOLOAD 1024 -#define TASK_NEW 2048 -#define TASK_STATE_MAX 4096 - -#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" - -extern char ___assert_task_state[1 - 2*!!( - sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; - -/* Convenience macros for the sake of set_current_state */ -#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) -#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) -#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) - -#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) - -/* Convenience macros for the sake of wake_up */ -#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) -#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) - -/* get_task_state() */ -#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ - TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ - __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) - -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) -#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) -#define task_is_stopped_or_traced(task) \ - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) -#define task_contributes_to_load(task) \ - ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FROZEN) == 0 && \ - (task->state & TASK_NOLOAD) == 0) + +/* Used in tsk->state: */ +#define TASK_RUNNING 0 +#define TASK_INTERRUPTIBLE 1 +#define TASK_UNINTERRUPTIBLE 2 +#define __TASK_STOPPED 4 +#define __TASK_TRACED 8 +/* Used in tsk->exit_state: */ +#define EXIT_DEAD 16 +#define EXIT_ZOMBIE 32 +#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) +/* Used in tsk->state again: */ +#define TASK_DEAD 64 +#define TASK_WAKEKILL 128 +#define TASK_WAKING 256 +#define TASK_PARKED 512 +#define TASK_NOLOAD 1024 +#define TASK_NEW 2048 +#define TASK_STATE_MAX 4096 + +#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" + +/* Convenience macros for the sake of set_current_state: */ +#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) +#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) +#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) + +#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) + +/* Convenience macros for the sake of wake_up(): */ +#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) +#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) + +/* get_task_state(): */ +#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ + TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ + __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) + +#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) + +#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) + +#define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) + +#define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ + (task->flags & PF_FROZEN) == 0 && \ + (task->state & TASK_NOLOAD) == 0) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP @@ -299,139 +159,24 @@ extern char ___assert_task_state[1 - 2*!!( * * Also see the comments of try_to_wake_up(). */ -#define __set_current_state(state_value) \ - do { current->state = (state_value); } while (0) -#define set_current_state(state_value) \ - smp_store_mb(current->state, (state_value)) - +#define __set_current_state(state_value) do { current->state = (state_value); } while (0) +#define set_current_state(state_value) smp_store_mb(current->state, (state_value)) #endif -/* Task command name length */ -#define TASK_COMM_LEN 16 - -#include <linux/spinlock.h> - -/* - * This serializes "schedule()" and also protects - * the run-queue from deletions/modifications (but - * _adding_ to the beginning of the run-queue has - * a separate lock). - */ -extern rwlock_t tasklist_lock; -extern spinlock_t mmlist_lock; - -struct task_struct; - -#ifdef CONFIG_PROVE_RCU -extern int lockdep_tasklist_lock_is_held(void); -#endif /* #ifdef CONFIG_PROVE_RCU */ - -extern void sched_init(void); -extern void sched_init_smp(void); -extern asmlinkage void schedule_tail(struct task_struct *prev); -extern void init_idle(struct task_struct *idle, int cpu); -extern void init_idle_bootup_task(struct task_struct *idle); - -extern cpumask_var_t cpu_isolated_map; - -extern int runqueue_is_locked(int cpu); - -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void nohz_balance_enter_idle(int cpu); -extern void set_cpu_sd_state_idle(void); -extern int get_nohz_timer_target(void); -#else -static inline void nohz_balance_enter_idle(int cpu) { } -static inline void set_cpu_sd_state_idle(void) { } -#endif - -/* - * Only dump TASK_* tasks. (0 for all tasks) - */ -extern void show_state_filter(unsigned long state_filter); - -static inline void show_state(void) -{ - show_state_filter(0); -} +/* Task command name length: */ +#define TASK_COMM_LEN 16 -extern void show_regs(struct pt_regs *); +extern cpumask_var_t cpu_isolated_map; -/* - * TASK is a pointer to the task whose backtrace we want to see (or NULL for current - * task), SP is the stack pointer of the first frame that should be shown in the back - * trace (or NULL if the entire call-chain of the task should be shown). - */ -extern void show_stack(struct task_struct *task, unsigned long *sp); - -extern void cpu_init (void); -extern void trap_init(void); -extern void update_process_times(int user); extern void scheduler_tick(void); -extern int sched_cpu_starting(unsigned int cpu); -extern int sched_cpu_activate(unsigned int cpu); -extern int sched_cpu_deactivate(unsigned int cpu); - -#ifdef CONFIG_HOTPLUG_CPU -extern int sched_cpu_dying(unsigned int cpu); -#else -# define sched_cpu_dying NULL -#endif - -extern void sched_show_task(struct task_struct *p); - -#ifdef CONFIG_LOCKUP_DETECTOR -extern void touch_softlockup_watchdog_sched(void); -extern void touch_softlockup_watchdog(void); -extern void touch_softlockup_watchdog_sync(void); -extern void touch_all_softlockup_watchdogs(void); -extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); -extern unsigned int softlockup_panic; -extern unsigned int hardlockup_panic; -void lockup_detector_init(void); -#else -static inline void touch_softlockup_watchdog_sched(void) -{ -} -static inline void touch_softlockup_watchdog(void) -{ -} -static inline void touch_softlockup_watchdog_sync(void) -{ -} -static inline void touch_all_softlockup_watchdogs(void) -{ -} -static inline void lockup_detector_init(void) -{ -} -#endif - -#ifdef CONFIG_DETECT_HUNG_TASK -void reset_hung_task_detector(void); -#else -static inline void reset_hung_task_detector(void) -{ -} -#endif - -/* Attach to any functions which should be ignored in wchan output. */ -#define __sched __attribute__((__section__(".sched.text"))) -/* Linker adds these: start and end of __sched functions */ -extern char __sched_text_start[], __sched_text_end[]; +#define MAX_SCHEDULE_TIMEOUT LONG_MAX -/* Is this address in the __sched functions? */ -extern int in_sched_functions(unsigned long addr); - -#define MAX_SCHEDULE_TIMEOUT LONG_MAX -extern signed long schedule_timeout(signed long timeout); -extern signed long schedule_timeout_interruptible(signed long timeout); -extern signed long schedule_timeout_killable(signed long timeout); -extern signed long schedule_timeout_uninterruptible(signed long timeout); -extern signed long schedule_timeout_idle(signed long timeout); +extern long schedule_timeout(long timeout); +extern long schedule_timeout_interruptible(long timeout); +extern long schedule_timeout_killable(long timeout); +extern long schedule_timeout_uninterruptible(long timeout); +extern long schedule_timeout_idle(long timeout); asmlinkage void schedule(void); extern void schedule_preempt_disabled(void); @@ -440,112 +185,6 @@ extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); extern void io_schedule(void); -void __noreturn do_task_dead(void); - -struct nsproxy; -struct user_namespace; - -#ifdef CONFIG_MMU -extern void arch_pick_mmap_layout(struct mm_struct *mm); -extern unsigned long -arch_get_unmapped_area(struct file *, unsigned long, unsigned long, - unsigned long, unsigned long); -extern unsigned long -arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); -#else -static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} -#endif - -#define SUID_DUMP_DISABLE 0 /* No setuid dumping */ -#define SUID_DUMP_USER 1 /* Dump as user of process */ -#define SUID_DUMP_ROOT 2 /* Dump as root */ - -/* mm flags */ - -/* for SUID_DUMP_* above */ -#define MMF_DUMPABLE_BITS 2 -#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) - -extern void set_dumpable(struct mm_struct *mm, int value); -/* - * This returns the actual value of the suid_dumpable flag. For things - * that are using this for checking for privilege transitions, it must - * test against SUID_DUMP_USER rather than treating it as a boolean - * value. - */ -static inline int __get_dumpable(unsigned long mm_flags) -{ - return mm_flags & MMF_DUMPABLE_MASK; -} - -static inline int get_dumpable(struct mm_struct *mm) -{ - return __get_dumpable(mm->flags); -} - -/* coredump filter bits */ -#define MMF_DUMP_ANON_PRIVATE 2 -#define MMF_DUMP_ANON_SHARED 3 -#define MMF_DUMP_MAPPED_PRIVATE 4 -#define MMF_DUMP_MAPPED_SHARED 5 -#define MMF_DUMP_ELF_HEADERS 6 -#define MMF_DUMP_HUGETLB_PRIVATE 7 -#define MMF_DUMP_HUGETLB_SHARED 8 -#define MMF_DUMP_DAX_PRIVATE 9 -#define MMF_DUMP_DAX_SHARED 10 - -#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS -#define MMF_DUMP_FILTER_BITS 9 -#define MMF_DUMP_FILTER_MASK \ - (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) -#define MMF_DUMP_FILTER_DEFAULT \ - ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ - (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) - -#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS -# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) -#else -# define MMF_DUMP_MASK_DEFAULT_ELF 0 -#endif - /* leave room for more dump flags */ -#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ -#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ -/* - * This one-shot flag is dropped due to necessity of changing exe once again - * on NFS restore - */ -//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ - -#define MMF_HAS_UPROBES 19 /* has uprobes */ -#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ -#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ -#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ -#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ - -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) - -struct sighand_struct { - atomic_t count; - struct k_sigaction action[_NSIG]; - spinlock_t siglock; - wait_queue_head_t signalfd_wqh; -}; - -struct pacct_struct { - int ac_flag; - long ac_exitcode; - unsigned long ac_mem; - u64 ac_utime, ac_stime; - unsigned long ac_minflt, ac_majflt; -}; - -struct cpu_itimer { - u64 expires; - u64 incr; -}; - /** * struct prev_cputime - snaphsot of system and user cputime * @utime: time spent in user mode @@ -557,20 +196,12 @@ struct cpu_itimer { */ struct prev_cputime { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - u64 utime; - u64 stime; - raw_spinlock_t lock; + u64 utime; + u64 stime; + raw_spinlock_t lock; #endif }; -static inline void prev_cputime_init(struct prev_cputime *prev) -{ -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - prev->utime = prev->stime = 0; - raw_spin_lock_init(&prev->lock); -#endif -} - /** * struct task_cputime - collected CPU time counts * @utime: time spent in user mode, in nanoseconds @@ -582,376 +213,35 @@ static inline void prev_cputime_init(struct prev_cputime *prev) * these counts together and treat all three of them in parallel. */ struct task_cputime { - u64 utime; - u64 stime; - unsigned long long sum_exec_runtime; + u64 utime; + u64 stime; + unsigned long long sum_exec_runtime; }; -/* Alternate field names when used to cache expirations. */ -#define virt_exp utime -#define prof_exp stime -#define sched_exp sum_exec_runtime - -/* - * This is the atomic variant of task_cputime, which can be used for - * storing and updating task_cputime statistics without locking. - */ -struct task_cputime_atomic { - atomic64_t utime; - atomic64_t stime; - atomic64_t sum_exec_runtime; -}; - -#define INIT_CPUTIME_ATOMIC \ - (struct task_cputime_atomic) { \ - .utime = ATOMIC64_INIT(0), \ - .stime = ATOMIC64_INIT(0), \ - .sum_exec_runtime = ATOMIC64_INIT(0), \ - } - -#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) - -/* - * Disable preemption until the scheduler is running -- use an unconditional - * value so that it also works on !PREEMPT_COUNT kernels. - * - * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count(). - */ -#define INIT_PREEMPT_COUNT PREEMPT_OFFSET - -/* - * Initial preempt_count value; reflects the preempt_count schedule invariant - * which states that during context switches: - * - * preempt_count() == 2*PREEMPT_DISABLE_OFFSET - * - * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels. - * Note: See finish_task_switch(). - */ -#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) - -/** - * struct thread_group_cputimer - thread group interval timer counts - * @cputime_atomic: atomic thread group interval timers. - * @running: true when there are timers running and - * @cputime_atomic receives updates. - * @checking_timer: true when a thread in the group is in the - * process of checking for thread group timers. - * - * This structure contains the version of task_cputime, above, that is - * used for thread group CPU timer calculations. - */ -struct thread_group_cputimer { - struct task_cputime_atomic cputime_atomic; - bool running; - bool checking_timer; -}; - -#include <linux/rwsem.h> -struct autogroup; - -/* - * NOTE! "signal_struct" does not have its own - * locking, because a shared signal_struct always - * implies a shared sighand_struct, so locking - * sighand_struct is always a proper superset of - * the locking of signal_struct. - */ -struct signal_struct { - atomic_t sigcnt; - atomic_t live; - int nr_threads; - struct list_head thread_head; - - wait_queue_head_t wait_chldexit; /* for wait4() */ - - /* current thread group signal load-balancing target: */ - struct task_struct *curr_target; - - /* shared signal handling: */ - struct sigpending shared_pending; - - /* thread group exit support */ - int group_exit_code; - /* overloaded: - * - notify group_exit_task when ->count is equal to notify_count - * - everyone except group_exit_task is stopped during signal delivery - * of fatal signals, group_exit_task processes the signal. - */ - int notify_count; - struct task_struct *group_exit_task; - - /* thread group stop support, overloads group_exit_code too */ - int group_stop_count; - unsigned int flags; /* see SIGNAL_* flags below */ - - /* - * PR_SET_CHILD_SUBREAPER marks a process, like a service - * manager, to re-parent orphan (double-forking) child processes - * to this process instead of 'init'. The service manager is - * able to receive SIGCHLD signals and is able to investigate - * the process until it calls wait(). All children of this - * process will inherit a flag if they should look for a - * child_subreaper process at exit. - */ - unsigned int is_child_subreaper:1; - unsigned int has_child_subreaper:1; - -#ifdef CONFIG_POSIX_TIMERS - - /* POSIX.1b Interval Timers */ - int posix_timer_id; - struct list_head posix_timers; - - /* ITIMER_REAL timer for the process */ - struct hrtimer real_timer; - ktime_t it_real_incr; - - /* - * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use - * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these - * values are defined to 0 and 1 respectively - */ - struct cpu_itimer it[2]; - - /* - * Thread group totals for process CPU timers. - * See thread_group_cputimer(), et al, for details. - */ - struct thread_group_cputimer cputimer; - - /* Earliest-expiration cache. */ - struct task_cputime cputime_expires; - - struct list_head cpu_timers[3]; - -#endif - - struct pid *leader_pid; - -#ifdef CONFIG_NO_HZ_FULL - atomic_t tick_dep_mask; -#endif - - struct pid *tty_old_pgrp; - - /* boolean value for session group leader */ - int leader; - - struct tty_struct *tty; /* NULL if no tty */ - -#ifdef CONFIG_SCHED_AUTOGROUP - struct autogroup *autogroup; -#endif - /* - * Cumulative resource counters for dead threads in the group, - * and for reaped dead child processes forked by this group. - * Live threads maintain their own counters and add to these - * in __exit_signal, except for the group leader. - */ - seqlock_t stats_lock; - u64 utime, stime, cutime, cstime; - u64 gtime; - u64 cgtime; - struct prev_cputime prev_cputime; - unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; - unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; - unsigned long inblock, oublock, cinblock, coublock; - unsigned long maxrss, cmaxrss; - struct task_io_accounting ioac; - - /* - * Cumulative ns of schedule CPU time fo dead threads in the - * group, not including a zombie group leader, (This only differs - * from jiffies_to_ns(utime + stime) if sched_clock uses something - * other than jiffies.) - */ - unsigned long long sum_sched_runtime; - - /* - * We don't bother to synchronize most readers of this at all, - * because there is no reader checking a limit that actually needs - * to get both rlim_cur and rlim_max atomically, and either one - * alone is a single word that can safely be read normally. - * getrlimit/setrlimit use task_lock(current->group_leader) to - * protect this instead of the siglock, because they really - * have no need to disable irqs. - */ - struct rlimit rlim[RLIM_NLIMITS]; - -#ifdef CONFIG_BSD_PROCESS_ACCT - struct pacct_struct pacct; /* per-process accounting information */ -#endif -#ifdef CONFIG_TASKSTATS - struct taskstats *stats; -#endif -#ifdef CONFIG_AUDIT - unsigned audit_tty; - struct tty_audit_buf *tty_audit_buf; -#endif - - /* - * Thread is the potential origin of an oom condition; kill first on - * oom - */ - bool oom_flag_origin; - short oom_score_adj; /* OOM kill score adjustment */ - short oom_score_adj_min; /* OOM kill score adjustment min value. - * Only settable by CAP_SYS_RESOURCE. */ - struct mm_struct *oom_mm; /* recorded mm when the thread group got - * killed by the oom killer */ - - struct mutex cred_guard_mutex; /* guard against foreign influences on - * credential calculations - * (notably. ptrace) */ -}; - -/* - * Bits in flags field of signal_struct. - */ -#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ -#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ -#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ -#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ -/* - * Pending notifications to parent. - */ -#define SIGNAL_CLD_STOPPED 0x00000010 -#define SIGNAL_CLD_CONTINUED 0x00000020 -#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) - -#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ - -#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ - SIGNAL_STOP_CONTINUED) - -static inline void signal_set_stop_flags(struct signal_struct *sig, - unsigned int flags) -{ - WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); - sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; -} - -/* If true, all threads except ->group_exit_task have pending SIGKILL */ -static inline int signal_group_exit(const struct signal_struct *sig) -{ - return (sig->flags & SIGNAL_GROUP_EXIT) || - (sig->group_exit_task != NULL); -} - -/* - * Some day this will be a full-fledged user tracking system.. - */ -struct user_struct { - atomic_t __count; /* reference count */ - atomic_t processes; /* How many processes does this user have? */ - atomic_t sigpending; /* How many pending signals does this user have? */ -#ifdef CONFIG_FANOTIFY - atomic_t fanotify_listeners; -#endif -#ifdef CONFIG_EPOLL - atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ -#endif -#ifdef CONFIG_POSIX_MQUEUE - /* protected by mq_lock */ - unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ -#endif - unsigned long locked_shm; /* How many pages of mlocked shm ? */ - unsigned long unix_inflight; /* How many files in flight in unix sockets */ - atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */ - -#ifdef CONFIG_KEYS - struct key *uid_keyring; /* UID specific keyring */ - struct key *session_keyring; /* UID's default session keyring */ -#endif - - /* Hash table maintenance information */ - struct hlist_node uidhash_node; - kuid_t uid; - -#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) - atomic_long_t locked_vm; -#endif -}; - -extern int uids_sysfs_init(void); - -extern struct user_struct *find_user(kuid_t); - -extern struct user_struct root_user; -#define INIT_USER (&root_user) - - -struct backing_dev_info; -struct reclaim_state; +/* Alternate field names when used on cache expirations: */ +#define virt_exp utime +#define prof_exp stime +#define sched_exp sum_exec_runtime -#ifdef CONFIG_SCHED_INFO struct sched_info { - /* cumulative counters */ - unsigned long pcount; /* # of times run on this cpu */ - unsigned long long run_delay; /* time spent waiting on a runqueue */ +#ifdef CONFIG_SCHED_INFO + /* Cumulative counters: */ - /* timestamps */ - unsigned long long last_arrival,/* when we last ran on a cpu */ - last_queued; /* when we were last queued to run */ -}; -#endif /* CONFIG_SCHED_INFO */ + /* # of times we have run on this CPU: */ + unsigned long pcount; -#ifdef CONFIG_TASK_DELAY_ACCT -struct task_delay_info { - spinlock_t lock; - unsigned int flags; /* Private per-task flags */ + /* Time spent waiting on a runqueue: */ + unsigned long long run_delay; - /* For each stat XXX, add following, aligned appropriately - * - * struct timespec XXX_start, XXX_end; - * u64 XXX_delay; - * u32 XXX_count; - * - * Atomicity of updates to XXX_delay, XXX_count protected by - * single lock above (split into XXX_lock if contention is an issue). - */ + /* Timestamps: */ - /* - * XXX_count is incremented on every XXX operation, the delay - * associated with the operation is added to XXX_delay. - * XXX_delay contains the accumulated delay time in nanoseconds. - */ - u64 blkio_start; /* Shared by blkio, swapin */ - u64 blkio_delay; /* wait for sync block io completion */ - u64 swapin_delay; /* wait for swapin block io completion */ - u32 blkio_count; /* total count of the number of sync block */ - /* io operations performed */ - u32 swapin_count; /* total count of the number of swapin block */ - /* io operations performed */ - - u64 freepages_start; - u64 freepages_delay; /* wait for memory reclaim */ - u32 freepages_count; /* total count of memory reclaim */ -}; -#endif /* CONFIG_TASK_DELAY_ACCT */ + /* When did we last run on a CPU? */ + unsigned long long last_arrival; -static inline int sched_info_on(void) -{ -#ifdef CONFIG_SCHEDSTATS - return 1; -#elif defined(CONFIG_TASK_DELAY_ACCT) - extern int delayacct_on; - return delayacct_on; -#else - return 0; -#endif -} - -#ifdef CONFIG_SCHEDSTATS -void force_schedstat_enabled(void); -#endif + /* When were we last queued to run? */ + unsigned long long last_queued; -enum cpu_idle_type { - CPU_IDLE, - CPU_NOT_IDLE, - CPU_NEWLY_IDLE, - CPU_MAX_IDLE_TYPES +#endif /* CONFIG_SCHED_INFO */ }; /* @@ -961,290 +251,12 @@ enum cpu_idle_type { * We define a basic fixed point arithmetic range, and then formalize * all these metrics based on that basic range. */ -# define SCHED_FIXEDPOINT_SHIFT 10 -# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) - -/* - * Increase resolution of cpu_capacity calculations - */ -#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT -#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) - -/* - * Wake-queues are lists of tasks with a pending wakeup, whose - * callers have already marked the task as woken internally, - * and can thus carry on. A common use case is being able to - * do the wakeups once the corresponding user lock as been - * released. - * - * We hold reference to each task in the list across the wakeup, - * thus guaranteeing that the memory is still valid by the time - * the actual wakeups are performed in wake_up_q(). - * - * One per task suffices, because there's never a need for a task to be - * in two wake queues simultaneously; it is forbidden to abandon a task - * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is - * already in a wake queue, the wakeup will happen soon and the second - * waker can just skip it. - * - * The DEFINE_WAKE_Q macro declares and initializes the list head. - * wake_up_q() does NOT reinitialize the list; it's expected to be - * called near the end of a function. Otherwise, the list can be - * re-initialized for later re-use by wake_q_init(). - * - * Note that this can cause spurious wakeups. schedule() callers - * must ensure the call is done inside a loop, confirming that the - * wakeup condition has in fact occurred. - */ -struct wake_q_node { - struct wake_q_node *next; -}; - -struct wake_q_head { - struct wake_q_node *first; - struct wake_q_node **lastp; -}; - -#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) - -#define DEFINE_WAKE_Q(name) \ - struct wake_q_head name = { WAKE_Q_TAIL, &name.first } - -static inline void wake_q_init(struct wake_q_head *head) -{ - head->first = WAKE_Q_TAIL; - head->lastp = &head->first; -} - -extern void wake_q_add(struct wake_q_head *head, - struct task_struct *task); -extern void wake_up_q(struct wake_q_head *head); - -/* - * sched-domains (multiprocessor balancing) declarations: - */ -#ifdef CONFIG_SMP -#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ -#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ -#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ -#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ -#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ -#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ -#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */ -#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */ -#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ -#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ -#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ -#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ -#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ -#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ -#define SD_NUMA 0x4000 /* cross-node balancing */ - -#ifdef CONFIG_SCHED_SMT -static inline int cpu_smt_flags(void) -{ - return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; -} -#endif - -#ifdef CONFIG_SCHED_MC -static inline int cpu_core_flags(void) -{ - return SD_SHARE_PKG_RESOURCES; -} -#endif - -#ifdef CONFIG_NUMA -static inline int cpu_numa_flags(void) -{ - return SD_NUMA; -} -#endif - -extern int arch_asym_cpu_priority(int cpu); - -struct sched_domain_attr { - int relax_domain_level; -}; - -#define SD_ATTR_INIT (struct sched_domain_attr) { \ - .relax_domain_level = -1, \ -} - -extern int sched_domain_level_max; - -struct sched_group; - -struct sched_domain_shared { - atomic_t ref; - atomic_t nr_busy_cpus; - int has_idle_cores; -}; - -struct sched_domain { - /* These fields must be setup */ - struct sched_domain *parent; /* top domain must be null terminated */ - struct sched_domain *child; /* bottom domain must be null terminated */ - struct sched_group *groups; /* the balancing groups of the domain */ - unsigned long min_interval; /* Minimum balance interval ms */ - unsigned long max_interval; /* Maximum balance interval ms */ - unsigned int busy_factor; /* less balancing by factor if busy */ - unsigned int imbalance_pct; /* No balance until over watermark */ - unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ - unsigned int busy_idx; - unsigned int idle_idx; - unsigned int newidle_idx; - unsigned int wake_idx; - unsigned int forkexec_idx; - unsigned int smt_gain; - - int nohz_idle; /* NOHZ IDLE status */ - int flags; /* See SD_* */ - int level; - - /* Runtime fields. */ - unsigned long last_balance; /* init to jiffies. units in jiffies */ - unsigned int balance_interval; /* initialise to 1. units in ms. */ - unsigned int nr_balance_failed; /* initialise to 0 */ - - /* idle_balance() stats */ - u64 max_newidle_lb_cost; - unsigned long next_decay_max_lb_cost; - - u64 avg_scan_cost; /* select_idle_sibling */ - -#ifdef CONFIG_SCHEDSTATS - /* load_balance() stats */ - unsigned int lb_count[CPU_MAX_IDLE_TYPES]; - unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; - unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; - unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; - unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; - unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; - unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; - unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; - - /* Active load balancing */ - unsigned int alb_count; - unsigned int alb_failed; - unsigned int alb_pushed; - - /* SD_BALANCE_EXEC stats */ - unsigned int sbe_count; - unsigned int sbe_balanced; - unsigned int sbe_pushed; - - /* SD_BALANCE_FORK stats */ - unsigned int sbf_count; - unsigned int sbf_balanced; - unsigned int sbf_pushed; - - /* try_to_wake_up() stats */ - unsigned int ttwu_wake_remote; - unsigned int ttwu_move_affine; - unsigned int ttwu_move_balance; -#endif -#ifdef CONFIG_SCHED_DEBUG - char *name; -#endif - union { - void *private; /* used during construction */ - struct rcu_head rcu; /* used during destruction */ - }; - struct sched_domain_shared *shared; - - unsigned int span_weight; - /* - * Span of all CPUs in this domain. - * - * NOTE: this field is variable length. (Allocated dynamically - * by attaching extra space to the end of the structure, - * depending on how many CPUs the kernel has booted up with) - */ - unsigned long span[0]; -}; - -static inline struct cpumask *sched_domain_span(struct sched_domain *sd) -{ - return to_cpumask(sd->span); -} - -extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new); - -/* Allocate an array of sched domains, for partition_sched_domains(). */ -cpumask_var_t *alloc_sched_domains(unsigned int ndoms); -void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); - -bool cpus_share_cache(int this_cpu, int that_cpu); - -typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); -typedef int (*sched_domain_flags_f)(void); - -#define SDTL_OVERLAP 0x01 - -struct sd_data { - struct sched_domain **__percpu sd; - struct sched_domain_shared **__percpu sds; - struct sched_group **__percpu sg; - struct sched_group_capacity **__percpu sgc; -}; - -struct sched_domain_topology_level { - sched_domain_mask_f mask; - sched_domain_flags_f sd_flags; - int flags; - int numa_level; - struct sd_data data; -#ifdef CONFIG_SCHED_DEBUG - char *name; -#endif -}; - -extern void set_sched_topology(struct sched_domain_topology_level *tl); -extern void wake_up_if_idle(int cpu); - -#ifdef CONFIG_SCHED_DEBUG -# define SD_INIT_NAME(type) .name = #type -#else -# define SD_INIT_NAME(type) -#endif - -#else /* CONFIG_SMP */ - -struct sched_domain_attr; - -static inline void -partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new) -{ -} - -static inline bool cpus_share_cache(int this_cpu, int that_cpu) -{ - return true; -} - -#endif /* !CONFIG_SMP */ - - -struct io_context; /* See blkdev.h */ - - -#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK -extern void prefetch_stack(struct task_struct *t); -#else -static inline void prefetch_stack(struct task_struct *t) { } -#endif - -struct audit_context; /* See audit.c */ -struct mempolicy; -struct pipe_inode_info; -struct uts_namespace; +# define SCHED_FIXEDPOINT_SHIFT 10 +# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) struct load_weight { - unsigned long weight; - u32 inv_weight; + unsigned long weight; + u32 inv_weight; }; /* @@ -1300,71 +312,73 @@ struct load_weight { * issues. */ struct sched_avg { - u64 last_update_time, load_sum; - u32 util_sum, period_contrib; - unsigned long load_avg, util_avg; + u64 last_update_time; + u64 load_sum; + u32 util_sum; + u32 period_contrib; + unsigned long load_avg; + unsigned long util_avg; }; -#ifdef CONFIG_SCHEDSTATS struct sched_statistics { - u64 wait_start; - u64 wait_max; - u64 wait_count; - u64 wait_sum; - u64 iowait_count; - u64 iowait_sum; - - u64 sleep_start; - u64 sleep_max; - s64 sum_sleep_runtime; - - u64 block_start; - u64 block_max; - u64 exec_max; - u64 slice_max; - - u64 nr_migrations_cold; - u64 nr_failed_migrations_affine; - u64 nr_failed_migrations_running; - u64 nr_failed_migrations_hot; - u64 nr_forced_migrations; - - u64 nr_wakeups; - u64 nr_wakeups_sync; - u64 nr_wakeups_migrate; - u64 nr_wakeups_local; - u64 nr_wakeups_remote; - u64 nr_wakeups_affine; - u64 nr_wakeups_affine_attempts; - u64 nr_wakeups_passive; - u64 nr_wakeups_idle; -}; +#ifdef CONFIG_SCHEDSTATS + u64 wait_start; + u64 wait_max; + u64 wait_count; + u64 wait_sum; + u64 iowait_count; + u64 iowait_sum; + + u64 sleep_start; + u64 sleep_max; + s64 sum_sleep_runtime; + + u64 block_start; + u64 block_max; + u64 exec_max; + u64 slice_max; + + u64 nr_migrations_cold; + u64 nr_failed_migrations_affine; + u64 nr_failed_migrations_running; + u64 nr_failed_migrations_hot; + u64 nr_forced_migrations; + + u64 nr_wakeups; + u64 nr_wakeups_sync; + u64 nr_wakeups_migrate; + u64 nr_wakeups_local; + u64 nr_wakeups_remote; + u64 nr_wakeups_affine; + u64 nr_wakeups_affine_attempts; + u64 nr_wakeups_passive; + u64 nr_wakeups_idle; #endif +}; struct sched_entity { - struct load_weight load; /* for load-balancing */ - struct rb_node run_node; - struct list_head group_node; - unsigned int on_rq; + /* For load-balancing: */ + struct load_weight load; + struct rb_node run_node; + struct list_head group_node; + unsigned int on_rq; - u64 exec_start; - u64 sum_exec_runtime; - u64 vruntime; - u64 prev_sum_exec_runtime; + u64 exec_start; + u64 sum_exec_runtime; + u64 vruntime; + u64 prev_sum_exec_runtime; - u64 nr_migrations; + u64 nr_migrations; -#ifdef CONFIG_SCHEDSTATS - struct sched_statistics statistics; -#endif + struct sched_statistics statistics; #ifdef CONFIG_FAIR_GROUP_SCHED - int depth; - struct sched_entity *parent; + int depth; + struct sched_entity *parent; /* rq on which this entity is (to be) queued: */ - struct cfs_rq *cfs_rq; + struct cfs_rq *cfs_rq; /* rq "owned" by this entity/group: */ - struct cfs_rq *my_q; + struct cfs_rq *my_q; #endif #ifdef CONFIG_SMP @@ -1374,49 +388,49 @@ struct sched_entity { * Put into separate cache line so it does not * collide with read-mostly values above. */ - struct sched_avg avg ____cacheline_aligned_in_smp; + struct sched_avg avg ____cacheline_aligned_in_smp; #endif }; struct sched_rt_entity { - struct list_head run_list; - unsigned long timeout; - unsigned long watchdog_stamp; - unsigned int time_slice; - unsigned short on_rq; - unsigned short on_list; - - struct sched_rt_entity *back; + struct list_head run_list; + unsigned long timeout; + unsigned long watchdog_stamp; + unsigned int time_slice; + unsigned short on_rq; + unsigned short on_list; + + struct sched_rt_entity *back; #ifdef CONFIG_RT_GROUP_SCHED - struct sched_rt_entity *parent; + struct sched_rt_entity *parent; /* rq on which this entity is (to be) queued: */ - struct rt_rq *rt_rq; + struct rt_rq *rt_rq; /* rq "owned" by this entity/group: */ - struct rt_rq *my_q; + struct rt_rq *my_q; #endif }; struct sched_dl_entity { - struct rb_node rb_node; + struct rb_node rb_node; /* * Original scheduling parameters. Copied here from sched_attr * during sched_setattr(), they will remain the same until * the next sched_setattr(). */ - u64 dl_runtime; /* maximum runtime for each instance */ - u64 dl_deadline; /* relative deadline of each instance */ - u64 dl_period; /* separation of two instances (period) */ - u64 dl_bw; /* dl_runtime / dl_deadline */ + u64 dl_runtime; /* Maximum runtime for each instance */ + u64 dl_deadline; /* Relative deadline of each instance */ + u64 dl_period; /* Separation of two instances (period) */ + u64 dl_bw; /* dl_runtime / dl_deadline */ /* * Actual scheduling parameters. Initialized with the values above, * they are continously updated during task execution. Note that * the remaining runtime could be < 0 in case we are in overrun. */ - s64 runtime; /* remaining runtime for this instance */ - u64 deadline; /* absolute deadline for this instance */ - unsigned int flags; /* specifying the scheduler behaviour */ + s64 runtime; /* Remaining runtime for this instance */ + u64 deadline; /* Absolute deadline for this instance */ + unsigned int flags; /* Specifying the scheduler behaviour */ /* * Some bool flags: @@ -1429,28 +443,31 @@ struct sched_dl_entity { * outside bandwidth enforcement mechanism (but only until we * exit the critical section); * - * @dl_yielded tells if task gave up the cpu before consuming + * @dl_yielded tells if task gave up the CPU before consuming * all its available runtime during the last job. */ - int dl_throttled, dl_boosted, dl_yielded; + int dl_throttled; + int dl_boosted; + int dl_yielded; /* * Bandwidth enforcement timer. Each -deadline task has its * own bandwidth to be enforced, thus we need one timer per task. */ - struct hrtimer dl_timer; + struct hrtimer dl_timer; }; union rcu_special { struct { - u8 blocked; - u8 need_qs; - u8 exp_need_qs; - u8 pad; /* Otherwise the compiler can store garbage here. */ + u8 blocked; + u8 need_qs; + u8 exp_need_qs; + + /* Otherwise the compiler can store garbage here: */ + u8 pad; } b; /* Bits. */ u32 s; /* Set of bits. */ }; -struct rcu_node; enum perf_event_task_context { perf_invalid_context = -1, @@ -1459,23 +476,8 @@ enum perf_event_task_context { perf_nr_task_contexts, }; -/* Track pages that require TLB flushes */ -struct tlbflush_unmap_batch { - /* - * Each bit set is a CPU that potentially has a TLB entry for one of - * the PFNs being flushed. See set_tlb_ubc_flush_pending(). - */ - struct cpumask cpumask; - - /* True if any bit in cpumask is set */ - bool flush_required; - - /* - * If true then the PTE was dirty when unmapped. The entry must be - * flushed before IO is initiated or a stale TLB entry potentially - * allows an update without redirtying the page. - */ - bool writable; +struct wake_q_node { + struct wake_q_node *next; }; struct task_struct { @@ -1484,362 +486,417 @@ struct task_struct { * For reasons of header soup (see current_thread_info()), this * must be the first element of task_struct. */ - struct thread_info thread_info; + struct thread_info thread_info; #endif - volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ - void *stack; - atomic_t usage; - unsigned int flags; /* per process flags, defined below */ - unsigned int ptrace; + /* -1 unrunnable, 0 runnable, >0 stopped: */ + volatile long state; + void *stack; + atomic_t usage; + /* Per task flags (PF_*), defined further below: */ + unsigned int flags; + unsigned int ptrace; #ifdef CONFIG_SMP - struct llist_node wake_entry; - int on_cpu; + struct llist_node wake_entry; + int on_cpu; #ifdef CONFIG_THREAD_INFO_IN_TASK - unsigned int cpu; /* current CPU */ + /* Current CPU: */ + unsigned int cpu; #endif - unsigned int wakee_flips; - unsigned long wakee_flip_decay_ts; - struct task_struct *last_wakee; + unsigned int wakee_flips; + unsigned long wakee_flip_decay_ts; + struct task_struct *last_wakee; - int wake_cpu; + int wake_cpu; #endif - int on_rq; + int on_rq; - int prio, static_prio, normal_prio; - unsigned int rt_priority; - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; + int prio; + int static_prio; + int normal_prio; + unsigned int rt_priority; + + const struct sched_class *sched_class; + struct sched_entity se; + struct sched_rt_entity rt; #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; + struct task_group *sched_task_group; #endif - struct sched_dl_entity dl; + struct sched_dl_entity dl; #ifdef CONFIG_PREEMPT_NOTIFIERS - /* list of struct preempt_notifier: */ - struct hlist_head preempt_notifiers; + /* List of struct preempt_notifier: */ + struct hlist_head preempt_notifiers; #endif #ifdef CONFIG_BLK_DEV_IO_TRACE - unsigned int btrace_seq; + unsigned int btrace_seq; #endif - unsigned int policy; - int nr_cpus_allowed; - cpumask_t cpus_allowed; + unsigned int policy; + int nr_cpus_allowed; + cpumask_t cpus_allowed; #ifdef CONFIG_PREEMPT_RCU - int rcu_read_lock_nesting; - union rcu_special rcu_read_unlock_special; - struct list_head rcu_node_entry; - struct rcu_node *rcu_blocked_node; + int rcu_read_lock_nesting; + union rcu_special rcu_read_unlock_special; + struct list_head rcu_node_entry; + struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_PREEMPT_RCU */ + #ifdef CONFIG_TASKS_RCU - unsigned long rcu_tasks_nvcsw; - bool rcu_tasks_holdout; - struct list_head rcu_tasks_holdout_list; - int rcu_tasks_idle_cpu; + unsigned long rcu_tasks_nvcsw; + bool rcu_tasks_holdout; + struct list_head rcu_tasks_holdout_list; + int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ -#ifdef CONFIG_SCHED_INFO - struct sched_info sched_info; -#endif + struct sched_info sched_info; - struct list_head tasks; + struct list_head tasks; #ifdef CONFIG_SMP - struct plist_node pushable_tasks; - struct rb_node pushable_dl_tasks; -#endif - - struct mm_struct *mm, *active_mm; - /* per-thread vma caching */ - u32 vmacache_seqnum; - struct vm_area_struct *vmacache[VMACACHE_SIZE]; -#if defined(SPLIT_RSS_COUNTING) - struct task_rss_stat rss_stat; -#endif -/* task state */ - int exit_state; - int exit_code, exit_signal; - int pdeath_signal; /* The signal sent when the parent dies */ - unsigned long jobctl; /* JOBCTL_*, siglock protected */ - - /* Used for emulating ABI behavior of previous Linux versions */ - unsigned int personality; - - /* scheduler bits, serialized by scheduler locks */ - unsigned sched_reset_on_fork:1; - unsigned sched_contributes_to_load:1; - unsigned sched_migrated:1; - unsigned sched_remote_wakeup:1; - unsigned :0; /* force alignment to the next boundary */ - - /* unserialized, strictly 'current' */ - unsigned in_execve:1; /* bit to tell LSMs we're in execve */ - unsigned in_iowait:1; -#if !defined(TIF_RESTORE_SIGMASK) - unsigned restore_sigmask:1; + struct plist_node pushable_tasks; + struct rb_node pushable_dl_tasks; +#endif + + struct mm_struct *mm; + struct mm_struct *active_mm; + + /* Per-thread vma caching: */ + struct vmacache vmacache; + +#ifdef SPLIT_RSS_COUNTING + struct task_rss_stat rss_stat; +#endif + int exit_state; + int exit_code; + int exit_signal; + /* The signal sent when the parent dies: */ + int pdeath_signal; + /* JOBCTL_*, siglock protected: */ + unsigned long jobctl; + + /* Used for emulating ABI behavior of previous Linux versions: */ + unsigned int personality; + + /* Scheduler bits, serialized by scheduler locks: */ + unsigned sched_reset_on_fork:1; + unsigned sched_contributes_to_load:1; + unsigned sched_migrated:1; + unsigned sched_remote_wakeup:1; + /* Force alignment to the next boundary: */ + unsigned :0; + + /* Unserialized, strictly 'current' */ + + /* Bit to tell LSMs we're in execve(): */ + unsigned in_execve:1; + unsigned in_iowait:1; +#ifndef TIF_RESTORE_SIGMASK + unsigned restore_sigmask:1; #endif #ifdef CONFIG_MEMCG - unsigned memcg_may_oom:1; + unsigned memcg_may_oom:1; #ifndef CONFIG_SLOB - unsigned memcg_kmem_skip_account:1; + unsigned memcg_kmem_skip_account:1; #endif #endif #ifdef CONFIG_COMPAT_BRK - unsigned brk_randomized:1; + unsigned brk_randomized:1; #endif - unsigned long atomic_flags; /* Flags needing atomic access. */ + unsigned long atomic_flags; /* Flags requiring atomic access. */ - struct restart_block restart_block; + struct restart_block restart_block; - pid_t pid; - pid_t tgid; + pid_t pid; + pid_t tgid; #ifdef CONFIG_CC_STACKPROTECTOR - /* Canary value for the -fstack-protector gcc feature */ - unsigned long stack_canary; + /* Canary value for the -fstack-protector GCC feature: */ + unsigned long stack_canary; #endif /* - * pointers to (original) parent process, youngest child, younger sibling, + * Pointers to the (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with * p->real_parent->pid) */ - struct task_struct __rcu *real_parent; /* real parent process */ - struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */ + + /* Real parent process: */ + struct task_struct __rcu *real_parent; + + /* Recipient of SIGCHLD, wait4() reports: */ + struct task_struct __rcu *parent; + /* - * children/sibling forms the list of my natural children + * Children/sibling form the list of natural children: */ - struct list_head children; /* list of my children */ - struct list_head sibling; /* linkage in my parent's children list */ - struct task_struct *group_leader; /* threadgroup leader */ + struct list_head children; + struct list_head sibling; + struct task_struct *group_leader; /* - * ptraced is the list of tasks this task is using ptrace on. + * 'ptraced' is the list of tasks this task is using ptrace() on. + * * This includes both natural children and PTRACE_ATTACH targets. - * p->ptrace_entry is p's link on the p->parent->ptraced list. + * 'ptrace_entry' is this task's link on the p->parent->ptraced list. */ - struct list_head ptraced; - struct list_head ptrace_entry; + struct list_head ptraced; + struct list_head ptrace_entry; /* PID/PID hash table linkage. */ - struct pid_link pids[PIDTYPE_MAX]; - struct list_head thread_group; - struct list_head thread_node; + struct pid_link pids[PIDTYPE_MAX]; + struct list_head thread_group; + struct list_head thread_node; + + struct completion *vfork_done; + + /* CLONE_CHILD_SETTID: */ + int __user *set_child_tid; - struct completion *vfork_done; /* for vfork() */ - int __user *set_child_tid; /* CLONE_CHILD_SETTID */ - int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ + /* CLONE_CHILD_CLEARTID: */ + int __user *clear_child_tid; - u64 utime, stime; + u64 utime; + u64 stime; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME - u64 utimescaled, stimescaled; + u64 utimescaled; + u64 stimescaled; #endif - u64 gtime; - struct prev_cputime prev_cputime; + u64 gtime; + struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN - seqcount_t vtime_seqcount; - unsigned long long vtime_snap; + seqcount_t vtime_seqcount; + unsigned long long vtime_snap; enum { - /* Task is sleeping or running in a CPU with VTIME inactive */ + /* Task is sleeping or running in a CPU with VTIME inactive: */ VTIME_INACTIVE = 0, - /* Task runs in userspace in a CPU with VTIME active */ + /* Task runs in userspace in a CPU with VTIME active: */ VTIME_USER, - /* Task runs in kernelspace in a CPU with VTIME active */ + /* Task runs in kernelspace in a CPU with VTIME active: */ VTIME_SYS, } vtime_snap_whence; #endif #ifdef CONFIG_NO_HZ_FULL - atomic_t tick_dep_mask; + atomic_t tick_dep_mask; #endif - unsigned long nvcsw, nivcsw; /* context switch counts */ - u64 start_time; /* monotonic time in nsec */ - u64 real_start_time; /* boot based time in nsec */ -/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ - unsigned long min_flt, maj_flt; + /* Context switch counts: */ + unsigned long nvcsw; + unsigned long nivcsw; + + /* Monotonic time in nsecs: */ + u64 start_time; + + /* Boot based time in nsecs: */ + u64 real_start_time; + + /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */ + unsigned long min_flt; + unsigned long maj_flt; #ifdef CONFIG_POSIX_TIMERS - struct task_cputime cputime_expires; - struct list_head cpu_timers[3]; -#endif - -/* process credentials */ - const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ - const struct cred __rcu *real_cred; /* objective and real subjective task - * credentials (COW) */ - const struct cred __rcu *cred; /* effective (overridable) subjective task - * credentials (COW) */ - char comm[TASK_COMM_LEN]; /* executable name excluding path - - access with [gs]et_task_comm (which lock - it with task_lock()) - - initialized normally by setup_new_exec */ -/* file system info */ - struct nameidata *nameidata; + struct task_cputime cputime_expires; + struct list_head cpu_timers[3]; +#endif + + /* Process credentials: */ + + /* Tracer's credentials at attach: */ + const struct cred __rcu *ptracer_cred; + + /* Objective and real subjective task credentials (COW): */ + const struct cred __rcu *real_cred; + + /* Effective (overridable) subjective task credentials (COW): */ + const struct cred __rcu *cred; + + /* + * executable name, excluding path. + * + * - normally initialized setup_new_exec() + * - access it with [gs]et_task_comm() + * - lock it with task_lock() + */ + char comm[TASK_COMM_LEN]; + + struct nameidata *nameidata; + #ifdef CONFIG_SYSVIPC -/* ipc stuff */ - struct sysv_sem sysvsem; - struct sysv_shm sysvshm; + struct sysv_sem sysvsem; + struct sysv_shm sysvshm; #endif #ifdef CONFIG_DETECT_HUNG_TASK -/* hung task detection */ - unsigned long last_switch_count; -#endif -/* filesystem information */ - struct fs_struct *fs; -/* open file information */ - struct files_struct *files; -/* namespaces */ - struct nsproxy *nsproxy; -/* signal handlers */ - struct signal_struct *signal; - struct sighand_struct *sighand; - - sigset_t blocked, real_blocked; - sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ - struct sigpending pending; - - unsigned long sas_ss_sp; - size_t sas_ss_size; - unsigned sas_ss_flags; - - struct callback_head *task_works; - - struct audit_context *audit_context; + unsigned long last_switch_count; +#endif + /* Filesystem information: */ + struct fs_struct *fs; + + /* Open file information: */ + struct files_struct *files; + + /* Namespaces: */ + struct nsproxy *nsproxy; + + /* Signal handlers: */ + struct signal_struct *signal; + struct sighand_struct *sighand; + sigset_t blocked; + sigset_t real_blocked; + /* Restored if set_restore_sigmask() was used: */ + sigset_t saved_sigmask; + struct sigpending pending; + unsigned long sas_ss_sp; + size_t sas_ss_size; + unsigned int sas_ss_flags; + + struct callback_head *task_works; + + struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL - kuid_t loginuid; - unsigned int sessionid; + kuid_t loginuid; + unsigned int sessionid; #endif - struct seccomp seccomp; + struct seccomp seccomp; -/* Thread group tracking */ - u32 parent_exec_id; - u32 self_exec_id; -/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, - * mempolicy */ - spinlock_t alloc_lock; + /* Thread group tracking: */ + u32 parent_exec_id; + u32 self_exec_id; + + /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */ + spinlock_t alloc_lock; /* Protection of the PI data structures: */ - raw_spinlock_t pi_lock; + raw_spinlock_t pi_lock; - struct wake_q_node wake_q; + struct wake_q_node wake_q; #ifdef CONFIG_RT_MUTEXES - /* PI waiters blocked on a rt_mutex held by this task */ - struct rb_root pi_waiters; - struct rb_node *pi_waiters_leftmost; - /* Deadlock detection and priority inheritance handling */ - struct rt_mutex_waiter *pi_blocked_on; + /* PI waiters blocked on a rt_mutex held by this task: */ + struct rb_root pi_waiters; + struct rb_node *pi_waiters_leftmost; + /* Deadlock detection and priority inheritance handling: */ + struct rt_mutex_waiter *pi_blocked_on; #endif #ifdef CONFIG_DEBUG_MUTEXES - /* mutex deadlock detection */ - struct mutex_waiter *blocked_on; + /* Mutex deadlock detection: */ + struct mutex_waiter *blocked_on; #endif + #ifdef CONFIG_TRACE_IRQFLAGS - unsigned int irq_events; - unsigned long hardirq_enable_ip; - unsigned long hardirq_disable_ip; - unsigned int hardirq_enable_event; - unsigned int hardirq_disable_event; - int hardirqs_enabled; - int hardirq_context; - unsigned long softirq_disable_ip; - unsigned long softirq_enable_ip; - unsigned int softirq_disable_event; - unsigned int softirq_enable_event; - int softirqs_enabled; - int softirq_context; + unsigned int irq_events; + unsigned long hardirq_enable_ip; + unsigned long hardirq_disable_ip; + unsigned int hardirq_enable_event; + unsigned int hardirq_disable_event; + int hardirqs_enabled; + int hardirq_context; + unsigned long softirq_disable_ip; + unsigned long softirq_enable_ip; + unsigned int softirq_disable_event; + unsigned int softirq_enable_event; + int softirqs_enabled; + int softirq_context; #endif + #ifdef CONFIG_LOCKDEP -# define MAX_LOCK_DEPTH 48UL - u64 curr_chain_key; - int lockdep_depth; - unsigned int lockdep_recursion; - struct held_lock held_locks[MAX_LOCK_DEPTH]; - gfp_t lockdep_reclaim_gfp; +# define MAX_LOCK_DEPTH 48UL + u64 curr_chain_key; + int lockdep_depth; + unsigned int lockdep_recursion; + struct held_lock held_locks[MAX_LOCK_DEPTH]; + gfp_t lockdep_reclaim_gfp; #endif + #ifdef CONFIG_UBSAN - unsigned int in_ubsan; + unsigned int in_ubsan; #endif -/* journalling filesystem info */ - void *journal_info; + /* Journalling filesystem info: */ + void *journal_info; -/* stacked block device info */ - struct bio_list *bio_list; + /* Stacked block device info: */ + struct bio_list *bio_list; #ifdef CONFIG_BLOCK -/* stack plugging */ - struct blk_plug *plug; + /* Stack plugging: */ + struct blk_plug *plug; #endif -/* VM state */ - struct reclaim_state *reclaim_state; + /* VM state: */ + struct reclaim_state *reclaim_state; - struct backing_dev_info *backing_dev_info; + struct backing_dev_info *backing_dev_info; - struct io_context *io_context; + struct io_context *io_context; - unsigned long ptrace_message; - siginfo_t *last_siginfo; /* For ptrace use. */ - struct task_io_accounting ioac; -#if defined(CONFIG_TASK_XACCT) - u64 acct_rss_mem1; /* accumulated rss usage */ - u64 acct_vm_mem1; /* accumulated virtual memory usage */ - u64 acct_timexpd; /* stime + utime since last update */ + /* Ptrace state: */ + unsigned long ptrace_message; + siginfo_t *last_siginfo; + + struct task_io_accounting ioac; +#ifdef CONFIG_TASK_XACCT + /* Accumulated RSS usage: */ + u64 acct_rss_mem1; + /* Accumulated virtual memory usage: */ + u64 acct_vm_mem1; + /* stime + utime since last update: */ + u64 acct_timexpd; #endif #ifdef CONFIG_CPUSETS - nodemask_t mems_allowed; /* Protected by alloc_lock */ - seqcount_t mems_allowed_seq; /* Seqence no to catch updates */ - int cpuset_mem_spread_rotor; - int cpuset_slab_spread_rotor; + /* Protected by ->alloc_lock: */ + nodemask_t mems_allowed; + /* Seqence number to catch updates: */ + seqcount_t mems_allowed_seq; + int cpuset_mem_spread_rotor; + int cpuset_slab_spread_rotor; #endif #ifdef CONFIG_CGROUPS - /* Control Group info protected by css_set_lock */ - struct css_set __rcu *cgroups; - /* cg_list protected by css_set_lock and tsk->alloc_lock */ - struct list_head cg_list; + /* Control Group info protected by css_set_lock: */ + struct css_set __rcu *cgroups; + /* cg_list protected by css_set_lock and tsk->alloc_lock: */ + struct list_head cg_list; #endif #ifdef CONFIG_INTEL_RDT_A - int closid; + int closid; #endif #ifdef CONFIG_FUTEX - struct robust_list_head __user *robust_list; + struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT struct compat_robust_list_head __user *compat_robust_list; #endif - struct list_head pi_state_list; - struct futex_pi_state *pi_state_cache; + struct list_head pi_state_list; + struct futex_pi_state *pi_state_cache; #endif #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; - struct mutex perf_event_mutex; - struct list_head perf_event_list; + struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; + struct mutex perf_event_mutex; + struct list_head perf_event_list; #endif #ifdef CONFIG_DEBUG_PREEMPT - unsigned long preempt_disable_ip; + unsigned long preempt_disable_ip; #endif #ifdef CONFIG_NUMA - struct mempolicy *mempolicy; /* Protected by alloc_lock */ - short il_next; - short pref_node_fork; + /* Protected by alloc_lock: */ + struct mempolicy *mempolicy; + short il_next; + short pref_node_fork; #endif #ifdef CONFIG_NUMA_BALANCING - int numa_scan_seq; - unsigned int numa_scan_period; - unsigned int numa_scan_period_max; - int numa_preferred_nid; - unsigned long numa_migrate_retry; - u64 node_stamp; /* migration stamp */ - u64 last_task_numa_placement; - u64 last_sum_exec_runtime; - struct callback_head numa_work; - - struct list_head numa_entry; - struct numa_group *numa_group; + int numa_scan_seq; + unsigned int numa_scan_period; + unsigned int numa_scan_period_max; + int numa_preferred_nid; + unsigned long numa_migrate_retry; + /* Migration stamp: */ + u64 node_stamp; + u64 last_task_numa_placement; + u64 last_sum_exec_runtime; + struct callback_head numa_work; + + struct list_head numa_entry; + struct numa_group *numa_group; /* * numa_faults is an array split into four regions: @@ -1855,8 +912,8 @@ struct task_struct { * during the current scan window. When the scan completes, the counts * in faults_memory and faults_cpu decay and these values are copied. */ - unsigned long *numa_faults; - unsigned long total_numa_faults; + unsigned long *numa_faults; + unsigned long total_numa_faults; /* * numa_faults_locality tracks if faults recorded during the last @@ -1864,208 +921,133 @@ struct task_struct { * period is adapted based on the locality of the faults with different * weights depending on whether they were shared or private faults */ - unsigned long numa_faults_locality[3]; + unsigned long numa_faults_locality[3]; - unsigned long numa_pages_migrated; + unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH - struct tlbflush_unmap_batch tlb_ubc; -#endif + struct tlbflush_unmap_batch tlb_ubc; - struct rcu_head rcu; + struct rcu_head rcu; - /* - * cache last used pipe for splice - */ - struct pipe_inode_info *splice_pipe; + /* Cache last used pipe for splice(): */ + struct pipe_inode_info *splice_pipe; - struct page_frag task_frag; + struct page_frag task_frag; -#ifdef CONFIG_TASK_DELAY_ACCT - struct task_delay_info *delays; +#ifdef CONFIG_TASK_DELAY_ACCT + struct task_delay_info *delays; #endif + #ifdef CONFIG_FAULT_INJECTION - int make_it_fail; + int make_it_fail; #endif /* - * when (nr_dirtied >= nr_dirtied_pause), it's time to call - * balance_dirty_pages() for some dirty throttling pause + * When (nr_dirtied >= nr_dirtied_pause), it's time to call + * balance_dirty_pages() for a dirty throttling pause: */ - int nr_dirtied; - int nr_dirtied_pause; - unsigned long dirty_paused_when; /* start of a write-and-pause period */ + int nr_dirtied; + int nr_dirtied_pause; + /* Start of a write-and-pause period: */ + unsigned long dirty_paused_when; #ifdef CONFIG_LATENCYTOP - int latency_record_count; - struct latency_record latency_record[LT_SAVECOUNT]; + int latency_record_count; + struct latency_record latency_record[LT_SAVECOUNT]; #endif /* - * time slack values; these are used to round up poll() and + * Time slack values; these are used to round up poll() and * select() etc timeout values. These are in nanoseconds. */ - u64 timer_slack_ns; - u64 default_timer_slack_ns; + u64 timer_slack_ns; + u64 default_timer_slack_ns; #ifdef CONFIG_KASAN - unsigned int kasan_depth; + unsigned int kasan_depth; #endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* Index of current stored address in ret_stack */ - int curr_ret_stack; - /* Stack of return addresses for return function tracing */ - struct ftrace_ret_stack *ret_stack; - /* time stamp for last schedule */ - unsigned long long ftrace_timestamp; + /* Index of current stored address in ret_stack: */ + int curr_ret_stack; + + /* Stack of return addresses for return function tracing: */ + struct ftrace_ret_stack *ret_stack; + + /* Timestamp for last schedule: */ + unsigned long long ftrace_timestamp; + /* * Number of functions that haven't been traced - * because of depth overrun. + * because of depth overrun: */ - atomic_t trace_overrun; - /* Pause for the tracing */ - atomic_t tracing_graph_pause; + atomic_t trace_overrun; + + /* Pause tracing: */ + atomic_t tracing_graph_pause; #endif + #ifdef CONFIG_TRACING - /* state flags for use by tracers */ - unsigned long trace; - /* bitmask and counter of trace recursion */ - unsigned long trace_recursion; + /* State flags for use by tracers: */ + unsigned long trace; + + /* Bitmask and counter of trace recursion: */ + unsigned long trace_recursion; #endif /* CONFIG_TRACING */ + #ifdef CONFIG_KCOV - /* Coverage collection mode enabled for this task (0 if disabled). */ - enum kcov_mode kcov_mode; - /* Size of the kcov_area. */ - unsigned kcov_size; - /* Buffer for coverage collection. */ - void *kcov_area; - /* kcov desciptor wired with this task or NULL. */ - struct kcov *kcov; + /* Coverage collection mode enabled for this task (0 if disabled): */ + enum kcov_mode kcov_mode; + + /* Size of the kcov_area: */ + unsigned int kcov_size; + + /* Buffer for coverage collection: */ + void *kcov_area; + + /* KCOV descriptor wired with this task or NULL: */ + struct kcov *kcov; #endif + #ifdef CONFIG_MEMCG - struct mem_cgroup *memcg_in_oom; - gfp_t memcg_oom_gfp_mask; - int memcg_oom_order; + struct mem_cgroup *memcg_in_oom; + gfp_t memcg_oom_gfp_mask; + int memcg_oom_order; - /* number of pages to reclaim on returning to userland */ - unsigned int memcg_nr_pages_over_high; + /* Number of pages to reclaim on returning to userland: */ + unsigned int memcg_nr_pages_over_high; #endif + #ifdef CONFIG_UPROBES - struct uprobe_task *utask; + struct uprobe_task *utask; #endif #if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) - unsigned int sequential_io; - unsigned int sequential_io_avg; + unsigned int sequential_io; + unsigned int sequential_io_avg; #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP - unsigned long task_state_change; + unsigned long task_state_change; #endif - int pagefault_disabled; + int pagefault_disabled; #ifdef CONFIG_MMU - struct task_struct *oom_reaper_list; + struct task_struct *oom_reaper_list; #endif #ifdef CONFIG_VMAP_STACK - struct vm_struct *stack_vm_area; + struct vm_struct *stack_vm_area; #endif #ifdef CONFIG_THREAD_INFO_IN_TASK - /* A live task holds one reference. */ - atomic_t stack_refcount; + /* A live task holds one reference: */ + atomic_t stack_refcount; #endif -/* CPU-specific state of this task */ - struct thread_struct thread; -/* - * WARNING: on x86, 'thread_struct' contains a variable-sized - * structure. It *MUST* be at the end of 'task_struct'. - * - * Do not put anything below here! - */ -}; - -#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT -extern int arch_task_struct_size __read_mostly; -#else -# define arch_task_struct_size (sizeof(struct task_struct)) -#endif - -#ifdef CONFIG_VMAP_STACK -static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) -{ - return t->stack_vm_area; -} -#else -static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) -{ - return NULL; -} -#endif - -/* Future-safe accessor for struct task_struct's cpus_allowed. */ -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) - -static inline int tsk_nr_cpus_allowed(struct task_struct *p) -{ - return p->nr_cpus_allowed; -} - -#define TNF_MIGRATED 0x01 -#define TNF_NO_GROUP 0x02 -#define TNF_SHARED 0x04 -#define TNF_FAULT_LOCAL 0x08 -#define TNF_MIGRATE_FAIL 0x10 - -static inline bool in_vfork(struct task_struct *tsk) -{ - bool ret; + /* CPU-specific state of this task: */ + struct thread_struct thread; /* - * need RCU to access ->real_parent if CLONE_VM was used along with - * CLONE_PARENT. - * - * We check real_parent->mm == tsk->mm because CLONE_VFORK does not - * imply CLONE_VM - * - * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus - * ->real_parent is not necessarily the task doing vfork(), so in - * theory we can't rely on task_lock() if we want to dereference it. + * WARNING: on x86, 'thread_struct' contains a variable-sized + * structure. It *MUST* be at the end of 'task_struct'. * - * And in this case we can't trust the real_parent->mm == tsk->mm - * check, it can be false negative. But we do not care, if init or - * another oom-unkillable task does this it should blame itself. + * Do not put anything below here! */ - rcu_read_lock(); - ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm; - rcu_read_unlock(); - - return ret; -} - -#ifdef CONFIG_NUMA_BALANCING -extern void task_numa_fault(int last_node, int node, int pages, int flags); -extern pid_t task_numa_group_id(struct task_struct *p); -extern void set_numabalancing_state(bool enabled); -extern void task_numa_free(struct task_struct *p); -extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, - int src_nid, int dst_cpu); -#else -static inline void task_numa_fault(int last_node, int node, int pages, - int flags) -{ -} -static inline pid_t task_numa_group_id(struct task_struct *p) -{ - return 0; -} -static inline void set_numabalancing_state(bool enabled) -{ -} -static inline void task_numa_free(struct task_struct *p) -{ -} -static inline bool should_numa_migrate_memory(struct task_struct *p, - struct page *page, int src_nid, int dst_cpu) -{ - return true; -} -#endif +}; static inline struct pid *task_pid(struct task_struct *task) { @@ -2078,7 +1060,7 @@ static inline struct pid *task_tgid(struct task_struct *task) } /* - * Without tasklist or rcu lock it is not safe to dereference + * Without tasklist or RCU lock it is not safe to dereference * the result of task_pgrp/task_session even if task == current, * we can race with another thread doing sys_setsid/sys_setpgid. */ @@ -2092,8 +1074,6 @@ static inline struct pid *task_session(struct task_struct *task) return task->group_leader->pids[PIDTYPE_SID].pid; } -struct pid_namespace; - /* * the helpers to get the task's different pids as they are seen * from various namespaces @@ -2107,16 +1087,14 @@ struct pid_namespace; * * see also pid_nr() etc in include/linux/pid.h */ -pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, - struct pid_namespace *ns); +pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns); static inline pid_t task_pid_nr(struct task_struct *tsk) { return tsk->pid; } -static inline pid_t task_pid_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) +static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); } @@ -2132,15 +1110,28 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk) return tsk->tgid; } -pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); +extern pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); static inline pid_t task_tgid_vnr(struct task_struct *tsk) { return pid_vnr(task_tgid(tsk)); } +/** + * pid_alive - check that a task structure is not stale + * @p: Task structure to be checked. + * + * Test if a process is not yet dead (at most zombie state) + * If pid_alive fails, then pointers within the task structure + * can be stale and must not be dereferenced. + * + * Return: 1 if the process is alive. 0 otherwise. + */ +static inline int pid_alive(const struct task_struct *p) +{ + return p->pids[PIDTYPE_PID].pid != NULL; +} -static inline int pid_alive(const struct task_struct *p); static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) { pid_t pid = 0; @@ -2158,8 +1149,7 @@ static inline pid_t task_ppid_nr(const struct task_struct *tsk) return task_ppid_nr_ns(tsk, &init_pid_ns); } -static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) +static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); } @@ -2170,8 +1160,7 @@ static inline pid_t task_pgrp_vnr(struct task_struct *tsk) } -static inline pid_t task_session_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) +static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); } @@ -2181,28 +1170,13 @@ static inline pid_t task_session_vnr(struct task_struct *tsk) return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); } -/* obsolete, do not use */ +/* Obsolete, do not use: */ static inline pid_t task_pgrp_nr(struct task_struct *tsk) { return task_pgrp_nr_ns(tsk, &init_pid_ns); } /** - * pid_alive - check that a task structure is not stale - * @p: Task structure to be checked. - * - * Test if a process is not yet dead (at most zombie state) - * If pid_alive fails, then pointers within the task structure - * can be stale and must not be dereferenced. - * - * Return: 1 if the process is alive. 0 otherwise. - */ -static inline int pid_alive(const struct task_struct *p) -{ - return p->pids[PIDTYPE_PID].pid != NULL; -} - -/** * is_global_init - check if a task structure is init. Since init * is free to have sub-threads we need to check tgid. * @tsk: Task structure to be checked. @@ -2218,89 +1192,37 @@ static inline int is_global_init(struct task_struct *tsk) extern struct pid *cad_pid; -extern void free_task(struct task_struct *tsk); -#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) - -extern void __put_task_struct(struct task_struct *t); - -static inline void put_task_struct(struct task_struct *t) -{ - if (atomic_dec_and_test(&t->usage)) - __put_task_struct(t); -} - -struct task_struct *task_rcu_dereference(struct task_struct **ptask); -struct task_struct *try_get_task_struct(struct task_struct **ptask); - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -extern void task_cputime(struct task_struct *t, - u64 *utime, u64 *stime); -extern u64 task_gtime(struct task_struct *t); -#else -static inline void task_cputime(struct task_struct *t, - u64 *utime, u64 *stime) -{ - *utime = t->utime; - *stime = t->stime; -} - -static inline u64 task_gtime(struct task_struct *t) -{ - return t->gtime; -} -#endif - -#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME -static inline void task_cputime_scaled(struct task_struct *t, - u64 *utimescaled, - u64 *stimescaled) -{ - *utimescaled = t->utimescaled; - *stimescaled = t->stimescaled; -} -#else -static inline void task_cputime_scaled(struct task_struct *t, - u64 *utimescaled, - u64 *stimescaled) -{ - task_cputime(t, utimescaled, stimescaled); -} -#endif - -extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); -extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); - /* * Per process flags */ -#define PF_IDLE 0x00000002 /* I am an IDLE thread */ -#define PF_EXITING 0x00000004 /* getting shut down */ -#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ -#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ -#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ -#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ -#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ -#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ -#define PF_DUMPCORE 0x00000200 /* dumped core */ -#define PF_SIGNALED 0x00000400 /* killed by a signal */ -#define PF_MEMALLOC 0x00000800 /* Allocating memory */ -#define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ -#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* used async_schedule*(), used by module init */ -#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ -#define PF_FROZEN 0x00010000 /* frozen for system suspend */ -#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ -#define PF_KSWAPD 0x00040000 /* I am kswapd */ -#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ -#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ -#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ -#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ -#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ -#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ -#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ -#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ -#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ -#define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */ +#define PF_IDLE 0x00000002 /* I am an IDLE thread */ +#define PF_EXITING 0x00000004 /* Getting shut down */ +#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */ +#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ +#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ +#define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ +#define PF_MCE_PROCESS 0x00000080 /* Process policy on mce errors */ +#define PF_SUPERPRIV 0x00000100 /* Used super-user privileges */ +#define PF_DUMPCORE 0x00000200 /* Dumped core */ +#define PF_SIGNALED 0x00000400 /* Killed by a signal */ +#define PF_MEMALLOC 0x00000800 /* Allocating memory */ +#define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ +#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ +#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ +#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ +#define PF_FROZEN 0x00010000 /* Frozen for system suspend */ +#define PF_FSTRANS 0x00020000 /* Inside a filesystem transaction */ +#define PF_KSWAPD 0x00040000 /* I am kswapd */ +#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ +#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ +#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ +#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ +#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ +#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ +#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ +#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ +#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ /* * Only the _current_ task can read/write to tsk->flags, but other @@ -2313,55 +1235,38 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *s * child is not running and in turn not changing child->flags * at the same time the parent does it. */ -#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) -#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) -#define clear_used_math() clear_stopped_child_used_math(current) -#define set_used_math() set_stopped_child_used_math(current) +#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) +#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) +#define clear_used_math() clear_stopped_child_used_math(current) +#define set_used_math() set_stopped_child_used_math(current) + #define conditional_stopped_child_used_math(condition, child) \ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0) -#define conditional_used_math(condition) \ - conditional_stopped_child_used_math(condition, current) -#define copy_to_stopped_child_used_math(child) \ - do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) -/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ -#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) -#define used_math() tsk_used_math(current) -/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags - * __GFP_FS is also cleared as it implies __GFP_IO. - */ -static inline gfp_t memalloc_noio_flags(gfp_t flags) -{ - if (unlikely(current->flags & PF_MEMALLOC_NOIO)) - flags &= ~(__GFP_IO | __GFP_FS); - return flags; -} +#define conditional_used_math(condition) conditional_stopped_child_used_math(condition, current) -static inline unsigned int memalloc_noio_save(void) -{ - unsigned int flags = current->flags & PF_MEMALLOC_NOIO; - current->flags |= PF_MEMALLOC_NOIO; - return flags; -} +#define copy_to_stopped_child_used_math(child) \ + do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) -static inline void memalloc_noio_restore(unsigned int flags) -{ - current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; -} +/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ +#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) +#define used_math() tsk_used_math(current) /* Per-process atomic flags. */ -#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ -#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ -#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ -#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ +#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ +#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ +#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ +#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ { return test_bit(PFA_##name, &p->atomic_flags); } + #define TASK_PFA_SET(name, func) \ static inline void task_set_##func(struct task_struct *p) \ { set_bit(PFA_##name, &p->atomic_flags); } + #define TASK_PFA_CLEAR(name, func) \ static inline void task_clear_##func(struct task_struct *p) \ { clear_bit(PFA_##name, &p->atomic_flags); } @@ -2380,75 +1285,23 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) TASK_PFA_TEST(LMK_WAITING, lmk_waiting) TASK_PFA_SET(LMK_WAITING, lmk_waiting) -/* - * task->jobctl flags - */ -#define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */ - -#define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */ -#define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */ -#define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */ -#define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */ -#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ -#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ -#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ - -#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) -#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) -#define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) -#define JOBCTL_TRAP_STOP (1UL << JOBCTL_TRAP_STOP_BIT) -#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) -#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) -#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) - -#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) -#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) - -extern bool task_set_jobctl_pending(struct task_struct *task, - unsigned long mask); -extern void task_clear_jobctl_trapping(struct task_struct *task); -extern void task_clear_jobctl_pending(struct task_struct *task, - unsigned long mask); - -static inline void rcu_copy_process(struct task_struct *p) -{ -#ifdef CONFIG_PREEMPT_RCU - p->rcu_read_lock_nesting = 0; - p->rcu_read_unlock_special.s = 0; - p->rcu_blocked_node = NULL; - INIT_LIST_HEAD(&p->rcu_node_entry); -#endif /* #ifdef CONFIG_PREEMPT_RCU */ -#ifdef CONFIG_TASKS_RCU - p->rcu_tasks_holdout = false; - INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); - p->rcu_tasks_idle_cpu = -1; -#endif /* #ifdef CONFIG_TASKS_RCU */ -} - -static inline void tsk_restore_flags(struct task_struct *task, - unsigned long orig_flags, unsigned long flags) +static inline void +tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) { task->flags &= ~flags; task->flags |= orig_flags & flags; } -extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, - const struct cpumask *trial); -extern int task_can_attach(struct task_struct *p, - const struct cpumask *cs_cpus_allowed); +extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); +extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); #ifdef CONFIG_SMP -extern void do_set_cpus_allowed(struct task_struct *p, - const struct cpumask *new_mask); - -extern int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask); +extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); +extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); #else -static inline void do_set_cpus_allowed(struct task_struct *p, - const struct cpumask *new_mask) +static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { } -static inline int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask) +static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { if (!cpumask_test_cpu(0, new_mask)) return -EINVAL; @@ -2456,165 +1309,14 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, } #endif -#ifdef CONFIG_NO_HZ_COMMON -void calc_load_enter_idle(void); -void calc_load_exit_idle(void); -#else -static inline void calc_load_enter_idle(void) { } -static inline void calc_load_exit_idle(void) { } -#endif /* CONFIG_NO_HZ_COMMON */ - #ifndef cpu_relax_yield #define cpu_relax_yield() cpu_relax() #endif -/* - * Do not use outside of architecture code which knows its limitations. - * - * sched_clock() has no promise of monotonicity or bounded drift between - * CPUs, use (which you should not) requires disabling IRQs. - * - * Please use one of the three interfaces below. - */ -extern unsigned long long notrace sched_clock(void); -/* - * See the comment in kernel/sched/clock.c - */ -extern u64 running_clock(void); -extern u64 sched_clock_cpu(int cpu); - - -extern void sched_clock_init(void); - -#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK -static inline void sched_clock_init_late(void) -{ -} - -static inline void sched_clock_tick(void) -{ -} - -static inline void clear_sched_clock_stable(void) -{ -} - -static inline void sched_clock_idle_sleep_event(void) -{ -} - -static inline void sched_clock_idle_wakeup_event(u64 delta_ns) -{ -} - -static inline u64 cpu_clock(int cpu) -{ - return sched_clock(); -} - -static inline u64 local_clock(void) -{ - return sched_clock(); -} -#else -extern void sched_clock_init_late(void); -/* - * Architectures can set this to 1 if they have specified - * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, - * but then during bootup it turns out that sched_clock() - * is reliable after all: - */ -extern int sched_clock_stable(void); -extern void clear_sched_clock_stable(void); - -extern void sched_clock_tick(void); -extern void sched_clock_idle_sleep_event(void); -extern void sched_clock_idle_wakeup_event(u64 delta_ns); - -/* - * As outlined in clock.c, provides a fast, high resolution, nanosecond - * time source that is monotonic per cpu argument and has bounded drift - * between cpus. - * - * ######################### BIG FAT WARNING ########################## - * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # - * # go backwards !! # - * #################################################################### - */ -static inline u64 cpu_clock(int cpu) -{ - return sched_clock_cpu(cpu); -} - -static inline u64 local_clock(void) -{ - return sched_clock_cpu(raw_smp_processor_id()); -} -#endif - -#ifdef CONFIG_IRQ_TIME_ACCOUNTING -/* - * An i/f to runtime opt-in for irq time accounting based off of sched_clock. - * The reason for this explicit opt-in is not to have perf penalty with - * slow sched_clocks. - */ -extern void enable_sched_clock_irqtime(void); -extern void disable_sched_clock_irqtime(void); -#else -static inline void enable_sched_clock_irqtime(void) {} -static inline void disable_sched_clock_irqtime(void) {} -#endif - -extern unsigned long long -task_sched_runtime(struct task_struct *task); - -/* sched_exec is called by processes performing an exec */ -#ifdef CONFIG_SMP -extern void sched_exec(void); -#else -#define sched_exec() {} -#endif - -extern void sched_clock_idle_sleep_event(void); -extern void sched_clock_idle_wakeup_event(u64 delta_ns); - -#ifdef CONFIG_HOTPLUG_CPU -extern void idle_task_exit(void); -#else -static inline void idle_task_exit(void) {} -#endif - -#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) -extern void wake_up_nohz_cpu(int cpu); -#else -static inline void wake_up_nohz_cpu(int cpu) { } -#endif - -#ifdef CONFIG_NO_HZ_FULL -extern u64 scheduler_tick_max_deferment(void); -#endif - -#ifdef CONFIG_SCHED_AUTOGROUP -extern void sched_autogroup_create_attach(struct task_struct *p); -extern void sched_autogroup_detach(struct task_struct *p); -extern void sched_autogroup_fork(struct signal_struct *sig); -extern void sched_autogroup_exit(struct signal_struct *sig); -extern void sched_autogroup_exit_task(struct task_struct *p); -#ifdef CONFIG_PROC_FS -extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); -extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); -#endif -#else -static inline void sched_autogroup_create_attach(struct task_struct *p) { } -static inline void sched_autogroup_detach(struct task_struct *p) { } -static inline void sched_autogroup_fork(struct signal_struct *sig) { } -static inline void sched_autogroup_exit(struct signal_struct *sig) { } -static inline void sched_autogroup_exit_task(struct task_struct *p) { } -#endif - extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); + /** * task_nice - return the nice value of a given task. * @p: the task in question. @@ -2625,16 +1327,15 @@ static inline int task_nice(const struct task_struct *p) { return PRIO_TO_NICE((p)->static_prio); } + extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); -extern int sched_setscheduler(struct task_struct *, int, - const struct sched_param *); -extern int sched_setscheduler_nocheck(struct task_struct *, int, - const struct sched_param *); -extern int sched_setattr(struct task_struct *, - const struct sched_attr *); +extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *); +extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); +extern int sched_setattr(struct task_struct *, const struct sched_attr *); extern struct task_struct *idle_task(int cpu); + /** * is_idle_task - is the specified task an idle task? * @p: the task in question. @@ -2645,6 +1346,7 @@ static inline bool is_idle_task(const struct task_struct *p) { return !!(p->flags & PF_IDLE); } + extern struct task_struct *curr_task(int cpu); extern void ia64_set_curr_task(int cpu, struct task_struct *p); @@ -2657,23 +1359,15 @@ union thread_union { unsigned long stack[THREAD_SIZE/sizeof(long)]; }; -#ifndef __HAVE_ARCH_KSTACK_END -static inline int kstack_end(void *addr) +#ifdef CONFIG_THREAD_INFO_IN_TASK +static inline struct thread_info *task_thread_info(struct task_struct *task) { - /* Reliable end of stack detection: - * Some APM bios versions misalign the stack - */ - return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); + return &task->thread_info; } +#elif !defined(__HAVE_THREAD_FUNCTIONS) +# define task_thread_info(task) ((struct thread_info *)(task)->stack) #endif -extern union thread_union init_thread_union; -extern struct task_struct init_task; - -extern struct mm_struct init_mm; - -extern struct pid_namespace init_pid_ns; - /* * find a task by one of its numerical ids * @@ -2686,365 +1380,25 @@ extern struct pid_namespace init_pid_ns; */ extern struct task_struct *find_task_by_vpid(pid_t nr); -extern struct task_struct *find_task_by_pid_ns(pid_t nr, - struct pid_namespace *ns); - -/* per-UID process charging. */ -extern struct user_struct * alloc_uid(kuid_t); -static inline struct user_struct *get_uid(struct user_struct *u) -{ - atomic_inc(&u->__count); - return u; -} -extern void free_uid(struct user_struct *); - -#include <asm/current.h> - -extern void xtime_update(unsigned long ticks); +extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); -#ifdef CONFIG_SMP - extern void kick_process(struct task_struct *tsk); -#else - static inline void kick_process(struct task_struct *tsk) { } -#endif -extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_dead(struct task_struct *p); - -extern void proc_caches_init(void); -extern void flush_signals(struct task_struct *); -extern void ignore_signals(struct task_struct *); -extern void flush_signal_handlers(struct task_struct *, int force_default); -extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); - -static inline int kernel_dequeue_signal(siginfo_t *info) -{ - struct task_struct *tsk = current; - siginfo_t __info; - int ret; - - spin_lock_irq(&tsk->sighand->siglock); - ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info); - spin_unlock_irq(&tsk->sighand->siglock); - - return ret; -} - -static inline void kernel_signal_stop(void) -{ - spin_lock_irq(¤t->sighand->siglock); - if (current->jobctl & JOBCTL_STOP_DEQUEUED) - __set_current_state(TASK_STOPPED); - spin_unlock_irq(¤t->sighand->siglock); - - schedule(); -} - -extern void release_task(struct task_struct * p); -extern int send_sig_info(int, struct siginfo *, struct task_struct *); -extern int force_sigsegv(int, struct task_struct *); -extern int force_sig_info(int, struct siginfo *, struct task_struct *); -extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); -extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); -extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, - const struct cred *, u32); -extern int kill_pgrp(struct pid *pid, int sig, int priv); -extern int kill_pid(struct pid *pid, int sig, int priv); -extern int kill_proc_info(int, struct siginfo *, pid_t); -extern __must_check bool do_notify_parent(struct task_struct *, int); -extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); -extern void force_sig(int, struct task_struct *); -extern int send_sig(int, struct task_struct *, int); -extern int zap_other_threads(struct task_struct *p); -extern struct sigqueue *sigqueue_alloc(void); -extern void sigqueue_free(struct sigqueue *); -extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); -extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); - -#ifdef TIF_RESTORE_SIGMASK -/* - * Legacy restore_sigmask accessors. These are inefficient on - * SMP architectures because they require atomic operations. - */ - -/** - * set_restore_sigmask() - make sure saved_sigmask processing gets done - * - * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code - * will run before returning to user mode, to process the flag. For - * all callers, TIF_SIGPENDING is already set or it's no harm to set - * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the - * arch code will notice on return to user mode, in case those bits - * are scarce. We set TIF_SIGPENDING here to ensure that the arch - * signal code always gets run when TIF_RESTORE_SIGMASK is set. - */ -static inline void set_restore_sigmask(void) -{ - set_thread_flag(TIF_RESTORE_SIGMASK); - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); -} -static inline void clear_restore_sigmask(void) -{ - clear_thread_flag(TIF_RESTORE_SIGMASK); -} -static inline bool test_restore_sigmask(void) -{ - return test_thread_flag(TIF_RESTORE_SIGMASK); -} -static inline bool test_and_clear_restore_sigmask(void) -{ - return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); -} - -#else /* TIF_RESTORE_SIGMASK */ - -/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */ -static inline void set_restore_sigmask(void) -{ - current->restore_sigmask = true; - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); -} -static inline void clear_restore_sigmask(void) -{ - current->restore_sigmask = false; -} -static inline bool test_restore_sigmask(void) -{ - return current->restore_sigmask; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - if (!current->restore_sigmask) - return false; - current->restore_sigmask = false; - return true; -} -#endif - -static inline void restore_saved_sigmask(void) -{ - if (test_and_clear_restore_sigmask()) - __set_current_blocked(¤t->saved_sigmask); -} - -static inline sigset_t *sigmask_to_save(void) -{ - sigset_t *res = ¤t->blocked; - if (unlikely(test_restore_sigmask())) - res = ¤t->saved_sigmask; - return res; -} - -static inline int kill_cad_pid(int sig, int priv) -{ - return kill_pid(cad_pid, sig, priv); -} - -/* These can be the second arg to send_sig_info/send_group_sig_info. */ -#define SEND_SIG_NOINFO ((struct siginfo *) 0) -#define SEND_SIG_PRIV ((struct siginfo *) 1) -#define SEND_SIG_FORCED ((struct siginfo *) 2) - -/* - * True if we are on the alternate signal stack. - */ -static inline int on_sig_stack(unsigned long sp) -{ - /* - * If the signal stack is SS_AUTODISARM then, by construction, we - * can't be on the signal stack unless user code deliberately set - * SS_AUTODISARM when we were already on it. - * - * This improves reliability: if user state gets corrupted such that - * the stack pointer points very close to the end of the signal stack, - * then this check will enable the signal to be handled anyway. - */ - if (current->sas_ss_flags & SS_AUTODISARM) - return 0; - -#ifdef CONFIG_STACK_GROWSUP - return sp >= current->sas_ss_sp && - sp - current->sas_ss_sp < current->sas_ss_size; -#else - return sp > current->sas_ss_sp && - sp - current->sas_ss_sp <= current->sas_ss_size; -#endif -} - -static inline int sas_ss_flags(unsigned long sp) -{ - if (!current->sas_ss_size) - return SS_DISABLE; - - return on_sig_stack(sp) ? SS_ONSTACK : 0; -} - -static inline void sas_ss_reset(struct task_struct *p) -{ - p->sas_ss_sp = 0; - p->sas_ss_size = 0; - p->sas_ss_flags = SS_DISABLE; -} - -static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) -{ - if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp)) -#ifdef CONFIG_STACK_GROWSUP - return current->sas_ss_sp; -#else - return current->sas_ss_sp + current->sas_ss_size; -#endif - return sp; -} - -/* - * Routines for handling mm_structs - */ -extern struct mm_struct * mm_alloc(void); - -/** - * mmgrab() - Pin a &struct mm_struct. - * @mm: The &struct mm_struct to pin. - * - * Make sure that @mm will not get freed even after the owning task - * exits. This doesn't guarantee that the associated address space - * will still exist later on and mmget_not_zero() has to be used before - * accessing it. - * - * This is a preferred way to to pin @mm for a longer/unbounded amount - * of time. - * - * Use mmdrop() to release the reference acquired by mmgrab(). - * - * See also <Documentation/vm/active_mm.txt> for an in-depth explanation - * of &mm_struct.mm_count vs &mm_struct.mm_users. - */ -static inline void mmgrab(struct mm_struct *mm) -{ - atomic_inc(&mm->mm_count); -} - -/* mmdrop drops the mm and the page tables */ -extern void __mmdrop(struct mm_struct *); -static inline void mmdrop(struct mm_struct *mm) -{ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) - __mmdrop(mm); -} - -static inline void mmdrop_async_fn(struct work_struct *work) -{ - struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); - __mmdrop(mm); -} - -static inline void mmdrop_async(struct mm_struct *mm) -{ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) { - INIT_WORK(&mm->async_put_work, mmdrop_async_fn); - schedule_work(&mm->async_put_work); - } -} - -/** - * mmget() - Pin the address space associated with a &struct mm_struct. - * @mm: The address space to pin. - * - * Make sure that the address space of the given &struct mm_struct doesn't - * go away. This does not protect against parts of the address space being - * modified or freed, however. - * - * Never use this function to pin this address space for an - * unbounded/indefinite amount of time. - * - * Use mmput() to release the reference acquired by mmget(). - * - * See also <Documentation/vm/active_mm.txt> for an in-depth explanation - * of &mm_struct.mm_count vs &mm_struct.mm_users. - */ -static inline void mmget(struct mm_struct *mm) -{ - atomic_inc(&mm->mm_users); -} - -static inline bool mmget_not_zero(struct mm_struct *mm) -{ - return atomic_inc_not_zero(&mm->mm_users); -} - -/* mmput gets rid of the mappings and all user-space */ -extern void mmput(struct mm_struct *); -#ifdef CONFIG_MMU -/* same as above but performs the slow path from the async context. Can - * be called from the atomic context as well - */ -extern void mmput_async(struct mm_struct *); -#endif - -/* Grab a reference to a task's mm, if it is not already going away */ -extern struct mm_struct *get_task_mm(struct task_struct *task); -/* - * Grab a reference to a task's mm, if it is not already going away - * and ptrace_may_access with the mode parameter passed to it - * succeeds. - */ -extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); -/* Remove the current tasks stale references to the old mm_struct */ -extern void mm_release(struct task_struct *, struct mm_struct *); - -#ifdef CONFIG_HAVE_COPY_THREAD_TLS -extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, - struct task_struct *, unsigned long); -#else -extern int copy_thread(unsigned long, unsigned long, unsigned long, - struct task_struct *); - -/* Architectures that haven't opted into copy_thread_tls get the tls argument - * via pt_regs, so ignore the tls argument passed via C. */ -static inline int copy_thread_tls( - unsigned long clone_flags, unsigned long sp, unsigned long arg, - struct task_struct *p, unsigned long tls) -{ - return copy_thread(clone_flags, sp, arg, p); -} -#endif -extern void flush_thread(void); -#ifdef CONFIG_HAVE_EXIT_THREAD -extern void exit_thread(struct task_struct *tsk); +#ifdef CONFIG_SMP +extern void kick_process(struct task_struct *tsk); #else -static inline void exit_thread(struct task_struct *tsk) -{ -} +static inline void kick_process(struct task_struct *tsk) { } #endif -extern void exit_files(struct task_struct *); -extern void __cleanup_sighand(struct sighand_struct *); - -extern void exit_itimers(struct signal_struct *); -extern void flush_itimer_signals(void); - -extern void do_group_exit(int); - -extern int do_execve(struct filename *, - const char __user * const __user *, - const char __user * const __user *); -extern int do_execveat(int, struct filename *, - const char __user * const __user *, - const char __user * const __user *, - int); -extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); -extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); -struct task_struct *fork_idle(int); -extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); - extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); + static inline void set_task_comm(struct task_struct *tsk, const char *from) { __set_task_comm(tsk, from, false); } + extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP @@ -3052,263 +1406,15 @@ void scheduler_ipi(void); extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else static inline void scheduler_ipi(void) { } -static inline unsigned long wait_task_inactive(struct task_struct *p, - long match_state) +static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) { return 1; } #endif -#define tasklist_empty() \ - list_empty(&init_task.tasks) - -#define next_task(p) \ - list_entry_rcu((p)->tasks.next, struct task_struct, tasks) - -#define for_each_process(p) \ - for (p = &init_task ; (p = next_task(p)) != &init_task ; ) - -extern bool current_is_single_threaded(void); - -/* - * Careful: do_each_thread/while_each_thread is a double loop so - * 'break' will not work as expected - use goto instead. - */ -#define do_each_thread(g, t) \ - for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do - -#define while_each_thread(g, t) \ - while ((t = next_thread(t)) != g) - -#define __for_each_thread(signal, t) \ - list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) - -#define for_each_thread(p, t) \ - __for_each_thread((p)->signal, t) - -/* Careful: this is a double loop, 'break' won't work as expected. */ -#define for_each_process_thread(p, t) \ - for_each_process(p) for_each_thread(p, t) - -typedef int (*proc_visitor)(struct task_struct *p, void *data); -void walk_process_tree(struct task_struct *top, proc_visitor, void *); - -static inline int get_nr_threads(struct task_struct *tsk) -{ - return tsk->signal->nr_threads; -} - -static inline bool thread_group_leader(struct task_struct *p) -{ - return p->exit_signal >= 0; -} - -/* Do to the insanities of de_thread it is possible for a process - * to have the pid of the thread group leader without actually being - * the thread group leader. For iteration through the pids in proc - * all we care about is that we have a task with the appropriate - * pid, we don't actually care if we have the right task. - */ -static inline bool has_group_leader_pid(struct task_struct *p) -{ - return task_pid(p) == p->signal->leader_pid; -} - -static inline -bool same_thread_group(struct task_struct *p1, struct task_struct *p2) -{ - return p1->signal == p2->signal; -} - -static inline struct task_struct *next_thread(const struct task_struct *p) -{ - return list_entry_rcu(p->thread_group.next, - struct task_struct, thread_group); -} - -static inline int thread_group_empty(struct task_struct *p) -{ - return list_empty(&p->thread_group); -} - -#define delay_group_leader(p) \ - (thread_group_leader(p) && !thread_group_empty(p)) - -/* - * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring - * subscriptions and synchronises with wait4(). Also used in procfs. Also - * pins the final release of task.io_context. Also protects ->cpuset and - * ->cgroup.subsys[]. And ->vfork_done. - * - * Nests both inside and outside of read_lock(&tasklist_lock). - * It must not be nested with write_lock_irq(&tasklist_lock), - * neither inside nor outside. - */ -static inline void task_lock(struct task_struct *p) -{ - spin_lock(&p->alloc_lock); -} - -static inline void task_unlock(struct task_struct *p) -{ - spin_unlock(&p->alloc_lock); -} - -extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, - unsigned long *flags); - -static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, - unsigned long *flags) -{ - struct sighand_struct *ret; - - ret = __lock_task_sighand(tsk, flags); - (void)__cond_lock(&tsk->sighand->siglock, ret); - return ret; -} - -static inline void unlock_task_sighand(struct task_struct *tsk, - unsigned long *flags) -{ - spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); -} - -/** - * threadgroup_change_begin - mark the beginning of changes to a threadgroup - * @tsk: task causing the changes - * - * All operations which modify a threadgroup - a new thread joining the - * group, death of a member thread (the assertion of PF_EXITING) and - * exec(2) dethreading the process and replacing the leader - are wrapped - * by threadgroup_change_{begin|end}(). This is to provide a place which - * subsystems needing threadgroup stability can hook into for - * synchronization. - */ -static inline void threadgroup_change_begin(struct task_struct *tsk) -{ - might_sleep(); - cgroup_threadgroup_change_begin(tsk); -} - -/** - * threadgroup_change_end - mark the end of changes to a threadgroup - * @tsk: task causing the changes - * - * See threadgroup_change_begin(). - */ -static inline void threadgroup_change_end(struct task_struct *tsk) -{ - cgroup_threadgroup_change_end(tsk); -} - -#ifdef CONFIG_THREAD_INFO_IN_TASK - -static inline struct thread_info *task_thread_info(struct task_struct *task) -{ - return &task->thread_info; -} - /* - * When accessing the stack of a non-current task that might exit, use - * try_get_task_stack() instead. task_stack_page will return a pointer - * that could get freed out from under you. - */ -static inline void *task_stack_page(const struct task_struct *task) -{ - return task->stack; -} - -#define setup_thread_stack(new,old) do { } while(0) - -static inline unsigned long *end_of_stack(const struct task_struct *task) -{ - return task->stack; -} - -#elif !defined(__HAVE_THREAD_FUNCTIONS) - -#define task_thread_info(task) ((struct thread_info *)(task)->stack) -#define task_stack_page(task) ((void *)(task)->stack) - -static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) -{ - *task_thread_info(p) = *task_thread_info(org); - task_thread_info(p)->task = p; -} - -/* - * Return the address of the last usable long on the stack. - * - * When the stack grows down, this is just above the thread - * info struct. Going any lower will corrupt the threadinfo. - * - * When the stack grows up, this is the highest address. - * Beyond that position, we corrupt data on the next page. - */ -static inline unsigned long *end_of_stack(struct task_struct *p) -{ -#ifdef CONFIG_STACK_GROWSUP - return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1; -#else - return (unsigned long *)(task_thread_info(p) + 1); -#endif -} - -#endif - -#ifdef CONFIG_THREAD_INFO_IN_TASK -static inline void *try_get_task_stack(struct task_struct *tsk) -{ - return atomic_inc_not_zero(&tsk->stack_refcount) ? - task_stack_page(tsk) : NULL; -} - -extern void put_task_stack(struct task_struct *tsk); -#else -static inline void *try_get_task_stack(struct task_struct *tsk) -{ - return task_stack_page(tsk); -} - -static inline void put_task_stack(struct task_struct *tsk) {} -#endif - -#define task_stack_end_corrupted(task) \ - (*(end_of_stack(task)) != STACK_END_MAGIC) - -static inline int object_is_on_stack(void *obj) -{ - void *stack = task_stack_page(current); - - return (obj >= stack) && (obj < (stack + THREAD_SIZE)); -} - -extern void thread_stack_cache_init(void); - -#ifdef CONFIG_DEBUG_STACK_USAGE -static inline unsigned long stack_not_used(struct task_struct *p) -{ - unsigned long *n = end_of_stack(p); - - do { /* Skip over canary */ -# ifdef CONFIG_STACK_GROWSUP - n--; -# else - n++; -# endif - } while (!*n); - -# ifdef CONFIG_STACK_GROWSUP - return (unsigned long)end_of_stack(p) - (unsigned long)n; -# else - return (unsigned long)n - (unsigned long)end_of_stack(p); -# endif -} -#endif -extern void set_task_stack_end_magic(struct task_struct *tsk); - -/* set thread flags in other task's structures - * - see asm/thread_info.h for TIF_xxxx flags available + * Set thread flags in other task's structures. + * See asm/thread_info.h for TIF_xxxx flags available: */ static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag) { @@ -3350,37 +1456,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } -static inline int restart_syscall(void) -{ - set_tsk_thread_flag(current, TIF_SIGPENDING); - return -ERESTARTNOINTR; -} - -static inline int signal_pending(struct task_struct *p) -{ - return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); -} - -static inline int __fatal_signal_pending(struct task_struct *p) -{ - return unlikely(sigismember(&p->pending.signal, SIGKILL)); -} - -static inline int fatal_signal_pending(struct task_struct *p) -{ - return signal_pending(p) && __fatal_signal_pending(p); -} - -static inline int signal_pending_state(long state, struct task_struct *p) -{ - if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) - return 0; - if (!signal_pending(p)) - return 0; - - return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); -} - /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return @@ -3422,15 +1497,6 @@ static inline void cond_resched_rcu(void) #endif } -static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -{ -#ifdef CONFIG_DEBUG_PREEMPT - return p->preempt_disable_ip; -#else - return 0; -#endif -} - /* * Does a critical section need to be broken due to another * task waiting?: (technically does not depend on CONFIG_PREEMPT, @@ -3445,114 +1511,12 @@ static inline int spin_needbreak(spinlock_t *lock) #endif } -/* - * Idle thread specific functions to determine the need_resched - * polling state. - */ -#ifdef TIF_POLLING_NRFLAG -static inline int tsk_is_polling(struct task_struct *p) -{ - return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); -} - -static inline void __current_set_polling(void) -{ - set_thread_flag(TIF_POLLING_NRFLAG); -} - -static inline bool __must_check current_set_polling_and_test(void) -{ - __current_set_polling(); - - /* - * Polling state must be visible before we test NEED_RESCHED, - * paired by resched_curr() - */ - smp_mb__after_atomic(); - - return unlikely(tif_need_resched()); -} - -static inline void __current_clr_polling(void) -{ - clear_thread_flag(TIF_POLLING_NRFLAG); -} - -static inline bool __must_check current_clr_polling_and_test(void) -{ - __current_clr_polling(); - - /* - * Polling state must be visible before we test NEED_RESCHED, - * paired by resched_curr() - */ - smp_mb__after_atomic(); - - return unlikely(tif_need_resched()); -} - -#else -static inline int tsk_is_polling(struct task_struct *p) { return 0; } -static inline void __current_set_polling(void) { } -static inline void __current_clr_polling(void) { } - -static inline bool __must_check current_set_polling_and_test(void) -{ - return unlikely(tif_need_resched()); -} -static inline bool __must_check current_clr_polling_and_test(void) -{ - return unlikely(tif_need_resched()); -} -#endif - -static inline void current_clr_polling(void) -{ - __current_clr_polling(); - - /* - * Ensure we check TIF_NEED_RESCHED after we clear the polling bit. - * Once the bit is cleared, we'll get IPIs with every new - * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also - * fold. - */ - smp_mb(); /* paired with resched_curr() */ - - preempt_fold_need_resched(); -} - static __always_inline bool need_resched(void) { return unlikely(tif_need_resched()); } /* - * Thread group CPU time accounting. - */ -void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); -void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); - -/* - * Reevaluate whether the task has signals pending delivery. - * Wake the task if so. - * This is required every time the blocked sigset_t changes. - * callers must hold sighand->siglock. - */ -extern void recalc_sigpending_and_wake(struct task_struct *t); -extern void recalc_sigpending(void); - -extern void signal_wake_up_state(struct task_struct *t, unsigned int state); - -static inline void signal_wake_up(struct task_struct *t, bool resume) -{ - signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); -} -static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) -{ - signal_wake_up_state(t, resume ? __TASK_TRACED : 0); -} - -/* * Wrappers for p->thread_info->cpu access. No-op on UP. */ #ifdef CONFIG_SMP @@ -3566,11 +1530,6 @@ static inline unsigned int task_cpu(const struct task_struct *p) #endif } -static inline int task_node(const struct task_struct *p) -{ - return cpu_to_node(task_cpu(p)); -} - extern void set_task_cpu(struct task_struct *p, unsigned int cpu); #else @@ -3601,100 +1560,8 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); -#ifdef CONFIG_CGROUP_SCHED -extern struct task_group root_task_group; -#endif /* CONFIG_CGROUP_SCHED */ - -extern int task_can_switch_user(struct user_struct *up, - struct task_struct *tsk); - -#ifdef CONFIG_TASK_XACCT -static inline void add_rchar(struct task_struct *tsk, ssize_t amt) -{ - tsk->ioac.rchar += amt; -} - -static inline void add_wchar(struct task_struct *tsk, ssize_t amt) -{ - tsk->ioac.wchar += amt; -} - -static inline void inc_syscr(struct task_struct *tsk) -{ - tsk->ioac.syscr++; -} - -static inline void inc_syscw(struct task_struct *tsk) -{ - tsk->ioac.syscw++; -} -#else -static inline void add_rchar(struct task_struct *tsk, ssize_t amt) -{ -} - -static inline void add_wchar(struct task_struct *tsk, ssize_t amt) -{ -} - -static inline void inc_syscr(struct task_struct *tsk) -{ -} - -static inline void inc_syscw(struct task_struct *tsk) -{ -} -#endif - #ifndef TASK_SIZE_OF #define TASK_SIZE_OF(tsk) TASK_SIZE #endif -#ifdef CONFIG_MEMCG -extern void mm_update_next_owner(struct mm_struct *mm); -#else -static inline void mm_update_next_owner(struct mm_struct *mm) -{ -} -#endif /* CONFIG_MEMCG */ - -static inline unsigned long task_rlimit(const struct task_struct *tsk, - unsigned int limit) -{ - return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); -} - -static inline unsigned long task_rlimit_max(const struct task_struct *tsk, - unsigned int limit) -{ - return READ_ONCE(tsk->signal->rlim[limit].rlim_max); -} - -static inline unsigned long rlimit(unsigned int limit) -{ - return task_rlimit(current, limit); -} - -static inline unsigned long rlimit_max(unsigned int limit) -{ - return task_rlimit_max(current, limit); -} - -#define SCHED_CPUFREQ_RT (1U << 0) -#define SCHED_CPUFREQ_DL (1U << 1) -#define SCHED_CPUFREQ_IOWAIT (1U << 2) - -#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) - -#ifdef CONFIG_CPU_FREQ -struct update_util_data { - void (*func)(struct update_util_data *data, u64 time, unsigned int flags); -}; - -void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, - void (*func)(struct update_util_data *data, u64 time, - unsigned int flags)); -void cpufreq_remove_update_util_hook(int cpu); -#endif /* CONFIG_CPU_FREQ */ - #endif diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h new file mode 100644 index 000000000000..55cd496df884 --- /dev/null +++ b/include/linux/sched/autogroup.h @@ -0,0 +1,31 @@ +#ifndef _LINUX_SCHED_AUTOGROUP_H +#define _LINUX_SCHED_AUTOGROUP_H + +struct signal_struct; +struct task_struct; +struct task_group; +struct seq_file; + +#ifdef CONFIG_SCHED_AUTOGROUP +extern void sched_autogroup_create_attach(struct task_struct *p); +extern void sched_autogroup_detach(struct task_struct *p); +extern void sched_autogroup_fork(struct signal_struct *sig); +extern void sched_autogroup_exit(struct signal_struct *sig); +extern void sched_autogroup_exit_task(struct task_struct *p); +#ifdef CONFIG_PROC_FS +extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); +extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); +#endif +#else +static inline void sched_autogroup_create_attach(struct task_struct *p) { } +static inline void sched_autogroup_detach(struct task_struct *p) { } +static inline void sched_autogroup_fork(struct signal_struct *sig) { } +static inline void sched_autogroup_exit(struct signal_struct *sig) { } +static inline void sched_autogroup_exit_task(struct task_struct *p) { } +#endif + +#ifdef CONFIG_CGROUP_SCHED +extern struct task_group root_task_group; +#endif /* CONFIG_CGROUP_SCHED */ + +#endif /* _LINUX_SCHED_AUTOGROUP_H */ diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h new file mode 100644 index 000000000000..4a68c6791207 --- /dev/null +++ b/include/linux/sched/clock.h @@ -0,0 +1,104 @@ +#ifndef _LINUX_SCHED_CLOCK_H +#define _LINUX_SCHED_CLOCK_H + +#include <linux/smp.h> + +/* + * Do not use outside of architecture code which knows its limitations. + * + * sched_clock() has no promise of monotonicity or bounded drift between + * CPUs, use (which you should not) requires disabling IRQs. + * + * Please use one of the three interfaces below. + */ +extern unsigned long long notrace sched_clock(void); + +/* + * See the comment in kernel/sched/clock.c + */ +extern u64 running_clock(void); +extern u64 sched_clock_cpu(int cpu); + + +extern void sched_clock_init(void); + +#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +static inline void sched_clock_init_late(void) +{ +} + +static inline void sched_clock_tick(void) +{ +} + +static inline void clear_sched_clock_stable(void) +{ +} + +static inline void sched_clock_idle_sleep_event(void) +{ +} + +static inline void sched_clock_idle_wakeup_event(u64 delta_ns) +{ +} + +static inline u64 cpu_clock(int cpu) +{ + return sched_clock(); +} + +static inline u64 local_clock(void) +{ + return sched_clock(); +} +#else +extern void sched_clock_init_late(void); +/* + * Architectures can set this to 1 if they have specified + * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, + * but then during bootup it turns out that sched_clock() + * is reliable after all: + */ +extern int sched_clock_stable(void); +extern void clear_sched_clock_stable(void); + +extern void sched_clock_tick(void); +extern void sched_clock_idle_sleep_event(void); +extern void sched_clock_idle_wakeup_event(u64 delta_ns); + +/* + * As outlined in clock.c, provides a fast, high resolution, nanosecond + * time source that is monotonic per cpu argument and has bounded drift + * between cpus. + * + * ######################### BIG FAT WARNING ########################## + * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # + * # go backwards !! # + * #################################################################### + */ +static inline u64 cpu_clock(int cpu) +{ + return sched_clock_cpu(cpu); +} + +static inline u64 local_clock(void) +{ + return sched_clock_cpu(raw_smp_processor_id()); +} +#endif + +#ifdef CONFIG_IRQ_TIME_ACCOUNTING +/* + * An i/f to runtime opt-in for irq time accounting based off of sched_clock. + * The reason for this explicit opt-in is not to have perf penalty with + * slow sched_clocks. + */ +extern void enable_sched_clock_irqtime(void); +extern void disable_sched_clock_irqtime(void); +#else +static inline void enable_sched_clock_irqtime(void) {} +static inline void disable_sched_clock_irqtime(void) {} +#endif + +#endif /* _LINUX_SCHED_CLOCK_H */ diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h new file mode 100644 index 000000000000..69eedcef8f03 --- /dev/null +++ b/include/linux/sched/coredump.h @@ -0,0 +1,74 @@ +#ifndef _LINUX_SCHED_COREDUMP_H +#define _LINUX_SCHED_COREDUMP_H + +#include <linux/mm_types.h> + +#define SUID_DUMP_DISABLE 0 /* No setuid dumping */ +#define SUID_DUMP_USER 1 /* Dump as user of process */ +#define SUID_DUMP_ROOT 2 /* Dump as root */ + +/* mm flags */ + +/* for SUID_DUMP_* above */ +#define MMF_DUMPABLE_BITS 2 +#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) + +extern void set_dumpable(struct mm_struct *mm, int value); +/* + * This returns the actual value of the suid_dumpable flag. For things + * that are using this for checking for privilege transitions, it must + * test against SUID_DUMP_USER rather than treating it as a boolean + * value. + */ +static inline int __get_dumpable(unsigned long mm_flags) +{ + return mm_flags & MMF_DUMPABLE_MASK; +} + +static inline int get_dumpable(struct mm_struct *mm) +{ + return __get_dumpable(mm->flags); +} + +/* coredump filter bits */ +#define MMF_DUMP_ANON_PRIVATE 2 +#define MMF_DUMP_ANON_SHARED 3 +#define MMF_DUMP_MAPPED_PRIVATE 4 +#define MMF_DUMP_MAPPED_SHARED 5 +#define MMF_DUMP_ELF_HEADERS 6 +#define MMF_DUMP_HUGETLB_PRIVATE 7 +#define MMF_DUMP_HUGETLB_SHARED 8 +#define MMF_DUMP_DAX_PRIVATE 9 +#define MMF_DUMP_DAX_SHARED 10 + +#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS +#define MMF_DUMP_FILTER_BITS 9 +#define MMF_DUMP_FILTER_MASK \ + (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) +#define MMF_DUMP_FILTER_DEFAULT \ + ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ + (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) + +#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS +# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) +#else +# define MMF_DUMP_MASK_DEFAULT_ELF 0 +#endif + /* leave room for more dump flags */ +#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ +#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ +/* + * This one-shot flag is dropped due to necessity of changing exe once again + * on NFS restore + */ +//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ + +#define MMF_HAS_UPROBES 19 /* has uprobes */ +#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ +#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ +#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ +#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ + +#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) + +#endif /* _LINUX_SCHED_COREDUMP_H */ diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h new file mode 100644 index 000000000000..d2be2ccbb372 --- /dev/null +++ b/include/linux/sched/cpufreq.h @@ -0,0 +1,27 @@ +#ifndef _LINUX_SCHED_CPUFREQ_H +#define _LINUX_SCHED_CPUFREQ_H + +#include <linux/types.h> + +/* + * Interface between cpufreq drivers and the scheduler: + */ + +#define SCHED_CPUFREQ_RT (1U << 0) +#define SCHED_CPUFREQ_DL (1U << 1) +#define SCHED_CPUFREQ_IOWAIT (1U << 2) + +#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) + +#ifdef CONFIG_CPU_FREQ +struct update_util_data { + void (*func)(struct update_util_data *data, u64 time, unsigned int flags); +}; + +void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, + void (*func)(struct update_util_data *data, u64 time, + unsigned int flags)); +void cpufreq_remove_update_util_hook(int cpu); +#endif /* CONFIG_CPU_FREQ */ + +#endif /* _LINUX_SCHED_CPUFREQ_H */ diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h new file mode 100644 index 000000000000..4c5b9735c1ae --- /dev/null +++ b/include/linux/sched/cputime.h @@ -0,0 +1,187 @@ +#ifndef _LINUX_SCHED_CPUTIME_H +#define _LINUX_SCHED_CPUTIME_H + +#include <linux/sched/signal.h> + +/* + * cputime accounting APIs: + */ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +#include <asm/cputime.h> + +#ifndef cputime_to_nsecs +# define cputime_to_nsecs(__ct) \ + (cputime_to_usecs(__ct) * NSEC_PER_USEC) +#endif +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern void task_cputime(struct task_struct *t, + u64 *utime, u64 *stime); +extern u64 task_gtime(struct task_struct *t); +#else +static inline void task_cputime(struct task_struct *t, + u64 *utime, u64 *stime) +{ + *utime = t->utime; + *stime = t->stime; +} + +static inline u64 task_gtime(struct task_struct *t) +{ + return t->gtime; +} +#endif + +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME +static inline void task_cputime_scaled(struct task_struct *t, + u64 *utimescaled, + u64 *stimescaled) +{ + *utimescaled = t->utimescaled; + *stimescaled = t->stimescaled; +} +#else +static inline void task_cputime_scaled(struct task_struct *t, + u64 *utimescaled, + u64 *stimescaled) +{ + task_cputime(t, utimescaled, stimescaled); +} +#endif + +extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); +extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); + + +/* + * Thread group CPU time accounting. + */ +void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); +void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); + + +/* + * The following are functions that support scheduler-internal time accounting. + * These functions are generally called at the timer tick. None of this depends + * on CONFIG_SCHEDSTATS. + */ + +/** + * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running + * + * @tsk: Pointer to target task. + */ +#ifdef CONFIG_POSIX_TIMERS +static inline +struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) +{ + struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + + /* Check if cputimer isn't running. This is accessed without locking. */ + if (!READ_ONCE(cputimer->running)) + return NULL; + + /* + * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime + * in __exit_signal(), we won't account to the signal struct further + * cputime consumed by that task, even though the task can still be + * ticking after __exit_signal(). + * + * In order to keep a consistent behaviour between thread group cputime + * and thread group cputimer accounting, lets also ignore the cputime + * elapsing after __exit_signal() in any thread group timer running. + * + * This makes sure that POSIX CPU clocks and timers are synchronized, so + * that a POSIX CPU timer won't expire while the corresponding POSIX CPU + * clock delta is behind the expiring timer value. + */ + if (unlikely(!tsk->sighand)) + return NULL; + + return cputimer; +} +#else +static inline +struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) +{ + return NULL; +} +#endif + +/** + * account_group_user_time - Maintain utime for a thread group. + * + * @tsk: Pointer to task structure. + * @cputime: Time value by which to increment the utime field of the + * thread_group_cputime structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the utime field there. + */ +static inline void account_group_user_time(struct task_struct *tsk, + u64 cputime) +{ + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); + + if (!cputimer) + return; + + atomic64_add(cputime, &cputimer->cputime_atomic.utime); +} + +/** + * account_group_system_time - Maintain stime for a thread group. + * + * @tsk: Pointer to task structure. + * @cputime: Time value by which to increment the stime field of the + * thread_group_cputime structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the stime field there. + */ +static inline void account_group_system_time(struct task_struct *tsk, + u64 cputime) +{ + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); + + if (!cputimer) + return; + + atomic64_add(cputime, &cputimer->cputime_atomic.stime); +} + +/** + * account_group_exec_runtime - Maintain exec runtime for a thread group. + * + * @tsk: Pointer to task structure. + * @ns: Time value by which to increment the sum_exec_runtime field + * of the thread_group_cputime structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the sum_exec_runtime field there. + */ +static inline void account_group_exec_runtime(struct task_struct *tsk, + unsigned long long ns) +{ + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); + + if (!cputimer) + return; + + atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime); +} + +static inline void prev_cputime_init(struct prev_cputime *prev) +{ +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + prev->utime = prev->stime = 0; + raw_spin_lock_init(&prev->lock); +#endif +} + +extern unsigned long long +task_sched_runtime(struct task_struct *task); + +#endif /* _LINUX_SCHED_CPUTIME_H */ diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 9089a2ae913d..975be862e083 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -1,5 +1,7 @@ -#ifndef _SCHED_DEADLINE_H -#define _SCHED_DEADLINE_H +#ifndef _LINUX_SCHED_DEADLINE_H +#define _LINUX_SCHED_DEADLINE_H + +#include <linux/sched.h> /* * SCHED_DEADLINE tasks has negative priorities, reflecting @@ -26,4 +28,4 @@ static inline bool dl_time_before(u64 a, u64 b) return (s64)(a - b) < 0; } -#endif /* _SCHED_DEADLINE_H */ +#endif /* _LINUX_SCHED_DEADLINE_H */ diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h new file mode 100644 index 000000000000..e0eaee54c5a4 --- /dev/null +++ b/include/linux/sched/debug.h @@ -0,0 +1,50 @@ +#ifndef _LINUX_SCHED_DEBUG_H +#define _LINUX_SCHED_DEBUG_H + +/* + * Various scheduler/task debugging interfaces: + */ + +struct task_struct; + +extern void dump_cpu_task(int cpu); + +/* + * Only dump TASK_* tasks. (0 for all tasks) + */ +extern void show_state_filter(unsigned long state_filter); + +static inline void show_state(void) +{ + show_state_filter(0); +} + +struct pt_regs; + +extern void show_regs(struct pt_regs *); + +/* + * TASK is a pointer to the task whose backtrace we want to see (or NULL for current + * task), SP is the stack pointer of the first frame that should be shown in the back + * trace (or NULL if the entire call-chain of the task should be shown). + */ +extern void show_stack(struct task_struct *task, unsigned long *sp); + +extern void sched_show_task(struct task_struct *p); + +#ifdef CONFIG_SCHED_DEBUG +struct seq_file; +extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); +extern void proc_sched_set_task(struct task_struct *p); +#endif + +/* Attach to any functions which should be ignored in wchan output. */ +#define __sched __attribute__((__section__(".sched.text"))) + +/* Linker adds these: start and end of __sched functions */ +extern char __sched_text_start[], __sched_text_end[]; + +/* Is this address in the __sched functions? */ +extern int in_sched_functions(unsigned long addr); + +#endif /* _LINUX_SCHED_DEBUG_H */ diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h new file mode 100644 index 000000000000..752ac7e628d7 --- /dev/null +++ b/include/linux/sched/hotplug.h @@ -0,0 +1,24 @@ +#ifndef _LINUX_SCHED_HOTPLUG_H +#define _LINUX_SCHED_HOTPLUG_H + +/* + * Scheduler interfaces for hotplug CPU support: + */ + +extern int sched_cpu_starting(unsigned int cpu); +extern int sched_cpu_activate(unsigned int cpu); +extern int sched_cpu_deactivate(unsigned int cpu); + +#ifdef CONFIG_HOTPLUG_CPU +extern int sched_cpu_dying(unsigned int cpu); +#else +# define sched_cpu_dying NULL +#endif + +#ifdef CONFIG_HOTPLUG_CPU +extern void idle_task_exit(void); +#else +static inline void idle_task_exit(void) {} +#endif + +#endif /* _LINUX_SCHED_HOTPLUG_H */ diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h new file mode 100644 index 000000000000..5ca63ebad6b4 --- /dev/null +++ b/include/linux/sched/idle.h @@ -0,0 +1,86 @@ +#ifndef _LINUX_SCHED_IDLE_H +#define _LINUX_SCHED_IDLE_H + +#include <linux/sched.h> + +enum cpu_idle_type { + CPU_IDLE, + CPU_NOT_IDLE, + CPU_NEWLY_IDLE, + CPU_MAX_IDLE_TYPES +}; + +extern void wake_up_if_idle(int cpu); + +/* + * Idle thread specific functions to determine the need_resched + * polling state. + */ +#ifdef TIF_POLLING_NRFLAG + +static inline void __current_set_polling(void) +{ + set_thread_flag(TIF_POLLING_NRFLAG); +} + +static inline bool __must_check current_set_polling_and_test(void) +{ + __current_set_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_curr() + */ + smp_mb__after_atomic(); + + return unlikely(tif_need_resched()); +} + +static inline void __current_clr_polling(void) +{ + clear_thread_flag(TIF_POLLING_NRFLAG); +} + +static inline bool __must_check current_clr_polling_and_test(void) +{ + __current_clr_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_curr() + */ + smp_mb__after_atomic(); + + return unlikely(tif_need_resched()); +} + +#else +static inline void __current_set_polling(void) { } +static inline void __current_clr_polling(void) { } + +static inline bool __must_check current_set_polling_and_test(void) +{ + return unlikely(tif_need_resched()); +} +static inline bool __must_check current_clr_polling_and_test(void) +{ + return unlikely(tif_need_resched()); +} +#endif + +static inline void current_clr_polling(void) +{ + __current_clr_polling(); + + /* + * Ensure we check TIF_NEED_RESCHED after we clear the polling bit. + * Once the bit is cleared, we'll get IPIs with every new + * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also + * fold. + */ + smp_mb(); /* paired with resched_curr() */ + + preempt_fold_need_resched(); +} + +#endif /* _LINUX_SCHED_IDLE_H */ diff --git a/include/linux/sched/init.h b/include/linux/sched/init.h new file mode 100644 index 000000000000..127215045285 --- /dev/null +++ b/include/linux/sched/init.h @@ -0,0 +1,11 @@ +#ifndef _LINUX_SCHED_INIT_H +#define _LINUX_SCHED_INIT_H + +/* + * Scheduler init related prototypes: + */ + +extern void sched_init(void); +extern void sched_init_smp(void); + +#endif /* _LINUX_SCHED_INIT_H */ diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h new file mode 100644 index 000000000000..016afa0fb3bb --- /dev/null +++ b/include/linux/sched/jobctl.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_SCHED_JOBCTL_H +#define _LINUX_SCHED_JOBCTL_H + +#include <linux/types.h> + +struct task_struct; + +/* + * task->jobctl flags + */ +#define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */ + +#define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */ +#define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */ +#define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */ +#define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */ +#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ +#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ +#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ + +#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) +#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) +#define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) +#define JOBCTL_TRAP_STOP (1UL << JOBCTL_TRAP_STOP_BIT) +#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) +#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) +#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) + +#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) +#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) + +extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask); +extern void task_clear_jobctl_trapping(struct task_struct *task); +extern void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask); + +#endif /* _LINUX_SCHED_JOBCTL_H */ diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h new file mode 100644 index 000000000000..4264bc6b2c27 --- /dev/null +++ b/include/linux/sched/loadavg.h @@ -0,0 +1,31 @@ +#ifndef _LINUX_SCHED_LOADAVG_H +#define _LINUX_SCHED_LOADAVG_H + +/* + * These are the constant used to fake the fixed-point load-average + * counting. Some notes: + * - 11 bit fractions expand to 22 bits by the multiplies: this gives + * a load-average precision of 10 bits integer + 11 bits fractional + * - if you want to count load-averages more often, you need more + * precision, or rounding will get you. With 2-second counting freq, + * the EXP_n values would be 1981, 2034 and 2043 if still using only + * 11 bit fractions. + */ +extern unsigned long avenrun[]; /* Load averages */ +extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); + +#define FSHIFT 11 /* nr of bits of precision */ +#define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */ +#define LOAD_FREQ (5*HZ+1) /* 5 sec intervals */ +#define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */ +#define EXP_5 2014 /* 1/exp(5sec/5min) */ +#define EXP_15 2037 /* 1/exp(5sec/15min) */ + +#define CALC_LOAD(load,exp,n) \ + load *= exp; \ + load += n*(FIXED_1-exp); \ + load >>= FSHIFT; + +extern void calc_global_load(unsigned long ticks); + +#endif /* _LINUX_SCHED_LOADAVG_H */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h new file mode 100644 index 000000000000..830953ebb391 --- /dev/null +++ b/include/linux/sched/mm.h @@ -0,0 +1,174 @@ +#ifndef _LINUX_SCHED_MM_H +#define _LINUX_SCHED_MM_H + +#include <linux/kernel.h> +#include <linux/atomic.h> +#include <linux/sched.h> +#include <linux/mm_types.h> +#include <linux/gfp.h> + +/* + * Routines for handling mm_structs + */ +extern struct mm_struct * mm_alloc(void); + +/** + * mmgrab() - Pin a &struct mm_struct. + * @mm: The &struct mm_struct to pin. + * + * Make sure that @mm will not get freed even after the owning task + * exits. This doesn't guarantee that the associated address space + * will still exist later on and mmget_not_zero() has to be used before + * accessing it. + * + * This is a preferred way to to pin @mm for a longer/unbounded amount + * of time. + * + * Use mmdrop() to release the reference acquired by mmgrab(). + * + * See also <Documentation/vm/active_mm.txt> for an in-depth explanation + * of &mm_struct.mm_count vs &mm_struct.mm_users. + */ +static inline void mmgrab(struct mm_struct *mm) +{ + atomic_inc(&mm->mm_count); +} + +/* mmdrop drops the mm and the page tables */ +extern void __mmdrop(struct mm_struct *); +static inline void mmdrop(struct mm_struct *mm) +{ + if (unlikely(atomic_dec_and_test(&mm->mm_count))) + __mmdrop(mm); +} + +static inline void mmdrop_async_fn(struct work_struct *work) +{ + struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); + __mmdrop(mm); +} + +static inline void mmdrop_async(struct mm_struct *mm) +{ + if (unlikely(atomic_dec_and_test(&mm->mm_count))) { + INIT_WORK(&mm->async_put_work, mmdrop_async_fn); + schedule_work(&mm->async_put_work); + } +} + +/** + * mmget() - Pin the address space associated with a &struct mm_struct. + * @mm: The address space to pin. + * + * Make sure that the address space of the given &struct mm_struct doesn't + * go away. This does not protect against parts of the address space being + * modified or freed, however. + * + * Never use this function to pin this address space for an + * unbounded/indefinite amount of time. + * + * Use mmput() to release the reference acquired by mmget(). + * + * See also <Documentation/vm/active_mm.txt> for an in-depth explanation + * of &mm_struct.mm_count vs &mm_struct.mm_users. + */ +static inline void mmget(struct mm_struct *mm) +{ + atomic_inc(&mm->mm_users); +} + +static inline bool mmget_not_zero(struct mm_struct *mm) +{ + return atomic_inc_not_zero(&mm->mm_users); +} + +/* mmput gets rid of the mappings and all user-space */ +extern void mmput(struct mm_struct *); +#ifdef CONFIG_MMU +/* same as above but performs the slow path from the async context. Can + * be called from the atomic context as well + */ +extern void mmput_async(struct mm_struct *); +#endif + +/* Grab a reference to a task's mm, if it is not already going away */ +extern struct mm_struct *get_task_mm(struct task_struct *task); +/* + * Grab a reference to a task's mm, if it is not already going away + * and ptrace_may_access with the mode parameter passed to it + * succeeds. + */ +extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); +/* Remove the current tasks stale references to the old mm_struct */ +extern void mm_release(struct task_struct *, struct mm_struct *); + +#ifdef CONFIG_MEMCG +extern void mm_update_next_owner(struct mm_struct *mm); +#else +static inline void mm_update_next_owner(struct mm_struct *mm) +{ +} +#endif /* CONFIG_MEMCG */ + +#ifdef CONFIG_MMU +extern void arch_pick_mmap_layout(struct mm_struct *mm); +extern unsigned long +arch_get_unmapped_area(struct file *, unsigned long, unsigned long, + unsigned long, unsigned long); +extern unsigned long +arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); +#else +static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} +#endif + +static inline bool in_vfork(struct task_struct *tsk) +{ + bool ret; + + /* + * need RCU to access ->real_parent if CLONE_VM was used along with + * CLONE_PARENT. + * + * We check real_parent->mm == tsk->mm because CLONE_VFORK does not + * imply CLONE_VM + * + * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus + * ->real_parent is not necessarily the task doing vfork(), so in + * theory we can't rely on task_lock() if we want to dereference it. + * + * And in this case we can't trust the real_parent->mm == tsk->mm + * check, it can be false negative. But we do not care, if init or + * another oom-unkillable task does this it should blame itself. + */ + rcu_read_lock(); + ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm; + rcu_read_unlock(); + + return ret; +} + +/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags + * __GFP_FS is also cleared as it implies __GFP_IO. + */ +static inline gfp_t memalloc_noio_flags(gfp_t flags) +{ + if (unlikely(current->flags & PF_MEMALLOC_NOIO)) + flags &= ~(__GFP_IO | __GFP_FS); + return flags; +} + +static inline unsigned int memalloc_noio_save(void) +{ + unsigned int flags = current->flags & PF_MEMALLOC_NOIO; + current->flags |= PF_MEMALLOC_NOIO; + return flags; +} + +static inline void memalloc_noio_restore(unsigned int flags) +{ + current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; +} + +#endif /* _LINUX_SCHED_MM_H */ diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h new file mode 100644 index 000000000000..4995b717500b --- /dev/null +++ b/include/linux/sched/nohz.h @@ -0,0 +1,43 @@ +#ifndef _LINUX_SCHED_NOHZ_H +#define _LINUX_SCHED_NOHZ_H + +/* + * This is the interface between the scheduler and nohz/dyntics: + */ + +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +extern void cpu_load_update_nohz_start(void); +extern void cpu_load_update_nohz_stop(void); +#else +static inline void cpu_load_update_nohz_start(void) { } +static inline void cpu_load_update_nohz_stop(void) { } +#endif + +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +extern void nohz_balance_enter_idle(int cpu); +extern void set_cpu_sd_state_idle(void); +extern int get_nohz_timer_target(void); +#else +static inline void nohz_balance_enter_idle(int cpu) { } +static inline void set_cpu_sd_state_idle(void) { } +#endif + +#ifdef CONFIG_NO_HZ_COMMON +void calc_load_enter_idle(void); +void calc_load_exit_idle(void); +#else +static inline void calc_load_enter_idle(void) { } +static inline void calc_load_exit_idle(void) { } +#endif /* CONFIG_NO_HZ_COMMON */ + +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) +extern void wake_up_nohz_cpu(int cpu); +#else +static inline void wake_up_nohz_cpu(int cpu) { } +#endif + +#ifdef CONFIG_NO_HZ_FULL +extern u64 scheduler_tick_max_deferment(void); +#endif + +#endif /* _LINUX_SCHED_NOHZ_H */ diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h new file mode 100644 index 000000000000..35d5fc77b4be --- /dev/null +++ b/include/linux/sched/numa_balancing.h @@ -0,0 +1,46 @@ +#ifndef _LINUX_SCHED_NUMA_BALANCING_H +#define _LINUX_SCHED_NUMA_BALANCING_H + +/* + * This is the interface between the scheduler and the MM that + * implements memory access pattern based NUMA-balancing: + */ + +#include <linux/sched.h> + +#define TNF_MIGRATED 0x01 +#define TNF_NO_GROUP 0x02 +#define TNF_SHARED 0x04 +#define TNF_FAULT_LOCAL 0x08 +#define TNF_MIGRATE_FAIL 0x10 + +#ifdef CONFIG_NUMA_BALANCING +extern void task_numa_fault(int last_node, int node, int pages, int flags); +extern pid_t task_numa_group_id(struct task_struct *p); +extern void set_numabalancing_state(bool enabled); +extern void task_numa_free(struct task_struct *p); +extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, + int src_nid, int dst_cpu); +#else +static inline void task_numa_fault(int last_node, int node, int pages, + int flags) +{ +} +static inline pid_t task_numa_group_id(struct task_struct *p) +{ + return 0; +} +static inline void set_numabalancing_state(bool enabled) +{ +} +static inline void task_numa_free(struct task_struct *p) +{ +} +static inline bool should_numa_migrate_memory(struct task_struct *p, + struct page *page, int src_nid, int dst_cpu) +{ + return true; +} +#endif + +#endif /* _LINUX_SCHED_NUMA_BALANCING_H */ diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index d9cf5a5762d9..2cc450f6ec54 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -1,5 +1,5 @@ -#ifndef _SCHED_PRIO_H -#define _SCHED_PRIO_H +#ifndef _LINUX_SCHED_PRIO_H +#define _LINUX_SCHED_PRIO_H #define MAX_NICE 19 #define MIN_NICE -20 @@ -57,4 +57,4 @@ static inline long rlimit_to_nice(long prio) return (MAX_NICE - prio + 1); } -#endif /* _SCHED_PRIO_H */ +#endif /* _LINUX_SCHED_PRIO_H */ diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index a30b172df6e1..3bd668414f61 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -1,7 +1,9 @@ -#ifndef _SCHED_RT_H -#define _SCHED_RT_H +#ifndef _LINUX_SCHED_RT_H +#define _LINUX_SCHED_RT_H -#include <linux/sched/prio.h> +#include <linux/sched.h> + +struct task_struct; static inline int rt_prio(int prio) { @@ -57,4 +59,4 @@ extern void normalize_rt_tasks(void); */ #define RR_TIMESLICE (100 * HZ / 1000) -#endif /* _SCHED_RT_H */ +#endif /* _LINUX_SCHED_RT_H */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h new file mode 100644 index 000000000000..2cf446704cd4 --- /dev/null +++ b/include/linux/sched/signal.h @@ -0,0 +1,613 @@ +#ifndef _LINUX_SCHED_SIGNAL_H +#define _LINUX_SCHED_SIGNAL_H + +#include <linux/rculist.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/sched/jobctl.h> +#include <linux/sched/task.h> +#include <linux/cred.h> + +/* + * Types defining task->signal and task->sighand and APIs using them: + */ + +struct sighand_struct { + atomic_t count; + struct k_sigaction action[_NSIG]; + spinlock_t siglock; + wait_queue_head_t signalfd_wqh; +}; + +/* + * Per-process accounting stats: + */ +struct pacct_struct { + int ac_flag; + long ac_exitcode; + unsigned long ac_mem; + u64 ac_utime, ac_stime; + unsigned long ac_minflt, ac_majflt; +}; + +struct cpu_itimer { + u64 expires; + u64 incr; +}; + +/* + * This is the atomic variant of task_cputime, which can be used for + * storing and updating task_cputime statistics without locking. + */ +struct task_cputime_atomic { + atomic64_t utime; + atomic64_t stime; + atomic64_t sum_exec_runtime; +}; + +#define INIT_CPUTIME_ATOMIC \ + (struct task_cputime_atomic) { \ + .utime = ATOMIC64_INIT(0), \ + .stime = ATOMIC64_INIT(0), \ + .sum_exec_runtime = ATOMIC64_INIT(0), \ + } +/** + * struct thread_group_cputimer - thread group interval timer counts + * @cputime_atomic: atomic thread group interval timers. + * @running: true when there are timers running and + * @cputime_atomic receives updates. + * @checking_timer: true when a thread in the group is in the + * process of checking for thread group timers. + * + * This structure contains the version of task_cputime, above, that is + * used for thread group CPU timer calculations. + */ +struct thread_group_cputimer { + struct task_cputime_atomic cputime_atomic; + bool running; + bool checking_timer; +}; + +/* + * NOTE! "signal_struct" does not have its own + * locking, because a shared signal_struct always + * implies a shared sighand_struct, so locking + * sighand_struct is always a proper superset of + * the locking of signal_struct. + */ +struct signal_struct { + atomic_t sigcnt; + atomic_t live; + int nr_threads; + struct list_head thread_head; + + wait_queue_head_t wait_chldexit; /* for wait4() */ + + /* current thread group signal load-balancing target: */ + struct task_struct *curr_target; + + /* shared signal handling: */ + struct sigpending shared_pending; + + /* thread group exit support */ + int group_exit_code; + /* overloaded: + * - notify group_exit_task when ->count is equal to notify_count + * - everyone except group_exit_task is stopped during signal delivery + * of fatal signals, group_exit_task processes the signal. + */ + int notify_count; + struct task_struct *group_exit_task; + + /* thread group stop support, overloads group_exit_code too */ + int group_stop_count; + unsigned int flags; /* see SIGNAL_* flags below */ + + /* + * PR_SET_CHILD_SUBREAPER marks a process, like a service + * manager, to re-parent orphan (double-forking) child processes + * to this process instead of 'init'. The service manager is + * able to receive SIGCHLD signals and is able to investigate + * the process until it calls wait(). All children of this + * process will inherit a flag if they should look for a + * child_subreaper process at exit. + */ + unsigned int is_child_subreaper:1; + unsigned int has_child_subreaper:1; + +#ifdef CONFIG_POSIX_TIMERS + + /* POSIX.1b Interval Timers */ + int posix_timer_id; + struct list_head posix_timers; + + /* ITIMER_REAL timer for the process */ + struct hrtimer real_timer; + ktime_t it_real_incr; + + /* + * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use + * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these + * values are defined to 0 and 1 respectively + */ + struct cpu_itimer it[2]; + + /* + * Thread group totals for process CPU timers. + * See thread_group_cputimer(), et al, for details. + */ + struct thread_group_cputimer cputimer; + + /* Earliest-expiration cache. */ + struct task_cputime cputime_expires; + + struct list_head cpu_timers[3]; + +#endif + + struct pid *leader_pid; + +#ifdef CONFIG_NO_HZ_FULL + atomic_t tick_dep_mask; +#endif + + struct pid *tty_old_pgrp; + + /* boolean value for session group leader */ + int leader; + + struct tty_struct *tty; /* NULL if no tty */ + +#ifdef CONFIG_SCHED_AUTOGROUP + struct autogroup *autogroup; +#endif + /* + * Cumulative resource counters for dead threads in the group, + * and for reaped dead child processes forked by this group. + * Live threads maintain their own counters and add to these + * in __exit_signal, except for the group leader. + */ + seqlock_t stats_lock; + u64 utime, stime, cutime, cstime; + u64 gtime; + u64 cgtime; + struct prev_cputime prev_cputime; + unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; + unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; + unsigned long inblock, oublock, cinblock, coublock; + unsigned long maxrss, cmaxrss; + struct task_io_accounting ioac; + + /* + * Cumulative ns of schedule CPU time fo dead threads in the + * group, not including a zombie group leader, (This only differs + * from jiffies_to_ns(utime + stime) if sched_clock uses something + * other than jiffies.) + */ + unsigned long long sum_sched_runtime; + + /* + * We don't bother to synchronize most readers of this at all, + * because there is no reader checking a limit that actually needs + * to get both rlim_cur and rlim_max atomically, and either one + * alone is a single word that can safely be read normally. + * getrlimit/setrlimit use task_lock(current->group_leader) to + * protect this instead of the siglock, because they really + * have no need to disable irqs. + */ + struct rlimit rlim[RLIM_NLIMITS]; + +#ifdef CONFIG_BSD_PROCESS_ACCT + struct pacct_struct pacct; /* per-process accounting information */ +#endif +#ifdef CONFIG_TASKSTATS + struct taskstats *stats; +#endif +#ifdef CONFIG_AUDIT + unsigned audit_tty; + struct tty_audit_buf *tty_audit_buf; +#endif + + /* + * Thread is the potential origin of an oom condition; kill first on + * oom + */ + bool oom_flag_origin; + short oom_score_adj; /* OOM kill score adjustment */ + short oom_score_adj_min; /* OOM kill score adjustment min value. + * Only settable by CAP_SYS_RESOURCE. */ + struct mm_struct *oom_mm; /* recorded mm when the thread group got + * killed by the oom killer */ + + struct mutex cred_guard_mutex; /* guard against foreign influences on + * credential calculations + * (notably. ptrace) */ +}; + +/* + * Bits in flags field of signal_struct. + */ +#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ +#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ +#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ +#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ +/* + * Pending notifications to parent. + */ +#define SIGNAL_CLD_STOPPED 0x00000010 +#define SIGNAL_CLD_CONTINUED 0x00000020 +#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) + +#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ + +#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ + SIGNAL_STOP_CONTINUED) + +static inline void signal_set_stop_flags(struct signal_struct *sig, + unsigned int flags) +{ + WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); + sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; +} + +/* If true, all threads except ->group_exit_task have pending SIGKILL */ +static inline int signal_group_exit(const struct signal_struct *sig) +{ + return (sig->flags & SIGNAL_GROUP_EXIT) || + (sig->group_exit_task != NULL); +} + +extern void flush_signals(struct task_struct *); +extern void ignore_signals(struct task_struct *); +extern void flush_signal_handlers(struct task_struct *, int force_default); +extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); + +static inline int kernel_dequeue_signal(siginfo_t *info) +{ + struct task_struct *tsk = current; + siginfo_t __info; + int ret; + + spin_lock_irq(&tsk->sighand->siglock); + ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info); + spin_unlock_irq(&tsk->sighand->siglock); + + return ret; +} + +static inline void kernel_signal_stop(void) +{ + spin_lock_irq(¤t->sighand->siglock); + if (current->jobctl & JOBCTL_STOP_DEQUEUED) + __set_current_state(TASK_STOPPED); + spin_unlock_irq(¤t->sighand->siglock); + + schedule(); +} +extern int send_sig_info(int, struct siginfo *, struct task_struct *); +extern int force_sigsegv(int, struct task_struct *); +extern int force_sig_info(int, struct siginfo *, struct task_struct *); +extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); +extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); +extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, + const struct cred *, u32); +extern int kill_pgrp(struct pid *pid, int sig, int priv); +extern int kill_pid(struct pid *pid, int sig, int priv); +extern int kill_proc_info(int, struct siginfo *, pid_t); +extern __must_check bool do_notify_parent(struct task_struct *, int); +extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); +extern void force_sig(int, struct task_struct *); +extern int send_sig(int, struct task_struct *, int); +extern int zap_other_threads(struct task_struct *p); +extern struct sigqueue *sigqueue_alloc(void); +extern void sigqueue_free(struct sigqueue *); +extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); +extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); + +static inline int restart_syscall(void) +{ + set_tsk_thread_flag(current, TIF_SIGPENDING); + return -ERESTARTNOINTR; +} + +static inline int signal_pending(struct task_struct *p) +{ + return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); +} + +static inline int __fatal_signal_pending(struct task_struct *p) +{ + return unlikely(sigismember(&p->pending.signal, SIGKILL)); +} + +static inline int fatal_signal_pending(struct task_struct *p) +{ + return signal_pending(p) && __fatal_signal_pending(p); +} + +static inline int signal_pending_state(long state, struct task_struct *p) +{ + if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) + return 0; + if (!signal_pending(p)) + return 0; + + return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); +} + +/* + * Reevaluate whether the task has signals pending delivery. + * Wake the task if so. + * This is required every time the blocked sigset_t changes. + * callers must hold sighand->siglock. + */ +extern void recalc_sigpending_and_wake(struct task_struct *t); +extern void recalc_sigpending(void); + +extern void signal_wake_up_state(struct task_struct *t, unsigned int state); + +static inline void signal_wake_up(struct task_struct *t, bool resume) +{ + signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); +} +static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) +{ + signal_wake_up_state(t, resume ? __TASK_TRACED : 0); +} + +#ifdef TIF_RESTORE_SIGMASK +/* + * Legacy restore_sigmask accessors. These are inefficient on + * SMP architectures because they require atomic operations. + */ + +/** + * set_restore_sigmask() - make sure saved_sigmask processing gets done + * + * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code + * will run before returning to user mode, to process the flag. For + * all callers, TIF_SIGPENDING is already set or it's no harm to set + * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the + * arch code will notice on return to user mode, in case those bits + * are scarce. We set TIF_SIGPENDING here to ensure that the arch + * signal code always gets run when TIF_RESTORE_SIGMASK is set. + */ +static inline void set_restore_sigmask(void) +{ + set_thread_flag(TIF_RESTORE_SIGMASK); + WARN_ON(!test_thread_flag(TIF_SIGPENDING)); +} +static inline void clear_restore_sigmask(void) +{ + clear_thread_flag(TIF_RESTORE_SIGMASK); +} +static inline bool test_restore_sigmask(void) +{ + return test_thread_flag(TIF_RESTORE_SIGMASK); +} +static inline bool test_and_clear_restore_sigmask(void) +{ + return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); +} + +#else /* TIF_RESTORE_SIGMASK */ + +/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */ +static inline void set_restore_sigmask(void) +{ + current->restore_sigmask = true; + WARN_ON(!test_thread_flag(TIF_SIGPENDING)); +} +static inline void clear_restore_sigmask(void) +{ + current->restore_sigmask = false; +} +static inline bool test_restore_sigmask(void) +{ + return current->restore_sigmask; +} +static inline bool test_and_clear_restore_sigmask(void) +{ + if (!current->restore_sigmask) + return false; + current->restore_sigmask = false; + return true; +} +#endif + +static inline void restore_saved_sigmask(void) +{ + if (test_and_clear_restore_sigmask()) + __set_current_blocked(¤t->saved_sigmask); +} + +static inline sigset_t *sigmask_to_save(void) +{ + sigset_t *res = ¤t->blocked; + if (unlikely(test_restore_sigmask())) + res = ¤t->saved_sigmask; + return res; +} + +static inline int kill_cad_pid(int sig, int priv) +{ + return kill_pid(cad_pid, sig, priv); +} + +/* These can be the second arg to send_sig_info/send_group_sig_info. */ +#define SEND_SIG_NOINFO ((struct siginfo *) 0) +#define SEND_SIG_PRIV ((struct siginfo *) 1) +#define SEND_SIG_FORCED ((struct siginfo *) 2) + +/* + * True if we are on the alternate signal stack. + */ +static inline int on_sig_stack(unsigned long sp) +{ + /* + * If the signal stack is SS_AUTODISARM then, by construction, we + * can't be on the signal stack unless user code deliberately set + * SS_AUTODISARM when we were already on it. + * + * This improves reliability: if user state gets corrupted such that + * the stack pointer points very close to the end of the signal stack, + * then this check will enable the signal to be handled anyway. + */ + if (current->sas_ss_flags & SS_AUTODISARM) + return 0; + +#ifdef CONFIG_STACK_GROWSUP + return sp >= current->sas_ss_sp && + sp - current->sas_ss_sp < current->sas_ss_size; +#else + return sp > current->sas_ss_sp && + sp - current->sas_ss_sp <= current->sas_ss_size; +#endif +} + +static inline int sas_ss_flags(unsigned long sp) +{ + if (!current->sas_ss_size) + return SS_DISABLE; + + return on_sig_stack(sp) ? SS_ONSTACK : 0; +} + +static inline void sas_ss_reset(struct task_struct *p) +{ + p->sas_ss_sp = 0; + p->sas_ss_size = 0; + p->sas_ss_flags = SS_DISABLE; +} + +static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) +{ + if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp)) +#ifdef CONFIG_STACK_GROWSUP + return current->sas_ss_sp; +#else + return current->sas_ss_sp + current->sas_ss_size; +#endif + return sp; +} + +extern void __cleanup_sighand(struct sighand_struct *); +extern void flush_itimer_signals(void); + +#define tasklist_empty() \ + list_empty(&init_task.tasks) + +#define next_task(p) \ + list_entry_rcu((p)->tasks.next, struct task_struct, tasks) + +#define for_each_process(p) \ + for (p = &init_task ; (p = next_task(p)) != &init_task ; ) + +extern bool current_is_single_threaded(void); + +/* + * Careful: do_each_thread/while_each_thread is a double loop so + * 'break' will not work as expected - use goto instead. + */ +#define do_each_thread(g, t) \ + for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do + +#define while_each_thread(g, t) \ + while ((t = next_thread(t)) != g) + +#define __for_each_thread(signal, t) \ + list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) + +#define for_each_thread(p, t) \ + __for_each_thread((p)->signal, t) + +/* Careful: this is a double loop, 'break' won't work as expected. */ +#define for_each_process_thread(p, t) \ + for_each_process(p) for_each_thread(p, t) + +typedef int (*proc_visitor)(struct task_struct *p, void *data); +void walk_process_tree(struct task_struct *top, proc_visitor, void *); + +static inline int get_nr_threads(struct task_struct *tsk) +{ + return tsk->signal->nr_threads; +} + +static inline bool thread_group_leader(struct task_struct *p) +{ + return p->exit_signal >= 0; +} + +/* Do to the insanities of de_thread it is possible for a process + * to have the pid of the thread group leader without actually being + * the thread group leader. For iteration through the pids in proc + * all we care about is that we have a task with the appropriate + * pid, we don't actually care if we have the right task. + */ +static inline bool has_group_leader_pid(struct task_struct *p) +{ + return task_pid(p) == p->signal->leader_pid; +} + +static inline +bool same_thread_group(struct task_struct *p1, struct task_struct *p2) +{ + return p1->signal == p2->signal; +} + +static inline struct task_struct *next_thread(const struct task_struct *p) +{ + return list_entry_rcu(p->thread_group.next, + struct task_struct, thread_group); +} + +static inline int thread_group_empty(struct task_struct *p) +{ + return list_empty(&p->thread_group); +} + +#define delay_group_leader(p) \ + (thread_group_leader(p) && !thread_group_empty(p)) + +extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, + unsigned long *flags); + +static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, + unsigned long *flags) +{ + struct sighand_struct *ret; + + ret = __lock_task_sighand(tsk, flags); + (void)__cond_lock(&tsk->sighand->siglock, ret); + return ret; +} + +static inline void unlock_task_sighand(struct task_struct *tsk, + unsigned long *flags) +{ + spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); +} + +static inline unsigned long task_rlimit(const struct task_struct *tsk, + unsigned int limit) +{ + return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); +} + +static inline unsigned long task_rlimit_max(const struct task_struct *tsk, + unsigned int limit) +{ + return READ_ONCE(tsk->signal->rlim[limit].rlim_max); +} + +static inline unsigned long rlimit(unsigned int limit) +{ + return task_rlimit(current, limit); +} + +static inline unsigned long rlimit_max(unsigned int limit) +{ + return task_rlimit_max(current, limit); +} + +#endif /* _LINUX_SCHED_SIGNAL_H */ diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h new file mode 100644 index 000000000000..141b74c53fad --- /dev/null +++ b/include/linux/sched/stat.h @@ -0,0 +1,40 @@ +#ifndef _LINUX_SCHED_STAT_H +#define _LINUX_SCHED_STAT_H + +#include <linux/percpu.h> + +/* + * Various counters maintained by the scheduler and fork(), + * exposed via /proc, sys.c or used by drivers via these APIs. + * + * ( Note that all these values are aquired without locking, + * so they can only be relied on in narrow circumstances. ) + */ + +extern unsigned long total_forks; +extern int nr_threads; +DECLARE_PER_CPU(unsigned long, process_counts); +extern int nr_processes(void); +extern unsigned long nr_running(void); +extern bool single_task_running(void); +extern unsigned long nr_iowait(void); +extern unsigned long nr_iowait_cpu(int cpu); +extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); + +static inline int sched_info_on(void) +{ +#ifdef CONFIG_SCHEDSTATS + return 1; +#elif defined(CONFIG_TASK_DELAY_ACCT) + extern int delayacct_on; + return delayacct_on; +#else + return 0; +#endif +} + +#ifdef CONFIG_SCHEDSTATS +void force_schedstat_enabled(void); +#endif + +#endif /* _LINUX_SCHED_STAT_H */ diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 49308e142aae..0f5ecd4d298e 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -1,5 +1,9 @@ -#ifndef _SCHED_SYSCTL_H -#define _SCHED_SYSCTL_H +#ifndef _LINUX_SCHED_SYSCTL_H +#define _LINUX_SCHED_SYSCTL_H + +#include <linux/types.h> + +struct ctl_table; #ifdef CONFIG_DETECT_HUNG_TASK extern int sysctl_hung_task_check_count; @@ -78,4 +82,4 @@ extern int sysctl_schedstats(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -#endif /* _SCHED_SYSCTL_H */ +#endif /* _LINUX_SCHED_SYSCTL_H */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h new file mode 100644 index 000000000000..a978d7189cfd --- /dev/null +++ b/include/linux/sched/task.h @@ -0,0 +1,139 @@ +#ifndef _LINUX_SCHED_TASK_H +#define _LINUX_SCHED_TASK_H + +/* + * Interface between the scheduler and various task lifetime (fork()/exit()) + * functionality: + */ + +#include <linux/sched.h> + +struct task_struct; +union thread_union; + +/* + * This serializes "schedule()" and also protects + * the run-queue from deletions/modifications (but + * _adding_ to the beginning of the run-queue has + * a separate lock). + */ +extern rwlock_t tasklist_lock; +extern spinlock_t mmlist_lock; + +extern union thread_union init_thread_union; +extern struct task_struct init_task; + +#ifdef CONFIG_PROVE_RCU +extern int lockdep_tasklist_lock_is_held(void); +#endif /* #ifdef CONFIG_PROVE_RCU */ + +extern asmlinkage void schedule_tail(struct task_struct *prev); +extern void init_idle(struct task_struct *idle, int cpu); +extern void init_idle_bootup_task(struct task_struct *idle); + +extern int sched_fork(unsigned long clone_flags, struct task_struct *p); +extern void sched_dead(struct task_struct *p); + +void __noreturn do_task_dead(void); + +extern void proc_caches_init(void); + +extern void release_task(struct task_struct * p); + +#ifdef CONFIG_HAVE_COPY_THREAD_TLS +extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, + struct task_struct *, unsigned long); +#else +extern int copy_thread(unsigned long, unsigned long, unsigned long, + struct task_struct *); + +/* Architectures that haven't opted into copy_thread_tls get the tls argument + * via pt_regs, so ignore the tls argument passed via C. */ +static inline int copy_thread_tls( + unsigned long clone_flags, unsigned long sp, unsigned long arg, + struct task_struct *p, unsigned long tls) +{ + return copy_thread(clone_flags, sp, arg, p); +} +#endif +extern void flush_thread(void); + +#ifdef CONFIG_HAVE_EXIT_THREAD +extern void exit_thread(struct task_struct *tsk); +#else +static inline void exit_thread(struct task_struct *tsk) +{ +} +#endif +extern void do_group_exit(int); + +extern void exit_files(struct task_struct *); +extern void exit_itimers(struct signal_struct *); + +extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); +extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); +struct task_struct *fork_idle(int); +extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); + +extern void free_task(struct task_struct *tsk); + +/* sched_exec is called by processes performing an exec */ +#ifdef CONFIG_SMP +extern void sched_exec(void); +#else +#define sched_exec() {} +#endif + +#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) + +extern void __put_task_struct(struct task_struct *t); + +static inline void put_task_struct(struct task_struct *t) +{ + if (atomic_dec_and_test(&t->usage)) + __put_task_struct(t); +} + +struct task_struct *task_rcu_dereference(struct task_struct **ptask); +struct task_struct *try_get_task_struct(struct task_struct **ptask); + + +#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT +extern int arch_task_struct_size __read_mostly; +#else +# define arch_task_struct_size (sizeof(struct task_struct)) +#endif + +#ifdef CONFIG_VMAP_STACK +static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) +{ + return t->stack_vm_area; +} +#else +static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) +{ + return NULL; +} +#endif + +/* + * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring + * subscriptions and synchronises with wait4(). Also used in procfs. Also + * pins the final release of task.io_context. Also protects ->cpuset and + * ->cgroup.subsys[]. And ->vfork_done. + * + * Nests both inside and outside of read_lock(&tasklist_lock). + * It must not be nested with write_lock_irq(&tasklist_lock), + * neither inside nor outside. + */ +static inline void task_lock(struct task_struct *p) +{ + spin_lock(&p->alloc_lock); +} + +static inline void task_unlock(struct task_struct *p) +{ + spin_unlock(&p->alloc_lock); +} + +#endif /* _LINUX_SCHED_TASK_H */ diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h new file mode 100644 index 000000000000..df6ea6665b31 --- /dev/null +++ b/include/linux/sched/task_stack.h @@ -0,0 +1,121 @@ +#ifndef _LINUX_SCHED_TASK_STACK_H +#define _LINUX_SCHED_TASK_STACK_H + +/* + * task->stack (kernel stack) handling interfaces: + */ + +#include <linux/sched.h> +#include <linux/magic.h> + +#ifdef CONFIG_THREAD_INFO_IN_TASK + +/* + * When accessing the stack of a non-current task that might exit, use + * try_get_task_stack() instead. task_stack_page will return a pointer + * that could get freed out from under you. + */ +static inline void *task_stack_page(const struct task_struct *task) +{ + return task->stack; +} + +#define setup_thread_stack(new,old) do { } while(0) + +static inline unsigned long *end_of_stack(const struct task_struct *task) +{ + return task->stack; +} + +#elif !defined(__HAVE_THREAD_FUNCTIONS) + +#define task_stack_page(task) ((void *)(task)->stack) + +static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) +{ + *task_thread_info(p) = *task_thread_info(org); + task_thread_info(p)->task = p; +} + +/* + * Return the address of the last usable long on the stack. + * + * When the stack grows down, this is just above the thread + * info struct. Going any lower will corrupt the threadinfo. + * + * When the stack grows up, this is the highest address. + * Beyond that position, we corrupt data on the next page. + */ +static inline unsigned long *end_of_stack(struct task_struct *p) +{ +#ifdef CONFIG_STACK_GROWSUP + return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1; +#else + return (unsigned long *)(task_thread_info(p) + 1); +#endif +} + +#endif + +#ifdef CONFIG_THREAD_INFO_IN_TASK +static inline void *try_get_task_stack(struct task_struct *tsk) +{ + return atomic_inc_not_zero(&tsk->stack_refcount) ? + task_stack_page(tsk) : NULL; +} + +extern void put_task_stack(struct task_struct *tsk); +#else +static inline void *try_get_task_stack(struct task_struct *tsk) +{ + return task_stack_page(tsk); +} + +static inline void put_task_stack(struct task_struct *tsk) {} +#endif + +#define task_stack_end_corrupted(task) \ + (*(end_of_stack(task)) != STACK_END_MAGIC) + +static inline int object_is_on_stack(void *obj) +{ + void *stack = task_stack_page(current); + + return (obj >= stack) && (obj < (stack + THREAD_SIZE)); +} + +extern void thread_stack_cache_init(void); + +#ifdef CONFIG_DEBUG_STACK_USAGE +static inline unsigned long stack_not_used(struct task_struct *p) +{ + unsigned long *n = end_of_stack(p); + + do { /* Skip over canary */ +# ifdef CONFIG_STACK_GROWSUP + n--; +# else + n++; +# endif + } while (!*n); + +# ifdef CONFIG_STACK_GROWSUP + return (unsigned long)end_of_stack(p) - (unsigned long)n; +# else + return (unsigned long)n - (unsigned long)end_of_stack(p); +# endif +} +#endif +extern void set_task_stack_end_magic(struct task_struct *tsk); + +#ifndef __HAVE_ARCH_KSTACK_END +static inline int kstack_end(void *addr) +{ + /* Reliable end of stack detection: + * Some APM bios versions misalign the stack + */ + return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); +} +#endif + +#endif /* _LINUX_SCHED_TASK_STACK_H */ diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h new file mode 100644 index 000000000000..7d065abc7a47 --- /dev/null +++ b/include/linux/sched/topology.h @@ -0,0 +1,226 @@ +#ifndef _LINUX_SCHED_TOPOLOGY_H +#define _LINUX_SCHED_TOPOLOGY_H + +#include <linux/topology.h> + +#include <linux/sched/idle.h> + +/* + * sched-domains (multiprocessor balancing) declarations: + */ +#ifdef CONFIG_SMP + +#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ +#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ +#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ +#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ +#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ +#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ +#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */ +#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */ +#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ +#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ +#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ +#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ +#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ +#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ +#define SD_NUMA 0x4000 /* cross-node balancing */ + +/* + * Increase resolution of cpu_capacity calculations + */ +#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT +#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) + +#ifdef CONFIG_SCHED_SMT +static inline int cpu_smt_flags(void) +{ + return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; +} +#endif + +#ifdef CONFIG_SCHED_MC +static inline int cpu_core_flags(void) +{ + return SD_SHARE_PKG_RESOURCES; +} +#endif + +#ifdef CONFIG_NUMA +static inline int cpu_numa_flags(void) +{ + return SD_NUMA; +} +#endif + +extern int arch_asym_cpu_priority(int cpu); + +struct sched_domain_attr { + int relax_domain_level; +}; + +#define SD_ATTR_INIT (struct sched_domain_attr) { \ + .relax_domain_level = -1, \ +} + +extern int sched_domain_level_max; + +struct sched_group; + +struct sched_domain_shared { + atomic_t ref; + atomic_t nr_busy_cpus; + int has_idle_cores; +}; + +struct sched_domain { + /* These fields must be setup */ + struct sched_domain *parent; /* top domain must be null terminated */ + struct sched_domain *child; /* bottom domain must be null terminated */ + struct sched_group *groups; /* the balancing groups of the domain */ + unsigned long min_interval; /* Minimum balance interval ms */ + unsigned long max_interval; /* Maximum balance interval ms */ + unsigned int busy_factor; /* less balancing by factor if busy */ + unsigned int imbalance_pct; /* No balance until over watermark */ + unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ + unsigned int busy_idx; + unsigned int idle_idx; + unsigned int newidle_idx; + unsigned int wake_idx; + unsigned int forkexec_idx; + unsigned int smt_gain; + + int nohz_idle; /* NOHZ IDLE status */ + int flags; /* See SD_* */ + int level; + + /* Runtime fields. */ + unsigned long last_balance; /* init to jiffies. units in jiffies */ + unsigned int balance_interval; /* initialise to 1. units in ms. */ + unsigned int nr_balance_failed; /* initialise to 0 */ + + /* idle_balance() stats */ + u64 max_newidle_lb_cost; + unsigned long next_decay_max_lb_cost; + + u64 avg_scan_cost; /* select_idle_sibling */ + +#ifdef CONFIG_SCHEDSTATS + /* load_balance() stats */ + unsigned int lb_count[CPU_MAX_IDLE_TYPES]; + unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; + unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; + unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; + unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; + unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; + unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; + + /* Active load balancing */ + unsigned int alb_count; + unsigned int alb_failed; + unsigned int alb_pushed; + + /* SD_BALANCE_EXEC stats */ + unsigned int sbe_count; + unsigned int sbe_balanced; + unsigned int sbe_pushed; + + /* SD_BALANCE_FORK stats */ + unsigned int sbf_count; + unsigned int sbf_balanced; + unsigned int sbf_pushed; + + /* try_to_wake_up() stats */ + unsigned int ttwu_wake_remote; + unsigned int ttwu_move_affine; + unsigned int ttwu_move_balance; +#endif +#ifdef CONFIG_SCHED_DEBUG + char *name; +#endif + union { + void *private; /* used during construction */ + struct rcu_head rcu; /* used during destruction */ + }; + struct sched_domain_shared *shared; + + unsigned int span_weight; + /* + * Span of all CPUs in this domain. + * + * NOTE: this field is variable length. (Allocated dynamically + * by attaching extra space to the end of the structure, + * depending on how many CPUs the kernel has booted up with) + */ + unsigned long span[0]; +}; + +static inline struct cpumask *sched_domain_span(struct sched_domain *sd) +{ + return to_cpumask(sd->span); +} + +extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], + struct sched_domain_attr *dattr_new); + +/* Allocate an array of sched domains, for partition_sched_domains(). */ +cpumask_var_t *alloc_sched_domains(unsigned int ndoms); +void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); + +bool cpus_share_cache(int this_cpu, int that_cpu); + +typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); +typedef int (*sched_domain_flags_f)(void); + +#define SDTL_OVERLAP 0x01 + +struct sd_data { + struct sched_domain **__percpu sd; + struct sched_domain_shared **__percpu sds; + struct sched_group **__percpu sg; + struct sched_group_capacity **__percpu sgc; +}; + +struct sched_domain_topology_level { + sched_domain_mask_f mask; + sched_domain_flags_f sd_flags; + int flags; + int numa_level; + struct sd_data data; +#ifdef CONFIG_SCHED_DEBUG + char *name; +#endif +}; + +extern void set_sched_topology(struct sched_domain_topology_level *tl); + +#ifdef CONFIG_SCHED_DEBUG +# define SD_INIT_NAME(type) .name = #type +#else +# define SD_INIT_NAME(type) +#endif + +#else /* CONFIG_SMP */ + +struct sched_domain_attr; + +static inline void +partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], + struct sched_domain_attr *dattr_new) +{ +} + +static inline bool cpus_share_cache(int this_cpu, int that_cpu) +{ + return true; +} + +#endif /* !CONFIG_SMP */ + +static inline int task_node(const struct task_struct *p) +{ + return cpu_to_node(task_cpu(p)); +} + +#endif /* _LINUX_SCHED_TOPOLOGY_H */ diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h new file mode 100644 index 000000000000..5d5415e129d4 --- /dev/null +++ b/include/linux/sched/user.h @@ -0,0 +1,61 @@ +#ifndef _LINUX_SCHED_USER_H +#define _LINUX_SCHED_USER_H + +#include <linux/uidgid.h> +#include <linux/atomic.h> + +struct key; + +/* + * Some day this will be a full-fledged user tracking system.. + */ +struct user_struct { + atomic_t __count; /* reference count */ + atomic_t processes; /* How many processes does this user have? */ + atomic_t sigpending; /* How many pending signals does this user have? */ +#ifdef CONFIG_FANOTIFY + atomic_t fanotify_listeners; +#endif +#ifdef CONFIG_EPOLL + atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ +#endif +#ifdef CONFIG_POSIX_MQUEUE + /* protected by mq_lock */ + unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ +#endif + unsigned long locked_shm; /* How many pages of mlocked shm ? */ + unsigned long unix_inflight; /* How many files in flight in unix sockets */ + atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */ + +#ifdef CONFIG_KEYS + struct key *uid_keyring; /* UID specific keyring */ + struct key *session_keyring; /* UID's default session keyring */ +#endif + + /* Hash table maintenance information */ + struct hlist_node uidhash_node; + kuid_t uid; + +#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) + atomic_long_t locked_vm; +#endif +}; + +extern int uids_sysfs_init(void); + +extern struct user_struct *find_user(kuid_t); + +extern struct user_struct root_user; +#define INIT_USER (&root_user) + + +/* per-UID process charging. */ +extern struct user_struct * alloc_uid(kuid_t); +static inline struct user_struct *get_uid(struct user_struct *u) +{ + atomic_inc(&u->__count); + return u; +} +extern void free_uid(struct user_struct *); + +#endif /* _LINUX_SCHED_USER_H */ diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h new file mode 100644 index 000000000000..d03d8a9047dc --- /dev/null +++ b/include/linux/sched/wake_q.h @@ -0,0 +1,53 @@ +#ifndef _LINUX_SCHED_WAKE_Q_H +#define _LINUX_SCHED_WAKE_Q_H + +/* + * Wake-queues are lists of tasks with a pending wakeup, whose + * callers have already marked the task as woken internally, + * and can thus carry on. A common use case is being able to + * do the wakeups once the corresponding user lock as been + * released. + * + * We hold reference to each task in the list across the wakeup, + * thus guaranteeing that the memory is still valid by the time + * the actual wakeups are performed in wake_up_q(). + * + * One per task suffices, because there's never a need for a task to be + * in two wake queues simultaneously; it is forbidden to abandon a task + * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is + * already in a wake queue, the wakeup will happen soon and the second + * waker can just skip it. + * + * The DEFINE_WAKE_Q macro declares and initializes the list head. + * wake_up_q() does NOT reinitialize the list; it's expected to be + * called near the end of a function. Otherwise, the list can be + * re-initialized for later re-use by wake_q_init(). + * + * Note that this can cause spurious wakeups. schedule() callers + * must ensure the call is done inside a loop, confirming that the + * wakeup condition has in fact occurred. + */ + +#include <linux/sched.h> + +struct wake_q_head { + struct wake_q_node *first; + struct wake_q_node **lastp; +}; + +#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) + +#define DEFINE_WAKE_Q(name) \ + struct wake_q_head name = { WAKE_Q_TAIL, &name.first } + +static inline void wake_q_init(struct wake_q_head *head) +{ + head->first = WAKE_Q_TAIL; + head->lastp = &head->first; +} + +extern void wake_q_add(struct wake_q_head *head, + struct task_struct *task); +extern void wake_up_q(struct wake_q_head *head); + +#endif /* _LINUX_SCHED_WAKE_Q_H */ diff --git a/include/linux/sched/xacct.h b/include/linux/sched/xacct.h new file mode 100644 index 000000000000..a28156a0d34a --- /dev/null +++ b/include/linux/sched/xacct.h @@ -0,0 +1,48 @@ +#ifndef _LINUX_SCHED_XACCT_H +#define _LINUX_SCHED_XACCT_H + +/* + * Extended task accounting methods: + */ + +#include <linux/sched.h> + +#ifdef CONFIG_TASK_XACCT +static inline void add_rchar(struct task_struct *tsk, ssize_t amt) +{ + tsk->ioac.rchar += amt; +} + +static inline void add_wchar(struct task_struct *tsk, ssize_t amt) +{ + tsk->ioac.wchar += amt; +} + +static inline void inc_syscr(struct task_struct *tsk) +{ + tsk->ioac.syscr++; +} + +static inline void inc_syscw(struct task_struct *tsk) +{ + tsk->ioac.syscw++; +} +#else +static inline void add_rchar(struct task_struct *tsk, ssize_t amt) +{ +} + +static inline void add_wchar(struct task_struct *tsk, ssize_t amt) +{ +} + +static inline void inc_syscr(struct task_struct *tsk) +{ +} + +static inline void inc_syscw(struct task_struct *tsk) +{ +} +#endif + +#endif /* _LINUX_SCHED_XACCT_H */ diff --git a/include/linux/signal.h b/include/linux/signal.h index 5308304993be..94ad6eea9550 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -1,32 +1,13 @@ #ifndef _LINUX_SIGNAL_H #define _LINUX_SIGNAL_H -#include <linux/list.h> #include <linux/bug.h> -#include <uapi/linux/signal.h> +#include <linux/signal_types.h> struct task_struct; /* for sysctl */ extern int print_fatal_signals; -/* - * Real Time signals may be queued. - */ - -struct sigqueue { - struct list_head list; - int flags; - siginfo_t info; - struct user_struct *user; -}; - -/* flags values. */ -#define SIGQUEUE_PREALLOC 1 - -struct sigpending { - struct list_head list; - sigset_t signal; -}; #ifndef HAVE_ARCH_COPY_SIGINFO @@ -272,42 +253,6 @@ extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; -struct sigaction { -#ifndef __ARCH_HAS_IRIX_SIGACTION - __sighandler_t sa_handler; - unsigned long sa_flags; -#else - unsigned int sa_flags; - __sighandler_t sa_handler; -#endif -#ifdef __ARCH_HAS_SA_RESTORER - __sigrestore_t sa_restorer; -#endif - sigset_t sa_mask; /* mask last for extensibility */ -}; - -struct k_sigaction { - struct sigaction sa; -#ifdef __ARCH_HAS_KA_RESTORER - __sigrestore_t ka_restorer; -#endif -}; - -#ifdef CONFIG_OLD_SIGACTION -struct old_sigaction { - __sighandler_t sa_handler; - old_sigset_t sa_mask; - unsigned long sa_flags; - __sigrestore_t sa_restorer; -}; -#endif - -struct ksignal { - struct k_sigaction ka; - siginfo_t info; - int sig; -}; - extern int get_signal(struct ksignal *ksig); extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void exit_signals(struct task_struct *tsk); diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h new file mode 100644 index 000000000000..16d862a3d8f3 --- /dev/null +++ b/include/linux/signal_types.h @@ -0,0 +1,66 @@ +#ifndef _LINUX_SIGNAL_TYPES_H +#define _LINUX_SIGNAL_TYPES_H + +/* + * Basic signal handling related data type definitions: + */ + +#include <linux/list.h> +#include <uapi/linux/signal.h> + +/* + * Real Time signals may be queued. + */ + +struct sigqueue { + struct list_head list; + int flags; + siginfo_t info; + struct user_struct *user; +}; + +/* flags values. */ +#define SIGQUEUE_PREALLOC 1 + +struct sigpending { + struct list_head list; + sigset_t signal; +}; + +struct sigaction { +#ifndef __ARCH_HAS_IRIX_SIGACTION + __sighandler_t sa_handler; + unsigned long sa_flags; +#else + unsigned int sa_flags; + __sighandler_t sa_handler; +#endif +#ifdef __ARCH_HAS_SA_RESTORER + __sigrestore_t sa_restorer; +#endif + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; +#ifdef __ARCH_HAS_KA_RESTORER + __sigrestore_t ka_restorer; +#endif +}; + +#ifdef CONFIG_OLD_SIGACTION +struct old_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + unsigned long sa_flags; + __sigrestore_t sa_restorer; +}; +#endif + +struct ksignal { + struct k_sigaction ka; + siginfo_t info; + int sig; +}; + +#endif /* _LINUX_SIGNAL_TYPES_H */ diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h index eadbe227c256..4985048640a7 100644 --- a/include/linux/signalfd.h +++ b/include/linux/signalfd.h @@ -8,7 +8,7 @@ #define _LINUX_SIGNALFD_H #include <uapi/linux/signalfd.h> - +#include <linux/sched/signal.h> #ifdef CONFIG_SIGNALFD diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 69ccd2636911..c776abd86937 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,6 +34,7 @@ #include <linux/dma-mapping.h> #include <linux/netdev_features.h> #include <linux/sched.h> +#include <linux/sched/clock.h> #include <net/flow_dissector.h> #include <linux/splice.h> #include <linux/in6.h> diff --git a/include/linux/stat.h b/include/linux/stat.h index 075cb0c7eb2a..c76e524fb34b 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -18,20 +18,32 @@ #include <linux/time.h> #include <linux/uidgid.h> +#define KSTAT_QUERY_FLAGS (AT_STATX_SYNC_TYPE) + struct kstat { - u64 ino; - dev_t dev; + u32 result_mask; /* What fields the user got */ umode_t mode; unsigned int nlink; + uint32_t blksize; /* Preferred I/O size */ + u64 attributes; +#define KSTAT_ATTR_FS_IOC_FLAGS \ + (STATX_ATTR_COMPRESSED | \ + STATX_ATTR_IMMUTABLE | \ + STATX_ATTR_APPEND | \ + STATX_ATTR_NODUMP | \ + STATX_ATTR_ENCRYPTED \ + )/* Attrs corresponding to FS_*_FL flags */ + u64 ino; + dev_t dev; + dev_t rdev; kuid_t uid; kgid_t gid; - dev_t rdev; loff_t size; - struct timespec atime; + struct timespec atime; struct timespec mtime; struct timespec ctime; - unsigned long blksize; - unsigned long long blocks; + struct timespec btime; /* File creation time */ + u64 blocks; }; #endif diff --git a/include/linux/sunrpc/types.h b/include/linux/sunrpc/types.h index d222f47550af..11a7536c0fd2 100644 --- a/include/linux/sunrpc/types.h +++ b/include/linux/sunrpc/types.h @@ -10,6 +10,7 @@ #define _LINUX_SUNRPC_TYPES_H_ #include <linux/timer.h> +#include <linux/sched/signal.h> #include <linux/workqueue.h> #include <linux/sunrpc/debug.h> #include <linux/list.h> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 91a740f6b884..980c3c9b06f8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -48,6 +48,7 @@ struct stat; struct stat64; struct statfs; struct statfs64; +struct statx; struct __sysctl_args; struct sysinfo; struct timespec; @@ -902,5 +903,7 @@ asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len, unsigned long prot, int pkey); asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val); asmlinkage long sys_pkey_free(int pkey); +asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, + unsigned mask, struct statx __user *buffer); #endif diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index 58de6edf751f..e2a5daf8d14f 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -8,7 +8,7 @@ #define _LINUX_TASKSTATS_KERN_H #include <linux/taskstats.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/slab.h> #ifdef CONFIG_TASKSTATS diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index d2e804e15c3e..b598cbc7b576 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -8,6 +8,10 @@ void timekeeping_init(void); extern int timekeeping_suspended; +/* Architecture timer tick functions: */ +extern void update_process_times(int user); +extern void xtime_update(unsigned long ticks); + /* * Get and set timeofday */ diff --git a/include/linux/timer.h b/include/linux/timer.h index c7bdf895179c..e6789b8757d5 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -212,7 +212,7 @@ struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -#include <linux/sysctl.h> +struct ctl_table; extern unsigned int sysctl_timer_migration; int timer_migration_handler(struct ctl_table *table, int write, diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 363e0e8082a9..32354b4b4b2b 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -5,6 +5,9 @@ #include <linux/nsproxy.h> #include <linux/ns_common.h> #include <linux/sched.h> +#include <linux/workqueue.h> +#include <linux/rwsem.h> +#include <linux/sysctl.h> #include <linux/err.h> #define UID_GID_MAP_MAX_EXTENTS 5 @@ -69,7 +72,7 @@ struct ucounts { struct hlist_node node; struct user_namespace *ns; kuid_t uid; - atomic_t count; + int count; atomic_t ucount[UCOUNT_COUNTS]; }; diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 0468548acebf..48a3483dccb1 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -61,8 +61,7 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, unsigned long from, unsigned long to, unsigned long len); -extern void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +extern bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -72,8 +71,6 @@ extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, extern void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf); -extern void userfaultfd_exit(struct mm_struct *mm); - #else /* CONFIG_USERFAULTFD */ /* mm helpers */ @@ -120,11 +117,11 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, { } -static inline void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +static inline bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + return true; } static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, @@ -139,10 +136,6 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm, { } -static inline void userfaultfd_exit(struct mm_struct *mm) -{ -} - #endif /* CONFIG_USERFAULTFD */ #endif /* _LINUX_USERFAULTFD_K_H */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 26c155bb639b..8355bab175e1 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -7,6 +7,8 @@ #include <linux/virtio_byteorder.h> #include <uapi/linux/virtio_config.h> +struct irq_affinity; + /** * virtio_config_ops - operations for configuring a virtio device * @get: read the value of a configuration field @@ -56,6 +58,7 @@ * This returns a pointer to the bus name a la pci_name from which * the caller can then copy. * @set_vq_affinity: set the affinity for a virtqueue. + * @get_vq_affinity: get the affinity for a virtqueue (optional). */ typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { @@ -68,14 +71,15 @@ struct virtio_config_ops { void (*set_status)(struct virtio_device *vdev, u8 status); void (*reset)(struct virtio_device *vdev); int (*find_vqs)(struct virtio_device *, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]); + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); u64 (*get_features)(struct virtio_device *vdev); int (*finalize_features)(struct virtio_device *vdev); const char *(*bus_name)(struct virtio_device *vdev); int (*set_vq_affinity)(struct virtqueue *vq, int cpu); + const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev, + int index); }; /* If driver didn't advertise the feature, it will never appear. */ @@ -169,7 +173,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, vq_callback_t *callbacks[] = { c }; const char *names[] = { n }; struct virtqueue *vq; - int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names); + int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL); if (err < 0) return ERR_PTR(err); return vq; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 6aa1b6cb5828..a80b7b59cf33 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -79,6 +79,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_SPLIT_PAGE_FAILED, THP_DEFERRED_SPLIT_PAGE, THP_SPLIT_PMD, +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + THP_SPLIT_PUD, +#endif THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, #endif diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h index c3fa0fd43949..1081db987391 100644 --- a/include/linux/vmacache.h +++ b/include/linux/vmacache.h @@ -12,7 +12,7 @@ static inline void vmacache_flush(struct task_struct *tsk) { - memset(tsk->vmacache, 0, sizeof(tsk->vmacache)); + memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas)); } extern void vmacache_flush_all(struct mm_struct *mm); diff --git a/include/linux/wait.h b/include/linux/wait.h index 1421132e9086..db076ca7f11d 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -6,6 +6,7 @@ #include <linux/list.h> #include <linux/stddef.h> #include <linux/spinlock.h> + #include <asm/current.h> #include <uapi/linux/wait.h> @@ -619,30 +620,19 @@ do { \ __ret; \ }) +extern int do_wait_intr(wait_queue_head_t *, wait_queue_t *); +extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *); -#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \ +#define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \ ({ \ - int __ret = 0; \ + int __ret; \ DEFINE_WAIT(__wait); \ if (exclusive) \ __wait.flags |= WQ_FLAG_EXCLUSIVE; \ do { \ - if (likely(list_empty(&__wait.task_list))) \ - __add_wait_queue_tail(&(wq), &__wait); \ - set_current_state(TASK_INTERRUPTIBLE); \ - if (signal_pending(current)) { \ - __ret = -ERESTARTSYS; \ + __ret = fn(&(wq), &__wait); \ + if (__ret) \ break; \ - } \ - if (irq) \ - spin_unlock_irq(&(wq).lock); \ - else \ - spin_unlock(&(wq).lock); \ - schedule(); \ - if (irq) \ - spin_lock_irq(&(wq).lock); \ - else \ - spin_lock(&(wq).lock); \ } while (!(condition)); \ __remove_wait_queue(&(wq), &__wait); \ __set_current_state(TASK_RUNNING); \ @@ -675,7 +665,7 @@ do { \ */ #define wait_event_interruptible_locked(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 0)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr)) /** * wait_event_interruptible_locked_irq - sleep until a condition gets true @@ -702,7 +692,7 @@ do { \ */ #define wait_event_interruptible_locked_irq(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 1)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr_irq)) /** * wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true @@ -733,7 +723,7 @@ do { \ */ #define wait_event_interruptible_exclusive_locked(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 0)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr)) /** * wait_event_interruptible_exclusive_locked_irq - sleep until a condition gets true @@ -764,7 +754,7 @@ do { \ */ #define wait_event_interruptible_exclusive_locked_irq(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr_irq)) #define __wait_event_killable(wq, condition) \ diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h index 574ff2ae94be..6cd94e5ee113 100644 --- a/include/media/v4l2-ioctl.h +++ b/include/media/v4l2-ioctl.h @@ -12,6 +12,7 @@ #include <linux/poll.h> #include <linux/fs.h> #include <linux/mutex.h> +#include <linux/sched/signal.h> #include <linux/compiler.h> /* need __user */ #include <linux/videodev2.h> diff --git a/include/media/vsp1.h b/include/media/vsp1.h index 458b400373d4..38aac554dbba 100644 --- a/include/media/vsp1.h +++ b/include/media/vsp1.h @@ -20,8 +20,17 @@ struct device; int vsp1_du_init(struct device *dev); -int vsp1_du_setup_lif(struct device *dev, unsigned int width, - unsigned int height); +/** + * struct vsp1_du_lif_config - VSP LIF configuration + * @width: output frame width + * @height: output frame height + */ +struct vsp1_du_lif_config { + unsigned int width; + unsigned int height; +}; + +int vsp1_du_setup_lif(struct device *dev, const struct vsp1_du_lif_config *cfg); struct vsp1_du_atomic_config { u32 pixelformat; diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 27dfe85772b1..b8eb51a661e5 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -402,10 +402,10 @@ struct p9_wstat { u32 atime; u32 mtime; u64 length; - char *name; - char *uid; - char *gid; - char *muid; + const char *name; + const char *uid; + const char *gid; + const char *muid; char *extension; /* 9p2000.u extensions */ kuid_t n_uid; /* 9p2000.u extensions */ kgid_t n_gid; /* 9p2000.u extensions */ diff --git a/include/net/9p/client.h b/include/net/9p/client.h index c6b97e58cf84..b582339ccef5 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -223,16 +223,16 @@ void p9_client_destroy(struct p9_client *clnt); void p9_client_disconnect(struct p9_client *clnt); void p9_client_begin_disconnect(struct p9_client *clnt); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, - char *uname, kuid_t n_uname, char *aname); + const char *uname, kuid_t n_uname, const char *aname); struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, - char **wnames, int clone); + const unsigned char * const *wnames, int clone); int p9_client_open(struct p9_fid *fid, int mode); -int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, +int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, char *extension); -int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid, char *newname); -int p9_client_symlink(struct p9_fid *fid, char *name, char *symname, kgid_t gid, - struct p9_qid *qid); -int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, +int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid, const char *newname); +int p9_client_symlink(struct p9_fid *fid, const char *name, const char *symname, + kgid_t gid, struct p9_qid *qid); +int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 mode, kgid_t gid, struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); int p9_client_fsync(struct p9_fid *fid, int datasync); @@ -250,9 +250,9 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *attr); struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, u64 request_mask); -int p9_client_mknod_dotl(struct p9_fid *oldfid, char *name, int mode, +int p9_client_mknod_dotl(struct p9_fid *oldfid, const char *name, int mode, dev_t rdev, kgid_t gid, struct p9_qid *); -int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, +int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, kgid_t gid, struct p9_qid *); int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl); diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 90708f68cc02..95ccc1eef558 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -26,6 +26,8 @@ #define __HCI_CORE_H #include <linux/leds.h> +#include <linux/rculist.h> + #include <net/bluetooth/hci.h> #include <net/bluetooth/hci_sock.h> diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index b8d637225a07..c0452de83086 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -25,6 +25,8 @@ #define _LINUX_NET_BUSY_POLL_H #include <linux/netdevice.h> +#include <linux/sched/clock.h> +#include <linux/sched/signal.h> #include <net/ip.h> #ifdef CONFIG_NET_RX_BUSY_POLL diff --git a/include/net/irda/timer.h b/include/net/irda/timer.h index cb2615ccf761..d784f242cf7b 100644 --- a/include/net/irda/timer.h +++ b/include/net/irda/timer.h @@ -59,7 +59,7 @@ struct lap_cb; * Slot timer must never exceed 85 ms, and must always be at least 25 ms, * suggested to 75-85 msec by IrDA lite. This doesn't work with a lot of * devices, and other stackes uses a lot more, so it's best we do it as well - * (Note : this is the default value and sysctl overides it - Jean II) + * (Note : this is the default value and sysctl overrides it - Jean II) */ #define SLOT_TIMEOUT (90*HZ/1000) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index ac84686aaafb..2aa8a9d80fbe 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -988,9 +988,9 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask); -int nft_obj_notify(struct net *net, struct nft_table *table, - struct nft_object *obj, u32 portid, u32 seq, - int event, int family, int report, gfp_t gfp); +void nft_obj_notify(struct net *net, struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, + int event, int family, int report, gfp_t gfp); /** * struct nft_object_type - stateful object type diff --git a/include/net/scm.h b/include/net/scm.h index 59fa93c01d2a..142ea9e7a6d0 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -3,6 +3,7 @@ #include <linux/limits.h> #include <linux/net.h> +#include <linux/cred.h> #include <linux/security.h> #include <linux/pid.h> #include <linux/nsproxy.h> diff --git a/include/net/sock.h b/include/net/sock.h index 9ccefa5c5487..5e5997654db6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1526,6 +1526,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, void sk_free(struct sock *sk); void sk_destruct(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); +void sk_free_unlock_clone(struct sock *sk); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority); diff --git a/include/rdma/ib.h b/include/rdma/ib.h index a6b93706b0fc..9b4c22a36931 100644 --- a/include/rdma/ib.h +++ b/include/rdma/ib.h @@ -35,6 +35,7 @@ #include <linux/types.h> #include <linux/sched.h> +#include <linux/cred.h> struct ib_addr { union { diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 8990e580b278..6f22b39f1b0c 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -315,6 +315,7 @@ extern void scsi_remove_device(struct scsi_device *); extern int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh); void scsi_attach_vpd(struct scsi_device *sdev); +extern struct scsi_device *scsi_device_from_queue(struct request_queue *q); extern int scsi_device_get(struct scsi_device *); extern void scsi_device_put(struct scsi_device *); extern struct scsi_device *scsi_device_lookup(struct Scsi_Host *, @@ -409,19 +410,16 @@ extern int scsi_is_target_device(const struct device *); extern void scsi_sanitize_inquiry_string(unsigned char *s, int len); extern int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, int timeout, int retries, - u64 flags, int *resid); -extern int scsi_execute_req_flags(struct scsi_device *sdev, - const unsigned char *cmd, int data_direction, void *buffer, - unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, - int retries, int *resid, u64 flags, req_flags_t rq_flags); + unsigned char *sense, struct scsi_sense_hdr *sshdr, + int timeout, int retries, u64 flags, + req_flags_t rq_flags, int *resid); static inline int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, int retries, int *resid) { - return scsi_execute_req_flags(sdev, cmd, data_direction, buffer, - bufflen, sshdr, timeout, retries, resid, 0, 0); + return scsi_execute(sdev, cmd, data_direction, buffer, + bufflen, NULL, sshdr, timeout, retries, 0, 0, resid); } extern void sdev_disable_disk_events(struct scsi_device *sdev); extern void sdev_enable_disk_events(struct scsi_device *sdev); diff --git a/include/sound/control.h b/include/sound/control.h index 21d047f229a1..bd7246de58e7 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -22,6 +22,7 @@ * */ +#include <linux/wait.h> #include <sound/asound.h> #define snd_kcontrol_chip(kcontrol) ((kcontrol)->private_data) diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index 1277e9ba0318..ff1a4f4cd66d 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -55,8 +55,12 @@ extern int iscsit_setup_scsi_cmd(struct iscsi_conn *, struct iscsi_cmd *, extern void iscsit_set_unsoliticed_dataout(struct iscsi_cmd *); extern int iscsit_process_scsi_cmd(struct iscsi_conn *, struct iscsi_cmd *, struct iscsi_scsi_req *); -extern int iscsit_check_dataout_hdr(struct iscsi_conn *, unsigned char *, - struct iscsi_cmd **); +extern int +__iscsit_check_dataout_hdr(struct iscsi_conn *, void *, + struct iscsi_cmd *, u32, bool *); +extern int +iscsit_check_dataout_hdr(struct iscsi_conn *conn, void *buf, + struct iscsi_cmd **out_cmd); extern int iscsit_check_dataout_payload(struct iscsi_cmd *, struct iscsi_data *, bool); extern int iscsit_setup_nop_out(struct iscsi_conn *, struct iscsi_cmd *, @@ -125,6 +129,9 @@ extern void iscsit_release_cmd(struct iscsi_cmd *); extern void iscsit_free_cmd(struct iscsi_cmd *, bool); extern void iscsit_add_cmd_to_immediate_queue(struct iscsi_cmd *, struct iscsi_conn *, u8); +extern struct iscsi_cmd * +iscsit_find_cmd_from_itt_or_dump(struct iscsi_conn *conn, + itt_t init_task_tag, u32 length); /* * From iscsi_target_nego.c diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 878560e60c75..37c274e61acc 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -4,7 +4,9 @@ #include <linux/configfs.h> /* struct config_group */ #include <linux/dma-direction.h> /* enum dma_data_direction */ #include <linux/percpu_ida.h> /* struct percpu_ida */ +#include <linux/percpu-refcount.h> #include <linux/semaphore.h> /* struct semaphore */ +#include <linux/completion.h> #define TARGET_CORE_VERSION "v5.0" @@ -197,6 +199,7 @@ enum tcm_tmreq_table { TMR_LUN_RESET = 5, TMR_TARGET_WARM_RESET = 6, TMR_TARGET_COLD_RESET = 7, + TMR_UNKNOWN = 0xff, }; /* fabric independent task management response values */ @@ -397,7 +400,6 @@ struct se_tmr_req { void *fabric_tmr_ptr; struct se_cmd *task_cmd; struct se_device *tmr_dev; - struct se_lun *tmr_lun; struct list_head tmr_list; }; @@ -488,8 +490,6 @@ struct se_cmd { #define CMD_T_COMPLETE (1 << 2) #define CMD_T_SENT (1 << 4) #define CMD_T_STOP (1 << 5) -#define CMD_T_DEV_ACTIVE (1 << 7) -#define CMD_T_BUSY (1 << 9) #define CMD_T_TAS (1 << 10) #define CMD_T_FABRIC_STOP (1 << 11) spinlock_t t_state_lock; @@ -732,6 +732,7 @@ struct se_lun { struct config_group lun_group; struct se_port_stat_grps port_stat_grps; struct completion lun_ref_comp; + struct completion lun_shutdown_comp; struct percpu_ref lun_ref; struct list_head lun_dev_link; struct hlist_node link; @@ -767,6 +768,8 @@ struct se_device { u32 dev_index; u64 creation_time; atomic_long_t num_resets; + atomic_long_t aborts_complete; + atomic_long_t aborts_no_task; atomic_long_t num_cmds; atomic_long_t read_bytes; atomic_long_t write_bytes; diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 358041bad1da..d7dd1427fe0d 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -47,7 +47,7 @@ struct target_core_fabric_ops { u32 (*tpg_get_inst_index)(struct se_portal_group *); /* * Optional to release struct se_cmd and fabric dependent allocated - * I/O descriptor in transport_cmd_check_stop(). + * I/O descriptor after command execution has finished. * * Returning 1 will signal a descriptor has been released. * Returning 0 will signal a descriptor has not been released. diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 593f586545eb..39123c06a566 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -119,6 +119,7 @@ enum rxrpc_recvmsg_trace { rxrpc_recvmsg_full, rxrpc_recvmsg_hole, rxrpc_recvmsg_next, + rxrpc_recvmsg_requeue, rxrpc_recvmsg_return, rxrpc_recvmsg_terminal, rxrpc_recvmsg_to_be_accepted, @@ -277,6 +278,7 @@ enum rxrpc_congest_change { EM(rxrpc_recvmsg_full, "FULL") \ EM(rxrpc_recvmsg_hole, "HOLE") \ EM(rxrpc_recvmsg_next, "NEXT") \ + EM(rxrpc_recvmsg_requeue, "REQU") \ EM(rxrpc_recvmsg_return, "RETN") \ EM(rxrpc_recvmsg_terminal, "TERM") \ EM(rxrpc_recvmsg_to_be_accepted, "TBAC") \ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9b90c57517a9..9e3ef6c99e4b 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -4,7 +4,7 @@ #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SCHED_H -#include <linux/sched.h> +#include <linux/sched/numa_balancing.h> #include <linux/tracepoint.h> #include <linux/binfmts.h> diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index 14e49c798135..b35533b94277 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -1,5 +1,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM raw_syscalls +#undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE syscalls #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 1c80efb67d10..dd9820b1c779 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -466,6 +466,7 @@ header-y += virtio_console.h header-y += virtio_gpu.h header-y += virtio_ids.h header-y += virtio_input.h +header-y += virtio_mmio.h header-y += virtio_net.h header-y += virtio_pci.h header-y += virtio_ring.h diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index beed138bd359..813afd6eee71 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -63,5 +63,10 @@ #define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ #define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ +#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ +#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ +#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ +#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ + #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h new file mode 100644 index 000000000000..307acbc82d80 --- /dev/null +++ b/include/uapi/linux/sched/types.h @@ -0,0 +1,74 @@ +#ifndef _UAPI_LINUX_SCHED_TYPES_H +#define _UAPI_LINUX_SCHED_TYPES_H + +#include <linux/types.h> + +struct sched_param { + int sched_priority; +}; + +#define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */ + +/* + * Extended scheduling parameters data structure. + * + * This is needed because the original struct sched_param can not be + * altered without introducing ABI issues with legacy applications + * (e.g., in sched_getparam()). + * + * However, the possibility of specifying more than just a priority for + * the tasks may be useful for a wide variety of application fields, e.g., + * multimedia, streaming, automation and control, and many others. + * + * This variant (sched_attr) is meant at describing a so-called + * sporadic time-constrained task. In such model a task is specified by: + * - the activation period or minimum instance inter-arrival time; + * - the maximum (or average, depending on the actual scheduling + * discipline) computation time of all instances, a.k.a. runtime; + * - the deadline (relative to the actual activation time) of each + * instance. + * Very briefly, a periodic (sporadic) task asks for the execution of + * some specific computation --which is typically called an instance-- + * (at most) every period. Moreover, each instance typically lasts no more + * than the runtime and must be completed by time instant t equal to + * the instance activation time + the deadline. + * + * This is reflected by the actual fields of the sched_attr structure: + * + * @size size of the structure, for fwd/bwd compat. + * + * @sched_policy task's scheduling policy + * @sched_flags for customizing the scheduler behaviour + * @sched_nice task's nice value (SCHED_NORMAL/BATCH) + * @sched_priority task's static priority (SCHED_FIFO/RR) + * @sched_deadline representative of the task's deadline + * @sched_runtime representative of the task's runtime + * @sched_period representative of the task's period + * + * Given this task model, there are a multiplicity of scheduling algorithms + * and policies, that can be used to ensure all the tasks will make their + * timing constraints. + * + * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the + * only user of this new interface. More information about the algorithm + * available in the scheduling class file or in Documentation/. + */ +struct sched_attr { + u32 size; + + u32 sched_policy; + u64 sched_flags; + + /* SCHED_NORMAL, SCHED_BATCH */ + s32 sched_nice; + + /* SCHED_FIFO, SCHED_RR */ + u32 sched_priority; + + /* SCHED_DEADLINE */ + u64 sched_runtime; + u64 sched_deadline; + u64 sched_period; +}; + +#endif /* _UAPI_LINUX_SCHED_TYPES_H */ diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 7fec7e36d921..51a6b86e3700 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -1,6 +1,7 @@ #ifndef _UAPI_LINUX_STAT_H #define _UAPI_LINUX_STAT_H +#include <linux/types.h> #if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) @@ -41,5 +42,135 @@ #endif +/* + * Timestamp structure for the timestamps in struct statx. + * + * tv_sec holds the number of seconds before (negative) or after (positive) + * 00:00:00 1st January 1970 UTC. + * + * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is + * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time. + * + * Note that if both tv_sec and tv_nsec are non-zero, then the two values must + * either be both positive or both negative. + * + * __reserved is held in case we need a yet finer resolution. + */ +struct statx_timestamp { + __s64 tv_sec; + __s32 tv_nsec; + __s32 __reserved; +}; + +/* + * Structures for the extended file attribute retrieval system call + * (statx()). + * + * The caller passes a mask of what they're specifically interested in as a + * parameter to statx(). What statx() actually got will be indicated in + * st_mask upon return. + * + * For each bit in the mask argument: + * + * - if the datum is not supported: + * + * - the bit will be cleared, and + * + * - the datum will be set to an appropriate fabricated value if one is + * available (eg. CIFS can take a default uid and gid), otherwise + * + * - the field will be cleared; + * + * - otherwise, if explicitly requested: + * + * - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is + * set or if the datum is considered out of date, and + * + * - the field will be filled in and the bit will be set; + * + * - otherwise, if not requested, but available in approximate form without any + * effort, it will be filled in anyway, and the bit will be set upon return + * (it might not be up to date, however, and no attempt will be made to + * synchronise the internal state first); + * + * - otherwise the field and the bit will be cleared before returning. + * + * Items in STATX_BASIC_STATS may be marked unavailable on return, but they + * will have values installed for compatibility purposes so that stat() and + * co. can be emulated in userspace. + */ +struct statx { + /* 0x00 */ + __u32 stx_mask; /* What results were written [uncond] */ + __u32 stx_blksize; /* Preferred general I/O size [uncond] */ + __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* 0x10 */ + __u32 stx_nlink; /* Number of hard links */ + __u32 stx_uid; /* User ID of owner */ + __u32 stx_gid; /* Group ID of owner */ + __u16 stx_mode; /* File mode */ + __u16 __spare0[1]; + /* 0x20 */ + __u64 stx_ino; /* Inode number */ + __u64 stx_size; /* File size */ + __u64 stx_blocks; /* Number of 512-byte blocks allocated */ + __u64 __spare1[1]; + /* 0x40 */ + struct statx_timestamp stx_atime; /* Last access time */ + struct statx_timestamp stx_btime; /* File creation time */ + struct statx_timestamp stx_ctime; /* Last attribute change time */ + struct statx_timestamp stx_mtime; /* Last data modification time */ + /* 0x80 */ + __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_minor; + __u32 stx_dev_major; /* ID of device containing file [uncond] */ + __u32 stx_dev_minor; + /* 0x90 */ + __u64 __spare2[14]; /* Spare space for future expansion */ + /* 0x100 */ +}; + +/* + * Flags to be stx_mask + * + * Query request/result mask for statx() and struct statx::stx_mask. + * + * These bits should be set in the mask argument of statx() to request + * particular items when calling statx(). + */ +#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */ +#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */ +#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */ +#define STATX_UID 0x00000008U /* Want/got stx_uid */ +#define STATX_GID 0x00000010U /* Want/got stx_gid */ +#define STATX_ATIME 0x00000020U /* Want/got stx_atime */ +#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */ +#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */ +#define STATX_INO 0x00000100U /* Want/got stx_ino */ +#define STATX_SIZE 0x00000200U /* Want/got stx_size */ +#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ +#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ +#define STATX_BTIME 0x00000800U /* Want/got stx_btime */ +#define STATX_ALL 0x00000fffU /* All currently supported flags */ + +/* + * Attributes to be found in stx_attributes + * + * These give information about the features or the state of a file that might + * be of use to ordinary userspace programs such as GUIs or ls rather than + * specialised tools. + * + * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS + * semantically. Where possible, the numerical value is picked to correspond + * also. + */ +#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */ +#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */ +#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */ +#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ +#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ + +#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ + #endif /* _UAPI_LINUX_STAT_H */ diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index c506cddb8165..af17b4154ef6 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -105,26 +105,26 @@ struct tcmu_cmd_entry { union { struct { - uint32_t iov_cnt; - uint32_t iov_bidi_cnt; - uint32_t iov_dif_cnt; - uint64_t cdb_off; - uint64_t __pad1; - uint64_t __pad2; + __u32 iov_cnt; + __u32 iov_bidi_cnt; + __u32 iov_dif_cnt; + __u64 cdb_off; + __u64 __pad1; + __u64 __pad2; struct iovec iov[0]; } req; struct { - uint8_t scsi_status; - uint8_t __pad1; - uint16_t __pad2; - uint32_t __pad3; + __u8 scsi_status; + __u8 __pad1; + __u16 __pad2; + __u32 __pad3; char sense_buffer[TCMU_SENSE_BUFFERSIZE]; } rsp; }; } __packed; -#define TCMU_OP_ALIGN_SIZE sizeof(uint64_t) +#define TCMU_OP_ALIGN_SIZE sizeof(__u64) enum tcmu_genl_cmd { TCMU_CMD_UNSPEC, diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index c055947c5c98..3b059530dac9 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -18,8 +18,7 @@ * means the userland is reading). */ #define UFFD_API ((__u64)0xAA) -#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_EXIT | \ - UFFD_FEATURE_EVENT_FORK | \ +#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \ UFFD_FEATURE_EVENT_REMAP | \ UFFD_FEATURE_EVENT_REMOVE | \ UFFD_FEATURE_EVENT_UNMAP | \ @@ -113,7 +112,6 @@ struct uffd_msg { #define UFFD_EVENT_REMAP 0x14 #define UFFD_EVENT_REMOVE 0x15 #define UFFD_EVENT_UNMAP 0x16 -#define UFFD_EVENT_EXIT 0x17 /* flags for UFFD_EVENT_PAGEFAULT */ #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ @@ -163,7 +161,6 @@ struct uffdio_api { #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) #define UFFD_FEATURE_MISSING_SHMEM (1<<5) #define UFFD_FEATURE_EVENT_UNMAP (1<<6) -#define UFFD_FEATURE_EVENT_EXIT (1<<7) __u64 features; __u64 ioctls; diff --git a/include/linux/virtio_mmio.h b/include/uapi/linux/virtio_mmio.h index c4b09689ab64..c4b09689ab64 100644 --- a/include/linux/virtio_mmio.h +++ b/include/uapi/linux/virtio_mmio.h diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 90007a1abcab..15b4385a2be1 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -79,7 +79,7 @@ * configuration space */ #define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24 : 20) /* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */ -#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled) +#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->pci_dev->msix_enabled) /* Virtio ABI version, this must match exactly */ #define VIRTIO_PCI_ABI_VERSION 0 diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h index a0083be5d529..1f6d78f044b6 100644 --- a/include/xen/swiotlb-xen.h +++ b/include/xen/swiotlb-xen.h @@ -2,6 +2,7 @@ #define __LINUX_SWIOTLB_XEN_H #include <linux/dma-direction.h> +#include <linux/scatterlist.h> #include <linux/swiotlb.h> extern int xen_swiotlb_init(int verbose, bool early); @@ -55,4 +56,14 @@ xen_swiotlb_dma_supported(struct device *hwdev, u64 mask); extern int xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask); + +extern int +xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); + +extern int +xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, size_t size, + unsigned long attrs); #endif /* __LINUX_SWIOTLB_XEN_H */ |
