Diffstat (limited to 'include')
76 files changed, 1331 insertions, 540 deletions
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index f5c40b0fadc2..e6a83d712ef6 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -66,8 +66,8 @@ #define smp_read_barrier_depends() do { } while (0) #endif -#ifndef set_mb -#define set_mb(var, value) do { (var) = (value); mb(); } while (0) +#ifndef smp_store_mb +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #endif #ifndef smp_mb__before_atomic diff --git a/include/asm-generic/cmpxchg.h b/include/asm-generic/cmpxchg.h index 811fb1e9b061..3766ab34aa45 100644 --- a/include/asm-generic/cmpxchg.h +++ b/include/asm-generic/cmpxchg.h @@ -86,9 +86,6 @@ unsigned long __xchg(unsigned long x, volatile void *ptr, int size) /* * Atomic compare and exchange. - * - * Do not define __HAVE_ARCH_CMPXCHG because we want to use it to check whether - * a cmpxchg primitive faster than repeated local irq save/restore exists. */ #include <asm-generic/cmpxchg-local.h> diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index b59b5a52637e..e56272c919b5 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -8,8 +8,7 @@ #ifndef CONFIG_SMP /* * The following implementation only for uniprocessor machines. - * For UP, it's relies on the fact that pagefault_disable() also disables - * preemption to ensure mutual exclusion. + * It relies on preempt_disable() ensuring mutual exclusion. * */ @@ -38,6 +37,7 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; + preempt_disable(); pagefault_disable(); ret = -EFAULT; @@ -72,6 +72,7 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) out_pagefault_enable: pagefault_enable(); + preempt_enable(); if (ret == 0) { switch (cmp) { @@ -106,6 +107,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, { u32 val; + preempt_disable(); if (unlikely(get_user(val, uaddr) != 0)) return -EFAULT; @@ -113,6 +115,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; *uval = val; + preempt_enable(); return 0; } diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 9db042304df3..f56094cfdeff 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -769,6 +769,14 @@ static inline void __iomem *ioremap_nocache(phys_addr_t offset, size_t size) } #endif +#ifndef ioremap_uc +#define ioremap_uc ioremap_uc +static inline void __iomem *ioremap_uc(phys_addr_t offset, size_t size) +{ + return ioremap_nocache(offset, size); +} +#endif + #ifndef ioremap_wc #define ioremap_wc ioremap_wc static inline void __iomem *ioremap_wc(phys_addr_t offset, size_t size) @@ -777,8 +785,17 @@ static inline void __iomem *ioremap_wc(phys_addr_t offset, size_t size) } #endif +#ifndef ioremap_wt +#define ioremap_wt ioremap_wt +static inline void __iomem *ioremap_wt(phys_addr_t offset, size_t size) +{ + return ioremap_nocache(offset, size); +} +#endif + #ifndef iounmap #define iounmap iounmap + static inline void iounmap(void __iomem *addr) { } diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h index 1b41011643a5..d8f8622fa044 100644 --- a/include/asm-generic/iomap.h +++ b/include/asm-generic/iomap.h @@ -66,6 +66,10 @@ extern void ioport_unmap(void __iomem *); #define ioremap_wc ioremap_nocache #endif +#ifndef ARCH_HAS_IOREMAP_WT +#define ioremap_wt ioremap_nocache +#endif + #ifdef CONFIG_PCI /* Destroy a virtual mapping cookie for a PCI BAR (memory or IO) */ struct pci_dev; 
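As a quick reference for the set_mb() -> smp_store_mb() rename above: a minimal usage sketch, with invented demo_* variables and function name (not taken from this diff), showing why the store and the barrier come as one helper -- the full barrier keeps a later load from being reordered before the flag store.

static int demo_flag;
static int demo_cond;

static void demo_publish_then_check(void)
{
	/* expands to: do { WRITE_ONCE(demo_flag, 1); mb(); } while (0) */
	smp_store_mb(demo_flag, 1);

	/* mb() orders the load of demo_cond after the store of demo_flag */
	if (READ_ONCE(demo_cond))
		return;

	/* ... otherwise e.g. go to sleep and wait to be woken ... */
}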
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 39f1d6a2b04d..bd910ceaccfa 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -262,6 +262,10 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) #define pgprot_writecombine pgprot_noncached #endif +#ifndef pgprot_writethrough +#define pgprot_writethrough pgprot_noncached +#endif + #ifndef pgprot_device #define pgprot_device pgprot_noncached #endif diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index eb6f9e6c3075..d0a7a4753db2 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -79,11 +79,8 @@ static __always_inline bool should_resched(void) #ifdef CONFIG_PREEMPT extern asmlinkage void preempt_schedule(void); #define __preempt_schedule() preempt_schedule() - -#ifdef CONFIG_CONTEXT_TRACKING -extern asmlinkage void preempt_schedule_context(void); -#define __preempt_schedule_context() preempt_schedule_context() -#endif +extern asmlinkage void preempt_schedule_notrace(void); +#define __preempt_schedule_notrace() preempt_schedule_notrace() #endif /* CONFIG_PREEMPT */ #endif /* __ASM_PREEMPT_H */ diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h new file mode 100644 index 000000000000..83bfb87f5bf1 --- /dev/null +++ b/include/asm-generic/qspinlock.h @@ -0,0 +1,139 @@ +/* + * Queued spinlock + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P. + * + * Authors: Waiman Long <waiman.long@hp.com> + */ +#ifndef __ASM_GENERIC_QSPINLOCK_H +#define __ASM_GENERIC_QSPINLOCK_H + +#include <asm-generic/qspinlock_types.h> + +/** + * queued_spin_is_locked - is the spinlock locked? + * @lock: Pointer to queued spinlock structure + * Return: 1 if it is locked, 0 otherwise + */ +static __always_inline int queued_spin_is_locked(struct qspinlock *lock) +{ + return atomic_read(&lock->val); +} + +/** + * queued_spin_value_unlocked - is the spinlock structure unlocked? + * @lock: queued spinlock structure + * Return: 1 if it is unlocked, 0 otherwise + * + * N.B. Whenever there are tasks waiting for the lock, it is considered + * locked wrt the lockref code to avoid lock stealing by the lockref + * code and change things underneath the lock. This also allows some + * optimizations to be applied without conflict with lockref. 
+ */ +static __always_inline int queued_spin_value_unlocked(struct qspinlock lock) +{ + return !atomic_read(&lock.val); +} + +/** + * queued_spin_is_contended - check if the lock is contended + * @lock : Pointer to queued spinlock structure + * Return: 1 if lock contended, 0 otherwise + */ +static __always_inline int queued_spin_is_contended(struct qspinlock *lock) +{ + return atomic_read(&lock->val) & ~_Q_LOCKED_MASK; +} +/** + * queued_spin_trylock - try to acquire the queued spinlock + * @lock : Pointer to queued spinlock structure + * Return: 1 if lock acquired, 0 if failed + */ +static __always_inline int queued_spin_trylock(struct qspinlock *lock) +{ + if (!atomic_read(&lock->val) && + (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) == 0)) + return 1; + return 0; +} + +extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); + +/** + * queued_spin_lock - acquire a queued spinlock + * @lock: Pointer to queued spinlock structure + */ +static __always_inline void queued_spin_lock(struct qspinlock *lock) +{ + u32 val; + + val = atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL); + if (likely(val == 0)) + return; + queued_spin_lock_slowpath(lock, val); +} + +#ifndef queued_spin_unlock +/** + * queued_spin_unlock - release a queued spinlock + * @lock : Pointer to queued spinlock structure + */ +static __always_inline void queued_spin_unlock(struct qspinlock *lock) +{ + /* + * smp_mb__before_atomic() in order to guarantee release semantics + */ + smp_mb__before_atomic_dec(); + atomic_sub(_Q_LOCKED_VAL, &lock->val); +} +#endif + +/** + * queued_spin_unlock_wait - wait until current lock holder releases the lock + * @lock : Pointer to queued spinlock structure + * + * There is a very slight possibility of live-lock if the lockers keep coming + * and the waiter is just unfortunate enough to not see any unlock state. + */ +static inline void queued_spin_unlock_wait(struct qspinlock *lock) +{ + while (atomic_read(&lock->val) & _Q_LOCKED_MASK) + cpu_relax(); +} + +#ifndef virt_queued_spin_lock +static __always_inline bool virt_queued_spin_lock(struct qspinlock *lock) +{ + return false; +} +#endif + +/* + * Initializier + */ +#define __ARCH_SPIN_LOCK_UNLOCKED { ATOMIC_INIT(0) } + +/* + * Remapping spinlock architecture specific functions to the corresponding + * queued spinlock functions. + */ +#define arch_spin_is_locked(l) queued_spin_is_locked(l) +#define arch_spin_is_contended(l) queued_spin_is_contended(l) +#define arch_spin_value_unlocked(l) queued_spin_value_unlocked(l) +#define arch_spin_lock(l) queued_spin_lock(l) +#define arch_spin_trylock(l) queued_spin_trylock(l) +#define arch_spin_unlock(l) queued_spin_unlock(l) +#define arch_spin_lock_flags(l, f) queued_spin_lock(l) +#define arch_spin_unlock_wait(l) queued_spin_unlock_wait(l) + +#endif /* __ASM_GENERIC_QSPINLOCK_H */ diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h new file mode 100644 index 000000000000..85f888e86761 --- /dev/null +++ b/include/asm-generic/qspinlock_types.h @@ -0,0 +1,79 @@ +/* + * Queued spinlock + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P. + * + * Authors: Waiman Long <waiman.long@hp.com> + */ +#ifndef __ASM_GENERIC_QSPINLOCK_TYPES_H +#define __ASM_GENERIC_QSPINLOCK_TYPES_H + +/* + * Including atomic.h with PARAVIRT on will cause compilation errors because + * of recursive header file incluson via paravirt_types.h. So don't include + * it if PARAVIRT is on. + */ +#ifndef CONFIG_PARAVIRT +#include <linux/types.h> +#include <linux/atomic.h> +#endif + +typedef struct qspinlock { + atomic_t val; +} arch_spinlock_t; + +/* + * Bitfields in the atomic value: + * + * When NR_CPUS < 16K + * 0- 7: locked byte + * 8: pending + * 9-15: not used + * 16-17: tail index + * 18-31: tail cpu (+1) + * + * When NR_CPUS >= 16K + * 0- 7: locked byte + * 8: pending + * 9-10: tail index + * 11-31: tail cpu (+1) + */ +#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ + << _Q_ ## type ## _OFFSET) +#define _Q_LOCKED_OFFSET 0 +#define _Q_LOCKED_BITS 8 +#define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) + +#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#if CONFIG_NR_CPUS < (1U << 14) +#define _Q_PENDING_BITS 8 +#else +#define _Q_PENDING_BITS 1 +#endif +#define _Q_PENDING_MASK _Q_SET_MASK(PENDING) + +#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) +#define _Q_TAIL_IDX_BITS 2 +#define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX) + +#define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS) +#define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET) +#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) + +#define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET +#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK) + +#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) +#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET) + +#endif /* __ASM_GENERIC_QSPINLOCK_TYPES_H */ diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index a899402a5a0e..52f3b7da4f2d 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -43,8 +43,8 @@ struct alarm { void alarm_init(struct alarm *alarm, enum alarmtimer_type type, enum alarmtimer_restart (*function)(struct alarm *, ktime_t)); -int alarm_start(struct alarm *alarm, ktime_t start); -int alarm_start_relative(struct alarm *alarm, ktime_t start); +void alarm_start(struct alarm *alarm, ktime_t start); +void alarm_start_relative(struct alarm *alarm, ktime_t start); void alarm_restart(struct alarm *alarm); int alarm_try_to_cancel(struct alarm *alarm); int alarm_cancel(struct alarm *alarm); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index aff923ae8c4b..d87d8eced064 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -116,7 +116,6 @@ __printf(3, 4) int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...); int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); -void bdi_unregister(struct backing_dev_info *bdi); int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, enum wb_reason reason); diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h index 86c12c93e3cf..8fdcb783197d 100644 --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h @@ -2,7 +2,6 @@ #define _LINUX_BH_H #include <linux/preempt.h> -#include <linux/preempt_mask.h> #ifdef CONFIG_TRACE_IRQFLAGS extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); diff --git 
a/include/linux/brcmphy.h b/include/linux/brcmphy.h index ae2982c0f7a6..656da2a12ffe 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -17,7 +17,7 @@ #define PHY_ID_BCM7250 0xae025280 #define PHY_ID_BCM7364 0xae025260 #define PHY_ID_BCM7366 0x600d8490 -#define PHY_ID_BCM7425 0x03625e60 +#define PHY_ID_BCM7425 0x600d86b0 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7439 0x600d8480 #define PHY_ID_BCM7439_2 0xae025080 diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 96c280b2c263..597a1e836f22 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -37,12 +37,15 @@ enum clock_event_mode { * reached from DETACHED or SHUTDOWN. * ONESHOT: Device is programmed to generate event only once. Can be reached * from DETACHED or SHUTDOWN. + * ONESHOT_STOPPED: Device was programmed in ONESHOT mode and is temporarily + * stopped. */ enum clock_event_state { CLOCK_EVT_STATE_DETACHED, CLOCK_EVT_STATE_SHUTDOWN, CLOCK_EVT_STATE_PERIODIC, CLOCK_EVT_STATE_ONESHOT, + CLOCK_EVT_STATE_ONESHOT_STOPPED, }; /* @@ -84,12 +87,13 @@ enum clock_event_state { * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) * @mode: operating mode, relevant only to ->set_mode(), OBSOLETE - * @state: current state of the device, assigned by the core code + * @state_use_accessors:current state of the device, assigned by the core code * @features: features * @retries: number of forced programming retries * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. * @set_state_periodic: switch state to periodic, if !set_mode * @set_state_oneshot: switch state to oneshot, if !set_mode + * @set_state_oneshot_stopped: switch state to oneshot_stopped, if !set_mode * @set_state_shutdown: switch state to shutdown, if !set_mode * @tick_resume: resume clkevt device, if !set_mode * @broadcast: function to broadcast events @@ -113,7 +117,7 @@ struct clock_event_device { u32 mult; u32 shift; enum clock_event_mode mode; - enum clock_event_state state; + enum clock_event_state state_use_accessors; unsigned int features; unsigned long retries; @@ -121,11 +125,12 @@ struct clock_event_device { * State transition callback(s): Only one of the two groups should be * defined: * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. - * - set_state_{shutdown|periodic|oneshot}(), tick_resume(). + * - set_state_{shutdown|periodic|oneshot|oneshot_stopped}(), tick_resume(). 
*/ void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); int (*set_state_periodic)(struct clock_event_device *); int (*set_state_oneshot)(struct clock_event_device *); + int (*set_state_oneshot_stopped)(struct clock_event_device *); int (*set_state_shutdown)(struct clock_event_device *); int (*tick_resume)(struct clock_event_device *); @@ -144,6 +149,32 @@ struct clock_event_device { struct module *owner; } ____cacheline_aligned; +/* Helpers to verify state of a clockevent device */ +static inline bool clockevent_state_detached(struct clock_event_device *dev) +{ + return dev->state_use_accessors == CLOCK_EVT_STATE_DETACHED; +} + +static inline bool clockevent_state_shutdown(struct clock_event_device *dev) +{ + return dev->state_use_accessors == CLOCK_EVT_STATE_SHUTDOWN; +} + +static inline bool clockevent_state_periodic(struct clock_event_device *dev) +{ + return dev->state_use_accessors == CLOCK_EVT_STATE_PERIODIC; +} + +static inline bool clockevent_state_oneshot(struct clock_event_device *dev) +{ + return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT; +} + +static inline bool clockevent_state_oneshot_stopped(struct clock_event_device *dev) +{ + return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT_STOPPED; +} + /* * Calculate a multiplication factor for scaled math, which is used to convert * nanoseconds based values to clock ticks: diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index d27d0152271f..278dd279a7a8 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -181,7 +181,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift) extern int clocksource_unregister(struct clocksource*); extern void clocksource_touch_watchdog(void); -extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_suspend(void); extern void clocksource_resume(void); diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 867722591be2..05be2352fef8 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -250,7 +250,23 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) #define WRITE_ONCE(x, val) \ - ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; }) + ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + +/** + * READ_ONCE_CTRL - Read a value heading a control dependency + * @x: The value to be read, heading the control dependency + * + * Control dependencies are tricky. See Documentation/memory-barriers.txt + * for important information on how to use them. Note that in many cases, + * use of smp_load_acquire() will be much simpler. Control dependencies + * should be avoided except on the hottest of hotpaths. + */ +#define READ_ONCE_CTRL(x) \ +({ \ + typeof(x) __val = READ_ONCE(x); \ + smp_read_barrier_depends(); /* Enforce control dependency. */ \ + __val; \ +}) #endif /* __KERNEL__ */ @@ -450,7 +466,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * with an explicit memory barrier or atomic instruction that provides the * required ordering. * - * If possible use READ_ONCE/ASSIGN_ONCE instead. + * If possible use READ_ONCE()/WRITE_ONCE() instead. 
*/ #define __ACCESS_ONCE(x) ({ \ __maybe_unused typeof(x) __var = (__force typeof(x)) 0; \ diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 2821838256b4..b96bd299966f 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -14,8 +14,6 @@ extern void context_tracking_enter(enum ctx_state state); extern void context_tracking_exit(enum ctx_state state); extern void context_tracking_user_enter(void); extern void context_tracking_user_exit(void); -extern void __context_tracking_task_switch(struct task_struct *prev, - struct task_struct *next); static inline void user_enter(void) { @@ -51,19 +49,11 @@ static inline void exception_exit(enum ctx_state prev_ctx) } } -static inline void context_tracking_task_switch(struct task_struct *prev, - struct task_struct *next) -{ - if (context_tracking_is_enabled()) - __context_tracking_task_switch(prev, next); -} #else static inline void user_enter(void) { } static inline void user_exit(void) { } static inline enum ctx_state exception_enter(void) { return 0; } static inline void exception_exit(enum ctx_state prev_ctx) { } -static inline void context_tracking_task_switch(struct task_struct *prev, - struct task_struct *next) { } #endif /* !CONFIG_CONTEXT_TRACKING */ diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 6b7b96a32b75..678ecdf90cf6 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -12,6 +12,7 @@ struct context_tracking { * may be further optimized using static keys. */ bool active; + int recursion; enum ctx_state { CONTEXT_KERNEL = 0, CONTEXT_USER, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 27e285b92b5f..59915ea5373c 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -151,10 +151,8 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask, return 1; } -static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +static inline unsigned int cpumask_local_spread(unsigned int i, int node) { - set_bit(0, cpumask_bits(dstp)); - return 0; } @@ -208,7 +206,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); -int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp); +unsigned int cpumask_local_spread(unsigned int i, int node); /** * for_each_cpu - iterate over every cpu in a mask diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index cb25af461054..420311bcee38 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -45,7 +45,6 @@ extern struct dentry *arch_debugfs_dir; /* declared over in file.c */ extern const struct file_operations debugfs_file_operations; -extern const struct inode_operations debugfs_link_operations; struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 30624954dec5..e9bc9292bd3a 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -185,33 +185,85 @@ static inline int dmar_device_remove(void *handle) struct irte { union { + /* Shared between remapped and posted mode*/ struct { - __u64 present : 1, - fpd : 1, - dst_mode : 1, - redir_hint : 1, - trigger_mode : 1, - dlvry_mode : 3, - avail : 4, - __reserved_1 : 4, - vector : 8, - __reserved_2 : 8, - dest_id 
: 32; + __u64 present : 1, /* 0 */ + fpd : 1, /* 1 */ + __res0 : 6, /* 2 - 6 */ + avail : 4, /* 8 - 11 */ + __res1 : 3, /* 12 - 14 */ + pst : 1, /* 15 */ + vector : 8, /* 16 - 23 */ + __res2 : 40; /* 24 - 63 */ + }; + + /* Remapped mode */ + struct { + __u64 r_present : 1, /* 0 */ + r_fpd : 1, /* 1 */ + dst_mode : 1, /* 2 */ + redir_hint : 1, /* 3 */ + trigger_mode : 1, /* 4 */ + dlvry_mode : 3, /* 5 - 7 */ + r_avail : 4, /* 8 - 11 */ + r_res0 : 4, /* 12 - 15 */ + r_vector : 8, /* 16 - 23 */ + r_res1 : 8, /* 24 - 31 */ + dest_id : 32; /* 32 - 63 */ + }; + + /* Posted mode */ + struct { + __u64 p_present : 1, /* 0 */ + p_fpd : 1, /* 1 */ + p_res0 : 6, /* 2 - 7 */ + p_avail : 4, /* 8 - 11 */ + p_res1 : 2, /* 12 - 13 */ + p_urgent : 1, /* 14 */ + p_pst : 1, /* 15 */ + p_vector : 8, /* 16 - 23 */ + p_res2 : 14, /* 24 - 37 */ + pda_l : 26; /* 38 - 63 */ }; __u64 low; }; union { + /* Shared between remapped and posted mode*/ struct { - __u64 sid : 16, - sq : 2, - svt : 2, - __reserved_3 : 44; + __u64 sid : 16, /* 64 - 79 */ + sq : 2, /* 80 - 81 */ + svt : 2, /* 82 - 83 */ + __res3 : 44; /* 84 - 127 */ + }; + + /* Posted mode*/ + struct { + __u64 p_sid : 16, /* 64 - 79 */ + p_sq : 2, /* 80 - 81 */ + p_svt : 2, /* 82 - 83 */ + p_res3 : 12, /* 84 - 95 */ + pda_h : 32; /* 96 - 127 */ }; __u64 high; }; }; +static inline void dmar_copy_shared_irte(struct irte *dst, struct irte *src) +{ + dst->present = src->present; + dst->fpd = src->fpd; + dst->avail = src->avail; + dst->pst = src->pst; + dst->vector = src->vector; + dst->sid = src->sid; + dst->sq = src->sq; + dst->svt = src->svt; +} + +#define PDA_LOW_BIT 26 +#define PDA_HIGH_BIT 32 + enum { IRQ_REMAP_XAPIC_MODE, IRQ_REMAP_X2APIC_MODE, @@ -227,6 +279,7 @@ extern void dmar_msi_read(int irq, struct msi_msg *msg); extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); extern irqreturn_t dmar_fault(int irq, void *dev_id); -extern int arch_setup_dmar_msi(unsigned int irq); +extern int dmar_alloc_hwirq(int id, int node, void *arg); +extern void dmar_free_hwirq(int irq); #endif /* __DMAR_H__ */ diff --git a/include/linux/efi.h b/include/linux/efi.h index af5be0368dec..2092965afca3 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -583,6 +583,9 @@ void efi_native_runtime_setup(void); #define EFI_FILE_INFO_ID \ EFI_GUID( 0x9576e92, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b ) +#define EFI_SYSTEM_RESOURCE_TABLE_GUID \ + EFI_GUID( 0xb122a263, 0x3661, 0x4f68, 0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80 ) + #define EFI_FILE_SYSTEM_GUID \ EFI_GUID( 0x964e5b22, 0x6459, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b ) @@ -823,6 +826,7 @@ extern struct efi { unsigned long fw_vendor; /* fw_vendor */ unsigned long runtime; /* runtime table */ unsigned long config_table; /* config tables */ + unsigned long esrt; /* ESRT table */ efi_get_time_t *get_time; efi_set_time_t *set_time; efi_get_wakeup_time_t *get_wakeup_time; @@ -875,6 +879,11 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned lon #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern int efi_config_init(efi_config_table_type_t *arch_tables); +#ifdef CONFIG_EFI_ESRT +extern void __init efi_esrt_init(void); +#else +static inline void efi_esrt_init(void) { } +#endif extern int efi_config_parse_tables(void *config_tables, int count, int sz, efi_config_table_type_t *arch_tables); extern u64 efi_get_iobase (void); @@ -882,12 +891,15 @@ extern u32 efi_mem_type 
(unsigned long phys_addr); extern u64 efi_mem_attributes (unsigned long phys_addr); extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); extern int __init efi_uart_console_only (void); +extern u64 efi_mem_desc_end(efi_memory_desc_t *md); +extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern void efi_get_time(struct timespec *now); extern void efi_reserve_boot_services(void); extern int efi_get_fdt_params(struct efi_fdt_params *params, int verbose); extern struct efi_memory_map memmap; +extern struct kobject *efi_kobj; extern int efi_reboot_quirk_mode; extern bool efi_poweroff_required(void); diff --git a/include/linux/fs.h b/include/linux/fs.h index 35ec87e490b1..b577e801b4af 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -38,7 +38,6 @@ struct backing_dev_info; struct export_operations; struct hd_geometry; struct iovec; -struct nameidata; struct kiocb; struct kobject; struct pipe_inode_info; @@ -656,6 +655,7 @@ struct inode { struct pipe_inode_info *i_pipe; struct block_device *i_bdev; struct cdev *i_cdev; + char *i_link; }; __u32 i_generation; @@ -1607,12 +1607,12 @@ struct file_operations { struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); - void * (*follow_link) (struct dentry *, struct nameidata *); + const char * (*follow_link) (struct dentry *, void **); int (*permission) (struct inode *, int); struct posix_acl * (*get_acl)(struct inode *, int); int (*readlink) (struct dentry *, char __user *,int); - void (*put_link) (struct dentry *, struct nameidata *, void *); + void (*put_link) (struct inode *, void *); int (*create) (struct inode *,struct dentry *, umode_t, bool); int (*link) (struct dentry *,struct inode *,struct dentry *); @@ -1879,6 +1879,7 @@ enum file_time_flags { S_VERSION = 8, }; +extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); static inline void file_accessed(struct file *file) { @@ -2704,13 +2705,14 @@ extern const struct file_operations generic_ro_fops; extern int readlink_copy(char __user *, int, const char *); extern int page_readlink(struct dentry *, char __user *, int); -extern void *page_follow_link_light(struct dentry *, struct nameidata *); -extern void page_put_link(struct dentry *, struct nameidata *, void *); +extern const char *page_follow_link_light(struct dentry *, void **); +extern void page_put_link(struct inode *, void *); extern int __page_symlink(struct inode *inode, const char *symname, int len, int nofs); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; -extern void kfree_put_link(struct dentry *, struct nameidata *, void *); +extern void kfree_put_link(struct inode *, void *); +extern void free_page_put_link(struct inode *, void *); extern int generic_readlink(struct dentry *, char __user *, int); extern void generic_fillattr(struct inode *, struct kstat *); int vfs_getattr_nosec(struct path *path, struct kstat *stat); @@ -2721,6 +2723,8 @@ void __inode_sub_bytes(struct inode *inode, loff_t bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes); loff_t inode_get_bytes(struct inode *inode); void inode_set_bytes(struct inode *inode, loff_t bytes); +const char *simple_follow_link(struct dentry *, void **); +extern const struct 
inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index f4af03404b97..dfd59d6bc6f0 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -1,7 +1,7 @@ #ifndef LINUX_HARDIRQ_H #define LINUX_HARDIRQ_H -#include <linux/preempt_mask.h> +#include <linux/preempt.h> #include <linux/lockdep.h> #include <linux/ftrace_irq.h> #include <linux/vtime.h> diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 9286a46b7d69..6aefcd0031a6 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -65,6 +65,7 @@ static inline void kunmap(struct page *page) static inline void *kmap_atomic(struct page *page) { + preempt_disable(); pagefault_disable(); return page_address(page); } @@ -73,6 +74,7 @@ static inline void *kmap_atomic(struct page *page) static inline void __kunmap_atomic(void *addr) { pagefault_enable(); + preempt_enable(); } #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 05f6df1fdf5b..76dd4f0da5ca 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -53,34 +53,25 @@ enum hrtimer_restart { * * 0x00 inactive * 0x01 enqueued into rbtree - * 0x02 callback function running - * 0x04 timer is migrated to another cpu * - * Special cases: - * 0x03 callback function running and enqueued - * (was requeued on another CPU) - * 0x05 timer was migrated on CPU hotunplug + * The callback state is not part of the timer->state because clearing it would + * mean touching the timer after the callback, this makes it impossible to free + * the timer from the callback function. * - * The "callback function running and enqueued" status is only possible on - * SMP. It happens for example when a posix timer expired and the callback + * Therefore we track the callback state in: + * + * timer->base->cpu_base->running == timer + * + * On SMP it is possible to have a "callback function running and enqueued" + * status. It happens for example when a posix timer expired and the callback * queued a signal. Between dropping the lock which protects the posix timer * and reacquiring the base lock of the hrtimer, another CPU can deliver the - * signal and rearm the timer. We have to preserve the callback running state, - * as otherwise the timer could be removed before the softirq code finishes the - * the handling of the timer. - * - * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state - * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. This - * also affects HRTIMER_STATE_MIGRATE where the preservation is not - * necessary. HRTIMER_STATE_MIGRATE is cleared after the timer is - * enqueued on the new cpu. + * signal and rearm the timer. * * All state transitions are protected by cpu_base->lock. */ #define HRTIMER_STATE_INACTIVE 0x00 #define HRTIMER_STATE_ENQUEUED 0x01 -#define HRTIMER_STATE_CALLBACK 0x02 -#define HRTIMER_STATE_MIGRATE 0x04 /** * struct hrtimer - the basic hrtimer structure @@ -130,6 +121,12 @@ struct hrtimer_sleeper { struct task_struct *task; }; +#ifdef CONFIG_64BIT +# define HRTIMER_CLOCK_BASE_ALIGN 64 +#else +# define HRTIMER_CLOCK_BASE_ALIGN 32 +#endif + /** * struct hrtimer_clock_base - the timer base for a specific clock * @cpu_base: per cpu clock base @@ -137,9 +134,7 @@ struct hrtimer_sleeper { * timer to a base on another cpu. 
* @clockid: clock id for per_cpu support * @active: red black tree root node for the active timers - * @resolution: the resolution of the clock, in nanoseconds * @get_time: function to retrieve the current time of the clock - * @softirq_time: the time when running the hrtimer queue in the softirq * @offset: offset of this clock to the monotonic base */ struct hrtimer_clock_base { @@ -147,11 +142,9 @@ struct hrtimer_clock_base { int index; clockid_t clockid; struct timerqueue_head active; - ktime_t resolution; ktime_t (*get_time)(void); - ktime_t softirq_time; ktime_t offset; -}; +} __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN))); enum hrtimer_base_type { HRTIMER_BASE_MONOTONIC, @@ -165,11 +158,16 @@ enum hrtimer_base_type { * struct hrtimer_cpu_base - the per cpu clock bases * @lock: lock protecting the base and associated clock bases * and timers + * @seq: seqcount around __run_hrtimer + * @running: pointer to the currently running hrtimer * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers - * @clock_was_set: Indicates that clock was set from irq context. + * @clock_was_set_seq: Sequence counter of clock was set events + * @migration_enabled: The migration of hrtimers to other cpus is enabled + * @nohz_active: The nohz functionality is enabled * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() + * @next_timer: Pointer to the first expiring timer * @in_hrtirq: hrtimer_interrupt() is currently executing * @hres_active: State of high resolution mode * @hang_detected: The last hrtimer interrupt detected a hang @@ -178,27 +176,38 @@ enum hrtimer_base_type { * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt * @clock_base: array of clock bases for this cpu + * + * Note: next_timer is just an optimization for __remove_hrtimer(). + * Do not dereference the pointer because it is not reliable on + * cross cpu removals. 
*/ struct hrtimer_cpu_base { raw_spinlock_t lock; + seqcount_t seq; + struct hrtimer *running; unsigned int cpu; unsigned int active_bases; - unsigned int clock_was_set; + unsigned int clock_was_set_seq; + bool migration_enabled; + bool nohz_active; #ifdef CONFIG_HIGH_RES_TIMERS + unsigned int in_hrtirq : 1, + hres_active : 1, + hang_detected : 1; ktime_t expires_next; - int in_hrtirq; - int hres_active; - int hang_detected; - unsigned long nr_events; - unsigned long nr_retries; - unsigned long nr_hangs; - ktime_t max_hang_time; + struct hrtimer *next_timer; + unsigned int nr_events; + unsigned int nr_retries; + unsigned int nr_hangs; + unsigned int max_hang_time; #endif struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; -}; +} ____cacheline_aligned; static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { + BUILD_BUG_ON(sizeof(struct hrtimer_clock_base) > HRTIMER_CLOCK_BASE_ALIGN); + timer->node.expires = time; timer->_softexpires = time; } @@ -262,19 +271,16 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) return ktime_sub(timer->node.expires, timer->base->get_time()); } -#ifdef CONFIG_HIGH_RES_TIMERS -struct clock_event_device; - -extern void hrtimer_interrupt(struct clock_event_device *dev); - -/* - * In high resolution mode the time reference must be read accurate - */ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) { return timer->base->get_time(); } +#ifdef CONFIG_HIGH_RES_TIMERS +struct clock_event_device; + +extern void hrtimer_interrupt(struct clock_event_device *dev); + static inline int hrtimer_is_hres_active(struct hrtimer *timer) { return timer->base->cpu_base->hres_active; @@ -295,21 +301,16 @@ extern void hrtimer_peek_ahead_timers(void); extern void clock_was_set_delayed(void); +extern unsigned int hrtimer_resolution; + #else # define MONOTONIC_RES_NSEC LOW_RES_NSEC # define KTIME_MONOTONIC_RES KTIME_LOW_RES -static inline void hrtimer_peek_ahead_timers(void) { } +#define hrtimer_resolution (unsigned int)LOW_RES_NSEC -/* - * In non high resolution mode the time reference is taken from - * the base softirq time variable. 
- */ -static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) -{ - return timer->base->softirq_time; -} +static inline void hrtimer_peek_ahead_timers(void) { } static inline int hrtimer_is_hres_active(struct hrtimer *timer) { @@ -353,49 +354,47 @@ static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } #endif /* Basic timer operations: */ -extern int hrtimer_start(struct hrtimer *timer, ktime_t tim, - const enum hrtimer_mode mode); -extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, +extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long range_ns, const enum hrtimer_mode mode); -extern int -__hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, - unsigned long delta_ns, - const enum hrtimer_mode mode, int wakeup); + +/** + * hrtimer_start - (re)start an hrtimer on the current CPU + * @timer: the timer to be added + * @tim: expiry time + * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or + * relative (HRTIMER_MODE_REL) + */ +static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim, + const enum hrtimer_mode mode) +{ + hrtimer_start_range_ns(timer, tim, 0, mode); +} extern int hrtimer_cancel(struct hrtimer *timer); extern int hrtimer_try_to_cancel(struct hrtimer *timer); -static inline int hrtimer_start_expires(struct hrtimer *timer, - enum hrtimer_mode mode) +static inline void hrtimer_start_expires(struct hrtimer *timer, + enum hrtimer_mode mode) { unsigned long delta; ktime_t soft, hard; soft = hrtimer_get_softexpires(timer); hard = hrtimer_get_expires(timer); delta = ktime_to_ns(ktime_sub(hard, soft)); - return hrtimer_start_range_ns(timer, soft, delta, mode); + hrtimer_start_range_ns(timer, soft, delta, mode); } -static inline int hrtimer_restart(struct hrtimer *timer) +static inline void hrtimer_restart(struct hrtimer *timer) { - return hrtimer_start_expires(timer, HRTIMER_MODE_ABS); + hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } /* Query timers: */ extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); -extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp); -extern ktime_t hrtimer_get_next_event(void); +extern u64 hrtimer_get_next_event(void); -/* - * A timer is active, when it is enqueued into the rbtree or the - * callback function is running or it's in the state of being migrated - * to another cpu. - */ -static inline int hrtimer_active(const struct hrtimer *timer) -{ - return timer->state != HRTIMER_STATE_INACTIVE; -} +extern bool hrtimer_active(const struct hrtimer *timer); /* * Helper function to check, whether the timer is on one of the queues @@ -411,14 +410,29 @@ static inline int hrtimer_is_queued(struct hrtimer *timer) */ static inline int hrtimer_callback_running(struct hrtimer *timer) { - return timer->state & HRTIMER_STATE_CALLBACK; + return timer->base->cpu_base->running == timer; } /* Forward a hrtimer so it expires after now: */ extern u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); -/* Forward a hrtimer so it expires after the hrtimer's current now */ +/** + * hrtimer_forward_now - forward the timer expiry so it expires after now + * @timer: hrtimer to forward + * @interval: the interval to forward + * + * Forward the timer expiry so it will expire after the current time + * of the hrtimer clock base. Returns the number of overruns. + * + * Can be safely called from the callback function of @timer. 
If + * called from other contexts @timer must neither be enqueued nor + * running the callback and the caller needs to take care of + * serialization. + * + * Note: This only updates the timer expiry value and does not requeue + * the timer. + */ static inline u64 hrtimer_forward_now(struct hrtimer *timer, ktime_t interval) { @@ -443,7 +457,6 @@ extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); /* Soft interrupt function to run the hrtimer queues: */ extern void hrtimer_run_queues(void); -extern void hrtimer_run_pending(void); /* Bootup initialization: */ extern void __init hrtimers_init(void); diff --git a/include/linux/htirq.h b/include/linux/htirq.h index 70a1dbbf2093..d4a527e58434 100644 --- a/include/linux/htirq.h +++ b/include/linux/htirq.h @@ -1,24 +1,38 @@ #ifndef LINUX_HTIRQ_H #define LINUX_HTIRQ_H +struct pci_dev; +struct irq_data; + struct ht_irq_msg { u32 address_lo; /* low 32 bits of the ht irq message */ u32 address_hi; /* high 32 bits of the it irq message */ }; +typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq, + struct ht_irq_msg *msg); + +struct ht_irq_cfg { + struct pci_dev *dev; + /* Update callback used to cope with buggy hardware */ + ht_irq_update_t *update; + unsigned pos; + unsigned idx; + struct ht_irq_msg msg; +}; + /* Helper functions.. */ void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); -struct irq_data; void mask_ht_irq(struct irq_data *data); void unmask_ht_irq(struct irq_data *data); /* The arch hook for getting things started */ -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev); +int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev, + ht_irq_update_t *update); +void arch_teardown_ht_irq(unsigned int irq); /* For drivers of buggy hardware */ -typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq, - struct ht_irq_msg *msg); int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update); #endif /* LINUX_HTIRQ_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 696d22312b31..bb9b075f0eb0 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -50,9 +50,8 @@ extern struct fs_struct init_fs; .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ .rlim = INIT_RLIMITS, \ .cputimer = { \ - .cputime = INIT_CPUTIME, \ - .running = 0, \ - .lock = __RAW_SPIN_LOCK_UNLOCKED(sig.cputimer.lock), \ + .cputime_atomic = INIT_CPUTIME_ATOMIC, \ + .running = 0, \ }, \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 796ef9645827..3665cb331ca1 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -87,6 +87,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) /* * Decoding Capability Register */ +#define cap_pi_support(c) (((c) >> 59) & 1) #define cap_read_drain(c) (((c) >> 55) & 1) #define cap_write_drain(c) (((c) >> 54) & 1) #define cap_max_amask_val(c) (((c) >> 48) & 0x3f) @@ -115,13 +116,14 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) * Extended Capability Register */ +#define ecap_pasid(e) ((e >> 40) & 0x1) #define ecap_pss(e) ((e >> 35) & 0x1f) #define ecap_eafs(e) ((e >> 34) & 0x1) #define ecap_nwfs(e) ((e >> 33) & 0x1) #define ecap_srs(e) ((e >> 31) & 0x1) #define ecap_ers(e) ((e >> 30) & 0x1) #define ecap_prs(e) ((e >> 29) & 0x1) -#define ecap_pasid(e) ((e >> 28) & 0x1) +/* PASID support used to be on bit 28 */ #define ecap_dis(e) ((e 
>> 27) & 0x1) #define ecap_nest(e) ((e >> 26) & 0x1) #define ecap_mts(e) ((e >> 25) & 0x1) @@ -298,6 +300,8 @@ struct q_inval { #define INTR_REMAP_TABLE_ENTRIES 65536 +struct irq_domain; + struct ir_table { struct irte *base; unsigned long *bitmap; @@ -347,6 +351,8 @@ struct intel_iommu { #ifdef CONFIG_IRQ_REMAP struct ir_table *ir_table; /* Interrupt remapping info */ + struct irq_domain *ir_domain; + struct irq_domain *ir_msi_domain; #endif struct device *iommu_dev; /* IOMMU-sysfs device */ int node; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 950ae4501826..be7e75c945e9 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -413,7 +413,8 @@ enum BLOCK_IOPOLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, - HRTIMER_SOFTIRQ, + HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the + numbering. Sigh! */ RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS @@ -592,10 +593,10 @@ tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, clockid_t which_clock, enum hrtimer_mode mode); static inline -int tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time, - const enum hrtimer_mode mode) +void tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time, + const enum hrtimer_mode mode) { - return hrtimer_start(&ttimer->timer, time, mode); + hrtimer_start(&ttimer->timer, time, mode); } static inline diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 657fab4efab3..c27dde7215b5 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -141,6 +141,7 @@ static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) { + preempt_disable(); pagefault_disable(); return ((char __force __iomem *) mapping) + offset; } @@ -149,6 +150,7 @@ static inline void io_mapping_unmap_atomic(void __iomem *vaddr) { pagefault_enable(); + preempt_enable(); } /* Non-atomic map/unmap */ diff --git a/include/linux/io.h b/include/linux/io.h index 986f2bffea1e..fb5a99800e77 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -19,6 +19,7 @@ #define _LINUX_IO_H #include <linux/types.h> +#include <linux/init.h> #include <asm/io.h> #include <asm/page.h> @@ -111,6 +112,13 @@ static inline void arch_phys_wc_del(int handle) } #define arch_phys_wc_add arch_phys_wc_add +#ifndef arch_phys_wc_index +static inline int arch_phys_wc_index(int handle) +{ + return -1; +} +#define arch_phys_wc_index arch_phys_wc_index +#endif #endif #endif /* _LINUX_IO_H */ diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index c367cbdf73ab..535fd3bb1ba8 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -7,6 +7,7 @@ #include <linux/time.h> #include <linux/timex.h> #include <asm/param.h> /* for HZ */ +#include <generated/timeconst.h> /* * The following defines establish the engineering parameters of the PLL @@ -288,8 +289,133 @@ static inline u64 jiffies_to_nsecs(const unsigned long j) return (u64)jiffies_to_usecs(j) * NSEC_PER_USEC; } -extern unsigned long msecs_to_jiffies(const unsigned int m); -extern unsigned long usecs_to_jiffies(const unsigned int u); +extern unsigned long __msecs_to_jiffies(const unsigned int m); +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) +/* + * HZ is equal to or smaller than 1000, and 1000 is a nice round + * multiple of HZ, divide with the factor between them, but round + * upwards: + */ +static inline unsigned long _msecs_to_jiffies(const unsigned int m) +{ + return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); 
+} +#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) +/* + * HZ is larger than 1000, and HZ is a nice round multiple of 1000 - + * simply multiply with the factor between them. + * + * But first make sure the multiplication result cannot overflow: + */ +static inline unsigned long _msecs_to_jiffies(const unsigned int m) +{ + if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; + return m * (HZ / MSEC_PER_SEC); +} +#else +/* + * Generic case - multiply, round and divide. But first check that if + * we are doing a net multiplication, that we wouldn't overflow: + */ +static inline unsigned long _msecs_to_jiffies(const unsigned int m) +{ + if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; + + return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) >> MSEC_TO_HZ_SHR32; +} +#endif +/** + * msecs_to_jiffies: - convert milliseconds to jiffies + * @m: time in milliseconds + * + * conversion is done as follows: + * + * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET) + * + * - 'too large' values [that would result in larger than + * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. + * + * - all other values are converted to jiffies by either multiplying + * the input value by a factor or dividing it with a factor and + * handling any 32-bit overflows. + * for the details see __msecs_to_jiffies() + * + * msecs_to_jiffies() checks for the passed in value being a constant + * via __builtin_constant_p() allowing gcc to eliminate most of the + * code, __msecs_to_jiffies() is called if the value passed does not + * allow constant folding and the actual conversion must be done at + * runtime. + * the HZ range specific helpers _msecs_to_jiffies() are called both + * directly here and from __msecs_to_jiffies() in the case where + * constant folding is not possible. + */ +static inline unsigned long msecs_to_jiffies(const unsigned int m) +{ + if (__builtin_constant_p(m)) { + if ((int)m < 0) + return MAX_JIFFY_OFFSET; + return _msecs_to_jiffies(m); + } else { + return __msecs_to_jiffies(m); + } +} + +extern unsigned long __usecs_to_jiffies(const unsigned int u); +#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) +static inline unsigned long _usecs_to_jiffies(const unsigned int u) +{ + return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ); +} +#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) +static inline unsigned long _usecs_to_jiffies(const unsigned int u) +{ + return u * (HZ / USEC_PER_SEC); +} +static inline unsigned long _usecs_to_jiffies(const unsigned int u) +{ +#else +static inline unsigned long _usecs_to_jiffies(const unsigned int u) +{ + return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32) + >> USEC_TO_HZ_SHR32; +} +#endif + +/** + * usecs_to_jiffies: - convert microseconds to jiffies + * @u: time in microseconds + * + * conversion is done as follows: + * + * - 'too large' values [that would result in larger than + * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. + * + * - all other values are converted to jiffies by either multiplying + * the input value by a factor or dividing it with a factor and + * handling any 32-bit overflows as for msecs_to_jiffies. + * + * usecs_to_jiffies() checks for the passed in value being a constant + * via __builtin_constant_p() allowing gcc to eliminate most of the + * code, __usecs_to_jiffies() is called if the value passed does not + * allow constant folding and the actual conversion must be done at + * runtime. 
+ * the HZ range specific helpers _usecs_to_jiffies() are called both + * directly here and from __msecs_to_jiffies() in the case where + * constant folding is not possible. + */ +static inline unsigned long usecs_to_jiffies(const unsigned int u) +{ + if (__builtin_constant_p(u)) { + if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; + return _usecs_to_jiffies(u); + } else { + return __usecs_to_jiffies(u); + } +} + extern unsigned long timespec_to_jiffies(const struct timespec *value); extern void jiffies_to_timespec(const unsigned long jiffies, struct timespec *value); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3a5b48e52a9e..060dd7b61c6d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -244,7 +244,8 @@ static inline u32 reciprocal_scale(u32 val, u32 ep_ro) #if defined(CONFIG_MMU) && \ (defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)) -void might_fault(void); +#define might_fault() __might_fault(__FILE__, __LINE__) +void __might_fault(const char *file, int line); #else static inline void might_fault(void) { } #endif diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 5fc3d1083071..2b6a204bd8d4 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -166,19 +166,34 @@ static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2) } #if BITS_PER_LONG < 64 -extern u64 __ktime_divns(const ktime_t kt, s64 div); -static inline u64 ktime_divns(const ktime_t kt, s64 div) +extern s64 __ktime_divns(const ktime_t kt, s64 div); +static inline s64 ktime_divns(const ktime_t kt, s64 div) { + /* + * Negative divisors could cause an inf loop, + * so bug out here. + */ + BUG_ON(div < 0); if (__builtin_constant_p(div) && !(div >> 32)) { - u64 ns = kt.tv64; - do_div(ns, div); - return ns; + s64 ns = kt.tv64; + u64 tmp = ns < 0 ? -ns : ns; + + do_div(tmp, div); + return ns < 0 ? -tmp : tmp; } else { return __ktime_divns(kt, div); } } #else /* BITS_PER_LONG < 64 */ -# define ktime_divns(kt, div) (u64)((kt).tv64 / (div)) +static inline s64 ktime_divns(const ktime_t kt, s64 div) +{ + /* + * 32-bit implementation cannot handle negative divisors, + * so catch them on 64bit as well. 
+ */ + WARN_ON(div < 0); + return kt.tv64 / div; +} #endif static inline s64 ktime_to_us(const ktime_t kt) diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 0081f000e34b..c92ebd100d9b 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -52,10 +52,15 @@ struct lglock { static struct lglock name = { .lock = &name ## _lock } void lg_lock_init(struct lglock *lg, char *name); + void lg_local_lock(struct lglock *lg); void lg_local_unlock(struct lglock *lg); void lg_local_lock_cpu(struct lglock *lg, int cpu); void lg_local_unlock_cpu(struct lglock *lg, int cpu); + +void lg_double_lock(struct lglock *lg, int cpu1, int cpu2); +void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2); + void lg_global_lock(struct lglock *lg); void lg_global_unlock(struct lglock *lg); diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 066ba4157541..2722111591a3 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -130,8 +130,8 @@ enum bounce_type { }; struct lock_class_stats { - unsigned long contention_point[4]; - unsigned long contending_point[4]; + unsigned long contention_point[LOCKSTAT_POINTS]; + unsigned long contending_point[LOCKSTAT_POINTS]; struct lock_time read_waittime; struct lock_time write_waittime; struct lock_time read_holdtime; diff --git a/include/linux/namei.h b/include/linux/namei.h index c8990779f0c3..d8c6334cd150 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -1,16 +1,15 @@ #ifndef _LINUX_NAMEI_H #define _LINUX_NAMEI_H -#include <linux/dcache.h> -#include <linux/errno.h> -#include <linux/linkage.h> +#include <linux/kernel.h> #include <linux/path.h> - -struct vfsmount; -struct nameidata; +#include <linux/fcntl.h> +#include <linux/errno.h> enum { MAX_NESTED_LINKS = 8 }; +#define MAXSYMLINKS 40 + /* * Type of the last component on LOOKUP_PARENT */ @@ -45,13 +44,29 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_ROOT 0x2000 #define LOOKUP_EMPTY 0x4000 -extern int user_path_at(int, const char __user *, unsigned, struct path *); extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty); -#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path) -#define user_lpath(name, path) user_path_at(AT_FDCWD, name, 0, path) -#define user_path_dir(name, path) \ - user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, path) +static inline int user_path_at(int dfd, const char __user *name, unsigned flags, + struct path *path) +{ + return user_path_at_empty(dfd, name, flags, path, NULL); +} + +static inline int user_path(const char __user *name, struct path *path) +{ + return user_path_at_empty(AT_FDCWD, name, LOOKUP_FOLLOW, path, NULL); +} + +static inline int user_lpath(const char __user *name, struct path *path) +{ + return user_path_at_empty(AT_FDCWD, name, 0, path, NULL); +} + +static inline int user_path_dir(const char __user *name, struct path *path) +{ + return user_path_at_empty(AT_FDCWD, name, + LOOKUP_FOLLOW | LOOKUP_DIRECTORY, path, NULL); +} extern int kern_path(const char *, unsigned, struct path *); @@ -70,9 +85,7 @@ extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); -extern void nd_jump_link(struct nameidata *nd, struct path *path); -extern void nd_set_link(struct nameidata *nd, char *path); -extern char *nd_get_link(struct nameidata *nd); +extern void nd_jump_link(struct path *path); static inline 
void nd_terminate_link(void *name, size_t len, size_t maxlen) { diff --git a/include/linux/of.h b/include/linux/of.h index ddeaae6d2083..b871ff9d81d7 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -121,6 +121,8 @@ extern struct device_node *of_stdout; extern raw_spinlock_t devtree_lock; #ifdef CONFIG_OF +void of_core_init(void); + static inline bool is_of_node(struct fwnode_handle *fwnode) { return fwnode && fwnode->type == FWNODE_OF; @@ -376,6 +378,10 @@ bool of_console_check(struct device_node *dn, char *name, int index); #else /* CONFIG_OF */ +static inline void of_core_init(void) +{ +} + static inline bool is_of_node(struct fwnode_handle *fwnode) { return false; diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h index 3a6490e81b28..703ea5c30a33 100644 --- a/include/linux/osq_lock.h +++ b/include/linux/osq_lock.h @@ -32,4 +32,9 @@ static inline void osq_lock_init(struct optimistic_spin_queue *lock) extern bool osq_lock(struct optimistic_spin_queue *lock); extern void osq_unlock(struct optimistic_spin_queue *lock); +static inline bool osq_is_locked(struct optimistic_spin_queue *lock) +{ + return atomic_read(&lock->tail) != OSQ_UNLOCKED_VAL; +} + #endif diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 50e50095c8d1..84a109449610 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -41,7 +41,12 @@ void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); -int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs); +int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); + +static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +{ + return __percpu_counter_compare(fbc, rhs, percpu_counter_batch); +} static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -116,6 +121,12 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) return 0; } +static inline int +__percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) +{ + return percpu_counter_compare(fbc, rhs); +} + static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 61992cf2e977..1b82d44b0a02 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -92,8 +92,6 @@ struct hw_perf_event_extra { int idx; /* index in shared_regs->regs[] */ }; -struct event_constraint; - /** * struct hw_perf_event - performance event hardware details: */ @@ -112,8 +110,6 @@ struct hw_perf_event { struct hw_perf_event_extra extra_reg; struct hw_perf_event_extra branch_reg; - - struct event_constraint *constraint; }; struct { /* software */ struct hrtimer hrtimer; @@ -124,7 +120,7 @@ struct hw_perf_event { }; struct { /* intel_cqm */ int cqm_state; - int cqm_rmid; + u32 cqm_rmid; struct list_head cqm_events_entry; struct list_head cqm_groups_entry; struct list_head cqm_group_entry; @@ -566,8 +562,12 @@ struct perf_cpu_context { struct perf_event_context *task_ctx; int active_oncpu; int exclusive; + + raw_spinlock_t hrtimer_lock; struct hrtimer hrtimer; ktime_t hrtimer_interval; + unsigned int hrtimer_active; + struct pmu *unique_pmu; struct perf_cgroup *cgrp; }; @@ -734,6 +734,22 @@ extern int perf_event_overflow(struct perf_event *event, struct 
perf_sample_data *data, struct pt_regs *regs); +extern void perf_event_output(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs); + +extern void +perf_event_header__init_id(struct perf_event_header *header, + struct perf_sample_data *data, + struct perf_event *event); +extern void +perf_event__output_id_sample(struct perf_event *event, + struct perf_output_handle *handle, + struct perf_sample_data *sample); + +extern void +perf_log_lost_samples(struct perf_event *event, u64 lost); + static inline bool is_sampling_event(struct perf_event *event) { return event->attr.sample_period != 0; @@ -798,11 +814,33 @@ perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) extern struct static_key_deferred perf_sched_events; +static __always_inline bool +perf_sw_migrate_enabled(void) +{ + if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS])) + return true; + return false; +} + +static inline void perf_event_task_migrate(struct task_struct *task) +{ + if (perf_sw_migrate_enabled()) + task->sched_migrated = 1; +} + static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { if (static_key_false(&perf_sched_events.key)) __perf_event_task_sched_in(prev, task); + + if (perf_sw_migrate_enabled() && task->sched_migrated) { + struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]); + + perf_fetch_caller_regs(regs); + ___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0); + task->sched_migrated = 0; + } } static inline void perf_event_task_sched_out(struct task_struct *prev, @@ -925,6 +963,8 @@ perf_aux_output_skip(struct perf_output_handle *handle, static inline void * perf_get_aux(struct perf_output_handle *handle) { return NULL; } static inline void +perf_event_task_migrate(struct task_struct *task) { } +static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { } static inline void diff --git a/include/linux/preempt.h b/include/linux/preempt.h index de83b4eb1642..0f1534acaf60 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -10,13 +10,117 @@ #include <linux/list.h> /* - * We use the MSB mostly because its available; see <linux/preempt_mask.h> for - * the other bits -- can't include that header due to inclusion hell. + * We put the hardirq and softirq counter into the preemption + * counter. The bitmask has the following meaning: + * + * - bits 0-7 are the preemption count (max preemption depth: 256) + * - bits 8-15 are the softirq count (max # of softirqs: 256) + * + * The hardirq count could in theory be the same as the number of + * interrupts in the system, but we run all interrupt handlers with + * interrupts disabled, so we cannot have nesting interrupts. Though + * there are a few palaeontologic drivers which reenable interrupts in + * the handler, so we need more than one bit here. 
+ * + * PREEMPT_MASK: 0x000000ff + * SOFTIRQ_MASK: 0x0000ff00 + * HARDIRQ_MASK: 0x000f0000 + * NMI_MASK: 0x00100000 + * PREEMPT_ACTIVE: 0x00200000 + * PREEMPT_NEED_RESCHED: 0x80000000 */ +#define PREEMPT_BITS 8 +#define SOFTIRQ_BITS 8 +#define HARDIRQ_BITS 4 +#define NMI_BITS 1 + +#define PREEMPT_SHIFT 0 +#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) +#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) +#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) + +#define __IRQ_MASK(x) ((1UL << (x))-1) + +#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) +#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) +#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) +#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) + +#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) +#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) +#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) +#define NMI_OFFSET (1UL << NMI_SHIFT) + +#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) + +#define PREEMPT_ACTIVE_BITS 1 +#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) +#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) + +/* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 +/* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */ #include <asm/preempt.h> +#define hardirq_count() (preempt_count() & HARDIRQ_MASK) +#define softirq_count() (preempt_count() & SOFTIRQ_MASK) +#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ + | NMI_MASK)) + +/* + * Are we doing bottom half or hardware interrupt processing? + * Are we in a softirq context? Interrupt context? + * in_softirq - Are we currently processing softirq or have bh disabled? + * in_serving_softirq - Are we currently processing softirq? + */ +#define in_irq() (hardirq_count()) +#define in_softirq() (softirq_count()) +#define in_interrupt() (irq_count()) +#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) + +/* + * Are we in NMI context? + */ +#define in_nmi() (preempt_count() & NMI_MASK) + +#if defined(CONFIG_PREEMPT_COUNT) +# define PREEMPT_DISABLE_OFFSET 1 +#else +# define PREEMPT_DISABLE_OFFSET 0 +#endif + +/* + * The preempt_count offset needed for things like: + * + * spin_lock_bh() + * + * Which need to disable both preemption (CONFIG_PREEMPT_COUNT) and + * softirqs, such that unlock sequences of: + * + * spin_unlock(); + * local_bh_enable(); + * + * Work as expected. + */ +#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_DISABLE_OFFSET) + +/* + * Are we running in atomic context? WARNING: this macro cannot + * always detect atomic context; in particular, it cannot know about + * held spinlocks in non-preemptible kernels. Thus it should not be + * used in the general case to determine whether sleeping is possible. + * Do not use in_atomic() in driver code. 
+ */ +#define in_atomic() (preempt_count() != 0) + +/* + * Check whether we were atomic before we did preempt_disable(): + * (used by the scheduler) + */ +#define in_atomic_preempt_off() \ + ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_DISABLE_OFFSET) + #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void preempt_count_add(int val); extern void preempt_count_sub(int val); @@ -33,6 +137,18 @@ extern void preempt_count_sub(int val); #define preempt_count_inc() preempt_count_add(1) #define preempt_count_dec() preempt_count_sub(1) +#define preempt_active_enter() \ +do { \ + preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \ + barrier(); \ +} while (0) + +#define preempt_active_exit() \ +do { \ + barrier(); \ + preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \ +} while (0) + #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ @@ -49,6 +165,8 @@ do { \ #define preempt_enable_no_resched() sched_preempt_enable_no_resched() +#define preemptible() (preempt_count() == 0 && !irqs_disabled()) + #ifdef CONFIG_PREEMPT #define preempt_enable() \ do { \ @@ -57,52 +175,46 @@ do { \ __preempt_schedule(); \ } while (0) +#define preempt_enable_notrace() \ +do { \ + barrier(); \ + if (unlikely(__preempt_count_dec_and_test())) \ + __preempt_schedule_notrace(); \ +} while (0) + #define preempt_check_resched() \ do { \ if (should_resched()) \ __preempt_schedule(); \ } while (0) -#else +#else /* !CONFIG_PREEMPT */ #define preempt_enable() \ do { \ barrier(); \ preempt_count_dec(); \ } while (0) -#define preempt_check_resched() do { } while (0) -#endif - -#define preempt_disable_notrace() \ -do { \ - __preempt_count_inc(); \ - barrier(); \ -} while (0) -#define preempt_enable_no_resched_notrace() \ +#define preempt_enable_notrace() \ do { \ barrier(); \ __preempt_count_dec(); \ } while (0) -#ifdef CONFIG_PREEMPT - -#ifndef CONFIG_CONTEXT_TRACKING -#define __preempt_schedule_context() __preempt_schedule() -#endif +#define preempt_check_resched() do { } while (0) +#endif /* CONFIG_PREEMPT */ -#define preempt_enable_notrace() \ +#define preempt_disable_notrace() \ do { \ + __preempt_count_inc(); \ barrier(); \ - if (unlikely(__preempt_count_dec_and_test())) \ - __preempt_schedule_context(); \ } while (0) -#else -#define preempt_enable_notrace() \ + +#define preempt_enable_no_resched_notrace() \ do { \ barrier(); \ __preempt_count_dec(); \ } while (0) -#endif #else /* !CONFIG_PREEMPT_COUNT */ @@ -121,6 +233,7 @@ do { \ #define preempt_disable_notrace() barrier() #define preempt_enable_no_resched_notrace() barrier() #define preempt_enable_notrace() barrier() +#define preemptible() 0 #endif /* CONFIG_PREEMPT_COUNT */ diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h deleted file mode 100644 index dbeec4d4a3be..000000000000 --- a/include/linux/preempt_mask.h +++ /dev/null @@ -1,117 +0,0 @@ -#ifndef LINUX_PREEMPT_MASK_H -#define LINUX_PREEMPT_MASK_H - -#include <linux/preempt.h> - -/* - * We put the hardirq and softirq counter into the preemption - * counter. The bitmask has the following meaning: - * - * - bits 0-7 are the preemption count (max preemption depth: 256) - * - bits 8-15 are the softirq count (max # of softirqs: 256) - * - * The hardirq count could in theory be the same as the number of - * interrupts in the system, but we run all interrupt handlers with - * interrupts disabled, so we cannot have nesting interrupts. 
Though - * there are a few palaeontologic drivers which reenable interrupts in - * the handler, so we need more than one bit here. - * - * PREEMPT_MASK: 0x000000ff - * SOFTIRQ_MASK: 0x0000ff00 - * HARDIRQ_MASK: 0x000f0000 - * NMI_MASK: 0x00100000 - * PREEMPT_ACTIVE: 0x00200000 - */ -#define PREEMPT_BITS 8 -#define SOFTIRQ_BITS 8 -#define HARDIRQ_BITS 4 -#define NMI_BITS 1 - -#define PREEMPT_SHIFT 0 -#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) -#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) -#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) - -#define __IRQ_MASK(x) ((1UL << (x))-1) - -#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) -#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) -#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) -#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) - -#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) -#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) -#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) -#define NMI_OFFSET (1UL << NMI_SHIFT) - -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) - -#define PREEMPT_ACTIVE_BITS 1 -#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) -#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) - -#define hardirq_count() (preempt_count() & HARDIRQ_MASK) -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) -#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ - | NMI_MASK)) - -/* - * Are we doing bottom half or hardware interrupt processing? - * Are we in a softirq context? Interrupt context? - * in_softirq - Are we currently processing softirq or have bh disabled? - * in_serving_softirq - Are we currently processing softirq? - */ -#define in_irq() (hardirq_count()) -#define in_softirq() (softirq_count()) -#define in_interrupt() (irq_count()) -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) - -/* - * Are we in NMI context? - */ -#define in_nmi() (preempt_count() & NMI_MASK) - -#if defined(CONFIG_PREEMPT_COUNT) -# define PREEMPT_CHECK_OFFSET 1 -#else -# define PREEMPT_CHECK_OFFSET 0 -#endif - -/* - * The preempt_count offset needed for things like: - * - * spin_lock_bh() - * - * Which need to disable both preemption (CONFIG_PREEMPT_COUNT) and - * softirqs, such that unlock sequences of: - * - * spin_unlock(); - * local_bh_enable(); - * - * Work as expected. - */ -#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_CHECK_OFFSET) - -/* - * Are we running in atomic context? WARNING: this macro cannot - * always detect atomic context; in particular, it cannot know about - * held spinlocks in non-preemptible kernels. Thus it should not be - * used in the general case to determine whether sleeping is possible. - * Do not use in_atomic() in driver code. 
- */ -#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) - -/* - * Check whether we were atomic before we did preempt_disable(): - * (used by the scheduler, *after* releasing the kernel lock) - */ -#define in_atomic_preempt_off() \ - ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) - -#ifdef CONFIG_PREEMPT_COUNT -# define preemptible() (preempt_count() == 0 && !irqs_disabled()) -#else -# define preemptible() 0 -#endif - -#endif /* LINUX_PREEMPT_MASK_H */ diff --git a/include/linux/rculist.h b/include/linux/rculist.h index a18b16f1dc0e..17c6b1f84a77 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -29,8 +29,8 @@ */ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) { - ACCESS_ONCE(list->next) = list; - ACCESS_ONCE(list->prev) = list; + WRITE_ONCE(list->next, list); + WRITE_ONCE(list->prev, list); } /* @@ -288,7 +288,7 @@ static inline void list_splice_init_rcu(struct list_head *list, #define list_first_or_null_rcu(ptr, type, member) \ ({ \ struct list_head *__ptr = (ptr); \ - struct list_head *__next = ACCESS_ONCE(__ptr->next); \ + struct list_head *__next = READ_ONCE(__ptr->next); \ likely(__ptr != __next) ? list_entry_rcu(__next, type, member) : NULL; \ }) @@ -549,8 +549,8 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, */ #define hlist_for_each_entry_from_rcu(pos, member) \ for (; pos; \ - pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ - typeof(*(pos)), member)) + pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ + &(pos)->member)), typeof(*(pos)), member)) #endif /* __KERNEL__ */ #endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 573a5afd5ed8..33a056bb886f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -44,6 +44,8 @@ #include <linux/debugobjects.h> #include <linux/bug.h> #include <linux/compiler.h> +#include <linux/ktime.h> + #include <asm/barrier.h> extern int rcu_expedited; /* for sysctl */ @@ -292,10 +294,6 @@ void rcu_sched_qs(void); void rcu_bh_qs(void); void rcu_check_callbacks(int user); struct notifier_block; -void rcu_idle_enter(void); -void rcu_idle_exit(void); -void rcu_irq_enter(void); -void rcu_irq_exit(void); int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu); @@ -364,8 +362,8 @@ extern struct srcu_struct tasks_rcu_exit_srcu; #define rcu_note_voluntary_context_switch(t) \ do { \ rcu_all_qs(); \ - if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \ - ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \ + if (READ_ONCE((t)->rcu_tasks_holdout)) \ + WRITE_ONCE((t)->rcu_tasks_holdout, false); \ } while (0) #else /* #ifdef CONFIG_TASKS_RCU */ #define TASKS_RCU(x) do { } while (0) @@ -609,7 +607,7 @@ static inline void rcu_preempt_sleep_check(void) #define __rcu_access_pointer(p, space) \ ({ \ - typeof(*p) *_________p1 = (typeof(*p) *__force)ACCESS_ONCE(p); \ + typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \ rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) @@ -628,21 +626,6 @@ static inline void rcu_preempt_sleep_check(void) ((typeof(*p) __force __kernel *)(p)); \ }) -#define __rcu_access_index(p, space) \ -({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - rcu_dereference_sparse(p, space); \ - (_________p1); \ -}) -#define __rcu_dereference_index_check(p, c) \ -({ \ - /* Dependency order vs. p above. 
*/ \ - typeof(p) _________p1 = lockless_dereference(p); \ - rcu_lockdep_assert(c, \ - "suspicious rcu_dereference_index_check() usage"); \ - (_________p1); \ -}) - /** * RCU_INITIALIZER() - statically initialize an RCU-protected global variable * @v: The value to statically initialize with. @@ -659,7 +642,7 @@ static inline void rcu_preempt_sleep_check(void) */ #define lockless_dereference(p) \ ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ + typeof(p) _________p1 = READ_ONCE(p); \ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ (_________p1); \ }) @@ -702,7 +685,7 @@ static inline void rcu_preempt_sleep_check(void) * @p: The pointer to read * * Return the value of the specified RCU-protected pointer, but omit the - * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful + * smp_read_barrier_depends() and keep the READ_ONCE(). This is useful * when the value of this pointer is accessed, but the pointer is not * dereferenced, for example, when testing an RCU-protected pointer against * NULL. Although rcu_access_pointer() may also be used in cases where @@ -787,47 +770,12 @@ static inline void rcu_preempt_sleep_check(void) #define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu) /** - * rcu_access_index() - fetch RCU index with no dereferencing - * @p: The index to read - * - * Return the value of the specified RCU-protected index, but omit the - * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful - * when the value of this index is accessed, but the index is not - * dereferenced, for example, when testing an RCU-protected index against - * -1. Although rcu_access_index() may also be used in cases where - * update-side locks prevent the value of the index from changing, you - * should instead use rcu_dereference_index_protected() for this use case. - */ -#define rcu_access_index(p) __rcu_access_index((p), __rcu) - -/** - * rcu_dereference_index_check() - rcu_dereference for indices with debug checking - * @p: The pointer to read, prior to dereferencing - * @c: The conditions under which the dereference will take place - * - * Similar to rcu_dereference_check(), but omits the sparse checking. - * This allows rcu_dereference_index_check() to be used on integers, - * which can then be used as array indices. Attempting to use - * rcu_dereference_check() on an integer will give compiler warnings - * because the sparse address-space mechanism relies on dereferencing - * the RCU-protected pointer. Dereferencing integers is not something - * that even gcc will put up with. - * - * Note that this function does not implicitly check for RCU read-side - * critical sections. If this function gains lots of uses, it might - * make sense to provide versions for each flavor of RCU, but it does - * not make sense as of early 2010. - */ -#define rcu_dereference_index_check(p, c) \ - __rcu_dereference_index_check((p), (c)) - -/** * rcu_dereference_protected() - fetch RCU pointer when updates prevented * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * Return the value of the specified RCU-protected pointer, but omit - * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This + * both the smp_read_barrier_depends() and the READ_ONCE(). This * is useful in cases where update-side locks prevent the value of the * pointer from changing. 
Please note that this primitive does -not- * prevent the compiler from repeating this reference or combining it @@ -1153,13 +1101,13 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) -#if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) -static inline int rcu_needs_cpu(unsigned long *delta_jiffies) +#ifdef CONFIG_TINY_RCU +static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) { - *delta_jiffies = ULONG_MAX; + *nextevt = KTIME_MAX; return 0; } -#endif /* #if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) */ +#endif /* #ifdef CONFIG_TINY_RCU */ #if defined(CONFIG_RCU_NOCB_CPU_ALL) static inline bool rcu_is_nocb_cpu(int cpu) { return true; } diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 937edaeb150d..3df6c1ec4e25 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -159,6 +159,22 @@ static inline void rcu_cpu_stall_reset(void) { } +static inline void rcu_idle_enter(void) +{ +} + +static inline void rcu_idle_exit(void) +{ +} + +static inline void rcu_irq_enter(void) +{ +} + +static inline void rcu_irq_exit(void) +{ +} + static inline void exit_rcu(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d2e583a6aaca..456879143f89 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -31,9 +31,7 @@ #define __LINUX_RCUTREE_H void rcu_note_context_switch(void); -#ifndef CONFIG_RCU_NOCB_CPU_ALL -int rcu_needs_cpu(unsigned long *delta_jiffies); -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ +int rcu_needs_cpu(u64 basem, u64 *nextevt); void rcu_cpu_stall_reset(void); /* @@ -93,6 +91,11 @@ void rcu_force_quiescent_state(void); void rcu_bh_force_quiescent_state(void); void rcu_sched_force_quiescent_state(void); +void rcu_idle_enter(void); +void rcu_idle_exit(void); +void rcu_irq_enter(void); +void rcu_irq_exit(void); + void exit_rcu(void); void rcu_scheduler_starting(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index 26a2e6122734..6633e83e608a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -25,7 +25,7 @@ struct sched_param { #include <linux/errno.h> #include <linux/nodemask.h> #include <linux/mm_types.h> -#include <linux/preempt_mask.h> +#include <linux/preempt.h> #include <asm/page.h> #include <asm/ptrace.h> @@ -132,6 +132,7 @@ struct fs_struct; struct perf_event_context; struct blk_plug; struct filename; +struct nameidata; #define VMACACHE_BITS 2 #define VMACACHE_SIZE (1U << VMACACHE_BITS) @@ -173,7 +174,12 @@ extern unsigned long nr_iowait_cpu(int cpu); extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); + +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void update_cpu_load_nohz(void); +#else +static inline void update_cpu_load_nohz(void) { } +#endif extern unsigned long get_parent_ip(unsigned long addr); @@ -213,9 +219,10 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); #define TASK_WAKEKILL 128 #define TASK_WAKING 256 #define TASK_PARKED 512 -#define TASK_STATE_MAX 1024 +#define TASK_NOLOAD 1024 +#define TASK_STATE_MAX 2048 -#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP" +#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN" extern char ___assert_task_state[1 - 2*!!( sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; @@ -225,6 +232,8 @@ extern char ___assert_task_state[1 - 2*!!( #define TASK_STOPPED (TASK_WAKEKILL | 
__TASK_STOPPED) #define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) +#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) + /* Convenience macros for the sake of wake_up */ #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) #define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) @@ -240,7 +249,8 @@ extern char ___assert_task_state[1 - 2*!!( ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) #define task_contributes_to_load(task) \ ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FROZEN) == 0) + (task->flags & PF_FROZEN) == 0 && \ + (task->state & TASK_NOLOAD) == 0) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP @@ -252,7 +262,7 @@ extern char ___assert_task_state[1 - 2*!!( #define set_task_state(tsk, state_value) \ do { \ (tsk)->task_state_change = _THIS_IP_; \ - set_mb((tsk)->state, (state_value)); \ + smp_store_mb((tsk)->state, (state_value)); \ } while (0) /* @@ -274,7 +284,7 @@ extern char ___assert_task_state[1 - 2*!!( #define set_current_state(state_value) \ do { \ current->task_state_change = _THIS_IP_; \ - set_mb(current->state, (state_value)); \ + smp_store_mb(current->state, (state_value)); \ } while (0) #else @@ -282,7 +292,7 @@ extern char ___assert_task_state[1 - 2*!!( #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) #define set_task_state(tsk, state_value) \ - set_mb((tsk)->state, (state_value)) + smp_store_mb((tsk)->state, (state_value)) /* * set_current_state() includes a barrier so that the write of current->state @@ -298,7 +308,7 @@ extern char ___assert_task_state[1 - 2*!!( #define __set_current_state(state_value) \ do { current->state = (state_value); } while (0) #define set_current_state(state_value) \ - set_mb(current->state, (state_value)) + smp_store_mb(current->state, (state_value)) #endif @@ -335,14 +345,10 @@ extern int runqueue_is_locked(int cpu); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); extern void set_cpu_sd_state_idle(void); -extern int get_nohz_timer_target(int pinned); +extern int get_nohz_timer_target(void); #else static inline void nohz_balance_enter_idle(int cpu) { } static inline void set_cpu_sd_state_idle(void) { } -static inline int get_nohz_timer_target(int pinned) -{ - return smp_processor_id(); -} #endif /* @@ -567,6 +573,23 @@ struct task_cputime { .sum_exec_runtime = 0, \ } +/* + * This is the atomic variant of task_cputime, which can be used for + * storing and updating task_cputime statistics without locking. + */ +struct task_cputime_atomic { + atomic64_t utime; + atomic64_t stime; + atomic64_t sum_exec_runtime; +}; + +#define INIT_CPUTIME_ATOMIC \ + (struct task_cputime_atomic) { \ + .utime = ATOMIC64_INIT(0), \ + .stime = ATOMIC64_INIT(0), \ + .sum_exec_runtime = ATOMIC64_INIT(0), \ + } + #ifdef CONFIG_PREEMPT_COUNT #define PREEMPT_DISABLED (1 + PREEMPT_ENABLED) #else @@ -584,18 +607,16 @@ struct task_cputime { /** * struct thread_group_cputimer - thread group interval timer counts - * @cputime: thread group interval timers. + * @cputime_atomic: atomic thread group interval timers. * @running: non-zero when there are timers running and * @cputime receives updates. - * @lock: lock for fields in this struct. * * This structure contains the version of task_cputime, above, that is * used for thread group CPU timer calculations. 
*/ struct thread_group_cputimer { - struct task_cputime cputime; + struct task_cputime_atomic cputime_atomic; int running; - raw_spinlock_t lock; }; #include <linux/rwsem.h> @@ -900,6 +921,50 @@ enum cpu_idle_type { #define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) /* + * Wake-queues are lists of tasks with a pending wakeup, whose + * callers have already marked the task as woken internally, + * and can thus carry on. A common use case is being able to + * do the wakeups once the corresponding user lock has been + * released. + * + * We hold a reference to each task in the list across the wakeup, + * thus guaranteeing that the memory is still valid by the time + * the actual wakeups are performed in wake_up_q(). + * + * One per task suffices, because there's never a need for a task to be + * in two wake queues simultaneously; it is forbidden to abandon a task + * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is + * already in a wake queue, the wakeup will happen soon and the second + * waker can just skip it. + * + * The WAKE_Q macro declares and initializes the list head. + * wake_up_q() does NOT reinitialize the list; it's expected to be + * called near the end of a function, where the fact that the queue is + * not used again will be easy to see by inspection. + * + * Note that this can cause spurious wakeups. schedule() callers + * must ensure the call is done inside a loop, confirming that the + * wakeup condition has in fact occurred. + */ +struct wake_q_node { + struct wake_q_node *next; +}; + +struct wake_q_head { + struct wake_q_node *first; + struct wake_q_node **lastp; +}; + +#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) + +#define WAKE_Q(name) \ + struct wake_q_head name = { WAKE_Q_TAIL, &name.first } + +extern void wake_q_add(struct wake_q_head *head, + struct task_struct *task); +extern void wake_up_q(struct wake_q_head *head); + +/* * sched-domains (multiprocessor balancing) declarations: */ #ifdef CONFIG_SMP @@ -1334,8 +1399,6 @@ struct task_struct { int rcu_read_lock_nesting; union rcu_special rcu_read_unlock_special; struct list_head rcu_node_entry; -#endif /* #ifdef CONFIG_PREEMPT_RCU */ -#ifdef CONFIG_PREEMPT_RCU struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TASKS_RCU @@ -1356,9 +1419,6 @@ struct task_struct { #endif struct mm_struct *mm, *active_mm; -#ifdef CONFIG_COMPAT_BRK - unsigned brk_randomized:1; -#endif /* per-thread vma caching */ u32 vmacache_seqnum; struct vm_area_struct *vmacache[VMACACHE_SIZE]; @@ -1369,7 +1429,7 @@ struct task_struct { int exit_state; int exit_code, exit_signal; int pdeath_signal; /* The signal sent when the parent dies */ - unsigned int jobctl; /* JOBCTL_*, siglock protected */ + unsigned long jobctl; /* JOBCTL_*, siglock protected */ /* Used for emulating ABI behavior of previous Linux versions */ unsigned int personality; @@ -1381,10 +1441,14 @@ struct task_struct { /* Revert to default priority/policy when forking */ unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; + unsigned sched_migrated:1; #ifdef CONFIG_MEMCG_KMEM unsigned memcg_kmem_skip_account:1; #endif +#ifdef CONFIG_COMPAT_BRK + unsigned brk_randomized:1; +#endif unsigned long atomic_flags; /* Flags needing atomic access. 
*/ @@ -1461,7 +1525,7 @@ struct task_struct { it with task_lock()) - initialized normally by setup_new_exec */ /* file system info */ - int link_count, total_link_count; + struct nameidata *nameidata; #ifdef CONFIG_SYSVIPC /* ipc stuff */ struct sysv_sem sysvsem; @@ -1511,6 +1575,8 @@ struct task_struct { /* Protection of the PI data structures: */ raw_spinlock_t pi_lock; + struct wake_q_node wake_q; + #ifdef CONFIG_RT_MUTEXES /* PI waiters blocked on a rt_mutex held by this task */ struct rb_root pi_waiters; @@ -1724,6 +1790,7 @@ struct task_struct { #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; #endif + int pagefault_disabled; }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ @@ -2077,22 +2144,22 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ -#define JOBCTL_STOP_DEQUEUED (1 << JOBCTL_STOP_DEQUEUED_BIT) -#define JOBCTL_STOP_PENDING (1 << JOBCTL_STOP_PENDING_BIT) -#define JOBCTL_STOP_CONSUME (1 << JOBCTL_STOP_CONSUME_BIT) -#define JOBCTL_TRAP_STOP (1 << JOBCTL_TRAP_STOP_BIT) -#define JOBCTL_TRAP_NOTIFY (1 << JOBCTL_TRAP_NOTIFY_BIT) -#define JOBCTL_TRAPPING (1 << JOBCTL_TRAPPING_BIT) -#define JOBCTL_LISTENING (1 << JOBCTL_LISTENING_BIT) +#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) +#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) +#define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) +#define JOBCTL_TRAP_STOP (1UL << JOBCTL_TRAP_STOP_BIT) +#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) +#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) +#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) extern bool task_set_jobctl_pending(struct task_struct *task, - unsigned int mask); + unsigned long mask); extern void task_clear_jobctl_trapping(struct task_struct *task); extern void task_clear_jobctl_pending(struct task_struct *task, - unsigned int mask); + unsigned long mask); static inline void rcu_copy_process(struct task_struct *p) { @@ -2532,6 +2599,9 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, } #endif +#define tasklist_empty() \ + list_empty(&init_task.tasks) + #define next_task(p) \ list_entry_rcu((p)->tasks.next, struct task_struct, tasks) @@ -2962,11 +3032,6 @@ static __always_inline bool need_resched(void) void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); -static inline void thread_group_cputime_init(struct signal_struct *sig) -{ - raw_spin_lock_init(&sig->cputimer.lock); -} - /* * Reevaluate whether the task has signals pending delivery. * Wake the task if so. 
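As a rough illustration of the wake-queue usage pattern described in the sched.h comment above (not code from this series): a sleeping lock can collect its waiters under its internal spinlock and defer the actual wakeups until that spinlock is dropped. The foo_mutex and foo_waiter types, their fields and the function name below are invented for this sketch; only WAKE_Q(), wake_q_add() and wake_up_q() come from the declarations added by the patch.

#include <linux/list.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

struct foo_mutex {				/* hypothetical lock type */
	raw_spinlock_t   wait_lock;
	struct list_head wait_list;		/* of struct foo_waiter */
};

struct foo_waiter {				/* one entry per sleeping task */
	struct list_head   node;
	struct task_struct *task;
};

static void foo_mutex_wake_waiters(struct foo_mutex *lock)
{
	struct foo_waiter *w, *tmp;
	WAKE_Q(wake_q);				/* on-stack head, statically initialized */

	raw_spin_lock(&lock->wait_lock);
	list_for_each_entry_safe(w, tmp, &lock->wait_list, node) {
		list_del(&w->node);
		/* Mark the task as having a pending wakeup; holds a task reference. */
		wake_q_add(&wake_q, w->task);
	}
	raw_spin_unlock(&lock->wait_lock);

	/* The actual wakeups happen here, outside of wait_lock. */
	wake_up_q(&wake_q);
}

Deferring wake_up_q() until after wait_lock is released keeps the spinlock hold time short and avoids waking a task only for it to block again on a lock that is still held, which is the stated motivation of the wake-queue helpers.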
@@ -3080,13 +3145,13 @@ static inline void mm_update_next_owner(struct mm_struct *mm) static inline unsigned long task_rlimit(const struct task_struct *tsk, unsigned int limit) { - return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur); + return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); } static inline unsigned long task_rlimit_max(const struct task_struct *tsk, unsigned int limit) { - return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max); + return READ_ONCE(tsk->signal->rlim[limit].rlim_max); } static inline unsigned long rlimit(unsigned int limit) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 596a0e007c62..c9e4731cf10b 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -57,24 +57,12 @@ extern unsigned int sysctl_numa_balancing_scan_size; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; extern unsigned int sysctl_sched_time_avg; -extern unsigned int sysctl_timer_migration; extern unsigned int sysctl_sched_shares_window; int sched_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); #endif -#ifdef CONFIG_SCHED_DEBUG -static inline unsigned int get_sysctl_timer_migration(void) -{ - return sysctl_timer_migration; -} -#else -static inline unsigned int get_sysctl_timer_migration(void) -{ - return 1; -} -#endif /* * control realtime throttling: diff --git a/include/linux/security.h b/include/linux/security.h index 18264ea9e314..52febde52479 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -43,7 +43,6 @@ struct file; struct vfsmount; struct path; struct qstr; -struct nameidata; struct iattr; struct fown_struct; struct file_operations; @@ -477,7 +476,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @inode_follow_link: * Check permission to follow a symbolic link when looking up a pathname. * @dentry contains the dentry structure for the link. - * @nd contains the nameidata structure for the parent directory. + * @inode contains the inode, which itself is not stable in RCU-walk + * @rcu indicates whether we are in RCU-walk mode. * Return 0 if permission is granted. * @inode_permission: * Check permission before accessing an inode. 
This hook is called by the @@ -1553,7 +1553,8 @@ struct security_operations { int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); int (*inode_readlink) (struct dentry *dentry); - int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd); + int (*inode_follow_link) (struct dentry *dentry, struct inode *inode, + bool rcu); int (*inode_permission) (struct inode *inode, int mask); int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); int (*inode_getattr) (const struct path *path); @@ -1839,7 +1840,8 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); int security_inode_readlink(struct dentry *dentry); -int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); +int security_inode_follow_link(struct dentry *dentry, struct inode *inode, + bool rcu); int security_inode_permission(struct inode *inode, int mask); int security_inode_setattr(struct dentry *dentry, struct iattr *attr); int security_inode_getattr(const struct path *path); @@ -2242,7 +2244,8 @@ static inline int security_inode_readlink(struct dentry *dentry) } static inline int security_inode_follow_link(struct dentry *dentry, - struct nameidata *nd) + struct inode *inode, + bool rcu) { return 0; } diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 5f68d0a391ce..486e685a226a 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -233,6 +233,47 @@ static inline void raw_write_seqcount_end(seqcount_t *s) s->sequence++; } +/** + * raw_write_seqcount_barrier - do a seq write barrier + * @s: pointer to seqcount_t + * + * This can be used to provide an ordering guarantee instead of the + * usual consistency guarantee. It is one wmb cheaper, because we can + * collapse the two back-to-back wmb()s. + * + * seqcount_t seq; + * bool X = true, Y = false; + * + * void read(void) + * { + * bool x, y; + * + * do { + * int s = read_seqcount_begin(&seq); + * + * x = X; y = Y; + * + * } while (read_seqcount_retry(&seq, s)); + * + * BUG_ON(!x && !y); + * } + * + * void write(void) + * { + * Y = true; + * + * raw_write_seqcount_barrier(seq); + * + * X = false; + * } + */ +static inline void raw_write_seqcount_barrier(seqcount_t *s) +{ + s->sequence++; + smp_wmb(); + s->sequence++; +} + /* * raw_write_seqcount_latch - redirect readers to even/odd copy * @s: pointer to seqcount_t @@ -266,13 +307,13 @@ static inline void write_seqcount_end(seqcount_t *s) } /** - * write_seqcount_barrier - invalidate in-progress read-side seq operations + * write_seqcount_invalidate - invalidate in-progress read-side seq operations * @s: pointer to seqcount_t * - * After write_seqcount_barrier, no read-side seq operations will complete + * After write_seqcount_invalidate, no read-side seq operations will complete * successfully and see data older than this. */ -static inline void write_seqcount_barrier(seqcount_t *s) +static inline void write_seqcount_invalidate(seqcount_t *s) { smp_wmb(); s->sequence+=2; diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 3e18379dfa6f..0063b24b4f36 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -120,7 +120,7 @@ do { \ /* * Despite its name it doesn't necessarily has to be a full barrier. * It should only guarantee that a STORE before the critical section - * can not be reordered with a LOAD inside this section. 
+ * can not be reordered with LOADs and STOREs inside this section. * spin_lock() is the one-way barrier, this LOAD can not escape out * of the region. So the default implementation simply ensures that * a STORE can not move into the critical section, smp_wmb() should diff --git a/include/linux/tick.h b/include/linux/tick.h index f8492da57ad3..4191b5623a28 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -134,6 +134,12 @@ static inline bool tick_nohz_full_cpu(int cpu) return cpumask_test_cpu(cpu, tick_nohz_full_mask); } +static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) +{ + if (tick_nohz_full_enabled()) + cpumask_or(mask, mask, tick_nohz_full_mask); +} + extern void __tick_nohz_full_check(void); extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); @@ -142,6 +148,7 @@ extern void __tick_nohz_task_switch(struct task_struct *tsk); #else static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } +static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } static inline void __tick_nohz_full_check(void) { } static inline void tick_nohz_full_kick_cpu(int cpu) { } static inline void tick_nohz_full_kick(void) { } diff --git a/include/linux/time64.h b/include/linux/time64.h index a3831478d9cf..77b5df2acd2a 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -2,6 +2,7 @@ #define _LINUX_TIME64_H #include <uapi/linux/time.h> +#include <linux/math64.h> typedef __s64 time64_t; @@ -28,6 +29,7 @@ struct timespec64 { #define FSEC_PER_SEC 1000000000000000LL /* Located here for timespec[64]_valid_strict */ +#define TIME64_MAX ((s64)~((u64)1 << 63)) #define KTIME_MAX ((s64)~((u64)1 << 63)) #define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index fb86963859c7..25247220b4b7 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -49,6 +49,8 @@ struct tk_read_base { * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai * @tai_offset: The current UTC to TAI offset in seconds + * @clock_was_set_seq: The sequence number of clock was set events + * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second * @raw_time: Monotonic raw base time in timespec64 format * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP @@ -60,6 +62,9 @@ struct tk_read_base { * shifted nano seconds. * @ntp_error_shift: Shift conversion between clock shifted nano seconds and * ntp shifted nano seconds. 
+ * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING) + * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING) + * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) * * Note: For timespec(64) based interfaces wall_to_monotonic is what * we need to add to xtime (or xtime corrected for sub jiffie times) @@ -85,6 +90,8 @@ struct timekeeper { ktime_t offs_boot; ktime_t offs_tai; s32 tai_offset; + unsigned int clock_was_set_seq; + ktime_t next_leap_ktime; struct timespec64 raw_time; /* The following members are for timekeeping internal use */ @@ -104,6 +111,18 @@ struct timekeeper { s64 ntp_error; u32 ntp_error_shift; u32 ntp_err_mult; +#ifdef CONFIG_DEBUG_TIMEKEEPING + long last_warning; + /* + * These simple flag variables are managed + * without locks, which is racy, but they are + * ok since we don't really care about being + * super precise about how many events were + * seen, just that a problem was observed. + */ + int underflow_seen; + int overflow_seen; +#endif }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 99176af216af..3aa72e648650 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -163,6 +163,7 @@ extern ktime_t ktime_get(void); extern ktime_t ktime_get_with_offset(enum tk_offsets offs); extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs); extern ktime_t ktime_get_raw(void); +extern u32 ktime_get_resolution_ns(void); /** * ktime_get_real - get the real (wall-) time in ktime_t format @@ -266,7 +267,6 @@ extern int persistent_clock_is_local; extern void read_persistent_clock(struct timespec *ts); extern void read_persistent_clock64(struct timespec64 *ts); -extern void read_boot_clock(struct timespec *ts); extern void read_boot_clock64(struct timespec64 *ts); extern int update_persistent_clock(struct timespec now); extern int update_persistent_clock64(struct timespec64 now); diff --git a/include/linux/timer.h b/include/linux/timer.h index 8c5a197e1587..61aa61dc410c 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -14,27 +14,23 @@ struct timer_list { * All fields that change during normal runtime grouped to the * same cacheline */ - struct list_head entry; - unsigned long expires; - struct tvec_base *base; - - void (*function)(unsigned long); - unsigned long data; - - int slack; + struct hlist_node entry; + unsigned long expires; + void (*function)(unsigned long); + unsigned long data; + u32 flags; + int slack; #ifdef CONFIG_TIMER_STATS - int start_pid; - void *start_site; - char start_comm[16]; + int start_pid; + void *start_site; + char start_comm[16]; #endif #ifdef CONFIG_LOCKDEP - struct lockdep_map lockdep_map; + struct lockdep_map lockdep_map; #endif }; -extern struct tvec_base boot_tvec_bases; - #ifdef CONFIG_LOCKDEP /* * NB: because we have to copy the lockdep_map, setting the lockdep_map key @@ -49,9 +45,6 @@ extern struct tvec_base boot_tvec_bases; #endif /* - * Note that all tvec_bases are at least 4 byte aligned and lower two bits - * of base in timer_list is guaranteed to be zero. Use them for flags. - * * A deferrable timer will work normally when the system is busy, but * will not cause a CPU to come out of idle just to service it; instead, * the timer will be serviced when the CPU eventually wakes up with a @@ -65,17 +58,18 @@ extern struct tvec_base boot_tvec_bases; * workqueue locking issues. It's not meant for executing random crap * with interrupts disabled. Abuse is monitored! 
*/ -#define TIMER_DEFERRABLE 0x1LU -#define TIMER_IRQSAFE 0x2LU - -#define TIMER_FLAG_MASK 0x3LU +#define TIMER_CPUMASK 0x0007FFFF +#define TIMER_MIGRATING 0x00080000 +#define TIMER_BASEMASK (TIMER_CPUMASK | TIMER_MIGRATING) +#define TIMER_DEFERRABLE 0x00100000 +#define TIMER_IRQSAFE 0x00200000 #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ - .entry = { .prev = TIMER_ENTRY_STATIC }, \ + .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ .expires = (_expires), \ .data = (_data), \ - .base = (void *)((unsigned long)&boot_tvec_bases + (_flags)), \ + .flags = (_flags), \ .slack = -1, \ __TIMER_LOCKDEP_MAP_INITIALIZER( \ __FILE__ ":" __stringify(__LINE__)) \ @@ -168,7 +162,7 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, */ static inline int timer_pending(const struct timer_list * timer) { - return timer->entry.next != NULL; + return timer->entry.pprev != NULL; } extern void add_timer_on(struct timer_list *timer, int cpu); @@ -188,26 +182,16 @@ extern void set_timer_slack(struct timer_list *time, int slack_hz); #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) /* - * Return when the next timer-wheel timeout occurs (in absolute jiffies), - * locks the timer base and does the comparison against the given - * jiffie. - */ -extern unsigned long get_next_timer_interrupt(unsigned long now); - -/* * Timer-statistics info: */ #ifdef CONFIG_TIMER_STATS extern int timer_stats_active; -#define TIMER_STATS_FLAG_DEFERRABLE 0x1 - extern void init_timer_stats(void); extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, - void *timerf, char *comm, - unsigned int timer_flag); + void *timerf, char *comm, u32 flags); extern void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr); @@ -254,6 +238,15 @@ extern void run_local_timers(void); struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +#include <linux/sysctl.h> + +extern unsigned int sysctl_timer_migration; +int timer_migration_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); +#endif + unsigned long __round_jiffies(unsigned long j, int cpu); unsigned long __round_jiffies_relative(unsigned long j, int cpu); unsigned long round_jiffies(unsigned long j); diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h index a520fd70a59f..7eec17ad7fa1 100644 --- a/include/linux/timerqueue.h +++ b/include/linux/timerqueue.h @@ -16,10 +16,10 @@ struct timerqueue_head { }; -extern void timerqueue_add(struct timerqueue_head *head, - struct timerqueue_node *node); -extern void timerqueue_del(struct timerqueue_head *head, - struct timerqueue_node *node); +extern bool timerqueue_add(struct timerqueue_head *head, + struct timerqueue_node *node); +extern bool timerqueue_del(struct timerqueue_head *head, + struct timerqueue_node *node); extern struct timerqueue_node *timerqueue_iterate_next( struct timerqueue_node *node); diff --git a/include/linux/topology.h b/include/linux/topology.h index 909b6e43b694..73ddad1e0fa3 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -191,8 +191,8 @@ static inline int cpu_to_mem(int cpu) #ifndef topology_core_id #define topology_core_id(cpu) ((void)(cpu), 0) #endif -#ifndef topology_thread_cpumask -#define topology_thread_cpumask(cpu) cpumask_of(cpu) +#ifndef topology_sibling_cpumask +#define topology_sibling_cpumask(cpu) cpumask_of(cpu) #endif #ifndef topology_core_cpumask #define 
topology_core_cpumask(cpu) cpumask_of(cpu) @@ -201,7 +201,7 @@ static inline int cpu_to_mem(int cpu) #ifdef CONFIG_SCHED_SMT static inline const struct cpumask *cpu_smt_mask(int cpu) { - return topology_thread_cpumask(cpu); + return topology_sibling_cpumask(cpu); } #endif diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index ecd3319dac33..ae572c138607 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -1,21 +1,30 @@ #ifndef __LINUX_UACCESS_H__ #define __LINUX_UACCESS_H__ -#include <linux/preempt.h> +#include <linux/sched.h> #include <asm/uaccess.h> +static __always_inline void pagefault_disabled_inc(void) +{ + current->pagefault_disabled++; +} + +static __always_inline void pagefault_disabled_dec(void) +{ + current->pagefault_disabled--; + WARN_ON(current->pagefault_disabled < 0); +} + /* - * These routines enable/disable the pagefault handler in that - * it will not take any locks and go straight to the fixup table. + * These routines enable/disable the pagefault handler. If disabled, it will + * not take any locks and go straight to the fixup table. * - * They have great resemblance to the preempt_disable/enable calls - * and in fact they are identical; this is because currently there is - * no other way to make the pagefault handlers do this. So we do - * disable preemption but we don't necessarily care about that. + * User access methods will not sleep when called from a pagefault_disabled() + * environment. */ static inline void pagefault_disable(void) { - preempt_count_inc(); + pagefault_disabled_inc(); /* * make sure to have issued the store before a pagefault * can hit. @@ -25,18 +34,31 @@ static inline void pagefault_disable(void) static inline void pagefault_enable(void) { -#ifndef CONFIG_PREEMPT /* * make sure to issue those last loads/stores before enabling * the pagefault handler again. */ barrier(); - preempt_count_dec(); -#else - preempt_enable(); -#endif + pagefault_disabled_dec(); } +/* + * Is the pagefault handler disabled? If so, user access methods will not sleep. + */ +#define pagefault_disabled() (current->pagefault_disabled != 0) + +/* + * The pagefault handler is in general disabled by pagefault_disable() or + * when in irq context (via in_atomic()). + * + * This function should only be used by the fault handlers. Other users should + * stick to pagefault_disabled(). + * Please NEVER use preempt_disable() to disable the fault handler. With + * !CONFIG_PREEMPT_COUNT, this is like a NOP. So the handler won't be disabled. + * in_atomic() will report different values based on !CONFIG_PREEMPT_COUNT. + */ +#define faulthandler_disabled() (pagefault_disabled() || in_atomic()) + #ifndef ARCH_HAS_NOCACHE_UACCESS static inline unsigned long __copy_from_user_inatomic_nocache(void *to, diff --git a/include/linux/wait.h b/include/linux/wait.h index 2db83349865b..d69ac4ecc88b 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -969,7 +969,7 @@ extern int bit_wait_io_timeout(struct wait_bit_key *); * on that signal. */ static inline int -wait_on_bit(void *word, int bit, unsigned mode) +wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); if (!test_bit(bit, word)) @@ -994,7 +994,7 @@ wait_on_bit(void *word, int bit, unsigned mode) * on that signal. 
*/ static inline int -wait_on_bit_io(void *word, int bit, unsigned mode) +wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); if (!test_bit(bit, word)) @@ -1020,7 +1020,8 @@ wait_on_bit_io(void *word, int bit, unsigned mode) * received a signal and the mode permitted wakeup on that signal. */ static inline int -wait_on_bit_timeout(void *word, int bit, unsigned mode, unsigned long timeout) +wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, + unsigned long timeout) { might_sleep(); if (!test_bit(bit, word)) @@ -1047,7 +1048,8 @@ wait_on_bit_timeout(void *word, int bit, unsigned mode, unsigned long timeout) * on that signal. */ static inline int -wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode) +wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, + unsigned mode) { might_sleep(); if (!test_bit(bit, word)) @@ -1075,7 +1077,7 @@ wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode * the @mode allows that signal to wake the process. */ static inline int -wait_on_bit_lock(void *word, int bit, unsigned mode) +wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) { might_sleep(); if (!test_and_set_bit(bit, word)) @@ -1099,7 +1101,7 @@ wait_on_bit_lock(void *word, int bit, unsigned mode) * the @mode allows that signal to wake the process. */ static inline int -wait_on_bit_lock_io(void *word, int bit, unsigned mode) +wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); if (!test_and_set_bit(bit, word)) @@ -1125,7 +1127,8 @@ wait_on_bit_lock_io(void *word, int bit, unsigned mode) * the @mode allows that signal to wake the process. */ static inline int -wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned mode) +wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, + unsigned mode) { might_sleep(); if (!test_and_set_bit(bit, word)) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 497bc14cdb85..0320bbb7d7b5 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -98,7 +98,8 @@ struct inet_connection_sock { const struct tcp_congestion_ops *icsk_ca_ops; const struct inet_connection_sock_af_ops *icsk_af_ops; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); - __u8 icsk_ca_state:7, + __u8 icsk_ca_state:6, + icsk_ca_setsockopt:1, icsk_ca_dst_locked:1; __u8 icsk_retransmits; __u8 icsk_pending; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8e3668b44c29..fc57f6b82fc5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -354,7 +354,7 @@ enum ieee80211_rssi_event_data { }; /** - * enum ieee80211_rssi_event - data attached to an %RSSI_EVENT + * struct ieee80211_rssi_event - data attached to an %RSSI_EVENT * @data: See &enum ieee80211_rssi_event_data */ struct ieee80211_rssi_event { @@ -388,7 +388,7 @@ enum ieee80211_mlme_event_status { }; /** - * enum ieee80211_mlme_event - data attached to an %MLME_EVENT + * struct ieee80211_mlme_event - data attached to an %MLME_EVENT * @data: See &enum ieee80211_mlme_event_data * @status: See &enum ieee80211_mlme_event_status * @reason: the reason code if applicable @@ -401,9 +401,10 @@ struct ieee80211_mlme_event { /** * struct ieee80211_event - event to be sent to the driver - * @type The event itself. See &enum ieee80211_event_type. + * @type: The event itself. See &enum ieee80211_event_type. 
* @rssi: relevant if &type is %RSSI_EVENT * @mlme: relevant if &type is %AUTH_EVENT + * @u: union holding the above two fields */ struct ieee80211_event { enum ieee80211_event_type type; diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index c56a438c3a1e..ce13cf20f625 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -574,11 +574,14 @@ static inline void sctp_v6_map_v4(union sctp_addr *addr) /* Map v4 address to v4-mapped v6 address */ static inline void sctp_v4_map_v6(union sctp_addr *addr) { + __be16 port; + + port = addr->v4.sin_port; + addr->v6.sin6_addr.s6_addr32[3] = addr->v4.sin_addr.s_addr; + addr->v6.sin6_port = port; addr->v6.sin6_family = AF_INET6; addr->v6.sin6_flowinfo = 0; addr->v6.sin6_scope_id = 0; - addr->v6.sin6_port = addr->v4.sin_port; - addr->v6.sin6_addr.s6_addr32[3] = addr->v4.sin_addr.s_addr; addr->v6.sin6_addr.s6_addr32[0] = 0; addr->v6.sin6_addr.s6_addr32[1] = 0; addr->v6.sin6_addr.s6_addr32[2] = htonl(0x0000ffff); diff --git a/include/sound/hda_regmap.h b/include/sound/hda_regmap.h index 53a18b3635e2..df705908480a 100644 --- a/include/sound/hda_regmap.h +++ b/include/sound/hda_regmap.h @@ -9,6 +9,8 @@ #include <sound/core.h> #include <sound/hdaudio.h> +#define AC_AMP_FAKE_MUTE 0x10 /* fake mute bit set to amp verbs */ + int snd_hdac_regmap_init(struct hdac_device *codec); void snd_hdac_regmap_exit(struct hdac_device *codec); int snd_hdac_regmap_add_vendor_verb(struct hdac_device *codec, diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index d61be7297b2c..5f1225706993 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -1,9 +1,7 @@ #ifndef TARGET_CORE_BACKEND_H #define TARGET_CORE_BACKEND_H -#define TRANSPORT_PLUGIN_PHBA_PDEV 1 -#define TRANSPORT_PLUGIN_VHBA_PDEV 2 -#define TRANSPORT_PLUGIN_VHBA_VDEV 3 +#define TRANSPORT_FLAG_PASSTHROUGH 1 struct target_backend_cits { struct config_item_type tb_dev_cit; @@ -22,7 +20,7 @@ struct se_subsystem_api { char inquiry_rev[4]; struct module *owner; - u8 transport_type; + u8 transport_flags; int (*attach_hba)(struct se_hba *, u32); void (*detach_hba)(struct se_hba *); @@ -138,5 +136,7 @@ int se_dev_set_queue_depth(struct se_device *, u32); int se_dev_set_max_sectors(struct se_device *, u32); int se_dev_set_optimal_sectors(struct se_device *, u32); int se_dev_set_block_size(struct se_device *, u32); +sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd, + sense_reason_t (*exec_cmd)(struct se_cmd *cmd)); #endif /* TARGET_CORE_BACKEND_H */ diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h index 25bb04c4209e..b99c01170392 100644 --- a/include/target/target_core_configfs.h +++ b/include/target/target_core_configfs.h @@ -40,8 +40,6 @@ struct target_fabric_configfs { struct config_item *tf_fabric; /* Passed from fabric modules */ struct config_item_type *tf_fabric_cit; - /* Pointer to target core subsystem */ - struct configfs_subsystem *tf_subsys; /* Pointer to fabric's struct module */ struct module *tf_module; struct target_core_fabric_ops tf_ops; diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 17c7f5ac7ea0..0f4dc3768587 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -4,7 +4,6 @@ struct target_core_fabric_ops { struct module *module; const char *name; - struct configfs_subsystem *tf_subsys; char *(*get_fabric_name)(void); u8 (*get_fabric_proto_ident)(struct se_portal_group *); 
char *(*tpg_get_wwn)(struct se_portal_group *); @@ -109,6 +108,9 @@ struct target_core_fabric_ops { int target_register_template(const struct target_core_fabric_ops *fo); void target_unregister_template(const struct target_core_fabric_ops *fo); +int target_depend_item(struct config_item *item); +void target_undepend_item(struct config_item *item); + struct se_session *transport_init_session(enum target_prot_op); int transport_alloc_session_tags(struct se_session *, unsigned int, unsigned int); diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 81ea59812117..f7554fd7fc62 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -140,19 +140,42 @@ DEFINE_EVENT(kmem_free, kfree, TP_ARGS(call_site, ptr) ); -DEFINE_EVENT(kmem_free, kmem_cache_free, +DEFINE_EVENT_CONDITION(kmem_free, kmem_cache_free, TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr) + TP_ARGS(call_site, ptr), + + /* + * This trace can be potentially called from an offlined cpu. + * Since trace points use RCU and RCU should not be used from + * offline cpus, filter such calls out. + * While this trace can be called from a preemptable section, + * it has no impact on the condition since tasks can migrate + * only from online cpus to other online cpus. Thus its safe + * to use raw_smp_processor_id. + */ + TP_CONDITION(cpu_online(raw_smp_processor_id())) ); -TRACE_EVENT(mm_page_free, +TRACE_EVENT_CONDITION(mm_page_free, TP_PROTO(struct page *page, unsigned int order), TP_ARGS(page, order), + + /* + * This trace can be potentially called from an offlined cpu. + * Since trace points use RCU and RCU should not be used from + * offline cpus, filter such calls out. + * While this trace can be called from a preemptable section, + * it has no impact on the condition since tasks can migrate + * only from online cpus to other online cpus. Thus its safe + * to use raw_smp_processor_id. + */ + TP_CONDITION(cpu_online(raw_smp_processor_id())), + TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) @@ -253,12 +276,35 @@ DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked, TP_ARGS(page, order, migratetype) ); -DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain, +TRACE_EVENT_CONDITION(mm_page_pcpu_drain, TP_PROTO(struct page *page, unsigned int order, int migratetype), TP_ARGS(page, order, migratetype), + /* + * This trace can be potentially called from an offlined cpu. + * Since trace points use RCU and RCU should not be used from + * offline cpus, filter such calls out. + * While this trace can be called from a preemptable section, + * it has no impact on the condition since tasks can migrate + * only from online cpus to other online cpus. Thus its safe + * to use raw_smp_processor_id. + */ + TP_CONDITION(cpu_online(raw_smp_processor_id())), + + TP_STRUCT__entry( + __field( unsigned long, pfn ) + __field( unsigned int, order ) + __field( int, migratetype ) + ), + + TP_fast_assign( + __entry->pfn = page ? 
page_to_pfn(page) : -1UL; + __entry->order = order; + __entry->migratetype = migratetype; + ), + TP_printk("page=%p pfn=%lu order=%d migratetype=%d", pfn_to_page(__entry->pfn), __entry->pfn, __entry->order, __entry->migratetype) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 30fedaf3e56a..d57a575fe31f 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -147,7 +147,8 @@ TRACE_EVENT(sched_switch, __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" }, { 64, "x" }, - { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R", + { 128, "K" }, { 256, "W" }, { 512, "P" }, + { 1024, "N" }) : "R", __entry->prev_state & TASK_STATE_MAX ? "+" : "", __entry->next_comm, __entry->next_pid, __entry->next_prio) ); diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 68c2c2000f02..073b9ac245ba 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -43,15 +43,18 @@ DEFINE_EVENT(timer_class, timer_init, */ TRACE_EVENT(timer_start, - TP_PROTO(struct timer_list *timer, unsigned long expires), + TP_PROTO(struct timer_list *timer, + unsigned long expires, + unsigned int flags), - TP_ARGS(timer, expires), + TP_ARGS(timer, expires, flags), TP_STRUCT__entry( __field( void *, timer ) __field( void *, function ) __field( unsigned long, expires ) __field( unsigned long, now ) + __field( unsigned int, flags ) ), TP_fast_assign( @@ -59,11 +62,12 @@ TRACE_EVENT(timer_start, __entry->function = timer->function; __entry->expires = expires; __entry->now = jiffies; + __entry->flags = flags; ), - TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]", + TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld] flags=0x%08x", __entry->timer, __entry->function, __entry->expires, - (long)__entry->expires - __entry->now) + (long)__entry->expires - __entry->now, __entry->flags) ); /** diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 880dd7437172..c178d13d6f4c 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -250,7 +250,6 @@ DEFINE_EVENT(writeback_class, name, \ DEFINE_WRITEBACK_EVENT(writeback_nowork); DEFINE_WRITEBACK_EVENT(writeback_wake_background); DEFINE_WRITEBACK_EVENT(writeback_bdi_register); -DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister); DECLARE_EVENT_CLASS(wbc_class, TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 871e73f99a4d..94d44ab2fda1 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -1038,6 +1038,7 @@ struct drm_radeon_cs { #define RADEON_INFO_CURRENT_GPU_SCLK 0x22 #define RADEON_INFO_CURRENT_GPU_MCLK 0x23 #define RADEON_INFO_READ_REG 0x24 +#define RADEON_INFO_VA_UNMAP_WORKING 0x25 struct drm_radeon_info { uint32_t request; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 309211b3eb67..d97f84c080da 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -167,6 +167,7 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -186,6 +187,7 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, 
PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -564,6 +566,10 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_GUEST_USER (5 << 0) /* + * Indicates that /proc/PID/maps parsing are truncated by time out. + */ +#define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) +/* * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on * different events so can reuse the same bit position. */ @@ -800,6 +806,18 @@ enum perf_event_type { */ PERF_RECORD_ITRACE_START = 12, + /* + * Records the dropped/lost sample number. + * + * struct { + * struct perf_event_header header; + * + * u64 lost; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_LOST_SAMPLES = 13, + PERF_RECORD_MAX, /* non-ABI */ }; diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 984169a819ee..d7f1cbc3766c 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -26,6 +26,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include <linux/types.h> +#include <linux/virtio_types.h> #include <linux/virtio_ids.h> #include <linux/virtio_config.h> |
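
A few of the interface changes above are easier to read with a usage sketch alongside; the sketches that follow are illustrative only and are not part of the diff. The uaccess.h hunk replaces the preempt-count-based pagefault_disable()/pagefault_enable() pair with a per-task counter, so disabling pagefaults no longer implies disabling preemption, and fault handlers are expected to check faulthandler_disabled() rather than in_atomic() alone. A minimal kernel-context sketch of the intended calling pattern is below; the helper name probe_user_word() is hypothetical and the snippet only builds inside a kernel tree carrying this change.

#include <linux/uaccess.h>

/* Hypothetical helper: fetch one word of user memory without sleeping. */
static int probe_user_word(const void __user *uaddr, unsigned long *val)
{
	unsigned long left;

	pagefault_disable();	/* bumps current->pagefault_disabled */
	left = __copy_from_user_inatomic(val, uaddr, sizeof(*val));
	pagefault_enable();	/* drops the counter, WARNs on underflow */

	return left ? -EFAULT : 0;
}

While the counter is non-zero, pagefault_disabled() is true and the fault handler takes the fixup path instead of sleeping; code that additionally needs atomicity against preemption must now ask for it explicitly with preempt_disable(), as the generic futex hunk does.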
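
The wait.h changes only tighten the wait_on_bit*() prototypes from void * to unsigned long *, matching what test_bit()/test_and_set_bit() operate on. As a reminder of the calling convention, here is a small hypothetical example (struct mydev, MYDEV_RESETTING and mydev_wait_for_reset() are invented for illustration):

#include <linux/sched.h>
#include <linux/wait.h>

#define MYDEV_RESETTING	0	/* hypothetical flag bit */

struct mydev {
	unsigned long flags;	/* must be an unsigned long bitmap */
};

static int mydev_wait_for_reset(struct mydev *dev)
{
	/*
	 * Sleeps interruptibly until bit 0 of dev->flags is clear; the
	 * path that finishes the reset must clear_bit() and wake_up_bit().
	 */
	return wait_on_bit(&dev->flags, MYDEV_RESETTING, TASK_INTERRUPTIBLE);
}

With the typed prototype, passing anything other than an unsigned long word (for example a u32 on a 64-bit build) now triggers a compiler warning instead of going unnoticed.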
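
The sctp_v4_map_v6() reordering matters because union sctp_addr overlays struct sockaddr_in on struct sockaddr_in6: sin6_flowinfo shares storage with sin_addr, so the old ordering zeroed the IPv4 address before copying it into the mapped address. The stand-alone userspace program below (an illustration, not kernel code; the union and function names are invented) reproduces the aliasing effect with the ordinary socket structures:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>

union demo_addr {			/* stand-in for union sctp_addr */
	struct sockaddr_in	v4;
	struct sockaddr_in6	v6;
};

/* Old ordering: sin6_flowinfo = 0 clobbers the aliased v4.sin_addr. */
static void map_v4_to_v6_broken(union demo_addr *a)
{
	a->v6.sin6_family = AF_INET6;
	a->v6.sin6_flowinfo = 0;		/* overwrites a->v4.sin_addr */
	a->v6.sin6_scope_id = 0;
	a->v6.sin6_port = a->v4.sin_port;
	a->v6.sin6_addr.s6_addr32[3] = a->v4.sin_addr.s_addr;	/* reads 0 */
	a->v6.sin6_addr.s6_addr32[0] = 0;
	a->v6.sin6_addr.s6_addr32[1] = 0;
	a->v6.sin6_addr.s6_addr32[2] = htonl(0x0000ffff);
}

/* New ordering, mirroring the patch: capture the v4 fields first. */
static void map_v4_to_v6_fixed(union demo_addr *a)
{
	in_port_t port = a->v4.sin_port;

	a->v6.sin6_addr.s6_addr32[3] = a->v4.sin_addr.s_addr;
	a->v6.sin6_port = port;
	a->v6.sin6_family = AF_INET6;
	a->v6.sin6_flowinfo = 0;
	a->v6.sin6_scope_id = 0;
	a->v6.sin6_addr.s6_addr32[0] = 0;
	a->v6.sin6_addr.s6_addr32[1] = 0;
	a->v6.sin6_addr.s6_addr32[2] = htonl(0x0000ffff);
}

int main(void)
{
	union demo_addr a, b;
	char buf[INET6_ADDRSTRLEN];

	memset(&a, 0, sizeof(a));
	a.v4.sin_family = AF_INET;
	a.v4.sin_port = htons(9);
	inet_pton(AF_INET, "192.0.2.1", &a.v4.sin_addr);
	b = a;

	map_v4_to_v6_broken(&a);
	map_v4_to_v6_fixed(&b);

	printf("broken: %s\n", inet_ntop(AF_INET6, &a.v6.sin6_addr, buf, sizeof(buf)));
	printf("fixed:  %s\n", inet_ntop(AF_INET6, &b.v6.sin6_addr, buf, sizeof(buf)));
	return 0;
}

With the standard sockaddr layouts, the broken ordering should print ::ffff:0.0.0.0 while the fixed ordering preserves ::ffff:192.0.2.1.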
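
PERF_SAMPLE_BRANCH_IND_JUMP exposes the PMU's indirect-jump branch filter to userspace. The sketch below opens such an event on the calling thread; it assumes a kernel that already carries this uapi change and hardware whose branch-stack filtering supports the bit, otherwise perf_event_open() fails with EINVAL or EOPNOTSUPP.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER |
				  PERF_SAMPLE_BRANCH_IND_JUMP;
	attr.exclude_kernel = 1;
	attr.disabled = 1;

	fd = perf_event_open(&attr, 0 /* self */, -1 /* any cpu */, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	puts("indirect-jump branch sampling event opened");
	close(fd);
	return 0;
}

The new PERF_RECORD_LOST_SAMPLES record type and the PROC_MAP_PARSE_TIMEOUT misc flag added in the same header are consumed by the perf tool when reading the ring buffer; they need no setup on the opening side.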