diff options
Diffstat (limited to 'include/linux')
213 files changed, 4836 insertions, 2320 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d661cd0ee64d..1e4cdc6c7ae2 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -905,6 +905,13 @@ static inline int acpi_dma_configure(struct device *dev, return 0; } +static inline int acpi_dma_configure_id(struct device *dev, + enum dev_dma_attr attr, + const u32 *input_id) +{ + return 0; +} + #define ACPI_PTR(_ptr) (NULL) static inline void acpi_device_set_enumerated(struct acpi_device *adev) @@ -1143,16 +1150,27 @@ struct acpi_probe_entry { kernel_ulong_t driver_data; }; -#define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, valid, data, fn) \ +#define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, \ + valid, data, fn) \ + static const struct acpi_probe_entry __acpi_probe_##name \ + __used __section(__##table##_acpi_probe_table) = { \ + .id = table_id, \ + .type = subtable, \ + .subtable_valid = valid, \ + .probe_table = fn, \ + .driver_data = data, \ + } + +#define ACPI_DECLARE_SUBTABLE_PROBE_ENTRY(table, name, table_id, \ + subtable, valid, data, fn) \ static const struct acpi_probe_entry __acpi_probe_##name \ - __used __section(__##table##_acpi_probe_table) \ - = { \ + __used __section(__##table##_acpi_probe_table) = { \ .id = table_id, \ .type = subtable, \ .subtable_valid = valid, \ - .probe_table = (acpi_tbl_table_handler)fn, \ - .driver_data = data, \ - } + .probe_subtbl = fn, \ + .driver_data = data, \ + } #define ACPI_PROBE_TABLE(name) __##name##_acpi_probe_table #define ACPI_PROBE_TABLE_END(name) __##name##_acpi_probe_table_end diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 8e7e2ec37f1b..20a32120bb88 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -28,27 +28,29 @@ void iort_deregister_domain_token(int trans_id); struct fwnode_handle *iort_find_domain_token(int trans_id); #ifdef CONFIG_ACPI_IORT void acpi_iort_init(void); -u32 iort_msi_map_rid(struct device *dev, u32 req_id); -struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id); +u32 iort_msi_map_id(struct device *dev, u32 id); +struct irq_domain *iort_get_device_domain(struct device *dev, u32 id, + enum irq_domain_bus_token bus_token); void acpi_configure_pmsi_domain(struct device *dev); int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size); -const struct iommu_ops *iort_iommu_configure(struct device *dev); +const struct iommu_ops *iort_iommu_configure_id(struct device *dev, + const u32 *id_in); int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); #else static inline void acpi_iort_init(void) { } -static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id) -{ return req_id; } -static inline struct irq_domain *iort_get_device_domain(struct device *dev, - u32 req_id) +static inline u32 iort_msi_map_id(struct device *dev, u32 id) +{ return id; } +static inline struct irq_domain *iort_get_device_domain( + struct device *dev, u32 id, enum irq_domain_bus_token bus_token) { return NULL; } static inline void acpi_configure_pmsi_domain(struct device *dev) { } /* IOMMU interface */ static inline void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size) { } -static inline const struct iommu_ops *iort_iommu_configure( - struct device *dev) +static inline const struct iommu_ops *iort_iommu_configure_id( + struct device *dev, const u32 *id_in) { return NULL; } static inline int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 0566cb3314ef..69b1dabe39dc 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -39,8 +39,8 @@ static inline unsigned long topology_get_thermal_pressure(int cpu) return per_cpu(thermal_pressure, cpu); } -void arch_set_thermal_pressure(struct cpumask *cpus, - unsigned long th_pressure); +void topology_set_thermal_pressure(const struct cpumask *cpus, + unsigned long th_pressure); struct cpu_topology { int thread_id; diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 56d6a5c6e353..15c706fb0a37 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -71,6 +71,11 @@ ARM_SMCCC_SMC_32, \ 0, 1) +#define ARM_SMCCC_ARCH_SOC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 2) + #define ARM_SMCCC_ARCH_WORKAROUND_1 \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ ARM_SMCCC_SMC_32, \ @@ -81,6 +86,28 @@ ARM_SMCCC_SMC_32, \ 0, 0x7fff) +/* Paravirtualised time calls (defined by ARM DEN0057A) */ +#define ARM_SMCCC_HV_PV_TIME_FEATURES \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x20) + +#define ARM_SMCCC_HV_PV_TIME_ST \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x21) + +/* + * Return codes defined in ARM DEN 0070A + * ARM DEN 0070A is now merged/consolidated into ARM DEN 0028 C + */ +#define SMCCC_RET_SUCCESS 0 +#define SMCCC_RET_NOT_SUPPORTED -1 +#define SMCCC_RET_NOT_REQUIRED -2 +#define SMCCC_RET_INVALID_PARAMETER -3 + #ifndef __ASSEMBLY__ #include <linux/linkage.h> @@ -332,15 +359,6 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #define arm_smccc_1_1_hvc(...) __arm_smccc_1_1(SMCCC_HVC_INST, __VA_ARGS__) /* - * Return codes defined in ARM DEN 0070A - * ARM DEN 0070A is now merged/consolidated into ARM DEN 0028 C - */ -#define SMCCC_RET_SUCCESS 0 -#define SMCCC_RET_NOT_SUPPORTED -1 -#define SMCCC_RET_NOT_REQUIRED -2 -#define SMCCC_RET_INVALID_PARAMETER -3 - -/* * Like arm_smccc_1_1* but always returns SMCCC_RET_NOT_SUPPORTED. * Used when the SMCCC conduit is not defined. The empty asm statement * avoids compiler warnings about unused variables. @@ -385,18 +403,5 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, method; \ }) -/* Paravirtualised time calls (defined by ARM DEN0057A) */ -#define ARM_SMCCC_HV_PV_TIME_FEATURES \ - ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ - ARM_SMCCC_SMC_64, \ - ARM_SMCCC_OWNER_STANDARD_HYP, \ - 0x20) - -#define ARM_SMCCC_HV_PV_TIME_ST \ - ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ - ARM_SMCCC_SMC_64, \ - ARM_SMCCC_OWNER_STANDARD_HYP, \ - 0x21) - #endif /*__ASSEMBLY__*/ #endif /*__LINUX_ARM_SMCCC_H*/ diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h index 2c4927bf7b8d..fd525c71d676 100644 --- a/include/linux/atomic-fallback.h +++ b/include/linux/atomic-fallback.h @@ -77,6 +77,9 @@ #endif /* cmpxchg64_relaxed */ +#define arch_atomic_read atomic_read +#define arch_atomic_read_acquire atomic_read_acquire + #ifndef atomic_read_acquire static __always_inline int atomic_read_acquire(const atomic_t *v) @@ -86,6 +89,9 @@ atomic_read_acquire(const atomic_t *v) #define atomic_read_acquire atomic_read_acquire #endif +#define arch_atomic_set atomic_set +#define arch_atomic_set_release atomic_set_release + #ifndef atomic_set_release static __always_inline void atomic_set_release(atomic_t *v, int i) @@ -95,6 +101,13 @@ atomic_set_release(atomic_t *v, int i) #define atomic_set_release atomic_set_release #endif +#define arch_atomic_add atomic_add + +#define arch_atomic_add_return atomic_add_return +#define arch_atomic_add_return_acquire atomic_add_return_acquire +#define arch_atomic_add_return_release atomic_add_return_release +#define arch_atomic_add_return_relaxed atomic_add_return_relaxed + #ifndef atomic_add_return_relaxed #define atomic_add_return_acquire atomic_add_return #define atomic_add_return_release atomic_add_return @@ -137,6 +150,11 @@ atomic_add_return(int i, atomic_t *v) #endif /* atomic_add_return_relaxed */ +#define arch_atomic_fetch_add atomic_fetch_add +#define arch_atomic_fetch_add_acquire atomic_fetch_add_acquire +#define arch_atomic_fetch_add_release atomic_fetch_add_release +#define arch_atomic_fetch_add_relaxed atomic_fetch_add_relaxed + #ifndef atomic_fetch_add_relaxed #define atomic_fetch_add_acquire atomic_fetch_add #define atomic_fetch_add_release atomic_fetch_add @@ -179,6 +197,13 @@ atomic_fetch_add(int i, atomic_t *v) #endif /* atomic_fetch_add_relaxed */ +#define arch_atomic_sub atomic_sub + +#define arch_atomic_sub_return atomic_sub_return +#define arch_atomic_sub_return_acquire atomic_sub_return_acquire +#define arch_atomic_sub_return_release atomic_sub_return_release +#define arch_atomic_sub_return_relaxed atomic_sub_return_relaxed + #ifndef atomic_sub_return_relaxed #define atomic_sub_return_acquire atomic_sub_return #define atomic_sub_return_release atomic_sub_return @@ -221,6 +246,11 @@ atomic_sub_return(int i, atomic_t *v) #endif /* atomic_sub_return_relaxed */ +#define arch_atomic_fetch_sub atomic_fetch_sub +#define arch_atomic_fetch_sub_acquire atomic_fetch_sub_acquire +#define arch_atomic_fetch_sub_release atomic_fetch_sub_release +#define arch_atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + #ifndef atomic_fetch_sub_relaxed #define atomic_fetch_sub_acquire atomic_fetch_sub #define atomic_fetch_sub_release atomic_fetch_sub @@ -263,6 +293,8 @@ atomic_fetch_sub(int i, atomic_t *v) #endif /* atomic_fetch_sub_relaxed */ +#define arch_atomic_inc atomic_inc + #ifndef atomic_inc static __always_inline void atomic_inc(atomic_t *v) @@ -272,6 +304,11 @@ atomic_inc(atomic_t *v) #define atomic_inc atomic_inc #endif +#define arch_atomic_inc_return atomic_inc_return +#define arch_atomic_inc_return_acquire atomic_inc_return_acquire +#define arch_atomic_inc_return_release atomic_inc_return_release +#define arch_atomic_inc_return_relaxed atomic_inc_return_relaxed + #ifndef atomic_inc_return_relaxed #ifdef atomic_inc_return #define atomic_inc_return_acquire atomic_inc_return @@ -353,6 +390,11 @@ atomic_inc_return(atomic_t *v) #endif /* atomic_inc_return_relaxed */ +#define arch_atomic_fetch_inc atomic_fetch_inc +#define arch_atomic_fetch_inc_acquire atomic_fetch_inc_acquire +#define arch_atomic_fetch_inc_release atomic_fetch_inc_release +#define arch_atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed + #ifndef atomic_fetch_inc_relaxed #ifdef atomic_fetch_inc #define atomic_fetch_inc_acquire atomic_fetch_inc @@ -434,6 +476,8 @@ atomic_fetch_inc(atomic_t *v) #endif /* atomic_fetch_inc_relaxed */ +#define arch_atomic_dec atomic_dec + #ifndef atomic_dec static __always_inline void atomic_dec(atomic_t *v) @@ -443,6 +487,11 @@ atomic_dec(atomic_t *v) #define atomic_dec atomic_dec #endif +#define arch_atomic_dec_return atomic_dec_return +#define arch_atomic_dec_return_acquire atomic_dec_return_acquire +#define arch_atomic_dec_return_release atomic_dec_return_release +#define arch_atomic_dec_return_relaxed atomic_dec_return_relaxed + #ifndef atomic_dec_return_relaxed #ifdef atomic_dec_return #define atomic_dec_return_acquire atomic_dec_return @@ -524,6 +573,11 @@ atomic_dec_return(atomic_t *v) #endif /* atomic_dec_return_relaxed */ +#define arch_atomic_fetch_dec atomic_fetch_dec +#define arch_atomic_fetch_dec_acquire atomic_fetch_dec_acquire +#define arch_atomic_fetch_dec_release atomic_fetch_dec_release +#define arch_atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed + #ifndef atomic_fetch_dec_relaxed #ifdef atomic_fetch_dec #define atomic_fetch_dec_acquire atomic_fetch_dec @@ -605,6 +659,13 @@ atomic_fetch_dec(atomic_t *v) #endif /* atomic_fetch_dec_relaxed */ +#define arch_atomic_and atomic_and + +#define arch_atomic_fetch_and atomic_fetch_and +#define arch_atomic_fetch_and_acquire atomic_fetch_and_acquire +#define arch_atomic_fetch_and_release atomic_fetch_and_release +#define arch_atomic_fetch_and_relaxed atomic_fetch_and_relaxed + #ifndef atomic_fetch_and_relaxed #define atomic_fetch_and_acquire atomic_fetch_and #define atomic_fetch_and_release atomic_fetch_and @@ -647,6 +708,8 @@ atomic_fetch_and(int i, atomic_t *v) #endif /* atomic_fetch_and_relaxed */ +#define arch_atomic_andnot atomic_andnot + #ifndef atomic_andnot static __always_inline void atomic_andnot(int i, atomic_t *v) @@ -656,6 +719,11 @@ atomic_andnot(int i, atomic_t *v) #define atomic_andnot atomic_andnot #endif +#define arch_atomic_fetch_andnot atomic_fetch_andnot +#define arch_atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire +#define arch_atomic_fetch_andnot_release atomic_fetch_andnot_release +#define arch_atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed + #ifndef atomic_fetch_andnot_relaxed #ifdef atomic_fetch_andnot #define atomic_fetch_andnot_acquire atomic_fetch_andnot @@ -737,6 +805,13 @@ atomic_fetch_andnot(int i, atomic_t *v) #endif /* atomic_fetch_andnot_relaxed */ +#define arch_atomic_or atomic_or + +#define arch_atomic_fetch_or atomic_fetch_or +#define arch_atomic_fetch_or_acquire atomic_fetch_or_acquire +#define arch_atomic_fetch_or_release atomic_fetch_or_release +#define arch_atomic_fetch_or_relaxed atomic_fetch_or_relaxed + #ifndef atomic_fetch_or_relaxed #define atomic_fetch_or_acquire atomic_fetch_or #define atomic_fetch_or_release atomic_fetch_or @@ -779,6 +854,13 @@ atomic_fetch_or(int i, atomic_t *v) #endif /* atomic_fetch_or_relaxed */ +#define arch_atomic_xor atomic_xor + +#define arch_atomic_fetch_xor atomic_fetch_xor +#define arch_atomic_fetch_xor_acquire atomic_fetch_xor_acquire +#define arch_atomic_fetch_xor_release atomic_fetch_xor_release +#define arch_atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + #ifndef atomic_fetch_xor_relaxed #define atomic_fetch_xor_acquire atomic_fetch_xor #define atomic_fetch_xor_release atomic_fetch_xor @@ -821,6 +903,11 @@ atomic_fetch_xor(int i, atomic_t *v) #endif /* atomic_fetch_xor_relaxed */ +#define arch_atomic_xchg atomic_xchg +#define arch_atomic_xchg_acquire atomic_xchg_acquire +#define arch_atomic_xchg_release atomic_xchg_release +#define arch_atomic_xchg_relaxed atomic_xchg_relaxed + #ifndef atomic_xchg_relaxed #define atomic_xchg_acquire atomic_xchg #define atomic_xchg_release atomic_xchg @@ -863,6 +950,11 @@ atomic_xchg(atomic_t *v, int i) #endif /* atomic_xchg_relaxed */ +#define arch_atomic_cmpxchg atomic_cmpxchg +#define arch_atomic_cmpxchg_acquire atomic_cmpxchg_acquire +#define arch_atomic_cmpxchg_release atomic_cmpxchg_release +#define arch_atomic_cmpxchg_relaxed atomic_cmpxchg_relaxed + #ifndef atomic_cmpxchg_relaxed #define atomic_cmpxchg_acquire atomic_cmpxchg #define atomic_cmpxchg_release atomic_cmpxchg @@ -905,6 +997,11 @@ atomic_cmpxchg(atomic_t *v, int old, int new) #endif /* atomic_cmpxchg_relaxed */ +#define arch_atomic_try_cmpxchg atomic_try_cmpxchg +#define arch_atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire +#define arch_atomic_try_cmpxchg_release atomic_try_cmpxchg_release +#define arch_atomic_try_cmpxchg_relaxed atomic_try_cmpxchg_relaxed + #ifndef atomic_try_cmpxchg_relaxed #ifdef atomic_try_cmpxchg #define atomic_try_cmpxchg_acquire atomic_try_cmpxchg @@ -1002,6 +1099,8 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new) #endif /* atomic_try_cmpxchg_relaxed */ +#define arch_atomic_sub_and_test atomic_sub_and_test + #ifndef atomic_sub_and_test /** * atomic_sub_and_test - subtract value from variable and test result @@ -1020,6 +1119,8 @@ atomic_sub_and_test(int i, atomic_t *v) #define atomic_sub_and_test atomic_sub_and_test #endif +#define arch_atomic_dec_and_test atomic_dec_and_test + #ifndef atomic_dec_and_test /** * atomic_dec_and_test - decrement and test @@ -1037,6 +1138,8 @@ atomic_dec_and_test(atomic_t *v) #define atomic_dec_and_test atomic_dec_and_test #endif +#define arch_atomic_inc_and_test atomic_inc_and_test + #ifndef atomic_inc_and_test /** * atomic_inc_and_test - increment and test @@ -1054,6 +1157,8 @@ atomic_inc_and_test(atomic_t *v) #define atomic_inc_and_test atomic_inc_and_test #endif +#define arch_atomic_add_negative atomic_add_negative + #ifndef atomic_add_negative /** * atomic_add_negative - add and test if negative @@ -1072,6 +1177,8 @@ atomic_add_negative(int i, atomic_t *v) #define atomic_add_negative atomic_add_negative #endif +#define arch_atomic_fetch_add_unless atomic_fetch_add_unless + #ifndef atomic_fetch_add_unless /** * atomic_fetch_add_unless - add unless the number is already a given value @@ -1097,6 +1204,8 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u) #define atomic_fetch_add_unless atomic_fetch_add_unless #endif +#define arch_atomic_add_unless atomic_add_unless + #ifndef atomic_add_unless /** * atomic_add_unless - add unless the number is already a given value @@ -1115,6 +1224,8 @@ atomic_add_unless(atomic_t *v, int a, int u) #define atomic_add_unless atomic_add_unless #endif +#define arch_atomic_inc_not_zero atomic_inc_not_zero + #ifndef atomic_inc_not_zero /** * atomic_inc_not_zero - increment unless the number is zero @@ -1131,6 +1242,8 @@ atomic_inc_not_zero(atomic_t *v) #define atomic_inc_not_zero atomic_inc_not_zero #endif +#define arch_atomic_inc_unless_negative atomic_inc_unless_negative + #ifndef atomic_inc_unless_negative static __always_inline bool atomic_inc_unless_negative(atomic_t *v) @@ -1147,6 +1260,8 @@ atomic_inc_unless_negative(atomic_t *v) #define atomic_inc_unless_negative atomic_inc_unless_negative #endif +#define arch_atomic_dec_unless_positive atomic_dec_unless_positive + #ifndef atomic_dec_unless_positive static __always_inline bool atomic_dec_unless_positive(atomic_t *v) @@ -1163,6 +1278,8 @@ atomic_dec_unless_positive(atomic_t *v) #define atomic_dec_unless_positive atomic_dec_unless_positive #endif +#define arch_atomic_dec_if_positive atomic_dec_if_positive + #ifndef atomic_dec_if_positive static __always_inline int atomic_dec_if_positive(atomic_t *v) @@ -1184,6 +1301,9 @@ atomic_dec_if_positive(atomic_t *v) #include <asm-generic/atomic64.h> #endif +#define arch_atomic64_read atomic64_read +#define arch_atomic64_read_acquire atomic64_read_acquire + #ifndef atomic64_read_acquire static __always_inline s64 atomic64_read_acquire(const atomic64_t *v) @@ -1193,6 +1313,9 @@ atomic64_read_acquire(const atomic64_t *v) #define atomic64_read_acquire atomic64_read_acquire #endif +#define arch_atomic64_set atomic64_set +#define arch_atomic64_set_release atomic64_set_release + #ifndef atomic64_set_release static __always_inline void atomic64_set_release(atomic64_t *v, s64 i) @@ -1202,6 +1325,13 @@ atomic64_set_release(atomic64_t *v, s64 i) #define atomic64_set_release atomic64_set_release #endif +#define arch_atomic64_add atomic64_add + +#define arch_atomic64_add_return atomic64_add_return +#define arch_atomic64_add_return_acquire atomic64_add_return_acquire +#define arch_atomic64_add_return_release atomic64_add_return_release +#define arch_atomic64_add_return_relaxed atomic64_add_return_relaxed + #ifndef atomic64_add_return_relaxed #define atomic64_add_return_acquire atomic64_add_return #define atomic64_add_return_release atomic64_add_return @@ -1244,6 +1374,11 @@ atomic64_add_return(s64 i, atomic64_t *v) #endif /* atomic64_add_return_relaxed */ +#define arch_atomic64_fetch_add atomic64_fetch_add +#define arch_atomic64_fetch_add_acquire atomic64_fetch_add_acquire +#define arch_atomic64_fetch_add_release atomic64_fetch_add_release +#define arch_atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed + #ifndef atomic64_fetch_add_relaxed #define atomic64_fetch_add_acquire atomic64_fetch_add #define atomic64_fetch_add_release atomic64_fetch_add @@ -1286,6 +1421,13 @@ atomic64_fetch_add(s64 i, atomic64_t *v) #endif /* atomic64_fetch_add_relaxed */ +#define arch_atomic64_sub atomic64_sub + +#define arch_atomic64_sub_return atomic64_sub_return +#define arch_atomic64_sub_return_acquire atomic64_sub_return_acquire +#define arch_atomic64_sub_return_release atomic64_sub_return_release +#define arch_atomic64_sub_return_relaxed atomic64_sub_return_relaxed + #ifndef atomic64_sub_return_relaxed #define atomic64_sub_return_acquire atomic64_sub_return #define atomic64_sub_return_release atomic64_sub_return @@ -1328,6 +1470,11 @@ atomic64_sub_return(s64 i, atomic64_t *v) #endif /* atomic64_sub_return_relaxed */ +#define arch_atomic64_fetch_sub atomic64_fetch_sub +#define arch_atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire +#define arch_atomic64_fetch_sub_release atomic64_fetch_sub_release +#define arch_atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + #ifndef atomic64_fetch_sub_relaxed #define atomic64_fetch_sub_acquire atomic64_fetch_sub #define atomic64_fetch_sub_release atomic64_fetch_sub @@ -1370,6 +1517,8 @@ atomic64_fetch_sub(s64 i, atomic64_t *v) #endif /* atomic64_fetch_sub_relaxed */ +#define arch_atomic64_inc atomic64_inc + #ifndef atomic64_inc static __always_inline void atomic64_inc(atomic64_t *v) @@ -1379,6 +1528,11 @@ atomic64_inc(atomic64_t *v) #define atomic64_inc atomic64_inc #endif +#define arch_atomic64_inc_return atomic64_inc_return +#define arch_atomic64_inc_return_acquire atomic64_inc_return_acquire +#define arch_atomic64_inc_return_release atomic64_inc_return_release +#define arch_atomic64_inc_return_relaxed atomic64_inc_return_relaxed + #ifndef atomic64_inc_return_relaxed #ifdef atomic64_inc_return #define atomic64_inc_return_acquire atomic64_inc_return @@ -1460,6 +1614,11 @@ atomic64_inc_return(atomic64_t *v) #endif /* atomic64_inc_return_relaxed */ +#define arch_atomic64_fetch_inc atomic64_fetch_inc +#define arch_atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire +#define arch_atomic64_fetch_inc_release atomic64_fetch_inc_release +#define arch_atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed + #ifndef atomic64_fetch_inc_relaxed #ifdef atomic64_fetch_inc #define atomic64_fetch_inc_acquire atomic64_fetch_inc @@ -1541,6 +1700,8 @@ atomic64_fetch_inc(atomic64_t *v) #endif /* atomic64_fetch_inc_relaxed */ +#define arch_atomic64_dec atomic64_dec + #ifndef atomic64_dec static __always_inline void atomic64_dec(atomic64_t *v) @@ -1550,6 +1711,11 @@ atomic64_dec(atomic64_t *v) #define atomic64_dec atomic64_dec #endif +#define arch_atomic64_dec_return atomic64_dec_return +#define arch_atomic64_dec_return_acquire atomic64_dec_return_acquire +#define arch_atomic64_dec_return_release atomic64_dec_return_release +#define arch_atomic64_dec_return_relaxed atomic64_dec_return_relaxed + #ifndef atomic64_dec_return_relaxed #ifdef atomic64_dec_return #define atomic64_dec_return_acquire atomic64_dec_return @@ -1631,6 +1797,11 @@ atomic64_dec_return(atomic64_t *v) #endif /* atomic64_dec_return_relaxed */ +#define arch_atomic64_fetch_dec atomic64_fetch_dec +#define arch_atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire +#define arch_atomic64_fetch_dec_release atomic64_fetch_dec_release +#define arch_atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed + #ifndef atomic64_fetch_dec_relaxed #ifdef atomic64_fetch_dec #define atomic64_fetch_dec_acquire atomic64_fetch_dec @@ -1712,6 +1883,13 @@ atomic64_fetch_dec(atomic64_t *v) #endif /* atomic64_fetch_dec_relaxed */ +#define arch_atomic64_and atomic64_and + +#define arch_atomic64_fetch_and atomic64_fetch_and +#define arch_atomic64_fetch_and_acquire atomic64_fetch_and_acquire +#define arch_atomic64_fetch_and_release atomic64_fetch_and_release +#define arch_atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed + #ifndef atomic64_fetch_and_relaxed #define atomic64_fetch_and_acquire atomic64_fetch_and #define atomic64_fetch_and_release atomic64_fetch_and @@ -1754,6 +1932,8 @@ atomic64_fetch_and(s64 i, atomic64_t *v) #endif /* atomic64_fetch_and_relaxed */ +#define arch_atomic64_andnot atomic64_andnot + #ifndef atomic64_andnot static __always_inline void atomic64_andnot(s64 i, atomic64_t *v) @@ -1763,6 +1943,11 @@ atomic64_andnot(s64 i, atomic64_t *v) #define atomic64_andnot atomic64_andnot #endif +#define arch_atomic64_fetch_andnot atomic64_fetch_andnot +#define arch_atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire +#define arch_atomic64_fetch_andnot_release atomic64_fetch_andnot_release +#define arch_atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed + #ifndef atomic64_fetch_andnot_relaxed #ifdef atomic64_fetch_andnot #define atomic64_fetch_andnot_acquire atomic64_fetch_andnot @@ -1844,6 +2029,13 @@ atomic64_fetch_andnot(s64 i, atomic64_t *v) #endif /* atomic64_fetch_andnot_relaxed */ +#define arch_atomic64_or atomic64_or + +#define arch_atomic64_fetch_or atomic64_fetch_or +#define arch_atomic64_fetch_or_acquire atomic64_fetch_or_acquire +#define arch_atomic64_fetch_or_release atomic64_fetch_or_release +#define arch_atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed + #ifndef atomic64_fetch_or_relaxed #define atomic64_fetch_or_acquire atomic64_fetch_or #define atomic64_fetch_or_release atomic64_fetch_or @@ -1886,6 +2078,13 @@ atomic64_fetch_or(s64 i, atomic64_t *v) #endif /* atomic64_fetch_or_relaxed */ +#define arch_atomic64_xor atomic64_xor + +#define arch_atomic64_fetch_xor atomic64_fetch_xor +#define arch_atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire +#define arch_atomic64_fetch_xor_release atomic64_fetch_xor_release +#define arch_atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + #ifndef atomic64_fetch_xor_relaxed #define atomic64_fetch_xor_acquire atomic64_fetch_xor #define atomic64_fetch_xor_release atomic64_fetch_xor @@ -1928,6 +2127,11 @@ atomic64_fetch_xor(s64 i, atomic64_t *v) #endif /* atomic64_fetch_xor_relaxed */ +#define arch_atomic64_xchg atomic64_xchg +#define arch_atomic64_xchg_acquire atomic64_xchg_acquire +#define arch_atomic64_xchg_release atomic64_xchg_release +#define arch_atomic64_xchg_relaxed atomic64_xchg_relaxed + #ifndef atomic64_xchg_relaxed #define atomic64_xchg_acquire atomic64_xchg #define atomic64_xchg_release atomic64_xchg @@ -1970,6 +2174,11 @@ atomic64_xchg(atomic64_t *v, s64 i) #endif /* atomic64_xchg_relaxed */ +#define arch_atomic64_cmpxchg atomic64_cmpxchg +#define arch_atomic64_cmpxchg_acquire atomic64_cmpxchg_acquire +#define arch_atomic64_cmpxchg_release atomic64_cmpxchg_release +#define arch_atomic64_cmpxchg_relaxed atomic64_cmpxchg_relaxed + #ifndef atomic64_cmpxchg_relaxed #define atomic64_cmpxchg_acquire atomic64_cmpxchg #define atomic64_cmpxchg_release atomic64_cmpxchg @@ -2012,6 +2221,11 @@ atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) #endif /* atomic64_cmpxchg_relaxed */ +#define arch_atomic64_try_cmpxchg atomic64_try_cmpxchg +#define arch_atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire +#define arch_atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release +#define arch_atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg_relaxed + #ifndef atomic64_try_cmpxchg_relaxed #ifdef atomic64_try_cmpxchg #define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg @@ -2109,6 +2323,8 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) #endif /* atomic64_try_cmpxchg_relaxed */ +#define arch_atomic64_sub_and_test atomic64_sub_and_test + #ifndef atomic64_sub_and_test /** * atomic64_sub_and_test - subtract value from variable and test result @@ -2127,6 +2343,8 @@ atomic64_sub_and_test(s64 i, atomic64_t *v) #define atomic64_sub_and_test atomic64_sub_and_test #endif +#define arch_atomic64_dec_and_test atomic64_dec_and_test + #ifndef atomic64_dec_and_test /** * atomic64_dec_and_test - decrement and test @@ -2144,6 +2362,8 @@ atomic64_dec_and_test(atomic64_t *v) #define atomic64_dec_and_test atomic64_dec_and_test #endif +#define arch_atomic64_inc_and_test atomic64_inc_and_test + #ifndef atomic64_inc_and_test /** * atomic64_inc_and_test - increment and test @@ -2161,6 +2381,8 @@ atomic64_inc_and_test(atomic64_t *v) #define atomic64_inc_and_test atomic64_inc_and_test #endif +#define arch_atomic64_add_negative atomic64_add_negative + #ifndef atomic64_add_negative /** * atomic64_add_negative - add and test if negative @@ -2179,6 +2401,8 @@ atomic64_add_negative(s64 i, atomic64_t *v) #define atomic64_add_negative atomic64_add_negative #endif +#define arch_atomic64_fetch_add_unless atomic64_fetch_add_unless + #ifndef atomic64_fetch_add_unless /** * atomic64_fetch_add_unless - add unless the number is already a given value @@ -2204,6 +2428,8 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) #define atomic64_fetch_add_unless atomic64_fetch_add_unless #endif +#define arch_atomic64_add_unless atomic64_add_unless + #ifndef atomic64_add_unless /** * atomic64_add_unless - add unless the number is already a given value @@ -2222,6 +2448,8 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u) #define atomic64_add_unless atomic64_add_unless #endif +#define arch_atomic64_inc_not_zero atomic64_inc_not_zero + #ifndef atomic64_inc_not_zero /** * atomic64_inc_not_zero - increment unless the number is zero @@ -2238,6 +2466,8 @@ atomic64_inc_not_zero(atomic64_t *v) #define atomic64_inc_not_zero atomic64_inc_not_zero #endif +#define arch_atomic64_inc_unless_negative atomic64_inc_unless_negative + #ifndef atomic64_inc_unless_negative static __always_inline bool atomic64_inc_unless_negative(atomic64_t *v) @@ -2254,6 +2484,8 @@ atomic64_inc_unless_negative(atomic64_t *v) #define atomic64_inc_unless_negative atomic64_inc_unless_negative #endif +#define arch_atomic64_dec_unless_positive atomic64_dec_unless_positive + #ifndef atomic64_dec_unless_positive static __always_inline bool atomic64_dec_unless_positive(atomic64_t *v) @@ -2270,6 +2502,8 @@ atomic64_dec_unless_positive(atomic64_t *v) #define atomic64_dec_unless_positive atomic64_dec_unless_positive #endif +#define arch_atomic64_dec_if_positive atomic64_dec_if_positive + #ifndef atomic64_dec_if_positive static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v) @@ -2288,4 +2522,4 @@ atomic64_dec_if_positive(atomic64_t *v) #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 1fac0941c79bf0ae100723cc2ac9b94061f0b67a +// 9d95b56f98d82a2a26c7b79ccdd0c47572d50a6f diff --git a/include/linux/audit.h b/include/linux/audit.h index 3fcd9ee49734..b3d859831a31 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -12,6 +12,7 @@ #include <linux/sched.h> #include <linux/ptrace.h> #include <uapi/linux/audit.h> +#include <uapi/linux/netfilter/nf_tables.h> #define AUDIT_INO_UNSET ((unsigned long)-1) #define AUDIT_DEV_UNSET ((dev_t)-1) @@ -98,6 +99,23 @@ enum audit_nfcfgop { AUDIT_XT_OP_REGISTER, AUDIT_XT_OP_REPLACE, AUDIT_XT_OP_UNREGISTER, + AUDIT_NFT_OP_TABLE_REGISTER, + AUDIT_NFT_OP_TABLE_UNREGISTER, + AUDIT_NFT_OP_CHAIN_REGISTER, + AUDIT_NFT_OP_CHAIN_UNREGISTER, + AUDIT_NFT_OP_RULE_REGISTER, + AUDIT_NFT_OP_RULE_UNREGISTER, + AUDIT_NFT_OP_SET_REGISTER, + AUDIT_NFT_OP_SET_UNREGISTER, + AUDIT_NFT_OP_SETELEM_REGISTER, + AUDIT_NFT_OP_SETELEM_UNREGISTER, + AUDIT_NFT_OP_GEN_REGISTER, + AUDIT_NFT_OP_OBJ_REGISTER, + AUDIT_NFT_OP_OBJ_UNREGISTER, + AUDIT_NFT_OP_OBJ_RESET, + AUDIT_NFT_OP_FLOWTABLE_REGISTER, + AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, + AUDIT_NFT_OP_INVALID, }; extern int is_audit_feature_set(int which); @@ -274,7 +292,7 @@ extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1, extern void __audit_syscall_exit(int ret_success, long ret_value); extern struct filename *__audit_reusename(const __user char *uptr); extern void __audit_getname(struct filename *name); - +extern void __audit_getcwd(void); extern void __audit_inode(struct filename *name, const struct dentry *dentry, unsigned int flags); extern void __audit_file(const struct file *); @@ -333,6 +351,11 @@ static inline void audit_getname(struct filename *name) if (unlikely(!audit_dummy_context())) __audit_getname(name); } +static inline void audit_getcwd(void) +{ + if (unlikely(audit_context())) + __audit_getcwd(); +} static inline void audit_inode(struct filename *name, const struct dentry *dentry, unsigned int aflags) { @@ -386,7 +409,7 @@ extern void __audit_fanotify(unsigned int response); extern void __audit_tk_injoffset(struct timespec64 offset); extern void __audit_ntp_log(const struct audit_ntp_data *ad); extern void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, - enum audit_nfcfgop op); + enum audit_nfcfgop op, gfp_t gfp); static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -524,10 +547,10 @@ static inline void audit_ntp_log(const struct audit_ntp_data *ad) static inline void audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, - enum audit_nfcfgop op) + enum audit_nfcfgop op, gfp_t gfp) { if (audit_enabled) - __audit_log_nfcfg(name, af, nentries, op); + __audit_log_nfcfg(name, af, nentries, op, gfp); } extern int audit_n_rules; @@ -561,13 +584,7 @@ static inline struct filename *audit_reusename(const __user char *name) } static inline void audit_getname(struct filename *name) { } -static inline void __audit_inode(struct filename *name, - const struct dentry *dentry, - unsigned int flags) -{ } -static inline void __audit_inode_child(struct inode *parent, - const struct dentry *dentry, - const unsigned char type) +static inline void audit_getcwd(void) { } static inline void audit_inode(struct filename *name, const struct dentry *dentry, @@ -665,7 +682,7 @@ static inline void audit_ptrace(struct task_struct *t) static inline void audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, - enum audit_nfcfgop op) + enum audit_nfcfgop op, gfp_t gfp) { } #define audit_n_rules 0 @@ -677,9 +694,4 @@ static inline bool audit_loginuid_set(struct task_struct *tsk) return uid_valid(audit_get_loginuid(tsk)); } -static inline void audit_log_string(struct audit_buffer *ab, const char *buf) -{ - audit_log_n_string(ab, buf, strlen(buf)); -} - #endif diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 90a7e844a098..fff9367a6348 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -33,8 +33,6 @@ enum wb_congested_state { WB_sync_congested, /* The sync queue is getting full */ }; -typedef int (congested_fn)(void *, int); - enum wb_stat_item { WB_RECLAIMABLE, WB_WRITEBACK, @@ -88,26 +86,6 @@ struct wb_completion { struct wb_completion cmpl = WB_COMPLETION_INIT(bdi) /* - * For cgroup writeback, multiple wb's may map to the same blkcg. Those - * wb's can operate mostly independently but should share the congested - * state. To facilitate such sharing, the congested state is tracked using - * the following struct which is created on demand, indexed by blkcg ID on - * its bdi, and refcounted. - */ -struct bdi_writeback_congested { - unsigned long state; /* WB_[a]sync_congested flags */ - refcount_t refcnt; /* nr of attached wb's and blkg */ - -#ifdef CONFIG_CGROUP_WRITEBACK - struct backing_dev_info *__bdi; /* the associated bdi, set to NULL - * on bdi unregistration. For memcg-wb - * internal use only! */ - int blkcg_id; /* ID of the associated blkcg */ - struct rb_node rb_node; /* on bdi->cgwb_congestion_tree */ -#endif -}; - -/* * Each wb (bdi_writeback) can perform writeback operations, is measured * and throttled, independently. Without cgroup writeback, each bdi * (bdi_writeback) is served by its embedded bdi->wb. @@ -140,7 +118,7 @@ struct bdi_writeback { struct percpu_counter stat[NR_WB_STAT_ITEMS]; - struct bdi_writeback_congested *congested; + unsigned long congested; /* WB_[a]sync_congested flags */ unsigned long bw_time_stamp; /* last time write bw is updated */ unsigned long dirtied_stamp; @@ -190,8 +168,6 @@ struct backing_dev_info { struct list_head bdi_list; unsigned long ra_pages; /* max readahead in PAGE_SIZE units */ unsigned long io_pages; /* max allowed IO size */ - congested_fn *congested_fn; /* Function pointer if device is md/dm */ - void *congested_data; /* Pointer to aux data for congested func */ struct kref refcnt; /* Reference counter for the structure */ unsigned int capabilities; /* Device capabilities */ @@ -208,11 +184,8 @@ struct backing_dev_info { struct list_head wb_list; /* list of all wbs */ #ifdef CONFIG_CGROUP_WRITEBACK struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ - struct rb_root cgwb_congested_tree; /* their congested states */ struct mutex cgwb_release_mutex; /* protect shutdown of wb structs */ struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */ -#else - struct bdi_writeback_congested *wb_congested; #endif wait_queue_head_t wb_waitq; @@ -232,18 +205,8 @@ enum { BLK_RW_SYNC = 1, }; -void clear_wb_congested(struct bdi_writeback_congested *congested, int sync); -void set_wb_congested(struct bdi_writeback_congested *congested, int sync); - -static inline void clear_bdi_congested(struct backing_dev_info *bdi, int sync) -{ - clear_wb_congested(bdi->wb.congested, sync); -} - -static inline void set_bdi_congested(struct backing_dev_info *bdi, int sync) -{ - set_wb_congested(bdi->wb.congested, sync); -} +void clear_bdi_congested(struct backing_dev_info *bdi, int sync); +void set_bdi_congested(struct backing_dev_info *bdi, int sync); struct wb_lock_cookie { bool locked; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 6b3504bf7a42..0b06b2d26c9a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -169,11 +169,7 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) { - struct backing_dev_info *bdi = wb->bdi; - - if (bdi->congested_fn) - return bdi->congested_fn(bdi->congested_data, cong_bits); - return wb->congested->state & cong_bits; + return wb->congested & cong_bits; } long congestion_wait(int sync, long timeout); @@ -224,9 +220,6 @@ static inline int bdi_sched_wait(void *word) #ifdef CONFIG_CGROUP_WRITEBACK -struct bdi_writeback_congested * -wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp); -void wb_congested_put(struct bdi_writeback_congested *congested); struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css); struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, @@ -404,19 +397,6 @@ static inline bool inode_cgwb_enabled(struct inode *inode) return false; } -static inline struct bdi_writeback_congested * -wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp) -{ - refcount_inc(&bdi->wb_congested->refcnt); - return bdi->wb_congested; -} - -static inline void wb_congested_put(struct bdi_writeback_congested *congested) -{ - if (refcount_dec_and_test(&congested->refcnt)) - kfree(congested); -} - static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi) { return &bdi->wb; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 4a20b7517dd0..0571701ab1c5 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -45,17 +45,18 @@ struct linux_binprm { #ifdef __alpha__ unsigned int taso:1; #endif - struct file * executable; /* Executable to pass to the interpreter */ - struct file * interpreter; - struct file * file; + struct file *executable; /* Executable to pass to the interpreter */ + struct file *interpreter; + struct file *file; struct cred *cred; /* new credentials */ int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */ unsigned int per_clear; /* bits to clear in current->personality */ int argc, envc; - const char * filename; /* Name of binary as seen by procps */ - const char * interp; /* Name of the binary really executed. Most + const char *filename; /* Name of binary as seen by procps */ + const char *interp; /* Name of the binary really executed. Most of the time same as filename, but could be different for binfmt_{misc,script} */ + const char *fdpath; /* generated filename for execveat */ unsigned interp_flags; int execfd; /* File descriptor of the executable */ unsigned long loader, exec; @@ -134,13 +135,7 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm); extern void set_binfmt(struct linux_binfmt *new); extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t); -extern int do_execve(struct filename *, - const char __user * const __user *, - const char __user * const __user *); -extern int do_execveat(int, struct filename *, - const char __user * const __user *, - const char __user * const __user *, - int); -int do_execve_file(struct file *file, void *__argv, void *__envp); +int kernel_execve(const char *filename, + const char *const *argv, const char *const *envp); #endif /* _LINUX_BINFMTS_H */ diff --git a/include/linux/bio.h b/include/linux/bio.h index 91676d4b2dfe..c6d765382926 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -8,8 +8,6 @@ #include <linux/highmem.h> #include <linux/mempool.h> #include <linux/ioprio.h> - -#ifdef CONFIG_BLOCK /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */ #include <linux/blk_types.h> @@ -491,21 +489,12 @@ do { \ #define bio_dev(bio) \ disk_devt((bio)->bi_disk) -#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) -void bio_associate_blkg_from_page(struct bio *bio, struct page *page); -#else -static inline void bio_associate_blkg_from_page(struct bio *bio, - struct page *page) { } -#endif - #ifdef CONFIG_BLK_CGROUP -void bio_disassociate_blkg(struct bio *bio); void bio_associate_blkg(struct bio *bio); void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css); void bio_clone_blkg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ -static inline void bio_disassociate_blkg(struct bio *bio) { } static inline void bio_associate_blkg(struct bio *bio) { } static inline void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css) @@ -824,5 +813,4 @@ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) bio->bi_opf |= REQ_NOWAIT; } -#endif /* CONFIG_BLOCK */ #endif /* __LINUX_BIO_H */ diff --git a/include/linux/bits.h b/include/linux/bits.h index 4671fbf28842..7f475d59a097 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -18,8 +18,7 @@ * position @h. For example * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ -#if !defined(__ASSEMBLY__) && \ - (!defined(CONFIG_CC_IS_GCC) || CONFIG_GCC_VERSION >= 49000) +#if !defined(__ASSEMBLY__) #include <linux/build_bug.h> #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index a57ebe2f00ab..c8fc9792ac77 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -109,12 +109,6 @@ struct blkcg_gq { struct hlist_node blkcg_node; struct blkcg *blkcg; - /* - * Each blkg gets congested separately and the congestion state is - * propagated to the matching bdi_writeback_congested. - */ - struct bdi_writeback_congested *wb_congested; - /* all non-root blkcg_gq's are guaranteed to have access to parent */ struct blkcg_gq *parent; @@ -183,10 +177,6 @@ extern bool blkcg_debug_stats; struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, struct request_queue *q, bool update_hint); -struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q); -struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q); int blkcg_init_queue(struct request_queue *q); void blkcg_exit_queue(struct request_queue *q); @@ -481,32 +471,6 @@ static inline bool blkg_tryget(struct blkcg_gq *blkg) } /** - * blkg_tryget_closest - try and get a blkg ref on the closet blkg - * @blkg: blkg to get - * - * This needs to be called rcu protected. As the failure mode here is to walk - * up the blkg tree, this ensure that the blkg->parent pointers are always - * valid. This returns the blkg that it ended up taking a reference on or %NULL - * if no reference was taken. - */ -static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) -{ - struct blkcg_gq *ret_blkg = NULL; - - WARN_ON_ONCE(!rcu_read_lock_held()); - - while (blkg) { - if (blkg_tryget(blkg)) { - ret_blkg = blkg; - break; - } - blkg = blkg->parent; - } - - return ret_blkg; -} - -/** * blkg_put - put a blkg reference * @blkg: blkg to put */ @@ -547,14 +511,6 @@ static inline void blkg_put(struct blkcg_gq *blkg) if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ (p_blkg)->q, false))) -#ifdef CONFIG_BLK_DEV_THROTTLING -extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, - struct bio *bio); -#else -static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, - struct bio *bio) { return false; } -#endif - bool __blkcg_punt_bio_submit(struct bio *bio); static inline bool blkcg_punt_bio_submit(struct bio *bio) @@ -570,65 +526,6 @@ static inline void blkcg_bio_issue_init(struct bio *bio) bio_issue_init(&bio->bi_issue, bio_sectors(bio)); } -static inline bool blkcg_bio_issue_check(struct request_queue *q, - struct bio *bio) -{ - struct blkcg_gq *blkg; - bool throtl = false; - - rcu_read_lock(); - - if (!bio->bi_blkg) { - char b[BDEVNAME_SIZE]; - - WARN_ONCE(1, - "no blkg associated for bio on block-device: %s\n", - bio_devname(bio, b)); - bio_associate_blkg(bio); - } - - blkg = bio->bi_blkg; - - throtl = blk_throtl_bio(q, blkg, bio); - - if (!throtl) { - struct blkg_iostat_set *bis; - int rwd, cpu; - - if (op_is_discard(bio->bi_opf)) - rwd = BLKG_IOSTAT_DISCARD; - else if (op_is_write(bio->bi_opf)) - rwd = BLKG_IOSTAT_WRITE; - else - rwd = BLKG_IOSTAT_READ; - - cpu = get_cpu(); - bis = per_cpu_ptr(blkg->iostat_cpu, cpu); - u64_stats_update_begin(&bis->sync); - - /* - * If the bio is flagged with BIO_CGROUP_ACCT it means this is a - * split bio and we would have already accounted for the size of - * the bio. - */ - if (!bio_flagged(bio, BIO_CGROUP_ACCT)) { - bio_set_flag(bio, BIO_CGROUP_ACCT); - bis->cur.bytes[rwd] += bio->bi_iter.bi_size; - } - bis->cur.ios[rwd]++; - - u64_stats_update_end(&bis->sync); - if (cgroup_subsys_on_dfl(io_cgrp_subsys)) - cgroup_rstat_updated(blkg->blkcg->css.cgroup, cpu); - put_cpu(); - } - - blkcg_bio_issue_init(bio); - - rcu_read_unlock(); - return !throtl; -} - static inline void blkcg_use_delay(struct blkcg_gq *blkg) { if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0)) @@ -702,6 +599,7 @@ static inline void blkcg_clear_delay(struct blkcg_gq *blkg) atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); } +void blk_cgroup_bio_start(struct bio *bio); void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); void blkcg_maybe_throttle_current(void); @@ -755,8 +653,7 @@ static inline void blkg_put(struct blkcg_gq *blkg) { } static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; } static inline void blkcg_bio_issue_init(struct bio *bio) { } -static inline bool blkcg_bio_issue_check(struct request_queue *q, - struct bio *bio) { return true; } +static inline void blk_cgroup_bio_start(struct bio *bio) { } #define blk_queue_for_each_rl(rl, q) \ for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d6fcae17da5a..9d2d5ad367a4 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -267,27 +267,9 @@ struct blk_mq_queue_data { bool last; }; -typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *, - const struct blk_mq_queue_data *); -typedef void (commit_rqs_fn)(struct blk_mq_hw_ctx *); -typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *); -typedef void (put_budget_fn)(struct blk_mq_hw_ctx *); -typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); -typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); -typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); -typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *, - unsigned int, unsigned int); -typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *, - unsigned int); - typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); -typedef int (poll_fn)(struct blk_mq_hw_ctx *); -typedef int (map_queues_fn)(struct blk_mq_tag_set *set); -typedef bool (busy_fn)(struct request_queue *); -typedef void (complete_fn)(struct request *); -typedef void (cleanup_rq_fn)(struct request *); /** * struct blk_mq_ops - Callback functions that implements block driver @@ -297,7 +279,8 @@ struct blk_mq_ops { /** * @queue_rq: Queue a new request from block IO. */ - queue_rq_fn *queue_rq; + blk_status_t (*queue_rq)(struct blk_mq_hw_ctx *, + const struct blk_mq_queue_data *); /** * @commit_rqs: If a driver uses bd->last to judge when to submit @@ -306,7 +289,7 @@ struct blk_mq_ops { * purpose of kicking the hardware (which the last request otherwise * would have done). */ - commit_rqs_fn *commit_rqs; + void (*commit_rqs)(struct blk_mq_hw_ctx *); /** * @get_budget: Reserve budget before queue request, once .queue_rq is @@ -314,37 +297,38 @@ struct blk_mq_ops { * reserved budget. Also we have to handle failure case * of .get_budget for avoiding I/O deadlock. */ - get_budget_fn *get_budget; + bool (*get_budget)(struct request_queue *); + /** * @put_budget: Release the reserved budget. */ - put_budget_fn *put_budget; + void (*put_budget)(struct request_queue *); /** * @timeout: Called on request timeout. */ - timeout_fn *timeout; + enum blk_eh_timer_return (*timeout)(struct request *, bool); /** * @poll: Called to poll for completion of a specific tag. */ - poll_fn *poll; + int (*poll)(struct blk_mq_hw_ctx *); /** * @complete: Mark the request as complete. */ - complete_fn *complete; + void (*complete)(struct request *); /** * @init_hctx: Called when the block layer side of a hardware queue has * been set up, allowing the driver to allocate/init matching * structures. */ - init_hctx_fn *init_hctx; + int (*init_hctx)(struct blk_mq_hw_ctx *, void *, unsigned int); /** * @exit_hctx: Ditto for exit/teardown. */ - exit_hctx_fn *exit_hctx; + void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int); /** * @init_request: Called for every command allocated by the block layer @@ -353,11 +337,13 @@ struct blk_mq_ops { * Tag greater than or equal to queue_depth is for setting up * flush request. */ - init_request_fn *init_request; + int (*init_request)(struct blk_mq_tag_set *set, struct request *, + unsigned int, unsigned int); /** * @exit_request: Ditto for exit/teardown. */ - exit_request_fn *exit_request; + void (*exit_request)(struct blk_mq_tag_set *set, struct request *, + unsigned int); /** * @initialize_rq_fn: Called from inside blk_get_request(). @@ -368,18 +354,18 @@ struct blk_mq_ops { * @cleanup_rq: Called before freeing one request which isn't completed * yet, and usually for freeing the driver private data. */ - cleanup_rq_fn *cleanup_rq; + void (*cleanup_rq)(struct request *); /** * @busy: If set, returns whether or not this queue currently is busy. */ - busy_fn *busy; + bool (*busy)(struct request_queue *); /** * @map_queues: This allows drivers specify their own queue mapping by * overriding the setup-time function that builds the mq_map. */ - map_queues_fn *map_queues; + int (*map_queues)(struct blk_mq_tag_set *set); #ifdef CONFIG_BLK_DEBUG_FS /** @@ -447,8 +433,6 @@ enum { BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), /* allocate from reserved pool */ BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1), - /* allocate internal/sched tag */ - BLK_MQ_REQ_INTERNAL = (__force blk_mq_req_flags_t)(1 << 2), /* set RQF_PREEMPT */ BLK_MQ_REQ_PREEMPT = (__force blk_mq_req_flags_t)(1 << 3), }; @@ -503,8 +487,8 @@ void __blk_mq_end_request(struct request *rq, blk_status_t error); void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); -bool blk_mq_complete_request(struct request *rq); -void blk_mq_force_complete_rq(struct request *rq); +void blk_mq_complete_request(struct request *rq); +bool blk_mq_complete_request_remote(struct request *rq); bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, struct bio *bio, unsigned int nr_segs); bool blk_mq_queue_stopped(struct request_queue *q); @@ -537,6 +521,15 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q); unsigned int blk_mq_rq_cpu(struct request *rq); +bool __blk_should_fake_timeout(struct request_queue *q); +static inline bool blk_should_fake_timeout(struct request_queue *q) +{ + if (IS_ENABLED(CONFIG_FAIL_IO_TIMEOUT) && + test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags)) + return __blk_should_fake_timeout(q); + return false; +} + /** * blk_mq_rq_from_pdu - cast a PDU to a request * @pdu: the PDU (Protocol Data Unit) to be casted @@ -589,6 +582,6 @@ static inline void blk_mq_cleanup_rq(struct request *rq) rq->q->mq_ops->cleanup_rq(rq); } -blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio); +blk_qc_t blk_mq_submit_bio(struct bio *bio); #endif diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ccb895f911b1..4ecf4fed171f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -14,12 +14,39 @@ struct bio_set; struct bio; struct bio_integrity_payload; struct page; -struct block_device; struct io_context; struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; +struct block_device { + dev_t bd_dev; /* not a kdev_t - it's a search key */ + int bd_openers; + struct inode * bd_inode; /* will die */ + struct super_block * bd_super; + struct mutex bd_mutex; /* open/close mutex */ + void * bd_claiming; + void * bd_holder; + int bd_holders; + bool bd_write_holder; +#ifdef CONFIG_SYSFS + struct list_head bd_holder_disks; +#endif + struct block_device * bd_contains; + u8 bd_partno; + struct hd_struct * bd_part; + /* number of times partitions within this device have been opened. */ + unsigned bd_part_count; + int bd_invalidated; + struct gendisk * bd_disk; + struct backing_dev_info *bd_bdi; + + /* The counter of freeze processes */ + int bd_fsfreeze_count; + /* Mutex for freeze */ + struct mutex bd_fsfreeze_mutex; +} __randomize_layout; + /* * Block error status values. See block/blk-core:blk_errors for the details. * Alpha cannot write a byte atomically, so we need to use 32-bit value. @@ -300,12 +327,8 @@ enum req_opf { REQ_OP_DISCARD = 3, /* securely erase sectors */ REQ_OP_SECURE_ERASE = 5, - /* reset a zone write pointer */ - REQ_OP_ZONE_RESET = 6, /* write the same sector many times */ REQ_OP_WRITE_SAME = 7, - /* reset all the zone present on the device */ - REQ_OP_ZONE_RESET_ALL = 8, /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = 9, /* Open a zone */ @@ -316,6 +339,10 @@ enum req_opf { REQ_OP_ZONE_FINISH = 12, /* write data at the current zone write pointer */ REQ_OP_ZONE_APPEND = 13, + /* reset a zone write pointer */ + REQ_OP_ZONE_RESET = 15, + /* reset all the zone present on the device */ + REQ_OP_ZONE_RESET_ALL = 17, /* SCSI passthrough using struct scsi_request */ REQ_OP_SCSI_IN = 32, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8fd900998b4e..06ecb2c1492f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -4,9 +4,6 @@ #include <linux/sched.h> #include <linux/sched/clock.h> - -#ifdef CONFIG_BLOCK - #include <linux/major.h> #include <linux/genhd.h> #include <linux/list.h> @@ -289,8 +286,6 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; -typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); - struct bio_vec; enum blk_eh_timer_return { @@ -401,8 +396,6 @@ struct request_queue { struct blk_queue_stats *stats; struct rq_qos *rq_qos; - make_request_fn *make_request_fn; - const struct blk_mq_ops *mq_ops; /* sw queues */ @@ -528,9 +521,9 @@ struct request_queue { unsigned int sg_timeout; unsigned int sg_reserved_size; int node; + struct mutex debugfs_mutex; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; - struct mutex blk_trace_mutex; #endif /* * for flush operations @@ -574,8 +567,9 @@ struct request_queue { struct list_head tag_set_list; struct bio_set bio_split; -#ifdef CONFIG_BLK_DEBUG_FS struct dentry *debugfs_dir; + +#ifdef CONFIG_BLK_DEBUG_FS struct dentry *sched_debugfs_dir; struct dentry *rqos_debugfs_dir; #endif @@ -584,12 +578,11 @@ struct request_queue { size_t cmd_size; - struct work_struct release_work; - #define BLK_MAX_WRITE_HINTS 5 u64 write_hints[BLK_MAX_WRITE_HINTS]; }; +/* Keep blk_queue_flag_name[] in sync with the definitions below */ #define QUEUE_FLAG_STOPPED 0 /* queue is stopped */ #define QUEUE_FLAG_DYING 1 /* queue being torn down */ #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ @@ -860,8 +853,7 @@ static inline void rq_flush_dcache_pages(struct request *rq) extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); -extern blk_qc_t generic_make_request(struct bio *bio); -extern blk_qc_t direct_make_request(struct bio *bio); +blk_qc_t submit_bio_noacct(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern struct request *blk_get_request(struct request_queue *, unsigned int op, @@ -875,7 +867,7 @@ extern void blk_rq_unprep_clone(struct request *rq); extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern int blk_rq_append_bio(struct request *rq, struct bio **bio); -extern void blk_queue_split(struct request_queue *, struct bio **); +extern void blk_queue_split(struct bio **); extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t, unsigned int, void __user *); @@ -1078,7 +1070,6 @@ void blk_steal_bios(struct bio_list *list, struct request *rq); extern bool blk_update_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); -extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); /* @@ -1165,13 +1156,13 @@ static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, return __blk_rq_map_sg(q, rq, sglist, &last_sg); } extern void blk_dump_rq_flags(struct request *, char *); -extern long nr_blockdev_pages(void); bool __must_check blk_get_queue(struct request_queue *); -struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id); +struct request_queue *blk_alloc_queue(int node_id); extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); +#ifdef CONFIG_BLOCK /* * blk_plug permits building a queue of related requests by holding the I/O * fragments for a short period. This allows merging of sequential requests @@ -1189,6 +1180,7 @@ struct blk_plug { struct list_head cb_list; /* md requires an unplug callback */ unsigned short rq_count; bool multiple_queues; + bool nowait; }; #define BLK_MAX_REQUEST_COUNT 16 #define BLK_PLUG_FLUSH_SIZE (128 * 1024) @@ -1231,9 +1223,47 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) !list_empty(&plug->cb_list)); } +int blkdev_issue_flush(struct block_device *, gfp_t); +long nr_blockdev_pages(void); +#else /* CONFIG_BLOCK */ +struct blk_plug { +}; + +static inline void blk_start_plug(struct blk_plug *plug) +{ +} + +static inline void blk_finish_plug(struct blk_plug *plug) +{ +} + +static inline void blk_flush_plug(struct task_struct *task) +{ +} + +static inline void blk_schedule_flush_plug(struct task_struct *task) +{ +} + + +static inline bool blk_needs_flush_plug(struct task_struct *tsk) +{ + return false; +} + +static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask) +{ + return 0; +} + +static inline long nr_blockdev_pages(void) +{ + return 0; +} +#endif /* CONFIG_BLOCK */ + extern void blk_io_schedule(void); -int blkdev_issue_flush(struct block_device *, gfp_t); extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page); @@ -1515,7 +1545,7 @@ static inline unsigned int blksize_bits(unsigned int size) static inline unsigned int block_size(struct block_device *bdev) { - return bdev->bd_block_size; + return 1 << bdev->bd_inode->i_blkbits; } int kblockd_schedule_work(struct work_struct *work); @@ -1745,6 +1775,7 @@ static inline void blk_ksm_unregister(struct request_queue *q) { } struct block_device_operations { + blk_qc_t (*submit_bio) (struct bio *bio); int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); @@ -1752,8 +1783,6 @@ struct block_device_operations { int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); - /* ->media_changed() is DEPRECATED, use ->check_events() instead */ - int (*media_changed) (struct gendisk *); void (*unlock_native_capacity) (struct gendisk *); int (*revalidate_disk) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); @@ -1833,52 +1862,6 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq) } #endif /* CONFIG_BLK_DEV_ZONED */ -#else /* CONFIG_BLOCK */ - -struct block_device; - -/* - * stubs for when the block layer is configured out - */ -#define buffer_heads_over_limit 0 - -static inline long nr_blockdev_pages(void) -{ - return 0; -} - -struct blk_plug { -}; - -static inline void blk_start_plug(struct blk_plug *plug) -{ -} - -static inline void blk_finish_plug(struct blk_plug *plug) -{ -} - -static inline void blk_flush_plug(struct task_struct *task) -{ -} - -static inline void blk_schedule_flush_plug(struct task_struct *task) -{ -} - - -static inline bool blk_needs_flush_plug(struct task_struct *tsk) -{ - return false; -} - -static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask) -{ - return 0; -} - -#endif /* CONFIG_BLOCK */ - static inline void blk_wake_io_task(struct task_struct *waiter) { /* @@ -1892,7 +1875,6 @@ static inline void blk_wake_io_task(struct task_struct *waiter) wake_up_process(waiter); } -#ifdef CONFIG_BLOCK unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, unsigned int op); void disk_end_io_acct(struct gendisk *disk, unsigned int op, @@ -1918,6 +1900,53 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) { return disk_end_io_acct(bio->bi_disk, bio_op(bio), start_time); } -#endif /* CONFIG_BLOCK */ +int bdev_read_only(struct block_device *bdev); +int set_blocksize(struct block_device *bdev, int size); + +const char *bdevname(struct block_device *bdev, char *buffer); +struct block_device *lookup_bdev(const char *); + +void blkdev_show(struct seq_file *seqf, off_t offset); + +#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ +#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ +#ifdef CONFIG_BLOCK +#define BLKDEV_MAJOR_MAX 512 +#else +#define BLKDEV_MAJOR_MAX 0 +#endif + +int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); +struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, + void *holder); +struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); +int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, + void *holder); +void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, + void *holder); +void blkdev_put(struct block_device *bdev, fmode_t mode); + +struct block_device *I_BDEV(struct inode *inode); +struct block_device *bdget(dev_t); +struct block_device *bdgrab(struct block_device *bdev); +void bdput(struct block_device *); + +#ifdef CONFIG_BLOCK +void invalidate_bdev(struct block_device *bdev); +int sync_blockdev(struct block_device *bdev); +#else +static inline void invalidate_bdev(struct block_device *bdev) +{ +} +static inline int sync_blockdev(struct block_device *bdev) +{ + return 0; +} #endif +int fsync_bdev(struct block_device *bdev); + +struct super_block *freeze_bdev(struct block_device *bdev); +int thaw_bdev(struct block_device *bdev, struct super_block *sb); + +#endif /* _LINUX_BLKDEV_H */ diff --git a/include/linux/bpf-netns.h b/include/linux/bpf-netns.h index 4052d649f36d..47d5b0c708c9 100644 --- a/include/linux/bpf-netns.h +++ b/include/linux/bpf-netns.h @@ -33,7 +33,7 @@ int netns_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); -int netns_bpf_prog_detach(const union bpf_attr *attr); +int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog); #else @@ -49,7 +49,8 @@ static inline int netns_bpf_prog_attach(const union bpf_attr *attr, return -EOPNOTSUPP; } -static inline int netns_bpf_prog_detach(const union bpf_attr *attr) +static inline int netns_bpf_prog_detach(const union bpf_attr *attr, + enum bpf_prog_type ptype) { return -EOPNOTSUPP; } diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 07052d44bca1..9750a1902ee5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1543,13 +1543,16 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ #if defined(CONFIG_BPF_STREAM_PARSER) -int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which); +int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, + struct bpf_prog *old, u32 which); int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); +int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); void sock_map_unhash(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else static inline int sock_map_prog_update(struct bpf_map *map, - struct bpf_prog *prog, u32 which) + struct bpf_prog *prog, + struct bpf_prog *old, u32 which) { return -EOPNOTSUPP; } @@ -1559,6 +1562,12 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr, { return -EINVAL; } + +static inline int sock_map_prog_detach(const union bpf_attr *attr, + enum bpf_prog_type ptype) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_BPF_STREAM_PARSER */ #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h index d815622cd31e..9b114c718a76 100644 --- a/include/linux/bpfilter.h +++ b/include/linux/bpfilter.h @@ -3,22 +3,23 @@ #define _LINUX_BPFILTER_H #include <uapi/linux/bpfilter.h> -#include <linux/umh.h> +#include <linux/usermode_driver.h> struct sock; int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen); int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen); +void bpfilter_umh_cleanup(struct umd_info *info); + struct bpfilter_umh_ops { - struct umh_info info; + struct umd_info info; /* since ip_getsockopt() can run in parallel, serialize access to umh */ struct mutex lock; int (*sockopt)(struct sock *sk, int optname, char __user *optval, unsigned int optlen, bool is_set); int (*start)(void); - bool stop; }; extern struct bpfilter_umh_ops bpfilter_ops; #endif diff --git a/include/linux/bsearch.h b/include/linux/bsearch.h index 8ed53d7524ea..e66b711d091e 100644 --- a/include/linux/bsearch.h +++ b/include/linux/bsearch.h @@ -4,7 +4,29 @@ #include <linux/types.h> -void *bsearch(const void *key, const void *base, size_t num, size_t size, - cmp_func_t cmp); +static __always_inline +void *__inline_bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp) +{ + const char *pivot; + int result; + + while (num > 0) { + pivot = base + (num >> 1) * size; + result = cmp(key, pivot); + + if (result == 0) + return (void *)pivot; + + if (result > 0) { + base = pivot + size; + num--; + } + num >>= 1; + } + + return NULL; +} + +extern void *bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp); #endif /* _LINUX_BSEARCH_H */ diff --git a/include/linux/btf.h b/include/linux/btf.h index 5c1ea99b480f..8b81fbb4497c 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -82,6 +82,11 @@ static inline bool btf_type_is_int(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_INT; } +static inline bool btf_type_is_small_int(const struct btf_type *t) +{ + return btf_type_is_int(t) && t->size <= sizeof(u64); +} + static inline bool btf_type_is_enum(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 22fb11e2d2e0..6b47f94378c5 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -406,6 +406,7 @@ static inline int inode_has_buffers(struct inode *inode) { return 0; } static inline void invalidate_inode_buffers(struct inode *inode) {} static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } +#define buffer_heads_over_limit 0 #endif /* CONFIG_BLOCK */ #endif /* _LINUX_BUFFER_HEAD_H */ diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index a954def26c0d..900b9f4e0605 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -34,7 +34,7 @@ struct can_skb_priv { int ifindex; int skbcnt; - struct can_frame cf[0]; + struct can_frame cf[]; }; static inline struct can_skb_priv *can_skb_prv(struct sk_buff *skb) diff --git a/include/linux/capability.h b/include/linux/capability.h index b4345b38a6be..1e7fe311cabe 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -261,6 +261,12 @@ static inline bool bpf_capable(void) return capable(CAP_BPF) || capable(CAP_SYS_ADMIN); } +static inline bool checkpoint_restore_ns_capable(struct user_namespace *ns) +{ + return ns_capable(ns, CAP_CHECKPOINT_RESTORE) || + ns_capable(ns, CAP_SYS_ADMIN); +} + /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/include/linux/cb710.h b/include/linux/cb710.h index 60de3fedd3a7..405657a9a0d5 100644 --- a/include/linux/cb710.h +++ b/include/linux/cb710.h @@ -36,7 +36,7 @@ struct cb710_chip { unsigned slot_mask; unsigned slots; spinlock_t irq_lock; - struct cb710_slot slot[0]; + struct cb710_slot slot[]; }; /* NOTE: cb710_chip.slots is modified only during device init/exit and diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index 8543fa59da72..f48d0a31deae 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -73,7 +73,6 @@ struct cdrom_device_ops { int (*drive_status) (struct cdrom_device_info *, int); unsigned int (*check_events) (struct cdrom_device_info *cdi, unsigned int clearing, int slot); - int (*media_changed) (struct cdrom_device_info *, int); int (*tray_move) (struct cdrom_device_info *, int); int (*lock_door) (struct cdrom_device_info *, int); int (*select_speed) (struct cdrom_device_info *, int); @@ -107,7 +106,6 @@ extern int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); extern unsigned int cdrom_check_events(struct cdrom_device_info *cdi, unsigned int clearing); -extern int cdrom_media_changed(struct cdrom_device_info *); extern int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi); extern void unregister_cdrom(struct cdrom_device_info *cdi); diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 2247e71beb83..e5ed1c541e7f 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -52,8 +52,7 @@ struct ceph_options { unsigned long osd_idle_ttl; /* jiffies */ unsigned long osd_keepalive_timeout; /* jiffies */ unsigned long osd_request_timeout; /* jiffies */ - - u32 osd_req_flags; /* CEPH_OSD_FLAG_*, applied to each OSD request */ + u32 read_from_replica; /* CEPH_OSD_FLAG_BALANCE/LOCALIZE_READS */ /* * any type that can't be simply compared or doesn't need @@ -76,6 +75,7 @@ struct ceph_options { #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0 /* no timeout */ +#define CEPH_READ_FROM_REPLICA_DEFAULT 0 /* read from primary */ #define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000) #define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 52661155f85f..fee0b5547cd0 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -790,7 +790,9 @@ struct sock_cgroup_data { union { #ifdef __LITTLE_ENDIAN struct { - u8 is_data; + u8 is_data : 1; + u8 no_refcnt : 1; + u8 unused : 6; u8 padding; u16 prioidx; u32 classid; @@ -800,7 +802,9 @@ struct sock_cgroup_data { u32 classid; u16 prioidx; u8 padding; - u8 is_data; + u8 unused : 6; + u8 no_refcnt : 1; + u8 is_data : 1; } __packed; #endif u64 val; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4598e4da6b1b..618838c48313 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -822,6 +822,7 @@ extern spinlock_t cgroup_sk_update_lock; void cgroup_sk_alloc_disable(void); void cgroup_sk_alloc(struct sock_cgroup_data *skcd); +void cgroup_sk_clone(struct sock_cgroup_data *skcd); void cgroup_sk_free(struct sock_cgroup_data *skcd); static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) @@ -835,7 +836,7 @@ static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) */ v = READ_ONCE(skcd->val); - if (v & 1) + if (v & 3) return &cgrp_dfl_root.cgrp; return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp; @@ -847,6 +848,7 @@ static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) #else /* CONFIG_CGROUP_DATA */ static inline void cgroup_sk_alloc(struct sock_cgroup_data *skcd) {} +static inline void cgroup_sk_clone(struct sock_cgroup_data *skcd) {} static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} #endif /* CONFIG_CGROUP_DATA */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index ee37256ec8bd..8a072d00e688 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -5,8 +5,6 @@ /* Compiler specific definitions for Clang compiler */ -#define uninitialized_var(x) x = *(&(x)) - /* same as gcc, this was present in clang-2.6 so we can assume it works * with any version that can compile the kernel */ @@ -33,6 +31,14 @@ #define __no_sanitize_thread #endif +#if __has_feature(undefined_behavior_sanitizer) +/* GCC does not have __SANITIZE_UNDEFINED__ */ +#define __no_sanitize_undefined \ + __attribute__((no_sanitize("undefined"))) +#else +#define __no_sanitize_undefined +#endif + /* * Not all versions of clang implement the the type-generic versions * of the builtin overflow checkers. Fortunately, clang implements diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7dd4e0349ef3..7a3769040d7d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -11,7 +11,7 @@ + __GNUC_PATCHLEVEL__) /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */ -#if GCC_VERSION < 40800 +#if GCC_VERSION < 40900 # error Sorry, your compiler is too old - please upgrade it. #endif @@ -59,12 +59,6 @@ (typeof(ptr)) (__ptr + (off)); \ }) -/* - * A trick to suppress uninitialized variable warning without generating any - * code - */ -#define uninitialized_var(x) x = x - #ifdef CONFIG_RETPOLINE #define __noretpoline __attribute__((__indirect_branch__("keep"))) #endif @@ -150,6 +144,12 @@ #define __no_sanitize_thread #endif +#if __has_attribute(__no_sanitize_undefined__) +#define __no_sanitize_undefined __attribute__((no_sanitize_undefined)) +#else +#define __no_sanitize_undefined +#endif + #if GCC_VERSION >= 50100 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 30827f82ad62..6810d80acb0b 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -120,65 +120,12 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Annotate a C jump table to allow objtool to follow the code flow */ #define __annotate_jump_table __section(.rodata..c_jump_table) -#ifdef CONFIG_DEBUG_ENTRY -/* Begin/end of an instrumentation safe region */ -#define instrumentation_begin() ({ \ - asm volatile("%c0:\n\t" \ - ".pushsection .discard.instr_begin\n\t" \ - ".long %c0b - .\n\t" \ - ".popsection\n\t" : : "i" (__COUNTER__)); \ -}) - -/* - * Because instrumentation_{begin,end}() can nest, objtool validation considers - * _begin() a +1 and _end() a -1 and computes a sum over the instructions. - * When the value is greater than 0, we consider instrumentation allowed. - * - * There is a problem with code like: - * - * noinstr void foo() - * { - * instrumentation_begin(); - * ... - * if (cond) { - * instrumentation_begin(); - * ... - * instrumentation_end(); - * } - * bar(); - * instrumentation_end(); - * } - * - * If instrumentation_end() would be an empty label, like all the other - * annotations, the inner _end(), which is at the end of a conditional block, - * would land on the instruction after the block. - * - * If we then consider the sum of the !cond path, we'll see that the call to - * bar() is with a 0-value, even though, we meant it to happen with a positive - * value. - * - * To avoid this, have _end() be a NOP instruction, this ensures it will be - * part of the condition block and does not escape. - */ -#define instrumentation_end() ({ \ - asm volatile("%c0: nop\n\t" \ - ".pushsection .discard.instr_end\n\t" \ - ".long %c0b - .\n\t" \ - ".popsection\n\t" : : "i" (__COUNTER__)); \ -}) -#endif /* CONFIG_DEBUG_ENTRY */ - #else #define annotate_reachable() #define annotate_unreachable() #define __annotate_jump_table #endif -#ifndef instrumentation_begin -#define instrumentation_begin() do { } while(0) -#define instrumentation_end() do { } while(0) -#endif - #ifndef ASM_UNREACHABLE # define ASM_UNREACHABLE #endif @@ -230,28 +177,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) #endif -/* - * Prevent the compiler from merging or refetching reads or writes. The - * compiler is also forbidden from reordering successive instances of - * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some - * particular ordering. One way to make the compiler aware of ordering is to - * put the two invocations of READ_ONCE or WRITE_ONCE in different C - * statements. - * - * These two macros will also work on aggregate data types like structs or - * unions. - * - * Their two major use cases are: (1) Mediating communication between - * process-level code and irq/NMI handlers, all running on the same CPU, - * and (2) Ensuring that the compiler does not fold, spindle, or otherwise - * mutilate accesses that either do not require ordering or that interact - * with an explicit memory barrier or atomic instruction that provides the - * required ordering. - */ -#include <asm/barrier.h> -#include <linux/kasan-checks.h> -#include <linux/kcsan-checks.h> - /** * data_race - mark an expression as containing intentional data races * @@ -272,65 +197,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, __v; \ }) -/* - * Use __READ_ONCE() instead of READ_ONCE() if you do not require any - * atomicity or dependency ordering guarantees. Note that this may result - * in tears! - */ -#define __READ_ONCE(x) (*(const volatile __unqual_scalar_typeof(x) *)&(x)) - -#define __READ_ONCE_SCALAR(x) \ -({ \ - __unqual_scalar_typeof(x) __x = __READ_ONCE(x); \ - smp_read_barrier_depends(); \ - (typeof(x))__x; \ -}) - -#define READ_ONCE(x) \ -({ \ - compiletime_assert_rwonce_type(x); \ - __READ_ONCE_SCALAR(x); \ -}) - -#define __WRITE_ONCE(x, val) \ -do { \ - *(volatile typeof(x) *)&(x) = (val); \ -} while (0) - -#define WRITE_ONCE(x, val) \ -do { \ - compiletime_assert_rwonce_type(x); \ - __WRITE_ONCE(x, val); \ -} while (0) - -static __no_sanitize_or_inline -unsigned long __read_once_word_nocheck(const void *addr) -{ - return __READ_ONCE(*(unsigned long *)addr); -} - -/* - * Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need to load a - * word from memory atomically but without telling KASAN/KCSAN. This is - * usually used by unwinding code when walking the stack of a running process. - */ -#define READ_ONCE_NOCHECK(x) \ -({ \ - unsigned long __x; \ - compiletime_assert(sizeof(x) == sizeof(__x), \ - "Unsupported access size for READ_ONCE_NOCHECK()."); \ - __x = __read_once_word_nocheck(&(x)); \ - smp_read_barrier_depends(); \ - (typeof(x))__x; \ -}) - -static __no_kasan_or_inline -unsigned long read_word_at_a_time(const void *addr) -{ - kasan_check_read(addr, 1); - return *(unsigned long *)addr; -} - #endif /* __KERNEL__ */ /* @@ -354,57 +220,6 @@ static inline void *offset_to_ptr(const int *off) #endif /* __ASSEMBLY__ */ -/* Compile time object size, -1 for unknown */ -#ifndef __compiletime_object_size -# define __compiletime_object_size(obj) -1 -#endif -#ifndef __compiletime_warning -# define __compiletime_warning(message) -#endif -#ifndef __compiletime_error -# define __compiletime_error(message) -#endif - -#ifdef __OPTIMIZE__ -# define __compiletime_assert(condition, msg, prefix, suffix) \ - do { \ - extern void prefix ## suffix(void) __compiletime_error(msg); \ - if (!(condition)) \ - prefix ## suffix(); \ - } while (0) -#else -# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0) -#endif - -#define _compiletime_assert(condition, msg, prefix, suffix) \ - __compiletime_assert(condition, msg, prefix, suffix) - -/** - * compiletime_assert - break build and emit msg if condition is false - * @condition: a compile-time constant condition to check - * @msg: a message to emit if condition is false - * - * In tradition of POSIX assert, this macro will break the build if the - * supplied condition is *false*, emitting the supplied error message if the - * compiler has support to do so. - */ -#define compiletime_assert(condition, msg) \ - _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) - -#define compiletime_assert_atomic_type(t) \ - compiletime_assert(__native_word(t), \ - "Need native word sized stores/loads for atomicity.") - -/* - * Yes, this permits 64-bit accesses on 32-bit architectures. These will - * actually be atomic in some cases (namely Armv7 + LPAE), but for others we - * rely on the access being split into 2x32-bit accesses for a 32-bit quantity - * (e.g. a virtual address) and a strong prevailing wind. - */ -#define compiletime_assert_rwonce_type(t) \ - compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ - "Unsupported access size for {READ,WRITE}_ONCE().") - /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) @@ -414,4 +229,6 @@ static inline void *offset_to_ptr(const int *off) */ #define prevent_tail_call_optimization() mb() +#include <asm/rwonce.h> + #endif /* __LINUX_COMPILER_H */ diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index cdf016596659..6122efdad6ad 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -37,9 +37,11 @@ # define __GCC4_has_attribute___copy__ 0 # define __GCC4_has_attribute___designated_init__ 0 # define __GCC4_has_attribute___externally_visible__ 1 +# define __GCC4_has_attribute___no_caller_saved_registers__ 0 # define __GCC4_has_attribute___noclone__ 1 # define __GCC4_has_attribute___nonstring__ 0 # define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8) +# define __GCC4_has_attribute___no_sanitize_undefined__ (__GNUC_MINOR__ >= 9) # define __GCC4_has_attribute___fallthrough__ 0 #endif @@ -176,6 +178,18 @@ #define __mode(x) __attribute__((__mode__(x))) /* + * Optional: only supported since gcc >= 7 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/x86-Function-Attributes.html#index-no_005fcaller_005fsaved_005fregisters-function-attribute_002c-x86 + * clang: https://clang.llvm.org/docs/AttributeReference.html#no-caller-saved-registers + */ +#if __has_attribute(__no_caller_saved_registers__) +# define __no_caller_saved_registers __attribute__((__no_caller_saved_registers__)) +#else +# define __no_caller_saved_registers +#endif + +/* * Optional: not supported by clang * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noclone-function-attribute diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 21aed0981edf..2e231ba8fe3f 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -5,20 +5,20 @@ #ifndef __ASSEMBLY__ #ifdef __CHECKER__ -# define __user __attribute__((noderef, address_space(1))) # define __kernel __attribute__((address_space(0))) +# define __user __attribute__((noderef, address_space(__user))) # define __safe __attribute__((safe)) # define __force __attribute__((force)) # define __nocast __attribute__((nocast)) -# define __iomem __attribute__((noderef, address_space(2))) +# define __iomem __attribute__((noderef, address_space(__iomem))) # define __must_hold(x) __attribute__((context(x,1,1))) # define __acquires(x) __attribute__((context(x,0,1))) # define __releases(x) __attribute__((context(x,1,0))) # define __acquire(x) __context__(x,1) # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) -# define __percpu __attribute__((noderef, address_space(3))) -# define __rcu __attribute__((noderef, address_space(4))) +# define __percpu __attribute__((noderef, address_space(__percpu))) +# define __rcu __attribute__((noderef, address_space(__rcu))) # define __private __attribute__((noderef)) extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); @@ -118,10 +118,6 @@ struct ftrace_likely_data { #define notrace __attribute__((__no_instrument_function__)) #endif -/* Section for code which can't be instrumented at all */ -#define noinstr \ - noinline notrace __attribute((__section__(".noinstr.text"))) - /* * it doesn't make sense on ARM (currently the only user of __naked) * to trace naked functions because then mcount is called without @@ -193,16 +189,18 @@ struct ftrace_likely_data { #define __no_kcsan __no_sanitize_thread #ifdef __SANITIZE_THREAD__ -# define __no_kcsan_or_inline __no_kcsan notrace __maybe_unused -# define __no_sanitize_or_inline __no_kcsan_or_inline -#else -# define __no_kcsan_or_inline __always_inline +# define __no_sanitize_or_inline __no_kcsan notrace __maybe_unused #endif #ifndef __no_sanitize_or_inline #define __no_sanitize_or_inline __always_inline #endif +/* Section for code which can't be instrumented at all */ +#define noinstr \ + noinline notrace __attribute((__section__(".noinstr.text"))) \ + __no_kcsan __no_sanitize_address + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ @@ -254,32 +252,8 @@ struct ftrace_likely_data { * __unqual_scalar_typeof(x) - Declare an unqualified scalar type, leaving * non-scalar types unchanged. */ -#if (defined(CONFIG_CC_IS_GCC) && CONFIG_GCC_VERSION < 40900) || defined(__CHECKER__) -/* - * We build this out of a couple of helper macros in a vain attempt to - * help you keep your lunch down while reading it. - */ -#define __pick_scalar_type(x, type, otherwise) \ - __builtin_choose_expr(__same_type(x, type), (type)0, otherwise) - -/* - * 'char' is not type-compatible with either 'signed char' or 'unsigned char', - * so we include the naked type here as well as the signed/unsigned variants. - */ -#define __pick_integer_type(x, type, otherwise) \ - __pick_scalar_type(x, type, \ - __pick_scalar_type(x, unsigned type, \ - __pick_scalar_type(x, signed type, otherwise))) - -#define __unqual_scalar_typeof(x) typeof( \ - __pick_integer_type(x, char, \ - __pick_integer_type(x, short, \ - __pick_integer_type(x, int, \ - __pick_integer_type(x, long, \ - __pick_integer_type(x, long long, x)))))) -#else /* - * If supported, prefer C11 _Generic for better compile-times. As above, 'char' + * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char' * is not type-compatible with 'signed char', and we define a separate case. */ #define __scalar_type_to_expr_cases(type) \ @@ -295,13 +269,53 @@ struct ftrace_likely_data { __scalar_type_to_expr_cases(long), \ __scalar_type_to_expr_cases(long long), \ default: (x))) -#endif /* Is this type a native word size -- useful for atomic operations */ #define __native_word(t) \ (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \ sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) +/* Compile time object size, -1 for unknown */ +#ifndef __compiletime_object_size +# define __compiletime_object_size(obj) -1 +#endif +#ifndef __compiletime_warning +# define __compiletime_warning(message) +#endif +#ifndef __compiletime_error +# define __compiletime_error(message) +#endif + +#ifdef __OPTIMIZE__ +# define __compiletime_assert(condition, msg, prefix, suffix) \ + do { \ + extern void prefix ## suffix(void) __compiletime_error(msg); \ + if (!(condition)) \ + prefix ## suffix(); \ + } while (0) +#else +# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0) +#endif + +#define _compiletime_assert(condition, msg, prefix, suffix) \ + __compiletime_assert(condition, msg, prefix, suffix) + +/** + * compiletime_assert - break build and emit msg if condition is false + * @condition: a compile-time constant condition to check + * @msg: a message to emit if condition is false + * + * In tradition of POSIX assert, this macro will break the build if the + * supplied condition is *false*, emitting the supplied error message if the + * compiler has support to do so. + */ +#define compiletime_assert(condition, msg) \ + _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) + +#define compiletime_assert_atomic_type(t) \ + compiletime_assert(__native_word(t), \ + "Need native word sized stores/loads for atomicity.") + /* Helpers for emitting diagnostics in pragmas. */ #ifndef __diag #define __diag(string) diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 8cac62ee6add..d53cd331c4dd 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -5,6 +5,8 @@ #include <linux/sched.h> #include <linux/vtime.h> #include <linux/context_tracking_state.h> +#include <linux/instrumentation.h> + #include <asm/ptrace.h> @@ -33,13 +35,13 @@ static inline void user_exit(void) } /* Called with interrupts disabled. */ -static inline void user_enter_irqoff(void) +static __always_inline void user_enter_irqoff(void) { if (context_tracking_enabled()) __context_tracking_enter(CONTEXT_USER); } -static inline void user_exit_irqoff(void) +static __always_inline void user_exit_irqoff(void) { if (context_tracking_enabled()) __context_tracking_exit(CONTEXT_USER); @@ -75,7 +77,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) * is enabled. If context tracking is disabled, returns * CONTEXT_DISABLED. This should be used primarily for debugging. */ -static inline enum ctx_state ct_state(void) +static __always_inline enum ctx_state ct_state(void) { return context_tracking_enabled() ? this_cpu_read(context_tracking.state) : CONTEXT_DISABLED; diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index e7fe6678b7ad..65a60d3313b0 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -26,12 +26,12 @@ struct context_tracking { extern struct static_key_false context_tracking_key; DECLARE_PER_CPU(struct context_tracking, context_tracking); -static inline bool context_tracking_enabled(void) +static __always_inline bool context_tracking_enabled(void) { return static_branch_unlikely(&context_tracking_key); } -static inline bool context_tracking_enabled_cpu(int cpu) +static __always_inline bool context_tracking_enabled_cpu(int cpu) { return context_tracking_enabled() && per_cpu(context_tracking.active, cpu); } @@ -41,7 +41,7 @@ static inline bool context_tracking_enabled_this_cpu(void) return context_tracking_enabled() && __this_cpu_read(context_tracking.active); } -static inline bool context_tracking_in_user(void) +static __always_inline bool context_tracking_in_user(void) { return __this_cpu_read(context_tracking.state) == CONTEXT_USER; } diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 52692587f7fe..8aa84c052fdf 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -64,6 +64,7 @@ extern ssize_t cpu_show_tsx_async_abort(struct device *dev, char *buf); extern ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index 65501d8f9778..a3bdc8a98f2c 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -63,18 +63,10 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy) struct cpuidle_driver; #ifdef CONFIG_CPU_IDLE_THERMAL -int cpuidle_cooling_register(struct cpuidle_driver *drv); -int cpuidle_of_cooling_register(struct device_node *np, - struct cpuidle_driver *drv); +void cpuidle_cooling_register(struct cpuidle_driver *drv); #else /* CONFIG_CPU_IDLE_THERMAL */ -static inline int cpuidle_cooling_register(struct cpuidle_driver *drv) +static inline void cpuidle_cooling_register(struct cpuidle_driver *drv) { - return 0; -} -static inline int cpuidle_of_cooling_register(struct device_node *np, - struct cpuidle_driver *drv) -{ - return 0; } #endif /* CONFIG_CPU_IDLE_THERMAL */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 3494f6763597..e62b022cb07e 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -577,6 +577,20 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy); int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); +#define cpufreq_governor_init(__governor) \ +static int __init __governor##_init(void) \ +{ \ + return cpufreq_register_governor(&__governor); \ +} \ +core_initcall(__governor##_init) + +#define cpufreq_governor_exit(__governor) \ +static void __exit __governor##_exit(void) \ +{ \ + return cpufreq_unregister_governor(&__governor); \ +} \ +module_exit(__governor##_exit) + struct cpufreq_governor *cpufreq_default_governor(void); struct cpufreq_governor *cpufreq_fallback_governor(void); diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index ec2ef63771f0..b65909ae4e20 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -65,10 +65,13 @@ struct cpuidle_state { * CPUs execute ->enter_s2idle with the local tick or entire timekeeping * suspended, so it must not re-enable interrupts at any point (even * temporarily) or attempt to change states of clock event devices. + * + * This callback may point to the same function as ->enter if all of + * the above requirements are met by it. */ - void (*enter_s2idle) (struct cpuidle_device *dev, - struct cpuidle_driver *drv, - int index); + int (*enter_s2idle)(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index); }; /* Idle State Flags */ diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 763863dbc079..ef90e07c9635 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -16,9 +16,8 @@ #include <linux/kernel.h> #include <linux/list.h> #include <linux/bug.h> +#include <linux/refcount.h> #include <linux/slab.h> -#include <linux/string.h> -#include <linux/uaccess.h> #include <linux/completion.h> /* @@ -61,8 +60,8 @@ #define CRYPTO_ALG_ASYNC 0x00000080 /* - * Set this bit if and only if the algorithm requires another algorithm of - * the same type to handle corner cases. + * Set if the algorithm (or an algorithm which it uses) requires another + * algorithm of the same type to handle corner cases. */ #define CRYPTO_ALG_NEED_FALLBACK 0x00000100 @@ -102,6 +101,38 @@ #define CRYPTO_NOLOAD 0x00008000 /* + * The algorithm may allocate memory during request processing, i.e. during + * encryption, decryption, or hashing. Users can request an algorithm with this + * flag unset if they can't handle memory allocation failures. + * + * This flag is currently only implemented for algorithms of type "skcipher", + * "aead", "ahash", "shash", and "cipher". Algorithms of other types might not + * have this flag set even if they allocate memory. + * + * In some edge cases, algorithms can allocate memory regardless of this flag. + * To avoid these cases, users must obey the following usage constraints: + * skcipher: + * - The IV buffer and all scatterlist elements must be aligned to the + * algorithm's alignmask. + * - If the data were to be divided into chunks of size + * crypto_skcipher_walksize() (with any remainder going at the end), no + * chunk can cross a page boundary or a scatterlist element boundary. + * aead: + * - The IV buffer and all scatterlist elements must be aligned to the + * algorithm's alignmask. + * - The first scatterlist element must contain all the associated data, + * and its pages must be !PageHighMem. + * - If the plaintext/ciphertext were to be divided into chunks of size + * crypto_aead_walksize() (with the remainder going at the end), no chunk + * can cross a page boundary or a scatterlist element boundary. + * ahash: + * - The result buffer must be aligned to the algorithm's alignmask. + * - crypto_ahash_finup() must not be used unless the algorithm implements + * ->finup() natively. + */ +#define CRYPTO_ALG_ALLOCATES_MEMORY 0x00010000 + +/* * Transform masks and values (for crt_flags). */ #define CRYPTO_TFM_NEED_KEY 0x00000001 @@ -595,6 +626,8 @@ int crypto_has_alg(const char *name, u32 type, u32 mask); struct crypto_tfm { u32 crt_flags; + + int node; void (*exit)(struct crypto_tfm *tfm); diff --git a/include/linux/dasd_mod.h b/include/linux/dasd_mod.h index d39abad2ff6e..14e6cf8c6267 100644 --- a/include/linux/dasd_mod.h +++ b/include/linux/dasd_mod.h @@ -4,6 +4,8 @@ #include <asm/dasd.h> +struct gendisk; + extern int dasd_biodasdinfo(struct gendisk *disk, dasd_information2_t *info); #endif diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 257ab3c92cb8..e7e45f0cc7da 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -12,7 +12,7 @@ extern int debug_locks __read_mostly; extern int debug_locks_silent __read_mostly; -static inline int __debug_locks_off(void) +static __always_inline int __debug_locks_off(void) { return xchg(&debug_locks, 0); } diff --git a/include/linux/decompress/unzstd.h b/include/linux/decompress/unzstd.h new file mode 100644 index 000000000000..56d539ae880f --- /dev/null +++ b/include/linux/decompress/unzstd.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_DECOMPRESS_UNZSTD_H +#define LINUX_DECOMPRESS_UNZSTD_H + +int unzstd(unsigned char *inbuf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), + unsigned char *output, + long *pos, + void (*error_fn)(char *x)); +#endif diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 57e871a559a9..12782fbb4c25 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -31,6 +31,13 @@ #define DEVFREQ_PRECHANGE (0) #define DEVFREQ_POSTCHANGE (1) +/* DEVFREQ work timers */ +enum devfreq_timer { + DEVFREQ_TIMER_DEFERRABLE = 0, + DEVFREQ_TIMER_DELAYED, + DEVFREQ_TIMER_NUM, +}; + struct devfreq; struct devfreq_governor; @@ -70,6 +77,7 @@ struct devfreq_dev_status { * @initial_freq: The operating frequency when devfreq_add_device() is * called. * @polling_ms: The polling interval in ms. 0 disables polling. + * @timer: Timer type is either deferrable or delayed timer. * @target: The device should set its operating frequency at * freq or lowest-upper-than-freq value. If freq is * higher than any operable frequency, set maximum. @@ -96,6 +104,7 @@ struct devfreq_dev_status { struct devfreq_dev_profile { unsigned long initial_freq; unsigned int polling_ms; + enum devfreq_timer timer; int (*target)(struct device *dev, unsigned long *freq, u32 flags); int (*get_dev_status)(struct device *dev, diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 8750f2dc5613..93096e524e43 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -322,12 +322,6 @@ struct dm_target { bool discards_supported:1; }; -/* Each target can link one of these into the table */ -struct dm_target_callbacks { - struct list_head list; - int (*congested_fn) (struct dm_target_callbacks *, int); -}; - void *dm_per_bio_data(struct bio *bio, size_t data_size); struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size); unsigned dm_bio_get_target_bio_nr(const struct bio *bio); @@ -426,6 +420,7 @@ const char *dm_device_name(struct mapped_device *md); int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid); struct gendisk *dm_disk(struct mapped_device *md); int dm_suspended(struct dm_target *ti); +int dm_post_suspending(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors); union map_info *dm_get_rq_mapinfo(struct request *rq); @@ -478,11 +473,6 @@ int dm_table_add_target(struct dm_table *t, const char *type, sector_t start, sector_t len, char *params); /* - * Target_ctr should call this if it needs to add any callbacks. - */ -void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb); - -/* * Target can use this to set the table's type. * Can only ever be called from a target's ctr. * Useful for "hybrid" target (supports both bio-based diff --git a/include/linux/device.h b/include/linux/device.h index 15460a5ac024..06d3b7622082 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -13,6 +13,7 @@ #define _DEVICE_H_ #include <linux/dev_printk.h> +#include <linux/energy_model.h> #include <linux/ioport.h> #include <linux/kobject.h> #include <linux/klist.h> @@ -433,7 +434,8 @@ enum dl_dev_state { * @suppliers: List of links to supplier devices. * @consumers: List of links to consumer devices. * @needs_suppliers: Hook to global list of devices waiting for suppliers. - * @defer_sync: Hook to global list of devices that have deferred sync_state. + * @defer_hook: Hook to global list of devices that have deferred sync_state or + * deferred fw_devlink. * @need_for_probe: If needs_suppliers is on a list, this indicates if the * suppliers are needed for probe or not. * @status: Driver status information. @@ -442,7 +444,7 @@ struct dev_links_info { struct list_head suppliers; struct list_head consumers; struct list_head needs_suppliers; - struct list_head defer_sync; + struct list_head defer_hook; bool need_for_probe; enum dl_dev_state status; }; @@ -523,6 +525,11 @@ struct dev_links_info { * sync_state() callback. * @dma_coherent: this particular device is dma coherent, even if the * architecture supports non-coherent devices. + * @dma_ops_bypass: If set to %true then the dma_ops are bypassed for the + * streaming DMA operations (->map_* / ->unmap_* / ->sync_*), + * and optionall (if the coherent mask is large enough) also + * for dma allocations. This flag is managed by the dma ops + * instance from ->dma_supported. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. The device structure contains the information @@ -559,6 +566,10 @@ struct device { struct dev_pm_info power; struct dev_pm_domain *pm_domain; +#ifdef CONFIG_ENERGY_MODEL + struct em_perf_domain *em_pd; +#endif + #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN struct irq_domain *msi_domain; #endif @@ -568,8 +579,9 @@ struct device { #ifdef CONFIG_GENERIC_MSI_IRQ struct list_head msi_list; #endif - +#ifdef CONFIG_DMA_OPS const struct dma_map_ops *dma_ops; +#endif u64 *dma_mask; /* dma mask (if dma'able device) */ u64 coherent_dma_mask;/* Like dma_mask, but for alloc_coherent mappings as @@ -622,6 +634,9 @@ struct device { defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) bool dma_coherent:1; #endif +#ifdef CONFIG_DMA_OPS_BYPASS + bool dma_ops_bypass : 1; +#endif }; static inline struct device *kobj_to_dev(struct kobject *kobj) diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index ab0c156abee6..a2ca294eaebe 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -311,6 +311,7 @@ struct dma_buf { void *vmap_ptr; const char *exp_name; const char *name; + spinlock_t name_lock; /* spinlock to protect name access */ struct module *owner; struct list_head list_node; void *priv; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 136f984df0d9..5a3ce2a24794 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -1,10 +1,16 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Internals of the DMA direct mapping implementation. Only for use by the + * DMA mapping code and IOMMU drivers. + */ #ifndef _LINUX_DMA_DIRECT_H #define _LINUX_DMA_DIRECT_H 1 #include <linux/dma-mapping.h> +#include <linux/dma-noncoherent.h> #include <linux/memblock.h> /* for min_low_pfn */ #include <linux/mem_encrypt.h> +#include <linux/swiotlb.h> extern unsigned int zone_dma_bits; @@ -69,6 +75,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, u64 dma_direct_get_required_mask(struct device *dev); gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, u64 *phys_mask); +bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size); void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, @@ -77,8 +84,6 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); -struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, - gfp_t gfp, unsigned long attrs); int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); @@ -87,4 +92,103 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); int dma_direct_supported(struct device *dev, u64 mask); +bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); +int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs); +dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir, unsigned long attrs); +size_t dma_direct_max_mapping_size(struct device *dev); + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_SWIOTLB) +void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir); +#else +static inline void dma_direct_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ +} +#endif + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ + defined(CONFIG_SWIOTLB) +void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs); +void dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir); +#else +static inline void dma_direct_unmap_sg(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline void dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ +} +#endif + +static inline void dma_direct_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr = dma_to_phys(dev, addr); + + if (unlikely(is_swiotlb_buffer(paddr))) + swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); + + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_device(paddr, size, dir); +} + +static inline void dma_direct_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr = dma_to_phys(dev, addr); + + if (!dev_is_dma_coherent(dev)) { + arch_sync_dma_for_cpu(paddr, size, dir); + arch_sync_dma_for_cpu_all(); + } + + if (unlikely(is_swiotlb_buffer(paddr))) + swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); +} + +static inline dma_addr_t dma_direct_map_page(struct device *dev, + struct page *page, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + dma_addr_t dma_addr = phys_to_dma(dev, phys); + + if (unlikely(swiotlb_force == SWIOTLB_FORCE)) + return swiotlb_map(dev, phys, size, dir, attrs); + + if (unlikely(!dma_capable(dev, dma_addr, size, true))) { + if (swiotlb_force != SWIOTLB_NO_FORCE) + return swiotlb_map(dev, phys, size, dir, attrs); + + dev_WARN_ONCE(dev, 1, + "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", + &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); + return DMA_MAPPING_ERROR; + } + + if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_device(phys, size, dir); + return dma_addr; +} + +static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys = dma_to_phys(dev, addr); + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_direct_sync_single_for_cpu(dev, addr, size, dir); + + if (unlikely(is_swiotlb_buffer(phys))) + swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs); +} #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 78f677cf45ab..39da883c8619 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -188,76 +188,10 @@ static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, } #endif /* CONFIG_DMA_DECLARE_COHERENT */ -static inline bool dma_is_direct(const struct dma_map_ops *ops) -{ - return likely(!ops); -} - -/* - * All the dma_direct_* declarations are here just for the indirect call bypass, - * and must not be used directly drivers! - */ -dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs); -int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, unsigned long attrs); -dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir, unsigned long attrs); - -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ - defined(CONFIG_SWIOTLB) -void dma_direct_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir); -void dma_direct_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir); -#else -static inline void dma_direct_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ -} -static inline void dma_direct_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ -} -#endif - -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ - defined(CONFIG_SWIOTLB) -void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs); -void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs); -void dma_direct_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir); -void dma_direct_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir); -#else -static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ -} -static inline void dma_direct_unmap_sg(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir, - unsigned long attrs) -{ -} -static inline void dma_direct_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ -} -static inline void dma_direct_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ -} -#endif - -size_t dma_direct_max_mapping_size(struct device *dev); - #ifdef CONFIG_HAS_DMA #include <asm/dma-mapping.h> +#ifdef CONFIG_DMA_OPS static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { if (dev->dma_ops) @@ -270,165 +204,16 @@ static inline void set_dma_ops(struct device *dev, { dev->dma_ops = dma_ops; } - -static inline dma_addr_t dma_map_page_attrs(struct device *dev, - struct page *page, size_t offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - dma_addr_t addr; - - BUG_ON(!valid_dma_direction(dir)); - if (dma_is_direct(ops)) - addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); - else - addr = ops->map_page(dev, page, offset, size, dir, attrs); - debug_dma_map_page(dev, page, offset, size, dir, addr); - - return addr; -} - -static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (dma_is_direct(ops)) - dma_direct_unmap_page(dev, addr, size, dir, attrs); - else if (ops->unmap_page) - ops->unmap_page(dev, addr, size, dir, attrs); - debug_dma_unmap_page(dev, addr, size, dir); -} - -/* - * dma_maps_sg_attrs returns 0 on error and > 0 on success. - * It should never return a value < 0. - */ -static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - int ents; - - BUG_ON(!valid_dma_direction(dir)); - if (dma_is_direct(ops)) - ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); - else - ents = ops->map_sg(dev, sg, nents, dir, attrs); - BUG_ON(ents < 0); - debug_dma_map_sg(dev, sg, nents, ents, dir); - - return ents; -} - -static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - debug_dma_unmap_sg(dev, sg, nents, dir); - if (dma_is_direct(ops)) - dma_direct_unmap_sg(dev, sg, nents, dir, attrs); - else if (ops->unmap_sg) - ops->unmap_sg(dev, sg, nents, dir, attrs); -} - -static inline dma_addr_t dma_map_resource(struct device *dev, - phys_addr_t phys_addr, - size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - dma_addr_t addr = DMA_MAPPING_ERROR; - - BUG_ON(!valid_dma_direction(dir)); - - /* Don't allow RAM to be mapped */ - if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr)))) - return DMA_MAPPING_ERROR; - - if (dma_is_direct(ops)) - addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs); - else if (ops->map_resource) - addr = ops->map_resource(dev, phys_addr, size, dir, attrs); - - debug_dma_map_resource(dev, phys_addr, size, dir, addr); - return addr; -} - -static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (!dma_is_direct(ops) && ops->unmap_resource) - ops->unmap_resource(dev, addr, size, dir, attrs); - debug_dma_unmap_resource(dev, addr, size, dir); -} - -static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, - size_t size, - enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (dma_is_direct(ops)) - dma_direct_sync_single_for_cpu(dev, addr, size, dir); - else if (ops->sync_single_for_cpu) - ops->sync_single_for_cpu(dev, addr, size, dir); - debug_dma_sync_single_for_cpu(dev, addr, size, dir); -} - -static inline void dma_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, - enum dma_data_direction dir) +#else /* CONFIG_DMA_OPS */ +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (dma_is_direct(ops)) - dma_direct_sync_single_for_device(dev, addr, size, dir); - else if (ops->sync_single_for_device) - ops->sync_single_for_device(dev, addr, size, dir); - debug_dma_sync_single_for_device(dev, addr, size, dir); + return NULL; } - -static inline void -dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) +static inline void set_dma_ops(struct device *dev, + const struct dma_map_ops *dma_ops) { - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (dma_is_direct(ops)) - dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir); - else if (ops->sync_sg_for_cpu) - ops->sync_sg_for_cpu(dev, sg, nelems, dir); - debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); -} - -static inline void -dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (dma_is_direct(ops)) - dma_direct_sync_sg_for_device(dev, sg, nelems, dir); - else if (ops->sync_sg_for_device) - ops->sync_sg_for_device(dev, sg, nelems, dir); - debug_dma_sync_sg_for_device(dev, sg, nelems, dir); - } +#endif /* CONFIG_DMA_OPS */ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { @@ -439,6 +224,28 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) return 0; } +dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, + size_t offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); +void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs); +int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); +void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + unsigned long attrs); +dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs); +void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir); +void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir); +void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir); +void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir); void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs); void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, @@ -461,6 +268,7 @@ int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); size_t dma_max_mapping_size(struct device *dev); +bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); unsigned long dma_get_merge_boundary(struct device *dev); #else /* CONFIG_HAS_DMA */ static inline dma_addr_t dma_map_page_attrs(struct device *dev, @@ -571,6 +379,10 @@ static inline size_t dma_max_mapping_size(struct device *dev) { return 0; } +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + return false; +} static inline unsigned long dma_get_merge_boundary(struct device *dev) { return 0; diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index e1c03339918f..6283917edd90 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -153,7 +153,7 @@ struct dma_interleaved_template { bool dst_sgl; size_t numf; size_t frame_size; - struct data_chunk sgl[0]; + struct data_chunk sgl[]; }; /** @@ -535,7 +535,7 @@ struct dmaengine_unmap_data { struct device *dev; struct kref kref; size_t len; - dma_addr_t addr[0]; + dma_addr_t addr[]; }; struct dma_async_tx_descriptor; diff --git a/include/linux/edac.h b/include/linux/edac.h index 0f20b986b0ab..15e8f3d8a895 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -31,14 +31,6 @@ struct device; extern int edac_op_state; struct bus_type *edac_get_sysfs_subsys(void); -int edac_get_report_status(void); -void edac_set_report_status(int new); - -enum { - EDAC_REPORTING_ENABLED, - EDAC_REPORTING_DISABLED, - EDAC_REPORTING_FORCE -}; static inline void opstate_init(void) { @@ -603,27 +595,6 @@ struct mem_ctl_info { : NULL) /** - * edac_get_dimm_by_index - Get DIMM info at @index from a memory - * controller - * - * @mci: MC descriptor struct mem_ctl_info - * @index: index in the memory controller's DIMM array - * - * Returns a struct dimm_info * or NULL on failure. - */ -static inline struct dimm_info * -edac_get_dimm_by_index(struct mem_ctl_info *mci, int index) -{ - if (index < 0 || index >= mci->tot_dimms) - return NULL; - - if (WARN_ON_ONCE(mci->dimms[index]->idx != index)) - return NULL; - - return mci->dimms[index]; -} - -/** * edac_get_dimm - Get DIMM info from a memory controller given by * [layer0,layer1,layer2] position * @@ -658,6 +629,12 @@ static inline struct dimm_info *edac_get_dimm(struct mem_ctl_info *mci, if (mci->n_layers > 2) index = index * mci->layers[2].size + layer2; - return edac_get_dimm_by_index(mci, index); + if (index < 0 || index >= mci->tot_dimms) + return NULL; + + if (WARN_ON_ONCE(mci->dimms[index]->idx != index)) + return NULL; + + return mci->dimms[index]; } #endif /* _LINUX_EDAC_H_ */ diff --git a/include/linux/efi.h b/include/linux/efi.h index 2c6495f72f79..05c47f857383 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -350,6 +350,7 @@ void efi_native_runtime_setup(void); * associated with ConOut */ #define LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, 0xb9, 0x0e, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95) +#define LINUX_EFI_ARM_CPU_STATE_TABLE_GUID EFI_GUID(0xef79e4aa, 0x3c3d, 0x4989, 0xb9, 0x02, 0x07, 0xa9, 0x43, 0xe5, 0x50, 0xd2) #define LINUX_EFI_LOADER_ENTRY_GUID EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f) #define LINUX_EFI_RANDOM_SEED_TABLE_GUID EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2, 0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b) #define LINUX_EFI_TPM_EVENT_LOG_GUID EFI_GUID(0xb7799cb0, 0xeca2, 0x4943, 0x96, 0x67, 0x1f, 0xae, 0x07, 0xb7, 0x47, 0xfa) @@ -993,6 +994,7 @@ int efivars_register(struct efivars *efivars, int efivars_unregister(struct efivars *efivars); struct kobject *efivars_kobject(void); +int efivar_supports_writes(void); int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), void *data, bool duplicates, struct list_head *head); @@ -1236,14 +1238,11 @@ struct linux_efi_memreserve { struct { phys_addr_t base; phys_addr_t size; - } entry[0]; + } entry[]; }; -#define EFI_MEMRESERVE_SIZE(count) (sizeof(struct linux_efi_memreserve) + \ - (count) * sizeof(((struct linux_efi_memreserve *)0)->entry[0])) - #define EFI_MEMRESERVE_COUNT(size) (((size) - sizeof(struct linux_efi_memreserve)) \ - / sizeof(((struct linux_efi_memreserve *)0)->entry[0])) + / sizeof_field(struct linux_efi_memreserve, entry[0])) void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size); diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index ade6486a3382..b67a51c574b9 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -2,6 +2,7 @@ #ifndef _LINUX_ENERGY_MODEL_H #define _LINUX_ENERGY_MODEL_H #include <linux/cpumask.h> +#include <linux/device.h> #include <linux/jump_label.h> #include <linux/kobject.h> #include <linux/rcupdate.h> @@ -10,13 +11,15 @@ #include <linux/types.h> /** - * em_cap_state - Capacity state of a performance domain - * @frequency: The CPU frequency in KHz, for consistency with CPUFreq - * @power: The power consumed by 1 CPU at this level, in milli-watts + * em_perf_state - Performance state of a performance domain + * @frequency: The frequency in KHz, for consistency with CPUFreq + * @power: The power consumed at this level, in milli-watts (by 1 CPU or + by a registered device). It can be a total power: static and + dynamic. * @cost: The cost coefficient associated with this level, used during * energy calculation. Equal to: power * max_frequency / frequency */ -struct em_cap_state { +struct em_perf_state { unsigned long frequency; unsigned long power; unsigned long cost; @@ -24,102 +27,119 @@ struct em_cap_state { /** * em_perf_domain - Performance domain - * @table: List of capacity states, in ascending order - * @nr_cap_states: Number of capacity states - * @cpus: Cpumask covering the CPUs of the domain + * @table: List of performance states, in ascending order + * @nr_perf_states: Number of performance states + * @cpus: Cpumask covering the CPUs of the domain. It's here + * for performance reasons to avoid potential cache + * misses during energy calculations in the scheduler + * and simplifies allocating/freeing that memory region. * - * A "performance domain" represents a group of CPUs whose performance is - * scaled together. All CPUs of a performance domain must have the same - * micro-architecture. Performance domains often have a 1-to-1 mapping with - * CPUFreq policies. + * In case of CPU device, a "performance domain" represents a group of CPUs + * whose performance is scaled together. All CPUs of a performance domain + * must have the same micro-architecture. Performance domains often have + * a 1-to-1 mapping with CPUFreq policies. In case of other devices the @cpus + * field is unused. */ struct em_perf_domain { - struct em_cap_state *table; - int nr_cap_states; + struct em_perf_state *table; + int nr_perf_states; unsigned long cpus[]; }; +#define em_span_cpus(em) (to_cpumask((em)->cpus)) + #ifdef CONFIG_ENERGY_MODEL -#define EM_CPU_MAX_POWER 0xFFFF +#define EM_MAX_POWER 0xFFFF struct em_data_callback { /** - * active_power() - Provide power at the next capacity state of a CPU - * @power : Active power at the capacity state in mW (modified) - * @freq : Frequency at the capacity state in kHz (modified) - * @cpu : CPU for which we do this operation + * active_power() - Provide power at the next performance state of + * a device + * @power : Active power at the performance state in mW + * (modified) + * @freq : Frequency at the performance state in kHz + * (modified) + * @dev : Device for which we do this operation (can be a CPU) * - * active_power() must find the lowest capacity state of 'cpu' above + * active_power() must find the lowest performance state of 'dev' above * 'freq' and update 'power' and 'freq' to the matching active power * and frequency. * - * The power is the one of a single CPU in the domain, expressed in - * milli-watts. It is expected to fit in the [0, EM_CPU_MAX_POWER] - * range. + * In case of CPUs, the power is the one of a single CPU in the domain, + * expressed in milli-watts. It is expected to fit in the + * [0, EM_MAX_POWER] range. * * Return 0 on success. */ - int (*active_power)(unsigned long *power, unsigned long *freq, int cpu); + int (*active_power)(unsigned long *power, unsigned long *freq, + struct device *dev); }; #define EM_DATA_CB(_active_power_cb) { .active_power = &_active_power_cb } struct em_perf_domain *em_cpu_get(int cpu); -int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, - struct em_data_callback *cb); +struct em_perf_domain *em_pd_get(struct device *dev); +int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, + struct em_data_callback *cb, cpumask_t *span); +void em_dev_unregister_perf_domain(struct device *dev); /** - * em_pd_energy() - Estimates the energy consumed by the CPUs of a perf. domain + * em_cpu_energy() - Estimates the energy consumed by the CPUs of a + performance domain * @pd : performance domain for which energy has to be estimated * @max_util : highest utilization among CPUs of the domain * @sum_util : sum of the utilization of all CPUs in the domain * + * This function must be used only for CPU devices. There is no validation, + * i.e. if the EM is a CPU type and has cpumask allocated. It is called from + * the scheduler code quite frequently and that is why there is not checks. + * * Return: the sum of the energy consumed by the CPUs of the domain assuming * a capacity state satisfying the max utilization of the domain. */ -static inline unsigned long em_pd_energy(struct em_perf_domain *pd, +static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, unsigned long max_util, unsigned long sum_util) { unsigned long freq, scale_cpu; - struct em_cap_state *cs; + struct em_perf_state *ps; int i, cpu; /* - * In order to predict the capacity state, map the utilization of the - * most utilized CPU of the performance domain to a requested frequency, - * like schedutil. + * In order to predict the performance state, map the utilization of + * the most utilized CPU of the performance domain to a requested + * frequency, like schedutil. */ cpu = cpumask_first(to_cpumask(pd->cpus)); scale_cpu = arch_scale_cpu_capacity(cpu); - cs = &pd->table[pd->nr_cap_states - 1]; - freq = map_util_freq(max_util, cs->frequency, scale_cpu); + ps = &pd->table[pd->nr_perf_states - 1]; + freq = map_util_freq(max_util, ps->frequency, scale_cpu); /* - * Find the lowest capacity state of the Energy Model above the + * Find the lowest performance state of the Energy Model above the * requested frequency. */ - for (i = 0; i < pd->nr_cap_states; i++) { - cs = &pd->table[i]; - if (cs->frequency >= freq) + for (i = 0; i < pd->nr_perf_states; i++) { + ps = &pd->table[i]; + if (ps->frequency >= freq) break; } /* - * The capacity of a CPU in the domain at that capacity state (cs) + * The capacity of a CPU in the domain at the performance state (ps) * can be computed as: * - * cs->freq * scale_cpu - * cs->cap = -------------------- (1) + * ps->freq * scale_cpu + * ps->cap = -------------------- (1) * cpu_max_freq * * So, ignoring the costs of idle states (which are not available in - * the EM), the energy consumed by this CPU at that capacity state is - * estimated as: + * the EM), the energy consumed by this CPU at that performance state + * is estimated as: * - * cs->power * cpu_util + * ps->power * cpu_util * cpu_nrg = -------------------- (2) - * cs->cap + * ps->cap * - * since 'cpu_util / cs->cap' represents its percentage of busy time. + * since 'cpu_util / ps->cap' represents its percentage of busy time. * * NOTE: Although the result of this computation actually is in * units of power, it can be manipulated as an energy value @@ -129,55 +149,64 @@ static inline unsigned long em_pd_energy(struct em_perf_domain *pd, * By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product * of two terms: * - * cs->power * cpu_max_freq cpu_util + * ps->power * cpu_max_freq cpu_util * cpu_nrg = ------------------------ * --------- (3) - * cs->freq scale_cpu + * ps->freq scale_cpu * - * The first term is static, and is stored in the em_cap_state struct - * as 'cs->cost'. + * The first term is static, and is stored in the em_perf_state struct + * as 'ps->cost'. * * Since all CPUs of the domain have the same micro-architecture, they - * share the same 'cs->cost', and the same CPU capacity. Hence, the + * share the same 'ps->cost', and the same CPU capacity. Hence, the * total energy of the domain (which is the simple sum of the energy of * all of its CPUs) can be factorized as: * - * cs->cost * \Sum cpu_util + * ps->cost * \Sum cpu_util * pd_nrg = ------------------------ (4) * scale_cpu */ - return cs->cost * sum_util / scale_cpu; + return ps->cost * sum_util / scale_cpu; } /** - * em_pd_nr_cap_states() - Get the number of capacity states of a perf. domain + * em_pd_nr_perf_states() - Get the number of performance states of a perf. + * domain * @pd : performance domain for which this must be done * - * Return: the number of capacity states in the performance domain table + * Return: the number of performance states in the performance domain table */ -static inline int em_pd_nr_cap_states(struct em_perf_domain *pd) +static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) { - return pd->nr_cap_states; + return pd->nr_perf_states; } #else struct em_data_callback {}; #define EM_DATA_CB(_active_power_cb) { } -static inline int em_register_perf_domain(cpumask_t *span, - unsigned int nr_states, struct em_data_callback *cb) +static inline +int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, + struct em_data_callback *cb, cpumask_t *span) { return -EINVAL; } +static inline void em_dev_unregister_perf_domain(struct device *dev) +{ +} static inline struct em_perf_domain *em_cpu_get(int cpu) { return NULL; } -static inline unsigned long em_pd_energy(struct em_perf_domain *pd, +static inline struct em_perf_domain *em_pd_get(struct device *dev) +{ + return NULL; +} +static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, unsigned long max_util, unsigned long sum_util) { return 0; } -static inline int em_pd_nr_cap_states(struct em_perf_domain *pd) +static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) { return 0; } diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h new file mode 100644 index 000000000000..efebbffcd5cc --- /dev/null +++ b/include/linux/entry-common.h @@ -0,0 +1,372 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_ENTRYCOMMON_H +#define __LINUX_ENTRYCOMMON_H + +#include <linux/tracehook.h> +#include <linux/syscalls.h> +#include <linux/seccomp.h> +#include <linux/sched.h> + +#include <asm/entry-common.h> + +/* + * Define dummy _TIF work flags if not defined by the architecture or for + * disabled functionality. + */ +#ifndef _TIF_SYSCALL_EMU +# define _TIF_SYSCALL_EMU (0) +#endif + +#ifndef _TIF_SYSCALL_TRACEPOINT +# define _TIF_SYSCALL_TRACEPOINT (0) +#endif + +#ifndef _TIF_SECCOMP +# define _TIF_SECCOMP (0) +#endif + +#ifndef _TIF_SYSCALL_AUDIT +# define _TIF_SYSCALL_AUDIT (0) +#endif + +#ifndef _TIF_PATCH_PENDING +# define _TIF_PATCH_PENDING (0) +#endif + +#ifndef _TIF_UPROBE +# define _TIF_UPROBE (0) +#endif + +/* + * TIF flags handled in syscall_enter_from_usermode() + */ +#ifndef ARCH_SYSCALL_ENTER_WORK +# define ARCH_SYSCALL_ENTER_WORK (0) +#endif + +#define SYSCALL_ENTER_WORK \ + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU | \ + ARCH_SYSCALL_ENTER_WORK) + +/* + * TIF flags handled in syscall_exit_to_user_mode() + */ +#ifndef ARCH_SYSCALL_EXIT_WORK +# define ARCH_SYSCALL_EXIT_WORK (0) +#endif + +#define SYSCALL_EXIT_WORK \ + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + _TIF_SYSCALL_TRACEPOINT | ARCH_SYSCALL_EXIT_WORK) + +/* + * TIF flags handled in exit_to_user_mode_loop() + */ +#ifndef ARCH_EXIT_TO_USER_MODE_WORK +# define ARCH_EXIT_TO_USER_MODE_WORK (0) +#endif + +#define EXIT_TO_USER_MODE_WORK \ + (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | \ + ARCH_EXIT_TO_USER_MODE_WORK) + +/** + * arch_check_user_regs - Architecture specific sanity check for user mode regs + * @regs: Pointer to currents pt_regs + * + * Defaults to an empty implementation. Can be replaced by architecture + * specific code. + * + * Invoked from syscall_enter_from_user_mode() in the non-instrumentable + * section. Use __always_inline so the compiler cannot push it out of line + * and make it instrumentable. + */ +static __always_inline void arch_check_user_regs(struct pt_regs *regs); + +#ifndef arch_check_user_regs +static __always_inline void arch_check_user_regs(struct pt_regs *regs) {} +#endif + +/** + * arch_syscall_enter_tracehook - Wrapper around tracehook_report_syscall_entry() + * @regs: Pointer to currents pt_regs + * + * Returns: 0 on success or an error code to skip the syscall. + * + * Defaults to tracehook_report_syscall_entry(). Can be replaced by + * architecture specific code. + * + * Invoked from syscall_enter_from_user_mode() + */ +static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs); + +#ifndef arch_syscall_enter_tracehook +static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs) +{ + return tracehook_report_syscall_entry(regs); +} +#endif + +/** + * syscall_enter_from_user_mode - Check and handle work before invoking + * a syscall + * @regs: Pointer to currents pt_regs + * @syscall: The syscall number + * + * Invoked from architecture specific syscall entry code with interrupts + * disabled. The calling code has to be non-instrumentable. When the + * function returns all state is correct and the subsequent functions can be + * instrumented. + * + * Returns: The original or a modified syscall number + * + * If the returned syscall number is -1 then the syscall should be + * skipped. In this case the caller may invoke syscall_set_error() or + * syscall_set_return_value() first. If neither of those are called and -1 + * is returned, then the syscall will fail with ENOSYS. + * + * The following functionality is handled here: + * + * 1) Establish state (lockdep, RCU (context tracking), tracing) + * 2) TIF flag dependent invocations of arch_syscall_enter_tracehook(), + * __secure_computing(), trace_sys_enter() + * 3) Invocation of audit_syscall_entry() + */ +long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall); + +/** + * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable() + * @ti_work: Cached TIF flags gathered with interrupts disabled + * + * Defaults to local_irq_enable(). Can be supplied by architecture specific + * code. + */ +static inline void local_irq_enable_exit_to_user(unsigned long ti_work); + +#ifndef local_irq_enable_exit_to_user +static inline void local_irq_enable_exit_to_user(unsigned long ti_work) +{ + local_irq_enable(); +} +#endif + +/** + * local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable() + * + * Defaults to local_irq_disable(). Can be supplied by architecture specific + * code. + */ +static inline void local_irq_disable_exit_to_user(void); + +#ifndef local_irq_disable_exit_to_user +static inline void local_irq_disable_exit_to_user(void) +{ + local_irq_disable(); +} +#endif + +/** + * arch_exit_to_user_mode_work - Architecture specific TIF work for exit + * to user mode. + * @regs: Pointer to currents pt_regs + * @ti_work: Cached TIF flags gathered with interrupts disabled + * + * Invoked from exit_to_user_mode_loop() with interrupt enabled + * + * Defaults to NOOP. Can be supplied by architecture specific code. + */ +static inline void arch_exit_to_user_mode_work(struct pt_regs *regs, + unsigned long ti_work); + +#ifndef arch_exit_to_user_mode_work +static inline void arch_exit_to_user_mode_work(struct pt_regs *regs, + unsigned long ti_work) +{ +} +#endif + +/** + * arch_exit_to_user_mode_prepare - Architecture specific preparation for + * exit to user mode. + * @regs: Pointer to currents pt_regs + * @ti_work: Cached TIF flags gathered with interrupts disabled + * + * Invoked from exit_to_user_mode_prepare() with interrupt disabled as the last + * function before return. Defaults to NOOP. + */ +static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, + unsigned long ti_work); + +#ifndef arch_exit_to_user_mode_prepare +static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, + unsigned long ti_work) +{ +} +#endif + +/** + * arch_exit_to_user_mode - Architecture specific final work before + * exit to user mode. + * + * Invoked from exit_to_user_mode() with interrupt disabled as the last + * function before return. Defaults to NOOP. + * + * This needs to be __always_inline because it is non-instrumentable code + * invoked after context tracking switched to user mode. + * + * An architecture implementation must not do anything complex, no locking + * etc. The main purpose is for speculation mitigations. + */ +static __always_inline void arch_exit_to_user_mode(void); + +#ifndef arch_exit_to_user_mode +static __always_inline void arch_exit_to_user_mode(void) { } +#endif + +/** + * arch_do_signal - Architecture specific signal delivery function + * @regs: Pointer to currents pt_regs + * + * Invoked from exit_to_user_mode_loop(). + */ +void arch_do_signal(struct pt_regs *regs); + +/** + * arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit() + * @regs: Pointer to currents pt_regs + * @step: Indicator for single step + * + * Defaults to tracehook_report_syscall_exit(). Can be replaced by + * architecture specific code. + * + * Invoked from syscall_exit_to_user_mode() + */ +static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step); + +#ifndef arch_syscall_exit_tracehook +static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step) +{ + tracehook_report_syscall_exit(regs, step); +} +#endif + +/** + * syscall_exit_to_user_mode - Handle work before returning to user mode + * @regs: Pointer to currents pt_regs + * + * Invoked with interrupts enabled and fully valid regs. Returns with all + * work handled, interrupts disabled such that the caller can immediately + * switch to user mode. Called from architecture specific syscall and ret + * from fork code. + * + * The call order is: + * 1) One-time syscall exit work: + * - rseq syscall exit + * - audit + * - syscall tracing + * - tracehook (single stepping) + * + * 2) Preparatory work + * - Exit to user mode loop (common TIF handling). Invokes + * arch_exit_to_user_mode_work() for architecture specific TIF work + * - Architecture specific one time work arch_exit_to_user_mode_prepare() + * - Address limit and lockdep checks + * + * 3) Final transition (lockdep, tracing, context tracking, RCU). Invokes + * arch_exit_to_user_mode() to handle e.g. speculation mitigations + */ +void syscall_exit_to_user_mode(struct pt_regs *regs); + +/** + * irqentry_enter_from_user_mode - Establish state before invoking the irq handler + * @regs: Pointer to currents pt_regs + * + * Invoked from architecture specific entry code with interrupts disabled. + * Can only be called when the interrupt entry came from user mode. The + * calling code must be non-instrumentable. When the function returns all + * state is correct and the subsequent functions can be instrumented. + * + * The function establishes state (lockdep, RCU (context tracking), tracing) + */ +void irqentry_enter_from_user_mode(struct pt_regs *regs); + +/** + * irqentry_exit_to_user_mode - Interrupt exit work + * @regs: Pointer to current's pt_regs + * + * Invoked with interrupts disbled and fully valid regs. Returns with all + * work handled, interrupts disabled such that the caller can immediately + * switch to user mode. Called from architecture specific interrupt + * handling code. + * + * The call order is #2 and #3 as described in syscall_exit_to_user_mode(). + * Interrupt exit is not invoking #1 which is the syscall specific one time + * work. + */ +void irqentry_exit_to_user_mode(struct pt_regs *regs); + +#ifndef irqentry_state +typedef struct irqentry_state { + bool exit_rcu; +} irqentry_state_t; +#endif + +/** + * irqentry_enter - Handle state tracking on ordinary interrupt entries + * @regs: Pointer to pt_regs of interrupted context + * + * Invokes: + * - lockdep irqflag state tracking as low level ASM entry disabled + * interrupts. + * + * - Context tracking if the exception hit user mode. + * + * - The hardirq tracer to keep the state consistent as low level ASM + * entry disabled interrupts. + * + * As a precondition, this requires that the entry came from user mode, + * idle, or a kernel context in which RCU is watching. + * + * For kernel mode entries RCU handling is done conditional. If RCU is + * watching then the only RCU requirement is to check whether the tick has + * to be restarted. If RCU is not watching then rcu_irq_enter() has to be + * invoked on entry and rcu_irq_exit() on exit. + * + * Avoiding the rcu_irq_enter/exit() calls is an optimization but also + * solves the problem of kernel mode pagefaults which can schedule, which + * is not possible after invoking rcu_irq_enter() without undoing it. + * + * For user mode entries irqentry_enter_from_user_mode() is invoked to + * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit + * would not be possible. + * + * Returns: An opaque object that must be passed to idtentry_exit() + */ +irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs); + +/** + * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt + * + * Conditional reschedule with additional sanity checks. + */ +void irqentry_exit_cond_resched(void); + +/** + * irqentry_exit - Handle return from exception that used irqentry_enter() + * @regs: Pointer to pt_regs (exception entry regs) + * @state: Return value from matching call to irqentry_enter() + * + * Depending on the return target (kernel/user) this runs the necessary + * preemption and work checks if possible and reguired and returns to + * the caller with interrupts disabled and no further work pending. + * + * This is the last action before returning to the low level ASM code which + * just needs to return to the appropriate context. + * + * Counterpart to irqentry_enter(). + */ +void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state); + +#endif diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h new file mode 100644 index 000000000000..0cef17afb41a --- /dev/null +++ b/include/linux/entry-kvm.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_ENTRYKVM_H +#define __LINUX_ENTRYKVM_H + +#include <linux/entry-common.h> + +/* Transfer to guest mode work */ +#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK + +#ifndef ARCH_XFER_TO_GUEST_MODE_WORK +# define ARCH_XFER_TO_GUEST_MODE_WORK (0) +#endif + +#define XFER_TO_GUEST_MODE_WORK \ + (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ + _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK) + +struct kvm_vcpu; + +/** + * arch_xfer_to_guest_mode_handle_work - Architecture specific xfer to guest + * mode work handling function. + * @vcpu: Pointer to current's VCPU data + * @ti_work: Cached TIF flags gathered in xfer_to_guest_mode_handle_work() + * + * Invoked from xfer_to_guest_mode_handle_work(). Defaults to NOOP. Can be + * replaced by architecture specific code. + */ +static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, + unsigned long ti_work); + +#ifndef arch_xfer_to_guest_mode_work +static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, + unsigned long ti_work) +{ + return 0; +} +#endif + +/** + * xfer_to_guest_mode_handle_work - Check and handle pending work which needs + * to be handled before going to guest mode + * @vcpu: Pointer to current's VCPU data + * + * Returns: 0 or an error code + */ +int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu); + +/** + * __xfer_to_guest_mode_work_pending - Check if work is pending + * + * Returns: True if work pending, False otherwise. + * + * Bare variant of xfer_to_guest_mode_work_pending(). Can be called from + * interrupt enabled code for racy quick checks with care. + */ +static inline bool __xfer_to_guest_mode_work_pending(void) +{ + unsigned long ti_work = READ_ONCE(current_thread_info()->flags); + + return !!(ti_work & XFER_TO_GUEST_MODE_WORK); +} + +/** + * xfer_to_guest_mode_work_pending - Check if work is pending which needs to be + * handled before returning to guest mode + * + * Returns: True if work pending, False otherwise. + * + * Has to be invoked with interrupts disabled before the transition to + * guest mode. + */ +static inline bool xfer_to_guest_mode_work_pending(void) +{ + lockdep_assert_irqs_disabled(); + return __xfer_to_guest_mode_work_pending(); +} +#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ + +#endif diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index f07c55ea0c22..a32bf47c593e 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -22,6 +22,7 @@ * as this is the granularity returned by copy_fdset(). */ #define NR_OPEN_DEFAULT BITS_PER_LONG +#define NR_OPEN_MAX ~0U struct fdtable { unsigned int max_fds; @@ -109,7 +110,7 @@ struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); void reset_files_struct(struct files_struct *); int unshare_files(struct files_struct **); -struct files_struct *dup_fd(struct files_struct *, int *) __latent_entropy; +struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy; void do_close_on_exec(struct files_struct *); int iterate_fd(struct files_struct *, unsigned, int (*)(const void *, struct file *, unsigned), @@ -121,7 +122,10 @@ extern void __fd_install(struct files_struct *files, unsigned int fd, struct file *file); extern int __close_fd(struct files_struct *files, unsigned int fd); +extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); extern int __close_fd_get_file(unsigned int fd, struct file **res); +extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, + struct files_struct **new_fdp); extern struct kmem_cache *files_cachep; diff --git a/include/linux/file.h b/include/linux/file.h index 122f80084a3e..225982792fa2 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -9,6 +9,7 @@ #include <linux/compiler.h> #include <linux/types.h> #include <linux/posix_types.h> +#include <linux/errno.h> struct file; @@ -91,6 +92,24 @@ extern void put_unused_fd(unsigned int fd); extern void fd_install(unsigned int fd, struct file *file); +extern int __receive_fd(int fd, struct file *file, int __user *ufd, + unsigned int o_flags); +static inline int receive_fd_user(struct file *file, int __user *ufd, + unsigned int o_flags) +{ + if (ufd == NULL) + return -EFAULT; + return __receive_fd(-1, file, ufd, o_flags); +} +static inline int receive_fd(struct file *file, unsigned int o_flags) +{ + return __receive_fd(-1, file, NULL, o_flags); +} +static inline int receive_fd_replace(int fd, struct file *file, unsigned int o_flags) +{ + return __receive_fd(fd, file, NULL, o_flags); +} + extern void flush_delayed_fput(void); extern void __fput_sync(struct file *); diff --git a/include/linux/filter.h b/include/linux/filter.h index 259377723603..0b0144752d78 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -884,12 +884,12 @@ void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); bool bpf_helper_changes_pkt_data(void *func); -static inline bool bpf_dump_raw_ok(void) +static inline bool bpf_dump_raw_ok(const struct cred *cred) { /* Reconstruction of call-sites is dependent on kallsyms, * thus make dump the same restriction. */ - return kallsyms_show_value() == 1; + return kallsyms_show_value(cred); } struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, diff --git a/include/linux/firmware/imx/sci.h b/include/linux/firmware/imx/sci.h index 3fa418a4ca67..22c76571a294 100644 --- a/include/linux/firmware/imx/sci.h +++ b/include/linux/firmware/imx/sci.h @@ -14,9 +14,11 @@ #include <linux/firmware/imx/svc/misc.h> #include <linux/firmware/imx/svc/pm.h> +#include <linux/firmware/imx/svc/rm.h> int imx_scu_enable_general_irq_channel(struct device *dev); int imx_scu_irq_register_notifier(struct notifier_block *nb); int imx_scu_irq_unregister_notifier(struct notifier_block *nb); int imx_scu_irq_group_enable(u8 group, u32 mask, u8 enable); +int imx_scu_soc_init(struct device *dev); #endif /* _SC_SCI_H */ diff --git a/include/linux/firmware/imx/svc/rm.h b/include/linux/firmware/imx/svc/rm.h new file mode 100644 index 000000000000..456b6a59d29b --- /dev/null +++ b/include/linux/firmware/imx/svc/rm.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2016 Freescale Semiconductor, Inc. + * Copyright 2017-2020 NXP + * + * Header file containing the public API for the System Controller (SC) + * Resource Management (RM) function. This includes functions for + * partitioning resources, pads, and memory regions. + * + * RM_SVC (SVC) Resource Management Service + * + * Module for the Resource Management (RM) service. + */ + +#ifndef _SC_RM_API_H +#define _SC_RM_API_H + +#include <linux/firmware/imx/sci.h> + +/* + * This type is used to indicate RPC RM function calls. + */ +enum imx_sc_rm_func { + IMX_SC_RM_FUNC_UNKNOWN = 0, + IMX_SC_RM_FUNC_PARTITION_ALLOC = 1, + IMX_SC_RM_FUNC_SET_CONFIDENTIAL = 31, + IMX_SC_RM_FUNC_PARTITION_FREE = 2, + IMX_SC_RM_FUNC_GET_DID = 26, + IMX_SC_RM_FUNC_PARTITION_STATIC = 3, + IMX_SC_RM_FUNC_PARTITION_LOCK = 4, + IMX_SC_RM_FUNC_GET_PARTITION = 5, + IMX_SC_RM_FUNC_SET_PARENT = 6, + IMX_SC_RM_FUNC_MOVE_ALL = 7, + IMX_SC_RM_FUNC_ASSIGN_RESOURCE = 8, + IMX_SC_RM_FUNC_SET_RESOURCE_MOVABLE = 9, + IMX_SC_RM_FUNC_SET_SUBSYS_RSRC_MOVABLE = 28, + IMX_SC_RM_FUNC_SET_MASTER_ATTRIBUTES = 10, + IMX_SC_RM_FUNC_SET_MASTER_SID = 11, + IMX_SC_RM_FUNC_SET_PERIPHERAL_PERMISSIONS = 12, + IMX_SC_RM_FUNC_IS_RESOURCE_OWNED = 13, + IMX_SC_RM_FUNC_GET_RESOURCE_OWNER = 33, + IMX_SC_RM_FUNC_IS_RESOURCE_MASTER = 14, + IMX_SC_RM_FUNC_IS_RESOURCE_PERIPHERAL = 15, + IMX_SC_RM_FUNC_GET_RESOURCE_INFO = 16, + IMX_SC_RM_FUNC_MEMREG_ALLOC = 17, + IMX_SC_RM_FUNC_MEMREG_SPLIT = 29, + IMX_SC_RM_FUNC_MEMREG_FRAG = 32, + IMX_SC_RM_FUNC_MEMREG_FREE = 18, + IMX_SC_RM_FUNC_FIND_MEMREG = 30, + IMX_SC_RM_FUNC_ASSIGN_MEMREG = 19, + IMX_SC_RM_FUNC_SET_MEMREG_PERMISSIONS = 20, + IMX_SC_RM_FUNC_IS_MEMREG_OWNED = 21, + IMX_SC_RM_FUNC_GET_MEMREG_INFO = 22, + IMX_SC_RM_FUNC_ASSIGN_PAD = 23, + IMX_SC_RM_FUNC_SET_PAD_MOVABLE = 24, + IMX_SC_RM_FUNC_IS_PAD_OWNED = 25, + IMX_SC_RM_FUNC_DUMP = 27, +}; + +#if IS_ENABLED(CONFIG_IMX_SCU) +bool imx_sc_rm_is_resource_owned(struct imx_sc_ipc *ipc, u16 resource); +#else +static inline bool +imx_sc_rm_is_resource_owned(struct imx_sc_ipc *ipc, u16 resource) +{ + return true; +} +#endif +#endif diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 21f5aa0b217f..27828145ca09 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -207,6 +207,17 @@ static inline long freezable_schedule_timeout_interruptible(long timeout) return __retval; } +/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ +static inline long freezable_schedule_timeout_interruptible_unsafe(long timeout) +{ + long __retval; + + freezer_do_not_count(); + __retval = schedule_timeout_interruptible(timeout); + freezer_count_unsafe(); + return __retval; +} + /* Like schedule_timeout_killable(), but should not block the freezer. */ static inline long freezable_schedule_timeout_killable(long timeout) { @@ -285,6 +296,9 @@ static inline void set_freezable(void) {} #define freezable_schedule_timeout_interruptible(timeout) \ schedule_timeout_interruptible(timeout) +#define freezable_schedule_timeout_interruptible_unsafe(timeout) \ + schedule_timeout_interruptible(timeout) + #define freezable_schedule_timeout_killable(timeout) \ schedule_timeout_killable(timeout) diff --git a/include/linux/fs.h b/include/linux/fs.h index 8e1f8f93108f..bd7ec3eaeed0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -175,6 +175,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File does not contribute to nr_files count */ #define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) +/* File supports async buffered reads */ +#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000) + /* * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector * that indicates that they should check the contents of the iovec are @@ -315,6 +318,9 @@ enum rw_hint { #define IOCB_SYNC (1 << 5) #define IOCB_WRITE (1 << 6) #define IOCB_NOWAIT (1 << 7) +/* iocb->ki_waitq is valid */ +#define IOCB_WAITQ (1 << 8) +#define IOCB_NOIO (1 << 9) struct kiocb { struct file *ki_filp; @@ -328,7 +334,10 @@ struct kiocb { int ki_flags; u16 ki_hint; u16 ki_ioprio; /* See linux/ioprio.h */ - unsigned int ki_cookie; /* for ->iopoll */ + union { + unsigned int ki_cookie; /* for ->iopoll */ + struct wait_page_queue *ki_waitq; /* for async buffered IO */ + }; randomized_struct_fields_end }; @@ -470,45 +479,6 @@ struct address_space { * must be enforced here for CRIS, to let the least significant bit * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. */ -struct request_queue; - -struct block_device { - dev_t bd_dev; /* not a kdev_t - it's a search key */ - int bd_openers; - struct inode * bd_inode; /* will die */ - struct super_block * bd_super; - struct mutex bd_mutex; /* open/close mutex */ - void * bd_claiming; - void * bd_holder; - int bd_holders; - bool bd_write_holder; -#ifdef CONFIG_SYSFS - struct list_head bd_holder_disks; -#endif - struct block_device * bd_contains; - unsigned bd_block_size; - u8 bd_partno; - struct hd_struct * bd_part; - /* number of times partitions within this device have been opened. */ - unsigned bd_part_count; - int bd_invalidated; - struct gendisk * bd_disk; - struct request_queue * bd_queue; - struct backing_dev_info *bd_bdi; - struct list_head bd_list; - /* - * Private data. You must have bd_claim'ed the block_device - * to use this. NOTE: bd_claim allows an owner to claim - * the same device multiple times, the owner must take special - * care to not mess up bd_private for that case. - */ - unsigned long bd_private; - - /* The counter of freeze processes */ - int bd_fsfreeze_count; - /* Mutex for freeze */ - struct mutex bd_fsfreeze_mutex; -} __randomize_layout; /* XArray tags, for tagging dirty and writeback pages in the pagecache. */ #define PAGECACHE_TAG_DIRTY XA_MARK_0 @@ -907,8 +877,6 @@ static inline unsigned imajor(const struct inode *inode) return MAJOR(inode->i_rdev); } -extern struct block_device *I_BDEV(struct inode *inode); - struct fown_struct { rwlock_t lock; /* protects pid, uid, euid fields */ struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ @@ -1380,6 +1348,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_NODIRATIME 2048 /* Do not update directory access times */ #define SB_SILENT 32768 #define SB_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define SB_INLINECRYPT (1<<17) /* Use blk-crypto for encrypted files */ #define SB_KERNMOUNT (1<<22) /* this is a kern_mount call */ #define SB_I_VERSION (1<<23) /* Update inode I_version field */ #define SB_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ @@ -1774,14 +1743,6 @@ struct dir_context { loff_t pos; }; -struct block_device_operations; - -/* These macros are for out of kernel modules to test that - * the kernel supports the unlocked_ioctl and compat_ioctl - * fields in struct file_operations. */ -#define HAVE_COMPAT_IOCTL 1 -#define HAVE_UNLOCKED_IOCTL 1 - /* * These flags let !MMU mmap() govern direct device mapping vs immediate * copying more easily for MAP_PRIVATE, especially for ROM filesystems. @@ -1917,7 +1878,6 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, struct iovec *fast_pointer, struct iovec **ret_pointer); -extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, @@ -2264,18 +2224,9 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) -#ifdef CONFIG_BLOCK extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); -#else -static inline struct dentry *mount_bdev(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int)) -{ - return ERR_PTR(-ENODEV); -} -#endif extern struct dentry *mount_single(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -2284,14 +2235,7 @@ extern struct dentry *mount_nodev(struct file_system_type *fs_type, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void generic_shutdown_super(struct super_block *sb); -#ifdef CONFIG_BLOCK void kill_block_super(struct super_block *sb); -#else -static inline void kill_block_super(struct super_block *sb) -{ - BUG(); -} -#endif void kill_anon_super(struct super_block *sb); void kill_litter_super(struct super_block *sb); void deactivate_super(struct super_block *sb); @@ -2581,95 +2525,16 @@ extern struct kmem_cache *names_cachep; #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) -#ifdef CONFIG_BLOCK -extern int register_blkdev(unsigned int, const char *); -extern void unregister_blkdev(unsigned int, const char *); -extern struct block_device *bdget(dev_t); -extern struct block_device *bdgrab(struct block_device *bdev); -extern void bd_set_size(struct block_device *, loff_t size); -extern void bd_forget(struct inode *inode); -extern void bdput(struct block_device *); -extern void invalidate_bdev(struct block_device *); -extern void iterate_bdevs(void (*)(struct block_device *, void *), void *); -extern int sync_blockdev(struct block_device *bdev); -extern void kill_bdev(struct block_device *); -extern struct super_block *freeze_bdev(struct block_device *); -extern void emergency_thaw_all(void); -extern void emergency_thaw_bdev(struct super_block *sb); -extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); -extern int fsync_bdev(struct block_device *); - extern struct super_block *blockdev_superblock; - static inline bool sb_is_blkdev_sb(struct super_block *sb) { - return sb == blockdev_superblock; -} -#else -static inline void bd_forget(struct inode *inode) {} -static inline int sync_blockdev(struct block_device *bdev) { return 0; } -static inline void kill_bdev(struct block_device *bdev) {} -static inline void invalidate_bdev(struct block_device *bdev) {} - -static inline struct super_block *freeze_bdev(struct block_device *sb) -{ - return NULL; -} - -static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) -{ - return 0; -} - -static inline int emergency_thaw_bdev(struct super_block *sb) -{ - return 0; + return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock; } -static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg) -{ -} - -static inline bool sb_is_blkdev_sb(struct super_block *sb) -{ - return false; -} -#endif +void emergency_thaw_all(void); extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; -#ifdef CONFIG_BLOCK -extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); -extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); -extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); -extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, - void *holder); -extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, - void *holder); -extern struct block_device *bd_start_claiming(struct block_device *bdev, - void *holder); -extern void bd_finish_claiming(struct block_device *bdev, - struct block_device *whole, void *holder); -extern void bd_abort_claiming(struct block_device *bdev, - struct block_device *whole, void *holder); -extern void blkdev_put(struct block_device *bdev, fmode_t mode); - -#ifdef CONFIG_SYSFS -extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); -extern void bd_unlink_disk_holder(struct block_device *bdev, - struct gendisk *disk); -#else -static inline int bd_link_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ - return 0; -} -static inline void bd_unlink_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ -} -#endif -#endif /* fs/char_dev.c */ #define CHRDEV_MAJOR_MAX 512 @@ -2700,31 +2565,12 @@ static inline void unregister_chrdev(unsigned int major, const char *name) __unregister_chrdev(major, 0, 256, name); } -/* fs/block_dev.c */ -#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ -#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ - -#ifdef CONFIG_BLOCK -#define BLKDEV_MAJOR_MAX 512 -extern const char *bdevname(struct block_device *bdev, char *buffer); -extern struct block_device *lookup_bdev(const char *); -extern void blkdev_show(struct seq_file *,off_t); - -#else -#define BLKDEV_MAJOR_MAX 0 -#endif - extern void init_special_inode(struct inode *, umode_t, dev_t); /* Invalid inode operations -- fs/bad_inode.c */ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); -#ifdef CONFIG_BLOCK -extern int revalidate_disk(struct gendisk *); -extern int check_disk_change(struct block_device *); -extern int __invalidate_device(struct block_device *, bool); -#endif unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); @@ -3035,6 +2881,7 @@ extern int kernel_read_file_from_path_initns(const char *, void **, loff_t *, lo extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t, enum kernel_read_file_id); extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *); +ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos); extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *); extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *); extern struct file * open_exec(const char *); @@ -3124,10 +2971,6 @@ static inline void remove_inode_hash(struct inode *inode) extern void inode_sb_list_add(struct inode *inode); -#ifdef CONFIG_BLOCK -extern int bdev_read_only(struct block_device *); -#endif -extern int set_blocksize(struct block_device *, int); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); @@ -3204,6 +3047,8 @@ enum { DIO_SKIP_HOLES = 0x02, }; +void dio_end_io(struct bio *bio); + ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, get_block_t get_block, @@ -3438,22 +3283,28 @@ static inline int iocb_flags(struct file *file) static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) { + int kiocb_flags = 0; + + if (!flags) + return 0; if (unlikely(flags & ~RWF_SUPPORTED)) return -EOPNOTSUPP; if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; - ki->ki_flags |= IOCB_NOWAIT; + kiocb_flags |= IOCB_NOWAIT; } if (flags & RWF_HIPRI) - ki->ki_flags |= IOCB_HIPRI; + kiocb_flags |= IOCB_HIPRI; if (flags & RWF_DSYNC) - ki->ki_flags |= IOCB_DSYNC; + kiocb_flags |= IOCB_DSYNC; if (flags & RWF_SYNC) - ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC); + kiocb_flags |= (IOCB_DSYNC | IOCB_SYNC); if (flags & RWF_APPEND) - ki->ki_flags |= IOCB_APPEND; + kiocb_flags |= IOCB_APPEND; + + ki->ki_flags |= kiocb_flags; return 0; } diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 5f24fcbfbfb4..37e1e8f7f08d 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -109,6 +109,7 @@ struct fs_context { enum fs_context_phase phase:8; /* The phase the context is in */ bool need_free:1; /* Need to call ops->free() */ bool global:1; /* Goes into &init_user_ns */ + bool oldapi:1; /* Coming from mount(2) */ }; struct fs_context_operations { diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index ce0b5fbf239d..3f0b19dcfae7 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -46,7 +46,7 @@ struct fscache_cache_tag { unsigned long flags; #define FSCACHE_TAG_RESERVED 0 /* T if tag is reserved for a cache */ atomic_t usage; - char name[0]; /* tag name */ + char name[]; /* tag name */ }; /* diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 2862ca5fea33..991ff8575d0e 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -69,12 +69,20 @@ struct fscrypt_operations { bool (*has_stable_inodes)(struct super_block *sb); void (*get_ino_and_lblk_bits)(struct super_block *sb, int *ino_bits_ret, int *lblk_bits_ret); + int (*get_num_devices)(struct super_block *sb); + void (*get_devices)(struct super_block *sb, + struct request_queue **devs); }; -static inline bool fscrypt_has_encryption_key(const struct inode *inode) +static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) { - /* pairs with cmpxchg_release() in fscrypt_get_encryption_info() */ - return READ_ONCE(inode->i_crypt_info) != NULL; + /* + * Pairs with the cmpxchg_release() in fscrypt_get_encryption_info(). + * I.e., another task may publish ->i_crypt_info concurrently, executing + * a RELEASE barrier. We need to use smp_load_acquire() here to safely + * ACQUIRE the memory the other task published. + */ + return smp_load_acquire(&inode->i_crypt_info); } /** @@ -231,9 +239,9 @@ static inline void fscrypt_set_ops(struct super_block *sb, } #else /* !CONFIG_FS_ENCRYPTION */ -static inline bool fscrypt_has_encryption_key(const struct inode *inode) +static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) { - return false; + return NULL; } static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) @@ -537,6 +545,99 @@ static inline void fscrypt_set_ops(struct super_block *sb, #endif /* !CONFIG_FS_ENCRYPTION */ +/* inline_crypt.c */ +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT + +bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode); + +void fscrypt_set_bio_crypt_ctx(struct bio *bio, + const struct inode *inode, u64 first_lblk, + gfp_t gfp_mask); + +void fscrypt_set_bio_crypt_ctx_bh(struct bio *bio, + const struct buffer_head *first_bh, + gfp_t gfp_mask); + +bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, + u64 next_lblk); + +bool fscrypt_mergeable_bio_bh(struct bio *bio, + const struct buffer_head *next_bh); + +#else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ + +static inline bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode) +{ + return false; +} + +static inline void fscrypt_set_bio_crypt_ctx(struct bio *bio, + const struct inode *inode, + u64 first_lblk, gfp_t gfp_mask) { } + +static inline void fscrypt_set_bio_crypt_ctx_bh( + struct bio *bio, + const struct buffer_head *first_bh, + gfp_t gfp_mask) { } + +static inline bool fscrypt_mergeable_bio(struct bio *bio, + const struct inode *inode, + u64 next_lblk) +{ + return true; +} + +static inline bool fscrypt_mergeable_bio_bh(struct bio *bio, + const struct buffer_head *next_bh) +{ + return true; +} +#endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ + +/** + * fscrypt_inode_uses_inline_crypto() - test whether an inode uses inline + * encryption + * @inode: an inode. If encrypted, its key must be set up. + * + * Return: true if the inode requires file contents encryption and if the + * encryption should be done in the block layer via blk-crypto rather + * than in the filesystem layer. + */ +static inline bool fscrypt_inode_uses_inline_crypto(const struct inode *inode) +{ + return fscrypt_needs_contents_encryption(inode) && + __fscrypt_inode_uses_inline_crypto(inode); +} + +/** + * fscrypt_inode_uses_fs_layer_crypto() - test whether an inode uses fs-layer + * encryption + * @inode: an inode. If encrypted, its key must be set up. + * + * Return: true if the inode requires file contents encryption and if the + * encryption should be done in the filesystem layer rather than in the + * block layer via blk-crypto. + */ +static inline bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode) +{ + return fscrypt_needs_contents_encryption(inode) && + !__fscrypt_inode_uses_inline_crypto(inode); +} + +/** + * fscrypt_has_encryption_key() - check whether an inode has had its key set up + * @inode: the inode to check + * + * Return: %true if the inode has had its encryption key set up, else %false. + * + * Usually this should be preceded by fscrypt_get_encryption_info() to try to + * set up the key first. + */ +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return fscrypt_get_info(inode) != NULL; +} + /** * fscrypt_require_key() - require an inode's encryption key * @inode: the inode we need the key for diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 78201a6d35f6..c1144a450392 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -115,8 +115,13 @@ struct fsverity_operations { static inline struct fsverity_info *fsverity_get_info(const struct inode *inode) { - /* pairs with the cmpxchg() in fsverity_set_info() */ - return READ_ONCE(inode->i_verity_info); + /* + * Pairs with the cmpxchg_release() in fsverity_set_info(). + * I.e., another task may publish ->i_verity_info concurrently, + * executing a RELEASE barrier. We need to use smp_load_acquire() here + * to safely ACQUIRE the memory the other task published. + */ + return smp_load_acquire(&inode->i_verity_info); } /* enable.c */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e339dac91ee6..ce2c06f72e86 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -58,9 +58,6 @@ struct ftrace_direct_func; const char * ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym); -int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, - char *type, char *name, - char *module_name, int *exported); #else static inline const char * ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, @@ -68,6 +65,13 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, { return NULL; } +#endif + +#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) +int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *name, + char *module_name, int *exported); +#else static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *name, char *module_name, int *exported) @@ -76,7 +80,6 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val } #endif - #ifdef CONFIG_FUNCTION_TRACER extern int ftrace_enabled; @@ -207,6 +210,7 @@ struct ftrace_ops { struct ftrace_ops_hash old_hash; unsigned long trampoline; unsigned long trampoline_size; + struct list_head list; #endif }; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 392aad5e29a2..4ab853461dff 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -19,13 +19,12 @@ #include <linux/blk_types.h> #include <asm/local.h> -#ifdef CONFIG_BLOCK - #define dev_to_disk(device) container_of((device), struct gendisk, part0.__dev) #define dev_to_part(device) container_of((device), struct hd_struct, __dev) #define disk_to_dev(disk) (&(disk)->part0.__dev) #define part_to_dev(part) (&((part)->__dev)) +extern const struct device_type disk_type; extern struct device_type part_type; extern struct class block_class; @@ -337,12 +336,9 @@ static inline void set_capacity(struct gendisk *disk, sector_t size) disk->part0.nr_sects = size; } -extern dev_t blk_lookup_devt(const char *name, int partno); - int bdev_disk_changed(struct block_device *bdev, bool invalidate); int blk_add_partitions(struct gendisk *disk, struct block_device *bdev); int blk_drop_partitions(struct block_device *bdev); -extern void printk_all_partitions(void); extern struct gendisk *__alloc_disk_node(int minors, int node_id); extern struct kobject *get_disk_and_module(struct gendisk *disk); @@ -373,10 +369,40 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); #define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE) -#else /* CONFIG_BLOCK */ +int register_blkdev(unsigned int major, const char *name); +void unregister_blkdev(unsigned int major, const char *name); -static inline void printk_all_partitions(void) { } +int revalidate_disk(struct gendisk *disk); +int check_disk_change(struct block_device *bdev); +int __invalidate_device(struct block_device *bdev, bool kill_dirty); +void bd_set_size(struct block_device *bdev, loff_t size); +/* for drivers/char/raw.c: */ +int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); +long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); + +#ifdef CONFIG_SYSFS +int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); +void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); +#else +static inline int bd_link_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ + return 0; +} +static inline void bd_unlink_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ +} +#endif /* CONFIG_SYSFS */ + +#ifdef CONFIG_BLOCK +void printk_all_partitions(void); +dev_t blk_lookup_devt(const char *name, int partno); +#else /* CONFIG_BLOCK */ +static inline void printk_all_partitions(void) +{ +} static inline dev_t blk_lookup_devt(const char *name, int partno) { dev_t devt = MKDEV(0, 0); diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index e07cf853aa16..754f67ac4326 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -38,9 +38,24 @@ static __always_inline void rcu_irq_enter_check_tick(void) } while (0) /* + * Like __irq_enter() without time accounting for fast + * interrupts, e.g. reschedule IPI where time accounting + * is more expensive than the actual interrupt. + */ +#define __irq_enter_raw() \ + do { \ + preempt_count_add(HARDIRQ_OFFSET); \ + lockdep_hardirq_enter(); \ + } while (0) + +/* * Enter irq context (on NO_HZ, update jiffies): */ -extern void irq_enter(void); +void irq_enter(void); +/* + * Like irq_enter(), but RCU is already watching. + */ +void irq_enter_rcu(void); /* * Exit irq context without processing softirqs: @@ -53,9 +68,23 @@ extern void irq_enter(void); } while (0) /* + * Like __irq_exit() without time accounting + */ +#define __irq_exit_raw() \ + do { \ + lockdep_hardirq_exit(); \ + preempt_count_sub(HARDIRQ_OFFSET); \ + } while (0) + +/* * Exit irq context and process softirqs if needed: */ -extern void irq_exit(void); +void irq_exit(void); + +/* + * Like irq_exit(), but return with RCU watching. + */ +void irq_exit_rcu(void); #ifndef arch_nmi_enter #define arch_nmi_enter() do { } while (0) @@ -82,28 +111,42 @@ extern void rcu_nmi_exit(void); /* * nmi_enter() can nest up to 15 times; see NMI_BITS. */ -#define nmi_enter() \ +#define __nmi_enter() \ do { \ + lockdep_off(); \ arch_nmi_enter(); \ printk_nmi_enter(); \ - lockdep_off(); \ - ftrace_nmi_enter(); \ BUG_ON(in_nmi() == NMI_MASK); \ __preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ - rcu_nmi_enter(); \ + } while (0) + +#define nmi_enter() \ + do { \ + __nmi_enter(); \ lockdep_hardirq_enter(); \ + rcu_nmi_enter(); \ + instrumentation_begin(); \ + ftrace_nmi_enter(); \ + instrumentation_end(); \ } while (0) -#define nmi_exit() \ +#define __nmi_exit() \ do { \ - lockdep_hardirq_exit(); \ - rcu_nmi_exit(); \ BUG_ON(!in_nmi()); \ __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ - ftrace_nmi_exit(); \ - lockdep_on(); \ printk_nmi_exit(); \ arch_nmi_exit(); \ + lockdep_on(); \ + } while (0) + +#define nmi_exit() \ + do { \ + instrumentation_begin(); \ + ftrace_nmi_exit(); \ + instrumentation_end(); \ + rcu_nmi_exit(); \ + lockdep_hardirq_exit(); \ + __nmi_exit(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/host1x.h b/include/linux/host1x.h index c230b4e70d75..a3a568bf9686 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -48,6 +48,9 @@ struct host1x_client_ops { * @channel: host1x channel associated with this client * @syncpts: array of syncpoints requested for this client * @num_syncpts: number of syncpoints requested for this client + * @parent: pointer to parent structure + * @usecount: reference count for this structure + * @lock: mutex for mutually exclusive concurrency */ struct host1x_client { struct list_head list; diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h index 8c5459034f92..1e4e0de4ef8b 100644 --- a/include/linux/i2c-smbus.h +++ b/include/linux/i2c-smbus.h @@ -2,7 +2,7 @@ /* * i2c-smbus.h - SMBus extensions to the I2C protocol * - * Copyright (C) 2010 Jean Delvare <jdelvare@suse.de> + * Copyright (C) 2010-2019 Jean Delvare <jdelvare@suse.de> */ #ifndef _LINUX_I2C_SMBUS_H @@ -39,4 +39,10 @@ static inline int of_i2c_setup_smbus_alert(struct i2c_adapter *adap) } #endif +#if IS_ENABLED(CONFIG_I2C_SMBUS) && IS_ENABLED(CONFIG_DMI) +void i2c_register_spd(struct i2c_adapter *adap); +#else +static inline void i2c_register_spd(struct i2c_adapter *adap) { } +#endif + #endif /* _LINUX_I2C_SMBUS_H */ diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 49d29054e657..4e7714c88f95 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -56,7 +56,7 @@ struct property_entry; * on a bus (or read from them). Apart from two basic transfer functions to * transmit one message at a time, a more complex version can be used to * transmit an arbitrary number of messages without interruption. - * @count must be be less than 64k since msg.len is u16. + * @count must be less than 64k since msg.len is u16. */ int i2c_transfer_buffer_flags(const struct i2c_client *client, char *buf, int count, u16 flags); @@ -351,14 +351,14 @@ static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj) return to_i2c_client(dev); } -static inline void *i2c_get_clientdata(const struct i2c_client *dev) +static inline void *i2c_get_clientdata(const struct i2c_client *client) { - return dev_get_drvdata(&dev->dev); + return dev_get_drvdata(&client->dev); } -static inline void i2c_set_clientdata(struct i2c_client *dev, void *data) +static inline void i2c_set_clientdata(struct i2c_client *client, void *data) { - dev_set_drvdata(&dev->dev, data); + dev_set_drvdata(&client->dev, data); } /* I2C slave support */ @@ -408,7 +408,7 @@ static inline bool i2c_detect_slave_mode(struct device *dev) { return false; } * that are present. This information is used to grow the driver model tree. * For mainboards this is done statically using i2c_register_board_info(); * bus numbers identify adapters that aren't yet available. For add-on boards, - * i2c_new_device() does this dynamically with the adapter already known. + * i2c_new_client_device() does this dynamically with the adapter already known. */ struct i2c_board_info { char type[I2C_NAME_SIZE]; @@ -439,14 +439,12 @@ struct i2c_board_info { #if IS_ENABLED(CONFIG_I2C) -/* Add-on boards should register/unregister their devices; e.g. a board +/* + * Add-on boards should register/unregister their devices; e.g. a board * with integrated I2C, a config eeprom, sensors, and a codec that's * used in conjunction with the primary hardware. */ struct i2c_client * -i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info); - -struct i2c_client * i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *info); /* If you don't know the exact address of an I2C device, use this variant @@ -1003,7 +1001,7 @@ static inline u32 i2c_acpi_find_bus_speed(struct device *dev) static inline struct i2c_client *i2c_acpi_new_device(struct device *dev, int index, struct i2c_board_info *info) { - return NULL; + return ERR_PTR(-ENODEV); } static inline struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) { diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h index a445cd1a36c5..91a8612b8bf9 100644 --- a/include/linux/idle_inject.h +++ b/include/linux/idle_inject.h @@ -26,4 +26,8 @@ void idle_inject_set_duration(struct idle_inject_device *ii_dev, void idle_inject_get_duration(struct idle_inject_device *ii_dev, unsigned int *run_duration_us, unsigned int *idle_duration_us); + +void idle_inject_set_latency(struct idle_inject_device *ii_dev, + unsigned int latency_ns); + #endif /* __IDLE_INJECT_H__ */ diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index fe15f831841b..9f732499ea88 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3333,13 +3333,17 @@ struct ieee80211_multiple_bssid_configuration { #define WLAN_AKM_SUITE_TDLS SUITE(0x000FAC, 7) #define WLAN_AKM_SUITE_SAE SUITE(0x000FAC, 8) #define WLAN_AKM_SUITE_FT_OVER_SAE SUITE(0x000FAC, 9) +#define WLAN_AKM_SUITE_AP_PEER_KEY SUITE(0x000FAC, 10) #define WLAN_AKM_SUITE_8021X_SUITE_B SUITE(0x000FAC, 11) #define WLAN_AKM_SUITE_8021X_SUITE_B_192 SUITE(0x000FAC, 12) +#define WLAN_AKM_SUITE_FT_8021X_SHA384 SUITE(0x000FAC, 13) #define WLAN_AKM_SUITE_FILS_SHA256 SUITE(0x000FAC, 14) #define WLAN_AKM_SUITE_FILS_SHA384 SUITE(0x000FAC, 15) #define WLAN_AKM_SUITE_FT_FILS_SHA256 SUITE(0x000FAC, 16) #define WLAN_AKM_SUITE_FT_FILS_SHA384 SUITE(0x000FAC, 17) #define WLAN_AKM_SUITE_OWE SUITE(0x000FAC, 18) +#define WLAN_AKM_SUITE_FT_PSK_SHA384 SUITE(0x000FAC, 19) +#define WLAN_AKM_SUITE_PSK_SHA384 SUITE(0x000FAC, 20) #define WLAN_MAX_KEY_LEN 32 diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b05e855f1ddd..41a518336673 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -25,6 +25,8 @@ #define VLAN_ETH_DATA_LEN 1500 /* Max. octets in payload */ #define VLAN_ETH_FRAME_LEN 1518 /* Max. octets in frame sans FCS */ +#define VLAN_MAX_DEPTH 8 /* Max. number of nested VLAN tags parsed */ + /* * struct vlan_hdr - vlan header * @h_vlan_TCI: priority and VLAN ID @@ -577,10 +579,10 @@ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) * Returns the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ -static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, +static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, int *depth) { - unsigned int vlan_depth = skb->mac_len; + unsigned int vlan_depth = skb->mac_len, parse_depth = VLAN_MAX_DEPTH; /* if type is 802.1Q/AD then the header should already be * present at mac_len - VLAN_HLEN (if mac_len > 0), or at @@ -595,13 +597,12 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, vlan_depth = ETH_HLEN; } do { - struct vlan_hdr *vh; + struct vlan_hdr vhdr, *vh; - if (unlikely(!pskb_may_pull(skb, - vlan_depth + VLAN_HLEN))) + vh = skb_header_pointer(skb, vlan_depth, sizeof(vhdr), &vhdr); + if (unlikely(!vh || !--parse_depth)) return 0; - vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; } while (eth_type_vlan(type)); @@ -620,11 +621,25 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, * Returns the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ -static inline __be16 vlan_get_protocol(struct sk_buff *skb) +static inline __be16 vlan_get_protocol(const struct sk_buff *skb) { return __vlan_get_protocol(skb, skb->protocol, NULL); } +/* A getter for the SKB protocol field which will handle VLAN tags consistently + * whether VLAN acceleration is enabled or not. + */ +static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan) +{ + if (!skip_vlan) + /* VLAN acceleration strips the VLAN header from the skb and + * moves it to skb->vlan_proto + */ + return skb_vlan_tag_present(skb) ? skb->vlan_proto : skb->protocol; + + return vlan_get_protocol(skb); +} + static inline void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr) { diff --git a/include/linux/input/elan-i2c-ids.h b/include/linux/input/elan-i2c-ids.h index 1ecb6b45812c..520858d12680 100644 --- a/include/linux/input/elan-i2c-ids.h +++ b/include/linux/input/elan-i2c-ids.h @@ -67,8 +67,15 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN062B", 0 }, { "ELAN062C", 0 }, { "ELAN062D", 0 }, + { "ELAN062E", 0 }, /* Lenovo V340 Whiskey Lake U */ + { "ELAN062F", 0 }, /* Lenovo V340 Comet Lake U */ { "ELAN0631", 0 }, { "ELAN0632", 0 }, + { "ELAN0633", 0 }, /* Lenovo S145 */ + { "ELAN0634", 0 }, /* Lenovo V340 Ice lake */ + { "ELAN0635", 0 }, /* Lenovo V1415-IIL */ + { "ELAN0636", 0 }, /* Lenovo V1415-Dali */ + { "ELAN0637", 0 }, /* Lenovo V1415-IGLR */ { "ELAN1000", 0 }, { } }; diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h new file mode 100644 index 000000000000..93e2ad67fc10 --- /dev/null +++ b/include/linux/instrumentation.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_INSTRUMENTATION_H +#define __LINUX_INSTRUMENTATION_H + +#if defined(CONFIG_DEBUG_ENTRY) && defined(CONFIG_STACK_VALIDATION) + +/* Begin/end of an instrumentation safe region */ +#define instrumentation_begin() ({ \ + asm volatile("%c0: nop\n\t" \ + ".pushsection .discard.instr_begin\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) + +/* + * Because instrumentation_{begin,end}() can nest, objtool validation considers + * _begin() a +1 and _end() a -1 and computes a sum over the instructions. + * When the value is greater than 0, we consider instrumentation allowed. + * + * There is a problem with code like: + * + * noinstr void foo() + * { + * instrumentation_begin(); + * ... + * if (cond) { + * instrumentation_begin(); + * ... + * instrumentation_end(); + * } + * bar(); + * instrumentation_end(); + * } + * + * If instrumentation_end() would be an empty label, like all the other + * annotations, the inner _end(), which is at the end of a conditional block, + * would land on the instruction after the block. + * + * If we then consider the sum of the !cond path, we'll see that the call to + * bar() is with a 0-value, even though, we meant it to happen with a positive + * value. + * + * To avoid this, have _end() be a NOP instruction, this ensures it will be + * part of the condition block and does not escape. + */ +#define instrumentation_end() ({ \ + asm volatile("%c0: nop\n\t" \ + ".pushsection .discard.instr_end\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) +#else +# define instrumentation_begin() do { } while(0) +# define instrumentation_end() do { } while(0) +#endif + +#endif /* __LINUX_INSTRUMENTATION_H */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4100bd224f5c..3e8fa1c7a1e6 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -41,6 +41,7 @@ #define DMA_PTE_SNP BIT_ULL(11) #define DMA_FL_PTE_PRESENT BIT_ULL(0) +#define DMA_FL_PTE_US BIT_ULL(2) #define DMA_FL_PTE_XD BIT_ULL(63) #define ADDR_WIDTH_5LEVEL (57) diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index efb3ce892c20..3582176a1eca 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -29,6 +29,7 @@ enum rapl_domain_reg_id { RAPL_DOMAIN_REG_PERF, RAPL_DOMAIN_REG_POLICY, RAPL_DOMAIN_REG_INFO, + RAPL_DOMAIN_REG_PL4, RAPL_DOMAIN_REG_MAX, }; @@ -38,12 +39,14 @@ enum rapl_primitives { ENERGY_COUNTER, POWER_LIMIT1, POWER_LIMIT2, + POWER_LIMIT4, FW_LOCK, PL1_ENABLE, /* power limit 1, aka long term */ PL1_CLAMP, /* allow frequency to go below OS request */ PL2_ENABLE, /* power limit 2, aka short term, instantaneous */ PL2_CLAMP, + PL4_ENABLE, /* power limit 4, aka max peak power */ TIME_WINDOW1, /* long term */ TIME_WINDOW2, /* short term */ @@ -65,7 +68,7 @@ struct rapl_domain_data { unsigned long timestamp; }; -#define NR_POWER_LIMITS (2) +#define NR_POWER_LIMITS (3) struct rapl_power_limit { struct powercap_zone_constraint *constraint; int prim_id; /* primitive ID used to enable */ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 80f637c3a6f3..f9aee3538461 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -585,6 +585,9 @@ static inline struct task_struct *this_cpu_ksoftirqd(void) /* Tasklets --- multithreaded analogue of BHs. + This API is deprecated. Please consider using threaded IRQs instead: + https://lore.kernel.org/lkml/20200716081538.2sivhkj4hcyrusem@linutronix.de + Main feature differing them of generic softirqs: tasklet is running only on one CPU simultaneously. @@ -608,16 +611,42 @@ struct tasklet_struct struct tasklet_struct *next; unsigned long state; atomic_t count; - void (*func)(unsigned long); + bool use_callback; + union { + void (*func)(unsigned long data); + void (*callback)(struct tasklet_struct *t); + }; unsigned long data; }; -#define DECLARE_TASKLET(name, func, data) \ -struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(0), func, data } +#define DECLARE_TASKLET(name, _callback) \ +struct tasklet_struct name = { \ + .count = ATOMIC_INIT(0), \ + .callback = _callback, \ + .use_callback = true, \ +} -#define DECLARE_TASKLET_DISABLED(name, func, data) \ -struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data } +#define DECLARE_TASKLET_DISABLED(name, _callback) \ +struct tasklet_struct name = { \ + .count = ATOMIC_INIT(1), \ + .callback = _callback, \ + .use_callback = true, \ +} +#define from_tasklet(var, callback_tasklet, tasklet_fieldname) \ + container_of(callback_tasklet, typeof(*var), tasklet_fieldname) + +#define DECLARE_TASKLET_OLD(name, _func) \ +struct tasklet_struct name = { \ + .count = ATOMIC_INIT(0), \ + .func = _func, \ +} + +#define DECLARE_TASKLET_DISABLED_OLD(name, _func) \ +struct tasklet_struct name = { \ + .count = ATOMIC_INIT(1), \ + .func = _func, \ +} enum { @@ -686,6 +715,8 @@ extern void tasklet_kill(struct tasklet_struct *t); extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); extern void tasklet_init(struct tasklet_struct *t, void (*func)(unsigned long), unsigned long data); +extern void tasklet_setup(struct tasklet_struct *t, + void (*callback)(struct tasklet_struct *)); /* * Autoprobing for irqs: @@ -760,8 +791,10 @@ extern int arch_early_irq_init(void); /* * We want to know which function is an entrypoint of a hardirq or a softirq. */ -#define __irq_entry __attribute__((__section__(".irqentry.text"))) -#define __softirq_entry \ - __attribute__((__section__(".softirqentry.text"))) +#ifndef __irq_entry +# define __irq_entry __attribute__((__section__(".irqentry.text"))) +#endif + +#define __softirq_entry __attribute__((__section__(".softirqentry.text"))) #endif diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 0beaa3eba155..c75e4d3d8833 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -107,9 +107,12 @@ io_mapping_init_wc(struct io_mapping *iomap, resource_size_t base, unsigned long size) { + iomap->iomem = ioremap_wc(base, size); + if (!iomap->iomem) + return NULL; + iomap->base = base; iomap->size = size; - iomap->iomem = ioremap_wc(base, size); #if defined(pgprot_noncached_wc) /* archs can't agree on a name ... */ iomap->prot = pgprot_noncached_wc(PAGE_KERNEL); #elif defined(pgprot_writecombine) diff --git a/include/linux/irq.h b/include/linux/irq.h index 8d5bc2c237d7..1b7f4dfee35b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -213,6 +213,8 @@ struct irq_data { * required * IRQD_HANDLE_ENFORCE_IRQCTX - Enforce that handle_irq_*() is only invoked * from actual interrupt context. + * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call + * irq_chip::irq_set_affinity() when deactivated. */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -237,6 +239,7 @@ enum { IRQD_CAN_RESERVE = (1 << 26), IRQD_MSI_NOMASK_QUIRK = (1 << 27), IRQD_HANDLE_ENFORCE_IRQCTX = (1 << 28), + IRQD_AFFINITY_ON_ACTIVATE = (1 << 29), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -421,6 +424,16 @@ static inline bool irqd_msi_nomask_quirk(struct irq_data *d) return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK; } +static inline void irqd_set_affinity_on_activate(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE; +} + +static inline bool irqd_affinity_on_activate(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_AFFINITY_ON_ACTIVATE; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 2735da5f839e..30823780c192 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -2,7 +2,7 @@ #ifndef _LINUX_IRQ_WORK_H #define _LINUX_IRQ_WORK_H -#include <linux/llist.h> +#include <linux/smp_types.h> /* * An entry can be in one of four states: @@ -13,24 +13,14 @@ * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed */ -/* flags share CSD_FLAG_ space */ - -#define IRQ_WORK_PENDING BIT(0) -#define IRQ_WORK_BUSY BIT(1) - -/* Doesn't want IPI, wait for tick: */ -#define IRQ_WORK_LAZY BIT(2) -/* Run hard IRQ context, even on RT */ -#define IRQ_WORK_HARD_IRQ BIT(3) - -#define IRQ_WORK_CLAIMED (IRQ_WORK_PENDING | IRQ_WORK_BUSY) - -/* - * structure shares layout with single_call_data_t. - */ struct irq_work { - struct llist_node llnode; - atomic_t flags; + union { + struct __call_single_node node; + struct { + struct llist_node llnode; + atomic_t flags; + }; + }; void (*func)(struct irq_work *); }; diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h index 950e4b2458f0..67351aac65ef 100644 --- a/include/linux/irqchip.h +++ b/include/linux/irqchip.h @@ -12,7 +12,9 @@ #define _LINUX_IRQCHIP_H #include <linux/acpi.h> +#include <linux/module.h> #include <linux/of.h> +#include <linux/platform_device.h> /* * This macro must be used by the different irqchip drivers to declare @@ -26,6 +28,28 @@ */ #define IRQCHIP_DECLARE(name, compat, fn) OF_DECLARE_2(irqchip, name, compat, fn) +extern int platform_irqchip_probe(struct platform_device *pdev); + +#define IRQCHIP_PLATFORM_DRIVER_BEGIN(drv_name) \ +static const struct of_device_id drv_name##_irqchip_match_table[] = { + +#define IRQCHIP_MATCH(compat, fn) { .compatible = compat, .data = fn }, + +#define IRQCHIP_PLATFORM_DRIVER_END(drv_name) \ + {}, \ +}; \ +MODULE_DEVICE_TABLE(of, drv_name##_irqchip_match_table); \ +static struct platform_driver drv_name##_driver = { \ + .probe = platform_irqchip_probe, \ + .driver = { \ + .name = #drv_name, \ + .owner = THIS_MODULE, \ + .of_match_table = drv_name##_irqchip_match_table, \ + .suppress_bind_attrs = true, \ + }, \ +}; \ +builtin_platform_driver(drv_name##_driver) + /* * This macro must be used by the different irqchip drivers to declare * the association between their version and their initialization function. @@ -39,8 +63,9 @@ * @fn: initialization function */ #define IRQCHIP_ACPI_DECLARE(name, subtable, validate, data, fn) \ - ACPI_DECLARE_PROBE_ENTRY(irqchip, name, ACPI_SIG_MADT, \ - subtable, validate, data, fn) + ACPI_DECLARE_SUBTABLE_PROBE_ENTRY(irqchip, name, \ + ACPI_SIG_MADT, subtable, \ + validate, data, fn) #ifdef CONFIG_IRQCHIP void irqchip_init(void); diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 6c36b6cc3edf..f6d092fdb93d 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -19,7 +19,6 @@ #define GICD_CLRSPI_NSR 0x0048 #define GICD_SETSPI_SR 0x0050 #define GICD_CLRSPI_SR 0x0058 -#define GICD_SEIR 0x0068 #define GICD_IGROUPR 0x0080 #define GICD_ISENABLER 0x0100 #define GICD_ICENABLER 0x0180 @@ -119,14 +118,11 @@ #define GICR_WAKER 0x0014 #define GICR_SETLPIR 0x0040 #define GICR_CLRLPIR 0x0048 -#define GICR_SEIR GICD_SEIR #define GICR_PROPBASER 0x0070 #define GICR_PENDBASER 0x0078 #define GICR_INVLPIR 0x00A0 #define GICR_INVALLR 0x00B0 #define GICR_SYNCR 0x00C0 -#define GICR_MOVLPIR 0x0100 -#define GICR_MOVALLR 0x0110 #define GICR_IDREGS GICD_IDREGS #define GICR_PIDR2 GICD_PIDR2 diff --git a/include/linux/irqchip/arm-vic.h b/include/linux/irqchip/arm-vic.h index a158b97242c7..f2b11d1df23d 100644 --- a/include/linux/irqchip/arm-vic.h +++ b/include/linux/irqchip/arm-vic.h @@ -9,17 +9,6 @@ #include <linux/types.h> -#define VIC_RAW_STATUS 0x08 -#define VIC_INT_ENABLE 0x10 /* 1 = enable, 0 = disable */ -#define VIC_INT_ENABLE_CLEAR 0x14 - -struct device_node; -struct pt_regs; - -void __vic_init(void __iomem *base, int parent_irq, int irq_start, - u32 vic_sources, u32 resume_sources, struct device_node *node); void vic_init(void __iomem *base, unsigned int irq_start, u32 vic_sources, u32 resume_sources); -int vic_init_cascaded(void __iomem *base, unsigned int parent_irq, - u32 vic_sources, u32 resume_sources); #endif diff --git a/include/linux/irqchip/irq-bcm2836.h b/include/linux/irqchip/irq-bcm2836.h index f224d6f9e550..ac5719d8f56b 100644 --- a/include/linux/irqchip/irq-bcm2836.h +++ b/include/linux/irqchip/irq-bcm2836.h @@ -30,7 +30,7 @@ */ #define LOCAL_MAILBOX_INT_CONTROL0 0x050 /* - * The CPU's interrupt status register. Bits are defined by the the + * The CPU's interrupt status register. Bits are defined by the * LOCAL_IRQ_* bits below. */ #define LOCAL_IRQ_PENDING0 0x060 diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 8f2820c5e69e..5745491303e0 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -22,7 +22,6 @@ struct pt_regs; * @irq_common_data: per irq and chip data passed down to chip functions * @kstat_irqs: irq stats per cpu * @handle_irq: highlevel irq-events handler - * @preflow_handler: handler called before the flow handler (currently used by sparc) * @action: the irq action chain * @status_use_accessors: status information * @core_internal_state__do_not_mess_with_it: core internal status information @@ -58,9 +57,6 @@ struct irq_desc { struct irq_data irq_data; unsigned int __percpu *kstat_irqs; irq_flow_handler_t handle_irq; -#ifdef CONFIG_IRQ_PREFLOW_FASTEOI - irq_preflow_handler_t preflow_handler; -#endif struct irqaction *action; /* IRQ action list */ unsigned int status_use_accessors; unsigned int core_internal_state__do_not_mess_with_it; @@ -268,15 +264,4 @@ irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, } } -#ifdef CONFIG_IRQ_PREFLOW_FASTEOI -static inline void -__irq_set_preflow_handler(unsigned int irq, irq_preflow_handler_t handler) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - desc->preflow_handler = handler; -} -#endif - #endif diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index d7f7e436c3af..bd5c55755447 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -14,6 +14,7 @@ #include <linux/typecheck.h> #include <asm/irqflags.h> +#include <asm/percpu.h> /* Currently lockdep_softirqs_on/off is used only by lockdep */ #ifdef CONFIG_PROVE_LOCKING @@ -31,18 +32,35 @@ #endif #ifdef CONFIG_TRACE_IRQFLAGS + +/* Per-task IRQ trace events information. */ +struct irqtrace_events { + unsigned int irq_events; + unsigned long hardirq_enable_ip; + unsigned long hardirq_disable_ip; + unsigned int hardirq_enable_event; + unsigned int hardirq_disable_event; + unsigned long softirq_disable_ip; + unsigned long softirq_enable_ip; + unsigned int softirq_disable_event; + unsigned int softirq_enable_event; +}; + +DECLARE_PER_CPU(int, hardirqs_enabled); +DECLARE_PER_CPU(int, hardirq_context); + extern void trace_hardirqs_on_prepare(void); - extern void trace_hardirqs_off_prepare(void); + extern void trace_hardirqs_off_finish(void); extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); -# define lockdep_hardirq_context(p) ((p)->hardirq_context) +# define lockdep_hardirq_context() (this_cpu_read(hardirq_context)) # define lockdep_softirq_context(p) ((p)->softirq_context) -# define lockdep_hardirqs_enabled(p) ((p)->hardirqs_enabled) +# define lockdep_hardirqs_enabled() (this_cpu_read(hardirqs_enabled)) # define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled) -# define lockdep_hardirq_enter() \ -do { \ - if (!current->hardirq_context++) \ - current->hardirq_threaded = 0; \ +# define lockdep_hardirq_enter() \ +do { \ + if (this_cpu_inc_return(hardirq_context) == 1) \ + current->hardirq_threaded = 0; \ } while (0) # define lockdep_hardirq_threaded() \ do { \ @@ -50,7 +68,7 @@ do { \ } while (0) # define lockdep_hardirq_exit() \ do { \ - current->hardirq_context--; \ + this_cpu_dec(hardirq_context); \ } while (0) # define lockdep_softirq_enter() \ do { \ @@ -101,12 +119,12 @@ do { \ #else # define trace_hardirqs_on_prepare() do { } while (0) -# define trace_hardirqs_off_prepare() do { } while (0) +# define trace_hardirqs_off_finish() do { } while (0) # define trace_hardirqs_on() do { } while (0) # define trace_hardirqs_off() do { } while (0) -# define lockdep_hardirq_context(p) 0 +# define lockdep_hardirq_context() 0 # define lockdep_softirq_context(p) 0 -# define lockdep_hardirqs_enabled(p) 0 +# define lockdep_hardirqs_enabled() 0 # define lockdep_softirqs_enabled(p) 0 # define lockdep_hardirq_enter() do { } while (0) # define lockdep_hardirq_threaded() do { } while (0) diff --git a/include/linux/irqhandler.h b/include/linux/irqhandler.h index 1e6f4e7123d6..c30f454a9518 100644 --- a/include/linux/irqhandler.h +++ b/include/linux/irqhandler.h @@ -10,6 +10,5 @@ struct irq_desc; struct irq_data; typedef void (*irq_flow_handler_t)(struct irq_desc *desc); -typedef void (*irq_preflow_handler_t)(struct irq_data *data); #endif diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f613d8529863..4aaa29772bb0 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -27,6 +27,7 @@ #include <linux/timer.h> #include <linux/slab.h> #include <linux/bit_spinlock.h> +#include <linux/blkdev.h> #include <crypto/hash.h> #endif @@ -766,6 +767,11 @@ struct journal_s int j_errno; /** + * @j_abort_mutex: Lock the whole aborting procedure. + */ + struct mutex j_abort_mutex; + + /** * @j_sb_buffer: The first part of the superblock buffer. */ struct buffer_head *j_sb_buffer; @@ -1247,7 +1253,6 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file * data write error in ordered * mode */ -#define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */ /* * Function declarations for the journaling transaction and buffer diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 98338dc6b5d2..481273f0c72d 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -18,6 +18,7 @@ #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) +struct cred; struct module; static inline int is_kernel_inittext(unsigned long addr) @@ -98,7 +99,7 @@ int lookup_symbol_name(unsigned long addr, char *symname); int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name); /* How and when do we show kallsyms values? */ -extern int kallsyms_show_value(void); +extern bool kallsyms_show_value(const struct cred *cred); #else /* !CONFIG_KALLSYMS */ @@ -158,7 +159,7 @@ static inline int lookup_symbol_attrs(unsigned long addr, unsigned long *size, u return -ERANGE; } -static inline int kallsyms_show_value(void) +static inline bool kallsyms_show_value(const struct cred *cred) { return false; } diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 1776eb2e43a4..ea67910ae6b7 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -208,7 +208,7 @@ struct crash_mem_range { struct crash_mem { unsigned int max_nr_ranges; unsigned int nr_ranges; - struct crash_mem_range ranges[0]; + struct crash_mem_range ranges[]; }; extern int crash_exclude_mem_range(struct crash_mem *mem, diff --git a/include/linux/key.h b/include/linux/key.h index 6cf8e71cf8b7..0f2e24f13c2b 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -71,6 +71,23 @@ struct net; #define KEY_PERM_UNDEF 0xffffffff +/* + * The permissions required on a key that we're looking up. + */ +enum key_need_perm { + KEY_NEED_UNSPECIFIED, /* Needed permission unspecified */ + KEY_NEED_VIEW, /* Require permission to view attributes */ + KEY_NEED_READ, /* Require permission to read content */ + KEY_NEED_WRITE, /* Require permission to update / modify */ + KEY_NEED_SEARCH, /* Require permission to search (keyring) or find (key) */ + KEY_NEED_LINK, /* Require permission to link */ + KEY_NEED_SETATTR, /* Require permission to change attributes */ + KEY_NEED_UNLINK, /* Require permission to unlink key */ + KEY_SYSADMIN_OVERRIDE, /* Special: override by CAP_SYS_ADMIN */ + KEY_AUTHTOKEN_OVERRIDE, /* Special: override by possession of auth token */ + KEY_DEFER_PERM_CHECK, /* Special: permission check is deferred */ +}; + struct seq_file; struct user_struct; struct signal_struct; @@ -176,6 +193,9 @@ struct key { struct list_head graveyard_link; struct rb_node serial_node; }; +#ifdef CONFIG_KEY_NOTIFICATIONS + struct watch_list *watchers; /* Entities watching this key for changes */ +#endif struct rw_semaphore sem; /* change vs change sem */ struct key_user *user; /* owner of this key */ void *security; /* security data for this key */ @@ -417,20 +437,9 @@ static inline key_serial_t key_serial(const struct key *key) extern void key_set_timeout(struct key *, unsigned); extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, - key_perm_t perm); + enum key_need_perm need_perm); extern void key_free_user_ns(struct user_namespace *); -/* - * The permissions required on a key that we're looking up. - */ -#define KEY_NEED_VIEW 0x01 /* Require permission to view attributes */ -#define KEY_NEED_READ 0x02 /* Require permission to read content */ -#define KEY_NEED_WRITE 0x04 /* Require permission to update / modify */ -#define KEY_NEED_SEARCH 0x08 /* Require permission to search (keyring) or find (key) */ -#define KEY_NEED_LINK 0x10 /* Require permission to link */ -#define KEY_NEED_SETATTR 0x20 /* Require permission to change attributes */ -#define KEY_NEED_ALL 0x3f /* All the above permissions */ - static inline short key_read_state(const struct key *key) { /* Barrier versus mark_key_instantiated(). */ diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index c62d76478adc..477b8b7c908f 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -177,6 +177,17 @@ kgdb_arch_handle_exception(int vector, int signo, int err_code, struct pt_regs *regs); /** + * kgdb_arch_handle_qxfer_pkt - Handle architecture specific GDB XML + * packets. + * @remcom_in_buffer: The buffer of the packet we have read. + * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into. + */ + +extern void +kgdb_arch_handle_qxfer_pkt(char *remcom_in_buffer, + char *remcom_out_buffer); + +/** * kgdb_call_nmi_hook - Call kgdb_nmicallback() on the current CPU * @ignored: This parameter is only here to match the prototype. * @@ -276,8 +287,7 @@ struct kgdb_arch { * the I/O driver. * @post_exception: Pointer to a function that will do any cleanup work * for the I/O driver. - * @is_console: 1 if the end device is a console 0 if the I/O device is - * not a console + * @cons: valid if the I/O device is a console; else NULL. */ struct kgdb_io { const char *name; @@ -288,7 +298,7 @@ struct kgdb_io { void (*deinit) (void); void (*pre_exception) (void); void (*post_exception) (void); - int is_console; + struct console *cons; }; extern const struct kgdb_arch arch_kgdb_ops; @@ -315,6 +325,7 @@ extern int kgdb_hex2mem(char *buf, char *mem, int count); extern int kgdb_isremovedbreak(unsigned long addr); extern void kgdb_schedule_breakpoint(void); +extern int kgdb_has_hit_break(unsigned long addr); extern int kgdb_handle_exception(int ex_vector, int signo, int err_code, diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 594265bfd390..45b8cdc9fad7 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -161,7 +161,7 @@ struct kretprobe_instance { kprobe_opcode_t *ret_addr; struct task_struct *task; void *fp; - char data[0]; + char data[]; }; struct kretprobe_blackpoint { @@ -242,6 +242,7 @@ struct kprobe_insn_cache { struct mutex mutex; void *(*alloc)(void); /* allocate insn page */ void (*free)(void *); /* free insn page */ + const char *sym; /* symbol for insn pages */ struct list_head pages; /* list of kprobe_insn_page */ size_t insn_size; /* size of instruction slot */ int nr_garbage; @@ -272,6 +273,10 @@ static inline bool is_kprobe_##__name##_slot(unsigned long addr) \ { \ return __is_insn_slot_addr(&kprobe_##__name##_slots, addr); \ } +#define KPROBE_INSN_PAGE_SYM "kprobe_insn_page" +#define KPROBE_OPTINSN_PAGE_SYM "kprobe_optinsn_page" +int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum, + unsigned long *value, char *type, char *sym); #else /* __ARCH_WANT_KPROBES_INSN_SLOT */ #define DEFINE_INSN_CACHE_OPS(__name) \ static inline bool is_kprobe_##__name##_slot(unsigned long addr) \ @@ -350,6 +355,10 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) return this_cpu_ptr(&kprobe_ctlblk); } +extern struct kprobe kprobe_busy; +void kprobe_busy_begin(void); +void kprobe_busy_end(void); + kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset); int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); @@ -373,6 +382,11 @@ void dump_kprobe(struct kprobe *kp); void *alloc_insn_page(void); void free_insn_page(void *page); +int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type, + char *sym); + +int arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value, + char *type, char *sym); #else /* !CONFIG_KPROBES: */ static inline int kprobes_built_in(void) @@ -435,6 +449,11 @@ static inline bool within_kprobe_blacklist(unsigned long addr) { return true; } +static inline int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *sym) +{ + return -ERANGE; +} #endif /* CONFIG_KPROBES */ static inline int disable_kretprobe(struct kretprobe *rp) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 62ec926c78a0..ac83e9c1d82c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -409,7 +409,7 @@ struct kvm_irq_routing_table { * Array indexed by gsi. Each entry contains list of irq chips * the gsi is connected to. */ - struct hlist_head map[0]; + struct hlist_head map[]; }; #endif @@ -1439,4 +1439,12 @@ int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, uintptr_t data, const char *name, struct task_struct **thread_ptr); +#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK +static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) +{ + vcpu->run->exit_reason = KVM_EXIT_INTR; + vcpu->stat.signal_exits++; +} +#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ + #endif diff --git a/include/linux/libata.h b/include/linux/libata.h index af832852e620..77ccf040a128 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -22,6 +22,7 @@ #include <linux/acpi.h> #include <linux/cdrom.h> #include <linux/sched.h> +#include <linux/async.h> /* * Define if arch has non-standard setup. This is a _PCI_ standard @@ -609,7 +610,7 @@ struct ata_host { struct task_struct *eh_owner; struct ata_port *simplex_claimed; /* channel owning the DMA */ - struct ata_port *ports[0]; + struct ata_port *ports[]; }; struct ata_queued_cmd { @@ -872,6 +873,8 @@ struct ata_port { struct timer_list fastdrain_timer; unsigned long fastdrain_cnt; + async_cookie_t cookie; + int em_message_type; void *private_data; @@ -1092,7 +1095,11 @@ extern int ata_scsi_ioctl(struct scsi_device *dev, unsigned int cmd, #define ATA_SCSI_COMPAT_IOCTL /* empty */ #endif extern int ata_scsi_queuecmd(struct Scsi_Host *h, struct scsi_cmnd *cmd); +#if IS_REACHABLE(CONFIG_ATA) bool ata_scsi_dma_need_drain(struct request *rq); +#else +#define ata_scsi_dma_need_drain NULL +#endif extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev, unsigned int cmd, void __user *arg); extern bool ata_link_online(struct ata_link *link); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index ee8ec2e68055..1db223710b28 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -631,7 +631,6 @@ static inline int nvm_next_ppa_in_chk(struct nvm_tgt_dev *dev, return last; } -typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, int flags); @@ -650,7 +649,7 @@ struct nvm_tgt_type { int flags; /* target entry points */ - nvm_tgt_make_rq_fn *make_rq; + const struct block_device_operations *bops; nvm_tgt_capacity_fn *capacity; /* module-specific init/teardown */ diff --git a/include/linux/list.h b/include/linux/list.h index aff44d34f4e4..0d0d17a10d25 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -283,6 +283,24 @@ static inline int list_empty(const struct list_head *head) } /** + * list_del_init_careful - deletes entry from list and reinitialize it. + * @entry: the element to delete from the list. + * + * This is the same as list_del_init(), except designed to be used + * together with list_empty_careful() in a way to guarantee ordering + * of other memory operations. + * + * Any memory operations done before a list_del_init_careful() are + * guaranteed to be visible after a list_empty_careful() test. + */ +static inline void list_del_init_careful(struct list_head *entry) +{ + __list_del_entry(entry); + entry->prev = entry; + smp_store_release(&entry->next, entry); +} + +/** * list_empty_careful - tests whether a list is empty and not being modified * @head: the list to test * @@ -297,7 +315,7 @@ static inline int list_empty(const struct list_head *head) */ static inline int list_empty_careful(const struct list_head *head) { - struct list_head *next = head->next; + struct list_head *next = smp_load_acquire(&head->next); return (next == head) && (next == head->prev); } diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 8fce5c98a4b0..39a35699d0d6 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -10,33 +10,15 @@ #ifndef __LINUX_LOCKDEP_H #define __LINUX_LOCKDEP_H +#include <linux/lockdep_types.h> +#include <asm/percpu.h> + struct task_struct; -struct lockdep_map; /* for sysctl */ extern int prove_locking; extern int lock_stat; -#define MAX_LOCKDEP_SUBCLASSES 8UL - -#include <linux/types.h> - -enum lockdep_wait_type { - LD_WAIT_INV = 0, /* not checked, catch all */ - - LD_WAIT_FREE, /* wait free, rcu etc.. */ - LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */ - -#ifdef CONFIG_PROVE_RAW_LOCK_NESTING - LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */ -#else - LD_WAIT_CONFIG = LD_WAIT_SPIN, -#endif - LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */ - - LD_WAIT_MAX, /* must be last */ -}; - #ifdef CONFIG_LOCKDEP #include <linux/linkage.h> @@ -44,147 +26,6 @@ enum lockdep_wait_type { #include <linux/debug_locks.h> #include <linux/stacktrace.h> -/* - * We'd rather not expose kernel/lockdep_states.h this wide, but we do need - * the total number of states... :-( - */ -#define XXX_LOCK_USAGE_STATES (1+2*4) - -/* - * NR_LOCKDEP_CACHING_CLASSES ... Number of classes - * cached in the instance of lockdep_map - * - * Currently main class (subclass == 0) and signle depth subclass - * are cached in lockdep_map. This optimization is mainly targeting - * on rq->lock. double_rq_lock() acquires this highly competitive with - * single depth. - */ -#define NR_LOCKDEP_CACHING_CLASSES 2 - -/* - * A lockdep key is associated with each lock object. For static locks we use - * the lock address itself as the key. Dynamically allocated lock objects can - * have a statically or dynamically allocated key. Dynamically allocated lock - * keys must be registered before being used and must be unregistered before - * the key memory is freed. - */ -struct lockdep_subclass_key { - char __one_byte; -} __attribute__ ((__packed__)); - -/* hash_entry is used to keep track of dynamically allocated keys. */ -struct lock_class_key { - union { - struct hlist_node hash_entry; - struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; - }; -}; - -extern struct lock_class_key __lockdep_no_validate__; - -struct lock_trace; - -#define LOCKSTAT_POINTS 4 - -/* - * The lock-class itself. The order of the structure members matters. - * reinit_class() zeroes the key member and all subsequent members. - */ -struct lock_class { - /* - * class-hash: - */ - struct hlist_node hash_entry; - - /* - * Entry in all_lock_classes when in use. Entry in free_lock_classes - * when not in use. Instances that are being freed are on one of the - * zapped_classes lists. - */ - struct list_head lock_entry; - - /* - * These fields represent a directed graph of lock dependencies, - * to every node we attach a list of "forward" and a list of - * "backward" graph nodes. - */ - struct list_head locks_after, locks_before; - - const struct lockdep_subclass_key *key; - unsigned int subclass; - unsigned int dep_gen_id; - - /* - * IRQ/softirq usage tracking bits: - */ - unsigned long usage_mask; - const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES]; - - /* - * Generation counter, when doing certain classes of graph walking, - * to ensure that we check one node only once: - */ - int name_version; - const char *name; - - short wait_type_inner; - short wait_type_outer; - -#ifdef CONFIG_LOCK_STAT - unsigned long contention_point[LOCKSTAT_POINTS]; - unsigned long contending_point[LOCKSTAT_POINTS]; -#endif -} __no_randomize_layout; - -#ifdef CONFIG_LOCK_STAT -struct lock_time { - s64 min; - s64 max; - s64 total; - unsigned long nr; -}; - -enum bounce_type { - bounce_acquired_write, - bounce_acquired_read, - bounce_contended_write, - bounce_contended_read, - nr_bounce_types, - - bounce_acquired = bounce_acquired_write, - bounce_contended = bounce_contended_write, -}; - -struct lock_class_stats { - unsigned long contention_point[LOCKSTAT_POINTS]; - unsigned long contending_point[LOCKSTAT_POINTS]; - struct lock_time read_waittime; - struct lock_time write_waittime; - struct lock_time read_holdtime; - struct lock_time write_holdtime; - unsigned long bounces[nr_bounce_types]; -}; - -struct lock_class_stats lock_stats(struct lock_class *class); -void clear_lock_stats(struct lock_class *class); -#endif - -/* - * Map the lock object (the lock instance) to the lock-class object. - * This is embedded into specific lock instances: - */ -struct lockdep_map { - struct lock_class_key *key; - struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES]; - const char *name; - short wait_type_outer; /* can be taken in this context */ - short wait_type_inner; /* presents this context */ -#ifdef CONFIG_LOCK_STAT - int cpu; - unsigned long ip; -#endif -}; - static inline void lockdep_copy_map(struct lockdep_map *to, struct lockdep_map *from) { @@ -440,8 +281,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock, extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip); -struct pin_cookie { unsigned int val; }; - #define NIL_COOKIE (struct pin_cookie){ .val = 0U, } extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock); @@ -520,10 +359,6 @@ static inline void lockdep_set_selftest_task(struct task_struct *task) # define lockdep_reset() do { debug_locks = 1; } while (0) # define lockdep_free_key_range(start, size) do { } while (0) # define lockdep_sys_exit() do { } while (0) -/* - * The class key takes no space if lockdep is disabled: - */ -struct lock_class_key { }; static inline void lockdep_register_key(struct lock_class_key *key) { @@ -533,11 +368,6 @@ static inline void lockdep_unregister_key(struct lock_class_key *key) { } -/* - * The lockdep_map takes no space if lockdep is disabled: - */ -struct lockdep_map { }; - #define lockdep_depth(tsk) (0) #define lockdep_is_held_type(l, r) (1) @@ -549,8 +379,6 @@ struct lockdep_map { }; #define lockdep_recursing(tsk) (0) -struct pin_cookie { }; - #define NIL_COOKIE (struct pin_cookie){ } #define lockdep_pin_lock(l) ({ struct pin_cookie cookie = { }; cookie; }) @@ -703,38 +531,58 @@ do { \ lock_release(&(lock)->dep_map, _THIS_IP_); \ } while (0) -#define lockdep_assert_irqs_enabled() do { \ - WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - !current->hardirqs_enabled, \ - "IRQs not enabled as expected\n"); \ - } while (0) +DECLARE_PER_CPU(int, hardirqs_enabled); +DECLARE_PER_CPU(int, hardirq_context); -#define lockdep_assert_irqs_disabled() do { \ - WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - current->hardirqs_enabled, \ - "IRQs not disabled as expected\n"); \ - } while (0) +#define lockdep_assert_irqs_enabled() \ +do { \ + WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled)); \ +} while (0) -#define lockdep_assert_in_irq() do { \ - WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - !current->hardirq_context, \ - "Not in hardirq as expected\n"); \ - } while (0) +#define lockdep_assert_irqs_disabled() \ +do { \ + WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled)); \ +} while (0) + +#define lockdep_assert_in_irq() \ +do { \ + WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context)); \ +} while (0) + +#define lockdep_assert_preemption_enabled() \ +do { \ + WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ + debug_locks && \ + (preempt_count() != 0 || \ + !this_cpu_read(hardirqs_enabled))); \ +} while (0) + +#define lockdep_assert_preemption_disabled() \ +do { \ + WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ + debug_locks && \ + (preempt_count() == 0 && \ + this_cpu_read(hardirqs_enabled))); \ +} while (0) #else # define might_lock(lock) do { } while (0) # define might_lock_read(lock) do { } while (0) # define might_lock_nested(lock, subclass) do { } while (0) + # define lockdep_assert_irqs_enabled() do { } while (0) # define lockdep_assert_irqs_disabled() do { } while (0) # define lockdep_assert_in_irq() do { } while (0) + +# define lockdep_assert_preemption_enabled() do { } while (0) +# define lockdep_assert_preemption_disabled() do { } while (0) #endif #ifdef CONFIG_PROVE_RAW_LOCK_NESTING # define lockdep_assert_RT_in_threaded_ctx() do { \ WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - current->hardirq_context && \ + lockdep_hardirq_context() && \ !(current->hardirq_threaded || current->irq_config), \ "Not in threaded context on PREEMPT_RT as expected\n"); \ } while (0) diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h new file mode 100644 index 000000000000..bb35b449f533 --- /dev/null +++ b/include/linux/lockdep_types.h @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Runtime locking correctness validator + * + * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * + * see Documentation/locking/lockdep-design.rst for more details. + */ +#ifndef __LINUX_LOCKDEP_TYPES_H +#define __LINUX_LOCKDEP_TYPES_H + +#include <linux/types.h> + +#define MAX_LOCKDEP_SUBCLASSES 8UL + +enum lockdep_wait_type { + LD_WAIT_INV = 0, /* not checked, catch all */ + + LD_WAIT_FREE, /* wait free, rcu etc.. */ + LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */ + +#ifdef CONFIG_PROVE_RAW_LOCK_NESTING + LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */ +#else + LD_WAIT_CONFIG = LD_WAIT_SPIN, +#endif + LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */ + + LD_WAIT_MAX, /* must be last */ +}; + +#ifdef CONFIG_LOCKDEP + +/* + * We'd rather not expose kernel/lockdep_states.h this wide, but we do need + * the total number of states... :-( + */ +#define XXX_LOCK_USAGE_STATES (1+2*4) + +/* + * NR_LOCKDEP_CACHING_CLASSES ... Number of classes + * cached in the instance of lockdep_map + * + * Currently main class (subclass == 0) and signle depth subclass + * are cached in lockdep_map. This optimization is mainly targeting + * on rq->lock. double_rq_lock() acquires this highly competitive with + * single depth. + */ +#define NR_LOCKDEP_CACHING_CLASSES 2 + +/* + * A lockdep key is associated with each lock object. For static locks we use + * the lock address itself as the key. Dynamically allocated lock objects can + * have a statically or dynamically allocated key. Dynamically allocated lock + * keys must be registered before being used and must be unregistered before + * the key memory is freed. + */ +struct lockdep_subclass_key { + char __one_byte; +} __attribute__ ((__packed__)); + +/* hash_entry is used to keep track of dynamically allocated keys. */ +struct lock_class_key { + union { + struct hlist_node hash_entry; + struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; + }; +}; + +extern struct lock_class_key __lockdep_no_validate__; + +struct lock_trace; + +#define LOCKSTAT_POINTS 4 + +/* + * The lock-class itself. The order of the structure members matters. + * reinit_class() zeroes the key member and all subsequent members. + */ +struct lock_class { + /* + * class-hash: + */ + struct hlist_node hash_entry; + + /* + * Entry in all_lock_classes when in use. Entry in free_lock_classes + * when not in use. Instances that are being freed are on one of the + * zapped_classes lists. + */ + struct list_head lock_entry; + + /* + * These fields represent a directed graph of lock dependencies, + * to every node we attach a list of "forward" and a list of + * "backward" graph nodes. + */ + struct list_head locks_after, locks_before; + + const struct lockdep_subclass_key *key; + unsigned int subclass; + unsigned int dep_gen_id; + + /* + * IRQ/softirq usage tracking bits: + */ + unsigned long usage_mask; + const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES]; + + /* + * Generation counter, when doing certain classes of graph walking, + * to ensure that we check one node only once: + */ + int name_version; + const char *name; + + short wait_type_inner; + short wait_type_outer; + +#ifdef CONFIG_LOCK_STAT + unsigned long contention_point[LOCKSTAT_POINTS]; + unsigned long contending_point[LOCKSTAT_POINTS]; +#endif +} __no_randomize_layout; + +#ifdef CONFIG_LOCK_STAT +struct lock_time { + s64 min; + s64 max; + s64 total; + unsigned long nr; +}; + +enum bounce_type { + bounce_acquired_write, + bounce_acquired_read, + bounce_contended_write, + bounce_contended_read, + nr_bounce_types, + + bounce_acquired = bounce_acquired_write, + bounce_contended = bounce_contended_write, +}; + +struct lock_class_stats { + unsigned long contention_point[LOCKSTAT_POINTS]; + unsigned long contending_point[LOCKSTAT_POINTS]; + struct lock_time read_waittime; + struct lock_time write_waittime; + struct lock_time read_holdtime; + struct lock_time write_holdtime; + unsigned long bounces[nr_bounce_types]; +}; + +struct lock_class_stats lock_stats(struct lock_class *class); +void clear_lock_stats(struct lock_class *class); +#endif + +/* + * Map the lock object (the lock instance) to the lock-class object. + * This is embedded into specific lock instances: + */ +struct lockdep_map { + struct lock_class_key *key; + struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES]; + const char *name; + short wait_type_outer; /* can be taken in this context */ + short wait_type_inner; /* presents this context */ +#ifdef CONFIG_LOCK_STAT + int cpu; + unsigned long ip; +#endif +}; + +struct pin_cookie { unsigned int val; }; + +#else /* !CONFIG_LOCKDEP */ + +/* + * The class key takes no space if lockdep is disabled: + */ +struct lock_class_key { }; + +/* + * The lockdep_map takes no space if lockdep is disabled: + */ +struct lockdep_map { }; + +struct pin_cookie { }; + +#endif /* !LOCKDEP */ + +#endif /* __LINUX_LOCKDEP_TYPES_H */ diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 99d629fd9944..28f23b341c1c 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -75,6 +75,7 @@ struct common_audit_data { #define LSM_AUDIT_DATA_IBPKEY 13 #define LSM_AUDIT_DATA_IBENDPORT 14 #define LSM_AUDIT_DATA_LOCKDOWN 15 +#define LSM_AUDIT_DATA_NOTIFICATION 16 union { struct path path; struct dentry *dentry; diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index fb3ce6cec997..af998f93d256 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -150,7 +150,7 @@ LSM_HOOK(int, 0, inode_listsecurity, struct inode *inode, char *buffer, size_t buffer_size) LSM_HOOK(void, LSM_RET_VOID, inode_getsecid, struct inode *inode, u32 *secid) LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new) -LSM_HOOK(int, 0, inode_copy_up_xattr, const char *name) +LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, const char *name) LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir, struct kernfs_node *kn) LSM_HOOK(int, 0, file_permission, struct file *file, int mask) @@ -191,6 +191,8 @@ LSM_HOOK(int, 0, kernel_post_read_file, struct file *file, char *buf, loff_t size, enum kernel_read_file_id id) LSM_HOOK(int, 0, task_fix_setuid, struct cred *new, const struct cred *old, int flags) +LSM_HOOK(int, 0, task_fix_setgid, struct cred *new, const struct cred * old, + int flags) LSM_HOOK(int, 0, task_setpgid, struct task_struct *p, pid_t pgid) LSM_HOOK(int, 0, task_getpgid, struct task_struct *p) LSM_HOOK(int, 0, task_getsid, struct task_struct *p) @@ -254,6 +256,15 @@ LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) LSM_HOOK(int, 0, inode_getsecctx, struct inode *inode, void **ctx, u32 *ctxlen) +#if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) +LSM_HOOK(int, 0, post_notification, const struct cred *w_cred, + const struct cred *cred, struct watch_notification *n) +#endif /* CONFIG_SECURITY && CONFIG_WATCH_QUEUE */ + +#if defined(CONFIG_SECURITY) && defined(CONFIG_KEY_NOTIFICATIONS) +LSM_HOOK(int, 0, watch_key, struct key *key) +#endif /* CONFIG_SECURITY && CONFIG_KEY_NOTIFICATIONS */ + #ifdef CONFIG_SECURITY_NETWORK LSM_HOOK(int, 0, unix_stream_connect, struct sock *sock, struct sock *other, struct sock *newsk) @@ -349,7 +360,7 @@ LSM_HOOK(int, 0, key_alloc, struct key *key, const struct cred *cred, unsigned long flags) LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key) LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred, - unsigned perm) + enum key_need_perm need_perm) LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **_buffer) #endif /* CONFIG_KEYS */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 3e62dab77699..95b7c1d32062 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -659,6 +659,15 @@ * @old is the set of credentials that are being replaces * @flags contains one of the LSM_SETID_* values. * Return 0 on success. + * @task_fix_setgid: + * Update the module's state after setting one or more of the group + * identity attributes of the current process. The @flags parameter + * indicates which of the set*gid system calls invoked this hook. + * @new is the set of credentials that will be installed. Modifications + * should be made to this rather than to @current->cred. + * @old is the set of credentials that are being replaced. + * @flags contains one of the LSM_SETID_* values. + * Return 0 on success. * @task_setpgid: * Check permission before setting the process group identifier of the * process @p to @pgid. @@ -1445,6 +1454,20 @@ * @ctx is a pointer in which to place the allocated security context. * @ctxlen points to the place to put the length of @ctx. * + * Security hooks for the general notification queue: + * + * @post_notification: + * Check to see if a watch notification can be posted to a particular + * queue. + * @w_cred: The credentials of the whoever set the watch. + * @cred: The event-triggerer's credentials + * @n: The notification being posted + * + * @watch_key: + * Check to see if a process is allowed to watch for event notifications + * from a key or keyring. + * @key: The key to watch. + * * Security hooks for using the eBPF maps and programs functionalities through * eBPF syscalls. * diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h index a4dc45fbec0a..a96e8c252bac 100644 --- a/include/linux/mailbox/mtk-cmdq-mailbox.h +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h @@ -17,6 +17,7 @@ #define CMDQ_JUMP_PASS CMDQ_INST_SIZE #define CMDQ_WFE_UPDATE BIT(31) +#define CMDQ_WFE_UPDATE_VALUE BIT(16) #define CMDQ_WFE_WAIT BIT(15) #define CMDQ_WFE_WAIT_VALUE 0x1 @@ -59,6 +60,7 @@ enum cmdq_code { CMDQ_CODE_JUMP = 0x10, CMDQ_CODE_WFE = 0x20, CMDQ_CODE_EOC = 0x40, + CMDQ_CODE_LOGIC = 0xa0, }; enum cmdq_cb_status { diff --git a/include/linux/math64.h b/include/linux/math64.h index 11a267413e8e..d097119419e6 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -263,6 +263,8 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor) } #endif /* mul_u64_u32_div */ +u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); + #define DIV64_U64_ROUND_UP(ll, d) \ ({ u64 _tmp = (d); div64_u64((ll) + _tmp - 1, _tmp); }) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 017fae833d4a..9d925db0d355 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -77,16 +77,12 @@ struct memblock_type { * @current_limit: physical address of the current allocation limit * @memory: usable memory regions * @reserved: reserved memory regions - * @physmem: all physical memory */ struct memblock { bool bottom_up; /* is bottom up direction? */ phys_addr_t current_limit; struct memblock_type memory; struct memblock_type reserved; -#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP - struct memblock_type physmem; -#endif }; extern struct memblock memblock; @@ -145,6 +141,30 @@ void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, void __memblock_free_late(phys_addr_t base, phys_addr_t size); +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP +static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, + phys_addr_t *out_start, + phys_addr_t *out_end) +{ + extern struct memblock_type physmem; + + __next_mem_range(idx, NUMA_NO_NODE, MEMBLOCK_NONE, &physmem, type, + out_start, out_end, NULL); +} + +/** + * for_each_physmem_range - iterate through physmem areas not included in type. + * @i: u64 used as loop variable + * @type: ptr to memblock_type which excludes from the iteration, can be %NULL + * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL + */ +#define for_each_physmem_range(i, type, p_start, p_end) \ + for (i = 0, __next_physmem_range(&i, type, p_start, p_end); \ + i != (u64)ULLONG_MAX; \ + __next_physmem_range(&i, type, p_start, p_end)) +#endif /* CONFIG_HAVE_MEMBLOCK_PHYS_MAP */ + /** * for_each_mem_range - iterate through memblock areas from type_a and not * included in type_b. Or just type_a if type_b is NULL. diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 13c0e4556eda..1e6ca716635a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -147,6 +147,7 @@ enum { MLX5_REG_MCDA = 0x9063, MLX5_REG_MCAM = 0x907f, MLX5_REG_MIRC = 0x9162, + MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 116bd9bb347f..1340e02b14ef 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4283,7 +4283,8 @@ struct mlx5_ifc_rst2init_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + u8 ece[0x20]; }; struct mlx5_ifc_rst2init_qp_in_bits { @@ -4300,7 +4301,7 @@ struct mlx5_ifc_rst2init_qp_in_bits { u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; @@ -4380,6 +4381,7 @@ struct mlx5_ifc_query_vport_state_out_bits { enum { MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT = 0x0, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT = 0x1, + MLX5_VPORT_STATE_OP_MOD_UPLINK = 0x2, }; struct mlx5_ifc_arm_monitor_counter_in_bits { @@ -6619,7 +6621,8 @@ struct mlx5_ifc_init2init_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + u8 ece[0x20]; }; struct mlx5_ifc_init2init_qp_in_bits { @@ -6636,7 +6639,7 @@ struct mlx5_ifc_init2init_qp_in_bits { u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; @@ -9958,6 +9961,34 @@ struct mlx5_ifc_pptb_reg_bits { u8 untagged_buff[0x4]; }; +struct mlx5_ifc_sbcam_reg_bits { + u8 reserved_at_0[0x8]; + u8 feature_group[0x8]; + u8 reserved_at_10[0x8]; + u8 access_reg_group[0x8]; + + u8 reserved_at_20[0x20]; + + u8 sb_access_reg_cap_mask[4][0x20]; + + u8 reserved_at_c0[0x80]; + + u8 sb_feature_cap_mask[4][0x20]; + + u8 reserved_at_1c0[0x40]; + + u8 cap_total_buffer_size[0x20]; + + u8 cap_cell_size[0x10]; + u8 cap_max_pg_buffers[0x8]; + u8 cap_num_pool_supported[0x8]; + + u8 reserved_at_240[0x8]; + u8 cap_sbsr_stat_size[0x8]; + u8 cap_max_tclass_data[0x8]; + u8 cap_max_cpu_ingress_tclass_sb[0x8]; +}; + struct mlx5_ifc_pbmc_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c4c37fd12104..f6f884970511 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -257,8 +257,8 @@ struct lruvec { */ unsigned long anon_cost; unsigned long file_cost; - /* Evictions & activations on the inactive file list */ - atomic_long_t inactive_age; + /* Non-resident age, driven by LRU movement */ + atomic_long_t nonresident_age; /* Refaults at the time of last reclaim cycle */ unsigned long refaults; /* Various lruvec state flags (enum lruvec_flags) */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 8d764aab29de..e14cbe444afc 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -318,7 +318,7 @@ struct pcmcia_device_id { #define INPUT_DEVICE_ID_LED_MAX 0x0f #define INPUT_DEVICE_ID_SND_MAX 0x07 #define INPUT_DEVICE_ID_FF_MAX 0x7f -#define INPUT_DEVICE_ID_SW_MAX 0x0f +#define INPUT_DEVICE_ID_SW_MAX 0x10 #define INPUT_DEVICE_ID_PROP_MAX 0x1f #define INPUT_DEVICE_ID_MATCH_BUS 1 diff --git a/include/linux/mpi.h b/include/linux/mpi.h index 7bd6d8af0004..5d906dfbf3ed 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -63,6 +63,9 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod); int mpi_cmp_ui(MPI u, ulong v); int mpi_cmp(MPI u, MPI v); +/*-- mpi-sub-ui.c --*/ +int mpi_sub_ui(MPI w, MPI u, unsigned long vval); + /*-- mpi-bit.c --*/ void mpi_normalize(MPI a); unsigned mpi_get_nbits(MPI a); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5b364a2e0006..39e28e11863c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1821,8 +1821,6 @@ enum netdev_priv_flags { * for hardware timestamping * @sfp_bus: attached &struct sfp_bus structure. * - * @addr_list_lock_key: lockdep class annotating - * net_device->addr_list_lock spinlock * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount * @@ -2125,7 +2123,6 @@ struct net_device { #endif struct phy_device *phydev; struct sfp_bus *sfp_bus; - struct lock_class_key addr_list_lock_key; struct lock_class_key *qdisc_tx_busylock; struct lock_class_key *qdisc_running_key; bool proto_down; @@ -2217,10 +2214,13 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, static struct lock_class_key qdisc_tx_busylock_key; \ static struct lock_class_key qdisc_running_key; \ static struct lock_class_key qdisc_xmit_lock_key; \ + static struct lock_class_key dev_addr_list_lock_key; \ unsigned int i; \ \ (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ (dev)->qdisc_running_key = &qdisc_running_key; \ + lockdep_set_class(&(dev)->addr_list_lock, \ + &dev_addr_list_lock_key); \ for (i = 0; i < (dev)->num_tx_queues; i++) \ lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \ &qdisc_xmit_lock_key); \ @@ -3157,7 +3157,7 @@ static inline int dev_recursion_level(void) return this_cpu_read(softnet_data.xmit.recursion); } -#define XMIT_RECURSION_LIMIT 10 +#define XMIT_RECURSION_LIMIT 8 static inline bool dev_xmit_recursion(void) { return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > @@ -3253,7 +3253,6 @@ static inline void netif_stop_queue(struct net_device *dev) } void netif_tx_stop_all_queues(struct net_device *dev); -void netdev_update_lockdep_key(struct net_device *dev); static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue) { @@ -4239,6 +4238,11 @@ static inline void netif_addr_lock(struct net_device *dev) spin_lock(&dev->addr_list_lock); } +static inline void netif_addr_lock_nested(struct net_device *dev) +{ + spin_lock_nested(&dev->addr_list_lock, dev->lower_level); +} + static inline void netif_addr_lock_bh(struct net_device *dev) { spin_lock_bh(&dev->addr_list_lock); diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index b394bd4f68a3..c4676d6feeff 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -25,6 +25,12 @@ int ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl, const struct nf_hook_ops *ops, struct xt_table **res); + +void ipt_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops); + +void ipt_unregister_table_exit(struct net *net, struct xt_table *table); + void ipt_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops); diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 8225f7821a29..1547d5f9ae06 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -29,6 +29,9 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, const struct nf_hook_ops *ops, struct xt_table **res); void ip6t_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops); +void ip6t_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops); +void ip6t_unregister_table_exit(struct net *net, struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table); diff --git a/include/linux/nospec.h b/include/linux/nospec.h index 0c5ef54fd416..c1e79f72cd89 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -5,6 +5,8 @@ #ifndef _LINUX_NOSPEC_H #define _LINUX_NOSPEC_H + +#include <linux/compiler.h> #include <asm/barrier.h> struct task_struct; diff --git a/include/linux/of.h b/include/linux/of.h index c669c0a4732f..5cf7ae0465d1 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -554,7 +554,7 @@ bool of_console_check(struct device_node *dn, char *name, int index); extern int of_cpu_node_to_id(struct device_node *np); -int of_map_rid(struct device_node *np, u32 rid, +int of_map_id(struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out); @@ -630,6 +630,11 @@ static inline struct device_node *of_get_parent(const struct device_node *node) return NULL; } +static inline struct device_node *of_get_next_parent(struct device_node *node) +{ + return NULL; +} + static inline struct device_node *of_get_next_child( const struct device_node *node, struct device_node *prev) { @@ -978,7 +983,7 @@ static inline int of_cpu_node_to_id(struct device_node *np) return -ENODEV; } -static inline int of_map_rid(struct device_node *np, u32 rid, +static inline int of_map_id(struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out) { diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 8d31e39dd564..07ca187fc5e4 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -55,9 +55,15 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) return of_node_get(cpu_dev->of_node); } -int of_dma_configure(struct device *dev, +int of_dma_configure_id(struct device *dev, struct device_node *np, - bool force_dma); + bool force_dma, const u32 *id); +static inline int of_dma_configure(struct device *dev, + struct device_node *np, + bool force_dma) +{ + return of_dma_configure_id(dev, np, force_dma, NULL); +} #else /* CONFIG_OF */ static inline int of_driver_match_device(struct device *dev, @@ -106,6 +112,12 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) return NULL; } +static inline int of_dma_configure_id(struct device *dev, + struct device_node *np, + bool force_dma) +{ + return 0; +} static inline int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma) diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index f3d40dd7bb66..16f4b3e87f20 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -13,7 +13,8 @@ extern int of_get_dma_window(struct device_node *dn, const char *prefix, size_t *size); extern const struct iommu_ops *of_iommu_configure(struct device *dev, - struct device_node *master_np); + struct device_node *master_np, + const u32 *id); #else @@ -25,7 +26,8 @@ static inline int of_get_dma_window(struct device_node *dn, const char *prefix, } static inline const struct iommu_ops *of_iommu_configure(struct device *dev, - struct device_node *master_np) + struct device_node *master_np, + const u32 *id) { return NULL; } diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 1214cabb2247..e8b78139f78c 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -52,9 +52,10 @@ extern struct irq_domain *of_msi_get_domain(struct device *dev, struct device_node *np, enum irq_domain_bus_token token); extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, - u32 rid); + u32 id, + u32 bus_token); extern void of_msi_configure(struct device *dev, struct device_node *np); -u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in); +u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in); #else static inline int of_irq_count(struct device_node *dev) { @@ -85,17 +86,17 @@ static inline struct irq_domain *of_msi_get_domain(struct device *dev, return NULL; } static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev, - u32 rid) + u32 id, u32 bus_token) { return NULL; } static inline void of_msi_configure(struct device *dev, struct device_node *np) { } -static inline u32 of_msi_map_rid(struct device *dev, - struct device_node *msi_np, u32 rid_in) +static inline u32 of_msi_map_id(struct device *dev, + struct device_node *msi_np, u32 id_in) { - return rid_in; + return id_in; } #endif diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 659045046468..93fcef105061 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -304,16 +304,33 @@ static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c) * struct_size() - Calculate size of structure with trailing array. * @p: Pointer to the structure. * @member: Name of the array member. - * @n: Number of elements in the array. + * @count: Number of elements in the array. * * Calculates size of memory needed for structure @p followed by an - * array of @n @member elements. + * array of @count number of @member elements. * * Return: number of bytes needed or SIZE_MAX on overflow. */ -#define struct_size(p, member, n) \ - __ab_c_size(n, \ +#define struct_size(p, member, count) \ + __ab_c_size(count, \ sizeof(*(p)->member) + __must_be_array((p)->member),\ sizeof(*(p))) +/** + * flex_array_size() - Calculate size of a flexible array member + * within an enclosing structure. + * + * @p: Pointer to the structure. + * @member: Name of the flexible array member. + * @count: Number of elements in the array. + * + * Calculates size of a flexible array of @count number of @member + * elements, at the end of structure @p. + * + * Return: number of bytes needed or SIZE_MAX on overflow. + */ +#define flex_array_size(p, member, count) \ + array_size(count, \ + sizeof(*(p)->member) + __must_be_array((p)->member)) + #endif /* __LINUX_OVERFLOW_H */ diff --git a/include/linux/padata.h b/include/linux/padata.h index 7302efff5e65..a433f13fc4bf 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -67,17 +67,6 @@ struct padata_serial_queue { }; /** - * struct padata_parallel_queue - The percpu padata parallel queue - * - * @reorder: List to wait for reordering after parallel processing. - * @num_obj: Number of objects that are processed by this cpu. - */ -struct padata_parallel_queue { - struct padata_list reorder; - atomic_t num_obj; -}; - -/** * struct padata_cpumask - The cpumasks for the parallel/serial workers * * @pcpu: cpumask for the parallel workers. @@ -93,7 +82,7 @@ struct padata_cpumask { * that depends on the cpumask in use. * * @ps: padata_shell object. - * @pqueue: percpu padata queues used for parallelization. + * @reorder_list: percpu reorder lists * @squeue: percpu padata queues used for serialuzation. * @refcnt: Number of objects holding a reference on this parallel_data. * @seq_nr: Sequence number of the parallelized data object. @@ -105,7 +94,7 @@ struct padata_cpumask { */ struct parallel_data { struct padata_shell *ps; - struct padata_parallel_queue __percpu *pqueue; + struct padata_list __percpu *reorder_list; struct padata_serial_queue __percpu *squeue; atomic_t refcnt; unsigned int seq_nr; @@ -167,7 +156,6 @@ struct padata_mt_job { * @serial_wq: The workqueue used for serial work. * @pslist: List of padata_shell objects attached to this instance. * @cpumask: User supplied cpumasks for parallel and serial works. - * @rcpumask: Actual cpumasks based on user cpumask and cpu_online_mask. * @kobj: padata instance kernel object. * @lock: padata instance lock. * @flags: padata flags. @@ -179,7 +167,6 @@ struct padata_instance { struct workqueue_struct *serial_wq; struct list_head pslist; struct padata_cpumask cpumask; - struct padata_cpumask rcpumask; struct kobject kobj; struct mutex lock; u8 flags; @@ -194,7 +181,7 @@ extern void __init padata_init(void); static inline void __init padata_init(void) {} #endif -extern struct padata_instance *padata_alloc_possible(const char *name); +extern struct padata_instance *padata_alloc(const char *name); extern void padata_free(struct padata_instance *pinst); extern struct padata_shell *padata_alloc_shell(struct padata_instance *pinst); extern void padata_free_shell(struct padata_shell *ps); @@ -204,6 +191,4 @@ extern void padata_do_serial(struct padata_priv *padata); extern void __init padata_do_multithreaded(struct padata_mt_job *job); extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, cpumask_var_t cpumask); -extern int padata_start(struct padata_instance *pinst); -extern void padata_stop(struct padata_instance *pinst); #endif diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 71283739ffd2..e200eef6a7fd 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -98,9 +98,11 @@ /* * We are going to use the flags for the page to node mapping if its in * there. This includes the case where there is no node, so it is implicit. + * Note that this #define MUST have a value so that it can be tested with + * the IS_ENABLED() macro. */ #if !(NODES_WIDTH > 0 || NODES_SHIFT == 0) -#define NODE_NOT_IN_PAGE_FLAGS +#define NODE_NOT_IN_PAGE_FLAGS 1 #endif #if defined(CONFIG_NUMA_BALANCING) && LAST_CPUPID_WIDTH == 0 diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index cf2468da68e9..d1f4eff605ad 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -496,8 +496,35 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, return pgoff; } +/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */ +struct wait_page_key { + struct page *page; + int bit_nr; + int page_match; +}; + +struct wait_page_queue { + struct page *page; + int bit_nr; + wait_queue_entry_t wait; +}; + +static inline bool wake_page_match(struct wait_page_queue *wait_page, + struct wait_page_key *key) +{ + if (wait_page->page != key->page) + return false; + key->page_match = 1; + + if (wait_page->bit_nr != key->bit_nr) + return false; + + return true; +} + extern void __lock_page(struct page *page); extern int __lock_page_killable(struct page *page); +extern int __lock_page_async(struct page *page, struct wait_page_queue *wait); extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags); extern void unlock_page(struct page *page); @@ -535,6 +562,22 @@ static inline int lock_page_killable(struct page *page) } /* + * lock_page_async - Lock the page, unless this would block. If the page + * is already locked, then queue a callback when the page becomes unlocked. + * This callback can then retry the operation. + * + * Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page + * was already locked and the callback defined in 'wait' was queued. + */ +static inline int lock_page_async(struct page *page, + struct wait_page_queue *wait) +{ + if (!trylock_page(page)) + return __lock_page_async(page, wait); + return 0; +} + +/* * lock_page_or_retry - Lock the page, unless this would block and the * caller indicated that it can handle a retry. * diff --git a/include/linux/pci.h b/include/linux/pci.h index c79d83304e52..34c1c4f45288 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2169,12 +2169,11 @@ static inline int pci_pcie_type(const struct pci_dev *dev) */ static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev) { - struct pci_dev *bridge = pci_upstream_bridge(dev); - - while (bridge) { - if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) - return bridge; - bridge = pci_upstream_bridge(bridge); + while (dev) { + if (pci_is_pcie(dev) && + pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) + return dev; + dev = pci_upstream_bridge(dev); } return NULL; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 9a57e6717e5c..0ad57693f392 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -550,6 +550,7 @@ #define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F3 0x1493 +#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F3 0x144b #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443 #define PCI_DEVICE_ID_AMD_19H_DF_F3 0x1653 #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 22d9d183950d..87d8a38bdea1 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -155,7 +155,7 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref, * between contaminating the pointer value, meaning that * READ_ONCE() is required when fetching it. * - * The smp_read_barrier_depends() implied by READ_ONCE() pairs + * The dependency ordering from the READ_ONCE() pairs * with smp_store_release() in __percpu_ref_switch_to_percpu(). */ percpu_ptr = READ_ONCE(ref->percpu_count_ptr); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b4bb32082342..0edd257a5916 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -366,7 +366,7 @@ struct pmu { * ->stop() with PERF_EF_UPDATE will read the counter and update * period/count values like ->read() would. * - * ->start() with PERF_EF_RELOAD will reprogram the the counter + * ->start() with PERF_EF_RELOAD will reprogram the counter * value, must be preceded by a ->stop() with PERF_EF_UPDATE. */ void (*start) (struct perf_event *event, int flags); @@ -419,10 +419,11 @@ struct pmu { */ void (*sched_task) (struct perf_event_context *ctx, bool sched_in); + /* - * PMU specific data size + * Kmem cache of PMU specific data */ - size_t task_ctx_size; + struct kmem_cache *task_ctx_cache; /* * PMU specific parts of task perf event context (i.e. ctx->task_ctx_data) @@ -1232,6 +1233,9 @@ extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); extern void perf_event_namespaces(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); +extern void perf_event_text_poke(const void *addr, + const void *old_bytes, size_t old_len, + const void *new_bytes, size_t new_len); /* Callchains */ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); @@ -1479,6 +1483,11 @@ static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } static inline void perf_event_namespaces(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } +static inline void perf_event_text_poke(const void *addr, + const void *old_bytes, + size_t old_len, + const void *new_bytes, + size_t new_len) { } static inline void perf_event_init(void) { } static inline int perf_swevent_get_recursion_context(void) { return -1; } static inline void perf_swevent_put_recursion_context(int rctx) { } diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 32b6c52d41b9..56c1e8eb7bb0 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -249,6 +249,13 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, } #endif +#ifndef __HAVE_ARCH_PTEP_GET +static inline pte_t ptep_get(pte_t *ptep) +{ + return READ_ONCE(*ptep); +} +#endif + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, diff --git a/include/linux/phy.h b/include/linux/phy.h index 8c05d0fb5c00..b693b609b2f5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1416,6 +1416,7 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); int phy_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); int phy_do_ioctl_running(struct net_device *dev, struct ifreq *ifr, int cmd); +int phy_disable_interrupts(struct phy_device *phydev); void phy_request_interrupt(struct phy_device *phydev); void phy_free_interrupt(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 0c31b9461262..50afd0d0084c 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -9,6 +9,10 @@ #define PIPE_BUF_FLAG_GIFT 0x04 /* page is a gift */ #define PIPE_BUF_FLAG_PACKET 0x08 /* read() as a packet */ #define PIPE_BUF_FLAG_CAN_MERGE 0x10 /* can merge buffers */ +#define PIPE_BUF_FLAG_WHOLE 0x20 /* read() must return entire buffer or error */ +#ifdef CONFIG_WATCH_QUEUE +#define PIPE_BUF_FLAG_LOSS 0x40 /* Message loss happened after this buffer */ +#endif /** * struct pipe_buffer - a linux kernel pipe buffer @@ -34,8 +38,10 @@ struct pipe_buffer { * @wr_wait: writer wait point in case of full pipe * @head: The point of buffer production * @tail: The point of buffer consumption + * @note_loss: The next read() should insert a data-lost message * @max_usage: The maximum number of slots that may be used in the ring * @ring_size: total number of buffers (should be a power of 2) + * @nr_accounted: The amount this pipe accounts for in user->pipe_bufs * @tmp_page: cached released page * @readers: number of current readers of this pipe * @writers: number of current writers of this pipe @@ -46,6 +52,7 @@ struct pipe_buffer { * @fasync_writers: writer side fasync * @bufs: the circular array of pipe buffers * @user: the user who created this pipe + * @watch_queue: If this pipe is a watch_queue, this is the stuff for that **/ struct pipe_inode_info { struct mutex mutex; @@ -54,6 +61,10 @@ struct pipe_inode_info { unsigned int tail; unsigned int max_usage; unsigned int ring_size; +#ifdef CONFIG_WATCH_QUEUE + bool note_loss; +#endif + unsigned int nr_accounted; unsigned int readers; unsigned int writers; unsigned int files; @@ -64,6 +75,9 @@ struct pipe_inode_info { struct fasync_struct *fasync_writers; struct pipe_buffer *bufs; struct user_struct *user; +#ifdef CONFIG_WATCH_QUEUE + struct watch_queue *watch_queue; +#endif }; /* @@ -239,9 +253,20 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); extern const struct pipe_buf_operations nosteal_pipe_buf_ops; +#ifdef CONFIG_WATCH_QUEUE +unsigned long account_pipe_buffers(struct user_struct *user, + unsigned long old, unsigned long new); +bool too_many_pipe_buffers_soft(unsigned long user_bufs); +bool too_many_pipe_buffers_hard(unsigned long user_bufs); +bool pipe_is_unprivileged_user(void); +#endif + /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ +#ifdef CONFIG_WATCH_QUEUE +int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots); +#endif long pipe_fcntl(struct file *, unsigned int, unsigned long arg); -struct pipe_inode_info *get_pipe_info(struct file *file); +struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice); int create_pipe_files(struct file **, int); unsigned int round_pipe_size(unsigned long size); diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 69210881ebac..91e77f53414d 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -5431,6 +5431,89 @@ struct ec_response_rollback_info { #define EC_CMD_AP_RESET 0x0125 /*****************************************************************************/ +/* Voltage regulator controls */ + +/* + * Get basic info of voltage regulator for given index. + * + * Returns the regulator name and supported voltage list in mV. + */ +#define EC_CMD_REGULATOR_GET_INFO 0x012C + +/* Maximum length of regulator name */ +#define EC_REGULATOR_NAME_MAX_LEN 16 + +/* Maximum length of the supported voltage list. */ +#define EC_REGULATOR_VOLTAGE_MAX_COUNT 16 + +struct ec_params_regulator_get_info { + uint32_t index; +} __ec_align4; + +struct ec_response_regulator_get_info { + char name[EC_REGULATOR_NAME_MAX_LEN]; + uint16_t num_voltages; + uint16_t voltages_mv[EC_REGULATOR_VOLTAGE_MAX_COUNT]; +} __ec_align2; + +/* + * Configure the regulator as enabled / disabled. + */ +#define EC_CMD_REGULATOR_ENABLE 0x012D + +struct ec_params_regulator_enable { + uint32_t index; + uint8_t enable; +} __ec_align4; + +/* + * Query if the regulator is enabled. + * + * Returns 1 if the regulator is enabled, 0 if not. + */ +#define EC_CMD_REGULATOR_IS_ENABLED 0x012E + +struct ec_params_regulator_is_enabled { + uint32_t index; +} __ec_align4; + +struct ec_response_regulator_is_enabled { + uint8_t enabled; +} __ec_align1; + +/* + * Set voltage for the voltage regulator within the range specified. + * + * The driver should select the voltage in range closest to min_mv. + * + * Also note that this might be called before the regulator is enabled, and the + * setting should be in effect after the regulator is enabled. + */ +#define EC_CMD_REGULATOR_SET_VOLTAGE 0x012F + +struct ec_params_regulator_set_voltage { + uint32_t index; + uint32_t min_mv; + uint32_t max_mv; +} __ec_align4; + +/* + * Get the currently configured voltage for the voltage regulator. + * + * Note that this might be called before the regulator is enabled, and this + * should return the configured output voltage if the regulator is enabled. + */ +#define EC_CMD_REGULATOR_GET_VOLTAGE 0x0130 + +struct ec_params_regulator_get_voltage { + uint32_t index; +} __ec_align4; + +struct ec_response_regulator_get_voltage { + uint32_t voltage_mv; +} __ec_align4; + +/*****************************************************************************/ /* The command range 0x200-0x2FF is reserved for Rotor. */ /*****************************************************************************/ diff --git a/include/linux/platform_data/i2c-pxa.h b/include/linux/platform_data/i2c-pxa.h index 6a9b28399b39..24953981bd9f 100644 --- a/include/linux/platform_data/i2c-pxa.h +++ b/include/linux/platform_data/i2c-pxa.h @@ -7,54 +7,6 @@ #ifndef _I2C_PXA_H_ #define _I2C_PXA_H_ -#if 0 -#define DEF_TIMEOUT 3 -#else -/* need a longer timeout if we're dealing with the fact we may well be - * looking at a multi-master environment -*/ -#define DEF_TIMEOUT 32 -#endif - -#define BUS_ERROR (-EREMOTEIO) -#define XFER_NAKED (-ECONNREFUSED) -#define I2C_RETRY (-2000) /* an error has occurred retry transmit */ - -/* ICR initialize bit values -* -* 15. FM 0 (100 Khz operation) -* 14. UR 0 (No unit reset) -* 13. SADIE 0 (Disables the unit from interrupting on slave addresses -* matching its slave address) -* 12. ALDIE 0 (Disables the unit from interrupt when it loses arbitration -* in master mode) -* 11. SSDIE 0 (Disables interrupts from a slave stop detected, in slave mode) -* 10. BEIE 1 (Enable interrupts from detected bus errors, no ACK sent) -* 9. IRFIE 1 (Enable interrupts from full buffer received) -* 8. ITEIE 1 (Enables the I2C unit to interrupt when transmit buffer empty) -* 7. GCD 1 (Disables i2c unit response to general call messages as a slave) -* 6. IUE 0 (Disable unit until we change settings) -* 5. SCLE 1 (Enables the i2c clock output for master mode (drives SCL) -* 4. MA 0 (Only send stop with the ICR stop bit) -* 3. TB 0 (We are not transmitting a byte initially) -* 2. ACKNAK 0 (Send an ACK after the unit receives a byte) -* 1. STOP 0 (Do not send a STOP) -* 0. START 0 (Do not send a START) -* -*/ -#define I2C_ICR_INIT (ICR_BEIE | ICR_IRFIE | ICR_ITEIE | ICR_GCD | ICR_SCLE) - -/* I2C status register init values - * - * 10. BED 1 (Clear bus error detected) - * 9. SAD 1 (Clear slave address detected) - * 7. IRF 1 (Clear IDBR Receive Full) - * 6. ITE 1 (Clear IDBR Transmit Empty) - * 5. ALD 1 (Clear Arbitration Loss Detected) - * 4. SSD 1 (Clear Slave Stop Detected) - */ -#define I2C_ISR_INIT 0x7FF /* status register init */ - struct i2c_pxa_platform_data { unsigned int class; unsigned int use_pio :1; diff --git a/include/linux/platform_data/leds-s3c24xx.h b/include/linux/platform_data/leds-s3c24xx.h index 5bbae85811e2..64f8d14876e0 100644 --- a/include/linux/platform_data/leds-s3c24xx.h +++ b/include/linux/platform_data/leds-s3c24xx.h @@ -10,13 +10,7 @@ #ifndef __LEDS_S3C24XX_H #define __LEDS_S3C24XX_H -#define S3C24XX_LEDF_ACTLOW (1<<0) /* LED is on when GPIO low */ -#define S3C24XX_LEDF_TRISTATE (1<<1) /* tristate to turn off */ - struct s3c24xx_led_platdata { - unsigned int gpio; - unsigned int flags; - char *name; char *def_trigger; }; diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index b8da8aef2446..9cffa9a64ab3 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -75,11 +75,13 @@ struct mlxreg_hotplug_device { * @mask: attribute access mask; * @bit: attribute effective bit; * @capability: attribute capability register; + * @reg_prsnt: attribute presence register; * @mode: access mode; * @np - pointer to node platform associated with attribute; * @hpdev - hotplug device data; * @health_cntr: dynamic device health indication counter; * @attached: true if device has been attached after good health indication; + * @regnum: number of registers occupied by multi-register attribute; */ struct mlxreg_core_data { char label[MLXREG_CORE_LABEL_MAX_SIZE]; @@ -87,11 +89,13 @@ struct mlxreg_core_data { u32 mask; u32 bit; u32 capability; + u32 reg_prsnt; umode_t mode; struct device_node *np; struct mlxreg_hotplug_device hpdev; u8 health_cntr; bool attached; + u8 regnum; }; /** diff --git a/include/linux/platform_data/spi-imx.h b/include/linux/platform_data/spi-imx.h deleted file mode 100644 index 328f670d10bd..000000000000 --- a/include/linux/platform_data/spi-imx.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef __MACH_SPI_H_ -#define __MACH_SPI_H_ - -/* - * struct spi_imx_master - device.platform_data for SPI controller devices. - * @chipselect: Array of chipselects for this master or NULL. Numbers >= 0 - * mean GPIO pins, -ENOENT means internal CSPI chipselect - * matching the position in the array. E.g., if chipselect[1] = - * -ENOENT then a SPI slave using chip select 1 will use the - * native SS1 line of the CSPI. Omitting the array will use - * all native chip selects. - - * Normally you want to use gpio based chip selects as the CSPI - * module tries to be intelligent about when to assert the - * chipselect: The CSPI module deasserts the chipselect once it - * runs out of input data. The other problem is that it is not - * possible to mix between high active and low active chipselects - * on one single bus using the internal chipselects. - * Unfortunately, on some SoCs, Freescale decided to put some - * chipselects on dedicated pins which are not usable as gpios, - * so we have to support the internal chipselects. - * - * @num_chipselect: If @chipselect is specified, ARRAY_SIZE(chipselect), - * otherwise the number of native chip selects. - */ -struct spi_imx_master { - int *chipselect; - int num_chipselect; -}; - -#endif /* __MACH_SPI_H_*/ diff --git a/include/linux/pm.h b/include/linux/pm.h index 121c104a4090..a30a4b54df52 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -351,7 +351,7 @@ struct dev_pm_ops { * to RAM and hibernation. */ #define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ -const struct dev_pm_ops name = { \ +const struct dev_pm_ops __maybe_unused name = { \ SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } @@ -369,11 +369,17 @@ const struct dev_pm_ops name = { \ * .runtime_resume(), respectively (and analogously for hibernation). */ #define UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ -const struct dev_pm_ops name = { \ +const struct dev_pm_ops __maybe_unused name = { \ SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } +#ifdef CONFIG_PM +#define pm_ptr(_ptr) (_ptr) +#else +#define pm_ptr(_ptr) NULL +#endif + /* * PM_EVENT_ messages * diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 9ec78ee53652..ee11502a575b 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -95,8 +95,8 @@ struct generic_pm_domain { struct device dev; struct dev_pm_domain domain; /* PM domain operations */ struct list_head gpd_list_node; /* Node in the global PM domains list */ - struct list_head master_links; /* Links with PM domain as a master */ - struct list_head slave_links; /* Links with PM domain as a slave */ + struct list_head parent_links; /* Links with PM domain as a parent */ + struct list_head child_links; /* Links with PM domain as a child */ struct list_head dev_list; /* List of devices */ struct dev_power_governor *gov; struct work_struct power_off_work; @@ -151,10 +151,10 @@ static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) } struct gpd_link { - struct generic_pm_domain *master; - struct list_head master_node; - struct generic_pm_domain *slave; - struct list_head slave_node; + struct generic_pm_domain *parent; + struct list_head parent_node; + struct generic_pm_domain *child; + struct list_head child_node; /* Sub-domain's per-master domain performance state */ unsigned int performance_state; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index d5c4a329321d..ee34c553f6bf 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -11,6 +11,7 @@ #ifndef __LINUX_OPP_H__ #define __LINUX_OPP_H__ +#include <linux/energy_model.h> #include <linux/err.h> #include <linux/notifier.h> @@ -373,7 +374,11 @@ struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev); struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp); int of_get_required_opp_performance_state(struct device_node *np, int index); int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_table *opp_table); -void dev_pm_opp_of_register_em(struct cpumask *cpus); +int dev_pm_opp_of_register_em(struct device *dev, struct cpumask *cpus); +static inline void dev_pm_opp_of_unregister_em(struct device *dev) +{ + em_dev_unregister_perf_domain(dev); +} #else static inline int dev_pm_opp_of_add_table(struct device *dev) { @@ -413,7 +418,13 @@ static inline struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp) return NULL; } -static inline void dev_pm_opp_of_register_em(struct cpumask *cpus) +static inline int dev_pm_opp_of_register_em(struct device *dev, + struct cpumask *cpus) +{ + return -ENOTSUPP; +} + +static inline void dev_pm_opp_of_unregister_em(struct device *dev) { } diff --git a/include/linux/prandom.h b/include/linux/prandom.h new file mode 100644 index 000000000000..aa16e6468f91 --- /dev/null +++ b/include/linux/prandom.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/linux/prandom.h + * + * Include file for the fast pseudo-random 32-bit + * generation. + */ +#ifndef _LINUX_PRANDOM_H +#define _LINUX_PRANDOM_H + +#include <linux/types.h> +#include <linux/percpu.h> + +u32 prandom_u32(void); +void prandom_bytes(void *buf, size_t nbytes); +void prandom_seed(u32 seed); +void prandom_reseed_late(void); + +struct rnd_state { + __u32 s1, s2, s3, s4; +}; + +DECLARE_PER_CPU(struct rnd_state, net_rand_state); + +u32 prandom_u32_state(struct rnd_state *state); +void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); +void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); + +#define prandom_init_once(pcpu_state) \ + DO_ONCE(prandom_seed_full_state, (pcpu_state)) + +/** + * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) + * @ep_ro: right open interval endpoint + * + * Returns a pseudo-random number that is in interval [0, ep_ro). Note + * that the result depends on PRNG being well distributed in [0, ~0U] + * u32 space. Here we use maximally equidistributed combined Tausworthe + * generator, that is, prandom_u32(). This is useful when requesting a + * random index of an array containing ep_ro elements, for example. + * + * Returns: pseudo-random number in interval [0, ep_ro) + */ +static inline u32 prandom_u32_max(u32 ep_ro) +{ + return (u32)(((u64) prandom_u32() * ep_ro) >> 32); +} + +/* + * Handle minimum values for seeds + */ +static inline u32 __seed(u32 x, u32 m) +{ + return (x < m) ? x + m : x; +} + +/** + * prandom_seed_state - set seed for prandom_u32_state(). + * @state: pointer to state structure to receive the seed. + * @seed: arbitrary 64-bit value to use as a seed. + */ +static inline void prandom_seed_state(struct rnd_state *state, u64 seed) +{ + u32 i = (seed >> 32) ^ (seed << 10) ^ seed; + + state->s1 = __seed(i, 2U); + state->s2 = __seed(i, 8U); + state->s3 = __seed(i, 16U); + state->s4 = __seed(i, 128U); +} + +/* Pseudo random number generator from numerical recipes. */ +static inline u32 next_pseudo_random32(u32 seed) +{ + return seed * 1664525 + 1013904223; +} + +#endif diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 4b7258495a04..b95f3211566a 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -153,9 +153,10 @@ struct psi_group { unsigned long avg[NR_PSI_STATES - 1][3]; /* Monitor work control */ - atomic_t poll_scheduled; - struct kthread_worker __rcu *poll_kworker; - struct kthread_delayed_work poll_work; + struct task_struct __rcu *poll_task; + struct timer_list poll_timer; + wait_queue_head_t poll_wait; + atomic_t poll_wakeup; /* Protects data used by the monitor */ struct mutex trigger_lock; diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 7fbc8679145c..49d155cd2dfe 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -597,7 +597,7 @@ int sev_guest_df_flush(int *error); */ int sev_guest_decommission(struct sev_data_decommission *data, int *error); -void *psp_copy_user_blob(u64 __user uaddr, u32 len); +void *psp_copy_user_blob(u64 uaddr, u32 len); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 417db0a79a62..808f9d3ee546 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -107,7 +107,7 @@ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) return -ENOSPC; /* Make sure the pointer we are storing points to a valid data. */ - /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */ + /* Pairs with the dependency ordering in __ptr_ring_consume. */ smp_wmb(); WRITE_ONCE(r->queue[r->producer++], ptr); diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h index dd464943f717..8f385fbe5a0e 100644 --- a/include/linux/qcom-geni-se.h +++ b/include/linux/qcom-geni-se.h @@ -6,6 +6,8 @@ #ifndef _LINUX_QCOM_GENI_SE #define _LINUX_QCOM_GENI_SE +#include <linux/interconnect.h> + /* Transfer mode supported by GENI Serial Engines */ enum geni_se_xfer_mode { GENI_SE_INVALID, @@ -25,6 +27,17 @@ enum geni_se_protocol_type { struct geni_wrapper; struct clk; +enum geni_icc_path_index { + GENI_TO_CORE, + CPU_TO_GENI, + GENI_TO_DDR +}; + +struct geni_icc_path { + struct icc_path *path; + unsigned int avg_bw; +}; + /** * struct geni_se - GENI Serial Engine * @base: Base Address of the Serial Engine's register block @@ -33,6 +46,9 @@ struct clk; * @clk: Handle to the core serial engine clock * @num_clk_levels: Number of valid clock levels in clk_perf_tbl * @clk_perf_tbl: Table of clock frequency input to serial engine clock + * @icc_paths: Array of ICC paths for SE + * @opp_table: Pointer to the OPP table + * @has_opp_table: Specifies if the SE has an OPP table */ struct geni_se { void __iomem *base; @@ -41,6 +57,9 @@ struct geni_se { struct clk *clk; unsigned int num_clk_levels; unsigned long *clk_perf_tbl; + struct geni_icc_path icc_paths[3]; + struct opp_table *opp_table; + bool has_opp_table; }; /* Common SE registers */ @@ -229,6 +248,21 @@ struct geni_se { #define GENI_SE_VERSION_MINOR(ver) ((ver & HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT) #define GENI_SE_VERSION_STEP(ver) (ver & HW_VER_STEP_MASK) +/* + * Define bandwidth thresholds that cause the underlying Core 2X interconnect + * clock to run at the named frequency. These baseline values are recommended + * by the hardware team, and are not dynamically scaled with GENI bandwidth + * beyond basic on/off. + */ +#define CORE_2X_19_2_MHZ 960 +#define CORE_2X_50_MHZ 2500 +#define CORE_2X_100_MHZ 5000 +#define CORE_2X_150_MHZ 7500 +#define CORE_2X_200_MHZ 10000 +#define CORE_2X_236_MHZ 16383 + +#define GENI_DEFAULT_BW Bps_to_icc(1000) + #if IS_ENABLED(CONFIG_QCOM_GENI_SE) u32 geni_se_get_qup_hw_version(struct geni_se *se); @@ -416,5 +450,16 @@ int geni_se_rx_dma_prep(struct geni_se *se, void *buf, size_t len, void geni_se_tx_dma_unprep(struct geni_se *se, dma_addr_t iova, size_t len); void geni_se_rx_dma_unprep(struct geni_se *se, dma_addr_t iova, size_t len); + +int geni_icc_get(struct geni_se *se, const char *icc_ddr); + +int geni_icc_set_bw(struct geni_se *se); +void geni_icc_set_tag(struct geni_se *se, u32 tag); + +int geni_icc_enable(struct geni_se *se); + +int geni_icc_disable(struct geni_se *se); + +void geni_remove_earlycon_icc_vote(void); #endif #endif diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 733fad7dfbed..6d15040c642c 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -207,28 +207,34 @@ static inline u32 qed_chain_get_cons_idx_u32(struct qed_chain *p_chain) static inline u16 qed_chain_get_elem_left(struct qed_chain *p_chain) { + u16 elem_per_page = p_chain->elem_per_page; + u32 prod = p_chain->u.chain16.prod_idx; + u32 cons = p_chain->u.chain16.cons_idx; u16 used; - used = (u16) (((u32)0x10000 + - (u32)p_chain->u.chain16.prod_idx) - - (u32)p_chain->u.chain16.cons_idx); + if (prod < cons) + prod += (u32)U16_MAX + 1; + + used = (u16)(prod - cons); if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) - used -= p_chain->u.chain16.prod_idx / p_chain->elem_per_page - - p_chain->u.chain16.cons_idx / p_chain->elem_per_page; + used -= prod / elem_per_page - cons / elem_per_page; return (u16)(p_chain->capacity - used); } static inline u32 qed_chain_get_elem_left_u32(struct qed_chain *p_chain) { + u16 elem_per_page = p_chain->elem_per_page; + u64 prod = p_chain->u.chain32.prod_idx; + u64 cons = p_chain->u.chain32.cons_idx; u32 used; - used = (u32) (((u64)0x100000000ULL + - (u64)p_chain->u.chain32.prod_idx) - - (u64)p_chain->u.chain32.cons_idx); + if (prod < cons) + prod += (u64)U32_MAX + 1; + + used = (u32)(prod - cons); if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) - used -= p_chain->u.chain32.prod_idx / p_chain->elem_per_page - - p_chain->u.chain32.cons_idx / p_chain->elem_per_page; + used -= (u32)(prod / elem_per_page - cons / elem_per_page); return p_chain->capacity - used; } diff --git a/include/linux/random.h b/include/linux/random.h index 45e1f8fa742b..f45b8be3e3c4 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -110,61 +110,12 @@ declare_get_random_var_wait(long) unsigned long randomize_page(unsigned long start, unsigned long range); -u32 prandom_u32(void); -void prandom_bytes(void *buf, size_t nbytes); -void prandom_seed(u32 seed); -void prandom_reseed_late(void); - -struct rnd_state { - __u32 s1, s2, s3, s4; -}; - -u32 prandom_u32_state(struct rnd_state *state); -void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); -void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); - -#define prandom_init_once(pcpu_state) \ - DO_ONCE(prandom_seed_full_state, (pcpu_state)) - -/** - * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) - * @ep_ro: right open interval endpoint - * - * Returns a pseudo-random number that is in interval [0, ep_ro). Note - * that the result depends on PRNG being well distributed in [0, ~0U] - * u32 space. Here we use maximally equidistributed combined Tausworthe - * generator, that is, prandom_u32(). This is useful when requesting a - * random index of an array containing ep_ro elements, for example. - * - * Returns: pseudo-random number in interval [0, ep_ro) - */ -static inline u32 prandom_u32_max(u32 ep_ro) -{ - return (u32)(((u64) prandom_u32() * ep_ro) >> 32); -} - /* - * Handle minimum values for seeds + * This is designed to be standalone for just prandom + * users, but for now we include it from <linux/random.h> + * for legacy reasons. */ -static inline u32 __seed(u32 x, u32 m) -{ - return (x < m) ? x + m : x; -} - -/** - * prandom_seed_state - set seed for prandom_u32_state(). - * @state: pointer to state structure to receive the seed. - * @seed: arbitrary 64-bit value to use as a seed. - */ -static inline void prandom_seed_state(struct rnd_state *state, u64 seed) -{ - u32 i = (seed >> 32) ^ (seed << 10) ^ seed; - - state->s1 = __seed(i, 2U); - state->s2 = __seed(i, 8U); - state->s3 = __seed(i, 16U); - state->s4 = __seed(i, 128U); -} +#include <linux/prandom.h> #ifdef CONFIG_ARCH_RANDOM # include <asm/archrandom.h> @@ -207,10 +158,4 @@ static inline bool __init arch_get_random_long_early(unsigned long *v) } #endif -/* Pseudo random number generator from numerical recipes. */ -static inline u32 next_pseudo_random32(u32 seed) -{ - return seed * 1664525 + 1013904223; -} - #endif /* _LINUX_RANDOM_H */ diff --git a/include/linux/ras.h b/include/linux/ras.h index 7c3debb47c87..1f4048bf2674 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -17,12 +17,7 @@ static inline int ras_add_daemon_trace(void) { return 0; } #endif #ifdef CONFIG_RAS_CEC -void __init cec_init(void); int __init parse_cec_param(char *str); -int cec_add_elem(u64 pfn); -#else -static inline void __init cec_init(void) { } -static inline int cec_add_elem(u64 pfn) { return -ENODEV; } #endif #ifdef CONFIG_RAS diff --git a/include/linux/rculist.h b/include/linux/rculist.h index df587d181844..7a6fc9956510 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -248,6 +248,8 @@ static inline void __list_splice_init_rcu(struct list_head *list, */ sync(); + ASSERT_EXCLUSIVE_ACCESS(*first); + ASSERT_EXCLUSIVE_ACCESS(*last); /* * Readers are finished with the source list, so perform splice. @@ -512,7 +514,7 @@ static inline void hlist_replace_rcu(struct hlist_node *old, * @right: The hlist head on the right * * The lists start out as [@left ][node1 ... ] and - [@right ][node2 ... ] + * [@right ][node2 ... ] * The lists end up as [@left ][node2 ... ] * [@right ][node1 ... ] */ diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 9670b54b484a..ff3e94779e73 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -162,7 +162,7 @@ static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n) * The barrier() is needed to make sure compiler doesn't cache first element [1], * as this loop can be restarted [2] * [1] Documentation/core-api/atomic_ops.rst around line 114 - * [2] Documentation/RCU/rculist_nulls.txt around line 146 + * [2] Documentation/RCU/rculist_nulls.rst around line 146 */ #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ for (({barrier();}), \ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 659cbfa7581a..d15d46db61f7 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -828,17 +828,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /* * Does the specified offset indicate that the corresponding rcu_head - * structure can be handled by kfree_rcu()? + * structure can be handled by kvfree_rcu()? */ -#define __is_kfree_rcu_offset(offset) ((offset) < 4096) +#define __is_kvfree_rcu_offset(offset) ((offset) < 4096) /* * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain. */ -#define __kfree_rcu(head, offset) \ +#define __kvfree_rcu(head, offset) \ do { \ - BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \ - kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \ + BUILD_BUG_ON(!__is_kvfree_rcu_offset(offset)); \ + kvfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \ } while (0) /** @@ -857,7 +857,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * Because the functions are not allowed in the low-order 4096 bytes of * kernel virtual memory, offsets up to 4095 bytes can be accommodated. * If the offset is larger than 4095 bytes, a compile-time error will - * be generated in __kfree_rcu(). If this error is triggered, you can + * be generated in __kvfree_rcu(). If this error is triggered, you can * either fall back to use of call_rcu() or rearrange the structure to * position the rcu_head structure into the first 4096 bytes. * @@ -872,7 +872,46 @@ do { \ typeof (ptr) ___p = (ptr); \ \ if (___p) \ - __kfree_rcu(&((___p)->rhf), offsetof(typeof(*(ptr)), rhf)); \ + __kvfree_rcu(&((___p)->rhf), offsetof(typeof(*(ptr)), rhf)); \ +} while (0) + +/** + * kvfree_rcu() - kvfree an object after a grace period. + * + * This macro consists of one or two arguments and it is + * based on whether an object is head-less or not. If it + * has a head then a semantic stays the same as it used + * to be before: + * + * kvfree_rcu(ptr, rhf); + * + * where @ptr is a pointer to kvfree(), @rhf is the name + * of the rcu_head structure within the type of @ptr. + * + * When it comes to head-less variant, only one argument + * is passed and that is just a pointer which has to be + * freed after a grace period. Therefore the semantic is + * + * kvfree_rcu(ptr); + * + * where @ptr is a pointer to kvfree(). + * + * Please note, head-less way of freeing is permitted to + * use from a context that has to follow might_sleep() + * annotation. Otherwise, please switch and embed the + * rcu_head structure within the type of @ptr. + */ +#define kvfree_rcu(...) KVFREE_GET_MACRO(__VA_ARGS__, \ + kvfree_rcu_arg_2, kvfree_rcu_arg_1)(__VA_ARGS__) + +#define KVFREE_GET_MACRO(_1, _2, NAME, ...) NAME +#define kvfree_rcu_arg_2(ptr, rhf) kfree_rcu(ptr, rhf) +#define kvfree_rcu_arg_1(ptr) \ +do { \ + typeof(ptr) ___p = (ptr); \ + \ + if (___p) \ + kvfree_call_rcu(NULL, (rcu_callback_t) (___p)); \ } while (0) /* diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index 4c25a41f8b27..d9015aac78c6 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -36,8 +36,8 @@ void rcu_read_unlock_trace_special(struct task_struct *t, int nesting); /** * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section * - * When synchronize_rcu_trace() is invoked by one task, then that task - * is guaranteed to block until all other tasks exit their read-side + * When synchronize_rcu_tasks_trace() is invoked by one task, then that + * task is guaranteed to block until all other tasks exit their read-side * critical sections. Similarly, if call_rcu_trace() is invoked on one * task while other tasks are within RCU read-side critical sections, * invocation of the corresponding RCU callback is deferred until after diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 8512caeb7682..5cc9637cac16 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -34,9 +34,25 @@ static inline void synchronize_rcu_expedited(void) synchronize_rcu(); } -static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func) +/* + * Add one more declaration of kvfree() here. It is + * not so straight forward to just include <linux/mm.h> + * where it is defined due to getting many compile + * errors caused by that include. + */ +extern void kvfree(const void *addr); + +static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) { - call_rcu(head, func); + if (head) { + call_rcu(head, func); + return; + } + + // kvfree_rcu(one_arg) call. + might_sleep(); + synchronize_rcu(); + kvfree((void *) func); } void rcu_qs(void); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d5cc9d675987..d2f4064ebd1d 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -33,7 +33,7 @@ static inline void rcu_virt_note_context_switch(int cpu) } void synchronize_rcu_expedited(void); -void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func); +void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier(void); bool rcu_eqs_special_set(int cpu); diff --git a/include/linux/regmap.h b/include/linux/regmap.h index cb666b9c6b6a..1970ed59d49f 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -18,6 +18,7 @@ #include <linux/bug.h> #include <linux/lockdep.h> #include <linux/iopoll.h> +#include <linux/fwnode.h> struct module; struct clk; @@ -80,36 +81,6 @@ struct reg_sequence { } #define REG_SEQ0(_reg, _def) REG_SEQ(_reg, _def, 0) -#define regmap_update_bits(map, reg, mask, val) \ - regmap_update_bits_base(map, reg, mask, val, NULL, false, false) -#define regmap_update_bits_async(map, reg, mask, val)\ - regmap_update_bits_base(map, reg, mask, val, NULL, true, false) -#define regmap_update_bits_check(map, reg, mask, val, change)\ - regmap_update_bits_base(map, reg, mask, val, change, false, false) -#define regmap_update_bits_check_async(map, reg, mask, val, change)\ - regmap_update_bits_base(map, reg, mask, val, change, true, false) - -#define regmap_write_bits(map, reg, mask, val) \ - regmap_update_bits_base(map, reg, mask, val, NULL, false, true) - -#define regmap_field_write(field, val) \ - regmap_field_update_bits_base(field, ~0, val, NULL, false, false) -#define regmap_field_force_write(field, val) \ - regmap_field_update_bits_base(field, ~0, val, NULL, false, true) -#define regmap_field_update_bits(field, mask, val)\ - regmap_field_update_bits_base(field, mask, val, NULL, false, false) -#define regmap_field_force_update_bits(field, mask, val) \ - regmap_field_update_bits_base(field, mask, val, NULL, false, true) - -#define regmap_fields_write(field, id, val) \ - regmap_fields_update_bits_base(field, id, ~0, val, NULL, false, false) -#define regmap_fields_force_write(field, id, val) \ - regmap_fields_update_bits_base(field, id, ~0, val, NULL, false, true) -#define regmap_fields_update_bits(field, id, mask, val)\ - regmap_fields_update_bits_base(field, id, mask, val, NULL, false, false) -#define regmap_fields_force_update_bits(field, id, mask, val) \ - regmap_fields_update_bits_base(field, id, mask, val, NULL, false, true) - /** * regmap_read_poll_timeout - Poll until a condition is met or a timeout occurs * @@ -304,7 +275,7 @@ typedef void (*regmap_unlock)(void *); * readable if it belongs to one of the ranges specified * by rd_noinc_table). * @disable_locking: This regmap is either protected by external means or - * is guaranteed not be be accessed from multiple threads. + * is guaranteed not to be accessed from multiple threads. * Don't use any locking mechanisms. * @lock: Optional lock callback (overrides regmap's default lock * function, based on spinlock or mutex). @@ -1054,6 +1025,42 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, int regmap_update_bits_base(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val, bool *change, bool async, bool force); + +static inline int regmap_update_bits(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val) +{ + return regmap_update_bits_base(map, reg, mask, val, NULL, false, false); +} + +static inline int regmap_update_bits_async(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val) +{ + return regmap_update_bits_base(map, reg, mask, val, NULL, true, false); +} + +static inline int regmap_update_bits_check(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val, + bool *change) +{ + return regmap_update_bits_base(map, reg, mask, val, + change, false, false); +} + +static inline int +regmap_update_bits_check_async(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val, + bool *change) +{ + return regmap_update_bits_base(map, reg, mask, val, + change, true, false); +} + +static inline int regmap_write_bits(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val) +{ + return regmap_update_bits_base(map, reg, mask, val, NULL, false, true); +} + int regmap_get_val_bytes(struct regmap *map); int regmap_get_max_register(struct regmap *map); int regmap_get_reg_stride(struct regmap *map); @@ -1152,6 +1159,65 @@ int regmap_fields_read(struct regmap_field *field, unsigned int id, int regmap_fields_update_bits_base(struct regmap_field *field, unsigned int id, unsigned int mask, unsigned int val, bool *change, bool async, bool force); + +static inline int regmap_field_write(struct regmap_field *field, + unsigned int val) +{ + return regmap_field_update_bits_base(field, ~0, val, + NULL, false, false); +} + +static inline int regmap_field_force_write(struct regmap_field *field, + unsigned int val) +{ + return regmap_field_update_bits_base(field, ~0, val, NULL, false, true); +} + +static inline int regmap_field_update_bits(struct regmap_field *field, + unsigned int mask, unsigned int val) +{ + return regmap_field_update_bits_base(field, mask, val, + NULL, false, false); +} + +static inline int +regmap_field_force_update_bits(struct regmap_field *field, + unsigned int mask, unsigned int val) +{ + return regmap_field_update_bits_base(field, mask, val, + NULL, false, true); +} + +static inline int regmap_fields_write(struct regmap_field *field, + unsigned int id, unsigned int val) +{ + return regmap_fields_update_bits_base(field, id, ~0, val, + NULL, false, false); +} + +static inline int regmap_fields_force_write(struct regmap_field *field, + unsigned int id, unsigned int val) +{ + return regmap_fields_update_bits_base(field, id, ~0, val, + NULL, false, true); +} + +static inline int +regmap_fields_update_bits(struct regmap_field *field, unsigned int id, + unsigned int mask, unsigned int val) +{ + return regmap_fields_update_bits_base(field, id, mask, val, + NULL, false, false); +} + +static inline int +regmap_fields_force_update_bits(struct regmap_field *field, unsigned int id, + unsigned int mask, unsigned int val) +{ + return regmap_fields_update_bits_base(field, id, mask, val, + NULL, false, true); +} + /** * struct regmap_irq_type - IRQ type definitions. * @@ -1311,21 +1377,23 @@ struct regmap_irq_chip_data; int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, int irq_base, const struct regmap_irq_chip *chip, struct regmap_irq_chip_data **data); -int regmap_add_irq_chip_np(struct device_node *np, struct regmap *map, int irq, - int irq_flags, int irq_base, - const struct regmap_irq_chip *chip, - struct regmap_irq_chip_data **data); +int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, + struct regmap *map, int irq, + int irq_flags, int irq_base, + const struct regmap_irq_chip *chip, + struct regmap_irq_chip_data **data); void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *data); int devm_regmap_add_irq_chip(struct device *dev, struct regmap *map, int irq, int irq_flags, int irq_base, const struct regmap_irq_chip *chip, struct regmap_irq_chip_data **data); -int devm_regmap_add_irq_chip_np(struct device *dev, struct device_node *np, - struct regmap *map, int irq, int irq_flags, - int irq_base, - const struct regmap_irq_chip *chip, - struct regmap_irq_chip_data **data); +int devm_regmap_add_irq_chip_fwnode(struct device *dev, + struct fwnode_handle *fwnode, + struct regmap *map, int irq, + int irq_flags, int irq_base, + const struct regmap_irq_chip *chip, + struct regmap_irq_chip_data **data); void devm_regmap_del_irq_chip(struct device *dev, int irq, struct regmap_irq_chip_data *data); @@ -1458,6 +1526,103 @@ static inline int regmap_fields_update_bits_base(struct regmap_field *field, return -EINVAL; } +static inline int regmap_update_bits(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int regmap_update_bits_async(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int regmap_update_bits_check(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val, + bool *change) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int +regmap_update_bits_check_async(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val, + bool *change) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int regmap_write_bits(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int regmap_field_write(struct regmap_field *field, + unsigned int val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int regmap_field_force_write(struct regmap_field *field, + unsigned int val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int regmap_field_update_bits(struct regmap_field *field, + unsigned int mask, unsigned int val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int +regmap_field_force_update_bits(struct regmap_field *field, + unsigned int mask, unsigned int val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int regmap_fields_write(struct regmap_field *field, + unsigned int id, unsigned int val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int regmap_fields_force_write(struct regmap_field *field, + unsigned int id, unsigned int val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int +regmap_fields_update_bits(struct regmap_field *field, unsigned int id, + unsigned int mask, unsigned int val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int +regmap_fields_force_update_bits(struct regmap_field *field, unsigned int id, + unsigned int mask, unsigned int val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regmap_get_val_bytes(struct regmap *map) { WARN_ONCE(1, "regmap API is disabled"); diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 6a92fd3105a3..2024944fd2f7 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -32,10 +32,12 @@ #define __LINUX_REGULATOR_CONSUMER_H_ #include <linux/err.h> +#include <linux/suspend.h> struct device; struct notifier_block; struct regmap; +struct regulator_dev; /* * Regulator operating modes. @@ -277,6 +279,14 @@ int regulator_unregister_notifier(struct regulator *regulator, void devm_regulator_unregister_notifier(struct regulator *regulator, struct notifier_block *nb); +/* regulator suspend */ +int regulator_suspend_enable(struct regulator_dev *rdev, + suspend_state_t state); +int regulator_suspend_disable(struct regulator_dev *rdev, + suspend_state_t state); +int regulator_set_suspend_voltage(struct regulator *regulator, int min_uV, + int max_uV, suspend_state_t state); + /* driver data - core doesn't touch */ void *regulator_get_drvdata(struct regulator *regulator); void regulator_set_drvdata(struct regulator *regulator, void *data); diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 7eb9fea8e482..8539f34ae42b 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -117,7 +117,7 @@ enum regulator_status { * suspended. * @set_suspend_mode: Set the operating mode for the regulator when the * system is suspended. - * + * @resume: Resume operation of suspended regulator. * @set_pull_down: Configure the regulator to pull down when the regulator * is disabled. * @@ -305,6 +305,9 @@ enum regulator_type { * @enable_time: Time taken for initial enable of regulator (in uS). * @off_on_delay: guard time (in uS), before re-enabling a regulator * + * @poll_enabled_time: The polling interval (in uS) to use while checking that + * the regulator was actually enabled. Max upto enable_time. + * * @of_map_mode: Maps a hardware mode defined in a DeviceTree to a standard mode */ struct regulator_desc { @@ -372,6 +375,8 @@ struct regulator_desc { unsigned int off_on_delay; + unsigned int poll_enabled_time; + unsigned int (*of_map_mode)(unsigned int mode); }; diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index a84cc8879c3e..8a56f033b6cd 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -101,6 +101,7 @@ struct regulator_state { * @system_load: Load that isn't captured by any consumer requests. * * @max_spread: Max possible spread between coupled regulators + * @max_uV_step: Max possible step change in voltage * @valid_modes_mask: Mask of modes which may be configured by consumers. * @valid_ops_mask: Operations which may be performed by consumers. * diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h new file mode 100644 index 000000000000..1bbd3014f906 --- /dev/null +++ b/include/linux/regulator/pca9450.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright 2020 NXP. */ + +#ifndef __LINUX_REG_PCA9450_H__ +#define __LINUX_REG_PCA9450_H__ + +#include <linux/regmap.h> + +enum pca9450_chip_type { + PCA9450_TYPE_PCA9450A = 0, + PCA9450_TYPE_PCA9450BC, + PCA9450_TYPE_AMOUNT, +}; + +enum { + PCA9450_BUCK1 = 0, + PCA9450_BUCK2, + PCA9450_BUCK3, + PCA9450_BUCK4, + PCA9450_BUCK5, + PCA9450_BUCK6, + PCA9450_LDO1, + PCA9450_LDO2, + PCA9450_LDO3, + PCA9450_LDO4, + PCA9450_LDO5, + PCA9450_REGULATOR_CNT, +}; + +enum { + PCA9450_DVS_LEVEL_RUN = 0, + PCA9450_DVS_LEVEL_STANDBY, + PCA9450_DVS_LEVEL_MAX, +}; + +#define PCA9450_BUCK1_VOLTAGE_NUM 0x80 +#define PCA9450_BUCK2_VOLTAGE_NUM 0x80 +#define PCA9450_BUCK3_VOLTAGE_NUM 0x80 +#define PCA9450_BUCK4_VOLTAGE_NUM 0x80 + +#define PCA9450_BUCK5_VOLTAGE_NUM 0x80 +#define PCA9450_BUCK6_VOLTAGE_NUM 0x80 + +#define PCA9450_LDO1_VOLTAGE_NUM 0x08 +#define PCA9450_LDO2_VOLTAGE_NUM 0x08 +#define PCA9450_LDO3_VOLTAGE_NUM 0x20 +#define PCA9450_LDO4_VOLTAGE_NUM 0x20 +#define PCA9450_LDO5_VOLTAGE_NUM 0x10 + +enum { + PCA9450_REG_DEV_ID = 0x00, + PCA9450_REG_INT1 = 0x01, + PCA9450_REG_INT1_MSK = 0x02, + PCA9450_REG_STATUS1 = 0x03, + PCA9450_REG_STATUS2 = 0x04, + PCA9450_REG_PWRON_STAT = 0x05, + PCA9450_REG_SWRST = 0x06, + PCA9450_REG_PWRCTRL = 0x07, + PCA9450_REG_RESET_CTRL = 0x08, + PCA9450_REG_CONFIG1 = 0x09, + PCA9450_REG_CONFIG2 = 0x0A, + PCA9450_REG_BUCK123_DVS = 0x0C, + PCA9450_REG_BUCK1OUT_LIMIT = 0x0D, + PCA9450_REG_BUCK2OUT_LIMIT = 0x0E, + PCA9450_REG_BUCK3OUT_LIMIT = 0x0F, + PCA9450_REG_BUCK1CTRL = 0x10, + PCA9450_REG_BUCK1OUT_DVS0 = 0x11, + PCA9450_REG_BUCK1OUT_DVS1 = 0x12, + PCA9450_REG_BUCK2CTRL = 0x13, + PCA9450_REG_BUCK2OUT_DVS0 = 0x14, + PCA9450_REG_BUCK2OUT_DVS1 = 0x15, + PCA9450_REG_BUCK3CTRL = 0x16, + PCA9450_REG_BUCK3OUT_DVS0 = 0x17, + PCA9450_REG_BUCK3OUT_DVS1 = 0x18, + PCA9450_REG_BUCK4CTRL = 0x19, + PCA9450_REG_BUCK4OUT = 0x1A, + PCA9450_REG_BUCK5CTRL = 0x1B, + PCA9450_REG_BUCK5OUT = 0x1C, + PCA9450_REG_BUCK6CTRL = 0x1D, + PCA9450_REG_BUCK6OUT = 0x1E, + PCA9450_REG_LDO_AD_CTRL = 0x20, + PCA9450_REG_LDO1CTRL = 0x21, + PCA9450_REG_LDO2CTRL = 0x22, + PCA9450_REG_LDO3CTRL = 0x23, + PCA9450_REG_LDO4CTRL = 0x24, + PCA9450_REG_LDO5CTRL_L = 0x25, + PCA9450_REG_LDO5CTRL_H = 0x26, + PCA9450_REG_LOADSW_CTRL = 0x2A, + PCA9450_REG_VRFLT1_STS = 0x2B, + PCA9450_REG_VRFLT2_STS = 0x2C, + PCA9450_REG_VRFLT1_MASK = 0x2D, + PCA9450_REG_VRFLT2_MASK = 0x2E, + PCA9450_MAX_REGISTER = 0x2F, +}; + +/* PCA9450 BUCK ENMODE bits */ +#define BUCK_ENMODE_OFF 0x00 +#define BUCK_ENMODE_ONREQ 0x01 +#define BUCK_ENMODE_ONREQ_STBYREQ 0x02 +#define BUCK_ENMODE_ON 0x03 + +/* PCA9450_REG_BUCK1_CTRL bits */ +#define BUCK1_RAMP_MASK 0xC0 +#define BUCK1_RAMP_25MV 0x0 +#define BUCK1_RAMP_12P5MV 0x1 +#define BUCK1_RAMP_6P25MV 0x2 +#define BUCK1_RAMP_3P125MV 0x3 +#define BUCK1_DVS_CTRL 0x10 +#define BUCK1_AD 0x08 +#define BUCK1_FPWM 0x04 +#define BUCK1_ENMODE_MASK 0x03 + +/* PCA9450_REG_BUCK2_CTRL bits */ +#define BUCK2_RAMP_MASK 0xC0 +#define BUCK2_RAMP_25MV 0x0 +#define BUCK2_RAMP_12P5MV 0x1 +#define BUCK2_RAMP_6P25MV 0x2 +#define BUCK2_RAMP_3P125MV 0x3 +#define BUCK2_DVS_CTRL 0x10 +#define BUCK2_AD 0x08 +#define BUCK2_FPWM 0x04 +#define BUCK2_ENMODE_MASK 0x03 + +/* PCA9450_REG_BUCK3_CTRL bits */ +#define BUCK3_RAMP_MASK 0xC0 +#define BUCK3_RAMP_25MV 0x0 +#define BUCK3_RAMP_12P5MV 0x1 +#define BUCK3_RAMP_6P25MV 0x2 +#define BUCK3_RAMP_3P125MV 0x3 +#define BUCK3_DVS_CTRL 0x10 +#define BUCK3_AD 0x08 +#define BUCK3_FPWM 0x04 +#define BUCK3_ENMODE_MASK 0x03 + +/* PCA9450_REG_BUCK4_CTRL bits */ +#define BUCK4_AD 0x08 +#define BUCK4_FPWM 0x04 +#define BUCK4_ENMODE_MASK 0x03 + +/* PCA9450_REG_BUCK5_CTRL bits */ +#define BUCK5_AD 0x08 +#define BUCK5_FPWM 0x04 +#define BUCK5_ENMODE_MASK 0x03 + +/* PCA9450_REG_BUCK6_CTRL bits */ +#define BUCK6_AD 0x08 +#define BUCK6_FPWM 0x04 +#define BUCK6_ENMODE_MASK 0x03 + +/* PCA9450_BUCK1OUT_DVS0 bits */ +#define BUCK1OUT_DVS0_MASK 0x7F +#define BUCK1OUT_DVS0_DEFAULT 0x14 + +/* PCA9450_BUCK1OUT_DVS1 bits */ +#define BUCK1OUT_DVS1_MASK 0x7F +#define BUCK1OUT_DVS1_DEFAULT 0x14 + +/* PCA9450_BUCK2OUT_DVS0 bits */ +#define BUCK2OUT_DVS0_MASK 0x7F +#define BUCK2OUT_DVS0_DEFAULT 0x14 + +/* PCA9450_BUCK2OUT_DVS1 bits */ +#define BUCK2OUT_DVS1_MASK 0x7F +#define BUCK2OUT_DVS1_DEFAULT 0x14 + +/* PCA9450_BUCK3OUT_DVS0 bits */ +#define BUCK3OUT_DVS0_MASK 0x7F +#define BUCK3OUT_DVS0_DEFAULT 0x14 + +/* PCA9450_BUCK3OUT_DVS1 bits */ +#define BUCK3OUT_DVS1_MASK 0x7F +#define BUCK3OUT_DVS1_DEFAULT 0x14 + +/* PCA9450_REG_BUCK4OUT bits */ +#define BUCK4OUT_MASK 0x7F +#define BUCK4OUT_DEFAULT 0x6C + +/* PCA9450_REG_BUCK5OUT bits */ +#define BUCK5OUT_MASK 0x7F +#define BUCK5OUT_DEFAULT 0x30 + +/* PCA9450_REG_BUCK6OUT bits */ +#define BUCK6OUT_MASK 0x7F +#define BUCK6OUT_DEFAULT 0x14 + +/* PCA9450_REG_LDO1_VOLT bits */ +#define LDO1_EN_MASK 0xC0 +#define LDO1OUT_MASK 0x07 + +/* PCA9450_REG_LDO2_VOLT bits */ +#define LDO2_EN_MASK 0xC0 +#define LDO2OUT_MASK 0x07 + +/* PCA9450_REG_LDO3_VOLT bits */ +#define LDO3_EN_MASK 0xC0 +#define LDO3OUT_MASK 0x0F + +/* PCA9450_REG_LDO4_VOLT bits */ +#define LDO4_EN_MASK 0xC0 +#define LDO4OUT_MASK 0x0F + +/* PCA9450_REG_LDO5_VOLT bits */ +#define LDO5L_EN_MASK 0xC0 +#define LDO5LOUT_MASK 0x0F + +#define LDO5H_EN_MASK 0xC0 +#define LDO5HOUT_MASK 0x0F + +/* PCA9450_REG_IRQ bits */ +#define IRQ_PWRON 0x80 +#define IRQ_WDOGB 0x40 +#define IRQ_RSVD 0x20 +#define IRQ_VR_FLT1 0x10 +#define IRQ_VR_FLT2 0x08 +#define IRQ_LOWVSYS 0x04 +#define IRQ_THERM_105 0x02 +#define IRQ_THERM_125 0x01 + +#endif /* __LINUX_REG_PCA9450_H__ */ diff --git a/include/linux/reset/reset-simple.h b/include/linux/reset/reset-simple.h new file mode 100644 index 000000000000..c3e44f45b0f7 --- /dev/null +++ b/include/linux/reset/reset-simple.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Simple Reset Controller ops + * + * Based on Allwinner SoCs Reset Controller driver + * + * Copyright 2013 Maxime Ripard + * + * Maxime Ripard <maxime.ripard@free-electrons.com> + */ + +#ifndef __RESET_SIMPLE_H__ +#define __RESET_SIMPLE_H__ + +#include <linux/io.h> +#include <linux/reset-controller.h> +#include <linux/spinlock.h> + +/** + * struct reset_simple_data - driver data for simple reset controllers + * @lock: spinlock to protect registers during read-modify-write cycles + * @membase: memory mapped I/O register range + * @rcdev: reset controller device base structure + * @active_low: if true, bits are cleared to assert the reset. Otherwise, bits + * are set to assert the reset. Note that this says nothing about + * the voltage level of the actual reset line. + * @status_active_low: if true, bits read back as cleared while the reset is + * asserted. Otherwise, bits read back as set while the + * reset is asserted. + * @reset_us: Minimum delay in microseconds needed that needs to be + * waited for between an assert and a deassert to reset the + * device. If multiple consumers with different delay + * requirements are connected to this controller, it must + * be the largest minimum delay. 0 means that such a delay is + * unknown and the reset operation is unsupported. + */ +struct reset_simple_data { + spinlock_t lock; + void __iomem *membase; + struct reset_controller_dev rcdev; + bool active_low; + bool status_active_low; + unsigned int reset_us; +}; + +extern const struct reset_control_ops reset_simple_ops; + +#endif /* __RESET_SIMPLE_H__ */ diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 70ebef866cc8..68dab3e08aad 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -33,7 +33,7 @@ * of two or more hash tables when the rhashtable is being resized. * The end of the chain is marked with a special nulls marks which has * the least significant bit set but otherwise stores the address of - * the hash bucket. This allows us to be be sure we've found the end + * the hash bucket. This allows us to be sure we've found the end * of the right list. * The value stored in the hash bucket has BIT(0) used as a lock bit. * This bit must be atomically set before any changes are made to @@ -84,7 +84,7 @@ struct bucket_table { struct lockdep_map dep_map; - struct rhash_lock_head *buckets[] ____cacheline_aligned_in_smp; + struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; /* @@ -261,13 +261,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void *arg); void rhashtable_destroy(struct rhashtable *ht); -struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash); -struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash); -struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash); +struct rhash_lock_head __rcu **rht_bucket_nested( + const struct bucket_table *tbl, unsigned int hash); +struct rhash_lock_head __rcu **__rht_bucket_nested( + const struct bucket_table *tbl, unsigned int hash); +struct rhash_lock_head __rcu **rht_bucket_nested_insert( + struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash); #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) @@ -284,21 +283,21 @@ struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht, #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) -static inline struct rhash_lock_head *const *rht_bucket( +static inline struct rhash_lock_head __rcu *const *rht_bucket( const struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } -static inline struct rhash_lock_head **rht_bucket_var( +static inline struct rhash_lock_head __rcu **rht_bucket_var( struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } -static inline struct rhash_lock_head **rht_bucket_insert( +static inline struct rhash_lock_head __rcu **rht_bucket_insert( struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) : @@ -325,7 +324,7 @@ static inline struct rhash_lock_head **rht_bucket_insert( */ static inline void rht_lock(struct bucket_table *tbl, - struct rhash_lock_head **bkt) + struct rhash_lock_head __rcu **bkt) { local_bh_disable(); bit_spin_lock(0, (unsigned long *)bkt); @@ -333,7 +332,7 @@ static inline void rht_lock(struct bucket_table *tbl, } static inline void rht_lock_nested(struct bucket_table *tbl, - struct rhash_lock_head **bucket, + struct rhash_lock_head __rcu **bucket, unsigned int subclass) { local_bh_disable(); @@ -342,18 +341,18 @@ static inline void rht_lock_nested(struct bucket_table *tbl, } static inline void rht_unlock(struct bucket_table *tbl, - struct rhash_lock_head **bkt) + struct rhash_lock_head __rcu **bkt) { lock_map_release(&tbl->dep_map); bit_spin_unlock(0, (unsigned long *)bkt); local_bh_enable(); } -static inline struct rhash_head __rcu *__rht_ptr( - struct rhash_lock_head *const *bkt) +static inline struct rhash_head *__rht_ptr( + struct rhash_lock_head *p, struct rhash_lock_head __rcu *const *bkt) { - return (struct rhash_head __rcu *) - ((unsigned long)*bkt & ~BIT(0) ?: + return (struct rhash_head *) + ((unsigned long)p & ~BIT(0) ?: (unsigned long)RHT_NULLS_MARKER(bkt)); } @@ -365,47 +364,41 @@ static inline struct rhash_head __rcu *__rht_ptr( * access is guaranteed, such as when destroying the table. */ static inline struct rhash_head *rht_ptr_rcu( - struct rhash_lock_head *const *bkt) + struct rhash_lock_head __rcu *const *bkt) { - struct rhash_head __rcu *p = __rht_ptr(bkt); - - return rcu_dereference(p); + return __rht_ptr(rcu_dereference(*bkt), bkt); } static inline struct rhash_head *rht_ptr( - struct rhash_lock_head *const *bkt, + struct rhash_lock_head __rcu *const *bkt, struct bucket_table *tbl, unsigned int hash) { - return rht_dereference_bucket(__rht_ptr(bkt), tbl, hash); + return __rht_ptr(rht_dereference_bucket(*bkt, tbl, hash), bkt); } static inline struct rhash_head *rht_ptr_exclusive( - struct rhash_lock_head *const *bkt) + struct rhash_lock_head __rcu *const *bkt) { - return rcu_dereference_protected(__rht_ptr(bkt), 1); + return __rht_ptr(rcu_dereference_protected(*bkt, 1), bkt); } -static inline void rht_assign_locked(struct rhash_lock_head **bkt, +static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt, struct rhash_head *obj) { - struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt; - if (rht_is_a_nulls(obj)) obj = NULL; - rcu_assign_pointer(*p, (void *)((unsigned long)obj | BIT(0))); + rcu_assign_pointer(*bkt, (void *)((unsigned long)obj | BIT(0))); } static inline void rht_assign_unlock(struct bucket_table *tbl, - struct rhash_lock_head **bkt, + struct rhash_lock_head __rcu **bkt, struct rhash_head *obj) { - struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt; - if (rht_is_a_nulls(obj)) obj = NULL; lock_map_release(&tbl->dep_map); - rcu_assign_pointer(*p, obj); + rcu_assign_pointer(*bkt, (void *)obj); preempt_enable(); __release(bitlock); local_bh_enable(); @@ -593,7 +586,7 @@ static inline struct rhash_head *__rhashtable_lookup( .ht = ht, .key = key, }; - struct rhash_lock_head *const *bkt; + struct rhash_lock_head __rcu *const *bkt; struct bucket_table *tbl; struct rhash_head *he; unsigned int hash; @@ -709,7 +702,7 @@ static inline void *__rhashtable_insert_fast( .ht = ht, .key = key, }; - struct rhash_lock_head **bkt; + struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct bucket_table *tbl; struct rhash_head *head; @@ -995,7 +988,7 @@ static inline int __rhashtable_remove_fast_one( struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { - struct rhash_lock_head **bkt; + struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; unsigned int hash; @@ -1147,7 +1140,7 @@ static inline int __rhashtable_replace_fast( struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) { - struct rhash_lock_head **bkt; + struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; unsigned int hash; diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 7e5b2a4eb560..25e3fde85617 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -60,39 +60,39 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) } #define RWSEM_UNLOCKED_VALUE 0L -#define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) +#define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) /* Common initializer macros and functions */ #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __RWSEM_DEP_MAP_INIT(lockname) \ - , .dep_map = { \ + .dep_map = { \ .name = #lockname, \ .wait_type_inner = LD_WAIT_SLEEP, \ - } + }, #else # define __RWSEM_DEP_MAP_INIT(lockname) #endif #ifdef CONFIG_DEBUG_RWSEMS -# define __DEBUG_RWSEM_INITIALIZER(lockname) , .magic = &lockname +# define __RWSEM_DEBUG_INIT(lockname) .magic = &lockname, #else -# define __DEBUG_RWSEM_INITIALIZER(lockname) +# define __RWSEM_DEBUG_INIT(lockname) #endif #ifdef CONFIG_RWSEM_SPIN_ON_OWNER -#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED +#define __RWSEM_OPT_INIT(lockname) .osq = OSQ_LOCK_UNLOCKED, #else #define __RWSEM_OPT_INIT(lockname) #endif #define __RWSEM_INITIALIZER(name) \ - { __RWSEM_INIT_COUNT(name), \ + { __RWSEM_COUNT_INIT(name), \ .owner = ATOMIC_LONG_INIT(0), \ - .wait_list = LIST_HEAD_INIT((name).wait_list), \ - .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \ __RWSEM_OPT_INIT(name) \ - __DEBUG_RWSEM_INITIALIZER(name) \ + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\ + .wait_list = LIST_HEAD_INIT((name).wait_list), \ + __RWSEM_DEBUG_INIT(name) \ __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 4f922afb607a..45cf7b69d852 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -155,7 +155,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, * Loop over each sg element in the given sg_table object. */ #define for_each_sgtable_sg(sgt, sg, i) \ - for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) + for_each_sg((sgt)->sgl, sg, (sgt)->orig_nents, i) /* * Loop over each sg element in the given *DMA mapped* sg_table object. @@ -163,7 +163,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, * of the each element. */ #define for_each_sgtable_dma_sg(sgt, sg, i) \ - for_each_sg(sgt->sgl, sg, sgt->nents, i) + for_each_sg((sgt)->sgl, sg, (sgt)->nents, i) /** * sg_chain - Chain two sglists together @@ -451,7 +451,7 @@ sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter) * See also for_each_sg_page(). In each loop it operates on PAGE_SIZE unit. */ #define for_each_sgtable_page(sgt, piter, pgoffset) \ - for_each_sg_page(sgt->sgl, piter, sgt->orig_nents, pgoffset) + for_each_sg_page((sgt)->sgl, piter, (sgt)->orig_nents, pgoffset) /** * for_each_sgtable_dma_page - iterate over the DMA mapped sg_table object @@ -465,7 +465,7 @@ sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter) * unit. */ #define for_each_sgtable_dma_page(sgt, dma_iter, pgoffset) \ - for_each_sg_dma_page(sgt->sgl, dma_iter, sgt->nents, pgoffset) + for_each_sg_dma_page((sgt)->sgl, dma_iter, (sgt)->nents, pgoffset) /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ea612e9ad27..06ec60462af0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -18,6 +18,7 @@ #include <linux/mutex.h> #include <linux/plist.h> #include <linux/hrtimer.h> +#include <linux/irqflags.h> #include <linux/seccomp.h> #include <linux/nodemask.h> #include <linux/rcupdate.h> @@ -114,10 +115,6 @@ struct task_group; #define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) -#define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FROZEN) == 0 && \ - (task->state & TASK_NOLOAD) == 0) - #ifdef CONFIG_DEBUG_ATOMIC_SLEEP /* @@ -158,24 +155,24 @@ struct task_group; * * for (;;) { * set_current_state(TASK_UNINTERRUPTIBLE); - * if (!need_sleep) - * break; + * if (CONDITION) + * break; * * schedule(); * } * __set_current_state(TASK_RUNNING); * * If the caller does not need such serialisation (because, for instance, the - * condition test and condition change and wakeup are under the same lock) then + * CONDITION test and condition change and wakeup are under the same lock) then * use __set_current_state(). * * The above is typically ordered against the wakeup, which does: * - * need_sleep = false; + * CONDITION = 1; * wake_up_state(p, TASK_UNINTERRUPTIBLE); * - * where wake_up_state() executes a full memory barrier before accessing the - * task state. + * where wake_up_state()/try_to_wake_up() executes a full memory barrier before + * accessing p->state. * * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is, * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a @@ -378,7 +375,7 @@ struct util_est { * For cfs_rq, they are the aggregated values of all runnable and blocked * sched_entities. * - * The load/runnable/util_avg doesn't direcly factor frequency scaling and CPU + * The load/runnable/util_avg doesn't directly factor frequency scaling and CPU * capacity scaling. The scaling is done through the rq_clock_pelt that is used * for computing those signals (see update_rq_clock_pelt()) * @@ -654,9 +651,8 @@ struct task_struct { unsigned int ptrace; #ifdef CONFIG_SMP - struct llist_node wake_entry; - unsigned int wake_entry_type; int on_cpu; + struct __call_single_node wake_entry; #ifdef CONFIG_THREAD_INFO_IN_TASK /* Current CPU: */ unsigned int cpu; @@ -691,9 +687,15 @@ struct task_struct { struct sched_dl_entity dl; #ifdef CONFIG_UCLAMP_TASK - /* Clamp values requested for a scheduling entity */ + /* + * Clamp values requested for a scheduling entity. + * Must be updated with task_rq_lock() held. + */ struct uclamp_se uclamp_req[UCLAMP_CNT]; - /* Effective clamp values used for a scheduling entity */ + /* + * Effective clamp values used for a scheduling entity. + * Must be updated with task_rq_lock() held. + */ struct uclamp_se uclamp[UCLAMP_CNT]; #endif @@ -985,19 +987,9 @@ struct task_struct { #endif #ifdef CONFIG_TRACE_IRQFLAGS - unsigned int irq_events; + struct irqtrace_events irqtrace; unsigned int hardirq_threaded; - unsigned long hardirq_enable_ip; - unsigned long hardirq_disable_ip; - unsigned int hardirq_enable_event; - unsigned int hardirq_disable_event; - int hardirqs_enabled; - int hardirq_context; u64 hardirq_chain_key; - unsigned long softirq_disable_ip; - unsigned long softirq_enable_ip; - unsigned int softirq_disable_event; - unsigned int softirq_enable_event; int softirqs_enabled; int softirq_context; int irq_config; @@ -1198,8 +1190,12 @@ struct task_struct { #ifdef CONFIG_KASAN unsigned int kasan_depth; #endif + #ifdef CONFIG_KCSAN struct kcsan_ctx kcsan_ctx; +#ifdef CONFIG_TRACE_IRQFLAGS + struct irqtrace_events kcsan_save_irqtrace; +#endif #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -1308,7 +1304,9 @@ struct task_struct { #ifdef CONFIG_X86_MCE u64 mce_addr; - u64 mce_status; + __u64 mce_ripv : 1, + mce_whole_page : 1, + __mce_reserved : 62; struct callback_head mce_kill_me; #endif @@ -1509,7 +1507,6 @@ extern struct pid *cad_pid; #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ -#define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */ @@ -2018,14 +2015,6 @@ static inline void rseq_execve(struct task_struct *t) #endif -void __exit_umh(struct task_struct *tsk); - -static inline void exit_umh(struct task_struct *tsk) -{ - if (unlikely(tsk->flags & PF_UMH)) - __exit_umh(tsk); -} - #ifdef CONFIG_DEBUG_RSEQ void rseq_syscall(struct pt_regs *regs); @@ -2047,6 +2036,7 @@ const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq); const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq); int sched_trace_rq_cpu(struct rq *rq); +int sched_trace_rq_nr_running(struct rq *rq); const struct cpumask *sched_trace_rd_span(struct root_domain *rd); diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 0fbcbacd1b29..cc9f393e2a70 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -14,6 +14,7 @@ enum hk_flags { HK_FLAG_DOMAIN = (1 << 5), HK_FLAG_WQ = (1 << 6), HK_FLAG_MANAGED_IRQ = (1 << 7), + HK_FLAG_KTHREAD = (1 << 8), }; #ifdef CONFIG_CPU_ISOLATION diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h index fa067de9f1a9..d2b4204ba4d3 100644 --- a/include/linux/sched/jobctl.h +++ b/include/linux/sched/jobctl.h @@ -19,6 +19,7 @@ struct task_struct; #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ #define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ +#define JOBCTL_TASK_WORK_BIT 24 /* set by TWA_SIGNAL */ #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) @@ -28,9 +29,10 @@ struct task_struct; #define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) #define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) +#define JOBCTL_TASK_WORK (1UL << JOBCTL_TASK_WORK_BIT) #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) -#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) +#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK | JOBCTL_TASK_WORK) extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask); extern void task_clear_jobctl_trapping(struct task_struct *task); diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h index 4859bea47a7b..83ec54b65e79 100644 --- a/include/linux/sched/loadavg.h +++ b/include/linux/sched/loadavg.h @@ -43,6 +43,6 @@ extern unsigned long calc_load_n(unsigned long load, unsigned long exp, #define LOAD_INT(x) ((x) >> FSHIFT) #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) -extern void calc_global_load(unsigned long ticks); +extern void calc_global_load(void); #endif /* _LINUX_SCHED_LOADAVG_H */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 480a4d1b7dd8..6be66f52a2ad 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -23,7 +23,7 @@ extern struct mm_struct *mm_alloc(void); * will still exist later on and mmget_not_zero() has to be used before * accessing it. * - * This is a preferred way to to pin @mm for a longer/unbounded amount + * This is a preferred way to pin @mm for a longer/unbounded amount * of time. * * Use mmdrop() to release the reference acquired by mmgrab(). @@ -49,8 +49,6 @@ static inline void mmdrop(struct mm_struct *mm) __mmdrop(mm); } -void mmdrop(struct mm_struct *mm); - /* * This has to be called after a get_task_mm()/mmget_not_zero() * followed by taking the mmap_lock for writing before modifying the @@ -234,7 +232,7 @@ static inline unsigned int memalloc_noio_save(void) * @flags: Flags to restore. * * Ends the implicit GFP_NOIO scope started by memalloc_noio_save function. - * Always make sure that that the given flags is the return value from the + * Always make sure that the given flags is the return value from the * pairing memalloc_noio_save call. */ static inline void memalloc_noio_restore(unsigned int flags) @@ -265,7 +263,7 @@ static inline unsigned int memalloc_nofs_save(void) * @flags: Flags to restore. * * Ends the implicit GFP_NOFS scope started by memalloc_nofs_save function. - * Always make sure that that the given flags is the return value from the + * Always make sure that the given flags is the return value from the * pairing memalloc_nofs_save call. */ static inline void memalloc_nofs_restore(unsigned int flags) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0ee5e696c5d8..1bad18a1d8ba 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -674,6 +674,8 @@ static inline int thread_group_empty(struct task_struct *p) #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) +extern bool thread_group_exited(struct pid *pid); + extern struct sighand_struct *__lock_task_sighand(struct task_struct *task, unsigned long *flags); diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 660ac49f2b53..3c31ba88aca5 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -61,9 +61,13 @@ int sched_proc_update_handler(struct ctl_table *table, int write, extern unsigned int sysctl_sched_rt_period; extern int sysctl_sched_rt_runtime; +extern unsigned int sysctl_sched_dl_period_max; +extern unsigned int sysctl_sched_dl_period_min; + #ifdef CONFIG_UCLAMP_TASK extern unsigned int sysctl_sched_uclamp_util_min; extern unsigned int sysctl_sched_uclamp_util_max; +extern unsigned int sysctl_sched_uclamp_util_min_rt_default; #endif #ifdef CONFIG_CFS_BANDWIDTH diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 38359071236a..ae3060f0b0c9 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -55,6 +55,7 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern int sched_fork(unsigned long clone_flags, struct task_struct *p); +extern void sched_post_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); @@ -65,22 +66,9 @@ extern void fork_init(void); extern void release_task(struct task_struct * p); -#ifdef CONFIG_HAVE_COPY_THREAD_TLS -extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, - struct task_struct *, unsigned long); -#else extern int copy_thread(unsigned long, unsigned long, unsigned long, - struct task_struct *); + struct task_struct *, unsigned long); -/* Architectures that haven't opted into copy_thread_tls get the tls argument - * via pt_regs, so ignore the tls argument passed via C. */ -static inline int copy_thread_tls( - unsigned long clone_flags, unsigned long sp, unsigned long arg, - struct task_struct *p, unsigned long tls) -{ - return copy_thread(clone_flags, sp, arg, p); -} -#endif extern void flush_thread(void); #ifdef CONFIG_HAVE_EXIT_THREAD @@ -96,8 +84,6 @@ extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); extern long _do_fork(struct kernel_clone_args *kargs); -extern bool legacy_clone_args_valid(const struct kernel_clone_args *kargs); -extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); struct mm_struct *copy_init_mm(void); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); @@ -126,6 +112,12 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } +static inline void put_task_struct_many(struct task_struct *t, int nr) +{ + if (refcount_sub_and_test(nr, &t->usage)) + __put_task_struct(t); +} + void put_task_struct_rcu_user(struct task_struct *task); #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index fb11091129b3..820511289857 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -217,6 +217,16 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) #endif /* !CONFIG_SMP */ #ifndef arch_scale_cpu_capacity +/** + * arch_scale_cpu_capacity - get the capacity scale factor of a given CPU. + * @cpu: the CPU in question. + * + * Return: the CPU scale factor normalized against SCHED_CAPACITY_SCALE, i.e. + * + * max_perf(cpu) + * ----------------------------- * SCHED_CAPACITY_SCALE + * max(max_perf(c) : c \in CPUs) + */ static __always_inline unsigned long arch_scale_cpu_capacity(int cpu) { @@ -232,6 +242,13 @@ unsigned long arch_scale_thermal_pressure(int cpu) } #endif +#ifndef arch_set_thermal_pressure +static __always_inline +void arch_set_thermal_pressure(const struct cpumask *cpus, + unsigned long th_pressure) +{ } +#endif + static inline int task_node(const struct task_struct *p) { return cpu_to_node(task_cpu(p)); diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h index 0bb04a96a6d4..528718e4ed52 100644 --- a/include/linux/sched_clock.h +++ b/include/linux/sched_clock.h @@ -6,6 +6,34 @@ #define LINUX_SCHED_CLOCK #ifdef CONFIG_GENERIC_SCHED_CLOCK +/** + * struct clock_read_data - data required to read from sched_clock() + * + * @epoch_ns: sched_clock() value at last update + * @epoch_cyc: Clock cycle value at last update. + * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit + * clocks. + * @read_sched_clock: Current clock source (or dummy source when suspended). + * @mult: Multipler for scaled math conversion. + * @shift: Shift value for scaled math conversion. + * + * Care must be taken when updating this structure; it is read by + * some very hot code paths. It occupies <=40 bytes and, when combined + * with the seqcount used to synchronize access, comfortably fits into + * a 64 byte cache line. + */ +struct clock_read_data { + u64 epoch_ns; + u64 epoch_cyc; + u64 sched_clock_mask; + u64 (*read_sched_clock)(void); + u32 mult; + u32 shift; +}; + +extern struct clock_read_data *sched_clock_read_begin(unsigned int *seq); +extern int sched_clock_read_retry(unsigned int seq); + extern void generic_sched_clock_init(void); extern void sched_clock_register(u64 (*read)(void), int bits, diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index ce2f5c28b2df..7e5dd7d1e221 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -9,6 +9,7 @@ #define _LINUX_SCMI_PROTOCOL_H #include <linux/device.h> +#include <linux/notifier.h> #include <linux/types.h> #define SCMI_MAX_STR_SIZE 16 @@ -118,6 +119,8 @@ struct scmi_perf_ops { unsigned long *rate, bool poll); int (*est_power_get)(const struct scmi_handle *handle, u32 domain, unsigned long *rate, unsigned long *power); + bool (*fast_switch_possible)(const struct scmi_handle *handle, + struct device *dev); }; /** @@ -173,18 +176,13 @@ enum scmi_sensor_class { * * @count_get: get the count of sensors provided by SCMI * @info_get: get the information of the specified sensor - * @trip_point_notify: control notifications on cross-over events for - * the trip-points * @trip_point_config: selects and configures a trip-point of interest * @reading_get: gets the current value of the sensor */ struct scmi_sensor_ops { int (*count_get)(const struct scmi_handle *handle); - const struct scmi_sensor_info *(*info_get) (const struct scmi_handle *handle, u32 sensor_id); - int (*trip_point_notify)(const struct scmi_handle *handle, - u32 sensor_id, bool enable); int (*trip_point_config)(const struct scmi_handle *handle, u32 sensor_id, u8 trip_id, u64 trip_value); int (*reading_get)(const struct scmi_handle *handle, u32 sensor_id, @@ -212,6 +210,49 @@ struct scmi_reset_ops { }; /** + * struct scmi_notify_ops - represents notifications' operations provided by + * SCMI core + * @register_event_notifier: Register a notifier_block for the requested event + * @unregister_event_notifier: Unregister a notifier_block for the requested + * event + * + * A user can register/unregister its own notifier_block against the wanted + * platform instance regarding the desired event identified by the + * tuple: (proto_id, evt_id, src_id) using the provided register/unregister + * interface where: + * + * @handle: The handle identifying the platform instance to use + * @proto_id: The protocol ID as in SCMI Specification + * @evt_id: The message ID of the desired event as in SCMI Specification + * @src_id: A pointer to the desired source ID if different sources are + * possible for the protocol (like domain_id, sensor_id...etc) + * + * @src_id can be provided as NULL if it simply does NOT make sense for + * the protocol at hand, OR if the user is explicitly interested in + * receiving notifications from ANY existent source associated to the + * specified proto_id / evt_id. + * + * Received notifications are finally delivered to the registered users, + * invoking the callback provided with the notifier_block *nb as follows: + * + * int user_cb(nb, evt_id, report) + * + * with: + * + * @nb: The notifier block provided by the user + * @evt_id: The message ID of the delivered event + * @report: A custom struct describing the specific event delivered + */ +struct scmi_notify_ops { + int (*register_event_notifier)(const struct scmi_handle *handle, + u8 proto_id, u8 evt_id, u32 *src_id, + struct notifier_block *nb); + int (*unregister_event_notifier)(const struct scmi_handle *handle, + u8 proto_id, u8 evt_id, u32 *src_id, + struct notifier_block *nb); +}; + +/** * struct scmi_handle - Handle returned to ARM SCMI clients for usage. * * @dev: pointer to the SCMI device @@ -221,6 +262,7 @@ struct scmi_reset_ops { * @clk_ops: pointer to set of clock protocol operations * @sensor_ops: pointer to set of sensor protocol operations * @reset_ops: pointer to set of reset protocol operations + * @notify_ops: pointer to set of notifications related operations * @perf_priv: pointer to private data structure specific to performance * protocol(for internal use only) * @clk_priv: pointer to private data structure specific to clock @@ -231,6 +273,8 @@ struct scmi_reset_ops { * protocol(for internal use only) * @reset_priv: pointer to private data structure specific to reset * protocol(for internal use only) + * @notify_priv: pointer to private data structure specific to notifications + * (for internal use only) */ struct scmi_handle { struct device *dev; @@ -240,12 +284,14 @@ struct scmi_handle { struct scmi_power_ops *power_ops; struct scmi_sensor_ops *sensor_ops; struct scmi_reset_ops *reset_ops; + struct scmi_notify_ops *notify_ops; /* for protocol internal use */ void *perf_priv; void *clk_priv; void *power_priv; void *sensor_priv; void *reset_priv; + void *notify_priv; }; enum scmi_std_protocol { @@ -324,4 +370,58 @@ typedef int (*scmi_prot_init_fn_t)(struct scmi_handle *); int scmi_protocol_register(int protocol_id, scmi_prot_init_fn_t fn); void scmi_protocol_unregister(int protocol_id); +/* SCMI Notification API - Custom Event Reports */ +enum scmi_notification_events { + SCMI_EVENT_POWER_STATE_CHANGED = 0x0, + SCMI_EVENT_PERFORMANCE_LIMITS_CHANGED = 0x0, + SCMI_EVENT_PERFORMANCE_LEVEL_CHANGED = 0x1, + SCMI_EVENT_SENSOR_TRIP_POINT_EVENT = 0x0, + SCMI_EVENT_RESET_ISSUED = 0x0, + SCMI_EVENT_BASE_ERROR_EVENT = 0x0, +}; + +struct scmi_power_state_changed_report { + ktime_t timestamp; + unsigned int agent_id; + unsigned int domain_id; + unsigned int power_state; +}; + +struct scmi_perf_limits_report { + ktime_t timestamp; + unsigned int agent_id; + unsigned int domain_id; + unsigned int range_max; + unsigned int range_min; +}; + +struct scmi_perf_level_report { + ktime_t timestamp; + unsigned int agent_id; + unsigned int domain_id; + unsigned int performance_level; +}; + +struct scmi_sensor_trip_point_report { + ktime_t timestamp; + unsigned int agent_id; + unsigned int sensor_id; + unsigned int trip_point_desc; +}; + +struct scmi_reset_issued_report { + ktime_t timestamp; + unsigned int agent_id; + unsigned int domain_id; + unsigned int reset_state; +}; + +struct scmi_base_error_report { + ktime_t timestamp; + unsigned int agent_id; + bool fatal; + unsigned int cmd_count; + unsigned long long reports[]; +}; + #endif /* _LINUX_SCMI_PROTOCOL_H */ diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 8ccd82105de8..76731230bbc5 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -221,7 +221,7 @@ struct sctp_datahdr { __be16 stream; __be16 ssn; __u32 ppid; - __u8 payload[0]; + __u8 payload[]; }; struct sctp_data_chunk { @@ -269,7 +269,7 @@ struct sctp_inithdr { __be16 num_outbound_streams; __be16 num_inbound_streams; __be32 initial_tsn; - __u8 params[0]; + __u8 params[]; }; struct sctp_init_chunk { @@ -299,13 +299,13 @@ struct sctp_cookie_preserve_param { /* Section 3.3.2.1 Host Name Address (11) */ struct sctp_hostname_param { struct sctp_paramhdr param_hdr; - uint8_t hostname[0]; + uint8_t hostname[]; }; /* Section 3.3.2.1 Supported Address Types (12) */ struct sctp_supported_addrs_param { struct sctp_paramhdr param_hdr; - __be16 types[0]; + __be16 types[]; }; /* ADDIP Section 3.2.6 Adaptation Layer Indication */ @@ -317,25 +317,25 @@ struct sctp_adaptation_ind_param { /* ADDIP Section 4.2.7 Supported Extensions Parameter */ struct sctp_supported_ext_param { struct sctp_paramhdr param_hdr; - __u8 chunks[0]; + __u8 chunks[]; }; /* AUTH Section 3.1 Random */ struct sctp_random_param { struct sctp_paramhdr param_hdr; - __u8 random_val[0]; + __u8 random_val[]; }; /* AUTH Section 3.2 Chunk List */ struct sctp_chunks_param { struct sctp_paramhdr param_hdr; - __u8 chunks[0]; + __u8 chunks[]; }; /* AUTH Section 3.3 HMAC Algorithm */ struct sctp_hmac_algo_param { struct sctp_paramhdr param_hdr; - __be16 hmac_ids[0]; + __be16 hmac_ids[]; }; /* RFC 2960. Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2): @@ -350,7 +350,7 @@ struct sctp_initack_chunk { /* Section 3.3.3.1 State Cookie (7) */ struct sctp_cookie_param { struct sctp_paramhdr p; - __u8 body[0]; + __u8 body[]; }; /* Section 3.3.3.1 Unrecognized Parameters (8) */ @@ -384,7 +384,7 @@ struct sctp_sackhdr { __be32 a_rwnd; __be16 num_gap_ack_blocks; __be16 num_dup_tsns; - union sctp_sack_variable variable[0]; + union sctp_sack_variable variable[]; }; struct sctp_sack_chunk { @@ -436,7 +436,7 @@ struct sctp_shutdown_chunk { struct sctp_errhdr { __be16 cause; __be16 length; - __u8 variable[0]; + __u8 variable[]; }; struct sctp_operr_chunk { @@ -594,7 +594,7 @@ struct sctp_fwdtsn_skip { struct sctp_fwdtsn_hdr { __be32 new_cum_tsn; - struct sctp_fwdtsn_skip skip[0]; + struct sctp_fwdtsn_skip skip[]; }; struct sctp_fwdtsn_chunk { @@ -611,7 +611,7 @@ struct sctp_ifwdtsn_skip { struct sctp_ifwdtsn_hdr { __be32 new_cum_tsn; - struct sctp_ifwdtsn_skip skip[0]; + struct sctp_ifwdtsn_skip skip[]; }; struct sctp_ifwdtsn_chunk { @@ -658,7 +658,7 @@ struct sctp_addip_param { struct sctp_addiphdr { __be32 serial; - __u8 params[0]; + __u8 params[]; }; struct sctp_addip_chunk { @@ -718,7 +718,7 @@ struct sctp_addip_chunk { struct sctp_authhdr { __be16 shkey_id; __be16 hmac_id; - __u8 hmac[0]; + __u8 hmac[]; }; struct sctp_auth_chunk { @@ -733,7 +733,7 @@ struct sctp_infox { struct sctp_reconf_chunk { struct sctp_chunkhdr chunk_hdr; - __u8 params[0]; + __u8 params[]; }; struct sctp_strreset_outreq { @@ -741,13 +741,13 @@ struct sctp_strreset_outreq { __be32 request_seq; __be32 response_seq; __be32 send_reset_at_tsn; - __be16 list_of_streams[0]; + __be16 list_of_streams[]; }; struct sctp_strreset_inreq { struct sctp_paramhdr param_hdr; __be32 request_seq; - __be16 list_of_streams[0]; + __be16 list_of_streams[]; }; struct sctp_strreset_tsnreq { diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 4192369b8418..02aef2844c38 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -10,9 +10,14 @@ SECCOMP_FILTER_FLAG_NEW_LISTENER | \ SECCOMP_FILTER_FLAG_TSYNC_ESRCH) +/* sizeof() the first published struct seccomp_notif_addfd */ +#define SECCOMP_NOTIFY_ADDFD_SIZE_VER0 24 +#define SECCOMP_NOTIFY_ADDFD_SIZE_LATEST SECCOMP_NOTIFY_ADDFD_SIZE_VER0 + #ifdef CONFIG_SECCOMP #include <linux/thread_info.h> +#include <linux/atomic.h> #include <asm/seccomp.h> struct seccomp_filter; @@ -29,6 +34,7 @@ struct seccomp_filter; */ struct seccomp { int mode; + atomic_t filter_count; struct seccomp_filter *filter; }; @@ -58,9 +64,11 @@ static inline int seccomp_mode(struct seccomp *s) struct seccomp { }; struct seccomp_filter { }; +struct seccomp_data; #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER static inline int secure_computing(void) { return 0; } +static inline int __secure_computing(const struct seccomp_data *sd) { return 0; } #else static inline void secure_computing_strict(int this_syscall) { return; } #endif @@ -82,10 +90,10 @@ static inline int seccomp_mode(struct seccomp *s) #endif /* CONFIG_SECCOMP */ #ifdef CONFIG_SECCOMP_FILTER -extern void put_seccomp_filter(struct task_struct *tsk); +extern void seccomp_filter_release(struct task_struct *tsk); extern void get_seccomp_filter(struct task_struct *tsk); #else /* CONFIG_SECCOMP_FILTER */ -static inline void put_seccomp_filter(struct task_struct *tsk) +static inline void seccomp_filter_release(struct task_struct *tsk) { return; } diff --git a/include/linux/security.h b/include/linux/security.h index b3f2cb21b4f2..0a0a03b36a3b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -56,6 +56,8 @@ struct mm_struct; struct fs_context; struct fs_parameter; enum fs_value_type; +struct watch; +struct watch_notification; /* Default (no) options for the capable function */ #define CAP_OPT_NONE 0x0 @@ -390,6 +392,8 @@ int security_kernel_post_read_file(struct file *file, char *buf, loff_t size, enum kernel_read_file_id id); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); +int security_task_fix_setgid(struct cred *new, const struct cred *old, + int flags); int security_task_setpgid(struct task_struct *p, pid_t pgid); int security_task_getpgid(struct task_struct *p); int security_task_getsid(struct task_struct *p); @@ -1034,6 +1038,13 @@ static inline int security_task_fix_setuid(struct cred *new, return cap_task_fix_setuid(new, old, flags); } +static inline int security_task_fix_setgid(struct cred *new, + const struct cred *old, + int flags) +{ + return 0; +} + static inline int security_task_setpgid(struct task_struct *p, pid_t pgid) { return 0; @@ -1282,6 +1293,28 @@ static inline int security_locked_down(enum lockdown_reason what) } #endif /* CONFIG_SECURITY */ +#if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) +int security_post_notification(const struct cred *w_cred, + const struct cred *cred, + struct watch_notification *n); +#else +static inline int security_post_notification(const struct cred *w_cred, + const struct cred *cred, + struct watch_notification *n) +{ + return 0; +} +#endif + +#if defined(CONFIG_SECURITY) && defined(CONFIG_KEY_NOTIFICATIONS) +int security_watch_key(struct key *key); +#else +static inline int security_watch_key(struct key *key) +{ + return 0; +} +#endif + #ifdef CONFIG_SECURITY_NETWORK int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk); @@ -1750,8 +1783,8 @@ static inline int security_path_chroot(const struct path *path) int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags); void security_key_free(struct key *key); -int security_key_permission(key_ref_t key_ref, - const struct cred *cred, unsigned perm); +int security_key_permission(key_ref_t key_ref, const struct cred *cred, + enum key_need_perm need_perm); int security_key_getsecurity(struct key *key, char **_buffer); #else @@ -1769,7 +1802,7 @@ static inline void security_key_free(struct key *key) static inline int security_key_permission(key_ref_t key_ref, const struct cred *cred, - unsigned perm) + enum key_need_perm need_perm) { return 0; } diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 8b97204f35a7..54bc20496392 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -1,36 +1,15 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_SEQLOCK_H #define __LINUX_SEQLOCK_H + /* - * Reader/writer consistent mechanism without starving writers. This type of - * lock for data where the reader wants a consistent set of information - * and is willing to retry if the information changes. There are two types - * of readers: - * 1. Sequence readers which never block a writer but they may have to retry - * if a writer is in progress by detecting change in sequence number. - * Writers do not wait for a sequence reader. - * 2. Locking readers which will wait if a writer or another locking reader - * is in progress. A locking reader in progress will also block a writer - * from going forward. Unlike the regular rwlock, the read lock here is - * exclusive so that only one locking reader can get it. - * - * This is not as cache friendly as brlock. Also, this may not work well - * for data that contains pointers, because any writer could - * invalidate a pointer that a reader was following. - * - * Expected non-blocking reader usage: - * do { - * seq = read_seqbegin(&foo); - * ... - * } while (read_seqretry(&foo, seq)); - * + * seqcount_t / seqlock_t - a reader-writer consistency mechanism with + * lockless readers (read-only retry loops), and no writer starvation. * - * On non-SMP the spin locks disappear but the writer still needs - * to increment the sequence variables because an interrupt routine could - * change the state of the data. + * See Documentation/locking/seqlock.rst * - * Based on x86_64 vsyscall gettimeofday - * by Keith Owens and Andrea Arcangeli + * Copyrights: + * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli */ #include <linux/spinlock.h> @@ -41,8 +20,8 @@ #include <asm/processor.h> /* - * The seqlock interface does not prescribe a precise sequence of read - * begin/retry/end. For readers, typically there is a call to + * The seqlock seqcount_t interface does not prescribe a precise sequence of + * read begin/retry/end. For readers, typically there is a call to * read_seqcount_begin() and read_seqcount_retry(), however, there are more * esoteric cases which do not follow this pattern. * @@ -50,16 +29,30 @@ * via seqcount_t under KCSAN: upon beginning a seq-reader critical section, * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as * atomics; if there is a matching read_seqcount_retry() call, no following - * memory operations are considered atomic. Usage of seqlocks via seqlock_t - * interface is not affected. + * memory operations are considered atomic. Usage of the seqlock_t interface + * is not affected. */ #define KCSAN_SEQLOCK_REGION_MAX 1000 /* - * Version using sequence counter only. - * This can be used when code has its own mutex protecting the - * updating starting before the write_seqcountbeqin() and ending - * after the write_seqcount_end(). + * Sequence counters (seqcount_t) + * + * This is the raw counting mechanism, without any writer protection. + * + * Write side critical sections must be serialized and non-preemptible. + * + * If readers can be invoked from hardirq or softirq contexts, + * interrupts or bottom halves must also be respectively disabled before + * entering the write section. + * + * This mechanism can't be used if the protected data contains pointers, + * as the writer can invalidate a pointer that a reader is following. + * + * If it's desired to automatically handle the sequence counter writer + * serialization and non-preemptibility requirements, use a sequential + * lock (seqlock_t) instead. + * + * See Documentation/locking/seqlock.rst */ typedef struct seqcount { unsigned sequence; @@ -82,6 +75,10 @@ static inline void __seqcount_init(seqcount_t *s, const char *name, # define SEQCOUNT_DEP_MAP_INIT(lockname) \ .dep_map = { .name = #lockname } \ +/** + * seqcount_init() - runtime initializer for seqcount_t + * @s: Pointer to the seqcount_t instance + */ # define seqcount_init(s) \ do { \ static struct lock_class_key __key; \ @@ -105,13 +102,15 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) # define seqcount_lockdep_reader_access(x) #endif -#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)} - +/** + * SEQCNT_ZERO() - static initializer for seqcount_t + * @name: Name of the seqcount_t instance + */ +#define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) } /** - * __read_seqcount_begin - begin a seq-read critical section (without barrier) - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier + * @s: Pointer to seqcount_t * * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() * barrier. Callers should ensure that smp_rmb() or equivalent ordering is @@ -120,6 +119,8 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * * Use carefully, only in critical code, and comment how the barrier is * provided. + * + * Return: count to be passed to read_seqcount_retry() */ static inline unsigned __read_seqcount_begin(const seqcount_t *s) { @@ -136,30 +137,10 @@ repeat: } /** - * raw_read_seqcount - Read the raw seqcount - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep + * @s: Pointer to seqcount_t * - * raw_read_seqcount opens a read critical section of the given - * seqcount without any lockdep checking and without checking or - * masking the LSB. Calling code is responsible for handling that. - */ -static inline unsigned raw_read_seqcount(const seqcount_t *s) -{ - unsigned ret = READ_ONCE(s->sequence); - smp_rmb(); - kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); - return ret; -} - -/** - * raw_read_seqcount_begin - start seq-read critical section w/o lockdep - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry - * - * raw_read_seqcount_begin opens a read critical section of the given - * seqcount, but without any lockdep checking. Validity of the critical - * section is tested by checking read_seqcount_retry function. + * Return: count to be passed to read_seqcount_retry() */ static inline unsigned raw_read_seqcount_begin(const seqcount_t *s) { @@ -169,13 +150,10 @@ static inline unsigned raw_read_seqcount_begin(const seqcount_t *s) } /** - * read_seqcount_begin - begin a seq-read critical section - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * read_seqcount_begin() - begin a seqcount_t read critical section + * @s: Pointer to seqcount_t * - * read_seqcount_begin opens a read critical section of the given seqcount. - * Validity of the critical section is tested by checking read_seqcount_retry - * function. + * Return: count to be passed to read_seqcount_retry() */ static inline unsigned read_seqcount_begin(const seqcount_t *s) { @@ -184,32 +162,54 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s) } /** - * raw_seqcount_begin - begin a seq-read critical section - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * raw_read_seqcount() - read the raw seqcount_t counter value + * @s: Pointer to seqcount_t * - * raw_seqcount_begin opens a read critical section of the given seqcount. - * Validity of the critical section is tested by checking read_seqcount_retry - * function. + * raw_read_seqcount opens a read critical section of the given + * seqcount_t, without any lockdep checking, and without checking or + * masking the sequence counter LSB. Calling code is responsible for + * handling that. * - * Unlike read_seqcount_begin(), this function will not wait for the count - * to stabilize. If a writer is active when we begin, we will fail the - * read_seqcount_retry() instead of stabilizing at the beginning of the - * critical section. + * Return: count to be passed to read_seqcount_retry() */ -static inline unsigned raw_seqcount_begin(const seqcount_t *s) +static inline unsigned raw_read_seqcount(const seqcount_t *s) { unsigned ret = READ_ONCE(s->sequence); smp_rmb(); kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); - return ret & ~1; + return ret; } /** - * __read_seqcount_retry - end a seq-read critical section (without barrier) - * @s: pointer to seqcount_t - * @start: count, from read_seqcount_begin - * Returns: 1 if retry is required, else 0 + * raw_seqcount_begin() - begin a seqcount_t read critical section w/o + * lockdep and w/o counter stabilization + * @s: Pointer to seqcount_t + * + * raw_seqcount_begin opens a read critical section of the given + * seqcount_t. Unlike read_seqcount_begin(), this function will not wait + * for the count to stabilize. If a writer is active when it begins, it + * will fail the read_seqcount_retry() at the end of the read critical + * section instead of stabilizing at the beginning of it. + * + * Use this only in special kernel hot paths where the read section is + * small and has a high probability of success through other external + * means. It will save a single branching instruction. + * + * Return: count to be passed to read_seqcount_retry() + */ +static inline unsigned raw_seqcount_begin(const seqcount_t *s) +{ + /* + * If the counter is odd, let read_seqcount_retry() fail + * by decrementing the counter. + */ + return raw_read_seqcount(s) & ~1; +} + +/** + * __read_seqcount_retry() - end a seqcount_t read section w/o barrier + * @s: Pointer to seqcount_t + * @start: count, from read_seqcount_begin() * * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb() * barrier. Callers should ensure that smp_rmb() or equivalent ordering is @@ -218,6 +218,8 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s) * * Use carefully, only in critical code, and comment how the barrier is * provided. + * + * Return: true if a read section retry is required, else false */ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -226,14 +228,15 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) } /** - * read_seqcount_retry - end a seq-read critical section - * @s: pointer to seqcount_t - * @start: count, from read_seqcount_begin - * Returns: 1 if retry is required, else 0 + * read_seqcount_retry() - end a seqcount_t read critical section + * @s: Pointer to seqcount_t + * @start: count, from read_seqcount_begin() * - * read_seqcount_retry closes a read critical section of the given seqcount. - * If the critical section was invalid, it must be ignored (and typically - * retried). + * read_seqcount_retry closes the read critical section of given + * seqcount_t. If the critical section was invalid, it must be ignored + * (and typically retried). + * + * Return: true if a read section retry is required, else false */ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -241,8 +244,10 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) return __read_seqcount_retry(s, start); } - - +/** + * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep + * @s: Pointer to seqcount_t + */ static inline void raw_write_seqcount_begin(seqcount_t *s) { kcsan_nestable_atomic_begin(); @@ -250,6 +255,10 @@ static inline void raw_write_seqcount_begin(seqcount_t *s) smp_wmb(); } +/** + * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep + * @s: Pointer to seqcount_t + */ static inline void raw_write_seqcount_end(seqcount_t *s) { smp_wmb(); @@ -257,45 +266,104 @@ static inline void raw_write_seqcount_end(seqcount_t *s) kcsan_nestable_atomic_end(); } +static inline void __write_seqcount_begin_nested(seqcount_t *s, int subclass) +{ + raw_write_seqcount_begin(s); + seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); +} + /** - * raw_write_seqcount_barrier - do a seq write barrier - * @s: pointer to seqcount_t + * write_seqcount_begin_nested() - start a seqcount_t write section with + * custom lockdep nesting level + * @s: Pointer to seqcount_t + * @subclass: lockdep nesting level * - * This can be used to provide an ordering guarantee instead of the - * usual consistency guarantee. It is one wmb cheaper, because we can - * collapse the two back-to-back wmb()s. + * See Documentation/locking/lockdep-design.rst + */ +static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) +{ + lockdep_assert_preemption_disabled(); + __write_seqcount_begin_nested(s, subclass); +} + +/* + * A write_seqcount_begin() variant w/o lockdep non-preemptibility checks. + * + * Use for internal seqlock.h code where it's known that preemption is + * already disabled. For example, seqlock_t write side functions. + */ +static inline void __write_seqcount_begin(seqcount_t *s) +{ + __write_seqcount_begin_nested(s, 0); +} + +/** + * write_seqcount_begin() - start a seqcount_t write side critical section + * @s: Pointer to seqcount_t + * + * write_seqcount_begin opens a write side critical section of the given + * seqcount_t. + * + * Context: seqcount_t write side critical sections must be serialized and + * non-preemptible. If readers can be invoked from hardirq or softirq + * context, interrupts or bottom halves must be respectively disabled. + */ +static inline void write_seqcount_begin(seqcount_t *s) +{ + write_seqcount_begin_nested(s, 0); +} + +/** + * write_seqcount_end() - end a seqcount_t write side critical section + * @s: Pointer to seqcount_t + * + * The write section must've been opened with write_seqcount_begin(). + */ +static inline void write_seqcount_end(seqcount_t *s) +{ + seqcount_release(&s->dep_map, _RET_IP_); + raw_write_seqcount_end(s); +} + +/** + * raw_write_seqcount_barrier() - do a seqcount_t write barrier + * @s: Pointer to seqcount_t + * + * This can be used to provide an ordering guarantee instead of the usual + * consistency guarantee. It is one wmb cheaper, because it can collapse + * the two back-to-back wmb()s. * * Note that writes surrounding the barrier should be declared atomic (e.g. * via WRITE_ONCE): a) to ensure the writes become visible to other threads * atomically, avoiding compiler optimizations; b) to document which writes are * meant to propagate to the reader critical section. This is necessary because * neither writes before and after the barrier are enclosed in a seq-writer - * critical section that would ensure readers are aware of ongoing writes. + * critical section that would ensure readers are aware of ongoing writes:: * - * seqcount_t seq; - * bool X = true, Y = false; + * seqcount_t seq; + * bool X = true, Y = false; * - * void read(void) - * { - * bool x, y; + * void read(void) + * { + * bool x, y; * - * do { - * int s = read_seqcount_begin(&seq); + * do { + * int s = read_seqcount_begin(&seq); * - * x = X; y = Y; + * x = X; y = Y; * - * } while (read_seqcount_retry(&seq, s)); + * } while (read_seqcount_retry(&seq, s)); * - * BUG_ON(!x && !y); + * BUG_ON(!x && !y); * } * * void write(void) * { - * WRITE_ONCE(Y, true); + * WRITE_ONCE(Y, true); * - * raw_write_seqcount_barrier(seq); + * raw_write_seqcount_barrier(seq); * - * WRITE_ONCE(X, false); + * WRITE_ONCE(X, false); * } */ static inline void raw_write_seqcount_barrier(seqcount_t *s) @@ -307,6 +375,37 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) kcsan_nestable_atomic_end(); } +/** + * write_seqcount_invalidate() - invalidate in-progress seqcount_t read + * side operations + * @s: Pointer to seqcount_t + * + * After write_seqcount_invalidate, no seqcount_t read side operations + * will complete successfully and see data older than this. + */ +static inline void write_seqcount_invalidate(seqcount_t *s) +{ + smp_wmb(); + kcsan_nestable_atomic_begin(); + s->sequence+=2; + kcsan_nestable_atomic_end(); +} + +/** + * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy + * @s: Pointer to seqcount_t + * + * Use seqcount_t latching to switch between two storage places protected + * by a sequence counter. Doing so allows having interruptible, preemptible, + * seqcount_t write side critical sections. + * + * Check raw_write_seqcount_latch() for more details and a full reader and + * writer usage example. + * + * Return: sequence counter raw value. Use the lowest bit as an index for + * picking which data copy to read. The full counter value must then be + * checked with read_seqcount_retry(). + */ static inline int raw_read_seqcount_latch(seqcount_t *s) { /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ @@ -315,8 +414,8 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) } /** - * raw_write_seqcount_latch - redirect readers to even/odd copy - * @s: pointer to seqcount_t + * raw_write_seqcount_latch() - redirect readers to even/odd copy + * @s: Pointer to seqcount_t * * The latch technique is a multiversion concurrency control method that allows * queries during non-atomic modifications. If you can guarantee queries never @@ -332,64 +431,68 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) * Very simply put: we first modify one copy and then the other. This ensures * there is always one copy in a stable state, ready to give us an answer. * - * The basic form is a data structure like: + * The basic form is a data structure like:: * - * struct latch_struct { - * seqcount_t seq; - * struct data_struct data[2]; - * }; + * struct latch_struct { + * seqcount_t seq; + * struct data_struct data[2]; + * }; * * Where a modification, which is assumed to be externally serialized, does the - * following: + * following:: * - * void latch_modify(struct latch_struct *latch, ...) - * { - * smp_wmb(); <- Ensure that the last data[1] update is visible - * latch->seq++; - * smp_wmb(); <- Ensure that the seqcount update is visible + * void latch_modify(struct latch_struct *latch, ...) + * { + * smp_wmb(); // Ensure that the last data[1] update is visible + * latch->seq++; + * smp_wmb(); // Ensure that the seqcount update is visible * - * modify(latch->data[0], ...); + * modify(latch->data[0], ...); * - * smp_wmb(); <- Ensure that the data[0] update is visible - * latch->seq++; - * smp_wmb(); <- Ensure that the seqcount update is visible + * smp_wmb(); // Ensure that the data[0] update is visible + * latch->seq++; + * smp_wmb(); // Ensure that the seqcount update is visible * - * modify(latch->data[1], ...); - * } + * modify(latch->data[1], ...); + * } * - * The query will have a form like: + * The query will have a form like:: * - * struct entry *latch_query(struct latch_struct *latch, ...) - * { - * struct entry *entry; - * unsigned seq, idx; + * struct entry *latch_query(struct latch_struct *latch, ...) + * { + * struct entry *entry; + * unsigned seq, idx; * - * do { - * seq = raw_read_seqcount_latch(&latch->seq); + * do { + * seq = raw_read_seqcount_latch(&latch->seq); * - * idx = seq & 0x01; - * entry = data_query(latch->data[idx], ...); + * idx = seq & 0x01; + * entry = data_query(latch->data[idx], ...); * - * smp_rmb(); - * } while (seq != latch->seq); + * // read_seqcount_retry() includes needed smp_rmb() + * } while (read_seqcount_retry(&latch->seq, seq)); * - * return entry; - * } + * return entry; + * } * * So during the modification, queries are first redirected to data[1]. Then we * modify data[0]. When that is complete, we redirect queries back to data[0] * and we can modify data[1]. * - * NOTE: The non-requirement for atomic modifications does _NOT_ include - * the publishing of new entries in the case where data is a dynamic - * data structure. + * NOTE: * - * An iteration might start in data[0] and get suspended long enough - * to miss an entire modification sequence, once it resumes it might - * observe the new entry. + * The non-requirement for atomic modifications does _NOT_ include + * the publishing of new entries in the case where data is a dynamic + * data structure. * - * NOTE: When data is a dynamic data structure; one should use regular RCU - * patterns to manage the lifetimes of the objects within. + * An iteration might start in data[0] and get suspended long enough + * to miss an entire modification sequence, once it resumes it might + * observe the new entry. + * + * NOTE: + * + * When data is a dynamic data structure; one should use regular RCU + * patterns to manage the lifetimes of the objects within. */ static inline void raw_write_seqcount_latch(seqcount_t *s) { @@ -399,67 +502,48 @@ static inline void raw_write_seqcount_latch(seqcount_t *s) } /* - * Sequence counter only version assumes that callers are using their - * own mutexing. - */ -static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) -{ - raw_write_seqcount_begin(s); - seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); -} - -static inline void write_seqcount_begin(seqcount_t *s) -{ - write_seqcount_begin_nested(s, 0); -} - -static inline void write_seqcount_end(seqcount_t *s) -{ - seqcount_release(&s->dep_map, _RET_IP_); - raw_write_seqcount_end(s); -} - -/** - * write_seqcount_invalidate - invalidate in-progress read-side seq operations - * @s: pointer to seqcount_t + * Sequential locks (seqlock_t) * - * After write_seqcount_invalidate, no read-side seq operations will complete - * successfully and see data older than this. + * Sequence counters with an embedded spinlock for writer serialization + * and non-preemptibility. + * + * For more info, see: + * - Comments on top of seqcount_t + * - Documentation/locking/seqlock.rst */ -static inline void write_seqcount_invalidate(seqcount_t *s) -{ - smp_wmb(); - kcsan_nestable_atomic_begin(); - s->sequence+=2; - kcsan_nestable_atomic_end(); -} - typedef struct { struct seqcount seqcount; spinlock_t lock; } seqlock_t; -/* - * These macros triggered gcc-3.x compile-time problems. We think these are - * OK now. Be cautious. - */ #define __SEQLOCK_UNLOCKED(lockname) \ { \ .seqcount = SEQCNT_ZERO(lockname), \ .lock = __SPIN_LOCK_UNLOCKED(lockname) \ } -#define seqlock_init(x) \ +/** + * seqlock_init() - dynamic initializer for seqlock_t + * @sl: Pointer to the seqlock_t instance + */ +#define seqlock_init(sl) \ do { \ - seqcount_init(&(x)->seqcount); \ - spin_lock_init(&(x)->lock); \ + seqcount_init(&(sl)->seqcount); \ + spin_lock_init(&(sl)->lock); \ } while (0) -#define DEFINE_SEQLOCK(x) \ - seqlock_t x = __SEQLOCK_UNLOCKED(x) +/** + * DEFINE_SEQLOCK() - Define a statically allocated seqlock_t + * @sl: Name of the seqlock_t instance + */ +#define DEFINE_SEQLOCK(sl) \ + seqlock_t sl = __SEQLOCK_UNLOCKED(sl) -/* - * Read side functions for starting and finalizing a read side section. +/** + * read_seqbegin() - start a seqlock_t read side critical section + * @sl: Pointer to seqlock_t + * + * Return: count, to be passed to read_seqretry() */ static inline unsigned read_seqbegin(const seqlock_t *sl) { @@ -470,6 +554,17 @@ static inline unsigned read_seqbegin(const seqlock_t *sl) return ret; } +/** + * read_seqretry() - end a seqlock_t read side section + * @sl: Pointer to seqlock_t + * @start: count, from read_seqbegin() + * + * read_seqretry closes the read side critical section of given seqlock_t. + * If the critical section was invalid, it must be ignored (and typically + * retried). + * + * Return: true if a read section retry is required, else false + */ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { /* @@ -481,41 +576,85 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) return read_seqcount_retry(&sl->seqcount, start); } -/* - * Lock out other writers and update the count. - * Acts like a normal spin_lock/unlock. - * Don't need preempt_disable() because that is in the spin_lock already. +/** + * write_seqlock() - start a seqlock_t write side critical section + * @sl: Pointer to seqlock_t + * + * write_seqlock opens a write side critical section for the given + * seqlock_t. It also implicitly acquires the spinlock_t embedded inside + * that sequential lock. All seqlock_t write side sections are thus + * automatically serialized and non-preemptible. + * + * Context: if the seqlock_t read section, or other write side critical + * sections, can be invoked from hardirq or softirq contexts, use the + * _irqsave or _bh variants of this function instead. */ static inline void write_seqlock(seqlock_t *sl) { spin_lock(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); } +/** + * write_sequnlock() - end a seqlock_t write side critical section + * @sl: Pointer to seqlock_t + * + * write_sequnlock closes the (serialized and non-preemptible) write side + * critical section of given seqlock_t. + */ static inline void write_sequnlock(seqlock_t *sl) { write_seqcount_end(&sl->seqcount); spin_unlock(&sl->lock); } +/** + * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section + * @sl: Pointer to seqlock_t + * + * _bh variant of write_seqlock(). Use only if the read side section, or + * other write side sections, can be invoked from softirq contexts. + */ static inline void write_seqlock_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); } +/** + * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section + * @sl: Pointer to seqlock_t + * + * write_sequnlock_bh closes the serialized, non-preemptible, and + * softirqs-disabled, seqlock_t write side critical section opened with + * write_seqlock_bh(). + */ static inline void write_sequnlock_bh(seqlock_t *sl) { write_seqcount_end(&sl->seqcount); spin_unlock_bh(&sl->lock); } +/** + * write_seqlock_irq() - start a non-interruptible seqlock_t write section + * @sl: Pointer to seqlock_t + * + * _irq variant of write_seqlock(). Use only if the read side section, or + * other write sections, can be invoked from hardirq contexts. + */ static inline void write_seqlock_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); } +/** + * write_sequnlock_irq() - end a non-interruptible seqlock_t write section + * @sl: Pointer to seqlock_t + * + * write_sequnlock_irq closes the serialized and non-interruptible + * seqlock_t write side section opened with write_seqlock_irq(). + */ static inline void write_sequnlock_irq(seqlock_t *sl) { write_seqcount_end(&sl->seqcount); @@ -527,13 +666,32 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) unsigned long flags; spin_lock_irqsave(&sl->lock, flags); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); return flags; } +/** + * write_seqlock_irqsave() - start a non-interruptible seqlock_t write + * section + * @lock: Pointer to seqlock_t + * @flags: Stack-allocated storage for saving caller's local interrupt + * state, to be passed to write_sequnlock_irqrestore(). + * + * _irqsave variant of write_seqlock(). Use it only if the read side + * section, or other write sections, can be invoked from hardirq context. + */ #define write_seqlock_irqsave(lock, flags) \ do { flags = __write_seqlock_irqsave(lock); } while (0) +/** + * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write + * section + * @sl: Pointer to seqlock_t + * @flags: Caller's saved interrupt state, from write_seqlock_irqsave() + * + * write_sequnlock_irqrestore closes the serialized and non-interruptible + * seqlock_t write section previously opened with write_seqlock_irqsave(). + */ static inline void write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) { @@ -541,65 +699,79 @@ write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) spin_unlock_irqrestore(&sl->lock, flags); } -/* - * A locking reader exclusively locks out other writers and locking readers, - * but doesn't update the sequence number. Acts like a normal spin_lock/unlock. - * Don't need preempt_disable() because that is in the spin_lock already. +/** + * read_seqlock_excl() - begin a seqlock_t locking reader section + * @sl: Pointer to seqlock_t + * + * read_seqlock_excl opens a seqlock_t locking reader critical section. A + * locking reader exclusively locks out *both* other writers *and* other + * locking readers, but it does not update the embedded sequence number. + * + * Locking readers act like a normal spin_lock()/spin_unlock(). + * + * Context: if the seqlock_t write section, *or other read sections*, can + * be invoked from hardirq or softirq contexts, use the _irqsave or _bh + * variant of this function instead. + * + * The opened read section must be closed with read_sequnlock_excl(). */ static inline void read_seqlock_excl(seqlock_t *sl) { spin_lock(&sl->lock); } +/** + * read_sequnlock_excl() - end a seqlock_t locking reader critical section + * @sl: Pointer to seqlock_t + */ static inline void read_sequnlock_excl(seqlock_t *sl) { spin_unlock(&sl->lock); } /** - * read_seqbegin_or_lock - begin a sequence number check or locking block - * @lock: sequence lock - * @seq : sequence number to be checked + * read_seqlock_excl_bh() - start a seqlock_t locking reader section with + * softirqs disabled + * @sl: Pointer to seqlock_t * - * First try it once optimistically without taking the lock. If that fails, - * take the lock. The sequence number is also used as a marker for deciding - * whether to be a reader (even) or writer (odd). - * N.B. seq must be initialized to an even number to begin with. + * _bh variant of read_seqlock_excl(). Use this variant only if the + * seqlock_t write side section, *or other read sections*, can be invoked + * from softirq contexts. */ -static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) -{ - if (!(*seq & 1)) /* Even */ - *seq = read_seqbegin(lock); - else /* Odd */ - read_seqlock_excl(lock); -} - -static inline int need_seqretry(seqlock_t *lock, int seq) -{ - return !(seq & 1) && read_seqretry(lock, seq); -} - -static inline void done_seqretry(seqlock_t *lock, int seq) -{ - if (seq & 1) - read_sequnlock_excl(lock); -} - static inline void read_seqlock_excl_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); } +/** + * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking + * reader section + * @sl: Pointer to seqlock_t + */ static inline void read_sequnlock_excl_bh(seqlock_t *sl) { spin_unlock_bh(&sl->lock); } +/** + * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking + * reader section + * @sl: Pointer to seqlock_t + * + * _irq variant of read_seqlock_excl(). Use this only if the seqlock_t + * write side section, *or other read sections*, can be invoked from a + * hardirq context. + */ static inline void read_seqlock_excl_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); } +/** + * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t + * locking reader section + * @sl: Pointer to seqlock_t + */ static inline void read_sequnlock_excl_irq(seqlock_t *sl) { spin_unlock_irq(&sl->lock); @@ -613,15 +785,117 @@ static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl) return flags; } +/** + * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t + * locking reader section + * @lock: Pointer to seqlock_t + * @flags: Stack-allocated storage for saving caller's local interrupt + * state, to be passed to read_sequnlock_excl_irqrestore(). + * + * _irqsave variant of read_seqlock_excl(). Use this only if the seqlock_t + * write side section, *or other read sections*, can be invoked from a + * hardirq context. + */ #define read_seqlock_excl_irqsave(lock, flags) \ do { flags = __read_seqlock_excl_irqsave(lock); } while (0) +/** + * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t + * locking reader section + * @sl: Pointer to seqlock_t + * @flags: Caller saved interrupt state, from read_seqlock_excl_irqsave() + */ static inline void read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags) { spin_unlock_irqrestore(&sl->lock, flags); } +/** + * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader + * @lock: Pointer to seqlock_t + * @seq : Marker and return parameter. If the passed value is even, the + * reader will become a *lockless* seqlock_t reader as in read_seqbegin(). + * If the passed value is odd, the reader will become a *locking* reader + * as in read_seqlock_excl(). In the first call to this function, the + * caller *must* initialize and pass an even value to @seq; this way, a + * lockless read can be optimistically tried first. + * + * read_seqbegin_or_lock is an API designed to optimistically try a normal + * lockless seqlock_t read section first. If an odd counter is found, the + * lockless read trial has failed, and the next read iteration transforms + * itself into a full seqlock_t locking reader. + * + * This is typically used to avoid seqlock_t lockless readers starvation + * (too much retry loops) in the case of a sharp spike in write side + * activity. + * + * Context: if the seqlock_t write section, *or other read sections*, can + * be invoked from hardirq or softirq contexts, use the _irqsave or _bh + * variant of this function instead. + * + * Check Documentation/locking/seqlock.rst for template example code. + * + * Return: the encountered sequence counter value, through the @seq + * parameter, which is overloaded as a return parameter. This returned + * value must be checked with need_seqretry(). If the read section need to + * be retried, this returned value must also be passed as the @seq + * parameter of the next read_seqbegin_or_lock() iteration. + */ +static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) +{ + if (!(*seq & 1)) /* Even */ + *seq = read_seqbegin(lock); + else /* Odd */ + read_seqlock_excl(lock); +} + +/** + * need_seqretry() - validate seqlock_t "locking or lockless" read section + * @lock: Pointer to seqlock_t + * @seq: sequence count, from read_seqbegin_or_lock() + * + * Return: true if a read section retry is required, false otherwise + */ +static inline int need_seqretry(seqlock_t *lock, int seq) +{ + return !(seq & 1) && read_seqretry(lock, seq); +} + +/** + * done_seqretry() - end seqlock_t "locking or lockless" reader section + * @lock: Pointer to seqlock_t + * @seq: count, from read_seqbegin_or_lock() + * + * done_seqretry finishes the seqlock_t read side critical section started + * with read_seqbegin_or_lock() and validated by need_seqretry(). + */ +static inline void done_seqretry(seqlock_t *lock, int seq) +{ + if (seq & 1) + read_sequnlock_excl(lock); +} + +/** + * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or + * a non-interruptible locking reader + * @lock: Pointer to seqlock_t + * @seq: Marker and return parameter. Check read_seqbegin_or_lock(). + * + * This is the _irqsave variant of read_seqbegin_or_lock(). Use it only if + * the seqlock_t write section, *or other read sections*, can be invoked + * from hardirq context. + * + * Note: Interrupts will be disabled only for "locking reader" mode. + * + * Return: + * + * 1. The saved local interrupts state in case of a locking reader, to + * be passed to done_seqretry_irqrestore(). + * + * 2. The encountered sequence counter value, returned through @seq + * overloaded as a return parameter. Check read_seqbegin_or_lock(). + */ static inline unsigned long read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq) { @@ -635,6 +909,18 @@ read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq) return flags; } +/** + * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a + * non-interruptible locking reader section + * @lock: Pointer to seqlock_t + * @seq: Count, from read_seqbegin_or_lock_irqsave() + * @flags: Caller's saved local interrupt state in case of a locking + * reader, also from read_seqbegin_or_lock_irqsave() + * + * This is the _irqrestore variant of done_seqretry(). The read section + * must've been opened with read_seqbegin_or_lock_irqsave(), and validated + * by need_seqretry(). + */ static inline void done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags) { diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 9fd550e7946a..791f4844efeb 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -462,10 +462,104 @@ extern void uart_handle_cts_change(struct uart_port *uport, extern void uart_insert_char(struct uart_port *port, unsigned int status, unsigned int overrun, unsigned int ch, unsigned int flag); -extern int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch); -extern int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch); -extern void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long flags); -extern int uart_handle_break(struct uart_port *port); +#ifdef CONFIG_MAGIC_SYSRQ_SERIAL +#define SYSRQ_TIMEOUT (HZ * 5) + +bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch); + +static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) +{ + if (!port->sysrq) + return 0; + + if (ch && time_before(jiffies, port->sysrq)) { + if (sysrq_mask()) { + handle_sysrq(ch); + port->sysrq = 0; + return 1; + } + if (uart_try_toggle_sysrq(port, ch)) + return 1; + } + port->sysrq = 0; + + return 0; +} + +static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) +{ + if (!port->sysrq) + return 0; + + if (ch && time_before(jiffies, port->sysrq)) { + if (sysrq_mask()) { + port->sysrq_ch = ch; + port->sysrq = 0; + return 1; + } + if (uart_try_toggle_sysrq(port, ch)) + return 1; + } + port->sysrq = 0; + + return 0; +} + +static inline void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) +{ + int sysrq_ch; + + if (!port->has_sysrq) { + spin_unlock_irqrestore(&port->lock, irqflags); + return; + } + + sysrq_ch = port->sysrq_ch; + port->sysrq_ch = 0; + + spin_unlock_irqrestore(&port->lock, irqflags); + + if (sysrq_ch) + handle_sysrq(sysrq_ch); +} +#else /* CONFIG_MAGIC_SYSRQ_SERIAL */ +static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) +{ + return 0; +} +static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) +{ + return 0; +} +static inline void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) +{ + spin_unlock_irqrestore(&port->lock, irqflags); +} +#endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ + +/* + * We do the SysRQ and SAK checking like this... + */ +static inline int uart_handle_break(struct uart_port *port) +{ + struct uart_state *state = port->state; + + if (port->handle_break) + port->handle_break(port); + +#ifdef CONFIG_MAGIC_SYSRQ_SERIAL + if (port->has_sysrq && uart_console(port)) { + if (!port->sysrq) { + port->sysrq = jiffies + SYSRQ_TIMEOUT; + return 1; + } + port->sysrq = 0; + } +#endif + if (port->flags & UPF_SAK) + do_SAK(state->port.tty); + return 0; +} /* * UART_ENABLE_MS - determine if port should enable modem status irqs diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index 86281ac7c305..860e0f843c12 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -26,7 +26,7 @@ static inline int set_direct_map_default_noflush(struct page *page) #endif #ifndef set_mce_nospec -static inline int set_mce_nospec(unsigned long pfn) +static inline int set_mce_nospec(unsigned long pfn, bool unmap) { return 0; } diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 08674cd14d5a..1e9ed840b9fc 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -430,6 +430,19 @@ static inline void psock_set_prog(struct bpf_prog **pprog, bpf_prog_put(prog); } +static inline int psock_replace_prog(struct bpf_prog **pprog, + struct bpf_prog *prog, + struct bpf_prog *old) +{ + if (cmpxchg(pprog, old, prog) != old) + return -ENOENT; + + if (old) + bpf_prog_put(old); + + return 0; +} + static inline void psock_progs_drop(struct sk_psock_progs *progs) { psock_set_prog(&progs->msg_parser, NULL); diff --git a/include/linux/smp.h b/include/linux/smp.h index 7ee202ad21a6..80d557ef8a11 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -12,29 +12,22 @@ #include <linux/list.h> #include <linux/cpumask.h> #include <linux/init.h> -#include <linux/llist.h> +#include <linux/smp_types.h> typedef void (*smp_call_func_t)(void *info); typedef bool (*smp_cond_func_t)(int cpu, void *info); -enum { - CSD_FLAG_LOCK = 0x01, - - /* IRQ_WORK_flags */ - - CSD_TYPE_ASYNC = 0x00, - CSD_TYPE_SYNC = 0x10, - CSD_TYPE_IRQ_WORK = 0x20, - CSD_TYPE_TTWU = 0x30, - CSD_FLAG_TYPE_MASK = 0xF0, -}; - /* * structure shares (partial) layout with struct irq_work */ struct __call_single_data { - struct llist_node llist; - unsigned int flags; + union { + struct __call_single_node node; + struct { + struct llist_node llist; + unsigned int flags; + }; + }; smp_call_func_t func; void *info; }; diff --git a/include/linux/smp_types.h b/include/linux/smp_types.h new file mode 100644 index 000000000000..364b3ae3e41d --- /dev/null +++ b/include/linux/smp_types.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_SMP_TYPES_H +#define __LINUX_SMP_TYPES_H + +#include <linux/llist.h> + +enum { + CSD_FLAG_LOCK = 0x01, + + IRQ_WORK_PENDING = 0x01, + IRQ_WORK_BUSY = 0x02, + IRQ_WORK_LAZY = 0x04, /* No IPI, wait for tick */ + IRQ_WORK_HARD_IRQ = 0x08, /* IRQ context on PREEMPT_RT */ + + IRQ_WORK_CLAIMED = (IRQ_WORK_PENDING | IRQ_WORK_BUSY), + + CSD_TYPE_ASYNC = 0x00, + CSD_TYPE_SYNC = 0x10, + CSD_TYPE_IRQ_WORK = 0x20, + CSD_TYPE_TTWU = 0x30, + + CSD_FLAG_TYPE_MASK = 0xF0, +}; + +/* + * struct __call_single_node is the primary type on + * smp.c:call_single_queue. + * + * flush_smp_call_function_queue() only reads the type from + * __call_single_node::u_flags as a regular load, the above + * (anonymous) enum defines all the bits of this word. + * + * Other bits are not modified until the type is known. + * + * CSD_TYPE_SYNC/ASYNC: + * struct { + * struct llist_node node; + * unsigned int flags; + * smp_call_func_t func; + * void *info; + * }; + * + * CSD_TYPE_IRQ_WORK: + * struct { + * struct llist_node node; + * atomic_t flags; + * void (*func)(struct irq_work *); + * }; + * + * CSD_TYPE_TTWU: + * struct { + * struct llist_node node; + * unsigned int flags; + * }; + * + */ + +struct __call_single_node { + struct llist_node llist; + union { + unsigned int u_flags; + atomic_t a_flags; + }; +}; + +#endif /* __LINUX_SMP_TYPES_H */ diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index a74c1d5acdf3..2249ecaf77e4 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -121,6 +121,15 @@ int cmdq_pkt_wfe(struct cmdq_pkt *pkt, u16 event); int cmdq_pkt_clear_event(struct cmdq_pkt *pkt, u16 event); /** + * cmdq_pkt_set_event() - append set event command to the CMDQ packet + * @pkt: the CMDQ packet + * @event: the desired event to be set + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_set_event(struct cmdq_pkt *pkt, u16 event); + +/** * cmdq_pkt_poll() - Append polling command to the CMDQ packet, ask GCE to * execute an instruction that wait for a specified * hardware register to check for the value w/o mask. @@ -152,6 +161,28 @@ int cmdq_pkt_poll(struct cmdq_pkt *pkt, u8 subsys, */ int cmdq_pkt_poll_mask(struct cmdq_pkt *pkt, u8 subsys, u16 offset, u32 value, u32 mask); + +/** + * cmdq_pkt_assign() - Append logic assign command to the CMDQ packet, ask GCE + * to execute an instruction that set a constant value into + * internal register and use as value, mask or address in + * read/write instruction. + * @pkt: the CMDQ packet + * @reg_idx: the CMDQ internal register ID + * @value: the specified value + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_assign(struct cmdq_pkt *pkt, u16 reg_idx, u32 value); + +/** + * cmdq_pkt_finalize() - Append EOC and jump command to pkt. + * @pkt: the CMDQ packet + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_finalize(struct cmdq_pkt *pkt); + /** * cmdq_pkt_flush_async() - trigger CMDQ to asynchronously execute the CMDQ * packet and call back at the end of done packet diff --git a/include/linux/soc/ti/k3-ringacc.h b/include/linux/soc/ti/k3-ringacc.h index 26f73df0a524..7ac115432fa1 100644 --- a/include/linux/soc/ti/k3-ringacc.h +++ b/include/linux/soc/ti/k3-ringacc.h @@ -107,6 +107,10 @@ struct k3_ringacc *of_k3_ringacc_get_by_phandle(struct device_node *np, struct k3_ring *k3_ringacc_request_ring(struct k3_ringacc *ringacc, int id, u32 flags); +int k3_ringacc_request_rings_pair(struct k3_ringacc *ringacc, + int fwd_id, int compl_id, + struct k3_ring **fwd_ring, + struct k3_ring **compl_ring); /** * k3_ringacc_ring_reset - ring reset * @ring: pointer on Ring diff --git a/include/linux/soc/ti/ti_sci_inta_msi.h b/include/linux/soc/ti/ti_sci_inta_msi.h index 11fb5048f5f6..e3aa8b14612e 100644 --- a/include/linux/soc/ti/ti_sci_inta_msi.h +++ b/include/linux/soc/ti/ti_sci_inta_msi.h @@ -2,7 +2,7 @@ /* * Texas Instruments' K3 TI SCI INTA MSI helper * - * Copyright (C) 2018-2019 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2018-2019 Texas Instruments Incorporated - https://www.ti.com/ * Lokesh Vutla <lokeshvutla@ti.com> */ diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 9531ec823298..49c5d29cd33c 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -2,7 +2,7 @@ /* * Texas Instruments System Control Interface Protocol * - * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015-2016 Texas Instruments Incorporated - https://www.ti.com/ * Nishanth Menon */ @@ -226,8 +226,8 @@ struct ti_sci_rm_core_ops { * and destination * @set_event_map: Set an Event based peripheral irq to Interrupt * Aggregator. - * @free_irq: Free an an IRQ route between the requested source - * destination. + * @free_irq: Free an IRQ route between the requested source + * and destination. * @free_event_map: Free an event based peripheral irq to Interrupt * Aggregator. */ diff --git a/include/linux/spi/altera.h b/include/linux/spi/altera.h new file mode 100644 index 000000000000..2d42641499a6 --- /dev/null +++ b/include/linux/spi/altera.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Header File for Altera SPI Driver. + */ +#ifndef __LINUX_SPI_ALTERA_H +#define __LINUX_SPI_ALTERA_H + +#include <linux/regmap.h> +#include <linux/spi/spi.h> +#include <linux/types.h> + +/** + * struct altera_spi_platform_data - Platform data of the Altera SPI driver + * @mode_bits: Mode bits of SPI master. + * @num_chipselect: Number of chipselects. + * @bits_per_word_mask: bitmask of supported bits_per_word for transfers. + * @num_devices: Number of devices that shall be added when the driver + * is probed. + * @devices: The devices to add. + */ +struct altera_spi_platform_data { + u16 mode_bits; + u16 num_chipselect; + u32 bits_per_word_mask; + u16 num_devices; + struct spi_board_info *devices; +}; + +#endif /* __LINUX_SPI_ALTERA_H */ diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index af9ff2f0f1b2..159463cc659c 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -17,6 +17,7 @@ { \ .buswidth = __buswidth, \ .opcode = __opcode, \ + .nbytes = 1, \ } #define SPI_MEM_OP_ADDR(__nbytes, __val, __buswidth) \ @@ -69,11 +70,15 @@ enum spi_mem_data_dir { /** * struct spi_mem_op - describes a SPI memory operation + * @cmd.nbytes: number of opcode bytes (only 1 or 2 are valid). The opcode is + * sent MSB-first. * @cmd.buswidth: number of IO lines used to transmit the command * @cmd.opcode: operation opcode + * @cmd.dtr: whether the command opcode should be sent in DTR mode or not * @addr.nbytes: number of address bytes to send. Can be zero if the operation * does not need to send an address * @addr.buswidth: number of IO lines used to transmit the address cycles + * @addr.dtr: whether the address should be sent in DTR mode or not * @addr.val: address value. This value is always sent MSB first on the bus. * Note that only @addr.nbytes are taken into account in this * address value, so users should make sure the value fits in the @@ -81,7 +86,9 @@ enum spi_mem_data_dir { * @dummy.nbytes: number of dummy bytes to send after an opcode or address. Can * be zero if the operation does not require dummy bytes * @dummy.buswidth: number of IO lanes used to transmit the dummy bytes + * @dummy.dtr: whether the dummy bytes should be sent in DTR mode or not * @data.buswidth: number of IO lanes used to send/receive the data + * @data.dtr: whether the data should be sent in DTR mode or not * @data.dir: direction of the transfer * @data.nbytes: number of data bytes to send/receive. Can be zero if the * operation does not involve transferring data @@ -90,23 +97,28 @@ enum spi_mem_data_dir { */ struct spi_mem_op { struct { + u8 nbytes; u8 buswidth; - u8 opcode; + u8 dtr : 1; + u16 opcode; } cmd; struct { u8 nbytes; u8 buswidth; + u8 dtr : 1; u64 val; } addr; struct { u8 nbytes; u8 buswidth; + u8 dtr : 1; } dummy; struct { u8 buswidth; + u8 dtr : 1; enum spi_mem_data_dir dir; unsigned int nbytes; union { diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index aac57b5b7c21..99380c0825db 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -329,6 +329,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * every chipselect is connected to a slave. * @dma_alignment: SPI controller constraint on DMA buffers alignment. * @mode_bits: flags understood by this controller driver + * @buswidth_override_bits: flags to override for this controller driver * @bits_per_word_mask: A mask indicating which values of bits_per_word are * supported by the driver. Bit n indicates that a bits_per_word n+1 is * supported. If set, the SPI core will reject any transfer with an @@ -358,8 +359,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @cleanup: frees controller-specific state * @can_dma: determine whether this controller supports DMA * @queued: whether this controller is providing an internal message queue - * @kworker: thread struct for message pump - * @kworker_task: pointer to task for message pump kworker thread + * @kworker: pointer to thread struct for message pump * @pump_messages: work struct for scheduling work to the message pump * @queue_lock: spinlock to syncronise access to message queue * @queue: message queue @@ -368,6 +368,8 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @cur_msg_prepared: spi_prepare_message was called for the currently * in-flight message * @cur_msg_mapped: message has been mapped for DMA + * @last_cs_enable: was enable true on the last call to set_cs. + * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs. * @xfer_completion: used by core transfer_one_message() * @busy: message pump is busy * @running: message pump is running @@ -447,6 +449,8 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * If the driver does not set this, the SPI core takes the snapshot as * close to the driver hand-over as possible. * @irq_flags: Interrupt enable state during PTP system timestamping + * @fallback: fallback to pio if dma transfer return failure with + * SPI_TRANS_FAIL_NO_START. * * Each SPI controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals @@ -589,8 +593,7 @@ struct spi_controller { * Over time we expect SPI drivers to be phased over to this API. */ bool queued; - struct kthread_worker kworker; - struct task_struct *kworker_task; + struct kthread_worker *kworker; struct kthread_work pump_messages; spinlock_t queue_lock; struct list_head queue; @@ -602,6 +605,9 @@ struct spi_controller { bool auto_runtime_pm; bool cur_msg_prepared; bool cur_msg_mapped; + bool last_cs_enable; + bool last_cs_mode_high; + bool fallback; struct completion xfer_completion; size_t max_dma_len; @@ -841,12 +847,8 @@ extern void spi_res_release(struct spi_controller *ctlr, * processed the word, i.e. the "pre" timestamp should be taken before * transmitting the "pre" word, and the "post" timestamp after receiving * transmit confirmation from the controller for the "post" word. - * @timestamped_pre: Set by the SPI controller driver to denote it has acted - * upon the @ptp_sts request. Not set when the SPI core has taken care of - * the task. SPI device drivers are free to print a warning if this comes - * back unset and they need the better resolution. - * @timestamped_post: See above. The reason why both exist is that these - * booleans are also used to keep state in the core SPI logic. + * @timestamped: true if the transfer has been timestamped + * @error: Error status logged by spi controller driver. * * SPI transfers always write the same number of bytes as they read. * Protocol drivers should always provide @rx_buf and/or @tx_buf. @@ -940,6 +942,9 @@ struct spi_transfer { bool timestamped; struct list_head transfer_list; + +#define SPI_TRANS_FAIL_NO_START BIT(0) + u16 error; }; /** @@ -962,7 +967,7 @@ struct spi_transfer { * each represented by a struct spi_transfer. The sequence is "atomic" * in the sense that no other spi_message may use that SPI bus until that * sequence completes. On some systems, many such sequences can execute as - * as single programmed DMA transfer. On all systems, these messages are + * a single programmed DMA transfer. On all systems, these messages are * queued, and might complete after transactions to other devices. Messages * sent to a given spi_device are always executed in FIFO order. * @@ -1225,7 +1230,7 @@ extern int spi_bus_unlock(struct spi_controller *ctlr); * * For more specific semantics see spi_sync(). * - * Return: Return: zero on success, else a negative error code. + * Return: zero on success, else a negative error code. */ static inline int spi_sync_transfer(struct spi_device *spi, struct spi_transfer *xfers, diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index d3770b3f9d9a..f2f12d746dbd 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -56,6 +56,7 @@ #include <linux/kernel.h> #include <linux/stringify.h> #include <linux/bottom_half.h> +#include <linux/lockdep.h> #include <asm/barrier.h> #include <asm/mmiowb.h> diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index 6102e6bff3ae..b981caafe8bf 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -15,7 +15,7 @@ # include <linux/spinlock_types_up.h> #endif -#include <linux/lockdep.h> +#include <linux/lockdep_types.h> typedef struct raw_spinlock { arch_spinlock_t raw_lock; diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index c28955132234..86f150c2a6b6 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -2,6 +2,7 @@ #ifndef _LINUX_STRING_HELPERS_H_ #define _LINUX_STRING_HELPERS_H_ +#include <linux/ctype.h> #include <linux/types.h> struct file; @@ -75,6 +76,20 @@ static inline int string_escape_str_any_np(const char *src, char *dst, return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, only); } +static inline void string_upper(char *dst, const char *src) +{ + do { + *dst++ = toupper(*src); + } while (*src++); +} + +static inline void string_lower(char *dst, const char *src) +{ + do { + *dst++ = tolower(*src); + } while (*src++); +} + char *kstrdup_quotable(const char *src, gfp_t gfp); char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp); char *kstrdup_quotable_file(struct file *file, gfp_t gfp); diff --git a/include/linux/swap.h b/include/linux/swap.h index 4c5974bb9ba9..5b3216ba39a9 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -313,6 +313,7 @@ struct vma_swap_readahead { }; /* linux/mm/workingset.c */ +void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); void workingset_refault(struct page *page, void *shadow); void workingset_activation(struct page *page); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7c354c2955f5..86d973509591 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -444,6 +444,8 @@ asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, asmlinkage long sys_openat2(int dfd, const char __user *filename, struct open_how *how, size_t size); asmlinkage long sys_close(unsigned int fd); +asmlinkage long sys_close_range(unsigned int fd, unsigned int max_fd, + unsigned int flags); asmlinkage long sys_vhangup(void); /* fs/pipe.c */ @@ -1360,7 +1362,7 @@ static inline long ksys_lchown(const char __user *filename, uid_t user, extern long do_sys_ftruncate(unsigned int fd, loff_t length, int small); -static inline long ksys_ftruncate(unsigned int fd, unsigned long length) +static inline long ksys_ftruncate(unsigned int fd, loff_t length) { return do_sys_ftruncate(fd, length, 1); } diff --git a/include/linux/task_work.h b/include/linux/task_work.h index bd9a6a91c097..0fb93aafa478 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -13,7 +13,10 @@ init_task_work(struct callback_head *twork, task_work_func_t func) twork->func = func; } -int task_work_add(struct task_struct *task, struct callback_head *twork, bool); +#define TWA_RESUME 1 +#define TWA_SIGNAL 2 +int task_work_add(struct task_struct *task, struct callback_head *twork, int); + struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t); void task_work_run(void); diff --git a/include/linux/tboot.h b/include/linux/tboot.h index c7e424766360..5146d2574e85 100644 --- a/include/linux/tboot.h +++ b/include/linux/tboot.h @@ -44,7 +44,7 @@ struct tboot_acpi_generic_address { /* * combines Sx info from FADT and FACS tables per ACPI 2.0+ spec - * (http://www.acpi.info/) + * (https://uefi.org/specifications) */ struct tboot_acpi_sleep_info { struct tboot_acpi_generic_address pm1a_cnt_blk; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9aac824c523c..a1bbaa1c1a3a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -220,7 +220,9 @@ struct tcp_sock { } rack; u16 advmss; /* Advertised MSS */ u8 compressed_ack; - u8 dup_ack_counter; + u8 dup_ack_counter:2, + tlp_retrans:1, /* TLP is a retransmission */ + unused:5; u32 chrono_start; /* Start time in jiffies of a TCP chrono */ u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u8 chrono_type:2, /* current chronograph type */ @@ -243,7 +245,7 @@ struct tcp_sock { save_syn:1, /* Save headers of SYN packet */ is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ syn_smc:1; /* SYN includes SMC */ - u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ + u32 tlp_high_seq; /* snd_nxt at the time of TLP */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ u64 tcp_wstamp_ns; /* departure time for next sent data packet */ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index c91b1e344d56..216185bb3014 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -32,20 +32,10 @@ /* use value, which < 0K, to indicate an invalid/uninitialized temperature */ #define THERMAL_TEMP_INVALID -274000 -/* Default Thermal Governor */ -#if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) -#define DEFAULT_THERMAL_GOVERNOR "step_wise" -#elif defined(CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE) -#define DEFAULT_THERMAL_GOVERNOR "fair_share" -#elif defined(CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE) -#define DEFAULT_THERMAL_GOVERNOR "user_space" -#elif defined(CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR) -#define DEFAULT_THERMAL_GOVERNOR "power_allocator" -#endif - struct thermal_zone_device; struct thermal_cooling_device; struct thermal_instance; +struct thermal_attr; enum thermal_device_mode { THERMAL_DEVICE_DISABLED = 0, @@ -130,11 +120,6 @@ struct thermal_cooling_device { struct list_head node; }; -struct thermal_attr { - struct device_attribute attr; - char name[THERMAL_NAME_LENGTH]; -}; - /** * struct thermal_zone_device - structure for a thermal zone * @id: unique id number for each thermal zone @@ -347,21 +332,6 @@ struct thermal_zone_of_device_ops { int (*set_trip_temp)(void *, int, int); }; -/** - * struct thermal_trip - representation of a point in temperature domain - * @np: pointer to struct device_node that this trip point was created from - * @temperature: temperature value in miliCelsius - * @hysteresis: relative hysteresis in miliCelsius - * @type: trip point type - */ - -struct thermal_trip { - struct device_node *np; - int temperature; - int hysteresis; - enum thermal_trip_type type; -}; - /* Function declarations */ #ifdef CONFIG_THERMAL_OF int thermal_zone_of_get_sensor_id(struct device_node *tz_np, @@ -413,19 +383,7 @@ void devm_thermal_zone_of_sensor_unregister(struct device *dev, #endif -#if IS_ENABLED(CONFIG_THERMAL) -static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) -{ - return cdev->ops->get_requested_power && cdev->ops->state2power && - cdev->ops->power2state; -} - -int power_actor_get_max_power(struct thermal_cooling_device *, - struct thermal_zone_device *tz, u32 *max_power); -int power_actor_get_min_power(struct thermal_cooling_device *, - struct thermal_zone_device *tz, u32 *min_power); -int power_actor_set_power(struct thermal_cooling_device *, - struct thermal_instance *, u32); +#ifdef CONFIG_THERMAL struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, void *, struct thermal_zone_device_ops *, struct thermal_zone_params *, int, int); @@ -439,7 +397,6 @@ int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *, enum thermal_notify_event); -void thermal_zone_set_trips(struct thermal_zone_device *); struct thermal_cooling_device *thermal_cooling_device_register(const char *, void *, const struct thermal_cooling_device_ops *); @@ -457,24 +414,9 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); int thermal_zone_get_slope(struct thermal_zone_device *tz); int thermal_zone_get_offset(struct thermal_zone_device *tz); -int get_tz_trend(struct thermal_zone_device *, int); -struct thermal_instance *get_thermal_instance(struct thermal_zone_device *, - struct thermal_cooling_device *, int); void thermal_cdev_update(struct thermal_cooling_device *); void thermal_notify_framework(struct thermal_zone_device *, int); #else -static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) -{ return false; } -static inline int power_actor_get_max_power(struct thermal_cooling_device *cdev, - struct thermal_zone_device *tz, u32 *max_power) -{ return 0; } -static inline int power_actor_get_min_power(struct thermal_cooling_device *cdev, - struct thermal_zone_device *tz, - u32 *min_power) -{ return -ENODEV; } -static inline int power_actor_set_power(struct thermal_cooling_device *cdev, - struct thermal_instance *tz, u32 power) -{ return 0; } static inline struct thermal_zone_device *thermal_zone_device_register( const char *type, int trips, int mask, void *devdata, struct thermal_zone_device_ops *ops, @@ -484,21 +426,6 @@ static inline struct thermal_zone_device *thermal_zone_device_register( static inline void thermal_zone_device_unregister( struct thermal_zone_device *tz) { } -static inline int thermal_zone_bind_cooling_device( - struct thermal_zone_device *tz, int trip, - struct thermal_cooling_device *cdev, - unsigned long upper, unsigned long lower, - unsigned int weight) -{ return -ENODEV; } -static inline int thermal_zone_unbind_cooling_device( - struct thermal_zone_device *tz, int trip, - struct thermal_cooling_device *cdev) -{ return -ENODEV; } -static inline void thermal_zone_device_update(struct thermal_zone_device *tz, - enum thermal_notify_event event) -{ } -static inline void thermal_zone_set_trips(struct thermal_zone_device *tz) -{ } static inline struct thermal_cooling_device * thermal_cooling_device_register(char *type, void *devdata, const struct thermal_cooling_device_ops *ops) @@ -530,12 +457,7 @@ static inline int thermal_zone_get_slope( static inline int thermal_zone_get_offset( struct thermal_zone_device *tz) { return -ENODEV; } -static inline int get_tz_trend(struct thermal_zone_device *tz, int trip) -{ return -ENODEV; } -static inline struct thermal_instance * -get_thermal_instance(struct thermal_zone_device *tz, - struct thermal_cooling_device *cdev, int trip) -{ return ERR_PTR(-ENODEV); } + static inline void thermal_cdev_update(struct thermal_cooling_device *cdev) { } static inline void thermal_notify_framework(struct thermal_zone_device *tz, diff --git a/include/linux/tifm.h b/include/linux/tifm.h index 299cbb8c63bb..44073d06710f 100644 --- a/include/linux/tifm.h +++ b/include/linux/tifm.h @@ -124,7 +124,7 @@ struct tifm_adapter { int (*has_ms_pif)(struct tifm_adapter *fm, struct tifm_dev *sock); - struct tifm_dev *sockets[0]; + struct tifm_dev *sockets[]; }; struct tifm_adapter *tifm_alloc_adapter(unsigned int num_sockets, diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 824d54e057eb..5b6031385db0 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -33,6 +33,7 @@ extern struct time_namespace init_time_ns; #ifdef CONFIG_TIME_NS extern int vdso_join_timens(struct task_struct *task, struct time_namespace *ns); +extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns); static inline struct time_namespace *get_time_ns(struct time_namespace *ns) { @@ -96,6 +97,11 @@ static inline int vdso_join_timens(struct task_struct *task, return 0; } +static inline void timens_commit(struct task_struct *tsk, + struct time_namespace *ns) +{ +} + static inline struct time_namespace *get_time_ns(struct time_namespace *ns) { return NULL; diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index b27e2ffa96c1..d5471d6fa778 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -222,9 +222,9 @@ extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); -/* +/** * struct system_time_snapshot - simultaneous raw/real time capture with - * counter value + * counter value * @cycles: Clocksource counter value to produce the system times * @real: Realtime system time * @raw: Monotonic raw system time @@ -239,9 +239,9 @@ struct system_time_snapshot { u8 cs_was_changed_seq; }; -/* +/** * struct system_device_crosststamp - system/device cross-timestamp - * (syncronized capture) + * (synchronized capture) * @device: Device time * @sys_realtime: Realtime simultaneous with device time * @sys_monoraw: Monotonic raw simultaneous with device time @@ -252,12 +252,12 @@ struct system_device_crosststamp { ktime_t sys_monoraw; }; -/* +/** * struct system_counterval_t - system counter value with the pointer to the - * corresponding clocksource + * corresponding clocksource * @cycles: System counter value * @cs: Clocksource corresponding to system counter value. Used by - * timekeeping code to verify comparibility of two cycle values + * timekeeping code to verify comparibility of two cycle values */ struct system_counterval_t { u64 cycles; diff --git a/include/linux/torture.h b/include/linux/torture.h index 629b66e6c161..7f65bd1dd307 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -55,6 +55,11 @@ struct torture_random_state { #define DEFINE_TORTURE_RANDOM_PERCPU(name) \ DEFINE_PER_CPU(struct torture_random_state, name) unsigned long torture_random(struct torture_random_state *trsp); +static inline void torture_random_init(struct torture_random_state *trsp) +{ + trsp->trs_state = 0; + trsp->trs_count = 0; +} /* Task shuffler, which causes CPUs to occasionally go idle. */ void torture_shuffle_task_register(struct task_struct *tp); diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 03e9b184411b..8f4ff39f51e7 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -96,6 +96,7 @@ struct tpm_space { u8 *context_buf; u32 session_tbl[3]; u8 *session_buf; + u32 buf_size; }; struct tpm_bios_log { diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 4f8c90c93c29..739ba9a03ec1 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -81,6 +81,8 @@ struct tcg_efi_specid_event_algs { u16 digest_size; } __packed; +#define TCG_SPECID_SIG "Spec ID Event03" + struct tcg_efi_specid_event_head { u8 signature[16]; u32 platform_class; @@ -171,6 +173,7 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, int i; int j; u32 count, event_type; + const u8 zero_digest[sizeof(event_header->digest)] = {0}; marker = event; marker_start = marker; @@ -198,10 +201,26 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, count = READ_ONCE(event->count); event_type = READ_ONCE(event->event_type); + /* Verify that it's the log header */ + if (event_header->pcr_idx != 0 || + event_header->event_type != NO_ACTION || + memcmp(event_header->digest, zero_digest, sizeof(zero_digest))) { + size = 0; + goto out; + } + efispecid = (struct tcg_efi_specid_event_head *)event_header->event; - /* Check if event is malformed. */ - if (count > efispecid->num_algs) { + /* + * Perform validation of the event in order to identify malformed + * events. This function may be asked to parse arbitrary byte sequences + * immediately following a valid event log. The caller expects this + * function to recognize that the byte sequence is not a valid event + * and to return an event size of 0. + */ + if (memcmp(efispecid->signature, TCG_SPECID_SIG, + sizeof(TCG_SPECID_SIG)) || + !efispecid->num_algs || count != efispecid->num_algs) { size = 0; goto out; } diff --git a/include/linux/types.h b/include/linux/types.h index d3021c879179..a147977602b5 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -167,6 +167,8 @@ typedef struct { int counter; } atomic_t; +#define ATOMIC_INIT(i) { (i) } + #ifdef CONFIG_64BIT typedef struct { s64 counter; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 7bcadca22100..0a76ddc07d59 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -301,13 +301,14 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src, return 0; } -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size); +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); -extern long probe_kernel_read(void *dst, const void *src, size_t size); -extern long probe_user_read(void *dst, const void __user *src, size_t size); +long copy_from_kernel_nofault(void *dst, const void *src, size_t size); +long notrace copy_to_kernel_nofault(void *dst, const void *src, size_t size); -extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); -extern long notrace probe_user_write(void __user *dst, const void *src, size_t size); +long copy_from_user_nofault(void *dst, const void __user *src, size_t size); +long notrace copy_to_user_nofault(void __user *dst, const void *src, + size_t size); long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count); @@ -317,14 +318,16 @@ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long strnlen_user_nofault(const void __user *unsafe_addr, long count); /** - * probe_kernel_address(): safely attempt to read from a location - * @addr: address to read from - * @retval: read into this variable + * get_kernel_nofault(): safely attempt to read from a location + * @val: read into this variable + * @ptr: address to read from * * Returns 0 on success, or -EFAULT. */ -#define probe_kernel_address(addr, retval) \ - probe_kernel_read(&retval, addr, sizeof(retval)) +#define get_kernel_nofault(val, ptr) ({ \ + const typeof(val) *__gk_ptr = (ptr); \ + copy_from_kernel_nofault(&(val), __gk_ptr, sizeof(val));\ +}) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) diff --git a/include/linux/umh.h b/include/linux/umh.h index 0c08de356d0d..244aff638220 100644 --- a/include/linux/umh.h +++ b/include/linux/umh.h @@ -22,10 +22,8 @@ struct subprocess_info { const char *path; char **argv; char **envp; - struct file *file; int wait; int retval; - pid_t pid; int (*init)(struct subprocess_info *info, struct cred *new); void (*cleanup)(struct subprocess_info *info); void *data; @@ -40,19 +38,6 @@ call_usermodehelper_setup(const char *path, char **argv, char **envp, int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *), void *data); -struct subprocess_info *call_usermodehelper_setup_file(struct file *file, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *), void *data); -struct umh_info { - const char *cmdline; - struct file *pipe_to_umh; - struct file *pipe_from_umh; - struct list_head list; - void (*cleanup)(struct umh_info *info); - pid_t pid; -}; -int fork_usermode_blob(void *data, size_t len, struct umh_info *info); - extern int call_usermodehelper_exec(struct subprocess_info *info, int wait); diff --git a/include/linux/usermode_driver.h b/include/linux/usermode_driver.h new file mode 100644 index 000000000000..073a9e0ec07d --- /dev/null +++ b/include/linux/usermode_driver.h @@ -0,0 +1,18 @@ +#ifndef __LINUX_USERMODE_DRIVER_H__ +#define __LINUX_USERMODE_DRIVER_H__ + +#include <linux/umh.h> +#include <linux/path.h> + +struct umd_info { + const char *driver_name; + struct file *pipe_to_umh; + struct file *pipe_from_umh; + struct path wd; + struct pid *tgid; +}; +int umd_load_blob(struct umd_info *info, const void *data, size_t len); +int umd_unload_blob(struct umd_info *info); +int fork_usermode_driver(struct umd_info *info); + +#endif /* __LINUX_USERMODE_DRIVER_H__ */ diff --git a/include/linux/uuid.h b/include/linux/uuid.h index d41b0d3e9474..8cdc0d3567cd 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -98,8 +98,6 @@ int guid_parse(const char *uuid, guid_t *u); int uuid_parse(const char *uuid, uuid_t *u); /* backwards compatibility, don't use in new code */ -#define uuid_le_to_bin(guid, u) guid_parse(guid, u) - static inline int uuid_le_cmp(const guid_t u1, const guid_t u2) { return memcmp(&u1, &u2, sizeof(guid_t)); diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 48bb681e6c2a..0221f852a7e1 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -106,7 +106,6 @@ extern void *vzalloc(unsigned long size); extern void *vmalloc_user(unsigned long size); extern void *vmalloc_node(unsigned long size, int node); extern void *vzalloc_node(unsigned long size, int node); -extern void *vmalloc_exec(unsigned long size); extern void *vmalloc_32(unsigned long size); extern void *vmalloc_32_user(unsigned long size); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask); diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h new file mode 100644 index 000000000000..5e08db2adc31 --- /dev/null +++ b/include/linux/watch_queue.h @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +/* User-mappable watch queue + * + * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * See Documentation/watch_queue.rst + */ + +#ifndef _LINUX_WATCH_QUEUE_H +#define _LINUX_WATCH_QUEUE_H + +#include <uapi/linux/watch_queue.h> +#include <linux/kref.h> +#include <linux/rcupdate.h> + +#ifdef CONFIG_WATCH_QUEUE + +struct cred; + +struct watch_type_filter { + enum watch_notification_type type; + __u32 subtype_filter[1]; /* Bitmask of subtypes to filter on */ + __u32 info_filter; /* Filter on watch_notification::info */ + __u32 info_mask; /* Mask of relevant bits in info_filter */ +}; + +struct watch_filter { + union { + struct rcu_head rcu; + unsigned long type_filter[2]; /* Bitmask of accepted types */ + }; + u32 nr_filters; /* Number of filters */ + struct watch_type_filter filters[]; +}; + +struct watch_queue { + struct rcu_head rcu; + struct watch_filter __rcu *filter; + struct pipe_inode_info *pipe; /* The pipe we're using as a buffer */ + struct hlist_head watches; /* Contributory watches */ + struct page **notes; /* Preallocated notifications */ + unsigned long *notes_bitmap; /* Allocation bitmap for notes */ + struct kref usage; /* Object usage count */ + spinlock_t lock; + unsigned int nr_notes; /* Number of notes */ + unsigned int nr_pages; /* Number of pages in notes[] */ + bool defunct; /* T when queues closed */ +}; + +/* + * Representation of a watch on an object. + */ +struct watch { + union { + struct rcu_head rcu; + u32 info_id; /* ID to be OR'd in to info field */ + }; + struct watch_queue __rcu *queue; /* Queue to post events to */ + struct hlist_node queue_node; /* Link in queue->watches */ + struct watch_list __rcu *watch_list; + struct hlist_node list_node; /* Link in watch_list->watchers */ + const struct cred *cred; /* Creds of the owner of the watch */ + void *private; /* Private data for the watched object */ + u64 id; /* Internal identifier */ + struct kref usage; /* Object usage count */ +}; + +/* + * List of watches on an object. + */ +struct watch_list { + struct rcu_head rcu; + struct hlist_head watchers; + void (*release_watch)(struct watch *); + spinlock_t lock; +}; + +extern void __post_watch_notification(struct watch_list *, + struct watch_notification *, + const struct cred *, + u64); +extern struct watch_queue *get_watch_queue(int); +extern void put_watch_queue(struct watch_queue *); +extern void init_watch(struct watch *, struct watch_queue *); +extern int add_watch_to_object(struct watch *, struct watch_list *); +extern int remove_watch_from_object(struct watch_list *, struct watch_queue *, u64, bool); +extern long watch_queue_set_size(struct pipe_inode_info *, unsigned int); +extern long watch_queue_set_filter(struct pipe_inode_info *, + struct watch_notification_filter __user *); +extern int watch_queue_init(struct pipe_inode_info *); +extern void watch_queue_clear(struct watch_queue *); + +static inline void init_watch_list(struct watch_list *wlist, + void (*release_watch)(struct watch *)) +{ + INIT_HLIST_HEAD(&wlist->watchers); + spin_lock_init(&wlist->lock); + wlist->release_watch = release_watch; +} + +static inline void post_watch_notification(struct watch_list *wlist, + struct watch_notification *n, + const struct cred *cred, + u64 id) +{ + if (unlikely(wlist)) + __post_watch_notification(wlist, n, cred, id); +} + +static inline void remove_watch_list(struct watch_list *wlist, u64 id) +{ + if (wlist) { + remove_watch_from_object(wlist, NULL, id, true); + kfree_rcu(wlist, rcu); + } +} + +/** + * watch_sizeof - Calculate the information part of the size of a watch record, + * given the structure size. + */ +#define watch_sizeof(STRUCT) (sizeof(STRUCT) << WATCH_INFO_LENGTH__SHIFT) + +#endif + +#endif /* _LINUX_WATCH_QUEUE_H */ diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 47eaa34f8761..c5afaf8ca7a2 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/types.h> #include <linux/spinlock.h> +#include <linux/mm.h> #include <uapi/linux/xattr.h> struct inode; @@ -94,7 +95,7 @@ static inline void simple_xattrs_free(struct simple_xattrs *xattrs) list_for_each_entry_safe(xattr, node, &xattrs->head, list) { kfree(xattr->name); - kfree(xattr); + kvfree(xattr); } } |