diff options
Diffstat (limited to 'include/linux')
180 files changed, 3854 insertions, 1438 deletions
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index dc7ed2f46886..2b90c48a6a87 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -85,8 +85,10 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value); struct amd_iommu *get_amd_iommu(unsigned int idx); -#ifdef CONFIG_AMD_MEM_ENCRYPT -int amd_iommu_snp_enable(void); +#ifdef CONFIG_KVM_AMD_SEV +int amd_iommu_snp_disable(void); +#else +static inline int amd_iommu_snp_disable(void) { return 0; } #endif #endif /* _ASM_X86_AMD_IOMMU_H */ diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 3d0fde57ba90..c906f666ff5d 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -209,7 +209,7 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; } #define module_ffa_driver(__ffa_driver) \ module_driver(__ffa_driver, ffa_register, ffa_unregister) -extern struct bus_type ffa_bus_type; +extern const struct bus_type ffa_bus_type; /* FFA transport related */ struct ffa_partition_info { diff --git a/include/linux/async.h b/include/linux/async.h index 33c9ff4afb49..19b778d08600 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -120,4 +120,5 @@ extern void async_synchronize_cookie(async_cookie_t cookie); extern void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain); extern bool current_is_async(void); +extern void async_init(void); #endif diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index 5e95faa959c4..956bcba5dbf2 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -2005,6 +2005,7 @@ raw_atomic_xchg_relaxed(atomic_t *v, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_cmpxchg() elsewhere. * @@ -2033,6 +2034,7 @@ raw_atomic_cmpxchg(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_cmpxchg_acquire() elsewhere. * @@ -2061,6 +2063,7 @@ raw_atomic_cmpxchg_acquire(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_cmpxchg_release() elsewhere. * @@ -2088,6 +2091,7 @@ raw_atomic_cmpxchg_release(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_cmpxchg_relaxed() elsewhere. * @@ -2112,7 +2116,8 @@ raw_atomic_cmpxchg_relaxed(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_try_cmpxchg() elsewhere. * @@ -2145,7 +2150,8 @@ raw_atomic_try_cmpxchg(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_try_cmpxchg_acquire() elsewhere. * @@ -2178,7 +2184,8 @@ raw_atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_try_cmpxchg_release() elsewhere. * @@ -2210,7 +2217,8 @@ raw_atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_try_cmpxchg_relaxed() elsewhere. * @@ -2403,6 +2411,7 @@ raw_atomic_add_negative_relaxed(int i, atomic_t *v) * @u: int value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_fetch_add_unless() elsewhere. * @@ -2432,6 +2441,7 @@ raw_atomic_fetch_add_unless(atomic_t *v, int a, int u) * @u: int value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_add_unless() elsewhere. * @@ -2452,6 +2462,7 @@ raw_atomic_add_unless(atomic_t *v, int a, int u) * @v: pointer to atomic_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_inc_not_zero() elsewhere. * @@ -2472,6 +2483,7 @@ raw_atomic_inc_not_zero(atomic_t *v) * @v: pointer to atomic_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_inc_unless_negative() elsewhere. * @@ -2499,6 +2511,7 @@ raw_atomic_inc_unless_negative(atomic_t *v) * @v: pointer to atomic_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_dec_unless_positive() elsewhere. * @@ -2526,6 +2539,7 @@ raw_atomic_dec_unless_positive(atomic_t *v) * @v: pointer to atomic_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_dec_if_positive() elsewhere. * @@ -4117,6 +4131,7 @@ raw_atomic64_xchg_relaxed(atomic64_t *v, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg() elsewhere. * @@ -4145,6 +4160,7 @@ raw_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg_acquire() elsewhere. * @@ -4173,6 +4189,7 @@ raw_atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg_release() elsewhere. * @@ -4200,6 +4217,7 @@ raw_atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg_relaxed() elsewhere. * @@ -4224,7 +4242,8 @@ raw_atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg() elsewhere. * @@ -4257,7 +4276,8 @@ raw_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_acquire() elsewhere. * @@ -4290,7 +4310,8 @@ raw_atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_release() elsewhere. * @@ -4322,7 +4343,8 @@ raw_atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_relaxed() elsewhere. * @@ -4515,6 +4537,7 @@ raw_atomic64_add_negative_relaxed(s64 i, atomic64_t *v) * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_fetch_add_unless() elsewhere. * @@ -4544,6 +4567,7 @@ raw_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_add_unless() elsewhere. * @@ -4564,6 +4588,7 @@ raw_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) * @v: pointer to atomic64_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_inc_not_zero() elsewhere. * @@ -4584,6 +4609,7 @@ raw_atomic64_inc_not_zero(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_inc_unless_negative() elsewhere. * @@ -4611,6 +4637,7 @@ raw_atomic64_inc_unless_negative(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_dec_unless_positive() elsewhere. * @@ -4638,6 +4665,7 @@ raw_atomic64_dec_unless_positive(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_dec_if_positive() elsewhere. * @@ -4662,4 +4690,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v) } #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// eec048affea735b8464f58e6d96992101f8f85f1 +// 14850c0b0db20c62fdc78ccd1d42b98b88d76331 diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h index 54d7bbe0aeaa..debd487fe971 100644 --- a/include/linux/atomic/atomic-instrumented.h +++ b/include/linux/atomic/atomic-instrumented.h @@ -1182,6 +1182,7 @@ atomic_xchg_relaxed(atomic_t *v, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_cmpxchg() there. * @@ -1202,6 +1203,7 @@ atomic_cmpxchg(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_cmpxchg_acquire() there. * @@ -1221,6 +1223,7 @@ atomic_cmpxchg_acquire(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_cmpxchg_release() there. * @@ -1241,6 +1244,7 @@ atomic_cmpxchg_release(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_cmpxchg_relaxed() there. * @@ -1260,7 +1264,8 @@ atomic_cmpxchg_relaxed(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg() there. * @@ -1282,7 +1287,8 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_acquire() there. * @@ -1303,7 +1309,8 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_release() there. * @@ -1325,7 +1332,8 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_relaxed() there. * @@ -1475,6 +1483,7 @@ atomic_add_negative_relaxed(int i, atomic_t *v) * @u: int value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_fetch_add_unless() there. * @@ -1495,6 +1504,7 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u) * @u: int value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_add_unless() there. * @@ -1513,6 +1523,7 @@ atomic_add_unless(atomic_t *v, int a, int u) * @v: pointer to atomic_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_inc_not_zero() there. * @@ -1531,6 +1542,7 @@ atomic_inc_not_zero(atomic_t *v) * @v: pointer to atomic_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_inc_unless_negative() there. * @@ -1549,6 +1561,7 @@ atomic_inc_unless_negative(atomic_t *v) * @v: pointer to atomic_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_dec_unless_positive() there. * @@ -1567,6 +1580,7 @@ atomic_dec_unless_positive(atomic_t *v) * @v: pointer to atomic_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_dec_if_positive() there. * @@ -2746,6 +2760,7 @@ atomic64_xchg_relaxed(atomic64_t *v, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg() there. * @@ -2766,6 +2781,7 @@ atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_acquire() there. * @@ -2785,6 +2801,7 @@ atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_release() there. * @@ -2805,6 +2822,7 @@ atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_relaxed() there. * @@ -2824,7 +2842,8 @@ atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg() there. * @@ -2846,7 +2865,8 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_acquire() there. * @@ -2867,7 +2887,8 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_release() there. * @@ -2889,7 +2910,8 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_relaxed() there. * @@ -3039,6 +3061,7 @@ atomic64_add_negative_relaxed(s64 i, atomic64_t *v) * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_add_unless() there. * @@ -3059,6 +3082,7 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_add_unless() there. * @@ -3077,6 +3101,7 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u) * @v: pointer to atomic64_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_inc_not_zero() there. * @@ -3095,6 +3120,7 @@ atomic64_inc_not_zero(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_inc_unless_negative() there. * @@ -3113,6 +3139,7 @@ atomic64_inc_unless_negative(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_dec_unless_positive() there. * @@ -3131,6 +3158,7 @@ atomic64_dec_unless_positive(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_dec_if_positive() there. * @@ -4310,6 +4338,7 @@ atomic_long_xchg_relaxed(atomic_long_t *v, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg() there. * @@ -4330,6 +4359,7 @@ atomic_long_cmpxchg(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_acquire() there. * @@ -4349,6 +4379,7 @@ atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_release() there. * @@ -4369,6 +4400,7 @@ atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_relaxed() there. * @@ -4388,7 +4420,8 @@ atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg() there. * @@ -4410,7 +4443,8 @@ atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_acquire() there. * @@ -4431,7 +4465,8 @@ atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_release() there. * @@ -4453,7 +4488,8 @@ atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_relaxed() there. * @@ -4603,6 +4639,7 @@ atomic_long_add_negative_relaxed(long i, atomic_long_t *v) * @u: long value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_add_unless() there. * @@ -4623,6 +4660,7 @@ atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) * @u: long value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_add_unless() there. * @@ -4641,6 +4679,7 @@ atomic_long_add_unless(atomic_long_t *v, long a, long u) * @v: pointer to atomic_long_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_inc_not_zero() there. * @@ -4659,6 +4698,7 @@ atomic_long_inc_not_zero(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_inc_unless_negative() there. * @@ -4677,6 +4717,7 @@ atomic_long_inc_unless_negative(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_dec_unless_positive() there. * @@ -4695,6 +4736,7 @@ atomic_long_dec_unless_positive(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_dec_if_positive() there. * @@ -5008,4 +5050,4 @@ atomic_long_dec_if_positive(atomic_long_t *v) #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ -// 2cc4bc990fef44d3836ec108f11b610f3f438184 +// ce5b65e0f1f8a276268b667194581d24bed219d4 diff --git a/include/linux/atomic/atomic-long.h b/include/linux/atomic/atomic-long.h index c82947170ddc..3ef844b3ab8a 100644 --- a/include/linux/atomic/atomic-long.h +++ b/include/linux/atomic/atomic-long.h @@ -1352,6 +1352,7 @@ raw_atomic_long_xchg_relaxed(atomic_long_t *v, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_cmpxchg() elsewhere. * @@ -1374,6 +1375,7 @@ raw_atomic_long_cmpxchg(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_cmpxchg_acquire() elsewhere. * @@ -1396,6 +1398,7 @@ raw_atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_cmpxchg_release() elsewhere. * @@ -1418,6 +1421,7 @@ raw_atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_cmpxchg_relaxed() elsewhere. * @@ -1440,7 +1444,8 @@ raw_atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg() elsewhere. * @@ -1463,7 +1468,8 @@ raw_atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_acquire() elsewhere. * @@ -1486,7 +1492,8 @@ raw_atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_release() elsewhere. * @@ -1509,7 +1516,8 @@ raw_atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_relaxed() elsewhere. * @@ -1677,6 +1685,7 @@ raw_atomic_long_add_negative_relaxed(long i, atomic_long_t *v) * @u: long value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_fetch_add_unless() elsewhere. * @@ -1699,6 +1708,7 @@ raw_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) * @u: long value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_add_unless() elsewhere. * @@ -1719,6 +1729,7 @@ raw_atomic_long_add_unless(atomic_long_t *v, long a, long u) * @v: pointer to atomic_long_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_inc_not_zero() elsewhere. * @@ -1739,6 +1750,7 @@ raw_atomic_long_inc_not_zero(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_inc_unless_negative() elsewhere. * @@ -1759,6 +1771,7 @@ raw_atomic_long_inc_unless_negative(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_dec_unless_positive() elsewhere. * @@ -1779,6 +1792,7 @@ raw_atomic_long_dec_unless_positive(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_dec_if_positive() elsewhere. * @@ -1795,4 +1809,4 @@ raw_atomic_long_dec_if_positive(atomic_long_t *v) } #endif /* _LINUX_ATOMIC_LONG_H */ -// 4ef23f98c73cff96d239896175fd26b10b88899e +// 1c4a26fc77f345342953770ebe3c4d08e7ce2f9a diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index ae12696ec492..2ad261082bba 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -141,8 +141,6 @@ struct bdi_writeback { struct delayed_work dwork; /* work item used for writeback */ struct delayed_work bw_dwork; /* work item used for bandwidth estimate */ - unsigned long dirty_sleep; /* last wait */ - struct list_head bdi_node; /* anchored at bdi->wb_list */ #ifdef CONFIG_CGROUP_WRITEBACK @@ -179,6 +177,11 @@ struct backing_dev_info { * any dirty wbs, which is depended upon by bdi_has_dirty(). */ atomic_long_t tot_write_bandwidth; + /* + * Jiffies when last process was dirty throttled on this bdi. Used by + * blk-wbt. + */ + unsigned long last_bdp_sleep; struct bdi_writeback wb; /* the root writeback info for this bdi */ struct list_head wb_list; /* list of all wbs */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 1a97277f99b1..8e7af9a03b41 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -38,7 +38,6 @@ struct backing_dev_info *bdi_alloc(int node_id); void wb_start_background_writeback(struct bdi_writeback *wb); void wb_workfn(struct work_struct *work); -void wb_wakeup_delayed(struct bdi_writeback *wb); void wb_wait_for_completion(struct wb_completion *done); diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index ebfa12f69501..63928f173223 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -66,7 +66,8 @@ _pfx "mask is not constant"); \ BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero"); \ BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ - ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ + ~((_mask) >> __bf_shf(_mask)) & \ + (0 + (_val)) : 0, \ _pfx "value too large for the field"); \ BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ __bf_cast_unsigned(_reg, ~0ull), \ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 99451431e4d6..fb3a9c93ac86 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -6,6 +6,7 @@ #include <linux/align.h> #include <linux/bitops.h> +#include <linux/cleanup.h> #include <linux/errno.h> #include <linux/find.h> #include <linux/limits.h> @@ -54,6 +55,7 @@ struct device; * bitmap_full(src, nbits) Are all bits set in *src? * bitmap_weight(src, nbits) Hamming Weight: number set bits * bitmap_weight_and(src1, src2, nbits) Hamming Weight of and'ed bitmap + * bitmap_weight_andnot(src1, src2, nbits) Hamming Weight of andnot'ed bitmap * bitmap_set(dst, pos, nbits) Set specified bit area * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area @@ -62,6 +64,8 @@ struct device; * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n * bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest * bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask) + * bitmap_scatter(dst, src, mask, nbits) *dst = map(dense, sparse)(src) + * bitmap_gather(dst, src, mask, nbits) *dst = map(sparse, dense)(src) * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap @@ -127,6 +131,8 @@ unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node); unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node); void bitmap_free(const unsigned long *bitmap); +DEFINE_FREE(bitmap, unsigned long *, if (_T) bitmap_free(_T)) + /* Managed variants of the above. */ unsigned long *devm_bitmap_alloc(struct device *dev, unsigned int nbits, gfp_t flags); @@ -169,6 +175,8 @@ bool __bitmap_subset(const unsigned long *bitmap1, unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); unsigned int __bitmap_weight_and(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); +unsigned int __bitmap_weight_andnot(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); void __bitmap_set(unsigned long *map, unsigned int start, int len); void __bitmap_clear(unsigned long *map, unsigned int start, int len); @@ -425,6 +433,15 @@ unsigned long bitmap_weight_and(const unsigned long *src1, return __bitmap_weight_and(src1, src2, nbits); } +static __always_inline +unsigned long bitmap_weight_andnot(const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) +{ + if (small_const_nbits(nbits)) + return hweight_long(*src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)); + return __bitmap_weight_andnot(src1, src2, nbits); +} + static __always_inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { @@ -487,6 +504,105 @@ static inline void bitmap_replace(unsigned long *dst, __bitmap_replace(dst, old, new, mask, nbits); } +/** + * bitmap_scatter - Scatter a bitmap according to the given mask + * @dst: scattered bitmap + * @src: gathered bitmap + * @mask: mask representing bits to assign to in the scattered bitmap + * @nbits: number of bits in each of these bitmaps + * + * Scatters bitmap with sequential bits according to the given @mask. + * + * Example: + * If @src bitmap = 0x005a, with @mask = 0x1313, @dst will be 0x0302. + * + * Or in binary form + * @src @mask @dst + * 0000000001011010 0001001100010011 0000001100000010 + * + * (Bits 0, 1, 2, 3, 4, 5 are copied to the bits 0, 1, 4, 8, 9, 12) + * + * A more 'visual' description of the operation: + * src: 0000000001011010 + * |||||| + * +------+||||| + * | +----+|||| + * | |+----+||| + * | || +-+|| + * | || | || + * mask: ...v..vv...v..vv + * ...0..11...0..10 + * dst: 0000001100000010 + * + * A relationship exists between bitmap_scatter() and bitmap_gather(). + * bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation. + * See bitmap_scatter() for details related to this relationship. + */ +static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src, + const unsigned long *mask, unsigned int nbits) +{ + unsigned int n = 0; + unsigned int bit; + + bitmap_zero(dst, nbits); + + for_each_set_bit(bit, mask, nbits) + __assign_bit(bit, dst, test_bit(n++, src)); +} + +/** + * bitmap_gather - Gather a bitmap according to given mask + * @dst: gathered bitmap + * @src: scattered bitmap + * @mask: mask representing bits to extract from in the scattered bitmap + * @nbits: number of bits in each of these bitmaps + * + * Gathers bitmap with sparse bits according to the given @mask. + * + * Example: + * If @src bitmap = 0x0302, with @mask = 0x1313, @dst will be 0x001a. + * + * Or in binary form + * @src @mask @dst + * 0000001100000010 0001001100010011 0000000000011010 + * + * (Bits 0, 1, 4, 8, 9, 12 are copied to the bits 0, 1, 2, 3, 4, 5) + * + * A more 'visual' description of the operation: + * mask: ...v..vv...v..vv + * src: 0000001100000010 + * ^ ^^ ^ 0 + * | || | 10 + * | || > 010 + * | |+--> 1010 + * | +--> 11010 + * +----> 011010 + * dst: 0000000000011010 + * + * A relationship exists between bitmap_gather() and bitmap_scatter(). See + * bitmap_scatter() for the bitmap scatter detailed operations. + * Suppose scattered computed using bitmap_scatter(scattered, src, mask, n). + * The operation bitmap_gather(result, scattered, mask, n) leads to a result + * equal or equivalent to src. + * + * The result can be 'equivalent' because bitmap_scatter() and bitmap_gather() + * are not bijective. + * The result and src values are equivalent in that sense that a call to + * bitmap_scatter(res, src, mask, n) and a call to + * bitmap_scatter(res, result, mask, n) will lead to the same res value. + */ +static inline void bitmap_gather(unsigned long *dst, const unsigned long *src, + const unsigned long *mask, unsigned int nbits) +{ + unsigned int n = 0; + unsigned int bit; + + bitmap_zero(dst, nbits); + + for_each_set_bit(bit, mask, nbits) + __assign_bit(n++, dst, test_bit(bit, src)); +} + static inline void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs, unsigned int *re, unsigned int end) diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 378b2459efe2..e253e7bd0d17 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -20,6 +20,7 @@ struct blk_integrity_iter { unsigned int data_size; unsigned short interval; unsigned char tuple_size; + unsigned char pi_offset; const char *disk_name; }; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7a8150a5f051..d3d8fd8e229b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -8,6 +8,7 @@ #include <linux/scatterlist.h> #include <linux/prefetch.h> #include <linux/srcu.h> +#include <linux/rw_hint.h> struct blk_mq_tags; struct blk_flush_queue; @@ -135,6 +136,7 @@ struct request { struct blk_crypto_keyslot *crypt_keyslot; #endif + enum rw_hint write_hint; unsigned short ioprio; enum mq_rq_state state; @@ -682,17 +684,19 @@ enum { #define BLK_MQ_NO_HCTX_IDX (-1U) -struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, +struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, + struct queue_limits *lim, void *queuedata, struct lock_class_key *lkclass); -#define blk_mq_alloc_disk(set, queuedata) \ +#define blk_mq_alloc_disk(set, lim, queuedata) \ ({ \ static struct lock_class_key __key; \ \ - __blk_mq_alloc_disk(set, queuedata, &__key); \ + __blk_mq_alloc_disk(set, lim, queuedata, &__key); \ }) struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, struct lock_class_key *lkclass); -struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); +struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, + struct queue_limits *lim, void *queuedata); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); void blk_mq_destroy_queue(struct request_queue *); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index f288c94374b3..cb1526ec44b5 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -10,6 +10,7 @@ #include <linux/bvec.h> #include <linux/device.h> #include <linux/ktime.h> +#include <linux/rw_hint.h> struct bio_set; struct bio; @@ -206,52 +207,10 @@ static inline bool blk_path_error(blk_status_t error) return true; } -/* - * From most significant bit: - * 1 bit: reserved for other usage, see below - * 12 bits: original size of bio - * 51 bits: issue time of bio - */ -#define BIO_ISSUE_RES_BITS 1 -#define BIO_ISSUE_SIZE_BITS 12 -#define BIO_ISSUE_RES_SHIFT (64 - BIO_ISSUE_RES_BITS) -#define BIO_ISSUE_SIZE_SHIFT (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS) -#define BIO_ISSUE_TIME_MASK ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1) -#define BIO_ISSUE_SIZE_MASK \ - (((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT) -#define BIO_ISSUE_RES_MASK (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1)) - -/* Reserved bit for blk-throtl */ -#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63) - struct bio_issue { u64 value; }; -static inline u64 __bio_issue_time(u64 time) -{ - return time & BIO_ISSUE_TIME_MASK; -} - -static inline u64 bio_issue_time(struct bio_issue *issue) -{ - return __bio_issue_time(issue->value); -} - -static inline sector_t bio_issue_size(struct bio_issue *issue) -{ - return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT); -} - -static inline void bio_issue_init(struct bio_issue *issue, - sector_t size) -{ - size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1; - issue->value = ((issue->value & BIO_ISSUE_RES_MASK) | - (ktime_get_ns() & BIO_ISSUE_TIME_MASK) | - ((u64)size << BIO_ISSUE_SIZE_SHIFT)); -} - typedef __u32 __bitwise blk_opf_t; typedef unsigned int blk_qc_t; @@ -269,6 +228,7 @@ struct bio { */ unsigned short bi_flags; /* BIO_* below */ unsigned short bi_ioprio; + enum rw_hint bi_write_hint; blk_status_t bi_status; atomic_t __bi_remaining; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 99e4f5e72213..f9b87c39cab0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -24,6 +24,7 @@ #include <linux/sbitmap.h> #include <linux/uuid.h> #include <linux/xarray.h> +#include <linux/file.h> struct module; struct request_queue; @@ -42,7 +43,7 @@ struct blk_crypto_profile; extern const struct device_type disk_type; extern const struct device_type part_type; -extern struct class block_class; +extern const struct class block_class; /* * Maximum number of blkcg policies allowed to be registered concurrently. @@ -108,6 +109,7 @@ struct blk_integrity { const struct blk_integrity_profile *profile; unsigned char flags; unsigned char tuple_size; + unsigned char pi_offset; unsigned char interval_exp; unsigned char tag_size; }; @@ -189,8 +191,6 @@ struct gendisk { * blk_mq_unfreeze_queue(). */ unsigned int nr_zones; - unsigned int max_open_zones; - unsigned int max_active_zones; unsigned long *conv_zones_bitmap; unsigned long *seq_zones_wlock; #endif /* CONFIG_BLK_DEV_ZONED */ @@ -292,6 +292,7 @@ struct queue_limits { unsigned int io_opt; unsigned int max_discard_sectors; unsigned int max_hw_discard_sectors; + unsigned int max_user_discard_sectors; unsigned int max_secure_erase_sectors; unsigned int max_write_zeroes_sectors; unsigned int max_zone_append_sectors; @@ -307,6 +308,8 @@ struct queue_limits { unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; bool zoned; + unsigned int max_open_zones; + unsigned int max_active_zones; /* * Drivers that set dma_alignment to less than 511 must be prepared to @@ -325,7 +328,7 @@ void disk_set_zoned(struct gendisk *disk); int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, - sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); + sector_t sectors, sector_t nr_sectors); int blk_revalidate_disk_zones(struct gendisk *disk, void (*update_driver_data)(struct gendisk *disk)); @@ -473,6 +476,7 @@ struct request_queue { struct mutex sysfs_lock; struct mutex sysfs_dir_lock; + struct mutex limits_lock; /* * for reusing dead hctx instance in case of updating @@ -639,23 +643,23 @@ static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector) static inline void disk_set_max_open_zones(struct gendisk *disk, unsigned int max_open_zones) { - disk->max_open_zones = max_open_zones; + disk->queue->limits.max_open_zones = max_open_zones; } static inline void disk_set_max_active_zones(struct gendisk *disk, unsigned int max_active_zones) { - disk->max_active_zones = max_active_zones; + disk->queue->limits.max_active_zones = max_active_zones; } static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { - return bdev->bd_disk->max_open_zones; + return bdev->bd_disk->queue->limits.max_open_zones; } static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { - return bdev->bd_disk->max_active_zones; + return bdev->bd_disk->queue->limits.max_active_zones; } #else /* CONFIG_BLK_DEV_ZONED */ @@ -763,22 +767,26 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) int bdev_disk_changed(struct gendisk *disk, bool invalidate); void put_disk(struct gendisk *disk); -struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); +struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node, + struct lock_class_key *lkclass); /** * blk_alloc_disk - allocate a gendisk structure + * @lim: queue limits to be used for this disk. * @node_id: numa node to allocate on * * Allocate and pre-initialize a gendisk structure for use with BIO based * drivers. * + * Returns an ERR_PTR on error, else the allocated disk. + * * Context: can sleep */ -#define blk_alloc_disk(node_id) \ +#define blk_alloc_disk(lim, node_id) \ ({ \ static struct lock_class_key __key; \ \ - __blk_alloc_disk(node_id, &__key); \ + __blk_alloc_disk(lim, node_id, &__key); \ }) int __register_blkdev(unsigned int major, const char *name, @@ -861,6 +869,29 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset, return chunk_sectors - (offset & (chunk_sectors - 1)); } +/** + * queue_limits_start_update - start an atomic update of queue limits + * @q: queue to update + * + * This functions starts an atomic update of the queue limits. It takes a lock + * to prevent other updates and returns a snapshot of the current limits that + * the caller can modify. The caller must call queue_limits_commit_update() + * to finish the update. + * + * Context: process context. The caller must have frozen the queue or ensured + * that there is outstanding I/O by other means. + */ +static inline struct queue_limits +queue_limits_start_update(struct request_queue *q) + __acquires(q->limits_lock) +{ + mutex_lock(&q->limits_lock); + return q->limits; +} +int queue_limits_commit_update(struct request_queue *q, + struct queue_limits *lim); +int queue_limits_set(struct request_queue *q, struct queue_limits *lim); + /* * Access functions for manipulating queue properties */ @@ -894,8 +925,8 @@ extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); -extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, - sector_t offset); +void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, + sector_t offset, const char *pfx); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); @@ -942,6 +973,7 @@ struct blk_plug { /* if ios_left is > 1, we can batch tag/rq allocations */ struct request *cached_rq; + u64 cur_ktime; unsigned short nr_ios; unsigned short rq_count; @@ -972,6 +1004,18 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) __blk_flush_plug(plug, async); } +/* + * tsk == current here + */ +static inline void blk_plug_invalidate_ts(struct task_struct *tsk) +{ + struct blk_plug *plug = tsk->plug; + + if (plug) + plug->cur_ktime = 0; + current->flags &= ~PF_BLOCK_TS; +} + int blkdev_issue_flush(struct block_device *bdev); long nr_blockdev_pages(void); #else /* CONFIG_BLOCK */ @@ -995,6 +1039,10 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) { } +static inline void blk_plug_invalidate_ts(struct task_struct *tsk) +{ +} + static inline int blkdev_issue_flush(struct block_device *bdev) { return 0; @@ -1474,26 +1522,20 @@ extern const struct blk_holder_ops fs_holder_ops; (BLK_OPEN_READ | BLK_OPEN_RESTRICT_WRITES | \ (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE)) -struct bdev_handle { - struct block_device *bdev; - void *holder; - blk_mode_t mode; -}; - -struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, +struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); -struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, +struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -void bdev_release(struct bdev_handle *handle); /* just for blk-cgroup, don't use elsewhere */ struct block_device *blkdev_get_no_open(dev_t dev); void blkdev_put_no_open(struct block_device *bdev); struct block_device *I_BDEV(struct inode *inode); +struct block_device *file_bdev(struct file *bdev_file); #ifdef CONFIG_BLOCK void invalidate_bdev(struct block_device *bdev); diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a789266feac3..fb3c3e7181e6 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -196,7 +196,8 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && \ - cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS)) \ + cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS) && sk && \ + sk_fullsock(sk)) \ __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ CGROUP_INET_INGRESS); \ \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e30100597d0a..4f20f62f9d63 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -37,6 +37,7 @@ struct perf_event; struct bpf_prog; struct bpf_prog_aux; struct bpf_map; +struct bpf_arena; struct sock; struct seq_file; struct btf; @@ -52,6 +53,10 @@ struct module; struct bpf_func_state; struct ftrace_ops; struct cgroup; +struct bpf_token; +struct user_namespace; +struct super_block; +struct inode; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -135,6 +140,9 @@ struct bpf_map_ops { int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma); __poll_t (*map_poll)(struct bpf_map *map, struct file *filp, struct poll_table_struct *pts); + unsigned long (*map_get_unmapped_area)(struct file *filep, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); /* Functions called by bpf_local_storage maps */ int (*map_local_storage_charge)(struct bpf_local_storage_map *smap, @@ -247,10 +255,7 @@ struct bpf_list_node_kern { } __attribute__((aligned(8))); struct bpf_map { - /* The first two cachelines with read-mostly members of which some - * are also accessed in fast-path (e.g. ops, max_entries). - */ - const struct bpf_map_ops *ops ____cacheline_aligned; + const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; #ifdef CONFIG_SECURITY void *security; @@ -272,17 +277,14 @@ struct bpf_map { struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; - /* The 3rd and 4th cacheline with misc members to avoid false sharing - * particularly with refcounting. - */ - atomic64_t refcnt ____cacheline_aligned; + struct mutex freeze_mutex; + atomic64_t refcnt; atomic64_t usercnt; /* rcu is used before freeing and work is only used during freeing */ union { struct work_struct work; struct rcu_head rcu; }; - struct mutex freeze_mutex; atomic64_t writecnt; /* 'Ownership' of program-containing map is claimed by the first program * that is going to use this map or by the first program which FD is @@ -527,8 +529,8 @@ void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock); void bpf_rb_root_free(const struct btf_field *field, void *rb_root, struct bpf_spin_lock *spin_lock); - - +u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena); +u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena); int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); struct bpf_offload_dev; @@ -710,6 +712,7 @@ enum bpf_arg_type { * on eBPF program stack */ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ + ARG_PTR_TO_ARENA, ARG_CONST_SIZE, /* number of bytes accessed from memory */ ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ @@ -881,6 +884,7 @@ enum bpf_reg_type { * an explicit null check is required for this struct. */ PTR_TO_MEM, /* reg points to valid memory region */ + PTR_TO_ARENA, PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_FUNC, /* reg points to a bpf program function */ CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */ @@ -1185,7 +1189,6 @@ struct bpf_trampoline { int progs_cnt[BPF_TRAMP_MAX]; /* Executable image of trampoline */ struct bpf_tramp_image *cur_image; - struct module *mod; }; struct bpf_attach_target_info { @@ -1412,6 +1415,7 @@ struct bpf_jit_poke_descriptor { struct bpf_ctx_arg_aux { u32 offset; enum bpf_reg_type reg_type; + struct btf *btf; u32 btf_id; }; @@ -1451,11 +1455,11 @@ struct bpf_prog_aux { bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ bool attach_tracing_prog; /* true if tracing another tracing program */ bool func_proto_unreliable; - bool sleepable; bool tail_call_reachable; bool xdp_has_frags; bool exception_cb; bool exception_boundary; + struct bpf_arena *arena; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -1485,6 +1489,7 @@ struct bpf_prog_aux { #ifdef CONFIG_SECURITY void *security; #endif + struct bpf_token *token; struct bpf_prog_offload *offload; struct btf *btf; struct bpf_func_info *func_info; @@ -1535,7 +1540,8 @@ struct bpf_prog { enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ call_get_func_ip:1, /* Do we call get_func_ip() */ - tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */ + tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */ + sleepable:1; /* BPF program is sleepable */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ @@ -1609,6 +1615,31 @@ struct bpf_link_primer { u32 id; }; +struct bpf_mount_opts { + kuid_t uid; + kgid_t gid; + umode_t mode; + + /* BPF token-related delegation options */ + u64 delegate_cmds; + u64 delegate_maps; + u64 delegate_progs; + u64 delegate_attachs; +}; + +struct bpf_token { + struct work_struct work; + atomic64_t refcnt; + struct user_namespace *userns; + u64 allowed_cmds; + u64 allowed_maps; + u64 allowed_progs; + u64 allowed_attachs; +#ifdef CONFIG_SECURITY + void *security; +#endif +}; + struct bpf_struct_ops_value; struct btf_member; @@ -1673,19 +1704,64 @@ struct bpf_struct_ops { void (*unreg)(void *kdata); int (*update)(void *kdata, void *old_kdata); int (*validate)(void *kdata); - const struct btf_type *type; - const struct btf_type *value_type; + void *cfi_stubs; + struct module *owner; const char *name; struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS]; +}; + +/* Every member of a struct_ops type has an instance even a member is not + * an operator (function pointer). The "info" field will be assigned to + * prog->aux->ctx_arg_info of BPF struct_ops programs to provide the + * argument information required by the verifier to verify the program. + * + * btf_ctx_access() will lookup prog->aux->ctx_arg_info to find the + * corresponding entry for an given argument. + */ +struct bpf_struct_ops_arg_info { + struct bpf_ctx_arg_aux *info; + u32 cnt; +}; + +struct bpf_struct_ops_desc { + struct bpf_struct_ops *st_ops; + + const struct btf_type *type; + const struct btf_type *value_type; u32 type_id; u32 value_id; - void *cfi_stubs; + + /* Collection of argument information for each member */ + struct bpf_struct_ops_arg_info *arg_info; +}; + +enum bpf_struct_ops_state { + BPF_STRUCT_OPS_STATE_INIT, + BPF_STRUCT_OPS_STATE_INUSE, + BPF_STRUCT_OPS_STATE_TOBEFREE, + BPF_STRUCT_OPS_STATE_READY, +}; + +struct bpf_struct_ops_common_value { + refcount_t refcnt; + enum bpf_struct_ops_state state; }; #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) +/* This macro helps developer to register a struct_ops type and generate + * type information correctly. Developers should use this macro to register + * a struct_ops type instead of calling __register_bpf_struct_ops() directly. + */ +#define register_bpf_struct_ops(st_ops, type) \ + ({ \ + struct bpf_struct_ops_##type { \ + struct bpf_struct_ops_common_value common; \ + struct type data ____cacheline_aligned_in_smp; \ + }; \ + BTF_TYPE_EMIT(struct bpf_struct_ops_##type); \ + __register_bpf_struct_ops(st_ops); \ + }) #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) -const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id); -void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log); bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, @@ -1694,7 +1770,9 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, struct bpf_tramp_link *link, const struct btf_func_model *model, void *stub_func, - void *image, void *image_end); + void **image, u32 *image_off, + bool allow_alloc); +void bpf_struct_ops_image_free(void *image); static inline bool bpf_try_module_get(const void *data, struct module *owner) { if (owner == BPF_MODULE_OWNER) @@ -1727,15 +1805,13 @@ struct bpf_dummy_ops { int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); #endif +int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, + struct btf *btf, + struct bpf_verifier_log *log); +void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map); +void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc); #else -static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) -{ - return NULL; -} -static inline void bpf_struct_ops_init(struct btf *btf, - struct bpf_verifier_log *log) -{ -} +#define register_bpf_struct_ops(st_ops, type) ({ (void *)(st_ops); 0; }) static inline bool bpf_try_module_get(const void *data, struct module *owner) { return try_module_get(owner); @@ -1754,6 +1830,13 @@ static inline int bpf_struct_ops_link_create(union bpf_attr *attr) { return -EOPNOTSUPP; } +static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map) +{ +} + +static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc) +{ +} #endif @@ -2029,14 +2112,14 @@ bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu, old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; while ((prog = READ_ONCE(item->prog))) { - if (!prog->aux->sleepable) + if (!prog->sleepable) rcu_read_lock(); run_ctx.bpf_cookie = item->bpf_cookie; ret &= run_prog(prog, ctx); item++; - if (!prog->aux->sleepable) + if (!prog->sleepable) rcu_read_unlock(); } bpf_reset_run_ctx(old_run_ctx); @@ -2068,6 +2151,7 @@ static inline void bpf_enable_instrumentation(void) migrate_enable(); } +extern const struct super_operations bpf_super_ops; extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; @@ -2135,6 +2219,8 @@ int generic_map_delete_batch(struct bpf_map *map, struct bpf_map *bpf_map_get_curr_or_next(u32 *id); struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); +int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid, + unsigned long nr_pages, struct page **page_array); #ifdef CONFIG_MEMCG_KMEM void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); @@ -2202,24 +2288,26 @@ static inline void bpf_map_dec_elem_count(struct bpf_map *map) extern int sysctl_unprivileged_bpf_disabled; -static inline bool bpf_allow_ptr_leaks(void) +bool bpf_token_capable(const struct bpf_token *token, int cap); + +static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token) { - return perfmon_capable(); + return bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_allow_uninit_stack(void) +static inline bool bpf_allow_uninit_stack(const struct bpf_token *token) { - return perfmon_capable(); + return bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_bypass_spec_v1(void) +static inline bool bpf_bypass_spec_v1(const struct bpf_token *token) { - return cpu_mitigations_off() || perfmon_capable(); + return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_bypass_spec_v4(void) +static inline bool bpf_bypass_spec_v4(const struct bpf_token *token) { - return cpu_mitigations_off() || perfmon_capable(); + return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); } int bpf_map_new_fd(struct bpf_map *map, int flags); @@ -2236,8 +2324,21 @@ int bpf_link_new_fd(struct bpf_link *link); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); +void bpf_token_inc(struct bpf_token *token); +void bpf_token_put(struct bpf_token *token); +int bpf_token_create(union bpf_attr *attr); +struct bpf_token *bpf_token_get_from_fd(u32 ufd); + +bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); +bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); +bool bpf_token_allow_prog_type(const struct bpf_token *token, + enum bpf_prog_type prog_type, + enum bpf_attach_type attach_type); + int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); +struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir, + umode_t mode); #define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) \ @@ -2472,11 +2573,14 @@ int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *pr struct btf *btf, const struct btf_type *t); const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt, int comp_idx, const char *tag_key); +int btf_find_next_decl_tag(const struct btf *btf, const struct btf_type *pt, + int comp_idx, const char *tag_key, int last_id); struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); -const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); +const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id, + const struct bpf_prog *prog); void bpf_task_storage_free(struct task_struct *task); void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); @@ -2595,6 +2699,24 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) return -EOPNOTSUPP; } +static inline bool bpf_token_capable(const struct bpf_token *token, int cap) +{ + return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); +} + +static inline void bpf_token_inc(struct bpf_token *token) +{ +} + +static inline void bpf_token_put(struct bpf_token *token) +{ +} + +static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline void __dev_flush(void) { } @@ -2718,7 +2840,7 @@ static inline int btf_struct_access(struct bpf_verifier_log *log, } static inline const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id) +bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { return NULL; } diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 173ec7f43ed1..dcddb0aef7d8 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -129,10 +129,36 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, struct bpf_local_storage_cache *cache, bool bpf_ma); -struct bpf_local_storage_data * +void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem); +/* If cacheit_lockit is false, this lookup function is lockless */ +static inline struct bpf_local_storage_data * bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, - bool cacheit_lockit); + bool cacheit_lockit) +{ + struct bpf_local_storage_data *sdata; + struct bpf_local_storage_elem *selem; + + /* Fast path (cache hit) */ + sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx], + bpf_rcu_lock_held()); + if (sdata && rcu_access_pointer(sdata->smap) == smap) + return sdata; + + /* Slow path (cache miss) */ + hlist_for_each_entry_rcu(selem, &local_storage->list, snode, + rcu_read_lock_trace_held()) + if (rcu_access_pointer(SDATA(selem)->smap) == smap) + break; + + if (!selem) + return NULL; + if (cacheit_lockit) + __bpf_local_storage_insert_cache(local_storage, smap, selem); + return SDATA(selem); +} void bpf_local_storage_destroy(struct bpf_local_storage *local_storage); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 94baced5a1ad..9f2a6b83b49e 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -132,6 +132,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_ARENA, arena_map_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index d07d857ca67f..7cb1b75eee38 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -449,11 +449,12 @@ struct bpf_verifier_state { u32 jmp_history_cnt; u32 dfs_depth; u32 callback_unroll_depth; + u32 may_goto_depth; }; #define bpf_get_spilled_reg(slot, frame, mask) \ (((slot < frame->allocated_stack / BPF_REG_SIZE) && \ - ((1 << frame->stack[slot].slot_type[0]) & (mask))) \ + ((1 << frame->stack[slot].slot_type[BPF_REG_SIZE - 1]) & (mask))) \ ? &frame->stack[slot].spilled_ptr : NULL) /* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */ @@ -547,6 +548,7 @@ struct bpf_insn_aux_data { u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ bool zext_dst; /* this insn zero extends dst reg */ + bool needs_zext; /* alu op needs to clear upper bits */ bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */ bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */ @@ -610,6 +612,7 @@ struct bpf_subprog_arg_info { enum bpf_arg_type arg_type; union { u32 mem_size; + u32 btf_id; }; }; @@ -618,6 +621,7 @@ struct bpf_subprog_info { u32 start; /* insn idx of function entry point */ u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u16 stack_depth; /* max. stack depth used by this function */ + u16 stack_extra; bool has_tail_call: 1; bool tail_call_reachable: 1; bool has_ld_abs: 1; @@ -662,6 +666,7 @@ struct bpf_verifier_env { u32 prev_insn_idx; struct bpf_prog *prog; /* eBPF program being verified */ const struct bpf_verifier_ops *ops; + struct module *attach_btf_mod; /* The owner module of prog->aux->attach_btf */ struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ @@ -917,6 +922,15 @@ static inline void mark_verifier_state_scratched(struct bpf_verifier_env *env) env->scratched_stack_slots = ~0ULL; } +static inline bool bpf_stack_narrow_access_ok(int off, int fill_size, int spill_size) +{ +#ifdef __BIG_ENDIAN + off -= spill_size - fill_size; +#endif + + return !(off % BPF_REG_SIZE); +} + const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type); const char *dynptr_type_str(enum bpf_dynptr_type type); const char *iter_type_str(const struct btf *btf, u32 btf_id); diff --git a/include/linux/btf.h b/include/linux/btf.h index cf5c6ff48981..f9e56fd12a9f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -137,6 +137,7 @@ struct btf_struct_metas { extern const struct file_operations btf_fops; +const char *btf_get_name(const struct btf *btf); void btf_get(struct btf *btf); void btf_put(struct btf *btf); int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz); @@ -494,8 +495,26 @@ static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id) return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); } +bool btf_param_match_suffix(const struct btf *btf, + const struct btf_param *arg, + const char *suffix); +int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto, + u32 arg_no); + struct bpf_verifier_log; +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) +struct bpf_struct_ops; +int __register_bpf_struct_ops(struct bpf_struct_ops *st_ops); +const struct bpf_struct_ops_desc *bpf_struct_ops_find_value(struct btf *btf, u32 value_id); +const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id); +#else +static inline const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id) +{ + return NULL; +} +#endif + #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); @@ -512,10 +531,9 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner); struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id); -const struct btf_type * -btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, enum bpf_prog_type prog_type, - int arg); +bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg); int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type); bool btf_types_are_same(const struct btf *btf1, u32 id1, const struct btf *btf2, u32 id2); @@ -555,12 +573,12 @@ static inline struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf { return NULL; } -static inline const struct btf_member * -btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, enum bpf_prog_type prog_type, - int arg) +static inline bool +btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg) { - return NULL; + return false; } static inline int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type) { diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index a9cb10b0e2e9..e24aabfe8ecc 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -8,6 +8,9 @@ struct btf_id_set { u32 ids[]; }; +/* This flag implies BTF_SET8 holds kfunc(s) */ +#define BTF_SET8_KFUNCS (1 << 0) + struct btf_id_set8 { u32 cnt; u32 flags; @@ -21,6 +24,7 @@ struct btf_id_set8 { #include <linux/compiler.h> /* for __PASTE */ #include <linux/compiler_attributes.h> /* for __maybe_unused */ +#include <linux/stringify.h> /* * Following macros help to define lists of BTF IDs placed @@ -183,17 +187,18 @@ extern struct btf_id_set name; * .word (1 << 3) | (1 << 1) | (1 << 2) * */ -#define __BTF_SET8_START(name, scope) \ +#define __BTF_SET8_START(name, scope, flags) \ +__BTF_ID_LIST(name, local) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ "." #scope " __BTF_ID__set8__" #name "; \n" \ "__BTF_ID__set8__" #name ":; \n" \ -".zero 8 \n" \ +".zero 4 \n" \ +".long " __stringify(flags) "\n" \ ".popsection; \n"); #define BTF_SET8_START(name) \ -__BTF_ID_LIST(name, local) \ -__BTF_SET8_START(name, local) +__BTF_SET8_START(name, local, 0) #define BTF_SET8_END(name) \ asm( \ @@ -202,6 +207,12 @@ asm( \ ".popsection; \n"); \ extern struct btf_id_set8 name; +#define BTF_KFUNCS_START(name) \ +__BTF_SET8_START(name, local, BTF_SET8_KFUNCS) + +#define BTF_KFUNCS_END(name) \ +BTF_SET8_END(name) + #else #define BTF_ID_LIST(name) static u32 __maybe_unused name[64]; @@ -216,6 +227,8 @@ extern struct btf_id_set8 name; #define BTF_SET_END(name) #define BTF_SET8_START(name) static struct btf_id_set8 __maybe_unused name = { 0 }; #define BTF_SET8_END(name) +#define BTF_KFUNCS_START(name) static struct btf_id_set8 __maybe_unused name = { .flags = BTF_SET8_KFUNCS }; +#define BTF_KFUNCS_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 555aae5448ae..bd1e361b351c 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -83,7 +83,7 @@ struct bvec_iter { unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ -} __packed; +} __packed __aligned(4); struct bvec_iter_all { struct bio_vec bv; diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 2eaaabbe98cb..1717cc57cdac 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -283,7 +283,7 @@ struct ceph_msg { struct kref kref; bool more_to_follow; bool needs_out_seq; - bool sparse_read; + u64 sparse_read_total; int front_alloc_len; struct ceph_msgpool *pool; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index fa018d5864e7..f66f6aac74f6 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -45,6 +45,7 @@ enum ceph_sparse_read_state { CEPH_SPARSE_READ_HDR = 0, CEPH_SPARSE_READ_EXTENTS, CEPH_SPARSE_READ_DATA_LEN, + CEPH_SPARSE_READ_DATA_PRE, CEPH_SPARSE_READ_DATA, }; @@ -64,7 +65,7 @@ struct ceph_sparse_read { u64 sr_req_len; /* orig request length */ u64 sr_pos; /* current pos in buffer */ int sr_index; /* current extent index */ - __le32 sr_datalen; /* length of actual data */ + u32 sr_datalen; /* length of actual data */ u32 sr_count; /* extent count in reply */ int sr_ext_len; /* length of extent array */ struct ceph_sparse_extent *sr_extent; /* extent array */ diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 1d42d4b17327..0ad8b550bb4b 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -291,7 +291,19 @@ static inline void timer_probe(void) {} #define TIMER_ACPI_DECLARE(name, table_id, fn) \ ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn) -extern ulong max_cswd_read_retries; +static inline unsigned int clocksource_get_max_watchdog_retry(void) +{ + /* + * When system is in the boot phase or under heavy workload, there + * can be random big latencies during the clocksource/watchdog + * read, so allow retries to filter the noise latency. As the + * latency's frequency and maximum value goes up with the number of + * CPUs, scale the number of retries with the number of online + * CPUs. + */ + return (ilog2(num_online_cpus()) / 2) + 1; +} + void clocksource_verify_percpu(struct clocksource *cs); #endif /* _LINUX_CLOCKSOURCE_H */ diff --git a/include/linux/clocksource_ids.h b/include/linux/clocksource_ids.h index 16775d7d8f8d..a4fa3436940c 100644 --- a/include/linux/clocksource_ids.h +++ b/include/linux/clocksource_ids.h @@ -6,6 +6,9 @@ enum clocksource_ids { CSID_GENERIC = 0, CSID_ARM_ARCH_COUNTER, + CSID_X86_TSC_EARLY, + CSID_X86_TSC, + CSID_X86_KVM_CLK, CSID_MAX, }; diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index aebb65bf95a7..aff92b1d284f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -35,7 +35,7 @@ (typeof(ptr)) (__ptr + (off)); \ }) -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE #define __noretpoline __attribute__((__indirect_branch__("keep"))) #endif @@ -64,6 +64,26 @@ __builtin_unreachable(); \ } while (0) +/* + * GCC 'asm goto' with outputs miscompiles certain code sequences: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 + * + * Work around it via the same compiler barrier quirk that we used + * to use for the old 'asm goto' workaround. + * + * Also, always mark such 'asm goto' statements as volatile: all + * asm goto statements are supposed to be volatile as per the + * documentation, but some versions of gcc didn't actually do + * that for asms with outputs: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 + */ +#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND +#define asm_goto_output(x...) \ + do { asm volatile goto(x); asm (""); } while (0) +#endif + #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index bb1339c7057b..52730e423681 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -209,7 +209,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, */ #define ___ADDRESSABLE(sym, __attrs) \ static void * __used __attrs \ - __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym; + __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)(uintptr_t)&sym; #define __ADDRESSABLE(sym) \ ___ADDRESSABLE(sym, __section(".discard.addressable")) @@ -231,6 +231,45 @@ static inline void *offset_to_ptr(const int *off) * This returns a constant expression while determining if an argument is * a constant expression, most importantly without evaluating the argument. * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> + * + * Details: + * - sizeof() return an integer constant expression, and does not evaluate + * the value of its operand; it only examines the type of its operand. + * - The results of comparing two integer constant expressions is also + * an integer constant expression. + * - The first literal "8" isn't important. It could be any literal value. + * - The second literal "8" is to avoid warnings about unaligned pointers; + * this could otherwise just be "1". + * - (long)(x) is used to avoid warnings about 64-bit types on 32-bit + * architectures. + * - The C Standard defines "null pointer constant", "(void *)0", as + * distinct from other void pointers. + * - If (x) is an integer constant expression, then the "* 0l" resolves + * it into an integer constant expression of value 0. Since it is cast to + * "void *", this makes the second operand a null pointer constant. + * - If (x) is not an integer constant expression, then the second operand + * resolves to a void pointer (but not a null pointer constant: the value + * is not an integer constant 0). + * - The conditional operator's third operand, "(int *)8", is an object + * pointer (to type "int"). + * - The behavior (including the return type) of the conditional operator + * ("operand1 ? operand2 : operand3") depends on the kind of expressions + * given for the second and third operands. This is the central mechanism + * of the macro: + * - When one operand is a null pointer constant (i.e. when x is an integer + * constant expression) and the other is an object pointer (i.e. our + * third operand), the conditional operator returns the type of the + * object pointer operand (i.e. "int *). Here, within the sizeof(), we + * would then get: + * sizeof(*((int *)(...)) == sizeof(int) == 4 + * - When one operand is a void pointer (i.e. when x is not an integer + * constant expression) and the other is an object pointer (i.e. our + * third operand), the conditional operator returns a "void *" type. + * Here, within the sizeof(), we would then get: + * sizeof(*((void *)(...)) == sizeof(void) == 1 + * - The equality comparison to "sizeof(int)" therefore depends on (x): + * sizeof(int) == sizeof(int) (x) was a constant expression + * sizeof(int) != sizeof(void) (x) was not a constant expression */ #define __is_constexpr(x) \ (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 28566624f008..8bdf6e0918c1 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -95,11 +95,11 @@ #endif /* - * Optional: only supported since gcc >= 14 + * Optional: only supported since gcc >= 15 * Optional: only supported since clang >= 18 * * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 - * clang: https://reviews.llvm.org/D148381 + * clang: https://github.com/llvm/llvm-project/pull/76348 */ #if __has_attribute(__counted_by__) # define __counted_by(member) __attribute__((__counted_by__(member))) @@ -334,6 +334,18 @@ #define __section(section) __attribute__((__section__(section))) /* + * Optional: only supported since gcc >= 12 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-uninitialized-variable-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#uninitialized + */ +#if __has_attribute(__uninitialized__) +# define __uninitialized __attribute__((__uninitialized__)) +#else +# define __uninitialized +#endif + +/* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-unused-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-unused-type-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-unused-variable-attribute diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6f1ca49306d2..3e64ec0f7ac8 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -282,11 +282,18 @@ struct ftrace_likely_data { #define __no_sanitize_or_inline __always_inline #endif +/* Do not trap wrapping arithmetic within an annotated function. */ +#ifdef CONFIG_UBSAN_SIGNED_WRAP +# define __signed_wrap __attribute__((no_sanitize("signed-integer-overflow"))) +#else +# define __signed_wrap +#endif + /* Section for code which can't be instrumented at all */ #define __noinstr_section(section) \ noinline notrace __attribute((__section__(section))) \ __no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \ - __no_sanitize_memory + __no_sanitize_memory __signed_wrap #define noinstr __noinstr_section(".noinstr.text") @@ -362,8 +369,15 @@ struct ftrace_likely_data { #define __member_size(p) __builtin_object_size(p, 1) #endif -#ifndef asm_volatile_goto -#define asm_volatile_goto(x...) asm goto(x) +/* + * Some versions of gcc do not mark 'asm goto' volatile: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979 + * + * We do it here by hand, because it doesn't hurt. + */ +#ifndef asm_goto_output +#define asm_goto_output(x...) asm volatile goto(x) #endif #ifdef CONFIG_CC_HAS_ASM_INLINE diff --git a/include/linux/cper.h b/include/linux/cper.h index c1a7dc325121..265b0f8fc0b3 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -90,6 +90,29 @@ enum { GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ 0x72, 0x2D, 0xEB, 0x41) +/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */ +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CPER_SEC_CXL_GEN_MEDIA_GUID \ + GUID_INIT(0xfbcd0a77, 0xc260, 0x417f, \ + 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6) +/* + * DRAM Event Record + * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + */ +#define CPER_SEC_CXL_DRAM_GUID \ + GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, \ + 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24) +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +#define CPER_SEC_CXL_MEM_MODULE_GUID \ + GUID_INIT(0xfe927475, 0xdd59, 0x4339, \ + 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74) + /* * Flags bits definitions for flags in struct cper_record_header * If set, the error has been recovered diff --git a/include/linux/cpu.h b/include/linux/cpu.h index dcb89c987164..ae5a20cf2f9c 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -75,6 +75,8 @@ extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, @@ -112,7 +114,7 @@ void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); int bringup_hibernate_cpu(unsigned int sleep_cpu); -void bringup_nonboot_cpus(unsigned int setup_max_cpus); +void bringup_nonboot_cpus(unsigned int max_cpus); #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 @@ -196,6 +198,8 @@ void arch_cpu_idle(void); void arch_cpu_idle_prepare(void); void arch_cpu_idle_enter(void); void arch_cpu_idle_exit(void); +void arch_tick_broadcast_enter(void); +void arch_tick_broadcast_exit(void); void __noreturn arch_cpu_idle_dead(void); #ifdef CONFIG_ARCH_HAS_CPU_FINALIZE_INIT diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 172d0a743e5d..35e78ddb2b37 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -184,6 +184,7 @@ enum cpuhp_state { CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING, CPUHP_AP_HRTIMERS_DYING, + CPUHP_AP_TICK_DYING, CPUHP_AP_X86_TBOOT_DYING, CPUHP_AP_ARM_CACHE_B15_RAC_DYING, CPUHP_AP_ONLINE, @@ -231,6 +232,7 @@ enum cpuhp_state { CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE, CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, CPUHP_AP_PERF_CSKY_ONLINE, + CPUHP_AP_TMIGR_ONLINE, CPUHP_AP_WATCHDOG_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RANDOM_ONLINE, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index cfb545841a2c..1c29947db848 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -7,6 +7,7 @@ * set of CPUs in a system, one bit position per CPU number. In general, * only nr_cpu_ids (<= NR_CPUS) bits are valid. */ +#include <linux/cleanup.h> #include <linux/kernel.h> #include <linux/threads.h> #include <linux/bitmap.h> @@ -720,6 +721,19 @@ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, } /** + * cpumask_weight_andnot - Count of bits in (*srcp1 & ~*srcp2) + * @srcp1: the cpumask to count bits (< nr_cpu_ids) in. + * @srcp2: the cpumask to count bits (< nr_cpu_ids) in. + * + * Return: count of bits set in both *srcp1 and *srcp2 + */ +static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, + const struct cpumask *srcp2) +{ + return bitmap_weight_andnot(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); +} + +/** * cpumask_shift_right - *dstp = *srcp >> n * @dstp: the cpumask result * @srcp: the input to shift @@ -977,6 +991,8 @@ static inline bool cpumask_available(cpumask_var_t mask) } #endif /* CONFIG_CPUMASK_OFFSTACK */ +DEFINE_FREE(free_cpumask_var, struct cpumask *, if (_T) free_cpumask_var(_T)); + /* It's common to want to use cpu_all_mask in struct member initializers, * so it has to refer to an address rather than a pointer. */ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 875d12598bd2..0ce6ff0d9c9a 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -121,11 +121,6 @@ static inline int cpuset_do_page_mem_spread(void) return task_spread_page(current); } -static inline int cpuset_do_slab_mem_spread(void) -{ - return task_spread_slab(current); -} - extern bool current_cpuset_is_being_rebound(void); extern void rebuild_sched_domains(void); @@ -264,11 +259,6 @@ static inline int cpuset_do_page_mem_spread(void) return 0; } -static inline int cpuset_do_slab_mem_spread(void) -{ - return 0; -} - static inline bool current_cpuset_is_being_rebound(void) { return false; diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 91125eca4c8a..03fa6d50d46f 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -140,22 +140,4 @@ struct cxl_cper_event_rec { union cxl_event event; } __packed; -typedef void (*cxl_cper_callback)(enum cxl_event_type type, - struct cxl_cper_event_rec *rec); - -#ifdef CONFIG_ACPI_APEI_GHES -int cxl_cper_register_callback(cxl_cper_callback callback); -int cxl_cper_unregister_callback(cxl_cper_callback callback); -#else -static inline int cxl_cper_register_callback(cxl_cper_callback callback) -{ - return 0; -} - -static inline int cxl_cper_unregister_callback(cxl_cper_callback callback) -{ - return 0; -} -#endif - #endif /* _LINUX_CXL_EVENT_H */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 1666c387861f..bf53e3894aae 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -125,6 +125,11 @@ enum dentry_d_lock_class DENTRY_D_LOCK_NESTED }; +enum d_real_type { + D_REAL_DATA, + D_REAL_METADATA, +}; + struct dentry_operations { int (*d_revalidate)(struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); @@ -139,7 +144,7 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); - struct dentry *(*d_real)(struct dentry *, const struct inode *); + struct dentry *(*d_real)(struct dentry *, enum d_real_type type); } ____cacheline_aligned; /* @@ -173,6 +178,7 @@ struct dentry_operations { #define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */ #define DCACHE_CANT_MOUNT BIT(8) +#define DCACHE_GENOCIDE BIT(9) #define DCACHE_SHRINK_LIST BIT(10) #define DCACHE_OP_WEAK_REVALIDATE BIT(11) @@ -546,24 +552,23 @@ static inline struct inode *d_backing_inode(const struct dentry *upper) /** * d_real - Return the real dentry * @dentry: the dentry to query - * @inode: inode to select the dentry from multiple layers (can be NULL) + * @type: the type of real dentry (data or metadata) * * If dentry is on a union/overlay, then return the underlying, real dentry. * Otherwise return the dentry itself. * * See also: Documentation/filesystems/vfs.rst */ -static inline struct dentry *d_real(struct dentry *dentry, - const struct inode *inode) +static inline struct dentry *d_real(struct dentry *dentry, enum d_real_type type) { if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) - return dentry->d_op->d_real(dentry, inode); + return dentry->d_op->d_real(dentry, type); else return dentry; } /** - * d_real_inode - Return the real inode + * d_real_inode - Return the real inode hosting the data * @dentry: The dentry to query * * If dentry is on a union/overlay, then return the underlying, real inode. @@ -572,7 +577,7 @@ static inline struct dentry *d_real(struct dentry *dentry, static inline struct inode *d_real_inode(const struct dentry *dentry) { /* This usage of d_real() results in const dentry */ - return d_backing_inode(d_real((struct dentry *) dentry, NULL)); + return d_inode(d_real((struct dentry *) dentry, D_REAL_DATA)); } struct name_snapshot { diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 772ab4d74d94..82b2195efaca 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -165,7 +165,7 @@ void dm_error(const char *message); struct dm_dev { struct block_device *bdev; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct dax_device *dax_dev; blk_mode_t mode; char name[16]; diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index 75e7d8cbb532..d1503b815a78 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -64,6 +64,9 @@ void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start); void *dm_bufio_read(struct dm_bufio_client *c, sector_t block, struct dm_buffer **bp); +void *dm_bufio_read_with_ioprio(struct dm_bufio_client *c, sector_t block, + struct dm_buffer **bp, unsigned short ioprio); + /* * Like dm_bufio_read, but return buffer from cache, don't read * it. If the buffer is not in the cache, return NULL. @@ -86,6 +89,10 @@ void *dm_bufio_new(struct dm_bufio_client *c, sector_t block, void dm_bufio_prefetch(struct dm_bufio_client *c, sector_t block, unsigned int n_blocks); +void dm_bufio_prefetch_with_ioprio(struct dm_bufio_client *c, + sector_t block, unsigned int n_blocks, + unsigned short ioprio); + /* * Release a reference obtained with dm_bufio_{read,get,new}. The data * pointer and dm_buffer pointer is no longer valid after this call. diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 7595142f3fc5..7b2968612b7e 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -80,7 +80,8 @@ void dm_io_client_destroy(struct dm_io_client *client); * error occurred doing io to the corresponding region. */ int dm_io(struct dm_io_request *io_req, unsigned int num_regions, - struct dm_io_region *region, unsigned int long *sync_error_bits); + struct dm_io_region *region, unsigned int long *sync_error_bits, + unsigned short ioprio); #endif /* __KERNEL__ */ #endif /* _LINUX_DM_IO_H */ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 3df70d6131c8..752dbde4cec1 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -953,7 +953,8 @@ static inline int dmaengine_slave_config(struct dma_chan *chan, static inline bool is_slave_direction(enum dma_transfer_direction direction) { - return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM); + return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM) || + (direction == DMA_DEV_TO_DEV); } static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single( diff --git a/include/linux/dpll.h b/include/linux/dpll.h index 9cf896ea1d41..d275736230b3 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -10,6 +10,8 @@ #include <uapi/linux/dpll.h> #include <linux/device.h> #include <linux/netlink.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> struct dpll_device; struct dpll_pin; @@ -19,6 +21,7 @@ struct dpll_device_ops { enum dpll_mode *mode, struct netlink_ext_ack *extack); int (*lock_status_get)(const struct dpll_device *dpll, void *dpll_priv, enum dpll_lock_status *status, + enum dpll_lock_status_error *status_error, struct netlink_ext_ack *extack); int (*temp_get)(const struct dpll_device *dpll, void *dpll_priv, s32 *temp, struct netlink_ext_ack *extack); @@ -120,15 +123,24 @@ struct dpll_pin_properties { }; #if IS_ENABLED(CONFIG_DPLL) -size_t dpll_msg_pin_handle_size(struct dpll_pin *pin); -int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin); +void dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin); +void dpll_netdev_pin_clear(struct net_device *dev); + +size_t dpll_netdev_pin_handle_size(const struct net_device *dev); +int dpll_netdev_add_pin_handle(struct sk_buff *msg, + const struct net_device *dev); #else -static inline size_t dpll_msg_pin_handle_size(struct dpll_pin *pin) +static inline void +dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) { } +static inline void dpll_netdev_pin_clear(struct net_device *dev) { } + +static inline size_t dpll_netdev_pin_handle_size(const struct net_device *dev) { return 0; } -static inline int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +static inline int +dpll_netdev_add_pin_handle(struct sk_buff *msg, const struct net_device *dev) { return 0; } diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index 407c2f281b64..5693a4be0d9a 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -38,14 +38,22 @@ #ifdef __KERNEL__ +#include <linux/bitops.h> #include <asm/bug.h> +#define DQL_HIST_LEN 4 +#define DQL_HIST_ENT(dql, idx) ((dql)->history[(idx) % DQL_HIST_LEN]) + struct dql { /* Fields accessed in enqueue path (dql_queued) */ unsigned int num_queued; /* Total ever queued */ unsigned int adj_limit; /* limit + num_completed */ unsigned int last_obj_cnt; /* Count at last queuing */ + unsigned long history_head; /* top 58 bits of jiffies */ + /* stall entries, a bit per entry */ + unsigned long history[DQL_HIST_LEN]; + /* Fields accessed only by completion path (dql_completed) */ unsigned int limit ____cacheline_aligned_in_smp; /* Current limit */ @@ -62,6 +70,13 @@ struct dql { unsigned int max_limit; /* Max limit */ unsigned int min_limit; /* Minimum limit */ unsigned int slack_hold_time; /* Time to measure slack */ + + /* Stall threshold (in jiffies), defined by user */ + unsigned short stall_thrs; + /* Longest stall detected, reported to user */ + unsigned short stall_max; + unsigned long last_reap; /* Last reap (in jiffies) */ + unsigned long stall_cnt; /* Number of stalls */ }; /* Set some static maximums */ @@ -74,6 +89,8 @@ struct dql { */ static inline void dql_queued(struct dql *dql, unsigned int count) { + unsigned long map, now, now_hi, i; + BUG_ON(count > DQL_MAX_OBJECT); dql->last_obj_cnt = count; @@ -86,6 +103,34 @@ static inline void dql_queued(struct dql *dql, unsigned int count) barrier(); dql->num_queued += count; + + now = jiffies; + now_hi = now / BITS_PER_LONG; + + /* The following code set a bit in the ring buffer, where each + * bit trackes time the packet was queued. The dql->history buffer + * tracks DQL_HIST_LEN * BITS_PER_LONG time (jiffies) slot + */ + if (unlikely(now_hi != dql->history_head)) { + /* About to reuse slots, clear them */ + for (i = 0; i < DQL_HIST_LEN; i++) { + /* Multiplication masks high bits */ + if (now_hi * BITS_PER_LONG == + (dql->history_head + i) * BITS_PER_LONG) + break; + DQL_HIST_ENT(dql, dql->history_head + i + 1) = 0; + } + /* pairs with smp_rmb() in dql_check_stall() */ + smp_wmb(); + WRITE_ONCE(dql->history_head, now_hi); + } + + /* __set_bit() does not guarantee WRITE_ONCE() semantics */ + map = DQL_HIST_ENT(dql, now_hi); + + /* Populate the history with an entry (bit) per queued */ + if (!(map & BIT_MASK(now))) + WRITE_ONCE(DQL_HIST_ENT(dql, now_hi), map | BIT_MASK(now)); } /* Returns how many objects can be queued, < 0 indicates over limit. */ diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 325e0778e937..9901e563f706 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -222,6 +222,16 @@ extern int __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings); +struct ethtool_keee { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); + __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertised); + u32 tx_lpi_timer; + bool tx_lpi_enabled; + bool eee_active; + bool eee_enabled; +}; + struct kernel_ethtool_coalesce { u8 use_cqe_mode_tx; u8 use_cqe_mode_rx; @@ -892,8 +902,8 @@ struct ethtool_ops { struct ethtool_modinfo *); int (*get_module_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); - int (*get_eee)(struct net_device *, struct ethtool_eee *); - int (*set_eee)(struct net_device *, struct ethtool_eee *); + int (*get_eee)(struct net_device *dev, struct ethtool_keee *eee); + int (*set_eee)(struct net_device *dev, struct ethtool_keee *eee); int (*get_tunable)(struct net_device *, const struct ethtool_tunable *, void *); int (*set_tunable)(struct net_device *, diff --git a/include/linux/evm.h b/include/linux/evm.h index 36ec884320d9..d48d6da32315 100644 --- a/include/linux/evm.h +++ b/include/linux/evm.h @@ -12,52 +12,12 @@ #include <linux/integrity.h> #include <linux/xattr.h> -struct integrity_iint_cache; - #ifdef CONFIG_EVM extern int evm_set_key(void *key, size_t keylen); extern enum integrity_status evm_verifyxattr(struct dentry *dentry, const char *xattr_name, void *xattr_value, - size_t xattr_value_len, - struct integrity_iint_cache *iint); -extern int evm_inode_setattr(struct mnt_idmap *idmap, - struct dentry *dentry, struct iattr *attr); -extern void evm_inode_post_setattr(struct dentry *dentry, int ia_valid); -extern int evm_inode_setxattr(struct mnt_idmap *idmap, - struct dentry *dentry, const char *name, - const void *value, size_t size); -extern void evm_inode_post_setxattr(struct dentry *dentry, - const char *xattr_name, - const void *xattr_value, - size_t xattr_value_len); -extern int evm_inode_copy_up_xattr(const char *name); -extern int evm_inode_removexattr(struct mnt_idmap *idmap, - struct dentry *dentry, const char *xattr_name); -extern void evm_inode_post_removexattr(struct dentry *dentry, - const char *xattr_name); -static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - evm_inode_post_removexattr(dentry, acl_name); -} -extern int evm_inode_set_acl(struct mnt_idmap *idmap, - struct dentry *dentry, const char *acl_name, - struct posix_acl *kacl); -static inline int evm_inode_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return evm_inode_set_acl(idmap, dentry, acl_name, NULL); -} -static inline void evm_inode_post_set_acl(struct dentry *dentry, - const char *acl_name, - struct posix_acl *kacl) -{ - return evm_inode_post_setxattr(dentry, acl_name, NULL, 0); -} - + size_t xattr_value_len); int evm_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, int *xattr_count); @@ -85,85 +45,12 @@ static inline int evm_set_key(void *key, size_t keylen) static inline enum integrity_status evm_verifyxattr(struct dentry *dentry, const char *xattr_name, void *xattr_value, - size_t xattr_value_len, - struct integrity_iint_cache *iint) + size_t xattr_value_len) { return INTEGRITY_UNKNOWN; } #endif -static inline int evm_inode_setattr(struct mnt_idmap *idmap, - struct dentry *dentry, struct iattr *attr) -{ - return 0; -} - -static inline void evm_inode_post_setattr(struct dentry *dentry, int ia_valid) -{ - return; -} - -static inline int evm_inode_setxattr(struct mnt_idmap *idmap, - struct dentry *dentry, const char *name, - const void *value, size_t size) -{ - return 0; -} - -static inline void evm_inode_post_setxattr(struct dentry *dentry, - const char *xattr_name, - const void *xattr_value, - size_t xattr_value_len) -{ - return; -} - -static inline int evm_inode_copy_up_xattr(const char *name) -{ - return 0; -} - -static inline int evm_inode_removexattr(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *xattr_name) -{ - return 0; -} - -static inline void evm_inode_post_removexattr(struct dentry *dentry, - const char *xattr_name) -{ - return; -} - -static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return; -} - -static inline int evm_inode_set_acl(struct mnt_idmap *idmap, - struct dentry *dentry, const char *acl_name, - struct posix_acl *kacl) -{ - return 0; -} - -static inline int evm_inode_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return 0; -} - -static inline void evm_inode_post_set_acl(struct dentry *dentry, - const char *acl_name, - struct posix_acl *kacl) -{ - return; -} - static inline int evm_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, diff --git a/include/linux/file.h b/include/linux/file.h index 6834a29338c4..169692cb1906 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -24,6 +24,8 @@ struct inode; struct path; extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *, const char *, int flags, const struct file_operations *); +extern struct file *alloc_file_pseudo_noaccount(struct inode *, struct vfsmount *, + const char *, int flags, const struct file_operations *); extern struct file *alloc_file_clone(struct file *, int flags, const struct file_operations *); diff --git a/include/linux/filelock.h b/include/linux/filelock.h index 95e868e09e29..daee999d05f3 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -27,6 +27,7 @@ #define FILE_LOCK_DEFERRED 1 struct file_lock; +struct file_lease; struct file_lock_operations { void (*fl_copy_lock)(struct file_lock *, struct file_lock *); @@ -39,14 +40,17 @@ struct lock_manager_operations { void (*lm_put_owner)(fl_owner_t); void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, int); - bool (*lm_break)(struct file_lock *); - int (*lm_change)(struct file_lock *, int, struct list_head *); - void (*lm_setup)(struct file_lock *, void **); - bool (*lm_breaker_owns_lease)(struct file_lock *); bool (*lm_lock_expirable)(struct file_lock *cfl); void (*lm_expire_lock)(void); }; +struct lease_manager_operations { + bool (*lm_break)(struct file_lease *); + int (*lm_change)(struct file_lease *, int, struct list_head *); + void (*lm_setup)(struct file_lease *, void **); + bool (*lm_breaker_owns_lease)(struct file_lease *); +}; + struct lock_manager { struct list_head list; /* @@ -85,31 +89,31 @@ bool opens_in_grace(struct net *); * * Obviously, the last two criteria only matter for POSIX locks. */ -struct file_lock { - struct file_lock *fl_blocker; /* The lock, that is blocking us */ - struct list_head fl_list; /* link into file_lock_context */ - struct hlist_node fl_link; /* node in global lists */ - struct list_head fl_blocked_requests; /* list of requests with + +struct file_lock_core { + struct file_lock_core *flc_blocker; /* The lock that is blocking us */ + struct list_head flc_list; /* link into file_lock_context */ + struct hlist_node flc_link; /* node in global lists */ + struct list_head flc_blocked_requests; /* list of requests with * ->fl_blocker pointing here */ - struct list_head fl_blocked_member; /* node in + struct list_head flc_blocked_member; /* node in * ->fl_blocker->fl_blocked_requests */ - fl_owner_t fl_owner; - unsigned int fl_flags; - unsigned char fl_type; - unsigned int fl_pid; - int fl_link_cpu; /* what cpu's list is this on? */ - wait_queue_head_t fl_wait; - struct file *fl_file; + fl_owner_t flc_owner; + unsigned int flc_flags; + unsigned char flc_type; + pid_t flc_pid; + int flc_link_cpu; /* what cpu's list is this on? */ + wait_queue_head_t flc_wait; + struct file *flc_file; +}; + +struct file_lock { + struct file_lock_core c; loff_t fl_start; loff_t fl_end; - struct fasync_struct * fl_fasync; /* for lease break notifications */ - /* for lease breaks: */ - unsigned long fl_break_time; - unsigned long fl_downgrade_time; - const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ union { @@ -126,6 +130,15 @@ struct file_lock { } fl_u; } __randomize_layout; +struct file_lease { + struct file_lock_core c; + struct fasync_struct * fl_fasync; /* for lease break notifications */ + /* for lease breaks: */ + unsigned long fl_break_time; + unsigned long fl_downgrade_time; + const struct lease_manager_operations *fl_lmops; /* Callbacks for lease managers */ +} __randomize_layout; + struct file_lock_context { spinlock_t flc_lock; struct list_head flc_flock; @@ -147,11 +160,31 @@ int fcntl_setlk64(unsigned int, struct file *, unsigned int, int fcntl_setlease(unsigned int fd, struct file *filp, int arg); int fcntl_getlease(struct file *filp); +static inline bool lock_is_unlock(struct file_lock *fl) +{ + return fl->c.flc_type == F_UNLCK; +} + +static inline bool lock_is_read(struct file_lock *fl) +{ + return fl->c.flc_type == F_RDLCK; +} + +static inline bool lock_is_write(struct file_lock *fl) +{ + return fl->c.flc_type == F_WRLCK; +} + +static inline void locks_wake_up(struct file_lock *fl) +{ + wake_up(&fl->c.flc_wait); +} + /* fs/locks.c */ void locks_free_lock_context(struct inode *inode); void locks_free_lock(struct file_lock *fl); void locks_init_lock(struct file_lock *); -struct file_lock * locks_alloc_lock(void); +struct file_lock *locks_alloc_lock(void); void locks_copy_lock(struct file_lock *, struct file_lock *); void locks_copy_conflock(struct file_lock *, struct file_lock *); void locks_remove_posix(struct file *, fl_owner_t); @@ -165,11 +198,16 @@ int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_l int vfs_cancel_lock(struct file *filp, struct file_lock *fl); bool vfs_inode_has_locks(struct inode *inode); int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); + +void locks_init_lease(struct file_lease *); +void locks_free_lease(struct file_lease *fl); +struct file_lease *locks_alloc_lease(void); int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); void lease_get_mtime(struct inode *, struct timespec64 *time); -int generic_setlease(struct file *, int, struct file_lock **, void **priv); -int vfs_setlease(struct file *, int, struct file_lock **, void **); -int lease_modify(struct file_lock *, int, struct list_head *); +int generic_setlease(struct file *, int, struct file_lease **, void **priv); +int kernel_setlease(struct file *, int, struct file_lease **, void **); +int vfs_setlease(struct file *, int, struct file_lease **, void **); +int lease_modify(struct file_lease *, int, struct list_head *); struct notifier_block; int lease_register_notifier(struct notifier_block *); @@ -223,6 +261,25 @@ static inline int fcntl_getlease(struct file *filp) return F_UNLCK; } +static inline bool lock_is_unlock(struct file_lock *fl) +{ + return false; +} + +static inline bool lock_is_read(struct file_lock *fl) +{ + return false; +} + +static inline bool lock_is_write(struct file_lock *fl) +{ + return false; +} + +static inline void locks_wake_up(struct file_lock *fl) +{ +} + static inline void locks_free_lock_context(struct inode *inode) { @@ -233,6 +290,11 @@ static inline void locks_init_lock(struct file_lock *fl) return; } +static inline void locks_init_lease(struct file_lease *fl) +{ + return; +} + static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) { return; @@ -307,18 +369,24 @@ static inline void lease_get_mtime(struct inode *inode, } static inline int generic_setlease(struct file *filp, int arg, - struct file_lock **flp, void **priv) + struct file_lease **flp, void **priv) +{ + return -EINVAL; +} + +static inline int kernel_setlease(struct file *filp, int arg, + struct file_lease **lease, void **priv) { return -EINVAL; } static inline int vfs_setlease(struct file *filp, int arg, - struct file_lock **lease, void **priv) + struct file_lease **lease, void **priv) { return -EINVAL; } -static inline int lease_modify(struct file_lock *fl, int arg, +static inline int lease_modify(struct file_lease *fl, int arg, struct list_head *dispose) { return -EINVAL; @@ -341,6 +409,9 @@ locks_inode_context(const struct inode *inode) #endif /* !CONFIG_FILE_LOCKING */ +/* for walking lists of file_locks linked by fl_list */ +#define for_each_file_lock(_fl, _head) list_for_each_entry(_fl, _head, c.flc_list) + static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) { return locks_lock_inode_wait(file_inode(filp), fl); diff --git a/include/linux/filter.h b/include/linux/filter.h index 68fb6c8142fe..c99bc3df2d28 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -72,6 +72,9 @@ struct ctl_table_header; /* unused opcode to mark special ldsx instruction. Same as BPF_IND */ #define BPF_PROBE_MEMSX 0x40 +/* unused opcode to mark special load instruction. Same as BPF_MSH */ +#define BPF_PROBE_MEM32 0xa0 + /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 @@ -547,24 +550,27 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ u64, __ur_3, u64, __ur_4, u64, __ur_5) -#define BPF_CALL_x(x, name, ...) \ +#define BPF_CALL_x(x, attr, name, ...) \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ - u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ - u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ + attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ + attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ { \ return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ } \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) -#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__) -#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) -#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__) -#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__) -#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) -#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) +#define __NOATTR +#define BPF_CALL_0(name, ...) BPF_CALL_x(0, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_1(name, ...) BPF_CALL_x(1, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_2(name, ...) BPF_CALL_x(2, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_3(name, ...) BPF_CALL_x(3, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_4(name, ...) BPF_CALL_x(4, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_5(name, ...) BPF_CALL_x(5, __NOATTR, name, __VA_ARGS__) + +#define NOTRACE_BPF_CALL_1(name, ...) BPF_CALL_x(1, notrace, name, __VA_ARGS__) #define bpf_ctx_range(TYPE, MEMBER) \ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 @@ -955,6 +961,8 @@ bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_jit_supports_far_kfunc_call(void); bool bpf_jit_supports_exceptions(void); +bool bpf_jit_supports_ptr_xchg(void); +bool bpf_jit_supports_arena(void); void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); bool bpf_helper_changes_pkt_data(void *func); @@ -1139,7 +1147,7 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) return false; if (!bpf_jit_harden) return false; - if (bpf_jit_harden == 1 && bpf_capable()) + if (bpf_jit_harden == 1 && bpf_token_capable(prog->aux->token, CAP_BPF)) return false; return true; diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 89a6888f2f9e..6aeebe0a6777 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -2,6 +2,7 @@ #ifndef _LINUX_FORTIFY_STRING_H_ #define _LINUX_FORTIFY_STRING_H_ +#include <linux/bitfield.h> #include <linux/bug.h> #include <linux/const.h> #include <linux/limits.h> @@ -9,7 +10,46 @@ #define __FORTIFY_INLINE extern __always_inline __gnu_inline __overloadable #define __RENAME(x) __asm__(#x) -void fortify_panic(const char *name) __noreturn __cold; +#define FORTIFY_REASON_DIR(r) FIELD_GET(BIT(0), r) +#define FORTIFY_REASON_FUNC(r) FIELD_GET(GENMASK(7, 1), r) +#define FORTIFY_REASON(func, write) (FIELD_PREP(BIT(0), write) | \ + FIELD_PREP(GENMASK(7, 1), func)) + +#ifndef fortify_panic +# define fortify_panic(func, write, avail, size, retfail) \ + __fortify_panic(FORTIFY_REASON(func, write), avail, size) +#endif + +#define FORTIFY_READ 0 +#define FORTIFY_WRITE 1 + +#define EACH_FORTIFY_FUNC(macro) \ + macro(strncpy), \ + macro(strnlen), \ + macro(strlen), \ + macro(strscpy), \ + macro(strlcat), \ + macro(strcat), \ + macro(strncat), \ + macro(memset), \ + macro(memcpy), \ + macro(memmove), \ + macro(memscan), \ + macro(memcmp), \ + macro(memchr), \ + macro(memchr_inv), \ + macro(kmemdup), \ + macro(strcpy), \ + macro(UNKNOWN), + +#define MAKE_FORTIFY_FUNC(func) FORTIFY_FUNC_##func + +enum fortify_func { + EACH_FORTIFY_FUNC(MAKE_FORTIFY_FUNC) +}; + +void __fortify_report(const u8 reason, const size_t avail, const size_t size); +void __fortify_panic(const u8 reason, const size_t avail, const size_t size) __cold __noreturn; void __read_overflow(void) __compiletime_error("detected read beyond size of object (1st parameter)"); void __read_overflow2(void) __compiletime_error("detected read beyond size of object (2nd parameter)"); void __read_overflow2_field(size_t avail, size_t wanted) __compiletime_warning("detected read beyond size of field (2nd parameter); maybe use struct_group()?"); @@ -143,7 +183,7 @@ char *strncpy(char * const POS p, const char *q, __kernel_size_t size) if (__compiletime_lessthan(p_size, size)) __write_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strncpy, FORTIFY_WRITE, p_size, size, p); return __underlying_strncpy(p, q, size); } @@ -174,7 +214,7 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size /* Do not check characters beyond the end of p. */ ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); if (p_size <= ret && maxlen != ret) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strnlen, FORTIFY_READ, p_size, ret + 1, ret); return ret; } @@ -210,31 +250,13 @@ __kernel_size_t __fortify_strlen(const char * const POS p) return __underlying_strlen(p); ret = strnlen(p, p_size); if (p_size <= ret) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strlen, FORTIFY_READ, p_size, ret + 1, ret); return ret; } /* Defined after fortified strnlen() to reuse it. */ -extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy); -/** - * strscpy - Copy a C-string into a sized buffer - * - * @p: Where to copy the string to - * @q: Where to copy the string from - * @size: Size of destination buffer - * - * Copy the source string @q, or as much of it as fits, into the destination - * @p buffer. The behavior is undefined if the string buffers overlap. The - * destination @p buffer is always NUL terminated, unless it's zero-sized. - * - * Preferred to strncpy() since it always returns a valid string, and - * doesn't unnecessarily force the tail of the destination buffer to be - * zero padded. If padding is desired please use strscpy_pad(). - * - * Returns the number of characters copied in @p (not including the - * trailing %NUL) or -E2BIG if @size is 0 or the copy of @q was truncated. - */ -__FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, size_t size) +extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(sized_strscpy); +__FORTIFY_INLINE ssize_t sized_strscpy(char * const POS p, const char * const POS q, size_t size) { /* Use string size rather than possible enclosing struct size. */ const size_t p_size = __member_size(p); @@ -278,8 +300,8 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s * Generate a runtime write overflow error if len is greater than * p_size. */ - if (len > p_size) - fortify_panic(__func__); + if (p_size < len) + fortify_panic(FORTIFY_FUNC_strscpy, FORTIFY_WRITE, p_size, len, -E2BIG); /* * We can now safely call vanilla strscpy because we are protected from: @@ -337,7 +359,7 @@ size_t strlcat(char * const POS p, const char * const POS q, size_t avail) /* Give up if string is already overflowed. */ if (p_size <= p_len) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strlcat, FORTIFY_READ, p_size, p_len + 1, wanted); if (actual >= avail) { copy_len = avail - p_len - 1; @@ -346,7 +368,7 @@ size_t strlcat(char * const POS p, const char * const POS q, size_t avail) /* Give up if copy will overflow. */ if (p_size <= actual) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strlcat, FORTIFY_WRITE, p_size, actual + 1, wanted); __underlying_memcpy(p + p_len, q, copy_len); p[actual] = '\0'; @@ -373,9 +395,10 @@ __FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2) char *strcat(char * const POS p, const char *q) { const size_t p_size = __member_size(p); + const size_t wanted = strlcat(p, q, p_size); - if (strlcat(p, q, p_size) >= p_size) - fortify_panic(__func__); + if (p_size <= wanted) + fortify_panic(FORTIFY_FUNC_strcat, FORTIFY_WRITE, p_size, wanted + 1, p); return p; } @@ -404,20 +427,21 @@ char *strncat(char * const POS p, const char * const POS q, __kernel_size_t coun { const size_t p_size = __member_size(p); const size_t q_size = __member_size(q); - size_t p_len, copy_len; + size_t p_len, copy_len, total; if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __underlying_strncat(p, q, count); p_len = strlen(p); copy_len = strnlen(q, count); - if (p_size < p_len + copy_len + 1) - fortify_panic(__func__); + total = p_len + copy_len + 1; + if (p_size < total) + fortify_panic(FORTIFY_FUNC_strncat, FORTIFY_WRITE, p_size, total, p); __underlying_memcpy(p + p_len, q, copy_len); p[p_len + copy_len] = '\0'; return p; } -__FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, +__FORTIFY_INLINE bool fortify_memset_chk(__kernel_size_t size, const size_t p_size, const size_t p_size_field) { @@ -452,7 +476,8 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, * lengths are unknown.) */ if (p_size != SIZE_MAX && p_size < size) - fortify_panic("memset"); + fortify_panic(FORTIFY_FUNC_memset, FORTIFY_WRITE, p_size, size, true); + return false; } #define __fortify_memset_chk(p, c, size, p_size, p_size_field) ({ \ @@ -506,7 +531,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, const size_t q_size, const size_t p_size_field, const size_t q_size_field, - const char *func) + const u8 func) { if (__builtin_constant_p(size)) { /* @@ -550,9 +575,10 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, * (The SIZE_MAX test is to optimize away checks where the buffer * lengths are unknown.) */ - if ((p_size != SIZE_MAX && p_size < size) || - (q_size != SIZE_MAX && q_size < size)) - fortify_panic(func); + if (p_size != SIZE_MAX && p_size < size) + fortify_panic(func, FORTIFY_WRITE, p_size, size, true); + else if (q_size != SIZE_MAX && q_size < size) + fortify_panic(func, FORTIFY_READ, p_size, size, true); /* * Warn when writing beyond destination field size. @@ -585,7 +611,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, const size_t __q_size_field = (q_size_field); \ WARN_ONCE(fortify_memcpy_chk(__fortify_size, __p_size, \ __q_size, __p_size_field, \ - __q_size_field, #op), \ + __q_size_field, FORTIFY_FUNC_ ##op), \ #op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \ __fortify_size, \ "field \"" #p "\" at " FILE_LINE, \ @@ -652,7 +678,7 @@ __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size) if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_memscan, FORTIFY_READ, p_size, size, NULL); return __real_memscan(p, c, size); } @@ -668,8 +694,10 @@ int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t if (__compiletime_lessthan(q_size, size)) __read_overflow2(); } - if (p_size < size || q_size < size) - fortify_panic(__func__); + if (p_size < size) + fortify_panic(FORTIFY_FUNC_memcmp, FORTIFY_READ, p_size, size, INT_MIN); + else if (q_size < size) + fortify_panic(FORTIFY_FUNC_memcmp, FORTIFY_READ, q_size, size, INT_MIN); return __underlying_memcmp(p, q, size); } @@ -681,7 +709,7 @@ void *memchr(const void * const POS0 p, int c, __kernel_size_t size) if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_memchr, FORTIFY_READ, p_size, size, NULL); return __underlying_memchr(p, c, size); } @@ -693,7 +721,7 @@ __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size) if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_memchr_inv, FORTIFY_READ, p_size, size, NULL); return __real_memchr_inv(p, c, size); } @@ -706,7 +734,7 @@ __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_kmemdup, FORTIFY_READ, p_size, size, NULL); return __real_kmemdup(p, size, gfp); } @@ -743,7 +771,7 @@ char *strcpy(char * const POS p, const char * const POS q) __write_overflow(); /* Run-time check for dynamic size overflow. */ if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strcpy, FORTIFY_WRITE, p_size, size, p); __underlying_memcpy(p, q, size); return p; } diff --git a/include/linux/framer/framer-provider.h b/include/linux/framer/framer-provider.h index 782cd5fc83d5..9724d4b44b9c 100644 --- a/include/linux/framer/framer-provider.h +++ b/include/linux/framer/framer-provider.h @@ -83,7 +83,6 @@ struct framer_ops { /** * struct framer_provider - represents the framer provider * @dev: framer provider device - * @children: can be used to override the default (dev->of_node) child node * @owner: the module owner having of_xlate * @list: to maintain a linked list of framer providers * @of_xlate: function pointer to obtain framer instance from framer pointer @@ -93,7 +92,7 @@ struct framer_provider { struct module *owner; struct list_head list; struct framer * (*of_xlate)(struct device *dev, - struct of_phandle_args *args); + const struct of_phandle_args *args); }; static inline void framer_set_drvdata(struct framer *framer, void *data) @@ -118,19 +117,19 @@ struct framer *devm_framer_create(struct device *dev, struct device_node *node, const struct framer_ops *ops); struct framer *framer_provider_simple_of_xlate(struct device *dev, - struct of_phandle_args *args); + const struct of_phandle_args *args); struct framer_provider * __framer_provider_of_register(struct device *dev, struct module *owner, struct framer *(*of_xlate)(struct device *dev, - struct of_phandle_args *args)); + const struct of_phandle_args *args)); void framer_provider_of_unregister(struct framer_provider *framer_provider); struct framer_provider * __devm_framer_provider_of_register(struct device *dev, struct module *owner, struct framer *(*of_xlate)(struct device *dev, - struct of_phandle_args *args)); + const struct of_phandle_args *args)); void framer_notify_status_change(struct framer *framer); @@ -154,7 +153,7 @@ static inline struct framer *devm_framer_create(struct device *dev, struct devic } static inline struct framer *framer_provider_simple_of_xlate(struct device *dev, - struct of_phandle_args *args) + const struct of_phandle_args *args) { return ERR_PTR(-ENOSYS); } @@ -162,7 +161,7 @@ static inline struct framer *framer_provider_simple_of_xlate(struct device *dev, static inline struct framer_provider * __framer_provider_of_register(struct device *dev, struct module *owner, struct framer *(*of_xlate)(struct device *dev, - struct of_phandle_args *args)) + const struct of_phandle_args *args)) { return ERR_PTR(-ENOSYS); } @@ -174,7 +173,7 @@ void framer_provider_of_unregister(struct framer_provider *framer_provider) static inline struct framer_provider * __devm_framer_provider_of_register(struct device *dev, struct module *owner, struct framer *(*of_xlate)(struct device *dev, - struct of_phandle_args *args)) + const struct of_phandle_args *args)) { return ERR_PTR(-ENOSYS); } diff --git a/include/linux/fs.h b/include/linux/fs.h index ed5966a70495..0a22b7245982 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -43,6 +43,8 @@ #include <linux/cred.h> #include <linux/mnt_idmapping.h> #include <linux/slab.h> +#include <linux/maple_tree.h> +#include <linux/rw_hint.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> @@ -309,19 +311,6 @@ struct address_space; struct writeback_control; struct readahead_control; -/* - * Write life time hint values. - * Stored in struct inode as u8. - */ -enum rw_hint { - WRITE_LIFE_NOT_SET = 0, - WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, - WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, - WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, - WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, - WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, -}; - /* Match RWF_* bits to IOCB bits */ #define IOCB_HIPRI (__force int) RWF_HIPRI #define IOCB_DSYNC (__force int) RWF_DSYNC @@ -352,6 +341,8 @@ enum rw_hint { * unrelated IO (like cache flushing, new IO generation, etc). */ #define IOCB_DIO_CALLER_COMP (1 << 22) +/* kiocb is a read or write operation submitted by fs/aio.c. */ +#define IOCB_AIO_RW (1 << 23) /* for use in trace events */ #define TRACE_IOCB_STRINGS \ @@ -482,10 +473,10 @@ struct address_space { pgoff_t writeback_index; const struct address_space_operations *a_ops; unsigned long flags; - struct rw_semaphore i_mmap_rwsem; errseq_t wb_err; spinlock_t i_private_lock; struct list_head i_private_list; + struct rw_semaphore i_mmap_rwsem; void * i_private_data; } __attribute__((aligned(sizeof(long)))) __randomize_layout; /* @@ -677,7 +668,7 @@ struct inode { spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; - u8 i_write_hint; + enum rw_hint i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED @@ -907,7 +898,8 @@ static inline loff_t i_size_read(const struct inode *inode) preempt_enable(); return i_size; #else - return inode->i_size; + /* Pairs with smp_store_release() in i_size_write() */ + return smp_load_acquire(&inode->i_size); #endif } @@ -929,7 +921,12 @@ static inline void i_size_write(struct inode *inode, loff_t i_size) inode->i_size = i_size; preempt_enable(); #else - inode->i_size = i_size; + /* + * Pairs with smp_load_acquire() in i_size_read() to ensure + * changes related to inode size (such as page contents) are + * visible before we see the changed inode size. + */ + smp_store_release(&inode->i_size, i_size); #endif } @@ -1064,6 +1061,7 @@ struct file *get_file_active(struct file **f); typedef void *fl_owner_t; struct file_lock; +struct file_lease; /* The following constant reflects the upper bound of the file/locking space */ #ifndef OFFSET_MAX @@ -1078,9 +1076,20 @@ static inline struct inode *file_inode(const struct file *f) return f->f_inode; } +/* + * file_dentry() is a relic from the days that overlayfs was using files with a + * "fake" path, meaning, f_path on overlayfs and f_inode on underlying fs. + * In those days, file_dentry() was needed to get the underlying fs dentry that + * matches f_inode. + * Files with "fake" path should not exist nowadays, so use an assertion to make + * sure that file_dentry() was not papering over filesystem bugs. + */ static inline struct dentry *file_dentry(const struct file *file) { - return d_real(file->f_path.dentry, file_inode(file)); + struct dentry *dentry = file->f_path.dentry; + + WARN_ON_ONCE(d_inode(dentry) != file_inode(file)); + return dentry; } struct fasync_struct { @@ -1228,8 +1237,8 @@ struct super_block { #endif struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */ - struct block_device *s_bdev; - struct bdev_handle *s_bdev_handle; + struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */ + struct file *s_bdev_file; struct backing_dev_info *s_bdi; struct mtd_info *s_mtd; struct hlist_node s_instances; @@ -1255,8 +1264,22 @@ struct super_block { struct fsnotify_mark_connector __rcu *s_fsnotify_marks; #endif + /* + * q: why are s_id and s_sysfs_name not the same? both are human + * readable strings that identify the filesystem + * a: s_id is allowed to change at runtime; it's used in log messages, + * and we want to when a device starts out as single device (s_id is dev + * name) but then a device is hot added and we have to switch to + * identifying it by UUID + * but s_sysfs_name is a handle for programmatic access, and can't + * change at runtime + */ char s_id[32]; /* Informational name */ uuid_t s_uuid; /* UUID */ + u8 s_uuid_len; /* Default 16, possibly smaller for weird filesystems */ + + /* if set, fs shows up under sysfs at /sys/fs/$FSTYP/s_sysfs_name */ + char s_sysfs_name[UUID_STRING_LEN + 1]; unsigned int s_max_links; @@ -2005,7 +2028,7 @@ struct file_operations { ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); void (*splice_eof)(struct file *file); - int (*setlease)(struct file *, int, struct file_lock **, void **); + int (*setlease)(struct file *, int, struct file_lease **, void **); long (*fallocate)(struct file *file, int mode, loff_t offset, loff_t len); void (*show_fdinfo)(struct seq_file *m, struct file *f); @@ -2101,9 +2124,6 @@ int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, unsigned int remap_flags); -extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t len, unsigned int remap_flags); extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); @@ -2532,6 +2552,44 @@ extern __printf(2, 3) int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); extern int super_setup_bdi(struct super_block *sb); +static inline void super_set_uuid(struct super_block *sb, const u8 *uuid, unsigned len) +{ + if (WARN_ON(len > sizeof(sb->s_uuid))) + len = sizeof(sb->s_uuid); + sb->s_uuid_len = len; + memcpy(&sb->s_uuid, uuid, len); +} + +/* set sb sysfs name based on sb->s_bdev */ +static inline void super_set_sysfs_name_bdev(struct super_block *sb) +{ + snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pg", sb->s_bdev); +} + +/* set sb sysfs name based on sb->s_uuid */ +static inline void super_set_sysfs_name_uuid(struct super_block *sb) +{ + WARN_ON(sb->s_uuid_len != sizeof(sb->s_uuid)); + snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pU", sb->s_uuid.b); +} + +/* set sb sysfs name based on sb->s_id */ +static inline void super_set_sysfs_name_id(struct super_block *sb) +{ + strscpy(sb->s_sysfs_name, sb->s_id, sizeof(sb->s_sysfs_name)); +} + +/* try to use something standard before you use this */ +__printf(2, 3) +static inline void super_set_sysfs_name_generic(struct super_block *sb, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vsnprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), fmt, args); + va_end(args); +} + extern int current_umask(void); extern void ihold(struct inode * inode); @@ -2928,6 +2986,17 @@ extern bool path_is_under(const struct path *, const struct path *); extern char *file_path(struct file *, char *, int); +/** + * is_dot_dotdot - returns true only if @name is "." or ".." + * @name: file name to check + * @len: length of file name, in bytes + */ +static inline bool is_dot_dotdot(const char *name, size_t len) +{ + return len && unlikely(name[0] == '.') && + (len == 1 || (len == 2 && name[1] == '.')); +} + #include <linux/err.h> /* needed for stackable file system support */ @@ -3238,7 +3307,7 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); -extern int simple_nosetlease(struct file *, int, struct file_lock **, void **); +extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern const struct dentry_operations simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); @@ -3260,13 +3329,14 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, const void __user *from, size_t count); struct offset_ctx { - struct xarray xa; - u32 next_offset; + struct maple_tree mt; + unsigned long next_offset; }; void simple_offset_init(struct offset_ctx *octx); int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); +int simple_offset_empty(struct dentry *dentry); int simple_offset_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, @@ -3280,7 +3350,16 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); -extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry); +extern void generic_set_sb_d_ops(struct super_block *sb); + +static inline bool sb_has_encoding(const struct super_block *sb) +{ +#if IS_ENABLED(CONFIG_UNICODE) + return !!sb->s_encoding; +#else + return false; +#endif +} int may_setattr(struct mnt_idmap *idmap, struct inode *inode, unsigned int ia_valid); @@ -3335,6 +3414,8 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) return 0; if (unlikely(flags & ~RWF_SUPPORTED)) return -EOPNOTSUPP; + if (unlikely((flags & RWF_APPEND) && (flags & RWF_NOAPPEND))) + return -EINVAL; if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) @@ -3345,6 +3426,12 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) if (flags & RWF_SYNC) kiocb_flags |= IOCB_DSYNC; + if ((flags & RWF_NOAPPEND) && (ki->ki_flags & IOCB_APPEND)) { + if (IS_APPEND(file_inode(ki->ki_filp))) + return -EPERM; + ki->ki_flags &= ~IOCB_APPEND; + } + ki->ki_flags |= kiocb_flags; return 0; } diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 12f9e455d569..772f822dc6b8 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -192,6 +192,8 @@ struct fscrypt_operations { unsigned int *num_devs); }; +int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); + static inline struct fscrypt_inode_info * fscrypt_get_inode_info(const struct inode *inode) { @@ -221,15 +223,29 @@ static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) } /* - * When d_splice_alias() moves a directory's no-key alias to its plaintext alias - * as a result of the encryption key being added, DCACHE_NOKEY_NAME must be - * cleared. Note that we don't have to support arbitrary moves of this flag - * because fscrypt doesn't allow no-key names to be the source or target of a - * rename(). + * When d_splice_alias() moves a directory's no-key alias to its + * plaintext alias as a result of the encryption key being added, + * DCACHE_NOKEY_NAME must be cleared and there might be an opportunity + * to disable d_revalidate. Note that we don't have to support the + * inverse operation because fscrypt doesn't allow no-key names to be + * the source or target of a rename(). */ static inline void fscrypt_handle_d_move(struct dentry *dentry) { - dentry->d_flags &= ~DCACHE_NOKEY_NAME; + /* + * VFS calls fscrypt_handle_d_move even for non-fscrypt + * filesystems. + */ + if (dentry->d_flags & DCACHE_NOKEY_NAME) { + dentry->d_flags &= ~DCACHE_NOKEY_NAME; + + /* + * Other filesystem features might be handling dentry + * revalidation, in which case it cannot be disabled. + */ + if (dentry->d_op->d_revalidate == fscrypt_d_revalidate) + dentry->d_flags &= ~DCACHE_OP_REVALIDATE; + } } /** @@ -261,6 +277,35 @@ static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) return dentry->d_flags & DCACHE_NOKEY_NAME; } +static inline void fscrypt_prepare_dentry(struct dentry *dentry, + bool is_nokey_name) +{ + /* + * This code tries to only take ->d_lock when necessary to write + * to ->d_flags. We shouldn't be peeking on d_flags for + * DCACHE_OP_REVALIDATE unlocked, but in the unlikely case + * there is a race, the worst it can happen is that we fail to + * unset DCACHE_OP_REVALIDATE and pay the cost of an extra + * d_revalidate. + */ + if (is_nokey_name) { + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_NOKEY_NAME; + spin_unlock(&dentry->d_lock); + } else if (dentry->d_flags & DCACHE_OP_REVALIDATE && + dentry->d_op->d_revalidate == fscrypt_d_revalidate) { + /* + * Unencrypted dentries and encrypted dentries where the + * key is available are always valid from fscrypt + * perspective. Avoid the cost of calling + * fscrypt_d_revalidate unnecessarily. + */ + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_OP_REVALIDATE; + spin_unlock(&dentry->d_lock); + } +} + /* crypto.c */ void fscrypt_enqueue_decrypt_work(struct work_struct *); @@ -368,7 +413,6 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode, bool fscrypt_match_name(const struct fscrypt_name *fname, const u8 *de_name, u32 de_name_len); u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name); -int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); /* bio.c */ bool fscrypt_decrypt_bio(struct bio *bio); @@ -425,6 +469,11 @@ static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) return false; } +static inline void fscrypt_prepare_dentry(struct dentry *dentry, + bool is_nokey_name) +{ +} + /* crypto.c */ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { @@ -982,6 +1031,9 @@ static inline int fscrypt_prepare_lookup(struct inode *dir, fname->usr_fname = &dentry->d_name; fname->disk_name.name = (unsigned char *)dentry->d_name.name; fname->disk_name.len = dentry->d_name.len; + + fscrypt_prepare_dentry(dentry, false); + return 0; } diff --git a/include/linux/gfp.h b/include/linux/gfp.h index de292a007138..937c2a9b6e54 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -311,15 +311,23 @@ extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); struct page_frag_cache; +void page_frag_cache_drain(struct page_frag_cache *nc); extern void __page_frag_cache_drain(struct page *page, unsigned int count); -extern void *page_frag_alloc_align(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask, - unsigned int align_mask); +void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, + gfp_t gfp_mask, unsigned int align_mask); + +static inline void *page_frag_alloc_align(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask, + unsigned int align) +{ + WARN_ON_ONCE(!is_power_of_2(align)); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); +} static inline void *page_frag_alloc(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask) { - return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); } extern void page_frag_free(void *addr); @@ -353,6 +361,15 @@ static inline bool gfp_has_io_fs(gfp_t gfp) return (gfp & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS); } +/* + * Check if the gfp flags allow compaction - GFP_NOIO is a really + * tricky context because the migration might require IO. + */ +static inline bool gfp_compaction_allowed(gfp_t gfp_mask) +{ + return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO); +} + extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma); #ifdef CONFIG_CONTIG_ALLOC diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 9a5c6c76e653..7f75c9a51874 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -819,6 +819,24 @@ static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) return ERR_PTR(-ENODEV); } +static inline struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) +{ + WARN_ON(1); + return ERR_PTR(-ENODEV); +} + +static inline int gpio_device_get_base(struct gpio_device *gdev) +{ + WARN_ON(1); + return -ENODEV; +} + +static inline const char *gpio_device_get_label(struct gpio_device *gdev) +{ + WARN_ON(1); + return NULL; +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 840cd254172d..7118ac28d468 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -77,17 +77,6 @@ enum hid_bpf_attach_flags { int hid_bpf_device_event(struct hid_bpf_ctx *ctx); int hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx); -/* Following functions are kfunc that we export to BPF programs */ -/* available everywhere in HID-BPF */ -__u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t __sz); - -/* only available in syscall */ -int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags); -int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, - enum hid_report_type rtype, enum hid_class_request reqtype); -struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id); -void hid_bpf_release_context(struct hid_bpf_ctx *ctx); - /* * Below is HID internal */ diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 87e3bedf8eb0..aa1e65ccb615 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -18,12 +18,8 @@ #include <linux/list.h> #include <linux/percpu-defs.h> #include <linux/rbtree.h> -#include <linux/seqlock.h> #include <linux/timer.h> -struct hrtimer_clock_base; -struct hrtimer_cpu_base; - /* * Mode arguments of xxx_hrtimer functions: * @@ -98,105 +94,6 @@ struct hrtimer_sleeper { struct task_struct *task; }; -#ifdef CONFIG_64BIT -# define __hrtimer_clock_base_align ____cacheline_aligned -#else -# define __hrtimer_clock_base_align -#endif - -/** - * struct hrtimer_clock_base - the timer base for a specific clock - * @cpu_base: per cpu clock base - * @index: clock type index for per_cpu support when moving a - * timer to a base on another cpu. - * @clockid: clock id for per_cpu support - * @seq: seqcount around __run_hrtimer - * @running: pointer to the currently running hrtimer - * @active: red black tree root node for the active timers - * @get_time: function to retrieve the current time of the clock - * @offset: offset of this clock to the monotonic base - */ -struct hrtimer_clock_base { - struct hrtimer_cpu_base *cpu_base; - unsigned int index; - clockid_t clockid; - seqcount_raw_spinlock_t seq; - struct hrtimer *running; - struct timerqueue_head active; - ktime_t (*get_time)(void); - ktime_t offset; -} __hrtimer_clock_base_align; - -enum hrtimer_base_type { - HRTIMER_BASE_MONOTONIC, - HRTIMER_BASE_REALTIME, - HRTIMER_BASE_BOOTTIME, - HRTIMER_BASE_TAI, - HRTIMER_BASE_MONOTONIC_SOFT, - HRTIMER_BASE_REALTIME_SOFT, - HRTIMER_BASE_BOOTTIME_SOFT, - HRTIMER_BASE_TAI_SOFT, - HRTIMER_MAX_CLOCK_BASES, -}; - -/** - * struct hrtimer_cpu_base - the per cpu clock bases - * @lock: lock protecting the base and associated clock bases - * and timers - * @cpu: cpu number - * @active_bases: Bitfield to mark bases with active timers - * @clock_was_set_seq: Sequence counter of clock was set events - * @hres_active: State of high resolution mode - * @in_hrtirq: hrtimer_interrupt() is currently executing - * @hang_detected: The last hrtimer interrupt detected a hang - * @softirq_activated: displays, if the softirq is raised - update of softirq - * related settings is not required then. - * @nr_events: Total number of hrtimer interrupt events - * @nr_retries: Total number of hrtimer interrupt retries - * @nr_hangs: Total number of hrtimer interrupt hangs - * @max_hang_time: Maximum time spent in hrtimer_interrupt - * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are - * expired - * @timer_waiters: A hrtimer_cancel() invocation waits for the timer - * callback to finish. - * @expires_next: absolute time of the next event, is required for remote - * hrtimer enqueue; it is the total first expiry time (hard - * and soft hrtimer are taken into account) - * @next_timer: Pointer to the first expiring timer - * @softirq_expires_next: Time to check, if soft queues needs also to be expired - * @softirq_next_timer: Pointer to the first expiring softirq based timer - * @clock_base: array of clock bases for this cpu - * - * Note: next_timer is just an optimization for __remove_hrtimer(). - * Do not dereference the pointer because it is not reliable on - * cross cpu removals. - */ -struct hrtimer_cpu_base { - raw_spinlock_t lock; - unsigned int cpu; - unsigned int active_bases; - unsigned int clock_was_set_seq; - unsigned int hres_active : 1, - in_hrtirq : 1, - hang_detected : 1, - softirq_activated : 1; -#ifdef CONFIG_HIGH_RES_TIMERS - unsigned int nr_events; - unsigned short nr_retries; - unsigned short nr_hangs; - unsigned int max_hang_time; -#endif -#ifdef CONFIG_PREEMPT_RT - spinlock_t softirq_expiry_lock; - atomic_t timer_waiters; -#endif - ktime_t expires_next; - struct hrtimer *next_timer; - ktime_t softirq_expires_next; - struct hrtimer *softirq_next_timer; - struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; -} ____cacheline_aligned; - static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { timer->node.expires = time; @@ -445,20 +342,12 @@ extern u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); /** - * hrtimer_forward_now - forward the timer expiry so it expires after now + * hrtimer_forward_now() - forward the timer expiry so it expires after now * @timer: hrtimer to forward * @interval: the interval to forward * - * Forward the timer expiry so it will expire after the current time - * of the hrtimer clock base. Returns the number of overruns. - * - * Can be safely called from the callback function of @timer. If - * called from other contexts @timer must neither be enqueued nor - * running the callback and the caller needs to take care of - * serialization. - * - * Note: This only updates the timer expiry value and does not requeue - * the timer. + * It is a variant of hrtimer_forward(). The timer will expire after the current + * time of the hrtimer clock base. See hrtimer_forward() for details. */ static inline u64 hrtimer_forward_now(struct hrtimer *timer, ktime_t interval) diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h index 2d3e3c5fb946..c3b4b7ed7c16 100644 --- a/include/linux/hrtimer_defs.h +++ b/include/linux/hrtimer_defs.h @@ -3,6 +3,8 @@ #define _LINUX_HRTIMER_DEFS_H #include <linux/ktime.h> +#include <linux/timerqueue.h> +#include <linux/seqlock.h> #ifdef CONFIG_HIGH_RES_TIMERS @@ -24,4 +26,106 @@ #endif +#ifdef CONFIG_64BIT +# define __hrtimer_clock_base_align ____cacheline_aligned +#else +# define __hrtimer_clock_base_align +#endif + +/** + * struct hrtimer_clock_base - the timer base for a specific clock + * @cpu_base: per cpu clock base + * @index: clock type index for per_cpu support when moving a + * timer to a base on another cpu. + * @clockid: clock id for per_cpu support + * @seq: seqcount around __run_hrtimer + * @running: pointer to the currently running hrtimer + * @active: red black tree root node for the active timers + * @get_time: function to retrieve the current time of the clock + * @offset: offset of this clock to the monotonic base + */ +struct hrtimer_clock_base { + struct hrtimer_cpu_base *cpu_base; + unsigned int index; + clockid_t clockid; + seqcount_raw_spinlock_t seq; + struct hrtimer *running; + struct timerqueue_head active; + ktime_t (*get_time)(void); + ktime_t offset; +} __hrtimer_clock_base_align; + +enum hrtimer_base_type { + HRTIMER_BASE_MONOTONIC, + HRTIMER_BASE_REALTIME, + HRTIMER_BASE_BOOTTIME, + HRTIMER_BASE_TAI, + HRTIMER_BASE_MONOTONIC_SOFT, + HRTIMER_BASE_REALTIME_SOFT, + HRTIMER_BASE_BOOTTIME_SOFT, + HRTIMER_BASE_TAI_SOFT, + HRTIMER_MAX_CLOCK_BASES, +}; + +/** + * struct hrtimer_cpu_base - the per cpu clock bases + * @lock: lock protecting the base and associated clock bases + * and timers + * @cpu: cpu number + * @active_bases: Bitfield to mark bases with active timers + * @clock_was_set_seq: Sequence counter of clock was set events + * @hres_active: State of high resolution mode + * @in_hrtirq: hrtimer_interrupt() is currently executing + * @hang_detected: The last hrtimer interrupt detected a hang + * @softirq_activated: displays, if the softirq is raised - update of softirq + * related settings is not required then. + * @nr_events: Total number of hrtimer interrupt events + * @nr_retries: Total number of hrtimer interrupt retries + * @nr_hangs: Total number of hrtimer interrupt hangs + * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are + * expired + * @online: CPU is online from an hrtimers point of view + * @timer_waiters: A hrtimer_cancel() invocation waits for the timer + * callback to finish. + * @expires_next: absolute time of the next event, is required for remote + * hrtimer enqueue; it is the total first expiry time (hard + * and soft hrtimer are taken into account) + * @next_timer: Pointer to the first expiring timer + * @softirq_expires_next: Time to check, if soft queues needs also to be expired + * @softirq_next_timer: Pointer to the first expiring softirq based timer + * @clock_base: array of clock bases for this cpu + * + * Note: next_timer is just an optimization for __remove_hrtimer(). + * Do not dereference the pointer because it is not reliable on + * cross cpu removals. + */ +struct hrtimer_cpu_base { + raw_spinlock_t lock; + unsigned int cpu; + unsigned int active_bases; + unsigned int clock_was_set_seq; + unsigned int hres_active : 1, + in_hrtirq : 1, + hang_detected : 1, + softirq_activated : 1, + online : 1; +#ifdef CONFIG_HIGH_RES_TIMERS + unsigned int nr_events; + unsigned short nr_retries; + unsigned short nr_hangs; + unsigned int max_hang_time; +#endif +#ifdef CONFIG_PREEMPT_RT + spinlock_t softirq_expiry_lock; + atomic_t timer_waiters; +#endif + ktime_t expires_next; + struct hrtimer *next_timer; + ktime_t softirq_expires_next; + struct hrtimer *softirq_next_timer; + struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; +} ____cacheline_aligned; + + #endif diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 2b00faf98017..6ef0557b4bff 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -164,8 +164,28 @@ struct hv_ring_buffer { u8 buffer[]; } __packed; + +/* + * If the requested ring buffer size is at least 8 times the size of the + * header, steal space from the ring buffer for the header. Otherwise, add + * space for the header so that is doesn't take too much of the ring buffer + * space. + * + * The factor of 8 is somewhat arbitrary. The goal is to prevent adding a + * relatively small header (4 Kbytes on x86) to a large-ish power-of-2 ring + * buffer size (such as 128 Kbytes) and so end up making a nearly twice as + * large allocation that will be almost half wasted. As a contrasting example, + * on ARM64 with 64 Kbyte page size, we don't want to take 64 Kbytes for the + * header from a 128 Kbyte allocation, leaving only 64 Kbytes for the ring. + * In this latter case, we must add 64 Kbytes for the header and not worry + * about what's wasted. + */ +#define VMBUS_HEADER_ADJ(payload_sz) \ + ((payload_sz) >= 8 * sizeof(struct hv_ring_buffer) ? \ + 0 : sizeof(struct hv_ring_buffer)) + /* Calculate the proper size of a ringbuffer, it must be page-aligned */ -#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(sizeof(struct hv_ring_buffer) + \ +#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(VMBUS_HEADER_ADJ(payload_sz) + \ (payload_sz)) struct hv_ring_buffer_info { diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 83c4d060a559..3385a2cc5b09 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -9,7 +9,7 @@ * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net> * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright (c) 2016 - 2017 Intel Deutschland GmbH - * Copyright (c) 2018 - 2023 Intel Corporation + * Copyright (c) 2018 - 2024 Intel Corporation */ #ifndef LINUX_IEEE80211_H @@ -191,6 +191,11 @@ static inline bool ieee80211_sn_less(u16 sn1, u16 sn2) return ((sn1 - sn2) & IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1); } +static inline bool ieee80211_sn_less_eq(u16 sn1, u16 sn2) +{ + return ((sn2 - sn1) & IEEE80211_SN_MASK) <= (IEEE80211_SN_MODULO >> 1); +} + static inline u16 ieee80211_sn_add(u16 sn1, u16 sn2) { return (sn1 + sn2) & IEEE80211_SN_MASK; @@ -808,6 +813,11 @@ static inline bool ieee80211_is_frag(struct ieee80211_hdr *hdr) hdr->seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG); } +static inline u16 ieee80211_get_sn(struct ieee80211_hdr *hdr) +{ + return le16_get_bits(hdr->seq_ctrl, IEEE80211_SCTL_SEQ); +} + struct ieee80211s_hdr { u8 flags; u8 ttl; @@ -1454,6 +1464,20 @@ struct ieee80211_mgmt { u8 max_tod_error; u8 max_toa_error; } __packed wnm_timing_msr; + struct { + u8 action_code; + u8 dialog_token; + u8 variable[]; + } __packed ttlm_req; + struct { + u8 action_code; + u8 dialog_token; + u8 status_code; + u8 variable[]; + } __packed ttlm_res; + struct { + u8 action_code; + } __packed ttlm_tear_down; } u; } __packed action; DECLARE_FLEX_ARRAY(u8, body); /* Generic frame body */ @@ -3036,6 +3060,9 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define IEEE80211_EHT_PHY_CAP5_SUPP_EXTRA_EHT_LTF 0x40 #define IEEE80211_EHT_PHY_CAP6_MAX_NUM_SUPP_EHT_LTF_MASK 0x07 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ 0x08 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ 0x30 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_320MHZ 0x40 #define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_MASK 0x78 #define IEEE80211_EHT_PHY_CAP6_EHT_DUP_6GHZ_SUPP 0x80 @@ -3175,6 +3202,22 @@ ieee80211_eht_oper_size_ok(const u8 *data, u8 len) return len >= needed; } +/* must validate ieee80211_eht_oper_size_ok() first */ +static inline u16 +ieee80211_eht_oper_dis_subchan_bitmap(const struct ieee80211_eht_operation *eht_oper) +{ + const struct ieee80211_eht_operation_info *info = + (const void *)eht_oper->optional; + + if (!(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) + return 0; + + if (!(eht_oper->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT)) + return 0; + + return get_unaligned_le16(info->optional); +} + #define IEEE80211_BW_IND_DIS_SUBCH_PRESENT BIT(1) struct ieee80211_bandwidth_indication { @@ -3357,6 +3400,8 @@ enum ieee80211_statuscode { WLAN_STATUS_UNKNOWN_AUTHENTICATION_SERVER = 109, WLAN_STATUS_SAE_HASH_TO_ELEMENT = 126, WLAN_STATUS_SAE_PK = 127, + WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING = 133, + WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED = 134, }; @@ -3682,6 +3727,7 @@ enum ieee80211_category { WLAN_CATEGORY_UNPROT_DMG = 20, WLAN_CATEGORY_VHT = 21, WLAN_CATEGORY_S1G = 22, + WLAN_CATEGORY_PROTECTED_EHT = 37, WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126, WLAN_CATEGORY_VENDOR_SPECIFIC = 127, }; @@ -3745,6 +3791,13 @@ enum ieee80211_unprotected_wnm_actioncode { WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE = 1, }; +/* Protected EHT action codes */ +enum ieee80211_protected_eht_actioncode { + WLAN_PROTECTED_EHT_ACTION_TTLM_REQ = 0, + WLAN_PROTECTED_EHT_ACTION_TTLM_RES = 1, + WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN = 2, +}; + /* Security key length */ enum ieee80211_key_len { WLAN_KEY_LEN_WEP40 = 5, @@ -4845,6 +4898,10 @@ struct ieee80211_multi_link_elem { #define IEEE80211_MLD_CAP_OP_MAX_SIMUL_LINKS 0x000f #define IEEE80211_MLD_CAP_OP_SRS_SUPPORT 0x0010 #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP 0x0060 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_NO_SUPP 0 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_SAME 1 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_RESERVED 2 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_DIFF 3 #define IEEE80211_MLD_CAP_OP_FREQ_SEP_TYPE_IND 0x0f80 #define IEEE80211_MLD_CAP_OP_AAR_SUPPORT 0x1000 @@ -4908,18 +4965,43 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) } /** + * ieee80211_mle_get_link_id - returns the link ID + * @data: the basic multi link element + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + * + * If the BSS link ID can't be found, -1 will be returned + */ +static inline int ieee80211_mle_get_link_id(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* common points now at the beginning of ieee80211_mle_basic_common_info */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_LINK_ID)) + return -1; + + return *common; +} + +/** * ieee80211_mle_get_bss_param_ch_cnt - returns the BSS parameter change count - * @mle: the basic multi link element + * @data: pointer to the basic multi link element * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). * * If the BSS parameter change count value can't be found (the presence bit - * for it is clear), 0 will be returned. + * for it is clear), -1 will be returned. */ -static inline u8 -ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle) +static inline int +ieee80211_mle_get_bss_param_ch_cnt(const u8 *data) { + const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; @@ -4927,7 +5009,7 @@ ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle) common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT)) - return 0; + return -1; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; @@ -4997,6 +5079,81 @@ static inline u16 ieee80211_mle_get_eml_cap(const u8 *data) } /** + * ieee80211_mle_get_mld_capa_op - returns the MLD capabilities and operations. + * @data: pointer to the multi link EHT IE + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + * + * If the MLD capabilities and operations field is not present, 0 will be + * returned. + */ +static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* + * common points now at the beginning of + * ieee80211_mle_basic_common_info + */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP)) + return 0; + + if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) + common += 2; + + return get_unaligned_le16(common); +} + +/** + * ieee80211_mle_get_mld_id - returns the MLD ID + * @data: pointer to the multi link element + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + * + * If the MLD ID is not present, 0 will be returned. + */ +static inline u8 ieee80211_mle_get_mld_id(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* + * common points now at the beginning of + * ieee80211_mle_basic_common_info + */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_ID)) + return 0; + + if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP) + common += 2; + + return *common; +} + +/** * ieee80211_mle_size_ok - validate multi-link element size * @data: pointer to the element data * @len: length of the containing element diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 2a7660843444..043d442994b0 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -27,44 +27,54 @@ struct tun_xdp_hdr { #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); struct ptr_ring *tun_get_tx_ring(struct file *file); + static inline bool tun_is_xdp_frame(void *ptr) { - return (unsigned long)ptr & TUN_XDP_FLAG; + return (unsigned long)ptr & TUN_XDP_FLAG; } + static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp) { - return (void *)((unsigned long)xdp | TUN_XDP_FLAG); + return (void *)((unsigned long)xdp | TUN_XDP_FLAG); } + static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr) { - return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG); + return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG); } + void tun_ptr_free(void *ptr); #else #include <linux/err.h> #include <linux/errno.h> struct file; struct socket; + static inline struct socket *tun_get_socket(struct file *f) { return ERR_PTR(-EINVAL); } + static inline struct ptr_ring *tun_get_tx_ring(struct file *f) { return ERR_PTR(-EINVAL); } + static inline bool tun_is_xdp_frame(void *ptr) { return false; } + static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp) { return NULL; } + static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr) { return NULL; } + static inline void tun_ptr_free(void *ptr) { } diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 7852f6c9a714..719cf9cc6e1a 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -8,6 +8,8 @@ #ifndef __AD_SIGMA_DELTA_H__ #define __AD_SIGMA_DELTA_H__ +#include <linux/iio/iio.h> + enum ad_sigma_delta_mode { AD_SD_MODE_CONTINUOUS = 0, AD_SD_MODE_SINGLE = 1, @@ -99,7 +101,7 @@ struct ad_sigma_delta { * 'rx_buf' is up to 32 bits per sample + 64 bit timestamp, * rounded to 16 bytes to take into account padding. */ - uint8_t tx_buf[4] ____cacheline_aligned; + uint8_t tx_buf[4] __aligned(IIO_DMA_MINALIGN); uint8_t rx_buf[16] __aligned(8); }; diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 607c3a89a647..f9ae5cdd884f 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -258,9 +258,9 @@ struct st_sensor_data { bool hw_irq_trigger; s64 hw_timestamp; - char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned; - struct mutex odr_lock; + + char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] __aligned(IIO_DMA_MINALIGN); }; #ifdef CONFIG_IIO_BUFFER diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index dc9ea299e088..8898966bc0f0 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -11,6 +11,7 @@ #include <linux/spi/spi.h> #include <linux/interrupt.h> +#include <linux/iio/iio.h> #include <linux/iio/types.h> #define ADIS_WRITE_REG(reg) ((0x80 | (reg))) @@ -131,7 +132,7 @@ struct adis { unsigned long irq_flag; void *buffer; - u8 tx[10] ____cacheline_aligned; + u8 tx[10] __aligned(IIO_DMA_MINALIGN); u8 rx[4]; }; diff --git a/include/linux/ima.h b/include/linux/ima.h index 86b57757c7b1..0bae61a15b60 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -16,23 +16,6 @@ struct linux_binprm; #ifdef CONFIG_IMA extern enum hash_algo ima_get_current_hash_algo(void); -extern int ima_bprm_check(struct linux_binprm *bprm); -extern int ima_file_check(struct file *file, int mask); -extern void ima_post_create_tmpfile(struct mnt_idmap *idmap, - struct inode *inode); -extern void ima_file_free(struct file *file); -extern int ima_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags); -extern int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot); -extern int ima_load_data(enum kernel_load_data_id id, bool contents); -extern int ima_post_load_data(char *buf, loff_t size, - enum kernel_load_data_id id, char *description); -extern int ima_read_file(struct file *file, enum kernel_read_file_id id, - bool contents); -extern int ima_post_read_file(struct file *file, void *buf, loff_t size, - enum kernel_read_file_id id); -extern void ima_post_path_mknod(struct mnt_idmap *idmap, - struct dentry *dentry); extern int ima_file_hash(struct file *file, char *buf, size_t buf_size); extern int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size); extern void ima_kexec_cmdline(int kernel_fd, const void *buf, int size); @@ -57,68 +40,6 @@ static inline enum hash_algo ima_get_current_hash_algo(void) return HASH_ALGO__LAST; } -static inline int ima_bprm_check(struct linux_binprm *bprm) -{ - return 0; -} - -static inline int ima_file_check(struct file *file, int mask) -{ - return 0; -} - -static inline void ima_post_create_tmpfile(struct mnt_idmap *idmap, - struct inode *inode) -{ -} - -static inline void ima_file_free(struct file *file) -{ - return; -} - -static inline int ima_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags) -{ - return 0; -} - -static inline int ima_file_mprotect(struct vm_area_struct *vma, - unsigned long prot) -{ - return 0; -} - -static inline int ima_load_data(enum kernel_load_data_id id, bool contents) -{ - return 0; -} - -static inline int ima_post_load_data(char *buf, loff_t size, - enum kernel_load_data_id id, - char *description) -{ - return 0; -} - -static inline int ima_read_file(struct file *file, enum kernel_read_file_id id, - bool contents) -{ - return 0; -} - -static inline int ima_post_read_file(struct file *file, void *buf, loff_t size, - enum kernel_read_file_id id) -{ - return 0; -} - -static inline void ima_post_path_mknod(struct mnt_idmap *idmap, - struct dentry *dentry) -{ - return; -} - static inline int ima_file_hash(struct file *file, char *buf, size_t buf_size) { return -EOPNOTSUPP; @@ -169,76 +90,13 @@ static inline void ima_add_kexec_buffer(struct kimage *image) {} #endif -#ifdef CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS -extern void ima_post_key_create_or_update(struct key *keyring, - struct key *key, - const void *payload, size_t plen, - unsigned long flags, bool create); -#else -static inline void ima_post_key_create_or_update(struct key *keyring, - struct key *key, - const void *payload, - size_t plen, - unsigned long flags, - bool create) {} -#endif /* CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS */ - #ifdef CONFIG_IMA_APPRAISE extern bool is_ima_appraise_enabled(void); -extern void ima_inode_post_setattr(struct mnt_idmap *idmap, - struct dentry *dentry); -extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name, - const void *xattr_value, size_t xattr_value_len); -extern int ima_inode_set_acl(struct mnt_idmap *idmap, - struct dentry *dentry, const char *acl_name, - struct posix_acl *kacl); -static inline int ima_inode_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return ima_inode_set_acl(idmap, dentry, acl_name, NULL); -} -extern int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name); #else static inline bool is_ima_appraise_enabled(void) { return 0; } - -static inline void ima_inode_post_setattr(struct mnt_idmap *idmap, - struct dentry *dentry) -{ - return; -} - -static inline int ima_inode_setxattr(struct dentry *dentry, - const char *xattr_name, - const void *xattr_value, - size_t xattr_value_len) -{ - return 0; -} - -static inline int ima_inode_set_acl(struct mnt_idmap *idmap, - struct dentry *dentry, const char *acl_name, - struct posix_acl *kacl) -{ - - return 0; -} - -static inline int ima_inode_removexattr(struct dentry *dentry, - const char *xattr_name) -{ - return 0; -} - -static inline int ima_inode_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return 0; -} #endif /* CONFIG_IMA_APPRAISE */ #if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING) diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index adb83a42a6b9..35227d47cfc9 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -2,7 +2,7 @@ #ifndef _LINUX_INDIRECT_CALL_WRAPPER_H #define _LINUX_INDIRECT_CALL_WRAPPER_H -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE /* * INDIRECT_CALL_$NR - wrapper for indirect calls with $NR known builtin diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 84abb30a3fbb..a9033696b0aa 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -8,6 +8,7 @@ struct inet_hashinfo; struct inet_diag_handler { + struct module *owner; void (*dump)(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r); diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ddb27fc0ee8c..cb5280e6cc21 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -53,13 +53,15 @@ struct in_device { }; #define IPV4_DEVCONF(cnf, attr) ((cnf).data[IPV4_DEVCONF_ ## attr - 1]) +#define IPV4_DEVCONF_RO(cnf, attr) READ_ONCE(IPV4_DEVCONF(cnf, attr)) #define IPV4_DEVCONF_ALL(net, attr) \ IPV4_DEVCONF((*(net)->ipv4.devconf_all), attr) +#define IPV4_DEVCONF_ALL_RO(net, attr) READ_ONCE(IPV4_DEVCONF_ALL(net, attr)) -static inline int ipv4_devconf_get(struct in_device *in_dev, int index) +static inline int ipv4_devconf_get(const struct in_device *in_dev, int index) { index--; - return in_dev->cnf.data[index]; + return READ_ONCE(in_dev->cnf.data[index]); } static inline void ipv4_devconf_set(struct in_device *in_dev, int index, @@ -67,7 +69,7 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index, { index--; set_bit(index, in_dev->cnf.state); - in_dev->cnf.data[index] = val; + WRITE_ONCE(in_dev->cnf.data[index], val); } static inline void ipv4_devconf_setall(struct in_device *in_dev) @@ -81,18 +83,18 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) ipv4_devconf_set((in_dev), IPV4_DEVCONF_ ## attr, (val)) #define IN_DEV_ANDCONF(in_dev, attr) \ - (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr) && \ + (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr) && \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_NET_ORCONF(in_dev, net, attr) \ - (IPV4_DEVCONF_ALL(net, attr) || \ + (IPV4_DEVCONF_ALL_RO(net, attr) || \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_ORCONF(in_dev, attr) \ IN_DEV_NET_ORCONF(in_dev, dev_net(in_dev->dev), attr) #define IN_DEV_MAXCONF(in_dev, attr) \ - (max(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr), \ + (max(IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr), \ IN_DEV_CONF_GET((in_dev), attr))) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) diff --git a/include/linux/integrity.h b/include/linux/integrity.h index 2ea0f2f65ab6..459b79683783 100644 --- a/include/linux/integrity.h +++ b/include/linux/integrity.h @@ -19,40 +19,13 @@ enum integrity_status { INTEGRITY_UNKNOWN, }; -/* List of EVM protected security xattrs */ #ifdef CONFIG_INTEGRITY -extern struct integrity_iint_cache *integrity_inode_get(struct inode *inode); -extern void integrity_inode_free(struct inode *inode); extern void __init integrity_load_keys(void); #else -static inline struct integrity_iint_cache * - integrity_inode_get(struct inode *inode) -{ - return NULL; -} - -static inline void integrity_inode_free(struct inode *inode) -{ - return; -} - static inline void integrity_load_keys(void) { } #endif /* CONFIG_INTEGRITY */ -#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS - -extern int integrity_kernel_module_request(char *kmod_name); - -#else - -static inline int integrity_kernel_module_request(char *kmod_name) -{ - return 0; -} - -#endif /* CONFIG_INTEGRITY_ASYMMETRIC_KEYS */ - #endif /* _LINUX_INTEGRITY_H */ diff --git a/include/linux/io.h b/include/linux/io.h index 7304f2a69960..235ba7d80a8f 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -23,12 +23,19 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count); #ifdef CONFIG_MMU int ioremap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot); +int vmap_page_range(unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot); #else static inline int ioremap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { return 0; } +static inline int vmap_page_range(unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot) +{ + return 0; +} #endif /* diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 854ad67a5f70..e24893625085 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -2,6 +2,7 @@ #define IO_URING_TYPES_H #include <linux/blkdev.h> +#include <linux/hashtable.h> #include <linux/task_work.h> #include <linux/bitmap.h> #include <linux/llist.h> @@ -240,12 +241,14 @@ struct io_ring_ctx { unsigned int poll_activated: 1; unsigned int drain_disabled: 1; unsigned int compat: 1; + unsigned int iowq_limits_set : 1; struct task_struct *submitter_task; struct io_rings *rings; struct percpu_ref refs; enum task_work_notify_mode notify_method; + unsigned sq_thread_idle; } ____cacheline_aligned_in_smp; /* submission data */ @@ -274,10 +277,20 @@ struct io_ring_ctx { */ struct io_rsrc_node *rsrc_node; atomic_t cancel_seq; + + /* + * ->iopoll_list is protected by the ctx->uring_lock for + * io_uring instances that don't use IORING_SETUP_SQPOLL. + * For SQPOLL, only the single threaded io_sq_thread() will + * manipulate the list, hence no extra locking is needed there. + */ + bool poll_multi_queue; + struct io_wq_work_list iopoll_list; + struct io_file_table file_table; + struct io_mapped_ubuf **user_bufs; unsigned nr_user_files; unsigned nr_user_bufs; - struct io_mapped_ubuf **user_bufs; struct io_submit_state submit_state; @@ -289,15 +302,6 @@ struct io_ring_ctx { struct io_alloc_cache netmsg_cache; /* - * ->iopoll_list is protected by the ctx->uring_lock for - * io_uring instances that don't use IORING_SETUP_SQPOLL. - * For SQPOLL, only the single threaded io_sq_thread() will - * manipulate the list, hence no extra locking is needed there. - */ - struct io_wq_work_list iopoll_list; - bool poll_multi_queue; - - /* * Any cancelable uring_cmd is added to this list in * ->uring_cmd() by io_uring_cmd_insert_cancelable() */ @@ -343,8 +347,8 @@ struct io_ring_ctx { spinlock_t completion_lock; /* IRQ completion list, under ->completion_lock */ - struct io_wq_work_list locked_free_list; unsigned int locked_free_nr; + struct io_wq_work_list locked_free_list; struct list_head io_buffers_comp; struct list_head cq_overflow_list; @@ -366,9 +370,6 @@ struct io_ring_ctx { unsigned int file_alloc_start; unsigned int file_alloc_end; - struct xarray personalities; - u32 pers_next; - struct list_head io_buffers_cache; /* deferred free list, protected by ->uring_lock */ @@ -389,6 +390,9 @@ struct io_ring_ctx { struct wait_queue_head rsrc_quiesce_wq; unsigned rsrc_quiesce; + u32 pers_next; + struct xarray personalities; + /* hashed buffered write serialization */ struct io_wq_hash *hash_map; @@ -405,11 +409,22 @@ struct io_ring_ctx { /* io-wq management, e.g. thread count */ u32 iowq_limits[2]; - bool iowq_limits_set; struct callback_head poll_wq_task_work; struct list_head defer_list; - unsigned sq_thread_idle; + +#ifdef CONFIG_NET_RX_BUSY_POLL + struct list_head napi_list; /* track busy poll napi_id */ + spinlock_t napi_lock; /* napi_list lock */ + + /* napi busy poll default timeout */ + unsigned int napi_busy_poll_to; + bool napi_prefer_busy_poll; + bool napi_enabled; + + DECLARE_HASHTABLE(napi_ht, 4); +#endif + /* protected by ->completion_lock */ unsigned evfd_last_cq_tail; @@ -455,7 +470,6 @@ enum { REQ_F_SKIP_LINK_CQES_BIT, REQ_F_SINGLE_POLL_BIT, REQ_F_DOUBLE_POLL_BIT, - REQ_F_PARTIAL_IO_BIT, REQ_F_APOLL_MULTISHOT_BIT, REQ_F_CLEAR_POLLIN_BIT, REQ_F_HASH_LOCKED_BIT, @@ -463,75 +477,88 @@ enum { REQ_F_SUPPORT_NOWAIT_BIT, REQ_F_ISREG_BIT, REQ_F_POLL_NO_LAZY_BIT, + REQ_F_CANCEL_SEQ_BIT, + REQ_F_CAN_POLL_BIT, + REQ_F_BL_EMPTY_BIT, + REQ_F_BL_NO_RECYCLE_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, }; +typedef u64 __bitwise io_req_flags_t; +#define IO_REQ_FLAG(bitno) ((__force io_req_flags_t) BIT_ULL((bitno))) + enum { /* ctx owns file */ - REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT), + REQ_F_FIXED_FILE = IO_REQ_FLAG(REQ_F_FIXED_FILE_BIT), /* drain existing IO first */ - REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT), + REQ_F_IO_DRAIN = IO_REQ_FLAG(REQ_F_IO_DRAIN_BIT), /* linked sqes */ - REQ_F_LINK = BIT(REQ_F_LINK_BIT), + REQ_F_LINK = IO_REQ_FLAG(REQ_F_LINK_BIT), /* doesn't sever on completion < 0 */ - REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT), + REQ_F_HARDLINK = IO_REQ_FLAG(REQ_F_HARDLINK_BIT), /* IOSQE_ASYNC */ - REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT), + REQ_F_FORCE_ASYNC = IO_REQ_FLAG(REQ_F_FORCE_ASYNC_BIT), /* IOSQE_BUFFER_SELECT */ - REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), + REQ_F_BUFFER_SELECT = IO_REQ_FLAG(REQ_F_BUFFER_SELECT_BIT), /* IOSQE_CQE_SKIP_SUCCESS */ - REQ_F_CQE_SKIP = BIT(REQ_F_CQE_SKIP_BIT), + REQ_F_CQE_SKIP = IO_REQ_FLAG(REQ_F_CQE_SKIP_BIT), /* fail rest of links */ - REQ_F_FAIL = BIT(REQ_F_FAIL_BIT), + REQ_F_FAIL = IO_REQ_FLAG(REQ_F_FAIL_BIT), /* on inflight list, should be cancelled and waited on exit reliably */ - REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT), + REQ_F_INFLIGHT = IO_REQ_FLAG(REQ_F_INFLIGHT_BIT), /* read/write uses file position */ - REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), + REQ_F_CUR_POS = IO_REQ_FLAG(REQ_F_CUR_POS_BIT), /* must not punt to workers */ - REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), + REQ_F_NOWAIT = IO_REQ_FLAG(REQ_F_NOWAIT_BIT), /* has or had linked timeout */ - REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), + REQ_F_LINK_TIMEOUT = IO_REQ_FLAG(REQ_F_LINK_TIMEOUT_BIT), /* needs cleanup */ - REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), + REQ_F_NEED_CLEANUP = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT), /* already went through poll handler */ - REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), + REQ_F_POLLED = IO_REQ_FLAG(REQ_F_POLLED_BIT), /* buffer already selected */ - REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), + REQ_F_BUFFER_SELECTED = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT), /* buffer selected from ring, needs commit */ - REQ_F_BUFFER_RING = BIT(REQ_F_BUFFER_RING_BIT), + REQ_F_BUFFER_RING = IO_REQ_FLAG(REQ_F_BUFFER_RING_BIT), /* caller should reissue async */ - REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT), + REQ_F_REISSUE = IO_REQ_FLAG(REQ_F_REISSUE_BIT), /* supports async reads/writes */ - REQ_F_SUPPORT_NOWAIT = BIT(REQ_F_SUPPORT_NOWAIT_BIT), + REQ_F_SUPPORT_NOWAIT = IO_REQ_FLAG(REQ_F_SUPPORT_NOWAIT_BIT), /* regular file */ - REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), + REQ_F_ISREG = IO_REQ_FLAG(REQ_F_ISREG_BIT), /* has creds assigned */ - REQ_F_CREDS = BIT(REQ_F_CREDS_BIT), + REQ_F_CREDS = IO_REQ_FLAG(REQ_F_CREDS_BIT), /* skip refcounting if not set */ - REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT), + REQ_F_REFCOUNT = IO_REQ_FLAG(REQ_F_REFCOUNT_BIT), /* there is a linked timeout that has to be armed */ - REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT), + REQ_F_ARM_LTIMEOUT = IO_REQ_FLAG(REQ_F_ARM_LTIMEOUT_BIT), /* ->async_data allocated */ - REQ_F_ASYNC_DATA = BIT(REQ_F_ASYNC_DATA_BIT), + REQ_F_ASYNC_DATA = IO_REQ_FLAG(REQ_F_ASYNC_DATA_BIT), /* don't post CQEs while failing linked requests */ - REQ_F_SKIP_LINK_CQES = BIT(REQ_F_SKIP_LINK_CQES_BIT), + REQ_F_SKIP_LINK_CQES = IO_REQ_FLAG(REQ_F_SKIP_LINK_CQES_BIT), /* single poll may be active */ - REQ_F_SINGLE_POLL = BIT(REQ_F_SINGLE_POLL_BIT), + REQ_F_SINGLE_POLL = IO_REQ_FLAG(REQ_F_SINGLE_POLL_BIT), /* double poll may active */ - REQ_F_DOUBLE_POLL = BIT(REQ_F_DOUBLE_POLL_BIT), - /* request has already done partial IO */ - REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT), + REQ_F_DOUBLE_POLL = IO_REQ_FLAG(REQ_F_DOUBLE_POLL_BIT), /* fast poll multishot mode */ - REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT), + REQ_F_APOLL_MULTISHOT = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT), /* recvmsg special flag, clear EPOLLIN */ - REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT), + REQ_F_CLEAR_POLLIN = IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT), /* hashed into ->cancel_hash_locked, protected by ->uring_lock */ - REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT), + REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT), /* don't use lazy poll wake for this request */ - REQ_F_POLL_NO_LAZY = BIT(REQ_F_POLL_NO_LAZY_BIT), + REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT), + /* cancel sequence is set and valid */ + REQ_F_CANCEL_SEQ = IO_REQ_FLAG(REQ_F_CANCEL_SEQ_BIT), + /* file is pollable */ + REQ_F_CAN_POLL = IO_REQ_FLAG(REQ_F_CAN_POLL_BIT), + /* buffer list was empty after selection of buffer */ + REQ_F_BL_EMPTY = IO_REQ_FLAG(REQ_F_BL_EMPTY_BIT), + /* don't recycle provided buffers for this request */ + REQ_F_BL_NO_RECYCLE = IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT), }; typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts); @@ -592,15 +619,17 @@ struct io_kiocb { * and after selection it points to the buffer ID itself. */ u16 buf_index; - unsigned int flags; + + unsigned nr_tw; + + /* REQ_F_* flags */ + io_req_flags_t flags; struct io_cqe cqe; struct io_ring_ctx *ctx; struct task_struct *task; - struct io_rsrc_node *rsrc_node; - union { /* store used ubuf, so we can prevent reloading */ struct io_mapped_ubuf *imu; @@ -621,10 +650,12 @@ struct io_kiocb { /* cache ->apoll->events */ __poll_t apoll_events; }; + + struct io_rsrc_node *rsrc_node; + atomic_t refs; atomic_t poll_refs; struct io_task_work io_task_work; - unsigned nr_tw; /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ struct hlist_node hash_node; /* internal polling, see IORING_FEAT_FAST_POLL */ diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 96dd0acbba44..6fc1c858013d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -293,22 +293,32 @@ struct iomap_ioend { struct list_head io_list; /* next ioend in chain */ u16 io_type; u16 io_flags; /* IOMAP_F_* */ - u32 io_folios; /* folios added to ioend */ struct inode *io_inode; /* file being written to */ size_t io_size; /* size of the extent */ loff_t io_offset; /* offset in the file */ sector_t io_sector; /* start sector of ioend */ - struct bio *io_bio; /* bio being built */ - struct bio io_inline_bio; /* MUST BE LAST! */ + struct bio io_bio; /* MUST BE LAST! */ }; +static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio) +{ + return container_of(bio, struct iomap_ioend, io_bio); +} + struct iomap_writeback_ops { /* * Required, maps the blocks so that writeback can be performed on * the range starting at offset. + * + * Can return arbitrarily large regions, but we need to call into it at + * least once per folio to allow the file systems to synchronize with + * the write path that could be invalidating mappings. + * + * An existing mapping from a previous call to this method can be reused + * by the file system if it is still valid. */ int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, - loff_t offset); + loff_t offset, unsigned len); /* * Optional, allows the file systems to perform actions just before @@ -329,6 +339,7 @@ struct iomap_writepage_ctx { struct iomap iomap; struct iomap_ioend *ioend; const struct iomap_writeback_ops *ops; + u32 nr_folios; /* folios added to the ioend */ }; void iomap_finish_ioends(struct iomap_ioend *ioend, int error); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1ea2a820e1eb..2e925b5eba53 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -14,7 +14,6 @@ #include <linux/err.h> #include <linux/of.h> #include <linux/iova_bitmap.h> -#include <uapi/linux/iommu.h> #define IOMMU_READ (1 << 0) #define IOMMU_WRITE (1 << 1) @@ -41,8 +40,110 @@ struct iommu_domain_ops; struct iommu_dirty_ops; struct notifier_block; struct iommu_sva; -struct iommu_fault_event; struct iommu_dma_cookie; +struct iommu_fault_param; + +#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ +#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ +#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */ +#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */ + +/* Generic fault types, can be expanded IRQ remapping fault */ +enum iommu_fault_type { + IOMMU_FAULT_PAGE_REQ = 1, /* page request fault */ +}; + +/** + * struct iommu_fault_page_request - Page Request data + * @flags: encodes whether the corresponding fields are valid and whether this + * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values). + * When IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID is set, the page response + * must have the same PASID value as the page request. When it is clear, + * the page response should not have a PASID. + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @perm: requested page permissions (IOMMU_FAULT_PERM_* values) + * @addr: page address + * @private_data: device-specific private information + */ +struct iommu_fault_page_request { +#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0) +#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1) +#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2) +#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3) + u32 flags; + u32 pasid; + u32 grpid; + u32 perm; + u64 addr; + u64 private_data[2]; +}; + +/** + * struct iommu_fault - Generic fault data + * @type: fault type from &enum iommu_fault_type + * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ + */ +struct iommu_fault { + u32 type; + struct iommu_fault_page_request prm; +}; + +/** + * enum iommu_page_response_code - Return status of fault handlers + * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables + * populated, retry the access. This is "Success" in PCI PRI. + * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from + * this device if possible. This is "Response Failure" in PCI PRI. + * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the + * access. This is "Invalid Request" in PCI PRI. + */ +enum iommu_page_response_code { + IOMMU_PAGE_RESP_SUCCESS = 0, + IOMMU_PAGE_RESP_INVALID, + IOMMU_PAGE_RESP_FAILURE, +}; + +/** + * struct iommu_page_response - Generic page response information + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @code: response code from &enum iommu_page_response_code + */ +struct iommu_page_response { + u32 pasid; + u32 grpid; + u32 code; +}; + +struct iopf_fault { + struct iommu_fault fault; + /* node for pending lists */ + struct list_head list; +}; + +struct iopf_group { + struct iopf_fault last_fault; + struct list_head faults; + /* list node for iommu_fault_param::faults */ + struct list_head pending_node; + struct work_struct work; + struct iommu_domain *domain; + /* The device's fault data parameter. */ + struct iommu_fault_param *fault_param; +}; + +/** + * struct iopf_queue - IO Page Fault queue + * @wq: the fault workqueue + * @devices: devices attached to this queue + * @lock: protects the device list + */ +struct iopf_queue { + struct workqueue_struct *wq; + struct list_head devices; + struct mutex lock; +}; /* iommu fault flags */ #define IOMMU_FAULT_READ 0x0 @@ -50,7 +151,6 @@ struct iommu_dma_cookie; typedef int (*iommu_fault_handler_t)(struct iommu_domain *, struct device *, unsigned long, int, void *); -typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault *, void *); struct iommu_domain_geometry { dma_addr_t aperture_start; /* First address that can be mapped */ @@ -110,8 +210,7 @@ struct iommu_domain { unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ struct iommu_domain_geometry geometry; struct iommu_dma_cookie *iova_cookie; - enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault, - void *data); + int (*iopf_handler)(struct iopf_group *group); void *fault_data; union { struct { @@ -468,16 +567,15 @@ struct iommu_ops { /* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list); - int (*of_xlate)(struct device *dev, struct of_phandle_args *args); + int (*of_xlate)(struct device *dev, const struct of_phandle_args *args); bool (*is_attach_deferred)(struct device *dev); /* Per device IOMMU features */ int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); - int (*page_response)(struct device *dev, - struct iommu_fault_event *evt, - struct iommu_page_response *msg); + void (*page_response)(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *msg); int (*def_domain_type)(struct device *dev); void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid); @@ -487,6 +585,7 @@ struct iommu_ops { struct module *owner; struct iommu_domain *identity_domain; struct iommu_domain *blocked_domain; + struct iommu_domain *release_domain; struct iommu_domain *default_domain; }; @@ -578,38 +677,34 @@ struct iommu_device { }; /** - * struct iommu_fault_event - Generic fault event - * - * Can represent recoverable faults such as a page requests or - * unrecoverable faults such as DMA or IRQ remapping faults. - * - * @fault: fault descriptor - * @list: pending fault event list, used for tracking responses - */ -struct iommu_fault_event { - struct iommu_fault fault; - struct list_head list; -}; - -/** * struct iommu_fault_param - per-device IOMMU fault data - * @handler: Callback function to handle IOMMU faults at device level - * @data: handler private data - * @faults: holds the pending faults which needs response * @lock: protect pending faults list + * @users: user counter to manage the lifetime of the data + * @rcu: rcu head for kfree_rcu() + * @dev: the device that owns this param + * @queue: IOPF queue + * @queue_list: index into queue->devices + * @partial: faults that are part of a Page Request Group for which the last + * request hasn't been submitted yet. + * @faults: holds the pending faults which need response */ struct iommu_fault_param { - iommu_dev_fault_handler_t handler; - void *data; - struct list_head faults; struct mutex lock; + refcount_t users; + struct rcu_head rcu; + + struct device *dev; + struct iopf_queue *queue; + struct list_head queue_list; + + struct list_head partial; + struct list_head faults; }; /** * struct dev_iommu - Collection of per-device IOMMU data * * @fault_param: IOMMU detected device fault reporting data - * @iopf_param: I/O Page Fault queue and data * @fwspec: IOMMU fwspec data * @iommu_dev: IOMMU device this device is linked to * @priv: IOMMU Driver private data @@ -624,8 +719,7 @@ struct iommu_fault_param { */ struct dev_iommu { struct mutex lock; - struct iommu_fault_param *fault_param; - struct iopf_device_param *iopf_param; + struct iommu_fault_param __rcu *fault_param; struct iommu_fwspec *fwspec; struct iommu_device *iommu_dev; void *priv; @@ -654,6 +748,22 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev) return (struct iommu_device *)dev_get_drvdata(dev); } +/** + * iommu_get_iommu_dev - Get iommu_device for a device + * @dev: an end-point device + * + * Note that this function must be called from the iommu_ops + * to retrieve the iommu_device for a device, which the core code + * guarentees it will not invoke the op without an attached iommu. + */ +static inline struct iommu_device *__iommu_get_iommu_dev(struct device *dev) +{ + return dev->iommu->iommu_dev; +} + +#define iommu_get_iommu_dev(dev, type, member) \ + container_of(__iommu_get_iommu_dev(dev), type, member) + static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) { *gather = (struct iommu_iotlb_gather) { @@ -719,16 +829,6 @@ extern int iommu_group_for_each_dev(struct iommu_group *group, void *data, extern struct iommu_group *iommu_group_get(struct device *dev); extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group); extern void iommu_group_put(struct iommu_group *group); -extern int iommu_register_device_fault_handler(struct device *dev, - iommu_dev_fault_handler_t handler, - void *data); - -extern int iommu_unregister_device_fault_handler(struct device *dev); - -extern int iommu_report_device_fault(struct device *dev, - struct iommu_fault_event *evt); -extern int iommu_page_response(struct device *dev, - struct iommu_page_response *msg); extern int iommu_group_id(struct iommu_group *group); extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); @@ -892,18 +992,21 @@ struct iommu_fwspec { struct iommu_sva { struct device *dev; struct iommu_domain *domain; + struct list_head handle_item; + refcount_t users; }; struct iommu_mm_data { u32 pasid; struct list_head sva_domains; + struct list_head sva_handles; }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, const struct iommu_ops *ops); void iommu_fwspec_free(struct device *dev); -int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids); -const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode); +int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids); +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { @@ -945,8 +1048,6 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group); int iommu_device_claim_dma_owner(struct device *dev, void *owner); void iommu_device_release_dma_owner(struct device *dev); -struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, - struct mm_struct *mm); int iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); void iommu_detach_device_pasid(struct iommu_domain *domain, @@ -1135,31 +1236,6 @@ static inline void iommu_group_put(struct iommu_group *group) { } -static inline -int iommu_register_device_fault_handler(struct device *dev, - iommu_dev_fault_handler_t handler, - void *data) -{ - return -ENODEV; -} - -static inline int iommu_unregister_device_fault_handler(struct device *dev) -{ - return 0; -} - -static inline -int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) -{ - return -ENODEV; -} - -static inline int iommu_page_response(struct device *dev, - struct iommu_page_response *msg) -{ - return -ENODEV; -} - static inline int iommu_group_id(struct iommu_group *group) { return -ENODEV; @@ -1253,7 +1329,7 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids, } static inline -const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) { return NULL; } @@ -1308,12 +1384,6 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner) return -ENODEV; } -static inline struct iommu_domain * -iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) -{ - return NULL; -} - static inline int iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { @@ -1340,6 +1410,14 @@ static inline ioasid_t iommu_alloc_global_pasid(struct device *dev) static inline void iommu_free_global_pasid(ioasid_t pasid) {} #endif /* CONFIG_IOMMU_API */ +#if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API) +void iommu_group_mutex_assert(struct device *dev); +#else +static inline void iommu_group_mutex_assert(struct device *dev) +{ +} +#endif + /** * iommu_map_sgtable - Map the given buffer to the IOMMU domain * @domain: The IOMMU domain to perform the mapping @@ -1453,6 +1531,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm); void iommu_sva_unbind_device(struct iommu_sva *handle); u32 iommu_sva_get_pasid(struct iommu_sva *handle); +struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm); #else static inline struct iommu_sva * iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) @@ -1477,6 +1557,68 @@ static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) } static inline void mm_pasid_drop(struct mm_struct *mm) {} + +static inline struct iommu_domain * +iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) +{ + return NULL; +} #endif /* CONFIG_IOMMU_SVA */ +#ifdef CONFIG_IOMMU_IOPF +int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev); +void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev); +int iopf_queue_flush_dev(struct device *dev); +struct iopf_queue *iopf_queue_alloc(const char *name); +void iopf_queue_free(struct iopf_queue *queue); +int iopf_queue_discard_partial(struct iopf_queue *queue); +void iopf_free_group(struct iopf_group *group); +void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); +void iopf_group_response(struct iopf_group *group, + enum iommu_page_response_code status); +#else +static inline int +iopf_queue_add_device(struct iopf_queue *queue, struct device *dev) +{ + return -ENODEV; +} + +static inline void +iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) +{ +} + +static inline int iopf_queue_flush_dev(struct device *dev) +{ + return -ENODEV; +} + +static inline struct iopf_queue *iopf_queue_alloc(const char *name) +{ + return NULL; +} + +static inline void iopf_queue_free(struct iopf_queue *queue) +{ +} + +static inline int iopf_queue_discard_partial(struct iopf_queue *queue) +{ + return -ENODEV; +} + +static inline void iopf_free_group(struct iopf_group *group) +{ +} + +static inline void +iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) +{ +} + +static inline void iopf_group_response(struct iopf_group *group, + enum iommu_page_response_code status) +{ +} +#endif /* CONFIG_IOMMU_IOPF */ #endif /* __LINUX_IOMMU_H */ diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5e605e384aac..383a0ea2ab91 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -3,6 +3,7 @@ #define _IPV6_H #include <uapi/linux/ipv6.h> +#include <linux/cache.h> #define ipv6_optlen(p) (((p)->hdrlen+1) << 3) #define ipv6_authlen(p) (((p)->hdrlen+2) << 2) @@ -10,9 +11,16 @@ * This structure contains configuration options per IPv6 link. */ struct ipv6_devconf { - __s32 forwarding; + /* RX & TX fastpath fields. */ + __cacheline_group_begin(ipv6_devconf_read_txrx); + __s32 disable_ipv6; __s32 hop_limit; __s32 mtu6; + __s32 forwarding; + __s32 disable_policy; + __s32 proxy_ndp; + __cacheline_group_end(ipv6_devconf_read_txrx); + __s32 accept_ra; __s32 accept_redirects; __s32 autoconf; @@ -27,6 +35,7 @@ struct ipv6_devconf { __s32 use_tempaddr; __s32 temp_valid_lft; __s32 temp_prefered_lft; + __s32 regen_min_advance; __s32 regen_max_retry; __s32 max_desync_factor; __s32 max_addresses; @@ -44,7 +53,6 @@ struct ipv6_devconf { __s32 accept_ra_rt_info_max_plen; #endif #endif - __s32 proxy_ndp; __s32 accept_source_route; __s32 accept_ra_from_local; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -54,7 +62,6 @@ struct ipv6_devconf { #ifdef CONFIG_IPV6_MROUTE atomic_t mc_forwarding; #endif - __s32 disable_ipv6; __s32 drop_unicast_in_l2_multicast; __s32 accept_dad; __s32 force_tllao; @@ -75,7 +82,6 @@ struct ipv6_devconf { #endif __u32 enhanced_dad; __u32 addr_gen_mode; - __s32 disable_policy; __s32 ndisc_tclass; __s32 rpl_seg_enabled; __u32 ioam6_id; diff --git a/include/linux/irq.h b/include/linux/irq.h index 90081afa10ce..97baa937ab5b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -179,7 +179,7 @@ struct irq_common_data { struct irq_data { u32 mask; unsigned int irq; - unsigned long hwirq; + irq_hw_number_t hwirq; struct irq_common_data *common; struct irq_chip *chip; struct irq_domain *domain; diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index ee0a82c60508..21ecf582a0fe 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -619,6 +619,23 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain) #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ +#ifdef CONFIG_GENERIC_MSI_IRQ +int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, + unsigned int type); +void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq); +#else +static inline int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, + unsigned int type) +{ + WARN_ON_ONCE(1); + return -EINVAL; +} +static inline void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq) +{ + WARN_ON_ONCE(1); +} +#endif + #else /* CONFIG_IRQ_DOMAIN */ static inline void irq_dispose_mapping(unsigned int virq) { } static inline struct irq_domain *irq_find_matching_fwnode( diff --git a/include/linux/irqdomain_defs.h b/include/linux/irqdomain_defs.h index c29921fd8cd1..5c1fe6f1fcde 100644 --- a/include/linux/irqdomain_defs.h +++ b/include/linux/irqdomain_defs.h @@ -26,6 +26,8 @@ enum irq_domain_bus_token { DOMAIN_BUS_DMAR, DOMAIN_BUS_AMDVI, DOMAIN_BUS_PCI_DEVICE_IMS, + DOMAIN_BUS_DEVICE_MSI, + DOMAIN_BUS_WIRED_TO_MSI, }; #endif /* _LINUX_IRQDOMAIN_DEFS_H */ diff --git a/include/linux/irqhandler.h b/include/linux/irqhandler.h index c30f454a9518..72dd1eb3a0e7 100644 --- a/include/linux/irqhandler.h +++ b/include/linux/irqhandler.h @@ -8,7 +8,7 @@ */ struct irq_desc; -struct irq_data; + typedef void (*irq_flow_handler_t)(struct irq_desc *desc); #endif diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index e0ae2a43e0eb..d9f1435a5a13 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -102,12 +102,15 @@ static inline u64 get_jiffies_64(void) } #endif -/* - * These inlines deal with timer wrapping correctly. You are - * strongly encouraged to use them: - * 1. Because people otherwise forget - * 2. Because if the timer wrap changes in future you won't have to - * alter your driver code. +/** + * DOC: General information about time_* inlines + * + * These inlines deal with timer wrapping correctly. You are strongly encouraged + * to use them: + * + * #. Because people otherwise forget + * #. Because if the timer wrap changes in future you won't have to alter your + * driver code. */ /** diff --git a/include/linux/kasan.h b/include/linux/kasan.h index dbb06d789e74..70d6a8f6e25d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -429,7 +429,6 @@ struct kasan_cache { }; size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object); -slab_flags_t kasan_never_merge(void); void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags); @@ -446,11 +445,6 @@ static inline size_t kasan_metadata_size(struct kmem_cache *cache, { return 0; } -/* And thus nothing prevents cache merging. */ -static inline slab_flags_t kasan_never_merge(void) -{ - return 0; -} /* And no cache-related metadata initialization is required. */ static inline void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d9ad21058eed..d718fbec72dd 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -33,20 +33,14 @@ #include <linux/sprintf.h> #include <linux/static_call_types.h> #include <linux/instruction_pointer.h> +#include <linux/wordpart.h> + #include <asm/byteorder.h> #include <uapi/linux/kernel.h> #define STACK_MAGIC 0xdeadbeef -/** - * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value - * @x: value to repeat - * - * NOTE: @x is not checked for > 0xff; larger values produce odd results. - */ -#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) - /* generic data direction definitions */ #define READ 0 #define WRITE 1 @@ -60,34 +54,6 @@ } \ ) -/** - * upper_32_bits - return bits 32-63 of a number - * @n: the number we're accessing - * - * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress - * the "right shift count >= width of type" warning when that quantity is - * 32-bits. - */ -#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16)) - -/** - * lower_32_bits - return bits 0-31 of a number - * @n: the number we're accessing - */ -#define lower_32_bits(n) ((u32)((n) & 0xffffffff)) - -/** - * upper_16_bits - return bits 16-31 of a number - * @n: the number we're accessing - */ -#define upper_16_bits(n) ((u16)((n) >> 16)) - -/** - * lower_16_bits - return bits 0-15 of a number - * @n: the number we're accessing - */ -#define lower_16_bits(n) ((u16)((n) & 0xffff)) - struct completion; struct user; @@ -199,12 +165,6 @@ static inline void might_fault(void) { } void do_exit(long error_code) __noreturn; -extern int get_option(char **str, int *pint); -extern char *get_options(const char *str, int nints, int *ints); -extern unsigned long long memparse(const char *ptr, char **retptr); -extern bool parse_option_str(const char *str, const char *option); -extern char *next_arg(char *args, char **param, char **val); - extern int core_kernel_text(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7e7fd25b09b3..179df96b20f8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2031,6 +2031,32 @@ static inline int mmu_invalidate_retry_gfn(struct kvm *kvm, return 1; return 0; } + +/* + * This lockless version of the range-based retry check *must* be paired with a + * call to the locked version after acquiring mmu_lock, i.e. this is safe to + * use only as a pre-check to avoid contending mmu_lock. This version *will* + * get false negatives and false positives. + */ +static inline bool mmu_invalidate_retry_gfn_unsafe(struct kvm *kvm, + unsigned long mmu_seq, + gfn_t gfn) +{ + /* + * Use READ_ONCE() to ensure the in-progress flag and sequence counter + * are always read from memory, e.g. so that checking for retry in a + * loop won't result in an infinite retry loop. Don't force loads for + * start+end, as the key to avoiding infinite retry loops is observing + * the 1=>0 transition of in-progress, i.e. getting false negatives + * due to stale start+end values is acceptable. + */ + if (unlikely(READ_ONCE(kvm->mmu_invalidate_in_progress)) && + gfn >= kvm->mmu_invalidate_range_start && + gfn < kvm->mmu_invalidate_range_end) + return true; + + return READ_ONCE(kvm->mmu_invalidate_seq) != mmu_seq; +} #endif #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING diff --git a/include/linux/libata.h b/include/linux/libata.h index 1dbb14daccfa..26d68115afb8 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -471,7 +471,7 @@ enum ata_completion_errors { /* * Link power management policy: If you alter this, you also need to - * alter libata-scsi.c (for the ascii descriptions) + * alter libata-sata.c (for the ascii descriptions) */ enum ata_lpm_policy { ATA_LPM_UNKNOWN, diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 9f565416d186..1b95fe31051f 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -375,12 +375,12 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) static inline int nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2) { - return file_inode(fl1->fl_file) == file_inode(fl2->fl_file) - && fl1->fl_pid == fl2->fl_pid - && fl1->fl_owner == fl2->fl_owner + return file_inode(fl1->c.flc_file) == file_inode(fl2->c.flc_file) + && fl1->c.flc_pid == fl2->c.flc_pid + && fl1->c.flc_owner == fl2->c.flc_owner && fl1->fl_start == fl2->fl_start && fl1->fl_end == fl2->fl_end - &&(fl1->fl_type == fl2->fl_type || fl2->fl_type == F_UNLCK); + &&(fl1->c.flc_type == fl2->c.flc_type || fl2->c.flc_type == F_UNLCK); } extern const struct lock_manager_operations nlmsvc_lock_operations; diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index b60fbcd8cdfa..80cca9426761 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -52,7 +52,7 @@ struct nlm_lock { * FreeBSD uses 16, Apple Mac OS X 10.3 uses 20. Therefore we set it to * 32 bytes. */ - + struct nlm_cookie { unsigned char data[NLM_MAXCOOKIELEN]; diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 185924c56378..a8057a3f8de6 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -94,6 +94,8 @@ LSM_HOOK(int, 0, path_mkdir, const struct path *dir, struct dentry *dentry, LSM_HOOK(int, 0, path_rmdir, const struct path *dir, struct dentry *dentry) LSM_HOOK(int, 0, path_mknod, const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev) +LSM_HOOK(void, LSM_RET_VOID, path_post_mknod, struct mnt_idmap *idmap, + struct dentry *dentry) LSM_HOOK(int, 0, path_truncate, const struct path *path) LSM_HOOK(int, 0, path_symlink, const struct path *dir, struct dentry *dentry, const char *old_name) @@ -119,6 +121,8 @@ LSM_HOOK(int, 0, inode_init_security_anon, struct inode *inode, const struct qstr *name, const struct inode *context_inode) LSM_HOOK(int, 0, inode_create, struct inode *dir, struct dentry *dentry, umode_t mode) +LSM_HOOK(void, LSM_RET_VOID, inode_post_create_tmpfile, struct mnt_idmap *idmap, + struct inode *inode) LSM_HOOK(int, 0, inode_link, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) LSM_HOOK(int, 0, inode_unlink, struct inode *dir, struct dentry *dentry) @@ -135,7 +139,10 @@ LSM_HOOK(int, 0, inode_readlink, struct dentry *dentry) LSM_HOOK(int, 0, inode_follow_link, struct dentry *dentry, struct inode *inode, bool rcu) LSM_HOOK(int, 0, inode_permission, struct inode *inode, int mask) -LSM_HOOK(int, 0, inode_setattr, struct dentry *dentry, struct iattr *attr) +LSM_HOOK(int, 0, inode_setattr, struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr) +LSM_HOOK(void, LSM_RET_VOID, inode_post_setattr, struct mnt_idmap *idmap, + struct dentry *dentry, int ia_valid) LSM_HOOK(int, 0, inode_getattr, const struct path *path) LSM_HOOK(int, 0, inode_setxattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, @@ -146,12 +153,18 @@ LSM_HOOK(int, 0, inode_getxattr, struct dentry *dentry, const char *name) LSM_HOOK(int, 0, inode_listxattr, struct dentry *dentry) LSM_HOOK(int, 0, inode_removexattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name) +LSM_HOOK(void, LSM_RET_VOID, inode_post_removexattr, struct dentry *dentry, + const char *name) LSM_HOOK(int, 0, inode_set_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) +LSM_HOOK(void, LSM_RET_VOID, inode_post_set_acl, struct dentry *dentry, + const char *acl_name, struct posix_acl *kacl) LSM_HOOK(int, 0, inode_get_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) LSM_HOOK(int, 0, inode_remove_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) +LSM_HOOK(void, LSM_RET_VOID, inode_post_remove_acl, struct mnt_idmap *idmap, + struct dentry *dentry, const char *acl_name) LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry) LSM_HOOK(int, 0, inode_killpriv, struct mnt_idmap *idmap, struct dentry *dentry) @@ -168,6 +181,7 @@ LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir, struct kernfs_node *kn) LSM_HOOK(int, 0, file_permission, struct file *file, int mask) LSM_HOOK(int, 0, file_alloc_security, struct file *file) +LSM_HOOK(void, LSM_RET_VOID, file_release, struct file *file) LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file) LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd, unsigned long arg) @@ -186,6 +200,7 @@ LSM_HOOK(int, 0, file_send_sigiotask, struct task_struct *tsk, struct fown_struct *fown, int sig) LSM_HOOK(int, 0, file_receive, struct file *file) LSM_HOOK(int, 0, file_open, struct file *file) +LSM_HOOK(int, 0, file_post_open, struct file *file, int mask) LSM_HOOK(int, 0, file_truncate, struct file *file) LSM_HOOK(int, 0, task_alloc, struct task_struct *task, unsigned long clone_flags) @@ -315,9 +330,9 @@ LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how) LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb) -LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock, +LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_stream, struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) -LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock, +LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_dgram, struct socket *sock, struct sk_buff *skb, u32 *secid) LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority) LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk) @@ -390,6 +405,9 @@ LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key) LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm) LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **buffer) +LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring, + struct key *key, const void *payload, size_t payload_len, + unsigned long flags, bool create) #endif /* CONFIG_KEYS */ #ifdef CONFIG_AUDIT @@ -404,10 +422,17 @@ LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size) LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode) LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog) -LSM_HOOK(int, 0, bpf_map_alloc_security, struct bpf_map *map) -LSM_HOOK(void, LSM_RET_VOID, bpf_map_free_security, struct bpf_map *map) -LSM_HOOK(int, 0, bpf_prog_alloc_security, struct bpf_prog_aux *aux) -LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux) +LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr, + struct bpf_token *token) +LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map) +LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr, + struct bpf_token *token) +LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog) +LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, + struct path *path) +LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token) +LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd) +LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap) #endif /* CONFIG_BPF_SYSCALL */ LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index b3d63123b945..a53ad4dabd7e 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -171,6 +171,7 @@ enum maple_type { #define MT_FLAGS_LOCK_IRQ 0x100 #define MT_FLAGS_LOCK_BH 0x200 #define MT_FLAGS_LOCK_EXTERN 0x300 +#define MT_FLAGS_ALLOC_WRAPPED 0x0800 #define MAPLE_HEIGHT_MAX 31 @@ -319,6 +320,9 @@ int mtree_insert_range(struct maple_tree *mt, unsigned long first, int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp); +int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, + void *entry, unsigned long range_lo, unsigned long range_hi, + unsigned long *next, gfp_t gfp); int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp); @@ -499,6 +503,9 @@ void *mas_find_range(struct ma_state *mas, unsigned long max); void *mas_find_rev(struct ma_state *mas, unsigned long min); void *mas_find_range_rev(struct ma_state *mas, unsigned long max); int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp); +int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, + void *entry, unsigned long range_lo, unsigned long range_hi, + unsigned long *next, gfp_t gfp); bool mas_nomem(struct ma_state *mas, gfp_t gfp); void mas_pause(struct ma_state *mas); diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index 9b54c4f0677f..693eba9869e4 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -26,6 +26,7 @@ #define MARVELL_PHY_ID_88E2110 0x002b09b0 #define MARVELL_PHY_ID_88X2222 0x01410f10 #define MARVELL_PHY_ID_88Q2110 0x002b0980 +#define MARVELL_PHY_ID_88Q2220 0x002b0b20 /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */ #define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 79ceee3c8673..68f8d2e970d4 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -373,6 +373,10 @@ static inline void mii_t1_adv_m_mod_linkmode_t(unsigned long *advertising, u32 l { linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, advertising, lpa & MDIO_AN_T1_ADV_M_B10L); + linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT1_Full_BIT, + advertising, lpa & MDIO_AN_T1_ADV_M_100BT1); + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT1_Full_BIT, + advertising, lpa & MDIO_AN_T1_ADV_M_1000BT1); } /** @@ -409,6 +413,10 @@ static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising) if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, advertising)) result |= MDIO_AN_T1_ADV_M_B10L; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT1_Full_BIT, advertising)) + result |= MDIO_AN_T1_ADV_M_100BT1; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT1_Full_BIT, advertising)) + result |= MDIO_AN_T1_ADV_M_1000BT1; return result; } @@ -440,6 +448,42 @@ static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val) } /** + * mii_eee_cap2_mod_linkmode_sup_t() + * @adv: target the linkmode settings + * @val: register value + * + * A function that translates value of following registers to the linkmode: + * IEEE 802.3-2022 45.2.3.11 "EEE control and capability 2" register (3.21) + */ +static inline void mii_eee_cap2_mod_linkmode_sup_t(unsigned long *adv, u32 val) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + adv, val & MDIO_EEE_2_5GT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + adv, val & MDIO_EEE_5GT); +} + +/** + * mii_eee_cap2_mod_linkmode_adv_t() + * @adv: target the linkmode advertisement settings + * @val: register value + * + * A function that translates value of following registers to the linkmode: + * IEEE 802.3-2022 45.2.7.16 "EEE advertisement 2" register (7.62) + * IEEE 802.3-2022 45.2.7.17 "EEE link partner ability 2" register (7.63) + * Note: Currently this function is the same as mii_eee_cap2_mod_linkmode_sup_t. + * For certain, not yet supported, modes however the bits differ. + * Therefore create separate functions already. + */ +static inline void mii_eee_cap2_mod_linkmode_adv_t(unsigned long *adv, u32 val) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + adv, val & MDIO_EEE_2_5GT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + adv, val & MDIO_EEE_5GT); +} + +/** * linkmode_to_mii_eee_cap1_t() * @adv: the linkmode advertisement settings * @@ -467,6 +511,25 @@ static inline u32 linkmode_to_mii_eee_cap1_t(unsigned long *adv) } /** + * linkmode_to_mii_eee_cap2_t() + * @adv: the linkmode advertisement settings + * + * A function that translates linkmode to value for IEEE 802.3-2022 45.2.7.16 + * "EEE advertisement 2" register (7.62) + */ +static inline u32 linkmode_to_mii_eee_cap2_t(unsigned long *adv) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, adv)) + result |= MDIO_EEE_2_5GT; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, adv)) + result |= MDIO_EEE_5GT; + + return result; +} + +/** * mii_10base_t1_adv_mod_linkmode_t() * @adv: linkmode advertisement settings * @val: register value diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b695f9e946da..e2082240586d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -121,6 +121,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); int memblock_physmem_add(phys_addr_t base, phys_addr_t size); #endif void memblock_trim_memory(phys_addr_t align); +unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2); bool memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); bool memblock_validate_numa_coverage(unsigned long threshold_bytes); diff --git a/include/linux/mfd/idtRC38xxx_reg.h b/include/linux/mfd/idtRC38xxx_reg.h new file mode 100644 index 000000000000..ec11872f51ad --- /dev/null +++ b/include/linux/mfd/idtRC38xxx_reg.h @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Register Map - Based on PolarBear_CSRs.RevA.xlsx (2023-04-21) + * + * Copyright (C) 2023 Integrated Device Technology, Inc., a Renesas Company. + */ +#ifndef MFD_IDTRC38XXX_REG +#define MFD_IDTRC38XXX_REG + +/* GLOBAL */ +#define SOFT_RESET_CTRL (0x15) /* Specific to FC3W */ +#define MISC_CTRL (0x14) /* Specific to FC3A */ +#define APLL_REINIT BIT(1) +#define APLL_REINIT_VFC3A BIT(2) + +#define DEVICE_ID (0x2) +#define DEVICE_ID_MASK (0x1000) /* Bit 12 is 1 if FC3W and 0 if FC3A */ +#define DEVICE_ID_SHIFT (12) + +/* FOD */ +#define FOD_0 (0x300) +#define FOD_0_VFC3A (0x400) +#define FOD_1 (0x340) +#define FOD_1_VFC3A (0x440) +#define FOD_2 (0x380) +#define FOD_2_VFC3A (0x480) + +/* TDCAPLL */ +#define TDC_CTRL (0x44a) /* Specific to FC3W */ +#define TDC_ENABLE_CTRL (0x169) /* Specific to FC3A */ +#define TDC_DAC_CAL_CTRL (0x16a) /* Specific to FC3A */ +#define TDC_EN BIT(0) +#define TDC_DAC_RECAL_REQ BIT(1) +#define TDC_DAC_RECAL_REQ_VFC3A BIT(0) + +#define TDC_FB_DIV_INT_CNFG (0x442) +#define TDC_FB_DIV_INT_CNFG_VFC3A (0x162) +#define TDC_FB_DIV_INT_MASK GENMASK(7, 0) +#define TDC_REF_DIV_CNFG (0x443) +#define TDC_REF_DIV_CNFG_VFC3A (0x163) +#define TDC_REF_DIV_CONFIG_MASK GENMASK(2, 0) + +/* TIME SYNC CHANNEL */ +#define TIME_CLOCK_SRC (0xa01) /* Specific to FC3W */ +#define TIME_CLOCK_COUNT (0xa00) /* Specific to FC3W */ +#define TIME_CLOCK_COUNT_MASK GENMASK(5, 0) + +#define SUB_SYNC_GEN_CNFG (0xa04) + +#define TOD_COUNTER_READ_REQ (0xa5f) +#define TOD_COUNTER_READ_REQ_VFC3A (0x6df) +#define TOD_SYNC_LOAD_VAL_CTRL (0xa10) +#define TOD_SYNC_LOAD_VAL_CTRL_VFC3A (0x690) +#define SYNC_COUNTER_MASK GENMASK_ULL(51, 0) +#define SUB_SYNC_COUNTER_MASK GENMASK(30, 0) +#define TOD_SYNC_LOAD_REQ_CTRL (0xa21) +#define TOD_SYNC_LOAD_REQ_CTRL_VFC3A (0x6a1) +#define SYNC_LOAD_ENABLE BIT(1) +#define SUB_SYNC_LOAD_ENABLE BIT(0) +#define SYNC_LOAD_REQ BIT(0) + +#define LPF_MODE_CNFG (0xa80) +#define LPF_MODE_CNFG_VFC3A (0x700) +enum lpf_mode { + LPF_DISABLED = 0, + LPF_WP = 1, + LPF_HOLDOVER = 2, + LPF_WF = 3, + LPF_INVALID = 4 +}; +#define LPF_CTRL (0xa98) +#define LPF_CTRL_VFC3A (0x718) +#define LPF_EN BIT(0) + +#define LPF_BW_CNFG (0xa81) +#define LPF_BW_SHIFT GENMASK(7, 3) +#define LPF_BW_MULT GENMASK(2, 0) +#define LPF_BW_SHIFT_DEFAULT (0xb) +#define LPF_BW_MULT_DEFAULT (0x0) +#define LPF_BW_SHIFT_1PPS (0x5) + +#define LPF_WR_PHASE_CTRL (0xaa8) +#define LPF_WR_PHASE_CTRL_VFC3A (0x728) +#define LPF_WR_FREQ_CTRL (0xab0) +#define LPF_WR_FREQ_CTRL_VFC3A (0x730) + +#define TIME_CLOCK_TDC_FANOUT_CNFG (0xB00) +#define TIME_SYNC_TO_TDC_EN BIT(0) +#define SIG1_MUX_SEL_MASK GENMASK(7, 4) +#define SIG2_MUX_SEL_MASK GENMASK(11, 8) +enum tdc_mux_sel { + REF0 = 0, + REF1 = 1, + REF2 = 2, + REF3 = 3, + REF_CLK5 = 4, + REF_CLK6 = 5, + DPLL_FB_TO_TDC = 6, + DPLL_FB_DIVIDED_TO_TDC = 7, + TIME_CLK_DIVIDED = 8, + TIME_SYNC = 9, +}; + +#define TIME_CLOCK_MEAS_CNFG (0xB04) +#define TDC_MEAS_MODE BIT(0) +enum tdc_meas_mode { + CONTINUOUS = 0, + ONE_SHOT = 1, + MEAS_MODE_INVALID = 2, +}; + +#define TIME_CLOCK_MEAS_DIV_CNFG (0xB08) +#define TIME_REF_DIV_MASK GENMASK(29, 24) + +#define TIME_CLOCK_MEAS_CTRL (0xB10) +#define TDC_MEAS_EN BIT(0) +#define TDC_MEAS_START BIT(1) + +#define TDC_FIFO_READ_REQ (0xB2F) +#define TDC_FIFO_READ (0xB30) +#define COARSE_MEAS_MASK GENMASK_ULL(39, 13) +#define FINE_MEAS_MASK GENMASK(12, 0) + +#define TDC_FIFO_CTRL (0xB12) +#define FIFO_CLEAR BIT(0) +#define TDC_FIFO_STS (0xB38) +#define FIFO_FULL BIT(1) +#define FIFO_EMPTY BIT(0) +#define TDC_FIFO_EVENT (0xB39) +#define FIFO_OVERRUN BIT(1) + +/* DPLL */ +#define MAX_REFERENCE_INDEX (3) +#define MAX_NUM_REF_PRIORITY (4) + +#define MAX_DPLL_INDEX (2) + +#define DPLL_STS (0x580) +#define DPLL_STS_VFC3A (0x571) +#define DPLL_STATE_STS_MASK (0x70) +#define DPLL_STATE_STS_SHIFT (4) +#define DPLL_REF_SEL_STS_MASK (0x6) +#define DPLL_REF_SEL_STS_SHIFT (1) + +#define DPLL_REF_PRIORITY_CNFG (0x502) +#define DPLL_REFX_PRIORITY_DISABLE_MASK (0xf) +#define DPLL_REF0_PRIORITY_ENABLE_AND_SET_MASK (0x31) +#define DPLL_REF1_PRIORITY_ENABLE_AND_SET_MASK (0xc2) +#define DPLL_REF2_PRIORITY_ENABLE_AND_SET_MASK (0x304) +#define DPLL_REF3_PRIORITY_ENABLE_AND_SET_MASK (0xc08) +#define DPLL_REF0_PRIORITY_SHIFT (4) +#define DPLL_REF1_PRIORITY_SHIFT (6) +#define DPLL_REF2_PRIORITY_SHIFT (8) +#define DPLL_REF3_PRIORITY_SHIFT (10) + +enum dpll_state { + DPLL_STATE_MIN = 0, + DPLL_STATE_FREERUN = DPLL_STATE_MIN, + DPLL_STATE_LOCKED = 1, + DPLL_STATE_HOLDOVER = 2, + DPLL_STATE_WRITE_FREQUENCY = 3, + DPLL_STATE_ACQUIRE = 4, + DPLL_STATE_HITLESS_SWITCH = 5, + DPLL_STATE_MAX = DPLL_STATE_HITLESS_SWITCH +}; + +/* REFMON */ +#define LOSMON_STS_0 (0x81e) +#define LOSMON_STS_0_VFC3A (0x18e) +#define LOSMON_STS_1 (0x82e) +#define LOSMON_STS_1_VFC3A (0x19e) +#define LOSMON_STS_2 (0x83e) +#define LOSMON_STS_2_VFC3A (0x1ae) +#define LOSMON_STS_3 (0x84e) +#define LOSMON_STS_3_VFC3A (0x1be) +#define LOS_STS_MASK (0x1) + +#define FREQMON_STS_0 (0x874) +#define FREQMON_STS_0_VFC3A (0x1d4) +#define FREQMON_STS_1 (0x894) +#define FREQMON_STS_1_VFC3A (0x1f4) +#define FREQMON_STS_2 (0x8b4) +#define FREQMON_STS_2_VFC3A (0x214) +#define FREQMON_STS_3 (0x8d4) +#define FREQMON_STS_3_VFC3A (0x234) +#define FREQ_FAIL_STS_SHIFT (31) + +/* Firmware interface */ +#define TIME_CLK_FREQ_ADDR (0xffa0) +#define XTAL_FREQ_ADDR (0xffa1) + +/* + * Return register address and field mask based on passed in firmware version + */ +#define IDTFC3_FW_REG(FW, VER, REG) (((FW) < (VER)) ? (REG) : (REG##_##VER)) +#define IDTFC3_FW_FIELD(FW, VER, FIELD) (((FW) < (VER)) ? (FIELD) : (FIELD##_##VER)) +enum fw_version { + V_DEFAULT = 0, + VFC3W = 1, + VFC3A = 2 +}; + +/* XTAL_FREQ_ADDR/TIME_CLK_FREQ_ADDR */ +enum { + FREQ_MIN = 0, + FREQ_25M = 1, + FREQ_49_152M = 2, + FREQ_50M = 3, + FREQ_100M = 4, + FREQ_125M = 5, + FREQ_250M = 6, + FREQ_MAX +}; + +struct idtfc3_hw_param { + u32 xtal_freq; + u32 time_clk_freq; +}; + +struct idtfc3_fwrc { + u8 hiaddr; + u8 loaddr; + u8 value; + u8 reserved; +} __packed; + +static inline void idtfc3_default_hw_param(struct idtfc3_hw_param *hw_param) +{ + hw_param->xtal_freq = 49152000; + hw_param->time_clk_freq = 25000000; +} + +static inline int idtfc3_set_hw_param(struct idtfc3_hw_param *hw_param, + u16 addr, u8 val) +{ + if (addr == XTAL_FREQ_ADDR) + switch (val) { + case FREQ_49_152M: + hw_param->xtal_freq = 49152000; + break; + case FREQ_50M: + hw_param->xtal_freq = 50000000; + break; + default: + return -EINVAL; + } + else if (addr == TIME_CLK_FREQ_ADDR) + switch (val) { + case FREQ_25M: + hw_param->time_clk_freq = 25000000; + break; + case FREQ_50M: + hw_param->time_clk_freq = 50000000; + break; + case FREQ_100M: + hw_param->time_clk_freq = 100000000; + break; + case FREQ_125M: + hw_param->time_clk_freq = 125000000; + break; + case FREQ_250M: + hw_param->time_clk_freq = 250000000; + break; + default: + return -EINVAL; + } + else + return -EFAULT; + + return 0; +} + +#endif diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8c55ff351e5f..bf9324a31ae9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -681,6 +681,7 @@ struct mlx5e_resources { struct mlx5_sq_bfreg bfreg; #define MLX5_MAX_NUM_TC 8 u32 tisn[MLX5_MAX_PORTS][MLX5_MAX_NUM_TC]; + bool tisn_valid; } hw_objs; struct net_device *uplink_netdev; struct mutex uplink_netdev_lock; @@ -822,6 +823,7 @@ struct mlx5_core_dev { struct blocking_notifier_head macsec_nh; #endif u64 num_ipsec_offloads; + struct mlx5_sd *sd; }; struct mlx5_db { diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 6f7725238abc..3fb428ce7d1c 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -132,6 +132,7 @@ struct mlx5_flow_handle; enum { FLOW_CONTEXT_HAS_TAG = BIT(0), + FLOW_CONTEXT_UPLINK_HAIRPIN_EN = BIT(1), }; struct mlx5_flow_context { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index bf5320b28b8b..49f660563e49 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1103,7 +1103,7 @@ struct mlx5_ifc_roce_cap_bits { u8 sw_r_roce_src_udp_port[0x1]; u8 fl_rc_qp_when_roce_disabled[0x1]; u8 fl_rc_qp_when_roce_enabled[0x1]; - u8 reserved_at_7[0x1]; + u8 roce_cc_general[0x1]; u8 qp_ooo_transmit_default[0x1]; u8 reserved_at_9[0x15]; u8 qp_ts_format[0x2]; @@ -3576,7 +3576,7 @@ struct mlx5_ifc_flow_context_bits { u8 action[0x10]; u8 extended_destination[0x1]; - u8 reserved_at_81[0x1]; + u8 uplink_hairpin_en[0x1]; u8 flow_source[0x2]; u8 encrypt_decrypt_type[0x4]; u8 destination_list_size[0x18]; @@ -4036,8 +4036,13 @@ struct mlx5_ifc_nic_vport_context_bits { u8 affiliation_criteria[0x4]; u8 affiliated_vhca_id[0x10]; - u8 reserved_at_60[0xd0]; + u8 reserved_at_60[0xa0]; + + u8 reserved_at_100[0x1]; + u8 sd_group[0x3]; + u8 reserved_at_104[0x1c]; + u8 reserved_at_120[0x10]; u8 mtu[0x10]; u8 system_image_guid[0x40]; @@ -10122,8 +10127,7 @@ struct mlx5_ifc_mpir_reg_bits { u8 reserved_at_20[0x20]; u8 local_port[0x8]; - u8 reserved_at_28[0x15]; - u8 sd_group[0x3]; + u8 reserved_at_28[0x18]; u8 reserved_at_60[0x20]; }; @@ -10249,7 +10253,9 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 mcqi[0x1]; u8 mcqs[0x1]; - u8 regs_95_to_87[0x9]; + u8 regs_95_to_90[0x6]; + u8 mpir[0x1]; + u8 regs_88_to_87[0x2]; u8 mpegc[0x1]; u8 mtutc[0x1]; u8 regs_84_to_68[0x11]; @@ -10257,7 +10263,9 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 regs_63_to_46[0x12]; u8 mrtc[0x1]; - u8 regs_44_to_32[0xd]; + u8 regs_44_to_41[0x4]; + u8 mfrl[0x1]; + u8 regs_39_to_32[0x8]; u8 regs_31_to_10[0x16]; u8 mtmp[0x1]; @@ -10657,6 +10665,7 @@ enum { MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER = 0x0, MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED = 0x1, MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC = 0x2, + MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET = 0x7, }; enum { @@ -12701,6 +12710,14 @@ enum mlx5_msees_oper_status { MLX5_MSEES_OPER_STATUS_FAIL_FREE_RUNNING = 0x5, }; +enum mlx5_msees_failure_reason { + MLX5_MSEES_FAILURE_REASON_UNDEFINED_ERROR = 0x0, + MLX5_MSEES_FAILURE_REASON_PORT_DOWN = 0x1, + MLX5_MSEES_FAILURE_REASON_TOO_HIGH_FREQUENCY_DIFF = 0x2, + MLX5_MSEES_FAILURE_REASON_NET_SYNCHRONIZER_DEVICE_ERROR = 0x3, + MLX5_MSEES_FAILURE_REASON_LACK_OF_RESOURCES = 0x4, +}; + struct mlx5_ifc_msees_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index bd53cf4be7bd..f0e55bf3ec8b 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -269,7 +269,10 @@ struct mlx5_wqe_eth_seg { union { struct { __be16 sz; - u8 start[2]; + union { + u8 start[2]; + DECLARE_FLEX_ARRAY(u8, data); + }; } inline_hdr; struct { __be16 type; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index fbb9bf447889..c36cc6d82926 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -72,6 +72,7 @@ int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu); int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu); int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); +int mlx5_query_nic_vport_sd_group(struct mlx5_core_dev *mdev, u8 *sd_group); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, u16 vport, u64 node_guid); diff --git a/include/linux/mman.h b/include/linux/mman.h index 40d94411d492..dc7048824be8 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -156,6 +156,7 @@ calc_vm_flag_bits(unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | + _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | arch_calc_vm_flag_bits(flags); } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4ed33b127821..a497f189d988 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -2013,9 +2013,9 @@ static inline int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; ms = __pfn_to_section(pfn); - rcu_read_lock(); + rcu_read_lock_sched(); if (!valid_section(ms)) { - rcu_read_unlock(); + rcu_read_unlock_sched(); return 0; } /* @@ -2023,7 +2023,7 @@ static inline int pfn_valid(unsigned long pfn) * the entire section-sized span. */ ret = early_section(ms) || pfn_section_valid(ms, pfn); - rcu_read_unlock(); + rcu_read_unlock_sched(); return ret; } diff --git a/include/linux/module.h b/include/linux/module.h index 96bc462872c0..1153b0d99a80 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -885,7 +885,7 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE extern bool retpoline_module_ok(bool has_retpoline); #else static inline bool retpoline_module_ok(bool has_retpoline) diff --git a/include/linux/msi.h b/include/linux/msi.h index ddace8c34dcf..26d07e23052e 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -412,6 +412,7 @@ bool arch_restore_msi_irqs(struct pci_dev *dev); struct irq_domain; struct irq_domain_ops; struct irq_chip; +struct irq_fwspec; struct device_node; struct fwnode_handle; struct msi_domain_info; @@ -431,6 +432,8 @@ struct msi_domain_info; * function. * @msi_post_free: Optional function which is invoked after freeing * all interrupts. + * @msi_translate: Optional translate callback to support the odd wire to + * MSI bridges, e.g. MBIGEN * * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying * irqdomain. @@ -468,6 +471,8 @@ struct msi_domain_ops { struct device *dev); void (*msi_post_free)(struct irq_domain *domain, struct device *dev); + int (*msi_translate)(struct irq_domain *domain, struct irq_fwspec *fwspec, + irq_hw_number_t *hwirq, unsigned int *type); }; /** @@ -547,6 +552,10 @@ enum { MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 5), /* Free MSI descriptors */ MSI_FLAG_FREE_MSI_DESCS = (1 << 6), + /* Use dev->fwnode for MSI device domain creation */ + MSI_FLAG_USE_DEV_FWNODE = (1 << 7), + /* Set parent->dev into domain->pm_dev on device domain creation */ + MSI_FLAG_PARENT_PM_DEV = (1 << 8), /* Mask for the generic functionality */ MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0), @@ -572,6 +581,11 @@ enum { * struct msi_parent_ops - MSI parent domain callbacks and configuration info * * @supported_flags: Required: The supported MSI flags of the parent domain + * @required_flags: Optional: The required MSI flags of the parent MSI domain + * @bus_select_token: Optional: The bus token of the real parent domain for + * irq_domain::select() + * @bus_select_mask: Optional: A mask of supported BUS_DOMAINs for + * irq_domain::select() * @prefix: Optional: Prefix for the domain and chip name * @init_dev_msi_info: Required: Callback for MSI parent domains to setup parent * domain specific domain flags, domain ops and interrupt chip @@ -579,6 +593,9 @@ enum { */ struct msi_parent_ops { u32 supported_flags; + u32 required_flags; + u32 bus_select_token; + u32 bus_select_mask; const char *prefix; bool (*init_dev_msi_info)(struct device *dev, struct irq_domain *domain, struct irq_domain *msi_parent_domain, @@ -627,9 +644,6 @@ struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain); struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); -int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec, - irq_write_msi_msg_t write_msi_msg); -void platform_msi_domain_free_irqs(struct device *dev); /* When an MSI domain is used as an intermediate domain */ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, @@ -656,6 +670,10 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq, unsigned int nvec); void *platform_msi_get_host_data(struct irq_domain *domain); +/* Per device platform MSI */ +int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nvec, + irq_write_msi_msg_t write_msi_msg); +void platform_device_msi_free_irqs_all(struct device *dev); bool msi_device_has_isolated_msi(struct device *dev); #else /* CONFIG_GENERIC_MSI_IRQ */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 7e208d46ba5b..67edc4ca2bee 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -32,11 +32,9 @@ # define __DEP_MAP_MUTEX_INITIALIZER(lockname) #endif -#ifndef CONFIG_PREEMPT_RT - #ifdef CONFIG_DEBUG_MUTEXES -#define __DEBUG_MUTEX_INITIALIZER(lockname) \ +# define __DEBUG_MUTEX_INITIALIZER(lockname) \ , .magic = &lockname extern void mutex_destroy(struct mutex *lock); @@ -49,6 +47,7 @@ static inline void mutex_destroy(struct mutex *lock) {} #endif +#ifndef CONFIG_PREEMPT_RT /** * mutex_init - initialize the mutex * @mutex: the mutex to be initialized @@ -101,9 +100,6 @@ extern bool mutex_is_locked(struct mutex *lock); extern void __mutex_rt_init(struct mutex *lock, const char *name, struct lock_class_key *key); -extern int mutex_trylock(struct mutex *lock); - -static inline void mutex_destroy(struct mutex *lock) { } #define mutex_is_locked(l) rt_mutex_base_is_locked(&(l)->rtmutex) diff --git a/include/linux/net.h b/include/linux/net.h index c9b4a63791a4..15df6d5f27a7 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -299,10 +299,7 @@ do { \ net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__) #else #define net_dbg_ratelimited(fmt, ...) \ - do { \ - if (0) \ - no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ - } while (0) + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif #define net_get_random_once(buf, nbytes) \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 118c40258d07..c6f6ac779b34 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -79,8 +79,6 @@ struct xdp_buff; struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; -/* DPLL specific */ -struct dpll_pin; typedef u32 xdp_features_t; @@ -227,12 +225,6 @@ struct net_device_core_stats { #include <linux/cache.h> #include <linux/skbuff.h> -#ifdef CONFIG_RPS -#include <linux/static_key.h> -extern struct static_key_false rps_needed; -extern struct static_key_false rfs_needed; -#endif - struct neighbour; struct neigh_parms; struct sk_buff; @@ -732,86 +724,10 @@ static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node #endif } -#ifdef CONFIG_RPS -/* - * This structure holds an RPS map which can be of variable length. The - * map is an array of CPUs. - */ -struct rps_map { - unsigned int len; - struct rcu_head rcu; - u16 cpus[]; -}; -#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) - -/* - * The rps_dev_flow structure contains the mapping of a flow to a CPU, the - * tail pointer for that CPU's input queue at the time of last enqueue, and - * a hardware filter index. - */ -struct rps_dev_flow { - u16 cpu; - u16 filter; - unsigned int last_qtail; -}; -#define RPS_NO_FILTER 0xffff - -/* - * The rps_dev_flow_table structure contains a table of flow mappings. - */ -struct rps_dev_flow_table { - unsigned int mask; - struct rcu_head rcu; - struct rps_dev_flow flows[]; -}; -#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ - ((_num) * sizeof(struct rps_dev_flow))) - -/* - * The rps_sock_flow_table contains mappings of flows to the last CPU - * on which they were processed by the application (set in recvmsg). - * Each entry is a 32bit value. Upper part is the high-order bits - * of flow hash, lower part is CPU number. - * rps_cpu_mask is used to partition the space, depending on number of - * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 - * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, - * meaning we use 32-6=26 bits for the hash. - */ -struct rps_sock_flow_table { - u32 mask; - - u32 ents[] ____cacheline_aligned_in_smp; -}; -#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) - -#define RPS_NO_CPU 0xffff - -extern u32 rps_cpu_mask; -extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; - -static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, - u32 hash) -{ - if (table && hash) { - unsigned int index = hash & table->mask; - u32 val = hash & ~rps_cpu_mask; - - /* We only give a hint, preemption can change CPU under us */ - val |= raw_smp_processor_id(); - - /* The following WRITE_ONCE() is paired with the READ_ONCE() - * here, and another one in get_rps_cpu(). - */ - if (READ_ONCE(table->ents[index]) != val) - WRITE_ONCE(table->ents[index], val); - } -} - #ifdef CONFIG_RFS_ACCEL bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id); #endif -#endif /* CONFIG_RPS */ /* XPS map type and offset of the xps map within net_device->xps_maps[]. */ enum xps_map_type { @@ -1062,7 +978,7 @@ struct xfrmdev_ops { bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); - void (*xdo_dev_state_update_curlft) (struct xfrm_state *x); + void (*xdo_dev_state_update_stats) (struct xfrm_state *x); int (*xdo_dev_policy_add) (struct xfrm_policy *x, struct netlink_ext_ack *extack); void (*xdo_dev_policy_delete) (struct xfrm_policy *x); void (*xdo_dev_policy_free) (struct xfrm_policy *x); @@ -1815,6 +1731,15 @@ enum netdev_stat_type { NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */ }; +enum netdev_reg_state { + NETREG_UNINITIALIZED = 0, + NETREG_REGISTERED, /* completed register_netdevice */ + NETREG_UNREGISTERING, /* called unregister_netdevice */ + NETREG_UNREGISTERED, /* completed unregister todo */ + NETREG_RELEASED, /* called free_netdev */ + NETREG_DUMMY, /* dummy device for NAPI poll */ +}; + /** * struct net_device - The DEVICE structure. * @@ -2030,6 +1955,7 @@ enum netdev_stat_type { * * @sysfs_rx_queue_group: Space for optional per-rx queue attributes * @rtnl_link_ops: Rtnl_link_ops + * @stat_ops: Optional ops for queue-aware statistics * * @gso_max_size: Maximum size of generic segmentation offload * @tso_max_size: Device (as in HW) limit on the max TSO request size @@ -2141,6 +2067,11 @@ struct net_device { /* TXRX read-mostly hotpath */ __cacheline_group_begin(net_device_read_txrx); + union { + struct pcpu_lstats __percpu *lstats; + struct pcpu_sw_netstats __percpu *tstats; + struct pcpu_dstats __percpu *dstats; + }; unsigned int flags; unsigned short hard_header_len; netdev_features_t features; @@ -2249,7 +2180,7 @@ struct net_device { const struct tlsdev_ops *tlsdev_ops; #endif - unsigned char operstate; + unsigned int operstate; unsigned char link_mode; unsigned char if_port; @@ -2372,13 +2303,7 @@ struct net_device { struct list_head link_watch_list; - enum { NETREG_UNINITIALIZED=0, - NETREG_REGISTERED, /* completed register_netdevice */ - NETREG_UNREGISTERING, /* called unregister_netdevice */ - NETREG_UNREGISTERED, /* completed unregister todo */ - NETREG_RELEASED, /* called free_netdev */ - NETREG_DUMMY, /* dummy device for NAPI poll */ - } reg_state:8; + u8 reg_state; bool dismantle; @@ -2395,11 +2320,6 @@ struct net_device { enum netdev_ml_priv_type ml_priv_type; enum netdev_stat_type pcpu_stat_type:8; - union { - struct pcpu_lstats __percpu *lstats; - struct pcpu_sw_netstats __percpu *tstats; - struct pcpu_dstats __percpu *dstats; - }; #if IS_ENABLED(CONFIG_GARP) struct garp_port __rcu *garp_port; @@ -2416,6 +2336,8 @@ struct net_device { const struct rtnl_link_ops *rtnl_link_ops; + const struct netdev_stat_ops *stat_ops; + /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SEGS 65535u #define GSO_LEGACY_MAX_SIZE 65536u @@ -2469,7 +2391,7 @@ struct net_device { struct devlink_port *devlink_port; #if IS_ENABLED(CONFIG_DPLL) - struct dpll_pin *dpll_pin; + struct dpll_pin __rcu *dpll_pin; #endif #if IS_ENABLED(CONFIG_PAGE_POOL) /** @page_pools: page pools created for this netdevice */ @@ -3074,8 +2996,6 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev); int call_netdevice_notifiers_info(unsigned long val, struct netdev_notifier_info *info); -extern rwlock_t dev_base_lock; /* Device list lock */ - #define for_each_netdev(net, d) \ list_for_each_entry(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_reverse(net, d) \ @@ -3198,7 +3118,7 @@ static inline void unregister_netdevice(struct net_device *dev) int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); void netdev_freemem(struct net_device *dev); -int init_dummy_netdev(struct net_device *dev); +void init_dummy_netdev(struct net_device *dev); struct net_device *netdev_get_xmit_slave(struct net_device *dev, struct sk_buff *skb, @@ -3499,6 +3419,16 @@ static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue #endif } +static inline int netdev_queue_dql_avail(const struct netdev_queue *txq) +{ +#ifdef CONFIG_BQL + /* Non-BQL migrated drivers will return 0, too. */ + return dql_avail(&txq->dql); +#else + return 0; +#endif +} + /** * netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write * @dev_queue: pointer to transmit queue @@ -3958,7 +3888,7 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog); void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); -int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); +int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb); int netif_rx(struct sk_buff *skb); int __netif_rx(struct sk_buff *skb); @@ -3971,8 +3901,6 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); void napi_get_frags_check(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); -struct packet_offload *gro_find_receive_by_type(__be16 type); -struct packet_offload *gro_find_complete_by_type(__be16 type); static inline void napi_free_frags(struct napi_struct *napi) { @@ -4032,17 +3960,6 @@ int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); -void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin); -void netdev_dpll_pin_clear(struct net_device *dev); - -static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) -{ -#if IS_ENABLED(CONFIG_DPLL) - return dev->dpll_pin; -#else - return NULL; -#endif -} struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, @@ -4353,8 +4270,10 @@ static inline bool netif_testing(const struct net_device *dev) */ static inline bool netif_oper_up(const struct net_device *dev) { - return (dev->operstate == IF_OPER_UP || - dev->operstate == IF_OPER_UNKNOWN /* backward compat */); + unsigned int operstate = READ_ONCE(dev->operstate); + + return operstate == IF_OPER_UP || + operstate == IF_OPER_UNKNOWN /* backward compat */; } /** @@ -4793,11 +4712,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, const struct pcpu_sw_netstats __percpu *netstats); void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); -extern int netdev_max_backlog; -extern int dev_rx_weight; -extern int dev_tx_weight; -extern int gro_normal_batch; - enum { NESTED_SYNC_IMM_BIT, NESTED_SYNC_TODO_BIT, @@ -5254,7 +5168,9 @@ static inline const char *netdev_name(const struct net_device *dev) static inline const char *netdev_reg_state(const struct net_device *dev) { - switch (dev->reg_state) { + u8 reg_state = READ_ONCE(dev->reg_state); + + switch (reg_state) { case NETREG_UNINITIALIZED: return " (uninitialized)"; case NETREG_REGISTERED: return ""; case NETREG_UNREGISTERING: return " (unregistering)"; @@ -5263,7 +5179,7 @@ static inline const char *netdev_reg_state(const struct net_device *dev) case NETREG_DUMMY: return " (dummy)"; } - WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, dev->reg_state); + WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, reg_state); return " (unknown)"; } @@ -5305,7 +5221,6 @@ static inline const char *netdev_reg_state(const struct net_device *dev) #define PTYPE_HASH_SIZE (16) #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) -extern struct list_head ptype_all __read_mostly; extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; extern struct net_device *blackhole_netdev; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 80900d910992..2683b2b77612 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -370,7 +370,6 @@ __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook, u_int8_t protocol, unsigned short family); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family); -int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry); #include <net/flow.h> @@ -474,6 +473,7 @@ struct nf_ct_hook { const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); void (*set_closing)(struct nf_conntrack *nfct); + int (*confirm)(struct sk_buff *skb); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index e8c350a3ade1..e9f4f845d760 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -186,6 +186,8 @@ struct ip_set_type_variant { /* Return true if "b" set is the same as "a" * according to the create set parameters */ bool (*same_set)(const struct ip_set *a, const struct ip_set *b); + /* Cancel ongoing garbage collectors before destroying the set*/ + void (*cancel_gc)(struct ip_set *set); /* Region-locking is used */ bool region_lock; }; @@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type); /* A generic IP set */ struct ip_set { + /* For call_cru in destroy */ + struct rcu_head rcu; /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 1a4445bf2ab9..5df7340d4dab 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -291,6 +291,7 @@ struct netlink_callback { u16 answer_flags; u32 min_dump_alloc; unsigned int prev_seq, seq; + int flags; bool strict_check; union { u8 ctx[48]; @@ -323,6 +324,7 @@ struct netlink_dump_control { void *data; struct module *module; u32 min_dump_alloc; + int flags; }; int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index cd797e00fe35..92de074e63b9 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -124,6 +124,7 @@ struct nfs_client { char cl_ipaddr[48]; struct net *cl_net; struct list_head pending_cb_stateids; + struct rcu_head rcu; }; /* @@ -265,6 +266,7 @@ struct nfs_server { const struct cred *cred; bool has_sec_mnt_opts; struct kobject kobj; + struct rcu_head rcu; }; /* Server capabilities */ diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 0f1d024bd958..7d22ea50b098 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -7,7 +7,7 @@ struct proc_ns_operations; struct ns_common { - atomic_long_t stashed; + struct dentry *stashed; const struct proc_ns_operations *ops; unsigned int inum; refcount_t count; diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index 4dd7e6fe92fb..eb2f04d636c8 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -6,7 +6,11 @@ #ifndef _LINUX_NVME_RDMA_H #define _LINUX_NVME_RDMA_H -#define NVME_RDMA_MAX_QUEUE_SIZE 128 +#define NVME_RDMA_IP_PORT 4420 + +#define NVME_RDMA_MAX_QUEUE_SIZE 256 +#define NVME_RDMA_MAX_METADATA_QUEUE_SIZE 128 +#define NVME_RDMA_DEFAULT_QUEUE_SIZE 128 enum nvme_rdma_cm_fmt { NVME_RDMA_CM_FMT_1_0 = 0x0, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 462c21e0e417..425573202295 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -23,8 +23,6 @@ #define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery" -#define NVME_RDMA_IP_PORT 4420 - #define NVME_NSID_ALL 0xffffffff enum nvme_subsys_type { @@ -646,6 +644,7 @@ enum { NVME_CMD_EFFECTS_NCC = 1 << 2, NVME_CMD_EFFECTS_NIC = 1 << 3, NVME_CMD_EFFECTS_CCC = 1 << 4, + NVME_CMD_EFFECTS_CSER_MASK = GENMASK(15, 14), NVME_CMD_EFFECTS_CSE_MASK = GENMASK(18, 16), NVME_CMD_EFFECTS_UUID_SEL = 1 << 19, NVME_CMD_EFFECTS_SCOPE_MASK = GENMASK(31, 20), @@ -816,12 +815,6 @@ struct nvme_reservation_status_ext { struct nvme_registered_ctrl_ext regctl_eds[]; }; -enum nvme_async_event_type { - NVME_AER_TYPE_ERROR = 0, - NVME_AER_TYPE_SMART = 1, - NVME_AER_TYPE_NOTICE = 2, -}; - /* I/O commands */ enum nvme_opcode { @@ -1818,7 +1811,7 @@ struct nvme_command { }; }; -static inline bool nvme_is_fabrics(struct nvme_command *cmd) +static inline bool nvme_is_fabrics(const struct nvme_command *cmd) { return cmd->common.opcode == nvme_fabrics_command; } @@ -1837,7 +1830,7 @@ struct nvme_error_slot { __u8 resv2[24]; }; -static inline bool nvme_is_write(struct nvme_command *cmd) +static inline bool nvme_is_write(const struct nvme_command *cmd) { /* * What a mess... diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 33212e93f4a6..b3b8d3dab52d 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -131,7 +131,7 @@ */ .macro VALIDATE_UNRET_BEGIN #if defined(CONFIG_NOINSTR_VALIDATION) && \ - (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)) + (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) .Lhere_\@: .pushsection .discard.validate_unret .long .Lhere_\@ - . diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 7b5cf4a5cd19..aa691f2119b0 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -31,8 +31,10 @@ * credit to Christian Biere. */ #define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) -#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) -#define type_min(T) ((T)((T)-type_max(T)-(T)1)) +#define __type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) +#define type_max(t) __type_max(typeof(t)) +#define __type_min(T) ((T)((T)-type_max(T)-(T)1)) +#define type_min(t) __type_min(typeof(t)) /* * Avoids triggering -Wtype-limits compilation warning, @@ -57,46 +59,123 @@ static inline bool __must_check __must_check_overflow(bool overflow) * @b: second addend * @d: pointer to store sum * - * Returns 0 on success. + * Returns true on wrap-around, false otherwise. * - * *@d holds the results of the attempted addition, but is not considered - * "safe for use" on a non-zero return value, which indicates that the - * sum has overflowed or been truncated. + * *@d holds the results of the attempted addition, regardless of whether + * wrap-around occurred. */ #define check_add_overflow(a, b, d) \ __must_check_overflow(__builtin_add_overflow(a, b, d)) /** + * wrapping_add() - Intentionally perform a wrapping addition + * @type: type for result of calculation + * @a: first addend + * @b: second addend + * + * Return the potentially wrapped-around addition without + * tripping any wrap-around sanitizers that may be enabled. + */ +#define wrapping_add(type, a, b) \ + ({ \ + type __val; \ + __builtin_add_overflow(a, b, &__val); \ + __val; \ + }) + +/** + * wrapping_assign_add() - Intentionally perform a wrapping increment assignment + * @var: variable to be incremented + * @offset: amount to add + * + * Increments @var by @offset with wrap-around. Returns the resulting + * value of @var. Will not trip any wrap-around sanitizers. + * + * Returns the new value of @var. + */ +#define wrapping_assign_add(var, offset) \ + ({ \ + typeof(var) *__ptr = &(var); \ + *__ptr = wrapping_add(typeof(var), *__ptr, offset); \ + }) + +/** * check_sub_overflow() - Calculate subtraction with overflow checking * @a: minuend; value to subtract from * @b: subtrahend; value to subtract from @a * @d: pointer to store difference * - * Returns 0 on success. + * Returns true on wrap-around, false otherwise. * - * *@d holds the results of the attempted subtraction, but is not considered - * "safe for use" on a non-zero return value, which indicates that the - * difference has underflowed or been truncated. + * *@d holds the results of the attempted subtraction, regardless of whether + * wrap-around occurred. */ #define check_sub_overflow(a, b, d) \ __must_check_overflow(__builtin_sub_overflow(a, b, d)) /** + * wrapping_sub() - Intentionally perform a wrapping subtraction + * @type: type for result of calculation + * @a: minuend; value to subtract from + * @b: subtrahend; value to subtract from @a + * + * Return the potentially wrapped-around subtraction without + * tripping any wrap-around sanitizers that may be enabled. + */ +#define wrapping_sub(type, a, b) \ + ({ \ + type __val; \ + __builtin_sub_overflow(a, b, &__val); \ + __val; \ + }) + +/** + * wrapping_assign_sub() - Intentionally perform a wrapping decrement assign + * @var: variable to be decremented + * @offset: amount to subtract + * + * Decrements @var by @offset with wrap-around. Returns the resulting + * value of @var. Will not trip any wrap-around sanitizers. + * + * Returns the new value of @var. + */ +#define wrapping_assign_sub(var, offset) \ + ({ \ + typeof(var) *__ptr = &(var); \ + *__ptr = wrapping_sub(typeof(var), *__ptr, offset); \ + }) + +/** * check_mul_overflow() - Calculate multiplication with overflow checking * @a: first factor * @b: second factor * @d: pointer to store product * - * Returns 0 on success. + * Returns true on wrap-around, false otherwise. * - * *@d holds the results of the attempted multiplication, but is not - * considered "safe for use" on a non-zero return value, which indicates - * that the product has overflowed or been truncated. + * *@d holds the results of the attempted multiplication, regardless of whether + * wrap-around occurred. */ #define check_mul_overflow(a, b, d) \ __must_check_overflow(__builtin_mul_overflow(a, b, d)) /** + * wrapping_mul() - Intentionally perform a wrapping multiplication + * @type: type for result of calculation + * @a: first factor + * @b: second factor + * + * Return the potentially wrapped-around multiplication without + * tripping any wrap-around sanitizers that may be enabled. + */ +#define wrapping_mul(type, a, b) \ + ({ \ + type __val; \ + __builtin_mul_overflow(a, b, &__val); \ + __val; \ + }) + +/** * check_shl_overflow() - Calculate a left-shifted value and check overflow * @a: Value to be shifted * @s: How many bits left to shift @@ -120,7 +199,7 @@ static inline bool __must_check __must_check_overflow(bool overflow) typeof(a) _a = a; \ typeof(s) _s = s; \ typeof(d) _d = d; \ - u64 _a_full = _a; \ + unsigned long long _a_full = _a; \ unsigned int _to_shift = \ is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \ *_d = (_a_full << _to_shift); \ @@ -130,10 +209,10 @@ static inline bool __must_check __must_check_overflow(bool overflow) #define __overflows_type_constexpr(x, T) ( \ is_unsigned_type(typeof(x)) ? \ - (x) > type_max(typeof(T)) : \ + (x) > type_max(T) : \ is_unsigned_type(typeof(T)) ? \ - (x) < 0 || (x) > type_max(typeof(T)) : \ - (x) < type_min(typeof(T)) || (x) > type_max(typeof(T))) + (x) < 0 || (x) > type_max(T) : \ + (x) < type_min(T) || (x) > type_max(T)) #define __overflows_type(x, T) ({ \ typeof(T) v = 0; \ diff --git a/include/linux/pci.h b/include/linux/pci.h index add9368e6314..213109d3c601 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1422,6 +1422,7 @@ int pci_load_and_free_saved_state(struct pci_dev *dev, struct pci_saved_state **state); int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state); int pci_set_power_state(struct pci_dev *dev, pci_power_t state); +int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); bool pci_pme_capable(struct pci_dev *dev, pci_power_t state); void pci_pme_active(struct pci_dev *dev, bool enable); @@ -1625,6 +1626,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata); +void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), + void *userdata); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); void pci_setup_bridge(struct pci_bus *bus); @@ -2025,6 +2028,8 @@ static inline int pci_save_state(struct pci_dev *dev) { return 0; } static inline void pci_restore_state(struct pci_dev *dev) { } static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state) { return 0; } +static inline int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state) +{ return 0; } static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable) { return 0; } static inline pci_power_t pci_choose_state(struct pci_dev *dev, @@ -2512,6 +2517,11 @@ static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev) return NULL; } +static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) +{ + return dev->error_state == pci_channel_io_perm_failure; +} + void pci_request_acs(void); bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags); bool pci_acs_path_enabled(struct pci_dev *start, diff --git a/include/linux/phy.h b/include/linux/phy.h index 684efaeca07c..3f68b8239bb1 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -30,6 +30,7 @@ #include <linux/refcount.h> #include <linux/atomic.h> +#include <net/eee.h> #define PHY_DEFAULT_FEATURES (SUPPORTED_Autoneg | \ SUPPORTED_TP | \ @@ -54,6 +55,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap2_features) __ro_after_init; #define PHY_BASIC_FEATURES ((unsigned long *)&phy_basic_features) #define PHY_BASIC_T1_FEATURES ((unsigned long *)&phy_basic_t1_features) @@ -65,6 +67,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init; #define PHY_10GBIT_FEC_FEATURES ((unsigned long *)&phy_10gbit_fec_features) #define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features) #define PHY_EEE_CAP1_FEATURES ((unsigned long *)&phy_eee_cap1_features) +#define PHY_EEE_CAP2_FEATURES ((unsigned long *)&phy_eee_cap2_features) extern const int phy_basic_ports_array[3]; extern const int phy_fibre_port_array[1]; @@ -329,6 +332,7 @@ struct mdio_bus_stats { * struct phy_package_shared - Shared information in PHY packages * @base_addr: Base PHY address of PHY package used to combine PHYs * in one package and for offset calculation of phy_package_read/write + * @np: Pointer to the Device Node if PHY package defined in DT * @refcnt: Number of PHYs connected to this shared data * @flags: Initialization of PHY package * @priv_size: Size of the shared private data @priv @@ -340,6 +344,8 @@ struct mdio_bus_stats { */ struct phy_package_shared { u8 base_addr; + /* With PHY package defined in DT this points to the PHY package node */ + struct device_node *np; refcount_t refcnt; unsigned long flags; size_t priv_size; @@ -589,6 +595,8 @@ struct macsec_ops; * @supported_eee: supported PHY EEE linkmodes * @advertising_eee: Currently advertised EEE linkmodes * @eee_enabled: Flag indicating whether the EEE feature is enabled + * @enable_tx_lpi: When True, MAC should transmit LPI to PHY + * @eee_cfg: User configuration of EEE * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited @@ -638,7 +646,7 @@ struct phy_device { /* Information about the PHY type */ /* And management functions */ - struct phy_driver *drv; + const struct phy_driver *drv; struct device_link *devlink; @@ -698,7 +706,7 @@ struct phy_device { __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); /* used with phy_speed_down */ __ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old); - /* used for eee validation */ + /* used for eee validation and configuration*/ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee); bool eee_enabled; @@ -708,6 +716,8 @@ struct phy_device { /* Energy efficient ethernet modes which should be prohibited */ u32 eee_broken_modes; + bool enable_tx_lpi; + struct eee_config eee_cfg; #ifdef CONFIG_LED_TRIGGER_PHY struct phy_led_trigger *phy_led_triggers; @@ -852,6 +862,15 @@ struct phy_plca_status { bool pst; }; +/* Modes for PHY LED configuration */ +enum phy_led_modes { + PHY_LED_ACTIVE_LOW = 0, + PHY_LED_INACTIVE_HIGH_IMPEDANCE = 1, + + /* keep it last */ + __PHY_LED_MODES_NUM, +}; + /** * struct phy_led: An LED driven by the PHY * @@ -1145,6 +1164,19 @@ struct phy_driver { int (*led_hw_control_get)(struct phy_device *dev, u8 index, unsigned long *rules); + /** + * @led_polarity_set: Set the LED polarity modes + * @dev: PHY device which has the LED + * @index: Which LED of the PHY device + * @modes: bitmap of LED polarity modes + * + * Configure LED with all the required polarity modes in @modes + * to make it correctly turn ON or OFF. + * + * Returns 0, or an error code. + */ + int (*led_polarity_set)(struct phy_device *dev, int index, + unsigned long modes); }; #define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) @@ -1851,7 +1883,7 @@ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, /* Clause 37 */ int genphy_c37_config_aneg(struct phy_device *phydev); -int genphy_c37_read_status(struct phy_device *phydev); +int genphy_c37_read_status(struct phy_device *phydev, bool *changed); /* Clause 45 PHY */ int genphy_c45_restart_aneg(struct phy_device *phydev); @@ -1886,9 +1918,9 @@ int genphy_c45_plca_get_status(struct phy_device *phydev, int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, unsigned long *lp, bool *is_enabled); int genphy_c45_ethtool_get_eee(struct phy_device *phydev, - struct ethtool_eee *data); + struct ethtool_keee *data); int genphy_c45_ethtool_set_eee(struct phy_device *phydev, - struct ethtool_eee *data); + struct ethtool_keee *data); int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv); int genphy_c45_an_config_eee_aneg(struct phy_device *phydev); int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv); @@ -1938,8 +1970,10 @@ int phy_get_rate_matching(struct phy_device *phydev, void phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); void phy_advertise_supported(struct phy_device *phydev); +void phy_advertise_eee_all(struct phy_device *phydev); void phy_support_sym_pause(struct phy_device *phydev); void phy_support_asym_pause(struct phy_device *phydev); +void phy_support_eee(struct phy_device *phydev); void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, bool autoneg); void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); @@ -1966,8 +2000,8 @@ int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask); int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable); int phy_get_eee_err(struct phy_device *phydev); -int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data); -int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data); +int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data); +int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data); int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); void phy_ethtool_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); @@ -1977,9 +2011,12 @@ int phy_ethtool_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *cmd); int phy_ethtool_nway_reset(struct net_device *ndev); int phy_package_join(struct phy_device *phydev, int base_addr, size_t priv_size); +int of_phy_package_join(struct phy_device *phydev, size_t priv_size); void phy_package_leave(struct phy_device *phydev); int devm_phy_package_join(struct device *dev, struct phy_device *phydev, int base_addr, size_t priv_size); +int devm_of_phy_package_join(struct device *dev, struct phy_device *phydev, + size_t priv_size); int __init mdio_bus_init(void); void mdio_bus_exit(void); @@ -2100,7 +2137,7 @@ static inline bool phy_package_probe_once(struct phy_device *phydev) return __phy_package_set_once(phydev, PHY_SHARED_F_PROBE_DONE); } -extern struct bus_type mdio_bus_type; +extern const struct bus_type mdio_bus_type; struct mdio_board_info { const char *bus_id; diff --git a/include/linux/phylink.h b/include/linux/phylink.h index d589f89c612c..9a57deefcb07 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -480,9 +480,6 @@ void pcs_disable(struct phylink_pcs *pcs); * negotiation completion state in @state->an_complete, and link up state * in @state->link. If possible, @state->lp_advertising should also be * populated. - * - * When present, this overrides pcs_get_state() in &struct - * phylink_pcs_ops. */ void pcs_get_state(struct phylink_pcs *pcs, struct phylink_link_state *state); @@ -584,8 +581,8 @@ int phylink_ethtool_set_pauseparam(struct phylink *, struct ethtool_pauseparam *); int phylink_get_eee_err(struct phylink *); int phylink_init_eee(struct phylink *, bool); -int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *); -int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *); +int phylink_ethtool_get_eee(struct phylink *link, struct ethtool_keee *eee); +int phylink_ethtool_set_eee(struct phylink *link, struct ethtool_keee *eee); int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); int phylink_speed_down(struct phylink *pl, bool sync); int phylink_speed_up(struct phylink *pl); diff --git a/include/linux/pid.h b/include/linux/pid.h index 395cacce1179..c79a0efd0258 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -55,6 +55,10 @@ struct pid refcount_t count; unsigned int level; spinlock_t lock; +#ifdef CONFIG_FS_PID + struct dentry *stashed; + unsigned long ino; +#endif /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct hlist_head inodes; @@ -66,15 +70,13 @@ struct pid extern struct pid init_struct_pid; -extern const struct file_operations pidfd_fops; - struct file; -extern struct pid *pidfd_pid(const struct file *file); +struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); -int pidfd_create(struct pid *pid, unsigned int flags); int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); +void do_notify_pidfd(struct task_struct *task); static inline struct pid *get_pid(struct pid *pid) { diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h new file mode 100644 index 000000000000..40dd325a32a6 --- /dev/null +++ b/include/linux/pidfs.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PID_FS_H +#define _LINUX_PID_FS_H + +struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags); +void __init pidfs_init(void); +bool is_pidfs_sb(const struct super_block *sb); + +#endif /* _LINUX_PID_FS_H */ diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index 79594aeb160d..2f1b952d596a 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -154,9 +154,9 @@ struct packet_stacked_data struct pktcdvd_device { - struct bdev_handle *bdev_handle; /* dev attached */ + struct file *bdev_file; /* dev attached */ /* handle acquired for bdev during pkt_open_dev() */ - struct bdev_handle *open_bdev_handle; + struct file *f_open_bdev; dev_t pkt_dev; /* our dev */ struct packet_settings settings; struct packet_stats stats; diff --git a/include/linux/platform_data/brcmfmac.h b/include/linux/platform_data/brcmfmac.h index f922a192fe58..ec99b7b73d1d 100644 --- a/include/linux/platform_data/brcmfmac.h +++ b/include/linux/platform_data/brcmfmac.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 201 Broadcom Corporation + * Copyright (c) 2016 Broadcom Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/include/linux/platform_data/mdio-bcm-unimac.h b/include/linux/platform_data/mdio-bcm-unimac.h index 8a5f9f0b2c52..724e1f57b81f 100644 --- a/include/linux/platform_data/mdio-bcm-unimac.h +++ b/include/linux/platform_data/mdio-bcm-unimac.h @@ -1,11 +1,14 @@ #ifndef __MDIO_BCM_UNIMAC_PDATA_H #define __MDIO_BCM_UNIMAC_PDATA_H +struct clk; + struct unimac_mdio_pdata { u32 phy_mask; int (*wait_func)(void *data); void *wait_func_data; const char *bus_name; + struct clk *clk; }; #define UNIMAC_MDIO_DRV_NAME "unimac-mdio" diff --git a/include/linux/platform_data/microchip-ksz.h b/include/linux/platform_data/microchip-ksz.h index f177416635a2..8c659db4da6b 100644 --- a/include/linux/platform_data/microchip-ksz.h +++ b/include/linux/platform_data/microchip-ksz.h @@ -33,6 +33,7 @@ enum ksz_chip_id { KSZ9897_CHIP_ID = 0x00989700, KSZ9893_CHIP_ID = 0x00989300, KSZ9563_CHIP_ID = 0x00956300, + KSZ8567_CHIP_ID = 0x00856700, KSZ9567_CHIP_ID = 0x00956700, LAN9370_CHIP_ID = 0x00937000, LAN9371_CHIP_ID = 0x00937100, diff --git a/include/linux/platform_data/net-cw1200.h b/include/linux/platform_data/net-cw1200.h index c510734405bb..89d0ec6f7d46 100644 --- a/include/linux/platform_data/net-cw1200.h +++ b/include/linux/platform_data/net-cw1200.h @@ -14,8 +14,6 @@ struct cw1200_platform_data_spi { /* All others are optional */ bool have_5ghz; - int reset; /* GPIO to RSTn signal (0 disables) */ - int powerup; /* GPIO to POWERUP signal (0 disables) */ int (*power_ctrl)(const struct cw1200_platform_data_spi *pdata, bool enable); /* Control 3v3 / 1v8 supply */ int (*clk_ctrl)(const struct cw1200_platform_data_spi *pdata, @@ -30,8 +28,6 @@ struct cw1200_platform_data_sdio { /* All others are optional */ bool have_5ghz; bool no_nptb; /* SDIO hardware does not support non-power-of-2-blocksizes */ - int reset; /* GPIO to RSTn signal (0 disables) */ - int powerup; /* GPIO to POWERUP signal (0 disables) */ int irq; /* IRQ line or 0 to use SDIO IRQ */ int (*power_ctrl)(const struct cw1200_platform_data_sdio *pdata, bool enable); /* Control 3v3 / 1v8 supply */ diff --git a/include/linux/poison.h b/include/linux/poison.h index 27a7dad17eef..1f0ee2459f2a 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -92,4 +92,7 @@ /********** VFS **********/ #define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA)) +/********** lib/stackdepot.c **********/ +#define STACK_DEPOT_POISON ((void *)(0xD390 + POISON_POINTER_DELTA)) + #endif diff --git a/include/linux/poll.h b/include/linux/poll.h index a9e0e1c2d1f2..d1ea4f3714a8 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -14,11 +14,7 @@ /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating additional memory. */ -#ifdef __clang__ -#define MAX_STACK_ALLOC 768 -#else #define MAX_STACK_ALLOC 832 -#endif #define FRONTEND_STACK_ALLOC 256 #define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC #define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC diff --git a/include/linux/printk.h b/include/linux/printk.h index 8ef499ab3c1e..955e31860095 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -273,6 +273,8 @@ static inline void printk_trigger_flush(void) } #endif +bool this_cpu_in_panic(void); + #ifdef CONFIG_SMP extern int __printk_cpu_sync_try_get(void); extern void __printk_cpu_sync_wait(void); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index de407e7c3b55..0b2a89854440 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -65,6 +65,7 @@ struct proc_fs_info { kgid_t pid_gid; enum proc_hidepid hide_pid; enum proc_pidonly pidonly; + struct rcu_head rcu; }; static inline struct proc_fs_info *proc_sb_info(struct super_block *sb) diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 49539bc416ce..5ea470eb4d76 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -66,7 +66,7 @@ static inline void proc_free_inum(unsigned int inum) {} static inline int ns_alloc_inum(struct ns_common *ns) { - atomic_long_set(&ns->stashed, 0); + WRITE_ONCE(ns->stashed, NULL); return proc_alloc_inum(&ns->inum); } diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 7fd17e82bab4..3705c2044fc0 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -78,6 +78,36 @@ enum sev_cmd { SEV_CMD_DBG_DECRYPT = 0x060, SEV_CMD_DBG_ENCRYPT = 0x061, + /* SNP specific commands */ + SEV_CMD_SNP_INIT = 0x081, + SEV_CMD_SNP_SHUTDOWN = 0x082, + SEV_CMD_SNP_PLATFORM_STATUS = 0x083, + SEV_CMD_SNP_DF_FLUSH = 0x084, + SEV_CMD_SNP_INIT_EX = 0x085, + SEV_CMD_SNP_SHUTDOWN_EX = 0x086, + SEV_CMD_SNP_DECOMMISSION = 0x090, + SEV_CMD_SNP_ACTIVATE = 0x091, + SEV_CMD_SNP_GUEST_STATUS = 0x092, + SEV_CMD_SNP_GCTX_CREATE = 0x093, + SEV_CMD_SNP_GUEST_REQUEST = 0x094, + SEV_CMD_SNP_ACTIVATE_EX = 0x095, + SEV_CMD_SNP_LAUNCH_START = 0x0A0, + SEV_CMD_SNP_LAUNCH_UPDATE = 0x0A1, + SEV_CMD_SNP_LAUNCH_FINISH = 0x0A2, + SEV_CMD_SNP_DBG_DECRYPT = 0x0B0, + SEV_CMD_SNP_DBG_ENCRYPT = 0x0B1, + SEV_CMD_SNP_PAGE_SWAP_OUT = 0x0C0, + SEV_CMD_SNP_PAGE_SWAP_IN = 0x0C1, + SEV_CMD_SNP_PAGE_MOVE = 0x0C2, + SEV_CMD_SNP_PAGE_MD_INIT = 0x0C3, + SEV_CMD_SNP_PAGE_SET_STATE = 0x0C6, + SEV_CMD_SNP_PAGE_RECLAIM = 0x0C7, + SEV_CMD_SNP_PAGE_UNSMASH = 0x0C8, + SEV_CMD_SNP_CONFIG = 0x0C9, + SEV_CMD_SNP_DOWNLOAD_FIRMWARE_EX = 0x0CA, + SEV_CMD_SNP_COMMIT = 0x0CB, + SEV_CMD_SNP_VLEK_LOAD = 0x0CD, + SEV_CMD_MAX, }; @@ -523,12 +553,269 @@ struct sev_data_attestation_report { u32 len; /* In/Out */ } __packed; +/** + * struct sev_data_snp_download_firmware - SNP_DOWNLOAD_FIRMWARE command params + * + * @address: physical address of firmware image + * @len: length of the firmware image + */ +struct sev_data_snp_download_firmware { + u64 address; /* In */ + u32 len; /* In */ +} __packed; + +/** + * struct sev_data_snp_activate - SNP_ACTIVATE command params + * + * @gctx_paddr: system physical address guest context page + * @asid: ASID to bind to the guest + */ +struct sev_data_snp_activate { + u64 gctx_paddr; /* In */ + u32 asid; /* In */ +} __packed; + +/** + * struct sev_data_snp_addr - generic SNP command params + * + * @address: physical address of generic data param + */ +struct sev_data_snp_addr { + u64 address; /* In/Out */ +} __packed; + +/** + * struct sev_data_snp_launch_start - SNP_LAUNCH_START command params + * + * @gctx_paddr: system physical address of guest context page + * @policy: guest policy + * @ma_gctx_paddr: system physical address of migration agent + * @ma_en: the guest is associated with a migration agent + * @imi_en: launch flow is launching an IMI (Incoming Migration Image) for the + * purpose of guest-assisted migration. + * @rsvd: reserved + * @gosvw: guest OS-visible workarounds, as defined by hypervisor + */ +struct sev_data_snp_launch_start { + u64 gctx_paddr; /* In */ + u64 policy; /* In */ + u64 ma_gctx_paddr; /* In */ + u32 ma_en:1; /* In */ + u32 imi_en:1; /* In */ + u32 rsvd:30; + u8 gosvw[16]; /* In */ +} __packed; + +/* SNP support page type */ +enum { + SNP_PAGE_TYPE_NORMAL = 0x1, + SNP_PAGE_TYPE_VMSA = 0x2, + SNP_PAGE_TYPE_ZERO = 0x3, + SNP_PAGE_TYPE_UNMEASURED = 0x4, + SNP_PAGE_TYPE_SECRET = 0x5, + SNP_PAGE_TYPE_CPUID = 0x6, + + SNP_PAGE_TYPE_MAX +}; + +/** + * struct sev_data_snp_launch_update - SNP_LAUNCH_UPDATE command params + * + * @gctx_paddr: system physical address of guest context page + * @page_size: page size 0 indicates 4K and 1 indicates 2MB page + * @page_type: encoded page type + * @imi_page: indicates that this page is part of the IMI (Incoming Migration + * Image) of the guest + * @rsvd: reserved + * @rsvd2: reserved + * @address: system physical address of destination page to encrypt + * @rsvd3: reserved + * @vmpl1_perms: VMPL permission mask for VMPL1 + * @vmpl2_perms: VMPL permission mask for VMPL2 + * @vmpl3_perms: VMPL permission mask for VMPL3 + * @rsvd4: reserved + */ +struct sev_data_snp_launch_update { + u64 gctx_paddr; /* In */ + u32 page_size:1; /* In */ + u32 page_type:3; /* In */ + u32 imi_page:1; /* In */ + u32 rsvd:27; + u32 rsvd2; + u64 address; /* In */ + u32 rsvd3:8; + u32 vmpl1_perms:8; /* In */ + u32 vmpl2_perms:8; /* In */ + u32 vmpl3_perms:8; /* In */ + u32 rsvd4; +} __packed; + +/** + * struct sev_data_snp_launch_finish - SNP_LAUNCH_FINISH command params + * + * @gctx_paddr: system physical address of guest context page + * @id_block_paddr: system physical address of ID block + * @id_auth_paddr: system physical address of ID block authentication structure + * @id_block_en: indicates whether ID block is present + * @auth_key_en: indicates whether author key is present in authentication structure + * @rsvd: reserved + * @host_data: host-supplied data for guest, not interpreted by firmware + */ +struct sev_data_snp_launch_finish { + u64 gctx_paddr; + u64 id_block_paddr; + u64 id_auth_paddr; + u8 id_block_en:1; + u8 auth_key_en:1; + u64 rsvd:62; + u8 host_data[32]; +} __packed; + +/** + * struct sev_data_snp_guest_status - SNP_GUEST_STATUS command params + * + * @gctx_paddr: system physical address of guest context page + * @address: system physical address of guest status page + */ +struct sev_data_snp_guest_status { + u64 gctx_paddr; + u64 address; +} __packed; + +/** + * struct sev_data_snp_page_reclaim - SNP_PAGE_RECLAIM command params + * + * @paddr: system physical address of page to be claimed. The 0th bit in the + * address indicates the page size. 0h indicates 4KB and 1h indicates + * 2MB page. + */ +struct sev_data_snp_page_reclaim { + u64 paddr; +} __packed; + +/** + * struct sev_data_snp_page_unsmash - SNP_PAGE_UNSMASH command params + * + * @paddr: system physical address of page to be unsmashed. The 0th bit in the + * address indicates the page size. 0h indicates 4 KB and 1h indicates + * 2 MB page. + */ +struct sev_data_snp_page_unsmash { + u64 paddr; +} __packed; + +/** + * struct sev_data_snp_dbg - DBG_ENCRYPT/DBG_DECRYPT command parameters + * + * @gctx_paddr: system physical address of guest context page + * @src_addr: source address of data to operate on + * @dst_addr: destination address of data to operate on + */ +struct sev_data_snp_dbg { + u64 gctx_paddr; /* In */ + u64 src_addr; /* In */ + u64 dst_addr; /* In */ +} __packed; + +/** + * struct sev_data_snp_guest_request - SNP_GUEST_REQUEST command params + * + * @gctx_paddr: system physical address of guest context page + * @req_paddr: system physical address of request page + * @res_paddr: system physical address of response page + */ +struct sev_data_snp_guest_request { + u64 gctx_paddr; /* In */ + u64 req_paddr; /* In */ + u64 res_paddr; /* In */ +} __packed; + +/** + * struct sev_data_snp_init_ex - SNP_INIT_EX structure + * + * @init_rmp: indicate that the RMP should be initialized. + * @list_paddr_en: indicate that list_paddr is valid + * @rsvd: reserved + * @rsvd1: reserved + * @list_paddr: system physical address of range list + * @rsvd2: reserved + */ +struct sev_data_snp_init_ex { + u32 init_rmp:1; + u32 list_paddr_en:1; + u32 rsvd:30; + u32 rsvd1; + u64 list_paddr; + u8 rsvd2[48]; +} __packed; + +/** + * struct sev_data_range - RANGE structure + * + * @base: system physical address of first byte of range + * @page_count: number of 4KB pages in this range + * @rsvd: reserved + */ +struct sev_data_range { + u64 base; + u32 page_count; + u32 rsvd; +} __packed; + +/** + * struct sev_data_range_list - RANGE_LIST structure + * + * @num_elements: number of elements in RANGE_ARRAY + * @rsvd: reserved + * @ranges: array of num_elements of type RANGE + */ +struct sev_data_range_list { + u32 num_elements; + u32 rsvd; + struct sev_data_range ranges[]; +} __packed; + +/** + * struct sev_data_snp_shutdown_ex - SNP_SHUTDOWN_EX structure + * + * @len: length of the command buffer read by the PSP + * @iommu_snp_shutdown: Disable enforcement of SNP in the IOMMU + * @rsvd1: reserved + */ +struct sev_data_snp_shutdown_ex { + u32 len; + u32 iommu_snp_shutdown:1; + u32 rsvd1:31; +} __packed; + +/** + * struct sev_platform_init_args + * + * @error: SEV firmware error code + * @probe: True if this is being called as part of CCP module probe, which + * will defer SEV_INIT/SEV_INIT_EX firmware initialization until needed + * unless psp_init_on_probe module param is set + */ +struct sev_platform_init_args { + int error; + bool probe; +}; + +/** + * struct sev_data_snp_commit - SNP_COMMIT structure + * + * @len: length of the command buffer read by the PSP + */ +struct sev_data_snp_commit { + u32 len; +} __packed; + #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** * sev_platform_init - perform SEV INIT command * - * @error: SEV command return code + * @args: struct sev_platform_init_args to pass in arguments * * Returns: * 0 if the SEV successfully processed the command @@ -537,7 +824,7 @@ struct sev_data_attestation_report { * -%ETIMEDOUT if the SEV command timed out * -%EIO if the SEV returned a non-zero return code */ -int sev_platform_init(int *error); +int sev_platform_init(struct sev_platform_init_args *args); /** * sev_platform_status - perform SEV PLATFORM_STATUS command @@ -637,14 +924,32 @@ int sev_guest_df_flush(int *error); */ int sev_guest_decommission(struct sev_data_decommission *data, int *error); +/** + * sev_do_cmd - issue an SEV or an SEV-SNP command + * + * @cmd: SEV or SEV-SNP firmware command to issue + * @data: arguments for firmware command + * @psp_ret: SEV command return code + * + * Returns: + * 0 if the SEV device successfully processed the command + * -%ENODEV if the PSP device is not available + * -%ENOTSUPP if PSP device does not support SEV + * -%ETIMEDOUT if the SEV command timed out + * -%EIO if PSP device returned a non-zero return code + */ +int sev_do_cmd(int cmd, void *data, int *psp_ret); + void *psp_copy_user_blob(u64 uaddr, u32 len); +void *snp_alloc_firmware_page(gfp_t mask); +void snp_free_firmware_page(void *addr); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ static inline int sev_platform_status(struct sev_user_data_status *status, int *error) { return -ENODEV; } -static inline int sev_platform_init(int *error) { return -ENODEV; } +static inline int sev_platform_init(struct sev_platform_init_args *args) { return -ENODEV; } static inline int sev_guest_deactivate(struct sev_data_deactivate *data, int *error) { return -ENODEV; } @@ -653,6 +958,9 @@ static inline int sev_guest_decommission(struct sev_data_decommission *data, int *error) { return -ENODEV; } static inline int +sev_do_cmd(int cmd, void *data, int *psp_ret) { return -ENODEV; } + +static inline int sev_guest_activate(struct sev_data_activate *data, int *error) { return -ENODEV; } static inline int sev_guest_df_flush(int *error) { return -ENODEV; } @@ -662,6 +970,13 @@ sev_issue_cmd_external_user(struct file *filep, unsigned int id, void *data, int static inline void *psp_copy_user_blob(u64 __user uaddr, u32 len) { return ERR_PTR(-EINVAL); } +static inline void *snp_alloc_firmware_page(gfp_t mask) +{ + return NULL; +} + +static inline void snp_free_firmware_page(void *addr) { } + #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ #endif /* __PSP_SEV_H__ */ diff --git a/include/linux/pti.h b/include/linux/pti.h index 1a941efcaa62..1fbf9d6c20ef 100644 --- a/include/linux/pti.h +++ b/include/linux/pti.h @@ -2,7 +2,7 @@ #ifndef _INCLUDE_PTI_H #define _INCLUDE_PTI_H -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION #include <asm/pti.h> #else static inline void pti_init(void) { } diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 1ef4e0f9bd2a..6e4b8206c7d0 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -200,6 +200,7 @@ struct ptp_clock; enum ptp_clock_events { PTP_CLOCK_ALARM, PTP_CLOCK_EXTTS, + PTP_CLOCK_EXTOFF, PTP_CLOCK_PPS, PTP_CLOCK_PPSUSR, }; @@ -210,6 +211,7 @@ enum ptp_clock_events { * @type: One of the ptp_clock_events enumeration values. * @index: Identifies the source of the event. * @timestamp: When the event occurred (%PTP_CLOCK_EXTTS only). + * @offset: When the event occurred (%PTP_CLOCK_EXTOFF only). * @pps_times: When the event occurred (%PTP_CLOCK_PPSUSR only). */ @@ -218,6 +220,7 @@ struct ptp_clock_event { int index; union { u64 timestamp; + s64 offset; struct pps_event_time pps_times; }; }; diff --git a/include/linux/ptp_kvm.h b/include/linux/ptp_kvm.h index 746fd67c3480..e8c74fa3f455 100644 --- a/include/linux/ptp_kvm.h +++ b/include/linux/ptp_kvm.h @@ -8,15 +8,15 @@ #ifndef _PTP_KVM_H_ #define _PTP_KVM_H_ +#include <linux/clocksource_ids.h> #include <linux/types.h> struct timespec64; -struct clocksource; int kvm_arch_ptp_init(void); void kvm_arch_ptp_exit(void); int kvm_arch_ptp_get_clock(struct timespec64 *ts); int kvm_arch_ptp_get_crosststamp(u64 *cycle, - struct timespec64 *tspec, struct clocksource **cs); + struct timespec64 *tspec, enum clocksource_ids *cs_id); #endif /* _PTP_KVM_H_ */ diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index eaaef3ffec22..90507d4afcd6 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -393,6 +393,10 @@ static inline void user_single_step_report(struct pt_regs *regs) #define current_user_stack_pointer() user_stack_pointer(current_pt_regs()) #endif +#ifndef exception_ip +#define exception_ip(x) instruction_pointer(x) +#endif + extern int task_current_syscall(struct task_struct *target, struct syscall_info *info); extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact); diff --git a/include/linux/ras.h b/include/linux/ras.h index 1f4048bf2674..a64182bc72ad 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -25,6 +25,7 @@ void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id, const char *fru_text, const u8 sev, const u8 *err, const u32 len); void log_arm_hw_error(struct cper_sec_proc_arm *err); + #else static inline void log_non_standard_event(const guid_t *sec_type, @@ -35,4 +36,21 @@ static inline void log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } #endif +struct atl_err { + u64 addr; + u64 ipid; + u32 cpu; +}; + +#if IS_ENABLED(CONFIG_AMD_ATL) +void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *)); +void amd_atl_unregister_decoder(void); +void amd_retire_dram_row(struct atl_err *err); +unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err); +#else +static inline void amd_retire_dram_row(struct atl_err *err) { } +static inline unsigned long +amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; } +#endif /* CONFIG_AMD_ATL */ + #endif /* __RAS_H__ */ diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index 0027d4c8087c..3860dbb9107a 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -37,7 +37,6 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) } extern void rcu_sync_init(struct rcu_sync *); -extern void rcu_sync_enter_start(struct rcu_sync *); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); extern void rcu_sync_dtor(struct rcu_sync *); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 0746b1b0b663..16f519914415 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -184,9 +184,9 @@ void rcu_tasks_trace_qs_blkd(struct task_struct *t); do { \ int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \ \ - if (likely(!READ_ONCE((t)->trc_reader_special.b.need_qs)) && \ + if (unlikely(READ_ONCE((t)->trc_reader_special.b.need_qs) == TRC_NEED_QS) && \ likely(!___rttq_nesting)) { \ - rcu_trc_cmpxchg_need_qs((t), 0, TRC_NEED_QS_CHECKED); \ + rcu_trc_cmpxchg_need_qs((t), TRC_NEED_QS, TRC_NEED_QS_CHECKED); \ } else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \ !READ_ONCE((t)->trc_reader_special.b.blocked)) { \ rcu_tasks_trace_qs_blkd(t); \ diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 85c6df0d1bef..59b3b752394d 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -136,7 +136,8 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } -static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) +static inline __must_check __signed_wrap +bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) { int old = refcount_read(r); @@ -177,7 +178,8 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) return __refcount_add_not_zero(i, r, NULL); } -static inline void __refcount_add(int i, refcount_t *r, int *oldp) +static inline __signed_wrap +void __refcount_add(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_add_relaxed(i, &r->refs); @@ -256,7 +258,8 @@ static inline void refcount_inc(refcount_t *r) __refcount_inc(r, NULL); } -static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) +static inline __must_check __signed_wrap +bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_sub_release(i, &r->refs); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 66942d7fba7f..a365f67131ec 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -6,6 +6,12 @@ #include <linux/list.h> #include <linux/pid.h> +/* CLOSID, RMID value used by the default control group */ +#define RESCTRL_RESERVED_CLOSID 0 +#define RESCTRL_RESERVED_RMID 0 + +#define RESCTRL_PICK_ANY_CPU -1 + #ifdef CONFIG_PROC_CPU_RESCTRL int proc_resctrl_show(struct seq_file *m, @@ -153,7 +159,7 @@ struct resctrl_schema; * @cache_level: Which cache level defines scope of this resource * @cache: Cache allocation related data * @membw: If the component has bandwidth controls, their properties. - * @domains: All domains for this resource + * @domains: RCU list of all domains for this resource * @name: Name to use in "schemata" file. * @data_width: Character width of data when displaying * @default_ctrl: Specifies default cache cbm or memory B/W percent. @@ -219,36 +225,70 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type); int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d); void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d); +void resctrl_online_cpu(unsigned int cpu); +void resctrl_offline_cpu(unsigned int cpu); /** * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid * for this resource and domain. * @r: resource that the counter should be read from. * @d: domain that the counter should be read from. + * @closid: closid that matches the rmid. Depending on the architecture, the + * counter may match traffic of both @closid and @rmid, or @rmid + * only. * @rmid: rmid of the counter to read. * @eventid: eventid to read, e.g. L3 occupancy. * @val: result of the counter read in bytes. + * @arch_mon_ctx: An architecture specific value from + * resctrl_arch_mon_ctx_alloc(), for MPAM this identifies + * the hardware monitor allocated for this read request. * - * Call from process context on a CPU that belongs to domain @d. + * Some architectures need to sleep when first programming some of the counters. + * (specifically: arm64's MPAM cache occupancy counters can return 'not ready' + * for a short period of time). Call from a non-migrateable process context on + * a CPU that belongs to domain @d. e.g. use smp_call_on_cpu() or + * schedule_work_on(). This function can be called with interrupts masked, + * e.g. using smp_call_function_any(), but may consistently return an error. * * Return: * 0 on success, or -EIO, -EINVAL etc on error. */ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, - u32 rmid, enum resctrl_event_id eventid, u64 *val); + u32 closid, u32 rmid, enum resctrl_event_id eventid, + u64 *val, void *arch_mon_ctx); + +/** + * resctrl_arch_rmid_read_context_check() - warn about invalid contexts + * + * When built with CONFIG_DEBUG_ATOMIC_SLEEP generate a warning when + * resctrl_arch_rmid_read() is called with preemption disabled. + * + * The contract with resctrl_arch_rmid_read() is that if interrupts + * are unmasked, it can sleep. This allows NOHZ_FULL systems to use an + * IPI, (and fail if the call needed to sleep), while most of the time + * the work is scheduled, allowing the call to sleep. + */ +static inline void resctrl_arch_rmid_read_context_check(void) +{ + if (!irqs_disabled()) + might_sleep(); +} /** * resctrl_arch_reset_rmid() - Reset any private state associated with rmid * and eventid. * @r: The domain's resource. * @d: The rmid's domain. + * @closid: closid that matches the rmid. Depending on the architecture, the + * counter may match traffic of both @closid and @rmid, or @rmid only. * @rmid: The rmid whose counter values should be reset. * @eventid: The eventid whose counter values should be reset. * * This can be called from any CPU. */ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, - u32 rmid, enum resctrl_event_id eventid); + u32 closid, u32 rmid, + enum resctrl_event_id eventid); /** * resctrl_arch_reset_rmid_all() - Reset all private state associated with diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 410529fca18b..cdfc897f1e3c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -47,6 +47,7 @@ extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); extern wait_queue_head_t netdev_unregistering_wq; +extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; extern struct rw_semaphore net_rwsem; @@ -171,4 +172,6 @@ rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group) return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); } +void netdev_set_operstate(struct net_device *dev, int newstate); + #endif /* __LINUX_RTNETLINK_H */ diff --git a/include/linux/rw_hint.h b/include/linux/rw_hint.h new file mode 100644 index 000000000000..309ca72f2dfb --- /dev/null +++ b/include/linux/rw_hint.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_RW_HINT_H +#define _LINUX_RW_HINT_H + +#include <linux/build_bug.h> +#include <linux/compiler_attributes.h> +#include <uapi/linux/fcntl.h> + +/* Block storage write lifetime hint values. */ +enum rw_hint { + WRITE_LIFE_NOT_SET = RWH_WRITE_LIFE_NOT_SET, + WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, + WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, + WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, + WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, + WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, +} __packed; + +/* Sparse ignores __packed annotations on enums, hence the #ifndef below. */ +#ifndef __CHECKER__ +static_assert(sizeof(enum rw_hint) == 1); +#endif + +#endif /* _LINUX_RW_HINT_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index cdb8ea53c365..17cb0761ff65 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -858,6 +858,8 @@ struct task_struct { u8 rcu_tasks_idx; int rcu_tasks_idle_cpu; struct list_head rcu_tasks_holdout_list; + int rcu_tasks_exit_cpu; + struct list_head rcu_tasks_exit_list; #endif /* #ifdef CONFIG_TASKS_RCU */ #ifdef CONFIG_TASKS_TRACE_RCU @@ -920,7 +922,7 @@ struct task_struct { unsigned sched_rt_mutex:1; #endif - /* Bit to tell LSMs we're in execve(): */ + /* Bit to tell TOMOYO we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; #ifndef TIF_RESTORE_SIGMASK @@ -1642,7 +1644,7 @@ extern struct pid *cad_pid; #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */ -#define PF__HOLE__20000000 0x20000000 +#define PF_BLOCK_TS 0x20000000 /* plug has ts that needs updating */ #define PF__HOLE__40000000 0x40000000 #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index a8b28647aafc..b04a5d04dee9 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -117,13 +117,13 @@ SD_FLAG(SD_SHARE_CPUCAPACITY, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) SD_FLAG(SD_CLUSTER, SDF_NEEDS_GROUPS) /* - * Domain members share CPU package resources (i.e. caches) + * Domain members share CPU Last Level Caches * * SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share * the same cache(s). * NEEDS_GROUPS: Caches are shared between groups. */ -SD_FLAG(SD_SHARE_PKG_RESOURCES, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) +SD_FLAG(SD_SHARE_LLC, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) /* * Only a single load balancing instance diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 4b7664c56208..0a0e23c45406 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -735,8 +735,6 @@ static inline int thread_group_empty(struct task_struct *p) #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) -extern bool thread_group_exited(struct pid *pid); - extern struct sighand_struct *__lock_task_sighand(struct task_struct *task, unsigned long *flags); diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index a6e04b4a21d7..18572c9ea724 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -38,21 +38,21 @@ extern const struct sd_flag_debug sd_flag_debug[]; #ifdef CONFIG_SCHED_SMT static inline int cpu_smt_flags(void) { - return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; + return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC; } #endif #ifdef CONFIG_SCHED_CLUSTER static inline int cpu_cluster_flags(void) { - return SD_CLUSTER | SD_SHARE_PKG_RESOURCES; + return SD_CLUSTER | SD_SHARE_LLC; } #endif #ifdef CONFIG_SCHED_MC static inline int cpu_core_flags(void) { - return SD_SHARE_PKG_RESOURCES; + return SD_SHARE_LLC; } #endif @@ -176,6 +176,7 @@ extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], cpumask_var_t *alloc_sched_domains(unsigned int ndoms); void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); +bool cpus_equal_capacity(int this_cpu, int that_cpu); bool cpus_share_cache(int this_cpu, int that_cpu); bool cpus_share_resources(int this_cpu, int that_cpu); @@ -226,6 +227,11 @@ partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], { } +static inline bool cpus_equal_capacity(int this_cpu, int that_cpu) +{ + return true; +} + static inline bool cpus_share_cache(int this_cpu, int that_cpu) { return true; diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index f2f05fb42d28..2ee94ff0320c 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -47,6 +47,10 @@ struct scmi_clock_info { bool rate_discrete; bool rate_changed_notifications; bool rate_change_requested_notifications; + bool state_ctrl_forbidden; + bool rate_ctrl_forbidden; + bool parent_ctrl_forbidden; + bool extended_config; union { struct { int num_rates; @@ -72,6 +76,13 @@ struct scmi_handle; struct scmi_device; struct scmi_protocol_handle; +enum scmi_clock_oem_config { + SCMI_CLOCK_CFG_DUTY_CYCLE = 0x1, + SCMI_CLOCK_CFG_PHASE, + SCMI_CLOCK_CFG_OEM_START = 0x80, + SCMI_CLOCK_CFG_OEM_END = 0xFF, +}; + /** * struct scmi_clk_proto_ops - represents the various operations provided * by SCMI Clock Protocol @@ -104,10 +115,11 @@ struct scmi_clk_proto_ops { int (*state_get)(const struct scmi_protocol_handle *ph, u32 clk_id, bool *enabled, bool atomic); int (*config_oem_get)(const struct scmi_protocol_handle *ph, u32 clk_id, - u8 oem_type, u32 *oem_val, u32 *attributes, - bool atomic); + enum scmi_clock_oem_config oem_type, + u32 *oem_val, u32 *attributes, bool atomic); int (*config_oem_set)(const struct scmi_protocol_handle *ph, u32 clk_id, - u8 oem_type, u32 oem_val, bool atomic); + enum scmi_clock_oem_config oem_type, + u32 oem_val, bool atomic); int (*parent_get)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 *parent_id); int (*parent_set)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 parent_id); }; @@ -953,6 +965,8 @@ struct scmi_perf_limits_report { unsigned int domain_id; unsigned int range_max; unsigned int range_min; + unsigned long range_max_freq; + unsigned long range_min_freq; }; struct scmi_perf_level_report { @@ -960,6 +974,7 @@ struct scmi_perf_level_report { unsigned int agent_id; unsigned int domain_id; unsigned int performance_level; + unsigned long performance_level_freq; }; struct scmi_sensor_trip_point_report { diff --git a/include/linux/security.h b/include/linux/security.h index d0eb20f90b26..f249f5b9a9d7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -32,6 +32,7 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/sockptr.h> +#include <linux/bpf.h> #include <uapi/linux/lsm.h> struct linux_binprm; @@ -344,6 +345,8 @@ int security_inode_init_security_anon(struct inode *inode, const struct qstr *name, const struct inode *context_inode); int security_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode); +void security_inode_post_create_tmpfile(struct mnt_idmap *idmap, + struct inode *inode); int security_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry); int security_inode_unlink(struct inode *dir, struct dentry *dentry); @@ -361,6 +364,8 @@ int security_inode_follow_link(struct dentry *dentry, struct inode *inode, int security_inode_permission(struct inode *inode, int mask); int security_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); +void security_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + int ia_valid); int security_inode_getattr(const struct path *path); int security_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, @@ -368,16 +373,22 @@ int security_inode_setxattr(struct mnt_idmap *idmap, int security_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl); +void security_inode_post_set_acl(struct dentry *dentry, const char *acl_name, + struct posix_acl *kacl); int security_inode_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name); int security_inode_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name); +void security_inode_post_remove_acl(struct mnt_idmap *idmap, + struct dentry *dentry, + const char *acl_name); void security_inode_post_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int security_inode_getxattr(struct dentry *dentry, const char *name); int security_inode_listxattr(struct dentry *dentry); int security_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name); +void security_inode_post_removexattr(struct dentry *dentry, const char *name); int security_inode_need_killpriv(struct dentry *dentry); int security_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry); int security_inode_getsecurity(struct mnt_idmap *idmap, @@ -392,6 +403,7 @@ int security_kernfs_init_security(struct kernfs_node *kn_dir, struct kernfs_node *kn); int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); +void security_file_release(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int security_file_ioctl_compat(struct file *file, unsigned int cmd, @@ -408,6 +420,7 @@ int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig); int security_file_receive(struct file *file); int security_file_open(struct file *file); +int security_file_post_open(struct file *file, int mask); int security_file_truncate(struct file *file); int security_task_alloc(struct task_struct *task, unsigned long clone_flags); void security_task_free(struct task_struct *task); @@ -806,6 +819,10 @@ static inline int security_inode_create(struct inode *dir, return 0; } +static inline void +security_inode_post_create_tmpfile(struct mnt_idmap *idmap, struct inode *inode) +{ } + static inline int security_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) @@ -879,6 +896,11 @@ static inline int security_inode_setattr(struct mnt_idmap *idmap, return 0; } +static inline void +security_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + int ia_valid) +{ } + static inline int security_inode_getattr(const struct path *path) { return 0; @@ -899,6 +921,11 @@ static inline int security_inode_set_acl(struct mnt_idmap *idmap, return 0; } +static inline void security_inode_post_set_acl(struct dentry *dentry, + const char *acl_name, + struct posix_acl *kacl) +{ } + static inline int security_inode_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) @@ -913,6 +940,11 @@ static inline int security_inode_remove_acl(struct mnt_idmap *idmap, return 0; } +static inline void security_inode_post_remove_acl(struct mnt_idmap *idmap, + struct dentry *dentry, + const char *acl_name) +{ } + static inline void security_inode_post_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { } @@ -935,6 +967,10 @@ static inline int security_inode_removexattr(struct mnt_idmap *idmap, return cap_inode_removexattr(idmap, dentry, name); } +static inline void security_inode_post_removexattr(struct dentry *dentry, + const char *name) +{ } + static inline int security_inode_need_killpriv(struct dentry *dentry) { return cap_inode_need_killpriv(dentry); @@ -995,6 +1031,9 @@ static inline int security_file_alloc(struct file *file) return 0; } +static inline void security_file_release(struct file *file) +{ } + static inline void security_file_free(struct file *file) { } @@ -1062,6 +1101,11 @@ static inline int security_file_open(struct file *file) return 0; } +static inline int security_file_post_open(struct file *file, int mask) +{ + return 0; +} + static inline int security_file_truncate(struct file *file) { return 0; @@ -1871,6 +1915,7 @@ int security_path_mkdir(const struct path *dir, struct dentry *dentry, umode_t m int security_path_rmdir(const struct path *dir, struct dentry *dentry); int security_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); +void security_path_post_mknod(struct mnt_idmap *idmap, struct dentry *dentry); int security_path_truncate(const struct path *path); int security_path_symlink(const struct path *dir, struct dentry *dentry, const char *old_name); @@ -1905,6 +1950,10 @@ static inline int security_path_mknod(const struct path *dir, struct dentry *den return 0; } +static inline void security_path_post_mknod(struct mnt_idmap *idmap, + struct dentry *dentry) +{ } + static inline int security_path_truncate(const struct path *path) { return 0; @@ -1956,6 +2005,9 @@ void security_key_free(struct key *key); int security_key_permission(key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm); int security_key_getsecurity(struct key *key, char **_buffer); +void security_key_post_create_or_update(struct key *keyring, struct key *key, + const void *payload, size_t payload_len, + unsigned long flags, bool create); #else @@ -1983,6 +2035,14 @@ static inline int security_key_getsecurity(struct key *key, char **_buffer) return 0; } +static inline void security_key_post_create_or_update(struct key *keyring, + struct key *key, + const void *payload, + size_t payload_len, + unsigned long flags, + bool create) +{ } + #endif #endif /* CONFIG_KEYS */ @@ -2064,15 +2124,22 @@ static inline void securityfs_remove(struct dentry *dentry) union bpf_attr; struct bpf_map; struct bpf_prog; -struct bpf_prog_aux; +struct bpf_token; #ifdef CONFIG_SECURITY extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); extern int security_bpf_prog(struct bpf_prog *prog); -extern int security_bpf_map_alloc(struct bpf_map *map); +extern int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, + struct bpf_token *token); extern void security_bpf_map_free(struct bpf_map *map); -extern int security_bpf_prog_alloc(struct bpf_prog_aux *aux); -extern void security_bpf_prog_free(struct bpf_prog_aux *aux); +extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, + struct bpf_token *token); +extern void security_bpf_prog_free(struct bpf_prog *prog); +extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, + struct path *path); +extern void security_bpf_token_free(struct bpf_token *token); +extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd); +extern int security_bpf_token_capable(const struct bpf_token *token, int cap); #else static inline int security_bpf(int cmd, union bpf_attr *attr, unsigned int size) @@ -2090,7 +2157,8 @@ static inline int security_bpf_prog(struct bpf_prog *prog) return 0; } -static inline int security_bpf_map_alloc(struct bpf_map *map) +static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, + struct bpf_token *token) { return 0; } @@ -2098,13 +2166,33 @@ static inline int security_bpf_map_alloc(struct bpf_map *map) static inline void security_bpf_map_free(struct bpf_map *map) { } -static inline int security_bpf_prog_alloc(struct bpf_prog_aux *aux) +static inline int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, + struct bpf_token *token) +{ + return 0; +} + +static inline void security_bpf_prog_free(struct bpf_prog *prog) +{ } + +static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, + struct path *path) { return 0; } -static inline void security_bpf_prog_free(struct bpf_prog_aux *aux) +static inline void security_bpf_token_free(struct bpf_token *token) { } + +static inline int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd) +{ + return 0; +} + +static inline int security_bpf_token_capable(const struct bpf_token *token, int cap) +{ + return 0; +} #endif /* CONFIG_SECURITY */ #endif /* CONFIG_BPF_SYSCALL */ diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index c44f4b47b945..fe41da005970 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -2,7 +2,10 @@ #ifndef _LINUX_SEQ_BUF_H #define _LINUX_SEQ_BUF_H -#include <linux/fs.h> +#include <linux/bug.h> +#include <linux/minmax.h> +#include <linux/seq_file.h> +#include <linux/types.h> /* * Trace sequences are used to allow a function to call several other functions @@ -10,7 +13,7 @@ */ /** - * seq_buf - seq buffer structure + * struct seq_buf - seq buffer structure * @buffer: pointer to the buffer * @size: size of the buffer * @len: the amount of data inside the buffer @@ -77,10 +80,10 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) } /** - * seq_buf_str - get %NUL-terminated C string from seq_buf + * seq_buf_str - get NUL-terminated C string from seq_buf * @s: the seq_buf handle * - * This makes sure that the buffer in @s is nul terminated and + * This makes sure that the buffer in @s is NUL-terminated and * safe to read as a string. * * Note, if this is called when the buffer has overflowed, then @@ -90,7 +93,7 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) * After this function is called, s->buffer is safe to use * in string operations. * - * Returns @s->buf after making sure it is terminated. + * Returns: @s->buf after making sure it is terminated. */ static inline const char *seq_buf_str(struct seq_buf *s) { @@ -110,7 +113,7 @@ static inline const char *seq_buf_str(struct seq_buf *s) * @s: the seq_buf handle * @bufp: the beginning of the buffer is stored here * - * Return the number of bytes available in the buffer, or zero if + * Returns: the number of bytes available in the buffer, or zero if * there's no space. */ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp) @@ -132,7 +135,7 @@ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp) * @num: the number of bytes to commit * * Commit @num bytes of data written to a buffer previously acquired - * by seq_buf_get. To signal an error condition, or that the data + * by seq_buf_get_buf(). To signal an error condition, or that the data * didn't fit in the available space, pass a negative @num value. */ static inline void seq_buf_commit(struct seq_buf *s, int num) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 536b2581d3e2..55b1f3ba48ac 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -748,8 +748,17 @@ struct uart_driver { void uart_write_wakeup(struct uart_port *port); -#define __uart_port_tx(uport, ch, tx_ready, put_char, tx_done, for_test, \ - for_post) \ +/** + * enum UART_TX_FLAGS -- flags for uart_port_tx_flags() + * + * @UART_TX_NOSTOP: don't call port->ops->stop_tx() on empty buffer + */ +enum UART_TX_FLAGS { + UART_TX_NOSTOP = BIT(0), +}; + +#define __uart_port_tx(uport, ch, flags, tx_ready, put_char, tx_done, \ + for_test, for_post) \ ({ \ struct uart_port *__port = (uport); \ struct circ_buf *xmit = &__port->state->xmit; \ @@ -777,7 +786,7 @@ void uart_write_wakeup(struct uart_port *port); if (pending < WAKEUP_CHARS) { \ uart_write_wakeup(__port); \ \ - if (pending == 0) \ + if (!((flags) & UART_TX_NOSTOP) && pending == 0) \ __port->ops->stop_tx(__port); \ } \ \ @@ -812,7 +821,7 @@ void uart_write_wakeup(struct uart_port *port); */ #define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \ unsigned int __count = (count); \ - __uart_port_tx(port, ch, tx_ready, put_char, tx_done, __count, \ + __uart_port_tx(port, ch, 0, tx_ready, put_char, tx_done, __count, \ __count--); \ }) @@ -826,8 +835,21 @@ void uart_write_wakeup(struct uart_port *port); * See uart_port_tx_limited() for more details. */ #define uart_port_tx(port, ch, tx_ready, put_char) \ - __uart_port_tx(port, ch, tx_ready, put_char, ({}), true, ({})) + __uart_port_tx(port, ch, 0, tx_ready, put_char, ({}), true, ({})) + +/** + * uart_port_tx_flags -- transmit helper for uart_port with flags + * @port: uart port + * @ch: variable to store a character to be written to the HW + * @flags: %UART_TX_NOSTOP or similar + * @tx_ready: can HW accept more data function + * @put_char: function to write a character + * + * See uart_port_tx_limited() for more details. + */ +#define uart_port_tx_flags(port, ch, flags, tx_ready, put_char) \ + __uart_port_tx(port, ch, flags, tx_ready, put_char, ({}), true, ({})) /* * Baud rate helpers. */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2dde34c29203..3023bc2be6a1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -37,6 +37,7 @@ #endif #include <net/net_debug.h> #include <net/dropreason-core.h> +#include <net/netmem.h> /** * DOC: skb checksums @@ -359,7 +360,11 @@ extern int sysctl_max_skb_frags; */ #define GSO_BY_FRAGS 0xFFFF -typedef struct bio_vec skb_frag_t; +typedef struct skb_frag { + netmem_ref netmem; + unsigned int len; + unsigned int offset; +} skb_frag_t; /** * skb_frag_size() - Returns the size of a skb fragment @@ -367,7 +372,7 @@ typedef struct bio_vec skb_frag_t; */ static inline unsigned int skb_frag_size(const skb_frag_t *frag) { - return frag->bv_len; + return frag->len; } /** @@ -377,7 +382,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag) */ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) { - frag->bv_len = size; + frag->len = size; } /** @@ -387,7 +392,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) */ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) { - frag->bv_len += delta; + frag->len += delta; } /** @@ -397,7 +402,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) */ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) { - frag->bv_len -= delta; + frag->len -= delta; } /** @@ -417,7 +422,7 @@ static inline bool skb_frag_must_loop(struct page *p) * skb_frag_foreach_page - loop over pages in a fragment * * @f: skb frag to operate on - * @f_off: offset from start of f->bv_page + * @f_off: offset from start of f->netmem * @f_len: length from f_off to loop over * @p: (temp var) current page * @p_off: (temp var) offset from start of current page, @@ -817,9 +822,9 @@ typedef unsigned char *sk_buff_data_t; * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required * @mono_delivery_time: When set, skb->tstamp has the - * delivery_time in mono clock base (i.e. EDT). Otherwise, the - * skb->tstamp has the (rcv) timestamp at ingress and - * delivery_time at egress. + * delivery_time in mono clock base (i.e., EDT) or a clock base chosen + * by SO_TXTIME. If zero, skb->tstamp has the (rcv) timestamp at + * ingress. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @alloc_cpu: CPU which did the skb allocation. @@ -1232,6 +1237,24 @@ static inline bool skb_unref(struct sk_buff *skb) return true; } +static inline bool skb_data_unref(const struct sk_buff *skb, + struct skb_shared_info *shinfo) +{ + int bias; + + if (!skb->cloned) + return true; + + bias = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; + + if (atomic_read(&shinfo->dataref) == bias) + smp_rmb(); + else if (atomic_sub_return(bias, &shinfo->dataref)) + return false; + + return true; +} + void __fix_address kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); @@ -1266,7 +1289,6 @@ static inline void consume_skb(struct sk_buff *skb) void __consume_stateless_skb(struct sk_buff *skb); void __kfree_skb(struct sk_buff *skb); -extern struct kmem_cache *skbuff_cache; void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, @@ -2429,22 +2451,37 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) return skb_headlen(skb) + __skb_pagelen(skb); } +static inline void skb_frag_fill_netmem_desc(skb_frag_t *frag, + netmem_ref netmem, int off, + int size) +{ + frag->netmem = netmem; + frag->offset = off; + skb_frag_size_set(frag, size); +} + static inline void skb_frag_fill_page_desc(skb_frag_t *frag, struct page *page, int off, int size) { - frag->bv_page = page; - frag->bv_offset = off; - skb_frag_size_set(frag, size); + skb_frag_fill_netmem_desc(frag, page_to_netmem(page), off, size); +} + +static inline void __skb_fill_netmem_desc_noacc(struct skb_shared_info *shinfo, + int i, netmem_ref netmem, + int off, int size) +{ + skb_frag_t *frag = &shinfo->frags[i]; + + skb_frag_fill_netmem_desc(frag, netmem, off, size); } static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, int i, struct page *page, int off, int size) { - skb_frag_t *frag = &shinfo->frags[i]; - - skb_frag_fill_page_desc(frag, page, off, size); + __skb_fill_netmem_desc_noacc(shinfo, i, page_to_netmem(page), off, + size); } /** @@ -2460,10 +2497,10 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) } /** - * __skb_fill_page_desc - initialise a paged fragment in an skb + * __skb_fill_netmem_desc - initialise a fragment in an skb * @skb: buffer containing fragment to be initialised - * @i: paged fragment index to initialise - * @page: the page to use for this fragment + * @i: fragment index to initialise + * @netmem: the netmem to use for this fragment * @off: the offset to the data with @page * @size: the length of the data * @@ -2472,10 +2509,12 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) * * Does not take any additional reference on the fragment. */ -static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, - struct page *page, int off, int size) +static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i, + netmem_ref netmem, int off, int size) { - __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size); + struct page *page = netmem_to_page(netmem); + + __skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size); /* Propagate page pfmemalloc to the skb if we can. The problem is * that not all callers have unique ownership of the page but rely @@ -2483,7 +2522,20 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, */ page = compound_head(page); if (page_is_pfmemalloc(page)) - skb->pfmemalloc = true; + skb->pfmemalloc = true; +} + +static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, + struct page *page, int off, int size) +{ + __skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); +} + +static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i, + netmem_ref netmem, int off, int size) +{ + __skb_fill_netmem_desc(skb, i, netmem, off, size); + skb_shinfo(skb)->nr_frags = i + 1; } /** @@ -2503,8 +2555,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size) { - __skb_fill_page_desc(skb, i, page, off, size); - skb_shinfo(skb)->nr_frags = i + 1; + skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); } /** @@ -2528,8 +2579,16 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, shinfo->nr_frags = i + 1; } -void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, - int size, unsigned int truesize); +void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, + int off, int size, unsigned int truesize); + +static inline void skb_add_rx_frag(struct sk_buff *skb, int i, + struct page *page, int off, int size, + unsigned int truesize) +{ + skb_add_rx_frag_netmem(skb, i, page_to_netmem(page), off, size, + truesize); +} void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize); @@ -2642,6 +2701,8 @@ static inline void skb_put_u8(struct sk_buff *skb, u8 val) void *skb_push(struct sk_buff *skb, unsigned int len); static inline void *__skb_push(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb->data -= len; skb->len += len; return skb->data; @@ -2650,6 +2711,8 @@ static inline void *__skb_push(struct sk_buff *skb, unsigned int len) void *skb_pull(struct sk_buff *skb, unsigned int len); static inline void *__skb_pull(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb->len -= len; if (unlikely(skb->len < skb->data_len)) { #if defined(CONFIG_DEBUG_NET) @@ -2674,6 +2737,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta); static inline enum skb_drop_reason pskb_may_pull_reason(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + if (likely(len <= skb_headlen(skb))) return SKB_NOT_DROPPED_YET; @@ -2846,6 +2911,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header += offset; } +static inline bool skb_inner_network_header_was_set(const struct sk_buff *skb) +{ + return skb->inner_network_header > 0; +} + static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) { return skb->head + skb->inner_mac_header; @@ -2983,6 +3053,7 @@ static inline int skb_transport_offset(const struct sk_buff *skb) static inline u32 skb_network_header_len(const struct sk_buff *skb) { + DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb)); return skb->transport_header - skb->network_header; } @@ -3378,7 +3449,7 @@ static inline void skb_propagate_pfmemalloc(const struct page *page, */ static inline unsigned int skb_frag_off(const skb_frag_t *frag) { - return frag->bv_offset; + return frag->offset; } /** @@ -3388,7 +3459,7 @@ static inline unsigned int skb_frag_off(const skb_frag_t *frag) */ static inline void skb_frag_off_add(skb_frag_t *frag, int delta) { - frag->bv_offset += delta; + frag->offset += delta; } /** @@ -3398,7 +3469,7 @@ static inline void skb_frag_off_add(skb_frag_t *frag, int delta) */ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) { - frag->bv_offset = offset; + frag->offset = offset; } /** @@ -3409,7 +3480,7 @@ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) static inline void skb_frag_off_copy(skb_frag_t *fragto, const skb_frag_t *fragfrom) { - fragto->bv_offset = fragfrom->bv_offset; + fragto->offset = fragfrom->offset; } /** @@ -3420,7 +3491,7 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto, */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { - return frag->bv_page; + return netmem_to_page(frag->netmem); } /** @@ -3446,6 +3517,10 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f) __skb_frag_ref(&skb_shinfo(skb)->frags[f]); } +int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, + unsigned int headroom); +int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, + struct bpf_prog *prog); bool napi_pp_put_page(struct page *page, bool napi_safe); static inline void @@ -3524,7 +3599,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag) static inline void skb_frag_page_copy(skb_frag_t *fragto, const skb_frag_t *fragfrom) { - fragto->bv_page = fragfrom->bv_page; + fragto->netmem = fragfrom->netmem; } bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 888a4b217829..e65ec3fd2799 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -505,12 +505,6 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) return !!psock->saved_data_ready; } -static inline bool sk_is_udp(const struct sock *sk) -{ - return sk->sk_type == SOCK_DGRAM && - sk->sk_protocol == IPPROTO_UDP; -} - #if IS_ENABLED(CONFIG_NET_SOCK_MSG) #define BPF_F_STRPARSER (1UL << 1) diff --git a/include/linux/slab.h b/include/linux/slab.h index b5f5ee8308d0..e53cbfa18325 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -21,29 +21,69 @@ #include <linux/cleanup.h> #include <linux/hash.h> +enum _slab_flag_bits { + _SLAB_CONSISTENCY_CHECKS, + _SLAB_RED_ZONE, + _SLAB_POISON, + _SLAB_KMALLOC, + _SLAB_HWCACHE_ALIGN, + _SLAB_CACHE_DMA, + _SLAB_CACHE_DMA32, + _SLAB_STORE_USER, + _SLAB_PANIC, + _SLAB_TYPESAFE_BY_RCU, + _SLAB_TRACE, +#ifdef CONFIG_DEBUG_OBJECTS + _SLAB_DEBUG_OBJECTS, +#endif + _SLAB_NOLEAKTRACE, + _SLAB_NO_MERGE, +#ifdef CONFIG_FAILSLAB + _SLAB_FAILSLAB, +#endif +#ifdef CONFIG_MEMCG_KMEM + _SLAB_ACCOUNT, +#endif +#ifdef CONFIG_KASAN_GENERIC + _SLAB_KASAN, +#endif + _SLAB_NO_USER_FLAGS, +#ifdef CONFIG_KFENCE + _SLAB_SKIP_KFENCE, +#endif +#ifndef CONFIG_SLUB_TINY + _SLAB_RECLAIM_ACCOUNT, +#endif + _SLAB_OBJECT_POISON, + _SLAB_CMPXCHG_DOUBLE, + _SLAB_FLAGS_LAST_BIT +}; + +#define __SLAB_FLAG_BIT(nr) ((slab_flags_t __force)(1U << (nr))) +#define __SLAB_FLAG_UNUSED ((slab_flags_t __force)(0U)) /* * Flags to pass to kmem_cache_create(). * The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise are no-op */ /* DEBUG: Perform (expensive) checks on alloc/free */ -#define SLAB_CONSISTENCY_CHECKS ((slab_flags_t __force)0x00000100U) +#define SLAB_CONSISTENCY_CHECKS __SLAB_FLAG_BIT(_SLAB_CONSISTENCY_CHECKS) /* DEBUG: Red zone objs in a cache */ -#define SLAB_RED_ZONE ((slab_flags_t __force)0x00000400U) +#define SLAB_RED_ZONE __SLAB_FLAG_BIT(_SLAB_RED_ZONE) /* DEBUG: Poison objects */ -#define SLAB_POISON ((slab_flags_t __force)0x00000800U) +#define SLAB_POISON __SLAB_FLAG_BIT(_SLAB_POISON) /* Indicate a kmalloc slab */ -#define SLAB_KMALLOC ((slab_flags_t __force)0x00001000U) +#define SLAB_KMALLOC __SLAB_FLAG_BIT(_SLAB_KMALLOC) /* Align objs on cache lines */ -#define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U) +#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN) /* Use GFP_DMA memory */ -#define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000U) +#define SLAB_CACHE_DMA __SLAB_FLAG_BIT(_SLAB_CACHE_DMA) /* Use GFP_DMA32 memory */ -#define SLAB_CACHE_DMA32 ((slab_flags_t __force)0x00008000U) +#define SLAB_CACHE_DMA32 __SLAB_FLAG_BIT(_SLAB_CACHE_DMA32) /* DEBUG: Store the last owner for bug hunting */ -#define SLAB_STORE_USER ((slab_flags_t __force)0x00010000U) +#define SLAB_STORE_USER __SLAB_FLAG_BIT(_SLAB_STORE_USER) /* Panic if kmem_cache_create() fails */ -#define SLAB_PANIC ((slab_flags_t __force)0x00040000U) +#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC) /* * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! * @@ -95,21 +135,19 @@ * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ /* Defer freeing slabs to RCU */ -#define SLAB_TYPESAFE_BY_RCU ((slab_flags_t __force)0x00080000U) -/* Spread some memory over cpuset */ -#define SLAB_MEM_SPREAD ((slab_flags_t __force)0x00100000U) +#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU) /* Trace allocations and frees */ -#define SLAB_TRACE ((slab_flags_t __force)0x00200000U) +#define SLAB_TRACE __SLAB_FLAG_BIT(_SLAB_TRACE) /* Flag to prevent checks on free */ #ifdef CONFIG_DEBUG_OBJECTS -# define SLAB_DEBUG_OBJECTS ((slab_flags_t __force)0x00400000U) +# define SLAB_DEBUG_OBJECTS __SLAB_FLAG_BIT(_SLAB_DEBUG_OBJECTS) #else -# define SLAB_DEBUG_OBJECTS 0 +# define SLAB_DEBUG_OBJECTS __SLAB_FLAG_UNUSED #endif /* Avoid kmemleak tracing */ -#define SLAB_NOLEAKTRACE ((slab_flags_t __force)0x00800000U) +#define SLAB_NOLEAKTRACE __SLAB_FLAG_BIT(_SLAB_NOLEAKTRACE) /* * Prevent merging with compatible kmem caches. This flag should be used @@ -121,25 +159,25 @@ * - performance critical caches, should be very rare and consulted with slab * maintainers, and not used together with CONFIG_SLUB_TINY */ -#define SLAB_NO_MERGE ((slab_flags_t __force)0x01000000U) +#define SLAB_NO_MERGE __SLAB_FLAG_BIT(_SLAB_NO_MERGE) /* Fault injection mark */ #ifdef CONFIG_FAILSLAB -# define SLAB_FAILSLAB ((slab_flags_t __force)0x02000000U) +# define SLAB_FAILSLAB __SLAB_FLAG_BIT(_SLAB_FAILSLAB) #else -# define SLAB_FAILSLAB 0 +# define SLAB_FAILSLAB __SLAB_FLAG_UNUSED #endif /* Account to memcg */ #ifdef CONFIG_MEMCG_KMEM -# define SLAB_ACCOUNT ((slab_flags_t __force)0x04000000U) +# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT) #else -# define SLAB_ACCOUNT 0 +# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED #endif #ifdef CONFIG_KASAN_GENERIC -#define SLAB_KASAN ((slab_flags_t __force)0x08000000U) +#define SLAB_KASAN __SLAB_FLAG_BIT(_SLAB_KASAN) #else -#define SLAB_KASAN 0 +#define SLAB_KASAN __SLAB_FLAG_UNUSED #endif /* @@ -147,20 +185,20 @@ * Intended for caches created for self-tests so they have only flags * specified in the code and other flags are ignored. */ -#define SLAB_NO_USER_FLAGS ((slab_flags_t __force)0x10000000U) +#define SLAB_NO_USER_FLAGS __SLAB_FLAG_BIT(_SLAB_NO_USER_FLAGS) #ifdef CONFIG_KFENCE -#define SLAB_SKIP_KFENCE ((slab_flags_t __force)0x20000000U) +#define SLAB_SKIP_KFENCE __SLAB_FLAG_BIT(_SLAB_SKIP_KFENCE) #else -#define SLAB_SKIP_KFENCE 0 +#define SLAB_SKIP_KFENCE __SLAB_FLAG_UNUSED #endif /* The following flags affect the page allocator grouping pages by mobility */ /* Objects are reclaimable */ #ifndef CONFIG_SLUB_TINY -#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U) +#define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT) #else -#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0) +#define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_UNUSED #endif #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ diff --git a/include/linux/smp.h b/include/linux/smp.h index e87520dc2959..fcd61dfe2af3 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -105,6 +105,12 @@ static inline void on_each_cpu_cond(smp_cond_func_t cond_func, on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask); } +/* + * Architecture specific boot CPU setup. Defined as empty weak function in + * init/main.c. Architectures can override it. + */ +void smp_prepare_boot_cpu(void); + #ifdef CONFIG_SMP #include <linux/preempt.h> @@ -171,12 +177,6 @@ void generic_smp_call_function_single_interrupt(void); #define generic_smp_call_function_interrupt \ generic_smp_call_function_single_interrupt -/* - * Mark the boot cpu "online" so that it can call console drivers in - * printk() and can access its per-cpu storage. - */ -void smp_prepare_boot_cpu(void); - extern unsigned int setup_max_cpus; extern void __init setup_nr_cpu_ids(void); extern void __init smp_init(void); @@ -203,7 +203,6 @@ static inline void up_smp_call_function(smp_call_func_t func, void *info) (up_smp_call_function(func, info)) static inline void smp_send_reschedule(int cpu) { } -#define smp_prepare_boot_cpu() do {} while (0) #define smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) static inline void call_function_init(void) { } @@ -218,6 +217,8 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, static inline void kick_all_cpus_sync(void) { } static inline void wake_up_all_idle_cpus(void) { } +#define setup_max_cpus 0 + #ifdef CONFIG_UP_LATE_INIT extern void __init up_late_init(void); static inline void smp_init(void) { up_late_init(); } @@ -261,7 +262,7 @@ static inline int get_boot_cpu_id(void) * regular asm read for the stable. */ #ifndef __smp_processor_id -#define __smp_processor_id(x) raw_smp_processor_id(x) +#define __smp_processor_id() raw_smp_processor_id() #endif #ifdef CONFIG_DEBUG_PREEMPT diff --git a/include/linux/soc/andes/irq.h b/include/linux/soc/andes/irq.h new file mode 100644 index 000000000000..edc3182d6e66 --- /dev/null +++ b/include/linux/soc/andes/irq.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 Andes Technology Corporation + */ +#ifndef __ANDES_IRQ_H +#define __ANDES_IRQ_H + +/* Andes PMU irq number */ +#define ANDES_RV_IRQ_PMOVI 18 +#define ANDES_RV_IRQ_LAST ANDES_RV_IRQ_PMOVI +#define ANDES_SLI_CAUSE_BASE 256 + +/* Andes PMU related registers */ +#define ANDES_CSR_SLIE 0x9c4 +#define ANDES_CSR_SLIP 0x9c5 +#define ANDES_CSR_SCOUNTEROF 0x9d4 + +#endif /* __ANDES_IRQ_H */ diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h index be98aebcb3e1..7161a3183eda 100644 --- a/include/linux/soc/qcom/apr.h +++ b/include/linux/soc/qcom/apr.h @@ -9,7 +9,7 @@ #include <dt-bindings/soc/qcom,apr.h> #include <dt-bindings/soc/qcom,gpr.h> -extern struct bus_type aprbus; +extern const struct bus_type aprbus; #define APR_HDR_LEN(hdr_len) ((hdr_len)/4) diff --git a/include/linux/soc/qcom/qcom-pbs.h b/include/linux/soc/qcom/qcom-pbs.h new file mode 100644 index 000000000000..8a46209ccf13 --- /dev/null +++ b/include/linux/soc/qcom/qcom-pbs.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _QCOM_PBS_H +#define _QCOM_PBS_H + +#include <linux/errno.h> +#include <linux/types.h> + +struct device_node; +struct pbs_dev; + +#if IS_ENABLED(CONFIG_QCOM_PBS) +int qcom_pbs_trigger_event(struct pbs_dev *pbs, u8 bitmap); +struct pbs_dev *get_pbs_client_device(struct device *client_dev); +#else +static inline int qcom_pbs_trigger_event(struct pbs_dev *pbs, u8 bitmap) +{ + return -ENODEV; +} + +static inline struct pbs_dev *get_pbs_client_device(struct device *client_dev) +{ + return ERR_PTR(-ENODEV); +} +#endif + +#endif diff --git a/include/linux/soc/samsung/exynos-pmu.h b/include/linux/soc/samsung/exynos-pmu.h index a4f5516cc956..2bd9d12d9a52 100644 --- a/include/linux/soc/samsung/exynos-pmu.h +++ b/include/linux/soc/samsung/exynos-pmu.h @@ -10,6 +10,7 @@ #define __LINUX_SOC_EXYNOS_PMU_H struct regmap; +struct device_node; enum sys_powerdown { SYS_AFTR, @@ -20,12 +21,20 @@ enum sys_powerdown { extern void exynos_sys_powerdown_conf(enum sys_powerdown mode); #ifdef CONFIG_EXYNOS_PMU -extern struct regmap *exynos_get_pmu_regmap(void); +struct regmap *exynos_get_pmu_regmap(void); +struct regmap *exynos_get_pmu_regmap_by_phandle(struct device_node *np, + const char *propname); #else static inline struct regmap *exynos_get_pmu_regmap(void) { return ERR_PTR(-ENODEV); } + +static inline struct regmap *exynos_get_pmu_regmap_by_phandle(struct device_node *np, + const char *propname) +{ + return ERR_PTR(-ENODEV); +} #endif #endif /* __LINUX_SOC_EXYNOS_PMU_H */ diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 0b9ecd8cf979..110978dc9af1 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -13,6 +13,7 @@ struct nlmsghdr; struct sock; struct sock_diag_handler { + struct module *owner; __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); int (*get_info)(struct sk_buff *skb, struct sock *sk); @@ -22,8 +23,13 @@ struct sock_diag_handler { int sock_diag_register(const struct sock_diag_handler *h); void sock_diag_unregister(const struct sock_diag_handler *h); -void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); -void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); +struct sock_diag_inet_compat { + struct module *owner; + int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh); +}; + +void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr); +void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr); u64 __sock_gen_cookie(struct sock *sk); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 471fe2ff9066..600fbd5daf68 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -21,7 +21,7 @@ #include <uapi/linux/spi/spi.h> /* Max no. of CS supported per spi device */ -#define SPI_CS_CNT_MAX 4 +#define SPI_CS_CNT_MAX 16 struct dma_chan; struct software_node; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index dee5ad6e48c5..dfa1828cd756 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -127,6 +127,7 @@ struct stmmac_est { u32 gcl_unaligned[EST_GCL]; u32 gcl[EST_GCL]; u32 gcl_size; + u32 max_sdu[MTL_MAX_TX_QUEUES]; }; struct stmmac_rxq_cfg { diff --git a/include/linux/string.h b/include/linux/string.h index ab148d8dbfc1..9ba8b4597009 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -2,6 +2,7 @@ #ifndef _LINUX_STRING_H_ #define _LINUX_STRING_H_ +#include <linux/args.h> #include <linux/array_size.h> #include <linux/compiler.h> /* for inline */ #include <linux/types.h> /* for size_t */ @@ -66,12 +67,79 @@ extern char * strcpy(char *,const char *); #ifndef __HAVE_ARCH_STRNCPY extern char * strncpy(char *,const char *, __kernel_size_t); #endif -#ifndef __HAVE_ARCH_STRSCPY -ssize_t strscpy(char *, const char *, size_t); -#endif +ssize_t sized_strscpy(char *, const char *, size_t); + +/* + * The 2 argument style can only be used when dst is an array with a + * known size. + */ +#define __strscpy0(dst, src, ...) \ + sized_strscpy(dst, src, sizeof(dst) + __must_be_array(dst)) +#define __strscpy1(dst, src, size) sized_strscpy(dst, src, size) + +#define __strscpy_pad0(dst, src, ...) \ + sized_strscpy_pad(dst, src, sizeof(dst) + __must_be_array(dst)) +#define __strscpy_pad1(dst, src, size) sized_strscpy_pad(dst, src, size) + +/** + * strscpy - Copy a C-string into a sized buffer + * @dst: Where to copy the string to + * @src: Where to copy the string from + * @...: Size of destination buffer (optional) + * + * Copy the source string @src, or as much of it as fits, into the + * destination @dst buffer. The behavior is undefined if the string + * buffers overlap. The destination @dst buffer is always NUL terminated, + * unless it's zero-sized. + * + * The size argument @... is only required when @dst is not an array, or + * when the copy needs to be smaller than sizeof(@dst). + * + * Preferred to strncpy() since it always returns a valid string, and + * doesn't unnecessarily force the tail of the destination buffer to be + * zero padded. If padding is desired please use strscpy_pad(). + * + * Returns the number of characters copied in @dst (not including the + * trailing %NUL) or -E2BIG if @size is 0 or the copy from @src was + * truncated. + */ +#define strscpy(dst, src, ...) \ + CONCATENATE(__strscpy, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__) + +#define sized_strscpy_pad(dest, src, count) ({ \ + char *__dst = (dest); \ + const char *__src = (src); \ + const size_t __count = (count); \ + ssize_t __wrote; \ + \ + __wrote = sized_strscpy(__dst, __src, __count); \ + if (__wrote >= 0 && __wrote < __count) \ + memset(__dst + __wrote + 1, 0, __count - __wrote - 1); \ + __wrote; \ +}) -/* Wraps calls to strscpy()/memset(), no arch specific code required */ -ssize_t strscpy_pad(char *dest, const char *src, size_t count); +/** + * strscpy_pad() - Copy a C-string into a sized buffer + * @dst: Where to copy the string to + * @src: Where to copy the string from + * @...: Size of destination buffer + * + * Copy the string, or as much of it as fits, into the dest buffer. The + * behavior is undefined if the string buffers overlap. The destination + * buffer is always %NUL terminated, unless it's zero-sized. + * + * If the source string is shorter than the destination buffer, the + * remaining bytes in the buffer will be filled with %NUL bytes. + * + * For full explanation of why you may want to consider using the + * 'strscpy' functions please see the function docstring for strscpy(). + * + * Returns: + * * The number of characters copied (not including the trailing %NULs) + * * -E2BIG if count is 0 or @src was truncated. + */ +#define strscpy_pad(dst, src, ...) \ + CONCATENATE(__strscpy_pad, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__) #ifndef __HAVE_ARCH_STRCAT extern char * strcat(char *, const char *); @@ -217,10 +285,19 @@ extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); extern void *kvmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp); +extern void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp); +/* lib/argv_split.c */ extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); +/* lib/cmdline.c */ +extern int get_option(char **str, int *pint); +extern char *get_options(const char *str, int nints, int *ints); +extern unsigned long long memparse(const char *ptr, char **retptr); +extern bool parse_option_str(const char *str, const char *option); +extern char *next_arg(char *args, char **param, char **val); + extern bool sysfs_streq(const char *s1, const char *s2); int match_string(const char * const *array, size_t n, const char *string); int __sysfs_match_string(const char * const *array, size_t n, const char *s); diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index 3c1091941eb8..d9ebe20229f8 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -42,4 +42,15 @@ static inline const char *str_yes_no(bool v) return v ? "yes" : "no"; } +/** + * str_plural - Return the simple pluralization based on English counts + * @num: Number used for deciding pluralization + * + * If @num is 1, returns empty string, otherwise returns "s". + */ +static inline const char *str_plural(size_t num) +{ + return num == 1 ? "" : "s"; +} + #endif diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 58fb1f90eda5..e93fbb5b0c01 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -17,14 +17,18 @@ static inline bool string_is_terminated(const char *s, int len) return memchr(s, '\0', len) ? true : false; } -/* Descriptions of the types of units to - * print in */ +/* Descriptions of the types of units to print in */ enum string_size_units { STRING_UNITS_10, /* use powers of 10^3 (standard SI) */ STRING_UNITS_2, /* use binary powers of 2^10 */ + STRING_UNITS_MASK = BIT(0), + + /* Modifiers */ + STRING_UNITS_NO_SPACE = BIT(30), + STRING_UNITS_NO_BYTES = BIT(31), }; -int string_get_size(u64 size, u64 blk_size, enum string_size_units units, +int string_get_size(u64 size, u64 blk_size, const enum string_size_units units, char *buf, int len); int parse_int_array_user(const char __user *from, size_t count, int **array); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 67cf1c9efd80..23617da0e565 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -339,7 +339,6 @@ struct svc_program { const struct svc_version **pg_vers; /* version array */ char * pg_name; /* service name */ char * pg_class; /* class name: services sharing authentication */ - struct svc_stat * pg_stats; /* rpc statistics */ enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp); __be32 (*pg_init_request)(struct svc_rqst *, const struct svc_program *, @@ -411,7 +410,9 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp, void svc_rqst_release_pages(struct svc_rqst *rqstp); void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); -struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, +struct svc_serv * svc_create_pooled(struct svc_program *prog, + struct svc_stat *stats, + unsigned int bufsize, int (*threadfn)(void *data)); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_info *si, struct file *file); diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index e7595ae62fe2..24cd199dd6f3 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -203,6 +203,30 @@ struct svc_rdma_recv_ctxt { struct page *rc_pages[RPCSVC_MAXPAGES]; }; +/* + * State for sending a Write chunk. + * - Tracks progress of writing one chunk over all its segments + * - Stores arguments for the SGL constructor functions + */ +struct svc_rdma_write_info { + struct svcxprt_rdma *wi_rdma; + struct list_head wi_list; + + const struct svc_rdma_chunk *wi_chunk; + + /* write state of this chunk */ + unsigned int wi_seg_off; + unsigned int wi_seg_no; + + /* SGL constructor arguments */ + const struct xdr_buf *wi_xdr; + unsigned char *wi_base; + unsigned int wi_next_off; + + struct svc_rdma_chunk_ctxt wi_cc; + struct work_struct wi_work; +}; + struct svc_rdma_send_ctxt { struct llist_node sc_node; struct rpc_rdma_cid sc_cid; @@ -210,9 +234,15 @@ struct svc_rdma_send_ctxt { struct svcxprt_rdma *sc_rdma; struct ib_send_wr sc_send_wr; + struct ib_send_wr *sc_wr_chain; + int sc_sqecount; struct ib_cqe sc_cqe; struct xdr_buf sc_hdrbuf; struct xdr_stream sc_stream; + + struct list_head sc_write_info_list; + struct svc_rdma_write_info sc_reply_info; + void *sc_xprt_buf; int sc_page_count; int sc_cur_sge_no; @@ -236,18 +266,27 @@ extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt); extern int svc_rdma_recvfrom(struct svc_rqst *); /* svc_rdma_rw.c */ +extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc); extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, struct svc_rdma_chunk_ctxt *cc); extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma, struct svc_rdma_chunk_ctxt *cc, enum dma_data_direction dir); -extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, - const struct svc_rdma_chunk *chunk, - const struct xdr_buf *xdr); -extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, - const struct svc_rdma_recv_ctxt *rctxt, - const struct xdr_buf *xdr); +extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); +extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma, + const struct svc_rdma_pcl *write_pcl, + struct svc_rdma_send_ctxt *sctxt, + const struct xdr_buf *xdr); +extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma, + const struct svc_rdma_pcl *write_pcl, + const struct svc_rdma_pcl *reply_pcl, + struct svc_rdma_send_ctxt *sctxt, + const struct xdr_buf *xdr); extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, struct svc_rdma_recv_ctxt *head); @@ -258,8 +297,8 @@ extern struct svc_rdma_send_ctxt * svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma); extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt); -extern int svc_rdma_send(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_post_send(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, const struct svc_rdma_pcl *write_pcl, diff --git a/include/linux/swap.h b/include/linux/swap.h index 4db00ddad261..378ab1cd23bd 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -298,7 +298,7 @@ struct swap_info_struct { unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */ struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ struct rb_root swap_extent_root;/* root of the swap extent rbtree */ - struct bdev_handle *bdev_handle;/* open handle of the bdev */ + struct file *bdev_file; /* open handle of the bdev */ struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ unsigned int old_block_size; /* seldom referenced */ @@ -549,6 +549,11 @@ static inline int swap_duplicate(swp_entry_t swp) return 0; } +static inline int swapcache_prepare(swp_entry_t swp) +{ + return 0; +} + static inline void swap_free(swp_entry_t swp) { } diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index ecde0312dd52..ea23097e351f 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -120,6 +120,8 @@ struct io_tlb_pool { * debugfs. * @used_hiwater: The high water mark for total_used. Used only for reporting * in debugfs. + * @transient_nslabs: The total number of slots in all transient pools that + * are currently used across all areas. */ struct io_tlb_mem { struct io_tlb_pool defpool; @@ -137,6 +139,7 @@ struct io_tlb_mem { #ifdef CONFIG_DEBUG_FS atomic_long_t total_used; atomic_long_t used_hiwater; + atomic_long_t transient_nslabs; #endif }; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index cdba4d0c6d4a..77eb9b0e7685 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -128,6 +128,7 @@ struct mnt_id_req; #define __TYPE_IS_LL(t) (__TYPE_AS(t, 0LL) || __TYPE_AS(t, 0ULL)) #define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a #define __SC_CAST(t, a) (__force t) a +#define __SC_TYPE(t, a) t #define __SC_ARGS(t, a) a #define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long)) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 89b290d8c8dc..55399ee2a57e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -221,8 +221,10 @@ struct tcp_sock { u32 lost_out; /* Lost packets */ u32 sacked_out; /* SACK'd packets */ u16 tcp_header_len; /* Bytes of tcp header to send */ + u8 scaling_ratio; /* see tcp_win_from_space() */ u8 chrono_type : 2, /* current chronograph type */ repair : 1, + tcp_usec_ts : 1, /* TSval values in usec */ is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ __cacheline_group_end(tcp_sock_read_txrx); @@ -262,10 +264,10 @@ struct tcp_sock { u32 pushed_seq; /* Last pushed seq, required to talk to windows */ u32 lsndtime; u32 mdev_us; /* medium deviation */ + u32 rtt_seq; /* sequence number to update rttvar */ u64 tcp_wstamp_ns; /* departure time for next sent data packet */ u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ u64 tcp_mstamp; /* most recent packet received/sent */ - u32 rtt_seq; /* sequence number to update rttvar */ struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ struct sk_buff *highest_sack; /* skb just after the highest * skb with SACKed bit set @@ -302,7 +304,7 @@ struct tcp_sock { __cacheline_group_end(tcp_sock_write_txrx); /* RX read-write hotpath cache lines */ - __cacheline_group_begin(tcp_sock_write_rx); + __cacheline_group_begin(tcp_sock_write_rx) __aligned(8); u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived * sum(delta(rcv_nxt)), or how many bytes @@ -348,11 +350,9 @@ struct tcp_sock { u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups * total number of DSACK blocks received */ - u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ u32 compressed_ack_rcv_nxt; struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ - u8 scaling_ratio; /* see tcp_win_from_space() */ /* Information of the most recently (s)acked skb */ struct tcp_rack { u64 mstamp; /* (Re)sent time of the skb */ @@ -368,8 +368,7 @@ struct tcp_sock { u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ - tcp_usec_ts:1, /* TSval values in usec */ - unused:4; + unused:5; u8 thin_lto : 1,/* Use linear timeouts for thin streams */ recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ @@ -384,12 +383,12 @@ struct tcp_sock { syn_fastopen_ch:1, /* Active TFO re-enabling probe */ syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ + u8 keepalive_probes; /* num of allowed keep alive probes */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ /* RTT measurement */ u32 mdev_max_us; /* maximal mdev for the last rtt period */ - u8 keepalive_probes; /* num of allowed keep alive probes */ u32 reord_seen; /* number of data packet reordering events */ /* @@ -402,6 +401,7 @@ struct tcp_sock { u32 prior_cwnd; /* cwnd right before starting loss recovery */ u32 prr_delivered; /* Number of newly delivered packets to * receiver in Recovery. */ + u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ struct hrtimer pacing_timer; struct hrtimer compressed_ack_timer; @@ -477,8 +477,8 @@ struct tcp_sock { bool is_mptcp; #endif #if IS_ENABLED(CONFIG_SMC) - bool (*smc_hs_congested)(const struct sock *sk); bool syn_smc; /* SYN includes SMC */ + bool (*smc_hs_congested)(const struct sock *sk); #endif #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 911ddf92dcee..71632e3c5f18 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -482,7 +482,7 @@ static inline bool tee_param_is_memref(struct tee_param *param) } } -extern struct bus_type tee_bus_type; +extern const struct bus_type tee_bus_type; /** * struct tee_client_device - tee based device diff --git a/include/linux/tick.h b/include/linux/tick.h index 716d17f31c45..4924a33700b7 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -12,6 +12,7 @@ #include <linux/cpumask.h> #include <linux/sched.h> #include <linux/rcupdate.h> +#include <linux/static_key.h> #ifdef CONFIG_GENERIC_CLOCKEVENTS extern void __init tick_init(void); @@ -19,16 +20,22 @@ extern void __init tick_init(void); extern void tick_suspend_local(void); /* Should be core only, but XEN resume magic and ARM BL switcher require it */ extern void tick_resume_local(void); -extern void tick_handover_do_timer(void); extern void tick_cleanup_dead_cpu(int cpu); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_suspend_local(void) { } static inline void tick_resume_local(void) { } -static inline void tick_handover_do_timer(void) { } static inline void tick_cleanup_dead_cpu(int cpu) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ +#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU) +extern int tick_cpu_dying(unsigned int cpu); +extern void tick_assert_timekeeping_handover(void); +#else +#define tick_cpu_dying NULL +static inline void tick_assert_timekeeping_handover(void) { } +#endif + #if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND) extern void tick_freeze(void); extern void tick_unfreeze(void); @@ -63,18 +70,14 @@ enum tick_broadcast_state { TICK_BROADCAST_ENTER, }; +extern struct static_key_false arch_needs_tick_broadcast; + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern void tick_broadcast_control(enum tick_broadcast_mode mode); #else static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ -#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU) -extern void tick_offline_cpu(unsigned int cpu); -#else -static inline void tick_offline_cpu(unsigned int cpu) { } -#endif - #ifdef CONFIG_GENERIC_CLOCKEVENTS extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); #else @@ -164,9 +167,16 @@ static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } #endif /* !CONFIG_NO_HZ_COMMON */ +/* + * Mask of CPUs that are nohz_full. + * + * Users should be guarded by CONFIG_NO_HZ_FULL or a tick_nohz_full_cpu() + * check. + */ +extern cpumask_var_t tick_nohz_full_mask; + #ifdef CONFIG_NO_HZ_FULL extern bool tick_nohz_full_running; -extern cpumask_var_t tick_nohz_full_mask; static inline bool tick_nohz_full_enabled(void) { diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7c43e98cf211..7e50cbd97f86 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -268,15 +268,17 @@ struct system_device_crosststamp { }; /** - * struct system_counterval_t - system counter value with the pointer to the + * struct system_counterval_t - system counter value with the ID of the * corresponding clocksource * @cycles: System counter value - * @cs: Clocksource corresponding to system counter value. Used by - * timekeeping code to verify comparibility of two cycle values + * @cs_id: Clocksource ID corresponding to system counter value. Used by + * timekeeping code to verify comparability of two cycle values. + * The default ID, CSID_GENERIC, does not identify a specific + * clocksource. */ struct system_counterval_t { u64 cycles; - struct clocksource *cs; + enum clocksource_ids cs_id; }; /* diff --git a/include/linux/timer.h b/include/linux/timer.h index f18a2f1eb79e..14a633ba61d6 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -36,16 +36,10 @@ * workqueue locking issues. It's not meant for executing random crap * with interrupts disabled. Abuse is monitored! * - * @TIMER_PINNED: A pinned timer will not be affected by any timer - * placement heuristics (like, NOHZ) and will always expire on the CPU - * on which the timer was enqueued. - * - * Note: Because enqueuing of timers can migrate the timer from one - * CPU to another, pinned timers are not guaranteed to stay on the - * initialy selected CPU. They move to the CPU on which the enqueue - * function is invoked via mod_timer() or add_timer(). If the timer - * should be placed on a particular CPU, then add_timer_on() has to be - * used. + * @TIMER_PINNED: A pinned timer will always expire on the CPU on which the + * timer was enqueued. When a particular CPU is required, add_timer_on() + * has to be used. Enqueue via mod_timer() and add_timer() is always done + * on the local CPU. */ #define TIMER_CPUMASK 0x0003FFFF #define TIMER_MIGRATING 0x00040000 @@ -165,6 +159,8 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires); #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) extern void add_timer(struct timer_list *timer); +extern void add_timer_local(struct timer_list *timer); +extern void add_timer_global(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); extern int timer_delete_sync(struct timer_list *timer); diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 9ec229dfddaa..1ef95c0287f0 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -9,9 +9,15 @@ /* * Trace sequences are used to allow a function to call several other functions * to create a string of data to use. + * + * Have the trace seq to be 8K which is typically PAGE_SIZE * 2 on + * most architectures. The TRACE_SEQ_BUFFER_SIZE (which is + * TRACE_SEQ_SIZE minus the other fields of trace_seq), is the + * max size the output of a trace event may be. */ -#define TRACE_SEQ_BUFFER_SIZE (PAGE_SIZE * 2 - \ +#define TRACE_SEQ_SIZE 8192 +#define TRACE_SEQ_BUFFER_SIZE (TRACE_SEQ_SIZE - \ (sizeof(struct seq_buf) + sizeof(size_t) + sizeof(int))) struct trace_seq { diff --git a/include/linux/udp.h b/include/linux/udp.h index d04188714dca..3748e82b627b 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -92,6 +92,9 @@ struct udp_sock { /* This fields follows rcvbuf value, and is touched by udp_recvmsg */ int forward_threshold; + + /* Cache friendly copy of sk->sk_peek_off >= 0 */ + bool peeking_with_offset; }; #define udp_test_bit(nr, sk) \ @@ -109,6 +112,13 @@ struct udp_sock { #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) +static inline int udp_set_peek_off(struct sock *sk, int val) +{ + sk_set_peek_off(sk, val); + WRITE_ONCE(udp_sk(sk)->peeking_with_offset, val >= 0); + return 0; +} + static inline void udp_set_no_check6_tx(struct sock *sk, bool val) { udp_assign_bit(NO_CHECK6_TX, sk, val); diff --git a/include/linux/uio.h b/include/linux/uio.h index bea9c89922d9..00cebe2b70de 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -40,7 +40,6 @@ struct iov_iter_state { struct iov_iter { u8 iter_type; - bool copy_mc; bool nofault; bool data_source; size_t iov_offset; @@ -248,22 +247,8 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); #ifdef CONFIG_ARCH_HAS_COPY_MC size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); -static inline void iov_iter_set_copy_mc(struct iov_iter *i) -{ - i->copy_mc = true; -} - -static inline bool iov_iter_is_copy_mc(const struct iov_iter *i) -{ - return i->copy_mc; -} #else #define _copy_mc_to_iter _copy_to_iter -static inline void iov_iter_set_copy_mc(struct iov_iter *i) { } -static inline bool iov_iter_is_copy_mc(const struct iov_iter *i) -{ - return false; -} #endif size_t iov_iter_zero(size_t bytes, struct iov_iter *); @@ -355,7 +340,6 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter) { .iter_type = ITER_UBUF, - .copy_mc = false, .data_source = direction, .ubuf = buf, .count = count, diff --git a/include/linux/units.h b/include/linux/units.h index 45110daaf8d3..00e15de33eca 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -24,10 +24,13 @@ #define NANOHZ_PER_HZ 1000000000UL #define MICROHZ_PER_HZ 1000000UL #define MILLIHZ_PER_HZ 1000UL + #define HZ_PER_KHZ 1000UL -#define KHZ_PER_MHZ 1000UL #define HZ_PER_MHZ 1000000UL +#define KHZ_PER_MHZ 1000UL +#define KHZ_PER_GHZ 1000000UL + #define MILLIWATT_PER_WATT 1000UL #define MICROWATT_PER_MILLIWATT 1000UL #define MICROWATT_PER_WATT 1000000UL diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index a771ccc038ac..6532beb587b1 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -236,7 +236,6 @@ struct usb_ep { unsigned max_streams:16; unsigned mult:2; unsigned maxburst:5; - unsigned fifo_mode:1; u8 address; const struct usb_endpoint_descriptor *desc; const struct usb_ss_ep_comp_descriptor *comp_desc; diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index cd77fc6095a1..ac95e7c89df5 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -55,7 +55,7 @@ struct giveback_urb_bh { bool high_prio; spinlock_t lock; struct list_head head; - struct tasklet_struct bh; + struct work_struct bh; struct usb_host_endpoint *completing_ep; }; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index c720be70c8dd..0f72c85a377b 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -35,6 +35,7 @@ struct iov_iter; /* in uio.h */ #else #define VM_DEFER_KMEMLEAK 0 #endif +#define VM_SPARSE 0x00001000 /* sparse vm_area. not all pages are present. */ /* bits [20..32] reserved for arch specific ioremap internals */ @@ -232,6 +233,10 @@ static inline bool is_vm_area_hugepages(const void *addr) } #ifdef CONFIG_MMU +int vm_area_map_pages(struct vm_struct *area, unsigned long start, + unsigned long end, struct page **pages); +void vm_area_unmap_pages(struct vm_struct *area, unsigned long start, + unsigned long end); void vunmap_range(unsigned long addr, unsigned long end); static inline void set_vm_flush_reset_perms(void *addr) { diff --git a/include/linux/wordpart.h b/include/linux/wordpart.h new file mode 100644 index 000000000000..f6f8f83b15b0 --- /dev/null +++ b/include/linux/wordpart.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_WORDPART_H +#define _LINUX_WORDPART_H + +/** + * upper_32_bits - return bits 32-63 of a number + * @n: the number we're accessing + * + * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress + * the "right shift count >= width of type" warning when that quantity is + * 32-bits. + */ +#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16)) + +/** + * lower_32_bits - return bits 0-31 of a number + * @n: the number we're accessing + */ +#define lower_32_bits(n) ((u32)((n) & 0xffffffff)) + +/** + * upper_16_bits - return bits 16-31 of a number + * @n: the number we're accessing + */ +#define upper_16_bits(n) ((u16)((n) >> 16)) + +/** + * lower_16_bits - return bits 0-15 of a number + * @n: the number we're accessing + */ +#define lower_16_bits(n) ((u16)((n) & 0xffff)) + +/** + * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value + * @x: value to repeat + * + * NOTE: @x is not checked for > 0xff; larger values produce odd results. + */ +#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) + +#endif // _LINUX_WORDPART_H diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 2cc0a9606175..158784dd189a 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -22,20 +22,54 @@ */ #define work_data_bits(work) ((unsigned long *)(&(work)->data)) -enum { +enum work_bits { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ - WORK_STRUCT_INACTIVE_BIT= 1, /* work item is inactive */ - WORK_STRUCT_PWQ_BIT = 2, /* data points to pwq */ - WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */ + WORK_STRUCT_INACTIVE_BIT, /* work item is inactive */ + WORK_STRUCT_PWQ_BIT, /* data points to pwq */ + WORK_STRUCT_LINKED_BIT, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK - WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */ - WORK_STRUCT_COLOR_SHIFT = 5, /* color for workqueue flushing */ -#else - WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ + WORK_STRUCT_STATIC_BIT, /* static initializer (debugobjects) */ #endif + WORK_STRUCT_FLAG_BITS, + /* color for workqueue flushing */ + WORK_STRUCT_COLOR_SHIFT = WORK_STRUCT_FLAG_BITS, WORK_STRUCT_COLOR_BITS = 4, + /* + * When WORK_STRUCT_PWQ is set, reserve 8 bits off of pwq pointer w/ + * debugobjects turned off. This makes pwqs aligned to 256 bytes (512 + * bytes w/ DEBUG_OBJECTS_WORK) and allows 16 workqueue flush colors. + * + * MSB + * [ pwq pointer ] [ flush color ] [ STRUCT flags ] + * 4 bits 4 or 5 bits + */ + WORK_STRUCT_PWQ_SHIFT = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, + + /* + * data contains off-queue information when !WORK_STRUCT_PWQ. + * + * MSB + * [ pool ID ] [ OFFQ flags ] [ STRUCT flags ] + * 1 bit 4 or 5 bits + */ + WORK_OFFQ_FLAG_SHIFT = WORK_STRUCT_FLAG_BITS, + WORK_OFFQ_CANCELING_BIT = WORK_OFFQ_FLAG_SHIFT, + WORK_OFFQ_FLAG_END, + WORK_OFFQ_FLAG_BITS = WORK_OFFQ_FLAG_END - WORK_OFFQ_FLAG_SHIFT, + + /* + * When a work item is off queue, the high bits encode off-queue flags + * and the last pool it was on. Cap pool ID to 31 bits and use the + * highest number to indicate that no pool is associated. + */ + WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_SHIFT + WORK_OFFQ_FLAG_BITS, + WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, + WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, +}; + +enum work_flags { WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, WORK_STRUCT_INACTIVE = 1 << WORK_STRUCT_INACTIVE_BIT, WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT, @@ -45,35 +79,14 @@ enum { #else WORK_STRUCT_STATIC = 0, #endif +}; +enum wq_misc_consts { WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS), /* not bound to any CPU, prefer the local CPU */ WORK_CPU_UNBOUND = NR_CPUS, - /* - * Reserve 8 bits off of pwq pointer w/ debugobjects turned off. - * This makes pwqs aligned to 256 bytes and allows 16 workqueue - * flush colors. - */ - WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + - WORK_STRUCT_COLOR_BITS, - - /* data contains off-queue information when !WORK_STRUCT_PWQ */ - WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT, - - __WORK_OFFQ_CANCELING = WORK_OFFQ_FLAG_BASE, - - /* - * When a work item is off queue, its high bits point to the last - * pool it was on. Cap at 31 bits and use the highest number to - * indicate that no pool is associated. - */ - WORK_OFFQ_FLAG_BITS = 1, - WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, - WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, - WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, - /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, WORK_BUSY_RUNNING = 1 << 1, @@ -83,12 +96,10 @@ enum { }; /* Convenience constants - of type 'unsigned long', not 'enum'! */ -#define WORK_OFFQ_CANCELING (1ul << __WORK_OFFQ_CANCELING) +#define WORK_OFFQ_CANCELING (1ul << WORK_OFFQ_CANCELING_BIT) #define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1) #define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT) - -#define WORK_STRUCT_FLAG_MASK ((1ul << WORK_STRUCT_FLAG_BITS) - 1) -#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) +#define WORK_STRUCT_PWQ_MASK (~((1ul << WORK_STRUCT_PWQ_SHIFT) - 1)) #define WORK_DATA_INIT() ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL) #define WORK_DATA_STATIC_INIT() \ @@ -347,7 +358,8 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } * Workqueue flags and constants. For details, please refer to * Documentation/core-api/workqueue.rst. */ -enum { +enum wq_flags { + WQ_BH = 1 << 0, /* execute in bottom half (softirq) context */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ WQ_FREEZABLE = 1 << 2, /* freeze during suspend */ WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ @@ -386,11 +398,22 @@ enum { __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ - __WQ_ORDERED_EXPLICIT = 1 << 19, /* internal: alloc_ordered_workqueue() */ + /* BH wq only allows the following flags */ + __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI, +}; + +enum wq_consts { WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE, WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, + + /* + * Per-node default cap on min_active. Unless explicitly set, min_active + * is set to min(max_active, WQ_DFL_MIN_ACTIVE). For more details, see + * workqueue_struct->min_active definition. + */ + WQ_DFL_MIN_ACTIVE = 8, }; /* @@ -420,6 +443,9 @@ enum { * they are same as their non-power-efficient counterparts - e.g. * system_power_efficient_wq is identical to system_wq if * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info. + * + * system_bh[_highpri]_wq are convenience interface to softirq. BH work items + * are executed in the queueing CPU's BH context in the queueing order. */ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_highpri_wq; @@ -428,16 +454,43 @@ extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct *system_power_efficient_wq; extern struct workqueue_struct *system_freezable_power_efficient_wq; +extern struct workqueue_struct *system_bh_wq; +extern struct workqueue_struct *system_bh_highpri_wq; + +void workqueue_softirq_action(bool highpri); +void workqueue_softirq_dead(unsigned int cpu); /** * alloc_workqueue - allocate a workqueue * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags - * @max_active: max in-flight work items per CPU, 0 for default + * @max_active: max in-flight work items, 0 for default * remaining args: args for @fmt * - * Allocate a workqueue with the specified parameters. For detailed - * information on WQ_* flags, please refer to + * For a per-cpu workqueue, @max_active limits the number of in-flight work + * items for each CPU. e.g. @max_active of 1 indicates that each CPU can be + * executing at most one work item for the workqueue. + * + * For unbound workqueues, @max_active limits the number of in-flight work items + * for the whole system. e.g. @max_active of 16 indicates that that there can be + * at most 16 work items executing for the workqueue in the whole system. + * + * As sharing the same active counter for an unbound workqueue across multiple + * NUMA nodes can be expensive, @max_active is distributed to each NUMA node + * according to the proportion of the number of online CPUs and enforced + * independently. + * + * Depending on online CPU distribution, a node may end up with per-node + * max_active which is significantly lower than @max_active, which can lead to + * deadlocks if the per-node concurrency limit is lower than the maximum number + * of interdependent work items for the workqueue. + * + * To guarantee forward progress regardless of online CPU distribution, the + * concurrency limit on every node is guaranteed to be equal to or greater than + * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means + * that the sum of per-node max_active's may be larger than @max_active. + * + * For detailed information on %WQ_* flags, please refer to * Documentation/core-api/workqueue.rst. * * RETURNS: @@ -460,8 +513,7 @@ alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue(fmt, flags, args...) \ - alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | \ - __WQ_ORDERED_EXPLICIT | (flags), 1, ##args) + alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) #define create_workqueue(name) \ alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name)) @@ -471,6 +523,9 @@ alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); #define create_singlethread_workqueue(name) \ alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name) +#define from_work(var, callback_work, work_fieldname) \ + container_of(callback_work, typeof(*var), work_fieldname) + extern void destroy_workqueue(struct workqueue_struct *wq); struct workqueue_attrs *alloc_workqueue_attrs(void); @@ -508,6 +563,8 @@ extern bool flush_rcu_work(struct rcu_work *rwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); +extern void workqueue_set_min_active(struct workqueue_struct *wq, + int min_active); extern struct work_struct *current_work(void); extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 01fa15506286..170fdee6339c 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -16,6 +16,7 @@ * @WWAN_PORT_QCDM: Qcom Modem diagnostic interface * @WWAN_PORT_FIREHOSE: XML based command protocol * @WWAN_PORT_XMMRPC: Control protocol for Intel XMM modems + * @WWAN_PORT_FASTBOOT: Fastboot protocol control * * @WWAN_PORT_MAX: Highest supported port types * @WWAN_PORT_UNKNOWN: Special value to indicate an unknown port type @@ -28,6 +29,7 @@ enum wwan_port_type { WWAN_PORT_QCDM, WWAN_PORT_FIREHOSE, WWAN_PORT_XMMRPC, + WWAN_PORT_FASTBOOT, /* Add new port types above this line */ |