Diffstat (limited to 'include')
85 files changed, 1489 insertions, 477 deletions
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index e7d27373ff71..c09d72986968 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -365,7 +365,6 @@ struct acpi_device { int device_type; acpi_handle handle; /* no handle for fixed hardware */ struct fwnode_handle fwnode; - struct acpi_device *parent; struct list_head wakeup_list; struct list_head del_list; struct acpi_device_status status; @@ -458,6 +457,14 @@ static inline void *acpi_driver_data(struct acpi_device *d) #define to_acpi_device(d) container_of(d, struct acpi_device, dev) #define to_acpi_driver(d) container_of(d, struct acpi_driver, drv) +static inline struct acpi_device *acpi_dev_parent(struct acpi_device *adev) +{ + if (adev->dev.parent) + return to_acpi_device(adev->dev.parent); + + return NULL; +} + static inline void acpi_set_device_status(struct acpi_device *adev, u32 sta) { *((u32 *)&adev->status) = sta; @@ -512,7 +519,6 @@ extern int unregister_acpi_notifier(struct notifier_block *); * External Functions */ -struct acpi_device *acpi_fetch_acpi_dev(acpi_handle handle); acpi_status acpi_bus_get_status_handle(acpi_handle handle, unsigned long long *sta); int acpi_bus_get_status(struct acpi_device *device); @@ -613,8 +619,7 @@ enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); int acpi_iommu_fwspec_init(struct device *dev, u32 id, struct fwnode_handle *fwnode, const struct iommu_ops *ops); -int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, - u64 *size); +int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map); int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, const u32 *input_id); static inline int acpi_dma_configure(struct device *dev, @@ -733,10 +738,24 @@ static inline bool acpi_device_can_poweroff(struct acpi_device *adev) } bool acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *uid2); +int acpi_dev_uid_to_integer(struct acpi_device *adev, u64 *integer); void acpi_dev_clear_dependencies(struct acpi_device *supplier); bool acpi_dev_ready_for_enumeration(const struct acpi_device *device); -struct acpi_device *acpi_dev_get_first_consumer_dev(struct acpi_device *supplier); +struct acpi_device *acpi_dev_get_next_consumer_dev(struct acpi_device *supplier, + struct acpi_device *start); + +/** + * for_each_acpi_consumer_dev - iterate over the consumer ACPI devices for a + * given supplier + * @supplier: Pointer to the supplier's ACPI device + * @consumer: Pointer to &struct acpi_device to hold the consumer, initially NULL + */ +#define for_each_acpi_consumer_dev(supplier, consumer) \ + for (consumer = acpi_dev_get_next_consumer_dev(supplier, NULL); \ + consumer; \ + consumer = acpi_dev_get_next_consumer_dev(supplier, consumer)) + struct acpi_device * acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const char *uid, s64 hrv); struct acpi_device * @@ -767,9 +786,10 @@ static inline void acpi_dev_put(struct acpi_device *adev) put_device(&adev->dev); } -struct acpi_device *acpi_bus_get_acpi_device(acpi_handle handle); +struct acpi_device *acpi_fetch_acpi_dev(acpi_handle handle); +struct acpi_device *acpi_get_acpi_dev(acpi_handle handle); -static inline void acpi_bus_put_acpi_device(struct acpi_device *adev) +static inline void acpi_put_acpi_dev(struct acpi_device *adev) { acpi_dev_put(adev); } diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index f73d357ecdf5..c5614444031f 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -140,6 +140,7 
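/*
 * Example (not part of the diff): a minimal sketch of how the new
 * acpi_dev_parent() helper and for_each_acpi_consumer_dev() iterator
 * from the acpi_bus.h hunk above might be used by a supplier driver.
 * The function name and log messages are hypothetical.
 */
#include <linux/acpi.h>

static void example_log_consumers(struct acpi_device *supplier)
{
	struct acpi_device *parent = acpi_dev_parent(supplier);
	struct acpi_device *consumer;

	if (parent)
		dev_info(&supplier->dev, "parent: %s\n", dev_name(&parent->dev));

	for_each_acpi_consumer_dev(supplier, consumer)
		dev_info(&consumer->dev, "consumes %s\n", dev_name(&supplier->dev));
}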
@@ extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_set_enable(int cpu, bool enable); extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); +extern bool cppc_perf_ctrs_in_pcc(void); extern bool acpi_cpc_valid(void); extern bool cppc_allow_fast_switch(void); extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data); @@ -173,6 +174,10 @@ static inline int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps) { return -ENOTSUPP; } +static inline bool cppc_perf_ctrs_in_pcc(void) +{ + return false; +} static inline bool acpi_cpc_valid(void) { return false; diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index ba1f860af38b..4050b191e1a9 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -220,22 +220,6 @@ extern __printf(1, 2) void __warn_printk(const char *fmt, ...); # define WARN_ON_SMP(x) ({0;}) #endif -/* - * WARN_ON_FUNCTION_MISMATCH() warns if a value doesn't match a - * function address, and can be useful for catching issues with - * callback functions, for example. - * - * With CONFIG_CFI_CLANG, the warning is disabled because the - * compiler replaces function addresses taken in C code with - * local jump table addresses, which breaks cross-module function - * address equality. - */ -#if defined(CONFIG_CFI_CLANG) && defined(CONFIG_MODULES) -# define WARN_ON_FUNCTION_MISMATCH(x, fn) ({ 0; }) -#else -# define WARN_ON_FUNCTION_MISMATCH(x, fn) WARN_ON_ONCE((x) != (fn)) -#endif - #endif /* __ASSEMBLY__ */ #endif diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7c90b1ab3e00..a232f518d98a 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -154,6 +154,14 @@ #define MEM_DISCARD(sec) *(.mem##sec) #endif +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_NO_PATCHABLE +#define KEEP_PATCHABLE KEEP(*(__patchable_function_entries)) +#define PATCHABLE_DISCARDS +#else +#define KEEP_PATCHABLE +#define PATCHABLE_DISCARDS *(__patchable_function_entries) +#endif + #ifdef CONFIG_FTRACE_MCOUNT_RECORD /* * The ftrace call sites are logged to a section whose name depends on the @@ -172,7 +180,7 @@ #define MCOUNT_REC() . = ALIGN(8); \ __start_mcount_loc = .; \ KEEP(*(__mcount_loc)) \ - KEEP(*(__patchable_function_entries)) \ + KEEP_PATCHABLE \ __stop_mcount_loc = .; \ ftrace_stub_graph = ftrace_stub; \ ftrace_ops_list_func = arch_ftrace_ops_list_func; @@ -422,6 +430,22 @@ #endif /* + * .kcfi_traps contains a list of KCFI trap locations. */ +#ifndef KCFI_TRAPS +#ifdef CONFIG_ARCH_USES_CFI_TRAPS +#define KCFI_TRAPS \ + __kcfi_traps : AT(ADDR(__kcfi_traps) - LOAD_OFFSET) { \ + __start___kcfi_traps = .; \ + KEEP(*(.kcfi_traps)) \ + __stop___kcfi_traps = .; \ + } +#else +#define KCFI_TRAPS +#endif +#endif + +/* * Read only Data */ #define RO_DATA(align) \ @@ -529,6 +553,8 @@ __stop___modver = .; \ } \ \ + KCFI_TRAPS \ + \ RO_EXCEPTION_TABLE \ NOTES \ BTF \ @@ -538,20 +564,6 @@ /* - * .text..L.cfi.jumptable.* contain Control-Flow Integrity (CFI) - * jump table entries.
- */ -#ifdef CONFIG_CFI_CLANG -#define TEXT_CFI_JT \ - ALIGN_FUNCTION(); \ - __cfi_jt_start = .; \ - *(.text..L.cfi.jumptable .text..L.cfi.jumptable.*) \ - __cfi_jt_end = .; -#else -#define TEXT_CFI_JT -#endif - -/* * Non-instrumentable text section */ #define NOINSTR_TEXT \ @@ -578,7 +590,6 @@ *(.text..refcount) \ *(.ref.text) \ *(.text.asan.* .text.tsan.*) \ - TEXT_CFI_JT \ MEM_KEEP(init.text*) \ MEM_KEEP(exit.text*) \ @@ -1007,8 +1018,7 @@ * keep any .init_array.* sections. * https://bugs.llvm.org/show_bug.cgi?id=46478 */ -#if defined(CONFIG_GCOV_KERNEL) || defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KCSAN) || \ - defined(CONFIG_CFI_CLANG) +#if defined(CONFIG_GCOV_KERNEL) || defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KCSAN) # ifdef CONFIG_CONSTRUCTORS # define SANITIZER_DISCARDS \ *(.eh_frame) @@ -1023,6 +1033,7 @@ #define COMMON_DISCARDS \ SANITIZER_DISCARDS \ + PATCHABLE_DISCARDS \ *(.discard) \ *(.discard.*) \ *(.modinfo) \ diff --git a/include/linux/a.out.h b/include/linux/a.out.h deleted file mode 100644 index 600cf45645c6..000000000000 --- a/include/linux/a.out.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __A_OUT_GNU_H__ -#define __A_OUT_GNU_H__ - -#include <uapi/linux/a.out.h> - -#ifndef __ASSEMBLY__ -#ifdef linux -#include <asm/page.h> -#if defined(__i386__) || defined(__mc68000__) -#else -#ifndef SEGMENT_SIZE -#define SEGMENT_SIZE PAGE_SIZE -#endif -#endif -#endif -#endif /*__ASSEMBLY__ */ -#endif /* __A_OUT_GNU_H__ */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6f64b2f3dc54..2f9193b8dfc1 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -279,14 +279,17 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) { } void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); +#if defined(CONFIG_ARM64) || defined(CONFIG_LOONGARCH) +void acpi_arch_dma_setup(struct device *dev); +#else +static inline void acpi_arch_dma_setup(struct device *dev) { } +#endif + #ifdef CONFIG_ARM64 void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa); -void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size); #else static inline void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { } -static inline void -acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) { } #endif int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); @@ -506,6 +509,7 @@ int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list, void *preproc_data); int acpi_dev_get_dma_resources(struct acpi_device *adev, struct list_head *list); +int acpi_dev_get_memory_resources(struct acpi_device *adev, struct list_head *list); int acpi_dev_filter_resource_type(struct acpi_resource *ares, unsigned long types); @@ -798,6 +802,11 @@ acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *u return false; } +static inline int acpi_dev_uid_to_integer(struct acpi_device *adev, u64 *integer) +{ + return -ENODEV; +} + static inline struct acpi_device * acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) { @@ -977,8 +986,7 @@ static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) return DEV_DMA_NOT_SUPPORTED; } -static inline int acpi_dma_get_range(struct device *dev, u64 *dma_addr, - u64 *offset, u64 *size) +static inline int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map) { return -ENODEV; } diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h 
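/*
 * Example (not part of the diff): a hedged sketch of a caller adapted to
 * the reworked acpi_dma_get_range() above, which now produces a struct
 * bus_dma_region map instead of raw dma_addr/offset/size values. That the
 * caller owns the returned map and may assign it to dev->dma_range_map is
 * an assumption for illustration.
 */
#include <linux/acpi.h>
#include <linux/dma-direct.h>

static int example_acpi_dma_setup(struct device *dev)
{
	const struct bus_dma_region *map = NULL;
	int ret;

	ret = acpi_dma_get_range(dev, &map);
	if (ret < 0)
		return ret;
	if (map)
		dev->dma_range_map = map;	/* released with the device */
	return 0;
}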
index 3dc20c4f394c..8d51f69f9f5e 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -43,9 +43,6 @@ struct linux_binprm { * original userspace. */ point_of_no_return:1; -#ifdef __alpha__ - unsigned int taso:1; -#endif struct file *executable; /* Executable to pass to the interpreter */ struct file *interpreter; struct file *file; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 84b13fdd34a7..8038c5fbde40 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1498,6 +1498,7 @@ int sync_blockdev(struct block_device *bdev); int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend); int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); +void bdev_statx_dioalign(struct inode *inode, struct kstat *stat); void printk_all_partitions(void); #else static inline void invalidate_bdev(struct block_device *bdev) @@ -1514,6 +1515,9 @@ static inline int sync_blockdev_nowait(struct block_device *bdev) static inline void sync_bdevs(bool wait) { } +static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat) +{ +} static inline void printk_all_partitions(void) { } diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9c1674973e03..9e7d46d16032 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -280,14 +280,33 @@ static inline void check_and_init_map_value(struct bpf_map *map, void *dst) } } -/* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */ -static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) +/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and + * forced to use 'long' read/writes to try to atomically copy long counters. + * Best-effort only. No barriers here, since it _will_ race with concurrent + * updates from BPF programs. Called from bpf syscall and mostly used with + * size 8 or 16 bytes, so ask compiler to inline it. + */ +static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) +{ + const long *lsrc = src; + long *ldst = dst; + + size /= sizeof(long); + while (size--) + *ldst++ = *lsrc++; +} + +/* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. 
*/ +static inline void __copy_map_value(struct bpf_map *map, void *dst, void *src, bool long_memcpy) { u32 curr_off = 0; int i; if (likely(!map->off_arr)) { - memcpy(dst, src, map->value_size); + if (long_memcpy) + bpf_long_memcpy(dst, src, round_up(map->value_size, 8)); + else + memcpy(dst, src, map->value_size); return; } @@ -299,6 +318,36 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) } memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); } + +static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) +{ + __copy_map_value(map, dst, src, false); +} + +static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src) +{ + __copy_map_value(map, dst, src, true); +} + +static inline void zero_map_value(struct bpf_map *map, void *dst) +{ + u32 curr_off = 0; + int i; + + if (likely(!map->off_arr)) { + memset(dst, 0, map->value_size); + return; + } + + for (i = 0; i < map->off_arr->cnt; i++) { + u32 next_off = map->off_arr->field_off[i]; + + memset(dst + curr_off, 0, next_off - curr_off); + curr_off += map->off_arr->field_sz[i]; + } + memset(dst + curr_off, 0, map->value_size - curr_off); +} + void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); @@ -402,7 +451,7 @@ enum bpf_type_flag { /* DYNPTR points to memory local to the bpf program. */ DYNPTR_TYPE_LOCAL = BIT(8 + BPF_BASE_TYPE_BITS), - /* DYNPTR points to a ringbuf record. */ + /* DYNPTR points to a kernel-produced ringbuf record. */ DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS), /* Size is known at compile time. */ @@ -607,6 +656,7 @@ enum bpf_reg_type { PTR_TO_MEM, /* reg points to valid memory region */ PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_FUNC, /* reg points to a bpf program function */ + PTR_TO_DYNPTR, /* reg points to a dynptr */ __BPF_REG_TYPE_MAX, /* Extended reg_types. */ @@ -727,10 +777,14 @@ enum bpf_cgroup_storage_type { */ #define MAX_BPF_FUNC_REG_ARGS 5 +/* The argument is a structure. 
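/*
 * Standalone, userspace demo (not from the diff) of the bpf_long_memcpy()
 * logic moved above: the copy runs in long-sized chunks so that aligned
 * 8-byte counters are copied with single loads/stores on 64-bit targets,
 * best-effort and without barriers.
 */
#include <stdio.h>

static inline void bpf_long_memcpy(void *dst, const void *src, unsigned int size)
{
	const long *lsrc = src;
	long *ldst = dst;

	size /= sizeof(long);
	while (size--)
		*ldst++ = *lsrc++;
}

int main(void)
{
	long src[2] = { 0x11223344L, 42 };
	long dst[2] = { 0, 0 };

	bpf_long_memcpy(dst, src, sizeof(src));
	printf("%lx %ld\n", dst[0], dst[1]);	/* expect: 11223344 42 */
	return 0;
}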
*/ +#define BTF_FMODEL_STRUCT_ARG BIT(0) + struct btf_func_model { u8 ret_size; u8 nr_args; u8 arg_size[MAX_BPF_FUNC_ARGS]; + u8 arg_flags[MAX_BPF_FUNC_ARGS]; }; /* Restore arguments before returning from trampoline to let original function @@ -810,6 +864,10 @@ u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx); +u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); @@ -892,6 +950,7 @@ struct bpf_dispatcher { struct bpf_dispatcher_prog progs[BPF_DISPATCHER_MAX]; int num_progs; void *image; + void *rw_image; u32 image_off; struct bpf_ksym ksym; }; @@ -910,7 +969,7 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampolin struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); -int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); +int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ @@ -924,7 +983,14 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); }, \ } +#ifdef CONFIG_X86_64 +#define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5))) +#else +#define BPF_DISPATCHER_ATTRIBUTES +#endif + #define DEFINE_BPF_DISPATCHER(name) \ + notrace BPF_DISPATCHER_ATTRIBUTES \ noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ @@ -946,7 +1012,6 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); /* Called only from JIT-enabled code, so there's no need for stubs. */ -void *bpf_jit_alloc_exec_page(void); void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); @@ -1334,6 +1399,11 @@ struct bpf_array { #define BPF_MAP_CAN_READ BIT(0) #define BPF_MAP_CAN_WRITE BIT(1) +/* Maximum number of user-producer ring buffer samples that can be drained in + * a call to bpf_user_ringbuf_drain(). + */ +#define BPF_MAX_USER_RINGBUF_SAMPLES (128 * 1024) + static inline u32 bpf_map_flags_to_cap(struct bpf_map *map) { u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); @@ -1730,6 +1800,27 @@ int bpf_obj_get_user(const char __user *pathname, int flags); extern int bpf_iter_ ## target(args); \ int __init bpf_iter_ ## target(args) { return 0; } +/* + * The task type of iterators. + * + * BPF task iterators can be parameterized with various + * parameters to visit only some of the tasks. + * + * BPF_TASK_ITER_ALL (default) + * Iterate over resources of every task. + * + * BPF_TASK_ITER_TID + * Iterate over resources of a task/tid. + * + * BPF_TASK_ITER_TGID + * Iterate over resources of every task of a process / task group.
+ */ +enum bpf_iter_task_type { + BPF_TASK_ITER_ALL = 0, + BPF_TASK_ITER_TID, + BPF_TASK_ITER_TGID, +}; + struct bpf_iter_aux_info { /* for map_elem iter */ struct bpf_map *map; @@ -1739,6 +1830,10 @@ struct bpf_iter_aux_info { struct cgroup *start; /* starting cgroup */ enum bpf_cgroup_iter_order order; } cgroup; + struct { + enum bpf_iter_task_type type; + u32 pid; + } task; }; typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, @@ -1823,22 +1918,6 @@ int bpf_get_file_flag(int flags); int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size, size_t actual_size); -/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and - * forced to use 'long' read/writes to try to atomically copy long counters. - * Best-effort only. No barriers here, since it _will_ race with concurrent - * updates from BPF programs. Called from bpf syscall and mostly used with - * size 8 or 16 bytes, so ask compiler to inline it. - */ -static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) -{ - const long *lsrc = src; - long *ldst = dst; - - size /= sizeof(long); - while (size--) - *ldst++ = *lsrc++; -} - /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr); @@ -1940,13 +2019,22 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, const char *func_name, struct btf_func_model *m); +struct bpf_kfunc_arg_meta { + u64 r0_size; + bool r0_rdonly; + int ref_obj_id; + u32 flags; +}; + struct bpf_reg_state; int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); +int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, + struct bpf_reg_state *regs); int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, - u32 kfunc_flags); + struct bpf_kfunc_arg_meta *meta); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, @@ -1983,6 +2071,8 @@ static inline bool has_current_bpf_ctx(void) { return !!current->bpf_ctx; } + +void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -2165,6 +2255,15 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id) return ERR_PTR(-ENOTSUPP); } +static inline int btf_struct_access(struct bpf_verifier_log *log, + const struct btf *btf, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, + u32 *next_btf_id, enum bpf_type_flag *flag) +{ + return -EACCES; +} + static inline const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -2196,6 +2295,10 @@ static inline bool has_current_bpf_ctx(void) { return false; } + +static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog) +{ +} #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@ -2433,6 +2536,7 @@ extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_copy_from_user_task_proto; extern const struct bpf_func_proto bpf_set_retval_proto; extern const struct bpf_func_proto bpf_get_retval_proto; +extern const struct bpf_func_proto bpf_user_ringbuf_drain_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); @@ -2577,7 +2681,7 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_INVALID, /* Points to memory that is 
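/*
 * Example (not part of the diff): userspace sketch of attaching a task
 * iterator parameterized per the new bpf_iter_task_type above. The
 * union bpf_iter_link_info "task.tid" field is an assumption based on
 * the matching uapi change; "prog" is a loaded iterator program.
 */
#include <bpf/libbpf.h>

static struct bpf_link *example_attach_tid_iter(struct bpf_program *prog, __u32 tid)
{
	union bpf_iter_link_info linfo = { .task = { .tid = tid } };
	LIBBPF_OPTS(bpf_iter_attach_opts, opts,
		    .link_info = &linfo,
		    .link_info_len = sizeof(linfo));

	return bpf_program__attach_iter(prog, &opts);
}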
local to the bpf program */ BPF_DYNPTR_TYPE_LOCAL, - /* Underlying data is a ringbuf record */ + /* Underlying data is a kernel-produced ringbuf record */ BPF_DYNPTR_TYPE_RINGBUF, }; @@ -2585,6 +2689,7 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size); void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); int bpf_dynptr_check_size(u32 size); +u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr); #ifdef CONFIG_BPF_LSM void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype); @@ -2594,4 +2699,12 @@ static inline void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype) {} static inline void bpf_cgroup_atype_put(int cgroup_atype) {} #endif /* CONFIG_BPF_LSM */ +struct key; + +#ifdef CONFIG_KEYS +struct bpf_key { + struct key *key; + bool has_ref; +}; +#endif /* CONFIG_KEYS */ #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 2b9112b80171..2c6a4f2562a7 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -126,6 +126,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 1fdddbf3546b..9e1e6965f407 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -248,6 +248,7 @@ struct bpf_func_state { */ u32 async_entry_cnt; bool in_callback_fn; + struct tnum callback_ret_range; bool in_async_callback_fn; /* The following fields should be last. 
See copy_func_state() */ @@ -348,6 +349,27 @@ struct bpf_verifier_state { iter < frame->allocated_stack / BPF_REG_SIZE; \ iter++, reg = bpf_get_spilled_reg(iter, frame)) +/* Invoke __expr over registers in __vst, setting __state and __reg */ +#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \ + ({ \ + struct bpf_verifier_state *___vstate = __vst; \ + int ___i, ___j; \ + for (___i = 0; ___i <= ___vstate->curframe; ___i++) { \ + struct bpf_reg_state *___regs; \ + __state = ___vstate->frame[___i]; \ + ___regs = __state->regs; \ + for (___j = 0; ___j < MAX_BPF_REG; ___j++) { \ + __reg = &___regs[___j]; \ + (void)(__expr); \ + } \ + bpf_for_each_spilled_reg(___j, __state, __reg) { \ + if (!__reg) \ + continue; \ + (void)(__expr); \ + } \ + } \ + }) + /* linked list of verifier states used to prune search */ struct bpf_verifier_state_list { struct bpf_verifier_state state; @@ -571,6 +593,11 @@ int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state u32 regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); +bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, + struct bpf_reg_state *reg); +bool is_dynptr_type_expected(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + enum bpf_arg_type arg_type); /* this lives here instead of in bpf.h because it needs to dereference tgt_prog */ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, @@ -598,6 +625,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, struct bpf_attach_target_info *tgt_info); void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab); +int mark_chain_precision(struct bpf_verifier_env *env, int regno); + #define BPF_BASE_TYPE_MASK GENMASK(BPF_BASE_TYPE_BITS - 1, 0) /* extract base type from bpf_{arg, return, reg}_type. */ diff --git a/include/linux/btf.h b/include/linux/btf.h index ad93c2d9cc1c..f9aababc5d78 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -52,6 +52,15 @@ #define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ #define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */ +/* + * Return the name of the passed struct, if it exists, or halt the build if for + * example the structure gets renamed. In this way, developers have to revisit + * the code using that structure name, and update it accordingly. + */ +#define stringify_struct(x) \ + ({ BUILD_BUG_ON(sizeof(struct x) < 0); \ + __stringify(x); }) + struct btf; struct btf_member; struct btf_type; @@ -441,4 +450,14 @@ static inline int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dt } #endif +static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t) +{ + if (!btf_type_is_ptr(t)) + return false; + + t = btf_type_skip_modifiers(btf, t->type, NULL); + + return btf_type_is_struct(t); +} + #endif diff --git a/include/linux/cfi.h b/include/linux/cfi.h index c6dfc1ed0626..5e134f4ce8b7 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -2,49 +2,38 @@ /* * Clang Control Flow Integrity (CFI) support.
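/*
 * Example (not part of the diff): a sketch of how verifier code uses the
 * new bpf_for_each_reg_in_vstate() macro above to visit every register
 * and spilled stack slot in all frames, modelled on the in-tree
 * reference-release path; __mark_reg_unknown() is verifier-internal.
 */
static void example_release_reference(struct bpf_verifier_env *env,
				      struct bpf_verifier_state *vstate,
				      int ref_obj_id)
{
	struct bpf_func_state *state;
	struct bpf_reg_state *reg;

	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
		if (reg->ref_obj_id == ref_obj_id)
			__mark_reg_unknown(env, reg);
	}));
}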
* - * Copyright (C) 2021 Google LLC + * Copyright (C) 2022 Google LLC */ #ifndef _LINUX_CFI_H #define _LINUX_CFI_H -#ifdef CONFIG_CFI_CLANG -typedef void (*cfi_check_fn)(uint64_t id, void *ptr, void *diag); - -/* Compiler-generated function in each module, and the kernel */ -extern void __cfi_check(uint64_t id, void *ptr, void *diag); - -/* - * Force the compiler to generate a CFI jump table entry for a function - * and store the jump table address to __cfi_jt_<function>. - */ -#define __CFI_ADDRESSABLE(fn, __attr) \ - const void *__cfi_jt_ ## fn __visible __attr = (void *)&fn - -#ifdef CONFIG_CFI_CLANG_SHADOW - -extern void cfi_module_add(struct module *mod, unsigned long base_addr); -extern void cfi_module_remove(struct module *mod, unsigned long base_addr); - -#else +#include <linux/bug.h> +#include <linux/module.h> -static inline void cfi_module_add(struct module *mod, unsigned long base_addr) {} -static inline void cfi_module_remove(struct module *mod, unsigned long base_addr) {} - -#endif /* CONFIG_CFI_CLANG_SHADOW */ - -#else /* !CONFIG_CFI_CLANG */ - -#ifdef CONFIG_X86_KERNEL_IBT - -#define __CFI_ADDRESSABLE(fn, __attr) \ - const void *__cfi_jt_ ## fn __visible __attr = (void *)&fn +#ifdef CONFIG_CFI_CLANG +enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, + unsigned long *target, u32 type); -#endif /* CONFIG_X86_KERNEL_IBT */ +static inline enum bug_trap_type report_cfi_failure_noaddr(struct pt_regs *regs, + unsigned long addr) +{ + return report_cfi_failure(regs, addr, NULL, 0); +} +#ifdef CONFIG_ARCH_USES_CFI_TRAPS +bool is_cfi_trap(unsigned long addr); +#endif #endif /* CONFIG_CFI_CLANG */ -#ifndef __CFI_ADDRESSABLE -#define __CFI_ADDRESSABLE(fn, __attr) -#endif +#ifdef CONFIG_MODULES +#ifdef CONFIG_ARCH_USES_CFI_TRAPS +void module_cfi_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod); +#else +static inline void module_cfi_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *mod) {} +#endif /* CONFIG_ARCH_USES_CFI_TRAPS */ +#endif /* CONFIG_MODULES */ #endif /* _LINUX_CFI_H */ diff --git a/include/linux/cfi_types.h b/include/linux/cfi_types.h new file mode 100644 index 000000000000..6b8713675765 --- /dev/null +++ b/include/linux/cfi_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Clang Control Flow Integrity (CFI) type definitions. + */ +#ifndef _LINUX_CFI_TYPES_H +#define _LINUX_CFI_TYPES_H + +#ifdef __ASSEMBLY__ +#include <linux/linkage.h> + +#ifdef CONFIG_CFI_CLANG +/* + * Use the __kcfi_typeid_<function> type identifier symbol to + * annotate indirectly called assembly functions. The compiler emits + * these symbols for all address-taken function declarations in C + * code. + */ +#ifndef __CFI_TYPE +#define __CFI_TYPE(name) \ + .4byte __kcfi_typeid_##name +#endif + +#define SYM_TYPED_ENTRY(name, linkage, align...) \ + linkage(name) ASM_NL \ + align ASM_NL \ + __CFI_TYPE(name) ASM_NL \ + name: + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_TYPED_ENTRY(name, linkage, align) + +#else /* CONFIG_CFI_CLANG */ + +#define SYM_TYPED_START(name, linkage, align...) 
\ + SYM_START(name, linkage, align) + +#endif /* CONFIG_CFI_CLANG */ + +#ifndef SYM_TYPED_FUNC_START +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _LINUX_CFI_TYPES_H */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index c84fec767445..42e55579d649 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -66,17 +66,9 @@ # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif -#define __nocfi __attribute__((__no_sanitize__("cfi"))) -#define __cficanonical __attribute__((__cfi_canonical_jump_table__)) - -#if defined(CONFIG_CFI_CLANG) -/* - * With CONFIG_CFI_CLANG, the compiler replaces function address - * references with the address of the function's CFI jump table - * entry. The function_nocfi macro always returns the address of the - * actual function instead. - */ -#define function_nocfi(x) __builtin_function_start(x) +#if __has_feature(kcfi) +/* Disable CFI checking inside a function. */ +#define __nocfi __attribute__((__no_sanitize__("kcfi"))) #endif /* diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 7713d7bcdaea..973a1bfd7ef5 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -203,16 +203,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, __v; \ }) -/* - * With CONFIG_CFI_CLANG, the compiler replaces function addresses in - * instrumented C code with jump table addresses. Architectures that - * support CFI can define this macro to return the actual function address - * when needed. - */ -#ifndef function_nocfi -#define function_nocfi(x) (x) -#endif - #endif /* __KERNEL__ */ /* @@ -221,9 +211,11 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, * otherwise, or eliminated entirely due to lack of references that are * visible to the compiler. */ -#define __ADDRESSABLE(sym) \ - static void * __section(".discard.addressable") __used \ +#define ___ADDRESSABLE(sym, __attrs) \ + static void * __used __attrs \ __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym; +#define __ADDRESSABLE(sym) \ + ___ADDRESSABLE(sym, __section(".discard.addressable")) /** * offset_to_ptr - convert a relative memory offset to an absolute pointer diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 4f2a819fd60a..d9d98e8a9a3b 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -4,8 +4,12 @@ #ifndef __ASSEMBLY__ +/* + * Skipped when running bindgen due to a libclang issue; + * see https://github.com/rust-lang/rust-bindgen/issues/2244. + */ #if defined(CONFIG_DEBUG_INFO_BTF) && defined(CONFIG_PAHOLE_HAS_BTF_TAG) && \ - __has_attribute(btf_type_tag) + __has_attribute(btf_type_tag) && !defined(__BINDGEN__) # define BTF_TYPE_TAG(value) __attribute__((btf_type_tag(#value))) #else # define BTF_TYPE_TAG(value) /* nothing */ @@ -265,10 +269,6 @@ struct ftrace_likely_data { # define __nocfi #endif -#ifndef __cficanonical -# define __cficanonical -#endif - /* * Any place that could be marked with the "alloc_size" attribute is also * a place to be marked with the "malloc" attribute. Do this as part of the diff --git a/include/linux/dlm.h b/include/linux/dlm.h index ff951e9f6f20..c6bc2b5ee7e6 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -56,9 +56,6 @@ struct dlm_lockspace_ops { * DLM_LSFL_TIMEWARN * The dlm should emit netlink messages if locks have been waiting * for a configurable amount of time. 
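/*
 * Example (not part of the diff): annotating an indirectly called
 * assembly function with the new SYM_TYPED_FUNC_START so KCFI can
 * type-check calls to it; arm64-flavoured, and the function name is
 * hypothetical.
 */
#include <linux/linkage.h>
#include <linux/cfi_types.h>

SYM_TYPED_FUNC_START(example_asm_callback)
	mov	x0, #0
	ret
SYM_FUNC_END(example_asm_callback)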
(Unused.) - * DLM_LSFL_FS - * The lockspace user is in the kernel (i.e. filesystem). Enables - * direct bast/cast callbacks. * DLM_LSFL_NEWEXCL * dlm_new_lockspace() should return -EEXIST if the lockspace exists. * @@ -134,7 +131,7 @@ int dlm_lock(dlm_lockspace_t *lockspace, int mode, struct dlm_lksb *lksb, uint32_t flags, - void *name, + const void *name, unsigned int namelen, uint32_t parent_lkid, void (*lockast) (void *astarg), diff --git a/include/linux/edac.h b/include/linux/edac.h index e730b3468719..fa4bda2a70f6 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -231,21 +231,21 @@ enum mem_type { #define MEM_FLAG_DDR BIT(MEM_DDR) #define MEM_FLAG_RDDR BIT(MEM_RDDR) #define MEM_FLAG_RMBS BIT(MEM_RMBS) -#define MEM_FLAG_DDR2 BIT(MEM_DDR2) -#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2) -#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2) -#define MEM_FLAG_XDR BIT(MEM_XDR) -#define MEM_FLAG_DDR3 BIT(MEM_DDR3) -#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3) -#define MEM_FLAG_LPDDR3 BIT(MEM_LPDDR3) -#define MEM_FLAG_DDR4 BIT(MEM_DDR4) -#define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) -#define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) -#define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4) -#define MEM_FLAG_DDR5 BIT(MEM_DDR5) -#define MEM_FLAG_RDDR5 BIT(MEM_RDDR5) -#define MEM_FLAG_LRDDR5 BIT(MEM_LRDDR5) -#define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) +#define MEM_FLAG_DDR2 BIT(MEM_DDR2) +#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2) +#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2) +#define MEM_FLAG_XDR BIT(MEM_XDR) +#define MEM_FLAG_DDR3 BIT(MEM_DDR3) +#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3) +#define MEM_FLAG_LPDDR3 BIT(MEM_LPDDR3) +#define MEM_FLAG_DDR4 BIT(MEM_DDR4) +#define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) +#define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) +#define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4) +#define MEM_FLAG_DDR5 BIT(MEM_DDR5) +#define MEM_FLAG_RDDR5 BIT(MEM_RDDR5) +#define MEM_FLAG_LRDDR5 BIT(MEM_LRDDR5) +#define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) #define MEM_FLAG_WIO2 BIT(MEM_WIO2) #define MEM_FLAG_HBM2 BIT(MEM_HBM2) diff --git a/include/linux/filter.h b/include/linux/filter.h index 527ae1d64e27..efc42a6e3aed 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -567,6 +567,12 @@ struct sk_filter { DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); +extern struct mutex nf_conn_btf_access_lock; +extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, u32 *next_btf_id, + enum bpf_type_flag *flag); + typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx, const struct bpf_insn *insnsi, unsigned int (*bpf_func)(const void *, @@ -1017,6 +1023,8 @@ extern long bpf_jit_limit_max; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); +void bpf_jit_fill_hole_with_zero(void *area, unsigned int size); + struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, @@ -1029,6 +1037,9 @@ void bpf_jit_free(struct bpf_prog *fp); struct bpf_binary_header * bpf_jit_binary_pack_hdr(const struct bpf_prog *fp); +void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns); +void bpf_prog_pack_free(struct bpf_binary_header *hdr); + static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) { return list_empty(&fp->aux->ksym.lnode) || @@ -1099,7 +1110,7 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) return false; if (!bpf_jit_harden) return false; - if (bpf_jit_harden == 1 && capable(CAP_SYS_ADMIN)) + if (bpf_jit_harden == 1 && bpf_capable()) return 
false; return true; diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 3b401fa0f374..b62c90cfafaf 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -2,7 +2,9 @@ #ifndef _LINUX_FORTIFY_STRING_H_ #define _LINUX_FORTIFY_STRING_H_ +#include <linux/bug.h> #include <linux/const.h> +#include <linux/limits.h> #define __FORTIFY_INLINE extern __always_inline __gnu_inline __overloadable #define __RENAME(x) __asm__(#x) @@ -17,9 +19,10 @@ void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning(" #define __compiletime_strlen(p) \ ({ \ unsigned char *__p = (unsigned char *)(p); \ - size_t __ret = (size_t)-1; \ - size_t __p_size = __builtin_object_size(p, 1); \ - if (__p_size != (size_t)-1) { \ + size_t __ret = SIZE_MAX; \ + size_t __p_size = __member_size(p); \ + if (__p_size != SIZE_MAX && \ + __builtin_constant_p(*__p)) { \ size_t __p_len = __p_size - 1; \ if (__builtin_constant_p(__p[__p_len]) && \ __p[__p_len] == '\0') \ @@ -69,20 +72,59 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __underlying_memcpy(dst, src, bytes) /* - * Clang's use of __builtin_object_size() within inlines needs hinting via - * __pass_object_size(). The preference is to only ever use type 1 (member + * Clang's use of __builtin_*object_size() within inlines needs hinting via + * __pass_*object_size(). The preference is to only ever use type 1 (member * size, rather than struct size), but there remain some stragglers using * type 0 that will be converted in the future. */ -#define POS __pass_object_size(1) -#define POS0 __pass_object_size(0) +#define POS __pass_object_size(1) +#define POS0 __pass_object_size(0) +#define __struct_size(p) __builtin_object_size(p, 0) +#define __member_size(p) __builtin_object_size(p, 1) +#define __compiletime_lessthan(bounds, length) ( \ + __builtin_constant_p((bounds) < (length)) && \ + (bounds) < (length) \ +) + +/** + * strncpy - Copy a string to memory with non-guaranteed NUL padding + * + * @p: pointer to destination of copy + * @q: pointer to NUL-terminated source string to copy + * @size: bytes to write at @p + * + * If strlen(@q) >= @size, the copy of @q will stop after @size bytes, + * and @p will NOT be NUL-terminated + * + * If strlen(@q) < @size, following the copy of @q, trailing NUL bytes + * will be written to @p until @size total bytes have been written. + * + * Do not use this function. While FORTIFY_SOURCE tries to avoid + * over-reads of @q, it cannot defend against writing unterminated + * results to @p. Using strncpy() remains ambiguous and fragile. + * Instead, please choose an alternative, so that the expectation + * of @p's contents is unambiguous: + * + * +--------------------+-----------------+------------+ + * | @p needs to be: | padded to @size | not padded | + * +====================+=================+============+ + * | NUL-terminated | strscpy_pad() | strscpy() | + * +--------------------+-----------------+------------+ + * | not NUL-terminated | strtomem_pad() | strtomem() | + * +--------------------+-----------------+------------+ + * + * Note strscpy*()'s differing return values for detecting truncation, + * and strtomem*()'s expectation that the destination is marked with + * __nonstring when it is a character array. 
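/*
 * Example (not part of the diff): hypothetical callers covering the four
 * cells of the replacement table above; strtomem()/strtomem_pad() are
 * assumed from the corresponding string.h additions in this cycle.
 */
struct example_rec {
	char	name[16];		/* needs NUL termination */
	char	tag[8] __nonstring;	/* fixed-width byte field */
};

static void example_fill(struct example_rec *r, const char *src)
{
	strscpy(r->name, src, sizeof(r->name));	    /* terminated, unpadded */
	strscpy_pad(r->name, src, sizeof(r->name)); /* terminated, padded */
	strtomem(r->tag, src);			    /* unterminated, unpadded */
	strtomem_pad(r->tag, src, 0);		    /* unterminated, zero-padded */
}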
+ * + */ __FORTIFY_INLINE __diagnose_as(__builtin_strncpy, 1, 2, 3) char *strncpy(char * const POS p, const char *q, __kernel_size_t size) { - size_t p_size = __builtin_object_size(p, 1); + size_t p_size = __member_size(p); - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __write_overflow(); if (p_size < size) fortify_panic(__func__); @@ -92,9 +134,9 @@ char *strncpy(char * const POS p, const char *q, __kernel_size_t size) __FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2) char *strcat(char * const POS p, const char *q) { - size_t p_size = __builtin_object_size(p, 1); + size_t p_size = __member_size(p); - if (p_size == (size_t)-1) + if (p_size == SIZE_MAX) return __underlying_strcat(p, q); if (strlcat(p, q, p_size) >= p_size) fortify_panic(__func__); @@ -104,12 +146,12 @@ char *strcat(char * const POS p, const char *q) extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size_t maxlen) { - size_t p_size = __builtin_object_size(p, 1); + size_t p_size = __member_size(p); size_t p_len = __compiletime_strlen(p); size_t ret; /* We can take compile-time actions when maxlen is const. */ - if (__builtin_constant_p(maxlen) && p_len != (size_t)-1) { + if (__builtin_constant_p(maxlen) && p_len != SIZE_MAX) { /* If p is const, we can use its compile-time-known len. */ if (maxlen >= p_size) return p_len; @@ -134,10 +176,10 @@ __FORTIFY_INLINE __diagnose_as(__builtin_strlen, 1) __kernel_size_t __fortify_strlen(const char * const POS p) { __kernel_size_t ret; - size_t p_size = __builtin_object_size(p, 1); + size_t p_size = __member_size(p); /* Give up if we don't know how large p is. */ - if (p_size == (size_t)-1) + if (p_size == SIZE_MAX) return __underlying_strlen(p); ret = strnlen(p, p_size); if (p_size <= ret) @@ -149,12 +191,12 @@ __kernel_size_t __fortify_strlen(const char * const POS p) extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); __FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, size_t size) { - size_t p_size = __builtin_object_size(p, 1); - size_t q_size = __builtin_object_size(q, 1); + size_t p_size = __member_size(p); + size_t q_size = __member_size(q); size_t q_len; /* Full count of source string length. */ size_t len; /* Count of characters going into destination. */ - if (p_size == (size_t)-1 && q_size == (size_t)-1) + if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __real_strlcpy(p, q, size); q_len = strlen(q); len = (q_len >= size) ? size - 1 : q_len; @@ -178,18 +220,18 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s { size_t len; /* Use string size rather than possible enclosing struct size. */ - size_t p_size = __builtin_object_size(p, 1); - size_t q_size = __builtin_object_size(q, 1); + size_t p_size = __member_size(p); + size_t q_size = __member_size(q); /* If we cannot get size of p and q default to call strscpy. */ - if (p_size == (size_t) -1 && q_size == (size_t) -1) + if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __real_strscpy(p, q, size); /* * If size can be known at compile time and is greater than * p_size, generate a compile time write overflow error. 
*/ - if (__builtin_constant_p(size) && size > p_size) + if (__compiletime_lessthan(p_size, size)) __write_overflow(); /* @@ -224,10 +266,10 @@ __FORTIFY_INLINE __diagnose_as(__builtin_strncat, 1, 2, 3) char *strncat(char * const POS p, const char * const POS q, __kernel_size_t count) { size_t p_len, copy_len; - size_t p_size = __builtin_object_size(p, 1); - size_t q_size = __builtin_object_size(q, 1); + size_t p_size = __member_size(p); + size_t q_size = __member_size(q); - if (p_size == (size_t)-1 && q_size == (size_t)-1) + if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __underlying_strncat(p, q, count); p_len = strlen(p); copy_len = strnlen(q, count); @@ -246,15 +288,16 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, /* * Length argument is a constant expression, so we * can perform compile-time bounds checking where - * buffer sizes are known. + * buffer sizes are also known at compile time. */ /* Error when size is larger than enclosing struct. */ - if (p_size > p_size_field && p_size < size) + if (__compiletime_lessthan(p_size_field, p_size) && + __compiletime_lessthan(p_size, size)) __write_overflow(); /* Warn when write size is larger than dest field. */ - if (p_size_field < size) + if (__compiletime_lessthan(p_size_field, size)) __write_overflow_field(p_size_field, size); } /* @@ -268,10 +311,10 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, /* * Always stop accesses beyond the struct that contains the * field, when the buffer's remaining size is known. - * (The -1 test is to optimize away checks where the buffer + * (The SIZE_MAX test is to optimize away checks where the buffer * lengths are unknown.) */ - if (p_size != (size_t)(-1) && p_size < size) + if (p_size != SIZE_MAX && p_size < size) fortify_panic("memset"); } @@ -282,11 +325,11 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, }) /* - * __builtin_object_size() must be captured here to avoid evaluating argument - * side-effects further into the macro layers. + * __struct_size() vs __member_size() must be captured here to avoid + * evaluating argument side-effects further into the macro layers. */ #define memset(p, c, s) __fortify_memset_chk(p, c, s, \ - __builtin_object_size(p, 0), __builtin_object_size(p, 1)) + __struct_size(p), __member_size(p)) /* * To make sure the compiler can enforce protection against buffer overflows, @@ -319,7 +362,7 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, * V = vulnerable to run-time overflow (will need refactoring to solve) * */ -__FORTIFY_INLINE void fortify_memcpy_chk(__kernel_size_t size, +__FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, const size_t p_size, const size_t q_size, const size_t p_size_field, @@ -330,25 +373,28 @@ __FORTIFY_INLINE void fortify_memcpy_chk(__kernel_size_t size, /* * Length argument is a constant expression, so we * can perform compile-time bounds checking where - * buffer sizes are known. + * buffer sizes are also known at compile time. */ /* Error when size is larger than enclosing struct. */ - if (p_size > p_size_field && p_size < size) + if (__compiletime_lessthan(p_size_field, p_size) && + __compiletime_lessthan(p_size, size)) __write_overflow(); - if (q_size > q_size_field && q_size < size) + if (__compiletime_lessthan(q_size_field, q_size) && + __compiletime_lessthan(q_size, size)) __read_overflow2(); /* Warn when write size argument larger than dest field. 
*/ - if (p_size_field < size) + if (__compiletime_lessthan(p_size_field, size)) __write_overflow_field(p_size_field, size); /* * Warn for source field over-read when building with W=1 * or when an over-write happened, so both can be fixed at * the same time. */ - if ((IS_ENABLED(KBUILD_EXTRA_WARN1) || p_size_field < size) && - q_size_field < size) + if ((IS_ENABLED(KBUILD_EXTRA_WARN1) || + __compiletime_lessthan(p_size_field, size)) && + __compiletime_lessthan(q_size_field, size)) __read_overflow2_field(q_size_field, size); } /* @@ -362,41 +408,104 @@ __FORTIFY_INLINE void fortify_memcpy_chk(__kernel_size_t size, /* * Always stop accesses beyond the struct that contains the * field, when the buffer's remaining size is known. - * (The -1 test is to optimize away checks where the buffer + * (The SIZE_MAX test is to optimize away checks where the buffer * lengths are unknown.) */ - if ((p_size != (size_t)(-1) && p_size < size) || - (q_size != (size_t)(-1) && q_size < size)) + if ((p_size != SIZE_MAX && p_size < size) || + (q_size != SIZE_MAX && q_size < size)) fortify_panic(func); + + /* + * Warn when writing beyond destination field size. + * + * We must ignore p_size_field == 0 for existing 0-element + * fake flexible arrays, until they are all converted to + * proper flexible arrays. + * + * The implementation of __builtin_*object_size() behaves + * like sizeof() when not directly referencing a flexible + * array member, which means there will be many bounds checks + * that will appear at run-time, without a way for them to be + * detected at compile-time (as can be done when the destination + * is specifically the flexible array member). + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101832 + */ + if (p_size_field != 0 && p_size_field != SIZE_MAX && + p_size != p_size_field && p_size_field < size) + return true; + + return false; } #define __fortify_memcpy_chk(p, q, size, p_size, q_size, \ p_size_field, q_size_field, op) ({ \ size_t __fortify_size = (size_t)(size); \ - fortify_memcpy_chk(__fortify_size, p_size, q_size, \ - p_size_field, q_size_field, #op); \ + WARN_ONCE(fortify_memcpy_chk(__fortify_size, p_size, q_size, \ + p_size_field, q_size_field, #op), \ + #op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \ + __fortify_size, \ + "field \"" #p "\" at " __FILE__ ":" __stringify(__LINE__), \ + p_size_field); \ __underlying_##op(p, q, __fortify_size); \ }) /* - * __builtin_object_size() must be captured here to avoid evaluating argument - * side-effects further into the macro layers. + * Notes about compile-time buffer size detection: + * + * With these types... + * + * struct middle { + * u16 a; + * u8 middle_buf[16]; + * int b; + * }; + * struct end { + * u16 a; + * u8 end_buf[16]; + * }; + * struct flex { + * int a; + * u8 flex_buf[]; + * }; + * + * void func(TYPE *ptr) { ... 
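/*
 * Example (not part of the diff): the kind of write the new WARN_ONCE()
 * path above reports. With a non-constant length, copying past "hdr"
 * into "body" now triggers the run-time "detected field-spanning write"
 * warning instead of going unnoticed until the whole struct is exceeded.
 * The struct is hypothetical.
 */
struct example_pkt {
	u8	hdr[4];
	u8	body[64];
};

static void example_rx(struct example_pkt *pkt, const void *src, size_t len)
{
	memcpy(pkt->hdr, src, len);	/* warns once when len > 4 (panics past the struct) */
}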
} + * + * Cases where destination size cannot be currently detected: + * - the size of ptr's object (seemingly by design, gcc & clang fail): + * __builtin_object_size(ptr, 1) == SIZE_MAX + * - the size of flexible arrays in ptr's obj (by design, dynamic size): + * __builtin_object_size(ptr->flex_buf, 1) == SIZE_MAX + * - the size of ANY array at the end of ptr's obj (gcc and clang bug): + * __builtin_object_size(ptr->end_buf, 1) == SIZE_MAX + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101836 + * + * Cases where destination size is currently detected: + * - the size of non-array members within ptr's object: + * __builtin_object_size(ptr->a, 1) == 2 + * - the size of non-flexible-array in the middle of ptr's obj: + * __builtin_object_size(ptr->middle_buf, 1) == 16 + * + */ + +/* + * __struct_size() vs __member_size() must be captured here to avoid + * evaluating argument side-effects further into the macro layers. */ #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ - __builtin_object_size(p, 0), __builtin_object_size(q, 0), \ - __builtin_object_size(p, 1), __builtin_object_size(q, 1), \ + __struct_size(p), __struct_size(q), \ + __member_size(p), __member_size(q), \ memcpy) #define memmove(p, q, s) __fortify_memcpy_chk(p, q, s, \ - __builtin_object_size(p, 0), __builtin_object_size(q, 0), \ - __builtin_object_size(p, 1), __builtin_object_size(q, 1), \ + __struct_size(p), __struct_size(q), \ + __member_size(p), __member_size(q), \ memmove) extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan); __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __struct_size(p); - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(__func__); @@ -406,13 +515,13 @@ __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size) __FORTIFY_INLINE __diagnose_as(__builtin_memcmp, 1, 2, 3) int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t size) { - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); + size_t p_size = __struct_size(p); + size_t q_size = __struct_size(q); if (__builtin_constant_p(size)) { - if (p_size < size) + if (__compiletime_lessthan(p_size, size)) __read_overflow(); - if (q_size < size) + if (__compiletime_lessthan(q_size, size)) __read_overflow2(); } if (p_size < size || q_size < size) @@ -423,9 +532,9 @@ int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t __FORTIFY_INLINE __diagnose_as(__builtin_memchr, 1, 2, 3) void *memchr(const void * const POS0 p, int c, __kernel_size_t size) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __struct_size(p); - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(__func__); @@ -435,9 +544,9 @@ void *memchr(const void * const POS0 p, int c, __kernel_size_t size) void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv); __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __struct_size(p); - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(__func__); @@ -447,9 +556,9 @@ __FORTIFY_INLINE void *memchr_inv(const 
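/*
 * Standalone demo (not from the diff) of the detection cases listed in
 * the notes above; build with -O2 so __builtin_object_size() can resolve
 * the accesses. The whole-object case via a pointer parameter reports
 * SIZE_MAX (printed as 18446744073709551615 on 64-bit), as does the
 * trailing-array case the notes call out as a compiler bug.
 */
#include <stdio.h>

struct middle {
	unsigned short a;
	unsigned char middle_buf[16];
	int b;
};

static __attribute__((noinline)) void probe(struct middle *ptr)
{
	printf("ptr object: %zu\n", __builtin_object_size(ptr, 1));		 /* SIZE_MAX */
	printf("middle_buf: %zu\n", __builtin_object_size(ptr->middle_buf, 1)); /* 16 */
	printf("member a  : %zu\n", __builtin_object_size(&ptr->a, 1));		 /* 2 */
}

int main(void)
{
	struct middle m = { 0 };

	probe(&m);
	return 0;
}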
void * const POS0 p, int c, size_t size) extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup); __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __struct_size(p); - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(__func__); @@ -460,16 +569,18 @@ __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp __FORTIFY_INLINE __diagnose_as(__builtin_strcpy, 1, 2) char *strcpy(char * const POS p, const char * const POS q) { - size_t p_size = __builtin_object_size(p, 1); - size_t q_size = __builtin_object_size(q, 1); + size_t p_size = __member_size(p); + size_t q_size = __member_size(q); size_t size; /* If neither buffer size is known, immediately give up. */ - if (p_size == (size_t)-1 && q_size == (size_t)-1) + if (__builtin_constant_p(p_size) && + __builtin_constant_p(q_size) && + p_size == SIZE_MAX && q_size == SIZE_MAX) return __underlying_strcpy(p, q); size = strlen(q) + 1; /* Compile-time check for const size overflow. */ - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __write_overflow(); /* Run-time check for dynamic size overflow. */ if (p_size < size) diff --git a/include/linux/fs.h b/include/linux/fs.h index 9eced4cc286e..0830486f47ef 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1472,7 +1472,7 @@ struct super_block { const struct xattr_handler **s_xattr; #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; - struct key *s_master_keys; /* master crypto keys in use */ + struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */ #endif #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 7d2f1e0f23b1..cad78b569c7e 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -161,24 +161,21 @@ struct fscrypt_operations { int *ino_bits_ret, int *lblk_bits_ret); /* - * Return the number of block devices to which the filesystem may write - * encrypted file contents. + * Return an array of pointers to the block devices to which the + * filesystem may write encrypted file contents, NULL if the filesystem + * only has a single such block device, or an ERR_PTR() on error. + * + * On successful non-NULL return, *num_devs is set to the number of + * devices in the returned array. The caller must free the returned + * array using kfree(). * * If the filesystem can use multiple block devices (other than block * devices that aren't used for encrypted file contents, such as * external journal devices), and wants to support inline encryption, * then it must implement this function. Otherwise it's not needed. */ - int (*get_num_devices)(struct super_block *sb); - - /* - * If ->get_num_devices() returns a value greater than 1, then this - * function is called to get the array of request_queues that the - * filesystem is using -- one per block device. (There may be duplicate - * entries in this array, as block devices can share a request_queue.) 
- */ - void (*get_devices)(struct super_block *sb, - struct request_queue **devs); + struct block_device **(*get_devices)(struct super_block *sb, + unsigned int *num_devs); }; static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) @@ -295,8 +292,6 @@ int fscrypt_parse_test_dummy_encryption(const struct fs_parameter *param, struct fscrypt_dummy_policy *dummy_policy); bool fscrypt_dummy_policies_equal(const struct fscrypt_dummy_policy *p1, const struct fscrypt_dummy_policy *p2); -int fscrypt_set_test_dummy_encryption(struct super_block *sb, const char *arg, - struct fscrypt_dummy_policy *dummy_policy); void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep, struct super_block *sb); static inline bool @@ -312,7 +307,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) } /* keyring.c */ -void fscrypt_sb_free(struct super_block *sb); +void fscrypt_sb_delete(struct super_block *sb); int fscrypt_ioctl_add_key(struct file *filp, void __user *arg); int fscrypt_add_test_dummy_key(struct super_block *sb, const struct fscrypt_dummy_policy *dummy_policy); @@ -353,7 +348,7 @@ u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name); int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); /* bio.c */ -void fscrypt_decrypt_bio(struct bio *bio); +bool fscrypt_decrypt_bio(struct bio *bio); int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len); @@ -526,7 +521,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) } /* keyring.c */ -static inline void fscrypt_sb_free(struct super_block *sb) +static inline void fscrypt_sb_delete(struct super_block *sb) { } @@ -646,8 +641,9 @@ static inline int fscrypt_d_revalidate(struct dentry *dentry, } /* bio.c */ -static inline void fscrypt_decrypt_bio(struct bio *bio) +static inline bool fscrypt_decrypt_bio(struct bio *bio) { + return true; } static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, @@ -768,7 +764,7 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, bool fscrypt_mergeable_bio_bh(struct bio *bio, const struct buffer_head *next_bh); -bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter); +bool fscrypt_dio_supported(struct inode *inode); u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks); @@ -801,11 +797,8 @@ static inline bool fscrypt_mergeable_bio_bh(struct bio *bio, return true; } -static inline bool fscrypt_dio_supported(struct kiocb *iocb, - struct iov_iter *iter) +static inline bool fscrypt_dio_supported(struct inode *inode) { - const struct inode *inode = file_inode(iocb->ki_filp); - return !fscrypt_needs_contents_encryption(inode); } diff --git a/include/linux/init.h b/include/linux/init.h index baf0b29a7010..a0a90cd73ebe 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -47,7 +47,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(".init.text") __cold __latent_entropy __noinitretpoline __nocfi +#define __init __section(".init.text") __cold __latent_entropy __noinitretpoline #define __initdata __section(".init.data") #define __initconst __section(".init.rodata") #define __exitdata __section(".exit.data") @@ -220,8 +220,8 @@ extern bool initcall_debug; __initcall_name(initstub, __iid, id) #define __define_initcall_stub(__stub, fn) \ - int __init __cficanonical __stub(void); \ - int __init __cficanonical __stub(void) \ + int 
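/*
 * Example (not part of the diff): a hedged sketch of a multi-device
 * filesystem implementing the reworked ->get_devices() hook above; the
 * example_sb_info fields are hypothetical. A filesystem with a single
 * block device would simply return NULL.
 */
static struct block_device **example_get_devices(struct super_block *sb,
						 unsigned int *num_devs)
{
	struct example_sb_info *sbi = sb->s_fs_info;	/* hypothetical */
	struct block_device **devs;

	devs = kmalloc_array(2, sizeof(*devs), GFP_KERNEL);
	if (!devs)
		return ERR_PTR(-ENOMEM);
	devs[0] = sbi->data_bdev;	/* hypothetical members */
	devs[1] = sbi->log_bdev;
	*num_devs = 2;
	return devs;			/* caller frees with kfree() */
}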
__init __stub(void); \ + int __init __stub(void) \ { \ return fn(); \ } \ diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index ad39636e0c3f..649faac31ddb 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -15,7 +15,7 @@ #include <asm/sections.h> -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 512 #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s %s]") + \ (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + \ diff --git a/include/linux/key.h b/include/linux/key.h index 7febc4881363..d27477faf00d 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -88,6 +88,12 @@ enum key_need_perm { KEY_DEFER_PERM_CHECK, /* Special: permission check is deferred */ }; +enum key_lookup_flag { + KEY_LOOKUP_CREATE = 0x01, + KEY_LOOKUP_PARTIAL = 0x02, + KEY_LOOKUP_ALL = (KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL), +}; + struct seq_file; struct user_struct; struct signal_struct; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 55041d2f884d..a0b92be98984 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -103,6 +103,7 @@ struct kprobe { * this flag is only for optimized_kprobe. */ #define KPROBE_FLAG_FTRACE 8 /* probe is using ftrace */ +#define KPROBE_FLAG_ON_FUNC_ENTRY 16 /* probe is on the function entry */ /* Has this kprobe gone ? */ static inline bool kprobe_gone(struct kprobe *p) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 60fff133c0b1..f8715ddbfcf4 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -224,6 +224,7 @@ LSM_HOOK(int, -ENOSYS, task_prctl, int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p, struct inode *inode) +LSM_HOOK(int, 0, userns_create, const struct cred *cred) LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag) LSM_HOOK(void, LSM_RET_VOID, ipc_getsecid, struct kern_ipc_perm *ipcp, u32 *secid) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 3aa6030302f5..4ec80b96c22e 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -806,6 +806,10 @@ * security attributes, e.g. for /proc/pid inodes. * @p contains the task_struct for the task. * @inode contains the inode structure for the inode. + * @userns_create: + * Check permission prior to creating a new user namespace. + * @cred points to prepared creds. + * Return 0 if successful, otherwise < 0 error code. * * Security hooks for Netlink messaging. 
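A minimal sketch of how a security module might wire up the new userns_create hook (an illustrative editorial addition, not part of this patch; the example_* names are hypothetical):

        #include <linux/lsm_hooks.h>
        #include <linux/capability.h>

        static int example_userns_create(const struct cred *cred)
        {
                /* Example policy: require CAP_SYS_ADMIN in the creator's ns. */
                if (!ns_capable(cred->user_ns, CAP_SYS_ADMIN))
                        return -EPERM;
                return 0;
        }

        static struct security_hook_list example_hooks[] = {
                LSM_HOOK_INIT(userns_create, example_userns_create),
        };

Returning 0 permits the namespace creation; a negative errno from any registered module denies it before the namespace is set up.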
* diff --git a/include/linux/mdio/mdio-i2c.h b/include/linux/mdio/mdio-i2c.h index b1d27f7cd23f..65b550a6fc32 100644 --- a/include/linux/mdio/mdio-i2c.h +++ b/include/linux/mdio/mdio-i2c.h @@ -11,6 +11,14 @@ struct device; struct i2c_adapter; struct mii_bus; -struct mii_bus *mdio_i2c_alloc(struct device *parent, struct i2c_adapter *i2c); +enum mdio_i2c_proto { + MDIO_I2C_NONE, + MDIO_I2C_MARVELL_C22, + MDIO_I2C_C45, + MDIO_I2C_ROLLBALL, +}; + +struct mii_bus *mdio_i2c_alloc(struct device *parent, struct i2c_adapter *i2c, + enum mdio_i2c_proto protocol); #endif diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f8ecb33105d3..a12929bc31b2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1017,6 +1017,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health); +void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev); void mlx5_drain_health_wq(struct mlx5_core_dev *dev); void mlx5_trigger_health_work(struct mlx5_core_dev *dev); int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, @@ -1288,4 +1289,8 @@ static inline bool mlx5_get_roce_state(struct mlx5_core_dev *dev) return mlx5_is_roce_on(dev); } +enum { + MLX5_OCTWORD = 16, +}; + #endif /* MLX5_DRIVER_H */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1ad762e22d86..06574d430ff5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1491,7 +1491,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_120[0xa]; u8 log_max_ra_req_dc[0x6]; - u8 reserved_at_130[0x9]; + u8 reserved_at_130[0x2]; + u8 eth_wqe_too_small[0x1]; + u8 reserved_at_133[0x6]; u8 vnic_env_cq_overrun[0x1]; u8 log_max_ra_res_dc[0x6]; @@ -3537,7 +3539,9 @@ struct mlx5_ifc_vnic_diagnostic_statistics_bits { u8 cq_overrun[0x20]; - u8 reserved_at_220[0xde0]; + u8 eth_wqe_too_small[0x20]; + + u8 reserved_at_220[0xdc0]; }; struct mlx5_ifc_traffic_counter_bits { diff --git a/include/linux/module.h b/include/linux/module.h index 518296ea7f73..ec61fb53979a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -27,7 +27,6 @@ #include <linux/tracepoint-defs.h> #include <linux/srcu.h> #include <linux/static_call_types.h> -#include <linux/cfi.h> #include <linux/percpu.h> #include <asm/module.h> @@ -132,7 +131,7 @@ extern void cleanup_module(void); { return initfn; } \ int init_module(void) __copy(initfn) \ __attribute__((alias(#initfn))); \ - __CFI_ADDRESSABLE(init_module, __initdata); + ___ADDRESSABLE(init_module, __initdata); /* This is only required if you want to be unloadable. */ #define module_exit(exitfn) \ @@ -140,7 +139,7 @@ extern void cleanup_module(void); { return exitfn; } \ void cleanup_module(void) __copy(exitfn) \ __attribute__((alias(#exitfn))); \ - __CFI_ADDRESSABLE(cleanup_module, __exitdata); + ___ADDRESSABLE(cleanup_module, __exitdata); #endif @@ -387,8 +386,9 @@ struct module { const s32 *crcs; unsigned int num_syms; -#ifdef CONFIG_CFI_CLANG - cfi_check_fn cfi_check; +#ifdef CONFIG_ARCH_USES_CFI_TRAPS + s32 *kcfi_traps; + s32 *kcfi_traps_end; #endif /* Kernel parameters. */ diff --git a/include/linux/once.h b/include/linux/once.h index b14d8b309d52..bc714d414448 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -5,10 +5,18 @@ #include <linux/types.h> #include <linux/jump_label.h> +/* Helpers used from arbitrary contexts. 
+ * Hard irqs are blocked, be cautious. + */ bool __do_once_start(bool *done, unsigned long *flags); void __do_once_done(bool *done, struct static_key_true *once_key, unsigned long *flags, struct module *mod); +/* Variant for process contexts only. */ +bool __do_once_sleepable_start(bool *done); +void __do_once_sleepable_done(bool *done, struct static_key_true *once_key, + struct module *mod); + /* Call a function exactly once. The idea of DO_ONCE() is to perform * a function call such as initialization of random seeds, etc, only * once, where DO_ONCE() can live in the fast-path. After @func has @@ -52,7 +60,27 @@ void __do_once_done(bool *done, struct static_key_true *once_key, ___ret; \ }) +/* Variant of DO_ONCE() for process/sleepable contexts. */ +#define DO_ONCE_SLEEPABLE(func, ...) \ + ({ \ + bool ___ret = false; \ + static bool __section(".data.once") ___done = false; \ + static DEFINE_STATIC_KEY_TRUE(___once_key); \ + if (static_branch_unlikely(&___once_key)) { \ + ___ret = __do_once_sleepable_start(&___done); \ + if (unlikely(___ret)) { \ + func(__VA_ARGS__); \ + __do_once_sleepable_done(&___done, &___once_key,\ + THIS_MODULE); \ + } \ + } \ + ___ret; \ + }) + #define get_random_once(buf, nbytes) \ DO_ONCE(get_random_bytes, (buf), (nbytes)) +#define get_random_sleepable_once(buf, nbytes) \ + DO_ONCE_SLEEPABLE(get_random_bytes, (buf), (nbytes)) + #endif /* _LINUX_ONCE_H */ diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 0eb3b192f07a..19dfdd74835e 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -51,40 +51,50 @@ static inline bool __must_check __must_check_overflow(bool overflow) return unlikely(overflow); } -/* - * For simplicity and code hygiene, the fallback code below insists on - * a, b and *d having the same type (similar to the min() and max() - * macros), whereas gcc's type-generic overflow checkers accept - * different types. Hence we don't just make check_add_overflow an - * alias for __builtin_add_overflow, but add type checks similar to - * below. +/** check_add_overflow() - Calculate addition with overflow checking + * + * @a: first addend + * @b: second addend + * @d: pointer to store sum + * + * Returns 0 on success. + * + * *@d holds the results of the attempted addition, but is not considered + * "safe for use" on a non-zero return value, which indicates that the + * sum has overflowed or been truncated. */ -#define check_add_overflow(a, b, d) __must_check_overflow(({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - __builtin_add_overflow(__a, __b, __d); \ -})) +#define check_add_overflow(a, b, d) \ + __must_check_overflow(__builtin_add_overflow(a, b, d)) -#define check_sub_overflow(a, b, d) __must_check_overflow(({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - __builtin_sub_overflow(__a, __b, __d); \ -})) +/** check_sub_overflow() - Calculate subtraction with overflow checking + * + * @a: minuend; value to subtract from + * @b: subtrahend; value to subtract from @a + * @d: pointer to store difference + * + * Returns 0 on success. + * + * *@d holds the results of the attempted subtraction, but is not considered + * "safe for use" on a non-zero return value, which indicates that the + * difference has underflowed or been truncated. 
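 *
 * A usage sketch (an illustrative editorial addition, not part of the
 * original patch; "budget" and "used" are hypothetical u32 values):
 *
 *      u32 remaining;
 *
 *      if (check_sub_overflow(budget, used, &remaining))
 *              return -ERANGE;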
+ */ +#define check_sub_overflow(a, b, d) \ + __must_check_overflow(__builtin_sub_overflow(a, b, d)) -#define check_mul_overflow(a, b, d) __must_check_overflow(({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - __builtin_mul_overflow(__a, __b, __d); \ -})) +/** check_mul_overflow() - Calculate multiplication with overflow checking + * + * @a: first factor + * @b: second factor + * @d: pointer to store product + * + * Returns 0 on success. + * + * *@d holds the results of the attempted multiplication, but is not + * considered "safe for use" on a non-zero return value, which indicates + * that the product has overflowed or been truncated. + */ +#define check_mul_overflow(a, b, d) \ + __must_check_overflow(__builtin_mul_overflow(a, b, d)) /** check_shl_overflow() - Calculate a left-shifted value and check overflow * diff --git a/include/linux/pci.h b/include/linux/pci.h index 060af91bafcd..5da0846aa3c1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2019,8 +2019,8 @@ enum pci_fixup_pass { #ifdef CONFIG_LTO_CLANG #define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \ class_shift, hook, stub) \ - void __cficanonical stub(struct pci_dev *dev); \ - void __cficanonical stub(struct pci_dev *dev) \ + void stub(struct pci_dev *dev); \ + void stub(struct pci_dev *dev) \ { \ hook(dev); \ } \ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 15b49e655ce3..b362d90eb9b0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -75,6 +75,9 @@ #define PCI_CLASS_COMMUNICATION_MODEM 0x0703 #define PCI_CLASS_COMMUNICATION_OTHER 0x0780 +/* Interface for SERIAL/MODEM */ +#define PCI_SERIAL_16550_COMPATIBLE 0x02 + #define PCI_BASE_CLASS_SYSTEM 0x08 #define PCI_CLASS_SYSTEM_PIC 0x0800 #define PCI_CLASS_SYSTEM_PIC_IOAPIC 0x080010 diff --git a/include/linux/phy.h b/include/linux/phy.h index 9c66f357f489..ddf66198f751 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -571,6 +571,7 @@ struct macsec_ops; * @advertising: Currently advertised linkmodes * @adv_old: Saved advertised while power saving for WoL * @lp_advertising: Current link partner advertised linkmodes + * @host_interfaces: PHY interface modes supported by host * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited * @autoneg: Flag autoneg being used * @rate_matching: Current rate matching mode @@ -596,6 +597,7 @@ struct macsec_ops; * @master_slave_get: Current master/slave advertisement * @master_slave_state: Current master/slave configuration * @mii_ts: Pointer to time stamper callbacks + * @psec: Pointer to Power Sourcing Equipment control struct * @lock: Mutex for serialization access to PHY * @state_queue: Work queue for state machine * @shared: Pointer to private data shared by phys in one package @@ -670,6 +672,9 @@ struct phy_device { /* used with phy_speed_down */ __ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old); + /* Host supported PHY interface types. Should be ignored if empty. 
*/ + DECLARE_PHY_INTERFACE_MASK(host_interfaces); + /* Energy efficient ethernet modes which should be prohibited */ u32 eee_broken_modes; @@ -711,6 +716,7 @@ struct phy_device { struct phylink *phylink; struct net_device *attached_dev; struct mii_timestamper *mii_ts; + struct pse_control *psec; u8 mdix; u8 mdix_ctrl; diff --git a/include/linux/platform_data/tps68470.h b/include/linux/platform_data/tps68470.h index 126d082c3f2e..e605a2cab07f 100644 --- a/include/linux/platform_data/tps68470.h +++ b/include/linux/platform_data/tps68470.h @@ -27,9 +27,14 @@ struct tps68470_regulator_platform_data { const struct regulator_init_data *reg_init_data[TPS68470_NUM_REGULATORS]; }; -struct tps68470_clk_platform_data { +struct tps68470_clk_consumer { const char *consumer_dev_name; const char *consumer_con_id; }; +struct tps68470_clk_platform_data { + unsigned int n_consumers; + struct tps68470_clk_consumer consumers[]; +}; + #endif diff --git a/include/linux/poison.h b/include/linux/poison.h index d62ef5a6b4e9..2d3249eb0e62 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -81,4 +81,7 @@ /********** net/core/page_pool.c **********/ #define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA) +/********** kernel/bpf/ **********/ +#define BPF_PTR_POISON ((void *)(0xeB9FUL + POISON_POINTER_DELTA)) + #endif diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h index b6bd3eac2bcc..8163dd48c430 100644 --- a/include/linux/posix_acl_xattr.h +++ b/include/linux/posix_acl_xattr.h @@ -38,9 +38,6 @@ void posix_acl_fix_xattr_to_user(void *value, size_t size); void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, const struct inode *inode, void *value, size_t size); -void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, - const struct inode *inode, - void *value, size_t size); #else static inline void posix_acl_fix_xattr_from_user(void *value, size_t size) { @@ -54,18 +51,15 @@ posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, size_t size) { } -static inline void -posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, - const struct inode *inode, void *value, - size_t size) -{ -} #endif struct posix_acl *posix_acl_from_xattr(struct user_namespace *user_ns, const void *value, size_t size); int posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, void *buffer, size_t size); +struct posix_acl *vfs_set_acl_prepare(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + const void *value, size_t size); extern const struct xattr_handler posix_acl_access_xattr_handler; extern const struct xattr_handler posix_acl_default_xattr_handler; diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h new file mode 100644 index 000000000000..fb724c65c77b --- /dev/null +++ b/include/linux/pse-pd/pse.h @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* +// Copyright (c) 2022 Pengutronix, Oleksij Rempel <kernel@pengutronix.de> + */ +#ifndef _LINUX_PSE_CONTROLLER_H +#define _LINUX_PSE_CONTROLLER_H + +#include <linux/ethtool.h> +#include <linux/list.h> +#include <uapi/linux/ethtool.h> + +struct phy_device; +struct pse_controller_dev; + +/** + * struct pse_control_config - PSE control/channel configuration. 
+ * + * @admin_control: set PoDL PSE admin control as described in + * IEEE 802.3-2018 30.15.1.2.1 acPoDLPSEAdminControl + */ +struct pse_control_config { + enum ethtool_podl_pse_admin_state admin_control; +}; + +/** + * struct pse_control_status - PSE control/channel status. + * + * @podl_admin_state: operational state of the PoDL PSE + * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState + * @podl_pw_status: power detection status of the PoDL PSE. + * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus + */ +struct pse_control_status { + enum ethtool_podl_pse_admin_state podl_admin_state; + enum ethtool_podl_pse_pw_d_status podl_pw_status; +}; + +/** + * struct pse_controller_ops - PSE controller driver callbacks + * + * @ethtool_get_status: get PSE control status for ethtool interface + * @ethtool_set_config: set PSE control configuration over ethtool interface + */ +struct pse_controller_ops { + int (*ethtool_get_status)(struct pse_controller_dev *pcdev, + unsigned long id, struct netlink_ext_ack *extack, + struct pse_control_status *status); + int (*ethtool_set_config)(struct pse_controller_dev *pcdev, + unsigned long id, struct netlink_ext_ack *extack, + const struct pse_control_config *config); +}; + +struct module; +struct device_node; +struct of_phandle_args; +struct pse_control; + +/** + * struct pse_controller_dev - PSE controller entity that might + * provide multiple PSE controls + * @ops: a pointer to device specific struct pse_controller_ops + * @owner: kernel module of the PSE controller driver + * @list: internal list of PSE controller devices + * @pse_control_head: head of internal list of requested PSE controls + * @dev: corresponding driver model device struct + * @of_pse_n_cells: number of cells in PSE line specifiers + * @of_xlate: translation function to translate from specifier as found in the + * device tree to id as given to the PSE control ops + * @nr_lines: number of PSE controls in this controller device + * @lock: Mutex for serialization access to the PSE controller + */ +struct pse_controller_dev { + const struct pse_controller_ops *ops; + struct module *owner; + struct list_head list; + struct list_head pse_control_head; + struct device *dev; + int of_pse_n_cells; + int (*of_xlate)(struct pse_controller_dev *pcdev, + const struct of_phandle_args *pse_spec); + unsigned int nr_lines; + struct mutex lock; +}; + +#if IS_ENABLED(CONFIG_PSE_CONTROLLER) +int pse_controller_register(struct pse_controller_dev *pcdev); +void pse_controller_unregister(struct pse_controller_dev *pcdev); +struct device; +int devm_pse_controller_register(struct device *dev, + struct pse_controller_dev *pcdev); + +struct pse_control *of_pse_control_get(struct device_node *node); +void pse_control_put(struct pse_control *psec); + +int pse_ethtool_get_status(struct pse_control *psec, + struct netlink_ext_ack *extack, + struct pse_control_status *status); +int pse_ethtool_set_config(struct pse_control *psec, + struct netlink_ext_ack *extack, + const struct pse_control_config *config); + +#else + +static inline struct pse_control *of_pse_control_get(struct device_node *node) +{ + return ERR_PTR(-ENOENT); +} + +static inline void pse_control_put(struct pse_control *psec) +{ +} + +static inline int pse_ethtool_get_status(struct pse_control *psec, + struct netlink_ext_ack *extack, + struct pse_control_status *status) +{ + return -ENOTSUPP; +} + +static inline int pse_ethtool_set_config(struct pse_control *psec, + struct netlink_ext_ack *extack, + const struct pse_control_config 
*config) +{ + return -ENOTSUPP; +} + +#endif + +#endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f527f27e6438..08605ce7379d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -42,7 +42,31 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier_tasks(void); void rcu_barrier_tasks_rude(void); void synchronize_rcu(void); + +struct rcu_gp_oldstate; unsigned long get_completed_synchronize_rcu(void); +void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); + +// Maximum number of unsigned long values corresponding to +// not-yet-completed RCU grace periods. +#define NUM_ACTIVE_RCU_POLL_OLDSTATE 2 + +/** + * same_state_synchronize_rcu - Are two old-state values identical? + * @oldstate1: First old-state value. + * @oldstate2: Second old-state value. + * + * The two old-state values must have been obtained from either + * get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or + * get_completed_synchronize_rcu(). Returns @true if the two values are + * identical and @false otherwise. This allows structures whose lifetimes + * are tracked by old-state values to push these values to a list header, + * allowing those structures to be slightly smaller. + */ +static inline bool same_state_synchronize_rcu(unsigned long oldstate1, unsigned long oldstate2) +{ + return oldstate1 == oldstate2; +} #ifdef CONFIG_PREEMPT_RCU @@ -496,13 +520,21 @@ do { \ * against NULL. Although rcu_access_pointer() may also be used in cases * where update-side locks prevent the value of the pointer from changing, * you should instead use rcu_dereference_protected() for this use case. + * Within an RCU read-side critical section, there is little reason to + * use rcu_access_pointer(). + * + * It is usually best to test the rcu_access_pointer() return value + * directly in order to avoid accidental dereferences being introduced + * by later inattentive changes. In other words, assigning the + * rcu_access_pointer() return value to a local variable results in an + * accident waiting to happen. * * It is also permissible to use rcu_access_pointer() when read-side - * access to the pointer was removed at least one grace period ago, as - * is the case in the context of the RCU callback that is freeing up - * the data, or after a synchronize_rcu() returns. This can be useful - * when tearing down multi-linked structures after a grace period - * has elapsed. + * access to the pointer was removed at least one grace period ago, as is + * the case in the context of the RCU callback that is freeing up the data, + * or after a synchronize_rcu() returns. This can be useful when tearing + * down multi-linked structures after a grace period has elapsed. However, + * rcu_dereference_protected() is normally preferred for this use case. */ #define rcu_access_pointer(p) __rcu_access_pointer((p), __UNIQUE_ID(rcu), __rcu) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 62815c0a2dce..768196a5f39d 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -14,25 +14,75 @@ #include <asm/param.h> /* for HZ */ +struct rcu_gp_oldstate { + unsigned long rgos_norm; +}; + +// Maximum number of rcu_gp_oldstate values corresponding to +// not-yet-completed RCU grace periods. +#define NUM_ACTIVE_RCU_POLL_FULL_OLDSTATE 2 + +/* + * Are the two oldstate values the same? See the Tree RCU version for + * docbook header. 
+ */ +static inline bool same_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp1, + struct rcu_gp_oldstate *rgosp2) +{ + return rgosp1->rgos_norm == rgosp2->rgos_norm; +} + unsigned long get_state_synchronize_rcu(void); + +static inline void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + rgosp->rgos_norm = get_state_synchronize_rcu(); +} + unsigned long start_poll_synchronize_rcu(void); + +static inline void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + rgosp->rgos_norm = start_poll_synchronize_rcu(); +} + bool poll_state_synchronize_rcu(unsigned long oldstate); +static inline bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + return poll_state_synchronize_rcu(rgosp->rgos_norm); +} + static inline void cond_synchronize_rcu(unsigned long oldstate) { might_sleep(); } +static inline void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + cond_synchronize_rcu(rgosp->rgos_norm); +} + static inline unsigned long start_poll_synchronize_rcu_expedited(void) { return start_poll_synchronize_rcu(); } +static inline void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp) +{ + rgosp->rgos_norm = start_poll_synchronize_rcu_expedited(); +} + static inline void cond_synchronize_rcu_expedited(unsigned long oldstate) { cond_synchronize_rcu(oldstate); } +static inline void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp) +{ + cond_synchronize_rcu_expedited(rgosp->rgos_norm); +} + extern void rcu_barrier(void); static inline void synchronize_rcu_expedited(void) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 47eaa4cb0df7..5efb51486e8a 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -40,12 +40,52 @@ bool rcu_eqs_special_set(int cpu); void rcu_momentary_dyntick_idle(void); void kfree_rcu_scheduler_running(void); bool rcu_gp_might_be_stalled(void); + +struct rcu_gp_oldstate { + unsigned long rgos_norm; + unsigned long rgos_exp; +}; + +// Maximum number of rcu_gp_oldstate values corresponding to +// not-yet-completed RCU grace periods. +#define NUM_ACTIVE_RCU_POLL_FULL_OLDSTATE 4 + +/** + * same_state_synchronize_rcu_full - Are two old-state values identical? + * @rgosp1: First old-state value. + * @rgosp2: Second old-state value. + * + * The two old-state values must have been obtained from either + * get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), + * or get_completed_synchronize_rcu_full(). Returns @true if the two + * values are identical and @false otherwise. This allows structures + * whose lifetimes are tracked by old-state values to push these values + * to a list header, allowing those structures to be slightly smaller. + * + * Note that equality is judged on a bitwise basis, so that an + * @rcu_gp_oldstate structure with an already-completed state in one field + * will compare not-equal to a structure with an already-completed state + * in the other field. After all, the @rcu_gp_oldstate structure is opaque + * so how did such a situation come to pass in the first place? 
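 *
 * A usage sketch (an illustrative editorial addition, not part of the
 * original patch):
 *
 *      struct rcu_gp_oldstate a, b;
 *
 *      get_state_synchronize_rcu_full(&a);
 *      get_state_synchronize_rcu_full(&b);
 *      if (same_state_synchronize_rcu_full(&a, &b))
 *              pr_info("both cookies name the same grace-period state\n");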
+ */ +static inline bool same_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp1, + struct rcu_gp_oldstate *rgosp2) +{ + return rgosp1->rgos_norm == rgosp2->rgos_norm && rgosp1->rgos_exp == rgosp2->rgos_exp; +} + unsigned long start_poll_synchronize_rcu_expedited(void); +void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu_expedited(unsigned long oldstate); +void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp); unsigned long get_state_synchronize_rcu(void); +void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); unsigned long start_poll_synchronize_rcu(void); +void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); bool poll_state_synchronize_rcu(unsigned long oldstate); +bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu(unsigned long oldstate); +void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); bool rcu_is_idle_cpu(int cpu); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 21deb5212bbd..0cf5b20c6ddf 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -15,6 +15,9 @@ int proc_resctrl_show(struct seq_file *m, #endif +/* max value for struct rdt_domain's mbps_val */ +#define MBA_MAX_MBPS U32_MAX + /** * enum resctrl_conf_type - The type of configuration. * @CDP_NONE: No prioritisation, both code and data are controlled or monitored. @@ -29,6 +32,16 @@ enum resctrl_conf_type { #define CDP_NUM_TYPES (CDP_DATA + 1) +/* + * Event IDs, the values match those used to program IA32_QM_EVTSEL before + * reading IA32_QM_CTR on RDT systems. + */ +enum resctrl_event_id { + QOS_L3_OCCUP_EVENT_ID = 0x01, + QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, + QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, +}; + /** * struct resctrl_staged_config - parsed configuration to be applied * @new_ctrl: new ctrl value to be loaded @@ -53,6 +66,9 @@ struct resctrl_staged_config { * @cqm_work_cpu: worker CPU for CQM h/w counters * @plr: pseudo-locked region (if any) associated with domain * @staged_config: parsed configuration to be applied + * @mbps_val: When mba_sc is enabled, this holds the array of user + * specified control values for mba_sc in MBps, indexed + * by closid */ struct rdt_domain { struct list_head list; @@ -67,6 +83,7 @@ struct rdt_domain { int cqm_work_cpu; struct pseudo_lock_region *plr; struct resctrl_staged_config staged_config[CDP_NUM_TYPES]; + u32 *mbps_val; }; /** @@ -130,8 +147,6 @@ struct resctrl_schema; /** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource - * @alloc_enabled: Is allocation enabled on this machine - * @mon_enabled: Is monitoring enabled for this feature * @alloc_capable: Is allocation available on this machine * @mon_capable: Is monitor feature available on this machine * @num_rmid: Number of RMIDs available @@ -150,8 +165,6 @@ struct resctrl_schema; */ struct rdt_resource { int rid; - bool alloc_enabled; - bool mon_enabled; bool alloc_capable; bool mon_capable; int num_rmid; @@ -194,7 +207,50 @@ struct resctrl_schema { /* The number of closid supported by this resource regardless of CDP */ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); + +/* + * Update the ctrl_val and apply this config right now. + * Must be called on one of the domain's CPUs. 
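 *
 * For example (an illustrative editorial addition, not part of the original
 * patch), a caller already running on one of @d's CPUs might apply a new
 * control value with:
 *
 *      err = resctrl_arch_update_one(r, d, closid, CDP_NONE, cfg_val);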
+ */ +int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, + u32 closid, enum resctrl_conf_type t, u32 cfg_val); + u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type); +int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d); +void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d); + +/** + * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid + * for this resource and domain. + * @r: resource that the counter should be read from. + * @d: domain that the counter should be read from. + * @rmid: rmid of the counter to read. + * @eventid: eventid to read, e.g. L3 occupancy. + * @val: result of the counter read in bytes. + * + * Call from process context on a CPU that belongs to domain @d. + * + * Return: + * 0 on success, or -EIO, -EINVAL etc on error. + */ +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, + u32 rmid, enum resctrl_event_id eventid, u64 *val); + +/** + * resctrl_arch_reset_rmid() - Reset any private state associated with rmid + * and eventid. + * @r: The domain's resource. + * @d: The rmid's domain. + * @rmid: The rmid whose counter values should be reset. + * @eventid: The eventid whose counter values should be reset. + * + * This can be called from any CPU. + */ +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, + u32 rmid, enum resctrl_event_id eventid); + +extern unsigned int resctrl_rmid_realloc_threshold; +extern unsigned int resctrl_rmid_realloc_limit; #endif /* _RESCTRL_H */ diff --git a/include/linux/security.h b/include/linux/security.h index 7bd0c490703d..3480f61e1b2d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -437,6 +437,7 @@ int security_task_kill(struct task_struct *p, struct kernel_siginfo *info, int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); void security_task_to_inode(struct task_struct *p, struct inode *inode); +int security_create_user_ns(const struct cred *cred); int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag); void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid); int security_msg_msg_alloc(struct msg_msg *msg); @@ -1194,6 +1195,11 @@ static inline int security_task_prctl(int option, unsigned long arg2, static inline void security_task_to_inode(struct task_struct *p, struct inode *inode) { } +static inline int security_create_user_ns(const struct cred *cred) +{ + return 0; +} + static inline int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag) { diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 302094b855fb..d1f343853b6c 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -535,7 +535,7 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); bool sfp_may_have_phy(struct sfp_bus *bus, const struct sfp_eeprom_id *id); void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, - unsigned long *support); + unsigned long *support, unsigned long *interfaces); phy_interface_t sfp_select_interface(struct sfp_bus *bus, unsigned long *link_modes); @@ -568,7 +568,8 @@ static inline bool sfp_may_have_phy(struct sfp_bus *bus, static inline void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, - unsigned long *support) + unsigned long *support, + unsigned long *interfaces) { } diff --git a/include/linux/srcutiny.h 
b/include/linux/srcutiny.h index 6cfaa0a9a9b9..5aa5e0faf6a1 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -15,10 +15,10 @@ struct srcu_struct { short srcu_lock_nesting[2]; /* srcu_read_lock() nesting depth. */ - unsigned short srcu_idx; /* Current reader array element in bit 0x2. */ - unsigned short srcu_idx_max; /* Furthest future srcu_idx request. */ u8 srcu_gp_running; /* GP workqueue running? */ u8 srcu_gp_waiting; /* GP waiting for readers? */ + unsigned long srcu_idx; /* Current reader array element in bit 0x2. */ + unsigned long srcu_idx_max; /* Furthest future srcu_idx request. */ struct swait_queue_head srcu_wq; /* Last srcu_read_unlock() wakes GP. */ struct rcu_head *srcu_cb_head; /* Pending callbacks: Head. */ @@ -82,10 +82,12 @@ static inline void srcu_torture_stats_print(struct srcu_struct *ssp, int idx; idx = ((data_race(READ_ONCE(ssp->srcu_idx)) + 1) & 0x2) >> 1; - pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n", + pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd) gp: %lu->%lu\n", tt, tf, idx, data_race(READ_ONCE(ssp->srcu_lock_nesting[!idx])), - data_race(READ_ONCE(ssp->srcu_lock_nesting[idx]))); + data_race(READ_ONCE(ssp->srcu_lock_nesting[idx])), + data_race(READ_ONCE(ssp->srcu_idx)), + data_race(READ_ONCE(ssp->srcu_idx_max))); } #endif diff --git a/include/linux/stat.h b/include/linux/stat.h index 7df06931f25d..ff277ced50e9 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -50,6 +50,8 @@ struct kstat { struct timespec64 btime; /* File creation time */ u64 blocks; u64 mnt_id; + u32 dio_mem_align; + u32 dio_offset_align; }; #endif diff --git a/include/linux/string.h b/include/linux/string.h index 61ec7e4f6311..cf7607b32102 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -261,6 +261,49 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, int pad); /** + * strtomem_pad - Copy NUL-terminated string to non-NUL-terminated buffer + * + * @dest: Pointer of destination character array (marked as __nonstring) + * @src: Pointer to NUL-terminated string + * @pad: Padding character to fill any remaining bytes of @dest after copy + * + * This is a replacement for strncpy() uses where the destination is not + * a NUL-terminated string, but with bounds checking on the source size, and + * an explicit padding character. If padding is not required, use strtomem(). + * + * Note that the size of @dest is not an argument, as the length of @dest + * must be discoverable by the compiler. + */ +#define strtomem_pad(dest, src, pad) do { \ + const size_t _dest_len = __builtin_object_size(dest, 1); \ + \ + BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ + _dest_len == (size_t)-1); \ + memcpy_and_pad(dest, _dest_len, src, strnlen(src, _dest_len), pad); \ +} while (0) + +/** + * strtomem - Copy NUL-terminated string to non-NUL-terminated buffer + * + * @dest: Pointer of destination character array (marked as __nonstring) + * @src: Pointer to NUL-terminated string + * + * This is a replacement for strncpy() uses where the destination is not + * a NUL-terminated string, but with bounds checking on the source size, and + * without trailing padding. If padding is required, use strtomem_pad(). + * + * Note that the size of @dest is not an argument, as the length of @dest + * must be discoverable by the compiler. 
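 *
 * A usage sketch (an illustrative editorial addition, not part of the
 * original patch; "struct record" is hypothetical):
 *
 *      struct record {
 *              char tag[8] __nonstring;
 *      } rec;
 *
 *      strtomem(rec.tag, "payload");
 *
 * Here the compiler can see that rec.tag is 8 bytes, so the copy is
 * bounded without a runtime size argument.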
+ */ +#define strtomem(dest, src) do { \ + const size_t _dest_len = __builtin_object_size(dest, 1); \ + \ + BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ + _dest_len == (size_t)-1); \ + memcpy(dest, src, min(_dest_len, strnlen(src, _dest_len))); \ +} while (0) + +/** * memset_after - Set a value after a struct member to the end of a struct * * @obj: Address of target struct instance diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index daecb009c05b..88de45491376 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -472,6 +472,7 @@ struct svc_procedure { /* XDR free result: */ void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ + unsigned int pc_argzero; /* how much of argument to clear */ unsigned int pc_ressize; /* result struct size */ unsigned int pc_cachetype; /* cache info (NFS) */ unsigned int pc_xdrressize; /* maximum size of XDR reply */ @@ -544,16 +545,27 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) } /** - * svcxdr_init_decode - Prepare an xdr_stream for svc Call decoding + * svcxdr_init_decode - Prepare an xdr_stream for Call decoding * @rqstp: controlling server RPC transaction context * + * This function currently assumes the RPC header in rq_arg has + * already been decoded. Upon return, xdr->p points to the + * location of the upper layer header. */ static inline void svcxdr_init_decode(struct svc_rqst *rqstp) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; - struct kvec *argv = rqstp->rq_arg.head; + struct xdr_buf *buf = &rqstp->rq_arg; + struct kvec *argv = buf->head; - xdr_init_decode(xdr, &rqstp->rq_arg, argv->iov_base, NULL); + /* + * svc_getnl() and friends do not keep the xdr_buf's ::len + * field up to date. Refresh that field before initializing + * the argument decoding stream. 
+ */ + buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len; + + xdr_init_decode(xdr, buf, argv->iov_base, NULL); xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); } @@ -576,7 +588,7 @@ static inline void svcxdr_init_encode(struct svc_rqst *rqstp) xdr->end = resv->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; buf->len = resv->iov_len; xdr->page_ptr = buf->pages - 1; - buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages); + buf->buflen = PAGE_SIZE * (rqstp->rq_page_end - buf->pages); buf->buflen -= rqstp->rq_auth_slack; xdr->rqst = NULL; } diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 69175029abbb..f84e2a1358e1 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -240,6 +240,8 @@ typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); +extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, struct rpc_rqst *rqst); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 70f2921e2e70..23a253df7f6b 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -75,7 +75,7 @@ extern struct suspend_stats suspend_stats; static inline void dpm_save_failed_dev(const char *name) { - strlcpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev], + strscpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev], name, sizeof(suspend_stats.failed_devs[0])); suspend_stats.last_failed_dev++; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a9fbe22732c3..41b1da621a45 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -295,7 +295,7 @@ struct tcp_sock { u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ u32 max_packets_out; /* max packets_out in last window */ - u32 max_packets_seq; /* right edge of max_packets_out flight */ + u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */ u16 urg_data; /* Saved octet of OOB data and control flags */ u8 ecn_flags; /* ECN status bits. */ @@ -388,6 +388,12 @@ struct tcp_sock { u8 bpf_sock_ops_cb_flags; /* Control calling BPF programs * values defined in uapi/linux/tcp.h */ + u8 bpf_chg_cc_inprogress:1; /* In the middle of + * bpf_setsockopt(TCP_CONGESTION), + * it is to avoid the bpf_tcp_cc->init() + * to recur itself by calling + * bpf_setsockopt(TCP_CONGESTION, "itself"). + */ #define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) (TP->bpf_sock_ops_cb_flags & ARG) #else #define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0 diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 1386c713885d..6f1ec4fb7ef8 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -17,8 +17,6 @@ #include <linux/workqueue.h> #include <uapi/linux/thermal.h> -#define THERMAL_MAX_TRIPS 12 - /* invalid cooling state */ #define THERMAL_CSTATE_INVALID -1UL @@ -296,82 +294,53 @@ struct thermal_zone_params { int offset; }; -/** - * struct thermal_zone_of_device_ops - callbacks for handling DT based zones - * - * Mandatory: - * @get_temp: a pointer to a function that reads the sensor temperature. - * - * Optional: - * @get_trend: a pointer to a function that reads the sensor temperature trend. - * @set_trips: a pointer to a function that sets a temperature window. 
When - this window is left the driver must inform the thermal core via - thermal_zone_device_update. - * @set_emul_temp: a pointer to a function that sets sensor emulated - * temperature. - * @set_trip_temp: a pointer to a function that sets the trip temperature on - * hardware. - * @change_mode: a pointer to a function that notifies the thermal zone - * mode change. - */ -struct thermal_zone_of_device_ops { - int (*get_temp)(void *, int *); - int (*get_trend)(void *, int, enum thermal_trend *); - int (*set_trips)(void *, int, int); - int (*set_emul_temp)(void *, int); - int (*set_trip_temp)(void *, int, int); - int (*change_mode) (void *, enum thermal_device_mode); -}; - /* Function declarations */ #ifdef CONFIG_THERMAL_OF +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data, + const struct thermal_zone_device_ops *ops); + +struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data, + const struct thermal_zone_device_ops *ops); + +void thermal_of_zone_unregister(struct thermal_zone_device *tz); + +void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_device *tz); + int thermal_zone_of_get_sensor_id(struct device_node *tz_np, struct device_node *sensor_np, u32 *id); -struct thermal_zone_device * -thermal_zone_of_sensor_register(struct device *dev, int id, void *data, - const struct thermal_zone_of_device_ops *ops); -void thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tz); -struct thermal_zone_device *devm_thermal_zone_of_sensor_register( - struct device *dev, int id, void *data, - const struct thermal_zone_of_device_ops *ops); -void devm_thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tz); #else - -static inline int thermal_zone_of_get_sensor_id(struct device_node *tz_np, - struct device_node *sensor_np, - u32 *id) -{ - return -ENOENT; -} -static inline struct thermal_zone_device * -thermal_zone_of_sensor_register(struct device *dev, int id, void *data, - const struct thermal_zone_of_device_ops *ops) +static inline +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data, + const struct thermal_zone_device_ops *ops) { - return ERR_PTR(-ENODEV); + return ERR_PTR(-ENOTSUPP); } static inline -void thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tz) +struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data, + const struct thermal_zone_device_ops *ops) { + return ERR_PTR(-ENOTSUPP); } -static inline struct thermal_zone_device *devm_thermal_zone_of_sensor_register( - struct device *dev, int id, void *data, - const struct thermal_zone_of_device_ops *ops) +static inline void thermal_of_zone_unregister(struct thermal_zone_device *tz) { - return ERR_PTR(-ENODEV); } -static inline -void devm_thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tz) +static inline void devm_thermal_of_zone_unregister(struct device *dev, + struct thermal_zone_device *tz) { } +static inline int thermal_zone_of_get_sensor_id(struct device_node *tz_np, + struct device_node *sensor_np, + u32 *id) +{ + return -ENOENT; +} #endif #ifdef CONFIG_THERMAL diff --git a/include/linux/verification.h index a655923335ae..f34e50ebcf60 100644 --- a/include/linux/verification.h +++ 
b/include/linux/verification.h @@ -17,6 +17,14 @@ #define VERIFY_USE_SECONDARY_KEYRING ((struct key *)1UL) #define VERIFY_USE_PLATFORM_KEYRING ((struct key *)2UL) +static inline int system_keyring_id_check(u64 id) +{ + if (id > (unsigned long)VERIFY_USE_PLATFORM_KEYRING) + return -EINVAL; + + return 0; +} + /* * The use to which an asymmetric key is being put. */ diff --git a/include/linux/wireless.h b/include/linux/wireless.h index 2d1b54556eff..e6e34d74dda0 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -26,7 +26,15 @@ struct compat_iw_point { struct __compat_iw_event { __u16 len; /* Real length of this stuff */ __u16 cmd; /* Wireless IOCTL */ - compat_caddr_t pointer; + + union { + compat_caddr_t pointer; + + /* we need ptr_bytes to make memcpy() run-time destination + * buffer bounds checking happy, nothing special + */ + DECLARE_FLEX_ARRAY(__u8, ptr_bytes); + }; }; #define IW_EV_COMPAT_LCP_LEN offsetof(struct __compat_iw_event, pointer) #define IW_EV_COMPAT_POINT_OFF offsetof(struct compat_iw_point, length) diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 979a9d3e5bfb..4c379d23ec6e 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -61,7 +61,7 @@ int __vfs_setxattr_locked(struct user_namespace *, struct dentry *, const char *, const void *, size_t, int, struct inode **); int vfs_setxattr(struct user_namespace *, struct dentry *, const char *, - void *, size_t, int); + const void *, size_t, int); int __vfs_removexattr(struct user_namespace *, struct dentry *, const char *); int __vfs_removexattr_locked(struct user_namespace *, struct dentry *, const char *, struct inode **); diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index e72f3b247b5e..bcc5a4cd2c17 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -627,6 +627,7 @@ static inline bool iso_enabled(void) int mgmt_init(void); void mgmt_exit(void); +void mgmt_cleanup(struct sock *sk); void bt_sock_reclassify_lock(struct sock *sk, int proto); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index cf29511b25a8..e004ba04a9ae 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -354,6 +354,10 @@ enum { HCI_LE_SIMULTANEOUS_ROLES, HCI_CMD_DRAIN_WORKQUEUE, + HCI_MESH_EXPERIMENTAL, + HCI_MESH, + HCI_MESH_SENDING, + __HCI_NUM_FLAGS, }; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index e7862903187d..c54bc71254af 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -238,6 +238,7 @@ struct adv_info { bool enabled; bool pending; bool periodic; + __u8 mesh; __u8 instance; __u32 flags; __u16 timeout; @@ -372,6 +373,8 @@ struct hci_dev { __u8 le_resolv_list_size; __u8 le_num_of_adv_sets; __u8 le_states[8]; + __u8 mesh_ad_types[16]; + __u8 mesh_send_ref; __u8 commands[64]; __u8 hci_ver; __u16 hci_rev; @@ -511,6 +514,7 @@ struct hci_dev { struct list_head cmd_sync_work_list; struct mutex cmd_sync_work_lock; struct work_struct cmd_sync_cancel_work; + struct work_struct reenable_adv_work; __u16 discov_timeout; struct delayed_work discov_off; @@ -561,6 +565,7 @@ struct hci_dev { struct hci_conn_hash conn_hash; + struct list_head mesh_pending; struct list_head mgmt_pending; struct list_head reject_list; struct list_head accept_list; @@ -614,6 +619,8 @@ struct hci_dev { struct delayed_work rpa_expired; bdaddr_t rpa; + struct delayed_work mesh_send_done; + enum { INTERLEAVE_SCAN_NONE, INTERLEAVE_SCAN_NO_FILTER, @@ 
-1576,7 +1583,8 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, u8 *scan_rsp_data, u16 timeout, u16 duration, s8 tx_power, - u32 min_interval, u32 max_interval); + u32 min_interval, u32 max_interval, + u8 mesh_handle); struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, u32 flags, u8 data_len, u8 *data, u32 min_interval, u32 max_interval); @@ -1997,6 +2005,9 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); #define DISCOV_LE_FAST_ADV_INT_MAX 0x00F0 /* 150 msec */ #define DISCOV_LE_PER_ADV_INT_MIN 0x00A0 /* 200 msec */ #define DISCOV_LE_PER_ADV_INT_MAX 0x00A0 /* 200 msec */ +#define DISCOV_LE_ADV_MESH_MIN 0x00A0 /* 100 msec */ +#define DISCOV_LE_ADV_MESH_MAX 0x00A0 /* 100 msec */ +#define INTERVAL_TO_MS(x) (((x) * 10) / 0x10) #define NAME_RESOLVE_DURATION msecs_to_jiffies(10240) /* 10.24 sec */ @@ -2048,7 +2059,8 @@ void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status); void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status); void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, - u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len); + u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len, + u64 instant); void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, s8 rssi, u8 *name, u8 name_len); void mgmt_discovering(struct hci_dev *hdev, u8 discovering); @@ -2075,6 +2087,7 @@ int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, bdaddr_t *bdaddr, u8 addr_type); +int hci_abort_conn(struct hci_conn *conn, u8 reason); u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 3843f5060c73..17f5a4c32f36 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -16,6 +16,7 @@ struct hci_cmd_sync_work_entry { hci_cmd_sync_work_destroy_t destroy; }; +struct adv_info; /* Function with sync suffix shall not be called with hdev->lock held as they * wait the command to complete and in the meantime an event could be received * which could attempt to acquire hdev->lock causing a deadlock. 
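A sketch of the intended calling pattern (an illustrative editorial addition, not part of this patch; example_adv_work is a hypothetical function): work queued via hci_cmd_sync_queue() later runs without hdev->lock held, so it may call the *_sync helpers directly:

        static int example_adv_work(struct hci_dev *hdev, void *data)
        {
                /* Runs from the cmd_sync workqueue, hdev->lock not held. */
                return hci_update_adv_data_sync(hdev, 0x00);
        }

        err = hci_cmd_sync_queue(hdev, example_adv_work, NULL, NULL);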
@@ -51,11 +52,16 @@ int hci_update_class_sync(struct hci_dev *hdev); int hci_update_name_sync(struct hci_dev *hdev); int hci_write_ssp_mode_sync(struct hci_dev *hdev, u8 mode); +int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, + bool use_rpa, struct adv_info *adv_instance, + u8 *own_addr_type, bdaddr_t *rand_addr); + int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy, bool rpa, u8 *own_addr_type); int hci_update_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance); int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance); +int hci_update_adv_data(struct hci_dev *hdev, u8 instance); int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance, bool force); @@ -72,7 +78,8 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk, u8 instance, bool force); int hci_disable_advertising_sync(struct hci_dev *hdev); - +int hci_clear_adv_instance_sync(struct hci_dev *hdev, struct sock *sk, + u8 instance, bool force); int hci_update_passive_scan_sync(struct hci_dev *hdev); int hci_update_passive_scan(struct hci_dev *hdev); int hci_read_rssi_sync(struct hci_dev *hdev, __le16 handle); diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 7c1ad0f6fcec..743f6f59dff8 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -837,6 +837,42 @@ struct mgmt_cp_add_adv_patterns_monitor_rssi { struct mgmt_adv_pattern patterns[]; } __packed; #define MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE 8 +#define MGMT_OP_SET_MESH_RECEIVER 0x0057 +struct mgmt_cp_set_mesh { + __u8 enable; + __le16 window; + __le16 period; + __u8 num_ad_types; + __u8 ad_types[]; +} __packed; +#define MGMT_SET_MESH_RECEIVER_SIZE 6 + +#define MGMT_OP_MESH_READ_FEATURES 0x0058 +#define MGMT_MESH_READ_FEATURES_SIZE 0 +#define MESH_HANDLES_MAX 3 +struct mgmt_rp_mesh_read_features { + __le16 index; + __u8 max_handles; + __u8 used_handles; + __u8 handles[MESH_HANDLES_MAX]; +} __packed; + +#define MGMT_OP_MESH_SEND 0x0059 +struct mgmt_cp_mesh_send { + struct mgmt_addr_info addr; + __le64 instant; + __le16 delay; + __u8 cnt; + __u8 adv_data_len; + __u8 adv_data[]; +} __packed; +#define MGMT_MESH_SEND_SIZE 19 + +#define MGMT_OP_MESH_SEND_CANCEL 0x005A +struct mgmt_cp_mesh_send_cancel { + __u8 handle; +} __packed; +#define MGMT_MESH_SEND_CANCEL_SIZE 1 #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { @@ -1120,3 +1156,19 @@ struct mgmt_ev_adv_monitor_device_lost { __le16 monitor_handle; struct mgmt_addr_info addr; } __packed; + +#define MGMT_EV_MESH_DEVICE_FOUND 0x0031 +struct mgmt_ev_mesh_device_found { + struct mgmt_addr_info addr; + __s8 rssi; + __le64 instant; + __le32 flags; + __le16 eir_len; + __u8 eir[]; +} __packed; + + +#define MGMT_EV_MESH_PACKET_CMPLT 0x0032 +struct mgmt_ev_mesh_pkt_cmplt { + __u8 handle; +} __packed; diff --git a/include/net/devlink.h b/include/net/devlink.h index 264aa98e6da6..ba6b8b094943 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -129,7 +129,9 @@ struct devlink_port { void *type_dev; struct devlink_port_attrs attrs; u8 attrs_set:1, - switch_port:1; + switch_port:1, + registered:1, + initialized:1; struct delayed_work type_warn_dw; struct list_head reporter_list; struct mutex reporters_lock; /* Protects reporter_list */ @@ -1562,6 +1564,9 @@ void devlink_set_features(struct devlink *devlink, u64 features); void devlink_register(struct devlink *devlink); void devlink_unregister(struct 
devlink *devlink); void devlink_free(struct devlink *devlink); +void devlink_port_init(struct devlink *devlink, + struct devlink_port *devlink_port); +void devlink_port_fini(struct devlink_port *devlink_port); int devl_port_register(struct devlink *devlink, struct devlink_port *devlink_port, unsigned int port_index); diff --git a/include/net/dsa.h b/include/net/dsa.h index d777eac5694f..ee369670e20e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -294,8 +294,6 @@ struct dsa_port { u8 lag_tx_enabled:1; - u8 devlink_port_setup:1; - /* Master state bits, valid only on CPU ports */ u8 master_admin_up:1; u8 master_oper_up:1; diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 22a6924bf6da..a454cf4327fe 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -11,6 +11,7 @@ enum metadata_type { METADATA_IP_TUNNEL, METADATA_HW_PORT_MUX, METADATA_MACSEC, + METADATA_XFRM, }; struct hw_port_info { @@ -22,6 +23,11 @@ struct macsec_info { sci_t sci; }; +struct xfrm_md_info { + u32 if_id; + int link; +}; + struct metadata_dst { struct dst_entry dst; enum metadata_type type; @@ -29,6 +35,7 @@ struct metadata_dst { struct ip_tunnel_info tun_info; struct hw_port_info port_info; struct macsec_info macsec_info; + struct xfrm_md_info xfrm_info; } u; }; @@ -60,6 +67,27 @@ skb_tunnel_info(const struct sk_buff *skb) return NULL; } +static inline struct xfrm_md_info *lwt_xfrm_info(struct lwtunnel_state *lwt) +{ + return (struct xfrm_md_info *)lwt->data; +} + +static inline struct xfrm_md_info *skb_xfrm_md_info(const struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + struct dst_entry *dst; + + if (md_dst && md_dst->type == METADATA_XFRM) + return &md_dst->u.xfrm_info; + + dst = skb_dst(skb); + if (dst && dst->lwtstate && + dst->lwtstate->type == LWTUNNEL_ENCAP_XFRM) + return lwt_xfrm_info(dst->lwtstate); + + return NULL; +} + static inline bool skb_valid_dst(const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -92,6 +120,9 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, case METADATA_MACSEC: return memcmp(&a->u.macsec_info, &b->u.macsec_info, sizeof(a->u.macsec_info)); + case METADATA_XFRM: + return memcmp(&a->u.xfrm_info, &b->u.xfrm_info, + sizeof(a->u.xfrm_info)); default: return 1; } diff --git a/include/net/flow.h b/include/net/flow.h index 987bd511d652..2f0da4f0318b 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -54,11 +54,6 @@ union flowi_uli { __u8 code; } icmpt; - struct { - __le16 dport; - __le16 sport; - } dnports; - __be32 gre_key; struct { @@ -156,27 +151,11 @@ struct flowi6 { __u32 mp_hash; } __attribute__((__aligned__(BITS_PER_LONG/8))); -struct flowidn { - struct flowi_common __fl_common; -#define flowidn_oif __fl_common.flowic_oif -#define flowidn_iif __fl_common.flowic_iif -#define flowidn_mark __fl_common.flowic_mark -#define flowidn_scope __fl_common.flowic_scope -#define flowidn_proto __fl_common.flowic_proto -#define flowidn_flags __fl_common.flowic_flags - __le16 daddr; - __le16 saddr; - union flowi_uli uli; -#define fld_sport uli.ports.sport -#define fld_dport uli.ports.dport -} __attribute__((__aligned__(BITS_PER_LONG/8))); - struct flowi { union { struct flowi_common __fl_common; struct flowi4 ip4; struct flowi6 ip6; - struct flowidn dn; } u; #define flowi_oif u.__fl_common.flowic_oif #define flowi_iif u.__fl_common.flowic_iif @@ -211,11 +190,6 @@ static inline struct flowi_common *flowi6_to_flowi_common(struct flowi6 *fl6) return &(fl6->__fl_common); } 
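Tying back to the METADATA_XFRM plumbing added in dst_metadata.h above, a consumer could fetch the xfrm metadata attached to an skb like this (an illustrative editorial addition, not part of this patch; example_xfrm_if_id is hypothetical):

        static u32 example_xfrm_if_id(const struct sk_buff *skb)
        {
                const struct xfrm_md_info *info = skb_xfrm_md_info(skb);

                /* NULL when neither a metadata dst nor an xfrm lwtunnel
                 * state is attached to this skb.
                 */
                return info ? info->if_id : 0;
        }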
diff --git a/include/net/flow.h b/include/net/flow.h
index 987bd511d652..2f0da4f0318b 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -54,11 +54,6 @@ union flowi_uli {
 		__u8	code;
 	} icmpt;
 
-	struct {
-		__le16	dport;
-		__le16	sport;
-	} dnports;
-
 	__be32		gre_key;
 
 	struct {
@@ -156,27 +151,11 @@ struct flowi6 {
 	__u32			mp_hash;
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
-struct flowidn {
-	struct flowi_common	__fl_common;
-#define flowidn_oif		__fl_common.flowic_oif
-#define flowidn_iif		__fl_common.flowic_iif
-#define flowidn_mark		__fl_common.flowic_mark
-#define flowidn_scope		__fl_common.flowic_scope
-#define flowidn_proto		__fl_common.flowic_proto
-#define flowidn_flags		__fl_common.flowic_flags
-	__le16			daddr;
-	__le16			saddr;
-	union flowi_uli		uli;
-#define fld_sport		uli.ports.sport
-#define fld_dport		uli.ports.dport
-} __attribute__((__aligned__(BITS_PER_LONG/8)));
-
 struct flowi {
 	union {
 		struct flowi_common	__fl_common;
 		struct flowi4		ip4;
 		struct flowi6		ip6;
-		struct flowidn		dn;
 	} u;
 #define flowi_oif	u.__fl_common.flowic_oif
 #define flowi_iif	u.__fl_common.flowic_iif
@@ -211,11 +190,6 @@ static inline struct flowi_common *flowi6_to_flowi_common(struct flowi6 *fl6)
 	return &(fl6->__fl_common);
 }
 
-static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn)
-{
-	return container_of(fldn, struct flowi, u.dn);
-}
-
 __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
 
 #endif
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index ced80e2f8b58..fca357679816 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -302,6 +302,12 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
 		      struct ip_tunnel_parm *p, __u32 fwmark);
 void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
 
+bool ip_tunnel_netlink_encap_parms(struct nlattr *data[],
+				   struct ip_tunnel_encap *encap);
+
+void ip_tunnel_netlink_parms(struct nlattr *data[],
+			     struct ip_tunnel_parm *parms);
+
 extern const struct header_ops ip_tunnel_header_ops;
 __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb);
diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h
index c31108295079..8660a2a6d1fc 100644
--- a/include/net/ipcomp.h
+++ b/include/net/ipcomp.h
@@ -22,7 +22,7 @@ struct xfrm_state;
 int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb);
 int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb);
 void ipcomp_destroy(struct xfrm_state *x);
-int ipcomp_init_state(struct xfrm_state *x);
+int ipcomp_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack);
 
 static inline struct ip_comp_hdr *ip_comp_hdr(const struct sk_buff *skb)
 {
diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h
index a473b56842c5..2d0da478c8e0 100644
--- a/include/net/netfilter/nf_conntrack_bpf.h
+++ b/include/net/netfilter/nf_conntrack_bpf.h
@@ -3,13 +3,18 @@
 #ifndef _NF_CONNTRACK_BPF_H
 #define _NF_CONNTRACK_BPF_H
 
-#include <linux/btf.h>
 #include <linux/kconfig.h>
+#include <net/netfilter/nf_conntrack.h>
+
+struct nf_conn___init {
+	struct nf_conn ct;
+};
 
 #if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
     (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
 
 extern int register_nf_conntrack_bpf(void);
+extern void cleanup_nf_conntrack_bpf(void);
 
 #else
 
@@ -18,6 +23,24 @@ static inline int register_nf_conntrack_bpf(void)
 	return 0;
 }
 
+static inline void cleanup_nf_conntrack_bpf(void)
+{
+}
+
+#endif
+
+#if (IS_BUILTIN(CONFIG_NF_NAT) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+    (IS_MODULE(CONFIG_NF_NAT) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+
+extern int register_nf_nat_bpf(void);
+
+#else
+
+static inline int register_nf_nat_bpf(void)
+{
+	return 0;
+}
+
 #endif
 
 #endif /* _NF_CONNTRACK_BPF_H */
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index d376c995d906..4cabb32a2ad9 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -210,6 +210,18 @@ tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r)
 	__tcf_unbind_filter(q, r);
 }
 
+static inline void tc_cls_bind_class(u32 classid, unsigned long cl,
+				     void *q, struct tcf_result *res,
+				     unsigned long base)
+{
+	if (res->classid == classid) {
+		if (cl)
+			__tcf_bind_filter(q, res, base);
+		else
+			__tcf_unbind_filter(q, res);
+	}
+}
+
 struct tcf_exts {
 #ifdef CONFIG_NET_CLS_ACT
 	__u32	type; /* for backward compat(TCA_OLD_COMPAT) */
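tc_cls_bind_class() factors out the classid-match-then-(un)bind pattern that classifiers repeat in their ->bind_class() callbacks. A hedged sketch of a callback built on it; the filter type and its res member are illustrative, only tc_cls_bind_class() is from the header above:

/* Illustrative ->bind_class() for a hypothetical classifier; fh is the
 * filter handle the core passes back from ->walk().
 */
static void example_bind_class(void *fh, u32 classid, unsigned long cl,
			       void *q, unsigned long base)
{
	struct example_filter *f = fh;	/* assumed filter type */

	if (f)
		tc_cls_bind_class(classid, cl, q, &f->res, base);
}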
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4f71cc15ff8e..14d45661a84d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1302,11 +1302,14 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
+	if (tp->is_cwnd_limited)
+		return true;
+
 	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
 	if (tcp_in_slow_start(tp))
 		return tcp_snd_cwnd(tp) < 2 * tp->max_packets_out;
 
-	return tp->is_cwnd_limited;
+	return false;
 }
 
 /* BBR congestion control needs pacing.
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 6e8fa98f786f..dbc81f5eb553 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -312,9 +312,15 @@ struct km_event {
 	struct net *net;
 };
 
+struct xfrm_if_decode_session_result {
+	struct net *net;
+	u32 if_id;
+};
+
 struct xfrm_if_cb {
-	struct xfrm_if	*(*decode_session)(struct sk_buff *skb,
-					   unsigned short family);
+	bool (*decode_session)(struct sk_buff *skb,
+			       unsigned short family,
+			       struct xfrm_if_decode_session_result *res);
 };
 
 void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb);
@@ -399,7 +405,8 @@ struct xfrm_type {
 #define XFRM_TYPE_LOCAL_COADDR	4
 #define XFRM_TYPE_REMOTE_COADDR	8
 
-	int			(*init_state)(struct xfrm_state *x);
+	int			(*init_state)(struct xfrm_state *x,
+					      struct netlink_ext_ack *extack);
 	void			(*destructor)(struct xfrm_state *);
 	int			(*input)(struct xfrm_state *, struct sk_buff *skb);
 	int			(*output)(struct xfrm_state *, struct sk_buff *pskb);
@@ -985,6 +992,7 @@ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
 struct xfrm_if_parms {
 	int link;		/* ifindex of underlying L2 interface */
 	u32 if_id;		/* interface identifyer */
+	bool collect_md;
 };
 
 struct xfrm_if {
@@ -1573,9 +1581,10 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
 void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
 u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
-int xfrm_init_replay(struct xfrm_state *x);
+int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack);
 u32 xfrm_state_mtu(struct xfrm_state *x, int mtu);
-int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload);
+int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload,
+		      struct netlink_ext_ack *extack);
 int xfrm_init_state(struct xfrm_state *x);
 int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
 int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
@@ -1879,7 +1888,8 @@ void xfrm_dev_resume(struct sk_buff *skb);
 void xfrm_dev_backlog(struct softnet_data *sd);
 struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again);
 int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
-		       struct xfrm_user_offload *xuo);
+		       struct xfrm_user_offload *xuo,
+		       struct netlink_ext_ack *extack);
 bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
 
 static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
@@ -1942,7 +1952,7 @@ static inline struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again)
 	return skb;
 }
 
-static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo)
+static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo, struct netlink_ext_ack *extack)
 {
 	return 0;
 }
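Under the new xfrm_if_cb contract, decode_session() reports the owning netns and interface id through a result struct instead of returning a struct xfrm_if, which also suits collect_md devices that have no per-if_id netdev. A sketch of the callback shape; the lookup helper is an assumption, only the types and registration hook come from the header:

/* Hedged sketch of the new decode_session() shape. */
static bool example_decode_session(struct sk_buff *skb,
				   unsigned short family,
				   struct xfrm_if_decode_session_result *res)
{
	struct xfrm_if *xi = example_lookup_xfrm_if(skb, family); /* assumed helper */

	if (!xi)
		return false;

	res->net = dev_net(xi->dev);
	res->if_id = xi->p.if_id;
	return true;
}

static const struct xfrm_if_cb example_if_cb = {
	.decode_session = example_decode_session,
};
/* registered once at init via xfrm_if_register_cb(&example_if_cb) */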
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 647722e847b4..f787c3f524b0 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -95,7 +95,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
 						struct xdp_umem *umem);
 int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
 		  u16 queue_id, u16 flags);
-int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
+int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs,
 			 struct net_device *dev, u16 queue_id);
 int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs);
 void xp_destroy(struct xsk_buff_pool *pool);
diff --git a/include/trace/events/dlm.h b/include/trace/events/dlm.h
index bad21222130e..da0eaae98fa3 100644
--- a/include/trace/events/dlm.h
+++ b/include/trace/events/dlm.h
@@ -49,7 +49,7 @@
 /* note: we begin tracing dlm_lock_start() only if ls and lkb are found */
 
 TRACE_EVENT(dlm_lock_start,
 
-	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name,
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, const void *name,
 		 unsigned int namelen, int mode, __u32 flags),
 
 	TP_ARGS(ls, lkb, name, namelen, mode, flags),
@@ -91,10 +91,11 @@ TRACE_EVENT(dlm_lock_start,
 
 TRACE_EVENT(dlm_lock_end,
 
-	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name,
-		 unsigned int namelen, int mode, __u32 flags, int error),
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, const void *name,
+		 unsigned int namelen, int mode, __u32 flags, int error,
+		 bool kernel_lock),
 
-	TP_ARGS(ls, lkb, name, namelen, mode, flags, error),
+	TP_ARGS(ls, lkb, name, namelen, mode, flags, error, kernel_lock),
 
 	TP_STRUCT__entry(
 		__field(__u32, ls_id)
@@ -113,6 +114,7 @@ TRACE_EVENT(dlm_lock_end,
 		__entry->lkb_id = lkb->lkb_id;
 		__entry->mode = mode;
 		__entry->flags = flags;
+		__entry->error = error;
 
 		r = lkb->lkb_resource;
 		if (r)
@@ -122,14 +124,14 @@ TRACE_EVENT(dlm_lock_end,
 			memcpy(__get_dynamic_array(res_name), name,
 			       __get_dynamic_array_len(res_name));
 
-		/* return value will be zeroed in those cases by dlm_lock()
-		 * we do it here again to not introduce more overhead if
-		 * trace isn't running and error reflects the return value.
-		 */
-		if (error == -EAGAIN || error == -EDEADLK)
-			__entry->error = 0;
-		else
-			__entry->error = error;
+		if (kernel_lock) {
+			/* return value will be zeroed in those cases by dlm_lock()
+			 * we do it here again to not introduce more overhead if
+			 * trace isn't running and error reflects the return value.
+			 */
+			if (error == -EAGAIN || error == -EDEADLK)
+				__entry->error = 0;
+		}
 	),
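The new kernel_lock argument lets one event serve both callers: the in-kernel dlm_lock() path, where -EAGAIN/-EDEADLK are zeroed to mirror the function's return value, and the user (device) request path, where the raw error is kept. A hedged sketch of the two kinds of call sites; the surrounding code is omitted and the placement is illustrative:

	/* in the kernel dlm_lock() path: error normalized like the return value */
	trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error, true);

	/* in the user-space request path: raw error preserved */
	trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error, false);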
diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
index 57de057bd503..4f4c44ea3a65 100644
--- a/include/trace/events/erofs.h
+++ b/include/trace/events/erofs.h
@@ -53,15 +53,14 @@ TRACE_EVENT(erofs_lookup,
 );
 
 TRACE_EVENT(erofs_fill_inode,
-	TP_PROTO(struct inode *inode, int isdir),
-	TP_ARGS(inode, isdir),
+	TP_PROTO(struct inode *inode),
+	TP_ARGS(inode),
 
 	TP_STRUCT__entry(
 		__field(dev_t,		dev	)
 		__field(erofs_nid_t,	nid	)
 		__field(erofs_blk_t,	blkaddr	)
 		__field(unsigned int,	ofs	)
-		__field(int,		isdir	)
 	),
 
 	TP_fast_assign(
@@ -69,13 +68,11 @@ TRACE_EVENT(erofs_fill_inode,
 		__entry->nid		= EROFS_I(inode)->nid;
 		__entry->blkaddr	= erofs_blknr(iloc(EROFS_I_SB(inode), __entry->nid));
 		__entry->ofs		= erofs_blkoff(iloc(EROFS_I_SB(inode), __entry->nid));
-		__entry->isdir		= isdir;
 	),
 
-	TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u, isdir %d",
+	TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u",
 		  show_dev_nid(__entry),
-		  __entry->blkaddr, __entry->ofs,
-		  __entry->isdir)
+		  __entry->blkaddr, __entry->ofs)
 );
 
 TRACE_EVENT(erofs_readpage,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 793103b10eab..51b9aa640ad2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -110,6 +110,12 @@ union bpf_iter_link_info {
 		__u32	cgroup_fd;
 		__u64	cgroup_id;
 	} cgroup;
+	/* Parameters of task iterators. */
+	struct {
+		__u32	tid;
+		__u32	pid;
+		__u32	pid_fd;
+	} task;
 };
 
 /* BPF syscall commands, see bpf(2) man-page for more details. */
@@ -928,6 +934,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_INODE_STORAGE,
 	BPF_MAP_TYPE_TASK_STORAGE,
 	BPF_MAP_TYPE_BLOOM_FILTER,
+	BPF_MAP_TYPE_USER_RINGBUF,
 };
 
 /* Note that tracing related programs such as
@@ -1252,7 +1259,7 @@ enum {
 
 /* Query effective (directly attached + inherited from ancestor cgroups)
  * programs that will be executed for events within a cgroup.
- * attach_flags with this flag are returned only for directly attached programs.
+ * attach_flags with this flag are always returned 0.
  */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
@@ -1451,7 +1458,10 @@ union bpf_attr {
 		__u32		attach_flags;
 		__aligned_u64	prog_ids;
 		__u32		prog_cnt;
-		__aligned_u64	prog_attach_flags; /* output: per-program attach_flags */
+		/* output: per-program attach_flags.
+		 * not allowed to be set during effective query.
+		 */
+		__aligned_u64	prog_attach_flags;
 	} query;
 
 	struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
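The task member added to union bpf_iter_link_info above is how user space scopes a task iterator to one thread, one process, or a pidfd. A hedged libbpf-style sketch, assuming an already-loaded iter/task program and uapi headers new enough to carry the task member:

/* Hedged sketch: pin a task iterator to a single thread id. */
#include <bpf/libbpf.h>

static struct bpf_link *attach_one_tid(struct bpf_program *iter_prog, __u32 tid)
{
	union bpf_iter_link_info linfo = {};
	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);

	linfo.task.tid = tid;	/* visit only this thread; pid/pid_fd widen scope */
	opts.link_info = &linfo;
	opts.link_info_len = sizeof(linfo);

	return bpf_program__attach_iter(iter_prog, &opts);
}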
@@ -4950,6 +4960,7 @@ union bpf_attr {
 *		Get address of the traced function (for tracing and kprobe programs).
 *	Return
 *		Address of the traced function.
+*		0 for kprobes placed within the function (not at the entry).
 *
 * u64 bpf_get_attach_cookie(void *ctx)
 *	Description
@@ -5079,12 +5090,12 @@ union bpf_attr {
 *
 * long bpf_get_func_arg(void *ctx, u32 n, u64 *value)
 *	Description
-*		Get **n**-th argument (zero based) of the traced function (for tracing programs)
+*		Get **n**-th argument register (zero based) of the traced function (for tracing programs)
 *		returned in **value**.
 *
 *	Return
 *		0 on success.
-*		**-EINVAL** if n >= arguments count of traced function.
+*		**-EINVAL** if n >= argument register count of traced function.
 *
 * long bpf_get_func_ret(void *ctx, u64 *value)
 *	Description
@@ -5097,10 +5108,11 @@ union bpf_attr {
 *
 * long bpf_get_func_arg_cnt(void *ctx)
 *	Description
-*		Get number of arguments of the traced function (for tracing programs).
+*		Get number of registers of the traced function (for tracing programs) where
+*		function arguments are stored in these registers.
 *
 *	Return
-*		The number of arguments of the traced function.
+*		The number of argument registers of the traced function.
 *
 * int bpf_get_retval(void)
 *	Description
@@ -5386,6 +5398,43 @@ union bpf_attr {
 *	Return
 *		Current *ktime*.
 *
+* long bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx, u64 flags)
+*	Description
+*		Drain samples from the specified user ring buffer, and invoke
+*		the provided callback for each such sample:
+*
+*		long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx);
+*
+*		If **callback_fn** returns 0, the helper will continue to try
+*		and drain the next sample, up to a maximum of
+*		BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1,
+*		the helper will skip the rest of the samples and return. Other
+*		return values are not used now, and will be rejected by the
+*		verifier.
+*	Return
+*		The number of drained samples if no error was encountered while
+*		draining samples, or 0 if no samples were present in the ring
+*		buffer. If a user-space producer was epoll-waiting on this map,
+*		and at least one sample was drained, they will receive an event
+*		notification notifying them of available space in the ring
+*		buffer. If the BPF_RB_NO_WAKEUP flag is passed to this
+*		function, no wakeup notification will be sent. If the
+*		BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will
+*		be sent even if no sample was drained.
+*
+*		On failure, the returned value is one of the following:
+*
+*		**-EBUSY** if the ring buffer is contended, and another calling
+*		context was concurrently draining the ring buffer.
+*
+*		**-EINVAL** if user-space is not properly tracking the ring
+*		buffer due to the producer position not being aligned to 8
+*		bytes, a sample not being aligned to 8 bytes, or the producer
+*		position not matching the advertised length of a sample.
+*
+*		**-E2BIG** if user-space has tried to publish a sample which is
+*		larger than the size of the ring buffer, or which cannot fit
+*		within a struct bpf_dynptr.
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -5597,6 +5646,7 @@ union bpf_attr {
	FN(tcp_raw_check_syncookie_ipv4),	\
	FN(tcp_raw_check_syncookie_ipv6),	\
	FN(ktime_get_tai_ns),		\
+	FN(user_ringbuf_drain),		\
	/* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6218,6 +6268,10 @@ struct bpf_link_info {
				__u64 cgroup_id;
				__u32 order;
			} cgroup;
+			struct {
+				__u32 tid;
+				__u32 pid;
+			} task;
		};
	} iter;
	struct {
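On the BPF side, draining the new BPF_MAP_TYPE_USER_RINGBUF follows the callback contract documented above. A minimal sketch; the map size, sample handling, and attach point are illustrative, only the map type, helper, and callback signature come from the uapi:

/* Hedged BPF-side sketch of bpf_user_ringbuf_drain(). */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_USER_RINGBUF);
	__uint(max_entries, 256 * 1024);	/* illustrative size */
} user_rb SEC(".maps");

static long handle_sample(struct bpf_dynptr *dynptr, void *ctx)
{
	__u64 *count = ctx;

	(*count)++;
	return 0;	/* 0: keep draining; 1: stop after this sample */
}

SEC("tracepoint/syscalls/sys_enter_getpgid")	/* illustrative attach point */
int drain_user_rb(void *ctx)
{
	__u64 count = 0;

	bpf_user_ringbuf_drain(&user_rb, handle_sample, &count, 0);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";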
diff --git a/include/uapi/linux/dlm.h b/include/uapi/linux/dlm.h
index 0d2eca287567..1923f4f3b05e 100644
--- a/include/uapi/linux/dlm.h
+++ b/include/uapi/linux/dlm.h
@@ -69,7 +69,6 @@ struct dlm_lksb {
 
 /* dlm_new_lockspace() flags */
 
 #define DLM_LSFL_TIMEWARN	0x00000002
-#define DLM_LSFL_FS		0x00000004
 #define DLM_LSFL_NEWEXCL	0x00000008
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index fe9893d1485d..dc2aa3d75b39 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -737,6 +737,51 @@ enum ethtool_module_power_mode {
 };
 
 /**
+ * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE
+ *	functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions are
+ *	unknown
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled
+ */
+enum ethtool_podl_pse_admin_state {
+	ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1,
+	ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED,
+	ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED,
+};
+
+/**
+ * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE.
+ *	IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus:
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is
+ *	asserted true when the PoDL PSE state diagram variable mr_pse_enable is
+ *	false"
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is
+ *	asserted true when either of the PSE state diagram variables
+ *	pi_detecting or pi_classifying is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower”
+ *	is asserted true when the PoDL PSE state diagram variable pi_powered is
+ *	true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted
+ *	true when the PoDL PSE state diagram variable pi_sleeping is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true
+ *	when the logical combination of the PoDL PSE state diagram variables
+ *	pi_prebiased*!pi_sleeping is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted
+ *	true when the PoDL PSE state diagram variable overload_held is true."
+ */
+enum ethtool_podl_pse_pw_d_status {
+	ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR,
+};
+
+/**
 * struct ethtool_gstrings - string set for data tagging
 * @cmd: Command number = %ETHTOOL_GSTRINGS
 * @string_set: String set ID; one of &enum ethtool_stringset
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 408a664fad59..bb57084ac524 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -49,6 +49,8 @@ enum {
	ETHTOOL_MSG_PHC_VCLOCKS_GET,
	ETHTOOL_MSG_MODULE_GET,
	ETHTOOL_MSG_MODULE_SET,
+	ETHTOOL_MSG_PSE_GET,
+	ETHTOOL_MSG_PSE_SET,

	/* add new constants above here */
	__ETHTOOL_MSG_USER_CNT,
@@ -94,6 +96,7 @@ enum {
	ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY,
	ETHTOOL_MSG_MODULE_GET_REPLY,
	ETHTOOL_MSG_MODULE_NTF,
+	ETHTOOL_MSG_PSE_GET_REPLY,

	/* add new constants above here */
	__ETHTOOL_MSG_KERNEL_CNT,
@@ -863,6 +866,19 @@ enum {
	ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1)
};

+/* Power Sourcing Equipment */
+enum {
+	ETHTOOL_A_PSE_UNSPEC,
+	ETHTOOL_A_PSE_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_PODL_PSE_ADMIN_STATE,		/* u32 */
+	ETHTOOL_A_PODL_PSE_ADMIN_CONTROL,	/* u32 */
+	ETHTOOL_A_PODL_PSE_PW_D_STATUS,		/* u32 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_PSE_CNT,
+	ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1)
+};
+
/* generic netlink info */
#define ETHTOOL_GENL_NAME "ethtool"
#define ETHTOOL_GENL_VERSION 1
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 3d39fb398d65..5e7a1041df3a 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -695,6 +695,7 @@ enum {
	IFLA_XFRM_UNSPEC,
	IFLA_XFRM_LINK,
	IFLA_XFRM_IF_ID,
+	IFLA_XFRM_COLLECT_METADATA,
	__IFLA_XFRM_MAX
};
diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index 23df4e0e8ace..9c4bcc37a455 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -26,7 +26,7 @@ struct landlock_ruleset_attr {
	 * Landlock filesystem access rights that are not part of
	 * handled_access_fs are allowed. This is needed for backward
	 * compatibility reasons. One exception is the
-	 * LANDLOCK_ACCESS_FS_REFER access right, which is always implicitly
+	 * %LANDLOCK_ACCESS_FS_REFER access right, which is always implicitly
	 * handled, but must still be explicitly handled to add new rules with
	 * this access right.
	 */
@@ -128,11 +128,11 @@ struct landlock_path_beneath_attr {
 * hierarchy must also always have the same or a superset of restrictions of
 * the source hierarchy. If it is not the case, or if the domain doesn't
 * handle this access right, such actions are denied by default with errno
- * set to EXDEV. Linking also requires a LANDLOCK_ACCESS_FS_MAKE_* access
- * right on the destination directory, and renaming also requires a
- * LANDLOCK_ACCESS_FS_REMOVE_* access right on the source's (file or
+ * set to ``EXDEV``. Linking also requires a ``LANDLOCK_ACCESS_FS_MAKE_*``
+ * access right on the destination directory, and renaming also requires a
+ * ``LANDLOCK_ACCESS_FS_REMOVE_*`` access right on the source's (file or
 * directory) parent. Otherwise, such actions are denied with errno set to
- * EACCES. The EACCES errno prevails over EXDEV to let user space
+ * ``EACCES``. The ``EACCES`` errno prevails over ``EXDEV`` to let user space
 * efficiently deal with an unrecoverable error.
 *
 * .. warning::
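Because LANDLOCK_ACCESS_FS_REFER is only enforced once explicitly named in handled_access_fs, a ruleset that wants to constrain cross-directory link/rename must list it there. A minimal sketch, error handling trimmed; the particular flag combination is illustrative:

/* Minimal sketch: create a ruleset that handles (and so denies by
 * default) LANDLOCK_ACCESS_FS_REFER alongside a couple of other rights.
 */
#include <linux/landlock.h>
#include <sys/syscall.h>
#include <unistd.h>

int example_create_ruleset(void)
{
	struct landlock_ruleset_attr attr = {
		.handled_access_fs = LANDLOCK_ACCESS_FS_REFER |
				     LANDLOCK_ACCESS_FS_MAKE_REG |
				     LANDLOCK_ACCESS_FS_REMOVE_FILE,
	};

	/* returns a ruleset fd, or -1 with errno set */
	return syscall(SYS_landlock_create_ruleset, &attr, sizeof(attr), 0);
}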
diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
index 2e206919125c..229655ef792f 100644
--- a/include/uapi/linux/lwtunnel.h
+++ b/include/uapi/linux/lwtunnel.h
@@ -15,6 +15,7 @@ enum lwtunnel_encap_types {
	LWTUNNEL_ENCAP_SEG6_LOCAL,
	LWTUNNEL_ENCAP_RPL,
	LWTUNNEL_ENCAP_IOAM6,
+	LWTUNNEL_ENCAP_XFRM,
	__LWTUNNEL_ENCAP_MAX,
};

@@ -111,4 +112,13 @@ enum {

#define LWT_BPF_MAX_HEADROOM 256

+enum {
+	LWT_XFRM_UNSPEC,
+	LWT_XFRM_IF_ID,
+	LWT_XFRM_LINK,
+	__LWT_XFRM_MAX,
+};
+
+#define LWT_XFRM_MAX (__LWT_XFRM_MAX - 1)
+
#endif /* _UAPI_LWTUNNEL_H_ */
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 1500a0f58041..7cab2c65d3d7 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -124,7 +124,8 @@ struct statx {
	__u32	stx_dev_minor;
	/* 0x90 */
	__u64	stx_mnt_id;
-	__u64	__spare2;
+	__u32	stx_dio_mem_align;	/* Memory buffer alignment for direct I/O */
+	__u32	stx_dio_offset_align;	/* File offset alignment for direct I/O */
	/* 0xa0 */
	__u64	__spare3[12];	/* Spare space for future expansion */
	/* 0x100 */
@@ -152,6 +153,7 @@ struct statx {
#define STATX_BASIC_STATS	0x000007ffU	/* The stuff in the normal stat struct */
#define STATX_BTIME		0x00000800U	/* Want/got stx_btime */
#define STATX_MNT_ID		0x00001000U	/* Got stx_mnt_id */
+#define STATX_DIOALIGN		0x00002000U	/* Want/got direct I/O alignment info */

#define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */
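STATX_DIOALIGN repurposes the old __spare2 slot into the two alignment fields above; callers request it with the new mask bit and must check stx_mask on return, since not every filesystem fills it in. A sketch, assuming glibc and kernel headers new enough to define the new names:

/* Sketch: query direct I/O alignment requirements for a file. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
	struct statx stx;

	if (argc < 2 || statx(AT_FDCWD, argv[1], 0, STATX_DIOALIGN, &stx))
		return 1;

	if (stx.stx_mask & STATX_DIOALIGN)	/* fs may not report it */
		printf("O_DIRECT: buffer align %u, offset align %u\n",
		       stx.stx_dio_mem_align, stx.stx_dio_offset_align);
	return 0;
}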