From cef8cdc0d0e7c701fe4dcfba4ed3fd25d28a6020 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 26 Oct 2022 15:41:04 +0300 Subject: ARM: at91: pm: avoid soft resetting AC DLL Do not soft reset AC DLL as controller is buggy and this operation my introduce glitches in the controller leading to undefined behavior. Fixes: f0bbf17958e8 ("ARM: at91: pm: add self-refresh support for sama7g5") Depends-on: a02875c4cbd6 ("ARM: at91: pm: fix self-refresh for sama7g5") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20221026124114.985876-2-claudiu.beznea@microchip.com --- include/soc/at91/sama7-ddr.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/soc/at91/sama7-ddr.h b/include/soc/at91/sama7-ddr.h index 6ce3bd22f6c6..5ad7ac2e3a7c 100644 --- a/include/soc/at91/sama7-ddr.h +++ b/include/soc/at91/sama7-ddr.h @@ -26,7 +26,10 @@ #define DDR3PHY_PGSR (0x0C) /* DDR3PHY PHY General Status Register */ #define DDR3PHY_PGSR_IDONE (1 << 0) /* Initialization Done */ -#define DDR3PHY_ACIOCR (0x24) /* DDR3PHY AC I/O Configuration Register */ +#define DDR3PHY_ACDLLCR (0x14) /* DDR3PHY AC DLL Control Register */ +#define DDR3PHY_ACDLLCR_DLLSRST (1 << 30) /* DLL Soft Reset */ + +#define DDR3PHY_ACIOCR (0x24) /* DDR3PHY AC I/O Configuration Register */ #define DDR3PHY_ACIOCR_CSPDD_CS0 (1 << 18) /* CS#[0] Power Down Driver */ #define DDR3PHY_ACIOCR_CKPDD_CK0 (1 << 8) /* CK[0] Power Down Driver */ #define DDR3PHY_ACIORC_ACPDD (1 << 3) /* AC Power Down Driver */ -- cgit v1.2.3 From 003b786b678919e072c2b12ffa73901ef840963e Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Tue, 1 Nov 2022 13:49:13 +0200 Subject: ASoC: SOF: ipc3-topology: use old pipeline teardown flow with SOF2.1 and older MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Originally in commit b2ebcf42a48f ("ASoC: SOF: free widgets in sof_tear_down_pipelines() for static pipelines"), freeing of pipeline components at suspend was only done with recent FW as there were known limitations in older firmware versions. Tests show that if static pipelines are used, i.e. all pipelines are setup whenever firmware is powered up, the reverse action of freeing all components at power down, leads to firmware failures with also SOF2.0 and SOF2.1 based firmware. The problems can be specific to certain topologies with e.g. components not prepared to be freed at suspend (as this did not happen with older SOF kernels). To avoid hitting these problems when kernel is upgraded and used with an older firmware, bump the firmware requirement to SOF2.2 or newer. If an older firmware is used, and pipeline is a static one, do not free the components at suspend. This ensures the suspend flow remains backwards compatible with older firmware versions. This limitation does not apply if the product configuration is updated to dynamic pipelines. The limitation is not linked to firmware ABI, as the interface to free pipeline components has been available already before ABI3.19. The problem is in the implementation, so firmware version should be used to decide whether it is safe to use the newer flow or not. This patch adds a new SOF_FW_VER() macro to compare SOF firmware release versions. Link: https://github.com/thesofproject/sof/issues/6475 Signed-off-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Péter Ujfalusi Link: https://lore.kernel.org/r/20221101114913.1292671-1-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof/info.h | 4 ++++ sound/soc/sof/ipc3-topology.c | 15 ++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/sound/sof/info.h b/include/sound/sof/info.h index 65e86e4e9fd8..75193850ead0 100644 --- a/include/sound/sof/info.h +++ b/include/sound/sof/info.h @@ -36,6 +36,10 @@ enum sof_ipc_ext_data { SOF_IPC_EXT_USER_ABI_INFO = 4, }; +/* Build u32 number in format MMmmmppp */ +#define SOF_FW_VER(MAJOR, MINOR, PATCH) ((uint32_t)( \ + ((MAJOR) << 24) | ((MINOR) << 12) | (PATCH))) + /* FW version - SOF_IPC_GLB_VERSION */ struct sof_ipc_fw_version { struct sof_ipc_hdr hdr; diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c index c148715aa0f9..0720e1eae084 100644 --- a/sound/soc/sof/ipc3-topology.c +++ b/sound/soc/sof/ipc3-topology.c @@ -2275,6 +2275,7 @@ static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verif struct sof_ipc_fw_version *v = &sdev->fw_ready.version; struct snd_sof_widget *swidget; struct snd_sof_route *sroute; + bool dyn_widgets = false; int ret; /* @@ -2284,12 +2285,14 @@ static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verif * topology loading the sound card unavailable to open PCMs. */ list_for_each_entry(swidget, &sdev->widget_list, list) { - if (swidget->dynamic_pipeline_widget) + if (swidget->dynamic_pipeline_widget) { + dyn_widgets = true; continue; + } - /* Do not free widgets for static pipelines with FW ABI older than 3.19 */ + /* Do not free widgets for static pipelines with FW older than SOF2.2 */ if (!verify && !swidget->dynamic_pipeline_widget && - v->abi_version < SOF_ABI_VER(3, 19, 0)) { + SOF_FW_VER(v->major, v->minor, v->micro) < SOF_FW_VER(2, 2, 0)) { swidget->use_count = 0; swidget->complete = 0; continue; @@ -2303,9 +2306,11 @@ static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verif /* * Tear down all pipelines associated with PCMs that did not get suspended * and unset the prepare flag so that they can be set up again during resume. - * Skip this step for older firmware. + * Skip this step for older firmware unless topology has any + * dynamic pipeline (in which case the step is mandatory). */ - if (!verify && v->abi_version >= SOF_ABI_VER(3, 19, 0)) { + if (!verify && (dyn_widgets || SOF_FW_VER(v->major, v->minor, v->micro) >= + SOF_FW_VER(2, 2, 0))) { ret = sof_tear_down_left_over_pipelines(sdev); if (ret < 0) { dev_err(sdev->dev, "failed to tear down paused pipelines\n"); -- cgit v1.2.3 From 18acb7fac22ff7b36c7ea5a76b12996e7b7dbaba Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Nov 2022 13:00:13 +0100 Subject: bpf: Revert ("Fix dispatcher patchable function entry to 5 bytes nop") MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because __attribute__((patchable_function_entry)) is only available since GCC-8 this solution fails to build on the minimum required GCC version. Undo these changes so we might try again -- without cluttering up the patches with too many changes. This is an almost complete revert of: dbe69b299884 ("bpf: Fix dispatcher patchable function entry to 5 bytes nop") ceea991a019c ("bpf: Move bpf_dispatcher function out of ftrace locations") (notably the arch/x86/Kconfig hunk is kept). Reported-by: David Laight Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Daniel Borkmann Tested-by: Björn Töpel Tested-by: Jiri Olsa Acked-by: Björn Töpel Acked-by: Jiri Olsa Link: https://lkml.kernel.org/r/439d8dc735bb4858875377df67f1b29a@AcuMS.aculab.com Link: https://lore.kernel.org/bpf/20221103120647.728830733@infradead.org --- arch/x86/net/bpf_jit_comp.c | 13 ------------- include/linux/bpf.h | 21 +-------------------- kernel/bpf/dispatcher.c | 6 ------ 3 files changed, 1 insertion(+), 39 deletions(-) (limited to 'include') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 00127abd89ee..99620428ad78 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -389,18 +388,6 @@ out: return ret; } -int __init bpf_arch_init_dispatcher_early(void *ip) -{ - const u8 *nop_insn = x86_nops[5]; - - if (is_endbr(*(u32 *)ip)) - ip += ENDBR_INSN_SIZE; - - if (memcmp(ip, nop_insn, X86_PATCH_SIZE)) - text_poke_early(ip, nop_insn, X86_PATCH_SIZE); - return 0; -} - int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0566705c1d4e..5cd95716b441 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -27,7 +27,6 @@ #include #include #include -#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -971,8 +970,6 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); -int __init bpf_arch_init_dispatcher_early(void *ip); - #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ @@ -986,21 +983,7 @@ int __init bpf_arch_init_dispatcher_early(void *ip); }, \ } -#define BPF_DISPATCHER_INIT_CALL(_name) \ - static int __init _name##_init(void) \ - { \ - return bpf_arch_init_dispatcher_early(_name##_func); \ - } \ - early_initcall(_name##_init) - -#ifdef CONFIG_X86_64 -#define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5))) -#else -#define BPF_DISPATCHER_ATTRIBUTES -#endif - #define DEFINE_BPF_DISPATCHER(name) \ - notrace BPF_DISPATCHER_ATTRIBUTES \ noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ @@ -1010,9 +993,7 @@ int __init bpf_arch_init_dispatcher_early(void *ip); } \ EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ struct bpf_dispatcher bpf_dispatcher_##name = \ - BPF_DISPATCHER_INIT(bpf_dispatcher_##name); \ - BPF_DISPATCHER_INIT_CALL(bpf_dispatcher_##name); - + BPF_DISPATCHER_INIT(bpf_dispatcher_##name); #define DECLARE_BPF_DISPATCHER(name) \ unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index 04f0a045dcaa..fa64b80b8bca 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -4,7 +4,6 @@ #include #include #include -#include /* The BPF dispatcher is a multiway branch code generator. The * dispatcher is a mechanism to avoid the performance penalty of an @@ -91,11 +90,6 @@ int __weak arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int n return -ENOTSUPP; } -int __weak __init bpf_arch_init_dispatcher_early(void *ip) -{ - return -ENOTSUPP; -} - static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *buf) { s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0]; -- cgit v1.2.3 From c86df29d11dfba27c0a1f5039cd6fe387fbf4239 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Nov 2022 13:00:14 +0100 Subject: bpf: Convert BPF_DISPATCHER to use static_call() (not ftrace) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dispatcher function is currently abusing the ftrace __fentry__ call location for its own purposes -- this obviously gives trouble when the dispatcher and ftrace are both in use. A previous solution tried using __attribute__((patchable_function_entry())) which works, except it is GCC-8+ only, breaking the build on the earlier still supported compilers. Instead use static_call() -- which has its own annotations and does not conflict with ftrace -- to rewrite the dispatch function. By using: return static_call()(ctx, insni, bpf_func) you get a perfect forwarding tail call as function body (iow a single jmp instruction). By having the default static_call() target be bpf_dispatcher_nop_func() it retains the default behaviour (an indirect call to the argument function). Only once a dispatcher program is attached is the target rewritten to directly call the JIT'ed image. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Daniel Borkmann Tested-by: Björn Töpel Tested-by: Jiri Olsa Acked-by: Björn Töpel Acked-by: Jiri Olsa Link: https://lkml.kernel.org/r/Y1/oBlK0yFk5c/Im@hirez.programming.kicks-ass.net Link: https://lore.kernel.org/bpf/20221103120647.796772565@infradead.org --- include/linux/bpf.h | 39 ++++++++++++++++++++++++++++++++++++++- kernel/bpf/dispatcher.c | 22 ++++++++-------------- 2 files changed, 46 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5cd95716b441..74c6f449d81e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -27,6 +27,7 @@ #include #include #include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -953,6 +954,10 @@ struct bpf_dispatcher { void *rw_image; u32 image_off; struct bpf_ksym ksym; +#ifdef CONFIG_HAVE_STATIC_CALL + struct static_call_key *sc_key; + void *sc_tramp; +#endif }; static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( @@ -970,6 +975,34 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); + +/* + * When the architecture supports STATIC_CALL replace the bpf_dispatcher_fn + * indirection with a direct call to the bpf program. If the architecture does + * not have STATIC_CALL, avoid a double-indirection. + */ +#ifdef CONFIG_HAVE_STATIC_CALL + +#define __BPF_DISPATCHER_SC_INIT(_name) \ + .sc_key = &STATIC_CALL_KEY(_name), \ + .sc_tramp = STATIC_CALL_TRAMP_ADDR(_name), + +#define __BPF_DISPATCHER_SC(name) \ + DEFINE_STATIC_CALL(bpf_dispatcher_##name##_call, bpf_dispatcher_nop_func) + +#define __BPF_DISPATCHER_CALL(name) \ + static_call(bpf_dispatcher_##name##_call)(ctx, insnsi, bpf_func) + +#define __BPF_DISPATCHER_UPDATE(_d, _new) \ + __static_call_update((_d)->sc_key, (_d)->sc_tramp, (_new)) + +#else +#define __BPF_DISPATCHER_SC_INIT(name) +#define __BPF_DISPATCHER_SC(name) +#define __BPF_DISPATCHER_CALL(name) bpf_func(ctx, insnsi) +#define __BPF_DISPATCHER_UPDATE(_d, _new) +#endif + #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ @@ -981,25 +1014,29 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func .name = #_name, \ .lnode = LIST_HEAD_INIT(_name.ksym.lnode), \ }, \ + __BPF_DISPATCHER_SC_INIT(_name##_call) \ } #define DEFINE_BPF_DISPATCHER(name) \ + __BPF_DISPATCHER_SC(name); \ noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func) \ { \ - return bpf_func(ctx, insnsi); \ + return __BPF_DISPATCHER_CALL(name); \ } \ EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ struct bpf_dispatcher bpf_dispatcher_##name = \ BPF_DISPATCHER_INIT(bpf_dispatcher_##name); + #define DECLARE_BPF_DISPATCHER(name) \ unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func); \ extern struct bpf_dispatcher bpf_dispatcher_##name; + #define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index fa64b80b8bca..7dfb8d0d5202 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -4,6 +4,7 @@ #include #include #include +#include /* The BPF dispatcher is a multiway branch code generator. The * dispatcher is a mechanism to avoid the performance penalty of an @@ -104,17 +105,11 @@ static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *b static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) { - void *old, *new, *tmp; - u32 noff; - int err; - - if (!prev_num_progs) { - old = NULL; - noff = 0; - } else { - old = d->image + d->image_off; + void *new, *tmp; + u32 noff = 0; + + if (prev_num_progs) noff = d->image_off ^ (PAGE_SIZE / 2); - } new = d->num_progs ? d->image + noff : NULL; tmp = d->num_progs ? d->rw_image + noff : NULL; @@ -128,11 +123,10 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) return; } - err = bpf_arch_text_poke(d->func, BPF_MOD_JUMP, old, new); - if (err || !new) - return; + __BPF_DISPATCHER_UPDATE(d, new ?: &bpf_dispatcher_nop_func); - d->image_off = noff; + if (new) + d->image_off = noff; } void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, -- cgit v1.2.3 From 5cd189e410debedda416fecfc12f4716b5829845 Mon Sep 17 00:00:00 2001 From: Anthony DeRossi Date: Wed, 9 Nov 2022 17:40:26 -0800 Subject: vfio: Export the device set open count The open count of a device set is the sum of the open counts of all devices in the set. Drivers can use this value to determine whether shared resources are in use without tracking them manually or accessing the private open_count in vfio_device. Signed-off-by: Anthony DeRossi Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Link: https://lore.kernel.org/r/20221110014027.28780-3-ajderossi@gmail.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_main.c | 13 +++++++++++++ include/linux/vfio.h | 1 + 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 9a4af880e941..6e8804fe0095 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -125,6 +125,19 @@ static void vfio_release_device_set(struct vfio_device *device) xa_unlock(&vfio_device_set_xa); } +unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set) +{ + struct vfio_device *cur; + unsigned int open_count = 0; + + lockdep_assert_held(&dev_set->lock); + + list_for_each_entry(cur, &dev_set->device_list, dev_set_list) + open_count += cur->open_count; + return open_count; +} +EXPORT_SYMBOL_GPL(vfio_device_set_open_count); + /* * Group objects - create, release, get, put, search */ diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e7cebeb875dd..fdd393f70b19 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -189,6 +189,7 @@ int vfio_register_emulated_iommu_dev(struct vfio_device *device); void vfio_unregister_group_dev(struct vfio_device *device); int vfio_assign_device_set(struct vfio_device *device, void *set_id); +unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set); int vfio_mig_get_next_state(struct vfio_device *device, enum vfio_device_mig_state cur_fsm, -- cgit v1.2.3 From 1f6e04a1c7b85da3b765ca9f46029e5d1826d839 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Fri, 11 Nov 2022 07:56:20 -0500 Subject: bpf: Fix offset calculation error in __copy_map_value and zero_map_value Function __copy_map_value and zero_map_value miscalculated copy offset, resulting in possible copy of unwanted data to user or kernel. Fix it. Fixes: cc48755808c6 ("bpf: Add zero_map_value to zero map value with special fields") Fixes: 4d7d7f69f4b1 ("bpf: Adapt copy_map_value for multiple offset case") Signed-off-by: Xu Kuohai Signed-off-by: Andrii Nakryiko Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20221111125620.754855-1-xukuohai@huaweicloud.com --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 74c6f449d81e..c1bd1bd10506 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -315,7 +315,7 @@ static inline void __copy_map_value(struct bpf_map *map, void *dst, void *src, b u32 next_off = map->off_arr->field_off[i]; memcpy(dst + curr_off, src + curr_off, next_off - curr_off); - curr_off += map->off_arr->field_sz[i]; + curr_off = next_off + map->off_arr->field_sz[i]; } memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); } @@ -344,7 +344,7 @@ static inline void zero_map_value(struct bpf_map *map, void *dst) u32 next_off = map->off_arr->field_off[i]; memset(dst + curr_off, 0, next_off - curr_off); - curr_off += map->off_arr->field_sz[i]; + curr_off = next_off + map->off_arr->field_sz[i]; } memset(dst + curr_off, 0, map->value_size - curr_off); } -- cgit v1.2.3 From b68777d54fac21fc833ec26ea1a2a84f975ab035 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 14 Nov 2022 20:16:19 +0100 Subject: l2tp: Serialize access to sk_user_data with sk_callback_lock sk->sk_user_data has multiple users, which are not compatible with each other. Writers must synchronize by grabbing the sk->sk_callback_lock. l2tp currently fails to grab the lock when modifying the underlying tunnel socket fields. Fix it by adding appropriate locking. We err on the side of safety and grab the sk_callback_lock also inside the sk_destruct callback overridden by l2tp, even though there should be no refs allowing access to the sock at the time when sk_destruct gets called. v4: - serialize write to sk_user_data in l2tp sk_destruct v3: - switch from sock lock to sk_callback_lock - document write-protection for sk_user_data v2: - update Fixes to point to origin of the bug - use real names in Reported/Tested-by tags Cc: Tom Parkin Fixes: 3557baabf280 ("[L2TP]: PPP over L2TP driver core") Reported-by: Haowei Yan Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- net/l2tp/l2tp_core.c | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 5db02546941c..e0517ecc6531 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -323,7 +323,7 @@ struct sk_filter; * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_zckey: counter to order MSG_ZEROCOPY notifications * @sk_socket: Identd and reporting IO signals - * @sk_user_data: RPC layer private data + * @sk_user_data: RPC layer private data. Write-protected by @sk_callback_lock. * @sk_frag: cached page frag * @sk_peek_off: current peek_offset value * @sk_send_head: front of stuff to transmit diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 7499c51b1850..754fdda8a5f5 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1150,8 +1150,10 @@ static void l2tp_tunnel_destruct(struct sock *sk) } /* Remove hooks into tunnel socket */ + write_lock_bh(&sk->sk_callback_lock); sk->sk_destruct = tunnel->old_sk_destruct; sk->sk_user_data = NULL; + write_unlock_bh(&sk->sk_callback_lock); /* Call the original destructor */ if (sk->sk_destruct) @@ -1469,16 +1471,18 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, sock = sockfd_lookup(tunnel->fd, &ret); if (!sock) goto err; - - ret = l2tp_validate_socket(sock->sk, net, tunnel->encap); - if (ret < 0) - goto err_sock; } + sk = sock->sk; + write_lock(&sk->sk_callback_lock); + + ret = l2tp_validate_socket(sk, net, tunnel->encap); + if (ret < 0) + goto err_sock; + tunnel->l2tp_net = net; pn = l2tp_pernet(net); - sk = sock->sk; sock_hold(sk); tunnel->sock = sk; @@ -1504,7 +1508,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, setup_udp_tunnel_sock(net, sock, &udp_cfg); } else { - sk->sk_user_data = tunnel; + rcu_assign_sk_user_data(sk, tunnel); } tunnel->old_sk_destruct = sk->sk_destruct; @@ -1518,6 +1522,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, if (tunnel->fd >= 0) sockfd_put(sock); + write_unlock(&sk->sk_callback_lock); return 0; err_sock: @@ -1525,6 +1530,8 @@ err_sock: sock_release(sock); else sockfd_put(sock); + + write_unlock(&sk->sk_callback_lock); err: return ret; } -- cgit v1.2.3 From c964d62f5cab7b43dd0534f22a96eab386c6ec5d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 10 Nov 2022 10:44:57 -0800 Subject: block: make dma_alignment a stacking queue_limit Device mappers had always been getting the default 511 dma mask, but the underlying device might have a larger alignment requirement. Since this value is used to determine alloweable direct-io alignment, this needs to be a stackable limit. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20221110184501.2451620-2-kbusch@meta.com Signed-off-by: Jens Axboe --- block/blk-core.c | 1 - block/blk-settings.c | 8 +++++--- include/linux/blkdev.h | 15 ++++++++------- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 17667159482e..5487912befe8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -425,7 +425,6 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu) PERCPU_REF_INIT_ATOMIC, GFP_KERNEL)) goto fail_stats; - blk_queue_dma_alignment(q, 511); blk_set_default_limits(&q->limits); q->nr_requests = BLKDEV_DEFAULT_RQ; diff --git a/block/blk-settings.c b/block/blk-settings.c index 8bb9eef5310e..4949ed3ce7c9 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -57,6 +57,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->misaligned = 0; lim->zoned = BLK_ZONED_NONE; lim->zone_write_granularity = 0; + lim->dma_alignment = 511; } EXPORT_SYMBOL(blk_set_default_limits); @@ -600,6 +601,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->io_min = max(t->io_min, b->io_min); t->io_opt = lcm_not_zero(t->io_opt, b->io_opt); + t->dma_alignment = max(t->dma_alignment, b->dma_alignment); /* Set non-power-of-2 compatible chunk_sectors boundary */ if (b->chunk_sectors) @@ -773,7 +775,7 @@ EXPORT_SYMBOL(blk_queue_virt_boundary); **/ void blk_queue_dma_alignment(struct request_queue *q, int mask) { - q->dma_alignment = mask; + q->limits.dma_alignment = mask; } EXPORT_SYMBOL(blk_queue_dma_alignment); @@ -795,8 +797,8 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask) { BUG_ON(mask > PAGE_SIZE); - if (mask > q->dma_alignment) - q->dma_alignment = mask; + if (mask > q->limits.dma_alignment) + q->limits.dma_alignment = mask; } EXPORT_SYMBOL(blk_queue_update_dma_alignment); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 50e358a19d98..9898e34b2c2d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -311,6 +311,13 @@ struct queue_limits { unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; enum blk_zoned_model zoned; + + /* + * Drivers that set dma_alignment to less than 511 must be prepared to + * handle individual bvec's that are not a multiple of a SECTOR_SIZE + * due to possible offsets. + */ + unsigned int dma_alignment; }; typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, @@ -456,12 +463,6 @@ struct request_queue { unsigned long nr_requests; /* Max # of requests */ unsigned int dma_pad_mask; - /* - * Drivers that set dma_alignment to less than 511 must be prepared to - * handle individual bvec's that are not a multiple of a SECTOR_SIZE - * due to possible offsets. - */ - unsigned int dma_alignment; #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct blk_crypto_profile *crypto_profile; @@ -1324,7 +1325,7 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev) static inline int queue_dma_alignment(const struct request_queue *q) { - return q ? q->dma_alignment : 511; + return q ? q->limits.dma_alignment : 511; } static inline unsigned int bdev_dma_alignment(struct block_device *bdev) -- cgit v1.2.3 From b3228254bb6e91e57f920227f72a1a7d81925d81 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 10 Nov 2022 10:44:59 -0800 Subject: block: make blk_set_default_limits() private There are no external users of this function. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20221110184501.2451620-4-kbusch@meta.com Signed-off-by: Jens Axboe --- block/blk-settings.c | 1 - block/blk.h | 1 + include/linux/blkdev.h | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index 4949ed3ce7c9..8ac1038d0c79 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -59,7 +59,6 @@ void blk_set_default_limits(struct queue_limits *lim) lim->zone_write_granularity = 0; lim->dma_alignment = 511; } -EXPORT_SYMBOL(blk_set_default_limits); /** * blk_set_stacking_limits - set default limits for stacking devices diff --git a/block/blk.h b/block/blk.h index 5350bf363035..a0c1d31a8fe7 100644 --- a/block/blk.h +++ b/block/blk.h @@ -324,6 +324,7 @@ void blk_rq_set_mixed_merge(struct request *rq); bool blk_rq_merge_ok(struct request *rq, struct bio *bio); enum elv_merge blk_try_merge(struct request *rq, struct bio *bio); +void blk_set_default_limits(struct queue_limits *lim); int blk_dev_init(void); /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9898e34b2c2d..891f8cbcd043 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -945,7 +945,6 @@ extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); -extern void blk_set_default_limits(struct queue_limits *lim); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); -- cgit v1.2.3 From 58e0be1ef6118c5352b56a4d06e974c5599993a5 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 15 Nov 2022 22:24:00 +0800 Subject: net: use struct_group to copy ip/ipv6 header addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kernel test robot reported warnings when build bonding module with make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash drivers/net/bonding/: from ../drivers/net/bonding/bond_main.c:35: In function ‘fortify_memcpy_chk’, inlined from ‘iph_to_flow_copy_v4addrs’ at ../include/net/ip.h:566:2, inlined from ‘bond_flow_ip’ at ../drivers/net/bonding/bond_main.c:3984:3: ../include/linux/fortify-string.h:413:25: warning: call to ‘__read_overflow2_field’ declared with attribute warning: detected read beyond size of f ield (2nd parameter); maybe use struct_group()? [-Wattribute-warning] 413 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function ‘fortify_memcpy_chk’, inlined from ‘iph_to_flow_copy_v6addrs’ at ../include/net/ipv6.h:900:2, inlined from ‘bond_flow_ip’ at ../drivers/net/bonding/bond_main.c:3994:3: ../include/linux/fortify-string.h:413:25: warning: call to ‘__read_overflow2_field’ declared with attribute warning: detected read beyond size of f ield (2nd parameter); maybe use struct_group()? [-Wattribute-warning] 413 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is because we try to copy the whole ip/ip6 address to the flow_key, while we only point the to ip/ip6 saddr. Note that since these are UAPI headers, __struct_group() is used to avoid the compiler warnings. Reported-by: kernel test robot Fixes: c3f8324188fa ("net: Add full IPv6 addresses to flow_keys") Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20221115142400.1204786-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- include/net/ip.h | 2 +- include/net/ipv6.h | 2 +- include/uapi/linux/ip.h | 6 ++++-- include/uapi/linux/ipv6.h | 6 ++++-- 4 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 038097c2a152..144bdfbb25af 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -563,7 +563,7 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow, BUILD_BUG_ON(offsetof(typeof(flow->addrs), v4addrs.dst) != offsetof(typeof(flow->addrs), v4addrs.src) + sizeof(flow->addrs.v4addrs.src)); - memcpy(&flow->addrs.v4addrs, &iph->saddr, sizeof(flow->addrs.v4addrs)); + memcpy(&flow->addrs.v4addrs, &iph->addrs, sizeof(flow->addrs.v4addrs)); flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 37943ba3a73c..d383c895592a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -897,7 +897,7 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow, BUILD_BUG_ON(offsetof(typeof(flow->addrs), v6addrs.dst) != offsetof(typeof(flow->addrs), v6addrs.src) + sizeof(flow->addrs.v6addrs.src)); - memcpy(&flow->addrs.v6addrs, &iph->saddr, sizeof(flow->addrs.v6addrs)); + memcpy(&flow->addrs.v6addrs, &iph->addrs, sizeof(flow->addrs.v6addrs)); flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 961ec16a26b8..874a92349bf5 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -100,8 +100,10 @@ struct iphdr { __u8 ttl; __u8 protocol; __sum16 check; - __be32 saddr; - __be32 daddr; + __struct_group(/* no tag */, addrs, /* no attrs */, + __be32 saddr; + __be32 daddr; + ); /*The options start here. */ }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 03cdbe798fe3..81f4243bebb1 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -130,8 +130,10 @@ struct ipv6hdr { __u8 nexthdr; __u8 hop_limit; - struct in6_addr saddr; - struct in6_addr daddr; + __struct_group(/* no tag */, addrs, /* no attrs */, + struct in6_addr saddr; + struct in6_addr daddr; + ); }; -- cgit v1.2.3 From 91482864768a874c4290ef93b84a78f4f1dac51b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 17 Nov 2022 18:40:16 +0000 Subject: io_uring: fix multishot accept request leaks Having REQ_F_POLLED set doesn't guarantee that the request is executed as a multishot from the polling path. Fortunately for us, if the code thinks it's multishot issue when it's not, it can only ask to skip completion so leaking the request. Use issue_flags to mark multipoll issues. Cc: stable@vger.kernel.org Fixes: 390ed29b5e425 ("io_uring: add IORING_ACCEPT_MULTISHOT for accept") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/7700ac57653f2823e30b34dc74da68678c0c5f13.1668710222.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 3 +++ io_uring/io_uring.c | 2 +- io_uring/io_uring.h | 4 ++-- io_uring/net.c | 7 ++----- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 43bc8a2edccf..0ded9e271523 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -16,6 +16,9 @@ enum io_uring_cmd_flags { IO_URING_F_SQE128 = 4, IO_URING_F_CQE32 = 8, IO_URING_F_IOPOLL = 16, + + /* the request is executed from poll, it should not be freed */ + IO_URING_F_MULTISHOT = 32, }; struct io_uring_cmd { diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 4a1e482747cc..8840cf3e20f2 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1768,7 +1768,7 @@ int io_poll_issue(struct io_kiocb *req, bool *locked) io_tw_lock(req->ctx, locked); if (unlikely(req->task->flags & PF_EXITING)) return -EFAULT; - return io_issue_sqe(req, IO_URING_F_NONBLOCK); + return io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_MULTISHOT); } struct io_wq_work *io_wq_free_work(struct io_wq_work *work) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index e99a79f2df9b..cef5ff924e63 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -17,8 +17,8 @@ enum { IOU_ISSUE_SKIP_COMPLETE = -EIOCBQUEUED, /* - * Intended only when both REQ_F_POLLED and REQ_F_APOLL_MULTISHOT - * are set to indicate to the poll runner that multishot should be + * Intended only when both IO_URING_F_MULTISHOT is passed + * to indicate to the poll runner that multishot should be * removed and the result is set on req->cqe.res. */ IOU_STOP_MULTISHOT = -ECANCELED, diff --git a/io_uring/net.c b/io_uring/net.c index 15dea91625e2..a390d3ea486c 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1289,8 +1289,7 @@ retry: * return EAGAIN to arm the poll infra since it * has already been done */ - if ((req->flags & IO_APOLL_MULTI_POLLED) == - IO_APOLL_MULTI_POLLED) + if (issue_flags & IO_URING_F_MULTISHOT) ret = IOU_ISSUE_SKIP_COMPLETE; return ret; } @@ -1315,9 +1314,7 @@ retry: goto retry; io_req_set_res(req, ret, 0); - if (req->flags & REQ_F_POLLED) - return IOU_STOP_MULTISHOT; - return IOU_OK; + return (issue_flags & IO_URING_F_MULTISHOT) ? IOU_STOP_MULTISHOT : IOU_OK; } int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -- cgit v1.2.3