From 4ce413d1840b25b101be3c0559161db8891f3360 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 1 Dec 2017 15:29:39 +0000 Subject: irqdesc: Use bool return type instead of int The irq_balancing_disabled and irq_is_percpu{,_devid} functions are clearly intended to return bool like the functions in kernel/irq/settings.h, but actually return an int containing a masked value of desc->status_use_accessors. This can lead to subtle breakage if, for example, the return value is subsequently truncated when assigned to a narrower type. As Linus points out: | In particular, what can (and _has_ happened) is that people end up | using these functions that return true or false, and they assign the | result to something like a bitfield (or a char) or whatever. | | And the code looks *obviously* correct, when you have things like | | dev->percpu = irq_is_percpu_devid(dev->irq); | | and that "percpu" thing is just one status bit among many. It may even | *work*, because maybe that "percpu" flag ends up not being all that | important, or it just happens to never be set on the particular | hardware that people end up testing. | | But while it looks obviously correct, and might even work, it's really | fundamentally broken. Because that "true or false" function didn't | actually return 0/1, it returned 0 or 0x20000. | | And 0x20000 may not fit in a bitmask or a "char" or whatever. Fix the problem by consistently using bool as the return type for these functions. Reported-by: Linus Torvalds Signed-off-by: Will Deacon Signed-off-by: Thomas Gleixner Cc: marc.zyngier@arm.com Link: https://lkml.kernel.org/r/1512142179-24616-1-git-send-email-will.deacon@arm.com --- include/linux/irqdesc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index dd418955962b..39fb3700f7a9 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -230,7 +230,7 @@ irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip, data->chip = chip; } -static inline int irq_balancing_disabled(unsigned int irq) +static inline bool irq_balancing_disabled(unsigned int irq) { struct irq_desc *desc; @@ -238,7 +238,7 @@ static inline int irq_balancing_disabled(unsigned int irq) return desc->status_use_accessors & IRQ_NO_BALANCING_MASK; } -static inline int irq_is_percpu(unsigned int irq) +static inline bool irq_is_percpu(unsigned int irq) { struct irq_desc *desc; @@ -246,7 +246,7 @@ static inline int irq_is_percpu(unsigned int irq) return desc->status_use_accessors & IRQ_PER_CPU; } -static inline int irq_is_percpu_devid(unsigned int irq) +static inline bool irq_is_percpu_devid(unsigned int irq) { struct irq_desc *desc; -- cgit v1.2.3 From c895f6f703ad7dd2f99e751d9884b0aa5d0eea25 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 4 Dec 2017 10:56:44 +0100 Subject: bpf: correct broken uapi for BPF_PROG_TYPE_PERF_EVENT program type Commit 0515e5999a466dfe ("bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type") introduced the bpf_perf_event_data structure which exports the pt_regs structure. This is OK for multiple architectures but fail for s390 and arm64 which do not export pt_regs. Programs using them, for example, the bpf selftest fail to compile on these architectures. For s390, exporting the pt_regs is not an option because s390 wants to allow changes to it. For arm64, there is a user_pt_regs structure that covers parts of the pt_regs structure for use by user space. To solve the broken uapi for s390 and arm64, introduce an abstract type for pt_regs and add an asm/bpf_perf_event.h file that concretes the type. An asm-generic header file covers the architectures that export pt_regs today. The arch-specific enablement for s390 and arm64 follows in separate commits. Reported-by: Thomas Richter Fixes: 0515e5999a466dfe ("bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type") Signed-off-by: Hendrik Brueckner Reviewed-and-tested-by: Thomas Richter Acked-by: Alexei Starovoitov Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Arnd Bergmann Cc: Daniel Borkmann Signed-off-by: Daniel Borkmann --- arch/alpha/include/uapi/asm/Kbuild | 2 ++ arch/arc/include/uapi/asm/Kbuild | 1 + arch/arm/include/uapi/asm/Kbuild | 1 + arch/blackfin/include/uapi/asm/Kbuild | 1 + arch/c6x/include/uapi/asm/Kbuild | 1 + arch/cris/include/uapi/asm/Kbuild | 1 + arch/frv/include/uapi/asm/Kbuild | 2 ++ arch/h8300/include/uapi/asm/Kbuild | 1 + arch/hexagon/include/uapi/asm/Kbuild | 1 + arch/ia64/include/uapi/asm/Kbuild | 1 + arch/m32r/include/uapi/asm/Kbuild | 1 + arch/m68k/include/uapi/asm/Kbuild | 1 + arch/metag/include/uapi/asm/Kbuild | 1 + arch/microblaze/include/uapi/asm/Kbuild | 1 + arch/mips/include/uapi/asm/Kbuild | 1 + arch/mn10300/include/uapi/asm/Kbuild | 1 + arch/nios2/include/uapi/asm/Kbuild | 1 + arch/openrisc/include/uapi/asm/Kbuild | 1 + arch/parisc/include/uapi/asm/Kbuild | 1 + arch/powerpc/include/uapi/asm/Kbuild | 1 + arch/riscv/include/uapi/asm/Kbuild | 1 + arch/score/include/uapi/asm/Kbuild | 1 + arch/sh/include/uapi/asm/Kbuild | 1 + arch/sparc/include/uapi/asm/Kbuild | 1 + arch/tile/include/uapi/asm/Kbuild | 1 + arch/unicore32/include/uapi/asm/Kbuild | 1 + arch/x86/include/uapi/asm/Kbuild | 1 + arch/xtensa/include/uapi/asm/Kbuild | 1 + include/linux/perf_event.h | 6 +++++- include/uapi/asm-generic/bpf_perf_event.h | 9 +++++++++ include/uapi/linux/bpf_perf_event.h | 5 ++--- kernel/events/core.c | 2 +- 32 files changed, 47 insertions(+), 5 deletions(-) create mode 100644 include/uapi/asm-generic/bpf_perf_event.h (limited to 'include/linux') diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild index b15bf6bc0e94..14a2e9af97e9 100644 --- a/arch/alpha/include/uapi/asm/Kbuild +++ b/arch/alpha/include/uapi/asm/Kbuild @@ -1,2 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm + +generic-y += bpf_perf_event.h diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild index fa6d0ff4ff89..170b5db64afe 100644 --- a/arch/arc/include/uapi/asm/Kbuild +++ b/arch/arc/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild index 4d53de308ee0..4d1cc1847edf 100644 --- a/arch/arm/include/uapi/asm/Kbuild +++ b/arch/arm/include/uapi/asm/Kbuild @@ -7,6 +7,7 @@ generated-y += unistd-oabi.h generated-y += unistd-eabi.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += ioctl.h generic-y += ipcbuf.h diff --git a/arch/blackfin/include/uapi/asm/Kbuild b/arch/blackfin/include/uapi/asm/Kbuild index aa624b4ab655..2240b38c2915 100644 --- a/arch/blackfin/include/uapi/asm/Kbuild +++ b/arch/blackfin/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += ioctl.h generic-y += ipcbuf.h diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild index 67ee896a76a7..26644e15d854 100644 --- a/arch/c6x/include/uapi/asm/Kbuild +++ b/arch/c6x/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild index 3687b54bb18e..3470c6e9c7b9 100644 --- a/arch/cris/include/uapi/asm/Kbuild +++ b/arch/cris/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/frv/include/uapi/asm/Kbuild b/arch/frv/include/uapi/asm/Kbuild index b15bf6bc0e94..14a2e9af97e9 100644 --- a/arch/frv/include/uapi/asm/Kbuild +++ b/arch/frv/include/uapi/asm/Kbuild @@ -1,2 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm + +generic-y += bpf_perf_event.h diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild index 187aed820e71..2f65f78792cb 100644 --- a/arch/h8300/include/uapi/asm/Kbuild +++ b/arch/h8300/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild index cb5df3aad3a8..41a176dbb53e 100644 --- a/arch/hexagon/include/uapi/asm/Kbuild +++ b/arch/hexagon/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild index 13a97aa2285f..f5c6967a93bb 100644 --- a/arch/ia64/include/uapi/asm/Kbuild +++ b/arch/ia64/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += kvm_para.h diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild index 1c44d3b3eba0..451bf6071c6e 100644 --- a/arch/m32r/include/uapi/asm/Kbuild +++ b/arch/m32r/include/uapi/asm/Kbuild @@ -1,5 +1,6 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += kvm_para.h generic-y += siginfo.h diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild index 3717b64a620d..c2e26a44c482 100644 --- a/arch/m68k/include/uapi/asm/Kbuild +++ b/arch/m68k/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += ioctl.h generic-y += ipcbuf.h diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild index 6ac763d9a3e3..f9eaf07d29f8 100644 --- a/arch/metag/include/uapi/asm/Kbuild +++ b/arch/metag/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild index 06609ca36115..2c6a6bffea32 100644 --- a/arch/microblaze/include/uapi/asm/Kbuild +++ b/arch/microblaze/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/mips/include/uapi/asm/Kbuild b/arch/mips/include/uapi/asm/Kbuild index a0266feba9e6..7a4becd8963a 100644 --- a/arch/mips/include/uapi/asm/Kbuild +++ b/arch/mips/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += ipcbuf.h diff --git a/arch/mn10300/include/uapi/asm/Kbuild b/arch/mn10300/include/uapi/asm/Kbuild index c94ee54210bc..81271d3af47c 100644 --- a/arch/mn10300/include/uapi/asm/Kbuild +++ b/arch/mn10300/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += siginfo.h diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild index ffca24da7647..13a3d77b4d7b 100644 --- a/arch/nios2/include/uapi/asm/Kbuild +++ b/arch/nios2/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild index 62286dbeb904..130c16ccba0a 100644 --- a/arch/openrisc/include/uapi/asm/Kbuild +++ b/arch/openrisc/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild index 196d2a4efb31..286ef5a5904b 100644 --- a/arch/parisc/include/uapi/asm/Kbuild +++ b/arch/parisc/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h +generic-y += bpf_perf_event.h generic-y += kvm_para.h generic-y += param.h generic-y += poll.h diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild index 0d960ef78a9a..1a6ed5919ffd 100644 --- a/arch/powerpc/include/uapi/asm/Kbuild +++ b/arch/powerpc/include/uapi/asm/Kbuild @@ -1,6 +1,7 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += param.h generic-y += poll.h generic-y += resource.h diff --git a/arch/riscv/include/uapi/asm/Kbuild b/arch/riscv/include/uapi/asm/Kbuild index 5ded96b06352..7e91f4850475 100644 --- a/arch/riscv/include/uapi/asm/Kbuild +++ b/arch/riscv/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += setup.h generic-y += unistd.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/score/include/uapi/asm/Kbuild b/arch/score/include/uapi/asm/Kbuild index c94ee54210bc..81271d3af47c 100644 --- a/arch/score/include/uapi/asm/Kbuild +++ b/arch/score/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += siginfo.h diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild index e28531333efa..ba4d39cb321d 100644 --- a/arch/sh/include/uapi/asm/Kbuild +++ b/arch/sh/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild index 2178c78c7c1a..4680ba246b55 100644 --- a/arch/sparc/include/uapi/asm/Kbuild +++ b/arch/sparc/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += types.h diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild index 5711de0a1b5e..cc439612bcd5 100644 --- a/arch/tile/include/uapi/asm/Kbuild +++ b/arch/tile/include/uapi/asm/Kbuild @@ -1,6 +1,7 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild index 759a71411169..8611ef980554 100644 --- a/arch/unicore32/include/uapi/asm/Kbuild +++ b/arch/unicore32/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild index da1489cb64dc..1e901e421f2d 100644 --- a/arch/x86/include/uapi/asm/Kbuild +++ b/arch/x86/include/uapi/asm/Kbuild @@ -1,6 +1,7 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generated-y += unistd_32.h generated-y += unistd_64.h generated-y += unistd_x32.h diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild index a5bcdfb890f1..837d4dd76785 100644 --- a/arch/xtensa/include/uapi/asm/Kbuild +++ b/arch/xtensa/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2c9c87d8a0c1..7546822a1d74 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -15,6 +15,7 @@ #define _LINUX_PERF_EVENT_H #include +#include /* * Kernel-internal data types and definitions: @@ -787,7 +788,7 @@ struct perf_output_handle { }; struct bpf_perf_event_data_kern { - struct pt_regs *regs; + bpf_user_pt_regs_t *regs; struct perf_sample_data *data; struct perf_event *event; }; @@ -1177,6 +1178,9 @@ extern void perf_bp_event(struct perf_event *event, void *data); (user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL) # define perf_instruction_pointer(regs) instruction_pointer(regs) #endif +#ifndef perf_arch_bpf_user_pt_regs +# define perf_arch_bpf_user_pt_regs(regs) regs +#endif static inline bool has_branch_stack(struct perf_event *event) { diff --git a/include/uapi/asm-generic/bpf_perf_event.h b/include/uapi/asm-generic/bpf_perf_event.h new file mode 100644 index 000000000000..53815d2cd047 --- /dev/null +++ b/include/uapi/asm-generic/bpf_perf_event.h @@ -0,0 +1,9 @@ +#ifndef _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__ + +#include + +/* Export kernel pt_regs structure */ +typedef struct pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__ */ diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h index af549d4ecf1b..8f95303f9d80 100644 --- a/include/uapi/linux/bpf_perf_event.h +++ b/include/uapi/linux/bpf_perf_event.h @@ -8,11 +8,10 @@ #ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__ #define _UAPI__LINUX_BPF_PERF_EVENT_H__ -#include -#include +#include struct bpf_perf_event_data { - struct pt_regs regs; + bpf_user_pt_regs_t regs; __u64 sample_period; }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 16beab4767e1..ba957b9812b3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7987,11 +7987,11 @@ static void bpf_overflow_handler(struct perf_event *event, { struct bpf_perf_event_data_kern ctx = { .data = data, - .regs = regs, .event = event, }; int ret = 0; + ctx.regs = perf_arch_bpf_user_pt_regs(regs); preempt_disable(); if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) goto out; -- cgit v1.2.3 From f775b13eedee2f7f3c6fdd4e90fb79090ce5d339 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 14 Nov 2017 16:54:23 -0500 Subject: x86,kvm: move qemu/guest FPU switching out to vcpu_run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, every time a VCPU is scheduled out, the host kernel will first save the guest FPU/xstate context, then load the qemu userspace FPU context, only to then immediately save the qemu userspace FPU context back to memory. When scheduling in a VCPU, the same extraneous FPU loads and saves are done. This could be avoided by moving from a model where the guest FPU is loaded and stored with preemption disabled, to a model where the qemu userspace FPU is swapped out for the guest FPU context for the duration of the KVM_RUN ioctl. This is done under the VCPU mutex, which is also taken when other tasks inspect the VCPU FPU context, so the code should already be safe for this change. That should come as no surprise, given that s390 already has this optimization. This can fix a bug where KVM calls get_user_pages while owning the FPU, and the file system ends up requesting the FPU again: [258270.527947] __warn+0xcb/0xf0 [258270.527948] warn_slowpath_null+0x1d/0x20 [258270.527951] kernel_fpu_disable+0x3f/0x50 [258270.527953] __kernel_fpu_begin+0x49/0x100 [258270.527955] kernel_fpu_begin+0xe/0x10 [258270.527958] crc32c_pcl_intel_update+0x84/0xb0 [258270.527961] crypto_shash_update+0x3f/0x110 [258270.527968] crc32c+0x63/0x8a [libcrc32c] [258270.527975] dm_bm_checksum+0x1b/0x20 [dm_persistent_data] [258270.527978] node_prepare_for_write+0x44/0x70 [dm_persistent_data] [258270.527985] dm_block_manager_write_callback+0x41/0x50 [dm_persistent_data] [258270.527988] submit_io+0x170/0x1b0 [dm_bufio] [258270.527992] __write_dirty_buffer+0x89/0x90 [dm_bufio] [258270.527994] __make_buffer_clean+0x4f/0x80 [dm_bufio] [258270.527996] __try_evict_buffer+0x42/0x60 [dm_bufio] [258270.527998] dm_bufio_shrink_scan+0xc0/0x130 [dm_bufio] [258270.528002] shrink_slab.part.40+0x1f5/0x420 [258270.528004] shrink_node+0x22c/0x320 [258270.528006] do_try_to_free_pages+0xf5/0x330 [258270.528008] try_to_free_pages+0xe9/0x190 [258270.528009] __alloc_pages_slowpath+0x40f/0xba0 [258270.528011] __alloc_pages_nodemask+0x209/0x260 [258270.528014] alloc_pages_vma+0x1f1/0x250 [258270.528017] do_huge_pmd_anonymous_page+0x123/0x660 [258270.528021] handle_mm_fault+0xfd3/0x1330 [258270.528025] __get_user_pages+0x113/0x640 [258270.528027] get_user_pages+0x4f/0x60 [258270.528063] __gfn_to_pfn_memslot+0x120/0x3f0 [kvm] [258270.528108] try_async_pf+0x66/0x230 [kvm] [258270.528135] tdp_page_fault+0x130/0x280 [kvm] [258270.528149] kvm_mmu_page_fault+0x60/0x120 [kvm] [258270.528158] handle_ept_violation+0x91/0x170 [kvm_intel] [258270.528162] vmx_handle_exit+0x1ca/0x1400 [kvm_intel] No performance changes were detected in quick ping-pong tests on my 4 socket system, which is expected since an FPU+xstate load is on the order of 0.1us, while ping-ponging between CPUs is on the order of 20us, and somewhat noisy. Cc: stable@vger.kernel.org Signed-off-by: Rik van Riel Suggested-by: Christian Borntraeger Signed-off-by: Paolo Bonzini [Fixed a bug where reset_vcpu called put_fpu without preceding load_fpu, which happened inside from KVM_CREATE_VCPU ioctl. - Radim] Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 13 +++++++++++++ arch/x86/kvm/x86.c | 39 +++++++++++++++++---------------------- include/linux/kvm_host.h | 2 +- 3 files changed, 31 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 977de5fb968b..62527e053ee4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -536,7 +536,20 @@ struct kvm_vcpu_arch { struct kvm_mmu_memory_cache mmu_page_cache; struct kvm_mmu_memory_cache mmu_page_header_cache; + /* + * QEMU userspace and the guest each have their own FPU state. + * In vcpu_run, we switch between the user and guest FPU contexts. + * While running a VCPU, the VCPU thread will have the guest FPU + * context. + * + * Note that while the PKRU state lives inside the fpu registers, + * it is switched out separately at VMENTER and VMEXIT time. The + * "guest_fpu" state here contains the guest FPU context, with the + * host PRKU bits. + */ + struct fpu user_fpu; struct fpu guest_fpu; + u64 xcr0; u64 guest_supported_xcr0; u32 guest_xstate_size; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eee8e7faf1af..c8da1680a7d6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2937,7 +2937,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) srcu_read_unlock(&vcpu->kvm->srcu, idx); pagefault_enable(); kvm_x86_ops->vcpu_put(vcpu); - kvm_put_guest_fpu(vcpu); vcpu->arch.last_host_tsc = rdtsc(); } @@ -5254,13 +5253,10 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt) static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) { - preempt_disable(); - kvm_load_guest_fpu(emul_to_vcpu(ctxt)); } static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) { - preempt_enable(); } static int emulator_intercept(struct x86_emulate_ctxt *ctxt, @@ -6952,7 +6948,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) preempt_disable(); kvm_x86_ops->prepare_guest_switch(vcpu); - kvm_load_guest_fpu(vcpu); /* * Disable IRQs before setting IN_GUEST_MODE. Posted interrupt @@ -7297,12 +7292,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } + kvm_load_guest_fpu(vcpu); + if (unlikely(vcpu->arch.complete_userspace_io)) { int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; vcpu->arch.complete_userspace_io = NULL; r = cui(vcpu); if (r <= 0) - goto out; + goto out_fpu; } else WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); @@ -7311,6 +7308,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) else r = vcpu_run(vcpu); +out_fpu: + kvm_put_guest_fpu(vcpu); out: post_kvm_run_save(vcpu); kvm_sigset_deactivate(vcpu); @@ -7704,32 +7703,25 @@ static void fx_init(struct kvm_vcpu *vcpu) vcpu->arch.cr0 |= X86_CR0_ET; } +/* Swap (qemu) user FPU context for the guest FPU context. */ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { - if (vcpu->guest_fpu_loaded) - return; - - /* - * Restore all possible states in the guest, - * and assume host would use all available bits. - * Guest xcr0 would be loaded later. - */ - vcpu->guest_fpu_loaded = 1; - __kernel_fpu_begin(); + preempt_disable(); + copy_fpregs_to_fpstate(&vcpu->arch.user_fpu); /* PKRU is separately restored in kvm_x86_ops->run. */ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, ~XFEATURE_MASK_PKRU); + preempt_enable(); trace_kvm_fpu(1); } +/* When vcpu_run ends, restore user space FPU context. */ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - if (!vcpu->guest_fpu_loaded) - return; - - vcpu->guest_fpu_loaded = 0; + preempt_disable(); copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); - __kernel_fpu_end(); + copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state); + preempt_enable(); ++vcpu->stat.fpu_reload; trace_kvm_fpu(0); } @@ -7846,7 +7838,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) * To avoid have the INIT path from kvm_apic_has_events() that be * called with loaded FPU and does not let userspace fix the state. */ - kvm_put_guest_fpu(vcpu); + if (init_event) + kvm_put_guest_fpu(vcpu); mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave, XFEATURE_MASK_BNDREGS); if (mpx_state_buffer) @@ -7855,6 +7848,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) XFEATURE_MASK_BNDCSR); if (mpx_state_buffer) memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr)); + if (init_event) + kvm_load_guest_fpu(vcpu); } if (!init_event) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 893d6d606cd0..6bdd4b9f6611 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -232,7 +232,7 @@ struct kvm_vcpu { struct mutex mutex; struct kvm_run *run; - int guest_fpu_loaded, guest_xcr0_loaded; + int guest_xcr0_loaded; struct swait_queue_head wq; struct pid __rcu *pid; int sigset_active; -- cgit v1.2.3 From d7efc6c11b277d9d80b99b1334a78bfe7d7edf10 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Dec 2017 12:45:56 -0800 Subject: net: remove hlist_nulls_add_tail_rcu() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Alexander Potapenko reported use of uninitialized memory [1] This happens when inserting a request socket into TCP ehash, in __sk_nulls_add_node_rcu(), since sk_reuseport is not initialized. Bug was added by commit d894ba18d4e4 ("soreuseport: fix ordering for mixed v4/v6 sockets") Note that d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 ordering fix") missed the opportunity to get rid of hlist_nulls_add_tail_rcu() : Both UDP sockets and TCP/DCCP listeners no longer use __sk_nulls_add_node_rcu() for their hash insertion. Since all other sockets have unique 4-tuple, the reuseport status has no special meaning, so we can always use hlist_nulls_add_head_rcu() for them and save few cycles/instructions. [1] ================================================================== BUG: KMSAN: use of uninitialized memory in inet_ehash_insert+0xd40/0x1050 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.13.0+ #3288 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace:    __dump_stack lib/dump_stack.c:16  dump_stack+0x185/0x1d0 lib/dump_stack.c:52  kmsan_report+0x13f/0x1c0 mm/kmsan/kmsan.c:1016  __msan_warning_32+0x69/0xb0 mm/kmsan/kmsan_instr.c:766  __sk_nulls_add_node_rcu ./include/net/sock.h:684  inet_ehash_insert+0xd40/0x1050 net/ipv4/inet_hashtables.c:413  reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:754  inet_csk_reqsk_queue_hash_add+0x1cc/0x300 net/ipv4/inet_connection_sock.c:765  tcp_conn_request+0x31e7/0x36f0 net/ipv4/tcp_input.c:6414  tcp_v4_conn_request+0x16d/0x220 net/ipv4/tcp_ipv4.c:1314  tcp_rcv_state_process+0x42a/0x7210 net/ipv4/tcp_input.c:5917  tcp_v4_do_rcv+0xa6a/0xcd0 net/ipv4/tcp_ipv4.c:1483  tcp_v4_rcv+0x3de0/0x4ab0 net/ipv4/tcp_ipv4.c:1763  ip_local_deliver_finish+0x6bb/0xcb0 net/ipv4/ip_input.c:216  NF_HOOK ./include/linux/netfilter.h:248  ip_local_deliver+0x3fa/0x480 net/ipv4/ip_input.c:257  dst_input ./include/net/dst.h:477  ip_rcv_finish+0x6fb/0x1540 net/ipv4/ip_input.c:397  NF_HOOK ./include/linux/netfilter.h:248  ip_rcv+0x10f6/0x15c0 net/ipv4/ip_input.c:488  __netif_receive_skb_core+0x36f6/0x3f60 net/core/dev.c:4298  __netif_receive_skb net/core/dev.c:4336  netif_receive_skb_internal+0x63c/0x19c0 net/core/dev.c:4497  napi_skb_finish net/core/dev.c:4858  napi_gro_receive+0x629/0xa50 net/core/dev.c:4889  e1000_receive_skb drivers/net/ethernet/intel/e1000/e1000_main.c:4018  e1000_clean_rx_irq+0x1492/0x1d30 drivers/net/ethernet/intel/e1000/e1000_main.c:4474  e1000_clean+0x43aa/0x5970 drivers/net/ethernet/intel/e1000/e1000_main.c:3819  napi_poll net/core/dev.c:5500  net_rx_action+0x73c/0x1820 net/core/dev.c:5566  __do_softirq+0x4b4/0x8dd kernel/softirq.c:284  invoke_softirq kernel/softirq.c:364  irq_exit+0x203/0x240 kernel/softirq.c:405  exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:638  do_IRQ+0x15e/0x1a0 arch/x86/kernel/irq.c:263  common_interrupt+0x86/0x86 Fixes: d894ba18d4e4 ("soreuseport: fix ordering for mixed v4/v6 sockets") Fixes: d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 ordering fix") Signed-off-by: Eric Dumazet Reported-by: Alexander Potapenko Acked-by: Craig Gallek Signed-off-by: David S. Miller --- include/linux/rculist_nulls.h | 38 -------------------------------------- include/net/sock.h | 6 +----- 2 files changed, 1 insertion(+), 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index a328e8181e49..e4b257ff881b 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -100,44 +100,6 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, first->pprev = &n->next; } -/** - * hlist_nulls_add_tail_rcu - * @n: the element to add to the hash list. - * @h: the list to add to. - * - * Description: - * Adds the specified element to the end of the specified hlist_nulls, - * while permitting racing traversals. NOTE: tail insertion requires - * list traversal. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() - * or hlist_nulls_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. Regardless of the type of CPU, the - * list-traversal primitive must be guarded by rcu_read_lock(). - */ -static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, - struct hlist_nulls_head *h) -{ - struct hlist_nulls_node *i, *last = NULL; - - for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i); - i = hlist_nulls_next_rcu(i)) - last = i; - - if (last) { - n->next = last->next; - n->pprev = &last->next; - rcu_assign_pointer(hlist_nulls_next_rcu(last), n); - } else { - hlist_nulls_add_head_rcu(n, h); - } -} - /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. diff --git a/include/net/sock.h b/include/net/sock.h index 79e1a2c7912c..9155da422692 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -685,11 +685,7 @@ static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { - if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && - sk->sk_family == AF_INET6) - hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list); - else - hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); } static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) -- cgit v1.2.3 From af97a77bc01ce49a466f9d4c0125479e2e2230b6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 6 Dec 2017 09:50:08 +0000 Subject: efi: Move some sysfs files to be read-only by root Thanks to the scripts/leaking_addresses.pl script, it was found that some EFI values should not be readable by non-root users. So make them root-only, and to do that, add a __ATTR_RO_MODE() macro to make this easier, and use it in other places at the same time. Reported-by: Linus Torvalds Tested-by: Dave Young Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ard Biesheuvel Cc: H. Peter Anvin Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Cc: stable Link: http://lkml.kernel.org/r/20171206095010.24170-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/efi.c | 3 +-- drivers/firmware/efi/esrt.c | 15 ++++++--------- drivers/firmware/efi/runtime-map.c | 10 +++++----- include/linux/sysfs.h | 6 ++++++ 4 files changed, 18 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index f70febf680c3..c3eefa126e3b 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -143,8 +143,7 @@ static ssize_t systab_show(struct kobject *kobj, return str - buf; } -static struct kobj_attribute efi_attr_systab = - __ATTR(systab, 0400, systab_show, NULL); +static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 0400); #define EFI_FIELD(var) efi.var diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index bd7ed3c1148a..7aae2483fcb9 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -106,7 +106,7 @@ static const struct sysfs_ops esre_attr_ops = { }; /* Generic ESRT Entry ("ESRE") support. */ -static ssize_t esre_fw_class_show(struct esre_entry *entry, char *buf) +static ssize_t fw_class_show(struct esre_entry *entry, char *buf) { char *str = buf; @@ -117,18 +117,16 @@ static ssize_t esre_fw_class_show(struct esre_entry *entry, char *buf) return str - buf; } -static struct esre_attribute esre_fw_class = __ATTR(fw_class, 0400, - esre_fw_class_show, NULL); +static struct esre_attribute esre_fw_class = __ATTR_RO_MODE(fw_class, 0400); #define esre_attr_decl(name, size, fmt) \ -static ssize_t esre_##name##_show(struct esre_entry *entry, char *buf) \ +static ssize_t name##_show(struct esre_entry *entry, char *buf) \ { \ return sprintf(buf, fmt "\n", \ le##size##_to_cpu(entry->esre.esre1->name)); \ } \ \ -static struct esre_attribute esre_##name = __ATTR(name, 0400, \ - esre_##name##_show, NULL) +static struct esre_attribute esre_##name = __ATTR_RO_MODE(name, 0400) esre_attr_decl(fw_type, 32, "%u"); esre_attr_decl(fw_version, 32, "%u"); @@ -193,14 +191,13 @@ static int esre_create_sysfs_entry(void *esre, int entry_num) /* support for displaying ESRT fields at the top level */ #define esrt_attr_decl(name, size, fmt) \ -static ssize_t esrt_##name##_show(struct kobject *kobj, \ +static ssize_t name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, char *buf)\ { \ return sprintf(buf, fmt "\n", le##size##_to_cpu(esrt->name)); \ } \ \ -static struct kobj_attribute esrt_##name = __ATTR(name, 0400, \ - esrt_##name##_show, NULL) +static struct kobj_attribute esrt_##name = __ATTR_RO_MODE(name, 0400) esrt_attr_decl(fw_resource_count, 32, "%u"); esrt_attr_decl(fw_resource_count_max, 32, "%u"); diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c index 8e64b77aeac9..f377609ff141 100644 --- a/drivers/firmware/efi/runtime-map.c +++ b/drivers/firmware/efi/runtime-map.c @@ -63,11 +63,11 @@ static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr, return map_attr->show(entry, buf); } -static struct map_attribute map_type_attr = __ATTR_RO(type); -static struct map_attribute map_phys_addr_attr = __ATTR_RO(phys_addr); -static struct map_attribute map_virt_addr_attr = __ATTR_RO(virt_addr); -static struct map_attribute map_num_pages_attr = __ATTR_RO(num_pages); -static struct map_attribute map_attribute_attr = __ATTR_RO(attribute); +static struct map_attribute map_type_attr = __ATTR_RO_MODE(type, 0400); +static struct map_attribute map_phys_addr_attr = __ATTR_RO_MODE(phys_addr, 0400); +static struct map_attribute map_virt_addr_attr = __ATTR_RO_MODE(virt_addr, 0400); +static struct map_attribute map_num_pages_attr = __ATTR_RO_MODE(num_pages, 0400); +static struct map_attribute map_attribute_attr = __ATTR_RO_MODE(attribute, 0400); /* * These are default attributes that are added for every memmap entry. diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index e32dfe098e82..40839c02d28c 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -117,6 +117,12 @@ struct attribute_group { .show = _name##_show, \ } +#define __ATTR_RO_MODE(_name, _mode) { \ + .attr = { .name = __stringify(_name), \ + .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ + .show = _name##_show, \ +} + #define __ATTR_WO(_name) { \ .attr = { .name = __stringify(_name), .mode = S_IWUSR }, \ .store = _name##_store, \ -- cgit v1.2.3 From a4abd7a80addb4a9547f7dfc7812566b60ec505c Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Wed, 6 Dec 2017 20:21:24 +0100 Subject: usbnet: fix alignment for frames with no ethernet header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The qmi_wwan minidriver support a 'raw-ip' mode where frames are received without any ethernet header. This causes alignment issues because the skbs allocated by usbnet are "IP aligned". Fix by allowing minidrivers to disable the additional alignment offset. This is implemented using a per-device flag, since the same minidriver also supports 'ethernet' mode. Fixes: 32f7adf633b9 ("net: qmi_wwan: support "raw IP" mode") Reported-and-tested-by: Jay Foster Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 2 ++ drivers/net/usb/usbnet.c | 5 ++++- include/linux/usb/usbnet.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index c750cf7c042b..304ec6555cd8 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -261,9 +261,11 @@ static void qmi_wwan_netdev_setup(struct net_device *net) net->hard_header_len = 0; net->addr_len = 0; net->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; + set_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: raw IP\n"); } else if (!net->header_ops) { /* don't bother if already set */ ether_setup(net); + clear_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: Ethernet\n"); } diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 80348b6a8646..d56fe32bf48d 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -484,7 +484,10 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags) return -ENOLINK; } - skb = __netdev_alloc_skb_ip_align(dev->net, size, flags); + if (test_bit(EVENT_NO_IP_ALIGN, &dev->flags)) + skb = __netdev_alloc_skb(dev->net, size, flags); + else + skb = __netdev_alloc_skb_ip_align(dev->net, size, flags); if (!skb) { netif_dbg(dev, rx_err, dev->net, "no rx skb\n"); usbnet_defer_kevent (dev, EVENT_RX_MEMORY); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index a69877734c4e..e2ec3582e549 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -82,6 +82,7 @@ struct usbnet { # define EVENT_RX_KILL 10 # define EVENT_LINK_CHANGE 11 # define EVENT_SET_RX_MODE 12 +# define EVENT_NO_IP_ALIGN 13 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) -- cgit v1.2.3 From d4761754b4fb2ef8d9a1e9d121c4bec84e1fe292 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Thu, 7 Dec 2017 13:41:34 -0800 Subject: tcp: invalidate rate samples during SACK reneging Mark tcp_sock during a SACK reneging event and invalidate rate samples while marked. Such rate samples may overestimate bw by including packets that were SACKed before reneging. < ack 6001 win 10000 sack 7001:38001 < ack 7001 win 0 sack 8001:38001 // Reneg detected > seq 7001:8001 // RTO, SACK cleared. < ack 38001 win 10000 In above example the rate sample taken after the last ack will count 7001-38001 as delivered while the actual delivery rate likely could be much lower i.e. 7001-8001. This patch adds a new field tcp_sock.sack_reneg and marks it when we declare SACK reneging and entering TCP_CA_Loss, and unmarks it after the last rate sample was taken before moving back to TCP_CA_Open. This patch also invalidates rate samples taken while tcp_sock.is_sack_reneg is set. Fixes: b9f64820fb22 ("tcp: track data delivery rate for a TCP connection") Signed-off-by: Yousuk Seung Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Acked-by: Eric Dumazet Acked-by: Priyaranjan Jha Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 ++- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 1 + net/ipv4/tcp_input.c | 10 ++++++++-- net/ipv4/tcp_rate.c | 10 +++++++--- 5 files changed, 19 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index df5d97a85e1a..ca4a6361389b 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -224,7 +224,8 @@ struct tcp_sock { rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ - unused:3; + is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ + unused:2; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ unused1 : 1, diff --git a/include/net/tcp.h b/include/net/tcp.h index 6998707e81f3..6da880d2f022 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1055,7 +1055,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct rate_sample *rs); + bool is_sack_reneg, struct rate_sample *rs); void tcp_rate_check_app_limited(struct sock *sk); /* These functions determine how the current flow behaves in respect of SACK diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bf97317e6c97..f08eebe60446 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2412,6 +2412,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; tcp_clear_retrans(tp); inet_csk_delack_init(sk); /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 514c00732988..075c559570e6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1942,6 +1942,8 @@ void tcp_enter_loss(struct sock *sk) if (is_reneg) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tp->sacked_out = 0; + /* Mark SACK reneging until we recover from this loss event. */ + tp->is_sack_reneg = 1; } tcp_clear_all_retrans_hints(tp); @@ -2365,6 +2367,7 @@ static bool tcp_try_undo_recovery(struct sock *sk) return true; } tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; return false; } @@ -2398,8 +2401,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; - if (frto_undo || tcp_is_sack(tp)) + if (frto_undo || tcp_is_sack(tp)) { tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; + } return true; } return false; @@ -3496,6 +3501,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) struct tcp_sacktag_state sack_state; struct rate_sample rs = { .prior_delivered = 0 }; u32 prior_snd_una = tp->snd_una; + bool is_sack_reneg = tp->is_sack_reneg; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; @@ -3612,7 +3618,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ - tcp_rate_gen(sk, delivered, lost, sack_state.rate); + tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); return 1; diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 3330a370d306..c61240e43923 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, /* Update the connection delivery information and generate a rate sample. */ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct rate_sample *rs) + bool is_sack_reneg, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); u32 snd_us, ack_us; @@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ rs->losses = lost; /* freshly marked lost */ - /* Return an invalid sample if no timing information is available. */ - if (!rs->prior_mstamp) { + /* Return an invalid sample if no timing information is available or + * in recovery from loss with SACK reneging. Rate samples taken during + * a SACK reneging event may overestimate bw by including packets that + * were SACKed before the reneg. + */ + if (!rs->prior_mstamp || is_sack_reneg) { rs->delivered = -1; rs->interval_us = -1; return; -- cgit v1.2.3 From f335195adf043168ee69d78ea72ac3e30f0c57ce Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 6 Dec 2017 11:27:57 +0100 Subject: kmemcheck: rip it out for real Commit 4675ff05de2d ("kmemcheck: rip it out") has removed the code but for some reason SPDX header stayed in place. This looks like a rebase mistake in the mmotm tree or the merge mistake. Let's drop those leftovers as well. Signed-off-by: Michal Hocko Signed-off-by: Linus Torvalds --- arch/x86/include/asm/kmemcheck.h | 1 - arch/x86/mm/kmemcheck/error.c | 1 - arch/x86/mm/kmemcheck/error.h | 1 - arch/x86/mm/kmemcheck/opcode.c | 1 - arch/x86/mm/kmemcheck/opcode.h | 1 - arch/x86/mm/kmemcheck/pte.c | 1 - arch/x86/mm/kmemcheck/pte.h | 1 - arch/x86/mm/kmemcheck/selftest.c | 1 - arch/x86/mm/kmemcheck/selftest.h | 1 - arch/x86/mm/kmemcheck/shadow.h | 1 - include/linux/kmemcheck.h | 1 - mm/kmemcheck.c | 1 - tools/include/linux/kmemcheck.h | 1 - 13 files changed, 13 deletions(-) delete mode 100644 arch/x86/include/asm/kmemcheck.h delete mode 100644 arch/x86/mm/kmemcheck/error.c delete mode 100644 arch/x86/mm/kmemcheck/error.h delete mode 100644 arch/x86/mm/kmemcheck/opcode.c delete mode 100644 arch/x86/mm/kmemcheck/opcode.h delete mode 100644 arch/x86/mm/kmemcheck/pte.c delete mode 100644 arch/x86/mm/kmemcheck/pte.h delete mode 100644 arch/x86/mm/kmemcheck/selftest.c delete mode 100644 arch/x86/mm/kmemcheck/selftest.h delete mode 100644 arch/x86/mm/kmemcheck/shadow.h delete mode 100644 include/linux/kmemcheck.h delete mode 100644 mm/kmemcheck.c delete mode 100644 tools/include/linux/kmemcheck.h (limited to 'include/linux') diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/include/asm/kmemcheck.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c deleted file mode 100644 index cec594032515..000000000000 --- a/arch/x86/mm/kmemcheck/error.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/error.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c deleted file mode 100644 index cec594032515..000000000000 --- a/arch/x86/mm/kmemcheck/opcode.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/opcode.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c deleted file mode 100644 index cec594032515..000000000000 --- a/arch/x86/mm/kmemcheck/pte.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/pte.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c deleted file mode 100644 index cec594032515..000000000000 --- a/arch/x86/mm/kmemcheck/selftest.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/selftest.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/shadow.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/include/linux/kmemcheck.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c deleted file mode 100644 index cec594032515..000000000000 --- a/mm/kmemcheck.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/tools/include/linux/kmemcheck.h b/tools/include/linux/kmemcheck.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/tools/include/linux/kmemcheck.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -- cgit v1.2.3