diff options
Diffstat (limited to 'arch/x86')
159 files changed, 1140 insertions, 1226 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2bbbd4d1ba31..fbabf59692ff 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -17,6 +17,7 @@ config X86_32 select HAVE_DEBUG_STACKOVERFLOW select MODULES_USE_ELF_REL select OLD_SIGACTION + select GENERIC_VDSO_32 config X86_64 def_bool y @@ -121,6 +122,7 @@ config X86 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select GENERIC_GETTIMEOFDAY select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 select HAVE_ACPI_APEI if ACPI select HAVE_ACPI_APEI_NMI if ACPI @@ -202,6 +204,7 @@ config X86 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER + select HAVE_GENERIC_VDSO select HOTPLUG_SMT if SMP select IRQ_FORCED_THREADING select NEED_SG_DMA_LENGTH @@ -2285,7 +2288,7 @@ config COMPAT_VDSO choice prompt "vsyscall table for legacy applications" depends on X86_64 - default LEGACY_VSYSCALL_EMULATE + default LEGACY_VSYSCALL_XONLY help Legacy user code that does not know how to find the vDSO expects to be able to issue three syscalls by calling fixed addresses in @@ -2293,23 +2296,38 @@ choice it can be used to assist security vulnerability exploitation. This setting can be changed at boot time via the kernel command - line parameter vsyscall=[emulate|none]. + line parameter vsyscall=[emulate|xonly|none]. On a system with recent enough glibc (2.14 or newer) and no static binaries, you can say None without a performance penalty to improve security. - If unsure, select "Emulate". + If unsure, select "Emulate execution only". config LEGACY_VSYSCALL_EMULATE - bool "Emulate" + bool "Full emulation" help - The kernel traps and emulates calls into the fixed - vsyscall address mapping. This makes the mapping - non-executable, but it still contains known contents, - which could be used in certain rare security vulnerability - exploits. This configuration is recommended when userspace - still uses the vsyscall area. + The kernel traps and emulates calls into the fixed vsyscall + address mapping. This makes the mapping non-executable, but + it still contains readable known contents, which could be + used in certain rare security vulnerability exploits. This + configuration is recommended when using legacy userspace + that still uses vsyscalls along with legacy binary + instrumentation tools that require code to be readable. + + An example of this type of legacy userspace is running + Pin on an old binary that still uses vsyscalls. + + config LEGACY_VSYSCALL_XONLY + bool "Emulate execution only" + help + The kernel traps and emulates calls into the fixed vsyscall + address mapping and does not allow reads. This + configuration is recommended when userspace might use the + legacy vsyscall area but support for legacy binary + instrumentation of legacy code is not needed. It mitigates + certain uses of the vsyscall area as an ASLR-bypassing + buffer. config LEGACY_VSYSCALL_NONE bool "None" diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index 64a31a6d751a..a2b6b428922a 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007-2008 rPath, Inc. - All Rights Reserved * Copyright 2009 Intel Corporation; author H. Peter Anvin * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c index ee274834ea8b..b72fc10fc1be 100644 --- a/arch/x86/boot/apm.c +++ b/arch/x86/boot/apm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds @@ -7,9 +8,6 @@ * Original APM BIOS checking by Stephen Rothwell, May 1994 * (sfr@canb.auug.org.au) * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h index 2e1382486e91..02e1dea11d94 100644 --- a/arch/x86/boot/bitops.h +++ b/arch/x86/boot/bitops.h @@ -1,11 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 32a09eb5c101..19eca14b49a0 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -1,12 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * Copyright 2009 Intel Corporation; author H. Peter Anvin * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c index 625d21b0cd3f..4ff01176c1cc 100644 --- a/arch/x86/boot/cmdline.c +++ b/arch/x86/boot/cmdline.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 544ac4fafd11..220d1279d0e2 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* ----------------------------------------------------------------------- * * Copyright 2011 Intel Corporation; author Matt Fleming * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ #include <linux/efi.h> diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index a480356e0ed8..6afb7130a387 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -1,13 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * AMD Memory Encryption Support * * Copyright (C) 2017 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/linkage.h> diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S index 15d9f74b0008..4c5f4f4ad035 100644 --- a/arch/x86/boot/copy.S +++ b/arch/x86/boot/copy.S @@ -1,11 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* ----------------------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ #include <linux/linkage.h> diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 26240dde081e..0bbf4f3707d2 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007-2008 rPath, Inc. - All Rights Reserved * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 51079fc9298f..e1478d32de1a 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c index 6c176b6a42ad..1fb4bc70cee9 100644 --- a/arch/x86/boot/edd.c +++ b/arch/x86/boot/edd.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * Copyright 2009 Intel Corporation; author H. Peter Anvin * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 73532543d689..996df3d586f0 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * Copyright 2009 Intel Corporation; author H. Peter Anvin * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index f06c147b5140..b0422b79debc 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * Copyright 2009 Intel Corporation; author H. Peter Anvin * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c index 8062f8915250..40031a614712 100644 --- a/arch/x86/boot/pm.c +++ b/arch/x86/boot/pm.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S index 3e0edc6d2a20..c22f9a7d1aeb 100644 --- a/arch/x86/boot/pmjump.S +++ b/arch/x86/boot/pmjump.S @@ -1,11 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* ----------------------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c index 565083c16e5c..1237beeb9540 100644 --- a/arch/x86/boot/printf.c +++ b/arch/x86/boot/printf.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 90154df8f125..401e30ca0a75 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c index def2451f46ae..1fedabdb95ad 100644 --- a/arch/x86/boot/tty.c +++ b/arch/x86/boot/tty.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * Copyright 2009 Intel Corporation; author H. Peter Anvin * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c index 2b15aa488ffb..a1aaaf6c06a6 100644 --- a/arch/x86/boot/version.c +++ b/arch/x86/boot/version.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c index 49e0c18833e0..6eb8c06bc287 100644 --- a/arch/x86/boot/video-bios.c +++ b/arch/x86/boot/video-bios.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * Copyright 2009 Intel Corporation; author H. Peter Anvin * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/video-mode.c b/arch/x86/boot/video-mode.c index 95c7a818c0ed..9ada55dc1ab7 100644 --- a/arch/x86/boot/video-mode.c +++ b/arch/x86/boot/video-mode.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007-2008 rPath, Inc. - All Rights Reserved * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 3ecc11a9c440..7e185977a984 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * Copyright 2009 Intel Corporation; author H. Peter Anvin * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index a14c5178d4ba..4816cb9cf996 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * Copyright 2009 Intel Corporation; author H. Peter Anvin * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index ac89b6624a40..f2e96905b3fe 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * Copyright 2009 Intel Corporation; author H. Peter Anvin * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h index b54e0328c449..cbf7fed22441 100644 --- a/arch/x86/boot/video.h +++ b/arch/x86/boot/video.h @@ -1,11 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * * ----------------------------------------------------------------------- */ /* diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index 5f7e43d4f64a..4434607e366d 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * AES-NI + SSE2 implementation of AEGIS-128 * * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. */ #include <linux/linkage.h> diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S index 491dd61c845c..1461ef00c0e8 100644 --- a/arch/x86/crypto/aegis128l-aesni-asm.S +++ b/arch/x86/crypto/aegis128l-aesni-asm.S @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * AES-NI + SSE2 implementation of AEGIS-128L * * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. */ #include <linux/linkage.h> diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S index 8870c7c5d9a4..37d9b13dfd85 100644 --- a/arch/x86/crypto/aegis256-aesni-asm.S +++ b/arch/x86/crypto/aegis256-aesni-asm.S @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * AES-NI + SSE2 implementation of AEGIS-128L * * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. */ #include <linux/linkage.h> diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index f94375a8dcd1..5d53effe8abe 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Accelerated GHASH implementation with Intel PCLMULQDQ-NI * instructions. This file contains accelerated part of ghash @@ -10,10 +11,6 @@ * Vinodh Gopal * Erdinc Ozturk * Deniz Karakoyunlu - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. */ #include <linux/linkage.h> diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index e3f3e6fd9d65..ac76fe88ac4f 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Accelerated GHASH implementation with Intel PCLMULQDQ-NI * instructions. This file contains glue code. * * Copyright (c) 2009 Intel Corp. * Author: Huang Ying <ying.huang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. */ #include <linux/err.h> diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S index de182c460f82..5413fee33481 100644 --- a/arch/x86/crypto/morus1280-avx2-asm.S +++ b/arch/x86/crypto/morus1280-avx2-asm.S @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * AVX2 implementation of MORUS-1280 * * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. */ #include <linux/linkage.h> diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S index da5d2905db60..0eece772866b 100644 --- a/arch/x86/crypto/morus1280-sse2-asm.S +++ b/arch/x86/crypto/morus1280-sse2-asm.S @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * SSE2 implementation of MORUS-1280 * * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. */ #include <linux/linkage.h> diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S index 414db480250e..a60891101bbd 100644 --- a/arch/x86/crypto/morus640-sse2-asm.S +++ b/arch/x86/crypto/morus640-sse2-asm.S @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * SSE2 implementation of MORUS-640 * * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. */ #include <linux/linkage.h> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 2418804e66b4..536b574b6161 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -72,23 +72,18 @@ static long syscall_trace_enter(struct pt_regs *regs) struct thread_info *ti = current_thread_info(); unsigned long ret = 0; - bool emulated = false; u32 work; if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) BUG_ON(regs != task_pt_regs(current)); - work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; + work = READ_ONCE(ti->flags); - if (unlikely(work & _TIF_SYSCALL_EMU)) - emulated = true; - - if ((emulated || (work & _TIF_SYSCALL_TRACE)) && - tracehook_report_syscall_entry(regs)) - return -1L; - - if (emulated) - return -1L; + if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) { + ret = tracehook_report_syscall_entry(regs); + if (ret || (work & _TIF_SYSCALL_EMU)) + return -1L; + } #ifdef CONFIG_SECCOMP /* diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 7b23431be5cb..44c6e6f54bf7 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1104,6 +1104,30 @@ ENTRY(irq_entries_start) .endr END(irq_entries_start) +#ifdef CONFIG_X86_LOCAL_APIC + .align 8 +ENTRY(spurious_entries_start) + vector=FIRST_SYSTEM_VECTOR + .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) + pushl $(~vector+0x80) /* Note: always in signed byte range */ + vector=vector+1 + jmp common_spurious + .align 8 + .endr +END(spurious_entries_start) + +common_spurious: + ASM_CLAC + addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */ + SAVE_ALL switch_stacks=1 + ENCODE_FRAME_POINTER + TRACE_IRQS_OFF + movl %esp, %eax + call smp_spurious_interrupt + jmp ret_from_intr +ENDPROC(common_interrupt) +#endif + /* * the CPU automatically disables interrupts when executing an IRQ vector, * so IRQ-flags tracing has to follow that: diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3b7a0e8d3bc0..15f0749d0a15 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -375,6 +375,18 @@ ENTRY(irq_entries_start) .endr END(irq_entries_start) + .align 8 +ENTRY(spurious_entries_start) + vector=FIRST_SYSTEM_VECTOR + .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) + UNWIND_HINT_IRET_REGS + pushq $(~vector+0x80) /* Note: always in signed byte range */ + jmp common_spurious + .align 8 + vector=vector+1 + .endr +END(spurious_entries_start) + .macro DEBUG_ENTRY_ASSERT_IRQS_OFF #ifdef CONFIG_DEBUG_ENTRY pushq %rax @@ -571,10 +583,20 @@ _ASM_NOKPROBE(interrupt_entry) /* Interrupt entry/exit. */ - /* - * The interrupt stubs push (~vector+0x80) onto the stack and - * then jump to common_interrupt. - */ +/* + * The interrupt stubs push (~vector+0x80) onto the stack and + * then jump to common_spurious/interrupt. + */ +common_spurious: + addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ + call interrupt_entry + UNWIND_HINT_REGS indirect=1 + call smp_spurious_interrupt /* rdi points to pt_regs */ + jmp ret_from_intr +END(common_spurious) +_ASM_NOKPROBE(common_spurious) + +/* common_interrupt is a hotpath. Align it */ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S index fee6bc79b987..cb3464525b37 100644 --- a/arch/x86/entry/thunk_32.S +++ b/arch/x86/entry/thunk_32.S @@ -1,8 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash) * Copyright 2008 by Steven Rostedt, Red Hat, Inc * (inspired by Andi Kleen's thunk_64.S) - * Subject to the GNU public license, v.2. No warranty of any kind. */ #include <linux/linkage.h> #include <asm/asm.h> diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index be36bf4e0957..cfdca8b42c70 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -1,9 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Save registers before calling assembly functions. This avoids * disturbance of register allocation in some inline assembly constructs. * Copyright 2001,2002 by Andi Kleen, SuSE Labs. * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc. - * Subject to the GNU public license, v.2. No warranty of any kind. */ #include <linux/linkage.h> #include "calling.h" diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 42fe42e82baf..39106111be86 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -3,6 +3,12 @@ # Building vDSO images for x86. # +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE| +ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE +include $(srctree)/lib/vdso/Makefile + KBUILD_CFLAGS += $(DISABLE_LTO) KASAN_SANITIZE := n UBSAN_SANITIZE := n @@ -51,6 +57,7 @@ VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \ $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) + $(call if_changed,vdso_check) HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi hostprogs-y += vdso2c @@ -121,6 +128,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE $(call if_changed,vdso) + $(call if_changed,vdso_check) CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1 @@ -160,6 +168,7 @@ $(obj)/vdso32.so.dbg: FORCE \ $(obj)/vdso32/system_call.o \ $(obj)/vdso32/sigreturn.o $(call if_changed,vdso) + $(call if_changed,vdso_check) # # The DSO images are built using a special linker script. diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 0f82a70c7682..d9ff616bb0f6 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -1,240 +1,85 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright 2006 Andi Kleen, SUSE Labs. - * * Fast user context implementation of clock_gettime, gettimeofday, and time. * + * Copyright 2006 Andi Kleen, SUSE Labs. + * Copyright 2019 ARM Limited + * * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany - * - * The code should have no internal unresolved relocations. - * Check with readelf after changing. */ - -#include <uapi/linux/time.h> -#include <asm/vgtod.h> -#include <asm/vvar.h> -#include <asm/unistd.h> -#include <asm/msr.h> -#include <asm/pvclock.h> -#include <asm/mshyperv.h> -#include <linux/math64.h> #include <linux/time.h> #include <linux/kernel.h> +#include <linux/types.h> -#define gtod (&VVAR(vsyscall_gtod_data)) +#include "../../../../lib/vdso/gettimeofday.c" -extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); -extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); +extern int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); extern time_t __vdso_time(time_t *t); -#ifdef CONFIG_PARAVIRT_CLOCK -extern u8 pvclock_page[PAGE_SIZE] - __attribute__((visibility("hidden"))); -#endif - -#ifdef CONFIG_HYPERV_TSCPAGE -extern u8 hvclock_page[PAGE_SIZE] - __attribute__((visibility("hidden"))); -#endif - -#ifndef BUILD_VDSO32 - -notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { - long ret; - asm ("syscall" : "=a" (ret), "=m" (*ts) : - "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : - "rcx", "r11"); - return ret; + return __cvdso_gettimeofday(tv, tz); } -#else +int gettimeofday(struct __kernel_old_timeval *, struct timezone *) + __attribute__((weak, alias("__vdso_gettimeofday"))); -notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) +time_t __vdso_time(time_t *t) { - long ret; - - asm ( - "mov %%ebx, %%edx \n" - "mov %[clock], %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret), "=m" (*ts) - : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) - : "edx"); - return ret; + return __cvdso_time(t); } -#endif +time_t time(time_t *t) __attribute__((weak, alias("__vdso_time"))); -#ifdef CONFIG_PARAVIRT_CLOCK -static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void) -{ - return (const struct pvclock_vsyscall_time_info *)&pvclock_page; -} -static notrace u64 vread_pvclock(void) -{ - const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; - u32 version; - u64 ret; - - /* - * Note: The kernel and hypervisor must guarantee that cpu ID - * number maps 1:1 to per-CPU pvclock time info. - * - * Because the hypervisor is entirely unaware of guest userspace - * preemption, it cannot guarantee that per-CPU pvclock time - * info is updated if the underlying CPU changes or that that - * version is increased whenever underlying CPU changes. - * - * On KVM, we are guaranteed that pvti updates for any vCPU are - * atomic as seen by *all* vCPUs. This is an even stronger - * guarantee than we get with a normal seqlock. - * - * On Xen, we don't appear to have that guarantee, but Xen still - * supplies a valid seqlock using the version field. - * - * We only do pvclock vdso timing at all if - * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to - * mean that all vCPUs have matching pvti and that the TSC is - * synced, so we can just look at vCPU 0's pvti. - */ - - do { - version = pvclock_read_begin(pvti); - - if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) - return U64_MAX; - - ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); - } while (pvclock_read_retry(pvti, version)); - - return ret; -} -#endif -#ifdef CONFIG_HYPERV_TSCPAGE -static notrace u64 vread_hvclock(void) -{ - const struct ms_hyperv_tsc_page *tsc_pg = - (const struct ms_hyperv_tsc_page *)&hvclock_page; +#if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64) +/* both 64-bit and x32 use these */ +extern int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); +extern int __vdso_clock_getres(clockid_t clock, struct __kernel_timespec *res); - return hv_read_tsc_page(tsc_pg); -} -#endif - -notrace static inline u64 vgetcyc(int mode) +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) { - if (mode == VCLOCK_TSC) - return (u64)rdtsc_ordered(); -#ifdef CONFIG_PARAVIRT_CLOCK - else if (mode == VCLOCK_PVCLOCK) - return vread_pvclock(); -#endif -#ifdef CONFIG_HYPERV_TSCPAGE - else if (mode == VCLOCK_HVCLOCK) - return vread_hvclock(); -#endif - return U64_MAX; + return __cvdso_clock_gettime(clock, ts); } -notrace static int do_hres(clockid_t clk, struct timespec *ts) -{ - struct vgtod_ts *base = >od->basetime[clk]; - u64 cycles, last, sec, ns; - unsigned int seq; - - do { - seq = gtod_read_begin(gtod); - cycles = vgetcyc(gtod->vclock_mode); - ns = base->nsec; - last = gtod->cycle_last; - if (unlikely((s64)cycles < 0)) - return vdso_fallback_gettime(clk, ts); - if (cycles > last) - ns += (cycles - last) * gtod->mult; - ns >>= gtod->shift; - sec = base->sec; - } while (unlikely(gtod_read_retry(gtod, seq))); - - /* - * Do this outside the loop: a race inside the loop could result - * in __iter_div_u64_rem() being extremely slow. - */ - ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; - - return 0; -} +int clock_gettime(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_gettime"))); -notrace static void do_coarse(clockid_t clk, struct timespec *ts) +int __vdso_clock_getres(clockid_t clock, + struct __kernel_timespec *res) { - struct vgtod_ts *base = >od->basetime[clk]; - unsigned int seq; - - do { - seq = gtod_read_begin(gtod); - ts->tv_sec = base->sec; - ts->tv_nsec = base->nsec; - } while (unlikely(gtod_read_retry(gtod, seq))); + return __cvdso_clock_getres(clock, res); } +int clock_getres(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_getres"))); -notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) +#else +/* i386 only */ +extern int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts); +extern int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res); + +int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts) { - unsigned int msk; - - /* Sort out negative (CPU/FD) and invalid clocks */ - if (unlikely((unsigned int) clock >= MAX_CLOCKS)) - return vdso_fallback_gettime(clock, ts); - - /* - * Convert the clockid to a bitmask and use it to check which - * clocks are handled in the VDSO directly. - */ - msk = 1U << clock; - if (likely(msk & VGTOD_HRES)) { - return do_hres(clock, ts); - } else if (msk & VGTOD_COARSE) { - do_coarse(clock, ts); - return 0; - } - return vdso_fallback_gettime(clock, ts); + return __cvdso_clock_gettime32(clock, ts); } -int clock_gettime(clockid_t, struct timespec *) +int clock_gettime(clockid_t, struct old_timespec32 *) __attribute__((weak, alias("__vdso_clock_gettime"))); -notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts) { - if (likely(tv != NULL)) { - struct timespec *ts = (struct timespec *) tv; - - do_hres(CLOCK_REALTIME, ts); - tv->tv_usec /= 1000; - } - if (unlikely(tz != NULL)) { - tz->tz_minuteswest = gtod->tz_minuteswest; - tz->tz_dsttime = gtod->tz_dsttime; - } - - return 0; + return __cvdso_clock_gettime(clock, ts); } -int gettimeofday(struct timeval *, struct timezone *) - __attribute__((weak, alias("__vdso_gettimeofday"))); -/* - * This will break when the xtime seconds get inaccurate, but that is - * unlikely - */ -notrace time_t __vdso_time(time_t *t) -{ - /* This is atomic on x86 so we don't need any locks. */ - time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec); +int clock_gettime64(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_gettime64"))); - if (t) - *t = result; - return result; +int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res) +{ + return __cvdso_clock_getres_time32(clock, res); } -time_t time(time_t *t) - __attribute__((weak, alias("__vdso_time"))); + +int clock_getres(clockid_t, struct old_timespec32 *) + __attribute__((weak, alias("__vdso_clock_getres"))); +#endif diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S index d3a2dce4cfa9..36b644e16272 100644 --- a/arch/x86/entry/vdso/vdso.lds.S +++ b/arch/x86/entry/vdso/vdso.lds.S @@ -25,6 +25,8 @@ VERSION { __vdso_getcpu; time; __vdso_time; + clock_getres; + __vdso_clock_getres; local: *; }; } diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S index 422764a81d32..c7720995ab1a 100644 --- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S +++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S @@ -26,6 +26,8 @@ VERSION __vdso_clock_gettime; __vdso_gettimeofday; __vdso_time; + __vdso_clock_getres; + __vdso_clock_gettime64; }; LINUX_2.5 { diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S index 05cd1c5c4a15..16a8050a4fb6 100644 --- a/arch/x86/entry/vdso/vdsox32.lds.S +++ b/arch/x86/entry/vdso/vdsox32.lds.S @@ -21,6 +21,7 @@ VERSION { __vdso_gettimeofday; __vdso_getcpu; __vdso_time; + __vdso_clock_getres; local: *; }; } diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 8db1f594e8b1..349a61d8bf34 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -22,7 +22,7 @@ #include <asm/page.h> #include <asm/desc.h> #include <asm/cpufeature.h> -#include <asm/mshyperv.h> +#include <clocksource/hyperv_timer.h> #if defined(CONFIG_X86_64) unsigned int __read_mostly vdso64_enabled = 1; diff --git a/arch/x86/entry/vsyscall/Makefile b/arch/x86/entry/vsyscall/Makefile index 1ac4dd116c26..93c1b3e949a7 100644 --- a/arch/x86/entry/vsyscall/Makefile +++ b/arch/x86/entry/vsyscall/Makefile @@ -2,7 +2,5 @@ # # Makefile for the x86 low level vsyscall code # -obj-y := vsyscall_gtod.o - obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index d9d81ad7a400..07003f3f1bfc 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -42,9 +42,11 @@ #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" -static enum { EMULATE, NONE } vsyscall_mode = +static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init = #ifdef CONFIG_LEGACY_VSYSCALL_NONE NONE; +#elif defined(CONFIG_LEGACY_VSYSCALL_XONLY) + XONLY; #else EMULATE; #endif @@ -54,6 +56,8 @@ static int __init vsyscall_setup(char *str) if (str) { if (!strcmp("emulate", str)) vsyscall_mode = EMULATE; + else if (!strcmp("xonly", str)) + vsyscall_mode = XONLY; else if (!strcmp("none", str)) vsyscall_mode = NONE; else @@ -113,7 +117,8 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) } } -bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) +bool emulate_vsyscall(unsigned long error_code, + struct pt_regs *regs, unsigned long address) { struct task_struct *tsk; unsigned long caller; @@ -122,6 +127,22 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) long ret; unsigned long orig_dx; + /* Write faults or kernel-privilege faults never get fixed up. */ + if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER) + return false; + + if (!(error_code & X86_PF_INSTR)) { + /* Failed vsyscall read */ + if (vsyscall_mode == EMULATE) + return false; + + /* + * User code tried and failed to read the vsyscall page. + */ + warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround"); + return false; + } + /* * No point in checking CS -- the only way to get here is a user mode * trap to a high address, which means that we're in 64-bit user code. @@ -284,7 +305,7 @@ static const char *gate_vma_name(struct vm_area_struct *vma) static const struct vm_operations_struct gate_vma_ops = { .name = gate_vma_name, }; -static struct vm_area_struct gate_vma = { +static struct vm_area_struct gate_vma __ro_after_init = { .vm_start = VSYSCALL_ADDR, .vm_end = VSYSCALL_ADDR + PAGE_SIZE, .vm_page_prot = PAGE_READONLY_EXEC, @@ -357,12 +378,20 @@ void __init map_vsyscall(void) extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); - if (vsyscall_mode != NONE) { + /* + * For full emulation, the page needs to exist for real. In + * execute-only mode, there is no PTE at all backing the vsyscall + * page. + */ + if (vsyscall_mode == EMULATE) { __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, PAGE_KERNEL_VVAR); set_vsyscall_pgtable_user_bits(swapper_pg_dir); } + if (vsyscall_mode == XONLY) + gate_vma.vm_flags = VM_EXEC; + BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != (unsigned long)VSYSCALL_ADDR); } diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c deleted file mode 100644 index cfcdba082feb..000000000000 --- a/arch/x86/entry/vsyscall/vsyscall_gtod.c +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE - * Copyright 2003 Andi Kleen, SuSE Labs. - * - * Modified for x86 32 bit architecture by - * Stefani Seibold <stefani@seibold.net> - * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany - * - * Thanks to hpa@transmeta.com for some useful hint. - * Special thanks to Ingo Molnar for his early experience with - * a different vsyscall implementation for Linux/IA32 and for the name. - * - */ - -#include <linux/timekeeper_internal.h> -#include <asm/vgtod.h> -#include <asm/vvar.h> - -int vclocks_used __read_mostly; - -DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); - -void update_vsyscall_tz(void) -{ - vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest; - vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime; -} - -void update_vsyscall(struct timekeeper *tk) -{ - int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; - struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; - struct vgtod_ts *base; - u64 nsec; - - /* Mark the new vclock used. */ - BUILD_BUG_ON(VCLOCK_MAX >= 32); - WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode)); - - gtod_write_begin(vdata); - - /* copy vsyscall data */ - vdata->vclock_mode = vclock_mode; - vdata->cycle_last = tk->tkr_mono.cycle_last; - vdata->mask = tk->tkr_mono.mask; - vdata->mult = tk->tkr_mono.mult; - vdata->shift = tk->tkr_mono.shift; - - base = &vdata->basetime[CLOCK_REALTIME]; - base->sec = tk->xtime_sec; - base->nsec = tk->tkr_mono.xtime_nsec; - - base = &vdata->basetime[CLOCK_TAI]; - base->sec = tk->xtime_sec + (s64)tk->tai_offset; - base->nsec = tk->tkr_mono.xtime_nsec; - - base = &vdata->basetime[CLOCK_MONOTONIC]; - base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - nsec = tk->tkr_mono.xtime_nsec; - nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift); - while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { - nsec -= ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; - base->sec++; - } - base->nsec = nsec; - - base = &vdata->basetime[CLOCK_REALTIME_COARSE]; - base->sec = tk->xtime_sec; - base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; - - base = &vdata->basetime[CLOCK_MONOTONIC_COARSE]; - base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; - nsec += tk->wall_to_monotonic.tv_nsec; - while (nsec >= NSEC_PER_SEC) { - nsec -= NSEC_PER_SEC; - base->sec++; - } - base->nsec = nsec; - - gtod_write_end(vdata); -} diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 58a6993d7eb3..fb616203ce42 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2013 Advanced Micro Devices, Inc. * @@ -5,10 +6,6 @@ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com> * * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define pr_fmt(fmt) "perf/amd_iommu: " fmt diff --git a/arch/x86/events/amd/iommu.h b/arch/x86/events/amd/iommu.h index 62e0702c4374..0e5c036fd7be 100644 --- a/arch/x86/events/amd/iommu.h +++ b/arch/x86/events/amd/iommu.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2013 Advanced Micro Devices, Inc. * * Author: Steven Kinney <Steven.Kinney@amd.com> * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _PERF_EVENT_AMD_IOMMU_H_ diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c index c5ff084551c6..abef51320e3a 100644 --- a/arch/x86/events/amd/power.c +++ b/arch/x86/events/amd/power.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Performance events - AMD Processor Power Reporting Mechanism * * Copyright (C) 2016 Advanced Micro Devices, Inc. * * Author: Huang Rui <ray.huang@amd.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/module.h> diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 79cfd3b30ceb..85e6984c560b 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2013 Advanced Micro Devices, Inc. * * Author: Jacob Shin <jacob.shin@amd.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/perf_event.h> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index f315425d8468..3cd94a21bd53 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -561,14 +561,14 @@ int x86_pmu_hw_config(struct perf_event *event) } /* sample_regs_user never support XMM registers */ - if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS)) + if (unlikely(event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK)) return -EINVAL; /* * Besides the general purpose registers, XMM registers may * be collected in PEBS on some platforms, e.g. Icelake */ - if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) { - if (x86_pmu.pebs_no_xmm_regs) + if (unlikely(event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK)) { + if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS)) return -EINVAL; if (!event->attr.precise_ip) @@ -2402,13 +2402,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re return; } - if (perf_hw_regs(regs)) { - if (perf_callchain_store(entry, regs->ip)) - return; + if (perf_callchain_store(entry, regs->ip)) + return; + + if (perf_hw_regs(regs)) unwind_start(&state, current, regs, NULL); - } else { + else unwind_start(&state, current, NULL, (void *)regs->sp); - } for (; !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 7acc526b4ad2..505c73dc6a73 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -987,7 +987,7 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event) pebs_data_cfg |= PEBS_DATACFG_GP; if ((sample_type & PERF_SAMPLE_REGS_INTR) && - (attr->sample_regs_intr & PEBS_XMM_REGS)) + (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK)) pebs_data_cfg |= PEBS_DATACFG_XMMS; if (sample_type & PERF_SAMPLE_BRANCH_STACK) { @@ -1964,10 +1964,9 @@ void __init intel_ds_init(void) x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; - if (x86_pmu.version <= 4) { + if (x86_pmu.version <= 4) x86_pmu.pebs_no_isolation = 1; - x86_pmu.pebs_no_xmm_regs = 1; - } + if (x86_pmu.pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; char *pebs_qual = ""; @@ -2020,9 +2019,9 @@ void __init intel_ds_init(void) PERF_SAMPLE_TIME; x86_pmu.flags |= PMU_FL_PEBS_ALL; pebs_qual = "-baseline"; + x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; } else { /* Only basic record supported */ - x86_pmu.pebs_no_xmm_regs = 1; x86_pmu.large_pebs_flags &= ~(PERF_SAMPLE_ADDR | PERF_SAMPLE_TIME | diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index a6ac2f4f76fc..4e346856ee19 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -121,24 +121,6 @@ struct amd_nb { (1ULL << PERF_REG_X86_R14) | \ (1ULL << PERF_REG_X86_R15)) -#define PEBS_XMM_REGS \ - ((1ULL << PERF_REG_X86_XMM0) | \ - (1ULL << PERF_REG_X86_XMM1) | \ - (1ULL << PERF_REG_X86_XMM2) | \ - (1ULL << PERF_REG_X86_XMM3) | \ - (1ULL << PERF_REG_X86_XMM4) | \ - (1ULL << PERF_REG_X86_XMM5) | \ - (1ULL << PERF_REG_X86_XMM6) | \ - (1ULL << PERF_REG_X86_XMM7) | \ - (1ULL << PERF_REG_X86_XMM8) | \ - (1ULL << PERF_REG_X86_XMM9) | \ - (1ULL << PERF_REG_X86_XMM10) | \ - (1ULL << PERF_REG_X86_XMM11) | \ - (1ULL << PERF_REG_X86_XMM12) | \ - (1ULL << PERF_REG_X86_XMM13) | \ - (1ULL << PERF_REG_X86_XMM14) | \ - (1ULL << PERF_REG_X86_XMM15)) - /* * Per register state. */ @@ -668,8 +650,7 @@ struct x86_pmu { pebs_broken :1, pebs_prec_dist :1, pebs_no_tlb :1, - pebs_no_isolation :1, - pebs_no_xmm_regs :1; + pebs_no_isolation :1; int pebs_record_size; int pebs_buffer_size; int max_pebs_events; diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 1608050e9df9..0e033ef11a9f 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -17,64 +17,13 @@ #include <linux/version.h> #include <linux/vmalloc.h> #include <linux/mm.h> -#include <linux/clockchips.h> #include <linux/hyperv.h> #include <linux/slab.h> #include <linux/cpuhotplug.h> - -#ifdef CONFIG_HYPERV_TSCPAGE - -static struct ms_hyperv_tsc_page *tsc_pg; - -struct ms_hyperv_tsc_page *hv_get_tsc_page(void) -{ - return tsc_pg; -} -EXPORT_SYMBOL_GPL(hv_get_tsc_page); - -static u64 read_hv_clock_tsc(struct clocksource *arg) -{ - u64 current_tick = hv_read_tsc_page(tsc_pg); - - if (current_tick == U64_MAX) - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); - - return current_tick; -} - -static struct clocksource hyperv_cs_tsc = { - .name = "hyperv_clocksource_tsc_page", - .rating = 400, - .read = read_hv_clock_tsc, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; -#endif - -static u64 read_hv_clock_msr(struct clocksource *arg) -{ - u64 current_tick; - /* - * Read the partition counter to get the current tick count. This count - * is set to 0 when the partition is created and is incremented in - * 100 nanosecond units. - */ - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); - return current_tick; -} - -static struct clocksource hyperv_cs_msr = { - .name = "hyperv_clocksource_msr", - .rating = 400, - .read = read_hv_clock_msr, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; +#include <clocksource/hyperv_timer.h> void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); -struct clocksource *hyperv_cs; -EXPORT_SYMBOL_GPL(hyperv_cs); u32 *hv_vp_index; EXPORT_SYMBOL_GPL(hv_vp_index); @@ -343,42 +292,8 @@ void __init hyperv_init(void) x86_init.pci.arch_init = hv_pci_init; - /* - * Register Hyper-V specific clocksource. - */ -#ifdef CONFIG_HYPERV_TSCPAGE - if (ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) { - union hv_x64_msr_hypercall_contents tsc_msr; - - tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); - if (!tsc_pg) - goto register_msr_cs; - - hyperv_cs = &hyperv_cs_tsc; - - rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); - - tsc_msr.enable = 1; - tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg); - - wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); - - hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK; - - clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); - return; - } -register_msr_cs: -#endif - /* - * For 32 bit guests just use the MSR based mechanism for reading - * the partition counter. - */ - - hyperv_cs = &hyperv_cs_msr; - if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE) - clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); - + /* Register Hyper-V specific clocksource */ + hv_init_clocksource(); return; remove_cpuhp_state: diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h index 1b010a859b8b..9aff97f0de7f 100644 --- a/arch/x86/include/asm/acenv.h +++ b/arch/x86/include/asm/acenv.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * X86 specific ACPICA environments and implementation * * Copyright (C) 2014, Intel Corporation * Author: Lv Zheng <lv.zheng@intel.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _ASM_X86_ACENV_H diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1340fa53b575..050e5f9ebf81 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -53,7 +53,7 @@ extern unsigned int apic_verbosity; extern int local_apic_timer_c2_ok; extern int disable_apic; -extern unsigned int lapic_timer_frequency; +extern unsigned int lapic_timer_period; extern enum apic_intr_mode_id apic_intr_mode; enum apic_intr_mode_id { @@ -155,7 +155,6 @@ static inline int apic_force_enable(unsigned long addr) extern int apic_force_enable(unsigned long addr); #endif -extern void apic_bsp_setup(bool upmode); extern void apic_ap_setup(void); /* @@ -175,6 +174,7 @@ extern void lapic_assign_system_vectors(void); extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); extern void lapic_online(void); extern void lapic_offline(void); +extern bool apic_needs_pit(void); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } @@ -188,6 +188,7 @@ static inline void init_bsp_APIC(void) { } static inline void apic_intr_mode_init(void) { } static inline void lapic_assign_system_vectors(void) { } static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } +static inline bool apic_needs_pit(void) { return true; } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 9e27fa05a7ae..4c95c365058a 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -536,7 +536,7 @@ static inline void __fpregs_load_activate(void) struct fpu *fpu = ¤t->thread.fpu; int cpu = smp_processor_id(); - if (WARN_ON_ONCE(current->mm == NULL)) + if (WARN_ON_ONCE(current->flags & PF_KTHREAD)) return; if (!fpregs_state_valid(fpu, cpu)) { @@ -567,11 +567,11 @@ static inline void __fpregs_load_activate(void) * otherwise. * * The FPU context is only stored/restored for a user task and - * ->mm is used to distinguish between kernel and user threads. + * PF_KTHREAD is used to distinguish between kernel and user threads. */ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (static_cpu_has(X86_FEATURE_FPU) && current->mm) { + if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 7e42b285c856..c6136d79f8c0 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -47,7 +47,6 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask); -void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); const void *get_xsave_field_ptr(int xfeature_nr); int using_compacted_format(void); diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 32e666e1231e..cbd97e22d2f3 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -150,8 +150,11 @@ extern char irq_entries_start[]; #define trace_irq_entries_start irq_entries_start #endif +extern char spurious_entries_start[]; + #define VECTOR_UNUSED NULL -#define VECTOR_RETRIGGERED ((void *)~0UL) +#define VECTOR_SHUTDOWN ((void *)~0UL) +#define VECTOR_RETRIGGERED ((void *)~1UL) typedef struct irq_desc* vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index cdf44aa9a501..af78cd72b8f3 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -401,6 +401,12 @@ enum HV_GENERIC_SET_FORMAT { #define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 +/* + * The Hyper-V TimeRefCount register and the TSC + * page provide a guest VM clock with 100ns tick rate + */ +#define HV_CLOCK_HZ (NSEC_PER_SEC/100) + typedef struct _HV_REFERENCE_TSC_PAGE { __u32 tsc_sequence; __u32 res1; diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index c92a367a4a7a..0278aa66ef62 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -52,6 +52,9 @@ #define INTEL_FAM6_CANNONLAKE_MOBILE 0x66 +#define INTEL_FAM6_ICELAKE_X 0x6A +#define INTEL_FAM6_ICELAKE_XEON_D 0x6C +#define INTEL_FAM6_ICELAKE_DESKTOP 0x7D #define INTEL_FAM6_ICELAKE_MOBILE 0x7E #define INTEL_FAM6_ICELAKE_NNPI 0x9D diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 450d69a1e6fa..26d1eb83f72a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1,11 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Kernel-based Virtual Machine driver for Linux * * This header defines architecture specific interfaces, x86 version - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * */ #ifndef _ASM_X86_KVM_HOST_H diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 616f8e637bc3..0c196c47d621 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -1,13 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * AMD Memory Encryption Support * * Copyright (C) 2016 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __X86_MEM_ENCRYPT_H__ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index cc60e617931c..f4fa8a9d5d0b 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -105,6 +105,17 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) #define hv_get_crash_ctl(val) \ rdmsrl(HV_X64_MSR_CRASH_CTL, val) +#define hv_get_time_ref_count(val) \ + rdmsrl(HV_X64_MSR_TIME_REF_COUNT, val) + +#define hv_get_reference_tsc(val) \ + rdmsrl(HV_X64_MSR_REFERENCE_TSC, val) +#define hv_set_reference_tsc(val) \ + wrmsrl(HV_X64_MSR_REFERENCE_TSC, val) +#define hv_set_clocksource_vdso(val) \ + ((val).archdata.vclock_mode = VCLOCK_HVCLOCK) +#define hv_get_raw_timer() rdtsc_ordered() + void hyperv_callback_vector(void); void hyperv_reenlightenment_vector(void); #ifdef CONFIG_TRACING @@ -133,7 +144,6 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {} #if IS_ENABLED(CONFIG_HYPERV) -extern struct clocksource *hyperv_cs; extern void *hv_hypercall_pg; extern void __percpu **hyperv_pcpu_input_arg; @@ -387,73 +397,4 @@ static inline int hyperv_flush_guest_mapping_range(u64 as, } #endif /* CONFIG_HYPERV */ -#ifdef CONFIG_HYPERV_TSCPAGE -struct ms_hyperv_tsc_page *hv_get_tsc_page(void); -static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, - u64 *cur_tsc) -{ - u64 scale, offset; - u32 sequence; - - /* - * The protocol for reading Hyper-V TSC page is specified in Hypervisor - * Top-Level Functional Specification ver. 3.0 and above. To get the - * reference time we must do the following: - * - READ ReferenceTscSequence - * A special '0' value indicates the time source is unreliable and we - * need to use something else. The currently published specification - * versions (up to 4.0b) contain a mistake and wrongly claim '-1' - * instead of '0' as the special value, see commit c35b82ef0294. - * - ReferenceTime = - * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset - * - READ ReferenceTscSequence again. In case its value has changed - * since our first reading we need to discard ReferenceTime and repeat - * the whole sequence as the hypervisor was updating the page in - * between. - */ - do { - sequence = READ_ONCE(tsc_pg->tsc_sequence); - if (!sequence) - return U64_MAX; - /* - * Make sure we read sequence before we read other values from - * TSC page. - */ - smp_rmb(); - - scale = READ_ONCE(tsc_pg->tsc_scale); - offset = READ_ONCE(tsc_pg->tsc_offset); - *cur_tsc = rdtsc_ordered(); - - /* - * Make sure we read sequence after we read all other values - * from TSC page. - */ - smp_rmb(); - - } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence); - - return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset; -} - -static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) -{ - u64 cur_tsc; - - return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc); -} - -#else -static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void) -{ - return NULL; -} - -static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, - u64 *cur_tsc) -{ - BUG(); - return U64_MAX; -} -#endif #endif diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index b6033680d458..19b695ff2c68 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PVCLOCK_H #define _ASM_X86_PVCLOCK_H -#include <linux/clocksource.h> +#include <asm/clocksource.h> #include <asm/pvclock-abi.h> /* some helper functions for xen and kvm pv clock sources */ diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h index cef818b16045..8ac563abb567 100644 --- a/arch/x86/include/asm/time.h +++ b/arch/x86/include/asm/time.h @@ -7,6 +7,7 @@ extern void hpet_time_init(void); extern void time_init(void); +extern bool pit_timer_init(void); extern struct clock_event_device *global_clock_event; diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..ae91429129a6 --- /dev/null +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -0,0 +1,261 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Fast user context implementation of clock_gettime, gettimeofday, and time. + * + * Copyright (C) 2019 ARM Limited. + * Copyright 2006 Andi Kleen, SUSE Labs. + * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> + * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include <uapi/linux/time.h> +#include <asm/vgtod.h> +#include <asm/vvar.h> +#include <asm/unistd.h> +#include <asm/msr.h> +#include <asm/pvclock.h> +#include <clocksource/hyperv_timer.h> + +#define __vdso_data (VVAR(_vdso_data)) + +#define VDSO_HAS_TIME 1 + +#define VDSO_HAS_CLOCK_GETRES 1 + +/* + * Declare the memory-mapped vclock data pages. These come from hypervisors. + * If we ever reintroduce something like direct access to an MMIO clock like + * the HPET again, it will go here as well. + * + * A load from any of these pages will segfault if the clock in question is + * disabled, so appropriate compiler barriers and checks need to be used + * to prevent stray loads. + * + * These declarations MUST NOT be const. The compiler will assume that + * an extern const variable has genuinely constant contents, and the + * resulting code won't work, since the whole point is that these pages + * change over time, possibly while we're accessing them. + */ + +#ifdef CONFIG_PARAVIRT_CLOCK +/* + * This is the vCPU 0 pvclock page. We only use pvclock from the vDSO + * if the hypervisor tells us that all vCPUs can get valid data from the + * vCPU 0 page. + */ +extern struct pvclock_vsyscall_time_info pvclock_page + __attribute__((visibility("hidden"))); +#endif + +#ifdef CONFIG_HYPERV_TSCPAGE +extern struct ms_hyperv_tsc_page hvclock_page + __attribute__((visibility("hidden"))); +#endif + +#ifndef BUILD_VDSO32 + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ("syscall" : "=a" (ret), "=m" (*_ts) : + "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) : + "rcx", "r11"); + + return ret; +} + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory"); + + return ret; +} + +static __always_inline +long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ("syscall" : "=a" (ret), "=m" (*_ts) : + "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) : + "rcx", "r11"); + + return ret; +} + +#else + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + long ret; + + asm( + "mov %%ebx, %%edx \n" + "mov %2, %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret) + : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz) + : "memory", "edx"); + + return ret; +} + +static __always_inline long +clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +#endif + +#ifdef CONFIG_PARAVIRT_CLOCK +static u64 vread_pvclock(void) +{ + const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti; + u32 version; + u64 ret; + + /* + * Note: The kernel and hypervisor must guarantee that cpu ID + * number maps 1:1 to per-CPU pvclock time info. + * + * Because the hypervisor is entirely unaware of guest userspace + * preemption, it cannot guarantee that per-CPU pvclock time + * info is updated if the underlying CPU changes or that that + * version is increased whenever underlying CPU changes. + * + * On KVM, we are guaranteed that pvti updates for any vCPU are + * atomic as seen by *all* vCPUs. This is an even stronger + * guarantee than we get with a normal seqlock. + * + * On Xen, we don't appear to have that guarantee, but Xen still + * supplies a valid seqlock using the version field. + * + * We only do pvclock vdso timing at all if + * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to + * mean that all vCPUs have matching pvti and that the TSC is + * synced, so we can just look at vCPU 0's pvti. + */ + + do { + version = pvclock_read_begin(pvti); + + if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) + return U64_MAX; + + ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); + } while (pvclock_read_retry(pvti, version)); + + return ret; +} +#endif + +#ifdef CONFIG_HYPERV_TSCPAGE +static u64 vread_hvclock(void) +{ + return hv_read_tsc_page(&hvclock_page); +} +#endif + +static inline u64 __arch_get_hw_counter(s32 clock_mode) +{ + if (clock_mode == VCLOCK_TSC) + return (u64)rdtsc_ordered(); + /* + * For any memory-mapped vclock type, we need to make sure that gcc + * doesn't cleverly hoist a load before the mode check. Otherwise we + * might end up touching the memory-mapped page even if the vclock in + * question isn't enabled, which will segfault. Hence the barriers. + */ +#ifdef CONFIG_PARAVIRT_CLOCK + if (clock_mode == VCLOCK_PVCLOCK) { + barrier(); + return vread_pvclock(); + } +#endif +#ifdef CONFIG_HYPERV_TSCPAGE + if (clock_mode == VCLOCK_HVCLOCK) { + barrier(); + return vread_hvclock(); + } +#endif + return U64_MAX; +} + +static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +{ + return __vdso_data; +} + +/* + * x86 specific delta calculation. + * + * The regular implementation assumes that clocksource reads are globally + * monotonic. The TSC can be slightly off across sockets which can cause + * the regular delta calculation (@cycles - @last) to return a huge time + * jump. + * + * Therefore it needs to be verified that @cycles are greater than + * @last. If not then use @last, which is the base time of the current + * conversion period. + * + * This variant also removes the masking of the subtraction because the + * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX + * which would result in a pointless operation. The compiler cannot + * optimize it away as the mask comes from the vdso data and is not compile + * time constant. + */ +static __always_inline +u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +{ + if (cycles > last) + return (cycles - last) * mult; + return 0; +} +#define vdso_calc_delta vdso_calc_delta + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..0026ab2123ce --- /dev/null +++ b/arch/x86/include/asm/vdso/vsyscall.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include <linux/hrtimer.h> +#include <linux/timekeeper_internal.h> +#include <vdso/datapage.h> +#include <asm/vgtod.h> +#include <asm/vvar.h> + +int vclocks_used __read_mostly; + +DEFINE_VVAR(struct vdso_data, _vdso_data); +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. + */ +static __always_inline +struct vdso_data *__x86_get_k_vdso_data(void) +{ + return _vdso_data; +} +#define __arch_get_k_vdso_data __x86_get_k_vdso_data + +static __always_inline +int __x86_get_clock_mode(struct timekeeper *tk) +{ + int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; + + /* Mark the new vclock used. */ + BUILD_BUG_ON(VCLOCK_MAX >= 32); + WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode)); + + return vclock_mode; +} +#define __arch_get_clock_mode __x86_get_clock_mode + +/* The asm-generic header needs to be included after the definitions above */ +#include <asm-generic/vdso/vsyscall.h> + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 913a133f8e6f..a2638c6124ed 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -3,7 +3,9 @@ #define _ASM_X86_VGTOD_H #include <linux/compiler.h> -#include <linux/clocksource.h> +#include <asm/clocksource.h> +#include <vdso/datapage.h> +#include <vdso/helpers.h> #include <uapi/linux/time.h> @@ -13,81 +15,10 @@ typedef u64 gtod_long_t; typedef unsigned long gtod_long_t; #endif -/* - * There is one of these objects in the vvar page for each - * vDSO-accelerated clockid. For high-resolution clocks, this encodes - * the time corresponding to vsyscall_gtod_data.cycle_last. For coarse - * clocks, this encodes the actual time. - * - * To confuse the reader, for high-resolution clocks, nsec is left-shifted - * by vsyscall_gtod_data.shift. - */ -struct vgtod_ts { - u64 sec; - u64 nsec; -}; - -#define VGTOD_BASES (CLOCK_TAI + 1) -#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI)) -#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE)) - -/* - * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time - * so be carefull by modifying this structure. - */ -struct vsyscall_gtod_data { - unsigned int seq; - - int vclock_mode; - u64 cycle_last; - u64 mask; - u32 mult; - u32 shift; - - struct vgtod_ts basetime[VGTOD_BASES]; - - int tz_minuteswest; - int tz_dsttime; -}; -extern struct vsyscall_gtod_data vsyscall_gtod_data; - extern int vclocks_used; static inline bool vclock_was_used(int vclock) { return READ_ONCE(vclocks_used) & (1 << vclock); } -static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s) -{ - unsigned int ret; - -repeat: - ret = READ_ONCE(s->seq); - if (unlikely(ret & 1)) { - cpu_relax(); - goto repeat; - } - smp_rmb(); - return ret; -} - -static inline int gtod_read_retry(const struct vsyscall_gtod_data *s, - unsigned int start) -{ - smp_rmb(); - return unlikely(s->seq != start); -} - -static inline void gtod_write_begin(struct vsyscall_gtod_data *s) -{ - ++s->seq; - smp_wmb(); -} - -static inline void gtod_write_end(struct vsyscall_gtod_data *s) -{ - smp_wmb(); - ++s->seq; -} - #endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 1fc7a0d1e877..9aad0e0876fb 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* CPU virtualization extensions handling * * This should carry the code for handling CPU virtualization extensions @@ -8,9 +9,6 @@ * Copyright (C) 2008, Red Hat Inc. * * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #ifndef _ASM_X86_VIRTEX_H #define _ASM_X86_VIRTEX_H diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index b986b2ca688a..ab60a71a8dcb 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -13,10 +13,12 @@ extern void set_vsyscall_pgtable_user_bits(pgd_t *root); * Called on instruction fetch fault in vsyscall page. * Returns true if handled. */ -extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); +extern bool emulate_vsyscall(unsigned long error_code, + struct pt_regs *regs, unsigned long address); #else static inline void map_vsyscall(void) {} -static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) +static inline bool emulate_vsyscall(unsigned long error_code, + struct pt_regs *regs, unsigned long address) { return false; } diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index e474f5c6e387..32f5d9a0b90e 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h @@ -32,19 +32,20 @@ extern char __vvar_page; #define DECLARE_VVAR(offset, type, name) \ - extern type vvar_ ## name __attribute__((visibility("hidden"))); + extern type vvar_ ## name[CS_BASES] \ + __attribute__((visibility("hidden"))); #define VVAR(name) (vvar_ ## name) #define DEFINE_VVAR(type, name) \ - type name \ + type name[CS_BASES] \ __attribute__((section(".vvar_" #name), aligned(16))) __visible #endif /* DECLARE_VVAR(offset, type, name) */ -DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) +DECLARE_VVAR(128, struct vdso_data, _vdso_data) #undef DECLARE_VVAR diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 7a0e64ccd6ff..d6ab5b4d15e5 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -383,6 +383,9 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) #define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) +#define KVM_STATE_NESTED_FORMAT_VMX 0 +#define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */ + #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 #define KVM_STATE_NESTED_EVMCS 0x00000004 @@ -390,9 +393,16 @@ struct kvm_sync_regs { #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 -struct kvm_vmx_nested_state { +#define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000 + +struct kvm_vmx_nested_state_data { + __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; + __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; +}; + +struct kvm_vmx_nested_state_hdr { __u64 vmxon_pa; - __u64 vmcs_pa; + __u64 vmcs12_pa; struct { __u16 flags; @@ -401,24 +411,25 @@ struct kvm_vmx_nested_state { /* for KVM_CAP_NESTED_STATE */ struct kvm_nested_state { - /* KVM_STATE_* flags */ __u16 flags; - - /* 0 for VMX, 1 for SVM. */ __u16 format; - - /* 128 for SVM, 128 + VMCS size for VMX. */ __u32 size; union { - /* VMXON, VMCS */ - struct kvm_vmx_nested_state vmx; + struct kvm_vmx_nested_state_hdr vmx; /* Pad the header to 128 bytes. */ __u8 pad[120]; - }; + } hdr; - __u8 data[0]; + /* + * Define data region as 0 bytes to preserve backwards-compatability + * to old definition of kvm_nested_state in order to avoid changing + * KVM_{GET,PUT}_NESTED_STATE ioctl values. + */ + union { + struct kvm_vmx_nested_state_data vmx[0]; + } data; }; #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h index ac67bbea10ca..7c9d2bb3833b 100644 --- a/arch/x86/include/uapi/asm/perf_regs.h +++ b/arch/x86/include/uapi/asm/perf_regs.h @@ -52,4 +52,7 @@ enum perf_event_x86_regs { /* These include both GPRs and XMMX registers */ PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2, }; + +#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1)) + #endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index cc51275c8759..002aedc69393 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -1,6 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Shared support code for AMD K8 northbridges and derivates. - * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. + * Copyright 2006 Andi Kleen, SUSE Labs. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 177aa8ef2afa..1bd91cb7b320 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -195,7 +195,7 @@ static struct resource lapic_resource = { .flags = IORESOURCE_MEM | IORESOURCE_BUSY, }; -unsigned int lapic_timer_frequency = 0; +unsigned int lapic_timer_period = 0; static void apic_pm_activate(void); @@ -501,7 +501,7 @@ lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot) if (evt->features & CLOCK_EVT_FEAT_DUMMY) return 0; - __setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1); + __setup_APIC_LVTT(lapic_timer_period, oneshot, 1); return 0; } @@ -805,11 +805,11 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) static int __init lapic_init_clockevent(void) { - if (!lapic_timer_frequency) + if (!lapic_timer_period) return -1; /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR, + lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR, TICK_NSEC, lapic_clockevent.shift); lapic_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent); @@ -821,6 +821,33 @@ static int __init lapic_init_clockevent(void) return 0; } +bool __init apic_needs_pit(void) +{ + /* + * If the frequencies are not known, PIT is required for both TSC + * and apic timer calibration. + */ + if (!tsc_khz || !cpu_khz) + return true; + + /* Is there an APIC at all? */ + if (!boot_cpu_has(X86_FEATURE_APIC)) + return true; + + /* Deadline timer is based on TSC so no further PIT action required */ + if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) + return false; + + /* APIC timer disabled? */ + if (disable_apic_timer) + return true; + /* + * The APIC timer frequency is known already, no PIT calibration + * required. If unknown, let the PIT be initialized. + */ + return lapic_timer_period == 0; +} + static int __init calibrate_APIC_clock(void) { struct clock_event_device *levt = this_cpu_ptr(&lapic_events); @@ -839,7 +866,7 @@ static int __init calibrate_APIC_clock(void) */ if (!lapic_init_clockevent()) { apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n", - lapic_timer_frequency); + lapic_timer_period); /* * Direct calibration methods must have an always running * local APIC timer, no need for broadcast timer. @@ -884,13 +911,13 @@ static int __init calibrate_APIC_clock(void) pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, &delta, &deltatsc); - lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; + lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; lapic_init_clockevent(); apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", - lapic_timer_frequency); + lapic_timer_period); if (boot_cpu_has(X86_FEATURE_TSC)) { apic_printk(APIC_VERBOSE, "..... CPU clock speed is " @@ -901,13 +928,13 @@ static int __init calibrate_APIC_clock(void) apic_printk(APIC_VERBOSE, "..... host bus clock speed is " "%u.%04u MHz.\n", - lapic_timer_frequency / (1000000 / HZ), - lapic_timer_frequency % (1000000 / HZ)); + lapic_timer_period / (1000000 / HZ), + lapic_timer_period % (1000000 / HZ)); /* * Do a sanity check on the APIC calibration result */ - if (lapic_timer_frequency < (1000000 / HZ)) { + if (lapic_timer_period < (1000000 / HZ)) { local_irq_enable(); pr_warning("APIC frequency too slow, disabling apic timer\n"); return -1; @@ -1351,6 +1378,8 @@ void __init init_bsp_APIC(void) apic_write(APIC_LVT1, value); } +static void __init apic_bsp_setup(bool upmode); + /* Init the interrupt delivery mode for the BSP */ void __init apic_intr_mode_init(void) { @@ -1464,7 +1493,8 @@ static void apic_pending_intr_clear(void) if (queued) { if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) { ntsc = rdtsc(); - max_loops = (cpu_khz << 10) - (ntsc - tsc); + max_loops = (long long)cpu_khz << 10; + max_loops -= ntsc - tsc; } else { max_loops--; } @@ -2040,21 +2070,32 @@ __visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs) entering_irq(); trace_spurious_apic_entry(vector); + inc_irq_stat(irq_spurious_count); + + /* + * If this is a spurious interrupt then do not acknowledge + */ + if (vector == SPURIOUS_APIC_VECTOR) { + /* See SDM vol 3 */ + pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n", + smp_processor_id()); + goto out; + } + /* - * Check if this really is a spurious interrupt and ACK it - * if it is a vectored one. Just in case... - * Spurious interrupts should not be ACKed. + * If it is a vectored one, verify it's set in the ISR. If set, + * acknowledge it. */ v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1)); - if (v & (1 << (vector & 0x1f))) + if (v & (1 << (vector & 0x1f))) { + pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n", + vector, smp_processor_id()); ack_APIC_irq(); - - inc_irq_stat(irq_spurious_count); - - /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - pr_info("spurious APIC interrupt through vector %02x on CPU#%d, " - "should never happen.\n", vector, smp_processor_id()); - + } else { + pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n", + vector, smp_processor_id()); + } +out: trace_spurious_apic_exit(vector); exiting_irq(); } @@ -2415,11 +2456,8 @@ static void __init apic_bsp_up_setup(void) /** * apic_bsp_setup - Setup function for local apic and io-apic * @upmode: Force UP mode (for APIC_init_uniprocessor) - * - * Returns: - * apic_id of BSP APIC */ -void __init apic_bsp_setup(bool upmode) +static void __init apic_bsp_setup(bool upmode) { connect_bsp_APIC(); if (upmode) diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index bf083c3f1d73..bbdca603f94a 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -78,7 +78,7 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) int cpu = smp_processor_id(); if (cpu < BITS_PER_LONG) - clear_bit(cpu, &mask); + __clear_bit(cpu, &mask); _flat_send_IPI_mask(mask, vector); } @@ -92,7 +92,7 @@ static void flat_send_IPI_allbutself(int vector) unsigned long mask = cpumask_bits(cpu_online_mask)[0]; if (cpu < BITS_PER_LONG) - clear_bit(cpu, &mask); + __clear_bit(cpu, &mask); _flat_send_IPI_mask(mask, vector); } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 53aa234a6803..c7bb6c69f21c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -58,6 +58,7 @@ #include <asm/acpi.h> #include <asm/dma.h> #include <asm/timer.h> +#include <asm/time.h> #include <asm/i8259.h> #include <asm/setup.h> #include <asm/irq_remapping.h> @@ -1893,6 +1894,50 @@ static int ioapic_set_affinity(struct irq_data *irq_data, return ret; } +/* + * Interrupt shutdown masks the ioapic pin, but the interrupt might already + * be in flight, but not yet serviced by the target CPU. That means + * __synchronize_hardirq() would return and claim that everything is calmed + * down. So free_irq() would proceed and deactivate the interrupt and free + * resources. + * + * Once the target CPU comes around to service it it will find a cleared + * vector and complain. While the spurious interrupt is harmless, the full + * release of resources might prevent the interrupt from being acknowledged + * which keeps the hardware in a weird state. + * + * Verify that the corresponding Remote-IRR bits are clear. + */ +static int ioapic_irq_get_chip_state(struct irq_data *irqd, + enum irqchip_irq_state which, + bool *state) +{ + struct mp_chip_data *mcd = irqd->chip_data; + struct IO_APIC_route_entry rentry; + struct irq_pin_list *p; + + if (which != IRQCHIP_STATE_ACTIVE) + return -EINVAL; + + *state = false; + raw_spin_lock(&ioapic_lock); + for_each_irq_pin(p, mcd->irq_2_pin) { + rentry = __ioapic_read_entry(p->apic, p->pin); + /* + * The remote IRR is only valid in level trigger mode. It's + * meaning is undefined for edge triggered interrupts and + * irrelevant because the IO-APIC treats them as fire and + * forget. + */ + if (rentry.irr && rentry.trigger) { + *state = true; + break; + } + } + raw_spin_unlock(&ioapic_lock); + return 0; +} + static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", .irq_startup = startup_ioapic_irq, @@ -1902,6 +1947,7 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_eoi = ioapic_ack_level, .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_get_irqchip_state = ioapic_irq_get_chip_state, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -1914,6 +1960,7 @@ static struct irq_chip ioapic_ir_chip __read_mostly = { .irq_eoi = ioapic_ir_ack_level, .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_get_irqchip_state = ioapic_irq_get_chip_state, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -2083,6 +2130,9 @@ static inline void __init check_timer(void) unsigned long flags; int no_pin1 = 0; + if (!global_clock_event) + return; + local_irq_save(flags); /* diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 72a94401f9e0..dad0dd759de2 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Support of MSI, HPET and DMAR interrupts. * @@ -5,10 +6,6 @@ * Moved from arch/x86/kernel/apic/io_apic.c. * Jiang Liu <jiang.liu@linux.intel.com> * Convert to hierarchical irqdomain - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 3173e07d3791..fdacb864c3dd 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Local APIC related interfaces to support IOAPIC, MSI, etc. * @@ -5,10 +6,6 @@ * Moved from arch/x86/kernel/apic/io_apic.c. * Jiang Liu <jiang.liu@linux.intel.com> * Enable support of hierarchical irqdomains - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/interrupt.h> #include <linux/irq.h> @@ -343,7 +340,7 @@ static void clear_irq_vector(struct irq_data *irqd) trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector, apicd->prev_cpu); - per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED; + per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN; irq_matrix_free(vector_matrix, apicd->cpu, vector, managed); apicd->vector = 0; @@ -352,7 +349,7 @@ static void clear_irq_vector(struct irq_data *irqd) if (!vector) return; - per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED; + per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN; irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed); apicd->prev_vector = 0; apicd->move_in_progress = 0; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 7685444a106b..609e499387a1 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -50,7 +50,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) cpumask_copy(tmpmsk, mask); /* If IPI should not be sent to self, clear current CPU */ if (apic_dest != APIC_DEST_ALLINC) - cpumask_clear_cpu(smp_processor_id(), tmpmsk); + __cpumask_clear_cpu(smp_processor_id(), tmpmsk); /* Collapse cpus in a cluster so a single IPI per cluster is sent */ for_each_cpu(cpu, tmpmsk) { diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 03b4cc0ec3a7..66ca906aa790 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -836,6 +836,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) } /* + * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper + * bit in the mask to allow guests to use the mitigation even in the + * case where the host does not enable it. + */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; + } + + /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass @@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); } } diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index a444028d8145..b5353244749b 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -20,6 +20,7 @@ struct cpuid_dep { * but it's difficult to tell that to the init reference checker. */ static const struct cpuid_dep cpuid_deps[] = { + { X86_FEATURE_FXSR, X86_FEATURE_FPU }, { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE }, { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE }, { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE }, @@ -27,7 +28,11 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_PKU, X86_FEATURE_XSAVE }, { X86_FEATURE_MPX, X86_FEATURE_XSAVE }, { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE }, + { X86_FEATURE_CMOV, X86_FEATURE_FXSR }, + { X86_FEATURE_MMX, X86_FEATURE_FXSR }, + { X86_FEATURE_MMXEXT, X86_FEATURE_MMX }, { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR }, + { X86_FEATURE_XSAVE, X86_FEATURE_FXSR }, { X86_FEATURE_XMM, X86_FEATURE_FXSR }, { X86_FEATURE_XMM2, X86_FEATURE_XMM }, { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 70a04436380e..cb0fdcaf1415 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -789,13 +789,16 @@ static struct syscore_ops mc_syscore_ops = { .resume = mc_bp_resume, }; -static int mc_cpu_online(unsigned int cpu) +static int mc_cpu_starting(unsigned int cpu) { - struct device *dev; - - dev = get_cpu_device(cpu); microcode_update_cpu(cpu); pr_debug("CPU%d added\n", cpu); + return 0; +} + +static int mc_cpu_online(unsigned int cpu) +{ + struct device *dev = get_cpu_device(cpu); if (sysfs_create_group(&dev->kobj, &mc_attr_group)) pr_err("Failed to create group for CPU%d\n", cpu); @@ -872,6 +875,8 @@ int __init microcode_init(void) goto out_ucode_group; register_syscore_ops(&mc_syscore_ops); + cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting", + mc_cpu_starting, NULL); cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online", mc_cpu_online, mc_cpu_down_prep); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 7df29f08871b..062f77279ce3 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -17,6 +17,7 @@ #include <linux/irq.h> #include <linux/kexec.h> #include <linux/i8253.h> +#include <linux/random.h> #include <asm/processor.h> #include <asm/hypervisor.h> #include <asm/hyperv-tlfs.h> @@ -80,6 +81,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) inc_irq_stat(hyperv_stimer0_count); if (hv_stimer0_handler) hv_stimer0_handler(); + add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0); ack_APIC_irq(); exiting_irq(); @@ -89,7 +91,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)) { *vector = HYPERV_STIMER0_VECTOR; - *irq = 0; /* Unused on x86/x64 */ + *irq = -1; /* Unused on x86/x64 */ hv_stimer0_handler = handler; return 0; } @@ -266,9 +268,9 @@ static void __init ms_hyperv_init_platform(void) rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency); hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ); - lapic_timer_frequency = hv_lapic_frequency; + lapic_timer_period = hv_lapic_frequency; pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n", - lapic_timer_frequency); + lapic_timer_period); } register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST, diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 7ee93125a211..397206f23d14 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -360,6 +360,9 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) struct list_head *head; struct rdtgroup *entry; + if (!is_mbm_local_enabled()) + return; + r_mba = &rdt_resources_all[RDT_RESOURCE_MBA]; closid = rgrp->closid; rmid = rgrp->mon.rmid; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 2f48f208f7e2..2f4824793798 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -796,8 +796,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { struct rdt_resource *r = of->kn->parent->priv; - u32 sw_shareable = 0, hw_shareable = 0; - u32 exclusive = 0, pseudo_locked = 0; + /* + * Use unsigned long even though only 32 bits are used to ensure + * test_bit() is used safely. + */ + unsigned long sw_shareable = 0, hw_shareable = 0; + unsigned long exclusive = 0, pseudo_locked = 0; struct rdt_domain *dom; int i, hwb, swb, excl, psl; enum rdtgrp_mode mode; @@ -842,10 +846,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, } for (i = r->cache.cbm_len - 1; i >= 0; i--) { pseudo_locked = dom->plr ? dom->plr->cbm : 0; - hwb = test_bit(i, (unsigned long *)&hw_shareable); - swb = test_bit(i, (unsigned long *)&sw_shareable); - excl = test_bit(i, (unsigned long *)&exclusive); - psl = test_bit(i, (unsigned long *)&pseudo_locked); + hwb = test_bit(i, &hw_shareable); + swb = test_bit(i, &sw_shareable); + excl = test_bit(i, &exclusive); + psl = test_bit(i, &pseudo_locked); if (hwb && swb) seq_putc(seq, 'X'); else if (hwb && !swb) @@ -2486,26 +2490,19 @@ out_destroy: */ static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r) { - /* - * Convert the u32 _val to an unsigned long required by all the bit - * operations within this function. No more than 32 bits of this - * converted value can be accessed because all bit operations are - * additionally provided with cbm_len that is initialized during - * hardware enumeration using five bits from the EAX register and - * thus never can exceed 32 bits. - */ - unsigned long *val = (unsigned long *)_val; + unsigned long val = *_val; unsigned int cbm_len = r->cache.cbm_len; unsigned long first_bit, zero_bit; - if (*val == 0) + if (val == 0) return; - first_bit = find_first_bit(val, cbm_len); - zero_bit = find_next_zero_bit(val, cbm_len, first_bit); + first_bit = find_first_bit(&val, cbm_len); + zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); /* Clear any remaining bits to ensure contiguous region */ - bitmap_clear(val, zero_bit, cbm_len - zero_bit); + bitmap_clear(&val, zero_bit, cbm_len - zero_bit); + *_val = (u32)val; } /* @@ -2534,7 +2531,12 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, if (closid_allocated(i) && i != closid) { mode = rdtgroup_mode_by_closid(i); if (mode == RDT_MODE_PSEUDO_LOCKSETUP) - break; + /* + * ctrl values for locksetup aren't relevant + * until the schemata is written, and the mode + * becomes RDT_MODE_PSEUDO_LOCKED. + */ + continue; /* * If CDP is active include peer domain's * usage to ensure there is no overlap diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 0eda91f8eeac..3c648476d4fb 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -157,7 +157,7 @@ static void __init vmware_platform_setup(void) #ifdef CONFIG_X86_LOCAL_APIC /* Skip lapic calibration since we know the bus frequency. */ - lapic_timer_frequency = ecx / HZ; + lapic_timer_period = ecx / HZ; pr_info("Host bus clock speed read from hypervisor : %u Hz\n", ecx); #endif diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c index e8c8c5d78dbd..e963344b0449 100644 --- a/arch/x86/kernel/eisa.c +++ b/arch/x86/kernel/eisa.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * EISA specific code - * - * This file is licensed under the GPL V2 */ #include <linux/ioport.h> #include <linux/eisa.h> diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 466fca686fb9..12c70840980e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -43,18 +43,6 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu); */ DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); -static void kernel_fpu_disable(void) -{ - WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); - this_cpu_write(in_kernel_fpu, true); -} - -static void kernel_fpu_enable(void) -{ - WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); - this_cpu_write(in_kernel_fpu, false); -} - static bool kernel_fpu_disabled(void) { return this_cpu_read(in_kernel_fpu); @@ -94,42 +82,33 @@ bool irq_fpu_usable(void) } EXPORT_SYMBOL(irq_fpu_usable); -static void __kernel_fpu_begin(void) +void kernel_fpu_begin(void) { - struct fpu *fpu = ¤t->thread.fpu; + preempt_disable(); WARN_ON_FPU(!irq_fpu_usable()); + WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); - kernel_fpu_disable(); + this_cpu_write(in_kernel_fpu, true); - if (current->mm) { - if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { - set_thread_flag(TIF_NEED_FPU_LOAD); - /* - * Ignore return value -- we don't care if reg state - * is clobbered. - */ - copy_fpregs_to_fpstate(fpu); - } + if (!(current->flags & PF_KTHREAD) && + !test_thread_flag(TIF_NEED_FPU_LOAD)) { + set_thread_flag(TIF_NEED_FPU_LOAD); + /* + * Ignore return value -- we don't care if reg state + * is clobbered. + */ + copy_fpregs_to_fpstate(¤t->thread.fpu); } __cpu_invalidate_fpregs_state(); } - -static void __kernel_fpu_end(void) -{ - kernel_fpu_enable(); -} - -void kernel_fpu_begin(void) -{ - preempt_disable(); - __kernel_fpu_begin(); -} EXPORT_SYMBOL_GPL(kernel_fpu_begin); void kernel_fpu_end(void) { - __kernel_fpu_end(); + WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); + + this_cpu_write(in_kernel_fpu, false); preempt_enable(); } EXPORT_SYMBOL_GPL(kernel_fpu_end); @@ -155,7 +134,6 @@ void fpu__save(struct fpu *fpu) trace_x86_fpu_after_save(fpu); fpregs_unlock(); } -EXPORT_SYMBOL_GPL(fpu__save); /* * Legacy x87 fpstate state init: diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index ef0030e3fe6b..6ce7e0a23268 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -204,12 +204,6 @@ static void __init fpu__init_system_xstate_size_legacy(void) */ if (!boot_cpu_has(X86_FEATURE_FPU)) { - /* - * Disable xsave as we do not support it if i387 - * emulation is enabled. - */ - setup_clear_cpu_cap(X86_FEATURE_XSAVE); - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); fpu_kernel_xstate_size = sizeof(struct swregs_state); } else { if (boot_cpu_has(X86_FEATURE_FXSR)) @@ -252,17 +246,20 @@ static void __init fpu__init_parse_early_param(void) char *argptr = arg; int bit; +#ifdef CONFIG_X86_32 if (cmdline_find_option_bool(boot_command_line, "no387")) +#ifdef CONFIG_MATH_EMULATION setup_clear_cpu_cap(X86_FEATURE_FPU); +#else + pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n"); +#endif - if (cmdline_find_option_bool(boot_command_line, "nofxsr")) { + if (cmdline_find_option_bool(boot_command_line, "nofxsr")) setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); - setup_clear_cpu_cap(X86_FEATURE_XMM); - } +#endif if (cmdline_find_option_bool(boot_command_line, "noxsave")) - fpu__xstate_clear_all_cpu_caps(); + setup_clear_cpu_cap(X86_FEATURE_XSAVE); if (cmdline_find_option_bool(boot_command_line, "noxsaveopt")) setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 5a8d118bc423..0071b794ed19 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -5,6 +5,7 @@ #include <linux/compat.h> #include <linux/cpu.h> +#include <linux/pagemap.h> #include <asm/fpu/internal.h> #include <asm/fpu/signal.h> @@ -61,6 +62,11 @@ static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) struct user_i387_ia32_struct env; struct _fpstate_32 __user *fp = buf; + fpregs_lock(); + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) + copy_fxregs_to_kernel(&tsk->thread.fpu); + fpregs_unlock(); + convert_from_fxsr(&env, tsk); if (__copy_to_user(buf, &env, sizeof(env)) || @@ -189,15 +195,7 @@ retry: fpregs_unlock(); if (ret) { - int aligned_size; - int nr_pages; - - aligned_size = offset_in_page(buf_fx) + fpu_user_xstate_size; - nr_pages = DIV_ROUND_UP(aligned_size, PAGE_SIZE); - - ret = get_user_pages_unlocked((unsigned long)buf_fx, nr_pages, - NULL, FOLL_WRITE); - if (ret == nr_pages) + if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size)) goto retry; return -EFAULT; } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 3c36dd1784db..7b4c52aa929f 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -68,15 +68,6 @@ static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; unsigned int fpu_user_xstate_size; /* - * Clear all of the X86_FEATURE_* bits that are unavailable - * when the CPU has no XSAVE support. - */ -void fpu__xstate_clear_all_cpu_caps(void) -{ - setup_clear_cpu_cap(X86_FEATURE_XSAVE); -} - -/* * Return whether the system supports a given xfeature. * * Also return the name of the (most advanced) feature that the caller requested: @@ -709,7 +700,7 @@ static void fpu__init_disable_system_xstate(void) { xfeatures_mask = 0; cr4_clear_bits(X86_CR4_OSXSAVE); - fpu__xstate_clear_all_cpu_caps(); + setup_clear_cpu_cap(X86_FEATURE_XSAVE); } /* diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0927bb158ffc..76228525acd0 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -22,6 +22,7 @@ #include <linux/init.h> #include <linux/list.h> #include <linux/module.h> +#include <linux/memory.h> #include <trace/syscall.h> @@ -34,16 +35,25 @@ #ifdef CONFIG_DYNAMIC_FTRACE int ftrace_arch_code_modify_prepare(void) + __acquires(&text_mutex) { + /* + * Need to grab text_mutex to prevent a race from module loading + * and live kernel patching from changing the text permissions while + * ftrace has it set to "read/write". + */ + mutex_lock(&text_mutex); set_kernel_text_rw(); set_all_modules_text_rw(); return 0; } int ftrace_arch_code_modify_post_process(void) + __releases(&text_mutex) { set_all_modules_text_ro(); set_kernel_text_ro(); + mutex_unlock(&text_mutex); return 0; } diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 16b1cbd3a61e..29ffa495bd1c 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -184,24 +184,25 @@ unsigned long __head __startup_64(unsigned long physaddr, pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask(); if (la57) { - p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); + p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], + physaddr); i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; pgd[i + 0] = (pgdval_t)p4d + pgtable_flags; pgd[i + 1] = (pgdval_t)p4d + pgtable_flags; - i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D; - p4d[i + 0] = (pgdval_t)pud + pgtable_flags; - p4d[i + 1] = (pgdval_t)pud + pgtable_flags; + i = physaddr >> P4D_SHIFT; + p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags; + p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags; } else { i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; pgd[i + 0] = (pgdval_t)pud + pgtable_flags; pgd[i + 1] = (pgdval_t)pud + pgtable_flags; } - i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD; - pud[i + 0] = (pudval_t)pmd + pgtable_flags; - pud[i + 1] = (pudval_t)pmd + pgtable_flags; + i = physaddr >> PUD_SHIFT; + pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags; + pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags; pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; /* Filter out unsupported __PAGE_KERNEL_* bits: */ @@ -211,8 +212,9 @@ unsigned long __head __startup_64(unsigned long physaddr, pmd_entry += physaddr; for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) { - int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD; - pmd[idx] = pmd_entry + i * PMD_SIZE; + int idx = i + (physaddr >> PMD_SHIFT); + + pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE; } /* diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 0d307a657abb..2b7999a1a50a 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -8,6 +8,7 @@ #include <linux/timex.h> #include <linux/i8253.h> +#include <asm/apic.h> #include <asm/hpet.h> #include <asm/time.h> #include <asm/smp.h> @@ -18,10 +19,32 @@ */ struct clock_event_device *global_clock_event; -void __init setup_pit_timer(void) +/* + * Modern chipsets can disable the PIT clock which makes it unusable. It + * would be possible to enable the clock but the registers are chipset + * specific and not discoverable. Avoid the whack a mole game. + * + * These platforms have discoverable TSC/CPU frequencies but this also + * requires to know the local APIC timer frequency as it normally is + * calibrated against the PIT interrupt. + */ +static bool __init use_pit(void) +{ + if (!IS_ENABLED(CONFIG_X86_TSC) || !boot_cpu_has(X86_FEATURE_TSC)) + return true; + + /* This also returns true when APIC is disabled */ + return apic_needs_pit(); +} + +bool __init pit_timer_init(void) { + if (!use_pit()) + return false; + clockevent_i8253_init(true); global_clock_event = &i8253_clockevent; + return true; } #ifndef CONFIG_X86_64 diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 6d8917875f44..87ef69a72c52 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Interrupt descriptor table related code - * - * This file is licensed under the GPL V2 */ #include <linux/interrupt.h> @@ -320,7 +319,8 @@ void __init idt_setup_apic_and_irq_gates(void) #ifdef CONFIG_X86_LOCAL_APIC for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { set_bit(i, system_vectors); - set_intr_gate(i, spurious_interrupt); + entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR); + set_intr_gate(i, entry); } #endif } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 9b68b5b00ac9..cc496eb7a8d2 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -247,7 +247,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) if (!handle_irq(desc, regs)) { ack_APIC_irq(); - if (desc != VECTOR_RETRIGGERED) { + if (desc != VECTOR_RETRIGGERED && desc != VECTOR_SHUTDOWN) { pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n", __func__, smp_processor_id(), vector); diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 1b2ee55a2dfb..ba95bc70460d 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -45,7 +45,7 @@ static void jailhouse_get_wallclock(struct timespec64 *now) static void __init jailhouse_timer_init(void) { - lapic_timer_frequency = setup_data.apic_khz * (1000 / HZ); + lapic_timer_period = setup_data.apic_khz * (1000 / HZ); } static unsigned long jailhouse_get_tsc(void) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 22f60dd26460..f03237e3f192 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Kexec bzImage loader * * Copyright (C) 2014 Red Hat Inc. * Authors: * Vivek Goyal <vgoyal@redhat.com> - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. */ #define pr_fmt(fmt) "kexec-bzImage64: " fmt diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 9a8c1648fc9a..6690c5652aeb 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -758,7 +758,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) BREAK_INSTR_SIZE); bpt->type = BP_POKE_BREAKPOINT; - return err; + return 0; } int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 5409c2800ab5..77854b192fef 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * handle transition of Linux booting another kernel * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. */ #include <linux/mm.h> diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index ceba408ea982..d7be2376ac0b 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * handle transition of Linux booting another kernel * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. */ #define pr_fmt(fmt) "kexec: " fmt diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index 07c30ee17425..bb7e1132290b 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -74,6 +74,9 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) return regs_get_register(regs, pt_regs_offset[idx]); } +#define PERF_REG_X86_RESERVED (((1ULL << PERF_REG_X86_XMM0) - 1) & \ + ~((1ULL << PERF_REG_X86_MAX) - 1)) + #ifdef CONFIG_X86_32 #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \ (1ULL << PERF_REG_X86_R9) | \ @@ -86,7 +89,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) int perf_reg_validate(u64 mask) { - if (!mask || (mask & REG_NOSUPPORT)) + if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED))) return -EINVAL; return 0; @@ -112,7 +115,7 @@ void perf_get_regs_user(struct perf_regs *regs_user, int perf_reg_validate(u64 mask) { - if (!mask || (mask & REG_NOSUPPORT)) + if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED))) return -EINVAL; return 0; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 3108cdc00b29..53f34121fdca 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -737,9 +737,6 @@ static int ioperm_get(struct task_struct *target, void ptrace_disable(struct task_struct *child) { user_disable_single_step(child); -#ifdef TIF_SYSCALL_EMU - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); -#endif } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 0ff3e294d0e5..10125358b9c4 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -3,6 +3,7 @@ */ +#include <linux/clocksource.h> #include <linux/kernel.h> #include <linux/percpu.h> #include <linux/notifier.h> diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index 77630d57e7bf..ee26df08002e 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -1,9 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * relocate_kernel.S - put the kernel image in place to boot * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. */ #include <linux/linkage.h> diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 11eda21eb697..c51ccff5cd01 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -1,9 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * relocate_kernel.S - put the kernel image in place to boot * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. */ #include <linux/linkage.h> diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 4693e2f3a03e..96421f97e75c 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -144,7 +144,7 @@ void native_send_call_func_ipi(const struct cpumask *mask) } cpumask_copy(allbutself, cpu_online_mask); - cpumask_clear_cpu(smp_processor_id(), allbutself); + __cpumask_clear_cpu(smp_processor_id(), allbutself); if (cpumask_equal(mask, allbutself) && cpumask_equal(cpu_online_mask, cpu_callout_mask)) diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 0e14f6c0d35e..07c0e960b3f3 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -82,8 +82,11 @@ static void __init setup_default_timer_irq(void) /* Default timer init function */ void __init hpet_time_init(void) { - if (!hpet_enable()) - setup_pit_timer(); + if (!hpet_enable()) { + if (!pit_timer_init()) + return; + } + setup_default_timer_irq(); } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 0b29e58f288e..59b57605e66c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -632,31 +632,38 @@ unsigned long native_calibrate_tsc(void) crystal_khz = ecx_hz / 1000; - if (crystal_khz == 0) { - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_SKYLAKE_MOBILE: - case INTEL_FAM6_SKYLAKE_DESKTOP: - case INTEL_FAM6_KABYLAKE_MOBILE: - case INTEL_FAM6_KABYLAKE_DESKTOP: - crystal_khz = 24000; /* 24.0 MHz */ - break; - case INTEL_FAM6_ATOM_GOLDMONT_X: - crystal_khz = 25000; /* 25.0 MHz */ - break; - case INTEL_FAM6_ATOM_GOLDMONT: - crystal_khz = 19200; /* 19.2 MHz */ - break; - } - } + /* + * Denverton SoCs don't report crystal clock, and also don't support + * CPUID.0x16 for the calculation below, so hardcode the 25MHz crystal + * clock. + */ + if (crystal_khz == 0 && + boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_X) + crystal_khz = 25000; - if (crystal_khz == 0) - return 0; /* - * TSC frequency determined by CPUID is a "hardware reported" + * TSC frequency reported directly by CPUID is a "hardware reported" * frequency and is the most accurate one so far we have. This * is considered a known frequency. */ - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); + if (crystal_khz != 0) + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); + + /* + * Some Intel SoCs like Skylake and Kabylake don't report the crystal + * clock, but we can easily calculate it to a high degree of accuracy + * by considering the crystal ratio and the CPU speed. + */ + if (crystal_khz == 0 && boot_cpu_data.cpuid_level >= 0x16) { + unsigned int eax_base_mhz, ebx, ecx, edx; + + cpuid(0x16, &eax_base_mhz, &ebx, &ecx, &edx); + crystal_khz = eax_base_mhz * 1000 * + eax_denominator / ebx_numerator; + } + + if (crystal_khz == 0) + return 0; /* * For Atom SoCs TSC is the only reliable clocksource. @@ -665,6 +672,16 @@ unsigned long native_calibrate_tsc(void) if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT) setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); +#ifdef CONFIG_X86_LOCAL_APIC + /* + * The local APIC appears to be fed by the core crystal clock + * (which sounds entirely sensible). We can set the global + * lapic_timer_period here to avoid having to calibrate the APIC + * timer later. + */ + lapic_timer_period = crystal_khz * 1000 / HZ; +#endif + return crystal_khz * ebx_numerator / eax_denominator; } diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 3d0e9aeea7c8..067858fe4db8 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -71,7 +71,7 @@ static const struct x86_cpu_id tsc_msr_cpu_ids[] = { /* * MSR-based CPU/TSC frequency discovery for certain CPUs. * - * Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy + * Set global "lapic_timer_period" to bus_clock_cycles/jiffy * Return processor base frequency in KHz, or 0 on failure. */ unsigned long cpu_khz_from_msr(void) @@ -104,7 +104,7 @@ unsigned long cpu_khz_from_msr(void) res = freq * ratio; #ifdef CONFIG_X86_LOCAL_APIC - lapic_timer_frequency = (freq * 1000) / HZ; + lapic_timer_period = (freq * 1000) / HZ; #endif /* diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 33b66b5c5aec..72b997eaa1fc 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -82,9 +82,9 @@ static struct orc_entry *orc_find(unsigned long ip); * But they are copies of the ftrace entries that are static and * defined in ftrace_*.S, which do have orc entries. * - * If the undwinder comes across a ftrace trampoline, then find the + * If the unwinder comes across a ftrace trampoline, then find the * ftrace function that was used to create it, and use that ftrace - * function's orc entrie, as the placement of the return code in + * function's orc entry, as the placement of the return code in * the stack will be identical. */ static struct orc_entry *orc_ftrace_find(unsigned long ip) @@ -128,6 +128,16 @@ static struct orc_entry null_orc_entry = { .type = ORC_TYPE_CALL }; +/* Fake frame pointer entry -- used as a fallback for generated code */ +static struct orc_entry orc_fp_entry = { + .type = ORC_TYPE_CALL, + .sp_reg = ORC_REG_BP, + .sp_offset = 16, + .bp_reg = ORC_REG_PREV_SP, + .bp_offset = -16, + .end = 0, +}; + static struct orc_entry *orc_find(unsigned long ip) { static struct orc_entry *orc; @@ -392,8 +402,16 @@ bool unwind_next_frame(struct unwind_state *state) * calls and calls to noreturn functions. */ orc = orc_find(state->signal ? state->ip : state->ip - 1); - if (!orc) - goto err; + if (!orc) { + /* + * As a fallback, try to assume this code uses a frame pointer. + * This is useful for generated code, like BPF, which ORC + * doesn't know about. This is just a guess, so the rest of + * the unwind is no longer considered reliable. + */ + orc = &orc_fp_entry; + state->error = true; + } /* End-of-stack check for kernel threads: */ if (orc->sp_reg == ORC_REG_UNDEFINED) { diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 3d3c2f71f617..a024c4f7ba56 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * * verify_cpu.S - Code for cpu long mode and SSE verification. This @@ -9,9 +10,6 @@ * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) * Copyright (c) 2010 Kees Cook (kees.cook@canonical.com) * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. - * * This is a common code for verification whether CPU supports * long mode and SSE or not. It is not called directly instead this * file is included at various places and compiled in that context. diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index e18a9f9f65b5..4992e7c99588 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Kernel-based Virtual Machine driver for Linux * cpuid support routines @@ -6,10 +7,6 @@ * * Copyright 2011 Red Hat, Inc. and/or its affiliates. * Copyright IBM Corporation, 2008 - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * */ #include <linux/kvm_host.h> diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index a2f3432ce090..329361b69d5e 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Kernel-based Virtual Machine driver for Linux * * Copyright 2016 Red Hat, Inc. and/or its affiliates. - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * */ #include <linux/kvm_host.h> #include <linux/debugfs.h> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d0d5dd44b4f4..4a387a235424 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** * emulate.c * @@ -14,9 +15,6 @@ * Avi Kivity <avi@qumranet.com> * Yaniv Kamay <yaniv@qumranet.com> * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 */ diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 8ca4b39918e0..a39e38f13029 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * KVM Microsoft Hyper-V emulation * @@ -15,10 +16,6 @@ * Amit Shah <amit.shah@qumranet.com> * Ben-Ami Yassour <benami@il.ibm.com> * Andrey Smetanin <asmetanin@virtuozzo.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * */ #include "x86.h" diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index fd7cf13a2144..757cb578101c 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * KVM Microsoft Hyper-V emulation * @@ -15,10 +16,6 @@ * Amit Shah <amit.shah@qumranet.com> * Ben-Ami Yassour <benami@il.ibm.com> * Andrey Smetanin <asmetanin@virtuozzo.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * */ #ifndef __ARCH_X86_KVM_HYPERV_H__ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4924f83ed4f3..4dabc318adb8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Local APIC virtualization @@ -13,9 +14,6 @@ * Yaozu (Eddie) Dong <eddie.dong@intel.com> * * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation. - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/kvm_host.h> @@ -2341,7 +2339,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; u32 ppr; - if (!apic_enabled(apic)) + if (!kvm_apic_hw_enabled(apic)) return -1; __apic_update_ppr(apic, &ppr); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1e9ba81accba..98f6e4f88b04 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Kernel-based Virtual Machine driver for Linux * @@ -12,10 +13,6 @@ * Authors: * Yaniv Kamay <yaniv@qumranet.com> * Avi Kivity <avi@qumranet.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * */ #include "irq.h" @@ -5602,14 +5599,18 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) struct page *page; int i; - if (tdp_enabled) - return 0; - /* - * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64. - * Therefore we need to allocate shadow page tables in the first - * 4GB of memory, which happens to fit the DMA32 zone. + * When using PAE paging, the four PDPTEs are treated as 'root' pages, + * while the PDP table is a per-vCPU construct that's allocated at MMU + * creation. When emulating 32-bit mode, cr3 is only 32 bits even on + * x86_64. Therefore we need to allocate the PDP table in the first + * 4GB of memory, which happens to fit the DMA32 zone. Except for + * SVM's 32-bit NPT support, TDP paging doesn't use PAE paging and can + * skip allocating the PDP table. */ + if (tdp_enabled && kvm_x86_ops->get_tdp_level(vcpu) > PT32E_ROOT_LEVEL) + return 0; + page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32); if (!page) return -ENOMEM; diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index abac7e208853..ca39f62aabc6 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * mmu_audit.c: * @@ -11,10 +12,6 @@ * Avi Kivity <avi@qumranet.com> * Marcelo Tosatti <mtosatti@redhat.com> * Xiao Guangrong <xiaoguangrong@cn.fujitsu.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * */ #include <linux/ratelimit.h> diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 9f72cc427158..25ce3edd1872 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * vMTRR implementation * @@ -11,9 +12,6 @@ * Marcelo Tosatti <mtosatti@redhat.com> * Paolo Bonzini <pbonzini@redhat.com> * Xiao Guangrong <guangrong.xiao@linux.intel.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/kvm_host.h> diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index fd04d462fdae..3521e2d176f2 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Support KVM gust page tracking * @@ -8,9 +9,6 @@ * * Author: * Xiao Guangrong <guangrong.xiao@linux.intel.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/kvm_host.h> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 367a47df4ba0..d583bcd119fc 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Kernel-based Virtual Machine driver for Linux * @@ -12,10 +13,6 @@ * Authors: * Yaniv Kamay <yaniv@qumranet.com> * Avi Kivity <avi@qumranet.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * */ /* diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index dd745b58ffd8..ab73a9a639ae 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Kernel-based Virtual Machine -- Performance Monitoring Unit support * @@ -7,10 +8,6 @@ * Avi Kivity <avi@redhat.com> * Gleb Natapov <gleb@redhat.com> * Wei Huang <wei@redhat.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * */ #include <linux/types.h> @@ -264,10 +261,10 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) ctr_val = rdtsc(); break; case VMWARE_BACKDOOR_PMC_REAL_TIME: - ctr_val = ktime_get_boot_ns(); + ctr_val = ktime_get_boottime_ns(); break; case VMWARE_BACKDOOR_PMC_APPARENT_TIME: - ctr_val = ktime_get_boot_ns() + + ctr_val = ktime_get_boottime_ns() + vcpu->kvm->arch.kvmclock_offset; break; default: diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index d3118088f1cd..c8388389a3b0 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * KVM PMU support for AMD * @@ -6,9 +7,6 @@ * Author: * Wei Huang <wei@redhat.com> * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * * Implementation is based on pmu_intel.c file */ #include <linux/types.h> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 735b8c01895e..48c865a4e5dd 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Kernel-based Virtual Machine driver for Linux * @@ -9,10 +10,6 @@ * Authors: * Yaniv Kamay <yaniv@qumranet.com> * Avi Kivity <avi@qumranet.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * */ #define pr_fmt(fmt) "SVM: " fmt diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1032f068f0b9..46af3a5e9209 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -1397,7 +1397,7 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx) } if (unlikely(!(evmcs->hv_clean_fields & - HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) { + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) { vmcs12->exception_bitmap = evmcs->exception_bitmap; } @@ -1437,7 +1437,7 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx) } if (unlikely(!(evmcs->hv_clean_fields & - HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) { + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) { vmcs12->pin_based_vm_exec_control = evmcs->pin_based_vm_exec_control; vmcs12->vm_exit_controls = evmcs->vm_exit_controls; @@ -5226,40 +5226,42 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12; struct kvm_nested_state kvm_state = { .flags = 0, - .format = 0, + .format = KVM_STATE_NESTED_FORMAT_VMX, .size = sizeof(kvm_state), - .vmx.vmxon_pa = -1ull, - .vmx.vmcs_pa = -1ull, + .hdr.vmx.vmxon_pa = -1ull, + .hdr.vmx.vmcs12_pa = -1ull, }; + struct kvm_vmx_nested_state_data __user *user_vmx_nested_state = + &user_kvm_nested_state->data.vmx[0]; if (!vcpu) - return kvm_state.size + 2 * VMCS12_SIZE; + return kvm_state.size + sizeof(*user_vmx_nested_state); vmx = to_vmx(vcpu); vmcs12 = get_vmcs12(vcpu); - if (nested_vmx_allowed(vcpu) && vmx->nested.enlightened_vmcs_enabled) - kvm_state.flags |= KVM_STATE_NESTED_EVMCS; - if (nested_vmx_allowed(vcpu) && (vmx->nested.vmxon || vmx->nested.smm.vmxon)) { - kvm_state.vmx.vmxon_pa = vmx->nested.vmxon_ptr; - kvm_state.vmx.vmcs_pa = vmx->nested.current_vmptr; + kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr; + kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr; if (vmx_has_valid_vmcs12(vcpu)) { - kvm_state.size += VMCS12_SIZE; + kvm_state.size += sizeof(user_vmx_nested_state->vmcs12); + + if (vmx->nested.hv_evmcs) + kvm_state.flags |= KVM_STATE_NESTED_EVMCS; if (is_guest_mode(vcpu) && nested_cpu_has_shadow_vmcs(vmcs12) && vmcs12->vmcs_link_pointer != -1ull) - kvm_state.size += VMCS12_SIZE; + kvm_state.size += sizeof(user_vmx_nested_state->shadow_vmcs12); } if (vmx->nested.smm.vmxon) - kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON; + kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON; if (vmx->nested.smm.guest_mode) - kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE; + kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE; if (is_guest_mode(vcpu)) { kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE; @@ -5294,16 +5296,19 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, copy_shadow_to_vmcs12(vmx); } + BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE); + BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE); + /* * Copy over the full allocated size of vmcs12 rather than just the size * of the struct. */ - if (copy_to_user(user_kvm_nested_state->data, vmcs12, VMCS12_SIZE)) + if (copy_to_user(user_vmx_nested_state->vmcs12, vmcs12, VMCS12_SIZE)) return -EFAULT; if (nested_cpu_has_shadow_vmcs(vmcs12) && vmcs12->vmcs_link_pointer != -1ull) { - if (copy_to_user(user_kvm_nested_state->data + VMCS12_SIZE, + if (copy_to_user(user_vmx_nested_state->shadow_vmcs12, get_shadow_vmcs12(vcpu), VMCS12_SIZE)) return -EFAULT; } @@ -5331,33 +5336,44 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12; u32 exit_qual; + struct kvm_vmx_nested_state_data __user *user_vmx_nested_state = + &user_kvm_nested_state->data.vmx[0]; int ret; - if (kvm_state->format != 0) + if (kvm_state->format != KVM_STATE_NESTED_FORMAT_VMX) return -EINVAL; - if (!nested_vmx_allowed(vcpu)) - return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL; + if (kvm_state->hdr.vmx.vmxon_pa == -1ull) { + if (kvm_state->hdr.vmx.smm.flags) + return -EINVAL; - if (kvm_state->vmx.vmxon_pa == -1ull) { - if (kvm_state->vmx.smm.flags) + if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) return -EINVAL; - if (kvm_state->vmx.vmcs_pa != -1ull) + /* + * KVM_STATE_NESTED_EVMCS used to signal that KVM should + * enable eVMCS capability on vCPU. However, since then + * code was changed such that flag signals vmcs12 should + * be copied into eVMCS in guest memory. + * + * To preserve backwards compatability, allow user + * to set this flag even when there is no VMXON region. + */ + if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS) + return -EINVAL; + } else { + if (!nested_vmx_allowed(vcpu)) return -EINVAL; - vmx_leave_nested(vcpu); - return 0; + if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa)) + return -EINVAL; } - if (!page_address_valid(vcpu, kvm_state->vmx.vmxon_pa)) - return -EINVAL; - - if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && + if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) return -EINVAL; - if (kvm_state->vmx.smm.flags & + if (kvm_state->hdr.vmx.smm.flags & ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON)) return -EINVAL; @@ -5366,21 +5382,26 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags * must be zero. */ - if (is_smm(vcpu) ? kvm_state->flags : kvm_state->vmx.smm.flags) + if (is_smm(vcpu) ? + (kvm_state->flags & + (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING)) + : kvm_state->hdr.vmx.smm.flags) return -EINVAL; - if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && - !(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) + if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && + !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) return -EINVAL; + if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) && + (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled)) + return -EINVAL; + vmx_leave_nested(vcpu); - if (kvm_state->vmx.vmxon_pa == -1ull) - return 0; - if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) - nested_enable_evmcs(vcpu, NULL); + if (kvm_state->hdr.vmx.vmxon_pa == -1ull) + return 0; - vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa; + vmx->nested.vmxon_ptr = kvm_state->hdr.vmx.vmxon_pa; ret = enter_vmx_operation(vcpu); if (ret) return ret; @@ -5389,12 +5410,12 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) return 0; - if (kvm_state->vmx.vmcs_pa != -1ull) { - if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa || - !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa)) + if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) { + if (kvm_state->hdr.vmx.vmcs12_pa == kvm_state->hdr.vmx.vmxon_pa || + !page_address_valid(vcpu, kvm_state->hdr.vmx.vmcs12_pa)) return -EINVAL; - set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa); + set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa); } else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) { /* * Sync eVMCS upon entry as we may not have @@ -5405,16 +5426,16 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, return -EINVAL; } - if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) { + if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) { vmx->nested.smm.vmxon = true; vmx->nested.vmxon = false; - if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) + if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) vmx->nested.smm.guest_mode = true; } vmcs12 = get_vmcs12(vcpu); - if (copy_from_user(vmcs12, user_kvm_nested_state->data, sizeof(*vmcs12))) + if (copy_from_user(vmcs12, user_vmx_nested_state->vmcs12, sizeof(*vmcs12))) return -EFAULT; if (vmcs12->hdr.revision_id != VMCS12_REVISION) @@ -5431,12 +5452,14 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, vmcs12->vmcs_link_pointer != -1ull) { struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); - if (kvm_state->size < sizeof(*kvm_state) + VMCS12_SIZE + sizeof(*vmcs12)) + if (kvm_state->size < + sizeof(*kvm_state) + + sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12)) goto error_guest_mode; if (copy_from_user(shadow_vmcs12, - user_kvm_nested_state->data + VMCS12_SIZE, - sizeof(*vmcs12))) { + user_vmx_nested_state->shadow_vmcs12, + sizeof(*shadow_vmcs12))) { ret = -EFAULT; goto error_guest_mode; } diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index a99613a060dd..68d231d49c7a 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * KVM PMU support for Intel CPUs * @@ -6,10 +7,6 @@ * Authors: * Avi Kivity <avi@redhat.com> * Gleb Natapov <gleb@redhat.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * */ #include <linux/types.h> #include <linux/kvm_host.h> diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h index 3a742428ad17..337718fc8a36 100644 --- a/arch/x86/kvm/vmx/vmcs12.h +++ b/arch/x86/kvm/vmx/vmcs12.h @@ -201,9 +201,10 @@ struct __packed vmcs12 { /* * VMCS12_SIZE is the number of bytes L1 should allocate for the VMXON region * and any VMCS region. Although only sizeof(struct vmcs12) are used by the - * current implementation, 4K are reserved to avoid future complications. + * current implementation, 4K are reserved to avoid future complications and + * to preserve userspace ABI. */ -#define VMCS12_SIZE 0x1000 +#define VMCS12_SIZE KVM_STATE_NESTED_VMX_VMCS_SIZE /* * VMCS12_MAX_FIELD_INDEX is the highest index value used in any diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b93e36ddee5e..d98eac371c0a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Kernel-based Virtual Machine driver for Linux * @@ -10,10 +11,6 @@ * Authors: * Avi Kivity <avi@qumranet.com> * Yaniv Kamay <yaniv@qumranet.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * */ #include <linux/frame.h> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 83aefd759846..63bb1ee8258e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Kernel-based Virtual Machine driver for Linux * @@ -13,10 +14,6 @@ * Yaniv Kamay <yaniv@qumranet.com> * Amit Shah <amit.shah@qumranet.com> * Ben-Ami Yassour <benami@il.ibm.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * */ #include <linux/kvm_host.h> @@ -70,6 +67,7 @@ #include <asm/mshyperv.h> #include <asm/hypervisor.h> #include <asm/intel_pt.h> +#include <clocksource/hyperv_timer.h> #define CREATE_TRACE_POINTS #include "trace.h" @@ -1557,7 +1555,7 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) vcpu->arch.tsc_always_catchup = 1; return 0; } else { - WARN(1, "user requested TSC rate below hardware speed\n"); + pr_warn_ratelimited("user requested TSC rate below hardware speed\n"); return -1; } } @@ -1567,8 +1565,8 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) user_tsc_khz, tsc_khz); if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) { - WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n", - user_tsc_khz); + pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n", + user_tsc_khz); return -1; } @@ -1731,7 +1729,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); offset = kvm_compute_tsc_offset(vcpu, data); - ns = ktime_get_boot_ns(); + ns = ktime_get_boottime_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; if (vcpu->arch.virtual_tsc_khz) { @@ -2073,7 +2071,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) spin_lock(&ka->pvclock_gtod_sync_lock); if (!ka->use_master_clock) { spin_unlock(&ka->pvclock_gtod_sync_lock); - return ktime_get_boot_ns() + ka->kvmclock_offset; + return ktime_get_boottime_ns() + ka->kvmclock_offset; } hv_clock.tsc_timestamp = ka->master_cycle_now; @@ -2089,7 +2087,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) &hv_clock.tsc_to_system_mul); ret = __pvclock_read_cycles(&hv_clock, rdtsc()); } else - ret = ktime_get_boot_ns() + ka->kvmclock_offset; + ret = ktime_get_boottime_ns() + ka->kvmclock_offset; put_cpu(); @@ -2188,7 +2186,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) } if (!use_master_clock) { host_tsc = rdtsc(); - kernel_ns = ktime_get_boot_ns(); + kernel_ns = ktime_get_boottime_ns(); } tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); @@ -9018,7 +9016,7 @@ int kvm_arch_hardware_enable(void) * before any KVM threads can be running. Unfortunately, we can't * bring the TSCs fully up to date with real time, as we aren't yet far * enough into CPU bringup that we know how much real time has actually - * elapsed; our helper function, ktime_get_boot_ns() will be using boot + * elapsed; our helper function, ktime_get_boottime_ns() will be using boot * variables that haven't been updated yet. * * So we simply find the maximum observed TSC above, then record the @@ -9246,7 +9244,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) mutex_init(&kvm->arch.apic_map_lock); spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); - kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); + kvm->arch.kvmclock_offset = -ktime_get_boottime_ns(); pvclock_update_vm_gtod_copy(kvm); kvm->arch.guest_can_read_msr_platform_info = true; diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c index 1811fa4a1b1a..7c48ff4ae8d1 100644 --- a/arch/x86/lib/cache-smp.c +++ b/arch/x86/lib/cache-smp.c @@ -15,6 +15,7 @@ EXPORT_SYMBOL(wbinvd_on_cpu); int wbinvd_on_all_cpus(void) { - return on_each_cpu(__wbinvd, NULL, 1); + on_each_cpu(__wbinvd, NULL, 1); + return 0; } EXPORT_SYMBOL(wbinvd_on_all_cpus); diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c index 3261abb21ef4..4f1719e22d3c 100644 --- a/arch/x86/lib/cmdline.c +++ b/arch/x86/lib/cmdline.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. * * Misc librarized functions for cmdline poking. */ diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S index 33147fef3452..a9bdf0805be0 100644 --- a/arch/x86/lib/iomap_copy_64.S +++ b/arch/x86/lib/iomap_copy_64.S @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright 2006 PathScale, Inc. All Rights Reserved. - * - * This file is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include <linux/linkage.h> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 46df4c6aae46..58e4f1f00bbc 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -710,6 +710,10 @@ static void set_signal_archinfo(unsigned long address, * To avoid leaking information about the kernel page * table layout, pretend that user-mode accesses to * kernel addresses are always protection faults. + * + * NB: This means that failed vsyscalls with vsyscall=none + * will have the PROT bit. This doesn't leak any + * information and does not appear to cause any problems. */ if (address >= TASK_SIZE_MAX) error_code |= X86_PF_PROT; @@ -1369,16 +1373,18 @@ void do_user_addr_fault(struct pt_regs *regs, #ifdef CONFIG_X86_64 /* - * Instruction fetch faults in the vsyscall page might need - * emulation. The vsyscall page is at a high address - * (>PAGE_OFFSET), but is considered to be part of the user - * address space. + * Faults in the vsyscall page might need emulation. The + * vsyscall page is at a high address (>PAGE_OFFSET), but is + * considered to be part of the user address space. * * The vsyscall page does not have a "real" VMA, so do this * emulation before we go searching for VMAs. + * + * PKRU never rejects instruction fetches, so we don't need + * to consider the PF_PK bit. */ - if ((hw_error_code & X86_PF_INSTR) && is_vsyscall_vaddr(address)) { - if (emulate_vsyscall(regs, address)) + if (is_vsyscall_vaddr(address)) { + if (emulate_vsyscall(hw_error_code, regs, address)) return; } #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 693aaf28d5fe..0f01c7b1d217 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -671,23 +671,25 @@ static unsigned long __meminit phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, unsigned long page_size_mask, bool init) { - unsigned long paddr_next, paddr_last = paddr_end; - unsigned long vaddr = (unsigned long)__va(paddr); - int i = p4d_index(vaddr); + unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last; + + paddr_last = paddr_end; + vaddr = (unsigned long)__va(paddr); + vaddr_end = (unsigned long)__va(paddr_end); if (!pgtable_l5_enabled()) return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask, init); - for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) { - p4d_t *p4d; + for (; vaddr < vaddr_end; vaddr = vaddr_next) { + p4d_t *p4d = p4d_page + p4d_index(vaddr); pud_t *pud; - vaddr = (unsigned long)__va(paddr); - p4d = p4d_page + p4d_index(vaddr); - paddr_next = (paddr & P4D_MASK) + P4D_SIZE; + vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE; + paddr = __pa(vaddr); if (paddr >= paddr_end) { + paddr_next = __pa(vaddr_next); if (!after_bootmem && !e820__mapped_any(paddr & P4D_MASK, paddr_next, E820_TYPE_RAM) && @@ -699,13 +701,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, if (!p4d_none(*p4d)) { pud = pud_offset(p4d, 0); - paddr_last = phys_pud_init(pud, paddr, paddr_end, - page_size_mask, init); + paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), + page_size_mask, init); continue; } pud = alloc_low_page(); - paddr_last = phys_pud_init(pud, paddr, paddr_end, + paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), page_size_mask, init); spin_lock(&init_mm.page_table_lock); diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 8dc0fc0b1382..296da58f3013 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -199,7 +199,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr) if (!pgtable_l5_enabled()) return (p4d_t *)pgd; - p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK; + p4d = pgd_val(*pgd) & PTE_PFN_MASK; p4d += __START_KERNEL_map - phys_base; return (p4d_t *)p4d + p4d_index(addr); } diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index dc3f058bdf9b..dc6182eecefa 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -52,7 +52,7 @@ static __initdata struct kaslr_memory_region { } kaslr_regions[] = { { &page_offset_base, 0 }, { &vmalloc_base, 0 }, - { &vmemmap_base, 1 }, + { &vmemmap_base, 0 }, }; /* Get size in bytes used by the memory region */ @@ -78,6 +78,7 @@ void __init kernel_randomize_memory(void) unsigned long rand, memory_tb; struct rnd_state rand_state; unsigned long remain_entropy; + unsigned long vmemmap_size; vaddr_start = pgtable_l5_enabled() ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4; vaddr = vaddr_start; @@ -109,6 +110,14 @@ void __init kernel_randomize_memory(void) if (memory_tb < kaslr_regions[0].size_tb) kaslr_regions[0].size_tb = memory_tb; + /* + * Calculate the vmemmap region size in TBs, aligned to a TB + * boundary. + */ + vmemmap_size = (kaslr_regions[0].size_tb << (TB_SHIFT - PAGE_SHIFT)) * + sizeof(struct page); + kaslr_regions[2].size_tb = DIV_ROUND_UP(vmemmap_size, 1UL << TB_SHIFT); + /* Calculate entropy available between regions */ remain_entropy = vaddr_end - vaddr_start; for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 51f50a7a07ef..e0df96fdfe46 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * AMD Memory Encryption Support * * Copyright (C) 2016 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define DISABLE_BRANCH_PROFILING diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 40a6085063d6..6d71481a1e70 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -1,13 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * AMD Memory Encryption Support * * Copyright (C) 2016 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/linkage.h> diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 4aa9b1480866..dddcd2a1afdb 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * AMD Memory Encryption Support * * Copyright (C) 2016 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define DISABLE_BRANCH_PROFILING diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 32bfab4e21eb..eaaed5bfc4a4 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -186,9 +186,7 @@ struct jit_context { #define BPF_MAX_INSN_SIZE 128 #define BPF_INSN_SAFETY 64 -#define AUX_STACK_SPACE 40 /* Space for RBX, R13, R14, R15, tailcnt */ - -#define PROLOGUE_SIZE 37 +#define PROLOGUE_SIZE 20 /* * Emit x86-64 prologue code for BPF program and check its size. @@ -199,44 +197,19 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) u8 *prog = *pprog; int cnt = 0; - /* push rbp */ - EMIT1(0x55); - - /* mov rbp,rsp */ - EMIT3(0x48, 0x89, 0xE5); - - /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */ - EMIT3_off32(0x48, 0x81, 0xEC, - round_up(stack_depth, 8) + AUX_STACK_SPACE); - - /* sub rbp, AUX_STACK_SPACE */ - EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE); - - /* mov qword ptr [rbp+0],rbx */ - EMIT4(0x48, 0x89, 0x5D, 0); - /* mov qword ptr [rbp+8],r13 */ - EMIT4(0x4C, 0x89, 0x6D, 8); - /* mov qword ptr [rbp+16],r14 */ - EMIT4(0x4C, 0x89, 0x75, 16); - /* mov qword ptr [rbp+24],r15 */ - EMIT4(0x4C, 0x89, 0x7D, 24); - + EMIT1(0x55); /* push rbp */ + EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ + /* sub rsp, rounded_stack_depth */ + EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8)); + EMIT1(0x53); /* push rbx */ + EMIT2(0x41, 0x55); /* push r13 */ + EMIT2(0x41, 0x56); /* push r14 */ + EMIT2(0x41, 0x57); /* push r15 */ if (!ebpf_from_cbpf) { - /* - * Clear the tail call counter (tail_call_cnt): for eBPF tail - * calls we need to reset the counter to 0. It's done in two - * instructions, resetting RAX register to 0, and moving it - * to the counter location. - */ - - /* xor eax, eax */ - EMIT2(0x31, 0xc0); - /* mov qword ptr [rbp+32], rax */ - EMIT4(0x48, 0x89, 0x45, 32); - + /* zero init tail_call_cnt */ + EMIT2(0x6a, 0x00); BUILD_BUG_ON(cnt != PROLOGUE_SIZE); } - *pprog = prog; } @@ -281,13 +254,13 @@ static void emit_bpf_tail_call(u8 **pprog) * if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; */ - EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ + EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ #define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE) EMIT2(X86_JA, OFFSET2); /* ja out */ label2 = cnt; EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ - EMIT2_off32(0x89, 0x85, 36); /* mov dword ptr [rbp + 36], eax */ + EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */ /* prog = array->ptrs[index]; */ EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ @@ -1036,19 +1009,14 @@ emit_jmp: seen_exit = true; /* Update cleanup_addr */ ctx->cleanup_addr = proglen; - /* mov rbx, qword ptr [rbp+0] */ - EMIT4(0x48, 0x8B, 0x5D, 0); - /* mov r13, qword ptr [rbp+8] */ - EMIT4(0x4C, 0x8B, 0x6D, 8); - /* mov r14, qword ptr [rbp+16] */ - EMIT4(0x4C, 0x8B, 0x75, 16); - /* mov r15, qword ptr [rbp+24] */ - EMIT4(0x4C, 0x8B, 0x7D, 24); - - /* add rbp, AUX_STACK_SPACE */ - EMIT4(0x48, 0x83, 0xC5, AUX_STACK_SPACE); - EMIT1(0xC9); /* leave */ - EMIT1(0xC3); /* ret */ + if (!bpf_prog_was_classic(bpf_prog)) + EMIT1(0x5B); /* get rid of tail_call_cnt */ + EMIT2(0x41, 0x5F); /* pop r15 */ + EMIT2(0x41, 0x5E); /* pop r14 */ + EMIT2(0x41, 0x5D); /* pop r13 */ + EMIT1(0x5B); /* pop rbx */ + EMIT1(0xC9); /* leave */ + EMIT1(0xC3); /* ret */ break; default: diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 632b83885867..3b9fd679cea9 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -728,7 +728,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr) * Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so * page faulting on these addresses isn't expected. */ - if (phys_addr >= 0x0000 && phys_addr <= 0x0fff) + if (phys_addr <= 0x0fff) return; /* diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c index 1865c196f136..8d4daca81eda 100644 --- a/arch/x86/platform/geode/alix.c +++ b/arch/x86/platform/geode/alix.c @@ -1,9 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * System Specific setup for PCEngines ALIX. * At the moment this means setup of GPIO control of LEDs * on Alix.2/3/6 boards. * - * * Copyright (C) 2008 Constantin Baranov <const@mimas.ru> * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com> * and Philip Prindeville <philipp@redfish-solutions.com> @@ -11,10 +11,6 @@ * TODO: There are large similarities with leds-net5501.c * by Alessandro Zummo <a.zummo@towertech.it> * In the future leds-net5501.c should be migrated over to platform - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. */ #include <linux/kernel.h> diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c index 4fcdb91318a0..136974ec9a90 100644 --- a/arch/x86/platform/geode/geos.c +++ b/arch/x86/platform/geode/geos.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * System Specific setup for Traverse Technologies GEOS. * At the moment this means setup of GPIO control of LEDs. @@ -9,10 +10,6 @@ * TODO: There are large similarities with leds-net5501.c * by Alessandro Zummo <a.zummo@towertech.it> * In the future leds-net5501.c should be migrated over to platform - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. */ #include <linux/kernel.h> diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c index a2f6b982a729..2c24d8d30436 100644 --- a/arch/x86/platform/geode/net5501.c +++ b/arch/x86/platform/geode/net5501.c @@ -1,19 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * System Specific setup for Soekris net5501 * At the moment this means setup of GPIO control of LEDs and buttons * on net5501 boards. * - * * Copyright (C) 2008-2009 Tower Technologies * Written by Alessandro Zummo <a.zummo@towertech.it> * * Copyright (C) 2008 Constantin Baranov <const@mimas.ru> * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com> * and Philip Prindeville <philipp@redfish-solutions.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. */ #include <linux/kernel.h> diff --git a/arch/x86/purgatory/entry64.S b/arch/x86/purgatory/entry64.S index d1a4291d3568..275a646d1048 100644 --- a/arch/x86/purgatory/entry64.S +++ b/arch/x86/purgatory/entry64.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com) * Copyright (C) 2014 Red Hat Inc. @@ -5,9 +6,6 @@ * Author(s): Vivek Goyal <vgoyal@redhat.com> * * This code has been taken from kexec-tools. - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. */ .text diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 025c34ac0d84..6d8d5a34c377 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * purgatory: Runs between two kernels * @@ -5,9 +6,6 @@ * * Author: * Vivek Goyal <vgoyal@redhat.com> - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. */ #include <linux/bug.h> diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S index dfae9b9e60b5..321146be741d 100644 --- a/arch/x86/purgatory/setup-x86_64.S +++ b/arch/x86/purgatory/setup-x86_64.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * purgatory: setup code * @@ -5,9 +6,6 @@ * Copyright (C) 2014 Red Hat Inc. * * This code has been taken from kexec-tools. - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. */ #include <asm/purgatory.h> diff --git a/arch/x86/purgatory/stack.S b/arch/x86/purgatory/stack.S index 50a4147f91fb..8b1427422dfc 100644 --- a/arch/x86/purgatory/stack.S +++ b/arch/x86/purgatory/stack.S @@ -1,10 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * purgatory: stack * * Copyright (C) 2014 Red Hat Inc. - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. */ /* A stack for the loaded kernel. diff --git a/arch/x86/purgatory/string.c b/arch/x86/purgatory/string.c index 795ca4f2cb3c..01ad43873ad9 100644 --- a/arch/x86/purgatory/string.c +++ b/arch/x86/purgatory/string.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Simple string functions. * @@ -5,9 +6,6 @@ * * Author: * Vivek Goyal <vgoyal@redhat.com> - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. */ #include <linux/types.h> diff --git a/arch/x86/um/delay.c b/arch/x86/um/delay.c index a8fb7ca4822b..8d510ceb43fb 100644 --- a/arch/x86/um/delay.c +++ b/arch/x86/um/delay.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011 Richard Weinberger <richrd@nod.at> * Mostly copied from arch/x86/lib/delay.c - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/export.h> diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c index 56c44d865f7b..19c5dbd46770 100644 --- a/arch/x86/um/mem_32.c +++ b/arch/x86/um/mem_32.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011 Richard Weinberger <richrd@nod.at> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/mm.h> diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c index 7c441b59d375..ac9c02b9d92c 100644 --- a/arch/x86/um/vdso/um_vdso.c +++ b/arch/x86/um/vdso/um_vdso.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011 Richard Weinberger <richrd@nod.at> * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * This vDSO turns all calls into a syscall so that UML can trap them. */ diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c index 6be22f991b59..9e7c4aba6c3a 100644 --- a/arch/x86/um/vdso/vma.c +++ b/arch/x86/um/vdso/vma.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011 Richard Weinberger <richrd@nod.at> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/slab.h> |