summaryrefslogtreecommitdiff
path: root/include/asm-generic
diff options
context:
space:
mode:
Diffstat (limited to 'include/asm-generic')
-rw-r--r--include/asm-generic/Kbuild1
-rw-r--r--include/asm-generic/early_ioremap.h2
-rw-r--r--include/asm-generic/fprobe.h46
-rw-r--r--include/asm-generic/hugetlb.h5
-rw-r--r--include/asm-generic/hyperv-tlfs.h874
-rw-r--r--include/asm-generic/io.h10
-rw-r--r--include/asm-generic/iomap.h36
-rw-r--r--include/asm-generic/mcs_spinlock.h6
-rw-r--r--include/asm-generic/memory_model.h15
-rw-r--r--include/asm-generic/module.h8
-rw-r--r--include/asm-generic/mshyperv.h89
-rw-r--r--include/asm-generic/percpu.h39
-rw-r--r--include/asm-generic/pgalloc.h94
-rw-r--r--include/asm-generic/rqspinlock.h250
-rw-r--r--include/asm-generic/rwonce.h10
-rw-r--r--include/asm-generic/sections.h2
-rw-r--r--include/asm-generic/simd.h9
-rw-r--r--include/asm-generic/syscall.h32
-rw-r--r--include/asm-generic/tlb.h109
-rw-r--r--include/asm-generic/vdso/vsyscall.h27
-rw-r--r--include/asm-generic/vmlinux.lds.h57
21 files changed, 667 insertions, 1054 deletions
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 1b43c3a77012..8675b7b4ad23 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -45,6 +45,7 @@ mandatory-y += pci.h
mandatory-y += percpu.h
mandatory-y += pgalloc.h
mandatory-y += preempt.h
+mandatory-y += rqspinlock.h
mandatory-y += runtime-const.h
mandatory-y += rwonce.h
mandatory-y += sections.h
diff --git a/include/asm-generic/early_ioremap.h b/include/asm-generic/early_ioremap.h
index 9d0479f50f97..5db59a1efb65 100644
--- a/include/asm-generic/early_ioremap.h
+++ b/include/asm-generic/early_ioremap.h
@@ -35,7 +35,7 @@ extern void early_ioremap_reset(void);
/*
* Early copy from unmapped memory to kernel mapped memory.
*/
-extern void copy_from_early_mem(void *dest, phys_addr_t src,
+extern int copy_from_early_mem(void *dest, phys_addr_t src,
unsigned long size);
#else
diff --git a/include/asm-generic/fprobe.h b/include/asm-generic/fprobe.h
new file mode 100644
index 000000000000..8659a4dc6eb6
--- /dev/null
+++ b/include/asm-generic/fprobe.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Generic arch dependent fprobe macros.
+ */
+#ifndef __ASM_GENERIC_FPROBE_H__
+#define __ASM_GENERIC_FPROBE_H__
+
+#include <linux/bits.h>
+
+#ifdef CONFIG_64BIT
+/*
+ * Encoding the size and the address of fprobe into one 64bit entry.
+ * The 32bit architectures should use 2 entries to store those info.
+ */
+
+#define ARCH_DEFINE_ENCODE_FPROBE_HEADER
+
+#define FPROBE_HEADER_MSB_SIZE_SHIFT (BITS_PER_LONG - FPROBE_DATA_SIZE_BITS)
+#define FPROBE_HEADER_MSB_MASK \
+ GENMASK(FPROBE_HEADER_MSB_SIZE_SHIFT - 1, 0)
+
+/*
+ * By default, this expects the MSBs in the address of kprobe is 0xf.
+ * If any arch needs another fixed pattern (e.g. s390 is zero filled),
+ * override this.
+ */
+#define FPROBE_HEADER_MSB_PATTERN \
+ GENMASK(BITS_PER_LONG - 1, FPROBE_HEADER_MSB_SIZE_SHIFT)
+
+#define arch_fprobe_header_encodable(fp) \
+ (((unsigned long)(fp) & ~FPROBE_HEADER_MSB_MASK) == \
+ FPROBE_HEADER_MSB_PATTERN)
+
+#define arch_encode_fprobe_header(fp, size) \
+ (((unsigned long)(fp) & FPROBE_HEADER_MSB_MASK) | \
+ ((unsigned long)(size) << FPROBE_HEADER_MSB_SIZE_SHIFT))
+
+#define arch_decode_fprobe_header_size(val) \
+ ((unsigned long)(val) >> FPROBE_HEADER_MSB_SIZE_SHIFT)
+
+#define arch_decode_fprobe_header_fp(val) \
+ ((struct fprobe *)(((unsigned long)(val) & FPROBE_HEADER_MSB_MASK) | \
+ FPROBE_HEADER_MSB_PATTERN))
+#endif /* CONFIG_64BIT */
+
+#endif /* __ASM_GENERIC_FPROBE_H__ */
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index 2afc95bf1655..3e0a8fe9b108 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -5,11 +5,6 @@
#include <linux/swap.h>
#include <linux/swapops.h>
-static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
-{
- return mk_pte(page, pgprot);
-}
-
static inline unsigned long huge_pte_write(pte_t pte)
{
return pte_write(pte);
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
deleted file mode 100644
index 814207e7c37f..000000000000
--- a/include/asm-generic/hyperv-tlfs.h
+++ /dev/null
@@ -1,874 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/*
- * This file contains definitions from Hyper-V Hypervisor Top-Level Functional
- * Specification (TLFS):
- * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs
- */
-
-#ifndef _ASM_GENERIC_HYPERV_TLFS_H
-#define _ASM_GENERIC_HYPERV_TLFS_H
-
-#include <linux/types.h>
-#include <linux/bits.h>
-#include <linux/time64.h>
-
-/*
- * While not explicitly listed in the TLFS, Hyper-V always runs with a page size
- * of 4096. These definitions are used when communicating with Hyper-V using
- * guest physical pages and guest physical page addresses, since the guest page
- * size may not be 4096 on all architectures.
- */
-#define HV_HYP_PAGE_SHIFT 12
-#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT)
-#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1))
-
-/*
- * Hyper-V provides two categories of flags relevant to guest VMs. The
- * "Features" category indicates specific functionality that is available
- * to guests on this particular instance of Hyper-V. The "Features"
- * are presented in four groups, each of which is 32 bits. The group A
- * and B definitions are common across architectures and are listed here.
- * However, not all flags are relevant on all architectures.
- *
- * Groups C and D vary across architectures and are listed in the
- * architecture specific portion of hyperv-tlfs.h. Some of these flags exist
- * on multiple architectures, but the bit positions are different so they
- * cannot appear in the generic portion of hyperv-tlfs.h.
- *
- * The "Enlightenments" category provides recommendations on whether to use
- * specific enlightenments that are available. The Enlighenments are a single
- * group of 32 bits, but they vary across architectures and are listed in
- * the architecture specific portion of hyperv-tlfs.h.
- */
-
-/*
- * Group A Features.
- */
-
-/* VP Runtime register available */
-#define HV_MSR_VP_RUNTIME_AVAILABLE BIT(0)
-/* Partition Reference Counter available*/
-#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1)
-/* Basic SynIC register available */
-#define HV_MSR_SYNIC_AVAILABLE BIT(2)
-/* Synthetic Timer registers available */
-#define HV_MSR_SYNTIMER_AVAILABLE BIT(3)
-/* Virtual APIC assist and VP assist page registers available */
-#define HV_MSR_APIC_ACCESS_AVAILABLE BIT(4)
-/* Hypercall and Guest OS ID registers available*/
-#define HV_MSR_HYPERCALL_AVAILABLE BIT(5)
-/* Access virtual processor index register available*/
-#define HV_MSR_VP_INDEX_AVAILABLE BIT(6)
-/* Virtual system reset register available*/
-#define HV_MSR_RESET_AVAILABLE BIT(7)
-/* Access statistics page registers available */
-#define HV_MSR_STAT_PAGES_AVAILABLE BIT(8)
-/* Partition reference TSC register is available */
-#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9)
-/* Partition Guest IDLE register is available */
-#define HV_MSR_GUEST_IDLE_AVAILABLE BIT(10)
-/* Partition local APIC and TSC frequency registers available */
-#define HV_ACCESS_FREQUENCY_MSRS BIT(11)
-/* AccessReenlightenmentControls privilege */
-#define HV_ACCESS_REENLIGHTENMENT BIT(13)
-/* AccessTscInvariantControls privilege */
-#define HV_ACCESS_TSC_INVARIANT BIT(15)
-
-/*
- * Group B features.
- */
-#define HV_CREATE_PARTITIONS BIT(0)
-#define HV_ACCESS_PARTITION_ID BIT(1)
-#define HV_ACCESS_MEMORY_POOL BIT(2)
-#define HV_ADJUST_MESSAGE_BUFFERS BIT(3)
-#define HV_POST_MESSAGES BIT(4)
-#define HV_SIGNAL_EVENTS BIT(5)
-#define HV_CREATE_PORT BIT(6)
-#define HV_CONNECT_PORT BIT(7)
-#define HV_ACCESS_STATS BIT(8)
-#define HV_DEBUGGING BIT(11)
-#define HV_CPU_MANAGEMENT BIT(12)
-#define HV_ENABLE_EXTENDED_HYPERCALLS BIT(20)
-#define HV_ISOLATION BIT(22)
-
-/*
- * TSC page layout.
- */
-struct ms_hyperv_tsc_page {
- volatile u32 tsc_sequence;
- u32 reserved1;
- volatile u64 tsc_scale;
- volatile s64 tsc_offset;
-} __packed;
-
-union hv_reference_tsc_msr {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 reserved:11;
- u64 pfn:52;
- } __packed;
-};
-
-/*
- * The guest OS needs to register the guest ID with the hypervisor.
- * The guest ID is a 64 bit entity and the structure of this ID is
- * specified in the Hyper-V specification:
- *
- * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx
- *
- * While the current guideline does not specify how Linux guest ID(s)
- * need to be generated, our plan is to publish the guidelines for
- * Linux and other guest operating systems that currently are hosted
- * on Hyper-V. The implementation here conforms to this yet
- * unpublished guidelines.
- *
- *
- * Bit(s)
- * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
- * 62:56 - Os Type; Linux is 0x100
- * 55:48 - Distro specific identification
- * 47:16 - Linux kernel version number
- * 15:0 - Distro specific identification
- *
- *
- */
-
-#define HV_LINUX_VENDOR_ID 0x8100
-
-/*
- * Crash notification flags.
- */
-#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62)
-#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63)
-
-/* Declare the various hypercall operations. */
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
-#define HVCALL_ENABLE_VP_VTL 0x000f
-#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
-#define HVCALL_SEND_IPI 0x000b
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
-#define HVCALL_SEND_IPI_EX 0x0015
-#define HVCALL_GET_PARTITION_ID 0x0046
-#define HVCALL_DEPOSIT_MEMORY 0x0048
-#define HVCALL_CREATE_VP 0x004e
-#define HVCALL_GET_VP_REGISTERS 0x0050
-#define HVCALL_SET_VP_REGISTERS 0x0051
-#define HVCALL_POST_MESSAGE 0x005c
-#define HVCALL_SIGNAL_EVENT 0x005d
-#define HVCALL_POST_DEBUG_DATA 0x0069
-#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a
-#define HVCALL_RESET_DEBUG_SESSION 0x006b
-#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076
-#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
-#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
-#define HVCALL_RETARGET_INTERRUPT 0x007e
-#define HVCALL_START_VP 0x0099
-#define HVCALL_GET_VP_ID_FROM_APIC_ID 0x009a
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
-#define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db
-#define HVCALL_MMIO_READ 0x0106
-#define HVCALL_MMIO_WRITE 0x0107
-
-/* Extended hypercalls */
-#define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001
-#define HV_EXT_CALL_MEMORY_HEAT_HINT 0x8003
-
-#define HV_FLUSH_ALL_PROCESSORS BIT(0)
-#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
-#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
-#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
-
-/* Extended capability bits */
-#define HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT BIT(8)
-
-enum HV_GENERIC_SET_FORMAT {
- HV_GENERIC_SET_SPARSE_4K,
- HV_GENERIC_SET_ALL,
-};
-
-#define HV_PARTITION_ID_SELF ((u64)-1)
-#define HV_VP_INDEX_SELF ((u32)-2)
-
-#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
-#define HV_HYPERCALL_FAST_BIT BIT(16)
-#define HV_HYPERCALL_VARHEAD_OFFSET 17
-#define HV_HYPERCALL_VARHEAD_MASK GENMASK_ULL(26, 17)
-#define HV_HYPERCALL_RSVD0_MASK GENMASK_ULL(31, 27)
-#define HV_HYPERCALL_NESTED BIT_ULL(31)
-#define HV_HYPERCALL_REP_COMP_OFFSET 32
-#define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32)
-#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
-#define HV_HYPERCALL_RSVD1_MASK GENMASK_ULL(47, 44)
-#define HV_HYPERCALL_REP_START_OFFSET 48
-#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
-#define HV_HYPERCALL_RSVD2_MASK GENMASK_ULL(63, 60)
-#define HV_HYPERCALL_RSVD_MASK (HV_HYPERCALL_RSVD0_MASK | \
- HV_HYPERCALL_RSVD1_MASK | \
- HV_HYPERCALL_RSVD2_MASK)
-
-/* hypercall status code */
-#define HV_STATUS_SUCCESS 0
-#define HV_STATUS_INVALID_HYPERCALL_CODE 2
-#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
-#define HV_STATUS_INVALID_ALIGNMENT 4
-#define HV_STATUS_INVALID_PARAMETER 5
-#define HV_STATUS_ACCESS_DENIED 6
-#define HV_STATUS_OPERATION_DENIED 8
-#define HV_STATUS_INSUFFICIENT_MEMORY 11
-#define HV_STATUS_INVALID_PORT_ID 17
-#define HV_STATUS_INVALID_CONNECTION_ID 18
-#define HV_STATUS_INSUFFICIENT_BUFFERS 19
-#define HV_STATUS_TIME_OUT 120
-#define HV_STATUS_VTL_ALREADY_ENABLED 134
-
-/*
- * The Hyper-V TimeRefCount register and the TSC
- * page provide a guest VM clock with 100ns tick rate
- */
-#define HV_CLOCK_HZ (NSEC_PER_SEC/100)
-
-/* Define the number of synthetic interrupt sources. */
-#define HV_SYNIC_SINT_COUNT (16)
-/* Define the expected SynIC version. */
-#define HV_SYNIC_VERSION_1 (0x1)
-/* Valid SynIC vectors are 16-255. */
-#define HV_SYNIC_FIRST_VALID_VECTOR (16)
-
-#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0)
-#define HV_SYNIC_SIMP_ENABLE (1ULL << 0)
-#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0)
-#define HV_SYNIC_SINT_MASKED (1ULL << 16)
-#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17)
-#define HV_SYNIC_SINT_VECTOR_MASK (0xFF)
-
-#define HV_SYNIC_STIMER_COUNT (4)
-
-/* Define synthetic interrupt controller message constants. */
-#define HV_MESSAGE_SIZE (256)
-#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
-#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30)
-
-/*
- * Define hypervisor message types. Some of the message types
- * are x86/x64 specific, but there's no good way to separate
- * them out into the arch-specific version of hyperv-tlfs.h
- * because C doesn't provide a way to extend enum types.
- * Keeping them all in the arch neutral hyperv-tlfs.h seems
- * the least messy compromise.
- */
-enum hv_message_type {
- HVMSG_NONE = 0x00000000,
-
- /* Memory access messages. */
- HVMSG_UNMAPPED_GPA = 0x80000000,
- HVMSG_GPA_INTERCEPT = 0x80000001,
-
- /* Timer notification messages. */
- HVMSG_TIMER_EXPIRED = 0x80000010,
-
- /* Error messages. */
- HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020,
- HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021,
- HVMSG_UNSUPPORTED_FEATURE = 0x80000022,
-
- /* Trace buffer complete messages. */
- HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040,
-
- /* Platform-specific processor intercept messages. */
- HVMSG_X64_IOPORT_INTERCEPT = 0x80010000,
- HVMSG_X64_MSR_INTERCEPT = 0x80010001,
- HVMSG_X64_CPUID_INTERCEPT = 0x80010002,
- HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003,
- HVMSG_X64_APIC_EOI = 0x80010004,
- HVMSG_X64_LEGACY_FP_ERROR = 0x80010005
-};
-
-/* Define synthetic interrupt controller message flags. */
-union hv_message_flags {
- __u8 asu8;
- struct {
- __u8 msg_pending:1;
- __u8 reserved:7;
- } __packed;
-};
-
-/* Define port identifier type. */
-union hv_port_id {
- __u32 asu32;
- struct {
- __u32 id:24;
- __u32 reserved:8;
- } __packed u;
-};
-
-/* Define synthetic interrupt controller message header. */
-struct hv_message_header {
- __u32 message_type;
- __u8 payload_size;
- union hv_message_flags message_flags;
- __u8 reserved[2];
- union {
- __u64 sender;
- union hv_port_id port;
- };
-} __packed;
-
-/* Define synthetic interrupt controller message format. */
-struct hv_message {
- struct hv_message_header header;
- union {
- __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
- } u;
-} __packed;
-
-/* Define the synthetic interrupt message page layout. */
-struct hv_message_page {
- struct hv_message sint_message[HV_SYNIC_SINT_COUNT];
-} __packed;
-
-/* Define timer message payload structure. */
-struct hv_timer_message_payload {
- __u32 timer_index;
- __u32 reserved;
- __u64 expiration_time; /* When the timer expired */
- __u64 delivery_time; /* When the message was delivered */
-} __packed;
-
-
-/* Define synthetic interrupt controller flag constants. */
-#define HV_EVENT_FLAGS_COUNT (256 * 8)
-#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long))
-
-/*
- * Synthetic timer configuration.
- */
-union hv_stimer_config {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 periodic:1;
- u64 lazy:1;
- u64 auto_enable:1;
- u64 apic_vector:8;
- u64 direct_mode:1;
- u64 reserved_z0:3;
- u64 sintx:4;
- u64 reserved_z1:44;
- } __packed;
-};
-
-
-/* Define the synthetic interrupt controller event flags format. */
-union hv_synic_event_flags {
- unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT];
-};
-
-/* Define SynIC control register. */
-union hv_synic_scontrol {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 reserved:63;
- } __packed;
-};
-
-/* Define synthetic interrupt source. */
-union hv_synic_sint {
- u64 as_uint64;
- struct {
- u64 vector:8;
- u64 reserved1:8;
- u64 masked:1;
- u64 auto_eoi:1;
- u64 polling:1;
- u64 reserved2:45;
- } __packed;
-};
-
-/* Define the format of the SIMP register */
-union hv_synic_simp {
- u64 as_uint64;
- struct {
- u64 simp_enabled:1;
- u64 preserved:11;
- u64 base_simp_gpa:52;
- } __packed;
-};
-
-/* Define the format of the SIEFP register */
-union hv_synic_siefp {
- u64 as_uint64;
- struct {
- u64 siefp_enabled:1;
- u64 preserved:11;
- u64 base_siefp_gpa:52;
- } __packed;
-};
-
-struct hv_vpset {
- u64 format;
- u64 valid_bank_mask;
- u64 bank_contents[];
-} __packed;
-
-/* The maximum number of sparse vCPU banks which can be encoded by 'struct hv_vpset' */
-#define HV_MAX_SPARSE_VCPU_BANKS (64)
-/* The number of vCPUs in one sparse bank */
-#define HV_VCPUS_PER_SPARSE_BANK (64)
-
-/* HvCallSendSyntheticClusterIpi hypercall */
-struct hv_send_ipi {
- u32 vector;
- u32 reserved;
- u64 cpu_mask;
-} __packed;
-
-/* HvCallSendSyntheticClusterIpiEx hypercall */
-struct hv_send_ipi_ex {
- u32 vector;
- u32 reserved;
- struct hv_vpset vp_set;
-} __packed;
-
-/* HvFlushGuestPhysicalAddressSpace hypercalls */
-struct hv_guest_mapping_flush {
- u64 address_space;
- u64 flags;
-} __packed;
-
-/*
- * HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited
- * by the bitwidth of "additional_pages" in union hv_gpa_page_range.
- */
-#define HV_MAX_FLUSH_PAGES (2048)
-#define HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB 0
-#define HV_GPA_PAGE_RANGE_PAGE_SIZE_1GB 1
-
-/* HvFlushGuestPhysicalAddressList, HvExtCallMemoryHeatHint hypercall */
-union hv_gpa_page_range {
- u64 address_space;
- struct {
- u64 additional_pages:11;
- u64 largepage:1;
- u64 basepfn:52;
- } page;
- struct {
- u64 reserved:12;
- u64 page_size:1;
- u64 reserved1:8;
- u64 base_large_pfn:43;
- };
-};
-
-/*
- * All input flush parameters should be in single page. The max flush
- * count is equal with how many entries of union hv_gpa_page_range can
- * be populated into the input parameter page.
- */
-#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \
- sizeof(union hv_gpa_page_range))
-
-struct hv_guest_mapping_flush_list {
- u64 address_space;
- u64 flags;
- union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT];
-};
-
-/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
-struct hv_tlb_flush {
- u64 address_space;
- u64 flags;
- u64 processor_mask;
- u64 gva_list[];
-} __packed;
-
-/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
-struct hv_tlb_flush_ex {
- u64 address_space;
- u64 flags;
- struct hv_vpset hv_vp_set;
- u64 gva_list[];
-} __packed;
-
-/* HvGetPartitionId hypercall (output only) */
-struct hv_get_partition_id {
- u64 partition_id;
-} __packed;
-
-/* HvDepositMemory hypercall */
-struct hv_deposit_memory {
- u64 partition_id;
- u64 gpa_page_list[];
-} __packed;
-
-struct hv_proximity_domain_flags {
- u32 proximity_preferred : 1;
- u32 reserved : 30;
- u32 proximity_info_valid : 1;
-} __packed;
-
-struct hv_proximity_domain_info {
- u32 domain_id;
- struct hv_proximity_domain_flags flags;
-} __packed;
-
-struct hv_lp_startup_status {
- u64 hv_status;
- u64 substatus1;
- u64 substatus2;
- u64 substatus3;
- u64 substatus4;
- u64 substatus5;
- u64 substatus6;
-} __packed;
-
-/* HvAddLogicalProcessor hypercall */
-struct hv_input_add_logical_processor {
- u32 lp_index;
- u32 apic_id;
- struct hv_proximity_domain_info proximity_domain_info;
-} __packed;
-
-struct hv_output_add_logical_processor {
- struct hv_lp_startup_status startup_status;
-} __packed;
-
-enum HV_SUBNODE_TYPE
-{
- HvSubnodeAny = 0,
- HvSubnodeSocket = 1,
- HvSubnodeAmdNode = 2,
- HvSubnodeL3 = 3,
- HvSubnodeCount = 4,
- HvSubnodeInvalid = -1
-};
-
-/* HvCreateVp hypercall */
-struct hv_create_vp {
- u64 partition_id;
- u32 vp_index;
- u8 padding[3];
- u8 subnode_type;
- u64 subnode_id;
- struct hv_proximity_domain_info proximity_domain_info;
- u64 flags;
-} __packed;
-
-enum hv_interrupt_source {
- HV_INTERRUPT_SOURCE_MSI = 1, /* MSI and MSI-X */
- HV_INTERRUPT_SOURCE_IOAPIC,
-};
-
-union hv_ioapic_rte {
- u64 as_uint64;
-
- struct {
- u32 vector:8;
- u32 delivery_mode:3;
- u32 destination_mode:1;
- u32 delivery_status:1;
- u32 interrupt_polarity:1;
- u32 remote_irr:1;
- u32 trigger_mode:1;
- u32 interrupt_mask:1;
- u32 reserved1:15;
-
- u32 reserved2:24;
- u32 destination_id:8;
- };
-
- struct {
- u32 low_uint32;
- u32 high_uint32;
- };
-} __packed;
-
-struct hv_interrupt_entry {
- u32 source;
- u32 reserved1;
- union {
- union hv_msi_entry msi_entry;
- union hv_ioapic_rte ioapic_rte;
- };
-} __packed;
-
-/*
- * flags for hv_device_interrupt_target.flags
- */
-#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST 1
-#define HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET 2
-
-struct hv_device_interrupt_target {
- u32 vector;
- u32 flags;
- union {
- u64 vp_mask;
- struct hv_vpset vp_set;
- };
-} __packed;
-
-struct hv_retarget_device_interrupt {
- u64 partition_id; /* use "self" */
- u64 device_id;
- struct hv_interrupt_entry int_entry;
- u64 reserved2;
- struct hv_device_interrupt_target int_target;
-} __packed __aligned(8);
-
-/*
- * These Hyper-V registers provide information equivalent to the CPUID
- * instruction on x86/x64.
- */
-#define HV_REGISTER_HYPERVISOR_VERSION 0x00000100 /*CPUID 0x40000002 */
-#define HV_REGISTER_FEATURES 0x00000200 /*CPUID 0x40000003 */
-#define HV_REGISTER_ENLIGHTENMENTS 0x00000201 /*CPUID 0x40000004 */
-
-/*
- * Synthetic register definitions equivalent to MSRs on x86/x64
- */
-#define HV_REGISTER_GUEST_CRASH_P0 0x00000210
-#define HV_REGISTER_GUEST_CRASH_P1 0x00000211
-#define HV_REGISTER_GUEST_CRASH_P2 0x00000212
-#define HV_REGISTER_GUEST_CRASH_P3 0x00000213
-#define HV_REGISTER_GUEST_CRASH_P4 0x00000214
-#define HV_REGISTER_GUEST_CRASH_CTL 0x00000215
-
-#define HV_REGISTER_GUEST_OS_ID 0x00090002
-#define HV_REGISTER_VP_INDEX 0x00090003
-#define HV_REGISTER_TIME_REF_COUNT 0x00090004
-#define HV_REGISTER_REFERENCE_TSC 0x00090017
-
-#define HV_REGISTER_SINT0 0x000A0000
-#define HV_REGISTER_SCONTROL 0x000A0010
-#define HV_REGISTER_SIEFP 0x000A0012
-#define HV_REGISTER_SIMP 0x000A0013
-#define HV_REGISTER_EOM 0x000A0014
-
-#define HV_REGISTER_STIMER0_CONFIG 0x000B0000
-#define HV_REGISTER_STIMER0_COUNT 0x000B0001
-
-/* HvGetVpRegisters hypercall input with variable size reg name list*/
-struct hv_get_vp_registers_input {
- struct {
- u64 partitionid;
- u32 vpindex;
- u8 inputvtl;
- u8 padding[3];
- } header;
- struct input {
- u32 name0;
- u32 name1;
- } element[];
-} __packed;
-
-/* HvGetVpRegisters returns an array of these output elements */
-struct hv_get_vp_registers_output {
- union {
- struct {
- u32 a;
- u32 b;
- u32 c;
- u32 d;
- } as32 __packed;
- struct {
- u64 low;
- u64 high;
- } as64 __packed;
- };
-};
-
-/* HvSetVpRegisters hypercall with variable size reg name/value list*/
-struct hv_set_vp_registers_input {
- struct {
- u64 partitionid;
- u32 vpindex;
- u8 inputvtl;
- u8 padding[3];
- } header;
- struct {
- u32 name;
- u32 padding1;
- u64 padding2;
- u64 valuelow;
- u64 valuehigh;
- } element[];
-} __packed;
-
-enum hv_device_type {
- HV_DEVICE_TYPE_LOGICAL = 0,
- HV_DEVICE_TYPE_PCI = 1,
- HV_DEVICE_TYPE_IOAPIC = 2,
- HV_DEVICE_TYPE_ACPI = 3,
-};
-
-typedef u16 hv_pci_rid;
-typedef u16 hv_pci_segment;
-typedef u64 hv_logical_device_id;
-union hv_pci_bdf {
- u16 as_uint16;
-
- struct {
- u8 function:3;
- u8 device:5;
- u8 bus;
- };
-} __packed;
-
-union hv_pci_bus_range {
- u16 as_uint16;
-
- struct {
- u8 subordinate_bus;
- u8 secondary_bus;
- };
-} __packed;
-
-union hv_device_id {
- u64 as_uint64;
-
- struct {
- u64 reserved0:62;
- u64 device_type:2;
- };
-
- /* HV_DEVICE_TYPE_LOGICAL */
- struct {
- u64 id:62;
- u64 device_type:2;
- } logical;
-
- /* HV_DEVICE_TYPE_PCI */
- struct {
- union {
- hv_pci_rid rid;
- union hv_pci_bdf bdf;
- };
-
- hv_pci_segment segment;
- union hv_pci_bus_range shadow_bus_range;
-
- u16 phantom_function_bits:2;
- u16 source_shadow:1;
-
- u16 rsvdz0:11;
- u16 device_type:2;
- } pci;
-
- /* HV_DEVICE_TYPE_IOAPIC */
- struct {
- u8 ioapic_id;
- u8 rsvdz0;
- u16 rsvdz1;
- u16 rsvdz2;
-
- u16 rsvdz3:14;
- u16 device_type:2;
- } ioapic;
-
- /* HV_DEVICE_TYPE_ACPI */
- struct {
- u32 input_mapping_base;
- u32 input_mapping_count:30;
- u32 device_type:2;
- } acpi;
-} __packed;
-
-enum hv_interrupt_trigger_mode {
- HV_INTERRUPT_TRIGGER_MODE_EDGE = 0,
- HV_INTERRUPT_TRIGGER_MODE_LEVEL = 1,
-};
-
-struct hv_device_interrupt_descriptor {
- u32 interrupt_type;
- u32 trigger_mode;
- u32 vector_count;
- u32 reserved;
- struct hv_device_interrupt_target target;
-} __packed;
-
-struct hv_input_map_device_interrupt {
- u64 partition_id;
- u64 device_id;
- u64 flags;
- struct hv_interrupt_entry logical_interrupt_entry;
- struct hv_device_interrupt_descriptor interrupt_descriptor;
-} __packed;
-
-struct hv_output_map_device_interrupt {
- struct hv_interrupt_entry interrupt_entry;
-} __packed;
-
-struct hv_input_unmap_device_interrupt {
- u64 partition_id;
- u64 device_id;
- struct hv_interrupt_entry interrupt_entry;
-} __packed;
-
-#define HV_SOURCE_SHADOW_NONE 0x0
-#define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE 0x1
-
-/*
- * Version info reported by hypervisor
- */
-union hv_hypervisor_version_info {
- struct {
- u32 build_number;
-
- u32 minor_version : 16;
- u32 major_version : 16;
-
- u32 service_pack;
-
- u32 service_number : 24;
- u32 service_branch : 8;
- };
- struct {
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 edx;
- };
-};
-
-/*
- * The whole argument should fit in a page to be able to pass to the hypervisor
- * in one hypercall.
- */
-#define HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES \
- ((HV_HYP_PAGE_SIZE - sizeof(struct hv_memory_hint)) / \
- sizeof(union hv_gpa_page_range))
-
-/* HvExtCallMemoryHeatHint hypercall */
-#define HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD 2
-struct hv_memory_hint {
- u64 type:2;
- u64 reserved:62;
- union hv_gpa_page_range ranges[];
-} __packed;
-
-/* Data structures for HVCALL_MMIO_READ and HVCALL_MMIO_WRITE */
-#define HV_HYPERCALL_MMIO_MAX_DATA_LENGTH 64
-
-struct hv_mmio_read_input {
- u64 gpa;
- u32 size;
- u32 reserved;
-} __packed;
-
-struct hv_mmio_read_output {
- u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH];
-} __packed;
-
-struct hv_mmio_write_input {
- u64 gpa;
- u32 size;
- u32 reserved;
- u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH];
-} __packed;
-
-#endif
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index a5cbbf3e26ec..11abad6c87e1 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -1111,7 +1111,7 @@ void __iomem *generic_ioremap_prot(phys_addr_t phys_addr, size_t size,
pgprot_t prot);
void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
- unsigned long prot);
+ pgprot_t prot);
void iounmap(volatile void __iomem *addr);
void generic_iounmap(volatile void __iomem *addr);
@@ -1120,7 +1120,7 @@ void generic_iounmap(volatile void __iomem *addr);
static inline void __iomem *ioremap(phys_addr_t addr, size_t size)
{
/* _PAGE_IOREMAP needs to be supplied by the architecture */
- return ioremap_prot(addr, size, _PAGE_IOREMAP);
+ return ioremap_prot(addr, size, __pgprot(_PAGE_IOREMAP));
}
#endif
#endif /* !CONFIG_MMU || CONFIG_GENERIC_IOREMAP */
@@ -1212,7 +1212,7 @@ static inline void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
#ifndef memset_io
/**
- * memset_io Set a range of I/O memory to a constant value
+ * memset_io - Set a range of I/O memory to a constant value
* @addr: The beginning of the I/O-memory range to set
* @val: The value to set the memory to
* @count: The number of bytes to set
@@ -1224,7 +1224,7 @@ void memset_io(volatile void __iomem *addr, int val, size_t count);
#ifndef memcpy_fromio
/**
- * memcpy_fromio Copy a block of data from I/O memory
+ * memcpy_fromio - Copy a block of data from I/O memory
* @dst: The (RAM) destination for the copy
* @src: The (I/O memory) source for the data
* @count: The number of bytes to copy
@@ -1236,7 +1236,7 @@ void memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count);
#ifndef memcpy_toio
/**
- * memcpy_toio Copy a block of data into I/O memory
+ * memcpy_toio - Copy a block of data into I/O memory
* @dst: The (I/O memory) destination for the copy
* @src: The (RAM) source for the data
* @count: The number of bytes to copy
diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h
index 196087a8126e..9f3f25d7fc58 100644
--- a/include/asm-generic/iomap.h
+++ b/include/asm-generic/iomap.h
@@ -31,42 +31,22 @@ extern unsigned int ioread16(const void __iomem *);
extern unsigned int ioread16be(const void __iomem *);
extern unsigned int ioread32(const void __iomem *);
extern unsigned int ioread32be(const void __iomem *);
-#ifdef CONFIG_64BIT
-extern u64 ioread64(const void __iomem *);
-extern u64 ioread64be(const void __iomem *);
-#endif
-#ifdef readq
-#define ioread64_lo_hi ioread64_lo_hi
-#define ioread64_hi_lo ioread64_hi_lo
-#define ioread64be_lo_hi ioread64be_lo_hi
-#define ioread64be_hi_lo ioread64be_hi_lo
-extern u64 ioread64_lo_hi(const void __iomem *addr);
-extern u64 ioread64_hi_lo(const void __iomem *addr);
-extern u64 ioread64be_lo_hi(const void __iomem *addr);
-extern u64 ioread64be_hi_lo(const void __iomem *addr);
-#endif
+extern u64 __ioread64_lo_hi(const void __iomem *addr);
+extern u64 __ioread64_hi_lo(const void __iomem *addr);
+extern u64 __ioread64be_lo_hi(const void __iomem *addr);
+extern u64 __ioread64be_hi_lo(const void __iomem *addr);
extern void iowrite8(u8, void __iomem *);
extern void iowrite16(u16, void __iomem *);
extern void iowrite16be(u16, void __iomem *);
extern void iowrite32(u32, void __iomem *);
extern void iowrite32be(u32, void __iomem *);
-#ifdef CONFIG_64BIT
-extern void iowrite64(u64, void __iomem *);
-extern void iowrite64be(u64, void __iomem *);
-#endif
-#ifdef writeq
-#define iowrite64_lo_hi iowrite64_lo_hi
-#define iowrite64_hi_lo iowrite64_hi_lo
-#define iowrite64be_lo_hi iowrite64be_lo_hi
-#define iowrite64be_hi_lo iowrite64be_hi_lo
-extern void iowrite64_lo_hi(u64 val, void __iomem *addr);
-extern void iowrite64_hi_lo(u64 val, void __iomem *addr);
-extern void iowrite64be_lo_hi(u64 val, void __iomem *addr);
-extern void iowrite64be_hi_lo(u64 val, void __iomem *addr);
-#endif
+extern void __iowrite64_lo_hi(u64 val, void __iomem *addr);
+extern void __iowrite64_hi_lo(u64 val, void __iomem *addr);
+extern void __iowrite64be_lo_hi(u64 val, void __iomem *addr);
+extern void __iowrite64be_hi_lo(u64 val, void __iomem *addr);
/*
* "string" versions of the above. Note that they
diff --git a/include/asm-generic/mcs_spinlock.h b/include/asm-generic/mcs_spinlock.h
index 10cd4ffc6ba2..39c94012b88a 100644
--- a/include/asm-generic/mcs_spinlock.h
+++ b/include/asm-generic/mcs_spinlock.h
@@ -1,6 +1,12 @@
#ifndef __ASM_MCS_SPINLOCK_H
#define __ASM_MCS_SPINLOCK_H
+struct mcs_spinlock {
+ struct mcs_spinlock *next;
+ int locked; /* 1 if lock acquired */
+ int count; /* nesting count, see qspinlock.c */
+};
+
/*
* Architectures can define their own:
*
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index 6d1fb6162ac1..74d0077cc5fa 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -19,17 +19,26 @@
#define __page_to_pfn(page) ((unsigned long)((page) - mem_map) + \
ARCH_PFN_OFFSET)
+/* avoid <linux/mm.h> include hell */
+extern unsigned long max_mapnr;
+
#ifndef pfn_valid
static inline int pfn_valid(unsigned long pfn)
{
- /* avoid <linux/mm.h> include hell */
- extern unsigned long max_mapnr;
unsigned long pfn_offset = ARCH_PFN_OFFSET;
return pfn >= pfn_offset && (pfn - pfn_offset) < max_mapnr;
}
#define pfn_valid pfn_valid
-#endif
+
+#ifndef for_each_valid_pfn
+#define for_each_valid_pfn(pfn, start_pfn, end_pfn) \
+ for ((pfn) = max_t(unsigned long, (start_pfn), ARCH_PFN_OFFSET); \
+ (pfn) < min_t(unsigned long, (end_pfn), \
+ ARCH_PFN_OFFSET + max_mapnr); \
+ (pfn)++)
+#endif /* for_each_valid_pfn */
+#endif /* valid_pfn */
#elif defined(CONFIG_SPARSEMEM_VMEMMAP)
diff --git a/include/asm-generic/module.h b/include/asm-generic/module.h
index 98e1541b72b7..a8622501b975 100644
--- a/include/asm-generic/module.h
+++ b/include/asm-generic/module.h
@@ -19,12 +19,8 @@ struct mod_arch_specific
#define Elf_Dyn Elf64_Dyn
#define Elf_Ehdr Elf64_Ehdr
#define Elf_Addr Elf64_Addr
-#ifdef CONFIG_MODULES_USE_ELF_REL
#define Elf_Rel Elf64_Rel
-#endif
-#ifdef CONFIG_MODULES_USE_ELF_RELA
#define Elf_Rela Elf64_Rela
-#endif
#define ELF_R_TYPE(X) ELF64_R_TYPE(X)
#define ELF_R_SYM(X) ELF64_R_SYM(X)
@@ -36,12 +32,8 @@ struct mod_arch_specific
#define Elf_Dyn Elf32_Dyn
#define Elf_Ehdr Elf32_Ehdr
#define Elf_Addr Elf32_Addr
-#ifdef CONFIG_MODULES_USE_ELF_REL
#define Elf_Rel Elf32_Rel
-#endif
-#ifdef CONFIG_MODULES_USE_ELF_RELA
#define Elf_Rela Elf32_Rela
-#endif
#define ELF_R_TYPE(X) ELF32_R_TYPE(X)
#define ELF_R_SYM(X) ELF32_R_SYM(X)
#endif
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 8fe7aaab2599..a729b77983fa 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -6,9 +6,8 @@
* independent. See arch/<arch>/include/asm/mshyperv.h for definitions
* that are specific to architecture <arch>.
*
- * Definitions that are specified in the Hyper-V Top Level Functional
- * Spec (TLFS) should not go in this file, but should instead go in
- * hyperv-tlfs.h.
+ * Definitions that are derived from Hyper-V code or headers should not go in
+ * this file, but should instead go in the relevant files in include/hyperv.
*
* Copyright (C) 2019, Microsoft, Inc.
*
@@ -25,13 +24,19 @@
#include <linux/cpumask.h>
#include <linux/nmi.h>
#include <asm/ptrace.h>
-#include <asm/hyperv-tlfs.h>
+#include <hyperv/hvhdk.h>
#define VTPM_BASE_ADDRESS 0xfed40000
+enum hv_partition_type {
+ HV_PARTITION_TYPE_GUEST,
+ HV_PARTITION_TYPE_ROOT,
+};
+
struct ms_hyperv_info {
u32 features;
u32 priv_high;
+ u32 ext_features;
u32 misc_features;
u32 hints;
u32 nested_features;
@@ -59,15 +64,32 @@ struct ms_hyperv_info {
};
extern struct ms_hyperv_info ms_hyperv;
extern bool hv_nested;
+extern u64 hv_current_partition_id;
+extern enum hv_partition_type hv_curr_partition_type;
extern void * __percpu *hyperv_pcpu_input_arg;
extern void * __percpu *hyperv_pcpu_output_arg;
-extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
-extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
+u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
+u64 hv_do_fast_hypercall8(u16 control, u64 input8);
+u64 hv_do_fast_hypercall16(u16 control, u64 input1, u64 input2);
+
bool hv_isolation_type_snp(void);
bool hv_isolation_type_tdx(void);
+/*
+ * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64),
+ * it doesn't provide a recommendation flag and AEOI must be disabled.
+ */
+static inline bool hv_recommend_using_aeoi(void)
+{
+#ifdef HV_DEPRECATING_AEOI_RECOMMENDED
+ return !(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
+#else
+ return false;
+#endif
+}
+
static inline struct hv_proximity_domain_info hv_numa_node_to_pxm_info(int node)
{
struct hv_proximity_domain_info pxm_info = {};
@@ -186,12 +208,11 @@ void hv_setup_kexec_handler(void (*handler)(void));
void hv_remove_kexec_handler(void);
void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
void hv_remove_crash_handler(void);
+void hv_setup_mshv_handler(void (*handler)(void));
extern int vmbus_interrupt;
extern int vmbus_irq;
-extern bool hv_root_partition;
-
#if IS_ENABLED(CONFIG_HYPERV)
/*
* Hypervisor's notion of virtual processor ID is different from
@@ -208,14 +229,12 @@ extern u64 (*hv_read_reference_counter)(void);
#define VP_INVAL U32_MAX
int __init hv_common_init(void);
+void __init hv_get_partition_id(void);
void __init hv_common_free(void);
void __init ms_hyperv_late_init(void);
int hv_common_cpu_init(unsigned int cpu);
int hv_common_cpu_die(unsigned int cpu);
-
-void *hv_alloc_hyperv_page(void);
-void *hv_alloc_hyperv_zeroed_page(void);
-void hv_free_hyperv_page(void *addr);
+void hv_identify_partition_type(void);
/**
* hv_cpu_number_to_vp_number() - Map CPU to VP.
@@ -292,6 +311,20 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
return __cpumask_to_vpset(vpset, cpus, func);
}
+#define _hv_status_fmt(fmt) "%s: Hyper-V status: %#x = %s: " fmt
+#define hv_status_printk(level, status, fmt, ...) \
+do { \
+ u64 __status = (status); \
+ pr_##level(_hv_status_fmt(fmt), __func__, hv_result(__status), \
+ hv_result_to_string(__status), ##__VA_ARGS__); \
+} while (0)
+#define hv_status_err(status, fmt, ...) \
+ hv_status_printk(err, status, fmt, ##__VA_ARGS__)
+#define hv_status_debug(status, fmt, ...) \
+ hv_status_printk(debug, status, fmt, ##__VA_ARGS__)
+
+const char *hv_result_to_string(u64 hv_status);
+int hv_result_to_errno(u64 status);
void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
bool hv_is_hyperv_initialized(void);
bool hv_is_hibernation_supported(void);
@@ -304,6 +337,7 @@ void hyperv_cleanup(void);
bool hv_query_ext_cap(u64 cap_query);
void hv_setup_dma_ops(struct device *dev, bool coherent);
#else /* CONFIG_HYPERV */
+static inline void hv_identify_partition_type(void) {}
static inline bool hv_is_hyperv_initialized(void) { return false; }
static inline bool hv_is_hibernation_supported(void) { return false; }
static inline void hyperv_cleanup(void) {}
@@ -315,4 +349,35 @@ static inline enum hv_isolation_type hv_get_isolation_type(void)
}
#endif /* CONFIG_HYPERV */
+#if IS_ENABLED(CONFIG_MSHV_ROOT)
+static inline bool hv_root_partition(void)
+{
+ return hv_curr_partition_type == HV_PARTITION_TYPE_ROOT;
+}
+int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
+int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
+int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
+
+#else /* CONFIG_MSHV_ROOT */
+static inline bool hv_root_partition(void) { return false; }
+static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
+{
+ return -EOPNOTSUPP;
+}
+static inline int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id)
+{
+ return -EOPNOTSUPP;
+}
+static inline int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_MSHV_ROOT */
+
+#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE)
+u8 __init get_vtl(void);
+#else
+static inline u8 get_vtl(void) { return 0; }
+#endif
+
#endif
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 94cbd50cc870..02aeca21479a 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -6,6 +6,19 @@
#include <linux/threads.h>
#include <linux/percpu-defs.h>
+/*
+ * __percpu_qual is the qualifier for the percpu named address space.
+ *
+ * Most arches use generic named address space for percpu variables but
+ * some arches define percpu variables in different named address space
+ * (on the x86 arch, percpu variable may be declared as being relative
+ * to the %fs or %gs segments using __seg_fs or __seg_gs named address
+ * space qualifier).
+ */
+#ifndef __percpu_qual
+# define __percpu_qual
+#endif
+
#ifdef CONFIG_SMP
/*
@@ -74,7 +87,7 @@ do { \
#define raw_cpu_generic_add_return(pcp, val) \
({ \
- typeof(pcp) *__p = raw_cpu_ptr(&(pcp)); \
+ TYPEOF_UNQUAL(pcp) *__p = raw_cpu_ptr(&(pcp)); \
\
*__p += val; \
*__p; \
@@ -82,8 +95,8 @@ do { \
#define raw_cpu_generic_xchg(pcp, nval) \
({ \
- typeof(pcp) *__p = raw_cpu_ptr(&(pcp)); \
- typeof(pcp) __ret; \
+ TYPEOF_UNQUAL(pcp) *__p = raw_cpu_ptr(&(pcp)); \
+ TYPEOF_UNQUAL(pcp) __ret; \
__ret = *__p; \
*__p = nval; \
__ret; \
@@ -91,7 +104,7 @@ do { \
#define __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, _cmpxchg) \
({ \
- typeof(pcp) __val, __old = *(ovalp); \
+ TYPEOF_UNQUAL(pcp) __val, __old = *(ovalp); \
__val = _cmpxchg(pcp, __old, nval); \
if (__val != __old) \
*(ovalp) = __val; \
@@ -100,8 +113,8 @@ do { \
#define raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval) \
({ \
- typeof(pcp) *__p = raw_cpu_ptr(&(pcp)); \
- typeof(pcp) __val = *__p, ___old = *(ovalp); \
+ TYPEOF_UNQUAL(pcp) *__p = raw_cpu_ptr(&(pcp)); \
+ TYPEOF_UNQUAL(pcp) __val = *__p, ___old = *(ovalp); \
bool __ret; \
if (__val == ___old) { \
*__p = nval; \
@@ -115,14 +128,14 @@ do { \
#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \
({ \
- typeof(pcp) __old = (oval); \
+ TYPEOF_UNQUAL(pcp) __old = (oval); \
raw_cpu_generic_try_cmpxchg(pcp, &__old, nval); \
__old; \
})
#define __this_cpu_generic_read_nopreempt(pcp) \
({ \
- typeof(pcp) ___ret; \
+ TYPEOF_UNQUAL(pcp) ___ret; \
preempt_disable_notrace(); \
___ret = READ_ONCE(*raw_cpu_ptr(&(pcp))); \
preempt_enable_notrace(); \
@@ -131,7 +144,7 @@ do { \
#define __this_cpu_generic_read_noirq(pcp) \
({ \
- typeof(pcp) ___ret; \
+ TYPEOF_UNQUAL(pcp) ___ret; \
unsigned long ___flags; \
raw_local_irq_save(___flags); \
___ret = raw_cpu_generic_read(pcp); \
@@ -141,7 +154,7 @@ do { \
#define this_cpu_generic_read(pcp) \
({ \
- typeof(pcp) __ret; \
+ TYPEOF_UNQUAL(pcp) __ret; \
if (__native_word(pcp)) \
__ret = __this_cpu_generic_read_nopreempt(pcp); \
else \
@@ -160,7 +173,7 @@ do { \
#define this_cpu_generic_add_return(pcp, val) \
({ \
- typeof(pcp) __ret; \
+ TYPEOF_UNQUAL(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
__ret = raw_cpu_generic_add_return(pcp, val); \
@@ -170,7 +183,7 @@ do { \
#define this_cpu_generic_xchg(pcp, nval) \
({ \
- typeof(pcp) __ret; \
+ TYPEOF_UNQUAL(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
__ret = raw_cpu_generic_xchg(pcp, nval); \
@@ -190,7 +203,7 @@ do { \
#define this_cpu_generic_cmpxchg(pcp, oval, nval) \
({ \
- typeof(pcp) __ret; \
+ TYPEOF_UNQUAL(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
__ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 7c48f5fbf8aa..3c8ec3bfea44 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -23,6 +23,11 @@ static inline pte_t *__pte_alloc_one_kernel_noprof(struct mm_struct *mm)
if (!ptdesc)
return NULL;
+ if (!pagetable_pte_ctor(mm, ptdesc)) {
+ pagetable_free(ptdesc);
+ return NULL;
+ }
+
return ptdesc_address(ptdesc);
}
#define __pte_alloc_one_kernel(...) alloc_hooks(__pte_alloc_one_kernel_noprof(__VA_ARGS__))
@@ -48,7 +53,7 @@ static inline pte_t *pte_alloc_one_kernel_noprof(struct mm_struct *mm)
*/
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
- pagetable_free(virt_to_ptdesc(pte));
+ pagetable_dtor_free(virt_to_ptdesc(pte));
}
/**
@@ -70,7 +75,7 @@ static inline pgtable_t __pte_alloc_one_noprof(struct mm_struct *mm, gfp_t gfp)
ptdesc = pagetable_alloc_noprof(gfp, 0);
if (!ptdesc)
return NULL;
- if (!pagetable_pte_ctor(ptdesc)) {
+ if (!pagetable_pte_ctor(mm, ptdesc)) {
pagetable_free(ptdesc);
return NULL;
}
@@ -109,8 +114,7 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte_page)
{
struct ptdesc *ptdesc = page_ptdesc(pte_page);
- pagetable_pte_dtor(ptdesc);
- pagetable_free(ptdesc);
+ pagetable_dtor_free(ptdesc);
}
@@ -138,7 +142,7 @@ static inline pmd_t *pmd_alloc_one_noprof(struct mm_struct *mm, unsigned long ad
ptdesc = pagetable_alloc_noprof(gfp, 0);
if (!ptdesc)
return NULL;
- if (!pagetable_pmd_ctor(ptdesc)) {
+ if (!pagetable_pmd_ctor(mm, ptdesc)) {
pagetable_free(ptdesc);
return NULL;
}
@@ -153,8 +157,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
struct ptdesc *ptdesc = virt_to_ptdesc(pmd);
BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
- pagetable_pmd_dtor(ptdesc);
- pagetable_free(ptdesc);
+ pagetable_dtor_free(ptdesc);
}
#endif
@@ -202,8 +205,7 @@ static inline void __pud_free(struct mm_struct *mm, pud_t *pud)
struct ptdesc *ptdesc = virt_to_ptdesc(pud);
BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
- pagetable_pud_dtor(ptdesc);
- pagetable_free(ptdesc);
+ pagetable_dtor_free(ptdesc);
}
#ifndef __HAVE_ARCH_PUD_FREE
@@ -215,10 +217,82 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+#if CONFIG_PGTABLE_LEVELS > 4
+
+static inline p4d_t *__p4d_alloc_one_noprof(struct mm_struct *mm, unsigned long addr)
+{
+ gfp_t gfp = GFP_PGTABLE_USER;
+ struct ptdesc *ptdesc;
+
+ if (mm == &init_mm)
+ gfp = GFP_PGTABLE_KERNEL;
+ gfp &= ~__GFP_HIGHMEM;
+
+ ptdesc = pagetable_alloc_noprof(gfp, 0);
+ if (!ptdesc)
+ return NULL;
+
+ pagetable_p4d_ctor(ptdesc);
+ return ptdesc_address(ptdesc);
+}
+#define __p4d_alloc_one(...) alloc_hooks(__p4d_alloc_one_noprof(__VA_ARGS__))
+
+#ifndef __HAVE_ARCH_P4D_ALLOC_ONE
+static inline p4d_t *p4d_alloc_one_noprof(struct mm_struct *mm, unsigned long addr)
+{
+ return __p4d_alloc_one_noprof(mm, addr);
+}
+#define p4d_alloc_one(...) alloc_hooks(p4d_alloc_one_noprof(__VA_ARGS__))
+#endif
+
+static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+ struct ptdesc *ptdesc = virt_to_ptdesc(p4d);
+
+ BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
+ pagetable_dtor_free(ptdesc);
+}
+
+#ifndef __HAVE_ARCH_P4D_FREE
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+ if (!mm_p4d_folded(mm))
+ __p4d_free(mm, p4d);
+}
+#endif
+
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
+
+static inline pgd_t *__pgd_alloc_noprof(struct mm_struct *mm, unsigned int order)
+{
+ gfp_t gfp = GFP_PGTABLE_USER;
+ struct ptdesc *ptdesc;
+
+ if (mm == &init_mm)
+ gfp = GFP_PGTABLE_KERNEL;
+ gfp &= ~__GFP_HIGHMEM;
+
+ ptdesc = pagetable_alloc_noprof(gfp, order);
+ if (!ptdesc)
+ return NULL;
+
+ pagetable_pgd_ctor(ptdesc);
+ return ptdesc_address(ptdesc);
+}
+#define __pgd_alloc(...) alloc_hooks(__pgd_alloc_noprof(__VA_ARGS__))
+
+static inline void __pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ struct ptdesc *ptdesc = virt_to_ptdesc(pgd);
+
+ BUG_ON((unsigned long)pgd & (PAGE_SIZE-1));
+ pagetable_dtor_free(ptdesc);
+}
+
#ifndef __HAVE_ARCH_PGD_FREE
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
- pagetable_free(virt_to_ptdesc(pgd));
+ __pgd_free(mm, pgd);
}
#endif
diff --git a/include/asm-generic/rqspinlock.h b/include/asm-generic/rqspinlock.h
new file mode 100644
index 000000000000..6d4244d643df
--- /dev/null
+++ b/include/asm-generic/rqspinlock.h
@@ -0,0 +1,250 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Resilient Queued Spin Lock
+ *
+ * (C) Copyright 2024-2025 Meta Platforms, Inc. and affiliates.
+ *
+ * Authors: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+ */
+#ifndef __ASM_GENERIC_RQSPINLOCK_H
+#define __ASM_GENERIC_RQSPINLOCK_H
+
+#include <linux/types.h>
+#include <vdso/time64.h>
+#include <linux/percpu.h>
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm/qspinlock.h>
+#endif
+
+struct rqspinlock {
+ union {
+ atomic_t val;
+ u32 locked;
+ };
+};
+
+/* Even though this is same as struct rqspinlock, we need to emit a distinct
+ * type in BTF for BPF programs.
+ */
+struct bpf_res_spin_lock {
+ u32 val;
+};
+
+struct qspinlock;
+#ifdef CONFIG_QUEUED_SPINLOCKS
+typedef struct qspinlock rqspinlock_t;
+#else
+typedef struct rqspinlock rqspinlock_t;
+#endif
+
+extern int resilient_tas_spin_lock(rqspinlock_t *lock);
+#ifdef CONFIG_QUEUED_SPINLOCKS
+extern int resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val);
+#endif
+
+#ifndef resilient_virt_spin_lock_enabled
+static __always_inline bool resilient_virt_spin_lock_enabled(void)
+{
+ return false;
+}
+#endif
+
+#ifndef resilient_virt_spin_lock
+static __always_inline int resilient_virt_spin_lock(rqspinlock_t *lock)
+{
+ return 0;
+}
+#endif
+
+/*
+ * Default timeout for waiting loops is 0.25 seconds
+ */
+#define RES_DEF_TIMEOUT (NSEC_PER_SEC / 4)
+
+/*
+ * Choose 31 as it makes rqspinlock_held cacheline-aligned.
+ */
+#define RES_NR_HELD 31
+
+struct rqspinlock_held {
+ int cnt;
+ void *locks[RES_NR_HELD];
+};
+
+DECLARE_PER_CPU_ALIGNED(struct rqspinlock_held, rqspinlock_held_locks);
+
+static __always_inline void grab_held_lock_entry(void *lock)
+{
+ int cnt = this_cpu_inc_return(rqspinlock_held_locks.cnt);
+
+ if (unlikely(cnt > RES_NR_HELD)) {
+ /* Still keep the inc so we decrement later. */
+ return;
+ }
+
+ /*
+ * Implied compiler barrier in per-CPU operations; otherwise we can have
+ * the compiler reorder inc with write to table, allowing interrupts to
+ * overwrite and erase our write to the table (as on interrupt exit it
+ * will be reset to NULL).
+ *
+ * It is fine for cnt inc to be reordered wrt remote readers though,
+ * they won't observe our entry until the cnt update is visible, that's
+ * all.
+ */
+ this_cpu_write(rqspinlock_held_locks.locks[cnt - 1], lock);
+}
+
+/*
+ * We simply don't support out-of-order unlocks, and keep the logic simple here.
+ * The verifier prevents BPF programs from unlocking out-of-order, and the same
+ * holds for in-kernel users.
+ *
+ * It is possible to run into misdetection scenarios of AA deadlocks on the same
+ * CPU, and missed ABBA deadlocks on remote CPUs if this function pops entries
+ * out of order (due to lock A, lock B, unlock A, unlock B) pattern. The correct
+ * logic to preserve right entries in the table would be to walk the array of
+ * held locks and swap and clear out-of-order entries, but that's too
+ * complicated and we don't have a compelling use case for out of order unlocking.
+ */
+static __always_inline void release_held_lock_entry(void)
+{
+ struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
+
+ if (unlikely(rqh->cnt > RES_NR_HELD))
+ goto dec;
+ WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL);
+dec:
+ /*
+ * Reordering of clearing above with inc and its write in
+ * grab_held_lock_entry that came before us (in same acquisition
+ * attempt) is ok, we either see a valid entry or NULL when it's
+ * visible.
+ *
+ * But this helper is invoked when we unwind upon failing to acquire the
+ * lock. Unlike the unlock path which constitutes a release store after
+ * we clear the entry, we need to emit a write barrier here. Otherwise,
+ * we may have a situation as follows:
+ *
+ * <error> for lock B
+ * release_held_lock_entry
+ *
+ * try_cmpxchg_acquire for lock A
+ * grab_held_lock_entry
+ *
+ * Lack of any ordering means reordering may occur such that dec, inc
+ * are done before entry is overwritten. This permits a remote lock
+ * holder of lock B (which this CPU failed to acquire) to now observe it
+ * as being attempted on this CPU, and may lead to misdetection (if this
+ * CPU holds a lock it is attempting to acquire, leading to false ABBA
+ * diagnosis).
+ *
+ * In case of unlock, we will always do a release on the lock word after
+ * releasing the entry, ensuring that other CPUs cannot hold the lock
+ * (and make conclusions about deadlocks) until the entry has been
+ * cleared on the local CPU, preventing any anomalies. Reordering is
+ * still possible there, but a remote CPU cannot observe a lock in our
+ * table which it is already holding, since visibility entails our
+ * release store for the said lock has not retired.
+ *
+ * In theory we don't have a problem if the dec and WRITE_ONCE above get
+ * reordered with each other, we either notice an empty NULL entry on
+ * top (if dec succeeds WRITE_ONCE), or a potentially stale entry which
+ * cannot be observed (if dec precedes WRITE_ONCE).
+ *
+ * Emit the write barrier _before_ the dec, this permits dec-inc
+ * reordering but that is harmless as we'd have new entry set to NULL
+ * already, i.e. they cannot precede the NULL store above.
+ */
+ smp_wmb();
+ this_cpu_dec(rqspinlock_held_locks.cnt);
+}
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+
+/**
+ * res_spin_lock - acquire a queued spinlock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * Return:
+ * * 0 - Lock was acquired successfully.
+ * * -EDEADLK - Lock acquisition failed because of AA/ABBA deadlock.
+ * * -ETIMEDOUT - Lock acquisition failed because of timeout.
+ */
+static __always_inline int res_spin_lock(rqspinlock_t *lock)
+{
+ int val = 0;
+
+ if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) {
+ grab_held_lock_entry(lock);
+ return 0;
+ }
+ return resilient_queued_spin_lock_slowpath(lock, val);
+}
+
+#else
+
+#define res_spin_lock(lock) resilient_tas_spin_lock(lock)
+
+#endif /* CONFIG_QUEUED_SPINLOCKS */
+
+static __always_inline void res_spin_unlock(rqspinlock_t *lock)
+{
+ struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
+
+ if (unlikely(rqh->cnt > RES_NR_HELD))
+ goto unlock;
+ WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL);
+unlock:
+ /*
+ * Release barrier, ensures correct ordering. See release_held_lock_entry
+ * for details. Perform release store instead of queued_spin_unlock,
+ * since we use this function for test-and-set fallback as well. When we
+ * have CONFIG_QUEUED_SPINLOCKS=n, we clear the full 4-byte lockword.
+ *
+ * Like release_held_lock_entry, we can do the release before the dec.
+ * We simply care about not seeing the 'lock' in our table from a remote
+ * CPU once the lock has been released, which doesn't rely on the dec.
+ *
+ * Unlike smp_wmb(), release is not a two way fence, hence it is
+ * possible for a inc to move up and reorder with our clearing of the
+ * entry. This isn't a problem however, as for a misdiagnosis of ABBA,
+ * the remote CPU needs to hold this lock, which won't be released until
+ * the store below is done, which would ensure the entry is overwritten
+ * to NULL, etc.
+ */
+ smp_store_release(&lock->locked, 0);
+ this_cpu_dec(rqspinlock_held_locks.cnt);
+}
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#define raw_res_spin_lock_init(lock) ({ *(lock) = (rqspinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; })
+#else
+#define raw_res_spin_lock_init(lock) ({ *(lock) = (rqspinlock_t){0}; })
+#endif
+
+#define raw_res_spin_lock(lock) \
+ ({ \
+ int __ret; \
+ preempt_disable(); \
+ __ret = res_spin_lock(lock); \
+ if (__ret) \
+ preempt_enable(); \
+ __ret; \
+ })
+
+#define raw_res_spin_unlock(lock) ({ res_spin_unlock(lock); preempt_enable(); })
+
+#define raw_res_spin_lock_irqsave(lock, flags) \
+ ({ \
+ int __ret; \
+ local_irq_save(flags); \
+ __ret = raw_res_spin_lock(lock); \
+ if (__ret) \
+ local_irq_restore(flags); \
+ __ret; \
+ })
+
+#define raw_res_spin_unlock_irqrestore(lock, flags) ({ raw_res_spin_unlock(lock); local_irq_restore(flags); })
+
+#endif /* __ASM_GENERIC_RQSPINLOCK_H */
diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h
index 8d0a6280e982..52b969c7cef9 100644
--- a/include/asm-generic/rwonce.h
+++ b/include/asm-generic/rwonce.h
@@ -79,10 +79,18 @@ unsigned long __read_once_word_nocheck(const void *addr)
(typeof(x))__read_once_word_nocheck(&(x)); \
})
-static __no_kasan_or_inline
+static __no_sanitize_or_inline
unsigned long read_word_at_a_time(const void *addr)
{
+ /* open-coded instrument_read(addr, 1) */
kasan_check_read(addr, 1);
+ kcsan_check_read(addr, 1);
+
+ /*
+ * This load can race with concurrent stores to out-of-bounds memory,
+ * but READ_ONCE() can't be used because it requires higher alignment
+ * than plain loads in arm64 builds with LTO.
+ */
return *(unsigned long *)addr;
}
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index c768de6f19a9..0755bc39b0d8 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -39,7 +39,7 @@ extern char __init_begin[], __init_end[];
extern char _sinittext[], _einittext[];
extern char __start_ro_after_init[], __end_ro_after_init[];
extern char _end[];
-extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
+extern char __per_cpu_start[], __per_cpu_end[];
extern char __kprobes_text_start[], __kprobes_text_end[];
extern char __entry_text_start[], __entry_text_end[];
extern char __start_rodata[], __end_rodata[];
diff --git a/include/asm-generic/simd.h b/include/asm-generic/simd.h
index d0343d58a74a..70c8716ad32a 100644
--- a/include/asm-generic/simd.h
+++ b/include/asm-generic/simd.h
@@ -1,6 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_SIMD_H
+#define _ASM_GENERIC_SIMD_H
-#include <linux/hardirq.h>
+#include <linux/compiler_attributes.h>
+#include <linux/preempt.h>
+#include <linux/sched.h>
+#include <linux/types.h>
/*
* may_use_simd - whether it is allowable at this time to issue SIMD
@@ -13,3 +18,5 @@ static __must_check inline bool may_use_simd(void)
{
return !in_interrupt();
}
+
+#endif /* _ASM_GENERIC_SIMD_H */
diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h
index 5a80fe728dc8..c5a3ad53beec 100644
--- a/include/asm-generic/syscall.h
+++ b/include/asm-generic/syscall.h
@@ -5,7 +5,7 @@
* Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved.
*
* This file is a stub providing documentation for what functions
- * asm-ARCH/syscall.h files need to define. Most arch definitions
+ * arch/ARCH/include/asm/syscall.h files need to define. Most arch definitions
* will be simple inlines.
*
* All of these functions expect to be called with no locks,
@@ -38,6 +38,20 @@ struct pt_regs;
int syscall_get_nr(struct task_struct *task, struct pt_regs *regs);
/**
+ * syscall_set_nr - change the system call a task is executing
+ * @task: task of interest, must be blocked
+ * @regs: task_pt_regs() of @task
+ * @nr: system call number
+ *
+ * Changes the system call number @task is about to execute.
+ *
+ * It's only valid to call this when @task is stopped for tracing on
+ * entry to a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or
+ * %SYSCALL_WORK_SYSCALL_AUDIT.
+ */
+void syscall_set_nr(struct task_struct *task, struct pt_regs *regs, int nr);
+
+/**
* syscall_rollback - roll back registers after an aborted system call
* @task: task of interest, must be in system call exit tracing
* @regs: task_pt_regs() of @task
@@ -118,6 +132,22 @@ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
unsigned long *args);
/**
+ * syscall_set_arguments - change system call parameter value
+ * @task: task of interest, must be in system call entry tracing
+ * @regs: task_pt_regs() of @task
+ * @args: array of argument values to store
+ *
+ * Changes 6 arguments to the system call.
+ * The first argument gets value @args[0], and so on.
+ *
+ * It's only valid to call this when @task is stopped for tracing on
+ * entry to a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or
+ * %SYSCALL_WORK_SYSCALL_AUDIT.
+ */
+void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
+ const unsigned long *args);
+
+/**
* syscall_get_arch - return the AUDIT_ARCH for the current system call
* @task: task of interest, must be blocked
*
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 709830274b75..1fff717cae51 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -58,6 +58,11 @@
* Defaults to flushing at tlb_end_vma() to reset the range; helps when
* there's large holes between the VMAs.
*
+ * - tlb_free_vmas()
+ *
+ * tlb_free_vmas() marks the start of unlinking of one or more vmas
+ * and freeing page-tables.
+ *
* - tlb_remove_table()
*
* tlb_remove_table() is the basic primitive to free page-table directories
@@ -67,22 +72,21 @@
*
* See also MMU_GATHER_TABLE_FREE and MMU_GATHER_RCU_TABLE_FREE.
*
- * - tlb_remove_page() / __tlb_remove_page()
- * - tlb_remove_page_size() / __tlb_remove_page_size()
- * - __tlb_remove_folio_pages()
+ * - tlb_remove_page() / tlb_remove_page_size()
+ * - __tlb_remove_folio_pages() / __tlb_remove_page_size()
+ * - __tlb_remove_folio_pages_size()
*
- * __tlb_remove_page_size() is the basic primitive that queues a page for
- * freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a
- * boolean indicating if the queue is (now) full and a call to
- * tlb_flush_mmu() is required.
+ * __tlb_remove_folio_pages_size() is the basic primitive that queues pages
+ * for freeing. It will return a boolean indicating if the queue is (now)
+ * full and a call to tlb_flush_mmu() is required.
*
* tlb_remove_page() and tlb_remove_page_size() imply the call to
* tlb_flush_mmu() when required and has no return value.
*
- * __tlb_remove_folio_pages() is similar to __tlb_remove_page(), however,
- * instead of removing a single page, remove the given number of consecutive
- * pages that are all part of the same (large) folio: just like calling
- * __tlb_remove_page() on each page individually.
+ * __tlb_remove_folio_pages() is similar to __tlb_remove_page_size(),
+ * however, instead of removing a single page, assume PAGE_SIZE and remove
+ * the given number of consecutive pages that are all part of the
+ * same (large) folio.
*
* - tlb_change_page_size()
*
@@ -153,8 +157,9 @@
*
* Useful if your architecture has non-page page directories.
*
- * When used, an architecture is expected to provide __tlb_remove_table()
- * which does the actual freeing of these pages.
+ * When used, an architecture is expected to provide __tlb_remove_table() or
+ * use the generic __tlb_remove_table(), which does the actual freeing of these
+ * pages.
*
* MMU_GATHER_RCU_TABLE_FREE
*
@@ -207,16 +212,31 @@ struct mmu_table_batch {
#define MAX_TABLE_BATCH \
((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
+#ifndef __HAVE_ARCH_TLB_REMOVE_TABLE
+static inline void __tlb_remove_table(void *table)
+{
+ struct ptdesc *ptdesc = (struct ptdesc *)table;
+
+ pagetable_dtor_free(ptdesc);
+}
+#endif
+
extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
-#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */
+#else /* !CONFIG_MMU_GATHER_TABLE_FREE */
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page);
/*
* Without MMU_GATHER_TABLE_FREE the architecture is assumed to have page based
* page directories and we can use the normal page batching to free them.
*/
-#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page))
+static inline void tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+ struct ptdesc *ptdesc = (struct ptdesc *)table;
+ pagetable_dtor(ptdesc);
+ tlb_remove_page(tlb, ptdesc_page(ptdesc));
+}
#endif /* CONFIG_MMU_GATHER_TABLE_FREE */
#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
@@ -449,7 +469,12 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
*/
tlb->vma_huge = is_vm_hugetlb_page(vma);
tlb->vma_exec = !!(vma->vm_flags & VM_EXEC);
- tlb->vma_pfn = !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP));
+
+ /*
+ * Track if there's at least one VM_PFNMAP/VM_MIXEDMAP vma
+ * in the tracked range, see tlb_free_vmas().
+ */
+ tlb->vma_pfn |= !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP));
}
static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
@@ -473,32 +498,16 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb,
tlb_flush_mmu(tlb);
}
-static __always_inline bool __tlb_remove_page(struct mmu_gather *tlb,
- struct page *page, bool delay_rmap)
-{
- return __tlb_remove_page_size(tlb, page, delay_rmap, PAGE_SIZE);
-}
-
-/* tlb_remove_page
- * Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when
- * required.
- */
static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
{
return tlb_remove_page_size(tlb, page, PAGE_SIZE);
}
-static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt)
+static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt)
{
tlb_remove_table(tlb, pt);
}
-/* Like tlb_remove_ptdesc, but for page-like page directories. */
-static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt)
-{
- tlb_remove_page(tlb, ptdesc_page(pt));
-}
-
static inline void tlb_change_page_size(struct mmu_gather *tlb,
unsigned int page_size)
{
@@ -549,22 +558,38 @@ static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *
static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
+ if (tlb->fullmm || IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS))
+ return;
+
+ /*
+ * Do a TLB flush and reset the range at VMA boundaries; this avoids
+ * the ranges growing with the unused space between consecutive VMAs,
+ * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on
+ * this.
+ */
+ tlb_flush_mmu_tlbonly(tlb);
+}
+
+static inline void tlb_free_vmas(struct mmu_gather *tlb)
+{
if (tlb->fullmm)
return;
/*
* VM_PFNMAP is more fragile because the core mm will not track the
- * page mapcount -- there might not be page-frames for these PFNs after
- * all. Force flush TLBs for such ranges to avoid munmap() vs
- * unmap_mapping_range() races.
+ * page mapcount -- there might not be page-frames for these PFNs
+ * after all.
+ *
+ * Specifically() there is a race between munmap() and
+ * unmap_mapping_range(), where munmap() will unlink the VMA, such
+ * that unmap_mapping_range() will no longer observe the VMA and
+ * no-op, without observing the TLBI, returning prematurely.
+ *
+ * So if we're about to unlink such a VMA, and we have pending
+ * TLBI for such a vma, flush things now.
*/
- if (tlb->vma_pfn || !IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) {
- /*
- * Do a TLB flush and reset the range at VMA boundaries; this avoids
- * the ranges growing with the unused space between consecutive VMAs.
- */
+ if (tlb->vma_pfn)
tlb_flush_mmu_tlbonly(tlb);
- }
}
/*
diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h
index 01dafd604188..b550afa15ecd 100644
--- a/include/asm-generic/vdso/vsyscall.h
+++ b/include/asm-generic/vdso/vsyscall.h
@@ -4,24 +4,35 @@
#ifndef __ASSEMBLY__
-#ifndef __arch_get_k_vdso_data
-static __always_inline struct vdso_data *__arch_get_k_vdso_data(void)
+#ifdef CONFIG_GENERIC_VDSO_DATA_STORE
+
+#ifndef __arch_get_vdso_u_time_data
+static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void)
{
- return NULL;
+ return &vdso_u_time_data;
}
-#endif /* __arch_get_k_vdso_data */
+#endif
+
+#ifndef __arch_get_vdso_u_rng_data
+static __always_inline const struct vdso_rng_data *__arch_get_vdso_u_rng_data(void)
+{
+ return &vdso_u_rng_data;
+}
+#endif
+
+#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */
#ifndef __arch_update_vsyscall
-static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata)
+static __always_inline void __arch_update_vsyscall(struct vdso_time_data *vdata)
{
}
#endif /* __arch_update_vsyscall */
-#ifndef __arch_sync_vdso_data
-static __always_inline void __arch_sync_vdso_data(struct vdso_data *vdata)
+#ifndef __arch_sync_vdso_time_data
+static __always_inline void __arch_sync_vdso_time_data(struct vdso_time_data *vdata)
{
}
-#endif /* __arch_sync_vdso_data */
+#endif /* __arch_sync_vdso_time_data */
#endif /* !__ASSEMBLY__ */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 337d3336e175..fa5f19b8d53a 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -108,13 +108,13 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
#define TEXT_MAIN .text
#endif
#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
-#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L*
+#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data.rel.* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L*
#define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L*
#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral*
#define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]*
#else
-#define DATA_MAIN .data
+#define DATA_MAIN .data .data.rel .data.rel.local
#define SDATA_MAIN .sdata
#define RODATA_MAIN .rodata
#define BSS_MAIN .bss
@@ -385,6 +385,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
. = ALIGN(PAGE_SIZE); \
__nosave_end = .;
+#define CACHE_HOT_DATA(align) \
+ . = ALIGN(align); \
+ *(SORT_BY_ALIGNMENT(.data..hot.*)) \
+ . = ALIGN(align);
+
#define PAGE_ALIGNED_DATA(page_align) \
. = ALIGN(page_align); \
*(.data..page_aligned) \
@@ -404,7 +409,6 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
__start_init_stack = .; \
init_thread_union = .; \
init_stack = .; \
- KEEP(*(.data..init_task)) \
KEEP(*(.data..init_thread_info)) \
. = __start_init_stack + THREAD_SIZE; \
__end_init_stack = .;
@@ -663,10 +667,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
*/
#ifdef CONFIG_DEBUG_INFO_BTF
#define BTF \
+ . = ALIGN(PAGE_SIZE); \
.BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \
BOUNDED_SECTION_BY(.BTF, _BTF) \
} \
- . = ALIGN(4); \
+ . = ALIGN(PAGE_SIZE); \
.BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) { \
*(.BTF_ids) \
}
@@ -1038,6 +1043,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
*(.discard) \
*(.discard.*) \
*(.export_symbol) \
+ *(.no_trim_symbol) \
*(.modinfo) \
/* ld.bfd warns about .gnu.version* even when not emitted */ \
*(.gnu.version*) \
@@ -1061,10 +1067,13 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
*/
#define PERCPU_INPUT(cacheline) \
__per_cpu_start = .; \
- *(.data..percpu..first) \
. = ALIGN(PAGE_SIZE); \
*(.data..percpu..page_aligned) \
. = ALIGN(cacheline); \
+ __per_cpu_hot_start = .; \
+ *(SORT_BY_ALIGNMENT(.data..percpu..hot.*)) \
+ __per_cpu_hot_end = .; \
+ . = ALIGN(cacheline); \
*(.data..percpu..read_mostly) \
. = ALIGN(cacheline); \
*(.data..percpu) \
@@ -1073,52 +1082,17 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
__per_cpu_end = .;
/**
- * PERCPU_VADDR - define output section for percpu area
+ * PERCPU_SECTION - define output section for percpu area
* @cacheline: cacheline size
- * @vaddr: explicit base address (optional)
- * @phdr: destination PHDR (optional)
*
* Macro which expands to output section for percpu area.
*
* @cacheline is used to align subsections to avoid false cacheline
* sharing between subsections for different purposes.
- *
- * If @vaddr is not blank, it specifies explicit base address and all
- * percpu symbols will be offset from the given address. If blank,
- * @vaddr always equals @laddr + LOAD_OFFSET.
- *
- * @phdr defines the output PHDR to use if not blank. Be warned that
- * output PHDR is sticky. If @phdr is specified, the next output
- * section in the linker script will go there too. @phdr should have
- * a leading colon.
- *
- * Note that this macros defines __per_cpu_load as an absolute symbol.
- * If there is no need to put the percpu section at a predetermined
- * address, use PERCPU_SECTION.
- */
-#define PERCPU_VADDR(cacheline, vaddr, phdr) \
- __per_cpu_load = .; \
- .data..percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) { \
- PERCPU_INPUT(cacheline) \
- } phdr \
- . = __per_cpu_load + SIZEOF(.data..percpu);
-
-/**
- * PERCPU_SECTION - define output section for percpu area, simple version
- * @cacheline: cacheline size
- *
- * Align to PAGE_SIZE and outputs output section for percpu area. This
- * macro doesn't manipulate @vaddr or @phdr and __per_cpu_load and
- * __per_cpu_start will be identical.
- *
- * This macro is equivalent to ALIGN(PAGE_SIZE); PERCPU_VADDR(@cacheline,,)
- * except that __per_cpu_load is defined as a relative symbol against
- * .data..percpu which is required for relocatable x86_32 configuration.
*/
#define PERCPU_SECTION(cacheline) \
. = ALIGN(PAGE_SIZE); \
.data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \
- __per_cpu_load = .; \
PERCPU_INPUT(cacheline) \
}
@@ -1147,6 +1121,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
INIT_TASK_DATA(inittask) \
NOSAVE_DATA \
PAGE_ALIGNED_DATA(pagealigned) \
+ CACHE_HOT_DATA(cacheline) \
CACHELINE_ALIGNED_DATA(cacheline) \
READ_MOSTLY_DATA(cacheline) \
DATA_DATA \