summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-07-31 03:14:01 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2025-07-31 03:14:01 +0300
commit63eb28bb1402891b1ad2be02a530f29a9dd7f1cd (patch)
tree982d9fa36c6e4f3e437c6fca10c151d165f7ddde /include/linux
parent7d767a9528f6d203bca5e83faf1b8f2f6af3fc07 (diff)
parent196d9e72c4b0bd68b74a4ec7f52d248f37d0f030 (diff)
downloadlinux-63eb28bb1402891b1ad2be02a530f29a9dd7f1cd.tar.xz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "ARM: - Host driver for GICv5, the next generation interrupt controller for arm64, including support for interrupt routing, MSIs, interrupt translation and wired interrupts - Use FEAT_GCIE_LEGACY on GICv5 systems to virtualize GICv3 VMs on GICv5 hardware, leveraging the legacy VGIC interface - Userspace control of the 'nASSGIcap' GICv3 feature, allowing userspace to disable support for SGIs w/o an active state on hardware that previously advertised it unconditionally - Map supporting endpoints with cacheable memory attributes on systems with FEAT_S2FWB and DIC where KVM no longer needs to perform cache maintenance on the address range - Nested support for FEAT_RAS and FEAT_DoubleFault2, allowing the guest hypervisor to inject external aborts into an L2 VM and take traps of masked external aborts to the hypervisor - Convert more system register sanitization to the config-driven implementation - Fixes to the visibility of EL2 registers, namely making VGICv3 system registers accessible through the VGIC device instead of the ONE_REG vCPU ioctls - Various cleanups and minor fixes LoongArch: - Add stat information for in-kernel irqchip - Add tracepoints for CPUCFG and CSR emulation exits - Enhance in-kernel irqchip emulation - Various cleanups RISC-V: - Enable ring-based dirty memory tracking - Improve perf kvm stat to report interrupt events - Delegate illegal instruction trap to VS-mode - MMU improvements related to upcoming nested virtualization s390x - Fixes x86: - Add CONFIG_KVM_IOAPIC for x86 to allow disabling support for I/O APIC, PIC, and PIT emulation at compile time - Share device posted IRQ code between SVM and VMX and harden it against bugs and runtime errors - Use vcpu_idx, not vcpu_id, for GA log tag/metadata, to make lookups O(1) instead of O(n) - For MMIO stale data mitigation, track whether or not a vCPU has access to (host) MMIO based on whether the page tables have MMIO pfns mapped; using VFIO is prone to false negatives - Rework the MSR interception code so that the SVM and VMX APIs are more or less identical - Recalculate all MSR intercepts from scratch on MSR filter changes, instead of maintaining shadow bitmaps - Advertise support for LKGS (Load Kernel GS base), a new instruction that's loosely related to FRED, but is supported and enumerated independently - Fix a user-triggerable WARN that syzkaller found by setting the vCPU in INIT_RECEIVED state (aka wait-for-SIPI), and then putting the vCPU into VMX Root Mode (post-VMXON). Trying to detect every possible path leading to architecturally forbidden states is hard and even risks breaking userspace (if it goes from valid to valid state but passes through invalid states), so just wait until KVM_RUN to detect that the vCPU state isn't allowed - Add KVM_X86_DISABLE_EXITS_APERFMPERF to allow disabling interception of APERF/MPERF reads, so that a "properly" configured VM can access APERF/MPERF. This has many caveats (APERF/MPERF cannot be zeroed on vCPU creation or saved/restored on suspend and resume, or preserved over thread migration let alone VM migration) but can be useful whenever you're interested in letting Linux guests see the effective physical CPU frequency in /proc/cpuinfo - Reject KVM_SET_TSC_KHZ for vm file descriptors if vCPUs have been created, as there's no known use case for changing the default frequency for other VM types and it goes counter to the very reason why the ioctl was added to the vm file descriptor. And also, there would be no way to make it work for confidential VMs with a "secure" TSC, so kill two birds with one stone - Dynamically allocation the shadow MMU's hashed page list, and defer allocating the hashed list until it's actually needed (the TDP MMU doesn't use the list) - Extract many of KVM's helpers for accessing architectural local APIC state to common x86 so that they can be shared by guest-side code for Secure AVIC - Various cleanups and fixes x86 (Intel): - Preserve the host's DEBUGCTL.FREEZE_IN_SMM when running the guest. Failure to honor FREEZE_IN_SMM can leak host state into guests - Explicitly check vmcs12.GUEST_DEBUGCTL on nested VM-Enter to prevent L1 from running L2 with features that KVM doesn't support, e.g. BTF x86 (AMD): - WARN and reject loading kvm-amd.ko instead of panicking the kernel if the nested SVM MSRPM offsets tracker can't handle an MSR (which is pretty much a static condition and therefore should never happen, but still) - Fix a variety of flaws and bugs in the AVIC device posted IRQ code - Inhibit AVIC if a vCPU's ID is too big (relative to what hardware supports) instead of rejecting vCPU creation - Extend enable_ipiv module param support to SVM, by simply leaving IsRunning clear in the vCPU's physical ID table entry - Disable IPI virtualization, via enable_ipiv, if the CPU is affected by erratum #1235, to allow (safely) enabling AVIC on such CPUs - Request GA Log interrupts if and only if the target vCPU is blocking, i.e. only if KVM needs a notification in order to wake the vCPU - Intercept SPEC_CTRL on AMD if the MSR shouldn't exist according to the vCPU's CPUID model - Accept any SNP policy that is accepted by the firmware with respect to SMT and single-socket restrictions. An incompatible policy doesn't put the kernel at risk in any way, so there's no reason for KVM to care - Drop a superfluous WBINVD (on all CPUs!) when destroying a VM and use WBNOINVD instead of WBINVD when possible for SEV cache maintenance - When reclaiming memory from an SEV guest, only do cache flushes on CPUs that have ever run a vCPU for the guest, i.e. don't flush the caches for CPUs that can't possibly have cache lines with dirty, encrypted data Generic: - Rework irqbypass to track/match producers and consumers via an xarray instead of a linked list. Using a linked list leads to O(n^2) insertion times, which is hugely problematic for use cases that create large numbers of VMs. Such use cases typically don't actually use irqbypass, but eliminating the pointless registration is a future problem to solve as it likely requires new uAPI - Track irqbypass's "token" as "struct eventfd_ctx *" instead of a "void *", to avoid making a simple concept unnecessarily difficult to understand - Decouple device posted IRQs from VFIO device assignment, as binding a VM to a VFIO group is not a requirement for enabling device posted IRQs - Clean up and document/comment the irqfd assignment code - Disallow binding multiple irqfds to an eventfd with a priority waiter, i.e. ensure an eventfd is bound to at most one irqfd through the entire host, and add a selftest to verify eventfd:irqfd bindings are globally unique - Add a tracepoint for KVM_SET_MEMORY_ATTRIBUTES to help debug issues related to private <=> shared memory conversions - Drop guest_memfd's .getattr() implementation as the VFS layer will call generic_fillattr() if inode_operations.getattr is NULL - Fix issues with dirty ring harvesting where KVM doesn't bound the processing of entries in any way, which allows userspace to keep KVM in a tight loop indefinitely - Kill off kvm_arch_{start,end}_assignment() and x86's associated tracking, now that KVM no longer uses assigned_device_count as a heuristic for either irqbypass usage or MDS mitigation Selftests: - Fix a comment typo - Verify KVM is loaded when getting any KVM module param so that attempting to run a selftest without kvm.ko loaded results in a SKIP message about KVM not being loaded/enabled (versus some random parameter not existing) - Skip tests that hit EACCES when attempting to access a file, and print a "Root required?" help message. In most cases, the test just needs to be run with elevated permissions" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (340 commits) Documentation: KVM: Use unordered list for pre-init VGIC registers RISC-V: KVM: Avoid re-acquiring memslot in kvm_riscv_gstage_map() RISC-V: KVM: Use find_vma_intersection() to search for intersecting VMAs RISC-V: perf/kvm: Add reporting of interrupt events RISC-V: KVM: Enable ring-based dirty memory tracking RISC-V: KVM: Fix inclusion of Smnpm in the guest ISA bitmap RISC-V: KVM: Delegate illegal instruction fault to VS mode RISC-V: KVM: Pass VMID as parameter to kvm_riscv_hfence_xyz() APIs RISC-V: KVM: Factor-out g-stage page table management RISC-V: KVM: Add vmid field to struct kvm_riscv_hfence RISC-V: KVM: Introduce struct kvm_gstage_mapping RISC-V: KVM: Factor-out MMU related declarations into separate headers RISC-V: KVM: Use ncsr_xyz() in kvm_riscv_vcpu_trap_redirect() RISC-V: KVM: Implement kvm_arch_flush_remote_tlbs_range() RISC-V: KVM: Don't flush TLB when PTE is unchanged RISC-V: KVM: Replace KVM_REQ_HFENCE_GVMA_VMID_ALL with KVM_REQ_TLB_FLUSH RISC-V: KVM: Rename and move kvm_riscv_local_tlb_sanitize() RISC-V: KVM: Drop the return value of kvm_riscv_vcpu_aia_init() RISC-V: KVM: Check kvm_riscv_vcpu_alloc_vector_context() return value KVM: arm64: selftests: Add FEAT_RAS EL2 registers to get-reg-list ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/amd-iommu.h25
-rw-r--r--include/linux/irq-entry-common.h16
-rw-r--r--include/linux/irqbypass.h46
-rw-r--r--include/linux/irqchip/arm-gic-v4.h2
-rw-r--r--include/linux/irqchip/arm-gic-v5.h394
-rw-r--r--include/linux/irqchip/arm-vgic-info.h4
-rw-r--r--include/linux/irqdomain.h3
-rw-r--r--include/linux/kvm_dirty_ring.h18
-rw-r--r--include/linux/kvm_host.h36
-rw-r--r--include/linux/kvm_irqfd.h5
-rw-r--r--include/linux/msi.h1
-rw-r--r--include/linux/of_irq.h5
-rw-r--r--include/linux/wait.h2
13 files changed, 472 insertions, 85 deletions
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 062fbd4c9b77..8cced632ecd0 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -12,20 +12,6 @@
struct amd_iommu;
-/*
- * This is mainly used to communicate information back-and-forth
- * between SVM and IOMMU for setting up and tearing down posted
- * interrupt
- */
-struct amd_iommu_pi_data {
- u32 ga_tag;
- u32 prev_ga_tag;
- u64 base;
- bool is_guest_mode;
- struct vcpu_data *vcpu_data;
- void *ir_data;
-};
-
#ifdef CONFIG_AMD_IOMMU
struct task_struct;
@@ -44,10 +30,8 @@ static inline void amd_iommu_detect(void) { }
/* IOMMU AVIC Function */
extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32));
-extern int
-amd_iommu_update_ga(int cpu, bool is_run, void *data);
-
-extern int amd_iommu_activate_guest_mode(void *data);
+extern int amd_iommu_update_ga(void *data, int cpu, bool ga_log_intr);
+extern int amd_iommu_activate_guest_mode(void *data, int cpu, bool ga_log_intr);
extern int amd_iommu_deactivate_guest_mode(void *data);
#else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */
@@ -58,13 +42,12 @@ amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
return 0;
}
-static inline int
-amd_iommu_update_ga(int cpu, bool is_run, void *data)
+static inline int amd_iommu_update_ga(void *data, int cpu, bool ga_log_intr)
{
return 0;
}
-static inline int amd_iommu_activate_guest_mode(void *data)
+static inline int amd_iommu_activate_guest_mode(void *data, int cpu, bool ga_log_intr)
{
return 0;
}
diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h
index 8af374331900..4fb2f93d82ed 100644
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -49,6 +49,22 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {}
#endif
/**
+ * arch_in_rcu_eqs - Architecture specific check for RCU extended quiescent
+ * states.
+ *
+ * Returns: true if the CPU is potentially in an RCU EQS, false otherwise.
+ *
+ * Architectures only need to define this if threads other than the idle thread
+ * may have an interruptible EQS. This does not need to handle idle threads. It
+ * is safe to over-estimate at the cost of redundant RCU management work.
+ *
+ * Invoked from irqentry_enter()
+ */
+#ifndef arch_in_rcu_eqs
+static __always_inline bool arch_in_rcu_eqs(void) { return false; }
+#endif
+
+/**
* enter_from_user_mode - Establish state when coming from user mode
*
* Syscall/interrupt entry disables interrupts, but user mode is traced as
diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h
index 9bdb2a781841..ede1fa938152 100644
--- a/include/linux/irqbypass.h
+++ b/include/linux/irqbypass.h
@@ -10,6 +10,7 @@
#include <linux/list.h>
+struct eventfd_ctx;
struct irq_bypass_consumer;
/*
@@ -18,20 +19,22 @@ struct irq_bypass_consumer;
* The IRQ bypass manager is a simple set of lists and callbacks that allows
* IRQ producers (ex. physical interrupt sources) to be matched to IRQ
* consumers (ex. virtualization hardware that allows IRQ bypass or offload)
- * via a shared token (ex. eventfd_ctx). Producers and consumers register
- * independently. When a token match is found, the optional @stop callback
- * will be called for each participant. The pair will then be connected via
- * the @add_* callbacks, and finally the optional @start callback will allow
- * any final coordination. When either participant is unregistered, the
- * process is repeated using the @del_* callbacks in place of the @add_*
- * callbacks. Match tokens must be unique per producer/consumer, 1:N pairings
- * are not supported.
+ * via a shared eventfd_ctx. Producers and consumers register independently.
+ * When a producer and consumer are paired, i.e. an eventfd match is found, the
+ * optional @stop callback will be called for each participant. The pair will
+ * then be connected via the @add_* callbacks, and finally the optional @start
+ * callback will allow any final coordination. When either participant is
+ * unregistered, the process is repeated using the @del_* callbacks in place of
+ * the @add_* callbacks. eventfds must be unique per producer/consumer, 1:N
+ * pairings are not supported.
*/
+struct irq_bypass_consumer;
+
/**
* struct irq_bypass_producer - IRQ bypass producer definition
- * @node: IRQ bypass manager private list management
- * @token: opaque token to match between producer and consumer (non-NULL)
+ * @eventfd: eventfd context used to match producers and consumers
+ * @consumer: The connected consumer (NULL if no connection)
* @irq: Linux IRQ number for the producer device
* @add_consumer: Connect the IRQ producer to an IRQ consumer (optional)
* @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional)
@@ -43,8 +46,8 @@ struct irq_bypass_consumer;
* for a physical device assigned to a VM.
*/
struct irq_bypass_producer {
- struct list_head node;
- void *token;
+ struct eventfd_ctx *eventfd;
+ struct irq_bypass_consumer *consumer;
int irq;
int (*add_consumer)(struct irq_bypass_producer *,
struct irq_bypass_consumer *);
@@ -56,8 +59,8 @@ struct irq_bypass_producer {
/**
* struct irq_bypass_consumer - IRQ bypass consumer definition
- * @node: IRQ bypass manager private list management
- * @token: opaque token to match between producer and consumer (non-NULL)
+ * @eventfd: eventfd context used to match producers and consumers
+ * @producer: The connected producer (NULL if no connection)
* @add_producer: Connect the IRQ consumer to an IRQ producer
* @del_producer: Disconnect the IRQ consumer from an IRQ producer
* @stop: Perform any quiesce operations necessary prior to add/del (optional)
@@ -69,8 +72,9 @@ struct irq_bypass_producer {
* portions of the interrupt handling to the VM.
*/
struct irq_bypass_consumer {
- struct list_head node;
- void *token;
+ struct eventfd_ctx *eventfd;
+ struct irq_bypass_producer *producer;
+
int (*add_producer)(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
void (*del_producer)(struct irq_bypass_consumer *,
@@ -79,9 +83,11 @@ struct irq_bypass_consumer {
void (*start)(struct irq_bypass_consumer *);
};
-int irq_bypass_register_producer(struct irq_bypass_producer *);
-void irq_bypass_unregister_producer(struct irq_bypass_producer *);
-int irq_bypass_register_consumer(struct irq_bypass_consumer *);
-void irq_bypass_unregister_consumer(struct irq_bypass_consumer *);
+int irq_bypass_register_producer(struct irq_bypass_producer *producer,
+ struct eventfd_ctx *eventfd, int irq);
+void irq_bypass_unregister_producer(struct irq_bypass_producer *producer);
+int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer,
+ struct eventfd_ctx *eventfd);
+void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer);
#endif /* IRQBYPASS_H */
diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index 7f1f11a5e4e4..0b0887099fd7 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -146,7 +146,7 @@ int its_commit_vpe(struct its_vpe *vpe);
int its_invall_vpe(struct its_vpe *vpe);
int its_map_vlpi(int irq, struct its_vlpi_map *map);
int its_get_vlpi(int irq, struct its_vlpi_map *map);
-int its_unmap_vlpi(int irq);
+void its_unmap_vlpi(int irq);
int its_prop_update_vlpi(int irq, u8 config, bool inv);
int its_prop_update_vsgi(int irq, u8 priority, bool group);
diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h
new file mode 100644
index 000000000000..68ddcdb1cec5
--- /dev/null
+++ b/include/linux/irqchip/arm-gic-v5.h
@@ -0,0 +1,394 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2025 ARM Limited, All Rights Reserved.
+ */
+#ifndef __LINUX_IRQCHIP_ARM_GIC_V5_H
+#define __LINUX_IRQCHIP_ARM_GIC_V5_H
+
+#include <linux/iopoll.h>
+
+#include <asm/cacheflush.h>
+#include <asm/smp.h>
+#include <asm/sysreg.h>
+
+#define GICV5_IPIS_PER_CPU MAX_IPI
+
+/*
+ * INTID handling
+ */
+#define GICV5_HWIRQ_ID GENMASK(23, 0)
+#define GICV5_HWIRQ_TYPE GENMASK(31, 29)
+#define GICV5_HWIRQ_INTID GENMASK_ULL(31, 0)
+
+#define GICV5_HWIRQ_TYPE_PPI UL(0x1)
+#define GICV5_HWIRQ_TYPE_LPI UL(0x2)
+#define GICV5_HWIRQ_TYPE_SPI UL(0x3)
+
+/*
+ * Tables attributes
+ */
+#define GICV5_NO_READ_ALLOC 0b0
+#define GICV5_READ_ALLOC 0b1
+#define GICV5_NO_WRITE_ALLOC 0b0
+#define GICV5_WRITE_ALLOC 0b1
+
+#define GICV5_NON_CACHE 0b00
+#define GICV5_WB_CACHE 0b01
+#define GICV5_WT_CACHE 0b10
+
+#define GICV5_NON_SHARE 0b00
+#define GICV5_OUTER_SHARE 0b10
+#define GICV5_INNER_SHARE 0b11
+
+/*
+ * IRS registers and tables structures
+ */
+#define GICV5_IRS_IDR1 0x0004
+#define GICV5_IRS_IDR2 0x0008
+#define GICV5_IRS_IDR5 0x0014
+#define GICV5_IRS_IDR6 0x0018
+#define GICV5_IRS_IDR7 0x001c
+#define GICV5_IRS_CR0 0x0080
+#define GICV5_IRS_CR1 0x0084
+#define GICV5_IRS_SYNCR 0x00c0
+#define GICV5_IRS_SYNC_STATUSR 0x00c4
+#define GICV5_IRS_SPI_SELR 0x0108
+#define GICV5_IRS_SPI_CFGR 0x0114
+#define GICV5_IRS_SPI_STATUSR 0x0118
+#define GICV5_IRS_PE_SELR 0x0140
+#define GICV5_IRS_PE_STATUSR 0x0144
+#define GICV5_IRS_PE_CR0 0x0148
+#define GICV5_IRS_IST_BASER 0x0180
+#define GICV5_IRS_IST_CFGR 0x0190
+#define GICV5_IRS_IST_STATUSR 0x0194
+#define GICV5_IRS_MAP_L2_ISTR 0x01c0
+
+#define GICV5_IRS_IDR1_PRIORITY_BITS GENMASK(22, 20)
+#define GICV5_IRS_IDR1_IAFFID_BITS GENMASK(19, 16)
+
+#define GICV5_IRS_IDR1_PRIORITY_BITS_1BITS 0b000
+#define GICV5_IRS_IDR1_PRIORITY_BITS_2BITS 0b001
+#define GICV5_IRS_IDR1_PRIORITY_BITS_3BITS 0b010
+#define GICV5_IRS_IDR1_PRIORITY_BITS_4BITS 0b011
+#define GICV5_IRS_IDR1_PRIORITY_BITS_5BITS 0b100
+
+#define GICV5_IRS_IDR2_ISTMD_SZ GENMASK(19, 15)
+#define GICV5_IRS_IDR2_ISTMD BIT(14)
+#define GICV5_IRS_IDR2_IST_L2SZ GENMASK(13, 11)
+#define GICV5_IRS_IDR2_IST_LEVELS BIT(10)
+#define GICV5_IRS_IDR2_MIN_LPI_ID_BITS GENMASK(9, 6)
+#define GICV5_IRS_IDR2_LPI BIT(5)
+#define GICV5_IRS_IDR2_ID_BITS GENMASK(4, 0)
+
+#define GICV5_IRS_IDR5_SPI_RANGE GENMASK(24, 0)
+#define GICV5_IRS_IDR6_SPI_IRS_RANGE GENMASK(24, 0)
+#define GICV5_IRS_IDR7_SPI_BASE GENMASK(23, 0)
+
+#define GICV5_IRS_IST_L2SZ_SUPPORT_4KB(r) FIELD_GET(BIT(11), (r))
+#define GICV5_IRS_IST_L2SZ_SUPPORT_16KB(r) FIELD_GET(BIT(12), (r))
+#define GICV5_IRS_IST_L2SZ_SUPPORT_64KB(r) FIELD_GET(BIT(13), (r))
+
+#define GICV5_IRS_CR0_IDLE BIT(1)
+#define GICV5_IRS_CR0_IRSEN BIT(0)
+
+#define GICV5_IRS_CR1_VPED_WA BIT(15)
+#define GICV5_IRS_CR1_VPED_RA BIT(14)
+#define GICV5_IRS_CR1_VMD_WA BIT(13)
+#define GICV5_IRS_CR1_VMD_RA BIT(12)
+#define GICV5_IRS_CR1_VPET_WA BIT(11)
+#define GICV5_IRS_CR1_VPET_RA BIT(10)
+#define GICV5_IRS_CR1_VMT_WA BIT(9)
+#define GICV5_IRS_CR1_VMT_RA BIT(8)
+#define GICV5_IRS_CR1_IST_WA BIT(7)
+#define GICV5_IRS_CR1_IST_RA BIT(6)
+#define GICV5_IRS_CR1_IC GENMASK(5, 4)
+#define GICV5_IRS_CR1_OC GENMASK(3, 2)
+#define GICV5_IRS_CR1_SH GENMASK(1, 0)
+
+#define GICV5_IRS_SYNCR_SYNC BIT(31)
+
+#define GICV5_IRS_SYNC_STATUSR_IDLE BIT(0)
+
+#define GICV5_IRS_SPI_STATUSR_V BIT(1)
+#define GICV5_IRS_SPI_STATUSR_IDLE BIT(0)
+
+#define GICV5_IRS_SPI_SELR_ID GENMASK(23, 0)
+
+#define GICV5_IRS_SPI_CFGR_TM BIT(0)
+
+#define GICV5_IRS_PE_SELR_IAFFID GENMASK(15, 0)
+
+#define GICV5_IRS_PE_STATUSR_V BIT(1)
+#define GICV5_IRS_PE_STATUSR_IDLE BIT(0)
+
+#define GICV5_IRS_PE_CR0_DPS BIT(0)
+
+#define GICV5_IRS_IST_STATUSR_IDLE BIT(0)
+
+#define GICV5_IRS_IST_CFGR_STRUCTURE BIT(16)
+#define GICV5_IRS_IST_CFGR_ISTSZ GENMASK(8, 7)
+#define GICV5_IRS_IST_CFGR_L2SZ GENMASK(6, 5)
+#define GICV5_IRS_IST_CFGR_LPI_ID_BITS GENMASK(4, 0)
+
+#define GICV5_IRS_IST_CFGR_STRUCTURE_LINEAR 0b0
+#define GICV5_IRS_IST_CFGR_STRUCTURE_TWO_LEVEL 0b1
+
+#define GICV5_IRS_IST_CFGR_ISTSZ_4 0b00
+#define GICV5_IRS_IST_CFGR_ISTSZ_8 0b01
+#define GICV5_IRS_IST_CFGR_ISTSZ_16 0b10
+
+#define GICV5_IRS_IST_CFGR_L2SZ_4K 0b00
+#define GICV5_IRS_IST_CFGR_L2SZ_16K 0b01
+#define GICV5_IRS_IST_CFGR_L2SZ_64K 0b10
+
+#define GICV5_IRS_IST_BASER_ADDR_MASK GENMASK_ULL(55, 6)
+#define GICV5_IRS_IST_BASER_VALID BIT_ULL(0)
+
+#define GICV5_IRS_MAP_L2_ISTR_ID GENMASK(23, 0)
+
+#define GICV5_ISTL1E_VALID BIT_ULL(0)
+
+#define GICV5_ISTL1E_L2_ADDR_MASK GENMASK_ULL(55, 12)
+
+/*
+ * ITS registers and tables structures
+ */
+#define GICV5_ITS_IDR1 0x0004
+#define GICV5_ITS_IDR2 0x0008
+#define GICV5_ITS_CR0 0x0080
+#define GICV5_ITS_CR1 0x0084
+#define GICV5_ITS_DT_BASER 0x00c0
+#define GICV5_ITS_DT_CFGR 0x00d0
+#define GICV5_ITS_DIDR 0x0100
+#define GICV5_ITS_EIDR 0x0108
+#define GICV5_ITS_INV_EVENTR 0x010c
+#define GICV5_ITS_INV_DEVICER 0x0110
+#define GICV5_ITS_STATUSR 0x0120
+#define GICV5_ITS_SYNCR 0x0140
+#define GICV5_ITS_SYNC_STATUSR 0x0148
+
+#define GICV5_ITS_IDR1_L2SZ GENMASK(10, 8)
+#define GICV5_ITS_IDR1_ITT_LEVELS BIT(7)
+#define GICV5_ITS_IDR1_DT_LEVELS BIT(6)
+#define GICV5_ITS_IDR1_DEVICEID_BITS GENMASK(5, 0)
+
+#define GICV5_ITS_IDR1_L2SZ_SUPPORT_4KB(r) FIELD_GET(BIT(8), (r))
+#define GICV5_ITS_IDR1_L2SZ_SUPPORT_16KB(r) FIELD_GET(BIT(9), (r))
+#define GICV5_ITS_IDR1_L2SZ_SUPPORT_64KB(r) FIELD_GET(BIT(10), (r))
+
+#define GICV5_ITS_IDR2_XDMN_EVENTs GENMASK(6, 5)
+#define GICV5_ITS_IDR2_EVENTID_BITS GENMASK(4, 0)
+
+#define GICV5_ITS_CR0_IDLE BIT(1)
+#define GICV5_ITS_CR0_ITSEN BIT(0)
+
+#define GICV5_ITS_CR1_ITT_RA BIT(7)
+#define GICV5_ITS_CR1_DT_RA BIT(6)
+#define GICV5_ITS_CR1_IC GENMASK(5, 4)
+#define GICV5_ITS_CR1_OC GENMASK(3, 2)
+#define GICV5_ITS_CR1_SH GENMASK(1, 0)
+
+#define GICV5_ITS_DT_CFGR_STRUCTURE BIT(16)
+#define GICV5_ITS_DT_CFGR_L2SZ GENMASK(7, 6)
+#define GICV5_ITS_DT_CFGR_DEVICEID_BITS GENMASK(5, 0)
+
+#define GICV5_ITS_DT_BASER_ADDR_MASK GENMASK_ULL(55, 3)
+
+#define GICV5_ITS_INV_DEVICER_I BIT(31)
+#define GICV5_ITS_INV_DEVICER_EVENTID_BITS GENMASK(5, 1)
+#define GICV5_ITS_INV_DEVICER_L1 BIT(0)
+
+#define GICV5_ITS_DIDR_DEVICEID GENMASK_ULL(31, 0)
+
+#define GICV5_ITS_EIDR_EVENTID GENMASK(15, 0)
+
+#define GICV5_ITS_INV_EVENTR_I BIT(31)
+#define GICV5_ITS_INV_EVENTR_ITT_L2SZ GENMASK(2, 1)
+#define GICV5_ITS_INV_EVENTR_L1 BIT(0)
+
+#define GICV5_ITS_STATUSR_IDLE BIT(0)
+
+#define GICV5_ITS_SYNCR_SYNC BIT_ULL(63)
+#define GICV5_ITS_SYNCR_SYNCALL BIT_ULL(32)
+#define GICV5_ITS_SYNCR_DEVICEID GENMASK_ULL(31, 0)
+
+#define GICV5_ITS_SYNC_STATUSR_IDLE BIT(0)
+
+#define GICV5_DTL1E_VALID BIT_ULL(0)
+/* Note that there is no shift for the address by design */
+#define GICV5_DTL1E_L2_ADDR_MASK GENMASK_ULL(55, 3)
+#define GICV5_DTL1E_SPAN GENMASK_ULL(63, 60)
+
+#define GICV5_DTL2E_VALID BIT_ULL(0)
+#define GICV5_DTL2E_ITT_L2SZ GENMASK_ULL(2, 1)
+/* Note that there is no shift for the address by design */
+#define GICV5_DTL2E_ITT_ADDR_MASK GENMASK_ULL(55, 3)
+#define GICV5_DTL2E_ITT_DSWE BIT_ULL(57)
+#define GICV5_DTL2E_ITT_STRUCTURE BIT_ULL(58)
+#define GICV5_DTL2E_EVENT_ID_BITS GENMASK_ULL(63, 59)
+
+#define GICV5_ITTL1E_VALID BIT_ULL(0)
+/* Note that there is no shift for the address by design */
+#define GICV5_ITTL1E_L2_ADDR_MASK GENMASK_ULL(55, 3)
+#define GICV5_ITTL1E_SPAN GENMASK_ULL(63, 60)
+
+#define GICV5_ITTL2E_LPI_ID GENMASK_ULL(23, 0)
+#define GICV5_ITTL2E_DAC GENMASK_ULL(29, 28)
+#define GICV5_ITTL2E_VIRTUAL BIT_ULL(30)
+#define GICV5_ITTL2E_VALID BIT_ULL(31)
+#define GICV5_ITTL2E_VM_ID GENMASK_ULL(47, 32)
+
+#define GICV5_ITS_DT_ITT_CFGR_L2SZ_4k 0b00
+#define GICV5_ITS_DT_ITT_CFGR_L2SZ_16k 0b01
+#define GICV5_ITS_DT_ITT_CFGR_L2SZ_64k 0b10
+
+#define GICV5_ITS_DT_ITT_CFGR_STRUCTURE_LINEAR 0
+#define GICV5_ITS_DT_ITT_CFGR_STRUCTURE_TWO_LEVEL 1
+
+#define GICV5_ITS_HWIRQ_DEVICE_ID GENMASK_ULL(31, 0)
+#define GICV5_ITS_HWIRQ_EVENT_ID GENMASK_ULL(63, 32)
+
+/*
+ * IWB registers
+ */
+#define GICV5_IWB_IDR0 0x0000
+#define GICV5_IWB_CR0 0x0080
+#define GICV5_IWB_WENABLE_STATUSR 0x00c0
+#define GICV5_IWB_WENABLER 0x2000
+#define GICV5_IWB_WTMR 0x4000
+
+#define GICV5_IWB_IDR0_INT_DOMS GENMASK(14, 11)
+#define GICV5_IWB_IDR0_IW_RANGE GENMASK(10, 0)
+
+#define GICV5_IWB_CR0_IDLE BIT(1)
+#define GICV5_IWB_CR0_IWBEN BIT(0)
+
+#define GICV5_IWB_WENABLE_STATUSR_IDLE BIT(0)
+
+/*
+ * Global Data structures and functions
+ */
+struct gicv5_chip_data {
+ struct fwnode_handle *fwnode;
+ struct irq_domain *ppi_domain;
+ struct irq_domain *spi_domain;
+ struct irq_domain *lpi_domain;
+ struct irq_domain *ipi_domain;
+ u32 global_spi_count;
+ u8 cpuif_pri_bits;
+ u8 cpuif_id_bits;
+ u8 irs_pri_bits;
+ struct {
+ __le64 *l1ist_addr;
+ u32 l2_size;
+ u8 l2_bits;
+ bool l2;
+ } ist;
+};
+
+extern struct gicv5_chip_data gicv5_global_data __read_mostly;
+
+struct gicv5_irs_chip_data {
+ struct list_head entry;
+ struct fwnode_handle *fwnode;
+ void __iomem *irs_base;
+ u32 flags;
+ u32 spi_min;
+ u32 spi_range;
+ raw_spinlock_t spi_config_lock;
+};
+
+static inline int gicv5_wait_for_op_s_atomic(void __iomem *addr, u32 offset,
+ const char *reg_s, u32 mask,
+ u32 *val)
+{
+ void __iomem *reg = addr + offset;
+ u32 tmp;
+ int ret;
+
+ ret = readl_poll_timeout_atomic(reg, tmp, tmp & mask, 1, 10 * USEC_PER_MSEC);
+ if (unlikely(ret == -ETIMEDOUT)) {
+ pr_err_ratelimited("%s timeout...\n", reg_s);
+ return ret;
+ }
+
+ if (val)
+ *val = tmp;
+
+ return 0;
+}
+
+static inline int gicv5_wait_for_op_s(void __iomem *addr, u32 offset,
+ const char *reg_s, u32 mask)
+{
+ void __iomem *reg = addr + offset;
+ u32 val;
+ int ret;
+
+ ret = readl_poll_timeout(reg, val, val & mask, 1, 10 * USEC_PER_MSEC);
+ if (unlikely(ret == -ETIMEDOUT)) {
+ pr_err_ratelimited("%s timeout...\n", reg_s);
+ return ret;
+ }
+
+ return 0;
+}
+
+#define gicv5_wait_for_op_atomic(base, reg, mask, val) \
+ gicv5_wait_for_op_s_atomic(base, reg, #reg, mask, val)
+
+#define gicv5_wait_for_op(base, reg, mask) \
+ gicv5_wait_for_op_s(base, reg, #reg, mask)
+
+void __init gicv5_init_lpi_domain(void);
+void __init gicv5_free_lpi_domain(void);
+
+int gicv5_irs_of_probe(struct device_node *parent);
+void gicv5_irs_remove(void);
+int gicv5_irs_enable(void);
+void gicv5_irs_its_probe(void);
+int gicv5_irs_register_cpu(int cpuid);
+int gicv5_irs_cpu_to_iaffid(int cpu_id, u16 *iaffid);
+struct gicv5_irs_chip_data *gicv5_irs_lookup_by_spi_id(u32 spi_id);
+int gicv5_spi_irq_set_type(struct irq_data *d, unsigned int type);
+int gicv5_irs_iste_alloc(u32 lpi);
+void gicv5_irs_syncr(void);
+
+struct gicv5_its_devtab_cfg {
+ union {
+ struct {
+ __le64 *devtab;
+ } linear;
+ struct {
+ __le64 *l1devtab;
+ __le64 **l2ptrs;
+ } l2;
+ };
+ u32 cfgr;
+};
+
+struct gicv5_its_itt_cfg {
+ union {
+ struct {
+ __le64 *itt;
+ unsigned int num_ents;
+ } linear;
+ struct {
+ __le64 *l1itt;
+ __le64 **l2ptrs;
+ unsigned int num_l1_ents;
+ u8 l2sz;
+ } l2;
+ };
+ u8 event_id_bits;
+ bool l2itt;
+};
+
+void gicv5_init_lpis(u32 max);
+void gicv5_deinit_lpis(void);
+
+int gicv5_alloc_lpi(void);
+void gicv5_free_lpi(u32 lpi);
+
+void __init gicv5_its_of_probe(struct device_node *parent);
+#endif
diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h
index a75b2c7de69d..ca1713fac6e3 100644
--- a/include/linux/irqchip/arm-vgic-info.h
+++ b/include/linux/irqchip/arm-vgic-info.h
@@ -15,6 +15,8 @@ enum gic_type {
GIC_V2,
/* Full GICv3, optionally with v2 compat */
GIC_V3,
+ /* Full GICv5, optionally with v3 compat */
+ GIC_V5,
};
struct gic_kvm_info {
@@ -34,6 +36,8 @@ struct gic_kvm_info {
bool has_v4_1;
/* Deactivation impared, subpar stuff */
bool no_hw_deactivation;
+ /* v3 compat support (GICv5 hosts, only) */
+ bool has_gcie_v3_compat;
};
#ifdef CONFIG_KVM
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 266b5e5bb8ce..4a86e6b915dd 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -212,6 +212,9 @@ enum {
/* Address and data pair is mutable when irq_set_affinity() */
IRQ_DOMAIN_FLAG_MSI_IMMUTABLE = (1 << 11),
+ /* IRQ domain requires parent fwnode matching */
+ IRQ_DOMAIN_FLAG_FWNODE_PARENT = (1 << 12),
+
/*
* Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
* for implementation specific purposes and ignored by the
diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
index da4d9b5f58f1..eb10d87adf7d 100644
--- a/include/linux/kvm_dirty_ring.h
+++ b/include/linux/kvm_dirty_ring.h
@@ -49,9 +49,10 @@ static inline int kvm_dirty_ring_alloc(struct kvm *kvm, struct kvm_dirty_ring *r
}
static inline int kvm_dirty_ring_reset(struct kvm *kvm,
- struct kvm_dirty_ring *ring)
+ struct kvm_dirty_ring *ring,
+ int *nr_entries_reset)
{
- return 0;
+ return -ENOENT;
}
static inline void kvm_dirty_ring_push(struct kvm_vcpu *vcpu,
@@ -77,17 +78,8 @@ bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm);
u32 kvm_dirty_ring_get_rsvd_entries(struct kvm *kvm);
int kvm_dirty_ring_alloc(struct kvm *kvm, struct kvm_dirty_ring *ring,
int index, u32 size);
-
-/*
- * called with kvm->slots_lock held, returns the number of
- * processed pages.
- */
-int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring);
-
-/*
- * returns =0: successfully pushed
- * <0: unable to push, need to wait
- */
+int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring,
+ int *nr_entries_reset);
void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset);
bool kvm_dirty_ring_check_request(struct kvm_vcpu *vcpu);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3bde4fb5c6aa..15656b7fba6c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -190,6 +190,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
+#define KVM_PIT_IRQ_SOURCE_ID 2
extern struct mutex kvm_lock;
extern struct list_head vm_list;
@@ -1022,16 +1023,12 @@ void kvm_unlock_all_vcpus(struct kvm *kvm);
void vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_KVM_IOAPIC
void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm);
-void kvm_arch_post_irq_routing_update(struct kvm *kvm);
#else
static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
{
}
-static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm)
-{
-}
#endif
#ifdef CONFIG_HAVE_KVM_IRQCHIP
@@ -1693,24 +1690,6 @@ static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
return false;
}
#endif
-#ifdef __KVM_HAVE_ARCH_ASSIGNED_DEVICE
-void kvm_arch_start_assignment(struct kvm *kvm);
-void kvm_arch_end_assignment(struct kvm *kvm);
-bool kvm_arch_has_assigned_device(struct kvm *kvm);
-#else
-static inline void kvm_arch_start_assignment(struct kvm *kvm)
-{
-}
-
-static inline void kvm_arch_end_assignment(struct kvm *kvm)
-{
-}
-
-static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
-{
- return false;
-}
-#endif
static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu)
{
@@ -1788,8 +1767,6 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
-int kvm_request_irq_source_id(struct kvm *kvm);
-void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
/*
@@ -2406,6 +2383,8 @@ struct kvm_vcpu *kvm_get_running_vcpu(void);
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
+struct kvm_kernel_irqfd;
+
bool kvm_arch_has_irq_bypass(void);
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
@@ -2413,10 +2392,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *);
void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
-int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
- uint32_t guest_irq, bool set);
-bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *,
- struct kvm_kernel_irq_routing_entry *);
+void kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
+ struct kvm_kernel_irq_routing_entry *old,
+ struct kvm_kernel_irq_routing_entry *new);
#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
#ifdef CONFIG_HAVE_KVM_INVALID_WAKEUPS
diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index 8ad43692e3bb..ef8c134ded8a 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -55,10 +55,13 @@ struct kvm_kernel_irqfd {
/* Used for setup/shutdown */
struct eventfd_ctx *eventfd;
struct list_head list;
- poll_table pt;
struct work_struct shutdown;
struct irq_bypass_consumer consumer;
struct irq_bypass_producer *producer;
+
+ struct kvm_vcpu *irq_bypass_vcpu;
+ struct list_head vcpu_list;
+ void *irq_bypass_data;
};
#endif /* __LINUX_KVM_IRQFD_H */
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 71e5cc830393..e5e86a8529fb 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -707,6 +707,7 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
struct msi_domain_info *info,
struct irq_domain *parent);
u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev);
+u32 pci_msi_map_rid_ctlr_node(struct pci_dev *pdev, struct device_node **node);
struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev);
void pci_msix_prepare_desc(struct irq_domain *domain, msi_alloc_info_t *arg,
struct msi_desc *desc);
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 6337ad4e5fe8..a480063c9cb1 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -54,6 +54,7 @@ extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev,
u32 id,
u32 bus_token);
extern void of_msi_configure(struct device *dev, const struct device_node *np);
+extern u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in);
u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in);
#else
static inline void of_irq_init(const struct of_device_id *matches)
@@ -100,6 +101,10 @@ static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev
static inline void of_msi_configure(struct device *dev, struct device_node *np)
{
}
+static inline u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in)
+{
+ return id_in;
+}
static inline u32 of_msi_map_id(struct device *dev,
struct device_node *msi_np, u32 id_in)
{
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 965a19809c7e..09855d819418 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -164,6 +164,8 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head)
extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+extern int add_wait_queue_priority_exclusive(struct wait_queue_head *wq_head,
+ struct wait_queue_entry *wq_entry);
extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)