diff options
| author | Thomas Gleixner <tglx@linutronix.de> | 2025-07-24 13:49:30 +0300 | 
|---|---|---|
| committer | Thomas Gleixner <tglx@linutronix.de> | 2025-08-05 00:34:03 +0300 | 
| commit | ce0b5eedcb753697d43f61dd2e27d68eb5d3150f (patch) | |
| tree | 1d58db9f21b3d09ed257fc486ffdb636ac7a6395 /arch/x86/kernel | |
| parent | 49f848788a4d157bb6648a57963cb060fed3d56e (diff) | |
| download | linux-ce0b5eedcb753697d43f61dd2e27d68eb5d3150f.tar.xz | |
x86/irq: Plug vector setup race
Hogan reported a vector setup race, which overwrites the interrupt
descriptor in the per CPU vector array resulting in a disfunctional device.
CPU0				CPU1
				interrupt is raised in APIC IRR
				but not handled
  free_irq()
    per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN;
  request_irq()			common_interrupt()
  				  d = this_cpu_read(vector_irq[vector]);
    per_cpu(vector_irq, CPU1)[vector] = desc;
    				  if (d == VECTOR_SHUTDOWN)
				    this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
free_irq() cannot observe the pending vector in the CPU1 APIC as there is
no way to query the remote CPUs APIC IRR.
This requires that request_irq() uses the same vector/CPU as the one which
was freed, but this also can be triggered by a spurious interrupt.
Interestingly enough this problem managed to be hidden for more than a
decade.
Prevent this by reevaluating vector_irq under the vector lock, which is
held by the interrupt activation code when vector_irq is updated.
To avoid ifdeffery or IS_ENABLED() nonsense, move the
[un]lock_vector_lock() declarations out under the
CONFIG_IRQ_DOMAIN_HIERARCHY guard as it's only provided when
CONFIG_X86_LOCAL_APIC=y.
The current CONFIG_IRQ_DOMAIN_HIERARCHY guard is selected by
CONFIG_X86_LOCAL_APIC, but can also be selected by other parts of the
Kconfig system, which makes 32-bit UP builds with CONFIG_X86_LOCAL_APIC=n
fail.
Can we just get rid of this !APIC nonsense once and forever?
Fixes: 9345005f4eed ("x86/irq: Fix do_IRQ() interrupt warning for cpu hotplug retriggered irqs")
Reported-by: Hogan Wang <hogan.wang@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Hogan Wang <hogan.wang@huawei.com>
Link: https://lore.kernel.org/all/draft-87ikjhrhhh.ffs@tglx
Diffstat (limited to 'arch/x86/kernel')
| -rw-r--r-- | arch/x86/kernel/irq.c | 63 | 
1 files changed, 48 insertions, 15 deletions
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 9ed29ff10e59..10721a125226 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -256,26 +256,59 @@ static __always_inline void handle_irq(struct irq_desc *desc,  		__handle_irq(desc, regs);  } -static __always_inline int call_irq_handler(int vector, struct pt_regs *regs) +static struct irq_desc *reevaluate_vector(int vector)  { -	struct irq_desc *desc; -	int ret = 0; +	struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); + +	if (!IS_ERR_OR_NULL(desc)) +		return desc; + +	if (desc == VECTOR_UNUSED) +		pr_emerg_ratelimited("No irq handler for %d.%u\n", smp_processor_id(), vector); +	else +		__this_cpu_write(vector_irq[vector], VECTOR_UNUSED); +	return NULL; +} + +static __always_inline bool call_irq_handler(int vector, struct pt_regs *regs) +{ +	struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); -	desc = __this_cpu_read(vector_irq[vector]);  	if (likely(!IS_ERR_OR_NULL(desc))) {  		handle_irq(desc, regs); -	} else { -		ret = -EINVAL; -		if (desc == VECTOR_UNUSED) { -			pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n", -					     __func__, smp_processor_id(), -					     vector); -		} else { -			__this_cpu_write(vector_irq[vector], VECTOR_UNUSED); -		} +		return true;  	} -	return ret; +	/* +	 * Reevaluate with vector_lock held to prevent a race against +	 * request_irq() setting up the vector: +	 * +	 * CPU0				CPU1 +	 *				interrupt is raised in APIC IRR +	 *				but not handled +	 * free_irq() +	 *   per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN; +	 * +	 * request_irq()		common_interrupt() +	 *				  d = this_cpu_read(vector_irq[vector]); +	 * +	 * per_cpu(vector_irq, CPU1)[vector] = desc; +	 * +	 *				  if (d == VECTOR_SHUTDOWN) +	 *				    this_cpu_write(vector_irq[vector], VECTOR_UNUSED); +	 * +	 * This requires that the same vector on the same target CPU is +	 * handed out or that a spurious interrupt hits that CPU/vector. +	 */ +	lock_vector_lock(); +	desc = reevaluate_vector(vector); +	unlock_vector_lock(); + +	if (!desc) +		return false; + +	handle_irq(desc, regs); +	return true;  }  /* @@ -289,7 +322,7 @@ DEFINE_IDTENTRY_IRQ(common_interrupt)  	/* entry code tells RCU that we're not quiescent.  Check it. */  	RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); -	if (unlikely(call_irq_handler(vector, regs))) +	if (unlikely(!call_irq_handler(vector, regs)))  		apic_eoi();  	set_irq_regs(old_regs);  | 
