From 54d5d42404e7705cf3804593189e963350d470e5 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Tue, 6 Sep 2005 15:16:15 -0700 Subject: [PATCH] x86/x86_64: deferred handling of writes to /proc/irqxx/smp_affinity When handling writes to /proc/irq, current code is re-programming rte entries directly. This is not recommended and could potentially cause chipset's to lockup, or cause missing interrupts. CONFIG_IRQ_BALANCE does this correctly, where it re-programs only when the interrupt is pending. The same needs to be done for /proc/irq handling as well. Otherwise user space irq balancers are really not doing the right thing. - Changed pending_irq_balance_cpumask to pending_irq_migrate_cpumask for lack of a generic name. - added move_irq out of IRQ_BALANCE, and added this same to X86_64 - Added new proc handler for write, so we can do deferred write at irq handling time. - Display of /proc/irq/XX/smp_affinity used to display CPU_MASKALL, instead it now shows only active cpu masks, or exactly what was set. - Provided a common move_irq implementation, instead of duplicating when using generic irq framework. Tested on i386/x86_64 and ia64 with CONFIG_PCI_MSI turned on and off. Tested UP builds as well. MSI testing: tbd: I have cards, need to look for a x-over cable, although I did test an earlier version of this patch. Will test in a couple days. Signed-off-by: Ashok Raj Acked-by: Zwane Mwaikambo Grudgingly-acked-by: Andi Kleen Signed-off-by: Coywolf Qi Hunt Signed-off-by: Ashok Raj Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/irq.h | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) (limited to 'include/linux/irq.h') diff --git a/include/linux/irq.h b/include/linux/irq.h index 069d3b84d311..4a362b9ec966 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -71,16 +71,139 @@ typedef struct irq_desc { unsigned int irq_count; /* For detecting broken interrupts */ unsigned int irqs_unhandled; spinlock_t lock; +#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) + unsigned int move_irq; /* Flag need to re-target intr dest*/ +#endif } ____cacheline_aligned irq_desc_t; extern irq_desc_t irq_desc [NR_IRQS]; +/* Return a pointer to the irq descriptor for IRQ. */ +static inline irq_desc_t * +irq_descp (int irq) +{ + return irq_desc + irq; +} + #include /* the arch dependent stuff */ extern int setup_irq(unsigned int irq, struct irqaction * new); #ifdef CONFIG_GENERIC_HARDIRQS extern cpumask_t irq_affinity[NR_IRQS]; + +#ifdef CONFIG_SMP +static inline void set_native_irq_info(int irq, cpumask_t mask) +{ + irq_affinity[irq] = mask; +} +#else +static inline void set_native_irq_info(int irq, cpumask_t mask) +{ +} +#endif + +#ifdef CONFIG_SMP + +#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) +extern cpumask_t pending_irq_cpumask[NR_IRQS]; + +static inline void set_pending_irq(unsigned int irq, cpumask_t mask) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + desc->move_irq = 1; + pending_irq_cpumask[irq] = mask; + spin_unlock_irqrestore(&desc->lock, flags); +} + +static inline void +move_native_irq(int irq) +{ + cpumask_t tmp; + irq_desc_t *desc = irq_descp(irq); + + if (likely (!desc->move_irq)) + return; + + desc->move_irq = 0; + + if (likely(cpus_empty(pending_irq_cpumask[irq]))) + return; + + if (!desc->handler->set_affinity) + return; + + /* note - we hold the desc->lock */ + cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); + + /* + * If there was a valid mask to work with, please + * do the disable, re-program, enable sequence. + * This is *not* particularly important for level triggered + * but in a edge trigger case, we might be setting rte + * when an active trigger is comming in. This could + * cause some ioapics to mal-function. + * Being paranoid i guess! + */ + if (unlikely(!cpus_empty(tmp))) { + desc->handler->disable(irq); + desc->handler->set_affinity(irq,tmp); + desc->handler->enable(irq); + } + cpus_clear(pending_irq_cpumask[irq]); +} + +#ifdef CONFIG_PCI_MSI +/* + * Wonder why these are dummies? + * For e.g the set_ioapic_affinity_vector() calls the set_ioapic_affinity_irq() + * counter part after translating the vector to irq info. We need to perform + * this operation on the real irq, when we dont use vector, i.e when + * pci_use_vector() is false. + */ +static inline void move_irq(int irq) +{ +} + +static inline void set_irq_info(int irq, cpumask_t mask) +{ +} + +#else // CONFIG_PCI_MSI + +static inline void move_irq(int irq) +{ + move_native_irq(irq); +} + +static inline void set_irq_info(int irq, cpumask_t mask) +{ + set_native_irq_info(irq, mask); +} +#endif // CONFIG_PCI_MSI + +#else // CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE + +#define move_irq(x) +#define move_native_irq(x) +#define set_pending_irq(x,y) +static inline void set_irq_info(int irq, cpumask_t mask) +{ + set_native_irq_info(irq, mask); +} + +#endif // CONFIG_GENERIC_PENDING_IRQ + +#else // CONFIG_SMP + +#define move_irq(x) +#define move_native_irq(x) + +#endif // CONFIG_SMP + extern int no_irq_affinity; extern int noirqdebug_setup(char *str); -- cgit v1.2.3