From c3a877fea962d9d0fb1e3747334699978f566930 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 30 Oct 2020 18:59:17 +0200 Subject: irqdomain: Replace open coded of_node_to_fwnode() of_node_to_fwnode() should be used for conversion. Replace the open coded variant of it in of_phandle_args_to_fwspec(). Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20201030165919.86234-4-andriy.shevchenko@linux.intel.com --- kernel/irq/irqdomain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index cf8b374b892d..831526f2e728 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -737,7 +737,7 @@ static void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, { int i; - fwspec->fwnode = np ? &np->fwnode : NULL; + fwspec->fwnode = of_node_to_fwnode(np); fwspec->param_count = count; for (i = 0; i < count; i++) -- cgit v1.2.3 From b6e95788fde8c9bc9da729102085dd36a5a0cda6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 30 Oct 2020 18:59:18 +0200 Subject: irqdomain: Introduce irq_domain_create_legacy() API Introduce irq_domain_create_legacy() API which is functional equivalent to the existing irq_domain_add_legacy(), but takes a pointer to the struct fwnode_handle as a parameter. This is useful for non OF systems. Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20201030165919.86234-5-andriy.shevchenko@linux.intel.com --- Documentation/core-api/irq/irq-domain.rst | 6 ++++++ include/linux/irqdomain.h | 6 ++++++ kernel/irq/irqdomain.c | 17 ++++++++++++++--- 3 files changed, 26 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/Documentation/core-api/irq/irq-domain.rst b/Documentation/core-api/irq/irq-domain.rst index 096db12f32d5..a77c24c27f7b 100644 --- a/Documentation/core-api/irq/irq-domain.rst +++ b/Documentation/core-api/irq/irq-domain.rst @@ -147,6 +147,7 @@ Legacy irq_domain_add_simple() irq_domain_add_legacy() irq_domain_add_legacy_isa() + irq_domain_create_legacy() The Legacy mapping is a special case for drivers that already have a range of irq_descs allocated for the hwirqs. It is used when the @@ -185,6 +186,11 @@ that the driver using the simple domain call irq_create_mapping() before any irq_find_mapping() since the latter will actually work for the static IRQ assignment case. +irq_domain_add_legacy() and irq_domain_create_legacy() are functionally +equivalent, except for the first argument is different - the former +accepts an Open Firmware specific 'struct device_node', while the latter +accepts a more general abstraction 'struct fwnode_handle'. + Hierarchy IRQ domain -------------------- diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index d21f75d294d7..77bf7d84c673 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -271,6 +271,12 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data); +struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, + unsigned int size, + unsigned int first_irq, + irq_hw_number_t first_hwirq, + const struct irq_domain_ops *ops, + void *host_data); extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); extern bool irq_domain_check_msi_remap(void); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 831526f2e728..9c9cb8829f7a 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -350,17 +350,28 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data) +{ + return irq_domain_create_legacy(of_node_to_fwnode(of_node), size, + first_irq, first_hwirq, ops, host_data); +} +EXPORT_SYMBOL_GPL(irq_domain_add_legacy); + +struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, + unsigned int size, + unsigned int first_irq, + irq_hw_number_t first_hwirq, + const struct irq_domain_ops *ops, + void *host_data) { struct irq_domain *domain; - domain = __irq_domain_add(of_node_to_fwnode(of_node), first_hwirq + size, - first_hwirq + size, 0, ops, host_data); + domain = __irq_domain_add(fwnode, first_hwirq + size, first_hwirq + size, 0, ops, host_data); if (domain) irq_domain_associate_many(domain, first_irq, first_hwirq, size); return domain; } -EXPORT_SYMBOL_GPL(irq_domain_add_legacy); +EXPORT_SYMBOL_GPL(irq_domain_create_legacy); /** * irq_find_matching_fwspec() - Locates a domain for a given fwspec -- cgit v1.2.3 From f296dcd629aa412a80a53215e46087f53af87f08 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Nov 2020 22:01:45 +0100 Subject: genirq: Remove GENERIC_IRQ_LEGACY_ALLOC_HWIRQ Commit bb9d812643d8 ("arch: remove tile port") removed the last user of this cruft two years ago... Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87eekvac06.fsf@nanos.tec.linutronix.de --- include/linux/irq.h | 15 --------------- kernel/irq/Kconfig | 5 ----- kernel/irq/irqdesc.c | 51 --------------------------------------------------- 3 files changed, 71 deletions(-) (limited to 'kernel') diff --git a/include/linux/irq.h b/include/linux/irq.h index c54365309e97..79ce314a603b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -954,21 +954,6 @@ static inline void irq_free_desc(unsigned int irq) irq_free_descs(irq, 1); } -#ifdef CONFIG_GENERIC_IRQ_LEGACY_ALLOC_HWIRQ -unsigned int irq_alloc_hwirqs(int cnt, int node); -static inline unsigned int irq_alloc_hwirq(int node) -{ - return irq_alloc_hwirqs(1, node); -} -void irq_free_hwirqs(unsigned int from, int cnt); -static inline void irq_free_hwirq(unsigned int irq) -{ - return irq_free_hwirqs(irq, 1); -} -int arch_setup_hwirq(unsigned int irq, int node); -void arch_teardown_hwirq(unsigned int irq); -#endif - #ifdef CONFIG_GENERIC_IRQ_LEGACY void irq_init_desc(unsigned int irq); #endif diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 10a5aff4eecc..f2cda6b0057f 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -26,11 +26,6 @@ config GENERIC_IRQ_SHOW_LEVEL config GENERIC_IRQ_EFFECTIVE_AFF_MASK bool -# Facility to allocate a hardware interrupt. This is legacy support -# and should not be used in new code. Use irq domains instead. -config GENERIC_IRQ_LEGACY_ALLOC_HWIRQ - bool - # Support for delayed migration from interrupt context config GENERIC_PENDING_IRQ bool diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 1a7723604399..e810eb9906ea 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -810,57 +810,6 @@ unlock: } EXPORT_SYMBOL_GPL(__irq_alloc_descs); -#ifdef CONFIG_GENERIC_IRQ_LEGACY_ALLOC_HWIRQ -/** - * irq_alloc_hwirqs - Allocate an irq descriptor and initialize the hardware - * @cnt: number of interrupts to allocate - * @node: node on which to allocate - * - * Returns an interrupt number > 0 or 0, if the allocation fails. - */ -unsigned int irq_alloc_hwirqs(int cnt, int node) -{ - int i, irq = __irq_alloc_descs(-1, 0, cnt, node, NULL, NULL); - - if (irq < 0) - return 0; - - for (i = irq; cnt > 0; i++, cnt--) { - if (arch_setup_hwirq(i, node)) - goto err; - irq_clear_status_flags(i, _IRQ_NOREQUEST); - } - return irq; - -err: - for (i--; i >= irq; i--) { - irq_set_status_flags(i, _IRQ_NOREQUEST | _IRQ_NOPROBE); - arch_teardown_hwirq(i); - } - irq_free_descs(irq, cnt); - return 0; -} -EXPORT_SYMBOL_GPL(irq_alloc_hwirqs); - -/** - * irq_free_hwirqs - Free irq descriptor and cleanup the hardware - * @from: Free from irq number - * @cnt: number of interrupts to free - * - */ -void irq_free_hwirqs(unsigned int from, int cnt) -{ - int i, j; - - for (i = from, j = cnt; j > 0; i++, j--) { - irq_set_status_flags(i, _IRQ_NOREQUEST | _IRQ_NOPROBE); - arch_teardown_hwirq(i); - } - irq_free_descs(from, cnt); -} -EXPORT_SYMBOL_GPL(irq_free_hwirqs); -#endif - /** * irq_get_next_irq - get next allocated irq number * @offset: where to start the search -- cgit v1.2.3 From e906a546bd8653ed2e7a14cb300fd17952d7f862 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Nov 2020 23:36:28 +0100 Subject: genirq/irqdomain: Make irq_domain_disassociate() static No users outside of the core code. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87a6vja7mb.fsf@nanos.tec.linutronix.de --- include/linux/irqdomain.h | 2 -- kernel/irq/irqdomain.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 77bf7d84c673..5701a8b01726 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -387,8 +387,6 @@ extern int irq_domain_associate(struct irq_domain *domain, unsigned int irq, extern void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, irq_hw_number_t hwirq_base, int count); -extern void irq_domain_disassociate(struct irq_domain *domain, - unsigned int irq); extern unsigned int irq_create_mapping(struct irq_domain *host, irq_hw_number_t hwirq); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 9c9cb8829f7a..3d7463fd6453 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -496,7 +496,7 @@ static void irq_domain_set_mapping(struct irq_domain *domain, } } -void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) +static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) { struct irq_data *irq_data = irq_get_irq_data(irq); irq_hw_number_t hwirq; -- cgit v1.2.3 From 8c67d247dcad67fbdd07c8bab9818d0b8d9240bf Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 16 Nov 2020 11:18:15 +0100 Subject: genirq: Fix kernel-doc markups Some identifiers have different names between their prototypes and the kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/13a44f4f0c3135e14b16ae8fcce4af1eab27cb5f.1605521731.git.mchehab+huawei@kernel.org --- kernel/irq/chip.c | 2 +- kernel/irq/generic-chip.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index b9b9618e1aca..df75c3573dcb 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -61,7 +61,7 @@ int irq_set_chip(unsigned int irq, struct irq_chip *chip) EXPORT_SYMBOL(irq_set_chip); /** - * irq_set_type - set the irq trigger type for an irq + * irq_set_irq_type - set the irq trigger type for an irq * @irq: irq number * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h */ diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index e2999a070a99..a23ac2bbf433 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -269,7 +269,7 @@ irq_gc_init_mask_cache(struct irq_chip_generic *gc, enum irq_gc_flags flags) } /** - * __irq_alloc_domain_generic_chip - Allocate generic chips for an irq domain + * __irq_alloc_domain_generic_chips - Allocate generic chips for an irq domain * @d: irq domain for which to allocate chips * @irqs_per_chip: Number of interrupts each chip handles (max 32) * @num_ct: Number of irq_chip_type instances associated with this -- cgit v1.2.3 From ae9ef58996a4447dd44aa638759f913c883ba816 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Nov 2020 15:02:18 +0100 Subject: softirq: Move related code into one section To prepare for adding a RT aware variant of softirq serialization and processing move related code into one section so the necessary #ifdeffery is reduced to one. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20201113141733.974214480@linutronix.de --- kernel/softirq.c | 107 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 54 insertions(+), 53 deletions(-) (limited to 'kernel') diff --git a/kernel/softirq.c b/kernel/softirq.c index 09229ad82209..617009ccd82c 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -92,6 +92,13 @@ static bool ksoftirqd_running(unsigned long pending) !__kthread_should_park(tsk); } +#ifdef CONFIG_TRACE_IRQFLAGS +DEFINE_PER_CPU(int, hardirqs_enabled); +DEFINE_PER_CPU(int, hardirq_context); +EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled); +EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context); +#endif + /* * preempt_count and SOFTIRQ_OFFSET usage: * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving @@ -102,17 +109,11 @@ static bool ksoftirqd_running(unsigned long pending) * softirq and whether we just have bh disabled. */ +#ifdef CONFIG_TRACE_IRQFLAGS /* - * This one is for softirq.c-internal use, - * where hardirqs are disabled legitimately: + * This is for softirq.c-internal use, where hardirqs are disabled + * legitimately: */ -#ifdef CONFIG_TRACE_IRQFLAGS - -DEFINE_PER_CPU(int, hardirqs_enabled); -DEFINE_PER_CPU(int, hardirq_context); -EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled); -EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context); - void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) { unsigned long flags; @@ -203,6 +204,50 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) } EXPORT_SYMBOL(__local_bh_enable_ip); +static inline void invoke_softirq(void) +{ + if (ksoftirqd_running(local_softirq_pending())) + return; + + if (!force_irqthreads) { +#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK + /* + * We can safely execute softirq on the current stack if + * it is the irq stack, because it should be near empty + * at this stage. + */ + __do_softirq(); +#else + /* + * Otherwise, irq_exit() is called on the task stack that can + * be potentially deep already. So call softirq in its own stack + * to prevent from any overrun. + */ + do_softirq_own_stack(); +#endif + } else { + wakeup_softirqd(); + } +} + +asmlinkage __visible void do_softirq(void) +{ + __u32 pending; + unsigned long flags; + + if (in_interrupt()) + return; + + local_irq_save(flags); + + pending = local_softirq_pending(); + + if (pending && !ksoftirqd_running(pending)) + do_softirq_own_stack(); + + local_irq_restore(flags); +} + /* * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times, * but break the loop if need_resched() is set or after 2 ms. @@ -327,24 +372,6 @@ restart: current_restore_flags(old_flags, PF_MEMALLOC); } -asmlinkage __visible void do_softirq(void) -{ - __u32 pending; - unsigned long flags; - - if (in_interrupt()) - return; - - local_irq_save(flags); - - pending = local_softirq_pending(); - - if (pending && !ksoftirqd_running(pending)) - do_softirq_own_stack(); - - local_irq_restore(flags); -} - /** * irq_enter_rcu - Enter an interrupt context with RCU watching */ @@ -371,32 +398,6 @@ void irq_enter(void) irq_enter_rcu(); } -static inline void invoke_softirq(void) -{ - if (ksoftirqd_running(local_softirq_pending())) - return; - - if (!force_irqthreads) { -#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK - /* - * We can safely execute softirq on the current stack if - * it is the irq stack, because it should be near empty - * at this stage. - */ - __do_softirq(); -#else - /* - * Otherwise, irq_exit() is called on the task stack that can - * be potentially deep already. So call softirq in its own stack - * to prevent from any overrun. - */ - do_softirq_own_stack(); -#endif - } else { - wakeup_softirqd(); - } -} - static inline void tick_irq_exit(void) { #ifdef CONFIG_NO_HZ_COMMON -- cgit v1.2.3 From 4615fbc3788ddc8e7c6d697714ad35a53729aa2c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 29 Nov 2020 13:55:51 +0000 Subject: genirq/irqdomain: Don't try to free an interrupt that has no mapping When an interrupt allocation fails for N interrupts, it is pretty common for the error handling code to free the same number of interrupts, no matter how many interrupts have actually been allocated. This may result in the domain freeing code to be unexpectedly called for interrupts that have no mapping in that domain. Things end pretty badly. Instead, add some checks to irq_domain_free_irqs_hierarchy() to make sure that thiss does not follow the hierarchy if no mapping exists for a given interrupt. Fixes: 6a6544e520abe ("genirq/irqdomain: Remove auto-recursive hierarchy support") Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201129135551.396777-1-maz@kernel.org --- kernel/irq/irqdomain.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 3d7463fd6453..30a78872a5cf 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1381,8 +1381,15 @@ static void irq_domain_free_irqs_hierarchy(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs) { - if (domain->ops->free) - domain->ops->free(domain, irq_base, nr_irqs); + unsigned int i; + + if (!domain->ops->free) + return; + + for (i = 0; i < nr_irqs; i++) { + if (irq_domain_get_irq_data(domain, irq_base + i)) + domain->ops->free(domain, irq_base + i, 1); + } } int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, -- cgit v1.2.3 From 7197688b2006357da75a014e0a76be89ca9c2d46 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Dec 2020 12:57:28 +0100 Subject: sched/cputime: Remove symbol exports from IRQ time accounting account_irq_enter_time() and account_irq_exit_time() are not called from modules. EXPORT_SYMBOL_GPL() can be safely removed from the IRQ cputime accounting functions called from there. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201202115732.27827-2-frederic@kernel.org --- arch/s390/kernel/vtime.c | 10 +++++----- kernel/sched/cputime.c | 2 -- 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 8df10d3c8f6c..f9f2a11958a5 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -226,7 +226,7 @@ void vtime_flush(struct task_struct *tsk) * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void vtime_account_irq_enter(struct task_struct *tsk) +void vtime_account_kernel(struct task_struct *tsk) { u64 timer; @@ -245,12 +245,12 @@ void vtime_account_irq_enter(struct task_struct *tsk) virt_timer_forward(timer); } -EXPORT_SYMBOL_GPL(vtime_account_irq_enter); - -void vtime_account_kernel(struct task_struct *tsk) -__attribute__((alias("vtime_account_irq_enter"))); EXPORT_SYMBOL_GPL(vtime_account_kernel); +void vtime_account_irq_enter(struct task_struct *tsk) +__attribute__((alias("vtime_account_kernel"))); + + /* * Sorted add to a list. List is linear searched until first bigger * element is found. diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 5a55d2300452..61ce9f9bf0a3 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -71,7 +71,6 @@ void irqtime_account_irq(struct task_struct *curr) else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ); } -EXPORT_SYMBOL_GPL(irqtime_account_irq); static u64 irqtime_tick_accounted(u64 maxtime) { @@ -434,7 +433,6 @@ void vtime_account_irq_enter(struct task_struct *tsk) else vtime_account_kernel(tsk); } -EXPORT_SYMBOL_GPL(vtime_account_irq_enter); #endif /* __ARCH_HAS_VTIME_ACCOUNT */ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, -- cgit v1.2.3 From 2b91ec9f551b56751cde48792f1c0a1130358844 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Dec 2020 12:57:29 +0100 Subject: s390/vtime: Use the generic IRQ entry accounting s390 has its own version of IRQ entry accounting because it doesn't account the idle time the same way the other architectures do. Only the actual idle sleep time is accounted as idle time, the rest of the idle task execution is accounted as system time. Make the generic IRQ entry accounting aware of architectures that have their own way of accounting idle time and convert s390 to use it. This prepares s390 to get involved in further consolidations of IRQ time accounting. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201202115732.27827-3-frederic@kernel.org --- arch/Kconfig | 7 ++++++- arch/s390/Kconfig | 1 + arch/s390/include/asm/vtime.h | 1 - arch/s390/kernel/vtime.c | 4 ---- kernel/sched/cputime.c | 13 ++----------- 5 files changed, 9 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/arch/Kconfig b/arch/Kconfig index 56b6ccc0e32d..0f151b49c7b7 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -627,6 +627,12 @@ config HAVE_TIF_NOHZ config HAVE_VIRT_CPU_ACCOUNTING bool +config HAVE_VIRT_CPU_ACCOUNTING_IDLE + bool + help + Architecture has its own way to account idle CPU time and therefore + doesn't implement vtime_account_idle(). + config ARCH_HAS_SCALED_CPUTIME bool @@ -641,7 +647,6 @@ config HAVE_VIRT_CPU_ACCOUNTING_GEN some 32-bit arches may require multiple accesses, so proper locking is needed to protect against concurrent accesses. - config HAVE_IRQ_TIME_ACCOUNTING bool help diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4a2a12be04c9..6f1fdcd3b5db 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -181,6 +181,7 @@ config S390 select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING + select HAVE_VIRT_CPU_ACCOUNTING_IDLE select IOMMU_HELPER if PCI select IOMMU_SUPPORT if PCI select MODULES_USE_ELF_RELA diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h index 3622d4ebc73a..fac6a67988eb 100644 --- a/arch/s390/include/asm/vtime.h +++ b/arch/s390/include/asm/vtime.h @@ -2,7 +2,6 @@ #ifndef _S390_VTIME_H #define _S390_VTIME_H -#define __ARCH_HAS_VTIME_ACCOUNT #define __ARCH_HAS_VTIME_TASK_SWITCH #endif /* _S390_VTIME_H */ diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index f9f2a11958a5..ebd8e5655789 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -247,10 +247,6 @@ void vtime_account_kernel(struct task_struct *tsk) } EXPORT_SYMBOL_GPL(vtime_account_kernel); -void vtime_account_irq_enter(struct task_struct *tsk) -__attribute__((alias("vtime_account_kernel"))); - - /* * Sorted add to a list. List is linear searched until first bigger * element is found. diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 61ce9f9bf0a3..2783162542b1 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -417,23 +417,14 @@ void vtime_task_switch(struct task_struct *prev) } # endif -/* - * Archs that account the whole time spent in the idle task - * (outside irq) as idle time can rely on this and just implement - * vtime_account_kernel() and vtime_account_idle(). Archs that - * have other meaning of the idle time (s390 only includes the - * time spent by the CPU when it's in low power mode) must override - * vtime_account(). - */ -#ifndef __ARCH_HAS_VTIME_ACCOUNT void vtime_account_irq_enter(struct task_struct *tsk) { - if (!in_interrupt() && is_idle_task(tsk)) + if (!IS_ENABLED(CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE) && + !in_interrupt() && is_idle_task(tsk)) vtime_account_idle(tsk); else vtime_account_kernel(tsk); } -#endif /* __ARCH_HAS_VTIME_ACCOUNT */ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, u64 *ut, u64 *st) -- cgit v1.2.3 From 8a6a5920d3286eb0eae9f36a4ec4fc9df511eccb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Dec 2020 12:57:30 +0100 Subject: sched/vtime: Consolidate IRQ time accounting The 3 architectures implementing CONFIG_VIRT_CPU_ACCOUNTING_NATIVE all have their own version of irq time accounting that dispatch the cputime to the appropriate index: hardirq, softirq, system, idle, guest... from an all-in-one function. Instead of having these ad-hoc versions, move the cputime destination dispatch decision to the core code and leave only the actual per-index cputime accounting to the architecture. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201202115732.27827-4-frederic@kernel.org --- arch/ia64/kernel/time.c | 20 ++++++++++++----- arch/powerpc/kernel/time.c | 56 +++++++++++++++++++++++++++++++++------------- arch/s390/kernel/vtime.c | 45 ++++++++++++++++++++++++++----------- include/linux/vtime.h | 16 +++++-------- kernel/sched/cputime.c | 13 +++++++---- 5 files changed, 102 insertions(+), 48 deletions(-) (limited to 'kernel') diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 7abc5f37bfaf..733e0e3324b8 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -138,12 +138,8 @@ void vtime_account_kernel(struct task_struct *tsk) struct thread_info *ti = task_thread_info(tsk); __u64 stime = vtime_delta(tsk); - if ((tsk->flags & PF_VCPU) && !irq_count()) + if (tsk->flags & PF_VCPU) ti->gtime += stime; - else if (hardirq_count()) - ti->hardirq_time += stime; - else if (in_serving_softirq()) - ti->softirq_time += stime; else ti->stime += stime; } @@ -156,6 +152,20 @@ void vtime_account_idle(struct task_struct *tsk) ti->idle_time += vtime_delta(tsk); } +void vtime_account_softirq(struct task_struct *tsk) +{ + struct thread_info *ti = task_thread_info(tsk); + + ti->softirq_time += vtime_delta(tsk); +} + +void vtime_account_hardirq(struct task_struct *tsk) +{ + struct thread_info *ti = task_thread_info(tsk); + + ti->hardirq_time += vtime_delta(tsk); +} + #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static irqreturn_t diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 74efe46f5532..cf3f8db7e0e3 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -311,12 +311,11 @@ static unsigned long vtime_delta_scaled(struct cpu_accounting_data *acct, return stime_scaled; } -static unsigned long vtime_delta(struct task_struct *tsk, +static unsigned long vtime_delta(struct cpu_accounting_data *acct, unsigned long *stime_scaled, unsigned long *steal_time) { unsigned long now, stime; - struct cpu_accounting_data *acct = get_accounting(tsk); WARN_ON_ONCE(!irqs_disabled()); @@ -331,29 +330,30 @@ static unsigned long vtime_delta(struct task_struct *tsk, return stime; } +static void vtime_delta_kernel(struct cpu_accounting_data *acct, + unsigned long *stime, unsigned long *stime_scaled) +{ + unsigned long steal_time; + + *stime = vtime_delta(acct, stime_scaled, &steal_time); + *stime -= min(*stime, steal_time); + acct->steal_time += steal_time; +} + void vtime_account_kernel(struct task_struct *tsk) { - unsigned long stime, stime_scaled, steal_time; struct cpu_accounting_data *acct = get_accounting(tsk); + unsigned long stime, stime_scaled; - stime = vtime_delta(tsk, &stime_scaled, &steal_time); - - stime -= min(stime, steal_time); - acct->steal_time += steal_time; + vtime_delta_kernel(acct, &stime, &stime_scaled); - if ((tsk->flags & PF_VCPU) && !irq_count()) { + if (tsk->flags & PF_VCPU) { acct->gtime += stime; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME acct->utime_scaled += stime_scaled; #endif } else { - if (hardirq_count()) - acct->hardirq_time += stime; - else if (in_serving_softirq()) - acct->softirq_time += stime; - else - acct->stime += stime; - + acct->stime += stime; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME acct->stime_scaled += stime_scaled; #endif @@ -366,10 +366,34 @@ void vtime_account_idle(struct task_struct *tsk) unsigned long stime, stime_scaled, steal_time; struct cpu_accounting_data *acct = get_accounting(tsk); - stime = vtime_delta(tsk, &stime_scaled, &steal_time); + stime = vtime_delta(acct, &stime_scaled, &steal_time); acct->idle_time += stime + steal_time; } +static void vtime_account_irq_field(struct cpu_accounting_data *acct, + unsigned long *field) +{ + unsigned long stime, stime_scaled; + + vtime_delta_kernel(acct, &stime, &stime_scaled); + *field += stime; +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME + acct->stime_scaled += stime_scaled; +#endif +} + +void vtime_account_softirq(struct task_struct *tsk) +{ + struct cpu_accounting_data *acct = get_accounting(tsk); + vtime_account_irq_field(acct, &acct->softirq_time); +} + +void vtime_account_hardirq(struct task_struct *tsk) +{ + struct cpu_accounting_data *acct = get_accounting(tsk); + vtime_account_irq_field(acct, &acct->hardirq_time); +} + static void vtime_flush_scaled(struct task_struct *tsk, struct cpu_accounting_data *acct) { diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index ebd8e5655789..5aaa2ca6a928 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -222,31 +222,50 @@ void vtime_flush(struct task_struct *tsk) S390_lowcore.avg_steal_timer = avg_steal; } +static u64 vtime_delta(void) +{ + u64 timer = S390_lowcore.last_update_timer; + + S390_lowcore.last_update_timer = get_vtimer(); + + return timer - S390_lowcore.last_update_timer; +} + /* * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ void vtime_account_kernel(struct task_struct *tsk) { - u64 timer; - - timer = S390_lowcore.last_update_timer; - S390_lowcore.last_update_timer = get_vtimer(); - timer -= S390_lowcore.last_update_timer; + u64 delta = vtime_delta(); - if ((tsk->flags & PF_VCPU) && (irq_count() == 0)) - S390_lowcore.guest_timer += timer; - else if (hardirq_count()) - S390_lowcore.hardirq_timer += timer; - else if (in_serving_softirq()) - S390_lowcore.softirq_timer += timer; + if (tsk->flags & PF_VCPU) + S390_lowcore.guest_timer += delta; else - S390_lowcore.system_timer += timer; + S390_lowcore.system_timer += delta; - virt_timer_forward(timer); + virt_timer_forward(delta); } EXPORT_SYMBOL_GPL(vtime_account_kernel); +void vtime_account_softirq(struct task_struct *tsk) +{ + u64 delta = vtime_delta(); + + S390_lowcore.softirq_timer += delta; + + virt_timer_forward(delta); +} + +void vtime_account_hardirq(struct task_struct *tsk) +{ + u64 delta = vtime_delta(); + + S390_lowcore.hardirq_timer += delta; + + virt_timer_forward(delta); +} + /* * Sorted add to a list. List is linear searched until first bigger * element is found. diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 2cdeca062db3..6c9867419615 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -83,16 +83,12 @@ static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { } #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -extern void vtime_account_irq_enter(struct task_struct *tsk); -static inline void vtime_account_irq_exit(struct task_struct *tsk) -{ - /* On hard|softirq exit we always account to hard|softirq cputime */ - vtime_account_kernel(tsk); -} +extern void vtime_account_irq(struct task_struct *tsk); +extern void vtime_account_softirq(struct task_struct *tsk); +extern void vtime_account_hardirq(struct task_struct *tsk); extern void vtime_flush(struct task_struct *tsk); #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -static inline void vtime_account_irq_enter(struct task_struct *tsk) { } -static inline void vtime_account_irq_exit(struct task_struct *tsk) { } +static inline void vtime_account_irq(struct task_struct *tsk) { } static inline void vtime_flush(struct task_struct *tsk) { } #endif @@ -105,13 +101,13 @@ static inline void irqtime_account_irq(struct task_struct *tsk) { } static inline void account_irq_enter_time(struct task_struct *tsk) { - vtime_account_irq_enter(tsk); + vtime_account_irq(tsk); irqtime_account_irq(tsk); } static inline void account_irq_exit_time(struct task_struct *tsk) { - vtime_account_irq_exit(tsk); + vtime_account_irq(tsk); irqtime_account_irq(tsk); } diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 2783162542b1..02163d4260d7 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -417,13 +417,18 @@ void vtime_task_switch(struct task_struct *prev) } # endif -void vtime_account_irq_enter(struct task_struct *tsk) +void vtime_account_irq(struct task_struct *tsk) { - if (!IS_ENABLED(CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE) && - !in_interrupt() && is_idle_task(tsk)) + if (hardirq_count()) { + vtime_account_hardirq(tsk); + } else if (in_serving_softirq()) { + vtime_account_softirq(tsk); + } else if (!IS_ENABLED(CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE) && + is_idle_task(tsk)) { vtime_account_idle(tsk); - else + } else { vtime_account_kernel(tsk); + } } void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, -- cgit v1.2.3 From d3759e7184f8f6187e62f8c4e7dcb1f6c47c075a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Dec 2020 12:57:31 +0100 Subject: irqtime: Move irqtime entry accounting after irq offset incrementation IRQ time entry is currently accounted before HARDIRQ_OFFSET or SOFTIRQ_OFFSET are incremented. This is convenient to decide to which index the cputime to account is dispatched. Unfortunately it prevents tick_irq_enter() from being called under HARDIRQ_OFFSET because tick_irq_enter() has to be called before the IRQ entry accounting due to the necessary clock catch up. As a result we don't benefit from appropriate lockdep coverage on tick_irq_enter(). To prepare for fixing this, move the IRQ entry cputime accounting after the preempt offset is incremented. This requires the cputime dispatch code to handle the extra offset. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20201202115732.27827-5-frederic@kernel.org --- include/linux/hardirq.h | 4 ++-- include/linux/vtime.h | 34 ++++++++++++++++++++++++---------- kernel/sched/cputime.c | 18 +++++++++++------- kernel/softirq.c | 6 +++--- 4 files changed, 40 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 754f67ac4326..7c9d6a2d7e90 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -32,9 +32,9 @@ static __always_inline void rcu_irq_enter_check_tick(void) */ #define __irq_enter() \ do { \ - account_irq_enter_time(current); \ preempt_count_add(HARDIRQ_OFFSET); \ lockdep_hardirq_enter(); \ + account_hardirq_enter(current); \ } while (0) /* @@ -62,8 +62,8 @@ void irq_enter_rcu(void); */ #define __irq_exit() \ do { \ + account_hardirq_exit(current); \ lockdep_hardirq_exit(); \ - account_irq_exit_time(current); \ preempt_count_sub(HARDIRQ_OFFSET); \ } while (0) diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 6c9867419615..041d6524d144 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -83,32 +83,46 @@ static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { } #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -extern void vtime_account_irq(struct task_struct *tsk); +extern void vtime_account_irq(struct task_struct *tsk, unsigned int offset); extern void vtime_account_softirq(struct task_struct *tsk); extern void vtime_account_hardirq(struct task_struct *tsk); extern void vtime_flush(struct task_struct *tsk); #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -static inline void vtime_account_irq(struct task_struct *tsk) { } +static inline void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { } +static inline void vtime_account_softirq(struct task_struct *tsk) { } +static inline void vtime_account_hardirq(struct task_struct *tsk) { } static inline void vtime_flush(struct task_struct *tsk) { } #endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING -extern void irqtime_account_irq(struct task_struct *tsk); +extern void irqtime_account_irq(struct task_struct *tsk, unsigned int offset); #else -static inline void irqtime_account_irq(struct task_struct *tsk) { } +static inline void irqtime_account_irq(struct task_struct *tsk, unsigned int offset) { } #endif -static inline void account_irq_enter_time(struct task_struct *tsk) +static inline void account_softirq_enter(struct task_struct *tsk) { - vtime_account_irq(tsk); - irqtime_account_irq(tsk); + vtime_account_irq(tsk, SOFTIRQ_OFFSET); + irqtime_account_irq(tsk, SOFTIRQ_OFFSET); } -static inline void account_irq_exit_time(struct task_struct *tsk) +static inline void account_softirq_exit(struct task_struct *tsk) { - vtime_account_irq(tsk); - irqtime_account_irq(tsk); + vtime_account_softirq(tsk); + irqtime_account_irq(tsk, 0); +} + +static inline void account_hardirq_enter(struct task_struct *tsk) +{ + vtime_account_irq(tsk, HARDIRQ_OFFSET); + irqtime_account_irq(tsk, HARDIRQ_OFFSET); +} + +static inline void account_hardirq_exit(struct task_struct *tsk) +{ + vtime_account_hardirq(tsk); + irqtime_account_irq(tsk, 0); } #endif /* _LINUX_KERNEL_VTIME_H */ diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 02163d4260d7..5f611658eeab 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -44,12 +44,13 @@ static void irqtime_account_delta(struct irqtime *irqtime, u64 delta, } /* - * Called before incrementing preempt_count on {soft,}irq_enter + * Called after incrementing preempt_count on {soft,}irq_enter * and before decrementing preempt_count on {soft,}irq_exit. */ -void irqtime_account_irq(struct task_struct *curr) +void irqtime_account_irq(struct task_struct *curr, unsigned int offset) { struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime); + unsigned int pc; s64 delta; int cpu; @@ -59,6 +60,7 @@ void irqtime_account_irq(struct task_struct *curr) cpu = smp_processor_id(); delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; irqtime->irq_start_time += delta; + pc = preempt_count() - offset; /* * We do not account for softirq time from ksoftirqd here. @@ -66,9 +68,9 @@ void irqtime_account_irq(struct task_struct *curr) * in that case, so as not to confuse scheduler with a special task * that do not consume any time, but still wants to run. */ - if (hardirq_count()) + if (pc & HARDIRQ_MASK) irqtime_account_delta(irqtime, delta, CPUTIME_IRQ); - else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) + else if ((pc & SOFTIRQ_OFFSET) && curr != this_cpu_ksoftirqd()) irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ); } @@ -417,11 +419,13 @@ void vtime_task_switch(struct task_struct *prev) } # endif -void vtime_account_irq(struct task_struct *tsk) +void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { - if (hardirq_count()) { + unsigned int pc = preempt_count() - offset; + + if (pc & HARDIRQ_OFFSET) { vtime_account_hardirq(tsk); - } else if (in_serving_softirq()) { + } else if (pc & SOFTIRQ_OFFSET) { vtime_account_softirq(tsk); } else if (!IS_ENABLED(CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE) && is_idle_task(tsk)) { diff --git a/kernel/softirq.c b/kernel/softirq.c index 617009ccd82c..b8f42b3ba8ca 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -315,10 +315,10 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) current->flags &= ~PF_MEMALLOC; pending = local_softirq_pending(); - account_irq_enter_time(current); __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); in_hardirq = lockdep_softirq_start(); + account_softirq_enter(current); restart: /* Reset the pending bitmask before enabling irqs */ @@ -365,8 +365,8 @@ restart: wakeup_softirqd(); } + account_softirq_exit(current); lockdep_softirq_end(in_hardirq); - account_irq_exit_time(current); __local_bh_enable(SOFTIRQ_OFFSET); WARN_ON_ONCE(in_interrupt()); current_restore_flags(old_flags, PF_MEMALLOC); @@ -418,7 +418,7 @@ static inline void __irq_exit_rcu(void) #else lockdep_assert_irqs_disabled(); #endif - account_irq_exit_time(current); + account_hardirq_exit(current); preempt_count_sub(HARDIRQ_OFFSET); if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); -- cgit v1.2.3 From d14ce74f1fb376ccbbc0b05ded477ada51253729 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Dec 2020 12:57:32 +0100 Subject: irq: Call tick_irq_enter() inside HARDIRQ_OFFSET Now that account_hardirq_enter() is called after HARDIRQ_OFFSET has been incremented, there is nothing left that prevents us from also moving tick_irq_enter() after HARDIRQ_OFFSET is incremented. The desired outcome is to remove the nasty hack that prevents softirqs from being raised through ksoftirqd instead of the hardirq bottom half. Also tick_irq_enter() then becomes appropriately covered by lockdep. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201202115732.27827-6-frederic@kernel.org --- kernel/softirq.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/softirq.c b/kernel/softirq.c index b8f42b3ba8ca..d5bfd5e661fc 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -377,16 +377,12 @@ restart: */ void irq_enter_rcu(void) { - if (is_idle_task(current) && !in_interrupt()) { - /* - * Prevent raise_softirq from needlessly waking up ksoftirqd - * here, as softirq will be serviced on return from interrupt. - */ - local_bh_disable(); + __irq_enter_raw(); + + if (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)) tick_irq_enter(); - _local_bh_enable(); - } - __irq_enter(); + + account_hardirq_enter(current); } /** -- cgit v1.2.3 From b388fa50142510fb6477f130bb1b3f05a0a263a1 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 9 Nov 2020 09:41:21 +0000 Subject: Revert "genirq: Add fasteoi IPI flow" handle_percpu_devid_fasteoi_ipi() has no more users, and handle_percpu_devid_irq() can do all that it was supposed to do. Get rid of it. This reverts commit c5e5ec033c4ab25c53f1fd217849e75deb0bf7bf. Signed-off-by: Valentin Schneider Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201109094121.29975-6-valentin.schneider@arm.com --- include/linux/irq.h | 1 - kernel/irq/chip.c | 27 --------------------------- 2 files changed, 28 deletions(-) (limited to 'kernel') diff --git a/include/linux/irq.h b/include/linux/irq.h index c54365309e97..ca26bec51cec 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -647,7 +647,6 @@ static inline int irq_set_parent(int irq, int parent_irq) */ extern void handle_level_irq(struct irq_desc *desc); extern void handle_fasteoi_irq(struct irq_desc *desc); -extern void handle_percpu_devid_fasteoi_ipi(struct irq_desc *desc); extern void handle_edge_irq(struct irq_desc *desc); extern void handle_edge_eoi_irq(struct irq_desc *desc); extern void handle_simple_irq(struct irq_desc *desc); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index b9b9618e1aca..0ae308efa604 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -944,33 +944,6 @@ void handle_percpu_devid_irq(struct irq_desc *desc) chip->irq_eoi(&desc->irq_data); } -/** - * handle_percpu_devid_fasteoi_ipi - Per CPU local IPI handler with per cpu - * dev ids - * @desc: the interrupt description structure for this irq - * - * The biggest difference with the IRQ version is that the interrupt is - * EOIed early, as the IPI could result in a context switch, and we need to - * make sure the IPI can fire again. We also assume that the arch code has - * registered an action. If not, we are positively doomed. - */ -void handle_percpu_devid_fasteoi_ipi(struct irq_desc *desc) -{ - struct irq_chip *chip = irq_desc_get_chip(desc); - struct irqaction *action = desc->action; - unsigned int irq = irq_desc_get_irq(desc); - irqreturn_t res; - - __kstat_incr_irqs_this_cpu(desc); - - if (chip->irq_eoi) - chip->irq_eoi(&desc->irq_data); - - trace_irq_handler_entry(irq, action); - res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); - trace_irq_handler_exit(irq, action, res); -} - /** * handle_percpu_devid_fasteoi_nmi - Per CPU local NMI handler with per cpu * dev ids -- cgit v1.2.3 From 1d3aec89286254487df7641c30f1b14ad1d127a5 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 2 Dec 2020 18:36:53 +0800 Subject: genirq/affinity: Add irq_update_affinity_desc() Add a function to allow the affinity of an interrupt be switched to managed, such that interrupts allocated for platform devices may be managed. This new interface has certain limitations, and attempts to use it in the following circumstances will fail: - For when the kernel is configured for generic IRQ reservation mode (in config GENERIC_IRQ_RESERVATION_MODE). The reason being that it could conflict with managed vs. non-managed interrupt accounting. - The interrupt is already started, which should not be the case during init - The interrupt is already configured as managed, which means double init Suggested-by: Thomas Gleixner Signed-off-by: John Garry Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1606905417-183214-2-git-send-email-john.garry@huawei.com --- include/linux/interrupt.h | 8 ++++++ kernel/irq/manage.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) (limited to 'kernel') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index ee8299eb1f52..870b3251e174 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -352,6 +352,8 @@ extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); +extern int irq_update_affinity_desc(unsigned int irq, + struct irq_affinity_desc *affinity); extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); @@ -387,6 +389,12 @@ static inline int irq_set_affinity_hint(unsigned int irq, return -EINVAL; } +static inline int irq_update_affinity_desc(unsigned int irq, + struct irq_affinity_desc *affinity) +{ + return -EINVAL; +} + static inline int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) { diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c460e0496006..c826ba4141fe 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -371,6 +371,76 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, return ret; } +/** + * irq_update_affinity_desc - Update affinity management for an interrupt + * @irq: The interrupt number to update + * @affinity: Pointer to the affinity descriptor + * + * This interface can be used to configure the affinity management of + * interrupts which have been allocated already. + * + * There are certain limitations on when it may be used - attempts to use it + * for when the kernel is configured for generic IRQ reservation mode (in + * config GENERIC_IRQ_RESERVATION_MODE) will fail, as it may conflict with + * managed/non-managed interrupt accounting. In addition, attempts to use it on + * an interrupt which is already started or which has already been configured + * as managed will also fail, as these mean invalid init state or double init. + */ +int irq_update_affinity_desc(unsigned int irq, + struct irq_affinity_desc *affinity) +{ + struct irq_desc *desc; + unsigned long flags; + bool activated; + int ret = 0; + + /* + * Supporting this with the reservation scheme used by x86 needs + * some more thought. Fail it for now. + */ + if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE)) + return -EOPNOTSUPP; + + desc = irq_get_desc_buslock(irq, &flags, 0); + if (!desc) + return -EINVAL; + + /* Requires the interrupt to be shut down */ + if (irqd_is_started(&desc->irq_data)) { + ret = -EBUSY; + goto out_unlock; + } + + /* Interrupts which are already managed cannot be modified */ + if (irqd_affinity_is_managed(&desc->irq_data)) { + ret = -EBUSY; + goto out_unlock; + } + + /* + * Deactivate the interrupt. That's required to undo + * anything an earlier activation has established. + */ + activated = irqd_is_activated(&desc->irq_data); + if (activated) + irq_domain_deactivate_irq(&desc->irq_data); + + if (affinity->is_managed) { + irqd_set(&desc->irq_data, IRQD_AFFINITY_MANAGED); + irqd_set(&desc->irq_data, IRQD_MANAGED_SHUTDOWN); + } + + cpumask_copy(desc->irq_common_data.affinity, &affinity->mask); + + /* Restore the activation state */ + if (activated) + irq_domain_activate_irq(&desc->irq_data, false); + +out_unlock: + irq_put_desc_busunlock(desc, flags); + return ret; +} + int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) { struct irq_desc *desc = irq_to_desc(irq); -- cgit v1.2.3