Diffstat (limited to 'drivers/acpi/apei/ghes.c')
-rw-r--r--  drivers/acpi/apei/ghes.c | 205
1 file changed, 126 insertions, 79 deletions
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index dab7cb7349df..fc5f780bb61d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -47,11 +47,11 @@
 #include <linux/genalloc.h>
 #include <linux/pci.h>
 #include <linux/aer.h>
+#include <linux/nmi.h>
 
 #include <acpi/ghes.h>
-#include <asm/mce.h>
+#include <acpi/apei.h>
 #include <asm/tlbflush.h>
-#include <asm/nmi.h>
 
 #include "apei-internal.h"
 
@@ -74,20 +74,18 @@
 #define GHES_ESTATUS_CACHE_LEN(estatus_len)	\
 	(sizeof(struct ghes_estatus_cache) + (estatus_len))
 #define GHES_ESTATUS_FROM_CACHE(estatus_cache)	\
-	((struct acpi_generic_status *)		\
+	((struct acpi_hest_generic_status *)	\
 	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))
 
 #define GHES_ESTATUS_NODE_LEN(estatus_len)	\
 	(sizeof(struct ghes_estatus_node) + (estatus_len))
 #define GHES_ESTATUS_FROM_NODE(estatus_node)	\
-	((struct acpi_generic_status *)		\
+	((struct acpi_hest_generic_status *)	\
 	 ((struct ghes_estatus_node *)(estatus_node) + 1))
 
 bool ghes_disable;
 module_param_named(disable, ghes_disable, bool, 0);
 
-static int ghes_panic_timeout __read_mostly = 30;
-
 /*
  * All error sources notified with SCI shares one notifier function,
  * so they need to be linked and checked one by one. This is applied
@@ -97,16 +95,9 @@ static int ghes_panic_timeout __read_mostly = 30;
  * list changing, not for traversing.
  */
 static LIST_HEAD(ghes_sci);
-static LIST_HEAD(ghes_nmi);
 static DEFINE_MUTEX(ghes_list_mutex);
 
 /*
- * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
- * mutual exclusion.
- */
-static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
-
-/*
  * Because the memory area used to transfer hardware error information
  * from BIOS to Linux can be determined only in NMI, IRQ or timer
  * handler, but general ioremap can not be used in atomic context, so
@@ -114,12 +105,16 @@ static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
  */
 
 /*
- * Two virtual pages are used, one for NMI context, the other for
- * IRQ/PROCESS context
+ * Two virtual pages are used, one for IRQ/PROCESS context, the other for
+ * NMI context (optionally).
  */
-#define GHES_IOREMAP_PAGES 2
-#define GHES_IOREMAP_NMI_PAGE(base)	(base)
-#define GHES_IOREMAP_IRQ_PAGE(base)	((base) + PAGE_SIZE)
+#ifdef CONFIG_HAVE_ACPI_APEI_NMI
+#define GHES_IOREMAP_PAGES 2
+#else
+#define GHES_IOREMAP_PAGES 1
+#endif
+#define GHES_IOREMAP_IRQ_PAGE(base)	(base)
+#define GHES_IOREMAP_NMI_PAGE(base)	((base) + PAGE_SIZE)
 
 /* virtual memory area for atomic ioremap */
 static struct vm_struct *ghes_ioremap_area;
@@ -130,18 +125,8 @@ static struct vm_struct *ghes_ioremap_area;
 static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
 static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
 
-/*
- * printk is not safe in NMI context. So in NMI handler, we allocate
- * required memory from lock-less memory allocator
- * (ghes_estatus_pool), save estatus into it, put them into lock-less
- * list (ghes_estatus_llist), then delay printk into IRQ context via
- * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record
- * required pool size by all NMI error source.
- */
 static struct gen_pool *ghes_estatus_pool;
 static unsigned long ghes_estatus_pool_size_request;
-static struct llist_head ghes_estatus_llist;
-static struct irq_work ghes_proc_irq_work;
 
 struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
 static atomic_t ghes_estatus_cache_alloced;
@@ -192,7 +177,7 @@ static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
 
 	BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
 	unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
-	__flush_tlb_one(vaddr);
+	arch_apei_flush_tlb_one(vaddr);
 }
 
 static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
@@ -202,7 +187,7 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
 
 	BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
 	unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
-	__flush_tlb_one(vaddr);
+	arch_apei_flush_tlb_one(vaddr);
 }
 
 static int ghes_estatus_pool_init(void)
@@ -249,11 +234,6 @@
 	return 0;
 }
 
-static void ghes_estatus_pool_shrink(unsigned long len)
-{
-	ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
-}
-
 static struct ghes *ghes_new(struct acpi_hest_generic *generic)
 {
 	struct ghes *ghes;
@@ -408,7 +388,7 @@ static void ghes_clear_estatus(struct ghes *ghes)
 	ghes->flags &= ~GHES_TO_CLEAR;
 }
 
-static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev)
+static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
 {
 #ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
 	unsigned long pfn;
@@ -441,10 +421,10 @@ static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev)
 }
 
 static void ghes_do_proc(struct ghes *ghes,
-			 const struct acpi_generic_status *estatus)
+			 const struct acpi_hest_generic_status *estatus)
 {
 	int sev, sec_sev;
-	struct acpi_generic_data *gdata;
+	struct acpi_hest_generic_data *gdata;
 
 	sev = ghes_severity(estatus->error_severity);
 	apei_estatus_for_each_section(estatus, gdata) {
@@ -455,9 +435,7 @@ static void ghes_do_proc(struct ghes *ghes,
 			mem_err = (struct cper_sec_mem_err *)(gdata+1);
 			ghes_edac_report_mem_error(ghes, sev, mem_err);
 
-#ifdef CONFIG_X86_MCE
-			apei_mce_report_mem_error(sev, mem_err);
-#endif
+			arch_apei_report_mem_error(sev, mem_err);
 			ghes_handle_memory_failure(gdata, sev);
 		}
 #ifdef CONFIG_ACPI_APEI_PCIEAER
@@ -498,7 +476,7 @@ static void ghes_do_proc(struct ghes *ghes,
 
 static void __ghes_print_estatus(const char *pfx,
 				 const struct acpi_hest_generic *generic,
-				 const struct acpi_generic_status *estatus)
+				 const struct acpi_hest_generic_status *estatus)
 {
 	static atomic_t seqno;
 	unsigned int curr_seqno;
@@ -520,7 +498,7 @@ static void __ghes_print_estatus(const char *pfx,
 
 static int ghes_print_estatus(const char *pfx,
 			      const struct acpi_hest_generic *generic,
-			      const struct acpi_generic_status *estatus)
+			      const struct acpi_hest_generic_status *estatus)
 {
 	/* Not more than 2 messages every 5 seconds */
 	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
@@ -542,13 +520,13 @@ static int ghes_print_estatus(const char *pfx,
  * GHES error status reporting throttle, to report more kinds of
  * errors, instead of just most frequently occurred errors.
  */
-static int ghes_estatus_cached(struct acpi_generic_status *estatus)
+static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
 {
 	u32 len;
 	int i, cached = 0;
 	unsigned long long now;
 	struct ghes_estatus_cache *cache;
-	struct acpi_generic_status *cache_estatus;
+	struct acpi_hest_generic_status *cache_estatus;
 
 	len = cper_estatus_len(estatus);
 	rcu_read_lock();
@@ -573,12 +551,12 @@ static int ghes_estatus_cached(struct acpi_generic_status *estatus)
 
 static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
 	struct acpi_hest_generic *generic,
-	struct acpi_generic_status *estatus)
+	struct acpi_hest_generic_status *estatus)
 {
 	int alloced;
 	u32 len, cache_len;
 	struct ghes_estatus_cache *cache;
-	struct acpi_generic_status *cache_estatus;
+	struct acpi_hest_generic_status *cache_estatus;
 
 	alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
 	if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
@@ -621,7 +599,7 @@ static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
 
 static void ghes_estatus_cache_add(
 	struct acpi_hest_generic *generic,
-	struct acpi_generic_status *estatus)
+	struct acpi_hest_generic_status *estatus)
 {
 	int i, slot = -1, count;
 	unsigned long long now, duration, period, max_period = 0;
@@ -734,6 +712,32 @@ static int ghes_notify_sci(struct notifier_block *this,
 	return ret;
 }
 
+static struct notifier_block ghes_notifier_sci = {
+	.notifier_call = ghes_notify_sci,
+};
+
+#ifdef CONFIG_HAVE_ACPI_APEI_NMI
+/*
+ * printk is not safe in NMI context. So in NMI handler, we allocate
+ * required memory from lock-less memory allocator
+ * (ghes_estatus_pool), save estatus into it, put them into lock-less
+ * list (ghes_estatus_llist), then delay printk into IRQ context via
+ * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record
+ * required pool size by all NMI error source.
+ */
+static struct llist_head ghes_estatus_llist;
+static struct irq_work ghes_proc_irq_work;
+
+/*
+ * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
+ * mutual exclusion.
+ */
+static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
+
+static LIST_HEAD(ghes_nmi);
+
+static int ghes_panic_timeout __read_mostly = 30;
+
 static struct llist_node *llist_nodes_reverse(struct llist_node *llnode)
 {
 	struct llist_node *next, *tail = NULL;
@@ -753,7 +757,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
 	struct llist_node *llnode, *next;
 	struct ghes_estatus_node *estatus_node;
 	struct acpi_hest_generic *generic;
-	struct acpi_generic_status *estatus;
+	struct acpi_hest_generic_status *estatus;
 	u32 len, node_len;
 
 	llnode = llist_del_all(&ghes_estatus_llist);
@@ -786,7 +790,7 @@ static void ghes_print_queued_estatus(void)
 	struct llist_node *llnode;
 	struct ghes_estatus_node *estatus_node;
 	struct acpi_hest_generic *generic;
-	struct acpi_generic_status *estatus;
+	struct acpi_hest_generic_status *estatus;
 	u32 len, node_len;
 
 	llnode = llist_del_all(&ghes_estatus_llist);
@@ -845,7 +849,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
 #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
 		u32 len, node_len;
 		struct ghes_estatus_node *estatus_node;
-		struct acpi_generic_status *estatus;
+		struct acpi_hest_generic_status *estatus;
 #endif
 		if (!(ghes->flags & GHES_TO_CLEAR))
 			continue;
@@ -877,10 +881,6 @@ out:
 	return ret;
 }
 
-static struct notifier_block ghes_notifier_sci = {
-	.notifier_call = ghes_notify_sci,
-};
-
 static unsigned long ghes_esource_prealloc_size(
 	const struct acpi_hest_generic *generic)
 {
@@ -896,11 +896,71 @@ static unsigned long ghes_esource_prealloc_size(
 	return prealloc_size;
 }
 
+static void ghes_estatus_pool_shrink(unsigned long len)
+{
+	ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
+}
+
+static void ghes_nmi_add(struct ghes *ghes)
+{
+	unsigned long len;
+
+	len = ghes_esource_prealloc_size(ghes->generic);
+	ghes_estatus_pool_expand(len);
+	mutex_lock(&ghes_list_mutex);
+	if (list_empty(&ghes_nmi))
+		register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
+	list_add_rcu(&ghes->list, &ghes_nmi);
+	mutex_unlock(&ghes_list_mutex);
+}
+
+static void ghes_nmi_remove(struct ghes *ghes)
+{
+	unsigned long len;
+
+	mutex_lock(&ghes_list_mutex);
+	list_del_rcu(&ghes->list);
+	if (list_empty(&ghes_nmi))
+		unregister_nmi_handler(NMI_LOCAL, "ghes");
+	mutex_unlock(&ghes_list_mutex);
+	/*
+	 * To synchronize with NMI handler, ghes can only be
+	 * freed after NMI handler finishes.
+	 */
+	synchronize_rcu();
+	len = ghes_esource_prealloc_size(ghes->generic);
+	ghes_estatus_pool_shrink(len);
+}
+
+static void ghes_nmi_init_cxt(void)
+{
+	init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
+}
+#else /* CONFIG_HAVE_ACPI_APEI_NMI */
+static inline void ghes_nmi_add(struct ghes *ghes)
+{
+	pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n",
+	       ghes->generic->header.source_id);
+	BUG();
+}
+
+static inline void ghes_nmi_remove(struct ghes *ghes)
+{
+	pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n",
+	       ghes->generic->header.source_id);
+	BUG();
+}
+
+static inline void ghes_nmi_init_cxt(void)
+{
+}
+#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
+
 static int ghes_probe(struct platform_device *ghes_dev)
 {
 	struct acpi_hest_generic *generic;
 	struct ghes *ghes = NULL;
-	unsigned long len;
+	int rc = -EINVAL;
 
 	generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
 
@@ -911,7 +971,13 @@ static int ghes_probe(struct platform_device *ghes_dev)
 	case ACPI_HEST_NOTIFY_POLLED:
 	case ACPI_HEST_NOTIFY_EXTERNAL:
 	case ACPI_HEST_NOTIFY_SCI:
+		break;
 	case ACPI_HEST_NOTIFY_NMI:
+		if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
+			pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
+				generic->header.source_id);
+			goto err;
+		}
 		break;
 	case ACPI_HEST_NOTIFY_LOCAL:
 		pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
			   generic->header.source_id);
@@ -925,7 +991,7 @@
 
 	rc = -EIO;
 	if (generic->error_block_length <
-	    sizeof(struct acpi_generic_status)) {
+	    sizeof(struct acpi_hest_generic_status)) {
 		pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
			   generic->error_block_length,
			   generic->header.source_id);
@@ -972,14 +1038,7 @@ static int ghes_probe(struct platform_device *ghes_dev)
 		mutex_unlock(&ghes_list_mutex);
 		break;
 	case ACPI_HEST_NOTIFY_NMI:
-		len = ghes_esource_prealloc_size(generic);
-		ghes_estatus_pool_expand(len);
-		mutex_lock(&ghes_list_mutex);
-		if (list_empty(&ghes_nmi))
-			register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0,
-					     "ghes");
-		list_add_rcu(&ghes->list, &ghes_nmi);
-		mutex_unlock(&ghes_list_mutex);
+		ghes_nmi_add(ghes);
 		break;
 	default:
 		BUG();
@@ -1001,7 +1060,6 @@ static int ghes_remove(struct platform_device *ghes_dev)
 {
 	struct ghes *ghes;
 	struct acpi_hest_generic *generic;
-	unsigned long len;
 
 	ghes = platform_get_drvdata(ghes_dev);
 	generic = ghes->generic;
@@ -1022,18 +1080,7 @@ static int ghes_remove(struct platform_device *ghes_dev)
 		mutex_unlock(&ghes_list_mutex);
 		break;
 	case ACPI_HEST_NOTIFY_NMI:
-		mutex_lock(&ghes_list_mutex);
-		list_del_rcu(&ghes->list);
-		if (list_empty(&ghes_nmi))
-			unregister_nmi_handler(NMI_LOCAL, "ghes");
-		mutex_unlock(&ghes_list_mutex);
-		/*
-		 * To synchronize with NMI handler, ghes can only be
-		 * freed after NMI handler finishes.
-		 */
-		synchronize_rcu();
-		len = ghes_esource_prealloc_size(generic);
-		ghes_estatus_pool_shrink(len);
+		ghes_nmi_remove(ghes);
 		break;
 	default:
 		BUG();
@@ -1077,7 +1124,7 @@ static int __init ghes_init(void)
 		return -EINVAL;
 	}
 
-	init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
+	ghes_nmi_init_cxt();
 
 	rc = ghes_ioremap_init();
 	if (rc)
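
Reviewer note: the structural idea of this patch is the build-time stub pattern visible in the hunk at @@ -896,11 +896,71 @@. NMI-specific setup and teardown move behind ghes_nmi_add()/ghes_nmi_remove()/ghes_nmi_init_cxt(), and when CONFIG_HAVE_ACPI_APEI_NMI is not set the same names resolve to inline stubs, so ghes_probe() and ghes_remove() stay free of #ifdef blocks. Below is a minimal user-space sketch of that pattern, not kernel code; the CONFIG_HAVE_FEATURE_X macro and the feature_*() names are made up for illustration and are not part of the patch.

/*
 * Stub pattern sketch (illustrative only): one real implementation and one
 * set of inline stubs selected at build time, so callers need no #ifdefs.
 * Build with -DCONFIG_HAVE_FEATURE_X to get the real path.
 */
#include <stdio.h>
#include <stdlib.h>

#ifdef CONFIG_HAVE_FEATURE_X
/* Real implementation, compiled only when the feature is available. */
static void feature_add(int source_id)
{
	printf("feature: registered error source %d\n", source_id);
}

static void feature_remove(int source_id)
{
	printf("feature: unregistered error source %d\n", source_id);
}
#else
/* Stubs with identical signatures; reaching them is a bug, like BUG() in the patch. */
static inline void feature_add(int source_id)
{
	fprintf(stderr, "ID: %d, feature not supported in this build!\n", source_id);
	abort();
}

static inline void feature_remove(int source_id)
{
	fprintf(stderr, "ID: %d, feature not supported in this build!\n", source_id);
	abort();
}
#endif

int main(void)
{
	/* The caller never tests the config option itself. */
	feature_add(7);
	feature_remove(7);
	return 0;
}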