diff options
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/Kconfig | 137 | ||||
-rw-r--r-- | drivers/edac/Makefile | 9 | ||||
-rw-r--r-- | drivers/edac/altera_edac.c | 22 | ||||
-rw-r--r-- | drivers/edac/edac_mc.c | 99 | ||||
-rw-r--r-- | drivers/edac/edac_stub.c | 68 | ||||
-rw-r--r-- | drivers/edac/i5000_edac.c | 2 | ||||
-rw-r--r-- | drivers/edac/i5400_edac.c | 5 | ||||
-rw-r--r-- | drivers/edac/pnd2_edac.c | 1546 | ||||
-rw-r--r-- | drivers/edac/pnd2_edac.h | 301 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 4 | ||||
-rw-r--r-- | drivers/edac/skx_edac.c | 2 | ||||
-rw-r--r-- | drivers/edac/thunderx_edac.c | 2174 | ||||
-rw-r--r-- | drivers/edac/xgene_edac.c | 2 |
13 files changed, 4192 insertions, 179 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 82d85cce81f8..96afb2aeed18 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -10,26 +10,16 @@ config EDAC_SUPPORT bool menuconfig EDAC - bool "EDAC (Error Detection And Correction) reporting" - depends on HAS_IOMEM && EDAC_SUPPORT + tristate "EDAC (Error Detection And Correction) reporting" + depends on HAS_IOMEM && EDAC_SUPPORT && RAS help - EDAC is designed to report errors in the core system. - These are low-level errors that are reported in the CPU or - supporting chipset or other subsystems: + EDAC is a subsystem along with hardware-specific drivers designed to + report hardware errors. These are low-level errors that are reported + in the CPU or supporting chipset or other subsystems: memory errors, cache errors, PCI errors, thermal throttling, etc.. If unsure, select 'Y'. - If this code is reporting problems on your system, please - see the EDAC project web pages for more information at: - - <http://bluesmoke.sourceforge.net/> - - and: - - <http://buttersideup.com/edacwiki> - - There is also a mailing list for the EDAC project, which can - be found via the sourceforge page. + The mailing list for the EDAC project is linux-edac@vger.kernel.org. if EDAC @@ -43,6 +33,7 @@ config EDAC_LEGACY_SYSFS config EDAC_DEBUG bool "Debugging" + select DEBUG_FS help This turns on debugging information for the entire EDAC subsystem. You do so by inserting edac_module with "edac_debug_level=x." Valid @@ -61,21 +52,9 @@ config EDAC_DECODE_MCE which occur really early upon boot, before the module infrastructure has been initialized. -config EDAC_MM_EDAC - tristate "Main Memory EDAC (Error Detection And Correction) reporting" - select RAS - help - Some systems are able to detect and correct errors in main - memory. EDAC can report statistics on memory error - detection and correction (EDAC - or commonly referred to ECC - errors). EDAC will also try to decode where these errors - occurred so that a particular failing memory module can be - replaced. If unsure, select 'Y'. - config EDAC_GHES bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" - depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y) - default y + depends on ACPI_APEI_GHES && (EDAC=y) help Not all machines support hardware-driven error report. Some of those provide a BIOS-driven error report mechanism via ACPI, using the @@ -97,7 +76,7 @@ config EDAC_GHES config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64)" - depends on EDAC_MM_EDAC && AMD_NB && EDAC_DECODE_MCE + depends on AMD_NB && EDAC_DECODE_MCE help Support for error detection and correction of DRAM ECC errors on the AMD64 families (>= K8) of memory controllers. @@ -123,28 +102,28 @@ config EDAC_AMD64_ERROR_INJECTION config EDAC_AMD76X tristate "AMD 76x (760, 762, 768)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the AMD 76x series of chipsets used with the Athlon processor. config EDAC_E7XXX tristate "Intel e7xxx (e7205, e7500, e7501, e7505)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Intel E7205, E7500, E7501 and E7505 server chipsets. config EDAC_E752X tristate "Intel e752x (e7520, e7525, e7320) and 3100" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel E7520, E7525, E7320 server chipsets. config EDAC_I82443BXGX tristate "Intel 82443BX/GX (440BX/GX)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 depends on BROKEN help Support for error detection and correction on the Intel @@ -152,56 +131,56 @@ config EDAC_I82443BXGX config EDAC_I82875P tristate "Intel 82875p (D82875P, E7210)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Intel DP82785P and E7210 server chipsets. config EDAC_I82975X tristate "Intel 82975x (D82975x)" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel DP82975x server chipsets. config EDAC_I3000 tristate "Intel 3000/3010" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel 3000 and 3010 server chipsets. config EDAC_I3200 tristate "Intel 3200" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel 3200 and 3210 server chipsets. config EDAC_IE31200 tristate "Intel e312xx" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel E3-1200 based DRAM controllers. config EDAC_X38 tristate "Intel X38" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel X38 server chipsets. config EDAC_I5400 tristate "Intel 5400 (Seaburg) chipsets" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction the Intel i5400 MCH chipset (Seaburg). config EDAC_I7CORE tristate "Intel i7 Core (Nehalem) processors" - depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL + depends on PCI && X86 && X86_MCE_INTEL help Support for error detection and correction the Intel i7 Core (Nehalem) Integrated Memory Controller that exists on @@ -210,87 +189,93 @@ config EDAC_I7CORE config EDAC_I82860 tristate "Intel 82860" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Intel 82860 chipset. config EDAC_R82600 tristate "Radisys 82600 embedded chipset" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Radisys 82600 embedded chipset. config EDAC_I5000 tristate "Intel Greencreek/Blackford chipset" - depends on EDAC_MM_EDAC && X86 && PCI + depends on X86 && PCI help Support for error detection and correction the Intel Greekcreek/Blackford chipsets. config EDAC_I5100 tristate "Intel San Clemente MCH" - depends on EDAC_MM_EDAC && X86 && PCI + depends on X86 && PCI help Support for error detection and correction the Intel San Clemente MCH. config EDAC_I7300 tristate "Intel Clarksboro MCH" - depends on EDAC_MM_EDAC && X86 && PCI + depends on X86 && PCI help Support for error detection and correction the Intel Clarksboro MCH (Intel 7300 chipset). config EDAC_SBRIDGE tristate "Intel Sandy-Bridge/Ivy-Bridge/Haswell Integrated MC" - depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL - depends on PCI_MMCONFIG + depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG help Support for error detection and correction the Intel Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers. config EDAC_SKX tristate "Intel Skylake server Integrated MC" - depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL - depends on PCI_MMCONFIG + depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG help Support for error detection and correction the Intel Skylake server Integrated Memory Controllers. +config EDAC_PND2 + tristate "Intel Pondicherry2" + depends on PCI && X86_64 && X86_MCE_INTEL + help + Support for error detection and correction on the Intel + Pondicherry2 Integrated Memory Controller. This SoC IP is + first used on the Apollo Lake platform and Denverton + micro-server but may appear on others in the future. + config EDAC_MPC85XX tristate "Freescale MPC83xx / MPC85xx" - depends on EDAC_MM_EDAC && FSL_SOC + depends on FSL_SOC help Support for error detection and correction on the Freescale MPC8349, MPC8560, MPC8540, MPC8548, T4240 config EDAC_LAYERSCAPE tristate "Freescale Layerscape DDR" - depends on EDAC_MM_EDAC && ARCH_LAYERSCAPE + depends on ARCH_LAYERSCAPE help Support for error detection and correction on Freescale memory controllers on Layerscape SoCs. config EDAC_MV64X60 tristate "Marvell MV64x60" - depends on EDAC_MM_EDAC && MV64X60 + depends on MV64X60 help Support for error detection and correction on the Marvell MV64360 and MV64460 chipsets. config EDAC_PASEMI tristate "PA Semi PWRficient" - depends on EDAC_MM_EDAC && PCI - depends on PPC_PASEMI + depends on PPC_PASEMI && PCI help Support for error detection and correction on PA Semi PWRficient. config EDAC_CELL tristate "Cell Broadband Engine memory controller" - depends on EDAC_MM_EDAC && PPC_CELL_COMMON + depends on PPC_CELL_COMMON help Support for error detection and correction on the Cell Broadband Engine internal memory controller @@ -298,7 +283,7 @@ config EDAC_CELL config EDAC_PPC4XX tristate "PPC4xx IBM DDR2 Memory Controller" - depends on EDAC_MM_EDAC && 4xx + depends on 4xx help This enables support for EDAC on the ECC memory used with the IBM DDR2 memory controller found in various @@ -307,7 +292,7 @@ config EDAC_PPC4XX config EDAC_AMD8131 tristate "AMD8131 HyperTransport PCI-X Tunnel" - depends on EDAC_MM_EDAC && PCI && PPC_MAPLE + depends on PCI && PPC_MAPLE help Support for error detection and correction on the AMD8131 HyperTransport PCI-X Tunnel chip. @@ -316,7 +301,7 @@ config EDAC_AMD8131 config EDAC_AMD8111 tristate "AMD8111 HyperTransport I/O Hub" - depends on EDAC_MM_EDAC && PCI && PPC_MAPLE + depends on PCI && PPC_MAPLE help Support for error detection and correction on the AMD8111 HyperTransport I/O Hub chip. @@ -325,7 +310,7 @@ config EDAC_AMD8111 config EDAC_CPC925 tristate "IBM CPC925 Memory Controller (PPC970FX)" - depends on EDAC_MM_EDAC && PPC64 + depends on PPC64 help Support for error detection and correction on the IBM CPC925 Bridge and Memory Controller, which is @@ -334,7 +319,7 @@ config EDAC_CPC925 config EDAC_TILE tristate "Tilera Memory Controller" - depends on EDAC_MM_EDAC && TILE + depends on TILE default y help Support for error detection and correction on the @@ -342,49 +327,59 @@ config EDAC_TILE config EDAC_HIGHBANK_MC tristate "Highbank Memory Controller" - depends on EDAC_MM_EDAC && ARCH_HIGHBANK + depends on ARCH_HIGHBANK help Support for error detection and correction on the Calxeda Highbank memory controller. config EDAC_HIGHBANK_L2 tristate "Highbank L2 Cache" - depends on EDAC_MM_EDAC && ARCH_HIGHBANK + depends on ARCH_HIGHBANK help Support for error detection and correction on the Calxeda Highbank memory controller. config EDAC_OCTEON_PC tristate "Cavium Octeon Primary Caches" - depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON + depends on CPU_CAVIUM_OCTEON help Support for error detection and correction on the primary caches of the cnMIPS cores of Cavium Octeon family SOCs. config EDAC_OCTEON_L2C tristate "Cavium Octeon Secondary Caches (L2C)" - depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC + depends on CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. config EDAC_OCTEON_LMC tristate "Cavium Octeon DRAM Memory Controller (LMC)" - depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC + depends on CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. config EDAC_OCTEON_PCI tristate "Cavium Octeon PCI Controller" - depends on EDAC_MM_EDAC && PCI && CAVIUM_OCTEON_SOC + depends on PCI && CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. +config EDAC_THUNDERX + tristate "Cavium ThunderX EDAC" + depends on ARM64 + depends on PCI + help + Support for error detection and correction on the + Cavium ThunderX memory controllers (LMC), Cache + Coherent Processor Interconnect (CCPI) and L2 cache + blocks (TAD, CBC, MCI). + config EDAC_ALTERA bool "Altera SOCFPGA ECC" - depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA + depends on EDAC=y && ARCH_SOCFPGA help Support for error detection and correction on the Altera SOCs. This must be selected for SDRAM ECC. @@ -450,14 +445,14 @@ config EDAC_ALTERA_SDMMC config EDAC_SYNOPSYS tristate "Synopsys DDR Memory Controller" - depends on EDAC_MM_EDAC && ARCH_ZYNQ + depends on ARCH_ZYNQ help Support for error detection and correction on the Synopsys DDR memory controller. config EDAC_XGENE tristate "APM X-Gene SoC" - depends on EDAC_MM_EDAC && (ARM64 || COMPILE_TEST) + depends on (ARM64 || COMPILE_TEST) help Support for error detection and correction on the APM X-Gene family of SOCs. diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 88e472e8b9a9..0fd9ffa63299 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -6,8 +6,7 @@ # GNU General Public License. # -obj-$(CONFIG_EDAC) := edac_stub.o -obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o +obj-$(CONFIG_EDAC) := edac_core.o edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_core-y += edac_module.o edac_device_sysfs.o wq.o @@ -32,6 +31,7 @@ obj-$(CONFIG_EDAC_I7300) += i7300_edac.o obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o obj-$(CONFIG_EDAC_SKX) += skx_edac.o +obj-$(CONFIG_EDAC_PND2) += pnd2_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o @@ -66,13 +66,14 @@ obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o obj-$(CONFIG_EDAC_TILE) += tile_edac.o -obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o -obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o +obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o +obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o obj-$(CONFIG_EDAC_OCTEON_PC) += octeon_edac-pc.o obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o +obj-$(CONFIG_EDAC_THUNDERX) += thunderx_edac.o obj-$(CONFIG_EDAC_ALTERA) += altera_edac.o obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index c5a5b91f37f0..7717b094fabb 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1023,13 +1023,23 @@ out: return ret; } +static int socfpga_is_a10(void) +{ + return of_machine_is_compatible("altr,socfpga-arria10"); +} + static int validate_parent_available(struct device_node *np); static const struct of_device_id altr_edac_a10_device_of_match[]; static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) { int irq; - struct device_node *child, *np = of_find_compatible_node(NULL, NULL, - "altr,socfpga-a10-ecc-manager"); + struct device_node *child, *np; + + if (!socfpga_is_a10()) + return -ENODEV; + + np = of_find_compatible_node(NULL, NULL, + "altr,socfpga-a10-ecc-manager"); if (!np) { edac_printk(KERN_ERR, EDAC_DEVICE, "ECC Manager not found\n"); return -ENODEV; @@ -1545,8 +1555,12 @@ static const struct edac_device_prv_data a10_sdmmceccb_data = { static int __init socfpga_init_sdmmc_ecc(void) { int rc = -ENODEV; - struct device_node *child = of_find_compatible_node(NULL, NULL, - "altr,socfpga-sdmmc-ecc"); + struct device_node *child; + + if (!socfpga_is_a10()) + return -ENODEV; + + child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc"); if (!child) { edac_printk(KERN_WARNING, EDAC_DEVICE, "SDMMC node not found\n"); return -ENODEV; diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index e5573c56b15e..480072139b7a 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -40,6 +40,11 @@ #define edac_atomic_scrub(va, size) do { } while (0) #endif +int edac_op_state = EDAC_OPSTATE_INVAL; +EXPORT_SYMBOL_GPL(edac_op_state); + +static int edac_report = EDAC_REPORTING_ENABLED; + /* lock to memory controller's control array */ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); @@ -52,6 +57,65 @@ static void const *edac_mc_owner; static struct bus_type mc_bus[EDAC_MAX_MCS]; +int edac_get_report_status(void) +{ + return edac_report; +} +EXPORT_SYMBOL_GPL(edac_get_report_status); + +void edac_set_report_status(int new) +{ + if (new == EDAC_REPORTING_ENABLED || + new == EDAC_REPORTING_DISABLED || + new == EDAC_REPORTING_FORCE) + edac_report = new; +} +EXPORT_SYMBOL_GPL(edac_set_report_status); + +static int edac_report_set(const char *str, const struct kernel_param *kp) +{ + if (!str) + return -EINVAL; + + if (!strncmp(str, "on", 2)) + edac_report = EDAC_REPORTING_ENABLED; + else if (!strncmp(str, "off", 3)) + edac_report = EDAC_REPORTING_DISABLED; + else if (!strncmp(str, "force", 5)) + edac_report = EDAC_REPORTING_FORCE; + + return 0; +} + +static int edac_report_get(char *buffer, const struct kernel_param *kp) +{ + int ret = 0; + + switch (edac_report) { + case EDAC_REPORTING_ENABLED: + ret = sprintf(buffer, "on"); + break; + case EDAC_REPORTING_DISABLED: + ret = sprintf(buffer, "off"); + break; + case EDAC_REPORTING_FORCE: + ret = sprintf(buffer, "force"); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static const struct kernel_param_ops edac_report_ops = { + .set = edac_report_set, + .get = edac_report_get, +}; + +module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644); + unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len) { @@ -505,22 +569,6 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev) EXPORT_SYMBOL_GPL(find_mci_by_dev); /* - * handler for EDAC to check if NMI type handler has asserted interrupt - */ -static int edac_mc_assert_error_check_and_clear(void) -{ - int old_state; - - if (edac_op_state == EDAC_OPSTATE_POLL) - return 1; - - old_state = edac_err_assert; - edac_err_assert = 0; - - return old_state; -} - -/* * edac_mc_workq_function * performs the operation scheduled by a workq request */ @@ -536,7 +584,7 @@ static void edac_mc_workq_function(struct work_struct *work_req) return; } - if (edac_mc_assert_error_check_and_clear()) + if (edac_op_state == EDAC_OPSTATE_POLL) mci->edac_check(mci); mutex_unlock(&mem_ctls_mutex); @@ -601,7 +649,6 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci) } list_add_tail_rcu(&mci->link, insert_before); - atomic_inc(&edac_handlers); return 0; fail0: @@ -619,7 +666,6 @@ fail1: static int del_mc_from_global_list(struct mem_ctl_info *mci) { - int handlers = atomic_dec_return(&edac_handlers); list_del_rcu(&mci->link); /* these are for safe removal of devices from global list while @@ -628,7 +674,7 @@ static int del_mc_from_global_list(struct mem_ctl_info *mci) synchronize_rcu(); INIT_LIST_HEAD(&mci->link); - return handlers; + return list_empty(&mc_devices); } struct mem_ctl_info *edac_mc_find(int idx) @@ -763,7 +809,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) /* mark MCI offline: */ mci->op_state = OP_OFFLINE; - if (!del_mc_from_global_list(mci)) + if (del_mc_from_global_list(mci)) edac_mc_owner = NULL; mutex_unlock(&mem_ctls_mutex); @@ -1195,10 +1241,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, /* Report the error via the trace interface */ grain_bits = fls_long(e->grain) + 1; - trace_mc_event(type, e->msg, e->label, e->error_count, - mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, - (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page, - grain_bits, e->syndrome, e->other_detail); + + if (IS_ENABLED(CONFIG_RAS)) + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, + e->low_layer, + (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page, + grain_bits, e->syndrome, e->other_detail); edac_raw_mc_handle_error(type, mci, e); } diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c deleted file mode 100644 index 952e411f01f2..000000000000 --- a/drivers/edac/edac_stub.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * common EDAC components that must be in kernel - * - * Author: Dave Jiang <djiang@mvista.com> - * - * 2007 (c) MontaVista Software, Inc. - * 2010 (c) Advanced Micro Devices Inc. - * Borislav Petkov <bp@alien8.de> - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - * - */ -#include <linux/module.h> -#include <linux/edac.h> -#include <linux/atomic.h> -#include <linux/device.h> - -int edac_op_state = EDAC_OPSTATE_INVAL; -EXPORT_SYMBOL_GPL(edac_op_state); - -atomic_t edac_handlers = ATOMIC_INIT(0); -EXPORT_SYMBOL_GPL(edac_handlers); - -int edac_err_assert = 0; -EXPORT_SYMBOL_GPL(edac_err_assert); - -int edac_report_status = EDAC_REPORTING_ENABLED; -EXPORT_SYMBOL_GPL(edac_report_status); - -static int __init edac_report_setup(char *str) -{ - if (!str) - return -EINVAL; - - if (!strncmp(str, "on", 2)) - set_edac_report_status(EDAC_REPORTING_ENABLED); - else if (!strncmp(str, "off", 3)) - set_edac_report_status(EDAC_REPORTING_DISABLED); - else if (!strncmp(str, "force", 5)) - set_edac_report_status(EDAC_REPORTING_FORCE); - - return 0; -} -__setup("edac_report=", edac_report_setup); - -/* - * called to determine if there is an EDAC driver interested in - * knowing an event (such as NMI) occurred - */ -int edac_handler_set(void) -{ - if (edac_op_state == EDAC_OPSTATE_POLL) - return 0; - - return atomic_read(&edac_handlers); -} -EXPORT_SYMBOL_GPL(edac_handler_set); - -/* - * handler for NMI type of interrupts to assert error - */ -void edac_atomic_assert_error(void) -{ - edac_err_assert++; -} -EXPORT_SYMBOL_GPL(edac_atomic_assert_error); diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 1670d27bcac8..f683919981b0 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -1293,7 +1293,7 @@ static int i5000_init_csrows(struct mem_ctl_info *mci) dimm->mtype = MEM_FB_DDR2; /* ask what device type on this row */ - if (MTR_DRAM_WIDTH(mtr)) + if (MTR_DRAM_WIDTH(mtr) == 8) dimm->dtype = DEV_X8; else dimm->dtype = DEV_X4; diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index abf6ef22e220..37a9ba71da44 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -1207,13 +1207,14 @@ static int i5400_init_dimms(struct mem_ctl_info *mci) dimm->nr_pages = size_mb << 8; dimm->grain = 8; - dimm->dtype = MTR_DRAM_WIDTH(mtr) ? DEV_X8 : DEV_X4; + dimm->dtype = MTR_DRAM_WIDTH(mtr) == 8 ? + DEV_X8 : DEV_X4; dimm->mtype = MEM_FB_DDR2; /* * The eccc mechanism is SDDC (aka SECC), with * is similar to Chipkill. */ - dimm->edac_mode = MTR_DRAM_WIDTH(mtr) ? + dimm->edac_mode = MTR_DRAM_WIDTH(mtr) == 8 ? EDAC_S8ECD8ED : EDAC_S4ECD4ED; ndimms++; } diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c new file mode 100644 index 000000000000..1cad5a9af8d0 --- /dev/null +++ b/drivers/edac/pnd2_edac.c @@ -0,0 +1,1546 @@ +/* + * Driver for Pondicherry2 memory controller. + * + * Copyright (c) 2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * [Derived from sb_edac.c] + * + * Translation of system physical addresses to DIMM addresses + * is a two stage process: + * + * First the Pondicherry 2 memory controller handles slice and channel interleaving + * in "sys2pmi()". This is (almost) completley common between platforms. + * + * Then a platform specific dunit (DIMM unit) completes the process to provide DIMM, + * rank, bank, row and column using the appropriate "dunit_ops" functions/parameters. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/edac.h> +#include <linux/mmzone.h> +#include <linux/smp.h> +#include <linux/bitmap.h> +#include <linux/math64.h> +#include <linux/mod_devicetable.h> +#include <asm/cpu_device_id.h> +#include <asm/intel-family.h> +#include <asm/processor.h> +#include <asm/mce.h> + +#include "edac_mc.h" +#include "edac_module.h" +#include "pnd2_edac.h" + +#define APL_NUM_CHANNELS 4 +#define DNV_NUM_CHANNELS 2 +#define DNV_MAX_DIMMS 2 /* Max DIMMs per channel */ + +enum type { + APL, + DNV, /* All requests go to PMI CH0 on each slice (CH1 disabled) */ +}; + +struct dram_addr { + int chan; + int dimm; + int rank; + int bank; + int row; + int col; +}; + +struct pnd2_pvt { + int dimm_geom[APL_NUM_CHANNELS]; + u64 tolm, tohm; +}; + +/* + * System address space is divided into multiple regions with + * different interleave rules in each. The as0/as1 regions + * have no interleaving at all. The as2 region is interleaved + * between two channels. The mot region is magic and may overlap + * other regions, with its interleave rules taking precedence. + * Addresses not in any of these regions are interleaved across + * all four channels. + */ +static struct region { + u64 base; + u64 limit; + u8 enabled; +} mot, as0, as1, as2; + +static struct dunit_ops { + char *name; + enum type type; + int pmiaddr_shift; + int pmiidx_shift; + int channels; + int dimms_per_channel; + int (*rd_reg)(int port, int off, int op, void *data, size_t sz, char *name); + int (*get_registers)(void); + int (*check_ecc)(void); + void (*mk_region)(char *name, struct region *rp, void *asym); + void (*get_dimm_config)(struct mem_ctl_info *mci); + int (*pmi2mem)(struct mem_ctl_info *mci, u64 pmiaddr, u32 pmiidx, + struct dram_addr *daddr, char *msg); +} *ops; + +static struct mem_ctl_info *pnd2_mci; + +#define PND2_MSG_SIZE 256 + +/* Debug macros */ +#define pnd2_printk(level, fmt, arg...) \ + edac_printk(level, "pnd2", fmt, ##arg) + +#define pnd2_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "pnd2", fmt, ##arg) + +#define MOT_CHAN_INTLV_BIT_1SLC_2CH 12 +#define MOT_CHAN_INTLV_BIT_2SLC_2CH 13 +#define SELECTOR_DISABLED (-1) +#define _4GB (1ul << 32) + +#define PMI_ADDRESS_WIDTH 31 +#define PND_MAX_PHYS_BIT 39 + +#define APL_ASYMSHIFT 28 +#define DNV_ASYMSHIFT 31 +#define CH_HASH_MASK_LSB 6 +#define SLICE_HASH_MASK_LSB 6 +#define MOT_SLC_INTLV_BIT 12 +#define LOG2_PMI_ADDR_GRANULARITY 5 +#define MOT_SHIFT 24 + +#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo)) +#define U64_LSHIFT(val, s) ((u64)(val) << (s)) + +#ifdef CONFIG_X86_INTEL_SBI_APL +#include "linux/platform_data/sbi_apl.h" +int sbi_send(int port, int off, int op, u32 *data) +{ + struct sbi_apl_message sbi_arg; + int ret, read = 0; + + memset(&sbi_arg, 0, sizeof(sbi_arg)); + + if (op == 0 || op == 4 || op == 6) + read = 1; + else + sbi_arg.data = *data; + + sbi_arg.opcode = op; + sbi_arg.port_address = port; + sbi_arg.register_offset = off; + ret = sbi_apl_commit(&sbi_arg); + if (ret || sbi_arg.status) + edac_dbg(2, "sbi_send status=%d ret=%d data=%x\n", + sbi_arg.status, ret, sbi_arg.data); + + if (ret == 0) + ret = sbi_arg.status; + + if (ret == 0 && read) + *data = sbi_arg.data; + + return ret; +} +#else +int sbi_send(int port, int off, int op, u32 *data) +{ + return -EUNATCH; +} +#endif + +static int apl_rd_reg(int port, int off, int op, void *data, size_t sz, char *name) +{ + int ret = 0; + + edac_dbg(2, "Read %s port=%x off=%x op=%x\n", name, port, off, op); + switch (sz) { + case 8: + ret = sbi_send(port, off + 4, op, (u32 *)(data + 4)); + case 4: + ret = sbi_send(port, off, op, (u32 *)data); + pnd2_printk(KERN_DEBUG, "%s=%x%08x ret=%d\n", name, + sz == 8 ? *((u32 *)(data + 4)) : 0, *((u32 *)data), ret); + break; + } + + return ret; +} + +static u64 get_mem_ctrl_hub_base_addr(void) +{ + struct b_cr_mchbar_lo_pci lo; + struct b_cr_mchbar_hi_pci hi; + struct pci_dev *pdev; + + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x1980, NULL); + if (pdev) { + pci_read_config_dword(pdev, 0x48, (u32 *)&lo); + pci_read_config_dword(pdev, 0x4c, (u32 *)&hi); + pci_dev_put(pdev); + } else { + return 0; + } + + if (!lo.enable) { + edac_dbg(2, "MMIO via memory controller hub base address is disabled!\n"); + return 0; + } + + return U64_LSHIFT(hi.base, 32) | U64_LSHIFT(lo.base, 15); +} + +static u64 get_sideband_reg_base_addr(void) +{ + struct pci_dev *pdev; + u32 hi, lo; + + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x19dd, NULL); + if (pdev) { + pci_read_config_dword(pdev, 0x10, &lo); + pci_read_config_dword(pdev, 0x14, &hi); + pci_dev_put(pdev); + return (U64_LSHIFT(hi, 32) | U64_LSHIFT(lo, 0)); + } else { + return 0xfd000000; + } +} + +static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *name) +{ + struct pci_dev *pdev; + char *base; + u64 addr; + + if (op == 4) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x1980, NULL); + if (!pdev) + return -ENODEV; + + pci_read_config_dword(pdev, off, data); + pci_dev_put(pdev); + } else { + /* MMIO via memory controller hub base address */ + if (op == 0 && port == 0x4c) { + addr = get_mem_ctrl_hub_base_addr(); + if (!addr) + return -ENODEV; + } else { + /* MMIO via sideband register base address */ + addr = get_sideband_reg_base_addr(); + if (!addr) + return -ENODEV; + addr += (port << 16); + } + + base = ioremap((resource_size_t)addr, 0x10000); + if (!base) + return -ENODEV; + + if (sz == 8) + *(u32 *)(data + 4) = *(u32 *)(base + off + 4); + *(u32 *)data = *(u32 *)(base + off); + + iounmap(base); + } + + edac_dbg(2, "Read %s=%.8x_%.8x\n", name, + (sz == 8) ? *(u32 *)(data + 4) : 0, *(u32 *)data); + + return 0; +} + +#define RD_REGP(regp, regname, port) \ + ops->rd_reg(port, \ + regname##_offset, \ + regname##_r_opcode, \ + regp, sizeof(struct regname), \ + #regname) + +#define RD_REG(regp, regname) \ + ops->rd_reg(regname ## _port, \ + regname##_offset, \ + regname##_r_opcode, \ + regp, sizeof(struct regname), \ + #regname) + +static u64 top_lm, top_hm; +static bool two_slices; +static bool two_channels; /* Both PMI channels in one slice enabled */ + +static u8 sym_chan_mask; +static u8 asym_chan_mask; +static u8 chan_mask; + +static int slice_selector = -1; +static int chan_selector = -1; +static u64 slice_hash_mask; +static u64 chan_hash_mask; + +static void mk_region(char *name, struct region *rp, u64 base, u64 limit) +{ + rp->enabled = 1; + rp->base = base; + rp->limit = limit; + edac_dbg(2, "Region:%s [%llx, %llx]\n", name, base, limit); +} + +static void mk_region_mask(char *name, struct region *rp, u64 base, u64 mask) +{ + if (mask == 0) { + pr_info(FW_BUG "MOT mask cannot be zero\n"); + return; + } + if (mask != GENMASK_ULL(PND_MAX_PHYS_BIT, __ffs(mask))) { + pr_info(FW_BUG "MOT mask not power of two\n"); + return; + } + if (base & ~mask) { + pr_info(FW_BUG "MOT region base/mask alignment error\n"); + return; + } + rp->base = base; + rp->limit = (base | ~mask) & GENMASK_ULL(PND_MAX_PHYS_BIT, 0); + rp->enabled = 1; + edac_dbg(2, "Region:%s [%llx, %llx]\n", name, base, rp->limit); +} + +static bool in_region(struct region *rp, u64 addr) +{ + if (!rp->enabled) + return false; + + return rp->base <= addr && addr <= rp->limit; +} + +static int gen_sym_mask(struct b_cr_slice_channel_hash *p) +{ + int mask = 0; + + if (!p->slice_0_mem_disabled) + mask |= p->sym_slice0_channel_enabled; + + if (!p->slice_1_disabled) + mask |= p->sym_slice1_channel_enabled << 2; + + if (p->ch_1_disabled || p->enable_pmi_dual_data_mode) + mask &= 0x5; + + return mask; +} + +static int gen_asym_mask(struct b_cr_slice_channel_hash *p, + struct b_cr_asym_mem_region0_mchbar *as0, + struct b_cr_asym_mem_region1_mchbar *as1, + struct b_cr_asym_2way_mem_region_mchbar *as2way) +{ + const int intlv[] = { 0x5, 0xA, 0x3, 0xC }; + int mask = 0; + + if (as2way->asym_2way_interleave_enable) + mask = intlv[as2way->asym_2way_intlv_mode]; + if (as0->slice0_asym_enable) + mask |= (1 << as0->slice0_asym_channel_select); + if (as1->slice1_asym_enable) + mask |= (4 << as1->slice1_asym_channel_select); + if (p->slice_0_mem_disabled) + mask &= 0xc; + if (p->slice_1_disabled) + mask &= 0x3; + if (p->ch_1_disabled || p->enable_pmi_dual_data_mode) + mask &= 0x5; + + return mask; +} + +static struct b_cr_tolud_pci tolud; +static struct b_cr_touud_lo_pci touud_lo; +static struct b_cr_touud_hi_pci touud_hi; +static struct b_cr_asym_mem_region0_mchbar asym0; +static struct b_cr_asym_mem_region1_mchbar asym1; +static struct b_cr_asym_2way_mem_region_mchbar asym_2way; +static struct b_cr_mot_out_base_mchbar mot_base; +static struct b_cr_mot_out_mask_mchbar mot_mask; +static struct b_cr_slice_channel_hash chash; + +/* Apollo Lake dunit */ +/* + * Validated on board with just two DIMMs in the [0] and [2] positions + * in this array. Other port number matches documentation, but caution + * advised. + */ +static const int apl_dports[APL_NUM_CHANNELS] = { 0x18, 0x10, 0x11, 0x19 }; +static struct d_cr_drp0 drp0[APL_NUM_CHANNELS]; + +/* Denverton dunit */ +static const int dnv_dports[DNV_NUM_CHANNELS] = { 0x10, 0x12 }; +static struct d_cr_dsch dsch; +static struct d_cr_ecc_ctrl ecc_ctrl[DNV_NUM_CHANNELS]; +static struct d_cr_drp drp[DNV_NUM_CHANNELS]; +static struct d_cr_dmap dmap[DNV_NUM_CHANNELS]; +static struct d_cr_dmap1 dmap1[DNV_NUM_CHANNELS]; +static struct d_cr_dmap2 dmap2[DNV_NUM_CHANNELS]; +static struct d_cr_dmap3 dmap3[DNV_NUM_CHANNELS]; +static struct d_cr_dmap4 dmap4[DNV_NUM_CHANNELS]; +static struct d_cr_dmap5 dmap5[DNV_NUM_CHANNELS]; + +static void apl_mk_region(char *name, struct region *rp, void *asym) +{ + struct b_cr_asym_mem_region0_mchbar *a = asym; + + mk_region(name, rp, + U64_LSHIFT(a->slice0_asym_base, APL_ASYMSHIFT), + U64_LSHIFT(a->slice0_asym_limit, APL_ASYMSHIFT) + + GENMASK_ULL(APL_ASYMSHIFT - 1, 0)); +} + +static void dnv_mk_region(char *name, struct region *rp, void *asym) +{ + struct b_cr_asym_mem_region_denverton *a = asym; + + mk_region(name, rp, + U64_LSHIFT(a->slice_asym_base, DNV_ASYMSHIFT), + U64_LSHIFT(a->slice_asym_limit, DNV_ASYMSHIFT) + + GENMASK_ULL(DNV_ASYMSHIFT - 1, 0)); +} + +static int apl_get_registers(void) +{ + int i; + + if (RD_REG(&asym_2way, b_cr_asym_2way_mem_region_mchbar)) + return -ENODEV; + + for (i = 0; i < APL_NUM_CHANNELS; i++) + if (RD_REGP(&drp0[i], d_cr_drp0, apl_dports[i])) + return -ENODEV; + + return 0; +} + +static int dnv_get_registers(void) +{ + int i; + + if (RD_REG(&dsch, d_cr_dsch)) + return -ENODEV; + + for (i = 0; i < DNV_NUM_CHANNELS; i++) + if (RD_REGP(&ecc_ctrl[i], d_cr_ecc_ctrl, dnv_dports[i]) || + RD_REGP(&drp[i], d_cr_drp, dnv_dports[i]) || + RD_REGP(&dmap[i], d_cr_dmap, dnv_dports[i]) || + RD_REGP(&dmap1[i], d_cr_dmap1, dnv_dports[i]) || + RD_REGP(&dmap2[i], d_cr_dmap2, dnv_dports[i]) || + RD_REGP(&dmap3[i], d_cr_dmap3, dnv_dports[i]) || + RD_REGP(&dmap4[i], d_cr_dmap4, dnv_dports[i]) || + RD_REGP(&dmap5[i], d_cr_dmap5, dnv_dports[i])) + return -ENODEV; + + return 0; +} + +/* + * Read all the h/w config registers once here (they don't + * change at run time. Figure out which address ranges have + * which interleave characteristics. + */ +static int get_registers(void) +{ + const int intlv[] = { 10, 11, 12, 12 }; + + if (RD_REG(&tolud, b_cr_tolud_pci) || + RD_REG(&touud_lo, b_cr_touud_lo_pci) || + RD_REG(&touud_hi, b_cr_touud_hi_pci) || + RD_REG(&asym0, b_cr_asym_mem_region0_mchbar) || + RD_REG(&asym1, b_cr_asym_mem_region1_mchbar) || + RD_REG(&mot_base, b_cr_mot_out_base_mchbar) || + RD_REG(&mot_mask, b_cr_mot_out_mask_mchbar) || + RD_REG(&chash, b_cr_slice_channel_hash)) + return -ENODEV; + + if (ops->get_registers()) + return -ENODEV; + + if (ops->type == DNV) { + /* PMI channel idx (always 0) for asymmetric region */ + asym0.slice0_asym_channel_select = 0; + asym1.slice1_asym_channel_select = 0; + /* PMI channel bitmap (always 1) for symmetric region */ + chash.sym_slice0_channel_enabled = 0x1; + chash.sym_slice1_channel_enabled = 0x1; + } + + if (asym0.slice0_asym_enable) + ops->mk_region("as0", &as0, &asym0); + + if (asym1.slice1_asym_enable) + ops->mk_region("as1", &as1, &asym1); + + if (asym_2way.asym_2way_interleave_enable) { + mk_region("as2way", &as2, + U64_LSHIFT(asym_2way.asym_2way_base, APL_ASYMSHIFT), + U64_LSHIFT(asym_2way.asym_2way_limit, APL_ASYMSHIFT) + + GENMASK_ULL(APL_ASYMSHIFT - 1, 0)); + } + + if (mot_base.imr_en) { + mk_region_mask("mot", &mot, + U64_LSHIFT(mot_base.mot_out_base, MOT_SHIFT), + U64_LSHIFT(mot_mask.mot_out_mask, MOT_SHIFT)); + } + + top_lm = U64_LSHIFT(tolud.tolud, 20); + top_hm = U64_LSHIFT(touud_hi.touud, 32) | U64_LSHIFT(touud_lo.touud, 20); + + two_slices = !chash.slice_1_disabled && + !chash.slice_0_mem_disabled && + (chash.sym_slice0_channel_enabled != 0) && + (chash.sym_slice1_channel_enabled != 0); + two_channels = !chash.ch_1_disabled && + !chash.enable_pmi_dual_data_mode && + ((chash.sym_slice0_channel_enabled == 3) || + (chash.sym_slice1_channel_enabled == 3)); + + sym_chan_mask = gen_sym_mask(&chash); + asym_chan_mask = gen_asym_mask(&chash, &asym0, &asym1, &asym_2way); + chan_mask = sym_chan_mask | asym_chan_mask; + + if (two_slices && !two_channels) { + if (chash.hvm_mode) + slice_selector = 29; + else + slice_selector = intlv[chash.interleave_mode]; + } else if (!two_slices && two_channels) { + if (chash.hvm_mode) + chan_selector = 29; + else + chan_selector = intlv[chash.interleave_mode]; + } else if (two_slices && two_channels) { + if (chash.hvm_mode) { + slice_selector = 29; + chan_selector = 30; + } else { + slice_selector = intlv[chash.interleave_mode]; + chan_selector = intlv[chash.interleave_mode] + 1; + } + } + + if (two_slices) { + if (!chash.hvm_mode) + slice_hash_mask = chash.slice_hash_mask << SLICE_HASH_MASK_LSB; + if (!two_channels) + slice_hash_mask |= BIT_ULL(slice_selector); + } + + if (two_channels) { + if (!chash.hvm_mode) + chan_hash_mask = chash.ch_hash_mask << CH_HASH_MASK_LSB; + if (!two_slices) + chan_hash_mask |= BIT_ULL(chan_selector); + } + + return 0; +} + +/* Get a contiguous memory address (remove the MMIO gap) */ +static u64 remove_mmio_gap(u64 sys) +{ + return (sys < _4GB) ? sys : sys - (_4GB - top_lm); +} + +/* Squeeze out one address bit, shift upper part down to fill gap */ +static void remove_addr_bit(u64 *addr, int bitidx) +{ + u64 mask; + + if (bitidx == -1) + return; + + mask = (1ull << bitidx) - 1; + *addr = ((*addr >> 1) & ~mask) | (*addr & mask); +} + +/* XOR all the bits from addr specified in mask */ +static int hash_by_mask(u64 addr, u64 mask) +{ + u64 result = addr & mask; + + result = (result >> 32) ^ result; + result = (result >> 16) ^ result; + result = (result >> 8) ^ result; + result = (result >> 4) ^ result; + result = (result >> 2) ^ result; + result = (result >> 1) ^ result; + + return (int)result & 1; +} + +/* + * First stage decode. Take the system address and figure out which + * second stage will deal with it based on interleave modes. + */ +static int sys2pmi(const u64 addr, u32 *pmiidx, u64 *pmiaddr, char *msg) +{ + u64 contig_addr, contig_base, contig_offset, contig_base_adj; + int mot_intlv_bit = two_slices ? MOT_CHAN_INTLV_BIT_2SLC_2CH : + MOT_CHAN_INTLV_BIT_1SLC_2CH; + int slice_intlv_bit_rm = SELECTOR_DISABLED; + int chan_intlv_bit_rm = SELECTOR_DISABLED; + /* Determine if address is in the MOT region. */ + bool mot_hit = in_region(&mot, addr); + /* Calculate the number of symmetric regions enabled. */ + int sym_channels = hweight8(sym_chan_mask); + + /* + * The amount we need to shift the asym base can be determined by the + * number of enabled symmetric channels. + * NOTE: This can only work because symmetric memory is not supposed + * to do a 3-way interleave. + */ + int sym_chan_shift = sym_channels >> 1; + + /* Give up if address is out of range, or in MMIO gap */ + if (addr >= (1ul << PND_MAX_PHYS_BIT) || + (addr >= top_lm && addr < _4GB) || addr >= top_hm) { + snprintf(msg, PND2_MSG_SIZE, "Error address 0x%llx is not DRAM", addr); + return -EINVAL; + } + + /* Get a contiguous memory address (remove the MMIO gap) */ + contig_addr = remove_mmio_gap(addr); + + if (in_region(&as0, addr)) { + *pmiidx = asym0.slice0_asym_channel_select; + + contig_base = remove_mmio_gap(as0.base); + contig_offset = contig_addr - contig_base; + contig_base_adj = (contig_base >> sym_chan_shift) * + ((chash.sym_slice0_channel_enabled >> (*pmiidx & 1)) & 1); + contig_addr = contig_offset + ((sym_channels > 0) ? contig_base_adj : 0ull); + } else if (in_region(&as1, addr)) { + *pmiidx = 2u + asym1.slice1_asym_channel_select; + + contig_base = remove_mmio_gap(as1.base); + contig_offset = contig_addr - contig_base; + contig_base_adj = (contig_base >> sym_chan_shift) * + ((chash.sym_slice1_channel_enabled >> (*pmiidx & 1)) & 1); + contig_addr = contig_offset + ((sym_channels > 0) ? contig_base_adj : 0ull); + } else if (in_region(&as2, addr) && (asym_2way.asym_2way_intlv_mode == 0x3ul)) { + bool channel1; + + mot_intlv_bit = MOT_CHAN_INTLV_BIT_1SLC_2CH; + *pmiidx = (asym_2way.asym_2way_intlv_mode & 1) << 1; + channel1 = mot_hit ? ((bool)((addr >> mot_intlv_bit) & 1)) : + hash_by_mask(contig_addr, chan_hash_mask); + *pmiidx |= (u32)channel1; + + contig_base = remove_mmio_gap(as2.base); + chan_intlv_bit_rm = mot_hit ? mot_intlv_bit : chan_selector; + contig_offset = contig_addr - contig_base; + remove_addr_bit(&contig_offset, chan_intlv_bit_rm); + contig_addr = (contig_base >> sym_chan_shift) + contig_offset; + } else { + /* Otherwise we're in normal, boring symmetric mode. */ + *pmiidx = 0u; + + if (two_slices) { + bool slice1; + + if (mot_hit) { + slice_intlv_bit_rm = MOT_SLC_INTLV_BIT; + slice1 = (addr >> MOT_SLC_INTLV_BIT) & 1; + } else { + slice_intlv_bit_rm = slice_selector; + slice1 = hash_by_mask(addr, slice_hash_mask); + } + + *pmiidx = (u32)slice1 << 1; + } + + if (two_channels) { + bool channel1; + + mot_intlv_bit = two_slices ? MOT_CHAN_INTLV_BIT_2SLC_2CH : + MOT_CHAN_INTLV_BIT_1SLC_2CH; + + if (mot_hit) { + chan_intlv_bit_rm = mot_intlv_bit; + channel1 = (addr >> mot_intlv_bit) & 1; + } else { + chan_intlv_bit_rm = chan_selector; + channel1 = hash_by_mask(contig_addr, chan_hash_mask); + } + + *pmiidx |= (u32)channel1; + } + } + + /* Remove the chan_selector bit first */ + remove_addr_bit(&contig_addr, chan_intlv_bit_rm); + /* Remove the slice bit (we remove it second because it must be lower */ + remove_addr_bit(&contig_addr, slice_intlv_bit_rm); + *pmiaddr = contig_addr; + + return 0; +} + +/* Translate PMI address to memory (rank, row, bank, column) */ +#define C(n) (0x10 | (n)) /* column */ +#define B(n) (0x20 | (n)) /* bank */ +#define R(n) (0x40 | (n)) /* row */ +#define RS (0x80) /* rank */ + +/* addrdec values */ +#define AMAP_1KB 0 +#define AMAP_2KB 1 +#define AMAP_4KB 2 +#define AMAP_RSVD 3 + +/* dden values */ +#define DEN_4Gb 0 +#define DEN_8Gb 2 + +/* dwid values */ +#define X8 0 +#define X16 1 + +static struct dimm_geometry { + u8 addrdec; + u8 dden; + u8 dwid; + u8 rowbits, colbits; + u16 bits[PMI_ADDRESS_WIDTH]; +} dimms[] = { + { + .addrdec = AMAP_1KB, .dden = DEN_4Gb, .dwid = X16, + .rowbits = 15, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), B(0), B(1), B(2), R(0), + R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), R(9), + R(10), C(7), C(8), C(9), R(11), RS, R(12), R(13), R(14), + 0, 0, 0, 0 + } + }, + { + .addrdec = AMAP_1KB, .dden = DEN_4Gb, .dwid = X8, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), B(0), B(1), B(2), R(0), + R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), R(9), + R(10), C(7), C(8), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_1KB, .dden = DEN_8Gb, .dwid = X16, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), B(0), B(1), B(2), R(0), + R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), R(9), + R(10), C(7), C(8), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_1KB, .dden = DEN_8Gb, .dwid = X8, + .rowbits = 16, .colbits = 11, + .bits = { + C(2), C(3), C(4), C(5), C(6), B(0), B(1), B(2), R(0), + R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), R(9), + R(10), C(7), C(8), C(9), R(11), RS, C(11), R(12), R(13), + R(14), R(15), 0, 0 + } + }, + { + .addrdec = AMAP_2KB, .dden = DEN_4Gb, .dwid = X16, + .rowbits = 15, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), B(0), B(1), B(2), + R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), + R(9), R(10), C(8), C(9), R(11), RS, R(12), R(13), R(14), + 0, 0, 0, 0 + } + }, + { + .addrdec = AMAP_2KB, .dden = DEN_4Gb, .dwid = X8, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), B(0), B(1), B(2), + R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), + R(9), R(10), C(8), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_2KB, .dden = DEN_8Gb, .dwid = X16, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), B(0), B(1), B(2), + R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), + R(9), R(10), C(8), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_2KB, .dden = DEN_8Gb, .dwid = X8, + .rowbits = 16, .colbits = 11, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), B(0), B(1), B(2), + R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), + R(9), R(10), C(8), C(9), R(11), RS, C(11), R(12), R(13), + R(14), R(15), 0, 0 + } + }, + { + .addrdec = AMAP_4KB, .dden = DEN_4Gb, .dwid = X16, + .rowbits = 15, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), C(8), B(0), B(1), + B(2), R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), + R(8), R(9), R(10), C(9), R(11), RS, R(12), R(13), R(14), + 0, 0, 0, 0 + } + }, + { + .addrdec = AMAP_4KB, .dden = DEN_4Gb, .dwid = X8, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), C(8), B(0), B(1), + B(2), R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), + R(8), R(9), R(10), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_4KB, .dden = DEN_8Gb, .dwid = X16, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), C(8), B(0), B(1), + B(2), R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), + R(8), R(9), R(10), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_4KB, .dden = DEN_8Gb, .dwid = X8, + .rowbits = 16, .colbits = 11, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), C(8), B(0), B(1), + B(2), R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), + R(8), R(9), R(10), C(9), R(11), RS, C(11), R(12), R(13), + R(14), R(15), 0, 0 + } + } +}; + +static int bank_hash(u64 pmiaddr, int idx, int shft) +{ + int bhash = 0; + + switch (idx) { + case 0: + bhash ^= ((pmiaddr >> (12 + shft)) ^ (pmiaddr >> (9 + shft))) & 1; + break; + case 1: + bhash ^= (((pmiaddr >> (10 + shft)) ^ (pmiaddr >> (8 + shft))) & 1) << 1; + bhash ^= ((pmiaddr >> 22) & 1) << 1; + break; + case 2: + bhash ^= (((pmiaddr >> (13 + shft)) ^ (pmiaddr >> (11 + shft))) & 1) << 2; + break; + } + + return bhash; +} + +static int rank_hash(u64 pmiaddr) +{ + return ((pmiaddr >> 16) ^ (pmiaddr >> 10)) & 1; +} + +/* Second stage decode. Compute rank, bank, row & column. */ +static int apl_pmi2mem(struct mem_ctl_info *mci, u64 pmiaddr, u32 pmiidx, + struct dram_addr *daddr, char *msg) +{ + struct d_cr_drp0 *cr_drp0 = &drp0[pmiidx]; + struct pnd2_pvt *pvt = mci->pvt_info; + int g = pvt->dimm_geom[pmiidx]; + struct dimm_geometry *d = &dimms[g]; + int column = 0, bank = 0, row = 0, rank = 0; + int i, idx, type, skiprs = 0; + + for (i = 0; i < PMI_ADDRESS_WIDTH; i++) { + int bit = (pmiaddr >> i) & 1; + + if (i + skiprs >= PMI_ADDRESS_WIDTH) { + snprintf(msg, PND2_MSG_SIZE, "Bad dimm_geometry[] table\n"); + return -EINVAL; + } + + type = d->bits[i + skiprs] & ~0xf; + idx = d->bits[i + skiprs] & 0xf; + + /* + * On single rank DIMMs ignore the rank select bit + * and shift remainder of "bits[]" down one place. + */ + if (type == RS && (cr_drp0->rken0 + cr_drp0->rken1) == 1) { + skiprs = 1; + type = d->bits[i + skiprs] & ~0xf; + idx = d->bits[i + skiprs] & 0xf; + } + + switch (type) { + case C(0): + column |= (bit << idx); + break; + case B(0): + bank |= (bit << idx); + if (cr_drp0->bahen) + bank ^= bank_hash(pmiaddr, idx, d->addrdec); + break; + case R(0): + row |= (bit << idx); + break; + case RS: + rank = bit; + if (cr_drp0->rsien) + rank ^= rank_hash(pmiaddr); + break; + default: + if (bit) { + snprintf(msg, PND2_MSG_SIZE, "Bad translation\n"); + return -EINVAL; + } + goto done; + } + } + +done: + daddr->col = column; + daddr->bank = bank; + daddr->row = row; + daddr->rank = rank; + daddr->dimm = 0; + + return 0; +} + +/* Pluck bit "in" from pmiaddr and return value shifted to bit "out" */ +#define dnv_get_bit(pmi, in, out) ((int)(((pmi) >> (in)) & 1u) << (out)) + +static int dnv_pmi2mem(struct mem_ctl_info *mci, u64 pmiaddr, u32 pmiidx, + struct dram_addr *daddr, char *msg) +{ + /* Rank 0 or 1 */ + daddr->rank = dnv_get_bit(pmiaddr, dmap[pmiidx].rs0 + 13, 0); + /* Rank 2 or 3 */ + daddr->rank |= dnv_get_bit(pmiaddr, dmap[pmiidx].rs1 + 13, 1); + + /* + * Normally ranks 0,1 are DIMM0, and 2,3 are DIMM1, but we + * flip them if DIMM1 is larger than DIMM0. + */ + daddr->dimm = (daddr->rank >= 2) ^ drp[pmiidx].dimmflip; + + daddr->bank = dnv_get_bit(pmiaddr, dmap[pmiidx].ba0 + 6, 0); + daddr->bank |= dnv_get_bit(pmiaddr, dmap[pmiidx].ba1 + 6, 1); + daddr->bank |= dnv_get_bit(pmiaddr, dmap[pmiidx].bg0 + 6, 2); + if (dsch.ddr4en) + daddr->bank |= dnv_get_bit(pmiaddr, dmap[pmiidx].bg1 + 6, 3); + if (dmap1[pmiidx].bxor) { + if (dsch.ddr4en) { + daddr->bank ^= dnv_get_bit(pmiaddr, dmap3[pmiidx].row6 + 6, 0); + daddr->bank ^= dnv_get_bit(pmiaddr, dmap3[pmiidx].row7 + 6, 1); + if (dsch.chan_width == 0) + /* 64/72 bit dram channel width */ + daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca3 + 6, 2); + else + /* 32/40 bit dram channel width */ + daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca4 + 6, 2); + daddr->bank ^= dnv_get_bit(pmiaddr, dmap2[pmiidx].row2 + 6, 3); + } else { + daddr->bank ^= dnv_get_bit(pmiaddr, dmap2[pmiidx].row2 + 6, 0); + daddr->bank ^= dnv_get_bit(pmiaddr, dmap3[pmiidx].row6 + 6, 1); + if (dsch.chan_width == 0) + daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca3 + 6, 2); + else + daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca4 + 6, 2); + } + } + + daddr->row = dnv_get_bit(pmiaddr, dmap2[pmiidx].row0 + 6, 0); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row1 + 6, 1); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row2 + 6, 2); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row3 + 6, 3); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row4 + 6, 4); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row5 + 6, 5); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row6 + 6, 6); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row7 + 6, 7); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row8 + 6, 8); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row9 + 6, 9); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row10 + 6, 10); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row11 + 6, 11); + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row12 + 6, 12); + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row13 + 6, 13); + if (dmap4[pmiidx].row14 != 31) + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row14 + 6, 14); + if (dmap4[pmiidx].row15 != 31) + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row15 + 6, 15); + if (dmap4[pmiidx].row16 != 31) + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row16 + 6, 16); + if (dmap4[pmiidx].row17 != 31) + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row17 + 6, 17); + + daddr->col = dnv_get_bit(pmiaddr, dmap5[pmiidx].ca3 + 6, 3); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca4 + 6, 4); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca5 + 6, 5); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca6 + 6, 6); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca7 + 6, 7); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca8 + 6, 8); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca9 + 6, 9); + if (!dsch.ddr4en && dmap1[pmiidx].ca11 != 0x3f) + daddr->col |= dnv_get_bit(pmiaddr, dmap1[pmiidx].ca11 + 13, 11); + + return 0; +} + +static int check_channel(int ch) +{ + if (drp0[ch].dramtype != 0) { + pnd2_printk(KERN_INFO, "Unsupported DIMM in channel %d\n", ch); + return 1; + } else if (drp0[ch].eccen == 0) { + pnd2_printk(KERN_INFO, "ECC disabled on channel %d\n", ch); + return 1; + } + return 0; +} + +static int apl_check_ecc_active(void) +{ + int i, ret = 0; + + /* Check dramtype and ECC mode for each present DIMM */ + for (i = 0; i < APL_NUM_CHANNELS; i++) + if (chan_mask & BIT(i)) + ret += check_channel(i); + return ret ? -EINVAL : 0; +} + +#define DIMMS_PRESENT(d) ((d)->rken0 + (d)->rken1 + (d)->rken2 + (d)->rken3) + +static int check_unit(int ch) +{ + struct d_cr_drp *d = &drp[ch]; + + if (DIMMS_PRESENT(d) && !ecc_ctrl[ch].eccen) { + pnd2_printk(KERN_INFO, "ECC disabled on channel %d\n", ch); + return 1; + } + return 0; +} + +static int dnv_check_ecc_active(void) +{ + int i, ret = 0; + + for (i = 0; i < DNV_NUM_CHANNELS; i++) + ret += check_unit(i); + return ret ? -EINVAL : 0; +} + +static int get_memory_error_data(struct mem_ctl_info *mci, u64 addr, + struct dram_addr *daddr, char *msg) +{ + u64 pmiaddr; + u32 pmiidx; + int ret; + + ret = sys2pmi(addr, &pmiidx, &pmiaddr, msg); + if (ret) + return ret; + + pmiaddr >>= ops->pmiaddr_shift; + /* pmi channel idx to dimm channel idx */ + pmiidx >>= ops->pmiidx_shift; + daddr->chan = pmiidx; + + ret = ops->pmi2mem(mci, pmiaddr, pmiidx, daddr, msg); + if (ret) + return ret; + + edac_dbg(0, "SysAddr=%llx PmiAddr=%llx Channel=%d DIMM=%d Rank=%d Bank=%d Row=%d Column=%d\n", + addr, pmiaddr, daddr->chan, daddr->dimm, daddr->rank, daddr->bank, daddr->row, daddr->col); + + return 0; +} + +static void pnd2_mce_output_error(struct mem_ctl_info *mci, const struct mce *m, + struct dram_addr *daddr) +{ + enum hw_event_mc_err_type tp_event; + char *optype, msg[PND2_MSG_SIZE]; + bool ripv = m->mcgstatus & MCG_STATUS_RIPV; + bool overflow = m->status & MCI_STATUS_OVER; + bool uc_err = m->status & MCI_STATUS_UC; + bool recov = m->status & MCI_STATUS_S; + u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); + u32 mscod = GET_BITFIELD(m->status, 16, 31); + u32 errcode = GET_BITFIELD(m->status, 0, 15); + u32 optypenum = GET_BITFIELD(m->status, 4, 6); + int rc; + + tp_event = uc_err ? (ripv ? HW_EVENT_ERR_FATAL : HW_EVENT_ERR_UNCORRECTED) : + HW_EVENT_ERR_CORRECTED; + + /* + * According with Table 15-9 of the Intel Architecture spec vol 3A, + * memory errors should fit in this mask: + * 000f 0000 1mmm cccc (binary) + * where: + * f = Correction Report Filtering Bit. If 1, subsequent errors + * won't be shown + * mmm = error type + * cccc = channel + * If the mask doesn't match, report an error to the parsing logic + */ + if (!((errcode & 0xef80) == 0x80)) { + optype = "Can't parse: it is not a mem"; + } else { + switch (optypenum) { + case 0: + optype = "generic undef request error"; + break; + case 1: + optype = "memory read error"; + break; + case 2: + optype = "memory write error"; + break; + case 3: + optype = "addr/cmd error"; + break; + case 4: + optype = "memory scrubbing error"; + break; + default: + optype = "reserved"; + break; + } + } + + /* Only decode errors with an valid address (ADDRV) */ + if (!(m->status & MCI_STATUS_ADDRV)) + return; + + rc = get_memory_error_data(mci, m->addr, daddr, msg); + if (rc) + goto address_error; + + snprintf(msg, sizeof(msg), + "%s%s err_code:%04x:%04x channel:%d DIMM:%d rank:%d row:%d bank:%d col:%d", + overflow ? " OVERFLOW" : "", (uc_err && recov) ? " recoverable" : "", mscod, + errcode, daddr->chan, daddr->dimm, daddr->rank, daddr->row, daddr->bank, daddr->col); + + edac_dbg(0, "%s\n", msg); + + /* Call the helper to output message */ + edac_mc_handle_error(tp_event, mci, core_err_cnt, m->addr >> PAGE_SHIFT, + m->addr & ~PAGE_MASK, 0, daddr->chan, daddr->dimm, -1, optype, msg); + + return; + +address_error: + edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0, -1, -1, -1, msg, ""); +} + +static void apl_get_dimm_config(struct mem_ctl_info *mci) +{ + struct pnd2_pvt *pvt = mci->pvt_info; + struct dimm_info *dimm; + struct d_cr_drp0 *d; + u64 capacity; + int i, g; + + for (i = 0; i < APL_NUM_CHANNELS; i++) { + if (!(chan_mask & BIT(i))) + continue; + + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, 0, 0); + if (!dimm) { + edac_dbg(0, "No allocated DIMM for channel %d\n", i); + continue; + } + + d = &drp0[i]; + for (g = 0; g < ARRAY_SIZE(dimms); g++) + if (dimms[g].addrdec == d->addrdec && + dimms[g].dden == d->dden && + dimms[g].dwid == d->dwid) + break; + + if (g == ARRAY_SIZE(dimms)) { + edac_dbg(0, "Channel %d: unrecognized DIMM\n", i); + continue; + } + + pvt->dimm_geom[i] = g; + capacity = (d->rken0 + d->rken1) * 8 * (1ul << dimms[g].rowbits) * + (1ul << dimms[g].colbits); + edac_dbg(0, "Channel %d: %lld MByte DIMM\n", i, capacity >> (20 - 3)); + dimm->nr_pages = MiB_TO_PAGES(capacity >> (20 - 3)); + dimm->grain = 32; + dimm->dtype = (d->dwid == 0) ? DEV_X8 : DEV_X16; + dimm->mtype = MEM_DDR3; + dimm->edac_mode = EDAC_SECDED; + snprintf(dimm->label, sizeof(dimm->label), "Slice#%d_Chan#%d", i / 2, i % 2); + } +} + +static const int dnv_dtypes[] = { + DEV_X8, DEV_X4, DEV_X16, DEV_UNKNOWN +}; + +static void dnv_get_dimm_config(struct mem_ctl_info *mci) +{ + int i, j, ranks_of_dimm[DNV_MAX_DIMMS], banks, rowbits, colbits, memtype; + struct dimm_info *dimm; + struct d_cr_drp *d; + u64 capacity; + + if (dsch.ddr4en) { + memtype = MEM_DDR4; + banks = 16; + colbits = 10; + } else { + memtype = MEM_DDR3; + banks = 8; + } + + for (i = 0; i < DNV_NUM_CHANNELS; i++) { + if (dmap4[i].row14 == 31) + rowbits = 14; + else if (dmap4[i].row15 == 31) + rowbits = 15; + else if (dmap4[i].row16 == 31) + rowbits = 16; + else if (dmap4[i].row17 == 31) + rowbits = 17; + else + rowbits = 18; + + if (memtype == MEM_DDR3) { + if (dmap1[i].ca11 != 0x3f) + colbits = 12; + else + colbits = 10; + } + + d = &drp[i]; + /* DIMM0 is present if rank0 and/or rank1 is enabled */ + ranks_of_dimm[0] = d->rken0 + d->rken1; + /* DIMM1 is present if rank2 and/or rank3 is enabled */ + ranks_of_dimm[1] = d->rken2 + d->rken3; + + for (j = 0; j < DNV_MAX_DIMMS; j++) { + if (!ranks_of_dimm[j]) + continue; + + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, j, 0); + if (!dimm) { + edac_dbg(0, "No allocated DIMM for channel %d DIMM %d\n", i, j); + continue; + } + + capacity = ranks_of_dimm[j] * banks * (1ul << rowbits) * (1ul << colbits); + edac_dbg(0, "Channel %d DIMM %d: %lld MByte DIMM\n", i, j, capacity >> (20 - 3)); + dimm->nr_pages = MiB_TO_PAGES(capacity >> (20 - 3)); + dimm->grain = 32; + dimm->dtype = dnv_dtypes[j ? d->dimmdwid0 : d->dimmdwid1]; + dimm->mtype = memtype; + dimm->edac_mode = EDAC_SECDED; + snprintf(dimm->label, sizeof(dimm->label), "Chan#%d_DIMM#%d", i, j); + } + } +} + +static int pnd2_register_mci(struct mem_ctl_info **ppmci) +{ + struct edac_mc_layer layers[2]; + struct mem_ctl_info *mci; + struct pnd2_pvt *pvt; + int rc; + + rc = ops->check_ecc(); + if (rc < 0) + return rc; + + /* Allocate a new MC control structure */ + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = ops->channels; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_SLOT; + layers[1].size = ops->dimms_per_channel; + layers[1].is_virt_csrow = true; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt)); + if (!mci) + return -ENOMEM; + + pvt = mci->pvt_info; + memset(pvt, 0, sizeof(*pvt)); + + mci->mod_name = "pnd2_edac.c"; + mci->dev_name = ops->name; + mci->ctl_name = "Pondicherry2"; + + /* Get dimm basic config and the memory layout */ + ops->get_dimm_config(mci); + + if (edac_mc_add_mc(mci)) { + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); + edac_mc_free(mci); + return -EINVAL; + } + + *ppmci = mci; + + return 0; +} + +static void pnd2_unregister_mci(struct mem_ctl_info *mci) +{ + if (unlikely(!mci || !mci->pvt_info)) { + pnd2_printk(KERN_ERR, "Couldn't find mci handler\n"); + return; + } + + /* Remove MC sysfs nodes */ + edac_mc_del_mc(NULL); + edac_dbg(1, "%s: free mci struct\n", mci->ctl_name); + edac_mc_free(mci); +} + +/* + * Callback function registered with core kernel mce code. + * Called once for each logged error. + */ +static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) +{ + struct mce *mce = (struct mce *)data; + struct mem_ctl_info *mci; + struct dram_addr daddr; + char *type; + + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) + return NOTIFY_DONE; + + mci = pnd2_mci; + if (!mci) + return NOTIFY_DONE; + + /* + * Just let mcelog handle it if the error is + * outside the memory controller. A memory error + * is indicated by bit 7 = 1 and bits = 8-11,13-15 = 0. + * bit 12 has an special meaning. + */ + if ((mce->status & 0xefff) >> 7 != 1) + return NOTIFY_DONE; + + if (mce->mcgstatus & MCG_STATUS_MCIP) + type = "Exception"; + else + type = "Event"; + + pnd2_mc_printk(mci, KERN_INFO, "HANDLING MCE MEMORY ERROR\n"); + pnd2_mc_printk(mci, KERN_INFO, "CPU %u: Machine Check %s: %llx Bank %u: %llx\n", + mce->extcpu, type, mce->mcgstatus, mce->bank, mce->status); + pnd2_mc_printk(mci, KERN_INFO, "TSC %llx ", mce->tsc); + pnd2_mc_printk(mci, KERN_INFO, "ADDR %llx ", mce->addr); + pnd2_mc_printk(mci, KERN_INFO, "MISC %llx ", mce->misc); + pnd2_mc_printk(mci, KERN_INFO, "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", + mce->cpuvendor, mce->cpuid, mce->time, mce->socketid, mce->apicid); + + pnd2_mce_output_error(mci, mce, &daddr); + + /* Advice mcelog that the error were handled */ + return NOTIFY_STOP; +} + +static struct notifier_block pnd2_mce_dec = { + .notifier_call = pnd2_mce_check_error, +}; + +#ifdef CONFIG_EDAC_DEBUG +/* + * Write an address to this file to exercise the address decode + * logic in this driver. + */ +static u64 pnd2_fake_addr; +#define PND2_BLOB_SIZE 1024 +static char pnd2_result[PND2_BLOB_SIZE]; +static struct dentry *pnd2_test; +static struct debugfs_blob_wrapper pnd2_blob = { + .data = pnd2_result, + .size = 0 +}; + +static int debugfs_u64_set(void *data, u64 val) +{ + struct dram_addr daddr; + struct mce m; + + *(u64 *)data = val; + m.mcgstatus = 0; + /* ADDRV + MemRd + Unknown channel */ + m.status = MCI_STATUS_ADDRV + 0x9f; + m.addr = val; + pnd2_mce_output_error(pnd2_mci, &m, &daddr); + snprintf(pnd2_blob.data, PND2_BLOB_SIZE, + "SysAddr=%llx Channel=%d DIMM=%d Rank=%d Bank=%d Row=%d Column=%d\n", + m.addr, daddr.chan, daddr.dimm, daddr.rank, daddr.bank, daddr.row, daddr.col); + pnd2_blob.size = strlen(pnd2_blob.data); + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); + +static void setup_pnd2_debug(void) +{ + pnd2_test = edac_debugfs_create_dir("pnd2_test"); + edac_debugfs_create_file("pnd2_debug_addr", 0200, pnd2_test, + &pnd2_fake_addr, &fops_u64_wo); + debugfs_create_blob("pnd2_debug_results", 0400, pnd2_test, &pnd2_blob); +} + +static void teardown_pnd2_debug(void) +{ + debugfs_remove_recursive(pnd2_test); +} +#else +static void setup_pnd2_debug(void) {} +static void teardown_pnd2_debug(void) {} +#endif /* CONFIG_EDAC_DEBUG */ + + +static int pnd2_probe(void) +{ + int rc; + + edac_dbg(2, "\n"); + rc = get_registers(); + if (rc) + return rc; + + return pnd2_register_mci(&pnd2_mci); +} + +static void pnd2_remove(void) +{ + edac_dbg(0, "\n"); + pnd2_unregister_mci(pnd2_mci); +} + +static struct dunit_ops apl_ops = { + .name = "pnd2/apl", + .type = APL, + .pmiaddr_shift = LOG2_PMI_ADDR_GRANULARITY, + .pmiidx_shift = 0, + .channels = APL_NUM_CHANNELS, + .dimms_per_channel = 1, + .rd_reg = apl_rd_reg, + .get_registers = apl_get_registers, + .check_ecc = apl_check_ecc_active, + .mk_region = apl_mk_region, + .get_dimm_config = apl_get_dimm_config, + .pmi2mem = apl_pmi2mem, +}; + +static struct dunit_ops dnv_ops = { + .name = "pnd2/dnv", + .type = DNV, + .pmiaddr_shift = 0, + .pmiidx_shift = 1, + .channels = DNV_NUM_CHANNELS, + .dimms_per_channel = 2, + .rd_reg = dnv_rd_reg, + .get_registers = dnv_get_registers, + .check_ecc = dnv_check_ecc_active, + .mk_region = dnv_mk_region, + .get_dimm_config = dnv_get_dimm_config, + .pmi2mem = dnv_pmi2mem, +}; + +static const struct x86_cpu_id pnd2_cpuids[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT, 0, (kernel_ulong_t)&apl_ops }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON, 0, (kernel_ulong_t)&dnv_ops }, + { } +}; +MODULE_DEVICE_TABLE(x86cpu, pnd2_cpuids); + +static int __init pnd2_init(void) +{ + const struct x86_cpu_id *id; + int rc; + + edac_dbg(2, "\n"); + + id = x86_match_cpu(pnd2_cpuids); + if (!id) + return -ENODEV; + + ops = (struct dunit_ops *)id->driver_data; + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + + rc = pnd2_probe(); + if (rc < 0) { + pnd2_printk(KERN_ERR, "Failed to register device with error %d.\n", rc); + return rc; + } + + if (!pnd2_mci) + return -ENODEV; + + mce_register_decode_chain(&pnd2_mce_dec); + setup_pnd2_debug(); + + return 0; +} + +static void __exit pnd2_exit(void) +{ + edac_dbg(2, "\n"); + teardown_pnd2_debug(); + mce_unregister_decode_chain(&pnd2_mce_dec); + pnd2_remove(); +} + +module_init(pnd2_init); +module_exit(pnd2_exit); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tony Luck"); +MODULE_DESCRIPTION("MC Driver for Intel SoC using Pondicherry memory controller"); diff --git a/drivers/edac/pnd2_edac.h b/drivers/edac/pnd2_edac.h new file mode 100644 index 000000000000..61b6e79492bb --- /dev/null +++ b/drivers/edac/pnd2_edac.h @@ -0,0 +1,301 @@ +/* + * Register bitfield descriptions for Pondicherry2 memory controller. + * + * Copyright (c) 2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _PND2_REGS_H +#define _PND2_REGS_H + +struct b_cr_touud_lo_pci { + u32 lock : 1; + u32 reserved_1 : 19; + u32 touud : 12; +}; + +#define b_cr_touud_lo_pci_port 0x4c +#define b_cr_touud_lo_pci_offset 0xa8 +#define b_cr_touud_lo_pci_r_opcode 0x04 + +struct b_cr_touud_hi_pci { + u32 touud : 7; + u32 reserved_0 : 25; +}; + +#define b_cr_touud_hi_pci_port 0x4c +#define b_cr_touud_hi_pci_offset 0xac +#define b_cr_touud_hi_pci_r_opcode 0x04 + +struct b_cr_tolud_pci { + u32 lock : 1; + u32 reserved_0 : 19; + u32 tolud : 12; +}; + +#define b_cr_tolud_pci_port 0x4c +#define b_cr_tolud_pci_offset 0xbc +#define b_cr_tolud_pci_r_opcode 0x04 + +struct b_cr_mchbar_lo_pci { + u32 enable : 1; + u32 pad_3_1 : 3; + u32 pad_14_4: 11; + u32 base: 17; +}; + +struct b_cr_mchbar_hi_pci { + u32 base : 7; + u32 pad_31_7 : 25; +}; + +/* Symmetric region */ +struct b_cr_slice_channel_hash { + u64 slice_1_disabled : 1; + u64 hvm_mode : 1; + u64 interleave_mode : 2; + u64 slice_0_mem_disabled : 1; + u64 reserved_0 : 1; + u64 slice_hash_mask : 14; + u64 reserved_1 : 11; + u64 enable_pmi_dual_data_mode : 1; + u64 ch_1_disabled : 1; + u64 reserved_2 : 1; + u64 sym_slice0_channel_enabled : 2; + u64 sym_slice1_channel_enabled : 2; + u64 ch_hash_mask : 14; + u64 reserved_3 : 11; + u64 lock : 1; +}; + +#define b_cr_slice_channel_hash_port 0x4c +#define b_cr_slice_channel_hash_offset 0x4c58 +#define b_cr_slice_channel_hash_r_opcode 0x06 + +struct b_cr_mot_out_base_mchbar { + u32 reserved_0 : 14; + u32 mot_out_base : 15; + u32 reserved_1 : 1; + u32 tr_en : 1; + u32 imr_en : 1; +}; + +#define b_cr_mot_out_base_mchbar_port 0x4c +#define b_cr_mot_out_base_mchbar_offset 0x6af0 +#define b_cr_mot_out_base_mchbar_r_opcode 0x00 + +struct b_cr_mot_out_mask_mchbar { + u32 reserved_0 : 14; + u32 mot_out_mask : 15; + u32 reserved_1 : 1; + u32 ia_iwb_en : 1; + u32 gt_iwb_en : 1; +}; + +#define b_cr_mot_out_mask_mchbar_port 0x4c +#define b_cr_mot_out_mask_mchbar_offset 0x6af4 +#define b_cr_mot_out_mask_mchbar_r_opcode 0x00 + +struct b_cr_asym_mem_region0_mchbar { + u32 pad : 4; + u32 slice0_asym_base : 11; + u32 pad_18_15 : 4; + u32 slice0_asym_limit : 11; + u32 slice0_asym_channel_select : 1; + u32 slice0_asym_enable : 1; +}; + +#define b_cr_asym_mem_region0_mchbar_port 0x4c +#define b_cr_asym_mem_region0_mchbar_offset 0x6e40 +#define b_cr_asym_mem_region0_mchbar_r_opcode 0x00 + +struct b_cr_asym_mem_region1_mchbar { + u32 pad : 4; + u32 slice1_asym_base : 11; + u32 pad_18_15 : 4; + u32 slice1_asym_limit : 11; + u32 slice1_asym_channel_select : 1; + u32 slice1_asym_enable : 1; +}; + +#define b_cr_asym_mem_region1_mchbar_port 0x4c +#define b_cr_asym_mem_region1_mchbar_offset 0x6e44 +#define b_cr_asym_mem_region1_mchbar_r_opcode 0x00 + +/* Some bit fields moved in above two structs on Denverton */ +struct b_cr_asym_mem_region_denverton { + u32 pad : 4; + u32 slice_asym_base : 8; + u32 pad_19_12 : 8; + u32 slice_asym_limit : 8; + u32 pad_28_30 : 3; + u32 slice_asym_enable : 1; +}; + +struct b_cr_asym_2way_mem_region_mchbar { + u32 pad : 2; + u32 asym_2way_intlv_mode : 2; + u32 asym_2way_base : 11; + u32 pad_16_15 : 2; + u32 asym_2way_limit : 11; + u32 pad_30_28 : 3; + u32 asym_2way_interleave_enable : 1; +}; + +#define b_cr_asym_2way_mem_region_mchbar_port 0x4c +#define b_cr_asym_2way_mem_region_mchbar_offset 0x6e50 +#define b_cr_asym_2way_mem_region_mchbar_r_opcode 0x00 + +/* Apollo Lake d-unit */ + +struct d_cr_drp0 { + u32 rken0 : 1; + u32 rken1 : 1; + u32 ddmen : 1; + u32 rsvd3 : 1; + u32 dwid : 2; + u32 dden : 3; + u32 rsvd13_9 : 5; + u32 rsien : 1; + u32 bahen : 1; + u32 rsvd18_16 : 3; + u32 caswizzle : 2; + u32 eccen : 1; + u32 dramtype : 3; + u32 blmode : 3; + u32 addrdec : 2; + u32 dramdevice_pr : 2; +}; + +#define d_cr_drp0_offset 0x1400 +#define d_cr_drp0_r_opcode 0x00 + +/* Denverton d-unit */ + +struct d_cr_dsch { + u32 ch0en : 1; + u32 ch1en : 1; + u32 ddr4en : 1; + u32 coldwake : 1; + u32 newbypdis : 1; + u32 chan_width : 1; + u32 rsvd6_6 : 1; + u32 ooodis : 1; + u32 rsvd18_8 : 11; + u32 ic : 1; + u32 rsvd31_20 : 12; +}; + +#define d_cr_dsch_port 0x16 +#define d_cr_dsch_offset 0x0 +#define d_cr_dsch_r_opcode 0x0 + +struct d_cr_ecc_ctrl { + u32 eccen : 1; + u32 rsvd31_1 : 31; +}; + +#define d_cr_ecc_ctrl_offset 0x180 +#define d_cr_ecc_ctrl_r_opcode 0x0 + +struct d_cr_drp { + u32 rken0 : 1; + u32 rken1 : 1; + u32 rken2 : 1; + u32 rken3 : 1; + u32 dimmdwid0 : 2; + u32 dimmdden0 : 2; + u32 dimmdwid1 : 2; + u32 dimmdden1 : 2; + u32 rsvd15_12 : 4; + u32 dimmflip : 1; + u32 rsvd31_17 : 15; +}; + +#define d_cr_drp_offset 0x158 +#define d_cr_drp_r_opcode 0x0 + +struct d_cr_dmap { + u32 ba0 : 5; + u32 ba1 : 5; + u32 bg0 : 5; /* if ddr3, ba2 = bg0 */ + u32 bg1 : 5; /* if ddr3, ba3 = bg1 */ + u32 rs0 : 5; + u32 rs1 : 5; + u32 rsvd : 2; +}; + +#define d_cr_dmap_offset 0x174 +#define d_cr_dmap_r_opcode 0x0 + +struct d_cr_dmap1 { + u32 ca11 : 6; + u32 bxor : 1; + u32 rsvd : 25; +}; + +#define d_cr_dmap1_offset 0xb4 +#define d_cr_dmap1_r_opcode 0x0 + +struct d_cr_dmap2 { + u32 row0 : 5; + u32 row1 : 5; + u32 row2 : 5; + u32 row3 : 5; + u32 row4 : 5; + u32 row5 : 5; + u32 rsvd : 2; +}; + +#define d_cr_dmap2_offset 0x148 +#define d_cr_dmap2_r_opcode 0x0 + +struct d_cr_dmap3 { + u32 row6 : 5; + u32 row7 : 5; + u32 row8 : 5; + u32 row9 : 5; + u32 row10 : 5; + u32 row11 : 5; + u32 rsvd : 2; +}; + +#define d_cr_dmap3_offset 0x14c +#define d_cr_dmap3_r_opcode 0x0 + +struct d_cr_dmap4 { + u32 row12 : 5; + u32 row13 : 5; + u32 row14 : 5; + u32 row15 : 5; + u32 row16 : 5; + u32 row17 : 5; + u32 rsvd : 2; +}; + +#define d_cr_dmap4_offset 0x150 +#define d_cr_dmap4_r_opcode 0x0 + +struct d_cr_dmap5 { + u32 ca3 : 4; + u32 ca4 : 4; + u32 ca5 : 4; + u32 ca6 : 4; + u32 ca7 : 4; + u32 ca8 : 4; + u32 ca9 : 4; + u32 rsvd : 4; +}; + +#define d_cr_dmap5_offset 0x154 +#define d_cr_dmap5_r_opcode 0x0 + +#endif /* _PND2_REGS_H */ diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index a65ea44e3b0b..ea21cb651b3c 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -3075,7 +3075,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, struct sbridge_pvt *pvt; char *type; - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; mci = get_mci_for_node_id(mce->socketid); @@ -3441,7 +3441,7 @@ static int __init sbridge_init(void) if (rc >= 0) { mce_register_decode_chain(&sbridge_mce_dec); - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); return 0; } diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index 1159dba4671f..64bef6c9cfb4 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -971,7 +971,7 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, struct mem_ctl_info *mci; char *type; - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; /* ignore unless this is memory related with an address */ diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c new file mode 100644 index 000000000000..86d585cb6d32 --- /dev/null +++ b/drivers/edac/thunderx_edac.c @@ -0,0 +1,2174 @@ +/* + * Cavium ThunderX memory controller kernel module + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright Cavium, Inc. (C) 2015-2017. All rights reserved. + * + */ + +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/edac.h> +#include <linux/interrupt.h> +#include <linux/string.h> +#include <linux/stop_machine.h> +#include <linux/delay.h> +#include <linux/sizes.h> +#include <linux/atomic.h> +#include <linux/bitfield.h> +#include <linux/circ_buf.h> + +#include <asm/page.h> + +#include "edac_module.h" + +#define phys_to_pfn(phys) (PFN_DOWN(phys)) + +#define THUNDERX_NODE GENMASK(45, 44) + +enum { + ERR_CORRECTED = 1, + ERR_UNCORRECTED = 2, + ERR_UNKNOWN = 3, +}; + +#define MAX_SYNDROME_REGS 4 + +struct error_syndrome { + u64 reg[MAX_SYNDROME_REGS]; +}; + +struct error_descr { + int type; + u64 mask; + char *descr; +}; + +static void decode_register(char *str, size_t size, + const struct error_descr *descr, + const uint64_t reg) +{ + int ret = 0; + + while (descr->type && descr->mask && descr->descr) { + if (reg & descr->mask) { + ret = snprintf(str, size, "\n\t%s, %s", + descr->type == ERR_CORRECTED ? + "Corrected" : "Uncorrected", + descr->descr); + str += ret; + size -= ret; + } + descr++; + } +} + +static unsigned long get_bits(unsigned long data, int pos, int width) +{ + return (data >> pos) & ((1 << width) - 1); +} + +#define L2C_CTL 0x87E080800000 +#define L2C_CTL_DISIDXALIAS BIT(0) + +#define PCI_DEVICE_ID_THUNDER_LMC 0xa022 + +#define LMC_FADR 0x20 +#define LMC_FADR_FDIMM(x) ((x >> 37) & 0x1) +#define LMC_FADR_FBUNK(x) ((x >> 36) & 0x1) +#define LMC_FADR_FBANK(x) ((x >> 32) & 0xf) +#define LMC_FADR_FROW(x) ((x >> 14) & 0xffff) +#define LMC_FADR_FCOL(x) ((x >> 0) & 0x1fff) + +#define LMC_NXM_FADR 0x28 +#define LMC_ECC_SYND 0x38 + +#define LMC_ECC_PARITY_TEST 0x108 + +#define LMC_INT_W1S 0x150 + +#define LMC_INT_ENA_W1C 0x158 +#define LMC_INT_ENA_W1S 0x160 + +#define LMC_CONFIG 0x188 + +#define LMC_CONFIG_BG2 BIT(62) +#define LMC_CONFIG_RANK_ENA BIT(42) +#define LMC_CONFIG_PBANK_LSB(x) (((x) >> 5) & 0xF) +#define LMC_CONFIG_ROW_LSB(x) (((x) >> 2) & 0x7) + +#define LMC_CONTROL 0x190 +#define LMC_CONTROL_XOR_BANK BIT(16) + +#define LMC_INT 0x1F0 + +#define LMC_INT_DDR_ERR BIT(11) +#define LMC_INT_DED_ERR (0xFUL << 5) +#define LMC_INT_SEC_ERR (0xFUL << 1) +#define LMC_INT_NXM_WR_MASK BIT(0) + +#define LMC_DDR_PLL_CTL 0x258 +#define LMC_DDR_PLL_CTL_DDR4 BIT(29) + +#define LMC_FADR_SCRAMBLED 0x330 + +#define LMC_INT_UE (LMC_INT_DDR_ERR | LMC_INT_DED_ERR | \ + LMC_INT_NXM_WR_MASK) + +#define LMC_INT_CE (LMC_INT_SEC_ERR) + +static const struct error_descr lmc_errors[] = { + { + .type = ERR_CORRECTED, + .mask = LMC_INT_SEC_ERR, + .descr = "Single-bit ECC error", + }, + { + .type = ERR_UNCORRECTED, + .mask = LMC_INT_DDR_ERR, + .descr = "DDR chip error", + }, + { + .type = ERR_UNCORRECTED, + .mask = LMC_INT_DED_ERR, + .descr = "Double-bit ECC error", + }, + { + .type = ERR_UNCORRECTED, + .mask = LMC_INT_NXM_WR_MASK, + .descr = "Non-existent memory write", + }, + {0, 0, NULL}, +}; + +#define LMC_INT_EN_DDR_ERROR_ALERT_ENA BIT(5) +#define LMC_INT_EN_DLCRAM_DED_ERR BIT(4) +#define LMC_INT_EN_DLCRAM_SEC_ERR BIT(3) +#define LMC_INT_INTR_DED_ENA BIT(2) +#define LMC_INT_INTR_SEC_ENA BIT(1) +#define LMC_INT_INTR_NXM_WR_ENA BIT(0) + +#define LMC_INT_ENA_ALL GENMASK(5, 0) + +#define LMC_DDR_PLL_CTL 0x258 +#define LMC_DDR_PLL_CTL_DDR4 BIT(29) + +#define LMC_CONTROL 0x190 +#define LMC_CONTROL_RDIMM BIT(0) + +#define LMC_SCRAM_FADR 0x330 + +#define LMC_CHAR_MASK0 0x228 +#define LMC_CHAR_MASK2 0x238 + +#define RING_ENTRIES 8 + +struct debugfs_entry { + const char *name; + umode_t mode; + const struct file_operations fops; +}; + +struct lmc_err_ctx { + u64 reg_int; + u64 reg_fadr; + u64 reg_nxm_fadr; + u64 reg_scram_fadr; + u64 reg_ecc_synd; +}; + +struct thunderx_lmc { + void __iomem *regs; + struct pci_dev *pdev; + struct msix_entry msix_ent; + + atomic_t ecc_int; + + u64 mask0; + u64 mask2; + u64 parity_test; + u64 node; + + int xbits; + int bank_width; + int pbank_lsb; + int dimm_lsb; + int rank_lsb; + int bank_lsb; + int row_lsb; + int col_hi_lsb; + + int xor_bank; + int l2c_alias; + + struct page *mem; + + struct lmc_err_ctx err_ctx[RING_ENTRIES]; + unsigned long ring_head; + unsigned long ring_tail; +}; + +#define ring_pos(pos, size) ((pos) & (size - 1)) + +#define DEBUGFS_STRUCT(_name, _mode, _write, _read) \ +static struct debugfs_entry debugfs_##_name = { \ + .name = __stringify(_name), \ + .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \ + .fops = { \ + .open = simple_open, \ + .write = _write, \ + .read = _read, \ + .llseek = generic_file_llseek, \ + }, \ +} + +#define DEBUGFS_FIELD_ATTR(_type, _field) \ +static ssize_t thunderx_##_type##_##_field##_read(struct file *file, \ + char __user *data, \ + size_t count, loff_t *ppos) \ +{ \ + struct thunderx_##_type *pdata = file->private_data; \ + char buf[20]; \ + \ + snprintf(buf, count, "0x%016llx", pdata->_field); \ + return simple_read_from_buffer(data, count, ppos, \ + buf, sizeof(buf)); \ +} \ + \ +static ssize_t thunderx_##_type##_##_field##_write(struct file *file, \ + const char __user *data, \ + size_t count, loff_t *ppos) \ +{ \ + struct thunderx_##_type *pdata = file->private_data; \ + int res; \ + \ + res = kstrtoull_from_user(data, count, 0, &pdata->_field); \ + \ + return res ? res : count; \ +} \ + \ +DEBUGFS_STRUCT(_field, 0600, \ + thunderx_##_type##_##_field##_write, \ + thunderx_##_type##_##_field##_read) \ + +#define DEBUGFS_REG_ATTR(_type, _name, _reg) \ +static ssize_t thunderx_##_type##_##_name##_read(struct file *file, \ + char __user *data, \ + size_t count, loff_t *ppos) \ +{ \ + struct thunderx_##_type *pdata = file->private_data; \ + char buf[20]; \ + \ + sprintf(buf, "0x%016llx", readq(pdata->regs + _reg)); \ + return simple_read_from_buffer(data, count, ppos, \ + buf, sizeof(buf)); \ +} \ + \ +static ssize_t thunderx_##_type##_##_name##_write(struct file *file, \ + const char __user *data, \ + size_t count, loff_t *ppos) \ +{ \ + struct thunderx_##_type *pdata = file->private_data; \ + u64 val; \ + int res; \ + \ + res = kstrtoull_from_user(data, count, 0, &val); \ + \ + if (!res) { \ + writeq(val, pdata->regs + _reg); \ + res = count; \ + } \ + \ + return res; \ +} \ + \ +DEBUGFS_STRUCT(_name, 0600, \ + thunderx_##_type##_##_name##_write, \ + thunderx_##_type##_##_name##_read) + +#define LMC_DEBUGFS_ENT(_field) DEBUGFS_FIELD_ATTR(lmc, _field) + +/* + * To get an ECC error injected, the following steps are needed: + * - Setup the ECC injection by writing the appropriate parameters: + * echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask0 + * echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask2 + * echo 0x802 > /sys/kernel/debug/<device number>/ecc_parity_test + * - Do the actual injection: + * echo 1 > /sys/kernel/debug/<device number>/inject_ecc + */ +static ssize_t thunderx_lmc_inject_int_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct thunderx_lmc *lmc = file->private_data; + u64 val; + int res; + + res = kstrtoull_from_user(data, count, 0, &val); + + if (!res) { + /* Trigger the interrupt */ + writeq(val, lmc->regs + LMC_INT_W1S); + res = count; + } + + return res; +} + +static ssize_t thunderx_lmc_int_read(struct file *file, + char __user *data, + size_t count, loff_t *ppos) +{ + struct thunderx_lmc *lmc = file->private_data; + char buf[20]; + u64 lmc_int = readq(lmc->regs + LMC_INT); + + snprintf(buf, sizeof(buf), "0x%016llx", lmc_int); + return simple_read_from_buffer(data, count, ppos, buf, sizeof(buf)); +} + +#define TEST_PATTERN 0xa5 + +static int inject_ecc_fn(void *arg) +{ + struct thunderx_lmc *lmc = arg; + uintptr_t addr, phys; + unsigned int cline_size = cache_line_size(); + const unsigned int lines = PAGE_SIZE / cline_size; + unsigned int i, cl_idx; + + addr = (uintptr_t)page_address(lmc->mem); + phys = (uintptr_t)page_to_phys(lmc->mem); + + cl_idx = (phys & 0x7f) >> 4; + lmc->parity_test &= ~(7ULL << 8); + lmc->parity_test |= (cl_idx << 8); + + writeq(lmc->mask0, lmc->regs + LMC_CHAR_MASK0); + writeq(lmc->mask2, lmc->regs + LMC_CHAR_MASK2); + writeq(lmc->parity_test, lmc->regs + LMC_ECC_PARITY_TEST); + + readq(lmc->regs + LMC_CHAR_MASK0); + readq(lmc->regs + LMC_CHAR_MASK2); + readq(lmc->regs + LMC_ECC_PARITY_TEST); + + for (i = 0; i < lines; i++) { + memset((void *)addr, TEST_PATTERN, cline_size); + barrier(); + + /* + * Flush L1 cachelines to the PoC (L2). + * This will cause cacheline eviction to the L2. + */ + asm volatile("dc civac, %0\n" + "dsb sy\n" + : : "r"(addr + i * cline_size)); + } + + for (i = 0; i < lines; i++) { + /* + * Flush L2 cachelines to the DRAM. + * This will cause cacheline eviction to the DRAM + * and ECC corruption according to the masks set. + */ + __asm__ volatile("sys #0,c11,C1,#2, %0\n" + : : "r"(phys + i * cline_size)); + } + + for (i = 0; i < lines; i++) { + /* + * Invalidate L2 cachelines. + * The subsequent load will cause cacheline fetch + * from the DRAM and an error interrupt + */ + __asm__ volatile("sys #0,c11,C1,#1, %0" + : : "r"(phys + i * cline_size)); + } + + for (i = 0; i < lines; i++) { + /* + * Invalidate L1 cachelines. + * The subsequent load will cause cacheline fetch + * from the L2 and/or DRAM + */ + asm volatile("dc ivac, %0\n" + "dsb sy\n" + : : "r"(addr + i * cline_size)); + } + + return 0; +} + +static ssize_t thunderx_lmc_inject_ecc_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct thunderx_lmc *lmc = file->private_data; + + unsigned int cline_size = cache_line_size(); + + u8 tmp[cline_size]; + void __iomem *addr; + unsigned int offs, timeout = 100000; + + atomic_set(&lmc->ecc_int, 0); + + lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0); + + if (!lmc->mem) + return -ENOMEM; + + addr = page_address(lmc->mem); + + while (!atomic_read(&lmc->ecc_int) && timeout--) { + stop_machine(inject_ecc_fn, lmc, NULL); + + for (offs = 0; offs < PAGE_SIZE; offs += sizeof(tmp)) { + /* + * Do a load from the previously rigged location + * This should generate an error interrupt. + */ + memcpy(tmp, addr + offs, cline_size); + asm volatile("dsb ld\n"); + } + } + + __free_pages(lmc->mem, 0); + + return count; +} + +LMC_DEBUGFS_ENT(mask0); +LMC_DEBUGFS_ENT(mask2); +LMC_DEBUGFS_ENT(parity_test); + +DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL); +DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL); +DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read); + +struct debugfs_entry *lmc_dfs_ents[] = { + &debugfs_mask0, + &debugfs_mask2, + &debugfs_parity_test, + &debugfs_inject_ecc, + &debugfs_inject_int, + &debugfs_int_w1c, +}; + +static int thunderx_create_debugfs_nodes(struct dentry *parent, + struct debugfs_entry *attrs[], + void *data, + size_t num) +{ + int i; + struct dentry *ent; + + if (!IS_ENABLED(CONFIG_EDAC_DEBUG)) + return 0; + + if (!parent) + return -ENOENT; + + for (i = 0; i < num; i++) { + ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode, + parent, data, &attrs[i]->fops); + + if (!ent) + break; + } + + return i; +} + +static phys_addr_t thunderx_faddr_to_phys(u64 faddr, struct thunderx_lmc *lmc) +{ + phys_addr_t addr = 0; + int bank, xbits; + + addr |= lmc->node << 40; + addr |= LMC_FADR_FDIMM(faddr) << lmc->dimm_lsb; + addr |= LMC_FADR_FBUNK(faddr) << lmc->rank_lsb; + addr |= LMC_FADR_FROW(faddr) << lmc->row_lsb; + addr |= (LMC_FADR_FCOL(faddr) >> 4) << lmc->col_hi_lsb; + + bank = LMC_FADR_FBANK(faddr) << lmc->bank_lsb; + + if (lmc->xor_bank) + bank ^= get_bits(addr, 12 + lmc->xbits, lmc->bank_width); + + addr |= bank << lmc->bank_lsb; + + xbits = PCI_FUNC(lmc->pdev->devfn); + + if (lmc->l2c_alias) + xbits ^= get_bits(addr, 20, lmc->xbits) ^ + get_bits(addr, 12, lmc->xbits); + + addr |= xbits << 7; + + return addr; +} + +static unsigned int thunderx_get_num_lmcs(unsigned int node) +{ + unsigned int number = 0; + struct pci_dev *pdev = NULL; + + do { + pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, + PCI_DEVICE_ID_THUNDER_LMC, + pdev); + if (pdev) { +#ifdef CONFIG_NUMA + if (pdev->dev.numa_node == node) + number++; +#else + number++; +#endif + } + } while (pdev); + + return number; +} + +#define LMC_MESSAGE_SIZE 120 +#define LMC_OTHER_SIZE (50 * ARRAY_SIZE(lmc_errors)) + +static irqreturn_t thunderx_lmc_err_isr(int irq, void *dev_id) +{ + struct mem_ctl_info *mci = dev_id; + struct thunderx_lmc *lmc = mci->pvt_info; + + unsigned long head = ring_pos(lmc->ring_head, ARRAY_SIZE(lmc->err_ctx)); + struct lmc_err_ctx *ctx = &lmc->err_ctx[head]; + + writeq(0, lmc->regs + LMC_CHAR_MASK0); + writeq(0, lmc->regs + LMC_CHAR_MASK2); + writeq(0x2, lmc->regs + LMC_ECC_PARITY_TEST); + + ctx->reg_int = readq(lmc->regs + LMC_INT); + ctx->reg_fadr = readq(lmc->regs + LMC_FADR); + ctx->reg_nxm_fadr = readq(lmc->regs + LMC_NXM_FADR); + ctx->reg_scram_fadr = readq(lmc->regs + LMC_SCRAM_FADR); + ctx->reg_ecc_synd = readq(lmc->regs + LMC_ECC_SYND); + + lmc->ring_head++; + + atomic_set(&lmc->ecc_int, 1); + + /* Clear the interrupt */ + writeq(ctx->reg_int, lmc->regs + LMC_INT); + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_lmc_threaded_isr(int irq, void *dev_id) +{ + struct mem_ctl_info *mci = dev_id; + struct thunderx_lmc *lmc = mci->pvt_info; + phys_addr_t phys_addr; + + unsigned long tail; + struct lmc_err_ctx *ctx; + + irqreturn_t ret = IRQ_NONE; + + char *msg; + char *other; + + msg = kmalloc(LMC_MESSAGE_SIZE, GFP_KERNEL); + other = kmalloc(LMC_OTHER_SIZE, GFP_KERNEL); + + if (!msg || !other) + goto err_free; + + while (CIRC_CNT(lmc->ring_head, lmc->ring_tail, + ARRAY_SIZE(lmc->err_ctx))) { + tail = ring_pos(lmc->ring_tail, ARRAY_SIZE(lmc->err_ctx)); + + ctx = &lmc->err_ctx[tail]; + + dev_dbg(&lmc->pdev->dev, "LMC_INT: %016llx\n", + ctx->reg_int); + dev_dbg(&lmc->pdev->dev, "LMC_FADR: %016llx\n", + ctx->reg_fadr); + dev_dbg(&lmc->pdev->dev, "LMC_NXM_FADR: %016llx\n", + ctx->reg_nxm_fadr); + dev_dbg(&lmc->pdev->dev, "LMC_SCRAM_FADR: %016llx\n", + ctx->reg_scram_fadr); + dev_dbg(&lmc->pdev->dev, "LMC_ECC_SYND: %016llx\n", + ctx->reg_ecc_synd); + + snprintf(msg, LMC_MESSAGE_SIZE, + "DIMM %lld rank %lld bank %lld row %lld col %lld", + LMC_FADR_FDIMM(ctx->reg_scram_fadr), + LMC_FADR_FBUNK(ctx->reg_scram_fadr), + LMC_FADR_FBANK(ctx->reg_scram_fadr), + LMC_FADR_FROW(ctx->reg_scram_fadr), + LMC_FADR_FCOL(ctx->reg_scram_fadr)); + + decode_register(other, LMC_OTHER_SIZE, lmc_errors, + ctx->reg_int); + + phys_addr = thunderx_faddr_to_phys(ctx->reg_fadr, lmc); + + if (ctx->reg_int & LMC_INT_UE) + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + phys_to_pfn(phys_addr), + offset_in_page(phys_addr), + 0, -1, -1, -1, msg, other); + else if (ctx->reg_int & LMC_INT_CE) + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + phys_to_pfn(phys_addr), + offset_in_page(phys_addr), + 0, -1, -1, -1, msg, other); + + lmc->ring_tail++; + } + + ret = IRQ_HANDLED; + +err_free: + kfree(msg); + kfree(other); + + return ret; +} + +#ifdef CONFIG_PM +static int thunderx_lmc_suspend(struct pci_dev *pdev, pm_message_t state) +{ + pci_save_state(pdev); + pci_disable_device(pdev); + + pci_set_power_state(pdev, pci_choose_state(pdev, state)); + + return 0; +} + +static int thunderx_lmc_resume(struct pci_dev *pdev) +{ + pci_set_power_state(pdev, PCI_D0); + pci_enable_wake(pdev, PCI_D0, 0); + pci_restore_state(pdev); + + return 0; +} +#endif + +static const struct pci_device_id thunderx_lmc_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) }, + { 0, }, +}; + +static inline int pci_dev_to_mc_idx(struct pci_dev *pdev) +{ + int node = dev_to_node(&pdev->dev); + int ret = PCI_FUNC(pdev->devfn); + + ret += max(node, 0) << 3; + + return ret; +} + +static int thunderx_lmc_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct thunderx_lmc *lmc; + struct edac_mc_layer layer; + struct mem_ctl_info *mci; + u64 lmc_control, lmc_ddr_pll_ctl, lmc_config; + int ret; + u64 lmc_int; + void *l2c_ioaddr; + + layer.type = EDAC_MC_LAYER_SLOT; + layer.size = 2; + layer.is_virt_csrow = false; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_lmc"); + if (ret) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + return ret; + } + + mci = edac_mc_alloc(pci_dev_to_mc_idx(pdev), 1, &layer, + sizeof(struct thunderx_lmc)); + if (!mci) + return -ENOMEM; + + mci->pdev = &pdev->dev; + lmc = mci->pvt_info; + + pci_set_drvdata(pdev, mci); + + lmc->regs = pcim_iomap_table(pdev)[0]; + + lmc_control = readq(lmc->regs + LMC_CONTROL); + lmc_ddr_pll_ctl = readq(lmc->regs + LMC_DDR_PLL_CTL); + lmc_config = readq(lmc->regs + LMC_CONFIG); + + if (lmc_control & LMC_CONTROL_RDIMM) { + mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4, + lmc_ddr_pll_ctl) ? + MEM_RDDR4 : MEM_RDDR3; + } else { + mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4, + lmc_ddr_pll_ctl) ? + MEM_DDR4 : MEM_DDR3; + } + + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + + mci->mod_name = "thunderx-lmc"; + mci->mod_ver = "1"; + mci->ctl_name = "thunderx-lmc"; + mci->dev_name = dev_name(&pdev->dev); + mci->scrub_mode = SCRUB_NONE; + + lmc->pdev = pdev; + lmc->msix_ent.entry = 0; + + lmc->ring_head = 0; + lmc->ring_tail = 0; + + ret = pci_enable_msix_exact(pdev, &lmc->msix_ent, 1); + if (ret) { + dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); + goto err_free; + } + + ret = devm_request_threaded_irq(&pdev->dev, lmc->msix_ent.vector, + thunderx_lmc_err_isr, + thunderx_lmc_threaded_isr, 0, + "[EDAC] ThunderX LMC", mci); + if (ret) { + dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret); + goto err_free; + } + + lmc->node = FIELD_GET(THUNDERX_NODE, pci_resource_start(pdev, 0)); + + lmc->xbits = thunderx_get_num_lmcs(lmc->node) >> 1; + lmc->bank_width = (FIELD_GET(LMC_DDR_PLL_CTL_DDR4, lmc_ddr_pll_ctl) && + FIELD_GET(LMC_CONFIG_BG2, lmc_config)) ? 4 : 3; + + lmc->pbank_lsb = (lmc_config >> 5) & 0xf; + lmc->dimm_lsb = 28 + lmc->pbank_lsb + lmc->xbits; + lmc->rank_lsb = lmc->dimm_lsb; + lmc->rank_lsb -= FIELD_GET(LMC_CONFIG_RANK_ENA, lmc_config) ? 1 : 0; + lmc->bank_lsb = 7 + lmc->xbits; + lmc->row_lsb = 14 + LMC_CONFIG_ROW_LSB(lmc_config) + lmc->xbits; + + lmc->col_hi_lsb = lmc->bank_lsb + lmc->bank_width; + + lmc->xor_bank = lmc_control & LMC_CONTROL_XOR_BANK; + + l2c_ioaddr = ioremap(L2C_CTL | FIELD_PREP(THUNDERX_NODE, lmc->node), + PAGE_SIZE); + + if (!l2c_ioaddr) { + dev_err(&pdev->dev, "Cannot map L2C_CTL\n"); + goto err_free; + } + + lmc->l2c_alias = !(readq(l2c_ioaddr) & L2C_CTL_DISIDXALIAS); + + iounmap(l2c_ioaddr); + + ret = edac_mc_add_mc(mci); + if (ret) { + dev_err(&pdev->dev, "Cannot add the MC: %d\n", ret); + goto err_free; + } + + lmc_int = readq(lmc->regs + LMC_INT); + writeq(lmc_int, lmc->regs + LMC_INT); + + writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1S); + + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + ret = thunderx_create_debugfs_nodes(mci->debugfs, + lmc_dfs_ents, + lmc, + ARRAY_SIZE(lmc_dfs_ents)); + + if (ret != ARRAY_SIZE(lmc_dfs_ents)) { + dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", + ret, ret >= 0 ? " created" : ""); + } + } + + return 0; + +err_free: + pci_set_drvdata(pdev, NULL); + edac_mc_free(mci); + + return ret; +} + +static void thunderx_lmc_remove(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci = pci_get_drvdata(pdev); + struct thunderx_lmc *lmc = mci->pvt_info; + + writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1C); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); +} + +MODULE_DEVICE_TABLE(pci, thunderx_lmc_pci_tbl); + +static struct pci_driver thunderx_lmc_driver = { + .name = "thunderx_lmc_edac", + .probe = thunderx_lmc_probe, + .remove = thunderx_lmc_remove, +#ifdef CONFIG_PM + .suspend = thunderx_lmc_suspend, + .resume = thunderx_lmc_resume, +#endif + .id_table = thunderx_lmc_pci_tbl, +}; + +/*---------------------- OCX driver ---------------------------------*/ + +#define PCI_DEVICE_ID_THUNDER_OCX 0xa013 + +#define OCX_LINK_INTS 3 +#define OCX_INTS (OCX_LINK_INTS + 1) +#define OCX_RX_LANES 24 +#define OCX_RX_LANE_STATS 15 + +#define OCX_COM_INT 0x100 +#define OCX_COM_INT_W1S 0x108 +#define OCX_COM_INT_ENA_W1S 0x110 +#define OCX_COM_INT_ENA_W1C 0x118 + +#define OCX_COM_IO_BADID BIT(54) +#define OCX_COM_MEM_BADID BIT(53) +#define OCX_COM_COPR_BADID BIT(52) +#define OCX_COM_WIN_REQ_BADID BIT(51) +#define OCX_COM_WIN_REQ_TOUT BIT(50) +#define OCX_COM_RX_LANE GENMASK(23, 0) + +#define OCX_COM_INT_CE (OCX_COM_IO_BADID | \ + OCX_COM_MEM_BADID | \ + OCX_COM_COPR_BADID | \ + OCX_COM_WIN_REQ_BADID | \ + OCX_COM_WIN_REQ_TOUT) + +static const struct error_descr ocx_com_errors[] = { + { + .type = ERR_CORRECTED, + .mask = OCX_COM_IO_BADID, + .descr = "Invalid IO transaction node ID", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_MEM_BADID, + .descr = "Invalid memory transaction node ID", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_COPR_BADID, + .descr = "Invalid coprocessor transaction node ID", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_WIN_REQ_BADID, + .descr = "Invalid SLI transaction node ID", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_WIN_REQ_TOUT, + .descr = "Window/core request timeout", + }, + {0, 0, NULL}, +}; + +#define OCX_COM_LINKX_INT(x) (0x120 + (x) * 8) +#define OCX_COM_LINKX_INT_W1S(x) (0x140 + (x) * 8) +#define OCX_COM_LINKX_INT_ENA_W1S(x) (0x160 + (x) * 8) +#define OCX_COM_LINKX_INT_ENA_W1C(x) (0x180 + (x) * 8) + +#define OCX_COM_LINK_BAD_WORD BIT(13) +#define OCX_COM_LINK_ALIGN_FAIL BIT(12) +#define OCX_COM_LINK_ALIGN_DONE BIT(11) +#define OCX_COM_LINK_UP BIT(10) +#define OCX_COM_LINK_STOP BIT(9) +#define OCX_COM_LINK_BLK_ERR BIT(8) +#define OCX_COM_LINK_REINIT BIT(7) +#define OCX_COM_LINK_LNK_DATA BIT(6) +#define OCX_COM_LINK_RXFIFO_DBE BIT(5) +#define OCX_COM_LINK_RXFIFO_SBE BIT(4) +#define OCX_COM_LINK_TXFIFO_DBE BIT(3) +#define OCX_COM_LINK_TXFIFO_SBE BIT(2) +#define OCX_COM_LINK_REPLAY_DBE BIT(1) +#define OCX_COM_LINK_REPLAY_SBE BIT(0) + +static const struct error_descr ocx_com_link_errors[] = { + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_REPLAY_SBE, + .descr = "Replay buffer single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_TXFIFO_SBE, + .descr = "TX FIFO single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_RXFIFO_SBE, + .descr = "RX FIFO single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_BLK_ERR, + .descr = "Block code error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_ALIGN_FAIL, + .descr = "Link alignment failure", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_BAD_WORD, + .descr = "Bad code word", + }, + { + .type = ERR_UNCORRECTED, + .mask = OCX_COM_LINK_REPLAY_DBE, + .descr = "Replay buffer double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = OCX_COM_LINK_TXFIFO_DBE, + .descr = "TX FIFO double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = OCX_COM_LINK_RXFIFO_DBE, + .descr = "RX FIFO double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = OCX_COM_LINK_STOP, + .descr = "Link stopped", + }, + {0, 0, NULL}, +}; + +#define OCX_COM_LINK_INT_UE (OCX_COM_LINK_REPLAY_DBE | \ + OCX_COM_LINK_TXFIFO_DBE | \ + OCX_COM_LINK_RXFIFO_DBE | \ + OCX_COM_LINK_STOP) + +#define OCX_COM_LINK_INT_CE (OCX_COM_LINK_REPLAY_SBE | \ + OCX_COM_LINK_TXFIFO_SBE | \ + OCX_COM_LINK_RXFIFO_SBE | \ + OCX_COM_LINK_BLK_ERR | \ + OCX_COM_LINK_ALIGN_FAIL | \ + OCX_COM_LINK_BAD_WORD) + +#define OCX_LNE_INT(x) (0x8018 + (x) * 0x100) +#define OCX_LNE_INT_EN(x) (0x8020 + (x) * 0x100) +#define OCX_LNE_BAD_CNT(x) (0x8028 + (x) * 0x100) +#define OCX_LNE_CFG(x) (0x8000 + (x) * 0x100) +#define OCX_LNE_STAT(x, y) (0x8040 + (x) * 0x100 + (y) * 8) + +#define OCX_LNE_CFG_RX_BDRY_LOCK_DIS BIT(8) +#define OCX_LNE_CFG_RX_STAT_WRAP_DIS BIT(2) +#define OCX_LNE_CFG_RX_STAT_RDCLR BIT(1) +#define OCX_LNE_CFG_RX_STAT_ENA BIT(0) + + +#define OCX_LANE_BAD_64B67B BIT(8) +#define OCX_LANE_DSKEW_FIFO_OVFL BIT(5) +#define OCX_LANE_SCRM_SYNC_LOSS BIT(4) +#define OCX_LANE_UKWN_CNTL_WORD BIT(3) +#define OCX_LANE_CRC32_ERR BIT(2) +#define OCX_LANE_BDRY_SYNC_LOSS BIT(1) +#define OCX_LANE_SERDES_LOCK_LOSS BIT(0) + +#define OCX_COM_LANE_INT_UE (0) +#define OCX_COM_LANE_INT_CE (OCX_LANE_SERDES_LOCK_LOSS | \ + OCX_LANE_BDRY_SYNC_LOSS | \ + OCX_LANE_CRC32_ERR | \ + OCX_LANE_UKWN_CNTL_WORD | \ + OCX_LANE_SCRM_SYNC_LOSS | \ + OCX_LANE_DSKEW_FIFO_OVFL | \ + OCX_LANE_BAD_64B67B) + +static const struct error_descr ocx_lane_errors[] = { + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_SERDES_LOCK_LOSS, + .descr = "RX SerDes lock lost", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_BDRY_SYNC_LOSS, + .descr = "RX word boundary lost", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_CRC32_ERR, + .descr = "CRC32 error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_UKWN_CNTL_WORD, + .descr = "Unknown control word", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_SCRM_SYNC_LOSS, + .descr = "Scrambler synchronization lost", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_DSKEW_FIFO_OVFL, + .descr = "RX deskew FIFO overflow", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_BAD_64B67B, + .descr = "Bad 64B/67B codeword", + }, + {0, 0, NULL}, +}; + +#define OCX_LNE_INT_ENA_ALL (GENMASK(9, 8) | GENMASK(6, 0)) +#define OCX_COM_INT_ENA_ALL (GENMASK(54, 50) | GENMASK(23, 0)) +#define OCX_COM_LINKX_INT_ENA_ALL (GENMASK(13, 12) | \ + GENMASK(9, 7) | GENMASK(5, 0)) + +#define OCX_TLKX_ECC_CTL(x) (0x10018 + (x) * 0x2000) +#define OCX_RLKX_ECC_CTL(x) (0x18018 + (x) * 0x2000) + +struct ocx_com_err_ctx { + u64 reg_com_int; + u64 reg_lane_int[OCX_RX_LANES]; + u64 reg_lane_stat11[OCX_RX_LANES]; +}; + +struct ocx_link_err_ctx { + u64 reg_com_link_int; + int link; +}; + +struct thunderx_ocx { + void __iomem *regs; + int com_link; + struct pci_dev *pdev; + struct edac_device_ctl_info *edac_dev; + + struct dentry *debugfs; + struct msix_entry msix_ent[OCX_INTS]; + + struct ocx_com_err_ctx com_err_ctx[RING_ENTRIES]; + struct ocx_link_err_ctx link_err_ctx[RING_ENTRIES]; + + unsigned long com_ring_head; + unsigned long com_ring_tail; + + unsigned long link_ring_head; + unsigned long link_ring_tail; +}; + +#define OCX_MESSAGE_SIZE SZ_1K +#define OCX_OTHER_SIZE (50 * ARRAY_SIZE(ocx_com_link_errors)) + +/* This handler is threaded */ +static irqreturn_t thunderx_ocx_com_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, + msix_ent[msix->entry]); + + int lane; + unsigned long head = ring_pos(ocx->com_ring_head, + ARRAY_SIZE(ocx->com_err_ctx)); + struct ocx_com_err_ctx *ctx = &ocx->com_err_ctx[head]; + + ctx->reg_com_int = readq(ocx->regs + OCX_COM_INT); + + for (lane = 0; lane < OCX_RX_LANES; lane++) { + ctx->reg_lane_int[lane] = + readq(ocx->regs + OCX_LNE_INT(lane)); + ctx->reg_lane_stat11[lane] = + readq(ocx->regs + OCX_LNE_STAT(lane, 11)); + + writeq(ctx->reg_lane_int[lane], ocx->regs + OCX_LNE_INT(lane)); + } + + writeq(ctx->reg_com_int, ocx->regs + OCX_COM_INT); + + ocx->com_ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, + msix_ent[msix->entry]); + + irqreturn_t ret = IRQ_NONE; + + unsigned long tail; + struct ocx_com_err_ctx *ctx; + int lane; + char *msg; + char *other; + + msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL); + other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL); + + if (!msg || !other) + goto err_free; + + while (CIRC_CNT(ocx->com_ring_head, ocx->com_ring_tail, + ARRAY_SIZE(ocx->com_err_ctx))) { + tail = ring_pos(ocx->com_ring_tail, + ARRAY_SIZE(ocx->com_err_ctx)); + ctx = &ocx->com_err_ctx[tail]; + + snprintf(msg, OCX_MESSAGE_SIZE, "%s: OCX_COM_INT: %016llx", + ocx->edac_dev->ctl_name, ctx->reg_com_int); + + decode_register(other, OCX_OTHER_SIZE, + ocx_com_errors, ctx->reg_com_int); + + strncat(msg, other, OCX_MESSAGE_SIZE); + + for (lane = 0; lane < OCX_RX_LANES; lane++) + if (ctx->reg_com_int & BIT(lane)) { + snprintf(other, OCX_OTHER_SIZE, + "\n\tOCX_LNE_INT[%02d]: %016llx OCX_LNE_STAT11[%02d]: %016llx", + lane, ctx->reg_lane_int[lane], + lane, ctx->reg_lane_stat11[lane]); + + strncat(msg, other, OCX_MESSAGE_SIZE); + + decode_register(other, OCX_OTHER_SIZE, + ocx_lane_errors, + ctx->reg_lane_int[lane]); + strncat(msg, other, OCX_MESSAGE_SIZE); + } + + if (ctx->reg_com_int & OCX_COM_INT_CE) + edac_device_handle_ce(ocx->edac_dev, 0, 0, msg); + + ocx->com_ring_tail++; + } + + ret = IRQ_HANDLED; + +err_free: + kfree(other); + kfree(msg); + + return ret; +} + +static irqreturn_t thunderx_ocx_lnk_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, + msix_ent[msix->entry]); + unsigned long head = ring_pos(ocx->link_ring_head, + ARRAY_SIZE(ocx->link_err_ctx)); + struct ocx_link_err_ctx *ctx = &ocx->link_err_ctx[head]; + + ctx->link = msix->entry; + ctx->reg_com_link_int = readq(ocx->regs + OCX_COM_LINKX_INT(ctx->link)); + + writeq(ctx->reg_com_link_int, ocx->regs + OCX_COM_LINKX_INT(ctx->link)); + + ocx->link_ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, + msix_ent[msix->entry]); + irqreturn_t ret = IRQ_NONE; + unsigned long tail; + struct ocx_link_err_ctx *ctx; + + char *msg; + char *other; + + msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL); + other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL); + + if (!msg || !other) + goto err_free; + + while (CIRC_CNT(ocx->link_ring_head, ocx->link_ring_tail, + ARRAY_SIZE(ocx->link_err_ctx))) { + tail = ring_pos(ocx->link_ring_head, + ARRAY_SIZE(ocx->link_err_ctx)); + + ctx = &ocx->link_err_ctx[tail]; + + snprintf(msg, OCX_MESSAGE_SIZE, + "%s: OCX_COM_LINK_INT[%d]: %016llx", + ocx->edac_dev->ctl_name, + ctx->link, ctx->reg_com_link_int); + + decode_register(other, OCX_OTHER_SIZE, + ocx_com_link_errors, ctx->reg_com_link_int); + + strncat(msg, other, OCX_MESSAGE_SIZE); + + if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE) + edac_device_handle_ue(ocx->edac_dev, 0, 0, msg); + else if (ctx->reg_com_link_int & OCX_COM_LINK_INT_CE) + edac_device_handle_ce(ocx->edac_dev, 0, 0, msg); + + ocx->link_ring_tail++; + } + + ret = IRQ_HANDLED; +err_free: + kfree(other); + kfree(msg); + + return ret; +} + +#define OCX_DEBUGFS_ATTR(_name, _reg) DEBUGFS_REG_ATTR(ocx, _name, _reg) + +OCX_DEBUGFS_ATTR(tlk0_ecc_ctl, OCX_TLKX_ECC_CTL(0)); +OCX_DEBUGFS_ATTR(tlk1_ecc_ctl, OCX_TLKX_ECC_CTL(1)); +OCX_DEBUGFS_ATTR(tlk2_ecc_ctl, OCX_TLKX_ECC_CTL(2)); + +OCX_DEBUGFS_ATTR(rlk0_ecc_ctl, OCX_RLKX_ECC_CTL(0)); +OCX_DEBUGFS_ATTR(rlk1_ecc_ctl, OCX_RLKX_ECC_CTL(1)); +OCX_DEBUGFS_ATTR(rlk2_ecc_ctl, OCX_RLKX_ECC_CTL(2)); + +OCX_DEBUGFS_ATTR(com_link0_int, OCX_COM_LINKX_INT_W1S(0)); +OCX_DEBUGFS_ATTR(com_link1_int, OCX_COM_LINKX_INT_W1S(1)); +OCX_DEBUGFS_ATTR(com_link2_int, OCX_COM_LINKX_INT_W1S(2)); + +OCX_DEBUGFS_ATTR(lne00_badcnt, OCX_LNE_BAD_CNT(0)); +OCX_DEBUGFS_ATTR(lne01_badcnt, OCX_LNE_BAD_CNT(1)); +OCX_DEBUGFS_ATTR(lne02_badcnt, OCX_LNE_BAD_CNT(2)); +OCX_DEBUGFS_ATTR(lne03_badcnt, OCX_LNE_BAD_CNT(3)); +OCX_DEBUGFS_ATTR(lne04_badcnt, OCX_LNE_BAD_CNT(4)); +OCX_DEBUGFS_ATTR(lne05_badcnt, OCX_LNE_BAD_CNT(5)); +OCX_DEBUGFS_ATTR(lne06_badcnt, OCX_LNE_BAD_CNT(6)); +OCX_DEBUGFS_ATTR(lne07_badcnt, OCX_LNE_BAD_CNT(7)); + +OCX_DEBUGFS_ATTR(lne08_badcnt, OCX_LNE_BAD_CNT(8)); +OCX_DEBUGFS_ATTR(lne09_badcnt, OCX_LNE_BAD_CNT(9)); +OCX_DEBUGFS_ATTR(lne10_badcnt, OCX_LNE_BAD_CNT(10)); +OCX_DEBUGFS_ATTR(lne11_badcnt, OCX_LNE_BAD_CNT(11)); +OCX_DEBUGFS_ATTR(lne12_badcnt, OCX_LNE_BAD_CNT(12)); +OCX_DEBUGFS_ATTR(lne13_badcnt, OCX_LNE_BAD_CNT(13)); +OCX_DEBUGFS_ATTR(lne14_badcnt, OCX_LNE_BAD_CNT(14)); +OCX_DEBUGFS_ATTR(lne15_badcnt, OCX_LNE_BAD_CNT(15)); + +OCX_DEBUGFS_ATTR(lne16_badcnt, OCX_LNE_BAD_CNT(16)); +OCX_DEBUGFS_ATTR(lne17_badcnt, OCX_LNE_BAD_CNT(17)); +OCX_DEBUGFS_ATTR(lne18_badcnt, OCX_LNE_BAD_CNT(18)); +OCX_DEBUGFS_ATTR(lne19_badcnt, OCX_LNE_BAD_CNT(19)); +OCX_DEBUGFS_ATTR(lne20_badcnt, OCX_LNE_BAD_CNT(20)); +OCX_DEBUGFS_ATTR(lne21_badcnt, OCX_LNE_BAD_CNT(21)); +OCX_DEBUGFS_ATTR(lne22_badcnt, OCX_LNE_BAD_CNT(22)); +OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23)); + +OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S); + +struct debugfs_entry *ocx_dfs_ents[] = { + &debugfs_tlk0_ecc_ctl, + &debugfs_tlk1_ecc_ctl, + &debugfs_tlk2_ecc_ctl, + + &debugfs_rlk0_ecc_ctl, + &debugfs_rlk1_ecc_ctl, + &debugfs_rlk2_ecc_ctl, + + &debugfs_com_link0_int, + &debugfs_com_link1_int, + &debugfs_com_link2_int, + + &debugfs_lne00_badcnt, + &debugfs_lne01_badcnt, + &debugfs_lne02_badcnt, + &debugfs_lne03_badcnt, + &debugfs_lne04_badcnt, + &debugfs_lne05_badcnt, + &debugfs_lne06_badcnt, + &debugfs_lne07_badcnt, + &debugfs_lne08_badcnt, + &debugfs_lne09_badcnt, + &debugfs_lne10_badcnt, + &debugfs_lne11_badcnt, + &debugfs_lne12_badcnt, + &debugfs_lne13_badcnt, + &debugfs_lne14_badcnt, + &debugfs_lne15_badcnt, + &debugfs_lne16_badcnt, + &debugfs_lne17_badcnt, + &debugfs_lne18_badcnt, + &debugfs_lne19_badcnt, + &debugfs_lne20_badcnt, + &debugfs_lne21_badcnt, + &debugfs_lne22_badcnt, + &debugfs_lne23_badcnt, + + &debugfs_com_int, +}; + +static const struct pci_device_id thunderx_ocx_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_OCX) }, + { 0, }, +}; + +static void thunderx_ocx_clearstats(struct thunderx_ocx *ocx) +{ + int lane, stat, cfg; + + for (lane = 0; lane < OCX_RX_LANES; lane++) { + cfg = readq(ocx->regs + OCX_LNE_CFG(lane)); + cfg |= OCX_LNE_CFG_RX_STAT_RDCLR; + cfg &= ~OCX_LNE_CFG_RX_STAT_ENA; + writeq(cfg, ocx->regs + OCX_LNE_CFG(lane)); + + for (stat = 0; stat < OCX_RX_LANE_STATS; stat++) + readq(ocx->regs + OCX_LNE_STAT(lane, stat)); + } +} + +static int thunderx_ocx_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct thunderx_ocx *ocx; + struct edac_device_ctl_info *edac_dev; + char name[32]; + int idx; + int i; + int ret; + u64 reg; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_ocx"); + if (ret) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + return ret; + } + + idx = edac_device_alloc_index(); + snprintf(name, sizeof(name), "OCX%d", idx); + edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx), + name, 1, "CCPI", 1, + 0, NULL, 0, idx); + if (!edac_dev) { + dev_err(&pdev->dev, "Cannot allocate EDAC device: %d\n", ret); + return -ENOMEM; + } + ocx = edac_dev->pvt_info; + ocx->edac_dev = edac_dev; + ocx->com_ring_head = 0; + ocx->com_ring_tail = 0; + ocx->link_ring_head = 0; + ocx->link_ring_tail = 0; + + ocx->regs = pcim_iomap_table(pdev)[0]; + if (!ocx->regs) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + ret = -ENODEV; + goto err_free; + } + + ocx->pdev = pdev; + + for (i = 0; i < OCX_INTS; i++) { + ocx->msix_ent[i].entry = i; + ocx->msix_ent[i].vector = 0; + } + + ret = pci_enable_msix_exact(pdev, ocx->msix_ent, OCX_INTS); + if (ret) { + dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); + goto err_free; + } + + for (i = 0; i < OCX_INTS; i++) { + ret = devm_request_threaded_irq(&pdev->dev, + ocx->msix_ent[i].vector, + (i == 3) ? + thunderx_ocx_com_isr : + thunderx_ocx_lnk_isr, + (i == 3) ? + thunderx_ocx_com_threaded_isr : + thunderx_ocx_lnk_threaded_isr, + 0, "[EDAC] ThunderX OCX", + &ocx->msix_ent[i]); + if (ret) + goto err_free; + } + + edac_dev->dev = &pdev->dev; + edac_dev->dev_name = dev_name(&pdev->dev); + edac_dev->mod_name = "thunderx-ocx"; + edac_dev->ctl_name = "thunderx-ocx"; + + ret = edac_device_add_device(edac_dev); + if (ret) { + dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret); + goto err_free; + } + + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + ocx->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name); + + ret = thunderx_create_debugfs_nodes(ocx->debugfs, + ocx_dfs_ents, + ocx, + ARRAY_SIZE(ocx_dfs_ents)); + if (ret != ARRAY_SIZE(ocx_dfs_ents)) { + dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", + ret, ret >= 0 ? " created" : ""); + } + } + + pci_set_drvdata(pdev, edac_dev); + + thunderx_ocx_clearstats(ocx); + + for (i = 0; i < OCX_RX_LANES; i++) { + writeq(OCX_LNE_INT_ENA_ALL, + ocx->regs + OCX_LNE_INT_EN(i)); + + reg = readq(ocx->regs + OCX_LNE_INT(i)); + writeq(reg, ocx->regs + OCX_LNE_INT(i)); + + } + + for (i = 0; i < OCX_LINK_INTS; i++) { + reg = readq(ocx->regs + OCX_COM_LINKX_INT(i)); + writeq(reg, ocx->regs + OCX_COM_LINKX_INT(i)); + + writeq(OCX_COM_LINKX_INT_ENA_ALL, + ocx->regs + OCX_COM_LINKX_INT_ENA_W1S(i)); + } + + reg = readq(ocx->regs + OCX_COM_INT); + writeq(reg, ocx->regs + OCX_COM_INT); + + writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1S); + + return 0; +err_free: + edac_device_free_ctl_info(edac_dev); + + return ret; +} + +static void thunderx_ocx_remove(struct pci_dev *pdev) +{ + struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev); + struct thunderx_ocx *ocx = edac_dev->pvt_info; + int i; + + writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1C); + + for (i = 0; i < OCX_INTS; i++) { + writeq(OCX_COM_LINKX_INT_ENA_ALL, + ocx->regs + OCX_COM_LINKX_INT_ENA_W1C(i)); + } + + edac_debugfs_remove_recursive(ocx->debugfs); + + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(edac_dev); +} + +MODULE_DEVICE_TABLE(pci, thunderx_ocx_pci_tbl); + +static struct pci_driver thunderx_ocx_driver = { + .name = "thunderx_ocx_edac", + .probe = thunderx_ocx_probe, + .remove = thunderx_ocx_remove, + .id_table = thunderx_ocx_pci_tbl, +}; + +/*---------------------- L2C driver ---------------------------------*/ + +#define PCI_DEVICE_ID_THUNDER_L2C_TAD 0xa02e +#define PCI_DEVICE_ID_THUNDER_L2C_CBC 0xa02f +#define PCI_DEVICE_ID_THUNDER_L2C_MCI 0xa030 + +#define L2C_TAD_INT_W1C 0x40000 +#define L2C_TAD_INT_W1S 0x40008 + +#define L2C_TAD_INT_ENA_W1C 0x40020 +#define L2C_TAD_INT_ENA_W1S 0x40028 + + +#define L2C_TAD_INT_L2DDBE BIT(1) +#define L2C_TAD_INT_SBFSBE BIT(2) +#define L2C_TAD_INT_SBFDBE BIT(3) +#define L2C_TAD_INT_FBFSBE BIT(4) +#define L2C_TAD_INT_FBFDBE BIT(5) +#define L2C_TAD_INT_TAGDBE BIT(9) +#define L2C_TAD_INT_RDDISLMC BIT(15) +#define L2C_TAD_INT_WRDISLMC BIT(16) +#define L2C_TAD_INT_LFBTO BIT(17) +#define L2C_TAD_INT_GSYNCTO BIT(18) +#define L2C_TAD_INT_RTGSBE BIT(32) +#define L2C_TAD_INT_RTGDBE BIT(33) +#define L2C_TAD_INT_RDDISOCI BIT(34) +#define L2C_TAD_INT_WRDISOCI BIT(35) + +#define L2C_TAD_INT_ECC (L2C_TAD_INT_L2DDBE | \ + L2C_TAD_INT_SBFSBE | L2C_TAD_INT_SBFDBE | \ + L2C_TAD_INT_FBFSBE | L2C_TAD_INT_FBFDBE) + +#define L2C_TAD_INT_CE (L2C_TAD_INT_SBFSBE | \ + L2C_TAD_INT_FBFSBE) + +#define L2C_TAD_INT_UE (L2C_TAD_INT_L2DDBE | \ + L2C_TAD_INT_SBFDBE | \ + L2C_TAD_INT_FBFDBE | \ + L2C_TAD_INT_TAGDBE | \ + L2C_TAD_INT_RTGDBE | \ + L2C_TAD_INT_WRDISOCI | \ + L2C_TAD_INT_RDDISOCI | \ + L2C_TAD_INT_WRDISLMC | \ + L2C_TAD_INT_RDDISLMC | \ + L2C_TAD_INT_LFBTO | \ + L2C_TAD_INT_GSYNCTO) + +static const struct error_descr l2_tad_errors[] = { + { + .type = ERR_CORRECTED, + .mask = L2C_TAD_INT_SBFSBE, + .descr = "SBF single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = L2C_TAD_INT_FBFSBE, + .descr = "FBF single-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_L2DDBE, + .descr = "L2D double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_SBFDBE, + .descr = "SBF double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_FBFDBE, + .descr = "FBF double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_TAGDBE, + .descr = "TAG double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_RTGDBE, + .descr = "RTG double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_WRDISOCI, + .descr = "Write to a disabled CCPI", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_RDDISOCI, + .descr = "Read from a disabled CCPI", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_WRDISLMC, + .descr = "Write to a disabled LMC", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_RDDISLMC, + .descr = "Read from a disabled LMC", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_LFBTO, + .descr = "LFB entry timeout", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_GSYNCTO, + .descr = "Global sync CCPI timeout", + }, + {0, 0, NULL}, +}; + +#define L2C_TAD_INT_TAG (L2C_TAD_INT_TAGDBE) + +#define L2C_TAD_INT_RTG (L2C_TAD_INT_RTGDBE) + +#define L2C_TAD_INT_DISLMC (L2C_TAD_INT_WRDISLMC | L2C_TAD_INT_RDDISLMC) + +#define L2C_TAD_INT_DISOCI (L2C_TAD_INT_WRDISOCI | L2C_TAD_INT_RDDISOCI) + +#define L2C_TAD_INT_ENA_ALL (L2C_TAD_INT_ECC | L2C_TAD_INT_TAG | \ + L2C_TAD_INT_RTG | \ + L2C_TAD_INT_DISLMC | L2C_TAD_INT_DISOCI | \ + L2C_TAD_INT_LFBTO) + +#define L2C_TAD_TIMETWO 0x50000 +#define L2C_TAD_TIMEOUT 0x50100 +#define L2C_TAD_ERR 0x60000 +#define L2C_TAD_TQD_ERR 0x60100 +#define L2C_TAD_TTG_ERR 0x60200 + + +#define L2C_CBC_INT_W1C 0x60000 + +#define L2C_CBC_INT_RSDSBE BIT(0) +#define L2C_CBC_INT_RSDDBE BIT(1) + +#define L2C_CBC_INT_RSD (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_RSDDBE) + +#define L2C_CBC_INT_MIBSBE BIT(4) +#define L2C_CBC_INT_MIBDBE BIT(5) + +#define L2C_CBC_INT_MIB (L2C_CBC_INT_MIBSBE | L2C_CBC_INT_MIBDBE) + +#define L2C_CBC_INT_IORDDISOCI BIT(6) +#define L2C_CBC_INT_IOWRDISOCI BIT(7) + +#define L2C_CBC_INT_IODISOCI (L2C_CBC_INT_IORDDISOCI | \ + L2C_CBC_INT_IOWRDISOCI) + +#define L2C_CBC_INT_CE (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_MIBSBE) +#define L2C_CBC_INT_UE (L2C_CBC_INT_RSDDBE | L2C_CBC_INT_MIBDBE) + + +static const struct error_descr l2_cbc_errors[] = { + { + .type = ERR_CORRECTED, + .mask = L2C_CBC_INT_RSDSBE, + .descr = "RSD single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = L2C_CBC_INT_MIBSBE, + .descr = "MIB single-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_RSDDBE, + .descr = "RSD double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_MIBDBE, + .descr = "MIB double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_IORDDISOCI, + .descr = "Read from a disabled CCPI", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_IOWRDISOCI, + .descr = "Write to a disabled CCPI", + }, + {0, 0, NULL}, +}; + +#define L2C_CBC_INT_W1S 0x60008 +#define L2C_CBC_INT_ENA_W1C 0x60020 + +#define L2C_CBC_INT_ENA_ALL (L2C_CBC_INT_RSD | L2C_CBC_INT_MIB | \ + L2C_CBC_INT_IODISOCI) + +#define L2C_CBC_INT_ENA_W1S 0x60028 + +#define L2C_CBC_IODISOCIERR 0x80008 +#define L2C_CBC_IOCERR 0x80010 +#define L2C_CBC_RSDERR 0x80018 +#define L2C_CBC_MIBERR 0x80020 + + +#define L2C_MCI_INT_W1C 0x0 + +#define L2C_MCI_INT_VBFSBE BIT(0) +#define L2C_MCI_INT_VBFDBE BIT(1) + +static const struct error_descr l2_mci_errors[] = { + { + .type = ERR_CORRECTED, + .mask = L2C_MCI_INT_VBFSBE, + .descr = "VBF single-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_MCI_INT_VBFDBE, + .descr = "VBF double-bit error", + }, + {0, 0, NULL}, +}; + +#define L2C_MCI_INT_W1S 0x8 +#define L2C_MCI_INT_ENA_W1C 0x20 + +#define L2C_MCI_INT_ENA_ALL (L2C_MCI_INT_VBFSBE | L2C_MCI_INT_VBFDBE) + +#define L2C_MCI_INT_ENA_W1S 0x28 + +#define L2C_MCI_ERR 0x10000 + +#define L2C_MESSAGE_SIZE SZ_1K +#define L2C_OTHER_SIZE (50 * ARRAY_SIZE(l2_tad_errors)) + +struct l2c_err_ctx { + char *reg_ext_name; + u64 reg_int; + u64 reg_ext; +}; + +struct thunderx_l2c { + void __iomem *regs; + struct pci_dev *pdev; + struct edac_device_ctl_info *edac_dev; + + struct dentry *debugfs; + + int index; + + struct msix_entry msix_ent; + + struct l2c_err_ctx err_ctx[RING_ENTRIES]; + unsigned long ring_head; + unsigned long ring_tail; +}; + +static irqreturn_t thunderx_l2c_tad_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_l2c *tad = container_of(msix, struct thunderx_l2c, + msix_ent); + + unsigned long head = ring_pos(tad->ring_head, ARRAY_SIZE(tad->err_ctx)); + struct l2c_err_ctx *ctx = &tad->err_ctx[head]; + + ctx->reg_int = readq(tad->regs + L2C_TAD_INT_W1C); + + if (ctx->reg_int & L2C_TAD_INT_ECC) { + ctx->reg_ext_name = "TQD_ERR"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_TQD_ERR); + } else if (ctx->reg_int & L2C_TAD_INT_TAG) { + ctx->reg_ext_name = "TTG_ERR"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_TTG_ERR); + } else if (ctx->reg_int & L2C_TAD_INT_LFBTO) { + ctx->reg_ext_name = "TIMEOUT"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_TIMEOUT); + } else if (ctx->reg_int & L2C_TAD_INT_DISOCI) { + ctx->reg_ext_name = "ERR"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_ERR); + } + + writeq(ctx->reg_int, tad->regs + L2C_TAD_INT_W1C); + + tad->ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_l2c_cbc_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_l2c *cbc = container_of(msix, struct thunderx_l2c, + msix_ent); + + unsigned long head = ring_pos(cbc->ring_head, ARRAY_SIZE(cbc->err_ctx)); + struct l2c_err_ctx *ctx = &cbc->err_ctx[head]; + + ctx->reg_int = readq(cbc->regs + L2C_CBC_INT_W1C); + + if (ctx->reg_int & L2C_CBC_INT_RSD) { + ctx->reg_ext_name = "RSDERR"; + ctx->reg_ext = readq(cbc->regs + L2C_CBC_RSDERR); + } else if (ctx->reg_int & L2C_CBC_INT_MIB) { + ctx->reg_ext_name = "MIBERR"; + ctx->reg_ext = readq(cbc->regs + L2C_CBC_MIBERR); + } else if (ctx->reg_int & L2C_CBC_INT_IODISOCI) { + ctx->reg_ext_name = "IODISOCIERR"; + ctx->reg_ext = readq(cbc->regs + L2C_CBC_IODISOCIERR); + } + + writeq(ctx->reg_int, cbc->regs + L2C_CBC_INT_W1C); + + cbc->ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_l2c_mci_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_l2c *mci = container_of(msix, struct thunderx_l2c, + msix_ent); + + unsigned long head = ring_pos(mci->ring_head, ARRAY_SIZE(mci->err_ctx)); + struct l2c_err_ctx *ctx = &mci->err_ctx[head]; + + ctx->reg_int = readq(mci->regs + L2C_MCI_INT_W1C); + ctx->reg_ext = readq(mci->regs + L2C_MCI_ERR); + + writeq(ctx->reg_int, mci->regs + L2C_MCI_INT_W1C); + + ctx->reg_ext_name = "ERR"; + + mci->ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_l2c *l2c = container_of(msix, struct thunderx_l2c, + msix_ent); + + unsigned long tail = ring_pos(l2c->ring_tail, ARRAY_SIZE(l2c->err_ctx)); + struct l2c_err_ctx *ctx = &l2c->err_ctx[tail]; + irqreturn_t ret = IRQ_NONE; + + u64 mask_ue, mask_ce; + const struct error_descr *l2_errors; + char *reg_int_name; + + char *msg; + char *other; + + msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL); + other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL); + + if (!msg || !other) + goto err_free; + + switch (l2c->pdev->device) { + case PCI_DEVICE_ID_THUNDER_L2C_TAD: + reg_int_name = "L2C_TAD_INT"; + mask_ue = L2C_TAD_INT_UE; + mask_ce = L2C_TAD_INT_CE; + l2_errors = l2_tad_errors; + break; + case PCI_DEVICE_ID_THUNDER_L2C_CBC: + reg_int_name = "L2C_CBC_INT"; + mask_ue = L2C_CBC_INT_UE; + mask_ce = L2C_CBC_INT_CE; + l2_errors = l2_cbc_errors; + break; + case PCI_DEVICE_ID_THUNDER_L2C_MCI: + reg_int_name = "L2C_MCI_INT"; + mask_ue = L2C_MCI_INT_VBFDBE; + mask_ce = L2C_MCI_INT_VBFSBE; + l2_errors = l2_mci_errors; + break; + default: + dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n", + l2c->pdev->device); + return IRQ_NONE; + } + + while (CIRC_CNT(l2c->ring_head, l2c->ring_tail, + ARRAY_SIZE(l2c->err_ctx))) { + snprintf(msg, L2C_MESSAGE_SIZE, + "%s: %s: %016llx, %s: %016llx", + l2c->edac_dev->ctl_name, reg_int_name, ctx->reg_int, + ctx->reg_ext_name, ctx->reg_ext); + + decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int); + + strncat(msg, other, L2C_MESSAGE_SIZE); + + if (ctx->reg_int & mask_ue) + edac_device_handle_ue(l2c->edac_dev, 0, 0, msg); + else if (ctx->reg_int & mask_ce) + edac_device_handle_ce(l2c->edac_dev, 0, 0, msg); + + l2c->ring_tail++; + } + + return IRQ_HANDLED; + +err_free: + kfree(other); + kfree(msg); + + return ret; +} + +#define L2C_DEBUGFS_ATTR(_name, _reg) DEBUGFS_REG_ATTR(l2c, _name, _reg) + +L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S); + +struct debugfs_entry *l2c_tad_dfs_ents[] = { + &debugfs_tad_int, +}; + +L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S); + +struct debugfs_entry *l2c_cbc_dfs_ents[] = { + &debugfs_cbc_int, +}; + +L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S); + +struct debugfs_entry *l2c_mci_dfs_ents[] = { + &debugfs_mci_int, +}; + +static const struct pci_device_id thunderx_l2c_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_TAD), }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_CBC), }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_MCI), }, + { 0, }, +}; + +static int thunderx_l2c_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct thunderx_l2c *l2c; + struct edac_device_ctl_info *edac_dev; + struct debugfs_entry **l2c_devattr; + size_t dfs_entries; + irqreturn_t (*thunderx_l2c_isr)(int, void *) = NULL; + char name[32]; + const char *fmt; + u64 reg_en_offs, reg_en_mask; + int idx; + int ret; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_l2c"); + if (ret) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + return ret; + } + + switch (pdev->device) { + case PCI_DEVICE_ID_THUNDER_L2C_TAD: + thunderx_l2c_isr = thunderx_l2c_tad_isr; + l2c_devattr = l2c_tad_dfs_ents; + dfs_entries = ARRAY_SIZE(l2c_tad_dfs_ents); + fmt = "L2C-TAD%d"; + reg_en_offs = L2C_TAD_INT_ENA_W1S; + reg_en_mask = L2C_TAD_INT_ENA_ALL; + break; + case PCI_DEVICE_ID_THUNDER_L2C_CBC: + thunderx_l2c_isr = thunderx_l2c_cbc_isr; + l2c_devattr = l2c_cbc_dfs_ents; + dfs_entries = ARRAY_SIZE(l2c_cbc_dfs_ents); + fmt = "L2C-CBC%d"; + reg_en_offs = L2C_CBC_INT_ENA_W1S; + reg_en_mask = L2C_CBC_INT_ENA_ALL; + break; + case PCI_DEVICE_ID_THUNDER_L2C_MCI: + thunderx_l2c_isr = thunderx_l2c_mci_isr; + l2c_devattr = l2c_mci_dfs_ents; + dfs_entries = ARRAY_SIZE(l2c_mci_dfs_ents); + fmt = "L2C-MCI%d"; + reg_en_offs = L2C_MCI_INT_ENA_W1S; + reg_en_mask = L2C_MCI_INT_ENA_ALL; + break; + default: + //Should never ever get here + dev_err(&pdev->dev, "Unsupported PCI device: %04x\n", + pdev->device); + return -EINVAL; + } + + idx = edac_device_alloc_index(); + snprintf(name, sizeof(name), fmt, idx); + + edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c), + name, 1, "L2C", 1, 0, + NULL, 0, idx); + if (!edac_dev) { + dev_err(&pdev->dev, "Cannot allocate EDAC device\n"); + return -ENOMEM; + } + + l2c = edac_dev->pvt_info; + l2c->edac_dev = edac_dev; + + l2c->regs = pcim_iomap_table(pdev)[0]; + if (!l2c->regs) { + dev_err(&pdev->dev, "Cannot map PCI resources\n"); + ret = -ENODEV; + goto err_free; + } + + l2c->pdev = pdev; + + l2c->ring_head = 0; + l2c->ring_tail = 0; + + l2c->msix_ent.entry = 0; + l2c->msix_ent.vector = 0; + + ret = pci_enable_msix_exact(pdev, &l2c->msix_ent, 1); + if (ret) { + dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); + goto err_free; + } + + ret = devm_request_threaded_irq(&pdev->dev, l2c->msix_ent.vector, + thunderx_l2c_isr, + thunderx_l2c_threaded_isr, + 0, "[EDAC] ThunderX L2C", + &l2c->msix_ent); + if (ret) + goto err_free; + + edac_dev->dev = &pdev->dev; + edac_dev->dev_name = dev_name(&pdev->dev); + edac_dev->mod_name = "thunderx-l2c"; + edac_dev->ctl_name = "thunderx-l2c"; + + ret = edac_device_add_device(edac_dev); + if (ret) { + dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret); + goto err_free; + } + + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name); + + thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr, + l2c, dfs_entries); + + if (ret != dfs_entries) { + dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", + ret, ret >= 0 ? " created" : ""); + } + } + + pci_set_drvdata(pdev, edac_dev); + + writeq(reg_en_mask, l2c->regs + reg_en_offs); + + return 0; + +err_free: + edac_device_free_ctl_info(edac_dev); + + return ret; +} + +static void thunderx_l2c_remove(struct pci_dev *pdev) +{ + struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev); + struct thunderx_l2c *l2c = edac_dev->pvt_info; + + switch (pdev->device) { + case PCI_DEVICE_ID_THUNDER_L2C_TAD: + writeq(L2C_TAD_INT_ENA_ALL, l2c->regs + L2C_TAD_INT_ENA_W1C); + break; + case PCI_DEVICE_ID_THUNDER_L2C_CBC: + writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_CBC_INT_ENA_W1C); + break; + case PCI_DEVICE_ID_THUNDER_L2C_MCI: + writeq(L2C_MCI_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C); + break; + } + + edac_debugfs_remove_recursive(l2c->debugfs); + + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(edac_dev); +} + +MODULE_DEVICE_TABLE(pci, thunderx_l2c_pci_tbl); + +static struct pci_driver thunderx_l2c_driver = { + .name = "thunderx_l2c_edac", + .probe = thunderx_l2c_probe, + .remove = thunderx_l2c_remove, + .id_table = thunderx_l2c_pci_tbl, +}; + +static int __init thunderx_edac_init(void) +{ + int rc = 0; + + rc = pci_register_driver(&thunderx_lmc_driver); + if (rc) + return rc; + + rc = pci_register_driver(&thunderx_ocx_driver); + if (rc) + goto err_lmc; + + rc = pci_register_driver(&thunderx_l2c_driver); + if (rc) + goto err_ocx; + + return rc; +err_ocx: + pci_unregister_driver(&thunderx_ocx_driver); +err_lmc: + pci_unregister_driver(&thunderx_lmc_driver); + + return rc; +} + +static void __exit thunderx_edac_exit(void) +{ + pci_unregister_driver(&thunderx_l2c_driver); + pci_unregister_driver(&thunderx_ocx_driver); + pci_unregister_driver(&thunderx_lmc_driver); + +} + +module_init(thunderx_edac_init); +module_exit(thunderx_edac_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Cavium, Inc."); +MODULE_DESCRIPTION("EDAC Driver for Cavium ThunderX"); diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c index 6c270d9d304a..669246056812 100644 --- a/drivers/edac/xgene_edac.c +++ b/drivers/edac/xgene_edac.c @@ -1596,7 +1596,7 @@ static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev) reg = readl(ctx->dev_csr + IOBPATRANSERRINTSTS); if (!reg) goto chk_iob_axi0; - dev_err(edac_dev->dev, "IOB procesing agent (PA) transaction error\n"); + dev_err(edac_dev->dev, "IOB processing agent (PA) transaction error\n"); if (reg & IOBPA_RDATA_CORRUPT_MASK) dev_err(edac_dev->dev, "IOB PA read data RAM error\n"); if (reg & IOBPA_M_RDATA_CORRUPT_MASK) |