author     Dan Williams <dan.j.williams@intel.com>    2023-02-07 22:12:24 +0300
committer  Dan Williams <dan.j.williams@intel.com>    2023-02-07 22:12:24 +0300
commit     5485eb955994a238eafd08d9266005b1c9ac7991 (patch)
tree       fad2a31e1f9340de6aecbec16fa18509409ff5d1 /drivers
parent     711442e29f16f0d39dd0e2460c9baacfccb9d5a7 (diff)
parent     623c0751336e4035ab0047f2c152a02bd26b612b (diff)
download   linux-5485eb955994a238eafd08d9266005b1c9ac7991.tar.xz
Merge branch 'for-6.3/cxl' into cxl/next
Merge the general CXL updates with fixes targeting v6.2-rc for v6.3.
Resolve a conflict with the fix and move of cxl_report_and_clear() from
pci.c to core/pci.c.
Diffstat (limited to 'drivers')
 drivers/acpi/pci_root.c   |   3
 drivers/cxl/Kconfig       |   2
 drivers/cxl/acpi.c        |   2
 drivers/cxl/core/Makefile |   3
 drivers/cxl/core/mbox.c   |  28
 drivers/cxl/core/pci.c    | 115
 drivers/cxl/core/port.c   |  31
 drivers/cxl/core/region.c |  23
 drivers/cxl/core/trace.c  |   5
 drivers/cxl/core/trace.h  | 109
 drivers/cxl/cxl.h         |   2
 drivers/cxl/cxlpci.h      |   3
 drivers/cxl/pci.c         | 116
 drivers/pci/probe.c       |   1
 14 files changed, 317 insertions(+), 126 deletions(-)
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index b3c202d2a433..84030804a763 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -1047,6 +1047,9 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root, if (!(root->osc_control_set & OSC_PCI_EXPRESS_DPC_CONTROL)) host_bridge->native_dpc = 0; + if (!(root->osc_ext_control_set & OSC_CXL_ERROR_REPORTING_CONTROL)) + host_bridge->native_cxl_error = 0; + /* * Evaluate the "PCI Boot Configuration" _DSM Function. If it * exists and returns 0, we must preserve any PCI resource diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 0ac53c422c31..9e709ecba50f 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -116,7 +116,7 @@ config CXL_REGION_INVALIDATION_TEST depends on CXL_REGION help CXL Region management and security operations potentially invalidate - the content of CPU caches without notifiying those caches to + the content of CPU caches without notifying those caches to invalidate the affected cachelines. The CXL Region driver attempts to invalidate caches when those events occur. If that invalidation fails the region will fail to enable. Reasons for cache diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 13cde44c6086..d7159fb3beef 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -19,7 +19,7 @@ struct cxl_cxims_data { /* * Find a targets entry (n) in the host bridge interleave list. - * CXL Specfication 3.0 Table 9-22 + * CXL Specification 3.0 Table 9-22 */ static int cxl_xor_calc_n(u64 hpa, struct cxl_cxims_data *cximsd, int iw, int ig) diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 79c7257f4107..ca4ae31d8f57 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -3,6 +3,8 @@ obj-$(CONFIG_CXL_BUS) += cxl_core.o obj-$(CONFIG_CXL_SUSPEND) += suspend.o ccflags-y += -I$(srctree)/drivers/cxl +CFLAGS_trace.o = -DTRACE_INCLUDE_PATH=. -I$(src) + cxl_core-y := port.o cxl_core-y += pmem.o cxl_core-y += regs.o @@ -10,4 +12,5 @@ cxl_core-y += memdev.o cxl_core-y += mbox.o cxl_core-y += pci.o cxl_core-y += hdm.o +cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index b03fba212799..202d49dd9911 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -170,6 +170,12 @@ int cxl_internal_send_cmd(struct cxl_dev_state *cxlds, out_size = mbox_cmd->size_out; min_out = mbox_cmd->min_out; rc = cxlds->mbox_send(cxlds, mbox_cmd); + /* + * EIO is reserved for a payload size mismatch and mbox_send() + * may not return this error. + */ + if (WARN_ONCE(rc == -EIO, "Bad return code: -EIO")) + return -ENXIO; if (rc) return rc; @@ -550,9 +556,9 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) return 0; } -static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 size, u8 *out) +static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 *size, u8 *out) { - u32 remaining = size; + u32 remaining = *size; u32 offset = 0; while (remaining) { @@ -576,6 +582,17 @@ static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 size, u8 }; rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + + /* + * The output payload length that indicates the number + * of valid bytes can be smaller than the Log buffer + * size. 
+ */ + if (rc == -EIO && mbox_cmd.size_out < xfer_size) { + offset += mbox_cmd.size_out; + break; + } + if (rc < 0) return rc; @@ -584,6 +601,8 @@ static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 size, u8 offset += xfer_size; } + *size = offset; + return 0; } @@ -610,11 +629,12 @@ static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel) if (!cmd) { dev_dbg(cxlds->dev, - "Opcode 0x%04x unsupported by driver", opcode); + "Opcode 0x%04x unsupported by driver\n", opcode); continue; } set_bit(cmd->info.id, cxlds->enabled_cmds); + dev_dbg(cxlds->dev, "Opcode 0x%04x enabled\n", opcode); } } @@ -694,7 +714,7 @@ int cxl_enumerate_cmds(struct cxl_dev_state *cxlds) goto out; } - rc = cxl_xfer_log(cxlds, &uuid, size, log); + rc = cxl_xfer_log(cxlds, &uuid, &size, log); if (rc) { kvfree(log); goto out; diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 57764e9cd19d..184ead6a2796 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -9,6 +9,7 @@ #include <cxlmem.h> #include <cxl.h> #include "core.h" +#include "trace.h" /** * DOC: cxl core pci @@ -622,3 +623,117 @@ void read_cdat_data(struct cxl_port *port) } } EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); + +void cxl_cor_error_detected(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + void __iomem *addr; + u32 status; + + if (!cxlds->regs.ras) + return; + + addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); + trace_cxl_aer_correctable_error(dev, status); + } +} +EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); + +/* CXL spec rev3.0 8.2.4.16.1 */ +static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) +{ + void __iomem *addr; + u32 *log_addr; + int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); + + addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET; + log_addr = log; + + for (i = 0; i < log_u32_size; i++) { + *log_addr = readl(addr); + log_addr++; + addr += sizeof(u32); + } +} + +/* + * Log the state of the RAS status registers and prepare them to log the + * next error status. Return 1 if reset needed. 
+ */ +static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) +{ + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + u32 hl[CXL_HEADERLOG_SIZE_U32]; + void __iomem *addr; + u32 status; + u32 fe; + + if (!cxlds->regs.ras) + return false; + + addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) + return false; + + /* If multiple errors, log header points to first error from ctrl reg */ + if (hweight32(status) > 1) { + void __iomem *rcc_addr = + cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET; + + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, + readl(rcc_addr))); + } else { + fe = status; + } + + header_log_copy(cxlds, hl); + trace_cxl_aer_uncorrectable_error(dev, status, fe, hl); + writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); + + return true; +} + +pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + bool ue; + + /* + * A frozen channel indicates an impending reset which is fatal to + * CXL.mem operation, and will likely crash the system. On the off + * chance the situation is recoverable dump the status of the RAS + * capability registers and bounce the active state of the memdev. + */ + ue = cxl_report_and_clear(cxlds); + + switch (state) { + case pci_channel_io_normal: + if (ue) { + device_release_driver(dev); + return PCI_ERS_RESULT_NEED_RESET; + } + return PCI_ERS_RESULT_CAN_RECOVER; + case pci_channel_io_frozen: + dev_warn(&pdev->dev, + "%s: frozen state error detected, disable CXL.mem\n", + dev_name(dev)); + device_release_driver(dev); + return PCI_ERS_RESULT_NEED_RESET; + case pci_channel_io_perm_failure: + dev_warn(&pdev->dev, + "failure state error detected, request disconnect\n"); + return PCI_ERS_RESULT_DISCONNECT; + } + return PCI_ERS_RESULT_NEED_RESET; +} +EXPORT_SYMBOL_NS_GPL(cxl_error_detected, CXL); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index b631a0520456..609aa6801b14 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -583,6 +583,29 @@ static int devm_cxl_link_uport(struct device *host, struct cxl_port *port) return devm_add_action_or_reset(host, cxl_unlink_uport, port); } +static void cxl_unlink_parent_dport(void *_port) +{ + struct cxl_port *port = _port; + + sysfs_remove_link(&port->dev.kobj, "parent_dport"); +} + +static int devm_cxl_link_parent_dport(struct device *host, + struct cxl_port *port, + struct cxl_dport *parent_dport) +{ + int rc; + + if (!parent_dport) + return 0; + + rc = sysfs_create_link(&port->dev.kobj, &parent_dport->dport->kobj, + "parent_dport"); + if (rc) + return rc; + return devm_add_action_or_reset(host, cxl_unlink_parent_dport, port); +} + static struct lock_class_key cxl_port_key; static struct cxl_port *cxl_port_alloc(struct device *uport, @@ -692,6 +715,10 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host, if (rc) return ERR_PTR(rc); + rc = devm_cxl_link_parent_dport(host, port, parent_dport); + if (rc) + return ERR_PTR(rc); + return port; err: @@ -1137,7 +1164,7 @@ static struct cxl_port *find_cxl_port_at(struct cxl_port *parent_port, } /* - * All users of grandparent() are using it to walk PCIe-like swich port + * All users of grandparent() are using it to walk PCIe-like switch port * hierarchy. 
A PCIe switch is comprised of a bridge device representing the * upstream switch port and N bridges representing downstream switch ports. When * bridges stack the grand-parent of a downstream switch port is another @@ -1164,6 +1191,7 @@ static void delete_endpoint(void *data) device_lock(parent); if (parent->driver && !endpoint->dead) { + devm_release_action(parent, cxl_unlink_parent_dport, endpoint); devm_release_action(parent, cxl_unlink_uport, endpoint); devm_release_action(parent, unregister_port, endpoint); } @@ -1194,6 +1222,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL); */ static void delete_switch_port(struct cxl_port *port) { + devm_release_action(port->dev.parent, cxl_unlink_parent_dport, port); devm_release_action(port->dev.parent, cxl_unlink_uport, port); devm_release_action(port->dev.parent, unregister_port, port); } diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 940f805b1534..67e83d961670 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -157,6 +157,22 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) return 0; } +static int commit_decoder(struct cxl_decoder *cxld) +{ + struct cxl_switch_decoder *cxlsd = NULL; + + if (cxld->commit) + return cxld->commit(cxld); + + if (is_switch_decoder(&cxld->dev)) + cxlsd = to_cxl_switch_decoder(&cxld->dev); + + if (dev_WARN_ONCE(&cxld->dev, !cxlsd || cxlsd->nr_targets > 1, + "->commit() is required\n")) + return -ENXIO; + return 0; +} + static int cxl_region_decode_commit(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; @@ -175,8 +191,7 @@ static int cxl_region_decode_commit(struct cxl_region *cxlr) iter = to_cxl_port(iter->dev.parent)) { cxl_rr = cxl_rr_load(iter, cxlr); cxld = cxl_rr->decoder; - if (cxld->commit) - rc = cxld->commit(cxld); + rc = commit_decoder(cxld); if (rc) break; } @@ -401,7 +416,7 @@ static ssize_t interleave_granularity_store(struct device *dev, * When the host-bridge is interleaved, disallow region granularity != * root granularity. Regions with a granularity less than the root * interleave result in needing multiple endpoints to support a single - * slot in the interleave (possible to suport in the future). Regions + * slot in the interleave (possible to support in the future). Regions * with a granularity greater than the root interleave result in invalid * DPA translations (invalid to support). */ @@ -1969,7 +1984,7 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) if (!cpu_cache_has_invalidate_memregion()) { if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) { - dev_warn( + dev_warn_once( &cxlr->dev, "Bypassing cpu_cache_invalidate_memregion() for testing!\n"); clear_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags); diff --git a/drivers/cxl/core/trace.c b/drivers/cxl/core/trace.c new file mode 100644 index 000000000000..29ae7ce81dc5 --- /dev/null +++ b/drivers/cxl/core/trace.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ + +#define CREATE_TRACE_POINTS +#include "trace.h" diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h new file mode 100644 index 000000000000..20ca2fe2ca8e --- /dev/null +++ b/drivers/cxl/core/trace.h @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2022 Intel Corporation. All rights reserved. 
*/ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cxl + +#if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _CXL_EVENTS_H + +#include <cxl.h> +#include <linux/tracepoint.h> + +#define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0) +#define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1) +#define CXL_RAS_UC_CACHE_BE_PARITY BIT(2) +#define CXL_RAS_UC_CACHE_DATA_ECC BIT(3) +#define CXL_RAS_UC_MEM_DATA_PARITY BIT(4) +#define CXL_RAS_UC_MEM_ADDR_PARITY BIT(5) +#define CXL_RAS_UC_MEM_BE_PARITY BIT(6) +#define CXL_RAS_UC_MEM_DATA_ECC BIT(7) +#define CXL_RAS_UC_REINIT_THRESH BIT(8) +#define CXL_RAS_UC_RSVD_ENCODE BIT(9) +#define CXL_RAS_UC_POISON BIT(10) +#define CXL_RAS_UC_RECV_OVERFLOW BIT(11) +#define CXL_RAS_UC_INTERNAL_ERR BIT(14) +#define CXL_RAS_UC_IDE_TX_ERR BIT(15) +#define CXL_RAS_UC_IDE_RX_ERR BIT(16) + +#define show_uc_errs(status) __print_flags(status, " | ", \ + { CXL_RAS_UC_CACHE_DATA_PARITY, "Cache Data Parity Error" }, \ + { CXL_RAS_UC_CACHE_ADDR_PARITY, "Cache Address Parity Error" }, \ + { CXL_RAS_UC_CACHE_BE_PARITY, "Cache Byte Enable Parity Error" }, \ + { CXL_RAS_UC_CACHE_DATA_ECC, "Cache Data ECC Error" }, \ + { CXL_RAS_UC_MEM_DATA_PARITY, "Memory Data Parity Error" }, \ + { CXL_RAS_UC_MEM_ADDR_PARITY, "Memory Address Parity Error" }, \ + { CXL_RAS_UC_MEM_BE_PARITY, "Memory Byte Enable Parity Error" }, \ + { CXL_RAS_UC_MEM_DATA_ECC, "Memory Data ECC Error" }, \ + { CXL_RAS_UC_REINIT_THRESH, "REINIT Threshold Hit" }, \ + { CXL_RAS_UC_RSVD_ENCODE, "Received Unrecognized Encoding" }, \ + { CXL_RAS_UC_POISON, "Received Poison From Peer" }, \ + { CXL_RAS_UC_RECV_OVERFLOW, "Receiver Overflow" }, \ + { CXL_RAS_UC_INTERNAL_ERR, "Component Specific Error" }, \ + { CXL_RAS_UC_IDE_TX_ERR, "IDE Tx Error" }, \ + { CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \ +) + +TRACE_EVENT(cxl_aer_uncorrectable_error, + TP_PROTO(const struct device *dev, u32 status, u32 fe, u32 *hl), + TP_ARGS(dev, status, fe, hl), + TP_STRUCT__entry( + __string(dev_name, dev_name(dev)) + __field(u32, status) + __field(u32, first_error) + __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) + ), + TP_fast_assign( + __assign_str(dev_name, dev_name(dev)); + __entry->status = status; + __entry->first_error = fe; + /* + * Embed the 512B headerlog data for user app retrieval and + * parsing, but no need to print this in the trace buffer. 
+ */ + memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); + ), + TP_printk("%s: status: '%s' first_error: '%s'", + __get_str(dev_name), + show_uc_errs(__entry->status), + show_uc_errs(__entry->first_error) + ) +); + +#define CXL_RAS_CE_CACHE_DATA_ECC BIT(0) +#define CXL_RAS_CE_MEM_DATA_ECC BIT(1) +#define CXL_RAS_CE_CRC_THRESH BIT(2) +#define CLX_RAS_CE_RETRY_THRESH BIT(3) +#define CXL_RAS_CE_CACHE_POISON BIT(4) +#define CXL_RAS_CE_MEM_POISON BIT(5) +#define CXL_RAS_CE_PHYS_LAYER_ERR BIT(6) + +#define show_ce_errs(status) __print_flags(status, " | ", \ + { CXL_RAS_CE_CACHE_DATA_ECC, "Cache Data ECC Error" }, \ + { CXL_RAS_CE_MEM_DATA_ECC, "Memory Data ECC Error" }, \ + { CXL_RAS_CE_CRC_THRESH, "CRC Threshold Hit" }, \ + { CLX_RAS_CE_RETRY_THRESH, "Retry Threshold" }, \ + { CXL_RAS_CE_CACHE_POISON, "Received Cache Poison From Peer" }, \ + { CXL_RAS_CE_MEM_POISON, "Received Memory Poison From Peer" }, \ + { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \ +) + +TRACE_EVENT(cxl_aer_correctable_error, + TP_PROTO(const struct device *dev, u32 status), + TP_ARGS(dev, status), + TP_STRUCT__entry( + __string(dev_name, dev_name(dev)) + __field(u32, status) + ), + TP_fast_assign( + __assign_str(dev_name, dev_name(dev)); + __entry->status = status; + ), + TP_printk("%s: status: '%s'", + __get_str(dev_name), show_ce_errs(__entry->status) + ) +); + +#endif /* _CXL_EVENTS_H */ + +#define TRACE_INCLUDE_FILE trace +#include <trace/define_trace.h> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 1b1cf459ac77..aa3af3bb73b2 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -140,6 +140,8 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw) #define CXL_RAS_CAP_CONTROL_FE_MASK GENMASK(5, 0) #define CXL_RAS_HEADER_LOG_OFFSET 0x18 #define CXL_RAS_CAPABILITY_LENGTH 0x58 +#define CXL_HEADERLOG_SIZE SZ_512 +#define CXL_HEADERLOG_SIZE_U32 SZ_512 / sizeof(u32) /* CXL 2.0 8.2.8.1 Device Capabilities Array Register */ #define CXLDEV_CAP_ARRAY_OFFSET 0x0 diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 920909791bb9..77dbdb980b12 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -66,4 +66,7 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm); void read_cdat_data(struct cxl_port *port); +void cxl_cor_error_detected(struct pci_dev *pdev); +pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, + pci_channel_state_t state); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 258004f34281..2bbebbc7e032 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -14,8 +14,6 @@ #include "cxlmem.h" #include "cxlpci.h" #include "cxl.h" -#define CREATE_TRACE_POINTS -#include <trace/events/cxl.h> /** * DOC: cxl pci @@ -162,7 +160,7 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET); /* #4 */ - dev_dbg(dev, "Sending command\n"); + dev_dbg(dev, "Sending command: 0x%04x\n", mbox_cmd->opcode); writel(CXLDEV_MBOX_CTRL_DOORBELL, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); @@ -514,99 +512,6 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl); -/* CXL spec rev3.0 8.2.4.16.1 */ -static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) -{ - void __iomem *addr; - u32 *log_addr; - int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); - - addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET; - 
log_addr = log; - - for (i = 0; i < log_u32_size; i++) { - *log_addr = readl(addr); - log_addr++; - addr += sizeof(u32); - } -} - -/* - * Log the state of the RAS status registers and prepare them to log the - * next error status. Return 1 if reset needed. - */ -static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) -{ - struct cxl_memdev *cxlmd = cxlds->cxlmd; - struct device *dev = &cxlmd->dev; - u32 hl[CXL_HEADERLOG_SIZE_U32]; - void __iomem *addr; - u32 status; - u32 fe; - - if (!cxlds->regs.ras) - return false; - - addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; - status = readl(addr); - if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) - return false; - - /* If multiple errors, log header points to first error from ctrl reg */ - if (hweight32(status) > 1) { - void __iomem *rcc_addr = - cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET; - - fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, - readl(rcc_addr))); - } else { - fe = status; - } - - header_log_copy(cxlds, hl); - trace_cxl_aer_uncorrectable_error(dev, status, fe, hl); - writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); - - return true; -} - -static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - struct cxl_memdev *cxlmd = cxlds->cxlmd; - struct device *dev = &cxlmd->dev; - bool ue; - - /* - * A frozen channel indicates an impending reset which is fatal to - * CXL.mem operation, and will likely crash the system. On the off - * chance the situation is recoverable dump the status of the RAS - * capability registers and bounce the active state of the memdev. - */ - ue = cxl_report_and_clear(cxlds); - - switch (state) { - case pci_channel_io_normal: - if (ue) { - device_release_driver(dev); - return PCI_ERS_RESULT_NEED_RESET; - } - return PCI_ERS_RESULT_CAN_RECOVER; - case pci_channel_io_frozen: - dev_warn(&pdev->dev, - "%s: frozen state error detected, disable CXL.mem\n", - dev_name(dev)); - device_release_driver(dev); - return PCI_ERS_RESULT_NEED_RESET; - case pci_channel_io_perm_failure: - dev_warn(&pdev->dev, - "failure state error detected, request disconnect\n"); - return PCI_ERS_RESULT_DISCONNECT; - } - return PCI_ERS_RESULT_NEED_RESET; -} - static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev) { struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); @@ -631,25 +536,6 @@ static void cxl_error_resume(struct pci_dev *pdev) dev->driver ? "successful" : "failed"); } -static void cxl_cor_error_detected(struct pci_dev *pdev) -{ - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - struct cxl_memdev *cxlmd = cxlds->cxlmd; - struct device *dev = &cxlmd->dev; - void __iomem *addr; - u32 status; - - if (!cxlds->regs.ras) - return; - - addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET; - status = readl(addr); - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); - trace_cxl_aer_correctable_error(dev, status); - } -} - static const struct pci_error_handlers cxl_error_handlers = { .error_detected = cxl_error_detected, .slot_reset = cxl_slot_reset, diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 1779582fb500..f796dfb9b14b 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -596,6 +596,7 @@ static void pci_init_host_bridge(struct pci_host_bridge *bridge) bridge->native_ltr = 1; bridge->native_dpc = 1; bridge->domain_nr = PCI_DOMAIN_NR_NOT_SET; + bridge->native_cxl_error = 1; device_initialize(&bridge->dev); } |
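
For context, a minimal sketch (not part of this commit) of how the error handlers that drivers/cxl/core/pci.c now exports through cxlpci.h can be wired into a memdev PCI driver. cxl_slot_reset(), cxl_error_resume(), and cxl_mem_pci_tbl are taken from the surrounding hunks in drivers/cxl/pci.c; cxl_pci_probe() and the exact layout of the real handlers struct are assumptions for illustration.

/*
 * Hypothetical wiring sketch, not taken verbatim from this commit:
 * cxl_error_detected() and cxl_cor_error_detected() are the handlers
 * exported from drivers/cxl/core/pci.c via cxlpci.h, while
 * cxl_slot_reset()/cxl_error_resume() remain local to drivers/cxl/pci.c.
 * cxl_pci_probe() is assumed here for illustration only.
 */
#include <linux/pci.h>
#include "cxlpci.h"

static const struct pci_error_handlers cxl_error_handlers = {
	.error_detected		= cxl_error_detected,		/* exported by cxl_core */
	.slot_reset		= cxl_slot_reset,
	.resume			= cxl_error_resume,
	.cor_error_detected	= cxl_cor_error_detected,	/* exported by cxl_core */
};

static struct pci_driver cxl_pci_driver = {
	.name		= KBUILD_MODNAME,
	.id_table	= cxl_mem_pci_tbl,
	.probe		= cxl_pci_probe,
	.err_handler	= &cxl_error_handlers,
};

With CONFIG_TRACING enabled, the tracepoints defined in drivers/cxl/core/trace.h should appear under the "cxl" trace system (e.g. events/cxl/cxl_aer_correctable_error/ in tracefs), which is how the AER handlers above surface RAS status and header-log data to userspace.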