author     Linus Torvalds <torvalds@linux-foundation.org>   2023-11-05 05:20:36 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>   2023-11-05 05:20:36 +0300
commit     b8cc56d0414e2330d9fe05342843512b1ad8cdb7 (patch)
tree       a91c8dd64ec831509800f23288d50032da8e1096 /drivers
parent     5e2cb28dd7e182dfa641550dfa225913509ad45d (diff)
parent     4b92894064b3df472b2cf5741c7f080e16dcd1ec (diff)
download   linux-b8cc56d0414e2330d9fe05342843512b1ad8cdb7.tar.xz
Merge tag 'cxl-for-6.7' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull CXL (Compute Express Link) updates from Dan Williams:
"The main new functionality this time is work to allow Linux to
natively handle CXL link protocol errors signalled via PCIe AER for
current generation CXL platforms. This required some enlightenment of
the PCIe AER core to work around the fact that current generation RCH
(Restricted CXL Host) platforms physically hide topology details and
registers via a mechanism called RCRB (Root Complex Register Block).
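To give a flavor of that path: on RCH platforms the AER capability sits
in MMIO behind the RCRB rather than in config space, so the CXL core
copies it out with 32-bit reads and classifies the severity itself
before invoking the RAS handlers. Condensed from the
cxl_rch_get_aer_severity() helper added to drivers/cxl/core/pci.c in the
diff below (the register copy and status clearing are omitted here):

	/* Derive an AER severity from an AER capability copied out of the
	 * RCRB; returns false when no unmasked error is pending. */
	static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
					     int *severity)
	{
		if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
			if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
				*severity = AER_FATAL;
			else
				*severity = AER_NONFATAL;
			return true;
		}
		if (aer_regs->cor_status & ~aer_regs->cor_mask) {
			*severity = AER_CORRECTABLE;
			return true;
		}
		return false;
	}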
The next major highlight is reworks to address bugs in parsing region
configurations for next generation VH (Virtual Host) topologies. The
old broken algorithm is replaced with a simpler one that significantly
increases the number of region configurations supported by Linux. This
is again relevant for error handling so that forward and reverse
address translation of memory errors can be carried out by Linux for
memory regions instantiated by platform firmware.
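The replacement algorithm computes each endpoint's interleave position
directly by walking from the endpoint toward the root and folding in, at
every hop, the port's index within its parent decoder's target list and
the parent's interleave ways. The core loop, condensed from the new
cxl_calc_interleave_pos() in drivers/cxl/core/region.c (declarations and
the worked example in its kernel-doc are omitted here):

	/* position = position * parent_ways + parent_pos, applied per hop */
	for (iter = port; iter; iter = next_port(iter)) {
		if (is_cxl_root(iter))
			break;
		rc = find_pos_and_ways(iter, range, &parent_pos, &parent_ways);
		if (rc)
			return rc;
		pos = pos * parent_ways + parent_pos;
	}
	return pos;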
As for other cross-tree work, the ACPI table parsing code has been
refactored so it can be reused to parse the "CDAT" structure, an
ACPI-like data structure reported by CXL devices. That work is in
preparation for v6.8 support for CXL QoS. Think of this as NUMA node
topology information generated dynamically by Linux rather than by
platform firmware.
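Related to that, the CDAT payload read over DOE is now also
integrity-checked: a valid table byte-sums to zero modulo 256. The
helper added to drivers/cxl/core/pci.c is essentially:

	/* ACPI-style checksum: the bytes of a valid table sum to zero */
	static unsigned char cdat_checksum(void *buf, size_t size)
	{
		unsigned char sum, *data = buf;
		size_t i;

		for (sum = 0, i = 0; i < size; i++)
			sum += data[i];
		return sum;	/* non-zero means the table is rejected */
	}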
Lastly, a number of internal object lifetime issues have been resolved
along with misc. fixes and feature updates (decoders_committed sysfs
ABI).
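The decoders_committed attribute itself is a thin, region-lock-protected
read of the port's commit cursor, per the drivers/cxl/core/port.c changes
below:

	/* commit_end is the id of the last committed decoder, or -1 */
	int cxl_num_decoders_committed(struct cxl_port *port)
	{
		lockdep_assert_held(&cxl_region_rwsem);
		return port->commit_end + 1;
	}

	static ssize_t decoders_committed_show(struct device *dev,
					       struct device_attribute *attr,
					       char *buf)
	{
		struct cxl_port *port = to_cxl_port(dev);
		int rc;

		down_read(&cxl_region_rwsem);
		rc = sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port));
		up_read(&cxl_region_rwsem);

		return rc;
	}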
Summary:
- Add support for RCH (Restricted CXL Host) Error recovery
- Fix several region assembly bugs
- Fix mem-device lifetime issues related to the sanitize command and
  RCH topology
- Refactor ACPI table parsing for CDAT parsing re-use in preparation
for CXL QoS support"
* tag 'cxl-for-6.7' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (50 commits)
lib/fw_table: Remove acpi_parse_entries_array() export
cxl/pci: Change CXL AER support check to use native AER
cxl/hdm: Remove broken error path
cxl/hdm: Fix && vs || bug
acpi: Move common tables helper functions to common lib
cxl: Add support for reading CXL switch CDAT table
cxl: Add checksum verification to CDAT from CXL
cxl: Export QTG ids from CFMWS to sysfs as qos_class attribute
cxl: Add decoders_committed sysfs attribute to cxl_port
cxl: Add cxl_decoders_committed() helper
cxl/core/regs: Rework cxl_map_pmu_regs() to use map->dev for devm
cxl/core/regs: Rename phys_addr in cxl_map_component_regs()
PCI/AER: Unmask RCEC internal errors to enable RCH downstream port error handling
PCI/AER: Forward RCH downstream port-detected errors to the CXL.mem dev handler
cxl/pci: Disable root port interrupts in RCH mode
cxl/pci: Add RCH downstream port error logging
cxl/pci: Map RCH downstream AER registers for logging protocol errors
cxl/pci: Update CXL error logging to use RAS register address
PCI/AER: Refactor cper_print_aer() for use by CXL driver module
cxl/pci: Add RCH downstream port AER register discovery
...
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/acpi/Kconfig       |   1
-rw-r--r--  drivers/acpi/tables.c      | 173
-rw-r--r--  drivers/cxl/acpi.c         |   3
-rw-r--r--  drivers/cxl/core/core.h    |   2
-rw-r--r--  drivers/cxl/core/hdm.c     |  93
-rw-r--r--  drivers/cxl/core/mbox.c    |  60
-rw-r--r--  drivers/cxl/core/memdev.c  | 161
-rw-r--r--  drivers/cxl/core/pci.c     | 275
-rw-r--r--  drivers/cxl/core/port.c    | 184
-rw-r--r--  drivers/cxl/core/region.c  | 266
-rw-r--r--  drivers/cxl/core/regs.c    |  73
-rw-r--r--  drivers/cxl/cxl.h          |  38
-rw-r--r--  drivers/cxl/cxlmem.h       |  17
-rw-r--r--  drivers/cxl/mem.c          |   7
-rw-r--r--  drivers/cxl/pci.c          | 107
-rw-r--r--  drivers/cxl/port.c         |   3
-rw-r--r--  drivers/pci/pcie/Kconfig   |   9
-rw-r--r--  drivers/pci/pcie/aer.c     | 159
18 files changed, 1019 insertions, 612 deletions
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 554e487cbfab..f819e760ff19 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -12,6 +12,7 @@ menuconfig ACPI select PNP select NLS select CRC32 + select FIRMWARE_TABLE default y if X86 help Advanced Configuration and Power Interface (ACPI) support for diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 8ab0a82b4da4..c1516337f668 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -37,18 +37,6 @@ static struct acpi_table_desc initial_tables[ACPI_MAX_TABLES] __initdata; static int acpi_apic_instance __initdata_or_acpilib; -enum acpi_subtable_type { - ACPI_SUBTABLE_COMMON, - ACPI_SUBTABLE_HMAT, - ACPI_SUBTABLE_PRMT, - ACPI_SUBTABLE_CEDT, -}; - -struct acpi_subtable_entry { - union acpi_subtable_headers *hdr; - enum acpi_subtable_type type; -}; - /* * Disable table checksum verification for the early stage due to the size * limitation of the current x86 early mapping implementation. @@ -237,167 +225,6 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header) } } -static unsigned long __init_or_acpilib -acpi_get_entry_type(struct acpi_subtable_entry *entry) -{ - switch (entry->type) { - case ACPI_SUBTABLE_COMMON: - return entry->hdr->common.type; - case ACPI_SUBTABLE_HMAT: - return entry->hdr->hmat.type; - case ACPI_SUBTABLE_PRMT: - return 0; - case ACPI_SUBTABLE_CEDT: - return entry->hdr->cedt.type; - } - return 0; -} - -static unsigned long __init_or_acpilib -acpi_get_entry_length(struct acpi_subtable_entry *entry) -{ - switch (entry->type) { - case ACPI_SUBTABLE_COMMON: - return entry->hdr->common.length; - case ACPI_SUBTABLE_HMAT: - return entry->hdr->hmat.length; - case ACPI_SUBTABLE_PRMT: - return entry->hdr->prmt.length; - case ACPI_SUBTABLE_CEDT: - return entry->hdr->cedt.length; - } - return 0; -} - -static unsigned long __init_or_acpilib -acpi_get_subtable_header_length(struct acpi_subtable_entry *entry) -{ - switch (entry->type) { - case ACPI_SUBTABLE_COMMON: - return sizeof(entry->hdr->common); - case ACPI_SUBTABLE_HMAT: - return sizeof(entry->hdr->hmat); - case ACPI_SUBTABLE_PRMT: - return sizeof(entry->hdr->prmt); - case ACPI_SUBTABLE_CEDT: - return sizeof(entry->hdr->cedt); - } - return 0; -} - -static enum acpi_subtable_type __init_or_acpilib -acpi_get_subtable_type(char *id) -{ - if (strncmp(id, ACPI_SIG_HMAT, 4) == 0) - return ACPI_SUBTABLE_HMAT; - if (strncmp(id, ACPI_SIG_PRMT, 4) == 0) - return ACPI_SUBTABLE_PRMT; - if (strncmp(id, ACPI_SIG_CEDT, 4) == 0) - return ACPI_SUBTABLE_CEDT; - return ACPI_SUBTABLE_COMMON; -} - -static __init_or_acpilib bool has_handler(struct acpi_subtable_proc *proc) -{ - return proc->handler || proc->handler_arg; -} - -static __init_or_acpilib int call_handler(struct acpi_subtable_proc *proc, - union acpi_subtable_headers *hdr, - unsigned long end) -{ - if (proc->handler) - return proc->handler(hdr, end); - if (proc->handler_arg) - return proc->handler_arg(hdr, proc->arg, end); - return -EINVAL; -} - -/** - * acpi_parse_entries_array - for each proc_num find a suitable subtable - * - * @id: table id (for debugging purposes) - * @table_size: size of the root table - * @table_header: where does the table start? - * @proc: array of acpi_subtable_proc struct containing entry id - * and associated handler with it - * @proc_num: how big proc is? - * @max_entries: how many entries can we process? - * - * For each proc_num find a subtable with proc->id and run proc->handler - * on it. 
Assumption is that there's only single handler for particular - * entry id. - * - * The table_size is not the size of the complete ACPI table (the length - * field in the header struct), but only the size of the root table; i.e., - * the offset from the very first byte of the complete ACPI table, to the - * first byte of the very first subtable. - * - * On success returns sum of all matching entries for all proc handlers. - * Otherwise, -ENODEV or -EINVAL is returned. - */ -static int __init_or_acpilib acpi_parse_entries_array( - char *id, unsigned long table_size, - struct acpi_table_header *table_header, struct acpi_subtable_proc *proc, - int proc_num, unsigned int max_entries) -{ - struct acpi_subtable_entry entry; - unsigned long table_end, subtable_len, entry_len; - int count = 0; - int errs = 0; - int i; - - table_end = (unsigned long)table_header + table_header->length; - - /* Parse all entries looking for a match. */ - - entry.type = acpi_get_subtable_type(id); - entry.hdr = (union acpi_subtable_headers *) - ((unsigned long)table_header + table_size); - subtable_len = acpi_get_subtable_header_length(&entry); - - while (((unsigned long)entry.hdr) + subtable_len < table_end) { - if (max_entries && count >= max_entries) - break; - - for (i = 0; i < proc_num; i++) { - if (acpi_get_entry_type(&entry) != proc[i].id) - continue; - if (!has_handler(&proc[i]) || - (!errs && - call_handler(&proc[i], entry.hdr, table_end))) { - errs++; - continue; - } - - proc[i].count++; - break; - } - if (i != proc_num) - count++; - - /* - * If entry->length is 0, break from this loop to avoid - * infinite loop. - */ - entry_len = acpi_get_entry_length(&entry); - if (entry_len == 0) { - pr_err("[%4.4s:0x%02x] Invalid zero length\n", id, proc->id); - return -EINVAL; - } - - entry.hdr = (union acpi_subtable_headers *) - ((unsigned long)entry.hdr + entry_len); - } - - if (max_entries && count > max_entries) { - pr_warn("[%4.4s:0x%02x] found the maximum %i entries\n", - id, proc->id, count); - } - - return errs ? 
-EINVAL : count; -} - int __init_or_acpilib acpi_table_parse_entries_array( char *id, unsigned long table_size, struct acpi_subtable_proc *proc, int proc_num, unsigned int max_entries) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 40d055560e52..2034eb4ce83f 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -289,6 +289,9 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, } } } + + cxlrd->qos_class = cfmws->qtg_id; + rc = cxl_decoder_add(cxld, target_map); err_xormap: if (rc) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 45e7e044cf4a..86d7ba23235e 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -73,8 +73,10 @@ struct cxl_rcrb_info; resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri, enum cxl_rcrb which); +u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb); extern struct rw_semaphore cxl_dpa_rwsem; +extern struct rw_semaphore cxl_region_rwsem; int cxl_memdev_init(void); void cxl_memdev_exit(void); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 4449b34a80cc..1cc9be85ba4c 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -81,26 +81,6 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm) cxlhdm->interleave_mask |= GENMASK(14, 12); } -static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb, - struct cxl_component_regs *regs) -{ - struct cxl_register_map map = { - .dev = &port->dev, - .resource = port->component_reg_phys, - .base = crb, - .max_size = CXL_COMPONENT_REG_BLOCK_SIZE, - }; - - cxl_probe_component_regs(&port->dev, crb, &map.component_map); - if (!map.component_map.hdm_decoder.valid) { - dev_dbg(&port->dev, "HDM decoder registers not implemented\n"); - /* unique error code to indicate no HDM decoder capability */ - return -ENODEV; - } - - return cxl_map_component_regs(&map, regs, BIT(CXL_CM_CAP_CAP_ID_HDM)); -} - static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) { struct cxl_hdm *cxlhdm; @@ -153,9 +133,9 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, struct cxl_endpoint_dvsec_info *info) { + struct cxl_register_map *reg_map = &port->reg_map; struct device *dev = &port->dev; struct cxl_hdm *cxlhdm; - void __iomem *crb; int rc; cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL); @@ -164,19 +144,29 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, cxlhdm->port = port; dev_set_drvdata(dev, cxlhdm); - crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); - if (!crb && info && info->mem_enabled) { + /* Memory devices can configure device HDM using DVSEC range regs. 
*/ + if (reg_map->resource == CXL_RESOURCE_NONE) { + if (!info || !info->mem_enabled) { + dev_err(dev, "No component registers mapped\n"); + return ERR_PTR(-ENXIO); + } + cxlhdm->decoder_count = info->ranges; return cxlhdm; - } else if (!crb) { - dev_err(dev, "No component registers mapped\n"); - return ERR_PTR(-ENXIO); } - rc = map_hdm_decoder_regs(port, crb, &cxlhdm->regs); - iounmap(crb); - if (rc) + if (!reg_map->component_map.hdm_decoder.valid) { + dev_dbg(&port->dev, "HDM decoder registers not implemented\n"); + /* unique error code to indicate no HDM decoder capability */ + return ERR_PTR(-ENODEV); + } + + rc = cxl_map_component_regs(reg_map, &cxlhdm->regs, + BIT(CXL_CM_CAP_CAP_ID_HDM)); + if (rc) { + dev_err(dev, "Failed to map HDM capability.\n"); return ERR_PTR(rc); + } parse_hdm_decoder_caps(cxlhdm); if (cxlhdm->decoder_count == 0) { @@ -575,17 +565,11 @@ static void cxld_set_type(struct cxl_decoder *cxld, u32 *ctrl) CXL_HDM_DECODER0_CTRL_HOSTONLY); } -static int cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt) +static void cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt) { struct cxl_dport **t = &cxlsd->target[0]; int ways = cxlsd->cxld.interleave_ways; - if (dev_WARN_ONCE(&cxlsd->cxld.dev, - ways > 8 || ways > cxlsd->nr_targets, - "ways: %d overflows targets: %d\n", ways, - cxlsd->nr_targets)) - return -ENXIO; - *tgt = FIELD_PREP(GENMASK(7, 0), t[0]->port_id); if (ways > 1) *tgt |= FIELD_PREP(GENMASK(15, 8), t[1]->port_id); @@ -601,8 +585,6 @@ static int cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt) *tgt |= FIELD_PREP(GENMASK_ULL(55, 48), t[6]->port_id); if (ways > 7) *tgt |= FIELD_PREP(GENMASK_ULL(63, 56), t[7]->port_id); - - return 0; } /* @@ -643,13 +625,33 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld) if (cxld->flags & CXL_DECODER_F_ENABLE) return 0; - if (port->commit_end + 1 != id) { + if (cxl_num_decoders_committed(port) != id) { dev_dbg(&port->dev, "%s: out of order commit, expected decoder%d.%d\n", - dev_name(&cxld->dev), port->id, port->commit_end + 1); + dev_name(&cxld->dev), port->id, + cxl_num_decoders_committed(port)); return -EBUSY; } + /* + * For endpoint decoders hosted on CXL memory devices that + * support the sanitize operation, make sure sanitize is not in-flight. 
+ */ + if (is_endpoint_decoder(&cxld->dev)) { + struct cxl_endpoint_decoder *cxled = + to_cxl_endpoint_decoder(&cxld->dev); + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_memdev_state *mds = + to_cxl_memdev_state(cxlmd->cxlds); + + if (mds && mds->security.sanitize_active) { + dev_dbg(&cxlmd->dev, + "attempted to commit %s during sanitize\n", + dev_name(&cxld->dev)); + return -EBUSY; + } + } + down_read(&cxl_dpa_rwsem); /* common decoder settings */ ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(cxld->id)); @@ -670,13 +672,7 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld) void __iomem *tl_lo = hdm + CXL_HDM_DECODER0_TL_LOW(id); u64 targets; - rc = cxlsd_set_targets(cxlsd, &targets); - if (rc) { - dev_dbg(&port->dev, "%s: target configuration error\n", - dev_name(&cxld->dev)); - goto err; - } - + cxlsd_set_targets(cxlsd, &targets); writel(upper_32_bits(targets), tl_hi); writel(lower_32_bits(targets), tl_lo); } else { @@ -694,7 +690,6 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld) port->commit_end++; rc = cxld_await_commit(hdm, cxld->id); -err: if (rc) { dev_dbg(&port->dev, "%s: error %d committing decoder\n", dev_name(&cxld->dev), rc); @@ -844,7 +839,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, cxld->target_type = CXL_DECODER_HOSTONLYMEM; else cxld->target_type = CXL_DECODER_DEVMEM; - if (cxld->id != port->commit_end + 1) { + if (cxld->id != cxl_num_decoders_committed(port)) { dev_warn(&port->dev, "decoder%d.%d: Committed out of order\n", port->id, cxld->id); diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 4df4f614f490..36270dcfb42e 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1125,20 +1125,7 @@ int cxl_dev_state_identify(struct cxl_memdev_state *mds) } EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL); -/** - * cxl_mem_sanitize() - Send a sanitization command to the device. - * @mds: The device data for the operation - * @cmd: The specific sanitization command opcode - * - * Return: 0 if the command was executed successfully, regardless of - * whether or not the actual security operation is done in the background, - * such as for the Sanitize case. - * Error return values can be the result of the mailbox command, -EINVAL - * when security requirements are not met or invalid contexts. - * - * See CXL 3.0 @8.2.9.8.5.1 Sanitize and @8.2.9.8.5.2 Secure Erase. - */ -int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd) +static int __cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd) { int rc; u32 sec_out = 0; @@ -1183,7 +1170,45 @@ int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd) return 0; } -EXPORT_SYMBOL_NS_GPL(cxl_mem_sanitize, CXL); + + +/** + * cxl_mem_sanitize() - Send a sanitization command to the device. + * @cxlmd: The device for the operation + * @cmd: The specific sanitization command opcode + * + * Return: 0 if the command was executed successfully, regardless of + * whether or not the actual security operation is done in the background, + * such as for the Sanitize case. + * Error return values can be the result of the mailbox command, -EINVAL + * when security requirements are not met or invalid contexts, or -EBUSY + * if the sanitize operation is already in flight. + * + * See CXL 3.0 @8.2.9.8.5.1 Sanitize and @8.2.9.8.5.2 Secure Erase. 
+ */ +int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) +{ + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_port *endpoint; + int rc; + + /* synchronize with cxl_mem_probe() and decoder write operations */ + device_lock(&cxlmd->dev); + endpoint = cxlmd->endpoint; + down_read(&cxl_region_rwsem); + /* + * Require an endpoint to be safe otherwise the driver can not + * be sure that the device is unmapped. + */ + if (endpoint && cxl_num_decoders_committed(endpoint) == 0) + rc = __cxl_mem_sanitize(mds, cmd); + else + rc = -EBUSY; + up_read(&cxl_region_rwsem); + device_unlock(&cxlmd->dev); + + return rc; +} static int add_dpa_res(struct device *dev, struct resource *parent, struct resource *res, resource_size_t start, @@ -1224,8 +1249,7 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds) return 0; } - cxlds->dpa_res = - (struct resource)DEFINE_RES_MEM(0, mds->total_bytes); + cxlds->dpa_res = DEFINE_RES_MEM(0, mds->total_bytes); if (mds->partition_align_bytes == 0) { rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0, @@ -1377,6 +1401,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) mutex_init(&mds->mbox_mutex); mutex_init(&mds->event.log_lock); mds->cxlds.dev = dev; + mds->cxlds.reg_map.host = dev; + mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; return mds; diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 14b547c07f54..fc5c2b414793 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -125,13 +125,16 @@ static ssize_t security_state_show(struct device *dev, struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - u64 reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); - u32 pct = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg); - u16 cmd = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg); unsigned long state = mds->security.state; + int rc = 0; - if (cmd == CXL_MBOX_OP_SANITIZE && pct != 100) - return sysfs_emit(buf, "sanitize\n"); + /* sync with latest submission state */ + mutex_lock(&mds->mbox_mutex); + if (mds->security.sanitize_active) + rc = sysfs_emit(buf, "sanitize\n"); + mutex_unlock(&mds->mbox_mutex); + if (rc) + return rc; if (!(state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) return sysfs_emit(buf, "disabled\n"); @@ -152,24 +155,17 @@ static ssize_t security_sanitize_store(struct device *dev, const char *buf, size_t len) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - struct cxl_port *port = cxlmd->endpoint; bool sanitize; ssize_t rc; if (kstrtobool(buf, &sanitize) || !sanitize) return -EINVAL; - if (!port || !is_cxl_endpoint(port)) - return -EINVAL; - - /* ensure no regions are mapped to this memdev */ - if (port->commit_end != -1) - return -EBUSY; - - rc = cxl_mem_sanitize(mds, CXL_MBOX_OP_SANITIZE); + rc = cxl_mem_sanitize(cxlmd, CXL_MBOX_OP_SANITIZE); + if (rc) + return rc; - return rc ? 
rc : len; + return len; } static struct device_attribute dev_attr_security_sanitize = __ATTR(sanitize, 0200, NULL, security_sanitize_store); @@ -179,24 +175,17 @@ static ssize_t security_erase_store(struct device *dev, const char *buf, size_t len) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - struct cxl_port *port = cxlmd->endpoint; ssize_t rc; bool erase; if (kstrtobool(buf, &erase) || !erase) return -EINVAL; - if (!port || !is_cxl_endpoint(port)) - return -EINVAL; - - /* ensure no regions are mapped to this memdev */ - if (port->commit_end != -1) - return -EBUSY; - - rc = cxl_mem_sanitize(mds, CXL_MBOX_OP_SECURE_ERASE); + rc = cxl_mem_sanitize(cxlmd, CXL_MBOX_OP_SECURE_ERASE); + if (rc) + return rc; - return rc ? rc : len; + return len; } static struct device_attribute dev_attr_security_erase = __ATTR(erase, 0200, NULL, security_erase_store); @@ -242,7 +231,7 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) if (rc) return rc; - if (port->commit_end == -1) { + if (cxl_num_decoders_committed(port) == 0) { /* No regions mapped to this memdev */ rc = cxl_get_poison_by_memdev(cxlmd); } else { @@ -293,7 +282,7 @@ static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa) .dpa = dpa, }; port = cxlmd->endpoint; - if (port && is_cxl_endpoint(port) && port->commit_end != -1) + if (port && is_cxl_endpoint(port) && cxl_num_decoders_committed(port)) device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region); return ctx.cxlr; @@ -556,21 +545,11 @@ void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, } EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL); -static void cxl_memdev_security_shutdown(struct device *dev) -{ - struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - - if (mds->security.poll) - cancel_delayed_work_sync(&mds->security.poll_dwork); -} - static void cxl_memdev_shutdown(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); down_write(&cxl_memdev_rwsem); - cxl_memdev_security_shutdown(dev); cxlmd->cxlds = NULL; up_write(&cxl_memdev_rwsem); } @@ -580,8 +559,8 @@ static void cxl_memdev_unregister(void *_cxlmd) struct cxl_memdev *cxlmd = _cxlmd; struct device *dev = &cxlmd->dev; - cxl_memdev_shutdown(dev); cdev_device_del(&cxlmd->cdev, dev); + cxl_memdev_shutdown(dev); put_device(dev); } @@ -961,17 +940,16 @@ static const struct fw_upload_ops cxl_memdev_fw_ops = { .cleanup = cxl_fw_cleanup, }; -static void devm_cxl_remove_fw_upload(void *fwl) +static void cxl_remove_fw_upload(void *fwl) { firmware_upload_unregister(fwl); } -int cxl_memdev_setup_fw_upload(struct cxl_memdev_state *mds) +int devm_cxl_setup_fw_upload(struct device *host, struct cxl_memdev_state *mds) { struct cxl_dev_state *cxlds = &mds->cxlds; struct device *dev = &cxlds->cxlmd->dev; struct fw_upload *fwl; - int rc; if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, mds->enabled_cmds)) return 0; @@ -979,19 +957,10 @@ int cxl_memdev_setup_fw_upload(struct cxl_memdev_state *mds) fwl = firmware_upload_register(THIS_MODULE, dev, dev_name(dev), &cxl_memdev_fw_ops, mds); if (IS_ERR(fwl)) - return dev_err_probe(dev, PTR_ERR(fwl), - "Failed to register firmware loader\n"); - - rc = devm_add_action_or_reset(cxlds->dev, devm_cxl_remove_fw_upload, - fwl); - if (rc) - dev_err(dev, - "Failed to add firmware loader remove action: %d\n", - rc); - - return rc; + return PTR_ERR(fwl); + return devm_add_action_or_reset(host, cxl_remove_fw_upload, fwl); } 
-EXPORT_SYMBOL_NS_GPL(cxl_memdev_setup_fw_upload, CXL); +EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_fw_upload, CXL); static const struct file_operations cxl_memdev_fops = { .owner = THIS_MODULE, @@ -1002,36 +971,8 @@ static const struct file_operations cxl_memdev_fops = { .llseek = noop_llseek, }; -static void put_sanitize(void *data) -{ - struct cxl_memdev_state *mds = data; - - sysfs_put(mds->security.sanitize_node); -} - -static int cxl_memdev_security_init(struct cxl_memdev *cxlmd) -{ - struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - struct device *dev = &cxlmd->dev; - struct kernfs_node *sec; - - sec = sysfs_get_dirent(dev->kobj.sd, "security"); - if (!sec) { - dev_err(dev, "sysfs_get_dirent 'security' failed\n"); - return -ENODEV; - } - mds->security.sanitize_node = sysfs_get_dirent(sec, "state"); - sysfs_put(sec); - if (!mds->security.sanitize_node) { - dev_err(dev, "sysfs_get_dirent 'state' failed\n"); - return -ENODEV; - } - - return devm_add_action_or_reset(cxlds->dev, put_sanitize, mds); - } - -struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) +struct cxl_memdev *devm_cxl_add_memdev(struct device *host, + struct cxl_dev_state *cxlds) { struct cxl_memdev *cxlmd; struct device *dev; @@ -1059,11 +1000,7 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) if (rc) goto err; - rc = cxl_memdev_security_init(cxlmd); - if (rc) - goto err; - - rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd); + rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd); if (rc) return ERR_PTR(rc); return cxlmd; @@ -1079,6 +1016,50 @@ err: } EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL); +static void sanitize_teardown_notifier(void *data) +{ + struct cxl_memdev_state *mds = data; + struct kernfs_node *state; + + /* + * Prevent new irq triggered invocations of the workqueue and + * flush inflight invocations. + */ + mutex_lock(&mds->mbox_mutex); + state = mds->security.sanitize_node; + mds->security.sanitize_node = NULL; + mutex_unlock(&mds->mbox_mutex); + + cancel_delayed_work_sync(&mds->security.poll_dwork); + sysfs_put(state); +} + +int devm_cxl_sanitize_setup_notifier(struct device *host, + struct cxl_memdev *cxlmd) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct kernfs_node *sec; + + if (!test_bit(CXL_SEC_ENABLED_SANITIZE, mds->security.enabled_cmds)) + return 0; + + /* + * Note, the expectation is that @cxlmd would have failed to be + * created if these sysfs_get_dirent calls fail. 
+ */ + sec = sysfs_get_dirent(cxlmd->dev.kobj.sd, "security"); + if (!sec) + return -ENOENT; + mds->security.sanitize_node = sysfs_get_dirent(sec, "state"); + sysfs_put(sec); + if (!mds->security.sanitize_node) + return -ENOENT; + + return devm_add_action_or_reset(host, sanitize_teardown_notifier, mds); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_sanitize_setup_notifier, CXL); + __init int cxl_memdev_init(void) { dev_t devt; diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index c7a7887ebdcf..eff20e83d0a6 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -5,6 +5,7 @@ #include <linux/delay.h> #include <linux/pci.h> #include <linux/pci-doe.h> +#include <linux/aer.h> #include <cxlpci.h> #include <cxlmem.h> #include <cxl.h> @@ -595,6 +596,16 @@ static int cxl_cdat_read_table(struct device *dev, return 0; } +static unsigned char cdat_checksum(void *buf, size_t size) +{ + unsigned char sum, *data = buf; + size_t i; + + for (sum = 0, i = 0; i < size; i++) + sum += data[i]; + return sum; +} + /** * read_cdat_data - Read the CDAT data on this port * @port: Port to read data from @@ -603,18 +614,30 @@ static int cxl_cdat_read_table(struct device *dev, */ void read_cdat_data(struct cxl_port *port) { - struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); - struct device *host = cxlmd->dev.parent; + struct device *uport = port->uport_dev; struct device *dev = &port->dev; struct pci_doe_mb *cdat_doe; + struct pci_dev *pdev = NULL; + struct cxl_memdev *cxlmd; size_t cdat_length; void *cdat_table; int rc; - if (!dev_is_pci(host)) + if (is_cxl_memdev(uport)) { + struct device *host; + + cxlmd = to_cxl_memdev(uport); + host = cxlmd->dev.parent; + if (dev_is_pci(host)) + pdev = to_pci_dev(host); + } else if (dev_is_pci(uport)) { + pdev = to_pci_dev(uport); + } + + if (!pdev) return; - cdat_doe = pci_find_doe_mailbox(to_pci_dev(host), - PCI_DVSEC_VENDOR_ID_CXL, + + cdat_doe = pci_find_doe_mailbox(pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DOE_PROTOCOL_TABLE_ACCESS); if (!cdat_doe) { dev_dbg(dev, "No CDAT mailbox\n"); @@ -634,44 +657,54 @@ void read_cdat_data(struct cxl_port *port) return; rc = cxl_cdat_read_table(dev, cdat_doe, cdat_table, &cdat_length); - if (rc) { - /* Don't leave table data allocated on error */ - devm_kfree(dev, cdat_table); - dev_err(dev, "CDAT data read error\n"); - return; - } + if (rc) + goto err; + + cdat_table = cdat_table + sizeof(__le32); + if (cdat_checksum(cdat_table, cdat_length)) + goto err; - port->cdat.table = cdat_table + sizeof(__le32); + port->cdat.table = cdat_table; port->cdat.length = cdat_length; + return; + +err: + /* Don't leave table data allocated on error */ + devm_kfree(dev, cdat_table); + dev_err(dev, "Failed to read/validate CDAT.\n"); } EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); -void cxl_cor_error_detected(struct pci_dev *pdev) +static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, + void __iomem *ras_base) { - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); void __iomem *addr; u32 status; - if (!cxlds->regs.ras) + if (!ras_base) return; - addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET; + addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; status = readl(addr); if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); trace_cxl_aer_correctable_error(cxlds->cxlmd, status); } } -EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); + +static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds) +{ + return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras); +} /* CXL spec 
rev3.0 8.2.4.16.1 */ -static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) +static void header_log_copy(void __iomem *ras_base, u32 *log) { void __iomem *addr; u32 *log_addr; int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); - addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET; + addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; log_addr = log; for (i = 0; i < log_u32_size; i++) { @@ -685,17 +718,18 @@ static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) * Log the state of the RAS status registers and prepare them to log the * next error status. Return 1 if reset needed. */ -static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) +static bool __cxl_handle_ras(struct cxl_dev_state *cxlds, + void __iomem *ras_base) { u32 hl[CXL_HEADERLOG_SIZE_U32]; void __iomem *addr; u32 status; u32 fe; - if (!cxlds->regs.ras) + if (!ras_base) return false; - addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; + addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; status = readl(addr); if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) return false; @@ -703,7 +737,7 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) /* If multiple errors, log header points to first error from ctrl reg */ if (hweight32(status) > 1) { void __iomem *rcc_addr = - cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET; + ras_base + CXL_RAS_CAP_CONTROL_OFFSET; fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(rcc_addr))); @@ -711,13 +745,201 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) fe = status; } - header_log_copy(cxlds, hl); + header_log_copy(ras_base, hl); trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); return true; } +static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds) +{ + return __cxl_handle_ras(cxlds, cxlds->regs.ras); +} + +#ifdef CONFIG_PCIEAER_CXL + +static void cxl_dport_map_rch_aer(struct cxl_dport *dport) +{ + struct cxl_rcrb_info *ri = &dport->rcrb; + void __iomem *dport_aer = NULL; + resource_size_t aer_phys; + struct device *host; + + if (dport->rch && ri->aer_cap) { + host = dport->reg_map.host; + aer_phys = ri->aer_cap + ri->base; + dport_aer = devm_cxl_iomap_block(host, aer_phys, + sizeof(struct aer_capability_regs)); + } + + dport->regs.dport_aer = dport_aer; +} + +static void cxl_dport_map_regs(struct cxl_dport *dport) +{ + struct cxl_register_map *map = &dport->reg_map; + struct device *dev = dport->dport_dev; + + if (!map->component_map.ras.valid) + dev_dbg(dev, "RAS registers not found\n"); + else if (cxl_map_component_regs(map, &dport->regs.component, + BIT(CXL_CM_CAP_CAP_ID_RAS))) + dev_dbg(dev, "Failed to map RAS capability.\n"); + + if (dport->rch) + cxl_dport_map_rch_aer(dport); +} + +static void cxl_disable_rch_root_ints(struct cxl_dport *dport) +{ + void __iomem *aer_base = dport->regs.dport_aer; + struct pci_host_bridge *bridge; + u32 aer_cmd_mask, aer_cmd; + + if (!aer_base) + return; + + bridge = to_pci_host_bridge(dport->dport_dev); + + /* + * Disable RCH root port command interrupts. + * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors + * + * This sequence may not be necessary. CXL spec states disabling + * the root cmd register's interrupts is required. But, PCI spec + * shows these are disabled by default on reset. 
+ */ + if (bridge->native_aer) { + aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN | + PCI_ERR_ROOT_CMD_NONFATAL_EN | + PCI_ERR_ROOT_CMD_FATAL_EN); + aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND); + aer_cmd &= ~aer_cmd_mask; + writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); + } +} + +void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) +{ + struct device *dport_dev = dport->dport_dev; + struct pci_host_bridge *host_bridge; + + host_bridge = to_pci_host_bridge(dport_dev); + if (host_bridge->native_aer) + dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base); + + dport->reg_map.host = host; + cxl_dport_map_regs(dport); + + if (dport->rch) + cxl_disable_rch_root_ints(dport); +} +EXPORT_SYMBOL_NS_GPL(cxl_setup_parent_dport, CXL); + +static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds, + struct cxl_dport *dport) +{ + return __cxl_handle_cor_ras(cxlds, dport->regs.ras); +} + +static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds, + struct cxl_dport *dport) +{ + return __cxl_handle_ras(cxlds, dport->regs.ras); +} + +/* + * Copy the AER capability registers using 32 bit read accesses. + * This is necessary because RCRB AER capability is MMIO mapped. Clear the + * status after copying. + * + * @aer_base: base address of AER capability block in RCRB + * @aer_regs: destination for copying AER capability + */ +static bool cxl_rch_get_aer_info(void __iomem *aer_base, + struct aer_capability_regs *aer_regs) +{ + int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32); + u32 *aer_regs_buf = (u32 *)aer_regs; + int n; + + if (!aer_base) + return false; + + /* Use readl() to guarantee 32-bit accesses */ + for (n = 0; n < read_cnt; n++) + aer_regs_buf[n] = readl(aer_base + n * sizeof(u32)); + + writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS); + writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS); + + return true; +} + +/* Get AER severity. Return false if there is no error. 
*/ +static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs, + int *severity) +{ + if (aer_regs->uncor_status & ~aer_regs->uncor_mask) { + if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV) + *severity = AER_FATAL; + else + *severity = AER_NONFATAL; + return true; + } + + if (aer_regs->cor_status & ~aer_regs->cor_mask) { + *severity = AER_CORRECTABLE; + return true; + } + + return false; +} + +static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct aer_capability_regs aer_regs; + struct cxl_dport *dport; + struct cxl_port *port; + int severity; + + port = cxl_pci_find_port(pdev, &dport); + if (!port) + return; + + put_device(&port->dev); + + if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs)) + return; + + if (!cxl_rch_get_aer_severity(&aer_regs, &severity)) + return; + + pci_print_aer(pdev, severity, &aer_regs); + + if (severity == AER_CORRECTABLE) + cxl_handle_rdport_cor_ras(cxlds, dport); + else + cxl_handle_rdport_ras(cxlds, dport); +} + +#else +static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } +#endif + +void cxl_cor_error_detected(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + + cxl_handle_endpoint_cor_ras(cxlds); +} +EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); + pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, pci_channel_state_t state) { @@ -726,13 +948,16 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, struct device *dev = &cxlmd->dev; bool ue; + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + /* * A frozen channel indicates an impending reset which is fatal to * CXL.mem operation, and will likely crash the system. On the off * chance the situation is recoverable dump the status of the RAS * capability registers and bounce the active state of the memdev. */ - ue = cxl_report_and_clear(cxlds); + ue = cxl_handle_endpoint_ras(cxlds); switch (state) { case pci_channel_io_normal: diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 7ca01a834e18..38441634e4c6 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -28,9 +28,22 @@ * instantiated by the core. */ +/* + * All changes to the interleave configuration occur with this lock held + * for write. 
+ */ +DECLARE_RWSEM(cxl_region_rwsem); + static DEFINE_IDA(cxl_port_ida); static DEFINE_XARRAY(cxl_root_buses); +int cxl_num_decoders_committed(struct cxl_port *port) +{ + lockdep_assert_held(&cxl_region_rwsem); + + return port->commit_end + 1; +} + static ssize_t devtype_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -278,6 +291,15 @@ static ssize_t interleave_ways_show(struct device *dev, static DEVICE_ATTR_RO(interleave_ways); +static ssize_t qos_class_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); + + return sysfs_emit(buf, "%d\n", cxlrd->qos_class); +} +static DEVICE_ATTR_RO(qos_class); + static struct attribute *cxl_decoder_base_attrs[] = { &dev_attr_start.attr, &dev_attr_size.attr, @@ -297,6 +319,7 @@ static struct attribute *cxl_decoder_root_attrs[] = { &dev_attr_cap_type2.attr, &dev_attr_cap_type3.attr, &dev_attr_target_list.attr, + &dev_attr_qos_class.attr, SET_CXL_REGION_ATTR(create_pmem_region) SET_CXL_REGION_ATTR(create_ram_region) SET_CXL_REGION_ATTR(delete_region) @@ -521,8 +544,33 @@ static void cxl_port_release(struct device *dev) kfree(port); } +static ssize_t decoders_committed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_port *port = to_cxl_port(dev); + int rc; + + down_read(&cxl_region_rwsem); + rc = sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port)); + up_read(&cxl_region_rwsem); + + return rc; +} + +static DEVICE_ATTR_RO(decoders_committed); + +static struct attribute *cxl_port_attrs[] = { + &dev_attr_decoders_committed.attr, + NULL, +}; + +static struct attribute_group cxl_port_attribute_group = { + .attrs = cxl_port_attrs, +}; + static const struct attribute_group *cxl_port_attribute_groups[] = { &cxl_base_attribute_group, + &cxl_port_attribute_group, NULL, }; @@ -619,7 +667,6 @@ static int devm_cxl_link_parent_dport(struct device *host, static struct lock_class_key cxl_port_key; static struct cxl_port *cxl_port_alloc(struct device *uport_dev, - resource_size_t component_reg_phys, struct cxl_dport *parent_dport) { struct cxl_port *port; @@ -670,7 +717,6 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev, } else dev->parent = uport_dev; - port->component_reg_phys = component_reg_phys; ida_init(&port->decoder_ida); port->hdm_end = -1; port->commit_end = -1; @@ -691,19 +737,21 @@ err: return ERR_PTR(rc); } -static int cxl_setup_comp_regs(struct device *dev, struct cxl_register_map *map, +static int cxl_setup_comp_regs(struct device *host, struct cxl_register_map *map, resource_size_t component_reg_phys) { - if (component_reg_phys == CXL_RESOURCE_NONE) - return 0; - *map = (struct cxl_register_map) { - .dev = dev, - .reg_type = CXL_REGLOC_RBI_COMPONENT, + .host = host, + .reg_type = CXL_REGLOC_RBI_EMPTY, .resource = component_reg_phys, - .max_size = CXL_COMPONENT_REG_BLOCK_SIZE, }; + if (component_reg_phys == CXL_RESOURCE_NONE) + return 0; + + map->reg_type = CXL_REGLOC_RBI_COMPONENT; + map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; + return cxl_setup_regs(map); } @@ -712,17 +760,27 @@ static int cxl_port_setup_regs(struct cxl_port *port, { if (dev_is_platform(port->uport_dev)) return 0; - return cxl_setup_comp_regs(&port->dev, &port->comp_map, + return cxl_setup_comp_regs(&port->dev, &port->reg_map, component_reg_phys); } -static int cxl_dport_setup_regs(struct cxl_dport *dport, +static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport, resource_size_t component_reg_phys) { + 
int rc; + if (dev_is_platform(dport->dport_dev)) return 0; - return cxl_setup_comp_regs(dport->dport_dev, &dport->comp_map, - component_reg_phys); + + /* + * use @dport->dport_dev for the context for error messages during + * register probing, and fixup @host after the fact, since @host may be + * NULL. + */ + rc = cxl_setup_comp_regs(dport->dport_dev, &dport->reg_map, + component_reg_phys); + dport->reg_map.host = host; + return rc; } static struct cxl_port *__devm_cxl_add_port(struct device *host, @@ -734,21 +792,36 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host, struct device *dev; int rc; - port = cxl_port_alloc(uport_dev, component_reg_phys, parent_dport); + port = cxl_port_alloc(uport_dev, parent_dport); if (IS_ERR(port)) return port; dev = &port->dev; - if (is_cxl_memdev(uport_dev)) + if (is_cxl_memdev(uport_dev)) { + struct cxl_memdev *cxlmd = to_cxl_memdev(uport_dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + rc = dev_set_name(dev, "endpoint%d", port->id); - else if (parent_dport) + if (rc) + goto err; + + /* + * The endpoint driver already enumerated the component and RAS + * registers. Reuse that enumeration while prepping them to be + * mapped by the cxl_port driver. + */ + port->reg_map = cxlds->reg_map; + port->reg_map.host = &port->dev; + } else if (parent_dport) { rc = dev_set_name(dev, "port%d", port->id); - else - rc = dev_set_name(dev, "root%d", port->id); - if (rc) - goto err; + if (rc) + goto err; - rc = cxl_port_setup_regs(port, component_reg_phys); + rc = cxl_port_setup_regs(port, component_reg_phys); + if (rc) + goto err; + } else + rc = dev_set_name(dev, "root%d", port->id); if (rc) goto err; @@ -983,7 +1056,16 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, if (!dport) return ERR_PTR(-ENOMEM); - if (rcrb != CXL_RESOURCE_NONE) { + dport->dport_dev = dport_dev; + dport->port_id = port_id; + dport->port = port; + + if (rcrb == CXL_RESOURCE_NONE) { + rc = cxl_dport_setup_regs(&port->dev, dport, + component_reg_phys); + if (rc) + return ERR_PTR(rc); + } else { dport->rcrb.base = rcrb; component_reg_phys = __rcrb_to_component(dport_dev, &dport->rcrb, CXL_RCRB_DOWNSTREAM); @@ -992,6 +1074,14 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, return ERR_PTR(-ENXIO); } + /* + * RCH @dport is not ready to map until associated with its + * memdev + */ + rc = cxl_dport_setup_regs(NULL, dport, component_reg_phys); + if (rc) + return ERR_PTR(rc); + dport->rch = true; } @@ -999,14 +1089,6 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, dev_dbg(dport_dev, "Component Registers found for dport: %pa\n", &component_reg_phys); - dport->dport_dev = dport_dev; - dport->port_id = port_id; - dport->port = port; - - rc = cxl_dport_setup_regs(dport, component_reg_phys); - if (rc) - return ERR_PTR(rc); - cond_cxl_root_lock(port); rc = add_dport(port, dport); cond_cxl_root_unlock(port); @@ -1217,35 +1299,39 @@ static struct device *grandparent(struct device *dev) return NULL; } +static struct device *endpoint_host(struct cxl_port *endpoint) +{ + struct cxl_port *port = to_cxl_port(endpoint->dev.parent); + + if (is_cxl_root(port)) + return port->uport_dev; + return &port->dev; +} + static void delete_endpoint(void *data) { struct cxl_memdev *cxlmd = data; struct cxl_port *endpoint = cxlmd->endpoint; - struct cxl_port *parent_port; - struct device *parent; + struct device *host = endpoint_host(endpoint); - parent_port = cxl_mem_find_port(cxlmd, NULL); - if (!parent_port) - goto out; - parent = 
&parent_port->dev; - - device_lock(parent); - if (parent->driver && !endpoint->dead) { - devm_release_action(parent, cxl_unlink_parent_dport, endpoint); - devm_release_action(parent, cxl_unlink_uport, endpoint); - devm_release_action(parent, unregister_port, endpoint); + device_lock(host); + if (host->driver && !endpoint->dead) { + devm_release_action(host, cxl_unlink_parent_dport, endpoint); + devm_release_action(host, cxl_unlink_uport, endpoint); + devm_release_action(host, unregister_port, endpoint); } cxlmd->endpoint = NULL; - device_unlock(parent); - put_device(parent); -out: + device_unlock(host); put_device(&endpoint->dev); + put_device(host); } int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) { + struct device *host = endpoint_host(endpoint); struct device *dev = &cxlmd->dev; + get_device(host); get_device(&endpoint->dev); cxlmd->endpoint = endpoint; cxlmd->depth = endpoint->depth; @@ -1468,7 +1554,11 @@ retry: struct cxl_dport *dport; struct cxl_port *port; - if (!dport_dev) + /* + * The terminal "grandparent" in PCI is NULL and @platform_bus + * for platform devices + */ + if (!dport_dev || dport_dev == &platform_bus) return 0; uport_dev = dport_dev->parent; @@ -1691,6 +1781,7 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, } atomic_set(&cxlrd->region_id, rc); + cxlrd->qos_class = CXL_QOS_CLASS_INVALID; return cxlrd; } EXPORT_SYMBOL_NS_GPL(cxl_root_decoder_alloc, CXL); @@ -2062,3 +2153,4 @@ static void cxl_core_exit(void) subsys_initcall(cxl_core_init); module_exit(cxl_core_exit); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 6d63b8798c29..56e575c79bb4 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -28,12 +28,6 @@ * 3. Decoder targets */ -/* - * All changes to the interleave configuration occur with this lock held - * for write. - */ -static DECLARE_RWSEM(cxl_region_rwsem); - static struct cxl_region *to_cxl_region(struct device *dev); static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, @@ -129,7 +123,7 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) { if (!cpu_cache_has_invalidate_memregion()) { if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) { - dev_warn_once( + dev_info_once( &cxlr->dev, "Bypassing cpu_cache_invalidate_memregion() for testing!\n"); return 0; @@ -294,7 +288,7 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr, */ rc = cxl_region_invalidate_memregion(cxlr); if (rc) - return rc; + goto out; if (commit) { rc = cxl_region_decode_commit(cxlr); @@ -1133,7 +1127,14 @@ static int cxl_port_setup_targets(struct cxl_port *port, } if (is_cxl_root(parent_port)) { - parent_ig = cxlrd->cxlsd.cxld.interleave_granularity; + /* + * Root decoder IG is always set to value in CFMWS which + * may be different than this region's IG. We can use the + * region's IG here since interleave_granularity_store() + * does not allow interleaved host-bridges with + * root IG != region IG. 
+ */ + parent_ig = p->interleave_granularity; parent_iw = cxlrd->cxlsd.cxld.interleave_ways; /* * For purposes of address bit routing, use power-of-2 math for @@ -1195,6 +1196,14 @@ static int cxl_port_setup_targets(struct cxl_port *port, return rc; } + if (iw > 8 || iw > cxlsd->nr_targets) { + dev_dbg(&cxlr->dev, + "%s:%s:%s: ways: %d overflows targets: %d\n", + dev_name(port->uport_dev), dev_name(&port->dev), + dev_name(&cxld->dev), iw, cxlsd->nr_targets); + return -ENXIO; + } + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { if (cxld->interleave_ways != iw || cxld->interleave_granularity != ig || @@ -1480,6 +1489,14 @@ static int cxl_region_attach_auto(struct cxl_region *cxlr, return 0; } +static int cmp_interleave_pos(const void *a, const void *b) +{ + struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a; + struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b; + + return cxled_a->pos - cxled_b->pos; +} + static struct cxl_port *next_port(struct cxl_port *port) { if (!port->parent_dport) @@ -1487,119 +1504,127 @@ static struct cxl_port *next_port(struct cxl_port *port) return port->parent_dport->port; } -static int decoder_match_range(struct device *dev, void *data) +static int match_switch_decoder_by_range(struct device *dev, void *data) { - struct cxl_endpoint_decoder *cxled = data; struct cxl_switch_decoder *cxlsd; + struct range *r1, *r2 = data; if (!is_switch_decoder(dev)) return 0; cxlsd = to_cxl_switch_decoder(dev); - return range_contains(&cxlsd->cxld.hpa_range, &cxled->cxld.hpa_range); -} - -static void find_positions(const struct cxl_switch_decoder *cxlsd, - const struct cxl_port *iter_a, - const struct cxl_port *iter_b, int *a_pos, - int *b_pos) -{ - int i; + r1 = &cxlsd->cxld.hpa_range; - for (i = 0, *a_pos = -1, *b_pos = -1; i < cxlsd->nr_targets; i++) { - if (cxlsd->target[i] == iter_a->parent_dport) - *a_pos = i; - else if (cxlsd->target[i] == iter_b->parent_dport) - *b_pos = i; - if (*a_pos >= 0 && *b_pos >= 0) - break; - } + if (is_root_decoder(dev)) + return range_contains(r1, r2); + return (r1->start == r2->start && r1->end == r2->end); } -static int cmp_decode_pos(const void *a, const void *b) +static int find_pos_and_ways(struct cxl_port *port, struct range *range, + int *pos, int *ways) { - struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a; - struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b; - struct cxl_memdev *cxlmd_a = cxled_to_memdev(cxled_a); - struct cxl_memdev *cxlmd_b = cxled_to_memdev(cxled_b); - struct cxl_port *port_a = cxled_to_port(cxled_a); - struct cxl_port *port_b = cxled_to_port(cxled_b); - struct cxl_port *iter_a, *iter_b, *port = NULL; struct cxl_switch_decoder *cxlsd; + struct cxl_port *parent; struct device *dev; - int a_pos, b_pos; - unsigned int seq; - - /* Exit early if any prior sorting failed */ - if (cxled_a->pos < 0 || cxled_b->pos < 0) - return 0; + int rc = -ENXIO; - /* - * Walk up the hierarchy to find a shared port, find the decoder that - * maps the range, compare the relative position of those dport - * mappings. 
- */ - for (iter_a = port_a; iter_a; iter_a = next_port(iter_a)) { - struct cxl_port *next_a, *next_b; + parent = next_port(port); + if (!parent) + return rc; - next_a = next_port(iter_a); - if (!next_a) - break; + dev = device_find_child(&parent->dev, range, + match_switch_decoder_by_range); + if (!dev) { + dev_err(port->uport_dev, + "failed to find decoder mapping %#llx-%#llx\n", + range->start, range->end); + return rc; + } + cxlsd = to_cxl_switch_decoder(dev); + *ways = cxlsd->cxld.interleave_ways; - for (iter_b = port_b; iter_b; iter_b = next_port(iter_b)) { - next_b = next_port(iter_b); - if (next_a != next_b) - continue; - port = next_a; + for (int i = 0; i < *ways; i++) { + if (cxlsd->target[i] == port->parent_dport) { + *pos = i; + rc = 0; break; } - - if (port) - break; } + put_device(dev); - if (!port) { - dev_err(cxlmd_a->dev.parent, - "failed to find shared port with %s\n", - dev_name(cxlmd_b->dev.parent)); - goto err; - } + return rc; +} - dev = device_find_child(&port->dev, cxled_a, decoder_match_range); - if (!dev) { - struct range *range = &cxled_a->cxld.hpa_range; +/** + * cxl_calc_interleave_pos() - calculate an endpoint position in a region + * @cxled: endpoint decoder member of given region + * + * The endpoint position is calculated by traversing the topology from + * the endpoint to the root decoder and iteratively applying this + * calculation: + * + * position = position * parent_ways + parent_pos; + * + * ...where @position is inferred from switch and root decoder target lists. + * + * Return: position >= 0 on success + * -ENXIO on failure + */ +static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled) +{ + struct cxl_port *iter, *port = cxled_to_port(cxled); + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct range *range = &cxled->cxld.hpa_range; + int parent_ways = 0, parent_pos = 0, pos = 0; + int rc; - dev_err(port->uport_dev, - "failed to find decoder that maps %#llx-%#llx\n", - range->start, range->end); - goto err; - } + /* + * Example: the expected interleave order of the 4-way region shown + * below is: mem0, mem2, mem1, mem3 + * + * root_port + * / \ + * host_bridge_0 host_bridge_1 + * | | | | + * mem0 mem1 mem2 mem3 + * + * In the example the calculator will iterate twice. The first iteration + * uses the mem position in the host-bridge and the ways of the host- + * bridge to generate the first, or local, position. The second + * iteration uses the host-bridge position in the root_port and the ways + * of the root_port to refine the position. + * + * A trace of the calculation per endpoint looks like this: + * mem0: pos = 0 * 2 + 0 mem2: pos = 0 * 2 + 0 + * pos = 0 * 2 + 0 pos = 0 * 2 + 1 + * pos: 0 pos: 1 + * + * mem1: pos = 0 * 2 + 1 mem3: pos = 0 * 2 + 1 + * pos = 1 * 2 + 0 pos = 1 * 2 + 1 + * pos: 2 pos = 3 + * + * Note that while this example is simple, the method applies to more + * complex topologies, including those with switches. 
+ */ - cxlsd = to_cxl_switch_decoder(dev); - do { - seq = read_seqbegin(&cxlsd->target_lock); - find_positions(cxlsd, iter_a, iter_b, &a_pos, &b_pos); - } while (read_seqretry(&cxlsd->target_lock, seq)); + /* Iterate from endpoint to root_port refining the position */ + for (iter = port; iter; iter = next_port(iter)) { + if (is_cxl_root(iter)) + break; - put_device(dev); + rc = find_pos_and_ways(iter, range, &parent_pos, &parent_ways); + if (rc) + return rc; - if (a_pos < 0 || b_pos < 0) { - dev_err(port->uport_dev, - "failed to find shared decoder for %s and %s\n", - dev_name(cxlmd_a->dev.parent), - dev_name(cxlmd_b->dev.parent)); - goto err; + pos = pos * parent_ways + parent_pos; } - dev_dbg(port->uport_dev, "%s comes %s %s\n", - dev_name(cxlmd_a->dev.parent), - a_pos - b_pos < 0 ? "before" : "after", - dev_name(cxlmd_b->dev.parent)); + dev_dbg(&cxlmd->dev, + "decoder:%s parent:%s port:%s range:%#llx-%#llx pos:%d\n", + dev_name(&cxled->cxld.dev), dev_name(cxlmd->dev.parent), + dev_name(&port->dev), range->start, range->end, pos); - return a_pos - b_pos; -err: - cxled_a->pos = -1; - return 0; + return pos; } static int cxl_region_sort_targets(struct cxl_region *cxlr) @@ -1607,22 +1632,21 @@ static int cxl_region_sort_targets(struct cxl_region *cxlr) struct cxl_region_params *p = &cxlr->params; int i, rc = 0; - sort(p->targets, p->nr_targets, sizeof(p->targets[0]), cmp_decode_pos, - NULL); - for (i = 0; i < p->nr_targets; i++) { struct cxl_endpoint_decoder *cxled = p->targets[i]; + cxled->pos = cxl_calc_interleave_pos(cxled); /* - * Record that sorting failed, but still continue to restore - * cxled->pos with its ->targets[] position so that follow-on - * code paths can reliably do p->targets[cxled->pos] to - * self-reference their entry. + * Record that sorting failed, but still continue to calc + * cxled->pos so that follow-on code paths can reliably + * do p->targets[cxled->pos] to self-reference their entry. */ if (cxled->pos < 0) rc = -ENXIO; - cxled->pos = i; } + /* Keep the cxlr target list in interleave position order */ + sort(p->targets, p->nr_targets, sizeof(p->targets[0]), + cmp_interleave_pos, NULL); dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful"); return rc; @@ -1658,6 +1682,12 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -ENXIO; } + if (p->nr_targets >= p->interleave_ways) { + dev_dbg(&cxlr->dev, "region already has %d endpoints\n", + p->nr_targets); + return -EINVAL; + } + ep_port = cxled_to_port(cxled); root_port = cxlrd_to_port(cxlrd); dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge); @@ -1750,7 +1780,7 @@ static int cxl_region_attach(struct cxl_region *cxlr, if (p->nr_targets == p->interleave_ways) { rc = cxl_region_setup_targets(cxlr); if (rc) - goto err_decrement; + return rc; p->state = CXL_CONFIG_ACTIVE; } @@ -1761,13 +1791,27 @@ static int cxl_region_attach(struct cxl_region *cxlr, .end = p->res->end, }; - return 0; + if (p->nr_targets != p->interleave_ways) + return 0; -err_decrement: - p->nr_targets--; - cxled->pos = -1; - p->targets[pos] = NULL; - return rc; + /* + * Test the auto-discovery position calculator function + * against this successfully created user-defined region. + * A fail message here means that this interleave config + * will fail when presented as CXL_REGION_F_AUTO. 
+ */ + for (int i = 0; i < p->nr_targets; i++) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; + int test_pos; + + test_pos = cxl_calc_interleave_pos(cxled); + dev_dbg(&cxled->cxld.dev, + "Test cxl_calc_interleave_pos(): %s test_pos:%d cxled->pos:%d\n", + (test_pos == cxled->pos) ? "success" : "fail", + test_pos, cxled->pos); + } + + return 0; } static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) @@ -2696,7 +2740,7 @@ err: return rc; } -static int match_decoder_by_range(struct device *dev, void *data) +static int match_root_decoder_by_range(struct device *dev, void *data) { struct range *r1, *r2 = data; struct cxl_root_decoder *cxlrd; @@ -2827,7 +2871,7 @@ int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled) int rc; cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range, - match_decoder_by_range); + match_root_decoder_by_range); if (!cxlrd_dev) { dev_err(cxlmd->dev.parent, "%s:%s no CXL window for range %#llx:%#llx\n", diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 6281127b3e9d..372786f80955 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -204,7 +204,7 @@ int cxl_map_component_regs(const struct cxl_register_map *map, struct cxl_component_regs *regs, unsigned long map_mask) { - struct device *dev = map->dev; + struct device *host = map->host; struct mapinfo { const struct cxl_reg_map *rmap; void __iomem **addr; @@ -216,16 +216,16 @@ int cxl_map_component_regs(const struct cxl_register_map *map, for (i = 0; i < ARRAY_SIZE(mapinfo); i++) { struct mapinfo *mi = &mapinfo[i]; - resource_size_t phys_addr; + resource_size_t addr; resource_size_t length; if (!mi->rmap->valid) continue; if (!test_bit(mi->rmap->id, &map_mask)) continue; - phys_addr = map->resource + mi->rmap->offset; + addr = map->resource + mi->rmap->offset; length = mi->rmap->size; - *(mi->addr) = devm_cxl_iomap_block(dev, phys_addr, length); + *(mi->addr) = devm_cxl_iomap_block(host, addr, length); if (!*(mi->addr)) return -ENOMEM; } @@ -237,7 +237,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_component_regs, CXL); int cxl_map_device_regs(const struct cxl_register_map *map, struct cxl_device_regs *regs) { - struct device *dev = map->dev; + struct device *host = map->host; resource_size_t phys_addr = map->resource; struct mapinfo { const struct cxl_reg_map *rmap; @@ -259,7 +259,7 @@ int cxl_map_device_regs(const struct cxl_register_map *map, addr = phys_addr + mi->rmap->offset; length = mi->rmap->size; - *(mi->addr) = devm_cxl_iomap_block(dev, addr, length); + *(mi->addr) = devm_cxl_iomap_block(host, addr, length); if (!*(mi->addr)) return -ENOMEM; } @@ -309,7 +309,7 @@ int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, int regloc, i; *map = (struct cxl_register_map) { - .dev = &pdev->dev, + .host = &pdev->dev, .resource = CXL_RESOURCE_NONE, }; @@ -386,10 +386,9 @@ int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type) } EXPORT_SYMBOL_NS_GPL(cxl_count_regblock, CXL); -int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs, - struct cxl_register_map *map) +int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs) { - struct device *dev = &pdev->dev; + struct device *dev = map->host; resource_size_t phys_addr; phys_addr = map->resource; @@ -403,15 +402,15 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_pmu_regs, CXL); static int cxl_map_regblock(struct cxl_register_map *map) { - struct device *dev = map->dev; + struct device *host = map->host; map->base = ioremap(map->resource, 
map->max_size); if (!map->base) { - dev_err(dev, "failed to map registers\n"); + dev_err(host, "failed to map registers\n"); return -ENOMEM; } - dev_dbg(dev, "Mapped CXL Memory Device resource %pa\n", &map->resource); + dev_dbg(host, "Mapped CXL Memory Device resource %pa\n", &map->resource); return 0; } @@ -425,28 +424,28 @@ static int cxl_probe_regs(struct cxl_register_map *map) { struct cxl_component_reg_map *comp_map; struct cxl_device_reg_map *dev_map; - struct device *dev = map->dev; + struct device *host = map->host; void __iomem *base = map->base; switch (map->reg_type) { case CXL_REGLOC_RBI_COMPONENT: comp_map = &map->component_map; - cxl_probe_component_regs(dev, base, comp_map); - dev_dbg(dev, "Set up component registers\n"); + cxl_probe_component_regs(host, base, comp_map); + dev_dbg(host, "Set up component registers\n"); break; case CXL_REGLOC_RBI_MEMDEV: dev_map = &map->device_map; - cxl_probe_device_regs(dev, base, dev_map); + cxl_probe_device_regs(host, base, dev_map); if (!dev_map->status.valid || !dev_map->mbox.valid || !dev_map->memdev.valid) { - dev_err(dev, "registers not found: %s%s%s\n", + dev_err(host, "registers not found: %s%s%s\n", !dev_map->status.valid ? "status " : "", !dev_map->mbox.valid ? "mbox " : "", !dev_map->memdev.valid ? "memdev " : ""); return -ENXIO; } - dev_dbg(dev, "Probing device registers...\n"); + dev_dbg(host, "Probing device registers...\n"); break; default: break; @@ -470,6 +469,42 @@ int cxl_setup_regs(struct cxl_register_map *map) } EXPORT_SYMBOL_NS_GPL(cxl_setup_regs, CXL); +u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb) +{ + void __iomem *addr; + u16 offset = 0; + u32 cap_hdr; + + if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE)) + return 0; + + if (!request_mem_region(rcrb, SZ_4K, dev_name(dev))) + return 0; + + addr = ioremap(rcrb, SZ_4K); + if (!addr) + goto out; + + cap_hdr = readl(addr + offset); + while (PCI_EXT_CAP_ID(cap_hdr) != PCI_EXT_CAP_ID_ERR) { + offset = PCI_EXT_CAP_NEXT(cap_hdr); + + /* Offset 0 terminates capability list. 
*/ + if (!offset) + break; + cap_hdr = readl(addr + offset); + } + + if (offset) + dev_dbg(dev, "found AER extended capability (0x%x)\n", offset); + + iounmap(addr); +out: + release_mem_region(rcrb, SZ_4K); + + return offset; +} + resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri, enum cxl_rcrb which) { diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 76d92561af29..687043ece101 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -221,6 +221,14 @@ struct cxl_regs { struct_group_tagged(cxl_pmu_regs, pmu_regs, void __iomem *pmu; ); + + /* + * RCH downstream port specific RAS register + * @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB + */ + struct_group_tagged(cxl_rch_regs, rch_regs, + void __iomem *dport_aer; + ); }; struct cxl_reg_map { @@ -247,7 +255,7 @@ struct cxl_pmu_reg_map { /** * struct cxl_register_map - DVSEC harvested register block mapping parameters - * @dev: device for devm operations and logging + * @host: device for devm operations and logging * @base: virtual base of the register-block-BAR + @block_offset * @resource: physical resource base of the register block * @max_size: maximum mapping size to perform register search @@ -257,7 +265,7 @@ struct cxl_pmu_reg_map { * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units */ struct cxl_register_map { - struct device *dev; + struct device *host; void __iomem *base; resource_size_t resource; resource_size_t max_size; @@ -278,8 +286,7 @@ int cxl_map_component_regs(const struct cxl_register_map *map, unsigned long map_mask); int cxl_map_device_regs(const struct cxl_register_map *map, struct cxl_device_regs *regs); -int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs, - struct cxl_register_map *map); +int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs); enum cxl_regloc_type; int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type); @@ -321,6 +328,7 @@ enum cxl_decoder_type { */ #define CXL_DECODER_MAX_INTERLEAVE 16 +#define CXL_QOS_CLASS_INVALID -1 /** * struct cxl_decoder - Common CXL HDM Decoder Attributes @@ -432,6 +440,7 @@ typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd, * @calc_hb: which host bridge covers the n'th position by granularity * @platform_data: platform specific configuration data * @range_lock: sync region autodiscovery by address range + * @qos_class: QoS performance class cookie * @cxlsd: base cxl switch decoder */ struct cxl_root_decoder { @@ -440,6 +449,7 @@ struct cxl_root_decoder { cxl_calc_hb_fn calc_hb; void *platform_data; struct mutex range_lock; + int qos_class; struct cxl_switch_decoder cxlsd; }; @@ -572,11 +582,10 @@ struct cxl_dax_region { * @regions: cxl_region_ref instances, regions mapped by this port * @parent_dport: dport that points to this port in the parent * @decoder_ida: allocator for decoder ids - * @comp_map: component register capability mappings + * @reg_map: component and ras register mapping parameters * @nr_dports: number of entries in @dports * @hdm_end: track last allocated HDM decoder instance for allocation ordering * @commit_end: cursor to track highest committed decoder for commit ordering - * @component_reg_phys: component register capability base address (optional) * @dead: last ep has been removed, force port re-creation * @depth: How deep this port is relative to the root. depth 0 is the root. 
* @cdat: Cached CDAT data @@ -592,11 +601,10 @@ struct cxl_port { struct xarray regions; struct cxl_dport *parent_dport; struct ida decoder_ida; - struct cxl_register_map comp_map; + struct cxl_register_map reg_map; int nr_dports; int hdm_end; int commit_end; - resource_size_t component_reg_phys; bool dead; unsigned int depth; struct cxl_cdat { @@ -620,19 +628,21 @@ struct cxl_rcrb_info { /** * struct cxl_dport - CXL downstream port * @dport_dev: PCI bridge or firmware device representing the downstream link - * @comp_map: component register capability mappings + * @reg_map: component and ras register mapping parameters * @port_id: unique hardware identifier for dport in decoder target list * @rcrb: Data about the Root Complex Register Block layout * @rch: Indicate whether this dport was enumerated in RCH or VH mode * @port: reference to cxl_port that contains this downstream port + * @regs: Dport parsed register blocks */ struct cxl_dport { struct device *dport_dev; - struct cxl_register_map comp_map; + struct cxl_register_map reg_map; int port_id; struct cxl_rcrb_info rcrb; bool rch; struct cxl_port *port; + struct cxl_regs regs; }; /** @@ -679,6 +689,7 @@ static inline bool is_cxl_root(struct cxl_port *port) return port->uport_dev == port->dev.parent; } +int cxl_num_decoders_committed(struct cxl_port *port); bool is_cxl_port(const struct device *dev); struct cxl_port *to_cxl_port(const struct device *dev); struct pci_bus; @@ -706,6 +717,13 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port, struct device *dport_dev, int port_id, resource_size_t rcrb); +#ifdef CONFIG_PCIEAER_CXL +void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport); +#else +static inline void cxl_setup_parent_dport(struct device *host, + struct cxl_dport *dport) { } +#endif + struct cxl_decoder *to_cxl_decoder(struct device *dev); struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev); struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 706f8a6d1ef4..a2fcbca253f3 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -84,9 +84,12 @@ static inline bool is_cxl_endpoint(struct cxl_port *port) return is_cxl_memdev(port->uport_dev); } -struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds); +struct cxl_memdev *devm_cxl_add_memdev(struct device *host, + struct cxl_dev_state *cxlds); +int devm_cxl_sanitize_setup_notifier(struct device *host, + struct cxl_memdev *cxlmd); struct cxl_memdev_state; -int cxl_memdev_setup_fw_upload(struct cxl_memdev_state *mds); +int devm_cxl_setup_fw_upload(struct device *host, struct cxl_memdev_state *mds); int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped); @@ -360,16 +363,16 @@ struct cxl_fw_state { * * @state: state of last security operation * @enabled_cmds: All security commands enabled in the CEL - * @poll: polling for sanitization is enabled, device has no mbox irq support * @poll_tmo_secs: polling timeout + * @sanitize_active: sanitize completion pending * @poll_dwork: polling work item * @sanitize_node: sanitation sysfs file to notify */ struct cxl_security_state { unsigned long state; DECLARE_BITMAP(enabled_cmds, CXL_SEC_ENABLED_MAX); - bool poll; int poll_tmo_secs; + bool sanitize_active; struct delayed_work poll_dwork; struct kernfs_node *sanitize_node; }; @@ -397,6 +400,7 @@ enum cxl_devtype { * * @dev: The device associated with this CXL state * @cxlmd: The 
device representing the CXL.mem capabilities of @dev + * @reg_map: component and ras register mapping parameters * @regs: Parsed register blocks * @cxl_dvsec: Offset to the PCIe device DVSEC * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) @@ -404,13 +408,13 @@ enum cxl_devtype { * @dpa_res: Overall DPA resource tree for the device * @pmem_res: Active Persistent memory capacity configuration * @ram_res: Active Volatile memory capacity configuration - * @component_reg_phys: register base of component registers * @serial: PCIe Device Serial Number * @type: Generic Memory Class device or Vendor Specific Memory device */ struct cxl_dev_state { struct device *dev; struct cxl_memdev *cxlmd; + struct cxl_register_map reg_map; struct cxl_regs regs; int cxl_dvsec; bool rcd; @@ -418,7 +422,6 @@ struct cxl_dev_state { struct resource dpa_res; struct resource pmem_res; struct resource ram_res; - resource_size_t component_reg_phys; u64 serial; enum cxl_devtype type; }; @@ -883,7 +886,7 @@ static inline void cxl_mem_active_dec(void) } #endif -int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd); +int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd); struct cxl_hdm { struct cxl_component_regs regs; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 317c7548e4e9..e087febf9af0 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -49,7 +49,6 @@ static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, struct cxl_dport *parent_dport) { struct cxl_port *parent_port = parent_dport->port; - struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_port *endpoint, *iter, *down; int rc; @@ -65,8 +64,8 @@ static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, ep->next = down; } - endpoint = devm_cxl_add_port(host, &cxlmd->dev, - cxlds->component_reg_phys, + /* Note: endpoint port component registers are derived from @cxlds */ + endpoint = devm_cxl_add_port(host, &cxlmd->dev, CXL_RESOURCE_NONE, parent_dport); if (IS_ERR(endpoint)) return PTR_ERR(endpoint); @@ -158,6 +157,8 @@ static int cxl_mem_probe(struct device *dev) else endpoint_parent = &parent_port->dev; + cxl_setup_parent_dport(dev, dport); + device_lock(endpoint_parent); if (!endpoint_parent->driver) { dev_err(dev, "CXL port topology %s not enabled\n", diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 44a21ab7add5..0155fb66b580 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -85,25 +85,28 @@ static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ status & CXLMDEV_FW_HALT ? " firmware-halt" : "") +/* + * Threaded irq dev_id's must be globally unique. cxl_dev_id provides a unique + * wrapper object for each irq within the same cxlds. 
+ */ struct cxl_dev_id { struct cxl_dev_state *cxlds; }; static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq, - irq_handler_t handler, irq_handler_t thread_fn) + irq_handler_t thread_fn) { struct device *dev = cxlds->dev; struct cxl_dev_id *dev_id; - /* dev_id must be globally unique and must contain the cxlds */ dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL); if (!dev_id) return -ENOMEM; dev_id->cxlds = cxlds; - return devm_request_threaded_irq(dev, irq, handler, thread_fn, - IRQF_SHARED | IRQF_ONESHOT, - NULL, dev_id); + return devm_request_threaded_irq(dev, irq, NULL, thread_fn, + IRQF_SHARED | IRQF_ONESHOT, NULL, + dev_id); } static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds) @@ -128,10 +131,10 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id) reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg); if (opcode == CXL_MBOX_OP_SANITIZE) { + mutex_lock(&mds->mbox_mutex); if (mds->security.sanitize_node) - sysfs_notify_dirent(mds->security.sanitize_node); - - dev_dbg(cxlds->dev, "Sanitization operation ended\n"); + mod_delayed_work(system_wq, &mds->security.poll_dwork, 0); + mutex_unlock(&mds->mbox_mutex); } else { /* short-circuit the wait in __cxl_pci_mbox_send_cmd() */ rcuwait_wake_up(&mds->mbox_wait); @@ -152,18 +155,16 @@ static void cxl_mbox_sanitize_work(struct work_struct *work) mutex_lock(&mds->mbox_mutex); if (cxl_mbox_background_complete(cxlds)) { mds->security.poll_tmo_secs = 0; - put_device(cxlds->dev); - if (mds->security.sanitize_node) sysfs_notify_dirent(mds->security.sanitize_node); + mds->security.sanitize_active = false; dev_dbg(cxlds->dev, "Sanitization operation ended\n"); } else { int timeout = mds->security.poll_tmo_secs + 10; mds->security.poll_tmo_secs = min(15 * 60, timeout); - queue_delayed_work(system_wq, &mds->security.poll_dwork, - timeout * HZ); + schedule_delayed_work(&mds->security.poll_dwork, timeout * HZ); } mutex_unlock(&mds->mbox_mutex); } @@ -295,18 +296,15 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_memdev_state *mds, * and allow userspace to poll(2) for completion. 
*/ if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) { - if (mds->security.poll) { - /* hold the device throughout */ - get_device(cxlds->dev); - - /* give first timeout a second */ - timeout = 1; - mds->security.poll_tmo_secs = timeout; - queue_delayed_work(system_wq, - &mds->security.poll_dwork, - timeout * HZ); - } - + if (mds->security.sanitize_active) + return -EBUSY; + + /* give first timeout a second */ + timeout = 1; + mds->security.poll_tmo_secs = timeout; + mds->security.sanitize_active = true; + schedule_delayed_work(&mds->security.poll_dwork, + timeout * HZ); dev_dbg(dev, "Sanitization operation started\n"); goto success; } @@ -389,7 +387,9 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds) const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET); struct device *dev = cxlds->dev; unsigned long timeout; + int irq, msgnum; u64 md_status; + u32 ctrl; timeout = jiffies + mbox_ready_timeout * HZ; do { @@ -437,33 +437,26 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds) dev_dbg(dev, "Mailbox payload sized %zu", mds->payload_size); rcuwait_init(&mds->mbox_wait); + INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work); - if (cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) { - u32 ctrl; - int irq, msgnum; - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - - msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap); - irq = pci_irq_vector(pdev, msgnum); - if (irq < 0) - goto mbox_poll; - - if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq, NULL)) - goto mbox_poll; + /* background command interrupts are optional */ + if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ)) + return 0; - /* enable background command mbox irq support */ - ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); - ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ; - writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); + msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap); + irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum); + if (irq < 0) + return 0; + if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq)) return 0; - } -mbox_poll: - mds->security.poll = true; - INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work); + dev_dbg(cxlds->dev, "Mailbox interrupts enabled\n"); + /* enable background command mbox irq support */ + ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); + ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ; + writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); - dev_dbg(cxlds->dev, "Mailbox interrupts are unsupported"); return 0; } @@ -484,7 +477,7 @@ static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, resource_size_t component_reg_phys; *map = (struct cxl_register_map) { - .dev = &pdev->dev, + .host = &pdev->dev, .resource = CXL_RESOURCE_NONE, }; @@ -653,7 +646,7 @@ static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting) if (irq < 0) return irq; - return cxl_request_irq(cxlds, irq, NULL, cxl_event_thread); + return cxl_request_irq(cxlds, irq, cxl_event_thread); } static int cxl_event_get_int_policy(struct cxl_memdev_state *mds, @@ -834,16 +827,14 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) * If the component registers can't be found, the cxl_pci driver may * still be useful for management functions so don't return an error. 
*/ - cxlds->component_reg_phys = CXL_RESOURCE_NONE; - rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, + &cxlds->reg_map); if (rc) dev_warn(&pdev->dev, "No component registers (%d)\n", rc); - else if (!map.component_map.ras.valid) + else if (!cxlds->reg_map.component_map.ras.valid) dev_dbg(&pdev->dev, "RAS registers not found\n"); - cxlds->component_reg_phys = map.resource; - - rc = cxl_map_component_regs(&map, &cxlds->regs.component, + rc = cxl_map_component_regs(&cxlds->reg_map, &cxlds->regs.component, BIT(CXL_CM_CAP_CAP_ID_RAS)); if (rc) dev_dbg(&pdev->dev, "Failed to map RAS capability.\n"); @@ -882,11 +873,15 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - cxlmd = devm_cxl_add_memdev(cxlds); + cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); - rc = cxl_memdev_setup_fw_upload(mds); + rc = devm_cxl_setup_fw_upload(&pdev->dev, mds); + if (rc) + return rc; + + rc = devm_cxl_sanitize_setup_notifier(&pdev->dev, cxlmd); if (rc) return rc; @@ -900,7 +895,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) break; } - rc = cxl_map_pmu_regs(pdev, &pmu_regs, &map); + rc = cxl_map_pmu_regs(&map, &pmu_regs); if (rc) { dev_dbg(&pdev->dev, "Could not map PMU regs\n"); break; diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 6240e05b9542..47bc8e0b8590 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -62,6 +62,9 @@ static int cxl_switch_port_probe(struct cxl_port *port) struct cxl_hdm *cxlhdm; int rc; + /* Cache the data early to ensure is_visible() works */ + read_cdat_data(port); + rc = devm_cxl_port_enumerate_dports(port); if (rc < 0) return rc; diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index 228652a59f27..8999fcebde6a 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -49,6 +49,15 @@ config PCIEAER_INJECT gotten from: https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/ +config PCIEAER_CXL + bool "PCI Express CXL RAS support" + default y + depends on PCIEAER && CXL_PCI + help + Enables CXL error handling. + + If unsure, say Y. + # # PCI Express ECRC # diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index dcd35993004e..42a3bd35a3e1 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -760,9 +760,10 @@ int cper_severity_to_aer(int cper_severity) } } EXPORT_SYMBOL_GPL(cper_severity_to_aer); +#endif -void cper_print_aer(struct pci_dev *dev, int aer_severity, - struct aer_capability_regs *aer) +void pci_print_aer(struct pci_dev *dev, int aer_severity, + struct aer_capability_regs *aer) { int layer, agent, tlp_header_valid = 0; u32 status, mask; @@ -801,7 +802,7 @@ void cper_print_aer(struct pci_dev *dev, int aer_severity, trace_aer_event(dev_name(&dev->dev), (status & ~mask), aer_severity, tlp_header_valid, &aer->header_log); } -#endif +EXPORT_SYMBOL_NS_GPL(pci_print_aer, CXL); /** * add_error_device - list device to be handled @@ -934,14 +935,153 @@ static bool find_source_device(struct pci_dev *parent, return true; } +#ifdef CONFIG_PCIEAER_CXL + /** - * handle_error_source - handle logging error into an event log + * pci_aer_unmask_internal_errors - unmask internal errors + * @dev: pointer to the pcie_dev data structure + * + * Unmasks internal errors in the Uncorrectable and Correctable Error + * Mask registers. 
+ * + * Note: AER must be enabled and supported by the device which must be + * checked in advance, e.g. with pcie_aer_is_native(). + */ +static void pci_aer_unmask_internal_errors(struct pci_dev *dev) +{ + int aer = dev->aer_cap; + u32 mask; + + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask); + mask &= ~PCI_ERR_UNC_INTN; + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, mask); + + pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask); + mask &= ~PCI_ERR_COR_INTERNAL; + pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask); +} + +static bool is_cxl_mem_dev(struct pci_dev *dev) +{ + /* + * The capability, status, and control fields in Device 0, + * Function 0 DVSEC control the CXL functionality of the + * entire device (CXL 3.0, 8.1.3). + */ + if (dev->devfn != PCI_DEVFN(0, 0)) + return false; + + /* + * CXL Memory Devices must have the 502h class code set (CXL + * 3.0, 8.1.12.1). + */ + if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL) + return false; + + return true; +} + +static bool cxl_error_is_native(struct pci_dev *dev) +{ + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); + + return (pcie_ports_native || host->native_aer); +} + +static bool is_internal_error(struct aer_err_info *info) +{ + if (info->severity == AER_CORRECTABLE) + return info->status & PCI_ERR_COR_INTERNAL; + + return info->status & PCI_ERR_UNC_INTN; +} + +static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) +{ + struct aer_err_info *info = (struct aer_err_info *)data; + const struct pci_error_handlers *err_handler; + + if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) + return 0; + + /* protect dev->driver */ + device_lock(&dev->dev); + + err_handler = dev->driver ? dev->driver->err_handler : NULL; + if (!err_handler) + goto out; + + if (info->severity == AER_CORRECTABLE) { + if (err_handler->cor_error_detected) + err_handler->cor_error_detected(dev); + } else if (err_handler->error_detected) { + if (info->severity == AER_NONFATAL) + err_handler->error_detected(dev, pci_channel_io_normal); + else if (info->severity == AER_FATAL) + err_handler->error_detected(dev, pci_channel_io_frozen); + } +out: + device_unlock(&dev->dev); + return 0; +} + +static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) +{ + /* + * Internal errors of an RCEC indicate an AER error in an + * RCH's downstream port. Check and handle them in the CXL.mem + * device driver. 
+ */ + if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && + is_internal_error(info)) + pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); +} + +static int handles_cxl_error_iter(struct pci_dev *dev, void *data) +{ + bool *handles_cxl = data; + + if (!*handles_cxl) + *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev); + + /* Non-zero terminates iteration */ + return *handles_cxl; +} + +static bool handles_cxl_errors(struct pci_dev *rcec) +{ + bool handles_cxl = false; + + if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC && + pcie_aer_is_native(rcec)) + pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl); + + return handles_cxl; +} + +static void cxl_rch_enable_rcec(struct pci_dev *rcec) +{ + if (!handles_cxl_errors(rcec)) + return; + + pci_aer_unmask_internal_errors(rcec); + pci_info(rcec, "CXL: Internal errors unmasked"); +} + +#else +static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { } +static inline void cxl_rch_handle_error(struct pci_dev *dev, + struct aer_err_info *info) { } +#endif + +/** + * pci_aer_handle_error - handle logging error into an event log * @dev: pointer to pci_dev data structure of error source device * @info: comprehensive error information * * Invoked when an error being detected by Root Port. */ -static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) +static void pci_aer_handle_error(struct pci_dev *dev, struct aer_err_info *info) { int aer = dev->aer_cap; @@ -965,6 +1105,12 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset); else if (info->severity == AER_FATAL) pcie_do_recovery(dev, pci_channel_io_frozen, aer_root_reset); +} + +static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) +{ + cxl_rch_handle_error(dev, info); + pci_aer_handle_error(dev, info); pci_dev_put(dev); } @@ -997,7 +1143,7 @@ static void aer_recover_work_func(struct work_struct *work) PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); continue; } - cper_print_aer(pdev, entry.severity, entry.regs); + pci_print_aer(pdev, entry.severity, entry.regs); /* * Memory for aer_capability_regs(entry.regs) is being allocated from the * ghes_estatus_pool to protect it from overwriting when multiple sections @@ -1348,6 +1494,7 @@ static int aer_probe(struct pcie_device *dev) return status; } + cxl_rch_enable_rcec(port); aer_enable_rootport(rpc); pci_info(port, "enabled with IRQ %d\n", dev->irq); return 0; |
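
The interleave-position rework above (cxl_calc_interleave_pos() in drivers/cxl/core/region.c) boils down to one recurrence applied while walking from the endpoint toward the root:

	position = position * parent_ways + parent_pos

The stand-alone user-space sketch below reproduces the 4-way example traced in the kernel-doc comment; struct level, calc_pos() and the mem0..mem3 tables are illustrative only and not part of the kernel API.

/*
 * Hypothetical illustration of the cxl_calc_interleave_pos() recurrence.
 * Each hop records the index of the child in its parent decoder's target
 * list ("pos") and the parent decoder's interleave ways ("ways"), ordered
 * from the endpoint's parent up toward the root.
 */
#include <stdio.h>

struct level {
	int pos;	/* index in the parent decoder's target list */
	int ways;	/* interleave ways of the parent decoder */
};

static int calc_pos(const struct level *path, int depth)
{
	int pos = 0;

	for (int i = 0; i < depth; i++)
		pos = pos * path[i].ways + path[i].pos;
	return pos;
}

int main(void)
{
	/* {pos in host bridge, hb ways}, {host bridge pos in root, root ways} */
	const struct level mem0[] = { { 0, 2 }, { 0, 2 } };
	const struct level mem1[] = { { 1, 2 }, { 0, 2 } };
	const struct level mem2[] = { { 0, 2 }, { 1, 2 } };
	const struct level mem3[] = { { 1, 2 }, { 1, 2 } };

	printf("mem0:%d mem1:%d mem2:%d mem3:%d\n",
	       calc_pos(mem0, 2), calc_pos(mem1, 2),
	       calc_pos(mem2, 2), calc_pos(mem3, 2));
	return 0;
}

Running it prints mem0:0 mem1:2 mem2:1 mem3:3, i.e. the interleave order mem0, mem2, mem1, mem3 given in the comment.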
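
cxl_rcrb_to_aer() (drivers/cxl/core/regs.c above) finds the AER extended capability by walking the RCRB's PCIe extended-capability chain in MMIO space: each 32-bit header carries the capability ID in bits [15:0] and the offset of the next header in bits [31:20], with a next-offset of zero terminating the list. A minimal sketch of that walk over a fake in-memory register block follows; find_aer(), the buffer and the offsets are made up for illustration, and only the header decode mirrors the patch.

/*
 * Walk a (fake) extended-capability list the same way cxl_rcrb_to_aer()
 * does, returning the offset of the AER capability or 0 if none is found.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define EXT_CAP_ID(hdr)		((hdr) & 0xffff)
#define EXT_CAP_NEXT(hdr)	(((hdr) >> 20) & 0xffc)
#define EXT_CAP_ID_ERR		0x0001	/* AER */

static uint16_t find_aer(const uint8_t *rcrb)
{
	uint16_t offset = 0;
	uint32_t hdr;

	memcpy(&hdr, rcrb + offset, sizeof(hdr));
	while (EXT_CAP_ID(hdr) != EXT_CAP_ID_ERR) {
		offset = EXT_CAP_NEXT(hdr);
		if (!offset)		/* offset 0 terminates the list */
			break;
		memcpy(&hdr, rcrb + offset, sizeof(hdr));
	}
	return offset;
}

int main(void)
{
	uint8_t rcrb[4096] = { 0 };
	uint32_t cap0 = (0x100u << 20) | 0x0010;	/* some cap, next @ 0x100 */
	uint32_t cap1 = (0x000u << 20) | 0x0001;	/* AER, end of list */

	memcpy(rcrb + 0x000, &cap0, sizeof(cap0));
	memcpy(rcrb + 0x100, &cap1, sizeof(cap1));
	printf("AER capability at %#x\n", find_aer(rcrb));
	return 0;
}

Masking the next pointer with 0xffc keeps the offsets DWORD-aligned, matching the PCI_EXT_CAP_NEXT() helper used in the patch.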
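
The sanitize rework in drivers/cxl/pci.c replaces the unconditional polling setup with a delayed-work back-off: the first poll is scheduled one second after the command is sent, each re-arm in cxl_mbox_sanitize_work() adds ten seconds, and the stored interval is capped at fifteen minutes. A tiny stand-alone sketch of that schedule, assuming the operation stays busy (the loop bound and printf are purely illustrative):

/* Print the first few sanitize poll intervals produced by the back-off. */
#include <stdio.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))

int main(void)
{
	int poll_tmo_secs = 1;	/* first timeout, set when the command starts */

	for (int i = 0; i < 6; i++) {
		printf("poll #%d fires after %d seconds\n", i + 1, poll_tmo_secs);
		poll_tmo_secs = MIN(15 * 60, poll_tmo_secs + 10);
	}
	return 0;
}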