Diffstat (limited to 'drivers/cxl')
-rw-r--r--  drivers/cxl/Kconfig          |   87
-rw-r--r--  drivers/cxl/acpi.c           |   24
-rw-r--r--  drivers/cxl/core/Makefile    |    5
-rw-r--r--  drivers/cxl/core/acpi.c      |   11
-rw-r--r--  drivers/cxl/core/cdat.c      |  104
-rw-r--r--  drivers/cxl/core/core.h      |   32
-rw-r--r--  drivers/cxl/core/edac.c      | 2113
-rw-r--r--  drivers/cxl/core/features.c  |  703
-rw-r--r--  drivers/cxl/core/hdm.c       |  394
-rw-r--r--  drivers/cxl/core/mbox.c      |  276
-rw-r--r--  drivers/cxl/core/mce.c       |   65
-rw-r--r--  drivers/cxl/core/mce.h       |   20
-rw-r--r--  drivers/cxl/core/memdev.c    |  110
-rw-r--r--  drivers/cxl/core/pci.c       |  149
-rw-r--r--  drivers/cxl/core/port.c      |   61
-rw-r--r--  drivers/cxl/core/ras.c       |  126
-rw-r--r--  drivers/cxl/core/region.c    |  523
-rw-r--r--  drivers/cxl/core/regs.c      |    4
-rw-r--r--  drivers/cxl/core/trace.h     |   81
-rw-r--r--  drivers/cxl/cxl.h            |   75
-rw-r--r--  drivers/cxl/cxlmem.h         |  154
-rw-r--r--  drivers/cxl/cxlpci.h         |    6
-rw-r--r--  drivers/cxl/mem.c            |    6
-rw-r--r--  drivers/cxl/pci.c            |   15
-rw-r--r--  drivers/cxl/pmem.c           |   81
-rw-r--r--  drivers/cxl/port.c           |   25
26 files changed, 4563 insertions, 687 deletions
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index 876469e23f7a..48b7314afdb8 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -7,6 +7,7 @@ menuconfig CXL_BUS
select PCI_DOE
select FIRMWARE_TABLE
select NUMA_KEEP_MEMINFO if NUMA_MEMBLKS
+ select FWCTL if CXL_FEATURES
help
CXL is a bus that is electrically compatible with PCI Express, but
layers three protocols on that signalling (CXL.io, CXL.cache, and
@@ -102,6 +103,88 @@ config CXL_MEM
If unsure say 'm'.
+config CXL_FEATURES
+ bool "CXL: Features"
+ depends on CXL_PCI
+ help
+ Enable support for CXL Features. A CXL device that includes a mailbox
+ supports commands that allow listing, getting, and setting of
+ optionally defined features such as memory sparing or post package
+ sparing. Vendors may define custom features for the device.
+
+ If unsure say 'n'.
+
+config CXL_EDAC_MEM_FEATURES
+ bool "CXL: EDAC Memory Features"
+ depends on EXPERT
+ depends on CXL_MEM
+ depends on CXL_FEATURES
+ depends on EDAC >= CXL_BUS
+ help
+ The CXL EDAC memory feature is optional and allows the host to
+ control the EDAC memory feature configurations of CXL memory
+ expander devices.
+
+ Say 'y' if you have an expert need to change default settings
+ of a memory RAS feature established by the platform/device.
+ Otherwise say 'n'.
+
+config CXL_EDAC_SCRUB
+ bool "Enable CXL Patrol Scrub Control (Patrol Read)"
+ depends on CXL_EDAC_MEM_FEATURES
+ depends on EDAC_SCRUB
+ help
+ The CXL EDAC scrub control is optional and allows the host to
+ control the scrub feature configurations of CXL memory expander
+ devices.
+
+ When enabled, 'cxl_mem' and 'cxl_region' EDAC devices are
+ published with memory scrub control attributes as described in
+ Documentation/ABI/testing/sysfs-edac-scrub.
+
+ Say 'y' if you have an expert need to change default settings
+ of a memory scrub feature established by the platform/device
+ (e.g. scrub rates for the patrol scrub feature).
+ Otherwise say 'n'.
+
+config CXL_EDAC_ECS
+ bool "Enable CXL Error Check Scrub (Repair)"
+ depends on CXL_EDAC_MEM_FEATURES
+ depends on EDAC_ECS
+ help
+ The CXL EDAC ECS control is optional and allows the host to
+ control the ECS feature configurations of CXL memory expander
+ devices.
+
+ When enabled, 'cxl_mem' EDAC devices are published with memory
+ ECS control attributes as described in
+ Documentation/ABI/testing/sysfs-edac-ecs.
+
+ Say 'y' if you have an expert need to change default settings
+ of a memory ECS feature established by the platform/device.
+ Otherwise say 'n'.
+
+config CXL_EDAC_MEM_REPAIR
+ bool "Enable CXL Memory Repair"
+ depends on CXL_EDAC_MEM_FEATURES
+ depends on EDAC_MEM_REPAIR
+ help
+ The CXL EDAC memory repair control is optional and allows the
+ host to control the memory repair feature (e.g. sparing, PPR)
+ configurations of CXL memory expander devices.
+
+ When enabled, the memory repair feature requires approximately
+ 43KB of additional memory to store CXL DRAM and CXL general
+ media event records.
+
+ When enabled, 'cxl_mem' EDAC devices are published with memory
+ repair control attributes as described in
+ Documentation/ABI/testing/sysfs-edac-memory-repair.
+
+ Say 'y' if you have an expert need to change default settings
+ of a memory repair feature established by the platform/device.
+ Otherwise say 'n'.
+
config CXL_PORT
default CXL_BUS
tristate
@@ -146,4 +229,8 @@ config CXL_REGION_INVALIDATION_TEST
If unsure, or if this kernel is meant for production environments,
say N.
+config CXL_MCE
+ def_bool y
+ depends on X86_MCE && MEMORY_FAILURE
+
endif
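
The new CXL_EDAC_SCRUB, CXL_EDAC_ECS, and CXL_EDAC_MEM_REPAIR options are plain bool symbols; within edac.c (built only when CXL_EDAC_MEM_FEATURES=y, per the Makefile change further down) they can be consulted at runtime with IS_ENABLED() rather than #ifdef, as cxl_store_rec_gen_media() does for CONFIG_CXL_EDAC_MEM_REPAIR. A minimal sketch of that pattern (the helper name is made up):

	/* Sketch only: compile-time constant gate derived from a bool Kconfig symbol */
	static bool cxl_mem_repair_records_enabled(void)
	{
		return IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR);
	}
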
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index cb14829bb9be..a1a99ec3f12c 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -11,8 +11,6 @@
#include "cxlpci.h"
#include "cxl.h"
-#define CXL_RCRB_SIZE SZ_8K
-
struct cxl_cxims_data {
int nr_maps;
u64 xormaps[] __counted_by(nr_maps);
@@ -421,7 +419,15 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
rc = cxl_decoder_add(cxld, target_map);
if (rc)
return rc;
- return cxl_root_decoder_autoremove(dev, no_free_ptr(cxlrd));
+
+ rc = cxl_root_decoder_autoremove(dev, no_free_ptr(cxlrd));
+ if (rc)
+ return rc;
+
+ dev_dbg(root_port->dev.parent, "%s added to %s\n",
+ dev_name(&cxld->dev), dev_name(&root_port->dev));
+
+ return 0;
}
static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
@@ -479,7 +485,11 @@ static int cxl_get_chbs_iter(union acpi_subtable_headers *header, void *arg,
chbs = (struct acpi_cedt_chbs *) header;
if (chbs->cxl_version == ACPI_CEDT_CHBS_VERSION_CXL11 &&
- chbs->length != CXL_RCRB_SIZE)
+ chbs->length != ACPI_CEDT_CHBS_LENGTH_CXL11)
+ return 0;
+
+ if (chbs->cxl_version == ACPI_CEDT_CHBS_VERSION_CXL20 &&
+ chbs->length != ACPI_CEDT_CHBS_LENGTH_CXL20)
return 0;
if (!chbs->base)
@@ -739,10 +749,10 @@ static void remove_cxl_resources(void *data)
* expanding its boundaries to ensure that any conflicting resources become
* children. If a window is expanded it may then conflict with a another window
* entry and require the window to be truncated or trimmed. Consider this
- * situation:
+ * situation::
*
- * |-- "CXL Window 0" --||----- "CXL Window 1" -----|
- * |--------------- "System RAM" -------------|
+ * |-- "CXL Window 0" --||----- "CXL Window 1" -----|
+ * |--------------- "System RAM" -------------|
*
* ...where platform firmware has established a "System RAM" resource across 2
* windows, but has left some portion of window 1 for dynamic CXL region
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 9259bcc6773c..79e2ef81fde8 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -14,5 +14,10 @@ cxl_core-y += pci.o
cxl_core-y += hdm.o
cxl_core-y += pmu.o
cxl_core-y += cdat.o
+cxl_core-y += ras.o
+cxl_core-y += acpi.o
cxl_core-$(CONFIG_TRACING) += trace.o
cxl_core-$(CONFIG_CXL_REGION) += region.o
+cxl_core-$(CONFIG_CXL_MCE) += mce.o
+cxl_core-$(CONFIG_CXL_FEATURES) += features.o
+cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
diff --git a/drivers/cxl/core/acpi.c b/drivers/cxl/core/acpi.c
new file mode 100644
index 000000000000..f13b4dae6ac5
--- /dev/null
+++ b/drivers/cxl/core/acpi.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2024 Intel Corporation. All rights reserved. */
+#include <linux/acpi.h>
+#include "cxl.h"
+#include "core.h"
+
+int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res,
+ int nid, resource_size_t *size)
+{
+ return hmat_get_extended_linear_cache_size(backing_res, nid, size);
+}
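
The new file adds a thin wrapper around the ACPI HMAT helper hmat_get_extended_linear_cache_size(). A hypothetical caller sketch (the function and variable names below are illustrative, not part of the patch):

	/* Illustrative only: query the extended linear cache size for a backing resource */
	static int example_query_elc(struct device *dev, struct resource *backing_res, int nid)
	{
		resource_size_t cache_size;
		int rc;

		rc = cxl_acpi_get_extended_linear_cache_size(backing_res, nid, &cache_size);
		if (rc)
			return rc;

		dev_dbg(dev, "extended linear cache size: %pa\n", &cache_size);
		return 0;
	}
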
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index 8153f8d83a16..0ccef2f2a26a 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -28,7 +28,7 @@ static u32 cdat_normalize(u16 entry, u64 base, u8 type)
*/
if (entry == 0xffff || !entry)
return 0;
- else if (base > (UINT_MAX / (entry)))
+ if (base > (UINT_MAX / (entry)))
return 0;
/*
@@ -258,27 +258,29 @@ static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
struct xarray *dsmas_xa)
{
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
struct device *dev = cxlds->dev;
- struct range pmem_range = {
- .start = cxlds->pmem_res.start,
- .end = cxlds->pmem_res.end,
- };
- struct range ram_range = {
- .start = cxlds->ram_res.start,
- .end = cxlds->ram_res.end,
- };
struct dsmas_entry *dent;
unsigned long index;
xa_for_each(dsmas_xa, index, dent) {
- if (resource_size(&cxlds->ram_res) &&
- range_contains(&ram_range, &dent->dpa_range))
- update_perf_entry(dev, dent, &mds->ram_perf);
- else if (resource_size(&cxlds->pmem_res) &&
- range_contains(&pmem_range, &dent->dpa_range))
- update_perf_entry(dev, dent, &mds->pmem_perf);
- else
+ bool found = false;
+
+ for (int i = 0; i < cxlds->nr_partitions; i++) {
+ struct resource *res = &cxlds->part[i].res;
+ struct range range = {
+ .start = res->start,
+ .end = res->end,
+ };
+
+ if (range_contains(&range, &dent->dpa_range)) {
+ update_perf_entry(dev, dent,
+ &cxlds->part[i].perf);
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
dev_dbg(dev, "no partition for dsmas dpa: %pra\n",
&dent->dpa_range);
}
@@ -343,36 +345,46 @@ static int match_cxlrd_hb(struct device *dev, void *data)
return 0;
}
-static int cxl_qos_class_verify(struct cxl_memdev *cxlmd)
+static void cxl_qos_class_verify(struct cxl_memdev *cxlmd)
{
struct cxl_dev_state *cxlds = cxlmd->cxlds;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
struct cxl_port *root_port;
- int rc;
struct cxl_root *cxl_root __free(put_cxl_root) =
find_cxl_root(cxlmd->endpoint);
+ /*
+ * No need to reset_dpa_perf() here as find_cxl_root() is guaranteed to
+ * succeed when called in the cxl_endpoint_port_probe() path.
+ */
if (!cxl_root)
- return -ENODEV;
+ return;
root_port = &cxl_root->port;
- /* Check that the QTG IDs are all sane between end device and root decoders */
- if (!cxl_qos_match(root_port, &mds->ram_perf))
- reset_dpa_perf(&mds->ram_perf);
- if (!cxl_qos_match(root_port, &mds->pmem_perf))
- reset_dpa_perf(&mds->pmem_perf);
-
- /* Check to make sure that the device's host bridge is under a root decoder */
- rc = device_for_each_child(&root_port->dev,
- cxlmd->endpoint->host_bridge, match_cxlrd_hb);
- if (!rc) {
- reset_dpa_perf(&mds->ram_perf);
- reset_dpa_perf(&mds->pmem_perf);
+ /*
+ * Save userspace from needing to check if a qos class has any matches
+ * by hiding qos class info if the memdev is not mapped by a root
+ * decoder, or the partition class does not match any root decoder
+ * class.
+ */
+ if (!device_for_each_child(&root_port->dev,
+ cxlmd->endpoint->host_bridge,
+ match_cxlrd_hb)) {
+ for (int i = 0; i < cxlds->nr_partitions; i++) {
+ struct cxl_dpa_perf *perf = &cxlds->part[i].perf;
+
+ reset_dpa_perf(perf);
+ }
+ return;
}
- return rc;
+ for (int i = 0; i < cxlds->nr_partitions; i++) {
+ struct cxl_dpa_perf *perf = &cxlds->part[i].perf;
+
+ if (!cxl_qos_match(root_port, perf))
+ reset_dpa_perf(perf);
+ }
}
static void discard_dsmas(struct xarray *xa)
@@ -570,23 +582,18 @@ static bool dpa_perf_contains(struct cxl_dpa_perf *perf,
return range_contains(&perf->dpa_range, &dpa);
}
-static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxled,
- enum cxl_decoder_mode mode)
+static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxled)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_dpa_perf *perf;
- switch (mode) {
- case CXL_DECODER_RAM:
- perf = &mds->ram_perf;
- break;
- case CXL_DECODER_PMEM:
- perf = &mds->pmem_perf;
- break;
- default:
+ if (cxled->part < 0)
+ return ERR_PTR(-EINVAL);
+ perf = &cxlds->part[cxled->part].perf;
+
+ if (!perf)
return ERR_PTR(-EINVAL);
- }
if (!dpa_perf_contains(perf, cxled->dpa_res))
return ERR_PTR(-EINVAL);
@@ -647,11 +654,10 @@ static int cxl_endpoint_gather_bandwidth(struct cxl_region *cxlr,
if (cxlds->rcd)
return -ENODEV;
- perf = cxled_get_dpa_perf(cxled, cxlr->mode);
+ perf = cxled_get_dpa_perf(cxled);
if (IS_ERR(perf))
return PTR_ERR(perf);
- gp_port = to_cxl_port(parent_port->dev.parent);
*gp_is_root = is_cxl_root(gp_port);
/*
@@ -1053,7 +1059,7 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
lockdep_assert_held(&cxl_dpa_rwsem);
- perf = cxled_get_dpa_perf(cxled, cxlr->mode);
+ perf = cxled_get_dpa_perf(cxled);
if (IS_ERR(perf))
return;
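
The cdat.c hunks above replace the fixed ram_perf/pmem_perf fields with a per-partition perf entry on struct cxl_dev_state, so both the DSMAS update path and cxled_get_dpa_perf() now index cxlds->part[]. A condensed sketch of the lookup pattern (helper name hypothetical):

	/* Condensed form of the partition lookup performed by the DSMAS code above */
	static struct cxl_dpa_perf *find_part_perf(struct cxl_dev_state *cxlds,
						   struct range *dpa_range)
	{
		for (int i = 0; i < cxlds->nr_partitions; i++) {
			struct range r = {
				.start = cxlds->part[i].res.start,
				.end = cxlds->part[i].res.end,
			};

			if (range_contains(&r, dpa_range))
				return &cxlds->part[i].perf;
		}

		return NULL;
	}
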
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 800466f96a68..6b78b10da3e1 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -4,6 +4,8 @@
#ifndef __CXL_CORE_H__
#define __CXL_CORE_H__
+#include <cxl/mailbox.h>
+
extern const struct device_type cxl_nvdimm_bridge_type;
extern const struct device_type cxl_nvdimm_type;
extern const struct device_type cxl_pmu_type;
@@ -65,19 +67,20 @@ static inline void cxl_region_exit(void)
struct cxl_send_command;
struct cxl_mem_query_commands;
-int cxl_query_cmd(struct cxl_memdev *cxlmd,
+int cxl_query_cmd(struct cxl_mailbox *cxl_mbox,
struct cxl_mem_query_commands __user *q);
-int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s);
+int cxl_send_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_send_command __user *s);
void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
resource_size_t length);
struct dentry *cxl_debugfs_create_dir(const char *dir);
-int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
- enum cxl_decoder_mode mode);
-int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size);
+int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled,
+ enum cxl_partition_mode mode);
+int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size);
int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled);
resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled);
+bool cxl_resource_contains_addr(const struct resource *res, const resource_size_t addr);
enum cxl_rcrb {
CXL_RCRB_DOWNSTREAM,
@@ -115,4 +118,23 @@ bool cxl_need_node_perf_attrs_update(int nid);
int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
struct access_coordinate *c);
+int cxl_ras_init(void);
+void cxl_ras_exit(void);
+int cxl_gpf_port_setup(struct cxl_dport *dport);
+int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res,
+ int nid, resource_size_t *size);
+
+#ifdef CONFIG_CXL_FEATURES
+struct cxl_feat_entry *
+cxl_feature_info(struct cxl_features_state *cxlfs, const uuid_t *uuid);
+size_t cxl_get_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
+ enum cxl_get_feat_selection selection,
+ void *feat_out, size_t feat_out_size, u16 offset,
+ u16 *return_code);
+int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
+ u8 feat_version, const void *feat_data,
+ size_t feat_data_size, u32 feat_flag, u16 offset,
+ u16 *return_code);
+#endif
+
#endif /* __CXL_CORE_H__ */
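
The cxl_get_feature()/cxl_set_feature() declarations above are the internal entry points the new EDAC code uses for every feature access: callers size a buffer for the feature's readable attributes and request the current value. A minimal read sketch (cxl_scrub_rd_attrbs and the UUID/selector constants are defined or used in edac.c below; the wrapper name is illustrative):

	/* Minimal sketch of the cxl_get_feature() call pattern used in edac.c */
	static int example_read_scrub(struct cxl_mailbox *cxl_mbox,
				      struct cxl_scrub_rd_attrbs *attrbs)
	{
		size_t got;

		got = cxl_get_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
				      CXL_GET_FEAT_SEL_CURRENT_VALUE, attrbs,
				      sizeof(*attrbs), 0, NULL);

		/* edac.c below treats a zero return from cxl_get_feature() as failure */
		return got ? 0 : -EIO;
	}
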
diff --git a/drivers/cxl/core/edac.c b/drivers/cxl/core/edac.c
new file mode 100644
index 000000000000..991fa3e70522
--- /dev/null
+++ b/drivers/cxl/core/edac.c
@@ -0,0 +1,2113 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CXL EDAC memory feature driver.
+ *
+ * Copyright (c) 2024-2025 HiSilicon Limited.
+ *
+ * - Supports functions to configure the EDAC features of
+ *   CXL memory devices.
+ * - Registers with the EDAC device subsystem driver to expose
+ *   the feature sysfs attributes to the user for configuring
+ *   CXL memory RAS features.
+ */
+
+#include <linux/cleanup.h>
+#include <linux/edac.h>
+#include <linux/limits.h>
+#include <linux/unaligned.h>
+#include <linux/xarray.h>
+#include <cxl/features.h>
+#include <cxl.h>
+#include <cxlmem.h>
+#include "core.h"
+#include "trace.h"
+
+#define CXL_NR_EDAC_DEV_FEATURES 7
+
+#define CXL_SCRUB_NO_REGION -1
+
+struct cxl_patrol_scrub_context {
+ u8 instance;
+ u16 get_feat_size;
+ u16 set_feat_size;
+ u8 get_version;
+ u8 set_version;
+ u16 effects;
+ struct cxl_memdev *cxlmd;
+ struct cxl_region *cxlr;
+};
+
+/*
+ * See CXL spec rev 3.2 @8.2.10.9.11.1 Table 8-222 Device Patrol Scrub Control
+ * Feature Readable Attributes.
+ */
+struct cxl_scrub_rd_attrbs {
+ u8 scrub_cycle_cap;
+ __le16 scrub_cycle_hours;
+ u8 scrub_flags;
+} __packed;
+
+/*
+ * See CXL spec rev 3.2 @8.2.10.9.11.1 Table 8-223 Device Patrol Scrub Control
+ * Feature Writable Attributes.
+ */
+struct cxl_scrub_wr_attrbs {
+ u8 scrub_cycle_hours;
+ u8 scrub_flags;
+} __packed;
+
+#define CXL_SCRUB_CONTROL_CHANGEABLE BIT(0)
+#define CXL_SCRUB_CONTROL_REALTIME BIT(1)
+#define CXL_SCRUB_CONTROL_CYCLE_MASK GENMASK(7, 0)
+#define CXL_SCRUB_CONTROL_MIN_CYCLE_MASK GENMASK(15, 8)
+#define CXL_SCRUB_CONTROL_ENABLE BIT(0)
+
+#define CXL_GET_SCRUB_CYCLE_CHANGEABLE(cap) \
+ FIELD_GET(CXL_SCRUB_CONTROL_CHANGEABLE, cap)
+#define CXL_GET_SCRUB_CYCLE(cycle) \
+ FIELD_GET(CXL_SCRUB_CONTROL_CYCLE_MASK, cycle)
+#define CXL_GET_SCRUB_MIN_CYCLE(cycle) \
+ FIELD_GET(CXL_SCRUB_CONTROL_MIN_CYCLE_MASK, cycle)
+#define CXL_GET_SCRUB_EN_STS(flags) FIELD_GET(CXL_SCRUB_CONTROL_ENABLE, flags)
+
+#define CXL_SET_SCRUB_CYCLE(cycle) \
+ FIELD_PREP(CXL_SCRUB_CONTROL_CYCLE_MASK, cycle)
+#define CXL_SET_SCRUB_EN(en) FIELD_PREP(CXL_SCRUB_CONTROL_ENABLE, en)
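
/*
 * Worked example (illustrative value, not from the spec): if the 16-bit
 * scrub cycle field reads back as 0x0C06, CXL_GET_SCRUB_CYCLE() extracts
 * bits[7:0] = 6 (current scrub cycle, in hours) and
 * CXL_GET_SCRUB_MIN_CYCLE() extracts bits[15:8] = 12 (minimum supported
 * scrub cycle, in hours).
 */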
+
+static int cxl_mem_scrub_get_attrbs(struct cxl_mailbox *cxl_mbox, u8 *cap,
+ u16 *cycle, u8 *flags, u8 *min_cycle)
+{
+ size_t rd_data_size = sizeof(struct cxl_scrub_rd_attrbs);
+ size_t data_size;
+ struct cxl_scrub_rd_attrbs *rd_attrbs __free(kfree) =
+ kzalloc(rd_data_size, GFP_KERNEL);
+ if (!rd_attrbs)
+ return -ENOMEM;
+
+ data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
+ CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
+ rd_data_size, 0, NULL);
+ if (!data_size)
+ return -EIO;
+
+ *cap = rd_attrbs->scrub_cycle_cap;
+ *cycle = le16_to_cpu(rd_attrbs->scrub_cycle_hours);
+ *flags = rd_attrbs->scrub_flags;
+ if (min_cycle)
+ *min_cycle = CXL_GET_SCRUB_MIN_CYCLE(*cycle);
+
+ return 0;
+}
+
+static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx,
+ u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle)
+{
+ struct cxl_mailbox *cxl_mbox;
+ struct cxl_region_params *p;
+ struct cxl_memdev *cxlmd;
+ struct cxl_region *cxlr;
+ u8 min_scrub_cycle = 0;
+ int i, ret;
+
+ if (!cxl_ps_ctx->cxlr) {
+ cxl_mbox = &cxl_ps_ctx->cxlmd->cxlds->cxl_mbox;
+ return cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle,
+ flags, min_cycle);
+ }
+
+ struct rw_semaphore *region_lock __free(rwsem_read_release) =
+ rwsem_read_intr_acquire(&cxl_region_rwsem);
+ if (!region_lock)
+ return -EINTR;
+
+ cxlr = cxl_ps_ctx->cxlr;
+ p = &cxlr->params;
+
+ for (i = 0; i < p->nr_targets; i++) {
+ struct cxl_endpoint_decoder *cxled = p->targets[i];
+
+ cxlmd = cxled_to_memdev(cxled);
+ cxl_mbox = &cxlmd->cxlds->cxl_mbox;
+ ret = cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle, flags,
+ min_cycle);
+ if (ret)
+ return ret;
+
+ /*
+ * The min_scrub_cycle of a region is the max of minimum scrub
+ * cycles supported by memdevs that back the region.
+ */
+ if (min_cycle)
+ min_scrub_cycle = max(*min_cycle, min_scrub_cycle);
+ }
+
+ if (min_cycle)
+ *min_cycle = min_scrub_cycle;
+
+ return 0;
+}
+
+static int cxl_scrub_set_attrbs_region(struct device *dev,
+ struct cxl_patrol_scrub_context *cxl_ps_ctx,
+ u8 cycle, u8 flags)
+{
+ struct cxl_scrub_wr_attrbs wr_attrbs;
+ struct cxl_mailbox *cxl_mbox;
+ struct cxl_region_params *p;
+ struct cxl_memdev *cxlmd;
+ struct cxl_region *cxlr;
+ int ret, i;
+
+ struct rw_semaphore *region_lock __free(rwsem_read_release) =
+ rwsem_read_intr_acquire(&cxl_region_rwsem);
+ if (!region_lock)
+ return -EINTR;
+
+ cxlr = cxl_ps_ctx->cxlr;
+ p = &cxlr->params;
+ wr_attrbs.scrub_cycle_hours = cycle;
+ wr_attrbs.scrub_flags = flags;
+
+ for (i = 0; i < p->nr_targets; i++) {
+ struct cxl_endpoint_decoder *cxled = p->targets[i];
+
+ cxlmd = cxled_to_memdev(cxled);
+ cxl_mbox = &cxlmd->cxlds->cxl_mbox;
+ ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
+ cxl_ps_ctx->set_version, &wr_attrbs,
+ sizeof(wr_attrbs),
+ CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
+ 0, NULL);
+ if (ret)
+ return ret;
+
+ if (cycle != cxlmd->scrub_cycle) {
+ if (cxlmd->scrub_region_id != CXL_SCRUB_NO_REGION)
+ dev_info(dev,
+ "Device scrub rate (%d hours) set by region%d overwritten by region%d scrub rate (%d hours)\n",
+ cxlmd->scrub_cycle,
+ cxlmd->scrub_region_id, cxlr->id,
+ cycle);
+
+ cxlmd->scrub_cycle = cycle;
+ cxlmd->scrub_region_id = cxlr->id;
+ }
+ }
+
+ return 0;
+}
+
+static int cxl_scrub_set_attrbs_device(struct device *dev,
+ struct cxl_patrol_scrub_context *cxl_ps_ctx,
+ u8 cycle, u8 flags)
+{
+ struct cxl_scrub_wr_attrbs wr_attrbs;
+ struct cxl_mailbox *cxl_mbox;
+ struct cxl_memdev *cxlmd;
+ int ret;
+
+ wr_attrbs.scrub_cycle_hours = cycle;
+ wr_attrbs.scrub_flags = flags;
+
+ cxlmd = cxl_ps_ctx->cxlmd;
+ cxl_mbox = &cxlmd->cxlds->cxl_mbox;
+ ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
+ cxl_ps_ctx->set_version, &wr_attrbs,
+ sizeof(wr_attrbs),
+ CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET, 0,
+ NULL);
+ if (ret)
+ return ret;
+
+ if (cycle != cxlmd->scrub_cycle) {
+ if (cxlmd->scrub_region_id != CXL_SCRUB_NO_REGION)
+ dev_info(dev,
+ "Device scrub rate (%d hours) set by region%d overwritten by device-local scrub rate (%d hours)\n",
+ cxlmd->scrub_cycle, cxlmd->scrub_region_id,
+ cycle);
+
+ cxlmd->scrub_cycle = cycle;
+ cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
+ }
+
+ return 0;
+}
+
+static int cxl_scrub_set_attrbs(struct device *dev,
+ struct cxl_patrol_scrub_context *cxl_ps_ctx,
+ u8 cycle, u8 flags)
+{
+ if (cxl_ps_ctx->cxlr)
+ return cxl_scrub_set_attrbs_region(dev, cxl_ps_ctx, cycle, flags);
+
+ return cxl_scrub_set_attrbs_device(dev, cxl_ps_ctx, cycle, flags);
+}
+
+static int cxl_patrol_scrub_get_enabled_bg(struct device *dev, void *drv_data,
+ bool *enabled)
+{
+ struct cxl_patrol_scrub_context *ctx = drv_data;
+ u8 cap, flags;
+ u16 cycle;
+ int ret;
+
+ ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, NULL);
+ if (ret)
+ return ret;
+
+ *enabled = CXL_GET_SCRUB_EN_STS(flags);
+
+ return 0;
+}
+
+static int cxl_patrol_scrub_set_enabled_bg(struct device *dev, void *drv_data,
+ bool enable)
+{
+ struct cxl_patrol_scrub_context *ctx = drv_data;
+ u8 cap, flags, wr_cycle;
+ u16 rd_cycle;
+ int ret;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ ret = cxl_scrub_get_attrbs(ctx, &cap, &rd_cycle, &flags, NULL);
+ if (ret)
+ return ret;
+
+ wr_cycle = CXL_GET_SCRUB_CYCLE(rd_cycle);
+ flags = CXL_SET_SCRUB_EN(enable);
+
+ return cxl_scrub_set_attrbs(dev, ctx, wr_cycle, flags);
+}
+
+static int cxl_patrol_scrub_get_min_scrub_cycle(struct device *dev,
+ void *drv_data, u32 *min)
+{
+ struct cxl_patrol_scrub_context *ctx = drv_data;
+ u8 cap, flags, min_cycle;
+ u16 cycle;
+ int ret;
+
+ ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, &min_cycle);
+ if (ret)
+ return ret;
+
+ *min = min_cycle * 3600;
+
+ return 0;
+}
+
+static int cxl_patrol_scrub_get_max_scrub_cycle(struct device *dev,
+ void *drv_data, u32 *max)
+{
+ *max = U8_MAX * 3600; /* Max set by register size */
+
+ return 0;
+}
+
+static int cxl_patrol_scrub_get_scrub_cycle(struct device *dev, void *drv_data,
+ u32 *scrub_cycle_secs)
+{
+ struct cxl_patrol_scrub_context *ctx = drv_data;
+ u8 cap, flags;
+ u16 cycle;
+ int ret;
+
+ ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, NULL);
+ if (ret)
+ return ret;
+
+ *scrub_cycle_secs = CXL_GET_SCRUB_CYCLE(cycle) * 3600;
+
+ return 0;
+}
+
+static int cxl_patrol_scrub_set_scrub_cycle(struct device *dev, void *drv_data,
+ u32 scrub_cycle_secs)
+{
+ struct cxl_patrol_scrub_context *ctx = drv_data;
+ u8 scrub_cycle_hours = scrub_cycle_secs / 3600;
+ u8 cap, wr_cycle, flags, min_cycle;
+ u16 rd_cycle;
+ int ret;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ ret = cxl_scrub_get_attrbs(ctx, &cap, &rd_cycle, &flags, &min_cycle);
+ if (ret)
+ return ret;
+
+ if (!CXL_GET_SCRUB_CYCLE_CHANGEABLE(cap))
+ return -EOPNOTSUPP;
+
+ if (scrub_cycle_hours < min_cycle) {
+ dev_dbg(dev, "Invalid CXL patrol scrub cycle(%d) to set\n",
+ scrub_cycle_hours);
+ dev_dbg(dev,
+ "Minimum supported CXL patrol scrub cycle in hour %d\n",
+ min_cycle);
+ return -EINVAL;
+ }
+ wr_cycle = CXL_SET_SCRUB_CYCLE(scrub_cycle_hours);
+
+ return cxl_scrub_set_attrbs(dev, ctx, wr_cycle, flags);
+}
+
+static const struct edac_scrub_ops cxl_ps_scrub_ops = {
+ .get_enabled_bg = cxl_patrol_scrub_get_enabled_bg,
+ .set_enabled_bg = cxl_patrol_scrub_set_enabled_bg,
+ .get_min_cycle = cxl_patrol_scrub_get_min_scrub_cycle,
+ .get_max_cycle = cxl_patrol_scrub_get_max_scrub_cycle,
+ .get_cycle_duration = cxl_patrol_scrub_get_scrub_cycle,
+ .set_cycle_duration = cxl_patrol_scrub_set_scrub_cycle,
+};
+
+static int cxl_memdev_scrub_init(struct cxl_memdev *cxlmd,
+ struct edac_dev_feature *ras_feature,
+ u8 scrub_inst)
+{
+ struct cxl_patrol_scrub_context *cxl_ps_ctx;
+ struct cxl_feat_entry *feat_entry;
+ u8 cap, flags;
+ u16 cycle;
+ int rc;
+
+ feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
+ &CXL_FEAT_PATROL_SCRUB_UUID);
+ if (IS_ERR(feat_entry))
+ return -EOPNOTSUPP;
+
+ if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
+ return -EOPNOTSUPP;
+
+ cxl_ps_ctx = devm_kzalloc(&cxlmd->dev, sizeof(*cxl_ps_ctx), GFP_KERNEL);
+ if (!cxl_ps_ctx)
+ return -ENOMEM;
+
+ *cxl_ps_ctx = (struct cxl_patrol_scrub_context){
+ .get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
+ .set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
+ .get_version = feat_entry->get_feat_ver,
+ .set_version = feat_entry->set_feat_ver,
+ .effects = le16_to_cpu(feat_entry->effects),
+ .instance = scrub_inst,
+ .cxlmd = cxlmd,
+ };
+
+ rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, &cap, &cycle,
+ &flags, NULL);
+ if (rc)
+ return rc;
+
+ cxlmd->scrub_cycle = CXL_GET_SCRUB_CYCLE(cycle);
+ cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
+
+ ras_feature->ft_type = RAS_FEAT_SCRUB;
+ ras_feature->instance = cxl_ps_ctx->instance;
+ ras_feature->scrub_ops = &cxl_ps_scrub_ops;
+ ras_feature->ctx = cxl_ps_ctx;
+
+ return 0;
+}
+
+static int cxl_region_scrub_init(struct cxl_region *cxlr,
+ struct edac_dev_feature *ras_feature,
+ u8 scrub_inst)
+{
+ struct cxl_patrol_scrub_context *cxl_ps_ctx;
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_feat_entry *feat_entry = NULL;
+ struct cxl_memdev *cxlmd;
+ u8 cap, flags;
+ u16 cycle;
+ int i, rc;
+
+ /*
+ * The region is in the probe state here, so cxl_region_rwsem is not taken;
+ * the caller must hold cxl_region_rwsem if this code runs in any other
+ * context.
+ */
+ for (i = 0; i < p->nr_targets; i++) {
+ struct cxl_endpoint_decoder *cxled = p->targets[i];
+
+ cxlmd = cxled_to_memdev(cxled);
+ feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
+ &CXL_FEAT_PATROL_SCRUB_UUID);
+ if (IS_ERR(feat_entry))
+ return -EOPNOTSUPP;
+
+ if (!(le32_to_cpu(feat_entry->flags) &
+ CXL_FEATURE_F_CHANGEABLE))
+ return -EOPNOTSUPP;
+
+ rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, &cap,
+ &cycle, &flags, NULL);
+ if (rc)
+ return rc;
+
+ cxlmd->scrub_cycle = CXL_GET_SCRUB_CYCLE(cycle);
+ cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
+ }
+
+ cxl_ps_ctx = devm_kzalloc(&cxlr->dev, sizeof(*cxl_ps_ctx), GFP_KERNEL);
+ if (!cxl_ps_ctx)
+ return -ENOMEM;
+
+ *cxl_ps_ctx = (struct cxl_patrol_scrub_context){
+ .get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
+ .set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
+ .get_version = feat_entry->get_feat_ver,
+ .set_version = feat_entry->set_feat_ver,
+ .effects = le16_to_cpu(feat_entry->effects),
+ .instance = scrub_inst,
+ .cxlr = cxlr,
+ };
+
+ ras_feature->ft_type = RAS_FEAT_SCRUB;
+ ras_feature->instance = cxl_ps_ctx->instance;
+ ras_feature->scrub_ops = &cxl_ps_scrub_ops;
+ ras_feature->ctx = cxl_ps_ctx;
+
+ return 0;
+}
+
+struct cxl_ecs_context {
+ u16 num_media_frus;
+ u16 get_feat_size;
+ u16 set_feat_size;
+ u8 get_version;
+ u8 set_version;
+ u16 effects;
+ struct cxl_memdev *cxlmd;
+};
+
+/*
+ * See CXL spec rev 3.2 @8.2.10.9.11.2 Table 8-225 DDR5 ECS Control Feature
+ * Readable Attributes.
+ */
+struct cxl_ecs_fru_rd_attrbs {
+ u8 ecs_cap;
+ __le16 ecs_config;
+ u8 ecs_flags;
+} __packed;
+
+struct cxl_ecs_rd_attrbs {
+ u8 ecs_log_cap;
+ struct cxl_ecs_fru_rd_attrbs fru_attrbs[];
+} __packed;
+
+/*
+ * See CXL spec rev 3.2 @8.2.10.9.11.2 Table 8-226 DDR5 ECS Control Feature
+ * Writable Attributes.
+ */
+struct cxl_ecs_fru_wr_attrbs {
+ __le16 ecs_config;
+} __packed;
+
+struct cxl_ecs_wr_attrbs {
+ u8 ecs_log_cap;
+ struct cxl_ecs_fru_wr_attrbs fru_attrbs[];
+} __packed;
+
+#define CXL_ECS_LOG_ENTRY_TYPE_MASK GENMASK(1, 0)
+#define CXL_ECS_REALTIME_REPORT_CAP_MASK BIT(0)
+#define CXL_ECS_THRESHOLD_COUNT_MASK GENMASK(2, 0)
+#define CXL_ECS_COUNT_MODE_MASK BIT(3)
+#define CXL_ECS_RESET_COUNTER_MASK BIT(4)
+#define CXL_ECS_RESET_COUNTER 1
+
+enum {
+ ECS_THRESHOLD_256 = 256,
+ ECS_THRESHOLD_1024 = 1024,
+ ECS_THRESHOLD_4096 = 4096,
+};
+
+enum {
+ ECS_THRESHOLD_IDX_256 = 3,
+ ECS_THRESHOLD_IDX_1024 = 4,
+ ECS_THRESHOLD_IDX_4096 = 5,
+};
+
+static const u16 ecs_supp_threshold[] = {
+ [ECS_THRESHOLD_IDX_256] = 256,
+ [ECS_THRESHOLD_IDX_1024] = 1024,
+ [ECS_THRESHOLD_IDX_4096] = 4096,
+};
+
+enum {
+ ECS_LOG_ENTRY_TYPE_DRAM = 0x0,
+ ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU = 0x1,
+};
+
+enum cxl_ecs_count_mode {
+ ECS_MODE_COUNTS_ROWS = 0,
+ ECS_MODE_COUNTS_CODEWORDS = 1,
+};
+
+static int cxl_mem_ecs_get_attrbs(struct device *dev,
+ struct cxl_ecs_context *cxl_ecs_ctx,
+ int fru_id, u8 *log_cap, u16 *config)
+{
+ struct cxl_memdev *cxlmd = cxl_ecs_ctx->cxlmd;
+ struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
+ struct cxl_ecs_fru_rd_attrbs *fru_rd_attrbs;
+ size_t rd_data_size;
+ size_t data_size;
+
+ rd_data_size = cxl_ecs_ctx->get_feat_size;
+
+ struct cxl_ecs_rd_attrbs *rd_attrbs __free(kvfree) =
+ kvzalloc(rd_data_size, GFP_KERNEL);
+ if (!rd_attrbs)
+ return -ENOMEM;
+
+ data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
+ CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
+ rd_data_size, 0, NULL);
+ if (!data_size)
+ return -EIO;
+
+ fru_rd_attrbs = rd_attrbs->fru_attrbs;
+ *log_cap = rd_attrbs->ecs_log_cap;
+ *config = le16_to_cpu(fru_rd_attrbs[fru_id].ecs_config);
+
+ return 0;
+}
+
+static int cxl_mem_ecs_set_attrbs(struct device *dev,
+ struct cxl_ecs_context *cxl_ecs_ctx,
+ int fru_id, u8 log_cap, u16 config)
+{
+ struct cxl_memdev *cxlmd = cxl_ecs_ctx->cxlmd;
+ struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
+ struct cxl_ecs_fru_rd_attrbs *fru_rd_attrbs;
+ struct cxl_ecs_fru_wr_attrbs *fru_wr_attrbs;
+ size_t rd_data_size, wr_data_size;
+ u16 num_media_frus, count;
+ size_t data_size;
+
+ num_media_frus = cxl_ecs_ctx->num_media_frus;
+ rd_data_size = cxl_ecs_ctx->get_feat_size;
+ wr_data_size = cxl_ecs_ctx->set_feat_size;
+ struct cxl_ecs_rd_attrbs *rd_attrbs __free(kvfree) =
+ kvzalloc(rd_data_size, GFP_KERNEL);
+ if (!rd_attrbs)
+ return -ENOMEM;
+
+ data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
+ CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
+ rd_data_size, 0, NULL);
+ if (!data_size)
+ return -EIO;
+
+ struct cxl_ecs_wr_attrbs *wr_attrbs __free(kvfree) =
+ kvzalloc(wr_data_size, GFP_KERNEL);
+ if (!wr_attrbs)
+ return -ENOMEM;
+
+ /*
+ * Fill writable attributes from the current attributes read
+ * for all the media FRUs.
+ */
+ fru_rd_attrbs = rd_attrbs->fru_attrbs;
+ fru_wr_attrbs = wr_attrbs->fru_attrbs;
+ wr_attrbs->ecs_log_cap = log_cap;
+ for (count = 0; count < num_media_frus; count++)
+ fru_wr_attrbs[count].ecs_config =
+ fru_rd_attrbs[count].ecs_config;
+
+ fru_wr_attrbs[fru_id].ecs_config = cpu_to_le16(config);
+
+ return cxl_set_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
+ cxl_ecs_ctx->set_version, wr_attrbs,
+ wr_data_size,
+ CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
+ 0, NULL);
+}
+
+static u8 cxl_get_ecs_log_entry_type(u8 log_cap, u16 config)
+{
+ return FIELD_GET(CXL_ECS_LOG_ENTRY_TYPE_MASK, log_cap);
+}
+
+static u16 cxl_get_ecs_threshold(u8 log_cap, u16 config)
+{
+ u8 index = FIELD_GET(CXL_ECS_THRESHOLD_COUNT_MASK, config);
+
+ return ecs_supp_threshold[index];
+}
+
+static u8 cxl_get_ecs_count_mode(u8 log_cap, u16 config)
+{
+ return FIELD_GET(CXL_ECS_COUNT_MODE_MASK, config);
+}
+
+#define CXL_ECS_GET_ATTR(attrb) \
+ static int cxl_ecs_get_##attrb(struct device *dev, void *drv_data, \
+ int fru_id, u32 *val) \
+ { \
+ struct cxl_ecs_context *ctx = drv_data; \
+ u8 log_cap; \
+ u16 config; \
+ int ret; \
+ \
+ ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap, \
+ &config); \
+ if (ret) \
+ return ret; \
+ \
+ *val = cxl_get_ecs_##attrb(log_cap, config); \
+ \
+ return 0; \
+ }
+
+CXL_ECS_GET_ATTR(log_entry_type)
+CXL_ECS_GET_ATTR(count_mode)
+CXL_ECS_GET_ATTR(threshold)
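/*
 * For reference, CXL_ECS_GET_ATTR(threshold) above expands to roughly:
 *
 *	static int cxl_ecs_get_threshold(struct device *dev, void *drv_data,
 *					 int fru_id, u32 *val)
 *	{
 *		struct cxl_ecs_context *ctx = drv_data;
 *		u8 log_cap;
 *		u16 config;
 *		int ret;
 *
 *		ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap,
 *					     &config);
 *		if (ret)
 *			return ret;
 *
 *		*val = cxl_get_ecs_threshold(log_cap, config);
 *
 *		return 0;
 *	}
 */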
+
+static int cxl_set_ecs_log_entry_type(struct device *dev, u8 *log_cap,
+ u16 *config, u32 val)
+{
+ if (val != ECS_LOG_ENTRY_TYPE_DRAM &&
+ val != ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU)
+ return -EINVAL;
+
+ *log_cap = FIELD_PREP(CXL_ECS_LOG_ENTRY_TYPE_MASK, val);
+
+ return 0;
+}
+
+static int cxl_set_ecs_threshold(struct device *dev, u8 *log_cap, u16 *config,
+ u32 val)
+{
+ *config &= ~CXL_ECS_THRESHOLD_COUNT_MASK;
+
+ switch (val) {
+ case ECS_THRESHOLD_256:
+ *config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
+ ECS_THRESHOLD_IDX_256);
+ break;
+ case ECS_THRESHOLD_1024:
+ *config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
+ ECS_THRESHOLD_IDX_1024);
+ break;
+ case ECS_THRESHOLD_4096:
+ *config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
+ ECS_THRESHOLD_IDX_4096);
+ break;
+ default:
+ dev_dbg(dev, "Invalid CXL ECS threshold count(%d) to set\n",
+ val);
+ dev_dbg(dev, "Supported ECS threshold counts: %u, %u, %u\n",
+ ECS_THRESHOLD_256, ECS_THRESHOLD_1024,
+ ECS_THRESHOLD_4096);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int cxl_set_ecs_count_mode(struct device *dev, u8 *log_cap, u16 *config,
+ u32 val)
+{
+ if (val != ECS_MODE_COUNTS_ROWS && val != ECS_MODE_COUNTS_CODEWORDS) {
+ dev_dbg(dev, "Invalid CXL ECS scrub mode(%d) to set\n", val);
+ dev_dbg(dev,
+ "Supported ECS Modes: 0: ECS counts rows with errors,"
+ " 1: ECS counts codewords with errors\n");
+ return -EINVAL;
+ }
+
+ *config &= ~CXL_ECS_COUNT_MODE_MASK;
+ *config |= FIELD_PREP(CXL_ECS_COUNT_MODE_MASK, val);
+
+ return 0;
+}
+
+static int cxl_set_ecs_reset_counter(struct device *dev, u8 *log_cap,
+ u16 *config, u32 val)
+{
+ if (val != CXL_ECS_RESET_COUNTER)
+ return -EINVAL;
+
+ *config &= ~CXL_ECS_RESET_COUNTER_MASK;
+ *config |= FIELD_PREP(CXL_ECS_RESET_COUNTER_MASK, val);
+
+ return 0;
+}
+
+#define CXL_ECS_SET_ATTR(attrb) \
+ static int cxl_ecs_set_##attrb(struct device *dev, void *drv_data, \
+ int fru_id, u32 val) \
+ { \
+ struct cxl_ecs_context *ctx = drv_data; \
+ u8 log_cap; \
+ u16 config; \
+ int ret; \
+ \
+ if (!capable(CAP_SYS_RAWIO)) \
+ return -EPERM; \
+ \
+ ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap, \
+ &config); \
+ if (ret) \
+ return ret; \
+ \
+ ret = cxl_set_ecs_##attrb(dev, &log_cap, &config, val); \
+ if (ret) \
+ return ret; \
+ \
+ return cxl_mem_ecs_set_attrbs(dev, ctx, fru_id, log_cap, \
+ config); \
+ }
+CXL_ECS_SET_ATTR(log_entry_type)
+CXL_ECS_SET_ATTR(count_mode)
+CXL_ECS_SET_ATTR(reset_counter)
+CXL_ECS_SET_ATTR(threshold)
+
+static const struct edac_ecs_ops cxl_ecs_ops = {
+ .get_log_entry_type = cxl_ecs_get_log_entry_type,
+ .set_log_entry_type = cxl_ecs_set_log_entry_type,
+ .get_mode = cxl_ecs_get_count_mode,
+ .set_mode = cxl_ecs_set_count_mode,
+ .reset = cxl_ecs_set_reset_counter,
+ .get_threshold = cxl_ecs_get_threshold,
+ .set_threshold = cxl_ecs_set_threshold,
+};
+
+static int cxl_memdev_ecs_init(struct cxl_memdev *cxlmd,
+ struct edac_dev_feature *ras_feature)
+{
+ struct cxl_ecs_context *cxl_ecs_ctx;
+ struct cxl_feat_entry *feat_entry;
+ int num_media_frus;
+
+ feat_entry =
+ cxl_feature_info(to_cxlfs(cxlmd->cxlds), &CXL_FEAT_ECS_UUID);
+ if (IS_ERR(feat_entry))
+ return -EOPNOTSUPP;
+
+ if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
+ return -EOPNOTSUPP;
+
+ num_media_frus = (le16_to_cpu(feat_entry->get_feat_size) -
+ sizeof(struct cxl_ecs_rd_attrbs)) /
+ sizeof(struct cxl_ecs_fru_rd_attrbs);
+ if (!num_media_frus)
+ return -EOPNOTSUPP;
+
+ cxl_ecs_ctx =
+ devm_kzalloc(&cxlmd->dev, sizeof(*cxl_ecs_ctx), GFP_KERNEL);
+ if (!cxl_ecs_ctx)
+ return -ENOMEM;
+
+ *cxl_ecs_ctx = (struct cxl_ecs_context){
+ .get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
+ .set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
+ .get_version = feat_entry->get_feat_ver,
+ .set_version = feat_entry->set_feat_ver,
+ .effects = le16_to_cpu(feat_entry->effects),
+ .num_media_frus = num_media_frus,
+ .cxlmd = cxlmd,
+ };
+
+ ras_feature->ft_type = RAS_FEAT_ECS;
+ ras_feature->ecs_ops = &cxl_ecs_ops;
+ ras_feature->ctx = cxl_ecs_ctx;
+ ras_feature->ecs_info.num_media_frus = num_media_frus;
+
+ return 0;
+}
+
+/*
+ * Perform Maintenance CXL 3.2 Spec 8.2.10.7.1
+ */
+
+/*
+ * Perform Maintenance input payload
+ * CXL rev 3.2 section 8.2.10.7.1 Table 8-117
+ */
+struct cxl_mbox_maintenance_hdr {
+ u8 op_class;
+ u8 op_subclass;
+} __packed;
+
+static int cxl_perform_maintenance(struct cxl_mailbox *cxl_mbox, u8 class,
+ u8 subclass, void *data_in,
+ size_t data_in_size)
+{
+ struct cxl_memdev_maintenance_pi {
+ struct cxl_mbox_maintenance_hdr hdr;
+ u8 data[];
+ } __packed;
+ struct cxl_mbox_cmd mbox_cmd;
+ size_t hdr_size;
+
+ struct cxl_memdev_maintenance_pi *pi __free(kvfree) =
+ kvzalloc(cxl_mbox->payload_size, GFP_KERNEL);
+ if (!pi)
+ return -ENOMEM;
+
+ pi->hdr.op_class = class;
+ pi->hdr.op_subclass = subclass;
+ hdr_size = sizeof(pi->hdr);
+ /*
+ * Check that the mailbox payload size can accommodate the
+ * maintenance data transfer.
+ */
+ if (hdr_size + data_in_size > cxl_mbox->payload_size)
+ return -ENOMEM;
+
+ memcpy(pi->data, data_in, data_in_size);
+ mbox_cmd = (struct cxl_mbox_cmd){
+ .opcode = CXL_MBOX_OP_DO_MAINTENANCE,
+ .size_in = hdr_size + data_in_size,
+ .payload_in = pi,
+ };
+
+ return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+}
+
+/*
+ * Support for determining whether the attributes of a memory
+ * operation are from the current boot or not.
+ */
+
+struct cxl_mem_err_rec {
+ struct xarray rec_gen_media;
+ struct xarray rec_dram;
+};
+
+enum cxl_mem_repair_type {
+ CXL_PPR,
+ CXL_CACHELINE_SPARING,
+ CXL_ROW_SPARING,
+ CXL_BANK_SPARING,
+ CXL_RANK_SPARING,
+ CXL_REPAIR_MAX,
+};
+
+/**
+ * struct cxl_mem_repair_attrbs - CXL memory repair attributes
+ * @dpa: DPA of memory to repair
+ * @nibble_mask: nibble mask, identifies one or more nibbles on the memory bus
+ * @row: row of memory to repair
+ * @column: column of memory to repair
+ * @channel: channel of memory to repair
+ * @sub_channel: sub channel of memory to repair
+ * @rank: rank of memory to repair
+ * @bank_group: bank group of memory to repair
+ * @bank: bank of memory to repair
+ * @repair_type: repair type, e.g. PPR, memory sparing, etc.
+ */
+struct cxl_mem_repair_attrbs {
+ u64 dpa;
+ u32 nibble_mask;
+ u32 row;
+ u16 column;
+ u8 channel;
+ u8 sub_channel;
+ u8 rank;
+ u8 bank_group;
+ u8 bank;
+ enum cxl_mem_repair_type repair_type;
+};
+
+static struct cxl_event_gen_media *
+cxl_find_rec_gen_media(struct cxl_memdev *cxlmd,
+ struct cxl_mem_repair_attrbs *attrbs)
+{
+ struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
+ struct cxl_event_gen_media *rec;
+
+ if (!array_rec)
+ return NULL;
+
+ rec = xa_load(&array_rec->rec_gen_media, attrbs->dpa);
+ if (!rec)
+ return NULL;
+
+ if (attrbs->repair_type == CXL_PPR)
+ return rec;
+
+ return NULL;
+}
+
+static struct cxl_event_dram *
+cxl_find_rec_dram(struct cxl_memdev *cxlmd,
+ struct cxl_mem_repair_attrbs *attrbs)
+{
+ struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
+ struct cxl_event_dram *rec;
+ u16 validity_flags;
+
+ if (!array_rec)
+ return NULL;
+
+ rec = xa_load(&array_rec->rec_dram, attrbs->dpa);
+ if (!rec)
+ return NULL;
+
+ validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);
+ if (!(validity_flags & CXL_DER_VALID_CHANNEL) ||
+ !(validity_flags & CXL_DER_VALID_RANK))
+ return NULL;
+
+ switch (attrbs->repair_type) {
+ case CXL_PPR:
+ if (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
+ get_unaligned_le24(rec->nibble_mask) == attrbs->nibble_mask)
+ return rec;
+ break;
+ case CXL_CACHELINE_SPARING:
+ if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
+ !(validity_flags & CXL_DER_VALID_BANK) ||
+ !(validity_flags & CXL_DER_VALID_ROW) ||
+ !(validity_flags & CXL_DER_VALID_COLUMN))
+ return NULL;
+
+ if (rec->media_hdr.channel == attrbs->channel &&
+ rec->media_hdr.rank == attrbs->rank &&
+ rec->bank_group == attrbs->bank_group &&
+ rec->bank == attrbs->bank &&
+ get_unaligned_le24(rec->row) == attrbs->row &&
+ get_unaligned_le16(rec->column) == attrbs->column &&
+ (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
+ get_unaligned_le24(rec->nibble_mask) ==
+ attrbs->nibble_mask) &&
+ (!(validity_flags & CXL_DER_VALID_SUB_CHANNEL) ||
+ rec->sub_channel == attrbs->sub_channel))
+ return rec;
+ break;
+ case CXL_ROW_SPARING:
+ if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
+ !(validity_flags & CXL_DER_VALID_BANK) ||
+ !(validity_flags & CXL_DER_VALID_ROW))
+ return NULL;
+
+ if (rec->media_hdr.channel == attrbs->channel &&
+ rec->media_hdr.rank == attrbs->rank &&
+ rec->bank_group == attrbs->bank_group &&
+ rec->bank == attrbs->bank &&
+ get_unaligned_le24(rec->row) == attrbs->row &&
+ (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
+ get_unaligned_le24(rec->nibble_mask) ==
+ attrbs->nibble_mask))
+ return rec;
+ break;
+ case CXL_BANK_SPARING:
+ if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
+ !(validity_flags & CXL_DER_VALID_BANK))
+ return NULL;
+
+ if (rec->media_hdr.channel == attrbs->channel &&
+ rec->media_hdr.rank == attrbs->rank &&
+ rec->bank_group == attrbs->bank_group &&
+ rec->bank == attrbs->bank &&
+ (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
+ get_unaligned_le24(rec->nibble_mask) ==
+ attrbs->nibble_mask))
+ return rec;
+ break;
+ case CXL_RANK_SPARING:
+ if (rec->media_hdr.channel == attrbs->channel &&
+ rec->media_hdr.rank == attrbs->rank &&
+ (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
+ get_unaligned_le24(rec->nibble_mask) ==
+ attrbs->nibble_mask))
+ return rec;
+ break;
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+#define CXL_MAX_STORAGE_DAYS 10
+#define CXL_MAX_STORAGE_TIME_SECS (CXL_MAX_STORAGE_DAYS * 24 * 60 * 60)
+
+static void cxl_del_expired_gmedia_recs(struct xarray *rec_xarray,
+ struct cxl_event_gen_media *cur_rec)
+{
+ u64 cur_ts = le64_to_cpu(cur_rec->media_hdr.hdr.timestamp);
+ struct cxl_event_gen_media *rec;
+ unsigned long index;
+ u64 delta_ts_secs;
+
+ xa_for_each(rec_xarray, index, rec) {
+ delta_ts_secs = (cur_ts -
+ le64_to_cpu(rec->media_hdr.hdr.timestamp)) / 1000000000ULL;
+ if (delta_ts_secs >= CXL_MAX_STORAGE_TIME_SECS) {
+ xa_erase(rec_xarray, index);
+ kfree(rec);
+ }
+ }
+}
+
+static void cxl_del_expired_dram_recs(struct xarray *rec_xarray,
+ struct cxl_event_dram *cur_rec)
+{
+ u64 cur_ts = le64_to_cpu(cur_rec->media_hdr.hdr.timestamp);
+ struct cxl_event_dram *rec;
+ unsigned long index;
+ u64 delta_secs;
+
+ xa_for_each(rec_xarray, index, rec) {
+ delta_secs = (cur_ts -
+ le64_to_cpu(rec->media_hdr.hdr.timestamp)) / 1000000000ULL;
+ if (delta_secs >= CXL_MAX_STORAGE_TIME_SECS) {
+ xa_erase(rec_xarray, index);
+ kfree(rec);
+ }
+ }
+}
+
+#define CXL_MAX_REC_STORAGE_COUNT 200
+
+static void cxl_del_overflow_old_recs(struct xarray *rec_xarray)
+{
+ void *err_rec;
+ unsigned long index, count = 0;
+
+ xa_for_each(rec_xarray, index, err_rec)
+ count++;
+
+ if (count <= CXL_MAX_REC_STORAGE_COUNT)
+ return;
+
+ count -= CXL_MAX_REC_STORAGE_COUNT;
+ xa_for_each(rec_xarray, index, err_rec) {
+ xa_erase(rec_xarray, index);
+ kfree(err_rec);
+ count--;
+ if (!count)
+ break;
+ }
+}
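
/*
 * Record storage is bounded in two ways: entries older than
 * CXL_MAX_STORAGE_TIME_SECS are dropped, and each xarray is then trimmed
 * back to at most CXL_MAX_REC_STORAGE_COUNT entries. The ~43KB figure in
 * the CXL_EDAC_MEM_REPAIR Kconfig help presumably corresponds to holding
 * up to 200 cxl_event_gen_media plus 200 cxl_event_dram records per memdev.
 */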
+
+int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt)
+{
+ struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
+ struct cxl_event_gen_media *rec;
+ void *old_rec;
+
+ if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
+ return 0;
+
+ rec = kmemdup(&evt->gen_media, sizeof(*rec), GFP_KERNEL);
+ if (!rec)
+ return -ENOMEM;
+
+ old_rec = xa_store(&array_rec->rec_gen_media,
+ le64_to_cpu(rec->media_hdr.phys_addr), rec,
+ GFP_KERNEL);
+ if (xa_is_err(old_rec)) {
+ kfree(rec);
+ return xa_err(old_rec);
+ }
+
+ kfree(old_rec);
+
+ cxl_del_expired_gmedia_recs(&array_rec->rec_gen_media, rec);
+ cxl_del_overflow_old_recs(&array_rec->rec_gen_media);
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_store_rec_gen_media, "CXL");
+
+int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt)
+{
+ struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
+ struct cxl_event_dram *rec;
+ void *old_rec;
+
+ if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
+ return 0;
+
+ rec = kmemdup(&evt->dram, sizeof(*rec), GFP_KERNEL);
+ if (!rec)
+ return -ENOMEM;
+
+ old_rec = xa_store(&array_rec->rec_dram,
+ le64_to_cpu(rec->media_hdr.phys_addr), rec,
+ GFP_KERNEL);
+ if (xa_is_err(old_rec)) {
+ kfree(rec);
+ return xa_err(old_rec);
+ }
+
+ kfree(old_rec);
+
+ cxl_del_expired_dram_recs(&array_rec->rec_dram, rec);
+ cxl_del_overflow_old_recs(&array_rec->rec_dram);
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_store_rec_dram, "CXL");
+
+static bool cxl_is_memdev_memory_online(const struct cxl_memdev *cxlmd)
+{
+ struct cxl_port *port = cxlmd->endpoint;
+
+ if (port && cxl_num_decoders_committed(port))
+ return true;
+
+ return false;
+}
+
+/*
+ * CXL memory sparing control
+ */
+enum cxl_mem_sparing_granularity {
+ CXL_MEM_SPARING_CACHELINE,
+ CXL_MEM_SPARING_ROW,
+ CXL_MEM_SPARING_BANK,
+ CXL_MEM_SPARING_RANK,
+ CXL_MEM_SPARING_MAX
+};
+
+struct cxl_mem_sparing_context {
+ struct cxl_memdev *cxlmd;
+ uuid_t repair_uuid;
+ u16 get_feat_size;
+ u16 set_feat_size;
+ u16 effects;
+ u8 instance;
+ u8 get_version;
+ u8 set_version;
+ u8 op_class;
+ u8 op_subclass;
+ bool cap_safe_when_in_use;
+ bool cap_hard_sparing;
+ bool cap_soft_sparing;
+ u8 channel;
+ u8 rank;
+ u8 bank_group;
+ u32 nibble_mask;
+ u64 dpa;
+ u32 row;
+ u16 column;
+ u8 bank;
+ u8 sub_channel;
+ enum edac_mem_repair_type repair_type;
+ bool persist_mode;
+};
+
+#define CXL_SPARING_RD_CAP_SAFE_IN_USE_MASK BIT(0)
+#define CXL_SPARING_RD_CAP_HARD_SPARING_MASK BIT(1)
+#define CXL_SPARING_RD_CAP_SOFT_SPARING_MASK BIT(2)
+
+#define CXL_SPARING_WR_DEVICE_INITIATED_MASK BIT(0)
+
+#define CXL_SPARING_QUERY_RESOURCE_FLAG BIT(0)
+#define CXL_SET_HARD_SPARING_FLAG BIT(1)
+#define CXL_SPARING_SUB_CHNL_VALID_FLAG BIT(2)
+#define CXL_SPARING_NIB_MASK_VALID_FLAG BIT(3)
+
+#define CXL_GET_SPARING_SAFE_IN_USE(flags) \
+ (FIELD_GET(CXL_SPARING_RD_CAP_SAFE_IN_USE_MASK, \
+ flags) ^ 1)
+#define CXL_GET_CAP_HARD_SPARING(flags) \
+ FIELD_GET(CXL_SPARING_RD_CAP_HARD_SPARING_MASK, \
+ flags)
+#define CXL_GET_CAP_SOFT_SPARING(flags) \
+ FIELD_GET(CXL_SPARING_RD_CAP_SOFT_SPARING_MASK, \
+ flags)
+
+#define CXL_SET_SPARING_QUERY_RESOURCE(val) \
+ FIELD_PREP(CXL_SPARING_QUERY_RESOURCE_FLAG, val)
+#define CXL_SET_HARD_SPARING(val) \
+ FIELD_PREP(CXL_SET_HARD_SPARING_FLAG, val)
+#define CXL_SET_SPARING_SUB_CHNL_VALID(val) \
+ FIELD_PREP(CXL_SPARING_SUB_CHNL_VALID_FLAG, val)
+#define CXL_SET_SPARING_NIB_MASK_VALID(val) \
+ FIELD_PREP(CXL_SPARING_NIB_MASK_VALID_FLAG, val)
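
/*
 * Note the XOR in CXL_GET_SPARING_SAFE_IN_USE(): bit 0 of restriction_flags
 * reports a restriction, so it is inverted here into a positive "safe when
 * in use" capability. Illustrative decode (value made up):
 * restriction_flags = 0x6 -> safe when in use (bit 0 clear), hard sparing
 * capable (bit 1 set), soft sparing capable (bit 2 set).
 */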
+
+/*
+ * See CXL spec rev 3.2 @8.2.10.7.2.3 Table 8-134 Memory Sparing Feature
+ * Readable Attributes.
+ */
+struct cxl_memdev_repair_rd_attrbs_hdr {
+ u8 max_op_latency;
+ __le16 op_cap;
+ __le16 op_mode;
+ u8 op_class;
+ u8 op_subclass;
+ u8 rsvd[9];
+} __packed;
+
+struct cxl_memdev_sparing_rd_attrbs {
+ struct cxl_memdev_repair_rd_attrbs_hdr hdr;
+ u8 rsvd;
+ __le16 restriction_flags;
+} __packed;
+
+/*
+ * See CXL spec rev 3.2 @8.2.10.7.1.4 Table 8-120 Memory Sparing Input Payload.
+ */
+struct cxl_memdev_sparing_in_payload {
+ u8 flags;
+ u8 channel;
+ u8 rank;
+ u8 nibble_mask[3];
+ u8 bank_group;
+ u8 bank;
+ u8 row[3];
+ __le16 column;
+ u8 sub_channel;
+} __packed;
+
+static int
+cxl_mem_sparing_get_attrbs(struct cxl_mem_sparing_context *cxl_sparing_ctx)
+{
+ size_t rd_data_size = sizeof(struct cxl_memdev_sparing_rd_attrbs);
+ struct cxl_memdev *cxlmd = cxl_sparing_ctx->cxlmd;
+ struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
+ u16 restriction_flags;
+ size_t data_size;
+ u16 return_code;
+ struct cxl_memdev_sparing_rd_attrbs *rd_attrbs __free(kfree) =
+ kzalloc(rd_data_size, GFP_KERNEL);
+ if (!rd_attrbs)
+ return -ENOMEM;
+
+ data_size = cxl_get_feature(cxl_mbox, &cxl_sparing_ctx->repair_uuid,
+ CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
+ rd_data_size, 0, &return_code);
+ if (!data_size)
+ return -EIO;
+
+ cxl_sparing_ctx->op_class = rd_attrbs->hdr.op_class;
+ cxl_sparing_ctx->op_subclass = rd_attrbs->hdr.op_subclass;
+ restriction_flags = le16_to_cpu(rd_attrbs->restriction_flags);
+ cxl_sparing_ctx->cap_safe_when_in_use =
+ CXL_GET_SPARING_SAFE_IN_USE(restriction_flags);
+ cxl_sparing_ctx->cap_hard_sparing =
+ CXL_GET_CAP_HARD_SPARING(restriction_flags);
+ cxl_sparing_ctx->cap_soft_sparing =
+ CXL_GET_CAP_SOFT_SPARING(restriction_flags);
+
+ return 0;
+}
+
+static struct cxl_event_dram *
+cxl_mem_get_rec_dram(struct cxl_memdev *cxlmd,
+ struct cxl_mem_sparing_context *ctx)
+{
+ struct cxl_mem_repair_attrbs attrbs = { 0 };
+
+ attrbs.dpa = ctx->dpa;
+ attrbs.channel = ctx->channel;
+ attrbs.rank = ctx->rank;
+ attrbs.nibble_mask = ctx->nibble_mask;
+ switch (ctx->repair_type) {
+ case EDAC_REPAIR_CACHELINE_SPARING:
+ attrbs.repair_type = CXL_CACHELINE_SPARING;
+ attrbs.bank_group = ctx->bank_group;
+ attrbs.bank = ctx->bank;
+ attrbs.row = ctx->row;
+ attrbs.column = ctx->column;
+ attrbs.sub_channel = ctx->sub_channel;
+ break;
+ case EDAC_REPAIR_ROW_SPARING:
+ attrbs.repair_type = CXL_ROW_SPARING;
+ attrbs.bank_group = ctx->bank_group;
+ attrbs.bank = ctx->bank;
+ attrbs.row = ctx->row;
+ break;
+ case EDAC_REPAIR_BANK_SPARING:
+ attrbs.repair_type = CXL_BANK_SPARING;
+ attrbs.bank_group = ctx->bank_group;
+ attrbs.bank = ctx->bank;
+ break;
+ case EDAC_REPAIR_RANK_SPARING:
+ attrbs.repair_type = CXL_RANK_SPARING;
+ break;
+ default:
+ return NULL;
+ }
+
+ return cxl_find_rec_dram(cxlmd, &attrbs);
+}
+
+static int
+cxl_mem_perform_sparing(struct device *dev,
+ struct cxl_mem_sparing_context *cxl_sparing_ctx)
+{
+ struct cxl_memdev *cxlmd = cxl_sparing_ctx->cxlmd;
+ struct cxl_memdev_sparing_in_payload sparing_pi;
+ struct cxl_event_dram *rec = NULL;
+ u16 validity_flags = 0;
+
+ struct rw_semaphore *region_lock __free(rwsem_read_release) =
+ rwsem_read_intr_acquire(&cxl_region_rwsem);
+ if (!region_lock)
+ return -EINTR;
+
+ struct rw_semaphore *dpa_lock __free(rwsem_read_release) =
+ rwsem_read_intr_acquire(&cxl_dpa_rwsem);
+ if (!dpa_lock)
+ return -EINTR;
+
+ if (!cxl_sparing_ctx->cap_safe_when_in_use) {
+ /* Memory to repair must be offline */
+ if (cxl_is_memdev_memory_online(cxlmd))
+ return -EBUSY;
+ } else {
+ if (cxl_is_memdev_memory_online(cxlmd)) {
+ rec = cxl_mem_get_rec_dram(cxlmd, cxl_sparing_ctx);
+ if (!rec)
+ return -EINVAL;
+
+ if (!get_unaligned_le16(rec->media_hdr.validity_flags))
+ return -EINVAL;
+ }
+ }
+
+ memset(&sparing_pi, 0, sizeof(sparing_pi));
+ sparing_pi.flags = CXL_SET_SPARING_QUERY_RESOURCE(0);
+ if (cxl_sparing_ctx->persist_mode)
+ sparing_pi.flags |= CXL_SET_HARD_SPARING(1);
+
+ if (rec)
+ validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);
+
+ switch (cxl_sparing_ctx->repair_type) {
+ case EDAC_REPAIR_CACHELINE_SPARING:
+ sparing_pi.column = cpu_to_le16(cxl_sparing_ctx->column);
+ if (!rec || (validity_flags & CXL_DER_VALID_SUB_CHANNEL)) {
+ sparing_pi.flags |= CXL_SET_SPARING_SUB_CHNL_VALID(1);
+ sparing_pi.sub_channel = cxl_sparing_ctx->sub_channel;
+ }
+ fallthrough;
+ case EDAC_REPAIR_ROW_SPARING:
+ put_unaligned_le24(cxl_sparing_ctx->row, sparing_pi.row);
+ fallthrough;
+ case EDAC_REPAIR_BANK_SPARING:
+ sparing_pi.bank_group = cxl_sparing_ctx->bank_group;
+ sparing_pi.bank = cxl_sparing_ctx->bank;
+ fallthrough;
+ case EDAC_REPAIR_RANK_SPARING:
+ sparing_pi.rank = cxl_sparing_ctx->rank;
+ fallthrough;
+ default:
+ sparing_pi.channel = cxl_sparing_ctx->channel;
+ if ((rec && (validity_flags & CXL_DER_VALID_NIBBLE)) ||
+ (!rec && (!cxl_sparing_ctx->nibble_mask ||
+ (cxl_sparing_ctx->nibble_mask & 0xFFFFFF)))) {
+ sparing_pi.flags |= CXL_SET_SPARING_NIB_MASK_VALID(1);
+ put_unaligned_le24(cxl_sparing_ctx->nibble_mask,
+ sparing_pi.nibble_mask);
+ }
+ break;
+ }
+
+ return cxl_perform_maintenance(&cxlmd->cxlds->cxl_mbox,
+ cxl_sparing_ctx->op_class,
+ cxl_sparing_ctx->op_subclass,
+ &sparing_pi, sizeof(sparing_pi));
+}
+
+static int cxl_mem_sparing_get_repair_type(struct device *dev, void *drv_data,
+ const char **repair_type)
+{
+ struct cxl_mem_sparing_context *ctx = drv_data;
+
+ switch (ctx->repair_type) {
+ case EDAC_REPAIR_CACHELINE_SPARING:
+ case EDAC_REPAIR_ROW_SPARING:
+ case EDAC_REPAIR_BANK_SPARING:
+ case EDAC_REPAIR_RANK_SPARING:
+ *repair_type = edac_repair_type[ctx->repair_type];
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+#define CXL_SPARING_GET_ATTR(attrb, data_type) \
+ static int cxl_mem_sparing_get_##attrb( \
+ struct device *dev, void *drv_data, data_type *val) \
+ { \
+ struct cxl_mem_sparing_context *ctx = drv_data; \
+ \
+ *val = ctx->attrb; \
+ \
+ return 0; \
+ }
+CXL_SPARING_GET_ATTR(persist_mode, bool)
+CXL_SPARING_GET_ATTR(dpa, u64)
+CXL_SPARING_GET_ATTR(nibble_mask, u32)
+CXL_SPARING_GET_ATTR(bank_group, u32)
+CXL_SPARING_GET_ATTR(bank, u32)
+CXL_SPARING_GET_ATTR(rank, u32)
+CXL_SPARING_GET_ATTR(row, u32)
+CXL_SPARING_GET_ATTR(column, u32)
+CXL_SPARING_GET_ATTR(channel, u32)
+CXL_SPARING_GET_ATTR(sub_channel, u32)
+
+#define CXL_SPARING_SET_ATTR(attrb, data_type) \
+ static int cxl_mem_sparing_set_##attrb(struct device *dev, \
+ void *drv_data, data_type val) \
+ { \
+ struct cxl_mem_sparing_context *ctx = drv_data; \
+ \
+ ctx->attrb = val; \
+ \
+ return 0; \
+ }
+CXL_SPARING_SET_ATTR(nibble_mask, u32)
+CXL_SPARING_SET_ATTR(bank_group, u32)
+CXL_SPARING_SET_ATTR(bank, u32)
+CXL_SPARING_SET_ATTR(rank, u32)
+CXL_SPARING_SET_ATTR(row, u32)
+CXL_SPARING_SET_ATTR(column, u32)
+CXL_SPARING_SET_ATTR(channel, u32)
+CXL_SPARING_SET_ATTR(sub_channel, u32)
+
+static int cxl_mem_sparing_set_persist_mode(struct device *dev, void *drv_data,
+ bool persist_mode)
+{
+ struct cxl_mem_sparing_context *ctx = drv_data;
+
+ if ((persist_mode && ctx->cap_hard_sparing) ||
+ (!persist_mode && ctx->cap_soft_sparing))
+ ctx->persist_mode = persist_mode;
+ else
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int cxl_get_mem_sparing_safe_when_in_use(struct device *dev,
+ void *drv_data, bool *safe)
+{
+ struct cxl_mem_sparing_context *ctx = drv_data;
+
+ *safe = ctx->cap_safe_when_in_use;
+
+ return 0;
+}
+
+static int cxl_mem_sparing_get_min_dpa(struct device *dev, void *drv_data,
+ u64 *min_dpa)
+{
+ struct cxl_mem_sparing_context *ctx = drv_data;
+ struct cxl_memdev *cxlmd = ctx->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+ *min_dpa = cxlds->dpa_res.start;
+
+ return 0;
+}
+
+static int cxl_mem_sparing_get_max_dpa(struct device *dev, void *drv_data,
+ u64 *max_dpa)
+{
+ struct cxl_mem_sparing_context *ctx = drv_data;
+ struct cxl_memdev *cxlmd = ctx->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+ *max_dpa = cxlds->dpa_res.end;
+
+ return 0;
+}
+
+static int cxl_mem_sparing_set_dpa(struct device *dev, void *drv_data, u64 dpa)
+{
+ struct cxl_mem_sparing_context *ctx = drv_data;
+ struct cxl_memdev *cxlmd = ctx->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+ if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end)
+ return -EINVAL;
+
+ ctx->dpa = dpa;
+
+ return 0;
+}
+
+static int cxl_do_mem_sparing(struct device *dev, void *drv_data, u32 val)
+{
+ struct cxl_mem_sparing_context *ctx = drv_data;
+
+ if (val != EDAC_DO_MEM_REPAIR)
+ return -EINVAL;
+
+ return cxl_mem_perform_sparing(dev, ctx);
+}
+
+#define RANK_OPS \
+ .get_repair_type = cxl_mem_sparing_get_repair_type, \
+ .get_persist_mode = cxl_mem_sparing_get_persist_mode, \
+ .set_persist_mode = cxl_mem_sparing_set_persist_mode, \
+ .get_repair_safe_when_in_use = cxl_get_mem_sparing_safe_when_in_use, \
+ .get_min_dpa = cxl_mem_sparing_get_min_dpa, \
+ .get_max_dpa = cxl_mem_sparing_get_max_dpa, \
+ .get_dpa = cxl_mem_sparing_get_dpa, \
+ .set_dpa = cxl_mem_sparing_set_dpa, \
+ .get_nibble_mask = cxl_mem_sparing_get_nibble_mask, \
+ .set_nibble_mask = cxl_mem_sparing_set_nibble_mask, \
+ .get_rank = cxl_mem_sparing_get_rank, \
+ .set_rank = cxl_mem_sparing_set_rank, \
+ .get_channel = cxl_mem_sparing_get_channel, \
+ .set_channel = cxl_mem_sparing_set_channel, \
+ .do_repair = cxl_do_mem_sparing
+
+#define BANK_OPS \
+ RANK_OPS, .get_bank_group = cxl_mem_sparing_get_bank_group, \
+ .set_bank_group = cxl_mem_sparing_set_bank_group, \
+ .get_bank = cxl_mem_sparing_get_bank, \
+ .set_bank = cxl_mem_sparing_set_bank
+
+#define ROW_OPS \
+ BANK_OPS, .get_row = cxl_mem_sparing_get_row, \
+ .set_row = cxl_mem_sparing_set_row
+
+#define CACHELINE_OPS \
+ ROW_OPS, .get_column = cxl_mem_sparing_get_column, \
+ .set_column = cxl_mem_sparing_set_column, \
+ .get_sub_channel = cxl_mem_sparing_get_sub_channel, \
+ .set_sub_channel = cxl_mem_sparing_set_sub_channel
+
+static const struct edac_mem_repair_ops cxl_rank_sparing_ops = {
+ RANK_OPS,
+};
+
+static const struct edac_mem_repair_ops cxl_bank_sparing_ops = {
+ BANK_OPS,
+};
+
+static const struct edac_mem_repair_ops cxl_row_sparing_ops = {
+ ROW_OPS,
+};
+
+static const struct edac_mem_repair_ops cxl_cacheline_sparing_ops = {
+ CACHELINE_OPS,
+};
+
+struct cxl_mem_sparing_desc {
+ const uuid_t repair_uuid;
+ enum edac_mem_repair_type repair_type;
+ const struct edac_mem_repair_ops *repair_ops;
+};
+
+static const struct cxl_mem_sparing_desc mem_sparing_desc[] = {
+ {
+ .repair_uuid = CXL_FEAT_CACHELINE_SPARING_UUID,
+ .repair_type = EDAC_REPAIR_CACHELINE_SPARING,
+ .repair_ops = &cxl_cacheline_sparing_ops,
+ },
+ {
+ .repair_uuid = CXL_FEAT_ROW_SPARING_UUID,
+ .repair_type = EDAC_REPAIR_ROW_SPARING,
+ .repair_ops = &cxl_row_sparing_ops,
+ },
+ {
+ .repair_uuid = CXL_FEAT_BANK_SPARING_UUID,
+ .repair_type = EDAC_REPAIR_BANK_SPARING,
+ .repair_ops = &cxl_bank_sparing_ops,
+ },
+ {
+ .repair_uuid = CXL_FEAT_RANK_SPARING_UUID,
+ .repair_type = EDAC_REPAIR_RANK_SPARING,
+ .repair_ops = &cxl_rank_sparing_ops,
+ },
+};
+
+static int cxl_memdev_sparing_init(struct cxl_memdev *cxlmd,
+ struct edac_dev_feature *ras_feature,
+ const struct cxl_mem_sparing_desc *desc,
+ u8 repair_inst)
+{
+ struct cxl_mem_sparing_context *cxl_sparing_ctx;
+ struct cxl_feat_entry *feat_entry;
+ int ret;
+
+ feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
+ &desc->repair_uuid);
+ if (IS_ERR(feat_entry))
+ return -EOPNOTSUPP;
+
+ if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
+ return -EOPNOTSUPP;
+
+ cxl_sparing_ctx = devm_kzalloc(&cxlmd->dev, sizeof(*cxl_sparing_ctx),
+ GFP_KERNEL);
+ if (!cxl_sparing_ctx)
+ return -ENOMEM;
+
+ *cxl_sparing_ctx = (struct cxl_mem_sparing_context){
+ .get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
+ .set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
+ .get_version = feat_entry->get_feat_ver,
+ .set_version = feat_entry->set_feat_ver,
+ .effects = le16_to_cpu(feat_entry->effects),
+ .cxlmd = cxlmd,
+ .repair_type = desc->repair_type,
+ .instance = repair_inst++,
+ };
+ uuid_copy(&cxl_sparing_ctx->repair_uuid, &desc->repair_uuid);
+
+ ret = cxl_mem_sparing_get_attrbs(cxl_sparing_ctx);
+ if (ret)
+ return ret;
+
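+ /* Default to non-persistent (soft) sparing when supported, else hard sparing */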
+ if ((cxl_sparing_ctx->cap_soft_sparing &&
+ cxl_sparing_ctx->cap_hard_sparing) ||
+ cxl_sparing_ctx->cap_soft_sparing)
+ cxl_sparing_ctx->persist_mode = 0;
+ else if (cxl_sparing_ctx->cap_hard_sparing)
+ cxl_sparing_ctx->persist_mode = 1;
+ else
+ return -EOPNOTSUPP;
+
+ ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
+ ras_feature->instance = cxl_sparing_ctx->instance;
+ ras_feature->mem_repair_ops = desc->repair_ops;
+ ras_feature->ctx = cxl_sparing_ctx;
+
+ return 0;
+}
+
+/*
+ * CXL memory soft PPR & hard PPR control
+ */
+struct cxl_ppr_context {
+ uuid_t repair_uuid;
+ u8 instance;
+ u16 get_feat_size;
+ u16 set_feat_size;
+ u8 get_version;
+ u8 set_version;
+ u16 effects;
+ u8 op_class;
+ u8 op_subclass;
+ bool cap_dpa;
+ bool cap_nib_mask;
+ bool media_accessible;
+ bool data_retained;
+ struct cxl_memdev *cxlmd;
+ enum edac_mem_repair_type repair_type;
+ bool persist_mode;
+ u64 dpa;
+ u32 nibble_mask;
+};
+
+/*
+ * See CXL rev 3.2 @8.2.10.7.2.1 Table 8-128 sPPR Feature Readable Attributes
+ *
+ * See CXL rev 3.2 @8.2.10.7.2.2 Table 8-131 hPPR Feature Readable Attributes
+ */
+
+#define CXL_PPR_OP_CAP_DEVICE_INITIATED BIT(0)
+#define CXL_PPR_OP_MODE_DEV_INITIATED BIT(0)
+
+#define CXL_PPR_FLAG_DPA_SUPPORT_MASK BIT(0)
+#define CXL_PPR_FLAG_NIB_SUPPORT_MASK BIT(1)
+#define CXL_PPR_FLAG_MEM_SPARING_EV_REC_SUPPORT_MASK BIT(2)
+#define CXL_PPR_FLAG_DEV_INITED_PPR_AT_BOOT_CAP_MASK BIT(3)
+
+#define CXL_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK BIT(0)
+#define CXL_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK BIT(2)
+
+#define CXL_PPR_SPARING_EV_REC_EN_MASK BIT(0)
+#define CXL_PPR_DEV_INITED_PPR_AT_BOOT_EN_MASK BIT(1)
+
+#define CXL_PPR_GET_CAP_DPA(flags) \
+ FIELD_GET(CXL_PPR_FLAG_DPA_SUPPORT_MASK, flags)
+#define CXL_PPR_GET_CAP_NIB_MASK(flags) \
+ FIELD_GET(CXL_PPR_FLAG_NIB_SUPPORT_MASK, flags)
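+/*
+ * The restriction flags encode the negative condition (bit set means the
+ * media is not accessible, or data is not retained, during the repair
+ * operation), hence the XOR to yield positive booleans.
+ */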
+#define CXL_PPR_GET_MEDIA_ACCESSIBLE(restriction_flags) \
+ (FIELD_GET(CXL_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK, \
+ restriction_flags) ^ 1)
+#define CXL_PPR_GET_DATA_RETAINED(restriction_flags) \
+ (FIELD_GET(CXL_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK, \
+ restriction_flags) ^ 1)
+
+struct cxl_memdev_ppr_rd_attrbs {
+ struct cxl_memdev_repair_rd_attrbs_hdr hdr;
+ u8 ppr_flags;
+ __le16 restriction_flags;
+ u8 ppr_op_mode;
+} __packed;
+
+/*
+ * See CXL rev 3.2 @8.2.10.7.1.2 Table 8-118 sPPR Maintenance Input Payload
+ *
+ * See CXL rev 3.2 @8.2.10.7.1.3 Table 8-119 hPPR Maintenance Input Payload
+ */
+struct cxl_memdev_ppr_maintenance_attrbs {
+ u8 flags;
+ __le64 dpa;
+ u8 nibble_mask[3];
+} __packed;
+
+static int cxl_mem_ppr_get_attrbs(struct cxl_ppr_context *cxl_ppr_ctx)
+{
+ size_t rd_data_size = sizeof(struct cxl_memdev_ppr_rd_attrbs);
+ struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
+ struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
+ u16 restriction_flags;
+ size_t data_size;
+ u16 return_code;
+
+ struct cxl_memdev_ppr_rd_attrbs *rd_attrbs __free(kfree) =
+ kmalloc(rd_data_size, GFP_KERNEL);
+ if (!rd_attrbs)
+ return -ENOMEM;
+
+ data_size = cxl_get_feature(cxl_mbox, &cxl_ppr_ctx->repair_uuid,
+ CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
+ rd_data_size, 0, &return_code);
+ if (!data_size)
+ return -EIO;
+
+ cxl_ppr_ctx->op_class = rd_attrbs->hdr.op_class;
+ cxl_ppr_ctx->op_subclass = rd_attrbs->hdr.op_subclass;
+ cxl_ppr_ctx->cap_dpa = CXL_PPR_GET_CAP_DPA(rd_attrbs->ppr_flags);
+ cxl_ppr_ctx->cap_nib_mask =
+ CXL_PPR_GET_CAP_NIB_MASK(rd_attrbs->ppr_flags);
+
+ restriction_flags = le16_to_cpu(rd_attrbs->restriction_flags);
+ cxl_ppr_ctx->media_accessible =
+ CXL_PPR_GET_MEDIA_ACCESSIBLE(restriction_flags);
+ cxl_ppr_ctx->data_retained =
+ CXL_PPR_GET_DATA_RETAINED(restriction_flags);
+
+ return 0;
+}
+
+static int cxl_mem_perform_ppr(struct cxl_ppr_context *cxl_ppr_ctx)
+{
+ struct cxl_memdev_ppr_maintenance_attrbs maintenance_attrbs;
+ struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
+ struct cxl_mem_repair_attrbs attrbs = { 0 };
+
+ struct rw_semaphore *region_lock __free(rwsem_read_release) =
+ rwsem_read_intr_acquire(&cxl_region_rwsem);
+ if (!region_lock)
+ return -EINTR;
+
+ struct rw_semaphore *dpa_lock __free(rwsem_read_release) =
+ rwsem_read_intr_acquire(&cxl_dpa_rwsem);
+ if (!dpa_lock)
+ return -EINTR;
+
+ if (!cxl_ppr_ctx->media_accessible || !cxl_ppr_ctx->data_retained) {
+ /* Memory to repair must be offline */
+ if (cxl_is_memdev_memory_online(cxlmd))
+ return -EBUSY;
+ } else {
+ if (cxl_is_memdev_memory_online(cxlmd)) {
+ /* Check memory to repair is from the current boot */
+ attrbs.repair_type = CXL_PPR;
+ attrbs.dpa = cxl_ppr_ctx->dpa;
+ attrbs.nibble_mask = cxl_ppr_ctx->nibble_mask;
+ if (!cxl_find_rec_dram(cxlmd, &attrbs) &&
+ !cxl_find_rec_gen_media(cxlmd, &attrbs))
+ return -EINVAL;
+ }
+ }
+
+ memset(&maintenance_attrbs, 0, sizeof(maintenance_attrbs));
+ maintenance_attrbs.flags = 0;
+ maintenance_attrbs.dpa = cpu_to_le64(cxl_ppr_ctx->dpa);
+ put_unaligned_le24(cxl_ppr_ctx->nibble_mask,
+ maintenance_attrbs.nibble_mask);
+
+ return cxl_perform_maintenance(&cxlmd->cxlds->cxl_mbox,
+ cxl_ppr_ctx->op_class,
+ cxl_ppr_ctx->op_subclass,
+ &maintenance_attrbs,
+ sizeof(maintenance_attrbs));
+}
+
+static int cxl_ppr_get_repair_type(struct device *dev, void *drv_data,
+ const char **repair_type)
+{
+ *repair_type = edac_repair_type[EDAC_REPAIR_PPR];
+
+ return 0;
+}
+
+static int cxl_ppr_get_persist_mode(struct device *dev, void *drv_data,
+ bool *persist_mode)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+
+ *persist_mode = cxl_ppr_ctx->persist_mode;
+
+ return 0;
+}
+
+static int cxl_get_ppr_safe_when_in_use(struct device *dev, void *drv_data,
+ bool *safe)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+
+ *safe = cxl_ppr_ctx->media_accessible & cxl_ppr_ctx->data_retained;
+
+ return 0;
+}
+
+static int cxl_ppr_get_min_dpa(struct device *dev, void *drv_data, u64 *min_dpa)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+ struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+ *min_dpa = cxlds->dpa_res.start;
+
+ return 0;
+}
+
+static int cxl_ppr_get_max_dpa(struct device *dev, void *drv_data, u64 *max_dpa)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+ struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+ *max_dpa = cxlds->dpa_res.end;
+
+ return 0;
+}
+
+static int cxl_ppr_get_dpa(struct device *dev, void *drv_data, u64 *dpa)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+
+ *dpa = cxl_ppr_ctx->dpa;
+
+ return 0;
+}
+
+static int cxl_ppr_set_dpa(struct device *dev, void *drv_data, u64 dpa)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+ struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+ if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end)
+ return -EINVAL;
+
+ cxl_ppr_ctx->dpa = dpa;
+
+ return 0;
+}
+
+static int cxl_ppr_get_nibble_mask(struct device *dev, void *drv_data,
+ u32 *nibble_mask)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+
+ *nibble_mask = cxl_ppr_ctx->nibble_mask;
+
+ return 0;
+}
+
+static int cxl_ppr_set_nibble_mask(struct device *dev, void *drv_data,
+ u32 nibble_mask)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+
+ cxl_ppr_ctx->nibble_mask = nibble_mask;
+
+ return 0;
+}
+
+static int cxl_do_ppr(struct device *dev, void *drv_data, u32 val)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+ struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+ if (val != EDAC_DO_MEM_REPAIR ||
+ !cxl_resource_contains_addr(&cxlds->dpa_res, cxl_ppr_ctx->dpa))
+ return -EINVAL;
+
+ return cxl_mem_perform_ppr(cxl_ppr_ctx);
+}
+
+static const struct edac_mem_repair_ops cxl_sppr_ops = {
+ .get_repair_type = cxl_ppr_get_repair_type,
+ .get_persist_mode = cxl_ppr_get_persist_mode,
+ .get_repair_safe_when_in_use = cxl_get_ppr_safe_when_in_use,
+ .get_min_dpa = cxl_ppr_get_min_dpa,
+ .get_max_dpa = cxl_ppr_get_max_dpa,
+ .get_dpa = cxl_ppr_get_dpa,
+ .set_dpa = cxl_ppr_set_dpa,
+ .get_nibble_mask = cxl_ppr_get_nibble_mask,
+ .set_nibble_mask = cxl_ppr_set_nibble_mask,
+ .do_repair = cxl_do_ppr,
+};
+
+static int cxl_memdev_soft_ppr_init(struct cxl_memdev *cxlmd,
+ struct edac_dev_feature *ras_feature,
+ u8 repair_inst)
+{
+ struct cxl_ppr_context *cxl_sppr_ctx;
+ struct cxl_feat_entry *feat_entry;
+ int ret;
+
+ feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
+ &CXL_FEAT_SPPR_UUID);
+ if (IS_ERR(feat_entry))
+ return -EOPNOTSUPP;
+
+ if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
+ return -EOPNOTSUPP;
+
+ cxl_sppr_ctx =
+ devm_kzalloc(&cxlmd->dev, sizeof(*cxl_sppr_ctx), GFP_KERNEL);
+ if (!cxl_sppr_ctx)
+ return -ENOMEM;
+
+ *cxl_sppr_ctx = (struct cxl_ppr_context){
+ .get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
+ .set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
+ .get_version = feat_entry->get_feat_ver,
+ .set_version = feat_entry->set_feat_ver,
+ .effects = le16_to_cpu(feat_entry->effects),
+ .cxlmd = cxlmd,
+ .repair_type = EDAC_REPAIR_PPR,
+ .persist_mode = 0,
+ .instance = repair_inst,
+ };
+ uuid_copy(&cxl_sppr_ctx->repair_uuid, &CXL_FEAT_SPPR_UUID);
+
+ ret = cxl_mem_ppr_get_attrbs(cxl_sppr_ctx);
+ if (ret)
+ return ret;
+
+ ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
+ ras_feature->instance = cxl_sppr_ctx->instance;
+ ras_feature->mem_repair_ops = &cxl_sppr_ops;
+ ras_feature->ctx = cxl_sppr_ctx;
+
+ return 0;
+}
+
+int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
+{
+ struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
+ int num_ras_features = 0;
+ u8 repair_inst = 0;
+ int rc;
+
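+ /* Probe the optional RAS features (scrub, ECS, memory repair) the device supports */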
+ if (IS_ENABLED(CONFIG_CXL_EDAC_SCRUB)) {
+ rc = cxl_memdev_scrub_init(cxlmd, &ras_features[num_ras_features], 0);
+ if (rc < 0 && rc != -EOPNOTSUPP)
+ return rc;
+
+ if (rc != -EOPNOTSUPP)
+ num_ras_features++;
+ }
+
+ if (IS_ENABLED(CONFIG_CXL_EDAC_ECS)) {
+ rc = cxl_memdev_ecs_init(cxlmd, &ras_features[num_ras_features]);
+ if (rc < 0 && rc != -EOPNOTSUPP)
+ return rc;
+
+ if (rc != -EOPNOTSUPP)
+ num_ras_features++;
+ }
+
+ if (IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR)) {
+ for (int i = 0; i < CXL_MEM_SPARING_MAX; i++) {
+ rc = cxl_memdev_sparing_init(cxlmd,
+ &ras_features[num_ras_features],
+ &mem_sparing_desc[i], repair_inst);
+ if (rc == -EOPNOTSUPP)
+ continue;
+ if (rc < 0)
+ return rc;
+
+ repair_inst++;
+ num_ras_features++;
+ }
+
+ rc = cxl_memdev_soft_ppr_init(cxlmd, &ras_features[num_ras_features],
+ repair_inst);
+ if (rc < 0 && rc != -EOPNOTSUPP)
+ return rc;
+
+ if (rc != -EOPNOTSUPP) {
+ repair_inst++;
+ num_ras_features++;
+ }
+
+ if (repair_inst) {
+ struct cxl_mem_err_rec *array_rec =
+ devm_kzalloc(&cxlmd->dev, sizeof(*array_rec),
+ GFP_KERNEL);
+ if (!array_rec)
+ return -ENOMEM;
+
+ xa_init(&array_rec->rec_gen_media);
+ xa_init(&array_rec->rec_dram);
+ cxlmd->err_rec_array = array_rec;
+ }
+ }
+
+ if (!num_ras_features)
+ return -EINVAL;
+
+ char *cxl_dev_name __free(kfree) =
+ kasprintf(GFP_KERNEL, "cxl_%s", dev_name(&cxlmd->dev));
+ if (!cxl_dev_name)
+ return -ENOMEM;
+
+ return edac_dev_register(&cxlmd->dev, cxl_dev_name, NULL,
+ num_ras_features, ras_features);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_register, "CXL");
+
+int devm_cxl_region_edac_register(struct cxl_region *cxlr)
+{
+ struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
+ int num_ras_features = 0;
+ int rc;
+
+ if (!IS_ENABLED(CONFIG_CXL_EDAC_SCRUB))
+ return 0;
+
+ rc = cxl_region_scrub_init(cxlr, &ras_features[num_ras_features], 0);
+ if (rc < 0)
+ return rc;
+
+ num_ras_features++;
+
+ char *cxl_dev_name __free(kfree) =
+ kasprintf(GFP_KERNEL, "cxl_%s", dev_name(&cxlr->dev));
+ if (!cxl_dev_name)
+ return -ENOMEM;
+
+ return edac_dev_register(&cxlr->dev, cxl_dev_name, NULL,
+ num_ras_features, ras_features);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_region_edac_register, "CXL");
+
+void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd)
+{
+ struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
+ struct cxl_event_gen_media *rec_gen_media;
+ struct cxl_event_dram *rec_dram;
+ unsigned long index;
+
+ if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
+ return;
+
+ xa_for_each(&array_rec->rec_dram, index, rec_dram)
+ kfree(rec_dram);
+ xa_destroy(&array_rec->rec_dram);
+
+ xa_for_each(&array_rec->rec_gen_media, index, rec_gen_media)
+ kfree(rec_gen_media);
+ xa_destroy(&array_rec->rec_gen_media);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_release, "CXL");
diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c
new file mode 100644
index 000000000000..7c750599ea69
--- /dev/null
+++ b/drivers/cxl/core/features.c
@@ -0,0 +1,703 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2024-2025 Intel Corporation. All rights reserved. */
+#include <linux/fwctl.h>
+#include <linux/device.h>
+#include <cxl/mailbox.h>
+#include <cxl/features.h>
+#include <uapi/fwctl/cxl.h>
+#include "cxl.h"
+#include "core.h"
+#include "cxlmem.h"
+
+/**
+ * DOC: cxl features
+ *
+ * CXL Features:
+ * A CXL device that includes a mailbox supports commands that allow
+ * listing, getting, and setting of optionally defined features such
+ * as memory sparing or post package repair. Vendors may define custom
+ * features for the device.
+ */
+
+/* All the features below are exclusive to the kernel */
+static const uuid_t cxl_exclusive_feats[] = {
+ CXL_FEAT_PATROL_SCRUB_UUID,
+ CXL_FEAT_ECS_UUID,
+ CXL_FEAT_SPPR_UUID,
+ CXL_FEAT_HPPR_UUID,
+ CXL_FEAT_CACHELINE_SPARING_UUID,
+ CXL_FEAT_ROW_SPARING_UUID,
+ CXL_FEAT_BANK_SPARING_UUID,
+ CXL_FEAT_RANK_SPARING_UUID,
+};
+
+static bool is_cxl_feature_exclusive_by_uuid(const uuid_t *uuid)
+{
+ for (int i = 0; i < ARRAY_SIZE(cxl_exclusive_feats); i++) {
+ if (uuid_equal(uuid, &cxl_exclusive_feats[i]))
+ return true;
+ }
+
+ return false;
+}
+
+static bool is_cxl_feature_exclusive(struct cxl_feat_entry *entry)
+{
+ return is_cxl_feature_exclusive_by_uuid(&entry->uuid);
+}
+
+struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds)
+{
+ return cxlds->cxlfs;
+}
+EXPORT_SYMBOL_NS_GPL(to_cxlfs, "CXL");
+
+static int cxl_get_supported_features_count(struct cxl_mailbox *cxl_mbox)
+{
+ struct cxl_mbox_get_sup_feats_out mbox_out;
+ struct cxl_mbox_get_sup_feats_in mbox_in;
+ struct cxl_mbox_cmd mbox_cmd;
+ int rc;
+
+ memset(&mbox_in, 0, sizeof(mbox_in));
+ mbox_in.count = cpu_to_le32(sizeof(mbox_out));
+ memset(&mbox_out, 0, sizeof(mbox_out));
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_SUPPORTED_FEATURES,
+ .size_in = sizeof(mbox_in),
+ .payload_in = &mbox_in,
+ .size_out = sizeof(mbox_out),
+ .payload_out = &mbox_out,
+ .min_out = sizeof(mbox_out),
+ };
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+ if (rc < 0)
+ return rc;
+
+ return le16_to_cpu(mbox_out.supported_feats);
+}
+
+static struct cxl_feat_entries *
+get_supported_features(struct cxl_features_state *cxlfs)
+{
+ int remain_feats, max_size, max_feats, start, rc, hdr_size;
+ struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox;
+ int feat_size = sizeof(struct cxl_feat_entry);
+ struct cxl_mbox_get_sup_feats_in mbox_in;
+ struct cxl_feat_entry *entry;
+ struct cxl_mbox_cmd mbox_cmd;
+ int user_feats = 0;
+ int count;
+
+ count = cxl_get_supported_features_count(cxl_mbox);
+ if (count <= 0)
+ return NULL;
+
+ struct cxl_feat_entries *entries __free(kvfree) =
+ kvmalloc(struct_size(entries, ent, count), GFP_KERNEL);
+ if (!entries)
+ return NULL;
+
+ struct cxl_mbox_get_sup_feats_out *mbox_out __free(kvfree) =
+ kvmalloc(cxl_mbox->payload_size, GFP_KERNEL);
+ if (!mbox_out)
+ return NULL;
+
+ hdr_size = struct_size(mbox_out, ents, 0);
+ max_size = cxl_mbox->payload_size - hdr_size;
+ /* max feat entries that can fit in mailbox max payload size */
+ max_feats = max_size / feat_size;
+ entry = entries->ent;
+
+ start = 0;
+ remain_feats = count;
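+ /*
+ * Retrieve the feature entries in batches, each bounded by the
+ * mailbox payload size, until all 'count' entries have been copied.
+ */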
+ do {
+ int retrieved, alloc_size, copy_feats;
+ int num_entries;
+
+ if (remain_feats > max_feats) {
+ alloc_size = struct_size(mbox_out, ents, max_feats);
+ remain_feats = remain_feats - max_feats;
+ copy_feats = max_feats;
+ } else {
+ alloc_size = struct_size(mbox_out, ents, remain_feats);
+ copy_feats = remain_feats;
+ remain_feats = 0;
+ }
+
+ memset(&mbox_in, 0, sizeof(mbox_in));
+ mbox_in.count = cpu_to_le32(alloc_size);
+ mbox_in.start_idx = cpu_to_le16(start);
+ memset(mbox_out, 0, alloc_size);
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_SUPPORTED_FEATURES,
+ .size_in = sizeof(mbox_in),
+ .payload_in = &mbox_in,
+ .size_out = alloc_size,
+ .payload_out = mbox_out,
+ .min_out = hdr_size,
+ };
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+ if (rc < 0)
+ return NULL;
+
+ if (mbox_cmd.size_out <= hdr_size)
+ return NULL;
+
+ /*
+ * Make sure the size of the retrieved output buffer is a multiple
+ * of the feature entry size.
+ */
+ retrieved = mbox_cmd.size_out - hdr_size;
+ if (retrieved % feat_size)
+ return NULL;
+
+ num_entries = le16_to_cpu(mbox_out->num_entries);
+ /*
+ * If the reported output entries * defined entry size !=
+ * retrieved output bytes, then the output payload is incorrect.
+ */
+ if (num_entries * feat_size != retrieved)
+ return NULL;
+
+ memcpy(entry, mbox_out->ents, retrieved);
+ for (int i = 0; i < num_entries; i++) {
+ if (!is_cxl_feature_exclusive(entry + i))
+ user_feats++;
+ }
+ entry += num_entries;
+ /*
+ * If the number of output entries is less than expected, add the
+ * remaining entries to the next batch.
+ */
+ remain_feats += copy_feats - num_entries;
+ start += num_entries;
+ } while (remain_feats);
+
+ entries->num_features = count;
+ entries->num_user_features = user_feats;
+
+ return no_free_ptr(entries);
+}
+
+static void free_cxlfs(void *_cxlfs)
+{
+ struct cxl_features_state *cxlfs = _cxlfs;
+ struct cxl_dev_state *cxlds = cxlfs->cxlds;
+
+ cxlds->cxlfs = NULL;
+ kvfree(cxlfs->entries);
+ kfree(cxlfs);
+}
+
+/**
+ * devm_cxl_setup_features() - Allocate and initialize features context
+ * @cxlds: CXL device context
+ *
+ * Return 0 on success or -errno on failure.
+ */
+int devm_cxl_setup_features(struct cxl_dev_state *cxlds)
+{
+ struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
+
+ if (cxl_mbox->feat_cap < CXL_FEATURES_RO)
+ return -ENODEV;
+
+ struct cxl_features_state *cxlfs __free(kfree) =
+ kzalloc(sizeof(*cxlfs), GFP_KERNEL);
+ if (!cxlfs)
+ return -ENOMEM;
+
+ cxlfs->cxlds = cxlds;
+
+ cxlfs->entries = get_supported_features(cxlfs);
+ if (!cxlfs->entries)
+ return -ENOMEM;
+
+ cxlds->cxlfs = cxlfs;
+
+ return devm_add_action_or_reset(cxlds->dev, free_cxlfs, no_free_ptr(cxlfs));
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_features, "CXL");
+
+size_t cxl_get_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
+ enum cxl_get_feat_selection selection,
+ void *feat_out, size_t feat_out_size, u16 offset,
+ u16 *return_code)
+{
+ size_t data_to_rd_size, size_out;
+ struct cxl_mbox_get_feat_in pi;
+ struct cxl_mbox_cmd mbox_cmd;
+ size_t data_rcvd_size = 0;
+ int rc;
+
+ if (return_code)
+ *return_code = CXL_MBOX_CMD_RC_INPUT;
+
+ if (!feat_out || !feat_out_size)
+ return 0;
+
+ size_out = min(feat_out_size, cxl_mbox->payload_size);
+ uuid_copy(&pi.uuid, feat_uuid);
+ pi.selection = selection;
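+ /* Read the feature data in payload-sized chunks until feat_out_size bytes are received */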
+ do {
+ data_to_rd_size = min(feat_out_size - data_rcvd_size,
+ cxl_mbox->payload_size);
+ pi.offset = cpu_to_le16(offset + data_rcvd_size);
+ pi.count = cpu_to_le16(data_to_rd_size);
+
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_FEATURE,
+ .size_in = sizeof(pi),
+ .payload_in = &pi,
+ .size_out = size_out,
+ .payload_out = feat_out + data_rcvd_size,
+ .min_out = data_to_rd_size,
+ };
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+ if (rc < 0 || !mbox_cmd.size_out) {
+ if (return_code)
+ *return_code = mbox_cmd.return_code;
+ return 0;
+ }
+ data_rcvd_size += mbox_cmd.size_out;
+ } while (data_rcvd_size < feat_out_size);
+
+ if (return_code)
+ *return_code = CXL_MBOX_CMD_RC_SUCCESS;
+
+ return data_rcvd_size;
+}
+
+/*
+ * FEAT_DATA_MIN_PAYLOAD_SIZE - minimum number of extra bytes that must be
+ * available in the mailbox payload for storing the actual feature data so
+ * that the feature data transfer works as expected.
+ */
+#define FEAT_DATA_MIN_PAYLOAD_SIZE 10
+int cxl_set_feature(struct cxl_mailbox *cxl_mbox,
+ const uuid_t *feat_uuid, u8 feat_version,
+ const void *feat_data, size_t feat_data_size,
+ u32 feat_flag, u16 offset, u16 *return_code)
+{
+ size_t data_in_size, data_sent_size = 0;
+ struct cxl_mbox_cmd mbox_cmd;
+ size_t hdr_size;
+
+ if (return_code)
+ *return_code = CXL_MBOX_CMD_RC_INPUT;
+
+ struct cxl_mbox_set_feat_in *pi __free(kfree) =
+ kzalloc(cxl_mbox->payload_size, GFP_KERNEL);
+ if (!pi)
+ return -ENOMEM;
+
+ uuid_copy(&pi->uuid, feat_uuid);
+ pi->version = feat_version;
+ feat_flag &= ~CXL_SET_FEAT_FLAG_DATA_TRANSFER_MASK;
+ feat_flag |= CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET;
+ hdr_size = sizeof(pi->hdr);
+ /*
+ * Check minimum mbox payload size is available for
+ * the feature data transfer.
+ */
+ if (hdr_size + FEAT_DATA_MIN_PAYLOAD_SIZE > cxl_mbox->payload_size)
+ return -ENOMEM;
+
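+ /*
+ * Use a single FULL data transfer when the feature data fits in one
+ * mailbox payload, otherwise start an INITIATE/CONTINUE/FINISH
+ * multi-part transfer.
+ */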
+ if (hdr_size + feat_data_size <= cxl_mbox->payload_size) {
+ pi->flags = cpu_to_le32(feat_flag |
+ CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER);
+ data_in_size = feat_data_size;
+ } else {
+ pi->flags = cpu_to_le32(feat_flag |
+ CXL_SET_FEAT_FLAG_INITIATE_DATA_TRANSFER);
+ data_in_size = cxl_mbox->payload_size - hdr_size;
+ }
+
+ do {
+ int rc;
+
+ pi->offset = cpu_to_le16(offset + data_sent_size);
+ memcpy(pi->feat_data, feat_data + data_sent_size, data_in_size);
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_SET_FEATURE,
+ .size_in = hdr_size + data_in_size,
+ .payload_in = pi,
+ };
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+ if (rc < 0) {
+ if (return_code)
+ *return_code = mbox_cmd.return_code;
+ return rc;
+ }
+
+ data_sent_size += data_in_size;
+ if (data_sent_size >= feat_data_size) {
+ if (return_code)
+ *return_code = CXL_MBOX_CMD_RC_SUCCESS;
+ return 0;
+ }
+
+ if ((feat_data_size - data_sent_size) <= (cxl_mbox->payload_size - hdr_size)) {
+ data_in_size = feat_data_size - data_sent_size;
+ pi->flags = cpu_to_le32(feat_flag |
+ CXL_SET_FEAT_FLAG_FINISH_DATA_TRANSFER);
+ } else {
+ pi->flags = cpu_to_le32(feat_flag |
+ CXL_SET_FEAT_FLAG_CONTINUE_DATA_TRANSFER);
+ }
+ } while (true);
+}
+
+/* FWCTL support */
+
+static inline struct cxl_memdev *fwctl_to_memdev(struct fwctl_device *fwctl_dev)
+{
+ return to_cxl_memdev(fwctl_dev->dev.parent);
+}
+
+static int cxlctl_open_uctx(struct fwctl_uctx *uctx)
+{
+ return 0;
+}
+
+static void cxlctl_close_uctx(struct fwctl_uctx *uctx)
+{
+}
+
+struct cxl_feat_entry *
+cxl_feature_info(struct cxl_features_state *cxlfs,
+ const uuid_t *uuid)
+{
+ struct cxl_feat_entry *feat;
+
+ for (int i = 0; i < cxlfs->entries->num_features; i++) {
+ feat = &cxlfs->entries->ent[i];
+ if (uuid_equal(uuid, &feat->uuid))
+ return feat;
+ }
+
+ return ERR_PTR(-EINVAL);
+}
+
+static void *cxlctl_get_supported_features(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ size_t *out_len)
+{
+ const struct cxl_mbox_get_sup_feats_in *feat_in;
+ struct cxl_mbox_get_sup_feats_out *feat_out;
+ struct cxl_feat_entry *pos;
+ size_t out_size;
+ int requested;
+ u32 count;
+ u16 start;
+ int i;
+
+ if (rpc_in->op_size != sizeof(*feat_in))
+ return ERR_PTR(-EINVAL);
+
+ feat_in = &rpc_in->get_sup_feats_in;
+ count = le32_to_cpu(feat_in->count);
+ start = le16_to_cpu(feat_in->start_idx);
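+ /* The requested 'count' is in bytes; convert it to a number of feature entries */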
+ requested = count / sizeof(*pos);
+
+ /*
+ * Make sure that the total requested number of entries is not greater
+ * than the total number of supported features allowed for userspace.
+ */
+ if (start >= cxlfs->entries->num_features)
+ return ERR_PTR(-EINVAL);
+
+ requested = min_t(int, requested, cxlfs->entries->num_features - start);
+
+ out_size = sizeof(struct fwctl_rpc_cxl_out) +
+ struct_size(feat_out, ents, requested);
+
+ struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) =
+ kvzalloc(out_size, GFP_KERNEL);
+ if (!rpc_out)
+ return ERR_PTR(-ENOMEM);
+
+ rpc_out->size = struct_size(feat_out, ents, requested);
+ feat_out = &rpc_out->get_sup_feats_out;
+
+ for (i = start, pos = &feat_out->ents[0];
+ i < cxlfs->entries->num_features; i++, pos++) {
+ if (i - start == requested)
+ break;
+
+ memcpy(pos, &cxlfs->entries->ent[i], sizeof(*pos));
+ /*
+ * If the feature is exclusive, set the set_feat_size to 0 to
+ * indicate that the feature is not changeable.
+ */
+ if (is_cxl_feature_exclusive(pos)) {
+ u32 flags;
+
+ pos->set_feat_size = 0;
+ flags = le32_to_cpu(pos->flags);
+ flags &= ~CXL_FEATURE_F_CHANGEABLE;
+ pos->flags = cpu_to_le32(flags);
+ }
+ }
+
+ feat_out->num_entries = cpu_to_le16(requested);
+ feat_out->supported_feats = cpu_to_le16(cxlfs->entries->num_features);
+ rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS;
+ *out_len = out_size;
+
+ return no_free_ptr(rpc_out);
+}
+
+static void *cxlctl_get_feature(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ size_t *out_len)
+{
+ struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox;
+ const struct cxl_mbox_get_feat_in *feat_in;
+ u16 offset, count, return_code;
+ size_t out_size = *out_len;
+
+ if (rpc_in->op_size != sizeof(*feat_in))
+ return ERR_PTR(-EINVAL);
+
+ feat_in = &rpc_in->get_feat_in;
+ offset = le16_to_cpu(feat_in->offset);
+ count = le16_to_cpu(feat_in->count);
+
+ if (!count)
+ return ERR_PTR(-EINVAL);
+
+ struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) =
+ kvzalloc(out_size, GFP_KERNEL);
+ if (!rpc_out)
+ return ERR_PTR(-ENOMEM);
+
+ out_size = cxl_get_feature(cxl_mbox, &feat_in->uuid,
+ feat_in->selection, rpc_out->payload,
+ count, offset, &return_code);
+ *out_len = sizeof(struct fwctl_rpc_cxl_out);
+ if (!out_size) {
+ rpc_out->size = 0;
+ rpc_out->retval = return_code;
+ return no_free_ptr(rpc_out);
+ }
+
+ rpc_out->size = out_size;
+ rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS;
+ *out_len += out_size;
+
+ return no_free_ptr(rpc_out);
+}
+
+static void *cxlctl_set_feature(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ size_t *out_len)
+{
+ struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox;
+ const struct cxl_mbox_set_feat_in *feat_in;
+ size_t out_size, data_size;
+ u16 offset, return_code;
+ u32 flags;
+ int rc;
+
+ if (rpc_in->op_size <= sizeof(feat_in->hdr))
+ return ERR_PTR(-EINVAL);
+
+ feat_in = &rpc_in->set_feat_in;
+
+ if (is_cxl_feature_exclusive_by_uuid(&feat_in->uuid))
+ return ERR_PTR(-EPERM);
+
+ offset = le16_to_cpu(feat_in->offset);
+ flags = le32_to_cpu(feat_in->flags);
+ out_size = *out_len;
+
+ struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) =
+ kvzalloc(out_size, GFP_KERNEL);
+ if (!rpc_out)
+ return ERR_PTR(-ENOMEM);
+
+ rpc_out->size = 0;
+
+ data_size = rpc_in->op_size - sizeof(feat_in->hdr);
+ rc = cxl_set_feature(cxl_mbox, &feat_in->uuid,
+ feat_in->version, feat_in->feat_data,
+ data_size, flags, offset, &return_code);
+ *out_len = sizeof(*rpc_out);
+ if (rc) {
+ rpc_out->retval = return_code;
+ return no_free_ptr(rpc_out);
+ }
+
+ rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS;
+
+ return no_free_ptr(rpc_out);
+}
+
+static bool cxlctl_validate_set_features(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ enum fwctl_rpc_scope scope)
+{
+ u16 effects, imm_mask, reset_mask;
+ struct cxl_feat_entry *feat;
+ u32 flags;
+
+ if (rpc_in->op_size < sizeof(uuid_t))
+ return false;
+
+ feat = cxl_feature_info(cxlfs, &rpc_in->set_feat_in.uuid);
+ if (IS_ERR(feat))
+ return false;
+
+ /* Ensure that the attribute is changeable */
+ flags = le32_to_cpu(feat->flags);
+ if (!(flags & CXL_FEATURE_F_CHANGEABLE))
+ return false;
+
+ effects = le16_to_cpu(feat->effects);
+
+ /*
+ * Reserved bits are set; reject since the effects are not
+ * comprehended by the driver.
+ */
+ if (effects & CXL_CMD_EFFECTS_RESERVED) {
+ dev_warn_once(cxlfs->cxlds->dev,
+ "Reserved bits set in the Feature effects field!\n");
+ return false;
+ }
+
+ /* Currently no user background command support */
+ if (effects & CXL_CMD_BACKGROUND)
+ return false;
+
+ /* Effects cause immediate change, highest security scope is needed */
+ imm_mask = CXL_CMD_CONFIG_CHANGE_IMMEDIATE |
+ CXL_CMD_DATA_CHANGE_IMMEDIATE |
+ CXL_CMD_POLICY_CHANGE_IMMEDIATE |
+ CXL_CMD_LOG_CHANGE_IMMEDIATE;
+
+ reset_mask = CXL_CMD_CONFIG_CHANGE_COLD_RESET |
+ CXL_CMD_CONFIG_CHANGE_CONV_RESET |
+ CXL_CMD_CONFIG_CHANGE_CXL_RESET;
+
+ /* If no immediate or reset effect is set, the hardware has a bug */
+ if (!(effects & imm_mask) && !(effects & reset_mask))
+ return false;
+
+ /*
+ * If the Feature setting causes immediate configuration change
+ * then we need the full write permission policy.
+ */
+ if (effects & imm_mask && scope >= FWCTL_RPC_DEBUG_WRITE_FULL)
+ return true;
+
+ /*
+ * If the Feature setting only causes configuration change
+ * after a reset, then the lesser level of write permission
+ * policy is ok.
+ */
+ if (!(effects & imm_mask) && scope >= FWCTL_RPC_DEBUG_WRITE)
+ return true;
+
+ return false;
+}
+
+static bool cxlctl_validate_hw_command(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ enum fwctl_rpc_scope scope,
+ u16 opcode)
+{
+ struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox;
+
+ switch (opcode) {
+ case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
+ case CXL_MBOX_OP_GET_FEATURE:
+ return cxl_mbox->feat_cap >= CXL_FEATURES_RO;
+ case CXL_MBOX_OP_SET_FEATURE:
+ if (cxl_mbox->feat_cap < CXL_FEATURES_RW)
+ return false;
+ return cxlctl_validate_set_features(cxlfs, rpc_in, scope);
+ default:
+ return false;
+ }
+}
+
+static void *cxlctl_handle_commands(struct cxl_features_state *cxlfs,
+ const struct fwctl_rpc_cxl *rpc_in,
+ size_t *out_len, u16 opcode)
+{
+ switch (opcode) {
+ case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
+ return cxlctl_get_supported_features(cxlfs, rpc_in, out_len);
+ case CXL_MBOX_OP_GET_FEATURE:
+ return cxlctl_get_feature(cxlfs, rpc_in, out_len);
+ case CXL_MBOX_OP_SET_FEATURE:
+ return cxlctl_set_feature(cxlfs, rpc_in, out_len);
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+}
+
+static void *cxlctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
+ void *in, size_t in_len, size_t *out_len)
+{
+ struct fwctl_device *fwctl_dev = uctx->fwctl;
+ struct cxl_memdev *cxlmd = fwctl_to_memdev(fwctl_dev);
+ struct cxl_features_state *cxlfs = to_cxlfs(cxlmd->cxlds);
+ const struct fwctl_rpc_cxl *rpc_in = in;
+ u16 opcode = rpc_in->opcode;
+
+ if (!cxlctl_validate_hw_command(cxlfs, rpc_in, scope, opcode))
+ return ERR_PTR(-EINVAL);
+
+ return cxlctl_handle_commands(cxlfs, rpc_in, out_len, opcode);
+}
+
+static const struct fwctl_ops cxlctl_ops = {
+ .device_type = FWCTL_DEVICE_TYPE_CXL,
+ .uctx_size = sizeof(struct fwctl_uctx),
+ .open_uctx = cxlctl_open_uctx,
+ .close_uctx = cxlctl_close_uctx,
+ .fw_rpc = cxlctl_fw_rpc,
+};
+
+DEFINE_FREE(free_fwctl_dev, struct fwctl_device *, if (_T) fwctl_put(_T))
+
+static void free_memdev_fwctl(void *_fwctl_dev)
+{
+ struct fwctl_device *fwctl_dev = _fwctl_dev;
+
+ fwctl_unregister(fwctl_dev);
+ fwctl_put(fwctl_dev);
+}
+
+int devm_cxl_setup_fwctl(struct device *host, struct cxl_memdev *cxlmd)
+{
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_features_state *cxlfs;
+ int rc;
+
+ cxlfs = to_cxlfs(cxlds);
+ if (!cxlfs)
+ return -ENODEV;
+
+ /* No need to set up FWCTL if no user-allowed features are found */
+ if (!cxlfs->entries->num_user_features)
+ return -ENODEV;
+
+ struct fwctl_device *fwctl_dev __free(free_fwctl_dev) =
+ _fwctl_alloc_device(&cxlmd->dev, &cxlctl_ops, sizeof(*fwctl_dev));
+ if (!fwctl_dev)
+ return -ENOMEM;
+
+ rc = fwctl_register(fwctl_dev);
+ if (rc)
+ return rc;
+
+ return devm_add_action_or_reset(host, free_memdev_fwctl,
+ no_free_ptr(fwctl_dev));
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_fwctl, "CXL");
+
+MODULE_IMPORT_NS("FWCTL");
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 50e6a45b30ba..088caa6b6f74 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -34,7 +34,8 @@ static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
if (rc)
return rc;
- dev_dbg(&cxld->dev, "Added to port %s\n", dev_name(&port->dev));
+ dev_dbg(port->uport_dev, "%s added to %s\n",
+ dev_name(&cxld->dev), dev_name(&port->dev));
return 0;
}
@@ -213,16 +214,46 @@ void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds)
{
struct resource *p1, *p2;
- down_read(&cxl_dpa_rwsem);
+ guard(rwsem_read)(&cxl_dpa_rwsem);
for (p1 = cxlds->dpa_res.child; p1; p1 = p1->sibling) {
__cxl_dpa_debug(file, p1, 0);
for (p2 = p1->child; p2; p2 = p2->sibling)
__cxl_dpa_debug(file, p2, 1);
}
- up_read(&cxl_dpa_rwsem);
}
EXPORT_SYMBOL_NS_GPL(cxl_dpa_debug, "CXL");
+/* See request_skip() kernel-doc */
+static resource_size_t __adjust_skip(struct cxl_dev_state *cxlds,
+ const resource_size_t skip_base,
+ const resource_size_t skip_len,
+ const char *requester)
+{
+ const resource_size_t skip_end = skip_base + skip_len - 1;
+
+ for (int i = 0; i < cxlds->nr_partitions; i++) {
+ const struct resource *part_res = &cxlds->part[i].res;
+ resource_size_t adjust_start, adjust_end, size;
+
+ adjust_start = max(skip_base, part_res->start);
+ adjust_end = min(skip_end, part_res->end);
+
+ if (adjust_end < adjust_start)
+ continue;
+
+ size = adjust_end - adjust_start + 1;
+
+ if (!requester)
+ __release_region(&cxlds->dpa_res, adjust_start, size);
+ else if (!__request_region(&cxlds->dpa_res, adjust_start, size,
+ requester, 0))
+ return adjust_start - skip_base;
+ }
+
+ return skip_len;
+}
+#define release_skip(c, b, l) __adjust_skip((c), (b), (l), NULL)
+
/*
* Must be called in a context that synchronizes against this decoder's
* port ->remove() callback (like an endpoint decoder sysfs attribute)
@@ -241,7 +272,7 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
skip_start = res->start - cxled->skip;
__release_region(&cxlds->dpa_res, res->start, resource_size(res));
if (cxled->skip)
- __release_region(&cxlds->dpa_res, skip_start, cxled->skip);
+ release_skip(cxlds, skip_start, cxled->skip);
cxled->skip = 0;
cxled->dpa_res = NULL;
put_device(&cxled->cxld.dev);
@@ -250,9 +281,8 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
static void cxl_dpa_release(void *cxled)
{
- down_write(&cxl_dpa_rwsem);
+ guard(rwsem_write)(&cxl_dpa_rwsem);
__cxl_dpa_release(cxled);
- up_write(&cxl_dpa_rwsem);
}
/*
@@ -268,6 +298,58 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
__cxl_dpa_release(cxled);
}
+/**
+ * request_skip() - Track DPA 'skip' in @cxlds->dpa_res resource tree
+ * @cxlds: CXL.mem device context that parents @cxled
+ * @cxled: Endpoint decoder establishing new allocation that skips lower DPA
+ * @skip_base: DPA < start of new DPA allocation (DPAnew)
+ * @skip_len: @skip_base + @skip_len == DPAnew
+ *
+ * DPA 'skip' arises from out-of-sequence DPA allocation events relative
+ * to free capacity across multiple partitions. It is a wasteful event
+ * as usable DPA gets thrown away, but if a deployment has, for example,
+ * a dual RAM+PMEM device, wants to use PMEM, and has unallocated RAM
+ * DPA, the free RAM DPA must be sacrificed to start allocating PMEM.
+ * See third "Implementation Note" in CXL 3.1 8.2.4.19.13 "Decoder
+ * Protection" for more details.
+ *
+ * A 'skip' always covers the last allocated DPA in a previous partition
+ * to the start of the current partition to allocate. Allocations never
+ * start in the middle of a partition, and allocations are always
+ * de-allocated in reverse order (see cxl_dpa_free(), or natural devm
+ * unwind order from forced in-order allocation).
+ *
+ * If @cxlds->nr_partitions was guaranteed to be <= 2 then the 'skip'
+ * would always be contained to a single partition. Given
+ * @cxlds->nr_partitions may be > 2 it results in cases where the 'skip'
+ * might span "tail capacity of partition[0], all of partition[1], ...,
+ * all of partition[N-1]" to support allocating from partition[N]. That
+ * in turn interacts with the partition 'struct resource' boundaries
+ * within @cxlds->dpa_res whereby 'skip' requests need to be divided by
+ * partition. I.e. this is a quirk of using a 'struct resource' tree to
+ * detect range conflicts while also tracking partition boundaries in
+ * @cxlds->dpa_res.
+ */
+static int request_skip(struct cxl_dev_state *cxlds,
+ struct cxl_endpoint_decoder *cxled,
+ const resource_size_t skip_base,
+ const resource_size_t skip_len)
+{
+ resource_size_t skipped = __adjust_skip(cxlds, skip_base, skip_len,
+ dev_name(&cxled->cxld.dev));
+
+ if (skipped == skip_len)
+ return 0;
+
+ dev_dbg(cxlds->dev,
+ "%s: failed to reserve skipped space (%pa %pa %pa)\n",
+ dev_name(&cxled->cxld.dev), &skip_base, &skip_len, &skipped);
+
+ release_skip(cxlds, skip_base, skipped);
+
+ return -EBUSY;
+}
+
static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
resource_size_t base, resource_size_t len,
resource_size_t skipped)
@@ -277,6 +359,7 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *dev = &port->dev;
struct resource *res;
+ int rc;
lockdep_assert_held_write(&cxl_dpa_rwsem);
@@ -305,14 +388,9 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
}
if (skipped) {
- res = __request_region(&cxlds->dpa_res, base - skipped, skipped,
- dev_name(&cxled->cxld.dev), 0);
- if (!res) {
- dev_dbg(dev,
- "decoder%d.%d: failed to reserve skipped space\n",
- port->id, cxled->cxld.id);
- return -EBUSY;
- }
+ rc = request_skip(cxlds, cxled, base - skipped, skipped);
+ if (rc)
+ return rc;
}
res = __request_region(&cxlds->dpa_res, base, len,
dev_name(&cxled->cxld.dev), 0);
@@ -320,28 +398,117 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
dev_dbg(dev, "decoder%d.%d: failed to reserve allocation\n",
port->id, cxled->cxld.id);
if (skipped)
- __release_region(&cxlds->dpa_res, base - skipped,
- skipped);
+ release_skip(cxlds, base - skipped, skipped);
return -EBUSY;
}
cxled->dpa_res = res;
cxled->skip = skipped;
- if (resource_contains(&cxlds->pmem_res, res))
- cxled->mode = CXL_DECODER_PMEM;
- else if (resource_contains(&cxlds->ram_res, res))
- cxled->mode = CXL_DECODER_RAM;
- else {
- dev_warn(dev, "decoder%d.%d: %pr mixed mode not supported\n",
- port->id, cxled->cxld.id, cxled->dpa_res);
- cxled->mode = CXL_DECODER_MIXED;
- }
+ /*
+ * When allocating new capacity, ->part is already set; when
+ * discovering decoder settings at initial enumeration, ->part
+ * is not set.
+ */
+ if (cxled->part < 0)
+ for (int i = 0; i < cxlds->nr_partitions; i++)
+ if (resource_contains(&cxlds->part[i].res, res)) {
+ cxled->part = i;
+ break;
+ }
+
+ if (cxled->part < 0)
+ dev_warn(dev, "decoder%d.%d: %pr does not map any partition\n",
+ port->id, cxled->cxld.id, res);
port->hdm_end++;
get_device(&cxled->cxld.dev);
return 0;
}
+static int add_dpa_res(struct device *dev, struct resource *parent,
+ struct resource *res, resource_size_t start,
+ resource_size_t size, const char *type)
+{
+ int rc;
+
+ *res = (struct resource) {
+ .name = type,
+ .start = start,
+ .end = start + size - 1,
+ .flags = IORESOURCE_MEM,
+ };
+ if (resource_size(res) == 0) {
+ dev_dbg(dev, "DPA(%s): no capacity\n", res->name);
+ return 0;
+ }
+ rc = request_resource(parent, res);
+ if (rc) {
+ dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name,
+ res, rc);
+ return rc;
+ }
+
+ dev_dbg(dev, "DPA(%s): %pr\n", res->name, res);
+
+ return 0;
+}
+
+static const char *cxl_mode_name(enum cxl_partition_mode mode)
+{
+ switch (mode) {
+ case CXL_PARTMODE_RAM:
+ return "ram";
+ case CXL_PARTMODE_PMEM:
+ return "pmem";
+ default:
+ return "";
+ }
+}
+
+/* if this fails the caller must destroy @cxlds, there is no recovery */
+int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info)
+{
+ struct device *dev = cxlds->dev;
+
+ guard(rwsem_write)(&cxl_dpa_rwsem);
+
+ if (cxlds->nr_partitions)
+ return -EBUSY;
+
+ if (!info->size || !info->nr_partitions) {
+ cxlds->dpa_res = DEFINE_RES_MEM(0, 0);
+ cxlds->nr_partitions = 0;
+ return 0;
+ }
+
+ cxlds->dpa_res = DEFINE_RES_MEM(0, info->size);
+
+ for (int i = 0; i < info->nr_partitions; i++) {
+ const struct cxl_dpa_part_info *part = &info->part[i];
+ int rc;
+
+ cxlds->part[i].perf.qos_class = CXL_QOS_CLASS_INVALID;
+ cxlds->part[i].mode = part->mode;
+
+ /* Require ordered + contiguous partitions */
+ if (i) {
+ const struct cxl_dpa_part_info *prev = &info->part[i - 1];
+
+ if (prev->range.end + 1 != part->range.start)
+ return -EINVAL;
+ }
+ rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->part[i].res,
+ part->range.start, range_len(&part->range),
+ cxl_mode_name(part->mode));
+ if (rc)
+ return rc;
+ cxlds->nr_partitions++;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_dpa_setup);
+
int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
resource_size_t base, resource_size_t len,
resource_size_t skipped)
@@ -362,14 +529,11 @@ EXPORT_SYMBOL_NS_GPL(devm_cxl_dpa_reserve, "CXL");
resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled)
{
- resource_size_t size = 0;
-
- down_read(&cxl_dpa_rwsem);
+ guard(rwsem_read)(&cxl_dpa_rwsem);
if (cxled->dpa_res)
- size = resource_size(cxled->dpa_res);
- up_read(&cxl_dpa_rwsem);
+ return resource_size(cxled->dpa_res);
- return size;
+ return 0;
}
resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled)
@@ -383,155 +547,147 @@ resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled)
return base;
}
+bool cxl_resource_contains_addr(const struct resource *res, const resource_size_t addr)
+{
+ struct resource _addr = DEFINE_RES_MEM(addr, 1);
+
+ return resource_contains(res, &_addr);
+}
+
int cxl_dpa_free(struct cxl_endpoint_decoder *cxled)
{
struct cxl_port *port = cxled_to_port(cxled);
struct device *dev = &cxled->cxld.dev;
- int rc;
- down_write(&cxl_dpa_rwsem);
- if (!cxled->dpa_res) {
- rc = 0;
- goto out;
- }
+ guard(rwsem_write)(&cxl_dpa_rwsem);
+ if (!cxled->dpa_res)
+ return 0;
if (cxled->cxld.region) {
dev_dbg(dev, "decoder assigned to: %s\n",
dev_name(&cxled->cxld.region->dev));
- rc = -EBUSY;
- goto out;
+ return -EBUSY;
}
if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) {
dev_dbg(dev, "decoder enabled\n");
- rc = -EBUSY;
- goto out;
+ return -EBUSY;
}
if (cxled->cxld.id != port->hdm_end) {
dev_dbg(dev, "expected decoder%d.%d\n", port->id,
port->hdm_end);
- rc = -EBUSY;
- goto out;
+ return -EBUSY;
}
+
devm_cxl_dpa_release(cxled);
- rc = 0;
-out:
- up_write(&cxl_dpa_rwsem);
- return rc;
+ return 0;
}
-int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
- enum cxl_decoder_mode mode)
+int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled,
+ enum cxl_partition_mode mode)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *dev = &cxled->cxld.dev;
-
- switch (mode) {
- case CXL_DECODER_RAM:
- case CXL_DECODER_PMEM:
- break;
- default:
- dev_dbg(dev, "unsupported mode: %d\n", mode);
- return -EINVAL;
- }
+ int part;
guard(rwsem_write)(&cxl_dpa_rwsem);
if (cxled->cxld.flags & CXL_DECODER_F_ENABLE)
return -EBUSY;
- /*
- * Only allow modes that are supported by the current partition
- * configuration
- */
- if (mode == CXL_DECODER_PMEM && !resource_size(&cxlds->pmem_res)) {
- dev_dbg(dev, "no available pmem capacity\n");
- return -ENXIO;
+ for (part = 0; part < cxlds->nr_partitions; part++)
+ if (cxlds->part[part].mode == mode)
+ break;
+
+ if (part >= cxlds->nr_partitions) {
+ dev_dbg(dev, "unsupported mode: %d\n", mode);
+ return -EINVAL;
}
- if (mode == CXL_DECODER_RAM && !resource_size(&cxlds->ram_res)) {
- dev_dbg(dev, "no available ram capacity\n");
+
+ if (!resource_size(&cxlds->part[part].res)) {
+ dev_dbg(dev, "no available capacity for mode: %d\n", mode);
return -ENXIO;
}
- cxled->mode = mode;
+ cxled->part = part;
return 0;
}
-int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size)
+static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
- resource_size_t free_ram_start, free_pmem_start;
- struct cxl_port *port = cxled_to_port(cxled);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *dev = &cxled->cxld.dev;
- resource_size_t start, avail, skip;
+ struct resource *res, *prev = NULL;
+ resource_size_t start, avail, skip, skip_start;
struct resource *p, *last;
- int rc;
+ int part;
- down_write(&cxl_dpa_rwsem);
+ guard(rwsem_write)(&cxl_dpa_rwsem);
if (cxled->cxld.region) {
dev_dbg(dev, "decoder attached to %s\n",
dev_name(&cxled->cxld.region->dev));
- rc = -EBUSY;
- goto out;
+ return -EBUSY;
}
if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) {
dev_dbg(dev, "decoder enabled\n");
- rc = -EBUSY;
- goto out;
+ return -EBUSY;
}
- for (p = cxlds->ram_res.child, last = NULL; p; p = p->sibling)
- last = p;
- if (last)
- free_ram_start = last->end + 1;
- else
- free_ram_start = cxlds->ram_res.start;
+ part = cxled->part;
+ if (part < 0) {
+ dev_dbg(dev, "partition not set\n");
+ return -EBUSY;
+ }
- for (p = cxlds->pmem_res.child, last = NULL; p; p = p->sibling)
+ res = &cxlds->part[part].res;
+ for (p = res->child, last = NULL; p; p = p->sibling)
last = p;
if (last)
- free_pmem_start = last->end + 1;
+ start = last->end + 1;
else
- free_pmem_start = cxlds->pmem_res.start;
-
- if (cxled->mode == CXL_DECODER_RAM) {
- start = free_ram_start;
- avail = cxlds->ram_res.end - start + 1;
- skip = 0;
- } else if (cxled->mode == CXL_DECODER_PMEM) {
- resource_size_t skip_start, skip_end;
+ start = res->start;
- start = free_pmem_start;
- avail = cxlds->pmem_res.end - start + 1;
- skip_start = free_ram_start;
-
- /*
- * If some pmem is already allocated, then that allocation
- * already handled the skip.
- */
- if (cxlds->pmem_res.child &&
- skip_start == cxlds->pmem_res.child->start)
- skip_end = skip_start - 1;
- else
- skip_end = start - 1;
- skip = skip_end - skip_start + 1;
- } else {
- dev_dbg(dev, "mode not set\n");
- rc = -EINVAL;
- goto out;
+ /*
+ * To allocate at partition N, a skip needs to be calculated for all
+ * unallocated space at lower partitions indices.
+ *
+ * If a partition has any allocations, the search can end because a
+ * previous cxl_dpa_alloc() invocation is assumed to have accounted for
+ * all previous partitions.
+ */
+ skip_start = CXL_RESOURCE_NONE;
+ for (int i = part; i; i--) {
+ prev = &cxlds->part[i - 1].res;
+ for (p = prev->child, last = NULL; p; p = p->sibling)
+ last = p;
+ if (last) {
+ skip_start = last->end + 1;
+ break;
+ }
+ skip_start = prev->start;
}
+ avail = res->end - start + 1;
+ if (skip_start == CXL_RESOURCE_NONE)
+ skip = 0;
+ else
+ skip = res->start - skip_start;
+
if (size > avail) {
- dev_dbg(dev, "%pa exceeds available %s capacity: %pa\n", &size,
- cxl_decoder_mode_name(cxled->mode), &avail);
- rc = -ENOSPC;
- goto out;
+ dev_dbg(dev, "%llu exceeds available %s capacity: %llu\n", size,
+ res->name, (u64)avail);
+ return -ENOSPC;
}
- rc = __cxl_dpa_reserve(cxled, start, size, skip);
-out:
- up_write(&cxl_dpa_rwsem);
+ return __cxl_dpa_reserve(cxled, start, size, skip);
+}
+
+int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size)
+{
+ struct cxl_port *port = cxled_to_port(cxled);
+ int rc;
+ rc = __cxl_dpa_alloc(cxled, size);
if (rc)
return rc;
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 548564c770c0..2689e6453c5a 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -11,6 +11,7 @@
#include "core.h"
#include "trace.h"
+#include "mce.h"
static bool cxl_raw_allow_all;
@@ -349,40 +350,39 @@ static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in)
return true;
}
-static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox,
- struct cxl_memdev_state *mds, u16 opcode,
+static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox_cmd,
+ struct cxl_mailbox *cxl_mbox, u16 opcode,
size_t in_size, size_t out_size, u64 in_payload)
{
- struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
- *mbox = (struct cxl_mbox_cmd) {
+ *mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = opcode,
.size_in = in_size,
};
if (in_size) {
- mbox->payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
- in_size);
- if (IS_ERR(mbox->payload_in))
- return PTR_ERR(mbox->payload_in);
+ mbox_cmd->payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
+ in_size);
+ if (IS_ERR(mbox_cmd->payload_in))
+ return PTR_ERR(mbox_cmd->payload_in);
- if (!cxl_payload_from_user_allowed(opcode, mbox->payload_in)) {
- dev_dbg(mds->cxlds.dev, "%s: input payload not allowed\n",
+ if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in)) {
+ dev_dbg(cxl_mbox->host, "%s: input payload not allowed\n",
cxl_mem_opcode_to_name(opcode));
- kvfree(mbox->payload_in);
+ kvfree(mbox_cmd->payload_in);
return -EBUSY;
}
}
/* Prepare to handle a full payload for variable sized output */
if (out_size == CXL_VARIABLE_PAYLOAD)
- mbox->size_out = cxl_mbox->payload_size;
+ mbox_cmd->size_out = cxl_mbox->payload_size;
else
- mbox->size_out = out_size;
+ mbox_cmd->size_out = out_size;
- if (mbox->size_out) {
- mbox->payload_out = kvzalloc(mbox->size_out, GFP_KERNEL);
- if (!mbox->payload_out) {
- kvfree(mbox->payload_in);
+ if (mbox_cmd->size_out) {
+ mbox_cmd->payload_out = kvzalloc(mbox_cmd->size_out, GFP_KERNEL);
+ if (!mbox_cmd->payload_out) {
+ kvfree(mbox_cmd->payload_in);
return -ENOMEM;
}
}
@@ -397,10 +397,8 @@ static void cxl_mbox_cmd_dtor(struct cxl_mbox_cmd *mbox)
static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
const struct cxl_send_command *send_cmd,
- struct cxl_memdev_state *mds)
+ struct cxl_mailbox *cxl_mbox)
{
- struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
-
if (send_cmd->raw.rsvd)
return -EINVAL;
@@ -415,7 +413,7 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
return -EPERM;
- dev_WARN_ONCE(mds->cxlds.dev, true, "raw command path used\n");
+ dev_WARN_ONCE(cxl_mbox->host, true, "raw command path used\n");
*mem_cmd = (struct cxl_mem_command) {
.info = {
@@ -431,7 +429,7 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
const struct cxl_send_command *send_cmd,
- struct cxl_memdev_state *mds)
+ struct cxl_mailbox *cxl_mbox)
{
struct cxl_mem_command *c = &cxl_mem_commands[send_cmd->id];
const struct cxl_command_info *info = &c->info;
@@ -446,11 +444,11 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
return -EINVAL;
/* Check that the command is enabled for hardware */
- if (!test_bit(info->id, mds->enabled_cmds))
+ if (!test_bit(info->id, cxl_mbox->enabled_cmds))
return -ENOTTY;
/* Check that the command is not claimed for exclusive kernel use */
- if (test_bit(info->id, mds->exclusive_cmds))
+ if (test_bit(info->id, cxl_mbox->exclusive_cmds))
return -EBUSY;
/* Check the input buffer is the expected size */
@@ -479,7 +477,7 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
/**
* cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
* @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd.
- * @mds: The driver data for the operation
+ * @cxl_mbox: CXL mailbox context
* @send_cmd: &struct cxl_send_command copied in from userspace.
*
* Return:
@@ -494,10 +492,9 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
* safe to send to the hardware.
*/
static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd,
- struct cxl_memdev_state *mds,
+ struct cxl_mailbox *cxl_mbox,
const struct cxl_send_command *send_cmd)
{
- struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mem_command mem_cmd;
int rc;
@@ -514,24 +511,23 @@ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd,
/* Sanitize and construct a cxl_mem_command */
if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW)
- rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, mds);
+ rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, cxl_mbox);
else
- rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, mds);
+ rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, cxl_mbox);
if (rc)
return rc;
/* Sanitize and construct a cxl_mbox_cmd */
- return cxl_mbox_cmd_ctor(mbox_cmd, mds, mem_cmd.opcode,
+ return cxl_mbox_cmd_ctor(mbox_cmd, cxl_mbox, mem_cmd.opcode,
mem_cmd.info.size_in, mem_cmd.info.size_out,
send_cmd->in.payload);
}
-int cxl_query_cmd(struct cxl_memdev *cxlmd,
+int cxl_query_cmd(struct cxl_mailbox *cxl_mbox,
struct cxl_mem_query_commands __user *q)
{
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
- struct device *dev = &cxlmd->dev;
+ struct device *dev = cxl_mbox->host;
struct cxl_mem_command *cmd;
u32 n_commands;
int j = 0;
@@ -552,9 +548,9 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
cxl_for_each_cmd(cmd) {
struct cxl_command_info info = cmd->info;
- if (test_bit(info.id, mds->enabled_cmds))
+ if (test_bit(info.id, cxl_mbox->enabled_cmds))
info.flags |= CXL_MEM_COMMAND_FLAG_ENABLED;
- if (test_bit(info.id, mds->exclusive_cmds))
+ if (test_bit(info.id, cxl_mbox->exclusive_cmds))
info.flags |= CXL_MEM_COMMAND_FLAG_EXCLUSIVE;
if (copy_to_user(&q->commands[j++], &info, sizeof(info)))
@@ -569,7 +565,7 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
/**
* handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
- * @mds: The driver data for the operation
+ * @cxl_mbox: The mailbox context for the operation.
* @mbox_cmd: The validated mailbox command.
* @out_payload: Pointer to userspace's output payload.
* @size_out: (Input) Max payload size to copy out.
@@ -590,13 +586,12 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
*
* See cxl_send_cmd().
*/
-static int handle_mailbox_cmd_from_user(struct cxl_memdev_state *mds,
+static int handle_mailbox_cmd_from_user(struct cxl_mailbox *cxl_mbox,
struct cxl_mbox_cmd *mbox_cmd,
u64 out_payload, s32 *size_out,
u32 *retval)
{
- struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
- struct device *dev = mds->cxlds.dev;
+ struct device *dev = cxl_mbox->host;
int rc;
dev_dbg(dev,
@@ -633,10 +628,9 @@ out:
return rc;
}
-int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
+int cxl_send_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_send_command __user *s)
{
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
- struct device *dev = &cxlmd->dev;
+ struct device *dev = cxl_mbox->host;
struct cxl_send_command send;
struct cxl_mbox_cmd mbox_cmd;
int rc;
@@ -646,11 +640,11 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
if (copy_from_user(&send, s, sizeof(send)))
return -EFAULT;
- rc = cxl_validate_cmd_from_user(&mbox_cmd, mds, &send);
+ rc = cxl_validate_cmd_from_user(&mbox_cmd, cxl_mbox, &send);
if (rc)
return rc;
- rc = handle_mailbox_cmd_from_user(mds, &mbox_cmd, send.out.payload,
+ rc = handle_mailbox_cmd_from_user(cxl_mbox, &mbox_cmd, send.out.payload,
&send.out.size, &send.retval);
if (rc)
return rc;
@@ -713,6 +707,35 @@ static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid,
return 0;
}
+static int check_features_opcodes(u16 opcode, int *ro_cmds, int *wr_cmds)
+{
+ switch (opcode) {
+ case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
+ case CXL_MBOX_OP_GET_FEATURE:
+ (*ro_cmds)++;
+ return 1;
+ case CXL_MBOX_OP_SET_FEATURE:
+ (*wr_cmds)++;
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/* 'Get Supported Features' and 'Get Feature' */
+#define MAX_FEATURES_READ_CMDS 2
+static void set_features_cap(struct cxl_mailbox *cxl_mbox,
+ int ro_cmds, int wr_cmds)
+{
+ /* Setting up Features capability while walking the CEL */
+ if (ro_cmds == MAX_FEATURES_READ_CMDS) {
+ if (wr_cmds)
+ cxl_mbox->feat_cap = CXL_FEATURES_RW;
+ else
+ cxl_mbox->feat_cap = CXL_FEATURES_RO;
+ }
+}
+
/**
* cxl_walk_cel() - Walk through the Command Effects Log.
* @mds: The driver data for the operation
@@ -724,10 +747,11 @@ static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid,
*/
static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_cel_entry *cel_entry;
const int cel_entries = size / sizeof(*cel_entry);
struct device *dev = mds->cxlds.dev;
- int i;
+ int i, ro_cmds = 0, wr_cmds = 0;
cel_entry = (struct cxl_cel_entry *) cel;
@@ -737,10 +761,13 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
int enabled = 0;
if (cmd) {
- set_bit(cmd->info.id, mds->enabled_cmds);
+ set_bit(cmd->info.id, cxl_mbox->enabled_cmds);
enabled++;
}
+ enabled += check_features_opcodes(opcode, &ro_cmds,
+ &wr_cmds);
+
if (cxl_is_poison_command(opcode)) {
cxl_set_poison_cmd_enabled(&mds->poison, opcode);
enabled++;
@@ -754,6 +781,8 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
dev_dbg(dev, "Opcode 0x%04x %s\n", opcode,
enabled ? "enabled" : "unsupported by driver");
}
+
+ set_features_cap(cxl_mbox, ro_cmds, wr_cmds);
}
static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_memdev_state *mds)
@@ -807,6 +836,7 @@ static const uuid_t log_uuid[] = {
*/
int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_get_supported_logs *gsl;
struct device *dev = mds->cxlds.dev;
struct cxl_mem_command *cmd;
@@ -845,7 +875,7 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
/* In case CEL was bogus, enable some default commands. */
cxl_for_each_cmd(cmd)
if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
- set_bit(cmd->info.id, mds->enabled_cmds);
+ set_bit(cmd->info.id, cxl_mbox->enabled_cmds);
/* Found the required CEL */
rc = 0;
@@ -871,7 +901,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
}
if (trace_cxl_general_media_enabled() || trace_cxl_dram_enabled()) {
- u64 dpa, hpa = ULLONG_MAX;
+ u64 dpa, hpa = ULLONG_MAX, hpa_alias = ULLONG_MAX;
struct cxl_region *cxlr;
/*
@@ -884,14 +914,27 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK;
cxlr = cxl_dpa_to_region(cxlmd, dpa);
- if (cxlr)
+ if (cxlr) {
+ u64 cache_size = cxlr->params.cache_size;
+
hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa);
+ if (cache_size)
+ hpa_alias = hpa - cache_size;
+ }
+
+ if (event_type == CXL_CPER_EVENT_GEN_MEDIA) {
+ if (cxl_store_rec_gen_media((struct cxl_memdev *)cxlmd, evt))
+ dev_dbg(&cxlmd->dev, "CXL store rec_gen_media failed\n");
- if (event_type == CXL_CPER_EVENT_GEN_MEDIA)
trace_cxl_general_media(cxlmd, type, cxlr, hpa,
- &evt->gen_media);
- else if (event_type == CXL_CPER_EVENT_DRAM)
- trace_cxl_dram(cxlmd, type, cxlr, hpa, &evt->dram);
+ hpa_alias, &evt->gen_media);
+ } else if (event_type == CXL_CPER_EVENT_DRAM) {
+ if (cxl_store_rec_dram((struct cxl_memdev *)cxlmd, evt))
+ dev_dbg(&cxlmd->dev, "CXL store rec_dram failed\n");
+
+ trace_cxl_dram(cxlmd, type, cxlr, hpa, hpa_alias,
+ &evt->dram);
+ }
}
}
EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, "CXL");
@@ -1097,10 +1140,6 @@ static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds)
le64_to_cpu(pi.active_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
mds->active_persistent_bytes =
le64_to_cpu(pi.active_persistent_cap) * CXL_CAPACITY_MULTIPLIER;
- mds->next_volatile_bytes =
- le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
- mds->next_persistent_bytes =
- le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
return 0;
}
@@ -1222,74 +1261,54 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd)
{
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
struct cxl_port *endpoint;
- int rc;
/* synchronize with cxl_mem_probe() and decoder write operations */
guard(device)(&cxlmd->dev);
endpoint = cxlmd->endpoint;
- down_read(&cxl_region_rwsem);
+ guard(rwsem_read)(&cxl_region_rwsem);
/*
* Require an endpoint to be safe otherwise the driver can not
* be sure that the device is unmapped.
*/
if (endpoint && cxl_num_decoders_committed(endpoint) == 0)
- rc = __cxl_mem_sanitize(mds, cmd);
- else
- rc = -EBUSY;
- up_read(&cxl_region_rwsem);
+ return __cxl_mem_sanitize(mds, cmd);
- return rc;
+ return -EBUSY;
}
-static int add_dpa_res(struct device *dev, struct resource *parent,
- struct resource *res, resource_size_t start,
- resource_size_t size, const char *type)
+static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum cxl_partition_mode mode)
{
- int rc;
+ int i = info->nr_partitions;
- res->name = type;
- res->start = start;
- res->end = start + size - 1;
- res->flags = IORESOURCE_MEM;
- if (resource_size(res) == 0) {
- dev_dbg(dev, "DPA(%s): no capacity\n", res->name);
- return 0;
- }
- rc = request_resource(parent, res);
- if (rc) {
- dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name,
- res, rc);
- return rc;
- }
-
- dev_dbg(dev, "DPA(%s): %pr\n", res->name, res);
+ if (size == 0)
+ return;
- return 0;
+ info->part[i].range = (struct range) {
+ .start = start,
+ .end = start + size - 1,
+ };
+ info->part[i].mode = mode;
+ info->nr_partitions++;
}
-int cxl_mem_create_range_info(struct cxl_memdev_state *mds)
+int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info)
{
struct cxl_dev_state *cxlds = &mds->cxlds;
struct device *dev = cxlds->dev;
int rc;
if (!cxlds->media_ready) {
- cxlds->dpa_res = DEFINE_RES_MEM(0, 0);
- cxlds->ram_res = DEFINE_RES_MEM(0, 0);
- cxlds->pmem_res = DEFINE_RES_MEM(0, 0);
+ info->size = 0;
return 0;
}
- cxlds->dpa_res = DEFINE_RES_MEM(0, mds->total_bytes);
+ info->size = mds->total_bytes;
if (mds->partition_align_bytes == 0) {
- rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
- mds->volatile_only_bytes, "ram");
- if (rc)
- return rc;
- return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
- mds->volatile_only_bytes,
- mds->persistent_only_bytes, "pmem");
+ add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM);
+ add_part(info, mds->volatile_only_bytes,
+ mds->persistent_only_bytes, CXL_PARTMODE_PMEM);
+ return 0;
}
rc = cxl_mem_get_partition_info(mds);
@@ -1298,15 +1317,52 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds)
return rc;
}
- rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
- mds->active_volatile_bytes, "ram");
- if (rc)
- return rc;
- return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
- mds->active_volatile_bytes,
- mds->active_persistent_bytes, "pmem");
+ add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM);
+ add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes,
+ CXL_PARTMODE_PMEM);
+
+ return 0;
}
-EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, "CXL");
+EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL");
+
+int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count)
+{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+ struct cxl_mbox_get_health_info_out hi;
+ struct cxl_mbox_cmd mbox_cmd;
+ int rc;
+
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_HEALTH_INFO,
+ .size_out = sizeof(hi),
+ .payload_out = &hi,
+ };
+
+ rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+ if (!rc)
+ *count = le32_to_cpu(hi.dirty_shutdown_cnt);
+
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_get_dirty_count, "CXL");
+
+int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds)
+{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+ struct cxl_mbox_cmd mbox_cmd;
+ struct cxl_mbox_set_shutdown_state_in in = {
+ .state = 1
+ };
+
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_SET_SHUTDOWN_STATE,
+ .size_in = sizeof(in),
+ .payload_in = &in,
+ };
+
+ return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_arm_dirty_shutdown, "CXL");
int cxl_set_timestamp(struct cxl_memdev_state *mds)
{
@@ -1438,6 +1494,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, "CXL");
struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
{
struct cxl_memdev_state *mds;
+ int rc;
mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL);
if (!mds) {
@@ -1448,10 +1505,15 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
mutex_init(&mds->event.log_lock);
mds->cxlds.dev = dev;
mds->cxlds.reg_map.host = dev;
+ mds->cxlds.cxl_mbox.host = dev;
mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE;
mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
- mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID;
- mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID;
+
+ rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier);
+ if (rc == -EOPNOTSUPP)
+ dev_warn(dev, "CXL MCE unsupported\n");
+ else if (rc)
+ return ERR_PTR(rc);
return mds;
}
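
For reference, a minimal user-space sketch of the partition bookkeeping that add_part() and cxl_mem_dpa_fetch() above perform, assuming hypothetical 16 GiB volatile and 16 GiB persistent capacities; the enum and struct names are stand-ins for the kernel's cxl_partition_mode and cxl_dpa_info, not the real definitions:

#include <stdio.h>
#include <stdint.h>

enum part_mode { PART_RAM, PART_PMEM };	/* stand-in for enum cxl_partition_mode */

struct part_info {
	uint64_t start, end;		/* inclusive, mirroring struct range */
	enum part_mode mode;
};

struct dpa_info {
	uint64_t size;
	int nr_partitions;
	struct part_info part[2];
};

/* Mirror of add_part(): skip empty partitions, record inclusive ranges */
static void add_part(struct dpa_info *info, uint64_t start, uint64_t size,
		     enum part_mode mode)
{
	int i = info->nr_partitions;

	if (size == 0)
		return;
	info->part[i].start = start;
	info->part[i].end = start + size - 1;
	info->part[i].mode = mode;
	info->nr_partitions++;
}

int main(void)
{
	uint64_t volatile_bytes = 16ULL << 30;		/* hypothetical */
	uint64_t persistent_bytes = 16ULL << 30;	/* hypothetical */
	struct dpa_info info = { .size = volatile_bytes + persistent_bytes };

	add_part(&info, 0, volatile_bytes, PART_RAM);
	add_part(&info, volatile_bytes, persistent_bytes, PART_PMEM);

	for (int i = 0; i < info.nr_partitions; i++)
		printf("part%d: %#llx-%#llx %s\n", i,
		       (unsigned long long)info.part[i].start,
		       (unsigned long long)info.part[i].end,
		       info.part[i].mode == PART_RAM ? "ram" : "pmem");
	return 0;
}

With these made-up capacities, RAM occupies DPA 0 through 16 GiB - 1 and PMEM follows immediately, which is the layout the memdev attribute changes later in this series (ram sizing from partition 0) rely on.
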
diff --git a/drivers/cxl/core/mce.c b/drivers/cxl/core/mce.c
new file mode 100644
index 000000000000..ff8d078c6ca1
--- /dev/null
+++ b/drivers/cxl/core/mce.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2024 Intel Corporation. All rights reserved. */
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/set_memory.h>
+#include <asm/mce.h>
+#include <cxlmem.h>
+#include "mce.h"
+
+static int cxl_handle_mce(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cxl_memdev_state *mds = container_of(nb, struct cxl_memdev_state,
+ mce_notifier);
+ struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
+ struct cxl_port *endpoint = cxlmd->endpoint;
+ struct mce *mce = data;
+ u64 spa, spa_alias;
+ unsigned long pfn;
+
+ if (!mce || !mce_usable_address(mce))
+ return NOTIFY_DONE;
+
+ if (!endpoint)
+ return NOTIFY_DONE;
+
+ spa = mce->addr & MCI_ADDR_PHYSADDR;
+
+ pfn = spa >> PAGE_SHIFT;
+ if (!pfn_valid(pfn))
+ return NOTIFY_DONE;
+
+ spa_alias = cxl_port_get_spa_cache_alias(endpoint, spa);
+ if (spa_alias == ~0ULL)
+ return NOTIFY_DONE;
+
+ pfn = spa_alias >> PAGE_SHIFT;
+
+ /*
+ * Take down the aliased memory page. The original memory page flagged
+	 * by the MCE will be taken care of by the standard MCE handler.
+ */
+	dev_emerg(mds->cxlds.dev, "Offlining aliased SPA address: %#llx\n",
+ spa_alias);
+ if (!memory_failure(pfn, 0))
+ set_mce_nospec(pfn);
+
+ return NOTIFY_OK;
+}
+
+static void cxl_unregister_mce_notifier(void *mce_notifier)
+{
+ mce_unregister_decode_chain(mce_notifier);
+}
+
+int devm_cxl_register_mce_notifier(struct device *dev,
+ struct notifier_block *mce_notifier)
+{
+ mce_notifier->notifier_call = cxl_handle_mce;
+ mce_notifier->priority = MCE_PRIO_UC;
+ mce_register_decode_chain(mce_notifier);
+
+ return devm_add_action_or_reset(dev, cxl_unregister_mce_notifier,
+ mce_notifier);
+}
diff --git a/drivers/cxl/core/mce.h b/drivers/cxl/core/mce.h
new file mode 100644
index 000000000000..ace73424eeb6
--- /dev/null
+++ b/drivers/cxl/core/mce.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2024 Intel Corporation. All rights reserved. */
+#ifndef _CXL_CORE_MCE_H_
+#define _CXL_CORE_MCE_H_
+
+#include <linux/notifier.h>
+
+#ifdef CONFIG_CXL_MCE
+int devm_cxl_register_mce_notifier(struct device *dev,
+				   struct notifier_block *mce_notifier);
+#else
+static inline int
+devm_cxl_register_mce_notifier(struct device *dev,
+ struct notifier_block *mce_notifier)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+#endif
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index ae3dfcbe8938..f88a13adf7fa 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -27,6 +27,7 @@ static void cxl_memdev_release(struct device *dev)
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
ida_free(&cxl_memdev_ida, cxlmd->id);
+ devm_cxl_memdev_edac_release(cxlmd);
kfree(cxlmd);
}
@@ -75,12 +76,20 @@ static ssize_t label_storage_size_show(struct device *dev,
}
static DEVICE_ATTR_RO(label_storage_size);
+static resource_size_t cxl_ram_size(struct cxl_dev_state *cxlds)
+{
+ /* Static RAM is only expected at partition 0. */
+ if (cxlds->part[0].mode != CXL_PARTMODE_RAM)
+ return 0;
+ return resource_size(&cxlds->part[0].res);
+}
+
static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
- unsigned long long len = resource_size(&cxlds->ram_res);
+ unsigned long long len = cxl_ram_size(cxlds);
return sysfs_emit(buf, "%#llx\n", len);
}
@@ -93,7 +102,7 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
- unsigned long long len = resource_size(&cxlds->pmem_res);
+ unsigned long long len = cxl_pmem_size(cxlds);
return sysfs_emit(buf, "%#llx\n", len);
}
@@ -145,8 +154,8 @@ static ssize_t security_state_show(struct device *dev,
return sysfs_emit(buf, "frozen\n");
if (state & CXL_PMEM_SEC_STATE_LOCKED)
return sysfs_emit(buf, "locked\n");
- else
- return sysfs_emit(buf, "unlocked\n");
+
+ return sysfs_emit(buf, "unlocked\n");
}
static struct device_attribute dev_attr_security_state =
__ATTR(state, 0444, security_state_show, NULL);
@@ -198,22 +207,17 @@ static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
int rc = 0;
/* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */
- if (resource_size(&cxlds->pmem_res)) {
- offset = cxlds->pmem_res.start;
- length = resource_size(&cxlds->pmem_res);
- rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
- if (rc)
- return rc;
- }
- if (resource_size(&cxlds->ram_res)) {
- offset = cxlds->ram_res.start;
- length = resource_size(&cxlds->ram_res);
+ for (int i = 0; i < cxlds->nr_partitions; i++) {
+ const struct resource *res = &cxlds->part[i].res;
+
+ offset = res->start;
+ length = resource_size(res);
rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
/*
* Invalid Physical Address is not an error for
* volatile addresses. Device support is optional.
*/
- if (rc == -EFAULT)
+ if (rc == -EFAULT && cxlds->part[i].mode == CXL_PARTMODE_RAM)
rc = 0;
}
return rc;
@@ -404,14 +408,21 @@ static struct attribute *cxl_memdev_attributes[] = {
NULL,
};
+static struct cxl_dpa_perf *to_pmem_perf(struct cxl_dev_state *cxlds)
+{
+ for (int i = 0; i < cxlds->nr_partitions; i++)
+ if (cxlds->part[i].mode == CXL_PARTMODE_PMEM)
+ return &cxlds->part[i].perf;
+ return NULL;
+}
+
static ssize_t pmem_qos_class_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
- return sysfs_emit(buf, "%d\n", mds->pmem_perf.qos_class);
+ return sysfs_emit(buf, "%d\n", to_pmem_perf(cxlds)->qos_class);
}
static struct device_attribute dev_attr_pmem_qos_class =
@@ -423,14 +434,20 @@ static struct attribute *cxl_memdev_pmem_attributes[] = {
NULL,
};
+static struct cxl_dpa_perf *to_ram_perf(struct cxl_dev_state *cxlds)
+{
+ if (cxlds->part[0].mode != CXL_PARTMODE_RAM)
+ return NULL;
+ return &cxlds->part[0].perf;
+}
+
static ssize_t ram_qos_class_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
- return sysfs_emit(buf, "%d\n", mds->ram_perf.qos_class);
+ return sysfs_emit(buf, "%d\n", to_ram_perf(cxlds)->qos_class);
}
static struct device_attribute dev_attr_ram_qos_class =
@@ -466,11 +483,11 @@ static umode_t cxl_ram_visible(struct kobject *kobj, struct attribute *a, int n)
{
struct device *dev = kobj_to_dev(kobj);
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_dpa_perf *perf = to_ram_perf(cxlmd->cxlds);
- if (a == &dev_attr_ram_qos_class.attr)
- if (mds->ram_perf.qos_class == CXL_QOS_CLASS_INVALID)
- return 0;
+ if (a == &dev_attr_ram_qos_class.attr &&
+ (!perf || perf->qos_class == CXL_QOS_CLASS_INVALID))
+ return 0;
return a->mode;
}
@@ -485,11 +502,11 @@ static umode_t cxl_pmem_visible(struct kobject *kobj, struct attribute *a, int n
{
struct device *dev = kobj_to_dev(kobj);
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_dpa_perf *perf = to_pmem_perf(cxlmd->cxlds);
- if (a == &dev_attr_pmem_qos_class.attr)
- if (mds->pmem_perf.qos_class == CXL_QOS_CLASS_INVALID)
- return 0;
+ if (a == &dev_attr_pmem_qos_class.attr &&
+ (!perf || perf->qos_class == CXL_QOS_CLASS_INVALID))
+ return 0;
return a->mode;
}
@@ -564,10 +581,11 @@ EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL");
void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
unsigned long *cmds)
{
- down_write(&cxl_memdev_rwsem);
- bitmap_or(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
- CXL_MEM_COMMAND_ID_MAX);
- up_write(&cxl_memdev_rwsem);
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+
+ guard(rwsem_write)(&cxl_memdev_rwsem);
+ bitmap_or(cxl_mbox->exclusive_cmds, cxl_mbox->exclusive_cmds,
+ cmds, CXL_MEM_COMMAND_ID_MAX);
}
EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL");
@@ -579,10 +597,11 @@ EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL");
void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
unsigned long *cmds)
{
- down_write(&cxl_memdev_rwsem);
- bitmap_andnot(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
- CXL_MEM_COMMAND_ID_MAX);
- up_write(&cxl_memdev_rwsem);
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+
+ guard(rwsem_write)(&cxl_memdev_rwsem);
+ bitmap_andnot(cxl_mbox->exclusive_cmds, cxl_mbox->exclusive_cmds,
+ cmds, CXL_MEM_COMMAND_ID_MAX);
}
EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, "CXL");
@@ -590,9 +609,8 @@ static void cxl_memdev_shutdown(struct device *dev)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
- down_write(&cxl_memdev_rwsem);
+ guard(rwsem_write)(&cxl_memdev_rwsem);
cxlmd->cxlds = NULL;
- up_write(&cxl_memdev_rwsem);
}
static void cxl_memdev_unregister(void *_cxlmd)
@@ -656,11 +674,14 @@ err:
static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
unsigned long arg)
{
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+
switch (cmd) {
case CXL_MEM_QUERY_COMMANDS:
- return cxl_query_cmd(cxlmd, (void __user *)arg);
+ return cxl_query_cmd(cxl_mbox, (void __user *)arg);
case CXL_MEM_SEND_COMMAND:
- return cxl_send_cmd(cxlmd, (void __user *)arg);
+ return cxl_send_cmd(cxl_mbox, (void __user *)arg);
default:
return -ENOTTY;
}
@@ -671,15 +692,13 @@ static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
{
struct cxl_memdev *cxlmd = file->private_data;
struct cxl_dev_state *cxlds;
- int rc = -ENXIO;
- down_read(&cxl_memdev_rwsem);
+ guard(rwsem_read)(&cxl_memdev_rwsem);
cxlds = cxlmd->cxlds;
if (cxlds && cxlds->type == CXL_DEVTYPE_CLASSMEM)
- rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
- up_read(&cxl_memdev_rwsem);
+ return __cxl_memdev_ioctl(cxlmd, cmd, arg);
- return rc;
+ return -ENXIO;
}
static int cxl_memdev_open(struct inode *inode, struct file *file)
@@ -994,10 +1013,11 @@ static void cxl_remove_fw_upload(void *fwl)
int devm_cxl_setup_fw_upload(struct device *host, struct cxl_memdev_state *mds)
{
struct cxl_dev_state *cxlds = &mds->cxlds;
+ struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
struct device *dev = &cxlds->cxlmd->dev;
struct fw_upload *fwl;
- if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, mds->enabled_cmds))
+ if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, cxl_mbox->enabled_cmds))
return 0;
fwl = firmware_upload_register(THIS_MODULE, dev, dev_name(dev),
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 013b869b66cb..b50551601c2e 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -415,17 +415,20 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
*/
if (global_ctrl & CXL_HDM_DECODER_ENABLE || (!hdm && info->mem_enabled))
return devm_cxl_enable_mem(&port->dev, cxlds);
- else if (!hdm)
- return -ENODEV;
- root = to_cxl_port(port->dev.parent);
- while (!is_cxl_root(root) && is_cxl_port(root->dev.parent))
- root = to_cxl_port(root->dev.parent);
- if (!is_cxl_root(root)) {
- dev_err(dev, "Failed to acquire root port for HDM enable\n");
+ /*
+ * If the HDM Decoder Capability does not exist and DVSEC was
+	 * not set up, the DVSEC-based emulation cannot be used.
+ */
+ if (!hdm)
return -ENODEV;
- }
+ /* The HDM Decoder Capability exists but is globally disabled. */
+
+ /*
+ * If the DVSEC CXL Range registers are not enabled, just
+ * enable and use the HDM Decoder Capability registers.
+ */
if (!info->mem_enabled) {
rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
if (rc)
@@ -434,6 +437,26 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
return devm_cxl_enable_mem(&port->dev, cxlds);
}
+ /*
+ * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base
+ * [High,Low] when HDM operation is enabled the range register values
+ * are ignored by the device, but the spec also recommends matching the
+ * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges
+ * are expected even though Linux does not require or maintain that
+ * match. Check if at least one DVSEC range is enabled and allowed by
+ * the platform. That is, the DVSEC range must be covered by a locked
+ * platform window (CFMWS). Fail otherwise as the endpoint's decoders
+ * cannot be used.
+ */
+
+ root = to_cxl_port(port->dev.parent);
+ while (!is_cxl_root(root) && is_cxl_port(root->dev.parent))
+ root = to_cxl_port(root->dev.parent);
+ if (!is_cxl_root(root)) {
+ dev_err(dev, "Failed to acquire root port for HDM enable\n");
+ return -ENODEV;
+ }
+
for (i = 0, allowed = 0; i < info->ranges; i++) {
struct device *cxld_dev;
@@ -453,15 +476,6 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
return -ENXIO;
}
- /*
- * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base
- * [High,Low] when HDM operation is enabled the range register values
- * are ignored by the device, but the spec also recommends matching the
- * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges
- * are expected even though Linux does not require or maintain that
- * match. If at least one DVSEC range is enabled and allowed, skip HDM
- * Decoder Capability Enable.
- */
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, "CXL");
@@ -1054,3 +1068,104 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c)
return 0;
}
+
+/*
+ * Set max timeout such that platforms will optimize GPF flow to avoid
+ * the implied worst-case scenario delays. On a sane platform, all
+ * devices should always complete GPF within the energy budget of
+ * the GPF flow. The kernel does not have enough information to pick
+ * anything better than "maximize timeouts and hope it works".
+ *
+ * A misbehaving device could block forward progress of GPF for all
+ * the other devices, exhausting the energy budget of the platform.
+ * However, the spec seems to assume that moving on from slow-to-respond
+ * devices is a virtue. It is not possible to know whether, in actuality,
+ * the slow-to-respond device is *the* most critical device in the
+ * system to wait for.
+ */
+#define GPF_TIMEOUT_BASE_MAX 2
+#define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */
+
+u16 cxl_gpf_get_dvsec(struct device *dev)
+{
+ struct pci_dev *pdev;
+ bool is_port = true;
+ u16 dvsec;
+
+ if (!dev_is_pci(dev))
+ return 0;
+
+ pdev = to_pci_dev(dev);
+ if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT)
+ is_port = false;
+
+ dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+ is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF);
+ if (!dvsec)
+ dev_warn(dev, "%s GPF DVSEC not present\n",
+ is_port ? "Port" : "Device");
+ return dvsec;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_gpf_get_dvsec, "CXL");
+
+static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase)
+{
+ u64 base, scale;
+ int rc, offset;
+ u16 ctrl;
+
+ switch (phase) {
+ case 1:
+ offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET;
+ base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK;
+ scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK;
+ break;
+ case 2:
+ offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET;
+ base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK;
+ scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ rc = pci_read_config_word(pdev, dvsec + offset, &ctrl);
+ if (rc)
+ return rc;
+
+ if (FIELD_GET(base, ctrl) == GPF_TIMEOUT_BASE_MAX &&
+ FIELD_GET(scale, ctrl) == GPF_TIMEOUT_SCALE_MAX)
+ return 0;
+
+ ctrl = FIELD_PREP(base, GPF_TIMEOUT_BASE_MAX);
+ ctrl |= FIELD_PREP(scale, GPF_TIMEOUT_SCALE_MAX);
+
+ rc = pci_write_config_word(pdev, dvsec + offset, ctrl);
+ if (!rc)
+ pci_dbg(pdev, "Port GPF phase %d timeout: %d0 secs\n",
+ phase, GPF_TIMEOUT_BASE_MAX);
+
+ return rc;
+}
+
+int cxl_gpf_port_setup(struct cxl_dport *dport)
+{
+ if (!dport)
+ return -EINVAL;
+
+ if (!dport->gpf_dvsec) {
+ struct pci_dev *pdev;
+ int dvsec;
+
+ dvsec = cxl_gpf_get_dvsec(dport->dport_dev);
+ if (!dvsec)
+ return -EINVAL;
+
+ dport->gpf_dvsec = dvsec;
+ pdev = to_pci_dev(dport->dport_dev);
+ update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 1);
+ update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 2);
+ }
+
+ return 0;
+}
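
A rough sketch, not kernel code, of the timeout value that update_gpf_port_dvsec() programs, assuming the usual CXL GPF encoding where the timeout equals the base multiplied by the unit selected by the scale field (scale 7 being 10 seconds, per the GPF_TIMEOUT_SCALE_MAX comment above); the lookup table here is illustrative:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Assumed unit per scale value: 1us, 10us, ..., 10s at scale 7 */
	const uint64_t unit_us[8] = {
		1, 10, 100, 1000, 10000, 100000, 1000000, 10000000
	};
	unsigned int base = 2;	/* GPF_TIMEOUT_BASE_MAX */
	unsigned int scale = 7;	/* GPF_TIMEOUT_SCALE_MAX */

	/* 2 * 10s = 20s, matching the "%d0 secs" debug message above */
	printf("GPF phase timeout: %llu us\n",
	       (unsigned long long)(base * unit_us[scale]));
	return 0;
}
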
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 78a5c2c25982..eb46c6764d20 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -194,25 +194,35 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ /* without @cxl_dpa_rwsem, make sure @part is not reloaded */
+ int part = READ_ONCE(cxled->part);
+ const char *desc;
+
+ if (part < 0)
+ desc = "none";
+ else
+ desc = cxlds->part[part].res.name;
- return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxled->mode));
+ return sysfs_emit(buf, "%s\n", desc);
}
static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
- enum cxl_decoder_mode mode;
+ enum cxl_partition_mode mode;
ssize_t rc;
if (sysfs_streq(buf, "pmem"))
- mode = CXL_DECODER_PMEM;
+ mode = CXL_PARTMODE_PMEM;
else if (sysfs_streq(buf, "ram"))
- mode = CXL_DECODER_RAM;
+ mode = CXL_PARTMODE_RAM;
else
return -EINVAL;
- rc = cxl_dpa_set_mode(cxled, mode);
+ rc = cxl_dpa_set_part(cxled, mode);
if (rc)
return rc;
@@ -549,13 +559,9 @@ static ssize_t decoders_committed_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_port *port = to_cxl_port(dev);
- int rc;
-
- down_read(&cxl_region_rwsem);
- rc = sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port));
- up_read(&cxl_region_rwsem);
- return rc;
+ guard(rwsem_read)(&cxl_region_rwsem);
+ return sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port));
}
static DEVICE_ATTR_RO(decoders_committed);
@@ -596,17 +602,19 @@ struct cxl_port *to_cxl_port(const struct device *dev)
}
EXPORT_SYMBOL_NS_GPL(to_cxl_port, "CXL");
+struct cxl_port *parent_port_of(struct cxl_port *port)
+{
+ if (!port || !port->parent_dport)
+ return NULL;
+ return port->parent_dport->port;
+}
+
static void unregister_port(void *_port)
{
struct cxl_port *port = _port;
- struct cxl_port *parent;
+ struct cxl_port *parent = parent_port_of(port);
struct device *lock_dev;
- if (is_cxl_root(port))
- parent = NULL;
- else
- parent = to_cxl_port(port->dev.parent);
-
/*
* CXL root port's and the first level of ports are unregistered
* under the platform firmware device lock, all other ports are
@@ -1029,15 +1037,6 @@ struct cxl_root *find_cxl_root(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(find_cxl_root, "CXL");
-void put_cxl_root(struct cxl_root *cxl_root)
-{
- if (!cxl_root)
- return;
-
- put_device(&cxl_root->port.dev);
-}
-EXPORT_SYMBOL_NS_GPL(put_cxl_root, "CXL");
-
static struct cxl_dport *find_dport(struct cxl_port *port, int id)
{
struct cxl_dport *dport;
@@ -1672,6 +1671,8 @@ retry:
if (rc && rc != -EBUSY)
return rc;
+ cxl_gpf_port_setup(dport);
+
/* Any more ports to add between this one and the root? */
if (!dev_is_cxl_root_child(&port->dev))
continue;
@@ -1899,6 +1900,7 @@ struct cxl_endpoint_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port)
return ERR_PTR(-ENOMEM);
cxled->pos = -1;
+ cxled->part = -1;
cxld = &cxled->cxld;
rc = cxl_decoder_init(port, cxld);
if (rc) {
@@ -2339,8 +2341,14 @@ static __init int cxl_core_init(void)
if (rc)
goto err_region;
+ rc = cxl_ras_init();
+ if (rc)
+ goto err_ras;
+
return 0;
+err_ras:
+ cxl_region_exit();
err_region:
bus_unregister(&cxl_bus_type);
err_bus:
@@ -2352,6 +2360,7 @@ err_wq:
static void cxl_core_exit(void)
{
+ cxl_ras_exit();
cxl_region_exit();
bus_unregister(&cxl_bus_type);
destroy_workqueue(cxl_bus_wq);
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
new file mode 100644
index 000000000000..2731ba3a0799
--- /dev/null
+++ b/drivers/cxl/core/ras.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2025 AMD Corporation. All rights reserved. */
+
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <cxl/event.h>
+#include <cxlmem.h>
+#include "trace.h"
+
+static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev,
+ struct cxl_ras_capability_regs ras_cap)
+{
+ u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
+
+ trace_cxl_port_aer_correctable_error(&pdev->dev, status);
+}
+
+static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev,
+ struct cxl_ras_capability_regs ras_cap)
+{
+ u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask;
+ u32 fe;
+
+ if (hweight32(status) > 1)
+ fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
+ ras_cap.cap_control));
+ else
+ fe = status;
+
+ trace_cxl_port_aer_uncorrectable_error(&pdev->dev, status, fe,
+ ras_cap.header_log);
+}
+
+static void cxl_cper_trace_corr_prot_err(struct cxl_memdev *cxlmd,
+ struct cxl_ras_capability_regs ras_cap)
+{
+ u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
+
+ trace_cxl_aer_correctable_error(cxlmd, status);
+}
+
+static void
+cxl_cper_trace_uncorr_prot_err(struct cxl_memdev *cxlmd,
+ struct cxl_ras_capability_regs ras_cap)
+{
+ u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask;
+ u32 fe;
+
+ if (hweight32(status) > 1)
+ fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
+ ras_cap.cap_control));
+ else
+ fe = status;
+
+ trace_cxl_aer_uncorrectable_error(cxlmd, status, fe,
+ ras_cap.header_log);
+}
+
+static int match_memdev_by_parent(struct device *dev, const void *uport)
+{
+ if (is_cxl_memdev(dev) && dev->parent == uport)
+ return 1;
+ return 0;
+}
+
+static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
+{
+ unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device,
+ data->prot_err.agent_addr.function);
+ struct pci_dev *pdev __free(pci_dev_put) =
+ pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment,
+ data->prot_err.agent_addr.bus,
+ devfn);
+ struct cxl_memdev *cxlmd;
+ int port_type;
+
+ if (!pdev)
+ return;
+
+ port_type = pci_pcie_type(pdev);
+ if (port_type == PCI_EXP_TYPE_ROOT_PORT ||
+ port_type == PCI_EXP_TYPE_DOWNSTREAM ||
+ port_type == PCI_EXP_TYPE_UPSTREAM) {
+ if (data->severity == AER_CORRECTABLE)
+ cxl_cper_trace_corr_port_prot_err(pdev, data->ras_cap);
+ else
+ cxl_cper_trace_uncorr_port_prot_err(pdev, data->ras_cap);
+
+ return;
+ }
+
+ guard(device)(&pdev->dev);
+ if (!pdev->dev.driver)
+ return;
+
+ struct device *mem_dev __free(put_device) = bus_find_device(
+ &cxl_bus_type, NULL, pdev, match_memdev_by_parent);
+ if (!mem_dev)
+ return;
+
+ cxlmd = to_cxl_memdev(mem_dev);
+ if (data->severity == AER_CORRECTABLE)
+ cxl_cper_trace_corr_prot_err(cxlmd, data->ras_cap);
+ else
+ cxl_cper_trace_uncorr_prot_err(cxlmd, data->ras_cap);
+}
+
+static void cxl_cper_prot_err_work_fn(struct work_struct *work)
+{
+ struct cxl_cper_prot_err_work_data wd;
+
+ while (cxl_cper_prot_err_kfifo_get(&wd))
+ cxl_cper_handle_prot_err(&wd);
+}
+static DECLARE_WORK(cxl_cper_prot_err_work, cxl_cper_prot_err_work_fn);
+
+int cxl_ras_init(void)
+{
+ return cxl_cper_register_prot_err_work(&cxl_cper_prot_err_work);
+}
+
+void cxl_ras_exit(void)
+{
+ cxl_cper_unregister_prot_err_work(&cxl_cper_prot_err_work);
+ cancel_work_sync(&cxl_cper_prot_err_work);
+}
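
To illustrate the uncorrectable-error reduction above: when more than one status bit is set, the First Error Pointer field of the RAS Capability Control register names the single bit to report. A stand-alone sketch with made-up register values and a hand-rolled field extraction, since the kernel FIELD_GET() macro and the exact CXL_RAS_CAP_CONTROL_FE_MASK definition are assumed here rather than copied:

#include <stdio.h>
#include <stdint.h>

/* Assumed: First Error Pointer occupies the low bits of Cap Control */
#define FE_MASK 0x3f

int main(void)
{
	uint32_t uncor_status = 0x00000014;	/* made up: bits 2 and 4 set */
	uint32_t uncor_mask = 0;
	uint32_t cap_control = 0x00000004;	/* made up: first error = bit 4 */
	uint32_t status = uncor_status & ~uncor_mask;
	uint32_t fe;

	if (__builtin_popcount(status) > 1)	/* hweight32() equivalent */
		fe = 1u << (cap_control & FE_MASK);
	else
		fe = status;

	printf("status %#x first-error mask %#x\n", status, fe);
	return 0;
}
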
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index e8d11a988fd9..6e5e1460068d 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -144,7 +144,7 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
rc = down_read_interruptible(&cxl_region_rwsem);
if (rc)
return rc;
- if (cxlr->mode != CXL_DECODER_PMEM)
+ if (cxlr->mode != CXL_PARTMODE_PMEM)
rc = sysfs_emit(buf, "\n");
else
rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
@@ -231,11 +231,10 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
&cxlr->dev,
"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
return 0;
- } else {
- dev_WARN(&cxlr->dev,
- "Failed to synchronize CPU cache state\n");
- return -ENXIO;
}
+ dev_WARN(&cxlr->dev,
+ "Failed to synchronize CPU cache state\n");
+ return -ENXIO;
}
cpu_cache_invalidate_memregion(IORES_DESC_CXL);
@@ -441,7 +440,7 @@ static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
* Support tooling that expects to find a 'uuid' attribute for all
* regions regardless of mode.
*/
- if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
+ if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_PARTMODE_PMEM)
return 0444;
return a->mode;
}
@@ -603,8 +602,16 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct cxl_region *cxlr = to_cxl_region(dev);
+ const char *desc;
+
+ if (cxlr->mode == CXL_PARTMODE_RAM)
+ desc = "ram";
+ else if (cxlr->mode == CXL_PARTMODE_PMEM)
+ desc = "pmem";
+ else
+ desc = "";
- return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode));
+ return sysfs_emit(buf, "%s\n", desc);
}
static DEVICE_ATTR_RO(mode);
@@ -630,7 +637,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
/* ways, granularity and uuid (if PMEM) need to be set before HPA */
if (!p->interleave_ways || !p->interleave_granularity ||
- (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid)))
+ (cxlr->mode == CXL_PARTMODE_PMEM && uuid_is_null(&p->uuid)))
return -ENXIO;
div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder);
@@ -824,6 +831,21 @@ static int match_free_decoder(struct device *dev, const void *data)
return 1;
}
+static bool region_res_match_cxl_range(const struct cxl_region_params *p,
+ struct range *range)
+{
+ if (!p->res)
+ return false;
+
+ /*
+	 * If this is an extended linear cache region, the CXL range is assumed
+	 * to be fronted by the DRAM range in the current known implementation.
+	 * This assumption holds until a variant implementation exists.
+ */
+ return p->res->start + p->cache_size == range->start &&
+ p->res->end == range->end;
+}
+
static int match_auto_decoder(struct device *dev, const void *data)
{
const struct cxl_region_params *p = data;
@@ -836,16 +858,29 @@ static int match_auto_decoder(struct device *dev, const void *data)
cxld = to_cxl_decoder(dev);
r = &cxld->hpa_range;
- if (p->res && p->res->start == r->start && p->res->end == r->end)
+ if (region_res_match_cxl_range(p, r))
return 1;
return 0;
}
+/**
+ * cxl_port_pick_region_decoder() - assign or lookup a decoder for a region
+ * @port: a port in the ancestry of the endpoint implied by @cxled
+ * @cxled: endpoint decoder to be, or currently, mapped by @port
+ * @cxlr: region to establish, or validate, decode at @port
+ *
+ * In the region creation path cxl_port_pick_region_decoder() is an
+ * allocator that finds a free decoder. In the region assembly path, it is
+ * recalling the decoder that platform firmware picked for validation
+ * purposes.
+ *
+ * The result is recorded in a 'struct cxl_region_ref' in @port.
+ */
static struct cxl_decoder *
-cxl_region_find_decoder(struct cxl_port *port,
- struct cxl_endpoint_decoder *cxled,
- struct cxl_region *cxlr)
+cxl_port_pick_region_decoder(struct cxl_port *port,
+ struct cxl_endpoint_decoder *cxled,
+ struct cxl_region *cxlr)
{
struct device *dev;
@@ -893,7 +928,8 @@ static bool auto_order_ok(struct cxl_port *port, struct cxl_region *cxlr_iter,
static struct cxl_region_ref *
alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr,
- struct cxl_endpoint_decoder *cxled)
+ struct cxl_endpoint_decoder *cxled,
+ struct cxl_decoder *cxld)
{
struct cxl_region_params *p = &cxlr->params;
struct cxl_region_ref *cxl_rr, *iter;
@@ -907,9 +943,6 @@ alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr,
continue;
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
- struct cxl_decoder *cxld;
-
- cxld = cxl_region_find_decoder(port, cxled, cxlr);
if (auto_order_ok(port, iter->region, cxld))
continue;
}
@@ -991,19 +1024,11 @@ static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
return 0;
}
-static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
- struct cxl_endpoint_decoder *cxled,
- struct cxl_region_ref *cxl_rr)
+static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled,
+ struct cxl_region_ref *cxl_rr,
+ struct cxl_decoder *cxld)
{
- struct cxl_decoder *cxld;
-
- cxld = cxl_region_find_decoder(port, cxled, cxlr);
- if (!cxld) {
- dev_dbg(&cxlr->dev, "%s: no decoder available\n",
- dev_name(&port->dev));
- return -EBUSY;
- }
-
if (cxld->region) {
dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
dev_name(&port->dev), dev_name(&cxld->dev),
@@ -1094,7 +1119,16 @@ static int cxl_port_attach_region(struct cxl_port *port,
nr_targets_inc = true;
}
} else {
- cxl_rr = alloc_region_ref(port, cxlr, cxled);
+ struct cxl_decoder *cxld;
+
+ cxld = cxl_port_pick_region_decoder(port, cxled, cxlr);
+ if (!cxld) {
+ dev_dbg(&cxlr->dev, "%s: no decoder available\n",
+ dev_name(&port->dev));
+ return -EBUSY;
+ }
+
+ cxl_rr = alloc_region_ref(port, cxlr, cxled, cxld);
if (IS_ERR(cxl_rr)) {
dev_dbg(&cxlr->dev,
"%s: failed to allocate region reference\n",
@@ -1103,7 +1137,7 @@ static int cxl_port_attach_region(struct cxl_port *port,
}
nr_targets_inc = true;
- rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr);
+ rc = cxl_rr_assign_decoder(port, cxlr, cxled, cxl_rr, cxld);
if (rc)
goto out_erase;
}
@@ -1423,9 +1457,8 @@ static int cxl_port_setup_targets(struct cxl_port *port,
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
if (cxld->interleave_ways != iw ||
- cxld->interleave_granularity != ig ||
- cxld->hpa_range.start != p->res->start ||
- cxld->hpa_range.end != p->res->end ||
+ (iw > 1 && cxld->interleave_granularity != ig) ||
+ !region_res_match_cxl_range(p, &cxld->hpa_range) ||
((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
dev_err(&cxlr->dev,
"%s:%s %s expected iw: %d ig: %d %pr\n",
@@ -1726,13 +1759,6 @@ static int cmp_interleave_pos(const void *a, const void *b)
return cxled_a->pos - cxled_b->pos;
}
-static struct cxl_port *next_port(struct cxl_port *port)
-{
- if (!port->parent_dport)
- return NULL;
- return port->parent_dport->port;
-}
-
static int match_switch_decoder_by_range(struct device *dev,
const void *data)
{
@@ -1759,7 +1785,7 @@ static int find_pos_and_ways(struct cxl_port *port, struct range *range,
struct device *dev;
int rc = -ENXIO;
- parent = next_port(port);
+ parent = parent_port_of(port);
if (!parent)
return rc;
@@ -1783,6 +1809,13 @@ static int find_pos_and_ways(struct cxl_port *port, struct range *range,
}
put_device(dev);
+ if (rc)
+ dev_err(port->uport_dev,
+ "failed to find %s:%s in target list of %s\n",
+ dev_name(&port->dev),
+ dev_name(port->parent_dport->dport_dev),
+ dev_name(&cxlsd->cxld.dev));
+
return rc;
}
@@ -1839,7 +1872,7 @@ static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled)
*/
/* Iterate from endpoint to root_port refining the position */
- for (iter = port; iter; iter = next_port(iter)) {
+ for (iter = port; iter; iter = parent_port_of(iter)) {
if (is_cxl_root(iter))
break;
@@ -1888,6 +1921,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
{
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_region_params *p = &cxlr->params;
struct cxl_port *ep_port, *root_port;
struct cxl_dport *dport;
@@ -1902,22 +1936,24 @@ static int cxl_region_attach(struct cxl_region *cxlr,
return rc;
}
- if (cxled->mode != cxlr->mode) {
- dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
- dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
- return -EINVAL;
- }
-
- if (cxled->mode == CXL_DECODER_DEAD) {
+ if (cxled->part < 0) {
dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
return -ENODEV;
}
+ if (cxlds->part[cxled->part].mode != cxlr->mode) {
+ dev_dbg(&cxlr->dev, "%s region mode: %d mismatch\n",
+ dev_name(&cxled->cxld.dev), cxlr->mode);
+ return -EINVAL;
+ }
+
/* all full of members, or interleave config not established? */
if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
dev_dbg(&cxlr->dev, "region already active\n");
return -EBUSY;
- } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
+ }
+
+ if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
dev_dbg(&cxlr->dev, "interleave config missing\n");
return -ENXIO;
}
@@ -1951,13 +1987,13 @@ static int cxl_region_attach(struct cxl_region *cxlr,
return -ENXIO;
}
- if (resource_size(cxled->dpa_res) * p->interleave_ways !=
+ if (resource_size(cxled->dpa_res) * p->interleave_ways + p->cache_size !=
resource_size(p->res)) {
dev_dbg(&cxlr->dev,
- "%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n",
+ "%s:%s-size-%#llx * ways-%d + cache-%#llx != region-size-%#llx\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
(u64)resource_size(cxled->dpa_res), p->interleave_ways,
- (u64)resource_size(p->res));
+ (u64)p->cache_size, (u64)resource_size(p->res));
return -EINVAL;
}
@@ -2115,7 +2151,7 @@ out:
void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
{
down_write(&cxl_region_rwsem);
- cxled->mode = CXL_DECODER_DEAD;
+ cxled->part = -1;
cxl_region_detach(cxled);
up_write(&cxl_region_rwsem);
}
@@ -2137,6 +2173,12 @@ static int attach_target(struct cxl_region *cxlr,
rc = cxl_region_attach(cxlr, cxled, pos);
up_read(&cxl_dpa_rwsem);
up_write(&cxl_region_rwsem);
+
+ if (rc)
+ dev_warn(cxled->cxld.dev.parent,
+ "failed to attach %s to %s: %d\n",
+ dev_name(&cxled->cxld.dev), dev_name(&cxlr->dev), rc);
+
return rc;
}
@@ -2471,7 +2513,7 @@ static int cxl_region_calculate_adistance(struct notifier_block *nb,
*/
static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
int id,
- enum cxl_decoder_mode mode,
+ enum cxl_partition_mode mode,
enum cxl_decoder_type type)
{
struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
@@ -2525,13 +2567,13 @@ static ssize_t create_ram_region_show(struct device *dev,
}
static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
- enum cxl_decoder_mode mode, int id)
+ enum cxl_partition_mode mode, int id)
{
int rc;
switch (mode) {
- case CXL_DECODER_RAM:
- case CXL_DECODER_PMEM:
+ case CXL_PARTMODE_RAM:
+ case CXL_PARTMODE_PMEM:
break;
default:
dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
@@ -2551,7 +2593,7 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
}
static ssize_t create_region_store(struct device *dev, const char *buf,
- size_t len, enum cxl_decoder_mode mode)
+ size_t len, enum cxl_partition_mode mode)
{
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
struct cxl_region *cxlr;
@@ -2572,7 +2614,7 @@ static ssize_t create_pmem_region_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
- return create_region_store(dev, buf, len, CXL_DECODER_PMEM);
+ return create_region_store(dev, buf, len, CXL_PARTMODE_PMEM);
}
DEVICE_ATTR_RW(create_pmem_region);
@@ -2580,7 +2622,7 @@ static ssize_t create_ram_region_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
- return create_region_store(dev, buf, len, CXL_DECODER_RAM);
+ return create_region_store(dev, buf, len, CXL_PARTMODE_RAM);
}
DEVICE_ATTR_RW(create_ram_region);
@@ -2678,7 +2720,7 @@ EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, "CXL");
struct cxl_poison_context {
struct cxl_port *port;
- enum cxl_decoder_mode mode;
+ int part;
u64 offset;
};
@@ -2686,47 +2728,45 @@ static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd,
struct cxl_poison_context *ctx)
{
struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ const struct resource *res;
+ struct resource *p, *last;
u64 offset, length;
int rc = 0;
+ if (ctx->part < 0)
+ return 0;
+
/*
- * Collect poison for the remaining unmapped resources
- * after poison is collected by committed endpoints.
- *
- * Knowing that PMEM must always follow RAM, get poison
- * for unmapped resources based on the last decoder's mode:
- * ram: scan remains of ram range, then any pmem range
- * pmem: scan remains of pmem range
+ * Collect poison for the remaining unmapped resources after
+	 * poison is collected by committed endpoint decoders.
*/
-
- if (ctx->mode == CXL_DECODER_RAM) {
- offset = ctx->offset;
- length = resource_size(&cxlds->ram_res) - offset;
+ for (int i = ctx->part; i < cxlds->nr_partitions; i++) {
+ res = &cxlds->part[i].res;
+ for (p = res->child, last = NULL; p; p = p->sibling)
+ last = p;
+ if (last)
+ offset = last->end + 1;
+ else
+ offset = res->start;
+ length = res->end - offset + 1;
+ if (!length)
+ break;
rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
- if (rc == -EFAULT)
- rc = 0;
+ if (rc == -EFAULT && cxlds->part[i].mode == CXL_PARTMODE_RAM)
+ continue;
if (rc)
- return rc;
- }
- if (ctx->mode == CXL_DECODER_PMEM) {
- offset = ctx->offset;
- length = resource_size(&cxlds->dpa_res) - offset;
- if (!length)
- return 0;
- } else if (resource_size(&cxlds->pmem_res)) {
- offset = cxlds->pmem_res.start;
- length = resource_size(&cxlds->pmem_res);
- } else {
- return 0;
+ break;
}
- return cxl_mem_get_poison(cxlmd, offset, length, NULL);
+ return rc;
}
static int poison_by_decoder(struct device *dev, void *arg)
{
struct cxl_poison_context *ctx = arg;
struct cxl_endpoint_decoder *cxled;
+ enum cxl_partition_mode mode;
+ struct cxl_dev_state *cxlds;
struct cxl_memdev *cxlmd;
u64 offset, length;
int rc = 0;
@@ -2735,27 +2775,18 @@ static int poison_by_decoder(struct device *dev, void *arg)
return rc;
cxled = to_cxl_endpoint_decoder(dev);
- if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
+ if (!cxled->dpa_res)
return rc;
- /*
- * Regions are only created with single mode decoders: pmem or ram.
- * Linux does not support mixed mode decoders. This means that
- * reading poison per endpoint decoder adheres to the requirement
- * that poison reads of pmem and ram must be separated.
- * CXL 3.0 Spec 8.2.9.8.4.1
- */
- if (cxled->mode == CXL_DECODER_MIXED) {
- dev_dbg(dev, "poison list read unsupported in mixed mode\n");
- return rc;
- }
-
cxlmd = cxled_to_memdev(cxled);
+ cxlds = cxlmd->cxlds;
+ mode = cxlds->part[cxled->part].mode;
+
if (cxled->skip) {
offset = cxled->dpa_res->start - cxled->skip;
length = cxled->skip;
rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
- if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
+ if (rc == -EFAULT && mode == CXL_PARTMODE_RAM)
rc = 0;
if (rc)
return rc;
@@ -2764,7 +2795,7 @@ static int poison_by_decoder(struct device *dev, void *arg)
offset = cxled->dpa_res->start;
length = cxled->dpa_res->end - offset + 1;
rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region);
- if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
+ if (rc == -EFAULT && mode == CXL_PARTMODE_RAM)
rc = 0;
if (rc)
return rc;
@@ -2772,7 +2803,7 @@ static int poison_by_decoder(struct device *dev, void *arg)
/* Iterate until commit_end is reached */
if (cxled->cxld.id == ctx->port->commit_end) {
ctx->offset = cxled->dpa_res->end + 1;
- ctx->mode = cxled->mode;
+ ctx->part = cxled->part;
return 1;
}
@@ -2785,7 +2816,8 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port)
int rc = 0;
ctx = (struct cxl_poison_context) {
- .port = port
+ .port = port,
+ .part = -1,
};
rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder);
@@ -2921,7 +2953,7 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0);
/* Apply the hpa_offset to the region base address */
- hpa = hpa_offset + p->res->start;
+ hpa = hpa_offset + p->res->start + p->cache_size;
/* Root decoder translation overrides typical modulo decode */
if (cxlrd->hpa_to_spa)
@@ -3038,17 +3070,13 @@ static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
struct cxl_dax_region *cxlr_dax;
struct device *dev;
- down_read(&cxl_region_rwsem);
- if (p->state != CXL_CONFIG_COMMIT) {
- cxlr_dax = ERR_PTR(-ENXIO);
- goto out;
- }
+ guard(rwsem_read)(&cxl_region_rwsem);
+ if (p->state != CXL_CONFIG_COMMIT)
+ return ERR_PTR(-ENXIO);
cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
- if (!cxlr_dax) {
- cxlr_dax = ERR_PTR(-ENOMEM);
- goto out;
- }
+ if (!cxlr_dax)
+ return ERR_PTR(-ENOMEM);
cxlr_dax->hpa_range.start = p->res->start;
cxlr_dax->hpa_range.end = p->res->end;
@@ -3061,8 +3089,6 @@ static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
dev->parent = &cxlr->dev;
dev->bus = &cxl_bus_type;
dev->type = &cxl_dax_region_type;
-out:
- up_read(&cxl_region_rwsem);
return cxlr_dax;
}
@@ -3189,26 +3215,54 @@ err:
return rc;
}
-static int match_root_decoder_by_range(struct device *dev,
- const void *data)
+static int match_decoder_by_range(struct device *dev, const void *data)
{
const struct range *r1, *r2 = data;
- struct cxl_root_decoder *cxlrd;
+ struct cxl_decoder *cxld;
- if (!is_root_decoder(dev))
+ if (!is_switch_decoder(dev))
return 0;
- cxlrd = to_cxl_root_decoder(dev);
- r1 = &cxlrd->cxlsd.cxld.hpa_range;
+ cxld = to_cxl_decoder(dev);
+ r1 = &cxld->hpa_range;
return range_contains(r1, r2);
}
+static struct cxl_decoder *
+cxl_port_find_switch_decoder(struct cxl_port *port, struct range *hpa)
+{
+ struct device *cxld_dev = device_find_child(&port->dev, hpa,
+ match_decoder_by_range);
+
+ return cxld_dev ? to_cxl_decoder(cxld_dev) : NULL;
+}
+
+static struct cxl_root_decoder *
+cxl_find_root_decoder(struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_port *port = cxled_to_port(cxled);
+ struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
+ struct cxl_decoder *root, *cxld = &cxled->cxld;
+ struct range *hpa = &cxld->hpa_range;
+
+ root = cxl_port_find_switch_decoder(&cxl_root->port, hpa);
+ if (!root) {
+ dev_err(cxlmd->dev.parent,
+ "%s:%s no CXL window for range %#llx:%#llx\n",
+ dev_name(&cxlmd->dev), dev_name(&cxld->dev),
+ cxld->hpa_range.start, cxld->hpa_range.end);
+ return NULL;
+ }
+
+ return to_cxl_root_decoder(&root->dev);
+}
+
static int match_region_by_range(struct device *dev, const void *data)
{
struct cxl_region_params *p;
struct cxl_region *cxlr;
const struct range *r = data;
- int rc = 0;
if (!is_cxl_region(dev))
return 0;
@@ -3216,60 +3270,96 @@ static int match_region_by_range(struct device *dev, const void *data)
cxlr = to_cxl_region(dev);
p = &cxlr->params;
- down_read(&cxl_region_rwsem);
+ guard(rwsem_read)(&cxl_region_rwsem);
if (p->res && p->res->start == r->start && p->res->end == r->end)
- rc = 1;
- up_read(&cxl_region_rwsem);
+ return 1;
- return rc;
+ return 0;
}
-/* Establish an empty region covering the given HPA range */
-static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
- struct cxl_endpoint_decoder *cxled)
+static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr,
+ struct resource *res)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+ struct cxl_region_params *p = &cxlr->params;
+ int nid = phys_to_target_node(res->start);
+ resource_size_t size = resource_size(res);
+ resource_size_t cache_size, start;
+ int rc;
+
+ rc = cxl_acpi_get_extended_linear_cache_size(res, nid, &cache_size);
+ if (rc)
+ return rc;
+
+ if (!cache_size)
+ return 0;
+
+ if (size != cache_size) {
+ dev_warn(&cxlr->dev,
+ "Extended Linear Cache size %pa != CXL size %pa. No Support!",
+ &cache_size, &size);
+ return -ENXIO;
+ }
+
+ /*
+ * Move the start of the range to where the cache range starts. The
+ * implementation assumes that the cache range is in front of the
+ * CXL range. This is not dictated by the HMAT spec but is how the
+ * current known implementation is configured.
+ *
+ * The cache range is expected to be within the CFMWS. The adjusted
+ * res->start should not be less than cxlrd->res->start.
+ */
+ start = res->start - cache_size;
+ if (start < cxlrd->res->start)
+ return -ENXIO;
+
+ res->start = start;
+ p->cache_size = cache_size;
+
+ return 0;
+}
+
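
As a worked example of the resize above, with hypothetical addresses: the region resource grows backwards by cache_size so that it covers the DRAM cache that fronts the CXL range, and the result must still land inside the root decoder window. A minimal user-space sketch:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* All values hypothetical */
	uint64_t cfmws_start = 0x1000000000ULL;	/* root decoder window start */
	uint64_t cxl_start   = 0x1080000000ULL;	/* endpoint HPA range start */
	uint64_t cxl_end     = 0x10ffffffffULL;
	uint64_t cache_size  = 0x80000000ULL;	/* 2 GiB near-memory cache */

	/* Region resource is pulled back to cover the DRAM cache */
	uint64_t res_start = cxl_start - cache_size;

	if (res_start < cfmws_start)
		return 1;	/* would fall outside the CFMWS: -ENXIO */

	printf("region: %#llx-%#llx (cache %#llx + cxl %#llx)\n",
	       (unsigned long long)res_start, (unsigned long long)cxl_end,
	       (unsigned long long)cache_size,
	       (unsigned long long)(cxl_end - cxl_start + 1));
	return 0;
}

This is also why region_res_match_cxl_range() earlier compares res->start + cache_size against the decoder HPA start: the decoders keep advertising only the CXL-backed half of the region.
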
+static int __construct_region(struct cxl_region *cxlr,
+ struct cxl_root_decoder *cxlrd,
+ struct cxl_endpoint_decoder *cxled)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
- struct cxl_port *port = cxlrd_to_port(cxlrd);
struct range *hpa = &cxled->cxld.hpa_range;
struct cxl_region_params *p;
- struct cxl_region *cxlr;
struct resource *res;
int rc;
- do {
- cxlr = __create_region(cxlrd, cxled->mode,
- atomic_read(&cxlrd->region_id));
- } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
-
- if (IS_ERR(cxlr)) {
- dev_err(cxlmd->dev.parent,
- "%s:%s: %s failed assign region: %ld\n",
- dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
- __func__, PTR_ERR(cxlr));
- return cxlr;
- }
-
- down_write(&cxl_region_rwsem);
+ guard(rwsem_write)(&cxl_region_rwsem);
p = &cxlr->params;
if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
dev_err(cxlmd->dev.parent,
"%s:%s: %s autodiscovery interrupted\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
__func__);
- rc = -EBUSY;
- goto err;
+ return -EBUSY;
}
set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
res = kmalloc(sizeof(*res), GFP_KERNEL);
- if (!res) {
- rc = -ENOMEM;
- goto err;
- }
+ if (!res)
+ return -ENOMEM;
*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
dev_name(&cxlr->dev));
+
+ rc = cxl_extended_linear_cache_resize(cxlr, res);
+ if (rc && rc != -EOPNOTSUPP) {
+ /*
+		 * Failing to support extended linear cache region resize does not
+		 * prevent the region from functioning. It only causes 'cxl list'
+		 * to show an incorrect region size.
+ */
+ dev_warn(cxlmd->dev.parent,
+ "Extended linear cache calculation failed rc:%d\n", rc);
+ }
+
rc = insert_resource(cxlrd->res, res);
if (rc) {
/*
@@ -3289,7 +3379,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
if (rc)
- goto err;
+ return rc;
dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
@@ -3298,57 +3388,81 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
/* ...to match put_device() in cxl_add_to_region() */
get_device(&cxlr->dev);
- up_write(&cxl_region_rwsem);
+
+ return 0;
+}
+
+/* Establish an empty region covering the given HPA range */
+static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
+ struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_port *port = cxlrd_to_port(cxlrd);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ int rc, part = READ_ONCE(cxled->part);
+ struct cxl_region *cxlr;
+
+ do {
+ cxlr = __create_region(cxlrd, cxlds->part[part].mode,
+ atomic_read(&cxlrd->region_id));
+ } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
+
+ if (IS_ERR(cxlr)) {
+ dev_err(cxlmd->dev.parent,
+ "%s:%s: %s failed assign region: %ld\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ __func__, PTR_ERR(cxlr));
+ return cxlr;
+ }
+
+ rc = __construct_region(cxlr, cxlrd, cxled);
+ if (rc) {
+ devm_release_action(port->uport_dev, unregister_region, cxlr);
+ return ERR_PTR(rc);
+ }
return cxlr;
+}
-err:
- up_write(&cxl_region_rwsem);
- devm_release_action(port->uport_dev, unregister_region, cxlr);
- return ERR_PTR(rc);
+static struct cxl_region *
+cxl_find_region_by_range(struct cxl_root_decoder *cxlrd, struct range *hpa)
+{
+ struct device *region_dev;
+
+ region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
+ match_region_by_range);
+ if (!region_dev)
+ return NULL;
+
+ return to_cxl_region(region_dev);
}
-int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
+int cxl_add_to_region(struct cxl_endpoint_decoder *cxled)
{
- struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct range *hpa = &cxled->cxld.hpa_range;
- struct cxl_decoder *cxld = &cxled->cxld;
- struct device *cxlrd_dev, *region_dev;
- struct cxl_root_decoder *cxlrd;
struct cxl_region_params *p;
- struct cxl_region *cxlr;
bool attach = false;
int rc;
- cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range,
- match_root_decoder_by_range);
- if (!cxlrd_dev) {
- dev_err(cxlmd->dev.parent,
- "%s:%s no CXL window for range %#llx:%#llx\n",
- dev_name(&cxlmd->dev), dev_name(&cxld->dev),
- cxld->hpa_range.start, cxld->hpa_range.end);
+ struct cxl_root_decoder *cxlrd __free(put_cxl_root_decoder) =
+ cxl_find_root_decoder(cxled);
+ if (!cxlrd)
return -ENXIO;
- }
-
- cxlrd = to_cxl_root_decoder(cxlrd_dev);
/*
* Ensure that if multiple threads race to construct_region() for @hpa
* one does the construction and the others add to that.
*/
mutex_lock(&cxlrd->range_lock);
- region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
- match_region_by_range);
- if (!region_dev) {
+ struct cxl_region *cxlr __free(put_cxl_region) =
+ cxl_find_region_by_range(cxlrd, hpa);
+ if (!cxlr)
cxlr = construct_region(cxlrd, cxled);
- region_dev = &cxlr->dev;
- } else
- cxlr = to_cxl_region(region_dev);
mutex_unlock(&cxlrd->range_lock);
rc = PTR_ERR_OR_ZERO(cxlr);
if (rc)
- goto out;
+ return rc;
attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);
@@ -3368,13 +3482,38 @@ int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
p->res);
}
- put_device(region_dev);
-out:
- put_device(cxlrd_dev);
return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, "CXL");
+u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa)
+{
+ struct cxl_region_ref *iter;
+ unsigned long index;
+
+ if (!endpoint)
+ return ~0ULL;
+
+ guard(rwsem_write)(&cxl_region_rwsem);
+
+ xa_for_each(&endpoint->regions, index, iter) {
+ struct cxl_region_params *p = &iter->region->params;
+
+ if (p->res->start <= spa && spa <= p->res->end) {
+ if (!p->cache_size)
+ return ~0ULL;
+
+ if (spa >= p->res->start + p->cache_size)
+ return spa - p->cache_size;
+
+ return spa + p->cache_size;
+ }
+ }
+
+ return ~0ULL;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_port_get_spa_cache_alias, "CXL");
+
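Consumers holding a poison or RAS record can translate a reported SPA into the address it aliases with in the other half of the extended-linear-cache window. A minimal sketch of such a caller, using only the helper exported above (the function name is illustrative, not part of this patch):

static void report_spa_and_alias(struct cxl_port *endpoint, u64 spa)
{
	u64 alias = cxl_port_get_spa_cache_alias(endpoint, spa);

	/* ~0ULL means no region with an extended linear cache backs @spa */
	if (alias == ~0ULL) {
		pr_debug("spa %#llx: no cache alias\n", spa);
		return;
	}

	pr_debug("spa %#llx aliases with %#llx\n", spa, alias);
}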
static int is_system_ram(struct resource *res, void *arg)
{
struct cxl_region *cxlr = arg;
@@ -3440,9 +3579,19 @@ out:
return rc;
switch (cxlr->mode) {
- case CXL_DECODER_PMEM:
+ case CXL_PARTMODE_PMEM:
+ rc = devm_cxl_region_edac_register(cxlr);
+ if (rc)
+ dev_dbg(&cxlr->dev, "CXL EDAC registration for region_id=%d failed\n",
+ cxlr->id);
+
return devm_cxl_add_pmem_region(cxlr);
- case CXL_DECODER_RAM:
+ case CXL_PARTMODE_RAM:
+ rc = devm_cxl_region_edac_register(cxlr);
+ if (rc)
+ dev_dbg(&cxlr->dev, "CXL EDAC registration for region_id=%d failed\n",
+ cxlr->id);
+
/*
	 * The region cannot be managed by CXL if any portion of
* it is already online as 'System RAM'
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
index 117c2e94c761..5ca7b0eed568 100644
--- a/drivers/cxl/core/regs.c
+++ b/drivers/cxl/core/regs.c
@@ -581,7 +581,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri
resource_size_t rcrb = ri->base;
void __iomem *addr;
u32 bar0, bar1;
- u16 cmd;
u32 id;
if (which == CXL_RCRB_UPSTREAM)
@@ -603,7 +602,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri
}
id = readl(addr + PCI_VENDOR_ID);
- cmd = readw(addr + PCI_COMMAND);
bar0 = readl(addr + PCI_BASE_ADDRESS_0);
bar1 = readl(addr + PCI_BASE_ADDRESS_1);
iounmap(addr);
@@ -618,8 +616,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri
dev_err(dev, "Failed to access Downstream Port RCRB\n");
return CXL_RESOURCE_NONE;
}
- if (!(cmd & PCI_COMMAND_MEMORY))
- return CXL_RESOURCE_NONE;
/* The RCRB is a Memory Window, and the MEM_TYPE_1M bit is obsolete */
if (bar0 & (PCI_BASE_ADDRESS_MEM_TYPE_1M | PCI_BASE_ADDRESS_SPACE_IO))
return CXL_RESOURCE_NONE;
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
index cea706b683b5..25ebfbc1616c 100644
--- a/drivers/cxl/core/trace.h
+++ b/drivers/cxl/core/trace.h
@@ -48,6 +48,34 @@
{ CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \
)
+TRACE_EVENT(cxl_port_aer_uncorrectable_error,
+ TP_PROTO(struct device *dev, u32 status, u32 fe, u32 *hl),
+ TP_ARGS(dev, status, fe, hl),
+ TP_STRUCT__entry(
+ __string(device, dev_name(dev))
+ __string(host, dev_name(dev->parent))
+ __field(u32, status)
+ __field(u32, first_error)
+ __array(u32, header_log, CXL_HEADERLOG_SIZE_U32)
+ ),
+ TP_fast_assign(
+ __assign_str(device);
+ __assign_str(host);
+ __entry->status = status;
+ __entry->first_error = fe;
+ /*
+		 * Embed the 512B header log data for user-space retrieval and
+		 * parsing; there is no need to print it in the trace buffer.
+ */
+ memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE);
+ ),
+ TP_printk("device=%s host=%s status: '%s' first_error: '%s'",
+ __get_str(device), __get_str(host),
+ show_uc_errs(__entry->status),
+ show_uc_errs(__entry->first_error)
+ )
+);
+
TRACE_EVENT(cxl_aer_uncorrectable_error,
TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 *hl),
TP_ARGS(cxlmd, status, fe, hl),
@@ -96,6 +124,25 @@ TRACE_EVENT(cxl_aer_uncorrectable_error,
{ CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \
)
+TRACE_EVENT(cxl_port_aer_correctable_error,
+ TP_PROTO(struct device *dev, u32 status),
+ TP_ARGS(dev, status),
+ TP_STRUCT__entry(
+ __string(device, dev_name(dev))
+ __string(host, dev_name(dev->parent))
+ __field(u32, status)
+ ),
+ TP_fast_assign(
+ __assign_str(device);
+ __assign_str(host);
+ __entry->status = status;
+ ),
+ TP_printk("device=%s host=%s status='%s'",
+ __get_str(device), __get_str(host),
+ show_ce_errs(__entry->status)
+ )
+);
+
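Each TRACE_EVENT() above generates a trace_cxl_port_aer_uncorrectable_error() / trace_cxl_port_aer_correctable_error() hook. A minimal sketch of how a port RAS handler might emit the correctable variant once it has read the status register (handler name and register plumbing are assumed, not shown by this patch):

static void report_port_cor_ras(struct device *port_dev, u32 status)
{
	/* status is the value read from the port's CXL RAS capability */
	trace_cxl_port_aer_correctable_error(port_dev, status);
}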
TRACE_EVENT(cxl_aer_correctable_error,
TP_PROTO(const struct cxl_memdev *cxlmd, u32 status),
TP_ARGS(cxlmd, status),
@@ -392,9 +439,10 @@ TRACE_EVENT(cxl_generic_event,
TRACE_EVENT(cxl_general_media,
TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
- struct cxl_region *cxlr, u64 hpa, struct cxl_event_gen_media *rec),
+ struct cxl_region *cxlr, u64 hpa, u64 hpa_alias0,
+ struct cxl_event_gen_media *rec),
- TP_ARGS(cxlmd, log, cxlr, hpa, rec),
+ TP_ARGS(cxlmd, log, cxlr, hpa, hpa_alias0, rec),
TP_STRUCT__entry(
CXL_EVT_TP_entry
@@ -408,6 +456,7 @@ TRACE_EVENT(cxl_general_media,
__array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
/* Following are out of order to pack trace record */
__field(u64, hpa)
+ __field(u64, hpa_alias0)
__field_struct(uuid_t, region_uuid)
__field(u16, validity_flags)
__field(u8, rank)
@@ -438,6 +487,7 @@ TRACE_EVENT(cxl_general_media,
CXL_EVENT_GEN_MED_COMP_ID_SIZE);
__entry->validity_flags = get_unaligned_le16(&rec->media_hdr.validity_flags);
__entry->hpa = hpa;
+ __entry->hpa_alias0 = hpa_alias0;
if (cxlr) {
__assign_str(region_name);
uuid_copy(&__entry->region_uuid, &cxlr->params.uuid);
@@ -455,7 +505,7 @@ TRACE_EVENT(cxl_general_media,
"device=%x validity_flags='%s' " \
"comp_id=%s comp_id_pldm_valid_flags='%s' " \
"pldm_entity_id=%s pldm_resource_id=%s " \
- "hpa=%llx region=%s region_uuid=%pUb " \
+ "hpa=%llx hpa_alias0=%llx region=%s region_uuid=%pUb " \
"cme_threshold_ev_flags='%s' cme_count=%u",
__entry->dpa, show_dpa_flags(__entry->dpa_flags),
show_event_desc_flags(__entry->descriptor),
@@ -470,7 +520,7 @@ TRACE_EVENT(cxl_general_media,
CXL_GMER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id),
show_pldm_resource_id(__entry->validity_flags, CXL_GMER_VALID_COMPONENT,
CXL_GMER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id),
- __entry->hpa, __get_str(region_name), &__entry->region_uuid,
+ __entry->hpa, __entry->hpa_alias0, __get_str(region_name), &__entry->region_uuid,
show_cme_threshold_ev_flags(__entry->cme_threshold_ev_flags), __entry->cme_count
)
);
@@ -529,9 +579,10 @@ TRACE_EVENT(cxl_general_media,
TRACE_EVENT(cxl_dram,
TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
- struct cxl_region *cxlr, u64 hpa, struct cxl_event_dram *rec),
+ struct cxl_region *cxlr, u64 hpa, u64 hpa_alias0,
+ struct cxl_event_dram *rec),
- TP_ARGS(cxlmd, log, cxlr, hpa, rec),
+ TP_ARGS(cxlmd, log, cxlr, hpa, hpa_alias0, rec),
TP_STRUCT__entry(
CXL_EVT_TP_entry
@@ -547,6 +598,7 @@ TRACE_EVENT(cxl_dram,
__field(u32, row)
__array(u8, cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE)
__field(u64, hpa)
+ __field(u64, hpa_alias0)
__field_struct(uuid_t, region_uuid)
__field(u8, rank) /* Out of order to pack trace record */
__field(u8, bank_group) /* Out of order to pack trace record */
@@ -584,6 +636,7 @@ TRACE_EVENT(cxl_dram,
memcpy(__entry->cor_mask, &rec->correction_mask,
CXL_EVENT_DER_CORRECTION_MASK_SIZE);
__entry->hpa = hpa;
+ __entry->hpa_alias0 = hpa_alias0;
if (cxlr) {
__assign_str(region_name);
uuid_copy(&__entry->region_uuid, &cxlr->params.uuid);
@@ -604,7 +657,7 @@ TRACE_EVENT(cxl_dram,
"validity_flags='%s' " \
"comp_id=%s comp_id_pldm_valid_flags='%s' " \
"pldm_entity_id=%s pldm_resource_id=%s " \
- "hpa=%llx region=%s region_uuid=%pUb " \
+ "hpa=%llx hpa_alias0=%llx region=%s region_uuid=%pUb " \
"sub_channel=%u cme_threshold_ev_flags='%s' cvme_count=%u",
__entry->dpa, show_dpa_flags(__entry->dpa_flags),
show_event_desc_flags(__entry->descriptor),
@@ -622,7 +675,7 @@ TRACE_EVENT(cxl_dram,
CXL_DER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id),
show_pldm_resource_id(__entry->validity_flags, CXL_DER_VALID_COMPONENT,
CXL_DER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id),
- __entry->hpa, __get_str(region_name), &__entry->region_uuid,
+ __entry->hpa, __entry->hpa_alias0, __get_str(region_name), &__entry->region_uuid,
__entry->sub_channel, show_cme_threshold_ev_flags(__entry->cme_threshold_ev_flags),
__entry->cvme_count
)
@@ -870,6 +923,7 @@ TRACE_EVENT(cxl_poison,
__string(region, cxlr ? dev_name(&cxlr->dev) : "")
__field(u64, overflow_ts)
__field(u64, hpa)
+ __field(u64, hpa_alias0)
__field(u64, dpa)
__field(u32, dpa_length)
__array(char, uuid, 16)
@@ -892,16 +946,22 @@ TRACE_EVENT(cxl_poison,
memcpy(__entry->uuid, &cxlr->params.uuid, 16);
__entry->hpa = cxl_dpa_to_hpa(cxlr, cxlmd,
__entry->dpa);
+ if (__entry->hpa != ULLONG_MAX && cxlr->params.cache_size)
+ __entry->hpa_alias0 = __entry->hpa +
+ cxlr->params.cache_size;
+ else
+ __entry->hpa_alias0 = ULLONG_MAX;
} else {
__assign_str(region);
memset(__entry->uuid, 0, 16);
__entry->hpa = ULLONG_MAX;
+ __entry->hpa_alias0 = ULLONG_MAX;
}
),
TP_printk("memdev=%s host=%s serial=%lld trace_type=%s region=%s " \
- "region_uuid=%pU hpa=0x%llx dpa=0x%llx dpa_length=0x%x " \
- "source=%s flags=%s overflow_time=%llu",
+ "region_uuid=%pU hpa=0x%llx hpa_alias0=0x%llx dpa=0x%llx " \
+ "dpa_length=0x%x source=%s flags=%s overflow_time=%llu",
__get_str(memdev),
__get_str(host),
__entry->serial,
@@ -909,6 +969,7 @@ TRACE_EVENT(cxl_poison,
__get_str(region),
__entry->uuid,
__entry->hpa,
+ __entry->hpa_alias0,
__entry->dpa,
__entry->dpa_length,
show_poison_source(__entry->source),
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index bbbaa0d0a670..3f1695c96abc 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -373,32 +373,6 @@ struct cxl_decoder {
};
/*
- * CXL_DECODER_DEAD prevents endpoints from being reattached to regions
- * while cxld_unregister() is running
- */
-enum cxl_decoder_mode {
- CXL_DECODER_NONE,
- CXL_DECODER_RAM,
- CXL_DECODER_PMEM,
- CXL_DECODER_MIXED,
- CXL_DECODER_DEAD,
-};
-
-static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode)
-{
- static const char * const names[] = {
- [CXL_DECODER_NONE] = "none",
- [CXL_DECODER_RAM] = "ram",
- [CXL_DECODER_PMEM] = "pmem",
- [CXL_DECODER_MIXED] = "mixed",
- };
-
- if (mode >= CXL_DECODER_NONE && mode <= CXL_DECODER_MIXED)
- return names[mode];
- return "mixed";
-}
-
-/*
* Track whether this decoder is reserved for region autodiscovery, or
* free for userspace provisioning.
*/
@@ -412,16 +386,16 @@ enum cxl_decoder_state {
* @cxld: base cxl_decoder_object
* @dpa_res: actively claimed DPA span of this decoder
* @skip: offset into @dpa_res where @cxld.hpa_range maps
- * @mode: which memory type / access-mode-partition this decoder targets
* @state: autodiscovery state
+ * @part: partition index this decoder maps
* @pos: interleave position in @cxld.region
*/
struct cxl_endpoint_decoder {
struct cxl_decoder cxld;
struct resource *dpa_res;
resource_size_t skip;
- enum cxl_decoder_mode mode;
enum cxl_decoder_state state;
+ int part;
int pos;
};
@@ -493,6 +467,7 @@ enum cxl_config_state {
* @res: allocated iomem capacity for this region
* @targets: active ordered targets in current decoder configuration
* @nr_targets: number of targets
+ * @cache_size: extended linear cache size if present, otherwise zero.
*
* State transitions are protected by the cxl_region_rwsem
*/
@@ -504,6 +479,12 @@ struct cxl_region_params {
struct resource *res;
struct cxl_endpoint_decoder *targets[CXL_DECODER_MAX_INTERLEAVE];
int nr_targets;
+ resource_size_t cache_size;
+};
+
+enum cxl_partition_mode {
+ CXL_PARTMODE_RAM,
+ CXL_PARTMODE_PMEM,
};
/*
@@ -525,7 +506,7 @@ struct cxl_region_params {
* struct cxl_region - CXL region
* @dev: This region's device
* @id: This region's id. Id is globally unique across all regions
- * @mode: Endpoint decoder allocation / access mode
+ * @mode: Operational mode of the mapped capacity
* @type: Endpoint decoder target type
* @cxl_nvb: nvdimm bridge for coordinating @cxlr_pmem setup / shutdown
* @cxlr_pmem: (for pmem regions) cached copy of the nvdimm bridge
@@ -538,7 +519,7 @@ struct cxl_region_params {
struct cxl_region {
struct device dev;
int id;
- enum cxl_decoder_mode mode;
+ enum cxl_partition_mode mode;
enum cxl_decoder_type type;
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_pmem_region *cxlr_pmem;
@@ -563,6 +544,7 @@ struct cxl_nvdimm {
struct device dev;
struct cxl_memdev *cxlmd;
u8 dev_id[CXL_DEV_ID_LEN]; /* for nvdimm, string of 'serial' */
+ u64 dirty_shutdowns;
};
struct cxl_pmem_region_mapping {
@@ -680,6 +662,7 @@ struct cxl_rcrb_info {
* @regs: Dport parsed register blocks
* @coord: access coordinates (bandwidth and latency performance attributes)
* @link_latency: calculated PCIe downstream latency
+ * @gpf_dvsec: Cached GPF port DVSEC
*/
struct cxl_dport {
struct device *dport_dev;
@@ -691,6 +674,7 @@ struct cxl_dport {
struct cxl_regs regs;
struct access_coordinate coord[ACCESS_COORDINATE_MAX];
long link_latency;
+ int gpf_dvsec;
};
/**
@@ -740,6 +724,7 @@ static inline bool is_cxl_root(struct cxl_port *port)
int cxl_num_decoders_committed(struct cxl_port *port);
bool is_cxl_port(const struct device *dev);
struct cxl_port *to_cxl_port(const struct device *dev);
+struct cxl_port *parent_port_of(struct cxl_port *port);
void cxl_port_commit_reap(struct cxl_decoder *cxld);
struct pci_bus;
int devm_cxl_register_pci_bus(struct device *host, struct device *uport_dev,
@@ -752,10 +737,12 @@ struct cxl_port *devm_cxl_add_port(struct device *host,
struct cxl_root *devm_cxl_add_root(struct device *host,
const struct cxl_root_ops *ops);
struct cxl_root *find_cxl_root(struct cxl_port *port);
-void put_cxl_root(struct cxl_root *cxl_root);
-DEFINE_FREE(put_cxl_root, struct cxl_root *, if (_T) put_cxl_root(_T))
+DEFINE_FREE(put_cxl_root, struct cxl_root *, if (_T) put_device(&_T->port.dev))
DEFINE_FREE(put_cxl_port, struct cxl_port *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev))
+DEFINE_FREE(put_cxl_root_decoder, struct cxl_root_decoder *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxlsd.cxld.dev))
+DEFINE_FREE(put_cxl_region, struct cxl_region *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev))
+
int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
void cxl_bus_rescan(void);
void cxl_bus_drain(void);
@@ -872,9 +859,9 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port);
#ifdef CONFIG_CXL_REGION
bool is_cxl_pmem_region(struct device *dev);
struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);
-int cxl_add_to_region(struct cxl_port *root,
- struct cxl_endpoint_decoder *cxled);
+int cxl_add_to_region(struct cxl_endpoint_decoder *cxled);
struct cxl_dax_region *to_cxl_dax_region(struct device *dev);
+u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa);
#else
static inline bool is_cxl_pmem_region(struct device *dev)
{
@@ -884,8 +871,7 @@ static inline struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
{
return NULL;
}
-static inline int cxl_add_to_region(struct cxl_port *root,
- struct cxl_endpoint_decoder *cxled)
+static inline int cxl_add_to_region(struct cxl_endpoint_decoder *cxled)
{
return 0;
}
@@ -893,6 +879,11 @@ static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
{
return NULL;
}
+static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint,
+ u64 spa)
+{
+ return 0;
+}
#endif
void cxl_endpoint_parse_cdat(struct cxl_port *port);
@@ -920,4 +911,16 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port);
#define __mock static
#endif
+u16 cxl_gpf_get_dvsec(struct device *dev);
+
+static inline struct rw_semaphore *rwsem_read_intr_acquire(struct rw_semaphore *rwsem)
+{
+ if (down_read_interruptible(rwsem))
+ return NULL;
+
+ return rwsem;
+}
+
+DEFINE_FREE(rwsem_read_release, struct rw_semaphore *, if (_T) up_read(_T))
+
#endif /* __CXL_H__ */
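The rwsem_read_intr_acquire() helper pairs with the rwsem_read_release cleanup class to provide an interruptible, scope-bound read lock. A minimal sketch of a core-internal caller, assuming access to cxl_region_rwsem (the function is illustrative only):

static int read_region_state(struct cxl_region *cxlr)
{
	struct rw_semaphore *rwsem __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_region_rwsem);

	if (!rwsem)
		return -EINTR;

	/* cxl_region_rwsem is held for read until this scope exits */
	return cxlr->params.state >= CXL_CONFIG_INTERLEAVE_ACTIVE ? 0 : -ENXIO;
}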
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 2a25d1957ddb..551b0ba2caa1 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -45,6 +45,11 @@
* @endpoint: connection to the CXL port topology for this memory device
* @id: id number of this memdev instance.
* @depth: endpoint port depth
+ * @scrub_cycle: current scrub cycle set for this device
+ * @scrub_region_id: id of the backing region (if any) for which the current scrub cycle is set
+ * @err_rec_array: list of xarrays storing the memdev error records, used to
+ *		check that the attributes for a memory repair operation are
+ *		from the current boot.
*/
struct cxl_memdev {
struct device dev;
@@ -56,6 +61,9 @@ struct cxl_memdev {
struct cxl_port *endpoint;
int id;
int depth;
+ u8 scrub_cycle;
+ int scrub_region_id;
+ void *err_rec_array;
};
static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
@@ -97,6 +105,19 @@ int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
resource_size_t base, resource_size_t len,
resource_size_t skipped);
+#define CXL_NR_PARTITIONS_MAX 2
+
+struct cxl_dpa_info {
+ u64 size;
+ struct cxl_dpa_part_info {
+ struct range range;
+ enum cxl_partition_mode mode;
+ } part[CXL_NR_PARTITIONS_MAX];
+ int nr_partitions;
+};
+
+int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info);
+
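cxl_mem_dpa_fetch() fills a struct cxl_dpa_info from the device and cxl_dpa_setup() then turns it into the cxlds->part[] layout. A minimal sketch that hand-builds a single volatile partition, e.g. for a test fixture (illustrative; real callers populate the info from the mailbox):

static int setup_single_ram_partition(struct cxl_dev_state *cxlds,
				      u64 ram_size)
{
	struct cxl_dpa_info info = {
		.size = ram_size,
		.nr_partitions = 1,
		.part = {
			{
				.range = {
					.start = 0,
					.end = ram_size - 1,
				},
				.mode = CXL_PARTMODE_RAM,
			},
		},
	};

	return cxl_dpa_setup(cxlds, &info);
}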
static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
struct cxl_memdev *cxlmd)
{
@@ -106,42 +127,6 @@ static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev);
}
-/**
- * struct cxl_mbox_cmd - A command to be submitted to hardware.
- * @opcode: (input) The command set and command submitted to hardware.
- * @payload_in: (input) Pointer to the input payload.
- * @payload_out: (output) Pointer to the output payload. Must be allocated by
- * the caller.
- * @size_in: (input) Number of bytes to load from @payload_in.
- * @size_out: (input) Max number of bytes loaded into @payload_out.
- * (output) Number of bytes generated by the device. For fixed size
- * outputs commands this is always expected to be deterministic. For
- * variable sized output commands, it tells the exact number of bytes
- * written.
- * @min_out: (input) internal command output payload size validation
- * @poll_count: (input) Number of timeouts to attempt.
- * @poll_interval_ms: (input) Time between mailbox background command polling
- * interval timeouts.
- * @return_code: (output) Error code returned from hardware.
- *
- * This is the primary mechanism used to send commands to the hardware.
- * All the fields except @payload_* correspond exactly to the fields described in
- * Command Register section of the CXL 2.0 8.2.8.4.5. @payload_in and
- * @payload_out are written to, and read from the Command Payload Registers
- * defined in CXL 2.0 8.2.8.4.8.
- */
-struct cxl_mbox_cmd {
- u16 opcode;
- void *payload_in;
- void *payload_out;
- size_t size_in;
- size_t size_out;
- size_t min_out;
- int poll_count;
- int poll_interval_ms;
- u16 return_code;
-};
-
/*
* Per CXL 3.0 Section 8.2.8.4.5.1
*/
@@ -409,6 +394,18 @@ struct cxl_dpa_perf {
};
/**
+ * struct cxl_dpa_partition - DPA partition descriptor
+ * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res)
+ * @perf: performance attributes of the partition from CDAT
+ * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic...
+ */
+struct cxl_dpa_partition {
+ struct resource res;
+ struct cxl_dpa_perf perf;
+ enum cxl_partition_mode mode;
+};
+
+/**
* struct cxl_dev_state - The driver device state
*
* cxl_dev_state represents the CXL driver/device state. It provides an
@@ -423,11 +420,12 @@ struct cxl_dpa_perf {
* @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH)
* @media_ready: Indicate whether the device media is usable
* @dpa_res: Overall DPA resource tree for the device
- * @pmem_res: Active Persistent memory capacity configuration
- * @ram_res: Active Volatile memory capacity configuration
+ * @part: DPA partition array
+ * @nr_partitions: Number of DPA partitions
* @serial: PCIe Device Serial Number
* @type: Generic Memory Class device or Vendor Specific Memory device
* @cxl_mbox: CXL mailbox context
+ * @cxlfs: CXL features context
*/
struct cxl_dev_state {
struct device *dev;
@@ -438,13 +436,28 @@ struct cxl_dev_state {
bool rcd;
bool media_ready;
struct resource dpa_res;
- struct resource pmem_res;
- struct resource ram_res;
+ struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX];
+ unsigned int nr_partitions;
u64 serial;
enum cxl_devtype type;
struct cxl_mailbox cxl_mbox;
+#ifdef CONFIG_CXL_FEATURES
+ struct cxl_features_state *cxlfs;
+#endif
};
+static inline resource_size_t cxl_pmem_size(struct cxl_dev_state *cxlds)
+{
+ /*
+ * Static PMEM may be at partition index 0 when there is no static RAM
+ * capacity.
+ */
+ for (int i = 0; i < cxlds->nr_partitions; i++)
+ if (cxlds->part[i].mode == CXL_PARTMODE_PMEM)
+ return resource_size(&cxlds->part[i].res);
+ return 0;
+}
+
static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox)
{
return dev_get_drvdata(cxl_mbox->host);
@@ -461,22 +474,17 @@ static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox)
* @lsa_size: Size of Label Storage Area
* (CXL 2.0 8.2.9.5.1.1 Identify Memory Device)
* @firmware_version: Firmware version for the memory device.
- * @enabled_cmds: Hardware commands found enabled in CEL.
- * @exclusive_cmds: Commands that are kernel-internal only
* @total_bytes: sum of all possible capacities
* @volatile_only_bytes: hard volatile capacity
* @persistent_only_bytes: hard persistent capacity
* @partition_align_bytes: alignment size for partition-able capacity
* @active_volatile_bytes: sum of hard + soft volatile
* @active_persistent_bytes: sum of hard + soft persistent
- * @next_volatile_bytes: volatile capacity change pending device reset
- * @next_persistent_bytes: persistent capacity change pending device reset
- * @ram_perf: performance data entry matched to RAM partition
- * @pmem_perf: performance data entry matched to PMEM partition
* @event: event log driver state
* @poison: poison driver state info
* @security: security driver state info
* @fw: firmware upload / activation state
+ * @mce_notifier: MCE notifier
*
* See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for
* details on capacity parameters.
@@ -485,24 +493,18 @@ struct cxl_memdev_state {
struct cxl_dev_state cxlds;
size_t lsa_size;
char firmware_version[0x10];
- DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX);
- DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
u64 total_bytes;
u64 volatile_only_bytes;
u64 persistent_only_bytes;
u64 partition_align_bytes;
u64 active_volatile_bytes;
u64 active_persistent_bytes;
- u64 next_volatile_bytes;
- u64 next_persistent_bytes;
-
- struct cxl_dpa_perf ram_perf;
- struct cxl_dpa_perf pmem_perf;
struct cxl_event_state event;
struct cxl_poison_state poison;
struct cxl_security_state security;
struct cxl_fw_state fw;
+ struct notifier_block mce_notifier;
};
static inline struct cxl_memdev_state *
@@ -530,6 +532,10 @@ enum cxl_opcode {
CXL_MBOX_OP_GET_LOG_CAPS = 0x0402,
CXL_MBOX_OP_CLEAR_LOG = 0x0403,
CXL_MBOX_OP_GET_SUP_LOG_SUBLIST = 0x0405,
+ CXL_MBOX_OP_GET_SUPPORTED_FEATURES = 0x0500,
+ CXL_MBOX_OP_GET_FEATURE = 0x0501,
+ CXL_MBOX_OP_SET_FEATURE = 0x0502,
+ CXL_MBOX_OP_DO_MAINTENANCE = 0x0600,
CXL_MBOX_OP_IDENTIFY = 0x4000,
CXL_MBOX_OP_GET_PARTITION_INFO = 0x4100,
CXL_MBOX_OP_SET_PARTITION_INFO = 0x4101,
@@ -693,6 +699,23 @@ struct cxl_mbox_set_partition_info {
#define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0)
+/* Get Health Info Output Payload CXL 3.2 Spec 8.2.10.9.3.1 Table 8-148 */
+struct cxl_mbox_get_health_info_out {
+ u8 health_status;
+ u8 media_status;
+ u8 additional_status;
+ u8 life_used;
+ __le16 device_temperature;
+ __le32 dirty_shutdown_cnt;
+ __le32 corrected_volatile_error_cnt;
+ __le32 corrected_persistent_error_cnt;
+} __packed;
+
+/* Set Shutdown State Input Payload CXL 3.2 Spec 8.2.10.9.3.5 Table 8-152 */
+struct cxl_mbox_set_shutdown_state_in {
+ u8 state;
+} __packed;
+
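These payloads back the cxl_get_dirty_count() and cxl_arm_dirty_shutdown() helpers declared further below. A minimal sketch of the get path, assuming CXL_MBOX_OP_GET_HEALTH_INFO and struct cxl_mbox_cmd from the generic mailbox header (the function name is illustrative):

static int get_dirty_shutdown_count(struct cxl_memdev_state *mds, u32 *count)
{
	struct cxl_mbox_get_health_info_out hi;
	struct cxl_mbox_cmd mbox_cmd = {
		.opcode = CXL_MBOX_OP_GET_HEALTH_INFO,
		.size_out = sizeof(hi),
		.payload_out = &hi,
		.min_out = sizeof(hi),
	};
	int rc;

	rc = cxl_internal_send_cmd(&mds->cxlds.cxl_mbox, &mbox_cmd);
	if (rc)
		return rc;

	*count = le32_to_cpu(hi.dirty_shutdown_cnt);
	return 0;
}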
/* Set Timestamp CXL 3.0 Spec 8.2.9.4.2 */
struct cxl_mbox_set_timestamp_in {
__le64 timestamp;
@@ -818,7 +841,7 @@ int cxl_internal_send_cmd(struct cxl_mailbox *cxl_mbox,
int cxl_dev_state_identify(struct cxl_memdev_state *mds);
int cxl_await_media_ready(struct cxl_dev_state *cxlds);
int cxl_enumerate_cmds(struct cxl_memdev_state *mds);
-int cxl_mem_create_range_info(struct cxl_memdev_state *mds);
+int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info);
struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev);
void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
unsigned long *cmds);
@@ -829,6 +852,8 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
enum cxl_event_log_type type,
enum cxl_event_type event_type,
const uuid_t *uuid, union cxl_event *evt);
+int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count);
+int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds);
int cxl_set_timestamp(struct cxl_memdev_state *mds);
int cxl_poison_state_init(struct cxl_memdev_state *mds);
int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
@@ -837,6 +862,27 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd);
int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa);
int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa);
+#ifdef CONFIG_CXL_EDAC_MEM_FEATURES
+int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd);
+int devm_cxl_region_edac_register(struct cxl_region *cxlr);
+int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt);
+int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt);
+void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd);
+#else
+static inline int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
+{ return 0; }
+static inline int devm_cxl_region_edac_register(struct cxl_region *cxlr)
+{ return 0; }
+static inline int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd,
+ union cxl_event *evt)
+{ return 0; }
+static inline int cxl_store_rec_dram(struct cxl_memdev *cxlmd,
+ union cxl_event *evt)
+{ return 0; }
+static inline void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd)
+{ return; }
+#endif
+
#ifdef CONFIG_CXL_SUSPEND
void cxl_mem_active_inc(void);
void cxl_mem_active_dec(void);
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 4da07727ab9c..54e219b0049e 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -40,6 +40,12 @@
/* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */
#define CXL_DVSEC_PORT_GPF 4
+#define CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET 0x0C
+#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK GENMASK(3, 0)
+#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK GENMASK(11, 8)
+#define CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE
+#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK GENMASK(3, 0)
+#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK GENMASK(11, 8)
/* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */
#define CXL_DVSEC_DEVICE_GPF 5
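The Phase 1 and Phase 2 timeout fields are packed base/scale pairs inside the respective GPF DVSEC control registers. A minimal sketch of extracting them with FIELD_GET() from <linux/bitfield.h> (converting scale to an actual time unit follows the CXL spec and is not shown; the function name is illustrative):

static void decode_gpf_phase1(u16 phase1_ctrl)
{
	u16 base = FIELD_GET(CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK,
			     phase1_ctrl);
	u16 scale = FIELD_GET(CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK,
			      phase1_ctrl);

	pr_debug("GPF phase 1 timeout: base=%u scale=%u\n", base, scale);
}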
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 2f03a4d5606e..6e6777b7bafb 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -152,7 +152,7 @@ static int cxl_mem_probe(struct device *dev)
return -ENXIO;
}
- if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) {
+ if (cxl_pmem_size(cxlds) && IS_ENABLED(CONFIG_CXL_PMEM)) {
rc = devm_cxl_add_nvdimm(parent_port, cxlmd);
if (rc) {
if (rc == -ENODEV)
@@ -180,6 +180,10 @@ static int cxl_mem_probe(struct device *dev)
return rc;
}
+ rc = devm_cxl_memdev_edac_register(cxlmd);
+ if (rc)
+ dev_dbg(dev, "CXL memdev EDAC registration failed rc=%d\n", rc);
+
/*
* The kernel may be operating out of CXL memory on this device,
* there is no spec defined way to determine whether this device
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index a96e54c6259e..785aa2af5eaa 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -903,6 +903,7 @@ __ATTRIBUTE_GROUPS(cxl_rcd);
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
+ struct cxl_dpa_info range_info = { 0 };
struct cxl_memdev_state *mds;
struct cxl_dev_state *cxlds;
struct cxl_register_map map;
@@ -993,10 +994,18 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
- rc = cxl_mem_create_range_info(mds);
+ rc = cxl_mem_dpa_fetch(mds, &range_info);
if (rc)
return rc;
+ rc = cxl_dpa_setup(cxlds, &range_info);
+ if (rc)
+ return rc;
+
+ rc = devm_cxl_setup_features(cxlds);
+ if (rc)
+ dev_dbg(&pdev->dev, "No CXL Features discovered\n");
+
cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
@@ -1009,6 +1018,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
+ rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd);
+ if (rc)
+ dev_dbg(&pdev->dev, "No CXL FWCTL setup\n");
+
pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU);
if (pmu_count < 0)
return pmu_count;
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index f9c95996e937..e197883690ef 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -42,15 +42,44 @@ static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *
}
static DEVICE_ATTR_RO(id);
+static ssize_t dirty_shutdown_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
+
+ return sysfs_emit(buf, "%llu\n", cxl_nvd->dirty_shutdowns);
+}
+static DEVICE_ATTR_RO(dirty_shutdown);
+
static struct attribute *cxl_dimm_attributes[] = {
&dev_attr_id.attr,
&dev_attr_provider.attr,
+ &dev_attr_dirty_shutdown.attr,
NULL
};
+#define CXL_INVALID_DIRTY_SHUTDOWN_COUNT ULLONG_MAX
+static umode_t cxl_dimm_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ if (a == &dev_attr_dirty_shutdown.attr) {
+ struct device *dev = kobj_to_dev(kobj);
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
+
+ if (cxl_nvd->dirty_shutdowns ==
+ CXL_INVALID_DIRTY_SHUTDOWN_COUNT)
+ return 0;
+ }
+
+ return a->mode;
+}
+
static const struct attribute_group cxl_dimm_attribute_group = {
.name = "cxl",
.attrs = cxl_dimm_attributes,
+ .is_visible = cxl_dimm_visible
};
static const struct attribute_group *cxl_dimm_attribute_groups[] = {
@@ -58,6 +87,38 @@ static const struct attribute_group *cxl_dimm_attribute_groups[] = {
NULL
};
+static void cxl_nvdimm_arm_dirty_shutdown_tracking(struct cxl_nvdimm *cxl_nvd)
+{
+ struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+ struct device *dev = &cxl_nvd->dev;
+ u32 count;
+
+ /*
+	 * Dirty tracking is enabled and exposed to the user only when:
+	 * - dirty shutdown can be set on the device, and
+	 * - the device has a Device GPF DVSEC (albeit unused), and
+	 * - the Get Health Info command can retrieve the device's dirty count.
+ */
+ cxl_nvd->dirty_shutdowns = CXL_INVALID_DIRTY_SHUTDOWN_COUNT;
+
+ if (cxl_arm_dirty_shutdown(mds)) {
+ dev_warn(dev, "GPF: could not set dirty shutdown state\n");
+ return;
+ }
+
+ if (!cxl_gpf_get_dvsec(cxlds->dev))
+ return;
+
+ if (cxl_get_dirty_count(mds, &count)) {
+ dev_warn(dev, "GPF: could not retrieve dirty count\n");
+ return;
+ }
+
+ cxl_nvd->dirty_shutdowns = count;
+}
+
static int cxl_nvdimm_probe(struct device *dev)
{
struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
@@ -78,6 +139,14 @@ static int cxl_nvdimm_probe(struct device *dev)
set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
+
+ /*
+ * Set dirty shutdown now, with the expectation that the device
+ * clear it upon a successful GPF flow. The exception to this
+ * is upon Viral detection, per CXL 3.2 section 12.4.2.
+ */
+ cxl_nvdimm_arm_dirty_shutdown_tracking(cxl_nvd);
+
nvdimm = __nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd,
cxl_dimm_attribute_groups, flags,
cmd_mask, 0, NULL, cxl_nvd->dev_id,
@@ -375,6 +444,16 @@ static int cxl_pmem_region_probe(struct device *dev)
goto out_nvd;
}
+ if (cxlds->serial == 0) {
+ /* include missing alongside invalid in this error message. */
+ dev_err(dev, "%s: invalid or missing serial number\n",
+ dev_name(&cxlmd->dev));
+ rc = -ENXIO;
+ goto out_nvd;
+ }
+ info[i].serial = cxlds->serial;
+ info[i].offset = m->start;
+
m->cxl_nvd = cxl_nvd;
mappings[i] = (struct nd_mapping_desc) {
.nvdimm = nvdimm,
@@ -382,8 +461,6 @@ static int cxl_pmem_region_probe(struct device *dev)
.size = m->size,
.position = i,
};
- info[i].offset = m->start;
- info[i].serial = cxlds->serial;
}
ndr_desc.num_mappings = cxlr_pmem->nr_mappings;
ndr_desc.mapping = mappings;
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index d2bfd1ff5492..fe4b593331da 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -30,7 +30,7 @@ static void schedule_detach(void *cxlmd)
schedule_cxl_memdev_detach(cxlmd);
}
-static int discover_region(struct device *dev, void *root)
+static int discover_region(struct device *dev, void *unused)
{
struct cxl_endpoint_decoder *cxled;
int rc;
@@ -49,7 +49,7 @@ static int discover_region(struct device *dev, void *root)
* Region enumeration is opportunistic, if this add-event fails,
* continue to the next endpoint decoder.
*/
- rc = cxl_add_to_region(root, cxled);
+ rc = cxl_add_to_region(cxled);
if (rc)
dev_dbg(dev, "failed to add to region: %#llx-%#llx\n",
cxled->cxld.hpa_range.start, cxled->cxld.hpa_range.end);
@@ -95,7 +95,6 @@ static int cxl_endpoint_port_probe(struct cxl_port *port)
struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_hdm *cxlhdm;
- struct cxl_port *root;
int rc;
rc = cxl_dvsec_rr_decode(cxlds, &info);
@@ -127,18 +126,10 @@ static int cxl_endpoint_port_probe(struct cxl_port *port)
return rc;
/*
- * This can't fail in practice as CXL root exit unregisters all
- * descendant ports and that in turn synchronizes with cxl_port_probe()
- */
- struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
-
- root = &cxl_root->port;
-
- /*
* Now that all endpoint decoders are successfully enumerated, try to
* assemble regions from committed decoders
*/
- device_for_each_child(&port->dev, root, discover_region);
+ device_for_each_child(&port->dev, NULL, discover_region);
return 0;
}
@@ -153,7 +144,7 @@ static int cxl_port_probe(struct device *dev)
}
static ssize_t CDAT_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *buf,
+ const struct bin_attribute *bin_attr, char *buf,
loff_t offset, size_t count)
{
struct device *dev = kobj_to_dev(kobj);
@@ -170,7 +161,7 @@ static ssize_t CDAT_read(struct file *filp, struct kobject *kobj,
port->cdat.length);
}
-static BIN_ATTR_ADMIN_RO(CDAT, 0);
+static const BIN_ATTR_ADMIN_RO(CDAT, 0);
static umode_t cxl_port_bin_attr_is_visible(struct kobject *kobj,
const struct bin_attribute *attr, int i)
@@ -184,13 +175,13 @@ static umode_t cxl_port_bin_attr_is_visible(struct kobject *kobj,
return 0;
}
-static struct bin_attribute *cxl_cdat_bin_attributes[] = {
+static const struct bin_attribute *const cxl_cdat_bin_attributes[] = {
&bin_attr_CDAT,
NULL,
};
-static struct attribute_group cxl_cdat_attribute_group = {
- .bin_attrs = cxl_cdat_bin_attributes,
+static const struct attribute_group cxl_cdat_attribute_group = {
+ .bin_attrs_new = cxl_cdat_bin_attributes,
.is_bin_visible = cxl_port_bin_attr_is_visible,
};