-rw-r--r--  Documentation/ABI/testing/sysfs-bus-cxl | 26
-rw-r--r--  drivers/acpi/apei/ghes.c | 103
-rw-r--r--  drivers/cxl/core/Makefile | 1
-rw-r--r--  drivers/cxl/core/cdat.c | 102
-rw-r--r--  drivers/cxl/core/core.h | 6
-rw-r--r--  drivers/cxl/core/hdm.c | 382
-rw-r--r--  drivers/cxl/core/mbox.c | 80
-rw-r--r--  drivers/cxl/core/memdev.c | 83
-rw-r--r--  drivers/cxl/core/port.c | 36
-rw-r--r--  drivers/cxl/core/ras.c | 119
-rw-r--r--  drivers/cxl/core/region.c | 225
-rw-r--r--  drivers/cxl/core/trace.h | 47
-rw-r--r--  drivers/cxl/cxl.h | 39
-rw-r--r--  drivers/cxl/cxlmem.h | 56
-rw-r--r--  drivers/cxl/mem.c | 2
-rw-r--r--  drivers/cxl/pci.c | 7
-rw-r--r--  drivers/cxl/pmem.c | 12
-rw-r--r--  drivers/firmware/efi/cper.c | 6
-rw-r--r--  drivers/firmware/efi/cper_cxl.c | 39
-rw-r--r--  drivers/firmware/efi/cper_cxl.h | 66
-rw-r--r--  include/cxl/event.h | 101
-rw-r--r--  include/linux/cper.h | 8
-rw-r--r--  tools/testing/cxl/Kbuild | 1
-rw-r--r--  tools/testing/cxl/test/cxl.c | 22
-rw-r--r--  tools/testing/cxl/test/mem.c | 9
25 files changed, 1025 insertions, 553 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
index a7491d214098..6d911f046a78 100644
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -1,5 +1,5 @@
What: /sys/bus/cxl/flush
-Date: Januarry, 2022
+Date: January, 2022
KernelVersion: v5.18
Contact: linux-cxl@vger.kernel.org
Description:
@@ -18,6 +18,24 @@ Description:
specification.
+What: /sys/bus/cxl/devices/memX/payload_max
+Date: December, 2020
+KernelVersion: v5.12
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) Maximum size (in bytes) of the mailbox command payload
+ registers. Linux caps this at 1MB if the device reports a
+ larger size.
+
+
+What: /sys/bus/cxl/devices/memX/label_storage_size
+Date: May, 2021
+KernelVersion: v5.13
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) Size (in bytes) of the Label Storage Area (LSA).
+
+
What: /sys/bus/cxl/devices/memX/ram/size
Date: December, 2020
KernelVersion: v5.12
@@ -33,7 +51,7 @@ Date: May, 2023
KernelVersion: v6.8
Contact: linux-cxl@vger.kernel.org
Description:
- (RO) For CXL host platforms that support "QoS Telemmetry"
+ (RO) For CXL host platforms that support "QoS Telemetry"
this attribute conveys a comma delimited list of platform
specific cookies that identifies a QoS performance class
for the volatile partition of the CXL mem device. These
@@ -60,7 +78,7 @@ Date: May, 2023
KernelVersion: v6.8
Contact: linux-cxl@vger.kernel.org
Description:
- (RO) For CXL host platforms that support "QoS Telemmetry"
+ (RO) For CXL host platforms that support "QoS Telemetry"
this attribute conveys a comma delimited list of platform
specific cookies that identifies a QoS performance class
for the persistent partition of the CXL mem device. These
@@ -423,7 +441,7 @@ Date: May, 2023
KernelVersion: v6.5
Contact: linux-cxl@vger.kernel.org
Description:
- (RO) For CXL host platforms that support "QoS Telemmetry" this
+ (RO) For CXL host platforms that support "QoS Telemetry" this
root-decoder-only attribute conveys a platform specific cookie
that identifies a QoS performance class for the CXL Window.
This class-id can be compared against a similar "qos_class"
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index b72772494655..289e365f84b2 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -674,6 +674,105 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
schedule_work(&entry->work);
}
+/* Room for 8 entries */
+#define CXL_CPER_PROT_ERR_FIFO_DEPTH 8
+static DEFINE_KFIFO(cxl_cper_prot_err_fifo, struct cxl_cper_prot_err_work_data,
+ CXL_CPER_PROT_ERR_FIFO_DEPTH);
+
+/* Synchronize schedule_work() with cxl_cper_prot_err_work changes */
+static DEFINE_SPINLOCK(cxl_cper_prot_err_work_lock);
+struct work_struct *cxl_cper_prot_err_work;
+
+static void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err,
+ int severity)
+{
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+ struct cxl_cper_prot_err_work_data wd;
+ u8 *dvsec_start, *cap_start;
+
+ if (!(prot_err->valid_bits & PROT_ERR_VALID_AGENT_ADDRESS)) {
+ pr_err_ratelimited("CXL CPER invalid agent type\n");
+ return;
+ }
+
+ if (!(prot_err->valid_bits & PROT_ERR_VALID_ERROR_LOG)) {
+ pr_err_ratelimited("CXL CPER invalid protocol error log\n");
+ return;
+ }
+
+ if (prot_err->err_len != sizeof(struct cxl_ras_capability_regs)) {
+ pr_err_ratelimited("CXL CPER invalid RAS Cap size (%u)\n",
+ prot_err->err_len);
+ return;
+ }
+
+ if (!(prot_err->valid_bits & PROT_ERR_VALID_SERIAL_NUMBER))
+ pr_warn(FW_WARN "CXL CPER no device serial number\n");
+
+ guard(spinlock_irqsave)(&cxl_cper_prot_err_work_lock);
+
+ if (!cxl_cper_prot_err_work)
+ return;
+
+ switch (prot_err->agent_type) {
+ case RCD:
+ case DEVICE:
+ case LD:
+ case FMLD:
+ case RP:
+ case DSP:
+ case USP:
+ memcpy(&wd.prot_err, prot_err, sizeof(wd.prot_err));
+
+ dvsec_start = (u8 *)(prot_err + 1);
+ cap_start = dvsec_start + prot_err->dvsec_len;
+
+ memcpy(&wd.ras_cap, cap_start, sizeof(wd.ras_cap));
+ wd.severity = cper_severity_to_aer(severity);
+ break;
+ default:
+ pr_err_ratelimited("CXL CPER invalid agent type: %d\n",
+ prot_err->agent_type);
+ return;
+ }
+
+ if (!kfifo_put(&cxl_cper_prot_err_fifo, wd)) {
+ pr_err_ratelimited("CXL CPER kfifo overflow\n");
+ return;
+ }
+
+ schedule_work(cxl_cper_prot_err_work);
+#endif
+}
+
+int cxl_cper_register_prot_err_work(struct work_struct *work)
+{
+ if (cxl_cper_prot_err_work)
+ return -EINVAL;
+
+ guard(spinlock)(&cxl_cper_prot_err_work_lock);
+ cxl_cper_prot_err_work = work;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_register_prot_err_work, "CXL");
+
+int cxl_cper_unregister_prot_err_work(struct work_struct *work)
+{
+ if (cxl_cper_prot_err_work != work)
+ return -EINVAL;
+
+ guard(spinlock)(&cxl_cper_prot_err_work_lock);
+ cxl_cper_prot_err_work = NULL;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_prot_err_work, "CXL");
+
+int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd)
+{
+ return kfifo_get(&cxl_cper_prot_err_fifo, wd);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_prot_err_kfifo_get, "CXL");
+
/* Room for 8 entries for each of the 4 event log queues */
#define CXL_CPER_FIFO_DEPTH 32
DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH);
@@ -777,6 +876,10 @@ static bool ghes_do_proc(struct ghes *ghes,
}
else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
queued = ghes_handle_arm_hw_error(gdata, sev, sync);
+ } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) {
+ struct cxl_cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata);
+
+ cxl_cper_post_prot_err(prot_err, gdata->error_severity);
} else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) {
struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata);
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 9259bcc6773c..ba5f0916d379 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -14,5 +14,6 @@ cxl_core-y += pci.o
cxl_core-y += hdm.o
cxl_core-y += pmu.o
cxl_core-y += cdat.o
+cxl_core-y += ras.o
cxl_core-$(CONFIG_TRACING) += trace.o
cxl_core-$(CONFIG_CXL_REGION) += region.o
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index 8153f8d83a16..edb4f41eeacc 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -258,27 +258,29 @@ static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
struct xarray *dsmas_xa)
{
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
struct device *dev = cxlds->dev;
- struct range pmem_range = {
- .start = cxlds->pmem_res.start,
- .end = cxlds->pmem_res.end,
- };
- struct range ram_range = {
- .start = cxlds->ram_res.start,
- .end = cxlds->ram_res.end,
- };
struct dsmas_entry *dent;
unsigned long index;
xa_for_each(dsmas_xa, index, dent) {
- if (resource_size(&cxlds->ram_res) &&
- range_contains(&ram_range, &dent->dpa_range))
- update_perf_entry(dev, dent, &mds->ram_perf);
- else if (resource_size(&cxlds->pmem_res) &&
- range_contains(&pmem_range, &dent->dpa_range))
- update_perf_entry(dev, dent, &mds->pmem_perf);
- else
+ bool found = false;
+
+ for (int i = 0; i < cxlds->nr_partitions; i++) {
+ struct resource *res = &cxlds->part[i].res;
+ struct range range = {
+ .start = res->start,
+ .end = res->end,
+ };
+
+ if (range_contains(&range, &dent->dpa_range)) {
+ update_perf_entry(dev, dent,
+ &cxlds->part[i].perf);
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
dev_dbg(dev, "no partition for dsmas dpa: %pra\n",
&dent->dpa_range);
}
@@ -343,36 +345,46 @@ static int match_cxlrd_hb(struct device *dev, void *data)
return 0;
}
-static int cxl_qos_class_verify(struct cxl_memdev *cxlmd)
+static void cxl_qos_class_verify(struct cxl_memdev *cxlmd)
{
struct cxl_dev_state *cxlds = cxlmd->cxlds;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
struct cxl_port *root_port;
- int rc;
struct cxl_root *cxl_root __free(put_cxl_root) =
find_cxl_root(cxlmd->endpoint);
+ /*
+ * No need to reset_dpa_perf() here as find_cxl_root() is guaranteed to
+ * succeed when called in the cxl_endpoint_port_probe() path.
+ */
if (!cxl_root)
- return -ENODEV;
+ return;
root_port = &cxl_root->port;
- /* Check that the QTG IDs are all sane between end device and root decoders */
- if (!cxl_qos_match(root_port, &mds->ram_perf))
- reset_dpa_perf(&mds->ram_perf);
- if (!cxl_qos_match(root_port, &mds->pmem_perf))
- reset_dpa_perf(&mds->pmem_perf);
-
- /* Check to make sure that the device's host bridge is under a root decoder */
- rc = device_for_each_child(&root_port->dev,
- cxlmd->endpoint->host_bridge, match_cxlrd_hb);
- if (!rc) {
- reset_dpa_perf(&mds->ram_perf);
- reset_dpa_perf(&mds->pmem_perf);
+ /*
+ * Save userspace from needing to check if a qos class has any matches
+ * by hiding qos class info if the memdev is not mapped by a root
+ * decoder, or the partition class does not match any root decoder
+ * class.
+ */
+ if (!device_for_each_child(&root_port->dev,
+ cxlmd->endpoint->host_bridge,
+ match_cxlrd_hb)) {
+ for (int i = 0; i < cxlds->nr_partitions; i++) {
+ struct cxl_dpa_perf *perf = &cxlds->part[i].perf;
+
+ reset_dpa_perf(perf);
+ }
+ return;
}
- return rc;
+ for (int i = 0; i < cxlds->nr_partitions; i++) {
+ struct cxl_dpa_perf *perf = &cxlds->part[i].perf;
+
+ if (!cxl_qos_match(root_port, perf))
+ reset_dpa_perf(perf);
+ }
}
static void discard_dsmas(struct xarray *xa)
@@ -570,23 +582,18 @@ static bool dpa_perf_contains(struct cxl_dpa_perf *perf,
return range_contains(&perf->dpa_range, &dpa);
}
-static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxled,
- enum cxl_decoder_mode mode)
+static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxled)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_dpa_perf *perf;
- switch (mode) {
- case CXL_DECODER_RAM:
- perf = &mds->ram_perf;
- break;
- case CXL_DECODER_PMEM:
- perf = &mds->pmem_perf;
- break;
- default:
+ if (cxled->part < 0)
+ return ERR_PTR(-EINVAL);
+ perf = &cxlds->part[cxled->part].perf;
+
+ if (!perf)
return ERR_PTR(-EINVAL);
- }
if (!dpa_perf_contains(perf, cxled->dpa_res))
return ERR_PTR(-EINVAL);
@@ -647,11 +654,10 @@ static int cxl_endpoint_gather_bandwidth(struct cxl_region *cxlr,
if (cxlds->rcd)
return -ENODEV;
- perf = cxled_get_dpa_perf(cxled, cxlr->mode);
+ perf = cxled_get_dpa_perf(cxled);
if (IS_ERR(perf))
return PTR_ERR(perf);
- gp_port = to_cxl_port(parent_port->dev.parent);
*gp_is_root = is_cxl_root(gp_port);
/*
@@ -1053,7 +1059,7 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
lockdep_assert_held(&cxl_dpa_rwsem);
- perf = cxled_get_dpa_perf(cxled, cxlr->mode);
+ perf = cxled_get_dpa_perf(cxled);
if (IS_ERR(perf))
return;
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 8f2eb76a3c8c..4d8316f97ed8 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -72,8 +72,8 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
resource_size_t length);
struct dentry *cxl_debugfs_create_dir(const char *dir);
-int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
- enum cxl_decoder_mode mode);
+int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled,
+ enum cxl_partition_mode mode);
int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size);
int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled);
@@ -115,6 +115,8 @@ bool cxl_need_node_perf_attrs_update(int nid);
int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
struct access_coordinate *c);
+int cxl_ras_init(void);
+void cxl_ras_exit(void);
int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port);
#endif /* __CXL_CORE_H__ */
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 50e6a45b30ba..70cae4ebf8a4 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -213,16 +213,46 @@ void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds)
{
struct resource *p1, *p2;
- down_read(&cxl_dpa_rwsem);
+ guard(rwsem_read)(&cxl_dpa_rwsem);
for (p1 = cxlds->dpa_res.child; p1; p1 = p1->sibling) {
__cxl_dpa_debug(file, p1, 0);
for (p2 = p1->child; p2; p2 = p2->sibling)
__cxl_dpa_debug(file, p2, 1);
}
- up_read(&cxl_dpa_rwsem);
}
EXPORT_SYMBOL_NS_GPL(cxl_dpa_debug, "CXL");
+/* See request_skip() kernel-doc */
+static resource_size_t __adjust_skip(struct cxl_dev_state *cxlds,
+ const resource_size_t skip_base,
+ const resource_size_t skip_len,
+ const char *requester)
+{
+ const resource_size_t skip_end = skip_base + skip_len - 1;
+
+ for (int i = 0; i < cxlds->nr_partitions; i++) {
+ const struct resource *part_res = &cxlds->part[i].res;
+ resource_size_t adjust_start, adjust_end, size;
+
+ adjust_start = max(skip_base, part_res->start);
+ adjust_end = min(skip_end, part_res->end);
+
+ if (adjust_end < adjust_start)
+ continue;
+
+ size = adjust_end - adjust_start + 1;
+
+ if (!requester)
+ __release_region(&cxlds->dpa_res, adjust_start, size);
+ else if (!__request_region(&cxlds->dpa_res, adjust_start, size,
+ requester, 0))
+ return adjust_start - skip_base;
+ }
+
+ return skip_len;
+}
+#define release_skip(c, b, l) __adjust_skip((c), (b), (l), NULL)
+
/*
* Must be called in a context that synchronizes against this decoder's
* port ->remove() callback (like an endpoint decoder sysfs attribute)
@@ -241,7 +271,7 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
skip_start = res->start - cxled->skip;
__release_region(&cxlds->dpa_res, res->start, resource_size(res));
if (cxled->skip)
- __release_region(&cxlds->dpa_res, skip_start, cxled->skip);
+ release_skip(cxlds, skip_start, cxled->skip);
cxled->skip = 0;
cxled->dpa_res = NULL;
put_device(&cxled->cxld.dev);
@@ -250,9 +280,8 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
static void cxl_dpa_release(void *cxled)
{
- down_write(&cxl_dpa_rwsem);
+ guard(rwsem_write)(&cxl_dpa_rwsem);
__cxl_dpa_release(cxled);
- up_write(&cxl_dpa_rwsem);
}
/*
@@ -268,6 +297,58 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
__cxl_dpa_release(cxled);
}
+/**
+ * request_skip() - Track DPA 'skip' in @cxlds->dpa_res resource tree
+ * @cxlds: CXL.mem device context that parents @cxled
+ * @cxled: Endpoint decoder establishing new allocation that skips lower DPA
+ * @skip_base: DPA < start of new DPA allocation (DPAnew)
+ * @skip_len: @skip_base + @skip_len == DPAnew
+ *
+ * DPA 'skip' arises from out-of-sequence DPA allocation events relative
+ * to free capacity across multiple partitions. It is a wasteful event
+ * as usable DPA gets thrown away, but if a deployment has, for example,
+ * a dual RAM+PMEM device, wants to use PMEM, and has unallocated RAM
+ * DPA, the free RAM DPA must be sacrificed to start allocating PMEM.
+ * See third "Implementation Note" in CXL 3.1 8.2.4.19.13 "Decoder
+ * Protection" for more details.
+ *
+ * A 'skip' always covers the last allocated DPA in a previous partition
+ * to the start of the current partition to allocate. Allocations never
+ * start in the middle of a partition, and allocations are always
+ * de-allocated in reverse order (see cxl_dpa_free(), or natural devm
+ * unwind order from forced in-order allocation).
+ *
+ * If @cxlds->nr_partitions was guaranteed to be <= 2 then the 'skip'
+ * would always be contained to a single partition. Given
+ * @cxlds->nr_partitions may be > 2 it results in cases where the 'skip'
+ * might span "tail capacity of partition[0], all of partition[1], ...,
+ * all of partition[N-1]" to support allocating from partition[N]. That
+ * in turn interacts with the partition 'struct resource' boundaries
+ * within @cxlds->dpa_res whereby 'skip' requests need to be divided by
+ * partition. I.e. this is a quirk of using a 'struct resource' tree to
+ * detect range conflicts while also tracking partition boundaries in
+ * @cxlds->dpa_res.
+ */
+static int request_skip(struct cxl_dev_state *cxlds,
+ struct cxl_endpoint_decoder *cxled,
+ const resource_size_t skip_base,
+ const resource_size_t skip_len)
+{
+ resource_size_t skipped = __adjust_skip(cxlds, skip_base, skip_len,
+ dev_name(&cxled->cxld.dev));
+
+ if (skipped == skip_len)
+ return 0;
+
+ dev_dbg(cxlds->dev,
+ "%s: failed to reserve skipped space (%pa %pa %pa)\n",
+ dev_name(&cxled->cxld.dev), &skip_base, &skip_len, &skipped);
+
+ release_skip(cxlds, skip_base, skipped);
+
+ return -EBUSY;
+}
+
static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
resource_size_t base, resource_size_t len,
resource_size_t skipped)
@@ -277,6 +358,7 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *dev = &port->dev;
struct resource *res;
+ int rc;
lockdep_assert_held_write(&cxl_dpa_rwsem);
@@ -305,14 +387,9 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
}
if (skipped) {
- res = __request_region(&cxlds->dpa_res, base - skipped, skipped,
- dev_name(&cxled->cxld.dev), 0);
- if (!res) {
- dev_dbg(dev,
- "decoder%d.%d: failed to reserve skipped space\n",
- port->id, cxled->cxld.id);
- return -EBUSY;
- }
+ rc = request_skip(cxlds, cxled, base - skipped, skipped);
+ if (rc)
+ return rc;
}
res = __request_region(&cxlds->dpa_res, base, len,
dev_name(&cxled->cxld.dev), 0);
@@ -320,28 +397,117 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
dev_dbg(dev, "decoder%d.%d: failed to reserve allocation\n",
port->id, cxled->cxld.id);
if (skipped)
- __release_region(&cxlds->dpa_res, base - skipped,
- skipped);
+ release_skip(cxlds, base - skipped, skipped);
return -EBUSY;
}
cxled->dpa_res = res;
cxled->skip = skipped;
- if (resource_contains(&cxlds->pmem_res, res))
- cxled->mode = CXL_DECODER_PMEM;
- else if (resource_contains(&cxlds->ram_res, res))
- cxled->mode = CXL_DECODER_RAM;
- else {
- dev_warn(dev, "decoder%d.%d: %pr mixed mode not supported\n",
- port->id, cxled->cxld.id, cxled->dpa_res);
- cxled->mode = CXL_DECODER_MIXED;
- }
+ /*
+ * When allocating new capacity, ->part is already set, when
+ * discovering decoder settings at initial enumeration, ->part
+ * is not set.
+ */
+ if (cxled->part < 0)
+ for (int i = 0; i < cxlds->nr_partitions; i++)
+ if (resource_contains(&cxlds->part[i].res, res)) {
+ cxled->part = i;
+ break;
+ }
+
+ if (cxled->part < 0)
+ dev_warn(dev, "decoder%d.%d: %pr does not map any partition\n",
+ port->id, cxled->cxld.id, res);
port->hdm_end++;
get_device(&cxled->cxld.dev);
return 0;
}
+static int add_dpa_res(struct device *dev, struct resource *parent,
+ struct resource *res, resource_size_t start,
+ resource_size_t size, const char *type)
+{
+ int rc;
+
+ *res = (struct resource) {
+ .name = type,
+ .start = start,
+ .end = start + size - 1,
+ .flags = IORESOURCE_MEM,
+ };
+ if (resource_size(res) == 0) {
+ dev_dbg(dev, "DPA(%s): no capacity\n", res->name);
+ return 0;
+ }
+ rc = request_resource(parent, res);
+ if (rc) {
+ dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name,
+ res, rc);
+ return rc;
+ }
+
+ dev_dbg(dev, "DPA(%s): %pr\n", res->name, res);
+
+ return 0;
+}
+
+static const char *cxl_mode_name(enum cxl_partition_mode mode)
+{
+ switch (mode) {
+ case CXL_PARTMODE_RAM:
+ return "ram";
+ case CXL_PARTMODE_PMEM:
+ return "pmem";
+ default:
+ return "";
+ };
+}
+
+/* if this fails the caller must destroy @cxlds, there is no recovery */
+int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info)
+{
+ struct device *dev = cxlds->dev;
+
+ guard(rwsem_write)(&cxl_dpa_rwsem);
+
+ if (cxlds->nr_partitions)
+ return -EBUSY;
+
+ if (!info->size || !info->nr_partitions) {
+ cxlds->dpa_res = DEFINE_RES_MEM(0, 0);
+ cxlds->nr_partitions = 0;
+ return 0;
+ }
+
+ cxlds->dpa_res = DEFINE_RES_MEM(0, info->size);
+
+ for (int i = 0; i < info->nr_partitions; i++) {
+ const struct cxl_dpa_part_info *part = &info->part[i];
+ int rc;
+
+ cxlds->part[i].perf.qos_class = CXL_QOS_CLASS_INVALID;
+ cxlds->part[i].mode = part->mode;
+
+ /* Require ordered + contiguous partitions */
+ if (i) {
+ const struct cxl_dpa_part_info *prev = &info->part[i - 1];
+
+ if (prev->range.end + 1 != part->range.start)
+ return -EINVAL;
+ }
+ rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->part[i].res,
+ part->range.start, range_len(&part->range),
+ cxl_mode_name(part->mode));
+ if (rc)
+ return rc;
+ cxlds->nr_partitions++;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_dpa_setup);
+
int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
resource_size_t base, resource_size_t len,
resource_size_t skipped)
@@ -362,14 +528,11 @@ EXPORT_SYMBOL_NS_GPL(devm_cxl_dpa_reserve, "CXL");
resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled)
{
- resource_size_t size = 0;
-
- down_read(&cxl_dpa_rwsem);
+ guard(rwsem_read)(&cxl_dpa_rwsem);
if (cxled->dpa_res)
- size = resource_size(cxled->dpa_res);
- up_read(&cxl_dpa_rwsem);
+ return resource_size(cxled->dpa_res);
- return size;
+ return 0;
}
resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled)
@@ -387,151 +550,136 @@ int cxl_dpa_free(struct cxl_endpoint_decoder *cxled)
{
struct cxl_port *port = cxled_to_port(cxled);
struct device *dev = &cxled->cxld.dev;
- int rc;
- down_write(&cxl_dpa_rwsem);
- if (!cxled->dpa_res) {
- rc = 0;
- goto out;
- }
+ guard(rwsem_write)(&cxl_dpa_rwsem);
+ if (!cxled->dpa_res)
+ return 0;
if (cxled->cxld.region) {
dev_dbg(dev, "decoder assigned to: %s\n",
dev_name(&cxled->cxld.region->dev));
- rc = -EBUSY;
- goto out;
+ return -EBUSY;
}
if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) {
dev_dbg(dev, "decoder enabled\n");
- rc = -EBUSY;
- goto out;
+ return -EBUSY;
}
if (cxled->cxld.id != port->hdm_end) {
dev_dbg(dev, "expected decoder%d.%d\n", port->id,
port->hdm_end);
- rc = -EBUSY;
- goto out;
+ return -EBUSY;
}
+
devm_cxl_dpa_release(cxled);
- rc = 0;
-out:
- up_write(&cxl_dpa_rwsem);
- return rc;
+ return 0;
}
-int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
- enum cxl_decoder_mode mode)
+int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled,
+ enum cxl_partition_mode mode)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *dev = &cxled->cxld.dev;
-
- switch (mode) {
- case CXL_DECODER_RAM:
- case CXL_DECODER_PMEM:
- break;
- default:
- dev_dbg(dev, "unsupported mode: %d\n", mode);
- return -EINVAL;
- }
+ int part;
guard(rwsem_write)(&cxl_dpa_rwsem);
if (cxled->cxld.flags & CXL_DECODER_F_ENABLE)
return -EBUSY;
- /*
- * Only allow modes that are supported by the current partition
- * configuration
- */
- if (mode == CXL_DECODER_PMEM && !resource_size(&cxlds->pmem_res)) {
- dev_dbg(dev, "no available pmem capacity\n");
- return -ENXIO;
+ for (part = 0; part < cxlds->nr_partitions; part++)
+ if (cxlds->part[part].mode == mode)
+ break;
+
+ if (part >= cxlds->nr_partitions) {
+ dev_dbg(dev, "unsupported mode: %d\n", mode);
+ return -EINVAL;
}
- if (mode == CXL_DECODER_RAM && !resource_size(&cxlds->ram_res)) {
- dev_dbg(dev, "no available ram capacity\n");
+
+ if (!resource_size(&cxlds->part[part].res)) {
+ dev_dbg(dev, "no available capacity for mode: %d\n", mode);
return -ENXIO;
}
- cxled->mode = mode;
+ cxled->part = part;
return 0;
}
-int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size)
+static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
- resource_size_t free_ram_start, free_pmem_start;
- struct cxl_port *port = cxled_to_port(cxled);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *dev = &cxled->cxld.dev;
- resource_size_t start, avail, skip;
+ struct resource *res, *prev = NULL;
+ resource_size_t start, avail, skip, skip_start;
struct resource *p, *last;
- int rc;
+ int part;
- down_write(&cxl_dpa_rwsem);
+ guard(rwsem_write)(&cxl_dpa_rwsem);
if (cxled->cxld.region) {
dev_dbg(dev, "decoder attached to %s\n",
dev_name(&cxled->cxld.region->dev));
- rc = -EBUSY;
- goto out;
+ return -EBUSY;
}
if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) {
dev_dbg(dev, "decoder enabled\n");
- rc = -EBUSY;
- goto out;
+ return -EBUSY;
}
- for (p = cxlds->ram_res.child, last = NULL; p; p = p->sibling)
- last = p;
- if (last)
- free_ram_start = last->end + 1;
- else
- free_ram_start = cxlds->ram_res.start;
+ part = cxled->part;
+ if (part < 0) {
+ dev_dbg(dev, "partition not set\n");
+ return -EBUSY;
+ }
- for (p = cxlds->pmem_res.child, last = NULL; p; p = p->sibling)
+ res = &cxlds->part[part].res;
+ for (p = res->child, last = NULL; p; p = p->sibling)
last = p;
if (last)
- free_pmem_start = last->end + 1;
+ start = last->end + 1;
else
- free_pmem_start = cxlds->pmem_res.start;
-
- if (cxled->mode == CXL_DECODER_RAM) {
- start = free_ram_start;
- avail = cxlds->ram_res.end - start + 1;
- skip = 0;
- } else if (cxled->mode == CXL_DECODER_PMEM) {
- resource_size_t skip_start, skip_end;
-
- start = free_pmem_start;
- avail = cxlds->pmem_res.end - start + 1;
- skip_start = free_ram_start;
+ start = res->start;
- /*
- * If some pmem is already allocated, then that allocation
- * already handled the skip.
- */
- if (cxlds->pmem_res.child &&
- skip_start == cxlds->pmem_res.child->start)
- skip_end = skip_start - 1;
- else
- skip_end = start - 1;
- skip = skip_end - skip_start + 1;
- } else {
- dev_dbg(dev, "mode not set\n");
- rc = -EINVAL;
- goto out;
+ /*
+ * To allocate at partition N, a skip needs to be calculated for all
+ * unallocated space at lower partitions indices.
+ *
+ * If a partition has any allocations, the search can end because a
+ * previous cxl_dpa_alloc() invocation is assumed to have accounted for
+ * all previous partitions.
+ */
+ skip_start = CXL_RESOURCE_NONE;
+ for (int i = part; i; i--) {
+ prev = &cxlds->part[i - 1].res;
+ for (p = prev->child, last = NULL; p; p = p->sibling)
+ last = p;
+ if (last) {
+ skip_start = last->end + 1;
+ break;
+ }
+ skip_start = prev->start;
}
+ avail = res->end - start + 1;
+ if (skip_start == CXL_RESOURCE_NONE)
+ skip = 0;
+ else
+ skip = res->start - skip_start;
+
if (size > avail) {
dev_dbg(dev, "%pa exceeds available %s capacity: %pa\n", &size,
- cxl_decoder_mode_name(cxled->mode), &avail);
- rc = -ENOSPC;
- goto out;
+ res->name, &avail);
+ return -ENOSPC;
}
- rc = __cxl_dpa_reserve(cxled, start, size, skip);
-out:
- up_write(&cxl_dpa_rwsem);
+ return __cxl_dpa_reserve(cxled, start, size, skip);
+}
+
+int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size)
+{
+ struct cxl_port *port = cxled_to_port(cxled);
+ int rc;
+ rc = __cxl_dpa_alloc(cxled, size);
if (rc)
return rc;
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index ec7b70ae5c06..85a1c1860a03 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -1097,10 +1097,6 @@ static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds)
le64_to_cpu(pi.active_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
mds->active_persistent_bytes =
le64_to_cpu(pi.active_persistent_cap) * CXL_CAPACITY_MULTIPLIER;
- mds->next_volatile_bytes =
- le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
- mds->next_persistent_bytes =
- le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
return 0;
}
@@ -1222,74 +1218,54 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd)
{
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
struct cxl_port *endpoint;
- int rc;
/* synchronize with cxl_mem_probe() and decoder write operations */
guard(device)(&cxlmd->dev);
endpoint = cxlmd->endpoint;
- down_read(&cxl_region_rwsem);
+ guard(rwsem_read)(&cxl_region_rwsem);
/*
* Require an endpoint to be safe otherwise the driver can not
* be sure that the device is unmapped.
*/
if (endpoint && cxl_num_decoders_committed(endpoint) == 0)
- rc = __cxl_mem_sanitize(mds, cmd);
- else
- rc = -EBUSY;
- up_read(&cxl_region_rwsem);
+ return __cxl_mem_sanitize(mds, cmd);
- return rc;
+ return -EBUSY;
}
-static int add_dpa_res(struct device *dev, struct resource *parent,
- struct resource *res, resource_size_t start,
- resource_size_t size, const char *type)
+static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum cxl_partition_mode mode)
{
- int rc;
-
- res->name = type;
- res->start = start;
- res->end = start + size - 1;
- res->flags = IORESOURCE_MEM;
- if (resource_size(res) == 0) {
- dev_dbg(dev, "DPA(%s): no capacity\n", res->name);
- return 0;
- }
- rc = request_resource(parent, res);
- if (rc) {
- dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name,
- res, rc);
- return rc;
- }
+ int i = info->nr_partitions;
- dev_dbg(dev, "DPA(%s): %pr\n", res->name, res);
+ if (size == 0)
+ return;
- return 0;
+ info->part[i].range = (struct range) {
+ .start = start,
+ .end = start + size - 1,
+ };
+ info->part[i].mode = mode;
+ info->nr_partitions++;
}
-int cxl_mem_create_range_info(struct cxl_memdev_state *mds)
+int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info)
{
struct cxl_dev_state *cxlds = &mds->cxlds;
struct device *dev = cxlds->dev;
int rc;
if (!cxlds->media_ready) {
- cxlds->dpa_res = DEFINE_RES_MEM(0, 0);
- cxlds->ram_res = DEFINE_RES_MEM(0, 0);
- cxlds->pmem_res = DEFINE_RES_MEM(0, 0);
+ info->size = 0;
return 0;
}
- cxlds->dpa_res = DEFINE_RES_MEM(0, mds->total_bytes);
+ info->size = mds->total_bytes;
if (mds->partition_align_bytes == 0) {
- rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
- mds->volatile_only_bytes, "ram");
- if (rc)
- return rc;
- return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
- mds->volatile_only_bytes,
- mds->persistent_only_bytes, "pmem");
+ add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM);
+ add_part(info, mds->volatile_only_bytes,
+ mds->persistent_only_bytes, CXL_PARTMODE_PMEM);
+ return 0;
}
rc = cxl_mem_get_partition_info(mds);
@@ -1298,15 +1274,13 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds)
return rc;
}
- rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
- mds->active_volatile_bytes, "ram");
- if (rc)
- return rc;
- return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
- mds->active_volatile_bytes,
- mds->active_persistent_bytes, "pmem");
+ add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM);
+ add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes,
+ CXL_PARTMODE_PMEM);
+
+ return 0;
}
-EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, "CXL");
+EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL");
int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count)
{
@@ -1489,8 +1463,6 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
mds->cxlds.reg_map.host = dev;
mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE;
mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
- mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID;
- mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID;
return mds;
}
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index ae3dfcbe8938..f119309d0c9b 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -75,12 +75,20 @@ static ssize_t label_storage_size_show(struct device *dev,
}
static DEVICE_ATTR_RO(label_storage_size);
+static resource_size_t cxl_ram_size(struct cxl_dev_state *cxlds)
+{
+ /* Static RAM is only expected at partition 0. */
+ if (cxlds->part[0].mode != CXL_PARTMODE_RAM)
+ return 0;
+ return resource_size(&cxlds->part[0].res);
+}
+
static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
- unsigned long long len = resource_size(&cxlds->ram_res);
+ unsigned long long len = cxl_ram_size(cxlds);
return sysfs_emit(buf, "%#llx\n", len);
}
@@ -93,7 +101,7 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
- unsigned long long len = resource_size(&cxlds->pmem_res);
+ unsigned long long len = cxl_pmem_size(cxlds);
return sysfs_emit(buf, "%#llx\n", len);
}
@@ -198,22 +206,17 @@ static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
int rc = 0;
/* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */
- if (resource_size(&cxlds->pmem_res)) {
- offset = cxlds->pmem_res.start;
- length = resource_size(&cxlds->pmem_res);
- rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
- if (rc)
- return rc;
- }
- if (resource_size(&cxlds->ram_res)) {
- offset = cxlds->ram_res.start;
- length = resource_size(&cxlds->ram_res);
+ for (int i = 0; i < cxlds->nr_partitions; i++) {
+ const struct resource *res = &cxlds->part[i].res;
+
+ offset = res->start;
+ length = resource_size(res);
rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
/*
* Invalid Physical Address is not an error for
* volatile addresses. Device support is optional.
*/
- if (rc == -EFAULT)
+ if (rc == -EFAULT && cxlds->part[i].mode == CXL_PARTMODE_RAM)
rc = 0;
}
return rc;
@@ -404,14 +407,21 @@ static struct attribute *cxl_memdev_attributes[] = {
NULL,
};
+static struct cxl_dpa_perf *to_pmem_perf(struct cxl_dev_state *cxlds)
+{
+ for (int i = 0; i < cxlds->nr_partitions; i++)
+ if (cxlds->part[i].mode == CXL_PARTMODE_PMEM)
+ return &cxlds->part[i].perf;
+ return NULL;
+}
+
static ssize_t pmem_qos_class_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
- return sysfs_emit(buf, "%d\n", mds->pmem_perf.qos_class);
+ return sysfs_emit(buf, "%d\n", to_pmem_perf(cxlds)->qos_class);
}
static struct device_attribute dev_attr_pmem_qos_class =
@@ -423,14 +433,20 @@ static struct attribute *cxl_memdev_pmem_attributes[] = {
NULL,
};
+static struct cxl_dpa_perf *to_ram_perf(struct cxl_dev_state *cxlds)
+{
+ if (cxlds->part[0].mode != CXL_PARTMODE_RAM)
+ return NULL;
+ return &cxlds->part[0].perf;
+}
+
static ssize_t ram_qos_class_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
- return sysfs_emit(buf, "%d\n", mds->ram_perf.qos_class);
+ return sysfs_emit(buf, "%d\n", to_ram_perf(cxlds)->qos_class);
}
static struct device_attribute dev_attr_ram_qos_class =
@@ -466,11 +482,11 @@ static umode_t cxl_ram_visible(struct kobject *kobj, struct attribute *a, int n)
{
struct device *dev = kobj_to_dev(kobj);
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_dpa_perf *perf = to_ram_perf(cxlmd->cxlds);
- if (a == &dev_attr_ram_qos_class.attr)
- if (mds->ram_perf.qos_class == CXL_QOS_CLASS_INVALID)
- return 0;
+ if (a == &dev_attr_ram_qos_class.attr &&
+ (!perf || perf->qos_class == CXL_QOS_CLASS_INVALID))
+ return 0;
return a->mode;
}
@@ -485,11 +501,11 @@ static umode_t cxl_pmem_visible(struct kobject *kobj, struct attribute *a, int n
{
struct device *dev = kobj_to_dev(kobj);
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ struct cxl_dpa_perf *perf = to_pmem_perf(cxlmd->cxlds);
- if (a == &dev_attr_pmem_qos_class.attr)
- if (mds->pmem_perf.qos_class == CXL_QOS_CLASS_INVALID)
- return 0;
+ if (a == &dev_attr_pmem_qos_class.attr &&
+ (!perf || perf->qos_class == CXL_QOS_CLASS_INVALID))
+ return 0;
return a->mode;
}
@@ -564,10 +580,9 @@ EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL");
void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
unsigned long *cmds)
{
- down_write(&cxl_memdev_rwsem);
+ guard(rwsem_write)(&cxl_memdev_rwsem);
bitmap_or(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
CXL_MEM_COMMAND_ID_MAX);
- up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL");
@@ -579,10 +594,9 @@ EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL");
void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
unsigned long *cmds)
{
- down_write(&cxl_memdev_rwsem);
+ guard(rwsem_write)(&cxl_memdev_rwsem);
bitmap_andnot(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
CXL_MEM_COMMAND_ID_MAX);
- up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, "CXL");
@@ -590,9 +604,8 @@ static void cxl_memdev_shutdown(struct device *dev)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
- down_write(&cxl_memdev_rwsem);
+ guard(rwsem_write)(&cxl_memdev_rwsem);
cxlmd->cxlds = NULL;
- up_write(&cxl_memdev_rwsem);
}
static void cxl_memdev_unregister(void *_cxlmd)
@@ -671,15 +684,13 @@ static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
{
struct cxl_memdev *cxlmd = file->private_data;
struct cxl_dev_state *cxlds;
- int rc = -ENXIO;
- down_read(&cxl_memdev_rwsem);
+ guard(rwsem_read)(&cxl_memdev_rwsem);
cxlds = cxlmd->cxlds;
if (cxlds && cxlds->type == CXL_DEVTYPE_CLASSMEM)
- rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
- up_read(&cxl_memdev_rwsem);
+ return __cxl_memdev_ioctl(cxlmd, cmd, arg);
- return rc;
+ return -ENXIO;
}
static int cxl_memdev_open(struct inode *inode, struct file *file)
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 95cd6f11bbfa..0fd6646c1a2e 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -194,25 +194,35 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ /* without @cxl_dpa_rwsem, make sure @part is not reloaded */
+ int part = READ_ONCE(cxled->part);
+ const char *desc;
+
+ if (part < 0)
+ desc = "none";
+ else
+ desc = cxlds->part[part].res.name;
- return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxled->mode));
+ return sysfs_emit(buf, "%s\n", desc);
}
static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
- enum cxl_decoder_mode mode;
+ enum cxl_partition_mode mode;
ssize_t rc;
if (sysfs_streq(buf, "pmem"))
- mode = CXL_DECODER_PMEM;
+ mode = CXL_PARTMODE_PMEM;
else if (sysfs_streq(buf, "ram"))
- mode = CXL_DECODER_RAM;
+ mode = CXL_PARTMODE_RAM;
else
return -EINVAL;
- rc = cxl_dpa_set_mode(cxled, mode);
+ rc = cxl_dpa_set_part(cxled, mode);
if (rc)
return rc;
@@ -549,13 +559,9 @@ static ssize_t decoders_committed_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_port *port = to_cxl_port(dev);
- int rc;
- down_read(&cxl_region_rwsem);
- rc = sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port));
- up_read(&cxl_region_rwsem);
-
- return rc;
+ guard(rwsem_read)(&cxl_region_rwsem);
+ return sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port));
}
static DEVICE_ATTR_RO(decoders_committed);
@@ -1901,6 +1907,7 @@ struct cxl_endpoint_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port)
return ERR_PTR(-ENOMEM);
cxled->pos = -1;
+ cxled->part = -1;
cxld = &cxled->cxld;
rc = cxl_decoder_init(port, cxld);
if (rc) {
@@ -2341,8 +2348,14 @@ static __init int cxl_core_init(void)
if (rc)
goto err_region;
+ rc = cxl_ras_init();
+ if (rc)
+ goto err_ras;
+
return 0;
+err_ras:
+ cxl_region_exit();
err_region:
bus_unregister(&cxl_bus_type);
err_bus:
@@ -2354,6 +2367,7 @@ err_wq:
static void cxl_core_exit(void)
{
+ cxl_ras_exit();
cxl_region_exit();
bus_unregister(&cxl_bus_type);
destroy_workqueue(cxl_bus_wq);
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
new file mode 100644
index 000000000000..485a831695c7
--- /dev/null
+++ b/drivers/cxl/core/ras.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2025 AMD Corporation. All rights reserved. */
+
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <cxl/event.h>
+#include <cxlmem.h>
+#include "trace.h"
+
+static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev,
+ struct cxl_ras_capability_regs ras_cap)
+{
+ u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
+
+ trace_cxl_port_aer_correctable_error(&pdev->dev, status);
+}
+
+static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev,
+ struct cxl_ras_capability_regs ras_cap)
+{
+ u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask;
+ u32 fe;
+
+ if (hweight32(status) > 1)
+ fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
+ ras_cap.cap_control));
+ else
+ fe = status;
+
+ trace_cxl_port_aer_uncorrectable_error(&pdev->dev, status, fe,
+ ras_cap.header_log);
+}
+
+static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev,
+ struct cxl_ras_capability_regs ras_cap)
+{
+ u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
+ struct cxl_dev_state *cxlds;
+
+ cxlds = pci_get_drvdata(pdev);
+ if (!cxlds)
+ return;
+
+ trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
+}
+
+static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev,
+ struct cxl_ras_capability_regs ras_cap)
+{
+ u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask;
+ struct cxl_dev_state *cxlds;
+ u32 fe;
+
+ cxlds = pci_get_drvdata(pdev);
+ if (!cxlds)
+ return;
+
+ if (hweight32(status) > 1)
+ fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
+ ras_cap.cap_control));
+ else
+ fe = status;
+
+ trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe,
+ ras_cap.header_log);
+}
+
+static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
+{
+ unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device,
+ data->prot_err.agent_addr.function);
+ struct pci_dev *pdev __free(pci_dev_put) =
+ pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment,
+ data->prot_err.agent_addr.bus,
+ devfn);
+ int port_type;
+
+ if (!pdev)
+ return;
+
+ guard(device)(&pdev->dev);
+
+ port_type = pci_pcie_type(pdev);
+ if (port_type == PCI_EXP_TYPE_ROOT_PORT ||
+ port_type == PCI_EXP_TYPE_DOWNSTREAM ||
+ port_type == PCI_EXP_TYPE_UPSTREAM) {
+ if (data->severity == AER_CORRECTABLE)
+ cxl_cper_trace_corr_port_prot_err(pdev, data->ras_cap);
+ else
+ cxl_cper_trace_uncorr_port_prot_err(pdev, data->ras_cap);
+
+ return;
+ }
+
+ if (data->severity == AER_CORRECTABLE)
+ cxl_cper_trace_corr_prot_err(pdev, data->ras_cap);
+ else
+ cxl_cper_trace_uncorr_prot_err(pdev, data->ras_cap);
+}
+
+static void cxl_cper_prot_err_work_fn(struct work_struct *work)
+{
+ struct cxl_cper_prot_err_work_data wd;
+
+ while (cxl_cper_prot_err_kfifo_get(&wd))
+ cxl_cper_handle_prot_err(&wd);
+}
+static DECLARE_WORK(cxl_cper_prot_err_work, cxl_cper_prot_err_work_fn);
+
+int cxl_ras_init(void)
+{
+ return cxl_cper_register_prot_err_work(&cxl_cper_prot_err_work);
+}
+
+void cxl_ras_exit(void)
+{
+ cxl_cper_unregister_prot_err_work(&cxl_cper_prot_err_work);
+ cancel_work_sync(&cxl_cper_prot_err_work);
+}
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index e8d11a988fd9..824d356d9d23 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -144,7 +144,7 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
rc = down_read_interruptible(&cxl_region_rwsem);
if (rc)
return rc;
- if (cxlr->mode != CXL_DECODER_PMEM)
+ if (cxlr->mode != CXL_PARTMODE_PMEM)
rc = sysfs_emit(buf, "\n");
else
rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
@@ -441,7 +441,7 @@ static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
* Support tooling that expects to find a 'uuid' attribute for all
* regions regardless of mode.
*/
- if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
+ if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_PARTMODE_PMEM)
return 0444;
return a->mode;
}
@@ -603,8 +603,16 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct cxl_region *cxlr = to_cxl_region(dev);
+ const char *desc;
- return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode));
+ if (cxlr->mode == CXL_PARTMODE_RAM)
+ desc = "ram";
+ else if (cxlr->mode == CXL_PARTMODE_PMEM)
+ desc = "pmem";
+ else
+ desc = "";
+
+ return sysfs_emit(buf, "%s\n", desc);
}
static DEVICE_ATTR_RO(mode);
@@ -630,7 +638,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
/* ways, granularity and uuid (if PMEM) need to be set before HPA */
if (!p->interleave_ways || !p->interleave_granularity ||
- (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid)))
+ (cxlr->mode == CXL_PARTMODE_PMEM && uuid_is_null(&p->uuid)))
return -ENXIO;
div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder);
@@ -1888,6 +1896,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
{
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_region_params *p = &cxlr->params;
struct cxl_port *ep_port, *root_port;
struct cxl_dport *dport;
@@ -1902,17 +1911,17 @@ static int cxl_region_attach(struct cxl_region *cxlr,
return rc;
}
- if (cxled->mode != cxlr->mode) {
- dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
- dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
- return -EINVAL;
- }
-
- if (cxled->mode == CXL_DECODER_DEAD) {
+ if (cxled->part < 0) {
dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
return -ENODEV;
}
+ if (cxlds->part[cxled->part].mode != cxlr->mode) {
+ dev_dbg(&cxlr->dev, "%s region mode: %d mismatch\n",
+ dev_name(&cxled->cxld.dev), cxlr->mode);
+ return -EINVAL;
+ }
+
/* all full of members, or interleave config not established? */
if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
dev_dbg(&cxlr->dev, "region already active\n");
@@ -2115,7 +2124,7 @@ out:
void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
{
down_write(&cxl_region_rwsem);
- cxled->mode = CXL_DECODER_DEAD;
+ cxled->part = -1;
cxl_region_detach(cxled);
up_write(&cxl_region_rwsem);
}
@@ -2471,7 +2480,7 @@ static int cxl_region_calculate_adistance(struct notifier_block *nb,
*/
static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
int id,
- enum cxl_decoder_mode mode,
+ enum cxl_partition_mode mode,
enum cxl_decoder_type type)
{
struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
@@ -2525,13 +2534,13 @@ static ssize_t create_ram_region_show(struct device *dev,
}
static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
- enum cxl_decoder_mode mode, int id)
+ enum cxl_partition_mode mode, int id)
{
int rc;
switch (mode) {
- case CXL_DECODER_RAM:
- case CXL_DECODER_PMEM:
+ case CXL_PARTMODE_RAM:
+ case CXL_PARTMODE_PMEM:
break;
default:
dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
@@ -2551,7 +2560,7 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
}
static ssize_t create_region_store(struct device *dev, const char *buf,
- size_t len, enum cxl_decoder_mode mode)
+ size_t len, enum cxl_partition_mode mode)
{
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
struct cxl_region *cxlr;
@@ -2572,7 +2581,7 @@ static ssize_t create_pmem_region_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
- return create_region_store(dev, buf, len, CXL_DECODER_PMEM);
+ return create_region_store(dev, buf, len, CXL_PARTMODE_PMEM);
}
DEVICE_ATTR_RW(create_pmem_region);
@@ -2580,7 +2589,7 @@ static ssize_t create_ram_region_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
- return create_region_store(dev, buf, len, CXL_DECODER_RAM);
+ return create_region_store(dev, buf, len, CXL_PARTMODE_RAM);
}
DEVICE_ATTR_RW(create_ram_region);
@@ -2678,7 +2687,7 @@ EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, "CXL");
struct cxl_poison_context {
struct cxl_port *port;
- enum cxl_decoder_mode mode;
+ int part;
u64 offset;
};
@@ -2686,47 +2695,45 @@ static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd,
struct cxl_poison_context *ctx)
{
struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ const struct resource *res;
+ struct resource *p, *last;
u64 offset, length;
int rc = 0;
+ if (ctx->part < 0)
+ return 0;
+
/*
- * Collect poison for the remaining unmapped resources
- * after poison is collected by committed endpoints.
- *
- * Knowing that PMEM must always follow RAM, get poison
- * for unmapped resources based on the last decoder's mode:
- * ram: scan remains of ram range, then any pmem range
- * pmem: scan remains of pmem range
+ * Collect poison for the remaining unmapped resources after
+ * poison is collected by committed endpoints decoders.
*/
-
- if (ctx->mode == CXL_DECODER_RAM) {
- offset = ctx->offset;
- length = resource_size(&cxlds->ram_res) - offset;
+ for (int i = ctx->part; i < cxlds->nr_partitions; i++) {
+ res = &cxlds->part[i].res;
+ for (p = res->child, last = NULL; p; p = p->sibling)
+ last = p;
+ if (last)
+ offset = last->end + 1;
+ else
+ offset = res->start;
+ length = res->end - offset + 1;
+ if (!length)
+ break;
rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
- if (rc == -EFAULT)
- rc = 0;
+ if (rc == -EFAULT && cxlds->part[i].mode == CXL_PARTMODE_RAM)
+ continue;
if (rc)
- return rc;
- }
- if (ctx->mode == CXL_DECODER_PMEM) {
- offset = ctx->offset;
- length = resource_size(&cxlds->dpa_res) - offset;
- if (!length)
- return 0;
- } else if (resource_size(&cxlds->pmem_res)) {
- offset = cxlds->pmem_res.start;
- length = resource_size(&cxlds->pmem_res);
- } else {
- return 0;
+ break;
}
- return cxl_mem_get_poison(cxlmd, offset, length, NULL);
+ return rc;
}
static int poison_by_decoder(struct device *dev, void *arg)
{
struct cxl_poison_context *ctx = arg;
struct cxl_endpoint_decoder *cxled;
+ enum cxl_partition_mode mode;
+ struct cxl_dev_state *cxlds;
struct cxl_memdev *cxlmd;
u64 offset, length;
int rc = 0;
@@ -2735,27 +2742,18 @@ static int poison_by_decoder(struct device *dev, void *arg)
return rc;
cxled = to_cxl_endpoint_decoder(dev);
- if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
+ if (!cxled->dpa_res)
return rc;
- /*
- * Regions are only created with single mode decoders: pmem or ram.
- * Linux does not support mixed mode decoders. This means that
- * reading poison per endpoint decoder adheres to the requirement
- * that poison reads of pmem and ram must be separated.
- * CXL 3.0 Spec 8.2.9.8.4.1
- */
- if (cxled->mode == CXL_DECODER_MIXED) {
- dev_dbg(dev, "poison list read unsupported in mixed mode\n");
- return rc;
- }
-
cxlmd = cxled_to_memdev(cxled);
+ cxlds = cxlmd->cxlds;
+ mode = cxlds->part[cxled->part].mode;
+
if (cxled->skip) {
offset = cxled->dpa_res->start - cxled->skip;
length = cxled->skip;
rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
- if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
+ if (rc == -EFAULT && mode == CXL_PARTMODE_RAM)
rc = 0;
if (rc)
return rc;
@@ -2764,7 +2762,7 @@ static int poison_by_decoder(struct device *dev, void *arg)
offset = cxled->dpa_res->start;
length = cxled->dpa_res->end - offset + 1;
rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region);
- if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
+ if (rc == -EFAULT && mode == CXL_PARTMODE_RAM)
rc = 0;
if (rc)
return rc;
@@ -2772,7 +2770,7 @@ static int poison_by_decoder(struct device *dev, void *arg)
/* Iterate until commit_end is reached */
if (cxled->cxld.id == ctx->port->commit_end) {
ctx->offset = cxled->dpa_res->end + 1;
- ctx->mode = cxled->mode;
+ ctx->part = cxled->part;
return 1;
}
@@ -2785,7 +2783,8 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port)
int rc = 0;
ctx = (struct cxl_poison_context) {
- .port = port
+ .port = port,
+ .part = -1,
};
rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder);
@@ -3038,17 +3037,13 @@ static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
struct cxl_dax_region *cxlr_dax;
struct device *dev;
- down_read(&cxl_region_rwsem);
- if (p->state != CXL_CONFIG_COMMIT) {
- cxlr_dax = ERR_PTR(-ENXIO);
- goto out;
- }
+ guard(rwsem_read)(&cxl_region_rwsem);
+ if (p->state != CXL_CONFIG_COMMIT)
+ return ERR_PTR(-ENXIO);
cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
- if (!cxlr_dax) {
- cxlr_dax = ERR_PTR(-ENOMEM);
- goto out;
- }
+ if (!cxlr_dax)
+ return ERR_PTR(-ENOMEM);
cxlr_dax->hpa_range.start = p->res->start;
cxlr_dax->hpa_range.end = p->res->end;
@@ -3061,8 +3056,6 @@ static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
dev->parent = &cxlr->dev;
dev->bus = &cxl_bus_type;
dev->type = &cxl_dax_region_type;
-out:
- up_read(&cxl_region_rwsem);
return cxlr_dax;
}
@@ -3208,7 +3201,6 @@ static int match_region_by_range(struct device *dev, const void *data)
struct cxl_region_params *p;
struct cxl_region *cxlr;
const struct range *r = data;
- int rc = 0;
if (!is_cxl_region(dev))
return 0;
@@ -3216,57 +3208,38 @@ static int match_region_by_range(struct device *dev, const void *data)
cxlr = to_cxl_region(dev);
p = &cxlr->params;
- down_read(&cxl_region_rwsem);
+ guard(rwsem_read)(&cxl_region_rwsem);
if (p->res && p->res->start == r->start && p->res->end == r->end)
- rc = 1;
- up_read(&cxl_region_rwsem);
+ return 1;
- return rc;
+ return 0;
}
-/* Establish an empty region covering the given HPA range */
-static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
- struct cxl_endpoint_decoder *cxled)
+static int __construct_region(struct cxl_region *cxlr,
+ struct cxl_root_decoder *cxlrd,
+ struct cxl_endpoint_decoder *cxled)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
- struct cxl_port *port = cxlrd_to_port(cxlrd);
struct range *hpa = &cxled->cxld.hpa_range;
struct cxl_region_params *p;
- struct cxl_region *cxlr;
struct resource *res;
int rc;
- do {
- cxlr = __create_region(cxlrd, cxled->mode,
- atomic_read(&cxlrd->region_id));
- } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
-
- if (IS_ERR(cxlr)) {
- dev_err(cxlmd->dev.parent,
- "%s:%s: %s failed assign region: %ld\n",
- dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
- __func__, PTR_ERR(cxlr));
- return cxlr;
- }
-
- down_write(&cxl_region_rwsem);
+ guard(rwsem_write)(&cxl_region_rwsem);
p = &cxlr->params;
if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
dev_err(cxlmd->dev.parent,
"%s:%s: %s autodiscovery interrupted\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
__func__);
- rc = -EBUSY;
- goto err;
+ return -EBUSY;
}
set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
res = kmalloc(sizeof(*res), GFP_KERNEL);
- if (!res) {
- rc = -ENOMEM;
- goto err;
- }
+ if (!res)
+ return -ENOMEM;
*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
dev_name(&cxlr->dev));
@@ -3289,7 +3262,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
if (rc)
- goto err;
+ return rc;
dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
@@ -3298,14 +3271,40 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
/* ...to match put_device() in cxl_add_to_region() */
get_device(&cxlr->dev);
- up_write(&cxl_region_rwsem);
- return cxlr;
+ return 0;
+}
-err:
- up_write(&cxl_region_rwsem);
- devm_release_action(port->uport_dev, unregister_region, cxlr);
- return ERR_PTR(rc);
+/* Establish an empty region covering the given HPA range */
+static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
+ struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_port *port = cxlrd_to_port(cxlrd);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ int rc, part = READ_ONCE(cxled->part);
+ struct cxl_region *cxlr;
+
+ do {
+ cxlr = __create_region(cxlrd, cxlds->part[part].mode,
+ atomic_read(&cxlrd->region_id));
+ } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
+
+ if (IS_ERR(cxlr)) {
+ dev_err(cxlmd->dev.parent,
+ "%s:%s: %s failed assign region: %ld\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ __func__, PTR_ERR(cxlr));
+ return cxlr;
+ }
+
+ rc = __construct_region(cxlr, cxlrd, cxled);
+ if (rc) {
+ devm_release_action(port->uport_dev, unregister_region, cxlr);
+ return ERR_PTR(rc);
+ }
+
+ return cxlr;
}
int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
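The refactor above separates lock scope from object lifetime: __construct_region() takes cxl_region_rwsem with guard(rwsem_write)() so the write lock is released automatically on every return path, while construct_region() keeps the allocation retry loop and the devm_release_action() unwind. A minimal sketch of the same scope-based locking idiom from <linux/cleanup.h>, using hypothetical names unrelated to the CXL code:

#include <linux/cleanup.h>
#include <linux/rwsem.h>

struct example_state {
	bool busy;
	int value;
};

static DECLARE_RWSEM(example_rwsem);

/* All work that needs the lock lives here; each return drops it. */
static int example_locked_update(struct example_state *st, int val)
{
	guard(rwsem_write)(&example_rwsem);

	if (st->busy)
		return -EBUSY;	/* rwsem released automatically */

	st->value = val;
	return 0;		/* ...and here as well */
}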
@@ -3440,9 +3439,9 @@ out:
return rc;
switch (cxlr->mode) {
- case CXL_DECODER_PMEM:
+ case CXL_PARTMODE_PMEM:
return devm_cxl_add_pmem_region(cxlr);
- case CXL_DECODER_RAM:
+ case CXL_PARTMODE_RAM:
/*
* The region cannot be managed by CXL if any portion of
* it is already online as 'System RAM'
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
index cea706b683b5..342ae8d50f32 100644
--- a/drivers/cxl/core/trace.h
+++ b/drivers/cxl/core/trace.h
@@ -48,6 +48,34 @@
{ CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \
)
+TRACE_EVENT(cxl_port_aer_uncorrectable_error,
+ TP_PROTO(struct device *dev, u32 status, u32 fe, u32 *hl),
+ TP_ARGS(dev, status, fe, hl),
+ TP_STRUCT__entry(
+ __string(device, dev_name(dev))
+ __string(host, dev_name(dev->parent))
+ __field(u32, status)
+ __field(u32, first_error)
+ __array(u32, header_log, CXL_HEADERLOG_SIZE_U32)
+ ),
+ TP_fast_assign(
+ __assign_str(device);
+ __assign_str(host);
+ __entry->status = status;
+ __entry->first_error = fe;
+ /*
+ * Embed the 512B headerlog data for user app retrieval and
+ * parsing, but no need to print this in the trace buffer.
+ */
+ memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE);
+ ),
+ TP_printk("device=%s host=%s status: '%s' first_error: '%s'",
+ __get_str(device), __get_str(host),
+ show_uc_errs(__entry->status),
+ show_uc_errs(__entry->first_error)
+ )
+);
+
TRACE_EVENT(cxl_aer_uncorrectable_error,
TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 *hl),
TP_ARGS(cxlmd, status, fe, hl),
@@ -96,6 +124,25 @@ TRACE_EVENT(cxl_aer_uncorrectable_error,
{ CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \
)
+TRACE_EVENT(cxl_port_aer_correctable_error,
+ TP_PROTO(struct device *dev, u32 status),
+ TP_ARGS(dev, status),
+ TP_STRUCT__entry(
+ __string(device, dev_name(dev))
+ __string(host, dev_name(dev->parent))
+ __field(u32, status)
+ ),
+ TP_fast_assign(
+ __assign_str(device);
+ __assign_str(host);
+ __entry->status = status;
+ ),
+ TP_printk("device=%s host=%s status='%s'",
+ __get_str(device), __get_str(host),
+ show_ce_errs(__entry->status)
+ )
+);
+
TRACE_EVENT(cxl_aer_correctable_error,
TP_PROTO(const struct cxl_memdev *cxlmd, u32 status),
TP_ARGS(cxlmd, status),
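Like the memdev events adjacent to them in this file, the two TRACE_EVENT() definitions added above expand into trace_cxl_port_aer_uncorrectable_error() and trace_cxl_port_aer_correctable_error() helpers for the port RAS handling path. A hedged sketch of a call site; the device pointer and the register reads are illustrative, not the driver's actual code:

	struct device *dev = &pdev->dev;	/* the port's device, assumed */
	u32 status, fe;
	u32 hl[CXL_HEADERLOG_SIZE_U32];

	/* status, fe and hl are read from the CXL RAS capability registers */
	trace_cxl_port_aer_uncorrectable_error(dev, status, fe, hl);
	trace_cxl_port_aer_correctable_error(dev, status);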
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index b265347cedce..8bdfa536262e 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -373,32 +373,6 @@ struct cxl_decoder {
};
/*
- * CXL_DECODER_DEAD prevents endpoints from being reattached to regions
- * while cxld_unregister() is running
- */
-enum cxl_decoder_mode {
- CXL_DECODER_NONE,
- CXL_DECODER_RAM,
- CXL_DECODER_PMEM,
- CXL_DECODER_MIXED,
- CXL_DECODER_DEAD,
-};
-
-static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode)
-{
- static const char * const names[] = {
- [CXL_DECODER_NONE] = "none",
- [CXL_DECODER_RAM] = "ram",
- [CXL_DECODER_PMEM] = "pmem",
- [CXL_DECODER_MIXED] = "mixed",
- };
-
- if (mode >= CXL_DECODER_NONE && mode <= CXL_DECODER_MIXED)
- return names[mode];
- return "mixed";
-}
-
-/*
* Track whether this decoder is reserved for region autodiscovery, or
* free for userspace provisioning.
*/
@@ -412,16 +386,16 @@ enum cxl_decoder_state {
* @cxld: base cxl_decoder_object
* @dpa_res: actively claimed DPA span of this decoder
* @skip: offset into @dpa_res where @cxld.hpa_range maps
- * @mode: which memory type / access-mode-partition this decoder targets
* @state: autodiscovery state
+ * @part: partition index this decoder maps
* @pos: interleave position in @cxld.region
*/
struct cxl_endpoint_decoder {
struct cxl_decoder cxld;
struct resource *dpa_res;
resource_size_t skip;
- enum cxl_decoder_mode mode;
enum cxl_decoder_state state;
+ int part;
int pos;
};
@@ -506,6 +480,11 @@ struct cxl_region_params {
int nr_targets;
};
+enum cxl_partition_mode {
+ CXL_PARTMODE_RAM,
+ CXL_PARTMODE_PMEM,
+};
+
/*
* Indicate whether this region has been assembled by autodetection or
* userspace assembly. Prevent endpoint decoders outside of automatic
@@ -525,7 +504,7 @@ struct cxl_region_params {
* struct cxl_region - CXL region
* @dev: This region's device
* @id: This region's id. Id is globally unique across all regions
- * @mode: Endpoint decoder allocation / access mode
+ * @mode: Operational mode of the mapped capacity
* @type: Endpoint decoder target type
* @cxl_nvb: nvdimm bridge for coordinating @cxlr_pmem setup / shutdown
* @cxlr_pmem: (for pmem regions) cached copy of the nvdimm bridge
@@ -538,7 +517,7 @@ struct cxl_region_params {
struct cxl_region {
struct device dev;
int id;
- enum cxl_decoder_mode mode;
+ enum cxl_partition_mode mode;
enum cxl_decoder_type type;
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_pmem_region *cxlr_pmem;
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 236e418d26f4..a87939c8d394 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -97,6 +97,19 @@ int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
resource_size_t base, resource_size_t len,
resource_size_t skipped);
+#define CXL_NR_PARTITIONS_MAX 2
+
+struct cxl_dpa_info {
+ u64 size;
+ struct cxl_dpa_part_info {
+ struct range range;
+ enum cxl_partition_mode mode;
+ } part[CXL_NR_PARTITIONS_MAX];
+ int nr_partitions;
+};
+
+int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info);
+
static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
struct cxl_memdev *cxlmd)
{
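struct cxl_dpa_info lets partition discovery (cxl_mem_dpa_fetch() in the hunks below) be decoupled from DPA resource setup in cxl_dpa_setup(). A minimal hand-rolled sketch of the same handoff for a device with a single volatile partition; the 256MB size and the one-partition layout are assumptions for illustration only:

	/* assumes <linux/sizes.h> for SZ_256M and a valid cxlds */
	struct cxl_dpa_info info = {
		.size = SZ_256M,
		.nr_partitions = 1,
		.part = {
			{
				.range = { .start = 0, .end = SZ_256M - 1 },
				.mode = CXL_PARTMODE_RAM,
			},
		},
	};
	int rc;

	rc = cxl_dpa_setup(cxlds, &info);
	if (rc)
		return rc;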
@@ -409,6 +422,18 @@ struct cxl_dpa_perf {
};
/**
+ * struct cxl_dpa_partition - DPA partition descriptor
+ * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res)
+ * @perf: performance attributes of the partition from CDAT
+ * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic...
+ */
+struct cxl_dpa_partition {
+ struct resource res;
+ struct cxl_dpa_perf perf;
+ enum cxl_partition_mode mode;
+};
+
+/**
* struct cxl_dev_state - The driver device state
*
* cxl_dev_state represents the CXL driver/device state. It provides an
@@ -423,8 +448,8 @@ struct cxl_dpa_perf {
* @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH)
* @media_ready: Indicate whether the device media is usable
* @dpa_res: Overall DPA resource tree for the device
- * @pmem_res: Active Persistent memory capacity configuration
- * @ram_res: Active Volatile memory capacity configuration
+ * @part: DPA partition array
+ * @nr_partitions: Number of DPA partitions
* @serial: PCIe Device Serial Number
* @type: Generic Memory Class device or Vendor Specific Memory device
* @cxl_mbox: CXL mailbox context
@@ -438,13 +463,25 @@ struct cxl_dev_state {
bool rcd;
bool media_ready;
struct resource dpa_res;
- struct resource pmem_res;
- struct resource ram_res;
+ struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX];
+ unsigned int nr_partitions;
u64 serial;
enum cxl_devtype type;
struct cxl_mailbox cxl_mbox;
};
+static inline resource_size_t cxl_pmem_size(struct cxl_dev_state *cxlds)
+{
+ /*
+ * Static PMEM may be at partition index 0 when there is no static RAM
+ * capacity.
+ */
+ for (int i = 0; i < cxlds->nr_partitions; i++)
+ if (cxlds->part[i].mode == CXL_PARTMODE_PMEM)
+ return resource_size(&cxlds->part[i].res);
+ return 0;
+}
+
static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox)
{
return dev_get_drvdata(cxl_mbox->host);
@@ -469,10 +506,6 @@ static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox)
* @partition_align_bytes: alignment size for partition-able capacity
* @active_volatile_bytes: sum of hard + soft volatile
* @active_persistent_bytes: sum of hard + soft persistent
- * @next_volatile_bytes: volatile capacity change pending device reset
- * @next_persistent_bytes: persistent capacity change pending device reset
- * @ram_perf: performance data entry matched to RAM partition
- * @pmem_perf: performance data entry matched to PMEM partition
* @event: event log driver state
* @poison: poison driver state info
* @security: security driver state info
@@ -493,11 +526,6 @@ struct cxl_memdev_state {
u64 partition_align_bytes;
u64 active_volatile_bytes;
u64 active_persistent_bytes;
- u64 next_volatile_bytes;
- u64 next_persistent_bytes;
-
- struct cxl_dpa_perf ram_perf;
- struct cxl_dpa_perf pmem_perf;
struct cxl_event_state event;
struct cxl_poison_state poison;
@@ -835,7 +863,7 @@ int cxl_internal_send_cmd(struct cxl_mailbox *cxl_mbox,
int cxl_dev_state_identify(struct cxl_memdev_state *mds);
int cxl_await_media_ready(struct cxl_dev_state *cxlds);
int cxl_enumerate_cmds(struct cxl_memdev_state *mds);
-int cxl_mem_create_range_info(struct cxl_memdev_state *mds);
+int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info);
struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev);
void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
unsigned long *cmds);
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 2f03a4d5606e..9675243bd05b 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -152,7 +152,7 @@ static int cxl_mem_probe(struct device *dev)
return -ENXIO;
}
- if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) {
+ if (cxl_pmem_size(cxlds) && IS_ENABLED(CONFIG_CXL_PMEM)) {
rc = devm_cxl_add_nvdimm(parent_port, cxlmd);
if (rc) {
if (rc == -ENODEV)
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index a96e54c6259e..b2c943a4de0a 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -903,6 +903,7 @@ __ATTRIBUTE_GROUPS(cxl_rcd);
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
+ struct cxl_dpa_info range_info = { 0 };
struct cxl_memdev_state *mds;
struct cxl_dev_state *cxlds;
struct cxl_register_map map;
@@ -993,7 +994,11 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
- rc = cxl_mem_create_range_info(mds);
+ rc = cxl_mem_dpa_fetch(mds, &range_info);
+ if (rc)
+ return rc;
+
+ rc = cxl_dpa_setup(cxlds, &range_info);
if (rc)
return rc;
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index 81db5d629049..d061fe3d2b86 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -444,6 +444,16 @@ static int cxl_pmem_region_probe(struct device *dev)
goto out_nvd;
}
+ if (cxlds->serial == 0) {
+ /* include missing alongside invalid in this error message. */
+ dev_err(dev, "%s: invalid or missing serial number\n",
+ dev_name(&cxlmd->dev));
+ rc = -ENXIO;
+ goto out_nvd;
+ }
+ info[i].serial = cxlds->serial;
+ info[i].offset = m->start;
+
m->cxl_nvd = cxl_nvd;
mappings[i] = (struct nd_mapping_desc) {
.nvdimm = nvdimm,
@@ -451,8 +461,6 @@ static int cxl_pmem_region_probe(struct device *dev)
.size = m->size,
.position = i,
};
- info[i].offset = m->start;
- info[i].serial = cxlds->serial;
}
ndr_desc.num_mappings = cxlr_pmem->nr_mappings;
ndr_desc.mapping = mappings;
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index b69e68ef3f02..928409199a1a 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -24,7 +24,7 @@
#include <linux/bcd.h>
#include <acpi/ghes.h>
#include <ras/ras_event.h>
-#include "cper_cxl.h"
+#include <cxl/event.h>
/*
* CPER record ID need to be unique even after reboot, because record
@@ -624,11 +624,11 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata
else
goto err_section_too_small;
} else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) {
- struct cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata);
+ struct cxl_cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata);
printk("%ssection_type: CXL Protocol Error\n", newpfx);
if (gdata->error_data_length >= sizeof(*prot_err))
- cper_print_prot_err(newpfx, prot_err);
+ cxl_cper_print_prot_err(newpfx, prot_err);
else
goto err_section_too_small;
} else {
diff --git a/drivers/firmware/efi/cper_cxl.c b/drivers/firmware/efi/cper_cxl.c
index a55771b99a97..8a7667faf953 100644
--- a/drivers/firmware/efi/cper_cxl.c
+++ b/drivers/firmware/efi/cper_cxl.c
@@ -8,26 +8,7 @@
*/
#include <linux/cper.h>
-#include "cper_cxl.h"
-
-#define PROT_ERR_VALID_AGENT_TYPE BIT_ULL(0)
-#define PROT_ERR_VALID_AGENT_ADDRESS BIT_ULL(1)
-#define PROT_ERR_VALID_DEVICE_ID BIT_ULL(2)
-#define PROT_ERR_VALID_SERIAL_NUMBER BIT_ULL(3)
-#define PROT_ERR_VALID_CAPABILITY BIT_ULL(4)
-#define PROT_ERR_VALID_DVSEC BIT_ULL(5)
-#define PROT_ERR_VALID_ERROR_LOG BIT_ULL(6)
-
-/* CXL RAS Capability Structure, CXL v3.0 sec 8.2.4.16 */
-struct cxl_ras_capability_regs {
- u32 uncor_status;
- u32 uncor_mask;
- u32 uncor_severity;
- u32 cor_status;
- u32 cor_mask;
- u32 cap_control;
- u32 header_log[16];
-};
+#include <cxl/event.h>
static const char * const prot_err_agent_type_strs[] = {
"Restricted CXL Device",
@@ -40,22 +21,8 @@ static const char * const prot_err_agent_type_strs[] = {
"CXL Upstream Switch Port",
};
-/*
- * The layout of the enumeration and the values matches CXL Agent Type
- * field in the UEFI 2.10 Section N.2.13,
- */
-enum {
- RCD, /* Restricted CXL Device */
- RCH_DP, /* Restricted CXL Host Downstream Port */
- DEVICE, /* CXL Device */
- LD, /* CXL Logical Device */
- FMLD, /* CXL Fabric Manager managed Logical Device */
- RP, /* CXL Root Port */
- DSP, /* CXL Downstream Switch Port */
- USP, /* CXL Upstream Switch Port */
-};
-
-void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_err)
+void cxl_cper_print_prot_err(const char *pfx,
+ const struct cxl_cper_sec_prot_err *prot_err)
{
if (prot_err->valid_bits & PROT_ERR_VALID_AGENT_TYPE)
pr_info("%s agent_type: %d, %s\n", pfx, prot_err->agent_type,
diff --git a/drivers/firmware/efi/cper_cxl.h b/drivers/firmware/efi/cper_cxl.h
deleted file mode 100644
index 86bfcf7909ec..000000000000
--- a/drivers/firmware/efi/cper_cxl.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * UEFI Common Platform Error Record (CPER) support for CXL Section.
- *
- * Copyright (C) 2022 Advanced Micro Devices, Inc.
- *
- * Author: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
- */
-
-#ifndef LINUX_CPER_CXL_H
-#define LINUX_CPER_CXL_H
-
-/* CXL Protocol Error Section */
-#define CPER_SEC_CXL_PROT_ERR \
- GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \
- 0x4B, 0x77, 0x10, 0x48)
-
-#pragma pack(1)
-
-/* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */
-struct cper_sec_prot_err {
- u64 valid_bits;
- u8 agent_type;
- u8 reserved[7];
-
- /*
- * Except for RCH Downstream Port, all the remaining CXL Agent
- * types are uniquely identified by the PCIe compatible SBDF number.
- */
- union {
- u64 rcrb_base_addr;
- struct {
- u8 function;
- u8 device;
- u8 bus;
- u16 segment;
- u8 reserved_1[3];
- };
- } agent_addr;
-
- struct {
- u16 vendor_id;
- u16 device_id;
- u16 subsystem_vendor_id;
- u16 subsystem_id;
- u8 class_code[2];
- u16 slot;
- u8 reserved_1[4];
- } device_id;
-
- struct {
- u32 lower_dw;
- u32 upper_dw;
- } dev_serial_num;
-
- u8 capability[60];
- u16 dvsec_len;
- u16 err_len;
- u8 reserved_2[4];
-};
-
-#pragma pack()
-
-void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_err);
-
-#endif //__CPER_CXL_
diff --git a/include/cxl/event.h b/include/cxl/event.h
index 04edd44bd26f..f9ae1796da85 100644
--- a/include/cxl/event.h
+++ b/include/cxl/event.h
@@ -164,10 +164,99 @@ struct cxl_cper_work_data {
struct cxl_cper_event_rec rec;
};
+#define PROT_ERR_VALID_AGENT_TYPE BIT_ULL(0)
+#define PROT_ERR_VALID_AGENT_ADDRESS BIT_ULL(1)
+#define PROT_ERR_VALID_DEVICE_ID BIT_ULL(2)
+#define PROT_ERR_VALID_SERIAL_NUMBER BIT_ULL(3)
+#define PROT_ERR_VALID_CAPABILITY BIT_ULL(4)
+#define PROT_ERR_VALID_DVSEC BIT_ULL(5)
+#define PROT_ERR_VALID_ERROR_LOG BIT_ULL(6)
+
+/*
+ * The layout of the enumeration and its values match the CXL Agent Type
+ * field in UEFI 2.10 Section N.2.13.
+ */
+enum {
+ RCD, /* Restricted CXL Device */
+ RCH_DP, /* Restricted CXL Host Downstream Port */
+ DEVICE, /* CXL Device */
+ LD, /* CXL Logical Device */
+ FMLD, /* CXL Fabric Manager managed Logical Device */
+ RP, /* CXL Root Port */
+ DSP, /* CXL Downstream Switch Port */
+ USP, /* CXL Upstream Switch Port */
+};
+
+#pragma pack(1)
+
+/* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */
+struct cxl_cper_sec_prot_err {
+ u64 valid_bits;
+ u8 agent_type;
+ u8 reserved[7];
+
+ /*
+ * Except for RCH Downstream Port, all the remaining CXL Agent
+ * types are uniquely identified by the PCIe compatible SBDF number.
+ */
+ union {
+ u64 rcrb_base_addr;
+ struct {
+ u8 function;
+ u8 device;
+ u8 bus;
+ u16 segment;
+ u8 reserved_1[3];
+ };
+ } agent_addr;
+
+ struct {
+ u16 vendor_id;
+ u16 device_id;
+ u16 subsystem_vendor_id;
+ u16 subsystem_id;
+ u8 class_code[2];
+ u16 slot;
+ u8 reserved_1[4];
+ } device_id;
+
+ struct {
+ u32 lower_dw;
+ u32 upper_dw;
+ } dev_serial_num;
+
+ u8 capability[60];
+ u16 dvsec_len;
+ u16 err_len;
+ u8 reserved_2[4];
+};
+
+#pragma pack()
+
+/* CXL RAS Capability Structure, CXL v3.0 sec 8.2.4.16 */
+struct cxl_ras_capability_regs {
+ u32 uncor_status;
+ u32 uncor_mask;
+ u32 uncor_severity;
+ u32 cor_status;
+ u32 cor_mask;
+ u32 cap_control;
+ u32 header_log[16];
+};
+
+struct cxl_cper_prot_err_work_data {
+ struct cxl_cper_sec_prot_err prot_err;
+ struct cxl_ras_capability_regs ras_cap;
+ int severity;
+};
+
#ifdef CONFIG_ACPI_APEI_GHES
int cxl_cper_register_work(struct work_struct *work);
int cxl_cper_unregister_work(struct work_struct *work);
int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd);
+int cxl_cper_register_prot_err_work(struct work_struct *work);
+int cxl_cper_unregister_prot_err_work(struct work_struct *work);
+int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd);
#else
static inline int cxl_cper_register_work(struct work_struct *work)
{
@@ -182,6 +271,18 @@ static inline int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd)
{
return 0;
}
+static inline int cxl_cper_register_prot_err_work(struct work_struct *work)
+{
+ return 0;
+}
+static inline int cxl_cper_unregister_prot_err_work(struct work_struct *work)
+{
+ return 0;
+}
+static inline int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd)
+{
+ return 0;
+}
#endif
#endif /* _LINUX_CXL_EVENT_H */
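The new registration helpers mirror the existing cxl_cper_register_work()/cxl_cper_kfifo_get() event path: GHES queues each protocol-error record into a kfifo and schedules the registered work item, which then drains the fifo. A rough consumer sketch; the work function and its handling of the record are illustrative assumptions:

#include <linux/init.h>
#include <linux/workqueue.h>
#include <cxl/event.h>

static void example_prot_err_work_fn(struct work_struct *work)
{
	struct cxl_cper_prot_err_work_data wd;

	/* drain all queued protocol-error records */
	while (cxl_cper_prot_err_kfifo_get(&wd))
		pr_info("CXL protocol error, severity %d\n", wd.severity);
}
static DECLARE_WORK(example_prot_err_work, example_prot_err_work_fn);

static int __init example_init(void)
{
	return cxl_cper_register_prot_err_work(&example_prot_err_work);
}

static void __exit example_exit(void)
{
	cxl_cper_unregister_prot_err_work(&example_prot_err_work);
}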
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 265b0f8fc0b3..0ed60a91eca9 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -89,6 +89,10 @@ enum {
#define CPER_NOTIFY_DMAR \
GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \
0x72, 0x2D, 0xEB, 0x41)
+/* CXL Protocol Error Section */
+#define CPER_SEC_CXL_PROT_ERR \
+ GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \
+ 0x4B, 0x77, 0x10, 0x48)
/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */
/*
@@ -601,4 +605,8 @@ void cper_estatus_print(const char *pfx,
int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus);
int cper_estatus_check(const struct acpi_hest_generic_status *estatus);
+struct cxl_cper_sec_prot_err;
+void cxl_cper_print_prot_err(const char *pfx,
+ const struct cxl_cper_sec_prot_err *prot_err);
+
#endif
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index b1256fee3567..3d71447c0bd8 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -61,6 +61,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o
cxl_core-y += $(CXL_CORE_SRC)/hdm.o
cxl_core-y += $(CXL_CORE_SRC)/pmu.o
cxl_core-y += $(CXL_CORE_SRC)/cdat.o
+cxl_core-y += $(CXL_CORE_SRC)/ras.o
cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o
cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
cxl_core-y += config_check.o
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index cc8948f49117..083a66a52731 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -1000,25 +1000,21 @@ static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port)
find_cxl_root(port);
struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
struct access_coordinate ep_c[ACCESS_COORDINATE_MAX];
- struct range pmem_range = {
- .start = cxlds->pmem_res.start,
- .end = cxlds->pmem_res.end,
- };
- struct range ram_range = {
- .start = cxlds->ram_res.start,
- .end = cxlds->ram_res.end,
- };
if (!cxl_root)
return;
- if (range_len(&ram_range))
- dpa_perf_setup(port, &ram_range, &mds->ram_perf);
+ for (int i = 0; i < cxlds->nr_partitions; i++) {
+ struct resource *res = &cxlds->part[i].res;
+ struct cxl_dpa_perf *perf = &cxlds->part[i].perf;
+ struct range range = {
+ .start = res->start,
+ .end = res->end,
+ };
- if (range_len(&pmem_range))
- dpa_perf_setup(port, &pmem_range, &mds->pmem_perf);
+ dpa_perf_setup(port, &range, perf);
+ }
cxl_memdev_update_perf(cxlmd);
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index c99105dabe30..9c4fee78b729 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -1517,6 +1517,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
struct cxl_dev_state *cxlds;
struct cxl_mockmem_data *mdata;
struct cxl_mailbox *cxl_mbox;
+ struct cxl_dpa_info range_info = { 0 };
int rc;
mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL);
@@ -1556,7 +1557,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf;
INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work);
- cxlds->serial = pdev->id;
+ cxlds->serial = pdev->id + 1;
if (is_rcd(pdev))
cxlds->rcd = true;
@@ -1577,7 +1578,11 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (rc)
return rc;
- rc = cxl_mem_create_range_info(mds);
+ rc = cxl_mem_dpa_fetch(mds, &range_info);
+ if (rc)
+ return rc;
+
+ rc = cxl_dpa_setup(cxlds, &range_info);
if (rc)
return rc;