diff options
Diffstat (limited to 'drivers/cxl')
-rw-r--r-- | drivers/cxl/Kconfig | 16 | ||||
-rw-r--r-- | drivers/cxl/core/Makefile | 4 | ||||
-rw-r--r-- | drivers/cxl/core/acpi.c | 11 | ||||
-rw-r--r-- | drivers/cxl/core/cdat.c | 102 | ||||
-rw-r--r-- | drivers/cxl/core/core.h | 27 | ||||
-rw-r--r-- | drivers/cxl/core/features.c | 708 | ||||
-rw-r--r-- | drivers/cxl/core/hdm.c | 384 | ||||
-rw-r--r-- | drivers/cxl/core/mbox.c | 265 | ||||
-rw-r--r-- | drivers/cxl/core/mce.c | 65 | ||||
-rw-r--r-- | drivers/cxl/core/mce.h | 20 | ||||
-rw-r--r-- | drivers/cxl/core/memdev.c | 105 | ||||
-rw-r--r-- | drivers/cxl/core/pci.c | 111 | ||||
-rw-r--r-- | drivers/cxl/core/pmem.c | 15 | ||||
-rw-r--r-- | drivers/cxl/core/port.c | 38 | ||||
-rw-r--r-- | drivers/cxl/core/ras.c | 126 | ||||
-rw-r--r-- | drivers/cxl/core/region.c | 368 | ||||
-rw-r--r-- | drivers/cxl/core/regs.c | 60 | ||||
-rw-r--r-- | drivers/cxl/core/trace.h | 332 | ||||
-rw-r--r-- | drivers/cxl/cxl.h | 59 | ||||
-rw-r--r-- | drivers/cxl/cxlmem.h | 124 | ||||
-rw-r--r-- | drivers/cxl/cxlpci.h | 6 | ||||
-rw-r--r-- | drivers/cxl/mem.c | 2 | ||||
-rw-r--r-- | drivers/cxl/pci.c | 21 | ||||
-rw-r--r-- | drivers/cxl/pmem.c | 81 | ||||
-rw-r--r-- | drivers/cxl/port.c | 12 |
25 files changed, 2413 insertions, 649 deletions
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 876469e23f7a..cf1ba673b8c2 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -7,6 +7,7 @@ menuconfig CXL_BUS select PCI_DOE select FIRMWARE_TABLE select NUMA_KEEP_MEMINFO if NUMA_MEMBLKS + select FWCTL if CXL_FEATURES help CXL is a bus that is electrically compatible with PCI Express, but layers three protocols on that signalling (CXL.io, CXL.cache, and @@ -102,6 +103,17 @@ config CXL_MEM If unsure say 'm'. +config CXL_FEATURES + bool "CXL: Features" + depends on CXL_PCI + help + Enable support for CXL Features. A CXL device that includes a mailbox + supports commands that allows listing, getting, and setting of + optionally defined features such as memory sparing or post package + sparing. Vendors may define custom features for the device. + + If unsure say 'n' + config CXL_PORT default CXL_BUS tristate @@ -146,4 +158,8 @@ config CXL_REGION_INVALIDATION_TEST If unsure, or if this kernel is meant for production environments, say N. +config CXL_MCE + def_bool y + depends on X86_MCE && MEMORY_FAILURE + endif diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 9259bcc6773c..086df97a0fcf 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -14,5 +14,9 @@ cxl_core-y += pci.o cxl_core-y += hdm.o cxl_core-y += pmu.o cxl_core-y += cdat.o +cxl_core-y += ras.o +cxl_core-y += acpi.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o +cxl_core-$(CONFIG_CXL_MCE) += mce.o +cxl_core-$(CONFIG_CXL_FEATURES) += features.o diff --git a/drivers/cxl/core/acpi.c b/drivers/cxl/core/acpi.c new file mode 100644 index 000000000000..f13b4dae6ac5 --- /dev/null +++ b/drivers/cxl/core/acpi.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation. All rights reserved. */ +#include <linux/acpi.h> +#include "cxl.h" +#include "core.h" + +int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res, + int nid, resource_size_t *size) +{ + return hmat_get_extended_linear_cache_size(backing_res, nid, size); +} diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 8153f8d83a16..edb4f41eeacc 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -258,27 +258,29 @@ static void update_perf_entry(struct device *dev, struct dsmas_entry *dent, static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds, struct xarray *dsmas_xa) { - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); struct device *dev = cxlds->dev; - struct range pmem_range = { - .start = cxlds->pmem_res.start, - .end = cxlds->pmem_res.end, - }; - struct range ram_range = { - .start = cxlds->ram_res.start, - .end = cxlds->ram_res.end, - }; struct dsmas_entry *dent; unsigned long index; xa_for_each(dsmas_xa, index, dent) { - if (resource_size(&cxlds->ram_res) && - range_contains(&ram_range, &dent->dpa_range)) - update_perf_entry(dev, dent, &mds->ram_perf); - else if (resource_size(&cxlds->pmem_res) && - range_contains(&pmem_range, &dent->dpa_range)) - update_perf_entry(dev, dent, &mds->pmem_perf); - else + bool found = false; + + for (int i = 0; i < cxlds->nr_partitions; i++) { + struct resource *res = &cxlds->part[i].res; + struct range range = { + .start = res->start, + .end = res->end, + }; + + if (range_contains(&range, &dent->dpa_range)) { + update_perf_entry(dev, dent, + &cxlds->part[i].perf); + found = true; + break; + } + } + + if (!found) dev_dbg(dev, "no partition for dsmas dpa: %pra\n", &dent->dpa_range); } @@ -343,36 +345,46 @@ static int match_cxlrd_hb(struct device *dev, void *data) return 0; } -static int cxl_qos_class_verify(struct cxl_memdev *cxlmd) +static void cxl_qos_class_verify(struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); struct cxl_port *root_port; - int rc; struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(cxlmd->endpoint); + /* + * No need to reset_dpa_perf() here as find_cxl_root() is guaranteed to + * succeed when called in the cxl_endpoint_port_probe() path. + */ if (!cxl_root) - return -ENODEV; + return; root_port = &cxl_root->port; - /* Check that the QTG IDs are all sane between end device and root decoders */ - if (!cxl_qos_match(root_port, &mds->ram_perf)) - reset_dpa_perf(&mds->ram_perf); - if (!cxl_qos_match(root_port, &mds->pmem_perf)) - reset_dpa_perf(&mds->pmem_perf); - - /* Check to make sure that the device's host bridge is under a root decoder */ - rc = device_for_each_child(&root_port->dev, - cxlmd->endpoint->host_bridge, match_cxlrd_hb); - if (!rc) { - reset_dpa_perf(&mds->ram_perf); - reset_dpa_perf(&mds->pmem_perf); + /* + * Save userspace from needing to check if a qos class has any matches + * by hiding qos class info if the memdev is not mapped by a root + * decoder, or the partition class does not match any root decoder + * class. + */ + if (!device_for_each_child(&root_port->dev, + cxlmd->endpoint->host_bridge, + match_cxlrd_hb)) { + for (int i = 0; i < cxlds->nr_partitions; i++) { + struct cxl_dpa_perf *perf = &cxlds->part[i].perf; + + reset_dpa_perf(perf); + } + return; } - return rc; + for (int i = 0; i < cxlds->nr_partitions; i++) { + struct cxl_dpa_perf *perf = &cxlds->part[i].perf; + + if (!cxl_qos_match(root_port, perf)) + reset_dpa_perf(perf); + } } static void discard_dsmas(struct xarray *xa) @@ -570,23 +582,18 @@ static bool dpa_perf_contains(struct cxl_dpa_perf *perf, return range_contains(&perf->dpa_range, &dpa); } -static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxled, - enum cxl_decoder_mode mode) +static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxled) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_dpa_perf *perf; - switch (mode) { - case CXL_DECODER_RAM: - perf = &mds->ram_perf; - break; - case CXL_DECODER_PMEM: - perf = &mds->pmem_perf; - break; - default: + if (cxled->part < 0) + return ERR_PTR(-EINVAL); + perf = &cxlds->part[cxled->part].perf; + + if (!perf) return ERR_PTR(-EINVAL); - } if (!dpa_perf_contains(perf, cxled->dpa_res)) return ERR_PTR(-EINVAL); @@ -647,11 +654,10 @@ static int cxl_endpoint_gather_bandwidth(struct cxl_region *cxlr, if (cxlds->rcd) return -ENODEV; - perf = cxled_get_dpa_perf(cxled, cxlr->mode); + perf = cxled_get_dpa_perf(cxled); if (IS_ERR(perf)) return PTR_ERR(perf); - gp_port = to_cxl_port(parent_port->dev.parent); *gp_is_root = is_cxl_root(gp_port); /* @@ -1053,7 +1059,7 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr, lockdep_assert_held(&cxl_dpa_rwsem); - perf = cxled_get_dpa_perf(cxled, cxlr->mode); + perf = cxled_get_dpa_perf(cxled); if (IS_ERR(perf)) return; diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 800466f96a68..17b692eb3257 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -4,6 +4,8 @@ #ifndef __CXL_CORE_H__ #define __CXL_CORE_H__ +#include <cxl/mailbox.h> + extern const struct device_type cxl_nvdimm_bridge_type; extern const struct device_type cxl_nvdimm_type; extern const struct device_type cxl_pmu_type; @@ -65,15 +67,15 @@ static inline void cxl_region_exit(void) struct cxl_send_command; struct cxl_mem_query_commands; -int cxl_query_cmd(struct cxl_memdev *cxlmd, +int cxl_query_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_mem_query_commands __user *q); -int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s); +int cxl_send_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_send_command __user *s); void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, resource_size_t length); struct dentry *cxl_debugfs_create_dir(const char *dir); -int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled, - enum cxl_decoder_mode mode); +int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, + enum cxl_partition_mode mode); int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size); int cxl_dpa_free(struct cxl_endpoint_decoder *cxled); resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled); @@ -115,4 +117,21 @@ bool cxl_need_node_perf_attrs_update(int nid); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); +int cxl_ras_init(void); +void cxl_ras_exit(void); +int cxl_gpf_port_setup(struct cxl_dport *dport); +int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res, + int nid, resource_size_t *size); + +#ifdef CONFIG_CXL_FEATURES +size_t cxl_get_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid, + enum cxl_get_feat_selection selection, + void *feat_out, size_t feat_out_size, u16 offset, + u16 *return_code); +int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid, + u8 feat_version, const void *feat_data, + size_t feat_data_size, u32 feat_flag, u16 offset, + u16 *return_code); +#endif + #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c new file mode 100644 index 000000000000..1498e2369c37 --- /dev/null +++ b/drivers/cxl/core/features.c @@ -0,0 +1,708 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024-2025 Intel Corporation. All rights reserved. */ +#include <linux/fwctl.h> +#include <linux/device.h> +#include <cxl/mailbox.h> +#include <cxl/features.h> +#include <uapi/fwctl/cxl.h> +#include "cxl.h" +#include "core.h" +#include "cxlmem.h" + +/* All the features below are exclusive to the kernel */ +static const uuid_t cxl_exclusive_feats[] = { + CXL_FEAT_PATROL_SCRUB_UUID, + CXL_FEAT_ECS_UUID, + CXL_FEAT_SPPR_UUID, + CXL_FEAT_HPPR_UUID, + CXL_FEAT_CACHELINE_SPARING_UUID, + CXL_FEAT_ROW_SPARING_UUID, + CXL_FEAT_BANK_SPARING_UUID, + CXL_FEAT_RANK_SPARING_UUID, +}; + +static bool is_cxl_feature_exclusive_by_uuid(const uuid_t *uuid) +{ + for (int i = 0; i < ARRAY_SIZE(cxl_exclusive_feats); i++) { + if (uuid_equal(uuid, &cxl_exclusive_feats[i])) + return true; + } + + return false; +} + +static bool is_cxl_feature_exclusive(struct cxl_feat_entry *entry) +{ + return is_cxl_feature_exclusive_by_uuid(&entry->uuid); +} + +inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds) +{ + return cxlds->cxlfs; +} +EXPORT_SYMBOL_NS_GPL(to_cxlfs, "CXL"); + +static int cxl_get_supported_features_count(struct cxl_mailbox *cxl_mbox) +{ + struct cxl_mbox_get_sup_feats_out mbox_out; + struct cxl_mbox_get_sup_feats_in mbox_in; + struct cxl_mbox_cmd mbox_cmd; + int rc; + + memset(&mbox_in, 0, sizeof(mbox_in)); + mbox_in.count = cpu_to_le32(sizeof(mbox_out)); + memset(&mbox_out, 0, sizeof(mbox_out)); + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_SUPPORTED_FEATURES, + .size_in = sizeof(mbox_in), + .payload_in = &mbox_in, + .size_out = sizeof(mbox_out), + .payload_out = &mbox_out, + .min_out = sizeof(mbox_out), + }; + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); + if (rc < 0) + return rc; + + return le16_to_cpu(mbox_out.supported_feats); +} + +static struct cxl_feat_entries * +get_supported_features(struct cxl_features_state *cxlfs) +{ + int remain_feats, max_size, max_feats, start, rc, hdr_size; + struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox; + int feat_size = sizeof(struct cxl_feat_entry); + struct cxl_mbox_get_sup_feats_in mbox_in; + struct cxl_feat_entry *entry; + struct cxl_mbox_cmd mbox_cmd; + int user_feats = 0; + int count; + + count = cxl_get_supported_features_count(cxl_mbox); + if (count <= 0) + return NULL; + + struct cxl_feat_entries *entries __free(kvfree) = + kvmalloc(struct_size(entries, ent, count), GFP_KERNEL); + if (!entries) + return NULL; + + struct cxl_mbox_get_sup_feats_out *mbox_out __free(kvfree) = + kvmalloc(cxl_mbox->payload_size, GFP_KERNEL); + if (!mbox_out) + return NULL; + + hdr_size = struct_size(mbox_out, ents, 0); + max_size = cxl_mbox->payload_size - hdr_size; + /* max feat entries that can fit in mailbox max payload size */ + max_feats = max_size / feat_size; + entry = entries->ent; + + start = 0; + remain_feats = count; + do { + int retrieved, alloc_size, copy_feats; + int num_entries; + + if (remain_feats > max_feats) { + alloc_size = struct_size(mbox_out, ents, max_feats); + remain_feats = remain_feats - max_feats; + copy_feats = max_feats; + } else { + alloc_size = struct_size(mbox_out, ents, remain_feats); + copy_feats = remain_feats; + remain_feats = 0; + } + + memset(&mbox_in, 0, sizeof(mbox_in)); + mbox_in.count = cpu_to_le32(alloc_size); + mbox_in.start_idx = cpu_to_le16(start); + memset(mbox_out, 0, alloc_size); + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_SUPPORTED_FEATURES, + .size_in = sizeof(mbox_in), + .payload_in = &mbox_in, + .size_out = alloc_size, + .payload_out = mbox_out, + .min_out = hdr_size, + }; + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); + if (rc < 0) + return NULL; + + if (mbox_cmd.size_out <= hdr_size) + return NULL; + + /* + * Make sure retrieved out buffer is multiple of feature + * entries. + */ + retrieved = mbox_cmd.size_out - hdr_size; + if (retrieved % feat_size) + return NULL; + + num_entries = le16_to_cpu(mbox_out->num_entries); + /* + * If the reported output entries * defined entry size != + * retrieved output bytes, then the output package is incorrect. + */ + if (num_entries * feat_size != retrieved) + return NULL; + + memcpy(entry, mbox_out->ents, retrieved); + for (int i = 0; i < num_entries; i++) { + if (!is_cxl_feature_exclusive(entry + i)) + user_feats++; + } + entry += num_entries; + /* + * If the number of output entries is less than expected, add the + * remaining entries to the next batch. + */ + remain_feats += copy_feats - num_entries; + start += num_entries; + } while (remain_feats); + + entries->num_features = count; + entries->num_user_features = user_feats; + + return no_free_ptr(entries); +} + +static void free_cxlfs(void *_cxlfs) +{ + struct cxl_features_state *cxlfs = _cxlfs; + struct cxl_dev_state *cxlds = cxlfs->cxlds; + + cxlds->cxlfs = NULL; + kvfree(cxlfs->entries); + kfree(cxlfs); +} + +/** + * devm_cxl_setup_features() - Allocate and initialize features context + * @cxlds: CXL device context + * + * Return 0 on success or -errno on failure. + */ +int devm_cxl_setup_features(struct cxl_dev_state *cxlds) +{ + struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; + + if (cxl_mbox->feat_cap < CXL_FEATURES_RO) + return -ENODEV; + + struct cxl_features_state *cxlfs __free(kfree) = + kzalloc(sizeof(*cxlfs), GFP_KERNEL); + if (!cxlfs) + return -ENOMEM; + + cxlfs->cxlds = cxlds; + + cxlfs->entries = get_supported_features(cxlfs); + if (!cxlfs->entries) + return -ENOMEM; + + cxlds->cxlfs = cxlfs; + + return devm_add_action_or_reset(cxlds->dev, free_cxlfs, no_free_ptr(cxlfs)); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_features, "CXL"); + +size_t cxl_get_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid, + enum cxl_get_feat_selection selection, + void *feat_out, size_t feat_out_size, u16 offset, + u16 *return_code) +{ + size_t data_to_rd_size, size_out; + struct cxl_mbox_get_feat_in pi; + struct cxl_mbox_cmd mbox_cmd; + size_t data_rcvd_size = 0; + int rc; + + if (return_code) + *return_code = CXL_MBOX_CMD_RC_INPUT; + + if (!feat_out || !feat_out_size) + return 0; + + size_out = min(feat_out_size, cxl_mbox->payload_size); + uuid_copy(&pi.uuid, feat_uuid); + pi.selection = selection; + do { + data_to_rd_size = min(feat_out_size - data_rcvd_size, + cxl_mbox->payload_size); + pi.offset = cpu_to_le16(offset + data_rcvd_size); + pi.count = cpu_to_le16(data_to_rd_size); + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_FEATURE, + .size_in = sizeof(pi), + .payload_in = &pi, + .size_out = size_out, + .payload_out = feat_out + data_rcvd_size, + .min_out = data_to_rd_size, + }; + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); + if (rc < 0 || !mbox_cmd.size_out) { + if (return_code) + *return_code = mbox_cmd.return_code; + return 0; + } + data_rcvd_size += mbox_cmd.size_out; + } while (data_rcvd_size < feat_out_size); + + if (return_code) + *return_code = CXL_MBOX_CMD_RC_SUCCESS; + + return data_rcvd_size; +} + +/* + * FEAT_DATA_MIN_PAYLOAD_SIZE - min extra number of bytes should be + * available in the mailbox for storing the actual feature data so that + * the feature data transfer would work as expected. + */ +#define FEAT_DATA_MIN_PAYLOAD_SIZE 10 +int cxl_set_feature(struct cxl_mailbox *cxl_mbox, + const uuid_t *feat_uuid, u8 feat_version, + const void *feat_data, size_t feat_data_size, + u32 feat_flag, u16 offset, u16 *return_code) +{ + size_t data_in_size, data_sent_size = 0; + struct cxl_mbox_cmd mbox_cmd; + size_t hdr_size; + + if (return_code) + *return_code = CXL_MBOX_CMD_RC_INPUT; + + struct cxl_mbox_set_feat_in *pi __free(kfree) = + kzalloc(cxl_mbox->payload_size, GFP_KERNEL); + if (!pi) + return -ENOMEM; + + uuid_copy(&pi->uuid, feat_uuid); + pi->version = feat_version; + feat_flag &= ~CXL_SET_FEAT_FLAG_DATA_TRANSFER_MASK; + feat_flag |= CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET; + hdr_size = sizeof(pi->hdr); + /* + * Check minimum mbox payload size is available for + * the feature data transfer. + */ + if (hdr_size + FEAT_DATA_MIN_PAYLOAD_SIZE > cxl_mbox->payload_size) + return -ENOMEM; + + if (hdr_size + feat_data_size <= cxl_mbox->payload_size) { + pi->flags = cpu_to_le32(feat_flag | + CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER); + data_in_size = feat_data_size; + } else { + pi->flags = cpu_to_le32(feat_flag | + CXL_SET_FEAT_FLAG_INITIATE_DATA_TRANSFER); + data_in_size = cxl_mbox->payload_size - hdr_size; + } + + do { + int rc; + + pi->offset = cpu_to_le16(offset + data_sent_size); + memcpy(pi->feat_data, feat_data + data_sent_size, data_in_size); + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_SET_FEATURE, + .size_in = hdr_size + data_in_size, + .payload_in = pi, + }; + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); + if (rc < 0) { + if (return_code) + *return_code = mbox_cmd.return_code; + return rc; + } + + data_sent_size += data_in_size; + if (data_sent_size >= feat_data_size) { + if (return_code) + *return_code = CXL_MBOX_CMD_RC_SUCCESS; + return 0; + } + + if ((feat_data_size - data_sent_size) <= (cxl_mbox->payload_size - hdr_size)) { + data_in_size = feat_data_size - data_sent_size; + pi->flags = cpu_to_le32(feat_flag | + CXL_SET_FEAT_FLAG_FINISH_DATA_TRANSFER); + } else { + pi->flags = cpu_to_le32(feat_flag | + CXL_SET_FEAT_FLAG_CONTINUE_DATA_TRANSFER); + } + } while (true); +} + +/* FWCTL support */ + +static inline struct cxl_memdev *fwctl_to_memdev(struct fwctl_device *fwctl_dev) +{ + return to_cxl_memdev(fwctl_dev->dev.parent); +} + +static int cxlctl_open_uctx(struct fwctl_uctx *uctx) +{ + return 0; +} + +static void cxlctl_close_uctx(struct fwctl_uctx *uctx) +{ +} + +static struct cxl_feat_entry * +get_support_feature_info(struct cxl_features_state *cxlfs, + const struct fwctl_rpc_cxl *rpc_in) +{ + struct cxl_feat_entry *feat; + const uuid_t *uuid; + + if (rpc_in->op_size < sizeof(uuid)) + return ERR_PTR(-EINVAL); + + uuid = &rpc_in->set_feat_in.uuid; + + for (int i = 0; i < cxlfs->entries->num_features; i++) { + feat = &cxlfs->entries->ent[i]; + if (uuid_equal(uuid, &feat->uuid)) + return feat; + } + + return ERR_PTR(-EINVAL); +} + +static void *cxlctl_get_supported_features(struct cxl_features_state *cxlfs, + const struct fwctl_rpc_cxl *rpc_in, + size_t *out_len) +{ + const struct cxl_mbox_get_sup_feats_in *feat_in; + struct cxl_mbox_get_sup_feats_out *feat_out; + struct cxl_feat_entry *pos; + size_t out_size; + int requested; + u32 count; + u16 start; + int i; + + if (rpc_in->op_size != sizeof(*feat_in)) + return ERR_PTR(-EINVAL); + + feat_in = &rpc_in->get_sup_feats_in; + count = le32_to_cpu(feat_in->count); + start = le16_to_cpu(feat_in->start_idx); + requested = count / sizeof(*pos); + + /* + * Make sure that the total requested number of entries is not greater + * than the total number of supported features allowed for userspace. + */ + if (start >= cxlfs->entries->num_features) + return ERR_PTR(-EINVAL); + + requested = min_t(int, requested, cxlfs->entries->num_features - start); + + out_size = sizeof(struct fwctl_rpc_cxl_out) + + struct_size(feat_out, ents, requested); + + struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) = + kvzalloc(out_size, GFP_KERNEL); + if (!rpc_out) + return ERR_PTR(-ENOMEM); + + rpc_out->size = struct_size(feat_out, ents, requested); + feat_out = &rpc_out->get_sup_feats_out; + if (requested == 0) { + feat_out->num_entries = cpu_to_le16(requested); + feat_out->supported_feats = + cpu_to_le16(cxlfs->entries->num_features); + rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS; + *out_len = out_size; + return no_free_ptr(rpc_out); + } + + for (i = start, pos = &feat_out->ents[0]; + i < cxlfs->entries->num_features; i++, pos++) { + if (i - start == requested) + break; + + memcpy(pos, &cxlfs->entries->ent[i], sizeof(*pos)); + /* + * If the feature is exclusive, set the set_feat_size to 0 to + * indicate that the feature is not changeable. + */ + if (is_cxl_feature_exclusive(pos)) { + u32 flags; + + pos->set_feat_size = 0; + flags = le32_to_cpu(pos->flags); + flags &= ~CXL_FEATURE_F_CHANGEABLE; + pos->flags = cpu_to_le32(flags); + } + } + + feat_out->num_entries = cpu_to_le16(requested); + feat_out->supported_feats = cpu_to_le16(cxlfs->entries->num_features); + rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS; + *out_len = out_size; + + return no_free_ptr(rpc_out); +} + +static void *cxlctl_get_feature(struct cxl_features_state *cxlfs, + const struct fwctl_rpc_cxl *rpc_in, + size_t *out_len) +{ + struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox; + const struct cxl_mbox_get_feat_in *feat_in; + u16 offset, count, return_code; + size_t out_size = *out_len; + + if (rpc_in->op_size != sizeof(*feat_in)) + return ERR_PTR(-EINVAL); + + feat_in = &rpc_in->get_feat_in; + offset = le16_to_cpu(feat_in->offset); + count = le16_to_cpu(feat_in->count); + + if (!count) + return ERR_PTR(-EINVAL); + + struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) = + kvzalloc(out_size, GFP_KERNEL); + if (!rpc_out) + return ERR_PTR(-ENOMEM); + + out_size = cxl_get_feature(cxl_mbox, &feat_in->uuid, + feat_in->selection, rpc_out->payload, + count, offset, &return_code); + *out_len = sizeof(struct fwctl_rpc_cxl_out); + if (!out_size) { + rpc_out->size = 0; + rpc_out->retval = return_code; + return no_free_ptr(rpc_out); + } + + rpc_out->size = out_size; + rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS; + *out_len += out_size; + + return no_free_ptr(rpc_out); +} + +static void *cxlctl_set_feature(struct cxl_features_state *cxlfs, + const struct fwctl_rpc_cxl *rpc_in, + size_t *out_len) +{ + struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox; + const struct cxl_mbox_set_feat_in *feat_in; + size_t out_size, data_size; + u16 offset, return_code; + u32 flags; + int rc; + + if (rpc_in->op_size <= sizeof(feat_in->hdr)) + return ERR_PTR(-EINVAL); + + feat_in = &rpc_in->set_feat_in; + + if (is_cxl_feature_exclusive_by_uuid(&feat_in->uuid)) + return ERR_PTR(-EPERM); + + offset = le16_to_cpu(feat_in->offset); + flags = le32_to_cpu(feat_in->flags); + out_size = *out_len; + + struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) = + kvzalloc(out_size, GFP_KERNEL); + if (!rpc_out) + return ERR_PTR(-ENOMEM); + + rpc_out->size = 0; + + data_size = rpc_in->op_size - sizeof(feat_in->hdr); + rc = cxl_set_feature(cxl_mbox, &feat_in->uuid, + feat_in->version, feat_in->feat_data, + data_size, flags, offset, &return_code); + *out_len = sizeof(*rpc_out); + if (rc) { + rpc_out->retval = return_code; + return no_free_ptr(rpc_out); + } + + rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS; + + return no_free_ptr(rpc_out); +} + +static bool cxlctl_validate_set_features(struct cxl_features_state *cxlfs, + const struct fwctl_rpc_cxl *rpc_in, + enum fwctl_rpc_scope scope) +{ + u16 effects, imm_mask, reset_mask; + struct cxl_feat_entry *feat; + u32 flags; + + feat = get_support_feature_info(cxlfs, rpc_in); + if (IS_ERR(feat)) + return false; + + /* Ensure that the attribute is changeable */ + flags = le32_to_cpu(feat->flags); + if (!(flags & CXL_FEATURE_F_CHANGEABLE)) + return false; + + effects = le16_to_cpu(feat->effects); + + /* + * Reserved bits are set, rejecting since the effects is not + * comprehended by the driver. + */ + if (effects & CXL_CMD_EFFECTS_RESERVED) { + dev_warn_once(cxlfs->cxlds->dev, + "Reserved bits set in the Feature effects field!\n"); + return false; + } + + /* Currently no user background command support */ + if (effects & CXL_CMD_BACKGROUND) + return false; + + /* Effects cause immediate change, highest security scope is needed */ + imm_mask = CXL_CMD_CONFIG_CHANGE_IMMEDIATE | + CXL_CMD_DATA_CHANGE_IMMEDIATE | + CXL_CMD_POLICY_CHANGE_IMMEDIATE | + CXL_CMD_LOG_CHANGE_IMMEDIATE; + + reset_mask = CXL_CMD_CONFIG_CHANGE_COLD_RESET | + CXL_CMD_CONFIG_CHANGE_CONV_RESET | + CXL_CMD_CONFIG_CHANGE_CXL_RESET; + + /* If no immediate or reset effect set, The hardware has a bug */ + if (!(effects & imm_mask) && !(effects & reset_mask)) + return false; + + /* + * If the Feature setting causes immediate configuration change + * then we need the full write permission policy. + */ + if (effects & imm_mask && scope >= FWCTL_RPC_DEBUG_WRITE_FULL) + return true; + + /* + * If the Feature setting only causes configuration change + * after a reset, then the lesser level of write permission + * policy is ok. + */ + if (!(effects & imm_mask) && scope >= FWCTL_RPC_DEBUG_WRITE) + return true; + + return false; +} + +static bool cxlctl_validate_hw_command(struct cxl_features_state *cxlfs, + const struct fwctl_rpc_cxl *rpc_in, + enum fwctl_rpc_scope scope, + u16 opcode) +{ + struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox; + + switch (opcode) { + case CXL_MBOX_OP_GET_SUPPORTED_FEATURES: + case CXL_MBOX_OP_GET_FEATURE: + if (cxl_mbox->feat_cap < CXL_FEATURES_RO) + return false; + if (scope >= FWCTL_RPC_CONFIGURATION) + return true; + return false; + case CXL_MBOX_OP_SET_FEATURE: + if (cxl_mbox->feat_cap < CXL_FEATURES_RW) + return false; + return cxlctl_validate_set_features(cxlfs, rpc_in, scope); + default: + return false; + } +} + +static void *cxlctl_handle_commands(struct cxl_features_state *cxlfs, + const struct fwctl_rpc_cxl *rpc_in, + size_t *out_len, u16 opcode) +{ + switch (opcode) { + case CXL_MBOX_OP_GET_SUPPORTED_FEATURES: + return cxlctl_get_supported_features(cxlfs, rpc_in, out_len); + case CXL_MBOX_OP_GET_FEATURE: + return cxlctl_get_feature(cxlfs, rpc_in, out_len); + case CXL_MBOX_OP_SET_FEATURE: + return cxlctl_set_feature(cxlfs, rpc_in, out_len); + default: + return ERR_PTR(-EOPNOTSUPP); + } +} + +static void *cxlctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope, + void *in, size_t in_len, size_t *out_len) +{ + struct fwctl_device *fwctl_dev = uctx->fwctl; + struct cxl_memdev *cxlmd = fwctl_to_memdev(fwctl_dev); + struct cxl_features_state *cxlfs = to_cxlfs(cxlmd->cxlds); + const struct fwctl_rpc_cxl *rpc_in = in; + u16 opcode = rpc_in->opcode; + + if (!cxlctl_validate_hw_command(cxlfs, rpc_in, scope, opcode)) + return ERR_PTR(-EINVAL); + + return cxlctl_handle_commands(cxlfs, rpc_in, out_len, opcode); +} + +static const struct fwctl_ops cxlctl_ops = { + .device_type = FWCTL_DEVICE_TYPE_CXL, + .uctx_size = sizeof(struct fwctl_uctx), + .open_uctx = cxlctl_open_uctx, + .close_uctx = cxlctl_close_uctx, + .fw_rpc = cxlctl_fw_rpc, +}; + +DEFINE_FREE(free_fwctl_dev, struct fwctl_device *, if (_T) fwctl_put(_T)) + +static void free_memdev_fwctl(void *_fwctl_dev) +{ + struct fwctl_device *fwctl_dev = _fwctl_dev; + + fwctl_unregister(fwctl_dev); + fwctl_put(fwctl_dev); +} + +int devm_cxl_setup_fwctl(struct device *host, struct cxl_memdev *cxlmd) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_features_state *cxlfs; + int rc; + + cxlfs = to_cxlfs(cxlds); + if (!cxlfs) + return -ENODEV; + + /* No need to setup FWCTL if there are no user allowed features found */ + if (!cxlfs->entries->num_user_features) + return -ENODEV; + + struct fwctl_device *fwctl_dev __free(free_fwctl_dev) = + _fwctl_alloc_device(&cxlmd->dev, &cxlctl_ops, sizeof(*fwctl_dev)); + if (!fwctl_dev) + return -ENOMEM; + + rc = fwctl_register(fwctl_dev); + if (rc) + return rc; + + return devm_add_action_or_reset(host, free_memdev_fwctl, + no_free_ptr(fwctl_dev)); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_fwctl, "CXL"); + +MODULE_IMPORT_NS("FWCTL"); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 28edd5822486..70cae4ebf8a4 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -213,16 +213,46 @@ void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds) { struct resource *p1, *p2; - down_read(&cxl_dpa_rwsem); + guard(rwsem_read)(&cxl_dpa_rwsem); for (p1 = cxlds->dpa_res.child; p1; p1 = p1->sibling) { __cxl_dpa_debug(file, p1, 0); for (p2 = p1->child; p2; p2 = p2->sibling) __cxl_dpa_debug(file, p2, 1); } - up_read(&cxl_dpa_rwsem); } EXPORT_SYMBOL_NS_GPL(cxl_dpa_debug, "CXL"); +/* See request_skip() kernel-doc */ +static resource_size_t __adjust_skip(struct cxl_dev_state *cxlds, + const resource_size_t skip_base, + const resource_size_t skip_len, + const char *requester) +{ + const resource_size_t skip_end = skip_base + skip_len - 1; + + for (int i = 0; i < cxlds->nr_partitions; i++) { + const struct resource *part_res = &cxlds->part[i].res; + resource_size_t adjust_start, adjust_end, size; + + adjust_start = max(skip_base, part_res->start); + adjust_end = min(skip_end, part_res->end); + + if (adjust_end < adjust_start) + continue; + + size = adjust_end - adjust_start + 1; + + if (!requester) + __release_region(&cxlds->dpa_res, adjust_start, size); + else if (!__request_region(&cxlds->dpa_res, adjust_start, size, + requester, 0)) + return adjust_start - skip_base; + } + + return skip_len; +} +#define release_skip(c, b, l) __adjust_skip((c), (b), (l), NULL) + /* * Must be called in a context that synchronizes against this decoder's * port ->remove() callback (like an endpoint decoder sysfs attribute) @@ -241,7 +271,7 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled) skip_start = res->start - cxled->skip; __release_region(&cxlds->dpa_res, res->start, resource_size(res)); if (cxled->skip) - __release_region(&cxlds->dpa_res, skip_start, cxled->skip); + release_skip(cxlds, skip_start, cxled->skip); cxled->skip = 0; cxled->dpa_res = NULL; put_device(&cxled->cxld.dev); @@ -250,9 +280,8 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled) static void cxl_dpa_release(void *cxled) { - down_write(&cxl_dpa_rwsem); + guard(rwsem_write)(&cxl_dpa_rwsem); __cxl_dpa_release(cxled); - up_write(&cxl_dpa_rwsem); } /* @@ -268,6 +297,58 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled) __cxl_dpa_release(cxled); } +/** + * request_skip() - Track DPA 'skip' in @cxlds->dpa_res resource tree + * @cxlds: CXL.mem device context that parents @cxled + * @cxled: Endpoint decoder establishing new allocation that skips lower DPA + * @skip_base: DPA < start of new DPA allocation (DPAnew) + * @skip_len: @skip_base + @skip_len == DPAnew + * + * DPA 'skip' arises from out-of-sequence DPA allocation events relative + * to free capacity across multiple partitions. It is a wasteful event + * as usable DPA gets thrown away, but if a deployment has, for example, + * a dual RAM+PMEM device, wants to use PMEM, and has unallocated RAM + * DPA, the free RAM DPA must be sacrificed to start allocating PMEM. + * See third "Implementation Note" in CXL 3.1 8.2.4.19.13 "Decoder + * Protection" for more details. + * + * A 'skip' always covers the last allocated DPA in a previous partition + * to the start of the current partition to allocate. Allocations never + * start in the middle of a partition, and allocations are always + * de-allocated in reverse order (see cxl_dpa_free(), or natural devm + * unwind order from forced in-order allocation). + * + * If @cxlds->nr_partitions was guaranteed to be <= 2 then the 'skip' + * would always be contained to a single partition. Given + * @cxlds->nr_partitions may be > 2 it results in cases where the 'skip' + * might span "tail capacity of partition[0], all of partition[1], ..., + * all of partition[N-1]" to support allocating from partition[N]. That + * in turn interacts with the partition 'struct resource' boundaries + * within @cxlds->dpa_res whereby 'skip' requests need to be divided by + * partition. I.e. this is a quirk of using a 'struct resource' tree to + * detect range conflicts while also tracking partition boundaries in + * @cxlds->dpa_res. + */ +static int request_skip(struct cxl_dev_state *cxlds, + struct cxl_endpoint_decoder *cxled, + const resource_size_t skip_base, + const resource_size_t skip_len) +{ + resource_size_t skipped = __adjust_skip(cxlds, skip_base, skip_len, + dev_name(&cxled->cxld.dev)); + + if (skipped == skip_len) + return 0; + + dev_dbg(cxlds->dev, + "%s: failed to reserve skipped space (%pa %pa %pa)\n", + dev_name(&cxled->cxld.dev), &skip_base, &skip_len, &skipped); + + release_skip(cxlds, skip_base, skipped); + + return -EBUSY; +} + static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped) @@ -277,6 +358,7 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, struct cxl_dev_state *cxlds = cxlmd->cxlds; struct device *dev = &port->dev; struct resource *res; + int rc; lockdep_assert_held_write(&cxl_dpa_rwsem); @@ -305,14 +387,9 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, } if (skipped) { - res = __request_region(&cxlds->dpa_res, base - skipped, skipped, - dev_name(&cxled->cxld.dev), 0); - if (!res) { - dev_dbg(dev, - "decoder%d.%d: failed to reserve skipped space\n", - port->id, cxled->cxld.id); - return -EBUSY; - } + rc = request_skip(cxlds, cxled, base - skipped, skipped); + if (rc) + return rc; } res = __request_region(&cxlds->dpa_res, base, len, dev_name(&cxled->cxld.dev), 0); @@ -320,28 +397,117 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, dev_dbg(dev, "decoder%d.%d: failed to reserve allocation\n", port->id, cxled->cxld.id); if (skipped) - __release_region(&cxlds->dpa_res, base - skipped, - skipped); + release_skip(cxlds, base - skipped, skipped); return -EBUSY; } cxled->dpa_res = res; cxled->skip = skipped; - if (resource_contains(&cxlds->pmem_res, res)) - cxled->mode = CXL_DECODER_PMEM; - else if (resource_contains(&cxlds->ram_res, res)) - cxled->mode = CXL_DECODER_RAM; - else { - dev_warn(dev, "decoder%d.%d: %pr mixed mode not supported\n", - port->id, cxled->cxld.id, cxled->dpa_res); - cxled->mode = CXL_DECODER_MIXED; - } + /* + * When allocating new capacity, ->part is already set, when + * discovering decoder settings at initial enumeration, ->part + * is not set. + */ + if (cxled->part < 0) + for (int i = 0; cxlds->nr_partitions; i++) + if (resource_contains(&cxlds->part[i].res, res)) { + cxled->part = i; + break; + } + + if (cxled->part < 0) + dev_warn(dev, "decoder%d.%d: %pr does not map any partition\n", + port->id, cxled->cxld.id, res); port->hdm_end++; get_device(&cxled->cxld.dev); return 0; } +static int add_dpa_res(struct device *dev, struct resource *parent, + struct resource *res, resource_size_t start, + resource_size_t size, const char *type) +{ + int rc; + + *res = (struct resource) { + .name = type, + .start = start, + .end = start + size - 1, + .flags = IORESOURCE_MEM, + }; + if (resource_size(res) == 0) { + dev_dbg(dev, "DPA(%s): no capacity\n", res->name); + return 0; + } + rc = request_resource(parent, res); + if (rc) { + dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name, + res, rc); + return rc; + } + + dev_dbg(dev, "DPA(%s): %pr\n", res->name, res); + + return 0; +} + +static const char *cxl_mode_name(enum cxl_partition_mode mode) +{ + switch (mode) { + case CXL_PARTMODE_RAM: + return "ram"; + case CXL_PARTMODE_PMEM: + return "pmem"; + default: + return ""; + }; +} + +/* if this fails the caller must destroy @cxlds, there is no recovery */ +int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info) +{ + struct device *dev = cxlds->dev; + + guard(rwsem_write)(&cxl_dpa_rwsem); + + if (cxlds->nr_partitions) + return -EBUSY; + + if (!info->size || !info->nr_partitions) { + cxlds->dpa_res = DEFINE_RES_MEM(0, 0); + cxlds->nr_partitions = 0; + return 0; + } + + cxlds->dpa_res = DEFINE_RES_MEM(0, info->size); + + for (int i = 0; i < info->nr_partitions; i++) { + const struct cxl_dpa_part_info *part = &info->part[i]; + int rc; + + cxlds->part[i].perf.qos_class = CXL_QOS_CLASS_INVALID; + cxlds->part[i].mode = part->mode; + + /* Require ordered + contiguous partitions */ + if (i) { + const struct cxl_dpa_part_info *prev = &info->part[i - 1]; + + if (prev->range.end + 1 != part->range.start) + return -EINVAL; + } + rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->part[i].res, + part->range.start, range_len(&part->range), + cxl_mode_name(part->mode)); + if (rc) + return rc; + cxlds->nr_partitions++; + } + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_dpa_setup); + int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped) @@ -362,14 +528,11 @@ EXPORT_SYMBOL_NS_GPL(devm_cxl_dpa_reserve, "CXL"); resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled) { - resource_size_t size = 0; - - down_read(&cxl_dpa_rwsem); + guard(rwsem_read)(&cxl_dpa_rwsem); if (cxled->dpa_res) - size = resource_size(cxled->dpa_res); - up_read(&cxl_dpa_rwsem); + return resource_size(cxled->dpa_res); - return size; + return 0; } resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled) @@ -387,151 +550,136 @@ int cxl_dpa_free(struct cxl_endpoint_decoder *cxled) { struct cxl_port *port = cxled_to_port(cxled); struct device *dev = &cxled->cxld.dev; - int rc; - down_write(&cxl_dpa_rwsem); - if (!cxled->dpa_res) { - rc = 0; - goto out; - } + guard(rwsem_write)(&cxl_dpa_rwsem); + if (!cxled->dpa_res) + return 0; if (cxled->cxld.region) { dev_dbg(dev, "decoder assigned to: %s\n", dev_name(&cxled->cxld.region->dev)); - rc = -EBUSY; - goto out; + return -EBUSY; } if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) { dev_dbg(dev, "decoder enabled\n"); - rc = -EBUSY; - goto out; + return -EBUSY; } if (cxled->cxld.id != port->hdm_end) { dev_dbg(dev, "expected decoder%d.%d\n", port->id, port->hdm_end); - rc = -EBUSY; - goto out; + return -EBUSY; } + devm_cxl_dpa_release(cxled); - rc = 0; -out: - up_write(&cxl_dpa_rwsem); - return rc; + return 0; } -int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled, - enum cxl_decoder_mode mode) +int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, + enum cxl_partition_mode mode) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct device *dev = &cxled->cxld.dev; - - switch (mode) { - case CXL_DECODER_RAM: - case CXL_DECODER_PMEM: - break; - default: - dev_dbg(dev, "unsupported mode: %d\n", mode); - return -EINVAL; - } + int part; guard(rwsem_write)(&cxl_dpa_rwsem); if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) return -EBUSY; - /* - * Only allow modes that are supported by the current partition - * configuration - */ - if (mode == CXL_DECODER_PMEM && !resource_size(&cxlds->pmem_res)) { - dev_dbg(dev, "no available pmem capacity\n"); - return -ENXIO; + for (part = 0; part < cxlds->nr_partitions; part++) + if (cxlds->part[part].mode == mode) + break; + + if (part >= cxlds->nr_partitions) { + dev_dbg(dev, "unsupported mode: %d\n", mode); + return -EINVAL; } - if (mode == CXL_DECODER_RAM && !resource_size(&cxlds->ram_res)) { - dev_dbg(dev, "no available ram capacity\n"); + + if (!resource_size(&cxlds->part[part].res)) { + dev_dbg(dev, "no available capacity for mode: %d\n", mode); return -ENXIO; } - cxled->mode = mode; + cxled->part = part; return 0; } -int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) +static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - resource_size_t free_ram_start, free_pmem_start; - struct cxl_port *port = cxled_to_port(cxled); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct device *dev = &cxled->cxld.dev; - resource_size_t start, avail, skip; + struct resource *res, *prev = NULL; + resource_size_t start, avail, skip, skip_start; struct resource *p, *last; - int rc; + int part; - down_write(&cxl_dpa_rwsem); + guard(rwsem_write)(&cxl_dpa_rwsem); if (cxled->cxld.region) { dev_dbg(dev, "decoder attached to %s\n", dev_name(&cxled->cxld.region->dev)); - rc = -EBUSY; - goto out; + return -EBUSY; } if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) { dev_dbg(dev, "decoder enabled\n"); - rc = -EBUSY; - goto out; + return -EBUSY; } - for (p = cxlds->ram_res.child, last = NULL; p; p = p->sibling) - last = p; - if (last) - free_ram_start = last->end + 1; - else - free_ram_start = cxlds->ram_res.start; + part = cxled->part; + if (part < 0) { + dev_dbg(dev, "partition not set\n"); + return -EBUSY; + } - for (p = cxlds->pmem_res.child, last = NULL; p; p = p->sibling) + res = &cxlds->part[part].res; + for (p = res->child, last = NULL; p; p = p->sibling) last = p; if (last) - free_pmem_start = last->end + 1; + start = last->end + 1; else - free_pmem_start = cxlds->pmem_res.start; - - if (cxled->mode == CXL_DECODER_RAM) { - start = free_ram_start; - avail = cxlds->ram_res.end - start + 1; - skip = 0; - } else if (cxled->mode == CXL_DECODER_PMEM) { - resource_size_t skip_start, skip_end; - - start = free_pmem_start; - avail = cxlds->pmem_res.end - start + 1; - skip_start = free_ram_start; + start = res->start; - /* - * If some pmem is already allocated, then that allocation - * already handled the skip. - */ - if (cxlds->pmem_res.child && - skip_start == cxlds->pmem_res.child->start) - skip_end = skip_start - 1; - else - skip_end = start - 1; - skip = skip_end - skip_start + 1; - } else { - dev_dbg(dev, "mode not set\n"); - rc = -EINVAL; - goto out; + /* + * To allocate at partition N, a skip needs to be calculated for all + * unallocated space at lower partitions indices. + * + * If a partition has any allocations, the search can end because a + * previous cxl_dpa_alloc() invocation is assumed to have accounted for + * all previous partitions. + */ + skip_start = CXL_RESOURCE_NONE; + for (int i = part; i; i--) { + prev = &cxlds->part[i - 1].res; + for (p = prev->child, last = NULL; p; p = p->sibling) + last = p; + if (last) { + skip_start = last->end + 1; + break; + } + skip_start = prev->start; } + avail = res->end - start + 1; + if (skip_start == CXL_RESOURCE_NONE) + skip = 0; + else + skip = res->start - skip_start; + if (size > avail) { dev_dbg(dev, "%pa exceeds available %s capacity: %pa\n", &size, - cxl_decoder_mode_name(cxled->mode), &avail); - rc = -ENOSPC; - goto out; + res->name, &avail); + return -ENOSPC; } - rc = __cxl_dpa_reserve(cxled, start, size, skip); -out: - up_write(&cxl_dpa_rwsem); + return __cxl_dpa_reserve(cxled, start, size, skip); +} + +int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) +{ + struct cxl_port *port = cxled_to_port(cxled); + int rc; + rc = __cxl_dpa_alloc(cxled, size); if (rc) return rc; @@ -703,7 +851,7 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld) return 0; } -static int commit_reap(struct device *dev, const void *data) +static int commit_reap(struct device *dev, void *data) { struct cxl_port *port = to_cxl_port(dev->parent); struct cxl_decoder *cxld; diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 548564c770c0..d72764056ce6 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -11,6 +11,7 @@ #include "core.h" #include "trace.h" +#include "mce.h" static bool cxl_raw_allow_all; @@ -349,40 +350,39 @@ static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in) return true; } -static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox, - struct cxl_memdev_state *mds, u16 opcode, +static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox_cmd, + struct cxl_mailbox *cxl_mbox, u16 opcode, size_t in_size, size_t out_size, u64 in_payload) { - struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; - *mbox = (struct cxl_mbox_cmd) { + *mbox_cmd = (struct cxl_mbox_cmd) { .opcode = opcode, .size_in = in_size, }; if (in_size) { - mbox->payload_in = vmemdup_user(u64_to_user_ptr(in_payload), - in_size); - if (IS_ERR(mbox->payload_in)) - return PTR_ERR(mbox->payload_in); + mbox_cmd->payload_in = vmemdup_user(u64_to_user_ptr(in_payload), + in_size); + if (IS_ERR(mbox_cmd->payload_in)) + return PTR_ERR(mbox_cmd->payload_in); - if (!cxl_payload_from_user_allowed(opcode, mbox->payload_in)) { - dev_dbg(mds->cxlds.dev, "%s: input payload not allowed\n", + if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in)) { + dev_dbg(cxl_mbox->host, "%s: input payload not allowed\n", cxl_mem_opcode_to_name(opcode)); - kvfree(mbox->payload_in); + kvfree(mbox_cmd->payload_in); return -EBUSY; } } /* Prepare to handle a full payload for variable sized output */ if (out_size == CXL_VARIABLE_PAYLOAD) - mbox->size_out = cxl_mbox->payload_size; + mbox_cmd->size_out = cxl_mbox->payload_size; else - mbox->size_out = out_size; + mbox_cmd->size_out = out_size; - if (mbox->size_out) { - mbox->payload_out = kvzalloc(mbox->size_out, GFP_KERNEL); - if (!mbox->payload_out) { - kvfree(mbox->payload_in); + if (mbox_cmd->size_out) { + mbox_cmd->payload_out = kvzalloc(mbox_cmd->size_out, GFP_KERNEL); + if (!mbox_cmd->payload_out) { + kvfree(mbox_cmd->payload_in); return -ENOMEM; } } @@ -397,10 +397,8 @@ static void cxl_mbox_cmd_dtor(struct cxl_mbox_cmd *mbox) static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, const struct cxl_send_command *send_cmd, - struct cxl_memdev_state *mds) + struct cxl_mailbox *cxl_mbox) { - struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; - if (send_cmd->raw.rsvd) return -EINVAL; @@ -415,7 +413,7 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode)) return -EPERM; - dev_WARN_ONCE(mds->cxlds.dev, true, "raw command path used\n"); + dev_WARN_ONCE(cxl_mbox->host, true, "raw command path used\n"); *mem_cmd = (struct cxl_mem_command) { .info = { @@ -431,7 +429,7 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, const struct cxl_send_command *send_cmd, - struct cxl_memdev_state *mds) + struct cxl_mailbox *cxl_mbox) { struct cxl_mem_command *c = &cxl_mem_commands[send_cmd->id]; const struct cxl_command_info *info = &c->info; @@ -446,11 +444,11 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, return -EINVAL; /* Check that the command is enabled for hardware */ - if (!test_bit(info->id, mds->enabled_cmds)) + if (!test_bit(info->id, cxl_mbox->enabled_cmds)) return -ENOTTY; /* Check that the command is not claimed for exclusive kernel use */ - if (test_bit(info->id, mds->exclusive_cmds)) + if (test_bit(info->id, cxl_mbox->exclusive_cmds)) return -EBUSY; /* Check the input buffer is the expected size */ @@ -479,7 +477,7 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, /** * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND. * @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd. - * @mds: The driver data for the operation + * @cxl_mbox: CXL mailbox context * @send_cmd: &struct cxl_send_command copied in from userspace. * * Return: @@ -494,10 +492,9 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, * safe to send to the hardware. */ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, - struct cxl_memdev_state *mds, + struct cxl_mailbox *cxl_mbox, const struct cxl_send_command *send_cmd) { - struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mem_command mem_cmd; int rc; @@ -514,24 +511,23 @@ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, /* Sanitize and construct a cxl_mem_command */ if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) - rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, mds); + rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, cxl_mbox); else - rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, mds); + rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, cxl_mbox); if (rc) return rc; /* Sanitize and construct a cxl_mbox_cmd */ - return cxl_mbox_cmd_ctor(mbox_cmd, mds, mem_cmd.opcode, + return cxl_mbox_cmd_ctor(mbox_cmd, cxl_mbox, mem_cmd.opcode, mem_cmd.info.size_in, mem_cmd.info.size_out, send_cmd->in.payload); } -int cxl_query_cmd(struct cxl_memdev *cxlmd, +int cxl_query_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_mem_query_commands __user *q) { - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - struct device *dev = &cxlmd->dev; + struct device *dev = cxl_mbox->host; struct cxl_mem_command *cmd; u32 n_commands; int j = 0; @@ -552,9 +548,9 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, cxl_for_each_cmd(cmd) { struct cxl_command_info info = cmd->info; - if (test_bit(info.id, mds->enabled_cmds)) + if (test_bit(info.id, cxl_mbox->enabled_cmds)) info.flags |= CXL_MEM_COMMAND_FLAG_ENABLED; - if (test_bit(info.id, mds->exclusive_cmds)) + if (test_bit(info.id, cxl_mbox->exclusive_cmds)) info.flags |= CXL_MEM_COMMAND_FLAG_EXCLUSIVE; if (copy_to_user(&q->commands[j++], &info, sizeof(info))) @@ -569,7 +565,7 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, /** * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace. - * @mds: The driver data for the operation + * @cxl_mbox: The mailbox context for the operation. * @mbox_cmd: The validated mailbox command. * @out_payload: Pointer to userspace's output payload. * @size_out: (Input) Max payload size to copy out. @@ -590,13 +586,12 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, * * See cxl_send_cmd(). */ -static int handle_mailbox_cmd_from_user(struct cxl_memdev_state *mds, +static int handle_mailbox_cmd_from_user(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *mbox_cmd, u64 out_payload, s32 *size_out, u32 *retval) { - struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; - struct device *dev = mds->cxlds.dev; + struct device *dev = cxl_mbox->host; int rc; dev_dbg(dev, @@ -633,10 +628,9 @@ out: return rc; } -int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) +int cxl_send_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_send_command __user *s) { - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - struct device *dev = &cxlmd->dev; + struct device *dev = cxl_mbox->host; struct cxl_send_command send; struct cxl_mbox_cmd mbox_cmd; int rc; @@ -646,11 +640,11 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) if (copy_from_user(&send, s, sizeof(send))) return -EFAULT; - rc = cxl_validate_cmd_from_user(&mbox_cmd, mds, &send); + rc = cxl_validate_cmd_from_user(&mbox_cmd, cxl_mbox, &send); if (rc) return rc; - rc = handle_mailbox_cmd_from_user(mds, &mbox_cmd, send.out.payload, + rc = handle_mailbox_cmd_from_user(cxl_mbox, &mbox_cmd, send.out.payload, &send.out.size, &send.retval); if (rc) return rc; @@ -713,6 +707,35 @@ static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid, return 0; } +static int check_features_opcodes(u16 opcode, int *ro_cmds, int *wr_cmds) +{ + switch (opcode) { + case CXL_MBOX_OP_GET_SUPPORTED_FEATURES: + case CXL_MBOX_OP_GET_FEATURE: + (*ro_cmds)++; + return 1; + case CXL_MBOX_OP_SET_FEATURE: + (*wr_cmds)++; + return 1; + default: + return 0; + } +} + +/* 'Get Supported Features' and 'Get Feature' */ +#define MAX_FEATURES_READ_CMDS 2 +static void set_features_cap(struct cxl_mailbox *cxl_mbox, + int ro_cmds, int wr_cmds) +{ + /* Setting up Features capability while walking the CEL */ + if (ro_cmds == MAX_FEATURES_READ_CMDS) { + if (wr_cmds) + cxl_mbox->feat_cap = CXL_FEATURES_RW; + else + cxl_mbox->feat_cap = CXL_FEATURES_RO; + } +} + /** * cxl_walk_cel() - Walk through the Command Effects Log. * @mds: The driver data for the operation @@ -724,10 +747,11 @@ static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid, */ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_cel_entry *cel_entry; const int cel_entries = size / sizeof(*cel_entry); struct device *dev = mds->cxlds.dev; - int i; + int i, ro_cmds = 0, wr_cmds = 0; cel_entry = (struct cxl_cel_entry *) cel; @@ -737,10 +761,13 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel) int enabled = 0; if (cmd) { - set_bit(cmd->info.id, mds->enabled_cmds); + set_bit(cmd->info.id, cxl_mbox->enabled_cmds); enabled++; } + enabled += check_features_opcodes(opcode, &ro_cmds, + &wr_cmds); + if (cxl_is_poison_command(opcode)) { cxl_set_poison_cmd_enabled(&mds->poison, opcode); enabled++; @@ -754,6 +781,8 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel) dev_dbg(dev, "Opcode 0x%04x %s\n", opcode, enabled ? "enabled" : "unsupported by driver"); } + + set_features_cap(cxl_mbox, ro_cmds, wr_cmds); } static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_memdev_state *mds) @@ -807,6 +836,7 @@ static const uuid_t log_uuid[] = { */ int cxl_enumerate_cmds(struct cxl_memdev_state *mds) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_get_supported_logs *gsl; struct device *dev = mds->cxlds.dev; struct cxl_mem_command *cmd; @@ -845,7 +875,7 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds) /* In case CEL was bogus, enable some default commands. */ cxl_for_each_cmd(cmd) if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE) - set_bit(cmd->info.id, mds->enabled_cmds); + set_bit(cmd->info.id, cxl_mbox->enabled_cmds); /* Found the required CEL */ rc = 0; @@ -871,7 +901,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, } if (trace_cxl_general_media_enabled() || trace_cxl_dram_enabled()) { - u64 dpa, hpa = ULLONG_MAX; + u64 dpa, hpa = ULLONG_MAX, hpa_alias = ULLONG_MAX; struct cxl_region *cxlr; /* @@ -884,14 +914,20 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK; cxlr = cxl_dpa_to_region(cxlmd, dpa); - if (cxlr) + if (cxlr) { + u64 cache_size = cxlr->params.cache_size; + hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa); + if (cache_size) + hpa_alias = hpa - cache_size; + } if (event_type == CXL_CPER_EVENT_GEN_MEDIA) trace_cxl_general_media(cxlmd, type, cxlr, hpa, - &evt->gen_media); + hpa_alias, &evt->gen_media); else if (event_type == CXL_CPER_EVENT_DRAM) - trace_cxl_dram(cxlmd, type, cxlr, hpa, &evt->dram); + trace_cxl_dram(cxlmd, type, cxlr, hpa, hpa_alias, + &evt->dram); } } EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, "CXL"); @@ -1097,10 +1133,6 @@ static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds) le64_to_cpu(pi.active_volatile_cap) * CXL_CAPACITY_MULTIPLIER; mds->active_persistent_bytes = le64_to_cpu(pi.active_persistent_cap) * CXL_CAPACITY_MULTIPLIER; - mds->next_volatile_bytes = - le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER; - mds->next_persistent_bytes = - le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER; return 0; } @@ -1222,74 +1254,54 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) { struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); struct cxl_port *endpoint; - int rc; /* synchronize with cxl_mem_probe() and decoder write operations */ guard(device)(&cxlmd->dev); endpoint = cxlmd->endpoint; - down_read(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_region_rwsem); /* * Require an endpoint to be safe otherwise the driver can not * be sure that the device is unmapped. */ if (endpoint && cxl_num_decoders_committed(endpoint) == 0) - rc = __cxl_mem_sanitize(mds, cmd); - else - rc = -EBUSY; - up_read(&cxl_region_rwsem); + return __cxl_mem_sanitize(mds, cmd); - return rc; + return -EBUSY; } -static int add_dpa_res(struct device *dev, struct resource *parent, - struct resource *res, resource_size_t start, - resource_size_t size, const char *type) +static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum cxl_partition_mode mode) { - int rc; - - res->name = type; - res->start = start; - res->end = start + size - 1; - res->flags = IORESOURCE_MEM; - if (resource_size(res) == 0) { - dev_dbg(dev, "DPA(%s): no capacity\n", res->name); - return 0; - } - rc = request_resource(parent, res); - if (rc) { - dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name, - res, rc); - return rc; - } + int i = info->nr_partitions; - dev_dbg(dev, "DPA(%s): %pr\n", res->name, res); + if (size == 0) + return; - return 0; + info->part[i].range = (struct range) { + .start = start, + .end = start + size - 1, + }; + info->part[i].mode = mode; + info->nr_partitions++; } -int cxl_mem_create_range_info(struct cxl_memdev_state *mds) +int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info) { struct cxl_dev_state *cxlds = &mds->cxlds; struct device *dev = cxlds->dev; int rc; if (!cxlds->media_ready) { - cxlds->dpa_res = DEFINE_RES_MEM(0, 0); - cxlds->ram_res = DEFINE_RES_MEM(0, 0); - cxlds->pmem_res = DEFINE_RES_MEM(0, 0); + info->size = 0; return 0; } - cxlds->dpa_res = DEFINE_RES_MEM(0, mds->total_bytes); + info->size = mds->total_bytes; if (mds->partition_align_bytes == 0) { - rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0, - mds->volatile_only_bytes, "ram"); - if (rc) - return rc; - return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res, - mds->volatile_only_bytes, - mds->persistent_only_bytes, "pmem"); + add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM); + add_part(info, mds->volatile_only_bytes, + mds->persistent_only_bytes, CXL_PARTMODE_PMEM); + return 0; } rc = cxl_mem_get_partition_info(mds); @@ -1298,15 +1310,52 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds) return rc; } - rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0, - mds->active_volatile_bytes, "ram"); - if (rc) - return rc; - return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res, - mds->active_volatile_bytes, - mds->active_persistent_bytes, "pmem"); + add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM); + add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes, + CXL_PARTMODE_PMEM); + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL"); + +int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count) +{ + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + struct cxl_mbox_get_health_info_out hi; + struct cxl_mbox_cmd mbox_cmd; + int rc; + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_HEALTH_INFO, + .size_out = sizeof(hi), + .payload_out = &hi, + }; + + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); + if (!rc) + *count = le32_to_cpu(hi.dirty_shutdown_cnt); + + return rc; } -EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, "CXL"); +EXPORT_SYMBOL_NS_GPL(cxl_get_dirty_count, "CXL"); + +int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds) +{ + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + struct cxl_mbox_cmd mbox_cmd; + struct cxl_mbox_set_shutdown_state_in in = { + .state = 1 + }; + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_SET_SHUTDOWN_STATE, + .size_in = sizeof(in), + .payload_in = &in, + }; + + return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); +} +EXPORT_SYMBOL_NS_GPL(cxl_arm_dirty_shutdown, "CXL"); int cxl_set_timestamp(struct cxl_memdev_state *mds) { @@ -1438,6 +1487,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, "CXL"); struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) { struct cxl_memdev_state *mds; + int rc; mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL); if (!mds) { @@ -1448,10 +1498,15 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) mutex_init(&mds->event.log_lock); mds->cxlds.dev = dev; mds->cxlds.reg_map.host = dev; + mds->cxlds.cxl_mbox.host = dev; mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; - mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID; - mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID; + + rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier); + if (rc == -EOPNOTSUPP) + dev_warn(dev, "CXL MCE unsupported\n"); + else if (rc) + return ERR_PTR(rc); return mds; } diff --git a/drivers/cxl/core/mce.c b/drivers/cxl/core/mce.c new file mode 100644 index 000000000000..ff8d078c6ca1 --- /dev/null +++ b/drivers/cxl/core/mce.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation. All rights reserved. */ +#include <linux/mm.h> +#include <linux/notifier.h> +#include <linux/set_memory.h> +#include <asm/mce.h> +#include <cxlmem.h> +#include "mce.h" + +static int cxl_handle_mce(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cxl_memdev_state *mds = container_of(nb, struct cxl_memdev_state, + mce_notifier); + struct cxl_memdev *cxlmd = mds->cxlds.cxlmd; + struct cxl_port *endpoint = cxlmd->endpoint; + struct mce *mce = data; + u64 spa, spa_alias; + unsigned long pfn; + + if (!mce || !mce_usable_address(mce)) + return NOTIFY_DONE; + + if (!endpoint) + return NOTIFY_DONE; + + spa = mce->addr & MCI_ADDR_PHYSADDR; + + pfn = spa >> PAGE_SHIFT; + if (!pfn_valid(pfn)) + return NOTIFY_DONE; + + spa_alias = cxl_port_get_spa_cache_alias(endpoint, spa); + if (spa_alias == ~0ULL) + return NOTIFY_DONE; + + pfn = spa_alias >> PAGE_SHIFT; + + /* + * Take down the aliased memory page. The original memory page flagged + * by the MCE will be taken cared of by the standard MCE handler. + */ + dev_emerg(mds->cxlds.dev, "Offlining aliased SPA address0: %#llx\n", + spa_alias); + if (!memory_failure(pfn, 0)) + set_mce_nospec(pfn); + + return NOTIFY_OK; +} + +static void cxl_unregister_mce_notifier(void *mce_notifier) +{ + mce_unregister_decode_chain(mce_notifier); +} + +int devm_cxl_register_mce_notifier(struct device *dev, + struct notifier_block *mce_notifier) +{ + mce_notifier->notifier_call = cxl_handle_mce; + mce_notifier->priority = MCE_PRIO_UC; + mce_register_decode_chain(mce_notifier); + + return devm_add_action_or_reset(dev, cxl_unregister_mce_notifier, + mce_notifier); +} diff --git a/drivers/cxl/core/mce.h b/drivers/cxl/core/mce.h new file mode 100644 index 000000000000..ace73424eeb6 --- /dev/null +++ b/drivers/cxl/core/mce.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation. All rights reserved. */ +#ifndef _CXL_CORE_MCE_H_ +#define _CXL_CORE_MCE_H_ + +#include <linux/notifier.h> + +#ifdef CONFIG_CXL_MCE +int devm_cxl_register_mce_notifier(struct device *dev, + struct notifier_block *mce_notifer); +#else +static inline int +devm_cxl_register_mce_notifier(struct device *dev, + struct notifier_block *mce_notifier) +{ + return -EOPNOTSUPP; +} +#endif + +#endif diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index ae3dfcbe8938..a16a5886d40a 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -75,12 +75,20 @@ static ssize_t label_storage_size_show(struct device *dev, } static DEVICE_ATTR_RO(label_storage_size); +static resource_size_t cxl_ram_size(struct cxl_dev_state *cxlds) +{ + /* Static RAM is only expected at partition 0. */ + if (cxlds->part[0].mode != CXL_PARTMODE_RAM) + return 0; + return resource_size(&cxlds->part[0].res); +} + static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; - unsigned long long len = resource_size(&cxlds->ram_res); + unsigned long long len = cxl_ram_size(cxlds); return sysfs_emit(buf, "%#llx\n", len); } @@ -93,7 +101,7 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr, { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; - unsigned long long len = resource_size(&cxlds->pmem_res); + unsigned long long len = cxl_pmem_size(cxlds); return sysfs_emit(buf, "%#llx\n", len); } @@ -198,22 +206,17 @@ static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd) int rc = 0; /* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */ - if (resource_size(&cxlds->pmem_res)) { - offset = cxlds->pmem_res.start; - length = resource_size(&cxlds->pmem_res); - rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); - if (rc) - return rc; - } - if (resource_size(&cxlds->ram_res)) { - offset = cxlds->ram_res.start; - length = resource_size(&cxlds->ram_res); + for (int i = 0; i < cxlds->nr_partitions; i++) { + const struct resource *res = &cxlds->part[i].res; + + offset = res->start; + length = resource_size(res); rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); /* * Invalid Physical Address is not an error for * volatile addresses. Device support is optional. */ - if (rc == -EFAULT) + if (rc == -EFAULT && cxlds->part[i].mode == CXL_PARTMODE_RAM) rc = 0; } return rc; @@ -404,14 +407,21 @@ static struct attribute *cxl_memdev_attributes[] = { NULL, }; +static struct cxl_dpa_perf *to_pmem_perf(struct cxl_dev_state *cxlds) +{ + for (int i = 0; i < cxlds->nr_partitions; i++) + if (cxlds->part[i].mode == CXL_PARTMODE_PMEM) + return &cxlds->part[i].perf; + return NULL; +} + static ssize_t pmem_qos_class_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - return sysfs_emit(buf, "%d\n", mds->pmem_perf.qos_class); + return sysfs_emit(buf, "%d\n", to_pmem_perf(cxlds)->qos_class); } static struct device_attribute dev_attr_pmem_qos_class = @@ -423,14 +433,20 @@ static struct attribute *cxl_memdev_pmem_attributes[] = { NULL, }; +static struct cxl_dpa_perf *to_ram_perf(struct cxl_dev_state *cxlds) +{ + if (cxlds->part[0].mode != CXL_PARTMODE_RAM) + return NULL; + return &cxlds->part[0].perf; +} + static ssize_t ram_qos_class_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - return sysfs_emit(buf, "%d\n", mds->ram_perf.qos_class); + return sysfs_emit(buf, "%d\n", to_ram_perf(cxlds)->qos_class); } static struct device_attribute dev_attr_ram_qos_class = @@ -466,11 +482,11 @@ static umode_t cxl_ram_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = kobj_to_dev(kobj); struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_dpa_perf *perf = to_ram_perf(cxlmd->cxlds); - if (a == &dev_attr_ram_qos_class.attr) - if (mds->ram_perf.qos_class == CXL_QOS_CLASS_INVALID) - return 0; + if (a == &dev_attr_ram_qos_class.attr && + (!perf || perf->qos_class == CXL_QOS_CLASS_INVALID)) + return 0; return a->mode; } @@ -485,11 +501,11 @@ static umode_t cxl_pmem_visible(struct kobject *kobj, struct attribute *a, int n { struct device *dev = kobj_to_dev(kobj); struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_dpa_perf *perf = to_pmem_perf(cxlmd->cxlds); - if (a == &dev_attr_pmem_qos_class.attr) - if (mds->pmem_perf.qos_class == CXL_QOS_CLASS_INVALID) - return 0; + if (a == &dev_attr_pmem_qos_class.attr && + (!perf || perf->qos_class == CXL_QOS_CLASS_INVALID)) + return 0; return a->mode; } @@ -564,10 +580,11 @@ EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL"); void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds) { - down_write(&cxl_memdev_rwsem); - bitmap_or(mds->exclusive_cmds, mds->exclusive_cmds, cmds, - CXL_MEM_COMMAND_ID_MAX); - up_write(&cxl_memdev_rwsem); + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + + guard(rwsem_write)(&cxl_memdev_rwsem); + bitmap_or(cxl_mbox->exclusive_cmds, cxl_mbox->exclusive_cmds, + cmds, CXL_MEM_COMMAND_ID_MAX); } EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL"); @@ -579,10 +596,11 @@ EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL"); void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds) { - down_write(&cxl_memdev_rwsem); - bitmap_andnot(mds->exclusive_cmds, mds->exclusive_cmds, cmds, - CXL_MEM_COMMAND_ID_MAX); - up_write(&cxl_memdev_rwsem); + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + + guard(rwsem_write)(&cxl_memdev_rwsem); + bitmap_andnot(cxl_mbox->exclusive_cmds, cxl_mbox->exclusive_cmds, + cmds, CXL_MEM_COMMAND_ID_MAX); } EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, "CXL"); @@ -590,9 +608,8 @@ static void cxl_memdev_shutdown(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - down_write(&cxl_memdev_rwsem); + guard(rwsem_write)(&cxl_memdev_rwsem); cxlmd->cxlds = NULL; - up_write(&cxl_memdev_rwsem); } static void cxl_memdev_unregister(void *_cxlmd) @@ -656,11 +673,14 @@ err: static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd, unsigned long arg) { + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + switch (cmd) { case CXL_MEM_QUERY_COMMANDS: - return cxl_query_cmd(cxlmd, (void __user *)arg); + return cxl_query_cmd(cxl_mbox, (void __user *)arg); case CXL_MEM_SEND_COMMAND: - return cxl_send_cmd(cxlmd, (void __user *)arg); + return cxl_send_cmd(cxl_mbox, (void __user *)arg); default: return -ENOTTY; } @@ -671,15 +691,13 @@ static long cxl_memdev_ioctl(struct file *file, unsigned int cmd, { struct cxl_memdev *cxlmd = file->private_data; struct cxl_dev_state *cxlds; - int rc = -ENXIO; - down_read(&cxl_memdev_rwsem); + guard(rwsem_read)(&cxl_memdev_rwsem); cxlds = cxlmd->cxlds; if (cxlds && cxlds->type == CXL_DEVTYPE_CLASSMEM) - rc = __cxl_memdev_ioctl(cxlmd, cmd, arg); - up_read(&cxl_memdev_rwsem); + return __cxl_memdev_ioctl(cxlmd, cmd, arg); - return rc; + return -ENXIO; } static int cxl_memdev_open(struct inode *inode, struct file *file) @@ -994,10 +1012,11 @@ static void cxl_remove_fw_upload(void *fwl) int devm_cxl_setup_fw_upload(struct device *host, struct cxl_memdev_state *mds) { struct cxl_dev_state *cxlds = &mds->cxlds; + struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; struct device *dev = &cxlds->cxlmd->dev; struct fw_upload *fwl; - if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, mds->enabled_cmds)) + if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, cxl_mbox->enabled_cmds)) return 0; fwl = firmware_upload_register(THIS_MODULE, dev, dev_name(dev), diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 9d58ab9d33c5..3b80e9a76ba8 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -252,9 +252,9 @@ static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds) } /* require dvsec ranges to be covered by a locked platform window */ -static int dvsec_range_allowed(struct device *dev, void *arg) +static int dvsec_range_allowed(struct device *dev, const void *arg) { - struct range *dev_range = arg; + const struct range *dev_range = arg; struct cxl_decoder *cxld; if (!is_root_decoder(dev)) @@ -291,11 +291,11 @@ static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm) return devm_add_action_or_reset(host, disable_hdm, cxlhdm); } -int cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, +int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, struct cxl_endpoint_dvsec_info *info) { - struct pci_dev *pdev = to_pci_dev(dev); - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct device *dev = cxlds->dev; int hdm_count, rc, i, ranges = 0; int d = cxlds->cxl_dvsec; u16 cap, ctrl; @@ -1054,3 +1054,104 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) return 0; } + +/* + * Set max timeout such that platforms will optimize GPF flow to avoid + * the implied worst-case scenario delays. On a sane platform, all + * devices should always complete GPF within the energy budget of + * the GPF flow. The kernel does not have enough information to pick + * anything better than "maximize timeouts and hope it works". + * + * A misbehaving device could block forward progress of GPF for all + * the other devices, exhausting the energy budget of the platform. + * However, the spec seems to assume that moving on from slow to respond + * devices is a virtue. It is not possible to know that, in actuality, + * the slow to respond device is *the* most critical device in the + * system to wait. + */ +#define GPF_TIMEOUT_BASE_MAX 2 +#define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */ + +u16 cxl_gpf_get_dvsec(struct device *dev) +{ + struct pci_dev *pdev; + bool is_port = true; + u16 dvsec; + + if (!dev_is_pci(dev)) + return 0; + + pdev = to_pci_dev(dev); + if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) + is_port = false; + + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, + is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF); + if (!dvsec) + dev_warn(dev, "%s GPF DVSEC not present\n", + is_port ? "Port" : "Device"); + return dvsec; +} +EXPORT_SYMBOL_NS_GPL(cxl_gpf_get_dvsec, "CXL"); + +static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) +{ + u64 base, scale; + int rc, offset; + u16 ctrl; + + switch (phase) { + case 1: + offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET; + base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK; + scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK; + break; + case 2: + offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET; + base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK; + scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK; + break; + default: + return -EINVAL; + } + + rc = pci_read_config_word(pdev, dvsec + offset, &ctrl); + if (rc) + return rc; + + if (FIELD_GET(base, ctrl) == GPF_TIMEOUT_BASE_MAX && + FIELD_GET(scale, ctrl) == GPF_TIMEOUT_SCALE_MAX) + return 0; + + ctrl = FIELD_PREP(base, GPF_TIMEOUT_BASE_MAX); + ctrl |= FIELD_PREP(scale, GPF_TIMEOUT_SCALE_MAX); + + rc = pci_write_config_word(pdev, dvsec + offset, ctrl); + if (!rc) + pci_dbg(pdev, "Port GPF phase %d timeout: %d0 secs\n", + phase, GPF_TIMEOUT_BASE_MAX); + + return rc; +} + +int cxl_gpf_port_setup(struct cxl_dport *dport) +{ + if (!dport) + return -EINVAL; + + if (!dport->gpf_dvsec) { + struct pci_dev *pdev; + int dvsec; + + dvsec = cxl_gpf_get_dvsec(dport->dport_dev); + if (!dvsec) + return -EINVAL; + + dport->gpf_dvsec = dvsec; + pdev = to_pci_dev(dport->dport_dev); + update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 1); + update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 2); + } + + return 0; +} diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index b3378d3f6acb..8853415c106a 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -51,17 +51,6 @@ struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev) } EXPORT_SYMBOL_NS_GPL(to_cxl_nvdimm_bridge, "CXL"); -bool is_cxl_nvdimm_bridge(struct device *dev) -{ - return dev->type == &cxl_nvdimm_bridge_type; -} -EXPORT_SYMBOL_NS_GPL(is_cxl_nvdimm_bridge, "CXL"); - -static int match_nvdimm_bridge(struct device *dev, void *data) -{ - return is_cxl_nvdimm_bridge(dev); -} - /** * cxl_find_nvdimm_bridge() - find a bridge device relative to a port * @port: any descendant port of an nvdimm-bridge associated @@ -75,7 +64,9 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port) if (!cxl_root) return NULL; - dev = device_find_child(&cxl_root->port.dev, NULL, match_nvdimm_bridge); + dev = device_find_child(&cxl_root->port.dev, + &cxl_nvdimm_bridge_type, + device_match_type); if (!dev) return NULL; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 78a5c2c25982..726bd4a7de27 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -194,25 +194,35 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + /* without @cxl_dpa_rwsem, make sure @part is not reloaded */ + int part = READ_ONCE(cxled->part); + const char *desc; + + if (part < 0) + desc = "none"; + else + desc = cxlds->part[part].res.name; - return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxled->mode)); + return sysfs_emit(buf, "%s\n", desc); } static ssize_t mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); - enum cxl_decoder_mode mode; + enum cxl_partition_mode mode; ssize_t rc; if (sysfs_streq(buf, "pmem")) - mode = CXL_DECODER_PMEM; + mode = CXL_PARTMODE_PMEM; else if (sysfs_streq(buf, "ram")) - mode = CXL_DECODER_RAM; + mode = CXL_PARTMODE_RAM; else return -EINVAL; - rc = cxl_dpa_set_mode(cxled, mode); + rc = cxl_dpa_set_part(cxled, mode); if (rc) return rc; @@ -549,13 +559,9 @@ static ssize_t decoders_committed_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cxl_port *port = to_cxl_port(dev); - int rc; - - down_read(&cxl_region_rwsem); - rc = sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port)); - up_read(&cxl_region_rwsem); - return rc; + guard(rwsem_read)(&cxl_region_rwsem); + return sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port)); } static DEVICE_ATTR_RO(decoders_committed); @@ -1672,6 +1678,8 @@ retry: if (rc && rc != -EBUSY) return rc; + cxl_gpf_port_setup(dport); + /* Any more ports to add between this one and the root? */ if (!dev_is_cxl_root_child(&port->dev)) continue; @@ -1899,6 +1907,7 @@ struct cxl_endpoint_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port) return ERR_PTR(-ENOMEM); cxled->pos = -1; + cxled->part = -1; cxld = &cxled->cxld; rc = cxl_decoder_init(port, cxld); if (rc) { @@ -2339,8 +2348,14 @@ static __init int cxl_core_init(void) if (rc) goto err_region; + rc = cxl_ras_init(); + if (rc) + goto err_ras; + return 0; +err_ras: + cxl_region_exit(); err_region: bus_unregister(&cxl_bus_type); err_bus: @@ -2352,6 +2367,7 @@ err_wq: static void cxl_core_exit(void) { + cxl_ras_exit(); cxl_region_exit(); bus_unregister(&cxl_bus_type); destroy_workqueue(cxl_bus_wq); diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c new file mode 100644 index 000000000000..2731ba3a0799 --- /dev/null +++ b/drivers/cxl/core/ras.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2025 AMD Corporation. All rights reserved. */ + +#include <linux/pci.h> +#include <linux/aer.h> +#include <cxl/event.h> +#include <cxlmem.h> +#include "trace.h" + +static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev, + struct cxl_ras_capability_regs ras_cap) +{ + u32 status = ras_cap.cor_status & ~ras_cap.cor_mask; + + trace_cxl_port_aer_correctable_error(&pdev->dev, status); +} + +static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev, + struct cxl_ras_capability_regs ras_cap) +{ + u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask; + u32 fe; + + if (hweight32(status) > 1) + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, + ras_cap.cap_control)); + else + fe = status; + + trace_cxl_port_aer_uncorrectable_error(&pdev->dev, status, fe, + ras_cap.header_log); +} + +static void cxl_cper_trace_corr_prot_err(struct cxl_memdev *cxlmd, + struct cxl_ras_capability_regs ras_cap) +{ + u32 status = ras_cap.cor_status & ~ras_cap.cor_mask; + + trace_cxl_aer_correctable_error(cxlmd, status); +} + +static void +cxl_cper_trace_uncorr_prot_err(struct cxl_memdev *cxlmd, + struct cxl_ras_capability_regs ras_cap) +{ + u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask; + u32 fe; + + if (hweight32(status) > 1) + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, + ras_cap.cap_control)); + else + fe = status; + + trace_cxl_aer_uncorrectable_error(cxlmd, status, fe, + ras_cap.header_log); +} + +static int match_memdev_by_parent(struct device *dev, const void *uport) +{ + if (is_cxl_memdev(dev) && dev->parent == uport) + return 1; + return 0; +} + +static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) +{ + unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device, + data->prot_err.agent_addr.function); + struct pci_dev *pdev __free(pci_dev_put) = + pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment, + data->prot_err.agent_addr.bus, + devfn); + struct cxl_memdev *cxlmd; + int port_type; + + if (!pdev) + return; + + port_type = pci_pcie_type(pdev); + if (port_type == PCI_EXP_TYPE_ROOT_PORT || + port_type == PCI_EXP_TYPE_DOWNSTREAM || + port_type == PCI_EXP_TYPE_UPSTREAM) { + if (data->severity == AER_CORRECTABLE) + cxl_cper_trace_corr_port_prot_err(pdev, data->ras_cap); + else + cxl_cper_trace_uncorr_port_prot_err(pdev, data->ras_cap); + + return; + } + + guard(device)(&pdev->dev); + if (!pdev->dev.driver) + return; + + struct device *mem_dev __free(put_device) = bus_find_device( + &cxl_bus_type, NULL, pdev, match_memdev_by_parent); + if (!mem_dev) + return; + + cxlmd = to_cxl_memdev(mem_dev); + if (data->severity == AER_CORRECTABLE) + cxl_cper_trace_corr_prot_err(cxlmd, data->ras_cap); + else + cxl_cper_trace_uncorr_prot_err(cxlmd, data->ras_cap); +} + +static void cxl_cper_prot_err_work_fn(struct work_struct *work) +{ + struct cxl_cper_prot_err_work_data wd; + + while (cxl_cper_prot_err_kfifo_get(&wd)) + cxl_cper_handle_prot_err(&wd); +} +static DECLARE_WORK(cxl_cper_prot_err_work, cxl_cper_prot_err_work_fn); + +int cxl_ras_init(void) +{ + return cxl_cper_register_prot_err_work(&cxl_cper_prot_err_work); +} + +void cxl_ras_exit(void) +{ + cxl_cper_unregister_prot_err_work(&cxl_cper_prot_err_work); + cancel_work_sync(&cxl_cper_prot_err_work); +} diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index b98b1ccffd1c..7585f0302f3a 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -144,7 +144,7 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, rc = down_read_interruptible(&cxl_region_rwsem); if (rc) return rc; - if (cxlr->mode != CXL_DECODER_PMEM) + if (cxlr->mode != CXL_PARTMODE_PMEM) rc = sysfs_emit(buf, "\n"); else rc = sysfs_emit(buf, "%pUb\n", &p->uuid); @@ -441,7 +441,7 @@ static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a, * Support tooling that expects to find a 'uuid' attribute for all * regions regardless of mode. */ - if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM) + if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_PARTMODE_PMEM) return 0444; return a->mode; } @@ -603,8 +603,16 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cxl_region *cxlr = to_cxl_region(dev); + const char *desc; - return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode)); + if (cxlr->mode == CXL_PARTMODE_RAM) + desc = "ram"; + else if (cxlr->mode == CXL_PARTMODE_PMEM) + desc = "pmem"; + else + desc = ""; + + return sysfs_emit(buf, "%s\n", desc); } static DEVICE_ATTR_RO(mode); @@ -630,7 +638,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) /* ways, granularity and uuid (if PMEM) need to be set before HPA */ if (!p->interleave_ways || !p->interleave_granularity || - (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid))) + (cxlr->mode == CXL_PARTMODE_PMEM && uuid_is_null(&p->uuid))) return -ENXIO; div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder); @@ -778,7 +786,7 @@ out: return rc; } -static int check_commit_order(struct device *dev, const void *data) +static int check_commit_order(struct device *dev, void *data) { struct cxl_decoder *cxld = to_cxl_decoder(dev); @@ -792,7 +800,7 @@ static int check_commit_order(struct device *dev, const void *data) return 0; } -static int match_free_decoder(struct device *dev, void *data) +static int match_free_decoder(struct device *dev, const void *data) { struct cxl_port *port = to_cxl_port(dev->parent); struct cxl_decoder *cxld; @@ -824,9 +832,24 @@ static int match_free_decoder(struct device *dev, void *data) return 1; } -static int match_auto_decoder(struct device *dev, void *data) +static bool region_res_match_cxl_range(const struct cxl_region_params *p, + struct range *range) { - struct cxl_region_params *p = data; + if (!p->res) + return false; + + /* + * If an extended linear cache region then the CXL range is assumed + * to be fronted by the DRAM range in current known implementation. + * This assumption will be made until a variant implementation exists. + */ + return p->res->start + p->cache_size == range->start && + p->res->end == range->end; +} + +static int match_auto_decoder(struct device *dev, const void *data) +{ + const struct cxl_region_params *p = data; struct cxl_decoder *cxld; struct range *r; @@ -836,7 +859,7 @@ static int match_auto_decoder(struct device *dev, void *data) cxld = to_cxl_decoder(dev); r = &cxld->hpa_range; - if (p->res && p->res->start == r->start && p->res->end == r->end) + if (region_res_match_cxl_range(p, r)) return 1; return 0; @@ -1423,9 +1446,8 @@ static int cxl_port_setup_targets(struct cxl_port *port, if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { if (cxld->interleave_ways != iw || - cxld->interleave_granularity != ig || - cxld->hpa_range.start != p->res->start || - cxld->hpa_range.end != p->res->end || + (iw > 1 && cxld->interleave_granularity != ig) || + !region_res_match_cxl_range(p, &cxld->hpa_range) || ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) { dev_err(&cxlr->dev, "%s:%s %s expected iw: %d ig: %d %pr\n", @@ -1733,10 +1755,12 @@ static struct cxl_port *next_port(struct cxl_port *port) return port->parent_dport->port; } -static int match_switch_decoder_by_range(struct device *dev, void *data) +static int match_switch_decoder_by_range(struct device *dev, + const void *data) { struct cxl_switch_decoder *cxlsd; - struct range *r1, *r2 = data; + const struct range *r1, *r2 = data; + if (!is_switch_decoder(dev)) return 0; @@ -1781,6 +1805,13 @@ static int find_pos_and_ways(struct cxl_port *port, struct range *range, } put_device(dev); + if (rc) + dev_err(port->uport_dev, + "failed to find %s:%s in target list of %s\n", + dev_name(&port->dev), + dev_name(port->parent_dport->dport_dev), + dev_name(&cxlsd->cxld.dev)); + return rc; } @@ -1886,6 +1917,7 @@ static int cxl_region_attach(struct cxl_region *cxlr, { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_region_params *p = &cxlr->params; struct cxl_port *ep_port, *root_port; struct cxl_dport *dport; @@ -1900,17 +1932,17 @@ static int cxl_region_attach(struct cxl_region *cxlr, return rc; } - if (cxled->mode != cxlr->mode) { - dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n", - dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode); - return -EINVAL; - } - - if (cxled->mode == CXL_DECODER_DEAD) { + if (cxled->part < 0) { dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev)); return -ENODEV; } + if (cxlds->part[cxled->part].mode != cxlr->mode) { + dev_dbg(&cxlr->dev, "%s region mode: %d mismatch\n", + dev_name(&cxled->cxld.dev), cxlr->mode); + return -EINVAL; + } + /* all full of members, or interleave config not established? */ if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) { dev_dbg(&cxlr->dev, "region already active\n"); @@ -1949,13 +1981,13 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -ENXIO; } - if (resource_size(cxled->dpa_res) * p->interleave_ways != + if (resource_size(cxled->dpa_res) * p->interleave_ways + p->cache_size != resource_size(p->res)) { dev_dbg(&cxlr->dev, - "%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n", + "%s:%s-size-%#llx * ways-%d + cache-%#llx != region-size-%#llx\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), (u64)resource_size(cxled->dpa_res), p->interleave_ways, - (u64)resource_size(p->res)); + (u64)p->cache_size, (u64)resource_size(p->res)); return -EINVAL; } @@ -2113,7 +2145,7 @@ out: void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) { down_write(&cxl_region_rwsem); - cxled->mode = CXL_DECODER_DEAD; + cxled->part = -1; cxl_region_detach(cxled); up_write(&cxl_region_rwsem); } @@ -2469,7 +2501,7 @@ static int cxl_region_calculate_adistance(struct notifier_block *nb, */ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, int id, - enum cxl_decoder_mode mode, + enum cxl_partition_mode mode, enum cxl_decoder_type type) { struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent); @@ -2523,13 +2555,13 @@ static ssize_t create_ram_region_show(struct device *dev, } static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, - enum cxl_decoder_mode mode, int id) + enum cxl_partition_mode mode, int id) { int rc; switch (mode) { - case CXL_DECODER_RAM: - case CXL_DECODER_PMEM: + case CXL_PARTMODE_RAM: + case CXL_PARTMODE_PMEM: break; default: dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode); @@ -2549,7 +2581,7 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, } static ssize_t create_region_store(struct device *dev, const char *buf, - size_t len, enum cxl_decoder_mode mode) + size_t len, enum cxl_partition_mode mode) { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); struct cxl_region *cxlr; @@ -2570,7 +2602,7 @@ static ssize_t create_pmem_region_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - return create_region_store(dev, buf, len, CXL_DECODER_PMEM); + return create_region_store(dev, buf, len, CXL_PARTMODE_PMEM); } DEVICE_ATTR_RW(create_pmem_region); @@ -2578,7 +2610,7 @@ static ssize_t create_ram_region_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - return create_region_store(dev, buf, len, CXL_DECODER_RAM); + return create_region_store(dev, buf, len, CXL_PARTMODE_RAM); } DEVICE_ATTR_RW(create_ram_region); @@ -2676,7 +2708,7 @@ EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, "CXL"); struct cxl_poison_context { struct cxl_port *port; - enum cxl_decoder_mode mode; + int part; u64 offset; }; @@ -2684,47 +2716,45 @@ static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd, struct cxl_poison_context *ctx) { struct cxl_dev_state *cxlds = cxlmd->cxlds; + const struct resource *res; + struct resource *p, *last; u64 offset, length; int rc = 0; + if (ctx->part < 0) + return 0; + /* - * Collect poison for the remaining unmapped resources - * after poison is collected by committed endpoints. - * - * Knowing that PMEM must always follow RAM, get poison - * for unmapped resources based on the last decoder's mode: - * ram: scan remains of ram range, then any pmem range - * pmem: scan remains of pmem range + * Collect poison for the remaining unmapped resources after + * poison is collected by committed endpoints decoders. */ - - if (ctx->mode == CXL_DECODER_RAM) { - offset = ctx->offset; - length = resource_size(&cxlds->ram_res) - offset; + for (int i = ctx->part; i < cxlds->nr_partitions; i++) { + res = &cxlds->part[i].res; + for (p = res->child, last = NULL; p; p = p->sibling) + last = p; + if (last) + offset = last->end + 1; + else + offset = res->start; + length = res->end - offset + 1; + if (!length) + break; rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); - if (rc == -EFAULT) - rc = 0; + if (rc == -EFAULT && cxlds->part[i].mode == CXL_PARTMODE_RAM) + continue; if (rc) - return rc; - } - if (ctx->mode == CXL_DECODER_PMEM) { - offset = ctx->offset; - length = resource_size(&cxlds->dpa_res) - offset; - if (!length) - return 0; - } else if (resource_size(&cxlds->pmem_res)) { - offset = cxlds->pmem_res.start; - length = resource_size(&cxlds->pmem_res); - } else { - return 0; + break; } - return cxl_mem_get_poison(cxlmd, offset, length, NULL); + return rc; } static int poison_by_decoder(struct device *dev, void *arg) { struct cxl_poison_context *ctx = arg; struct cxl_endpoint_decoder *cxled; + enum cxl_partition_mode mode; + struct cxl_dev_state *cxlds; struct cxl_memdev *cxlmd; u64 offset, length; int rc = 0; @@ -2733,27 +2763,18 @@ static int poison_by_decoder(struct device *dev, void *arg) return rc; cxled = to_cxl_endpoint_decoder(dev); - if (!cxled->dpa_res || !resource_size(cxled->dpa_res)) + if (!cxled->dpa_res) return rc; - /* - * Regions are only created with single mode decoders: pmem or ram. - * Linux does not support mixed mode decoders. This means that - * reading poison per endpoint decoder adheres to the requirement - * that poison reads of pmem and ram must be separated. - * CXL 3.0 Spec 8.2.9.8.4.1 - */ - if (cxled->mode == CXL_DECODER_MIXED) { - dev_dbg(dev, "poison list read unsupported in mixed mode\n"); - return rc; - } - cxlmd = cxled_to_memdev(cxled); + cxlds = cxlmd->cxlds; + mode = cxlds->part[cxled->part].mode; + if (cxled->skip) { offset = cxled->dpa_res->start - cxled->skip; length = cxled->skip; rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); - if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM) + if (rc == -EFAULT && mode == CXL_PARTMODE_RAM) rc = 0; if (rc) return rc; @@ -2762,7 +2783,7 @@ static int poison_by_decoder(struct device *dev, void *arg) offset = cxled->dpa_res->start; length = cxled->dpa_res->end - offset + 1; rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region); - if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM) + if (rc == -EFAULT && mode == CXL_PARTMODE_RAM) rc = 0; if (rc) return rc; @@ -2770,7 +2791,7 @@ static int poison_by_decoder(struct device *dev, void *arg) /* Iterate until commit_end is reached */ if (cxled->cxld.id == ctx->port->commit_end) { ctx->offset = cxled->dpa_res->end + 1; - ctx->mode = cxled->mode; + ctx->part = cxled->part; return 1; } @@ -2783,7 +2804,8 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port) int rc = 0; ctx = (struct cxl_poison_context) { - .port = port + .port = port, + .part = -1, }; rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder); @@ -2919,7 +2941,7 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0); /* Apply the hpa_offset to the region base address */ - hpa = hpa_offset + p->res->start; + hpa = hpa_offset + p->res->start + p->cache_size; /* Root decoder translation overrides typical modulo decode */ if (cxlrd->hpa_to_spa) @@ -3036,17 +3058,13 @@ static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr) struct cxl_dax_region *cxlr_dax; struct device *dev; - down_read(&cxl_region_rwsem); - if (p->state != CXL_CONFIG_COMMIT) { - cxlr_dax = ERR_PTR(-ENXIO); - goto out; - } + guard(rwsem_read)(&cxl_region_rwsem); + if (p->state != CXL_CONFIG_COMMIT) + return ERR_PTR(-ENXIO); cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL); - if (!cxlr_dax) { - cxlr_dax = ERR_PTR(-ENOMEM); - goto out; - } + if (!cxlr_dax) + return ERR_PTR(-ENOMEM); cxlr_dax->hpa_range.start = p->res->start; cxlr_dax->hpa_range.end = p->res->end; @@ -3059,8 +3077,6 @@ static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr) dev->parent = &cxlr->dev; dev->bus = &cxl_bus_type; dev->type = &cxl_dax_region_type; -out: - up_read(&cxl_region_rwsem); return cxlr_dax; } @@ -3187,9 +3203,10 @@ err: return rc; } -static int match_root_decoder_by_range(struct device *dev, void *data) +static int match_root_decoder_by_range(struct device *dev, + const void *data) { - struct range *r1, *r2 = data; + const struct range *r1, *r2 = data; struct cxl_root_decoder *cxlrd; if (!is_root_decoder(dev)) @@ -3200,12 +3217,11 @@ static int match_root_decoder_by_range(struct device *dev, void *data) return range_contains(r1, r2); } -static int match_region_by_range(struct device *dev, void *data) +static int match_region_by_range(struct device *dev, const void *data) { struct cxl_region_params *p; struct cxl_region *cxlr; - struct range *r = data; - int rc = 0; + const struct range *r = data; if (!is_cxl_region(dev)) return 0; @@ -3213,60 +3229,96 @@ static int match_region_by_range(struct device *dev, void *data) cxlr = to_cxl_region(dev); p = &cxlr->params; - down_read(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_region_rwsem); if (p->res && p->res->start == r->start && p->res->end == r->end) - rc = 1; - up_read(&cxl_region_rwsem); + return 1; - return rc; + return 0; } -/* Establish an empty region covering the given HPA range */ -static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, - struct cxl_endpoint_decoder *cxled) +static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, + struct resource *res) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_region_params *p = &cxlr->params; + int nid = phys_to_target_node(res->start); + resource_size_t size = resource_size(res); + resource_size_t cache_size, start; + int rc; + + rc = cxl_acpi_get_extended_linear_cache_size(res, nid, &cache_size); + if (rc) + return rc; + + if (!cache_size) + return 0; + + if (size != cache_size) { + dev_warn(&cxlr->dev, + "Extended Linear Cache size %pa != CXL size %pa. No Support!", + &cache_size, &size); + return -ENXIO; + } + + /* + * Move the start of the range to where the cache range starts. The + * implementation assumes that the cache range is in front of the + * CXL range. This is not dictated by the HMAT spec but is how the + * current known implementation is configured. + * + * The cache range is expected to be within the CFMWS. The adjusted + * res->start should not be less than cxlrd->res->start. + */ + start = res->start - cache_size; + if (start < cxlrd->res->start) + return -ENXIO; + + res->start = start; + p->cache_size = cache_size; + + return 0; +} + +static int __construct_region(struct cxl_region *cxlr, + struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder *cxled) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct cxl_port *port = cxlrd_to_port(cxlrd); struct range *hpa = &cxled->cxld.hpa_range; struct cxl_region_params *p; - struct cxl_region *cxlr; struct resource *res; int rc; - do { - cxlr = __create_region(cxlrd, cxled->mode, - atomic_read(&cxlrd->region_id)); - } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); - - if (IS_ERR(cxlr)) { - dev_err(cxlmd->dev.parent, - "%s:%s: %s failed assign region: %ld\n", - dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), - __func__, PTR_ERR(cxlr)); - return cxlr; - } - - down_write(&cxl_region_rwsem); + guard(rwsem_write)(&cxl_region_rwsem); p = &cxlr->params; if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { dev_err(cxlmd->dev.parent, "%s:%s: %s autodiscovery interrupted\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__); - rc = -EBUSY; - goto err; + return -EBUSY; } set_bit(CXL_REGION_F_AUTO, &cxlr->flags); res = kmalloc(sizeof(*res), GFP_KERNEL); - if (!res) { - rc = -ENOMEM; - goto err; - } + if (!res) + return -ENOMEM; *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa), dev_name(&cxlr->dev)); + + rc = cxl_extended_linear_cache_resize(cxlr, res); + if (rc && rc != -EOPNOTSUPP) { + /* + * Failing to support extended linear cache region resize does not + * prevent the region from functioning. Only causes cxl list showing + * incorrect region size. + */ + dev_warn(cxlmd->dev.parent, + "Extended linear cache calculation failed rc:%d\n", rc); + } + rc = insert_resource(cxlrd->res, res); if (rc) { /* @@ -3286,7 +3338,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group()); if (rc) - goto err; + return rc; dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__, @@ -3295,14 +3347,40 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, /* ...to match put_device() in cxl_add_to_region() */ get_device(&cxlr->dev); - up_write(&cxl_region_rwsem); - return cxlr; + return 0; +} -err: - up_write(&cxl_region_rwsem); - devm_release_action(port->uport_dev, unregister_region, cxlr); - return ERR_PTR(rc); +/* Establish an empty region covering the given HPA range */ +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder *cxled) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_port *port = cxlrd_to_port(cxlrd); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + int rc, part = READ_ONCE(cxled->part); + struct cxl_region *cxlr; + + do { + cxlr = __create_region(cxlrd, cxlds->part[part].mode, + atomic_read(&cxlrd->region_id)); + } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); + + if (IS_ERR(cxlr)) { + dev_err(cxlmd->dev.parent, + "%s:%s: %s failed assign region: %ld\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + __func__, PTR_ERR(cxlr)); + return cxlr; + } + + rc = __construct_region(cxlr, cxlrd, cxled); + if (rc) { + devm_release_action(port->uport_dev, unregister_region, cxlr); + return ERR_PTR(rc); + } + + return cxlr; } int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled) @@ -3372,6 +3450,34 @@ out: } EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, "CXL"); +u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa) +{ + struct cxl_region_ref *iter; + unsigned long index; + + if (!endpoint) + return ~0ULL; + + guard(rwsem_write)(&cxl_region_rwsem); + + xa_for_each(&endpoint->regions, index, iter) { + struct cxl_region_params *p = &iter->region->params; + + if (p->res->start <= spa && spa <= p->res->end) { + if (!p->cache_size) + return ~0ULL; + + if (spa >= p->res->start + p->cache_size) + return spa - p->cache_size; + + return spa + p->cache_size; + } + } + + return ~0ULL; +} +EXPORT_SYMBOL_NS_GPL(cxl_port_get_spa_cache_alias, "CXL"); + static int is_system_ram(struct resource *res, void *arg) { struct cxl_region *cxlr = arg; @@ -3437,9 +3543,9 @@ out: return rc; switch (cxlr->mode) { - case CXL_DECODER_PMEM: + case CXL_PARTMODE_PMEM: return devm_cxl_add_pmem_region(cxlr); - case CXL_DECODER_RAM: + case CXL_PARTMODE_RAM: /* * The region can not be manged by CXL if any portion of * it is already online as 'System RAM' diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 59cb35b40c7e..5ca7b0eed568 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -289,21 +289,17 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi, return true; } -/** - * cxl_find_regblock_instance() - Locate a register block by type / index - * @pdev: The CXL PCI device to enumerate. - * @type: Register Block Indicator id - * @map: Enumeration output, clobbered on error - * @index: Index into which particular instance of a regblock wanted in the - * order found in register locator DVSEC. - * - * Return: 0 if register block enumerated, negative error code otherwise +/* + * __cxl_find_regblock_instance() - Locate a register block or count instances by type / index + * Use CXL_INSTANCES_COUNT for @index if counting instances. * - * A CXL DVSEC may point to one or more register blocks, search for them - * by @type and @index. + * __cxl_find_regblock_instance() may return: + * 0 - if register block enumerated. + * >= 0 - if counting instances. + * < 0 - error code otherwise. */ -int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map, int index) +static int __cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map, int index) { u32 regloc_size, regblocks; int instance = 0; @@ -342,8 +338,30 @@ int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, } map->resource = CXL_RESOURCE_NONE; + if (index == CXL_INSTANCES_COUNT) + return instance; + return -ENODEV; } + +/** + * cxl_find_regblock_instance() - Locate a register block by type / index + * @pdev: The CXL PCI device to enumerate. + * @type: Register Block Indicator id + * @map: Enumeration output, clobbered on error + * @index: Index into which particular instance of a regblock wanted in the + * order found in register locator DVSEC. + * + * Return: 0 if register block enumerated, negative error code otherwise + * + * A CXL DVSEC may point to one or more register blocks, search for them + * by @type and @index. + */ +int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map, unsigned int index) +{ + return __cxl_find_regblock_instance(pdev, type, map, index); +} EXPORT_SYMBOL_NS_GPL(cxl_find_regblock_instance, "CXL"); /** @@ -360,7 +378,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_find_regblock_instance, "CXL"); int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map) { - return cxl_find_regblock_instance(pdev, type, map, 0); + return __cxl_find_regblock_instance(pdev, type, map, 0); } EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, "CXL"); @@ -371,19 +389,13 @@ EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, "CXL"); * * Some regblocks may be repeated. Count how many instances. * - * Return: count of matching regblocks. + * Return: non-negative count of matching regblocks, negative error code otherwise. */ int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type) { struct cxl_register_map map; - int rc, count = 0; - while (1) { - rc = cxl_find_regblock_instance(pdev, type, &map, count); - if (rc) - return count; - count++; - } + return __cxl_find_regblock_instance(pdev, type, &map, CXL_INSTANCES_COUNT); } EXPORT_SYMBOL_NS_GPL(cxl_count_regblock, "CXL"); @@ -569,7 +581,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri resource_size_t rcrb = ri->base; void __iomem *addr; u32 bar0, bar1; - u16 cmd; u32 id; if (which == CXL_RCRB_UPSTREAM) @@ -591,7 +602,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri } id = readl(addr + PCI_VENDOR_ID); - cmd = readw(addr + PCI_COMMAND); bar0 = readl(addr + PCI_BASE_ADDRESS_0); bar1 = readl(addr + PCI_BASE_ADDRESS_1); iounmap(addr); @@ -606,8 +616,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri dev_err(dev, "Failed to access Downstream Port RCRB\n"); return CXL_RESOURCE_NONE; } - if (!(cmd & PCI_COMMAND_MEMORY)) - return CXL_RESOURCE_NONE; /* The RCRB is a Memory Window, and the MEM_TYPE_1M bit is obsolete */ if (bar0 & (PCI_BASE_ADDRESS_MEM_TYPE_1M | PCI_BASE_ADDRESS_SPACE_IO)) return CXL_RESOURCE_NONE; diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 8389a94adb1a..25ebfbc1616c 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -48,6 +48,34 @@ { CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \ ) +TRACE_EVENT(cxl_port_aer_uncorrectable_error, + TP_PROTO(struct device *dev, u32 status, u32 fe, u32 *hl), + TP_ARGS(dev, status, fe, hl), + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __string(host, dev_name(dev->parent)) + __field(u32, status) + __field(u32, first_error) + __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) + ), + TP_fast_assign( + __assign_str(device); + __assign_str(host); + __entry->status = status; + __entry->first_error = fe; + /* + * Embed the 512B headerlog data for user app retrieval and + * parsing, but no need to print this in the trace buffer. + */ + memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); + ), + TP_printk("device=%s host=%s status: '%s' first_error: '%s'", + __get_str(device), __get_str(host), + show_uc_errs(__entry->status), + show_uc_errs(__entry->first_error) + ) +); + TRACE_EVENT(cxl_aer_uncorrectable_error, TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 *hl), TP_ARGS(cxlmd, status, fe, hl), @@ -96,6 +124,25 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \ ) +TRACE_EVENT(cxl_port_aer_correctable_error, + TP_PROTO(struct device *dev, u32 status), + TP_ARGS(dev, status), + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __string(host, dev_name(dev->parent)) + __field(u32, status) + ), + TP_fast_assign( + __assign_str(device); + __assign_str(host); + __entry->status = status; + ), + TP_printk("device=%s host=%s status='%s'", + __get_str(device), __get_str(host), + show_ce_errs(__entry->status) + ) +); + TRACE_EVENT(cxl_aer_correctable_error, TP_PROTO(const struct cxl_memdev *cxlmd, u32 status), TP_ARGS(cxlmd, status), @@ -166,11 +213,13 @@ TRACE_EVENT(cxl_overflow, #define CXL_EVENT_RECORD_FLAG_MAINT_NEEDED BIT(3) #define CXL_EVENT_RECORD_FLAG_PERF_DEGRADED BIT(4) #define CXL_EVENT_RECORD_FLAG_HW_REPLACE BIT(5) +#define CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID BIT(6) #define show_hdr_flags(flags) __print_flags(flags, " | ", \ { CXL_EVENT_RECORD_FLAG_PERMANENT, "PERMANENT_CONDITION" }, \ { CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, "MAINTENANCE_NEEDED" }, \ { CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, "PERFORMANCE_DEGRADED" }, \ - { CXL_EVENT_RECORD_FLAG_HW_REPLACE, "HARDWARE_REPLACEMENT_NEEDED" } \ + { CXL_EVENT_RECORD_FLAG_HW_REPLACE, "HARDWARE_REPLACEMENT_NEEDED" }, \ + { CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID, "MAINT_OP_SUB_CLASS_VALID" } \ ) /* @@ -197,7 +246,8 @@ TRACE_EVENT(cxl_overflow, __field(u16, hdr_related_handle) \ __field(u64, hdr_timestamp) \ __field(u8, hdr_length) \ - __field(u8, hdr_maint_op_class) + __field(u8, hdr_maint_op_class) \ + __field(u8, hdr_maint_op_sub_class) #define CXL_EVT_TP_fast_assign(cxlmd, l, hdr) \ __assign_str(memdev); \ @@ -209,17 +259,19 @@ TRACE_EVENT(cxl_overflow, __entry->hdr_handle = le16_to_cpu((hdr).handle); \ __entry->hdr_related_handle = le16_to_cpu((hdr).related_handle); \ __entry->hdr_timestamp = le64_to_cpu((hdr).timestamp); \ - __entry->hdr_maint_op_class = (hdr).maint_op_class + __entry->hdr_maint_op_class = (hdr).maint_op_class; \ + __entry->hdr_maint_op_sub_class = (hdr).maint_op_sub_class #define CXL_EVT_TP_printk(fmt, ...) \ TP_printk("memdev=%s host=%s serial=%lld log=%s : time=%llu uuid=%pUb " \ "len=%d flags='%s' handle=%x related_handle=%x " \ - "maint_op_class=%u : " fmt, \ + "maint_op_class=%u maint_op_sub_class=%u : " fmt, \ __get_str(memdev), __get_str(host), __entry->serial, \ cxl_event_log_type_str(__entry->log), \ __entry->hdr_timestamp, &__entry->hdr_uuid, __entry->hdr_length,\ show_hdr_flags(__entry->hdr_flags), __entry->hdr_handle, \ __entry->hdr_related_handle, __entry->hdr_maint_op_class, \ + __entry->hdr_maint_op_sub_class, \ ##__VA_ARGS__) TRACE_EVENT(cxl_generic_event, @@ -264,8 +316,30 @@ TRACE_EVENT(cxl_generic_event, ) /* + * Component ID Format + * CXL 3.1 section 8.2.9.2.1; Table 8-44 + */ +#define CXL_PLDM_COMPONENT_ID_ENTITY_VALID BIT(0) +#define CXL_PLDM_COMPONENT_ID_RES_VALID BIT(1) + +#define show_comp_id_pldm_flags(flags) __print_flags(flags, " | ", \ + { CXL_PLDM_COMPONENT_ID_ENTITY_VALID, "PLDM Entity ID" }, \ + { CXL_PLDM_COMPONENT_ID_RES_VALID, "Resource ID" } \ +) + +#define show_pldm_entity_id(flags, valid_comp_id, valid_id_format, comp_id) \ + (flags & valid_comp_id && flags & valid_id_format) ? \ + (comp_id[0] & CXL_PLDM_COMPONENT_ID_ENTITY_VALID) ? \ + __print_hex(&comp_id[1], 6) : "0x00" : "0x00" + +#define show_pldm_resource_id(flags, valid_comp_id, valid_id_format, comp_id) \ + (flags & valid_comp_id && flags & valid_id_format) ? \ + (comp_id[0] & CXL_PLDM_COMPONENT_ID_RES_VALID) ? \ + __print_hex(&comp_id[7], 4) : "0x00" : "0x00" + +/* * General Media Event Record - GMER - * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + * CXL rev 3.1 Section 8.2.9.2.1.1; Table 8-45 */ #define CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT BIT(0) #define CXL_GMER_EVT_DESC_THRESHOLD_EVENT BIT(1) @@ -279,10 +353,18 @@ TRACE_EVENT(cxl_generic_event, #define CXL_GMER_MEM_EVT_TYPE_ECC_ERROR 0x00 #define CXL_GMER_MEM_EVT_TYPE_INV_ADDR 0x01 #define CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR 0x02 -#define show_gmer_mem_event_type(type) __print_symbolic(type, \ - { CXL_GMER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \ - { CXL_GMER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \ - { CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" } \ +#define CXL_GMER_MEM_EVT_TYPE_TE_STATE_VIOLATION 0x03 +#define CXL_GMER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR 0x04 +#define CXL_GMER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE 0x05 +#define CXL_GMER_MEM_EVT_TYPE_CKID_VIOLATION 0x06 +#define show_gmer_mem_event_type(type) __print_symbolic(type, \ + { CXL_GMER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \ + { CXL_GMER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \ + { CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" }, \ + { CXL_GMER_MEM_EVT_TYPE_TE_STATE_VIOLATION, "TE State Violation" }, \ + { CXL_GMER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR, "Scrub Media ECC Error" }, \ + { CXL_GMER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE, "Adv Prog CME Counter Expiration" }, \ + { CXL_GMER_MEM_EVT_TYPE_CKID_VIOLATION, "CKID Violation" } \ ) #define CXL_GMER_TRANS_UNKNOWN 0x00 @@ -292,6 +374,8 @@ TRACE_EVENT(cxl_generic_event, #define CXL_GMER_TRANS_HOST_INJECT_POISON 0x04 #define CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB 0x05 #define CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT 0x06 +#define CXL_GMER_TRANS_INTERNAL_MEDIA_ECS 0x07 +#define CXL_GMER_TRANS_MEDIA_INITIALIZATION 0x08 #define show_trans_type(type) __print_symbolic(type, \ { CXL_GMER_TRANS_UNKNOWN, "Unknown" }, \ { CXL_GMER_TRANS_HOST_READ, "Host Read" }, \ @@ -299,26 +383,66 @@ TRACE_EVENT(cxl_generic_event, { CXL_GMER_TRANS_HOST_SCAN_MEDIA, "Host Scan Media" }, \ { CXL_GMER_TRANS_HOST_INJECT_POISON, "Host Inject Poison" }, \ { CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, "Internal Media Scrub" }, \ - { CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT, "Internal Media Management" } \ + { CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT, "Internal Media Management" }, \ + { CXL_GMER_TRANS_INTERNAL_MEDIA_ECS, "Internal Media Error Check Scrub" }, \ + { CXL_GMER_TRANS_MEDIA_INITIALIZATION, "Media Initialization" } \ ) #define CXL_GMER_VALID_CHANNEL BIT(0) #define CXL_GMER_VALID_RANK BIT(1) #define CXL_GMER_VALID_DEVICE BIT(2) #define CXL_GMER_VALID_COMPONENT BIT(3) +#define CXL_GMER_VALID_COMPONENT_ID_FORMAT BIT(4) #define show_valid_flags(flags) __print_flags(flags, "|", \ { CXL_GMER_VALID_CHANNEL, "CHANNEL" }, \ { CXL_GMER_VALID_RANK, "RANK" }, \ { CXL_GMER_VALID_DEVICE, "DEVICE" }, \ - { CXL_GMER_VALID_COMPONENT, "COMPONENT" } \ + { CXL_GMER_VALID_COMPONENT, "COMPONENT" }, \ + { CXL_GMER_VALID_COMPONENT_ID_FORMAT, "COMPONENT PLDM FORMAT" } \ +) + +#define CXL_GMER_CME_EV_FLAG_CME_MULTIPLE_MEDIA BIT(0) +#define CXL_GMER_CME_EV_FLAG_THRESHOLD_EXCEEDED BIT(1) +#define show_cme_threshold_ev_flags(flags) __print_flags(flags, "|", \ + { \ + CXL_GMER_CME_EV_FLAG_CME_MULTIPLE_MEDIA, \ + "Corrected Memory Errors in Multiple Media Components" \ + }, { \ + CXL_GMER_CME_EV_FLAG_THRESHOLD_EXCEEDED, \ + "Exceeded Programmable Threshold" \ + } \ +) + +#define CXL_GMER_MEM_EVT_SUB_TYPE_NOT_REPORTED 0x00 +#define CXL_GMER_MEM_EVT_SUB_TYPE_INTERNAL_DATAPATH_ERROR 0x01 +#define CXL_GMER_MEM_EVT_SUB_TYPE_MEDIA_LINK_COMMAND_TRAINING_ERROR 0x02 +#define CXL_GMER_MEM_EVT_SUB_TYPE_MEDIA_LINK_CONTROL_TRAINING_ERROR 0x03 +#define CXL_GMER_MEM_EVT_SUB_TYPE_MEDIA_LINK_DATA_TRAINING_ERROR 0x04 +#define CXL_GMER_MEM_EVT_SUB_TYPE_MEDIA_LINK_CRC_ERROR 0x05 +#define show_mem_event_sub_type(sub_type) __print_symbolic(sub_type, \ + { CXL_GMER_MEM_EVT_SUB_TYPE_NOT_REPORTED, "Not Reported" }, \ + { CXL_GMER_MEM_EVT_SUB_TYPE_INTERNAL_DATAPATH_ERROR, "Internal Datapath Error" }, \ + { \ + CXL_GMER_MEM_EVT_SUB_TYPE_MEDIA_LINK_COMMAND_TRAINING_ERROR, \ + "Media Link Command Training Error" \ + }, { \ + CXL_GMER_MEM_EVT_SUB_TYPE_MEDIA_LINK_CONTROL_TRAINING_ERROR, \ + "Media Link Control Training Error" \ + }, { \ + CXL_GMER_MEM_EVT_SUB_TYPE_MEDIA_LINK_DATA_TRAINING_ERROR, \ + "Media Link Data Training Error" \ + }, { \ + CXL_GMER_MEM_EVT_SUB_TYPE_MEDIA_LINK_CRC_ERROR, "Media Link CRC Error" \ + } \ ) TRACE_EVENT(cxl_general_media, TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, - struct cxl_region *cxlr, u64 hpa, struct cxl_event_gen_media *rec), + struct cxl_region *cxlr, u64 hpa, u64 hpa_alias0, + struct cxl_event_gen_media *rec), - TP_ARGS(cxlmd, log, cxlr, hpa, rec), + TP_ARGS(cxlmd, log, cxlr, hpa, hpa_alias0, rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -332,10 +456,14 @@ TRACE_EVENT(cxl_general_media, __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE) /* Following are out of order to pack trace record */ __field(u64, hpa) + __field(u64, hpa_alias0) __field_struct(uuid_t, region_uuid) __field(u16, validity_flags) __field(u8, rank) __field(u8, dpa_flags) + __field(u32, cme_count) + __field(u8, sub_type) + __field(u8, cme_threshold_ev_flags) __string(region_name, cxlr ? dev_name(&cxlr->dev) : "") ), @@ -350,6 +478,7 @@ TRACE_EVENT(cxl_general_media, __entry->dpa &= CXL_DPA_MASK; __entry->descriptor = rec->media_hdr.descriptor; __entry->type = rec->media_hdr.type; + __entry->sub_type = rec->sub_type; __entry->transaction_type = rec->media_hdr.transaction_type; __entry->channel = rec->media_hdr.channel; __entry->rank = rec->media_hdr.rank; @@ -358,6 +487,7 @@ TRACE_EVENT(cxl_general_media, CXL_EVENT_GEN_MED_COMP_ID_SIZE); __entry->validity_flags = get_unaligned_le16(&rec->media_hdr.validity_flags); __entry->hpa = hpa; + __entry->hpa_alias0 = hpa_alias0; if (cxlr) { __assign_str(region_name); uuid_copy(&__entry->region_uuid, &cxlr->params.uuid); @@ -365,27 +495,40 @@ TRACE_EVENT(cxl_general_media, __assign_str(region_name); uuid_copy(&__entry->region_uuid, &uuid_null); } + __entry->cme_threshold_ev_flags = rec->cme_threshold_ev_flags; + __entry->cme_count = get_unaligned_le24(rec->cme_count); ), CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' " \ - "descriptor='%s' type='%s' transaction_type='%s' channel=%u rank=%u " \ - "device=%x comp_id=%s validity_flags='%s' " \ - "hpa=%llx region=%s region_uuid=%pUb", + "descriptor='%s' type='%s' sub_type='%s' " \ + "transaction_type='%s' channel=%u rank=%u " \ + "device=%x validity_flags='%s' " \ + "comp_id=%s comp_id_pldm_valid_flags='%s' " \ + "pldm_entity_id=%s pldm_resource_id=%s " \ + "hpa=%llx hpa_alias0=%llx region=%s region_uuid=%pUb " \ + "cme_threshold_ev_flags='%s' cme_count=%u", __entry->dpa, show_dpa_flags(__entry->dpa_flags), show_event_desc_flags(__entry->descriptor), show_gmer_mem_event_type(__entry->type), + show_mem_event_sub_type(__entry->sub_type), show_trans_type(__entry->transaction_type), __entry->channel, __entry->rank, __entry->device, - __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE), show_valid_flags(__entry->validity_flags), - __entry->hpa, __get_str(region_name), &__entry->region_uuid + __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE), + show_comp_id_pldm_flags(__entry->comp_id[0]), + show_pldm_entity_id(__entry->validity_flags, CXL_GMER_VALID_COMPONENT, + CXL_GMER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), + show_pldm_resource_id(__entry->validity_flags, CXL_GMER_VALID_COMPONENT, + CXL_GMER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), + __entry->hpa, __entry->hpa_alias0, __get_str(region_name), &__entry->region_uuid, + show_cme_threshold_ev_flags(__entry->cme_threshold_ev_flags), __entry->cme_count ) ); /* * DRAM Event Record - DER * - * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + * CXL rev 3.1 section 8.2.9.2.1.2; Table 8-46 */ /* * DRAM Event Record defines many fields the same as the General Media Event @@ -395,11 +538,17 @@ TRACE_EVENT(cxl_general_media, #define CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR 0x01 #define CXL_DER_MEM_EVT_TYPE_INV_ADDR 0x02 #define CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR 0x03 -#define show_dram_mem_event_type(type) __print_symbolic(type, \ - { CXL_DER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \ - { CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR, "Scrub Media ECC Error" }, \ - { CXL_DER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \ - { CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" } \ +#define CXL_DER_MEM_EVT_TYPE_TE_STATE_VIOLATION 0x04 +#define CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE 0x05 +#define CXL_DER_MEM_EVT_TYPE_CKID_VIOLATION 0x06 +#define show_dram_mem_event_type(type) __print_symbolic(type, \ + { CXL_DER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \ + { CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR, "Scrub Media ECC Error" }, \ + { CXL_DER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \ + { CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" }, \ + { CXL_DER_MEM_EVT_TYPE_TE_STATE_VIOLATION, "TE State Violation" }, \ + { CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE, "Adv Prog CME Counter Expiration" }, \ + { CXL_DER_MEM_EVT_TYPE_CKID_VIOLATION, "CKID Violation" } \ ) #define CXL_DER_VALID_CHANNEL BIT(0) @@ -410,23 +559,30 @@ TRACE_EVENT(cxl_general_media, #define CXL_DER_VALID_ROW BIT(5) #define CXL_DER_VALID_COLUMN BIT(6) #define CXL_DER_VALID_CORRECTION_MASK BIT(7) -#define show_dram_valid_flags(flags) __print_flags(flags, "|", \ - { CXL_DER_VALID_CHANNEL, "CHANNEL" }, \ - { CXL_DER_VALID_RANK, "RANK" }, \ - { CXL_DER_VALID_NIBBLE, "NIBBLE" }, \ - { CXL_DER_VALID_BANK_GROUP, "BANK GROUP" }, \ - { CXL_DER_VALID_BANK, "BANK" }, \ - { CXL_DER_VALID_ROW, "ROW" }, \ - { CXL_DER_VALID_COLUMN, "COLUMN" }, \ - { CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" } \ +#define CXL_DER_VALID_COMPONENT BIT(8) +#define CXL_DER_VALID_COMPONENT_ID_FORMAT BIT(9) +#define CXL_DER_VALID_SUB_CHANNEL BIT(10) +#define show_dram_valid_flags(flags) __print_flags(flags, "|", \ + { CXL_DER_VALID_CHANNEL, "CHANNEL" }, \ + { CXL_DER_VALID_RANK, "RANK" }, \ + { CXL_DER_VALID_NIBBLE, "NIBBLE" }, \ + { CXL_DER_VALID_BANK_GROUP, "BANK GROUP" }, \ + { CXL_DER_VALID_BANK, "BANK" }, \ + { CXL_DER_VALID_ROW, "ROW" }, \ + { CXL_DER_VALID_COLUMN, "COLUMN" }, \ + { CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" }, \ + { CXL_DER_VALID_COMPONENT, "COMPONENT" }, \ + { CXL_DER_VALID_COMPONENT_ID_FORMAT, "COMPONENT PLDM FORMAT" }, \ + { CXL_DER_VALID_SUB_CHANNEL, "SUB CHANNEL" } \ ) TRACE_EVENT(cxl_dram, TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, - struct cxl_region *cxlr, u64 hpa, struct cxl_event_dram *rec), + struct cxl_region *cxlr, u64 hpa, u64 hpa_alias0, + struct cxl_event_dram *rec), - TP_ARGS(cxlmd, log, cxlr, hpa, rec), + TP_ARGS(cxlmd, log, cxlr, hpa, hpa_alias0, rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -442,11 +598,18 @@ TRACE_EVENT(cxl_dram, __field(u32, row) __array(u8, cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE) __field(u64, hpa) + __field(u64, hpa_alias0) __field_struct(uuid_t, region_uuid) __field(u8, rank) /* Out of order to pack trace record */ __field(u8, bank_group) /* Out of order to pack trace record */ __field(u8, bank) /* Out of order to pack trace record */ __field(u8, dpa_flags) /* Out of order to pack trace record */ + /* Following are out of order to pack trace record */ + __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE) + __field(u32, cvme_count) + __field(u8, sub_type) + __field(u8, sub_channel) + __field(u8, cme_threshold_ev_flags) __string(region_name, cxlr ? dev_name(&cxlr->dev) : "") ), @@ -460,6 +623,7 @@ TRACE_EVENT(cxl_dram, __entry->dpa &= CXL_DPA_MASK; __entry->descriptor = rec->media_hdr.descriptor; __entry->type = rec->media_hdr.type; + __entry->sub_type = rec->sub_type; __entry->transaction_type = rec->media_hdr.transaction_type; __entry->validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags); __entry->channel = rec->media_hdr.channel; @@ -472,6 +636,7 @@ TRACE_EVENT(cxl_dram, memcpy(__entry->cor_mask, &rec->correction_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE); __entry->hpa = hpa; + __entry->hpa_alias0 = hpa_alias0; if (cxlr) { __assign_str(region_name); uuid_copy(&__entry->region_uuid, &cxlr->params.uuid); @@ -479,30 +644,47 @@ TRACE_EVENT(cxl_dram, __assign_str(region_name); uuid_copy(&__entry->region_uuid, &uuid_null); } + memcpy(__entry->comp_id, &rec->component_id, + CXL_EVENT_GEN_MED_COMP_ID_SIZE); + __entry->sub_channel = rec->sub_channel; + __entry->cme_threshold_ev_flags = rec->cme_threshold_ev_flags; + __entry->cvme_count = get_unaligned_le24(rec->cvme_count); ), - CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' " \ + CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' sub_type='%s' " \ "transaction_type='%s' channel=%u rank=%u nibble_mask=%x " \ "bank_group=%u bank=%u row=%u column=%u cor_mask=%s " \ "validity_flags='%s' " \ - "hpa=%llx region=%s region_uuid=%pUb", + "comp_id=%s comp_id_pldm_valid_flags='%s' " \ + "pldm_entity_id=%s pldm_resource_id=%s " \ + "hpa=%llx hpa_alias0=%llx region=%s region_uuid=%pUb " \ + "sub_channel=%u cme_threshold_ev_flags='%s' cvme_count=%u", __entry->dpa, show_dpa_flags(__entry->dpa_flags), show_event_desc_flags(__entry->descriptor), show_dram_mem_event_type(__entry->type), + show_mem_event_sub_type(__entry->sub_type), show_trans_type(__entry->transaction_type), __entry->channel, __entry->rank, __entry->nibble_mask, __entry->bank_group, __entry->bank, __entry->row, __entry->column, __print_hex(__entry->cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE), show_dram_valid_flags(__entry->validity_flags), - __entry->hpa, __get_str(region_name), &__entry->region_uuid + __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE), + show_comp_id_pldm_flags(__entry->comp_id[0]), + show_pldm_entity_id(__entry->validity_flags, CXL_DER_VALID_COMPONENT, + CXL_DER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), + show_pldm_resource_id(__entry->validity_flags, CXL_DER_VALID_COMPONENT, + CXL_DER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), + __entry->hpa, __entry->hpa_alias0, __get_str(region_name), &__entry->region_uuid, + __entry->sub_channel, show_cme_threshold_ev_flags(__entry->cme_threshold_ev_flags), + __entry->cvme_count ) ); /* * Memory Module Event Record - MMER * - * CXL res 3.0 section 8.2.9.2.1.3; Table 8-45 + * CXL res 3.1 section 8.2.9.2.1.3; Table 8-47 */ #define CXL_MMER_HEALTH_STATUS_CHANGE 0x00 #define CXL_MMER_MEDIA_STATUS_CHANGE 0x01 @@ -510,27 +692,35 @@ TRACE_EVENT(cxl_dram, #define CXL_MMER_TEMP_CHANGE 0x03 #define CXL_MMER_DATA_PATH_ERROR 0x04 #define CXL_MMER_LSA_ERROR 0x05 +#define CXL_MMER_UNRECOV_SIDEBAND_BUS_ERROR 0x06 +#define CXL_MMER_MEMORY_MEDIA_FRU_ERROR 0x07 +#define CXL_MMER_POWER_MANAGEMENT_FAULT 0x08 #define show_dev_evt_type(type) __print_symbolic(type, \ { CXL_MMER_HEALTH_STATUS_CHANGE, "Health Status Change" }, \ { CXL_MMER_MEDIA_STATUS_CHANGE, "Media Status Change" }, \ { CXL_MMER_LIFE_USED_CHANGE, "Life Used Change" }, \ { CXL_MMER_TEMP_CHANGE, "Temperature Change" }, \ { CXL_MMER_DATA_PATH_ERROR, "Data Path Error" }, \ - { CXL_MMER_LSA_ERROR, "LSA Error" } \ + { CXL_MMER_LSA_ERROR, "LSA Error" }, \ + { CXL_MMER_UNRECOV_SIDEBAND_BUS_ERROR, "Unrecoverable Internal Sideband Bus Error" }, \ + { CXL_MMER_MEMORY_MEDIA_FRU_ERROR, "Memory Media FRU Error" }, \ + { CXL_MMER_POWER_MANAGEMENT_FAULT, "Power Management Fault" } \ ) /* * Device Health Information - DHI * - * CXL res 3.0 section 8.2.9.8.3.1; Table 8-100 + * CXL res 3.1 section 8.2.9.9.3.1; Table 8-133 */ #define CXL_DHI_HS_MAINTENANCE_NEEDED BIT(0) #define CXL_DHI_HS_PERFORMANCE_DEGRADED BIT(1) #define CXL_DHI_HS_HW_REPLACEMENT_NEEDED BIT(2) +#define CXL_DHI_HS_MEM_CAPACITY_DEGRADED BIT(3) #define show_health_status_flags(flags) __print_flags(flags, "|", \ { CXL_DHI_HS_MAINTENANCE_NEEDED, "MAINTENANCE_NEEDED" }, \ { CXL_DHI_HS_PERFORMANCE_DEGRADED, "PERFORMANCE_DEGRADED" }, \ - { CXL_DHI_HS_HW_REPLACEMENT_NEEDED, "REPLACEMENT_NEEDED" } \ + { CXL_DHI_HS_HW_REPLACEMENT_NEEDED, "REPLACEMENT_NEEDED" }, \ + { CXL_DHI_HS_MEM_CAPACITY_DEGRADED, "MEM_CAPACITY_DEGRADED" } \ ) #define CXL_DHI_MS_NORMAL 0x00 @@ -584,6 +774,26 @@ TRACE_EVENT(cxl_dram, #define CXL_DHI_AS_COR_VOL_ERR_CNT(as) ((as & 0x10) >> 4) #define CXL_DHI_AS_COR_PER_ERR_CNT(as) ((as & 0x20) >> 5) +#define CXL_MMER_VALID_COMPONENT BIT(0) +#define CXL_MMER_VALID_COMPONENT_ID_FORMAT BIT(1) +#define show_mem_module_valid_flags(flags) __print_flags(flags, "|", \ + { CXL_MMER_VALID_COMPONENT, "COMPONENT" }, \ + { CXL_MMER_VALID_COMPONENT_ID_FORMAT, "COMPONENT PLDM FORMAT" } \ +) +#define CXL_MMER_DEV_EVT_SUB_TYPE_NOT_REPORTED 0x00 +#define CXL_MMER_DEV_EVT_SUB_TYPE_INVALID_CONFIG_DATA 0x01 +#define CXL_MMER_DEV_EVT_SUB_TYPE_UNSUPP_CONFIG_DATA 0x02 +#define CXL_MMER_DEV_EVT_SUB_TYPE_UNSUPP_MEM_MEDIA_FRU 0x03 +#define show_dev_event_sub_type(sub_type) __print_symbolic(sub_type, \ + { CXL_MMER_DEV_EVT_SUB_TYPE_NOT_REPORTED, "Not Reported" }, \ + { CXL_MMER_DEV_EVT_SUB_TYPE_INVALID_CONFIG_DATA, "Invalid Config Data" }, \ + { CXL_MMER_DEV_EVT_SUB_TYPE_UNSUPP_CONFIG_DATA, "Unsupported Config Data" }, \ + { \ + CXL_MMER_DEV_EVT_SUB_TYPE_UNSUPP_MEM_MEDIA_FRU, \ + "Unsupported Memory Media FRU" \ + } \ +) + TRACE_EVENT(cxl_memory_module, TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, @@ -606,6 +816,9 @@ TRACE_EVENT(cxl_memory_module, __field(u32, cor_per_err_cnt) __field(s16, device_temp) __field(u8, add_status) + __field(u8, event_sub_type) + __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE) + __field(u16, validity_flags) ), TP_fast_assign( @@ -614,6 +827,7 @@ TRACE_EVENT(cxl_memory_module, /* Memory Module Event */ __entry->event_type = rec->event_type; + __entry->event_sub_type = rec->event_sub_type; /* Device Health Info */ __entry->health_status = rec->info.health_status; @@ -624,13 +838,20 @@ TRACE_EVENT(cxl_memory_module, __entry->cor_per_err_cnt = get_unaligned_le32(rec->info.cor_per_err_cnt); __entry->device_temp = get_unaligned_le16(rec->info.device_temp); __entry->add_status = rec->info.add_status; + __entry->validity_flags = get_unaligned_le16(rec->validity_flags); + memcpy(__entry->comp_id, &rec->component_id, + CXL_EVENT_GEN_MED_COMP_ID_SIZE); ), - CXL_EVT_TP_printk("event_type='%s' health_status='%s' media_status='%s' " \ - "as_life_used=%s as_dev_temp=%s as_cor_vol_err_cnt=%s " \ + CXL_EVT_TP_printk("event_type='%s' event_sub_type='%s' health_status='%s' " \ + "media_status='%s' as_life_used=%s as_dev_temp=%s as_cor_vol_err_cnt=%s " \ "as_cor_per_err_cnt=%s life_used=%u device_temp=%d " \ - "dirty_shutdown_cnt=%u cor_vol_err_cnt=%u cor_per_err_cnt=%u", + "dirty_shutdown_cnt=%u cor_vol_err_cnt=%u cor_per_err_cnt=%u " \ + "validity_flags='%s' " \ + "comp_id=%s comp_id_pldm_valid_flags='%s' " \ + "pldm_entity_id=%s pldm_resource_id=%s", show_dev_evt_type(__entry->event_type), + show_dev_event_sub_type(__entry->event_sub_type), show_health_status_flags(__entry->health_status), show_media_status(__entry->media_status), show_two_bit_status(CXL_DHI_AS_LIFE_USED(__entry->add_status)), @@ -639,7 +860,14 @@ TRACE_EVENT(cxl_memory_module, show_one_bit_status(CXL_DHI_AS_COR_PER_ERR_CNT(__entry->add_status)), __entry->life_used, __entry->device_temp, __entry->dirty_shutdown_cnt, __entry->cor_vol_err_cnt, - __entry->cor_per_err_cnt + __entry->cor_per_err_cnt, + show_mem_module_valid_flags(__entry->validity_flags), + __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE), + show_comp_id_pldm_flags(__entry->comp_id[0]), + show_pldm_entity_id(__entry->validity_flags, CXL_MMER_VALID_COMPONENT, + CXL_MMER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), + show_pldm_resource_id(__entry->validity_flags, CXL_MMER_VALID_COMPONENT, + CXL_MMER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id) ) ); @@ -695,6 +923,7 @@ TRACE_EVENT(cxl_poison, __string(region, cxlr ? dev_name(&cxlr->dev) : "") __field(u64, overflow_ts) __field(u64, hpa) + __field(u64, hpa_alias0) __field(u64, dpa) __field(u32, dpa_length) __array(char, uuid, 16) @@ -717,16 +946,22 @@ TRACE_EVENT(cxl_poison, memcpy(__entry->uuid, &cxlr->params.uuid, 16); __entry->hpa = cxl_dpa_to_hpa(cxlr, cxlmd, __entry->dpa); + if (__entry->hpa != ULLONG_MAX && cxlr->params.cache_size) + __entry->hpa_alias0 = __entry->hpa + + cxlr->params.cache_size; + else + __entry->hpa_alias0 = ULLONG_MAX; } else { __assign_str(region); memset(__entry->uuid, 0, 16); __entry->hpa = ULLONG_MAX; + __entry->hpa_alias0 = ULLONG_MAX; } ), TP_printk("memdev=%s host=%s serial=%lld trace_type=%s region=%s " \ - "region_uuid=%pU hpa=0x%llx dpa=0x%llx dpa_length=0x%x " \ - "source=%s flags=%s overflow_time=%llu", + "region_uuid=%pU hpa=0x%llx hpa_alias0=0x%llx dpa=0x%llx " \ + "dpa_length=0x%x source=%s flags=%s overflow_time=%llu", __get_str(memdev), __get_str(host), __entry->serial, @@ -734,6 +969,7 @@ TRACE_EVENT(cxl_poison, __get_str(region), __entry->uuid, __entry->hpa, + __entry->hpa_alias0, __entry->dpa, __entry->dpa_length, show_poison_source(__entry->source), diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index f6015f24ad38..a9ab46eb0610 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -302,10 +302,11 @@ int cxl_map_device_regs(const struct cxl_register_map *map, struct cxl_device_regs *regs); int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs); +#define CXL_INSTANCES_COUNT -1 enum cxl_regloc_type; int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type); int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map, int index); + struct cxl_register_map *map, unsigned int index); int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map); int cxl_setup_regs(struct cxl_register_map *map); @@ -372,32 +373,6 @@ struct cxl_decoder { }; /* - * CXL_DECODER_DEAD prevents endpoints from being reattached to regions - * while cxld_unregister() is running - */ -enum cxl_decoder_mode { - CXL_DECODER_NONE, - CXL_DECODER_RAM, - CXL_DECODER_PMEM, - CXL_DECODER_MIXED, - CXL_DECODER_DEAD, -}; - -static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode) -{ - static const char * const names[] = { - [CXL_DECODER_NONE] = "none", - [CXL_DECODER_RAM] = "ram", - [CXL_DECODER_PMEM] = "pmem", - [CXL_DECODER_MIXED] = "mixed", - }; - - if (mode >= CXL_DECODER_NONE && mode <= CXL_DECODER_MIXED) - return names[mode]; - return "mixed"; -} - -/* * Track whether this decoder is reserved for region autodiscovery, or * free for userspace provisioning. */ @@ -411,16 +386,16 @@ enum cxl_decoder_state { * @cxld: base cxl_decoder_object * @dpa_res: actively claimed DPA span of this decoder * @skip: offset into @dpa_res where @cxld.hpa_range maps - * @mode: which memory type / access-mode-partition this decoder targets * @state: autodiscovery state + * @part: partition index this decoder maps * @pos: interleave position in @cxld.region */ struct cxl_endpoint_decoder { struct cxl_decoder cxld; struct resource *dpa_res; resource_size_t skip; - enum cxl_decoder_mode mode; enum cxl_decoder_state state; + int part; int pos; }; @@ -492,6 +467,7 @@ enum cxl_config_state { * @res: allocated iomem capacity for this region * @targets: active ordered targets in current decoder configuration * @nr_targets: number of targets + * @cache_size: extended linear cache size if exists, otherwise zero. * * State transitions are protected by the cxl_region_rwsem */ @@ -503,6 +479,12 @@ struct cxl_region_params { struct resource *res; struct cxl_endpoint_decoder *targets[CXL_DECODER_MAX_INTERLEAVE]; int nr_targets; + resource_size_t cache_size; +}; + +enum cxl_partition_mode { + CXL_PARTMODE_RAM, + CXL_PARTMODE_PMEM, }; /* @@ -524,7 +506,7 @@ struct cxl_region_params { * struct cxl_region - CXL region * @dev: This region's device * @id: This region's id. Id is globally unique across all regions - * @mode: Endpoint decoder allocation / access mode + * @mode: Operational mode of the mapped capacity * @type: Endpoint decoder target type * @cxl_nvb: nvdimm bridge for coordinating @cxlr_pmem setup / shutdown * @cxlr_pmem: (for pmem regions) cached copy of the nvdimm bridge @@ -537,7 +519,7 @@ struct cxl_region_params { struct cxl_region { struct device dev; int id; - enum cxl_decoder_mode mode; + enum cxl_partition_mode mode; enum cxl_decoder_type type; struct cxl_nvdimm_bridge *cxl_nvb; struct cxl_pmem_region *cxlr_pmem; @@ -562,6 +544,7 @@ struct cxl_nvdimm { struct device dev; struct cxl_memdev *cxlmd; u8 dev_id[CXL_DEV_ID_LEN]; /* for nvdimm, string of 'serial' */ + u64 dirty_shutdowns; }; struct cxl_pmem_region_mapping { @@ -679,6 +662,7 @@ struct cxl_rcrb_info { * @regs: Dport parsed register blocks * @coord: access coordinates (bandwidth and latency performance attributes) * @link_latency: calculated PCIe downstream latency + * @gpf_dvsec: Cached GPF port DVSEC */ struct cxl_dport { struct device *dport_dev; @@ -690,6 +674,7 @@ struct cxl_dport { struct cxl_regs regs; struct access_coordinate coord[ACCESS_COORDINATE_MAX]; long link_latency; + int gpf_dvsec; }; /** @@ -821,7 +806,8 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, struct cxl_endpoint_dvsec_info *info); int devm_cxl_add_passthrough_decoder(struct cxl_port *port); -int cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, +struct cxl_dev_state; +int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, struct cxl_endpoint_dvsec_info *info); bool is_cxl_region(struct device *dev); @@ -864,7 +850,6 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, struct cxl_port *port); struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev); bool is_cxl_nvdimm(struct device *dev); -bool is_cxl_nvdimm_bridge(struct device *dev); int devm_cxl_add_nvdimm(struct cxl_port *parent_port, struct cxl_memdev *cxlmd); struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port); @@ -874,6 +859,7 @@ struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev); int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled); struct cxl_dax_region *to_cxl_dax_region(struct device *dev); +u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa); #else static inline bool is_cxl_pmem_region(struct device *dev) { @@ -892,6 +878,11 @@ static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev) { return NULL; } +static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, + u64 spa) +{ + return 0; +} #endif void cxl_endpoint_parse_cdat(struct cxl_port *port); @@ -919,4 +910,6 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); #define __mock static #endif +u16 cxl_gpf_get_dvsec(struct device *dev); + #endif /* __CXL_H__ */ diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 2a25d1957ddb..3ec6b906371b 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -97,6 +97,19 @@ int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped); +#define CXL_NR_PARTITIONS_MAX 2 + +struct cxl_dpa_info { + u64 size; + struct cxl_dpa_part_info { + struct range range; + enum cxl_partition_mode mode; + } part[CXL_NR_PARTITIONS_MAX]; + int nr_partitions; +}; + +int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info); + static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port, struct cxl_memdev *cxlmd) { @@ -106,42 +119,6 @@ static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port, return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev); } -/** - * struct cxl_mbox_cmd - A command to be submitted to hardware. - * @opcode: (input) The command set and command submitted to hardware. - * @payload_in: (input) Pointer to the input payload. - * @payload_out: (output) Pointer to the output payload. Must be allocated by - * the caller. - * @size_in: (input) Number of bytes to load from @payload_in. - * @size_out: (input) Max number of bytes loaded into @payload_out. - * (output) Number of bytes generated by the device. For fixed size - * outputs commands this is always expected to be deterministic. For - * variable sized output commands, it tells the exact number of bytes - * written. - * @min_out: (input) internal command output payload size validation - * @poll_count: (input) Number of timeouts to attempt. - * @poll_interval_ms: (input) Time between mailbox background command polling - * interval timeouts. - * @return_code: (output) Error code returned from hardware. - * - * This is the primary mechanism used to send commands to the hardware. - * All the fields except @payload_* correspond exactly to the fields described in - * Command Register section of the CXL 2.0 8.2.8.4.5. @payload_in and - * @payload_out are written to, and read from the Command Payload Registers - * defined in CXL 2.0 8.2.8.4.8. - */ -struct cxl_mbox_cmd { - u16 opcode; - void *payload_in; - void *payload_out; - size_t size_in; - size_t size_out; - size_t min_out; - int poll_count; - int poll_interval_ms; - u16 return_code; -}; - /* * Per CXL 3.0 Section 8.2.8.4.5.1 */ @@ -409,6 +386,18 @@ struct cxl_dpa_perf { }; /** + * struct cxl_dpa_partition - DPA partition descriptor + * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res) + * @perf: performance attributes of the partition from CDAT + * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic... + */ +struct cxl_dpa_partition { + struct resource res; + struct cxl_dpa_perf perf; + enum cxl_partition_mode mode; +}; + +/** * struct cxl_dev_state - The driver device state * * cxl_dev_state represents the CXL driver/device state. It provides an @@ -423,11 +412,12 @@ struct cxl_dpa_perf { * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) * @media_ready: Indicate whether the device media is usable * @dpa_res: Overall DPA resource tree for the device - * @pmem_res: Active Persistent memory capacity configuration - * @ram_res: Active Volatile memory capacity configuration + * @part: DPA partition array + * @nr_partitions: Number of DPA partitions * @serial: PCIe Device Serial Number * @type: Generic Memory Class device or Vendor Specific Memory device * @cxl_mbox: CXL mailbox context + * @cxlfs: CXL features context */ struct cxl_dev_state { struct device *dev; @@ -438,13 +428,28 @@ struct cxl_dev_state { bool rcd; bool media_ready; struct resource dpa_res; - struct resource pmem_res; - struct resource ram_res; + struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX]; + unsigned int nr_partitions; u64 serial; enum cxl_devtype type; struct cxl_mailbox cxl_mbox; +#ifdef CONFIG_CXL_FEATURES + struct cxl_features_state *cxlfs; +#endif }; +static inline resource_size_t cxl_pmem_size(struct cxl_dev_state *cxlds) +{ + /* + * Static PMEM may be at partition index 0 when there is no static RAM + * capacity. + */ + for (int i = 0; i < cxlds->nr_partitions; i++) + if (cxlds->part[i].mode == CXL_PARTMODE_PMEM) + return resource_size(&cxlds->part[i].res); + return 0; +} + static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox) { return dev_get_drvdata(cxl_mbox->host); @@ -461,22 +466,17 @@ static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox) * @lsa_size: Size of Label Storage Area * (CXL 2.0 8.2.9.5.1.1 Identify Memory Device) * @firmware_version: Firmware version for the memory device. - * @enabled_cmds: Hardware commands found enabled in CEL. - * @exclusive_cmds: Commands that are kernel-internal only * @total_bytes: sum of all possible capacities * @volatile_only_bytes: hard volatile capacity * @persistent_only_bytes: hard persistent capacity * @partition_align_bytes: alignment size for partition-able capacity * @active_volatile_bytes: sum of hard + soft volatile * @active_persistent_bytes: sum of hard + soft persistent - * @next_volatile_bytes: volatile capacity change pending device reset - * @next_persistent_bytes: persistent capacity change pending device reset - * @ram_perf: performance data entry matched to RAM partition - * @pmem_perf: performance data entry matched to PMEM partition * @event: event log driver state * @poison: poison driver state info * @security: security driver state info * @fw: firmware upload / activation state + * @mce_notifier: MCE notifier * * See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for * details on capacity parameters. @@ -485,24 +485,18 @@ struct cxl_memdev_state { struct cxl_dev_state cxlds; size_t lsa_size; char firmware_version[0x10]; - DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX); - DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX); u64 total_bytes; u64 volatile_only_bytes; u64 persistent_only_bytes; u64 partition_align_bytes; u64 active_volatile_bytes; u64 active_persistent_bytes; - u64 next_volatile_bytes; - u64 next_persistent_bytes; - - struct cxl_dpa_perf ram_perf; - struct cxl_dpa_perf pmem_perf; struct cxl_event_state event; struct cxl_poison_state poison; struct cxl_security_state security; struct cxl_fw_state fw; + struct notifier_block mce_notifier; }; static inline struct cxl_memdev_state * @@ -530,6 +524,9 @@ enum cxl_opcode { CXL_MBOX_OP_GET_LOG_CAPS = 0x0402, CXL_MBOX_OP_CLEAR_LOG = 0x0403, CXL_MBOX_OP_GET_SUP_LOG_SUBLIST = 0x0405, + CXL_MBOX_OP_GET_SUPPORTED_FEATURES = 0x0500, + CXL_MBOX_OP_GET_FEATURE = 0x0501, + CXL_MBOX_OP_SET_FEATURE = 0x0502, CXL_MBOX_OP_IDENTIFY = 0x4000, CXL_MBOX_OP_GET_PARTITION_INFO = 0x4100, CXL_MBOX_OP_SET_PARTITION_INFO = 0x4101, @@ -693,6 +690,23 @@ struct cxl_mbox_set_partition_info { #define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0) +/* Get Health Info Output Payload CXL 3.2 Spec 8.2.10.9.3.1 Table 8-148 */ +struct cxl_mbox_get_health_info_out { + u8 health_status; + u8 media_status; + u8 additional_status; + u8 life_used; + __le16 device_temperature; + __le32 dirty_shutdown_cnt; + __le32 corrected_volatile_error_cnt; + __le32 corrected_persistent_error_cnt; +} __packed; + +/* Set Shutdown State Input Payload CXL 3.2 Spec 8.2.10.9.3.5 Table 8-152 */ +struct cxl_mbox_set_shutdown_state_in { + u8 state; +} __packed; + /* Set Timestamp CXL 3.0 Spec 8.2.9.4.2 */ struct cxl_mbox_set_timestamp_in { __le64 timestamp; @@ -818,7 +832,7 @@ int cxl_internal_send_cmd(struct cxl_mailbox *cxl_mbox, int cxl_dev_state_identify(struct cxl_memdev_state *mds); int cxl_await_media_ready(struct cxl_dev_state *cxlds); int cxl_enumerate_cmds(struct cxl_memdev_state *mds); -int cxl_mem_create_range_info(struct cxl_memdev_state *mds); +int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info); struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev); void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds); @@ -829,6 +843,8 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, enum cxl_event_log_type type, enum cxl_event_type event_type, const uuid_t *uuid, union cxl_event *evt); +int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count); +int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds); int cxl_set_timestamp(struct cxl_memdev_state *mds); int cxl_poison_state_init(struct cxl_memdev_state *mds); int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 4da07727ab9c..54e219b0049e 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -40,6 +40,12 @@ /* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */ #define CXL_DVSEC_PORT_GPF 4 +#define CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET 0x0C +#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK GENMASK(3, 0) +#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK GENMASK(11, 8) +#define CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE +#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK GENMASK(3, 0) +#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK GENMASK(11, 8) /* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */ #define CXL_DVSEC_DEVICE_GPF 5 diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 2f03a4d5606e..9675243bd05b 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -152,7 +152,7 @@ static int cxl_mem_probe(struct device *dev) return -ENXIO; } - if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) { + if (cxl_pmem_size(cxlds) && IS_ENABLED(CONFIG_CXL_PMEM)) { rc = devm_cxl_add_nvdimm(parent_port, cxlmd); if (rc) { if (rc == -ENODEV) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 6d94ff4a4f1a..785aa2af5eaa 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -903,11 +903,13 @@ __ATTRIBUTE_GROUPS(cxl_rcd); static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); + struct cxl_dpa_info range_info = { 0 }; struct cxl_memdev_state *mds; struct cxl_dev_state *cxlds; struct cxl_register_map map; struct cxl_memdev *cxlmd; - int i, rc, pmu_count; + int rc, pmu_count; + unsigned int i; bool irq_avail; /* @@ -992,10 +994,18 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - rc = cxl_mem_create_range_info(mds); + rc = cxl_mem_dpa_fetch(mds, &range_info); if (rc) return rc; + rc = cxl_dpa_setup(cxlds, &range_info); + if (rc) + return rc; + + rc = devm_cxl_setup_features(cxlds); + if (rc) + dev_dbg(&pdev->dev, "No CXL Features discovered\n"); + cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); @@ -1008,7 +1018,14 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; + rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd); + if (rc) + dev_dbg(&pdev->dev, "No CXL FWCTL setup\n"); + pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU); + if (pmu_count < 0) + return pmu_count; + for (i = 0; i < pmu_count; i++) { struct cxl_pmu_regs pmu_regs; diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index f9c95996e937..e197883690ef 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -42,15 +42,44 @@ static ssize_t id_show(struct device *dev, struct device_attribute *attr, char * } static DEVICE_ATTR_RO(id); +static ssize_t dirty_shutdown_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); + + return sysfs_emit(buf, "%llu\n", cxl_nvd->dirty_shutdowns); +} +static DEVICE_ATTR_RO(dirty_shutdown); + static struct attribute *cxl_dimm_attributes[] = { &dev_attr_id.attr, &dev_attr_provider.attr, + &dev_attr_dirty_shutdown.attr, NULL }; +#define CXL_INVALID_DIRTY_SHUTDOWN_COUNT ULLONG_MAX +static umode_t cxl_dimm_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + if (a == &dev_attr_dirty_shutdown.attr) { + struct device *dev = kobj_to_dev(kobj); + struct nvdimm *nvdimm = to_nvdimm(dev); + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); + + if (cxl_nvd->dirty_shutdowns == + CXL_INVALID_DIRTY_SHUTDOWN_COUNT) + return 0; + } + + return a->mode; +} + static const struct attribute_group cxl_dimm_attribute_group = { .name = "cxl", .attrs = cxl_dimm_attributes, + .is_visible = cxl_dimm_visible }; static const struct attribute_group *cxl_dimm_attribute_groups[] = { @@ -58,6 +87,38 @@ static const struct attribute_group *cxl_dimm_attribute_groups[] = { NULL }; +static void cxl_nvdimm_arm_dirty_shutdown_tracking(struct cxl_nvdimm *cxl_nvd) +{ + struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct device *dev = &cxl_nvd->dev; + u32 count; + + /* + * Dirty tracking is enabled and exposed to the user, only when: + * - dirty shutdown on the device can be set, and, + * - the device has a Device GPF DVSEC (albeit unused), and, + * - the Get Health Info cmd can retrieve the device's dirty count. + */ + cxl_nvd->dirty_shutdowns = CXL_INVALID_DIRTY_SHUTDOWN_COUNT; + + if (cxl_arm_dirty_shutdown(mds)) { + dev_warn(dev, "GPF: could not set dirty shutdown state\n"); + return; + } + + if (!cxl_gpf_get_dvsec(cxlds->dev)) + return; + + if (cxl_get_dirty_count(mds, &count)) { + dev_warn(dev, "GPF: could not retrieve dirty count\n"); + return; + } + + cxl_nvd->dirty_shutdowns = count; +} + static int cxl_nvdimm_probe(struct device *dev) { struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev); @@ -78,6 +139,14 @@ static int cxl_nvdimm_probe(struct device *dev) set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask); + + /* + * Set dirty shutdown now, with the expectation that the device + * clear it upon a successful GPF flow. The exception to this + * is upon Viral detection, per CXL 3.2 section 12.4.2. + */ + cxl_nvdimm_arm_dirty_shutdown_tracking(cxl_nvd); + nvdimm = __nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, cxl_dimm_attribute_groups, flags, cmd_mask, 0, NULL, cxl_nvd->dev_id, @@ -375,6 +444,16 @@ static int cxl_pmem_region_probe(struct device *dev) goto out_nvd; } + if (cxlds->serial == 0) { + /* include missing alongside invalid in this error message. */ + dev_err(dev, "%s: invalid or missing serial number\n", + dev_name(&cxlmd->dev)); + rc = -ENXIO; + goto out_nvd; + } + info[i].serial = cxlds->serial; + info[i].offset = m->start; + m->cxl_nvd = cxl_nvd; mappings[i] = (struct nd_mapping_desc) { .nvdimm = nvdimm, @@ -382,8 +461,6 @@ static int cxl_pmem_region_probe(struct device *dev) .size = m->size, .position = i, }; - info[i].offset = m->start; - info[i].serial = cxlds->serial; } ndr_desc.num_mappings = cxlr_pmem->nr_mappings; ndr_desc.mapping = mappings; diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 4c83f6a22e58..a35fc5552845 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -98,7 +98,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) struct cxl_port *root; int rc; - rc = cxl_dvsec_rr_decode(cxlds->dev, port, &info); + rc = cxl_dvsec_rr_decode(cxlds, &info); if (rc < 0) return rc; @@ -153,7 +153,7 @@ static int cxl_port_probe(struct device *dev) } static ssize_t CDAT_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, + const struct bin_attribute *bin_attr, char *buf, loff_t offset, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -170,7 +170,7 @@ static ssize_t CDAT_read(struct file *filp, struct kobject *kobj, port->cdat.length); } -static BIN_ATTR_ADMIN_RO(CDAT, 0); +static const BIN_ATTR_ADMIN_RO(CDAT, 0); static umode_t cxl_port_bin_attr_is_visible(struct kobject *kobj, const struct bin_attribute *attr, int i) @@ -184,13 +184,13 @@ static umode_t cxl_port_bin_attr_is_visible(struct kobject *kobj, return 0; } -static struct bin_attribute *cxl_cdat_bin_attributes[] = { +static const struct bin_attribute *const cxl_cdat_bin_attributes[] = { &bin_attr_CDAT, NULL, }; -static struct attribute_group cxl_cdat_attribute_group = { - .bin_attrs = cxl_cdat_bin_attributes, +static const struct attribute_group cxl_cdat_attribute_group = { + .bin_attrs_new = cxl_cdat_bin_attributes, .is_bin_visible = cxl_port_bin_attr_is_visible, }; |