diff options
-rw-r--r-- | arch/x86/include/asm/pmem.h | 5 | ||||
-rw-r--r-- | drivers/acpi/nfit.c | 798 | ||||
-rw-r--r-- | drivers/acpi/nfit.h | 30 | ||||
-rw-r--r-- | drivers/nvdimm/blk.c | 18 | ||||
-rw-r--r-- | drivers/nvdimm/btt.c | 19 | ||||
-rw-r--r-- | drivers/nvdimm/bus.c | 131 | ||||
-rw-r--r-- | drivers/nvdimm/core.c | 110 | ||||
-rw-r--r-- | drivers/nvdimm/dimm_devs.c | 6 | ||||
-rw-r--r-- | drivers/nvdimm/namespace_devs.c | 7 | ||||
-rw-r--r-- | drivers/nvdimm/nd.h | 4 | ||||
-rw-r--r-- | drivers/nvdimm/pfn.h | 23 | ||||
-rw-r--r-- | drivers/nvdimm/pfn_devs.c | 61 | ||||
-rw-r--r-- | drivers/nvdimm/pmem.c | 219 | ||||
-rw-r--r-- | drivers/nvdimm/region.c | 12 | ||||
-rw-r--r-- | include/linux/ioport.h | 1 | ||||
-rw-r--r-- | include/linux/libnvdimm.h | 5 | ||||
-rw-r--r-- | include/linux/nd.h | 7 | ||||
-rw-r--r-- | include/linux/pmem.h | 19 | ||||
-rw-r--r-- | include/uapi/linux/ndctl.h | 13 | ||||
-rw-r--r-- | kernel/resource.c | 60 | ||||
-rw-r--r-- | tools/testing/nvdimm/test/nfit.c | 285 |
21 files changed, 1394 insertions, 439 deletions
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index c57fd1ea9689..bf8b35d2035a 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -137,6 +137,11 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size) arch_wb_cache_pmem(addr, size); } +static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) +{ + clflush_cache_range((void __force *) addr, size); +} + static inline bool __arch_has_wmb_pmem(void) { /* diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 35947ac87644..d0f35e63640b 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -21,6 +21,7 @@ #include <linux/sort.h> #include <linux/pmem.h> #include <linux/io.h> +#include <linux/nd.h> #include <asm/cacheflush.h> #include "nfit.h" @@ -34,6 +35,18 @@ static bool force_enable_dimms; module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status"); +static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT; +module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds"); + +/* after three payloads of overflow, it's dead jim */ +static unsigned int scrub_overflow_abort = 3; +module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(scrub_overflow_abort, + "Number of times we overflow ARS results before abort"); + +static struct workqueue_struct *nfit_wq; + struct nfit_table_prev { struct list_head spas; struct list_head memdevs; @@ -72,9 +85,90 @@ static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc) return to_acpi_device(acpi_desc->dev); } +static int xlat_status(void *buf, unsigned int cmd) +{ + struct nd_cmd_clear_error *clear_err; + struct nd_cmd_ars_status *ars_status; + struct nd_cmd_ars_start *ars_start; + struct nd_cmd_ars_cap *ars_cap; + u16 flags; + + switch (cmd) { + case ND_CMD_ARS_CAP: + ars_cap = buf; + if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE) + return -ENOTTY; + + /* Command failed */ + if (ars_cap->status & 0xffff) + return -EIO; + + /* No supported scan types for this range */ + flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE; + if ((ars_cap->status >> 16 & flags) == 0) + return -ENOTTY; + break; + case ND_CMD_ARS_START: + ars_start = buf; + /* ARS is in progress */ + if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY) + return -EBUSY; + + /* Command failed */ + if (ars_start->status & 0xffff) + return -EIO; + break; + case ND_CMD_ARS_STATUS: + ars_status = buf; + /* Command failed */ + if (ars_status->status & 0xffff) + return -EIO; + /* Check extended status (Upper two bytes) */ + if (ars_status->status == NFIT_ARS_STATUS_DONE) + return 0; + + /* ARS is in progress */ + if (ars_status->status == NFIT_ARS_STATUS_BUSY) + return -EBUSY; + + /* No ARS performed for the current boot */ + if (ars_status->status == NFIT_ARS_STATUS_NONE) + return -EAGAIN; + + /* + * ARS interrupted, either we overflowed or some other + * agent wants the scan to stop. If we didn't overflow + * then just continue with the returned results. + */ + if (ars_status->status == NFIT_ARS_STATUS_INTR) { + if (ars_status->flags & NFIT_ARS_F_OVERFLOW) + return -ENOSPC; + return 0; + } + + /* Unknown status */ + if (ars_status->status >> 16) + return -EIO; + break; + case ND_CMD_CLEAR_ERROR: + clear_err = buf; + if (clear_err->status & 0xffff) + return -EIO; + if (!clear_err->cleared) + return -EIO; + if (clear_err->length > clear_err->cleared) + return clear_err->cleared; + break; + default: + break; + } + + return 0; +} + static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, - unsigned int buf_len) + unsigned int buf_len, int *cmd_rc) { struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); const struct nd_cmd_desc *desc = NULL; @@ -185,6 +279,8 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, * unfilled in the output buffer */ rc = buf_len - offset - in_buf.buffer.length; + if (cmd_rc) + *cmd_rc = xlat_status(buf, cmd); } else { dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n", __func__, dimm_name, cmd_name, buf_len, @@ -675,12 +771,11 @@ static struct attribute_group acpi_nfit_attribute_group = { .attrs = acpi_nfit_attributes, }; -const struct attribute_group *acpi_nfit_attribute_groups[] = { +static const struct attribute_group *acpi_nfit_attribute_groups[] = { &nvdimm_bus_attribute_group, &acpi_nfit_attribute_group, NULL, }; -EXPORT_SYMBOL_GPL(acpi_nfit_attribute_groups); static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev) { @@ -917,7 +1012,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) if (!adev) return; - for (i = ND_CMD_ARS_CAP; i <= ND_CMD_ARS_STATUS; i++) + for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++) if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i)) set_bit(i, &nd_desc->dsm_mask); } @@ -1105,7 +1200,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, writeq(cmd, mmio->addr.base + offset); wmb_blk(nfit_blk); - if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH) + if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH) readq(mmio->addr.base + offset); } @@ -1141,7 +1236,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, memcpy_to_pmem(mmio->addr.aperture + offset, iobuf + copied, c); else { - if (nfit_blk->dimm_flags & ND_BLK_READ_FLUSH) + if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH) mmio_flush_range((void __force *) mmio->addr.aperture + offset, c); @@ -1328,13 +1423,13 @@ static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc, memset(&flags, 0, sizeof(flags)); rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags, - sizeof(flags)); + sizeof(flags), NULL); if (rc >= 0 && flags.status == 0) nfit_blk->dimm_flags = flags.flags; else if (rc == -ENOTTY) { /* fall back to a conservative default */ - nfit_blk->dimm_flags = ND_BLK_DCR_LATCH | ND_BLK_READ_FLUSH; + nfit_blk->dimm_flags = NFIT_BLK_DCR_LATCH | NFIT_BLK_READ_FLUSH; rc = 0; } else rc = -ENXIO; @@ -1473,93 +1568,85 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus, /* devm will free nfit_blk */ } -static int ars_get_cap(struct nvdimm_bus_descriptor *nd_desc, - struct nd_cmd_ars_cap *cmd, u64 addr, u64 length) +static int ars_get_cap(struct acpi_nfit_desc *acpi_desc, + struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa) { - cmd->address = addr; - cmd->length = length; + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + struct acpi_nfit_system_address *spa = nfit_spa->spa; + int cmd_rc, rc; - return nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd, - sizeof(*cmd)); + cmd->address = spa->address; + cmd->length = spa->length; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd, + sizeof(*cmd), &cmd_rc); + if (rc < 0) + return rc; + return cmd_rc; } -static int ars_do_start(struct nvdimm_bus_descriptor *nd_desc, - struct nd_cmd_ars_start *cmd, u64 addr, u64 length) +static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa) { int rc; + int cmd_rc; + struct nd_cmd_ars_start ars_start; + struct acpi_nfit_system_address *spa = nfit_spa->spa; + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; - cmd->address = addr; - cmd->length = length; - cmd->type = ND_ARS_PERSISTENT; + memset(&ars_start, 0, sizeof(ars_start)); + ars_start.address = spa->address; + ars_start.length = spa->length; + if (nfit_spa_type(spa) == NFIT_SPA_PM) + ars_start.type = ND_ARS_PERSISTENT; + else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) + ars_start.type = ND_ARS_VOLATILE; + else + return -ENOTTY; - while (1) { - rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, cmd, - sizeof(*cmd)); - if (rc) - return rc; - switch (cmd->status) { - case 0: - return 0; - case 1: - /* ARS unsupported, but we should never get here */ - return 0; - case 6: - /* ARS is in progress */ - msleep(1000); - break; - default: - return -ENXIO; - } - } + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start, + sizeof(ars_start), &cmd_rc); + + if (rc < 0) + return rc; + return cmd_rc; } -static int ars_get_status(struct nvdimm_bus_descriptor *nd_desc, - struct nd_cmd_ars_status *cmd, u32 size) +static int ars_continue(struct acpi_nfit_desc *acpi_desc) { - int rc; + int rc, cmd_rc; + struct nd_cmd_ars_start ars_start; + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status; + + memset(&ars_start, 0, sizeof(ars_start)); + ars_start.address = ars_status->restart_address; + ars_start.length = ars_status->restart_length; + ars_start.type = ars_status->type; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start, + sizeof(ars_start), &cmd_rc); + if (rc < 0) + return rc; + return cmd_rc; +} - while (1) { - rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, cmd, - size); - if (rc || cmd->status & 0xffff) - return -ENXIO; +static int ars_get_status(struct acpi_nfit_desc *acpi_desc) +{ + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status; + int rc, cmd_rc; - /* Check extended status (Upper two bytes) */ - switch (cmd->status >> 16) { - case 0: - return 0; - case 1: - /* ARS is in progress */ - msleep(1000); - break; - case 2: - /* No ARS performed for the current boot */ - return 0; - case 3: - /* TODO: error list overflow support */ - default: - return -ENXIO; - } - } + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status, + acpi_desc->ars_status_size, &cmd_rc); + if (rc < 0) + return rc; + return cmd_rc; } static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus, - struct nd_cmd_ars_status *ars_status, u64 start) + struct nd_cmd_ars_status *ars_status) { int rc; u32 i; - /* - * The address field returned by ars_status should be either - * less than or equal to the address we last started ARS for. - * The (start, length) returned by ars_status should also have - * non-zero overlap with the range we started ARS for. - * If this is not the case, bail. - */ - if (ars_status->address > start || - (ars_status->address + ars_status->length < start)) - return -ENXIO; - for (i = 0; i < ars_status->num_records; i++) { rc = nvdimm_bus_add_poison(nvdimm_bus, ars_status->records[i].err_address, @@ -1571,118 +1658,56 @@ static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus, return 0; } -static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc, - struct nd_region_desc *ndr_desc) +static void acpi_nfit_remove_resource(void *data) { - struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; - struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus; - struct nd_cmd_ars_status *ars_status = NULL; - struct nd_cmd_ars_start *ars_start = NULL; - struct nd_cmd_ars_cap *ars_cap = NULL; - u64 start, len, cur, remaining; - u32 ars_status_size; - int rc; - - ars_cap = kzalloc(sizeof(*ars_cap), GFP_KERNEL); - if (!ars_cap) - return -ENOMEM; + struct resource *res = data; - start = ndr_desc->res->start; - len = ndr_desc->res->end - ndr_desc->res->start + 1; - - /* - * If ARS is unimplemented, unsupported, or if the 'Persistent Memory - * Scrub' flag in extended status is not set, skip this but continue - * initialization - */ - rc = ars_get_cap(nd_desc, ars_cap, start, len); - if (rc == -ENOTTY) { - dev_dbg(acpi_desc->dev, - "Address Range Scrub is not implemented, won't create an error list\n"); - rc = 0; - goto out; - } - if (rc) - goto out; - - if ((ars_cap->status & 0xffff) || - !(ars_cap->status >> 16 & ND_ARS_PERSISTENT)) { - dev_warn(acpi_desc->dev, - "ARS unsupported (status: 0x%x), won't create an error list\n", - ars_cap->status); - goto out; - } - - /* - * Check if a full-range ARS has been run. If so, use those results - * without having to start a new ARS. - */ - ars_status_size = ars_cap->max_ars_out; - ars_status = kzalloc(ars_status_size, GFP_KERNEL); - if (!ars_status) { - rc = -ENOMEM; - goto out; - } + remove_resource(res); +} - rc = ars_get_status(nd_desc, ars_status, ars_status_size); - if (rc) - goto out; +static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc, + struct nd_region_desc *ndr_desc) +{ + struct resource *res, *nd_res = ndr_desc->res; + int is_pmem, ret; - if (ars_status->address <= start && - (ars_status->address + ars_status->length >= start + len)) { - rc = ars_status_process_records(nvdimm_bus, ars_status, start); - goto out; - } + /* No operation if the region is already registered as PMEM */ + is_pmem = region_intersects(nd_res->start, resource_size(nd_res), + IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY); + if (is_pmem == REGION_INTERSECTS) + return 0; - /* - * ARS_STATUS can overflow if the number of poison entries found is - * greater than the maximum buffer size (ars_cap->max_ars_out) - * To detect overflow, check if the length field of ars_status - * is less than the length we supplied. If so, process the - * error entries we got, adjust the start point, and start again - */ - ars_start = kzalloc(sizeof(*ars_start), GFP_KERNEL); - if (!ars_start) + res = devm_kzalloc(acpi_desc->dev, sizeof(*res), GFP_KERNEL); + if (!res) return -ENOMEM; - cur = start; - remaining = len; - do { - u64 done, end; - - rc = ars_do_start(nd_desc, ars_start, cur, remaining); - if (rc) - goto out; - - rc = ars_get_status(nd_desc, ars_status, ars_status_size); - if (rc) - goto out; + res->name = "Persistent Memory"; + res->start = nd_res->start; + res->end = nd_res->end; + res->flags = IORESOURCE_MEM; + res->desc = IORES_DESC_PERSISTENT_MEMORY; - rc = ars_status_process_records(nvdimm_bus, ars_status, cur); - if (rc) - goto out; + ret = insert_resource(&iomem_resource, res); + if (ret) + return ret; - end = min(cur + remaining, - ars_status->address + ars_status->length); - done = end - cur; - cur += done; - remaining -= done; - } while (remaining); + ret = devm_add_action(acpi_desc->dev, acpi_nfit_remove_resource, res); + if (ret) { + remove_resource(res); + return ret; + } - out: - kfree(ars_cap); - kfree(ars_start); - kfree(ars_status); - return rc; + return 0; } static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc, struct acpi_nfit_memory_map *memdev, - struct acpi_nfit_system_address *spa) + struct nfit_spa *nfit_spa) { struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, memdev->device_handle); + struct acpi_nfit_system_address *spa = nfit_spa->spa; struct nd_blk_region_desc *ndbr_desc; struct nfit_mem *nfit_mem; int blk_valid = 0; @@ -1718,7 +1743,9 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, ndbr_desc->enable = acpi_nfit_blk_region_enable; ndbr_desc->disable = acpi_nfit_blk_region_disable; ndbr_desc->do_io = acpi_desc->blk_do_io; - if (!nvdimm_blk_region_create(acpi_desc->nvdimm_bus, ndr_desc)) + nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus, + ndr_desc); + if (!nfit_spa->nd_region) return -ENOMEM; break; } @@ -1738,7 +1765,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, struct resource res; int count = 0, rc; - if (nfit_spa->is_registered) + if (nfit_spa->nd_region) return 0; if (spa->range_index == 0) { @@ -1775,47 +1802,332 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, } nd_mapping = &nd_mappings[count++]; rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc, - memdev, spa); + memdev, nfit_spa); if (rc) - return rc; + goto out; } ndr_desc->nd_mapping = nd_mappings; ndr_desc->num_mappings = count; rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa); if (rc) - return rc; + goto out; nvdimm_bus = acpi_desc->nvdimm_bus; if (nfit_spa_type(spa) == NFIT_SPA_PM) { - rc = acpi_nfit_find_poison(acpi_desc, ndr_desc); + rc = acpi_nfit_insert_resource(acpi_desc, ndr_desc); if (rc) { - dev_err(acpi_desc->dev, - "error while performing ARS to find poison: %d\n", + dev_warn(acpi_desc->dev, + "failed to insert pmem resource to iomem: %d\n", rc); - return rc; + goto out; } - if (!nvdimm_pmem_region_create(nvdimm_bus, ndr_desc)) - return -ENOMEM; + + nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus, + ndr_desc); + if (!nfit_spa->nd_region) + rc = -ENOMEM; } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) { - if (!nvdimm_volatile_region_create(nvdimm_bus, ndr_desc)) - return -ENOMEM; + nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus, + ndr_desc); + if (!nfit_spa->nd_region) + rc = -ENOMEM; } - nfit_spa->is_registered = 1; + out: + if (rc) + dev_err(acpi_desc->dev, "failed to register spa range %d\n", + nfit_spa->spa->range_index); + return rc; +} + +static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc, + u32 max_ars) +{ + struct device *dev = acpi_desc->dev; + struct nd_cmd_ars_status *ars_status; + + if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) { + memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size); + return 0; + } + + if (acpi_desc->ars_status) + devm_kfree(dev, acpi_desc->ars_status); + acpi_desc->ars_status = NULL; + ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL); + if (!ars_status) + return -ENOMEM; + acpi_desc->ars_status = ars_status; + acpi_desc->ars_status_size = max_ars; return 0; } -static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) +static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc, + struct nfit_spa *nfit_spa) { + struct acpi_nfit_system_address *spa = nfit_spa->spa; + int rc; + + if (!nfit_spa->max_ars) { + struct nd_cmd_ars_cap ars_cap; + + memset(&ars_cap, 0, sizeof(ars_cap)); + rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa); + if (rc < 0) + return rc; + nfit_spa->max_ars = ars_cap.max_ars_out; + nfit_spa->clear_err_unit = ars_cap.clear_err_unit; + /* check that the supported scrub types match the spa type */ + if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE && + ((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0) + return -ENOTTY; + else if (nfit_spa_type(spa) == NFIT_SPA_PM && + ((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0) + return -ENOTTY; + } + + if (ars_status_alloc(acpi_desc, nfit_spa->max_ars)) + return -ENOMEM; + + rc = ars_get_status(acpi_desc); + if (rc < 0 && rc != -ENOSPC) + return rc; + + if (ars_status_process_records(acpi_desc->nvdimm_bus, + acpi_desc->ars_status)) + return -ENOMEM; + + return 0; +} + +static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc, + struct nfit_spa *nfit_spa) +{ + struct acpi_nfit_system_address *spa = nfit_spa->spa; + unsigned int overflow_retry = scrub_overflow_abort; + u64 init_ars_start = 0, init_ars_len = 0; + struct device *dev = acpi_desc->dev; + unsigned int tmo = scrub_timeout; + int rc; + + if (nfit_spa->ars_done || !nfit_spa->nd_region) + return; + + rc = ars_start(acpi_desc, nfit_spa); + /* + * If we timed out the initial scan we'll still be busy here, + * and will wait another timeout before giving up permanently. + */ + if (rc < 0 && rc != -EBUSY) + return; + + do { + u64 ars_start, ars_len; + + if (acpi_desc->cancel) + break; + rc = acpi_nfit_query_poison(acpi_desc, nfit_spa); + if (rc == -ENOTTY) + break; + if (rc == -EBUSY && !tmo) { + dev_warn(dev, "range %d ars timeout, aborting\n", + spa->range_index); + break; + } + + if (rc == -EBUSY) { + /* + * Note, entries may be appended to the list + * while the lock is dropped, but the workqueue + * being active prevents entries being deleted / + * freed. + */ + mutex_unlock(&acpi_desc->init_mutex); + ssleep(1); + tmo--; + mutex_lock(&acpi_desc->init_mutex); + continue; + } + + /* we got some results, but there are more pending... */ + if (rc == -ENOSPC && overflow_retry--) { + if (!init_ars_len) { + init_ars_len = acpi_desc->ars_status->length; + init_ars_start = acpi_desc->ars_status->address; + } + rc = ars_continue(acpi_desc); + } + + if (rc < 0) { + dev_warn(dev, "range %d ars continuation failed\n", + spa->range_index); + break; + } + + if (init_ars_len) { + ars_start = init_ars_start; + ars_len = init_ars_len; + } else { + ars_start = acpi_desc->ars_status->address; + ars_len = acpi_desc->ars_status->length; + } + dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n", + spa->range_index, ars_start, ars_len); + /* notify the region about new poison entries */ + nvdimm_region_notify(nfit_spa->nd_region, + NVDIMM_REVALIDATE_POISON); + break; + } while (1); +} + +static void acpi_nfit_scrub(struct work_struct *work) +{ + struct device *dev; + u64 init_scrub_length = 0; struct nfit_spa *nfit_spa; + u64 init_scrub_address = 0; + bool init_ars_done = false; + struct acpi_nfit_desc *acpi_desc; + unsigned int tmo = scrub_timeout; + unsigned int overflow_retry = scrub_overflow_abort; + + acpi_desc = container_of(work, typeof(*acpi_desc), work); + dev = acpi_desc->dev; + /* + * We scrub in 2 phases. The first phase waits for any platform + * firmware initiated scrubs to complete and then we go search for the + * affected spa regions to mark them scanned. In the second phase we + * initiate a directed scrub for every range that was not scrubbed in + * phase 1. + */ + + /* process platform firmware initiated scrubs */ + retry: + mutex_lock(&acpi_desc->init_mutex); list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { - int rc = acpi_nfit_register_region(acpi_desc, nfit_spa); + struct nd_cmd_ars_status *ars_status; + struct acpi_nfit_system_address *spa; + u64 ars_start, ars_len; + int rc; - if (rc) - return rc; + if (acpi_desc->cancel) + break; + + if (nfit_spa->nd_region) + continue; + + if (init_ars_done) { + /* + * No need to re-query, we're now just + * reconciling all the ranges covered by the + * initial scrub + */ + rc = 0; + } else + rc = acpi_nfit_query_poison(acpi_desc, nfit_spa); + + if (rc == -ENOTTY) { + /* no ars capability, just register spa and move on */ + acpi_nfit_register_region(acpi_desc, nfit_spa); + continue; + } + + if (rc == -EBUSY && !tmo) { + /* fallthrough to directed scrub in phase 2 */ + dev_warn(dev, "timeout awaiting ars results, continuing...\n"); + break; + } else if (rc == -EBUSY) { + mutex_unlock(&acpi_desc->init_mutex); + ssleep(1); + tmo--; + goto retry; + } + + /* we got some results, but there are more pending... */ + if (rc == -ENOSPC && overflow_retry--) { + ars_status = acpi_desc->ars_status; + /* + * Record the original scrub range, so that we + * can recall all the ranges impacted by the + * initial scrub. + */ + if (!init_scrub_length) { + init_scrub_length = ars_status->length; + init_scrub_address = ars_status->address; + } + rc = ars_continue(acpi_desc); + if (rc == 0) { + mutex_unlock(&acpi_desc->init_mutex); + goto retry; + } + } + + if (rc < 0) { + /* + * Initial scrub failed, we'll give it one more + * try below... + */ + break; + } + + /* We got some final results, record completed ranges */ + ars_status = acpi_desc->ars_status; + if (init_scrub_length) { + ars_start = init_scrub_address; + ars_len = ars_start + init_scrub_length; + } else { + ars_start = ars_status->address; + ars_len = ars_status->length; + } + spa = nfit_spa->spa; + + if (!init_ars_done) { + init_ars_done = true; + dev_dbg(dev, "init scrub %#llx + %#llx complete\n", + ars_start, ars_len); + } + if (ars_start <= spa->address && ars_start + ars_len + >= spa->address + spa->length) + acpi_nfit_register_region(acpi_desc, nfit_spa); } + + /* + * For all the ranges not covered by an initial scrub we still + * want to see if there are errors, but it's ok to discover them + * asynchronously. + */ + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { + /* + * Flag all the ranges that still need scrubbing, but + * register them now to make data available. + */ + if (nfit_spa->nd_region) + nfit_spa->ars_done = 1; + else + acpi_nfit_register_region(acpi_desc, nfit_spa); + } + + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) + acpi_nfit_async_scrub(acpi_desc, nfit_spa); + mutex_unlock(&acpi_desc->init_mutex); +} + +static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) +{ + struct nfit_spa *nfit_spa; + int rc; + + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) + if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) { + /* BLK regions don't need to wait for ars results */ + rc = acpi_nfit_register_region(acpi_desc, nfit_spa); + if (rc) + return rc; + } + + queue_work(nfit_wq, &acpi_desc->work); return 0; } @@ -1901,15 +2213,64 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) } EXPORT_SYMBOL_GPL(acpi_nfit_init); -static struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev) +struct acpi_nfit_flush_work { + struct work_struct work; + struct completion cmp; +}; + +static void flush_probe(struct work_struct *work) { - struct nvdimm_bus_descriptor *nd_desc; - struct acpi_nfit_desc *acpi_desc; - struct device *dev = &adev->dev; + struct acpi_nfit_flush_work *flush; - acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); - if (!acpi_desc) - return ERR_PTR(-ENOMEM); + flush = container_of(work, typeof(*flush), work); + complete(&flush->cmp); +} + +static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + struct device *dev = acpi_desc->dev; + struct acpi_nfit_flush_work flush; + + /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */ + device_lock(dev); + device_unlock(dev); + + /* + * Scrub work could take 10s of seconds, userspace may give up so we + * need to be interruptible while waiting. + */ + INIT_WORK_ONSTACK(&flush.work, flush_probe); + COMPLETION_INITIALIZER_ONSTACK(flush.cmp); + queue_work(nfit_wq, &flush.work); + return wait_for_completion_interruptible(&flush.cmp); +} + +static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + + if (nvdimm) + return 0; + if (cmd != ND_CMD_ARS_START) + return 0; + + /* + * The kernel and userspace may race to initiate a scrub, but + * the scrub thread is prepared to lose that initial race. It + * just needs guarantees that any ars it initiates are not + * interrupted by any intervening start reqeusts from userspace. + */ + if (work_busy(&acpi_desc->work)) + return -EBUSY; + + return 0; +} + +void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) +{ + struct nvdimm_bus_descriptor *nd_desc; dev_set_drvdata(dev, acpi_desc); acpi_desc->dev = dev; @@ -1917,14 +2278,10 @@ static struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev) nd_desc = &acpi_desc->nd_desc; nd_desc->provider_name = "ACPI.NFIT"; nd_desc->ndctl = acpi_nfit_ctl; + nd_desc->flush_probe = acpi_nfit_flush_probe; + nd_desc->clear_to_send = acpi_nfit_clear_to_send; nd_desc->attr_groups = acpi_nfit_attribute_groups; - acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, nd_desc); - if (!acpi_desc->nvdimm_bus) { - devm_kfree(dev, acpi_desc); - return ERR_PTR(-ENXIO); - } - INIT_LIST_HEAD(&acpi_desc->spa_maps); INIT_LIST_HEAD(&acpi_desc->spas); INIT_LIST_HEAD(&acpi_desc->dcrs); @@ -1935,9 +2292,9 @@ static struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev) INIT_LIST_HEAD(&acpi_desc->dimms); mutex_init(&acpi_desc->spa_map_mutex); mutex_init(&acpi_desc->init_mutex); - - return acpi_desc; + INIT_WORK(&acpi_desc->work, acpi_nfit_scrub); } +EXPORT_SYMBOL_GPL(acpi_nfit_desc_init); static int acpi_nfit_add(struct acpi_device *adev) { @@ -1956,12 +2313,13 @@ static int acpi_nfit_add(struct acpi_device *adev) return 0; } - acpi_desc = acpi_nfit_desc_init(adev); - if (IS_ERR(acpi_desc)) { - dev_err(dev, "%s: error initializing acpi_desc: %ld\n", - __func__, PTR_ERR(acpi_desc)); - return PTR_ERR(acpi_desc); - } + acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); + if (!acpi_desc) + return -ENOMEM; + acpi_nfit_desc_init(acpi_desc, &adev->dev); + acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc); + if (!acpi_desc->nvdimm_bus) + return -ENOMEM; /* * Save the acpi header for later and then skip it, @@ -2000,6 +2358,8 @@ static int acpi_nfit_remove(struct acpi_device *adev) { struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); + acpi_desc->cancel = 1; + flush_workqueue(nfit_wq); nvdimm_bus_unregister(acpi_desc->nvdimm_bus); return 0; } @@ -2024,12 +2384,19 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event) } if (!acpi_desc) { - acpi_desc = acpi_nfit_desc_init(adev); - if (IS_ERR(acpi_desc)) { - dev_err(dev, "%s: error initializing acpi_desc: %ld\n", - __func__, PTR_ERR(acpi_desc)); + acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); + if (!acpi_desc) goto out_unlock; - } + acpi_nfit_desc_init(acpi_desc, &adev->dev); + acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc); + if (!acpi_desc->nvdimm_bus) + goto out_unlock; + } else { + /* + * Finish previous registration before considering new + * regions. + */ + flush_workqueue(nfit_wq); } /* Evaluate _FIT */ @@ -2097,12 +2464,17 @@ static __init int nfit_init(void) acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]); acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]); + nfit_wq = create_singlethread_workqueue("nfit"); + if (!nfit_wq) + return -ENOMEM; + return acpi_bus_register_driver(&acpi_nfit_driver); } static __exit void nfit_exit(void) { acpi_bus_unregister_driver(&acpi_nfit_driver); + destroy_workqueue(nfit_wq); } module_init(nfit_init); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 3d549a383659..c75576b2d50e 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -14,6 +14,7 @@ */ #ifndef __NFIT_H__ #define __NFIT_H__ +#include <linux/workqueue.h> #include <linux/libnvdimm.h> #include <linux/types.h> #include <linux/uuid.h> @@ -40,15 +41,32 @@ enum nfit_uuids { NFIT_UUID_MAX, }; +enum nfit_fic { + NFIT_FIC_BYTE = 0x101, /* byte-addressable energy backed */ + NFIT_FIC_BLK = 0x201, /* block-addressable non-energy backed */ + NFIT_FIC_BYTEN = 0x301, /* byte-addressable non-energy backed */ +}; + enum { - ND_BLK_READ_FLUSH = 1, - ND_BLK_DCR_LATCH = 2, + NFIT_BLK_READ_FLUSH = 1, + NFIT_BLK_DCR_LATCH = 2, + NFIT_ARS_STATUS_DONE = 0, + NFIT_ARS_STATUS_BUSY = 1 << 16, + NFIT_ARS_STATUS_NONE = 2 << 16, + NFIT_ARS_STATUS_INTR = 3 << 16, + NFIT_ARS_START_BUSY = 6, + NFIT_ARS_CAP_NONE = 1, + NFIT_ARS_F_OVERFLOW = 1, + NFIT_ARS_TIMEOUT = 90, }; struct nfit_spa { struct acpi_nfit_system_address *spa; struct list_head list; - int is_registered; + struct nd_region *nd_region; + unsigned int ars_done:1; + u32 clear_err_unit; + u32 max_ars; }; struct nfit_dcr { @@ -110,6 +128,10 @@ struct acpi_nfit_desc { struct list_head idts; struct nvdimm_bus *nvdimm_bus; struct device *dev; + struct nd_cmd_ars_status *ars_status; + size_t ars_status_size; + struct work_struct work; + unsigned int cancel:1; unsigned long dimm_dsm_force_en; unsigned long bus_dsm_force_en; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, @@ -182,5 +204,5 @@ static inline struct acpi_nfit_desc *to_acpi_desc( const u8 *to_nfit_uuid(enum nfit_uuids id); int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz); -extern const struct attribute_group *acpi_nfit_attribute_groups[]; +void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev); #endif /* __NFIT_H__ */ diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 91a336ea8c4f..e9ff9229d942 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -31,8 +31,6 @@ struct nd_blk_device { u32 internal_lbasize; }; -static int nd_blk_major; - static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev) { return blk_dev->nsblk->lbasize - blk_dev->sector_size; @@ -264,7 +262,6 @@ static int nd_blk_attach_disk(struct nd_namespace_common *ndns, } disk->driverfs_dev = &ndns->dev; - disk->major = nd_blk_major; disk->first_minor = 0; disk->fops = &nd_blk_fops; disk->private_data = blk_dev; @@ -358,25 +355,12 @@ static struct nd_device_driver nd_blk_driver = { static int __init nd_blk_init(void) { - int rc; - - rc = register_blkdev(0, "nd_blk"); - if (rc < 0) - return rc; - - nd_blk_major = rc; - rc = nd_driver_register(&nd_blk_driver); - - if (rc < 0) - unregister_blkdev(nd_blk_major, "nd_blk"); - - return rc; + return nd_driver_register(&nd_blk_driver); } static void __exit nd_blk_exit(void) { driver_unregister(&nd_blk_driver.drv); - unregister_blkdev(nd_blk_major, "nd_blk"); } MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>"); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index efb2c1ceef98..c32cbb593600 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -31,8 +31,6 @@ enum log_ent_request { LOG_OLD_ENT }; -static int btt_major; - static int arena_read_bytes(struct arena_info *arena, resource_size_t offset, void *buf, size_t n) { @@ -1246,7 +1244,6 @@ static int btt_blk_init(struct btt *btt) nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name); btt->btt_disk->driverfs_dev = &btt->nd_btt->dev; - btt->btt_disk->major = btt_major; btt->btt_disk->first_minor = 0; btt->btt_disk->fops = &btt_fops; btt->btt_disk->private_data = btt; @@ -1423,22 +1420,11 @@ EXPORT_SYMBOL(nvdimm_namespace_detach_btt); static int __init nd_btt_init(void) { - int rc; - - btt_major = register_blkdev(0, "btt"); - if (btt_major < 0) - return btt_major; + int rc = 0; debugfs_root = debugfs_create_dir("btt", NULL); - if (IS_ERR_OR_NULL(debugfs_root)) { + if (IS_ERR_OR_NULL(debugfs_root)) rc = -ENXIO; - goto err_debugfs; - } - - return 0; - - err_debugfs: - unregister_blkdev(btt_major, "btt"); return rc; } @@ -1446,7 +1432,6 @@ static int __init nd_btt_init(void) static void __exit nd_btt_exit(void) { debugfs_remove_recursive(debugfs_root); - unregister_blkdev(btt_major, "btt"); } MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT); diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 5d28e9405f32..33557481d452 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -133,6 +133,78 @@ static int nvdimm_bus_remove(struct device *dev) return rc; } +void nd_device_notify(struct device *dev, enum nvdimm_event event) +{ + device_lock(dev); + if (dev->driver) { + struct nd_device_driver *nd_drv; + + nd_drv = to_nd_device_driver(dev->driver); + if (nd_drv->notify) + nd_drv->notify(dev, event); + } + device_unlock(dev); +} +EXPORT_SYMBOL(nd_device_notify); + +void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); + + if (!nvdimm_bus) + return; + + /* caller is responsible for holding a reference on the device */ + nd_device_notify(&nd_region->dev, event); +} +EXPORT_SYMBOL_GPL(nvdimm_region_notify); + +long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, + unsigned int len) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc; + struct nd_cmd_clear_error clear_err; + struct nd_cmd_ars_cap ars_cap; + u32 clear_err_unit, mask; + int cmd_rc, rc; + + if (!nvdimm_bus) + return -ENXIO; + + nd_desc = nvdimm_bus->nd_desc; + if (!nd_desc->ndctl) + return -ENXIO; + + memset(&ars_cap, 0, sizeof(ars_cap)); + ars_cap.address = phys; + ars_cap.length = len; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, &ars_cap, + sizeof(ars_cap), &cmd_rc); + if (rc < 0) + return rc; + if (cmd_rc < 0) + return cmd_rc; + clear_err_unit = ars_cap.clear_err_unit; + if (!clear_err_unit || !is_power_of_2(clear_err_unit)) + return -ENXIO; + + mask = clear_err_unit - 1; + if ((phys | len) & mask) + return -ENXIO; + memset(&clear_err, 0, sizeof(clear_err)); + clear_err.address = phys; + clear_err.length = len; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CLEAR_ERROR, &clear_err, + sizeof(clear_err), &cmd_rc); + if (rc < 0) + return rc; + if (cmd_rc < 0) + return cmd_rc; + return clear_err.cleared; +} +EXPORT_SYMBOL_GPL(nvdimm_clear_poison); + static struct bus_type nvdimm_bus_type = { .name = "nd", .uevent = nvdimm_bus_uevent, @@ -395,6 +467,12 @@ static const struct nd_cmd_desc __nd_cmd_bus_descs[] = { .out_num = 3, .out_sizes = { 4, 4, UINT_MAX, }, }, + [ND_CMD_CLEAR_ERROR] = { + .in_num = 2, + .in_sizes = { 8, 8, }, + .out_num = 3, + .out_sizes = { 4, 4, 8, }, + }, }; const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd) @@ -463,17 +541,37 @@ void wait_nvdimm_bus_probe_idle(struct device *dev) } while (true); } +static int pmem_active(struct device *dev, void *data) +{ + if (is_nd_pmem(dev) && dev->driver) + return -EBUSY; + return 0; +} + /* set_config requires an idle interleave set */ -static int nd_cmd_clear_to_send(struct nvdimm *nvdimm, unsigned int cmd) +static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus, + struct nvdimm *nvdimm, unsigned int cmd) { - struct nvdimm_bus *nvdimm_bus; + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + + /* ask the bus provider if it would like to block this request */ + if (nd_desc->clear_to_send) { + int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd); + + if (rc) + return rc; + } + + /* require clear error to go through the pmem driver */ + if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR) + return device_for_each_child(&nvdimm_bus->dev, NULL, + pmem_active); if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA) return 0; - nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev); + /* prevent label manipulation while the kernel owns label updates */ wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev); - if (atomic_read(&nvdimm->busy)) return -EBUSY; return 0; @@ -513,10 +611,11 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, /* fail write commands (when read-only) */ if (read_only) - switch (ioctl_cmd) { - case ND_IOCTL_VENDOR: - case ND_IOCTL_SET_CONFIG_DATA: - case ND_IOCTL_ARS_START: + switch (cmd) { + case ND_CMD_VENDOR: + case ND_CMD_SET_CONFIG_DATA: + case ND_CMD_ARS_START: + case ND_CMD_CLEAR_ERROR: dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n", nvdimm ? nvdimm_cmd_name(cmd) : nvdimm_bus_cmd_name(cmd)); @@ -583,11 +682,11 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, } nvdimm_bus_lock(&nvdimm_bus->dev); - rc = nd_cmd_clear_to_send(nvdimm, cmd); + rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, cmd); if (rc) goto out_unlock; - rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len); + rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, NULL); if (rc < 0) goto out_unlock; if (copy_to_user(p, buf, buf_len)) @@ -602,14 +701,14 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { long id = (long) file->private_data; - int rc = -ENXIO, read_only; + int rc = -ENXIO, ro; struct nvdimm_bus *nvdimm_bus; - read_only = (O_RDWR != (file->f_flags & O_ACCMODE)); + ro = ((file->f_flags & O_ACCMODE) == O_RDONLY); mutex_lock(&nvdimm_bus_list_mutex); list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) { if (nvdimm_bus->id == id) { - rc = __nd_ioctl(nvdimm_bus, NULL, read_only, cmd, arg); + rc = __nd_ioctl(nvdimm_bus, NULL, ro, cmd, arg); break; } } @@ -633,10 +732,10 @@ static int match_dimm(struct device *dev, void *data) static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - int rc = -ENXIO, read_only; + int rc = -ENXIO, ro; struct nvdimm_bus *nvdimm_bus; - read_only = (O_RDWR != (file->f_flags & O_ACCMODE)); + ro = ((file->f_flags & O_ACCMODE) == O_RDONLY); mutex_lock(&nvdimm_bus_list_mutex); list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) { struct device *dev = device_find_child(&nvdimm_bus->dev, @@ -647,7 +746,7 @@ static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) continue; nvdimm = to_nvdimm(dev); - rc = __nd_ioctl(nvdimm_bus, nvdimm, read_only, cmd, arg); + rc = __nd_ioctl(nvdimm_bus, nvdimm, ro, cmd, arg); put_device(dev); break; } diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 2e2832b83c93..79646d0c3277 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -298,6 +298,15 @@ static int flush_regions_dimms(struct device *dev, void *data) static ssize_t wait_probe_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + int rc; + + if (nd_desc->flush_probe) { + rc = nd_desc->flush_probe(nd_desc); + if (rc) + return rc; + } nd_synchronize(); device_for_each_child(dev, NULL, flush_regions_dimms); return sprintf(buf, "1\n"); @@ -408,33 +417,11 @@ static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len) set_badblock(bb, start_sector, num_sectors); } -/** - * nvdimm_namespace_add_poison() - Convert a list of poison ranges to badblocks - * @ndns: the namespace containing poison ranges - * @bb: badblocks instance to populate - * @offset: offset at the start of the namespace before 'sector 0' - * - * The poison list generated during NFIT initialization may contain multiple, - * possibly overlapping ranges in the SPA (System Physical Address) space. - * Compare each of these ranges to the namespace currently being initialized, - * and add badblocks to the gendisk for all matching sub-ranges - */ -void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns, - struct badblocks *bb, resource_size_t offset) +static void namespace_add_poison(struct list_head *poison_list, + struct badblocks *bb, struct resource *res) { - struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); - struct nd_region *nd_region = to_nd_region(ndns->dev.parent); - struct nvdimm_bus *nvdimm_bus; - struct list_head *poison_list; - u64 ns_start, ns_end, ns_size; struct nd_poison *pl; - ns_size = nvdimm_namespace_capacity(ndns) - offset; - ns_start = nsio->res.start + offset; - ns_end = nsio->res.end; - - nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent); - poison_list = &nvdimm_bus->poison_list; if (list_empty(poison_list)) return; @@ -442,37 +429,69 @@ void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns, u64 pl_end = pl->start + pl->length - 1; /* Discard intervals with no intersection */ - if (pl_end < ns_start) + if (pl_end < res->start) continue; - if (pl->start > ns_end) + if (pl->start > res->end) continue; /* Deal with any overlap after start of the namespace */ - if (pl->start >= ns_start) { + if (pl->start >= res->start) { u64 start = pl->start; u64 len; - if (pl_end <= ns_end) + if (pl_end <= res->end) len = pl->length; else - len = ns_start + ns_size - pl->start; - __add_badblock_range(bb, start - ns_start, len); + len = res->start + resource_size(res) + - pl->start; + __add_badblock_range(bb, start - res->start, len); continue; } /* Deal with overlap for poison starting before the namespace */ - if (pl->start < ns_start) { + if (pl->start < res->start) { u64 len; - if (pl_end < ns_end) - len = pl->start + pl->length - ns_start; + if (pl_end < res->end) + len = pl->start + pl->length - res->start; else - len = ns_size; + len = resource_size(res); __add_badblock_range(bb, 0, len); } } } + +/** + * nvdimm_namespace_add_poison() - Convert a list of poison ranges to badblocks + * @ndns: the namespace containing poison ranges + * @bb: badblocks instance to populate + * @offset: offset at the start of the namespace before 'sector 0' + * + * The poison list generated during NFIT initialization may contain multiple, + * possibly overlapping ranges in the SPA (System Physical Address) space. + * Compare each of these ranges to the namespace currently being initialized, + * and add badblocks to the gendisk for all matching sub-ranges + */ +void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns, + struct badblocks *bb, resource_size_t offset) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct nd_region *nd_region = to_nd_region(ndns->dev.parent); + struct nvdimm_bus *nvdimm_bus; + struct list_head *poison_list; + struct resource res = { + .start = nsio->res.start + offset, + .end = nsio->res.end, + }; + + nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent); + poison_list = &nvdimm_bus->poison_list; + + nvdimm_bus_lock(&nvdimm_bus->dev); + namespace_add_poison(poison_list, bb, &res); + nvdimm_bus_unlock(&nvdimm_bus->dev); +} EXPORT_SYMBOL_GPL(nvdimm_namespace_add_poison); -static int __add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) +static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) { struct nd_poison *pl; @@ -487,12 +506,12 @@ static int __add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) return 0; } -int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) +static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) { struct nd_poison *pl; if (list_empty(&nvdimm_bus->poison_list)) - return __add_poison(nvdimm_bus, addr, length); + return add_poison(nvdimm_bus, addr, length); /* * There is a chance this is a duplicate, check for those first. @@ -512,7 +531,18 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) * as any overlapping ranges will get resolved when the list is consumed * and converted to badblocks */ - return __add_poison(nvdimm_bus, addr, length); + return add_poison(nvdimm_bus, addr, length); +} + +int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) +{ + int rc; + + nvdimm_bus_lock(&nvdimm_bus->dev); + rc = bus_add_poison(nvdimm_bus, addr, length); + nvdimm_bus_unlock(&nvdimm_bus->dev); + + return rc; } EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison); @@ -553,7 +583,11 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus) nd_synchronize(); device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); + + nvdimm_bus_lock(&nvdimm_bus->dev); free_poison_list(&nvdimm_bus->poison_list); + nvdimm_bus_unlock(&nvdimm_bus->dev); + nvdimm_bus_destroy_ndctl(nvdimm_bus); device_unregister(&nvdimm_bus->dev); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 651b8d19d324..c56f88217924 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -75,7 +75,7 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) memset(cmd, 0, sizeof(*cmd)); nd_desc = nvdimm_bus->nd_desc; return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), - ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd)); + ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd), NULL); } int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) @@ -120,7 +120,7 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) cmd->in_offset = offset; rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), ND_CMD_GET_CONFIG_DATA, cmd, - cmd->in_length + sizeof(*cmd)); + cmd->in_length + sizeof(*cmd), NULL); if (rc || cmd->status) { rc = -ENXIO; break; @@ -171,7 +171,7 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, status = ((void *) cmd) + cmd_size - sizeof(u32); rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), - ND_CMD_SET_CONFIG_DATA, cmd, cmd_size); + ND_CMD_SET_CONFIG_DATA, cmd, cmd_size, NULL); if (rc || *status) { rc = rc ? rc : -ENXIO; break; diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 9edf7eb7d17c..f5cb88601359 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -133,6 +133,7 @@ bool nd_is_uuid_unique(struct device *dev, u8 *uuid) bool pmem_should_map_pages(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_namespace_io *nsio; if (!IS_ENABLED(CONFIG_ZONE_DEVICE)) return false; @@ -143,6 +144,12 @@ bool pmem_should_map_pages(struct device *dev) if (is_nd_pfn(dev) || is_nd_btt(dev)) return false; + nsio = to_nd_namespace_io(dev); + if (region_intersects(nsio->res.start, resource_size(&nsio->res), + IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED) + return false; + #ifdef ARCH_MEMREMAP_PMEM return ARCH_MEMREMAP_PMEM == MEMREMAP_WB; #else diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index ba1633b9da31..1799bd97a9ce 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -18,6 +18,7 @@ #include <linux/mutex.h> #include <linux/ndctl.h> #include <linux/types.h> +#include <linux/nd.h> #include "label.h" enum { @@ -168,6 +169,7 @@ int nd_integrity_init(struct gendisk *disk, unsigned long meta_size); void wait_nvdimm_bus_probe_idle(struct device *dev); void nd_device_register(struct device *dev); void nd_device_unregister(struct device *dev, enum nd_async_mode mode); +void nd_device_notify(struct device *dev, enum nvdimm_event event); int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, size_t len); ssize_t nd_sector_size_show(unsigned long current_lbasize, @@ -184,6 +186,8 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, void *buf, size_t len); +long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, + unsigned int len); struct nd_btt *to_nd_btt(struct device *dev); struct nd_gen_sb { diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h index cc243754acef..8e343a3ca873 100644 --- a/drivers/nvdimm/pfn.h +++ b/drivers/nvdimm/pfn.h @@ -15,6 +15,7 @@ #define __NVDIMM_PFN_H #include <linux/types.h> +#include <linux/mmzone.h> #define PFN_SIG_LEN 16 #define PFN_SIG "NVDIMM_PFN_INFO\0" @@ -26,10 +27,28 @@ struct nd_pfn_sb { __le32 flags; __le16 version_major; __le16 version_minor; - __le64 dataoff; + __le64 dataoff; /* relative to namespace_base + start_pad */ __le64 npfns; __le32 mode; - u8 padding[4012]; + /* minor-version-1 additions for section alignment */ + __le32 start_pad; + __le32 end_trunc; + u8 padding[4004]; __le64 checksum; }; + +#ifdef CONFIG_SPARSEMEM +#define PFN_SECTION_ALIGN_DOWN(x) SECTION_ALIGN_DOWN(x) +#define PFN_SECTION_ALIGN_UP(x) SECTION_ALIGN_UP(x) +#else +/* + * In this case ZONE_DEVICE=n and we will disable 'pfn' device support, + * but we still want pmem to compile. + */ +#define PFN_SECTION_ALIGN_DOWN(x) (x) +#define PFN_SECTION_ALIGN_UP(x) (x) +#endif + +#define PHYS_SECTION_ALIGN_DOWN(x) PFN_PHYS(PFN_SECTION_ALIGN_DOWN(PHYS_PFN(x))) +#define PHYS_SECTION_ALIGN_UP(x) PFN_PHYS(PFN_SECTION_ALIGN_UP(PHYS_PFN(x))) #endif /* __NVDIMM_PFN_H */ diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index ae81a2f1da50..254d3bc13f70 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -205,11 +205,67 @@ static ssize_t namespace_store(struct device *dev, } static DEVICE_ATTR_RW(namespace); +static ssize_t resource_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + device_lock(dev); + if (dev->driver) { + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + u64 offset = __le64_to_cpu(pfn_sb->dataoff); + struct nd_namespace_common *ndns = nd_pfn->ndns; + u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + + rc = sprintf(buf, "%#llx\n", (unsigned long long) nsio->res.start + + start_pad + offset); + } else { + /* no address to convey if the pfn instance is disabled */ + rc = -ENXIO; + } + device_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(resource); + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + device_lock(dev); + if (dev->driver) { + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + u64 offset = __le64_to_cpu(pfn_sb->dataoff); + struct nd_namespace_common *ndns = nd_pfn->ndns; + u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); + u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + + rc = sprintf(buf, "%llu\n", (unsigned long long) + resource_size(&nsio->res) - start_pad + - end_trunc - offset); + } else { + /* no size to convey if the pfn instance is disabled */ + rc = -ENXIO; + } + device_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(size); + static struct attribute *nd_pfn_attributes[] = { &dev_attr_mode.attr, &dev_attr_namespace.attr, &dev_attr_uuid.attr, &dev_attr_align.attr, + &dev_attr_resource.attr, + &dev_attr_size.attr, NULL, }; @@ -299,6 +355,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) if (memcmp(pfn_sb->parent_uuid, parent_uuid, 16) != 0) return -ENODEV; + if (__le16_to_cpu(pfn_sb->version_minor) < 1) { + pfn_sb->start_pad = 0; + pfn_sb->end_trunc = 0; + } + switch (le32_to_cpu(pfn_sb->mode)) { case PFN_MODE_RAM: case PFN_MODE_PMEM: diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 8d0b54670184..ca5721c306bb 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -43,12 +43,13 @@ struct pmem_device { phys_addr_t data_offset; u64 pfn_flags; void __pmem *virt_addr; + /* immutable base size of the namespace */ size_t size; + /* trim size when namespace capacity has been section aligned */ + u32 pfn_pad; struct badblocks bb; }; -static int pmem_major; - static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len) { if (bb->count) { @@ -62,26 +63,56 @@ static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len) return false; } +static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, + unsigned int len) +{ + struct device *dev = disk_to_dev(pmem->pmem_disk); + sector_t sector; + long cleared; + + sector = (offset - pmem->data_offset) / 512; + cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len); + + if (cleared > 0 && cleared / 512) { + dev_dbg(dev, "%s: %llx clear %ld sector%s\n", + __func__, (unsigned long long) sector, + cleared / 512, cleared / 512 > 1 ? "s" : ""); + badblocks_clear(&pmem->bb, sector, cleared / 512); + } + invalidate_pmem(pmem->virt_addr + offset, len); +} + static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, unsigned int len, unsigned int off, int rw, sector_t sector) { + int rc = 0; + bool bad_pmem = false; void *mem = kmap_atomic(page); phys_addr_t pmem_off = sector * 512 + pmem->data_offset; void __pmem *pmem_addr = pmem->virt_addr + pmem_off; + if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) + bad_pmem = true; + if (rw == READ) { - if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) - return -EIO; - memcpy_from_pmem(mem + off, pmem_addr, len); - flush_dcache_page(page); + if (unlikely(bad_pmem)) + rc = -EIO; + else { + memcpy_from_pmem(mem + off, pmem_addr, len); + flush_dcache_page(page); + } } else { flush_dcache_page(page); memcpy_to_pmem(pmem_addr, mem + off, len); + if (unlikely(bad_pmem)) { + pmem_clear_poison(pmem, pmem_off, len); + memcpy_to_pmem(pmem_addr, mem + off, len); + } } kunmap_atomic(mem); - return 0; + return rc; } static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) @@ -145,7 +176,7 @@ static long pmem_direct_access(struct block_device *bdev, sector_t sector, *kaddr = pmem->virt_addr + offset; *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); - return pmem->size - offset; + return pmem->size - pmem->pfn_pad - offset; } static const struct block_device_operations pmem_fops = { @@ -228,15 +259,14 @@ static int pmem_attach_disk(struct device *dev, return -ENOMEM; } - disk->major = pmem_major; - disk->first_minor = 0; disk->fops = &pmem_fops; disk->private_data = pmem; disk->queue = pmem->pmem_queue; disk->flags = GENHD_FL_EXT_DEVT; nvdimm_namespace_disk_name(ndns, disk->disk_name); disk->driverfs_dev = dev; - set_capacity(disk, (pmem->size - pmem->data_offset) / 512); + set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset) + / 512); pmem->pmem_disk = disk; devm_exit_badblocks(dev, &pmem->bb); if (devm_init_badblocks(dev, &pmem->bb)) @@ -279,6 +309,9 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL); struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev); struct nd_namespace_common *ndns = nd_pfn->ndns; + u32 start_pad = 0, end_trunc = 0; + resource_size_t start, size; + struct nd_namespace_io *nsio; struct nd_region *nd_region; unsigned long npfns; phys_addr_t offset; @@ -304,21 +337,56 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) } memset(pfn_sb, 0, sizeof(*pfn_sb)); - npfns = (pmem->size - SZ_8K) / SZ_4K; + + /* + * Check if pmem collides with 'System RAM' when section aligned and + * trim it accordingly + */ + nsio = to_nd_namespace_io(&ndns->dev); + start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start); + size = resource_size(&nsio->res); + if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED) { + + start = nsio->res.start; + start_pad = PHYS_SECTION_ALIGN_UP(start) - start; + } + + start = nsio->res.start; + size = PHYS_SECTION_ALIGN_UP(start + size) - start; + if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED) { + size = resource_size(&nsio->res); + end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size); + } + + if (start_pad + end_trunc) + dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n", + dev_name(&ndns->dev), start_pad + end_trunc); + /* * Note, we use 64 here for the standard size of struct page, * debugging options may cause it to be larger in which case the * implementation will limit the pfns advertised through * ->direct_access() to those that are included in the memmap. */ + start += start_pad; + npfns = (pmem->size - start_pad - end_trunc - SZ_8K) / SZ_4K; if (nd_pfn->mode == PFN_MODE_PMEM) - offset = ALIGN(SZ_8K + 64 * npfns, nd_pfn->align); + offset = ALIGN(start + SZ_8K + 64 * npfns, nd_pfn->align) + - start; else if (nd_pfn->mode == PFN_MODE_RAM) - offset = ALIGN(SZ_8K, nd_pfn->align); + offset = ALIGN(start + SZ_8K, nd_pfn->align) - start; else goto err; - npfns = (pmem->size - offset) / SZ_4K; + if (offset + start_pad + end_trunc >= pmem->size) { + dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", + dev_name(&ndns->dev)); + goto err; + } + + npfns = (pmem->size - offset - start_pad - end_trunc) / SZ_4K; pfn_sb->mode = cpu_to_le32(nd_pfn->mode); pfn_sb->dataoff = cpu_to_le64(offset); pfn_sb->npfns = cpu_to_le64(npfns); @@ -326,6 +394,9 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); pfn_sb->version_major = cpu_to_le16(1); + pfn_sb->version_minor = cpu_to_le16(1); + pfn_sb->start_pad = cpu_to_le32(start_pad); + pfn_sb->end_trunc = cpu_to_le32(end_trunc); checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); pfn_sb->checksum = cpu_to_le64(checksum); @@ -356,41 +427,56 @@ static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns) return 0; } -static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) +/* + * We hotplug memory at section granularity, pad the reserved area from + * the previous section base to the namespace base address. + */ +static unsigned long init_altmap_base(resource_size_t base) +{ + unsigned long base_pfn = PHYS_PFN(base); + + return PFN_SECTION_ALIGN_DOWN(base_pfn); +} + +static unsigned long init_altmap_reserve(resource_size_t base) +{ + unsigned long reserve = PHYS_PFN(SZ_8K); + unsigned long base_pfn = PHYS_PFN(base); + + reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn); + return reserve; +} + +static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn) { - struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); - struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); - struct device *dev = &nd_pfn->dev; - struct nd_region *nd_region; - struct vmem_altmap *altmap; - struct nd_pfn_sb *pfn_sb; - struct pmem_device *pmem; - struct request_queue *q; - phys_addr_t offset; int rc; + struct resource res; + struct request_queue *q; + struct pmem_device *pmem; + struct vmem_altmap *altmap; + struct device *dev = &nd_pfn->dev; + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + struct nd_namespace_common *ndns = nd_pfn->ndns; + u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); + u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + resource_size_t base = nsio->res.start + start_pad; struct vmem_altmap __altmap = { - .base_pfn = __phys_to_pfn(nsio->res.start), - .reserve = __phys_to_pfn(SZ_8K), + .base_pfn = init_altmap_base(base), + .reserve = init_altmap_reserve(base), }; - if (!nd_pfn->uuid || !nd_pfn->ndns) - return -ENODEV; - - nd_region = to_nd_region(dev->parent); - rc = nd_pfn_init(nd_pfn); - if (rc) - return rc; - - pfn_sb = nd_pfn->pfn_sb; - offset = le64_to_cpu(pfn_sb->dataoff); + pmem = dev_get_drvdata(dev); + pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); + pmem->pfn_pad = start_pad + end_trunc; nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode); if (nd_pfn->mode == PFN_MODE_RAM) { - if (offset < SZ_8K) + if (pmem->data_offset < SZ_8K) return -EINVAL; nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); altmap = NULL; } else if (nd_pfn->mode == PFN_MODE_PMEM) { - nd_pfn->npfns = (resource_size(&nsio->res) - offset) + nd_pfn->npfns = (pmem->size - pmem->pfn_pad - pmem->data_offset) / PAGE_SIZE; if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) dev_info(&nd_pfn->dev, @@ -398,7 +484,7 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) le64_to_cpu(nd_pfn->pfn_sb->npfns), nd_pfn->npfns); altmap = & __altmap; - altmap->free = __phys_to_pfn(offset - SZ_8K); + altmap->free = PHYS_PFN(pmem->data_offset - SZ_8K); altmap->alloc = 0; } else { rc = -ENXIO; @@ -406,10 +492,12 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) } /* establish pfn range for lookup, and switch to direct map */ - pmem = dev_get_drvdata(dev); q = pmem->pmem_queue; + memcpy(&res, &nsio->res, sizeof(res)); + res.start += start_pad; + res.end -= end_trunc; devm_memunmap(dev, (void __force *) pmem->virt_addr); - pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res, + pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &res, &q->q_usage_counter, altmap); pmem->pfn_flags |= PFN_MAP; if (IS_ERR(pmem->virt_addr)) { @@ -418,7 +506,6 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) } /* attach pmem disk in "pfn-mode" */ - pmem->data_offset = offset; rc = pmem_attach_disk(dev, ndns, pmem); if (rc) goto err; @@ -427,6 +514,22 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) err: nvdimm_namespace_detach_pfn(ndns); return rc; + +} + +static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); + int rc; + + if (!nd_pfn->uuid || !nd_pfn->ndns) + return -ENODEV; + + rc = nd_pfn_init(nd_pfn); + if (rc) + return rc; + /* we need a valid pfn_sb before we can init a vmem_altmap */ + return __nvdimm_namespace_attach_pfn(nd_pfn); } static int nd_pmem_probe(struct device *dev) @@ -488,12 +591,27 @@ static int nd_pmem_remove(struct device *dev) return 0; } +static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) +{ + struct pmem_device *pmem = dev_get_drvdata(dev); + struct nd_namespace_common *ndns = pmem->ndns; + + if (event != NVDIMM_REVALIDATE_POISON) + return; + + if (is_nd_btt(dev)) + nvdimm_namespace_add_poison(ndns, &pmem->bb, 0); + else + nvdimm_namespace_add_poison(ndns, &pmem->bb, pmem->data_offset); +} + MODULE_ALIAS("pmem"); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM); static struct nd_device_driver nd_pmem_driver = { .probe = nd_pmem_probe, .remove = nd_pmem_remove, + .notify = nd_pmem_notify, .drv = { .name = "nd_pmem", }, @@ -502,26 +620,13 @@ static struct nd_device_driver nd_pmem_driver = { static int __init pmem_init(void) { - int error; - - pmem_major = register_blkdev(0, "pmem"); - if (pmem_major < 0) - return pmem_major; - - error = nd_driver_register(&nd_pmem_driver); - if (error) { - unregister_blkdev(pmem_major, "pmem"); - return error; - } - - return 0; + return nd_driver_register(&nd_pmem_driver); } module_init(pmem_init); static void pmem_exit(void) { driver_unregister(&nd_pmem_driver.drv); - unregister_blkdev(pmem_major, "pmem"); } module_exit(pmem_exit); diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 7da63eac78ee..4b7715e29cff 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -93,9 +93,21 @@ static int nd_region_remove(struct device *dev) return 0; } +static int child_notify(struct device *dev, void *data) +{ + nd_device_notify(dev, *(enum nvdimm_event *) data); + return 0; +} + +static void nd_region_notify(struct device *dev, enum nvdimm_event event) +{ + device_for_each_child(dev, &event, child_notify); +} + static struct nd_device_driver nd_region_driver = { .probe = nd_region_probe, .remove = nd_region_remove, + .notify = nd_region_notify, .drv = { .name = "nd_region", }, diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 212a8e69d7c0..0b65543dc6cf 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -172,6 +172,7 @@ extern void reserve_region_with_split(struct resource *root, extern struct resource *insert_resource_conflict(struct resource *parent, struct resource *new); extern int insert_resource(struct resource *parent, struct resource *new); extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new); +extern int remove_resource(struct resource *old); extern void arch_remove_reservations(struct resource *avail); extern int allocate_resource(struct resource *root, struct resource *new, resource_size_t size, resource_size_t min, diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 141ffdd59960..833867b9ddc2 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -48,7 +48,7 @@ struct nvdimm; struct nvdimm_bus_descriptor; typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, - unsigned int buf_len); + unsigned int buf_len, int *cmd_rc); struct nd_namespace_label; struct nvdimm_drvdata; @@ -71,6 +71,9 @@ struct nvdimm_bus_descriptor { unsigned long dsm_mask; char *provider_name; ndctl_fn ndctl; + int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); + int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd); }; struct nd_cmd_desc { diff --git a/include/linux/nd.h b/include/linux/nd.h index 507e47c86737..5489ab756d1a 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -16,11 +16,16 @@ #include <linux/ndctl.h> #include <linux/device.h> +enum nvdimm_event { + NVDIMM_REVALIDATE_POISON, +}; + struct nd_device_driver { struct device_driver drv; unsigned long type; int (*probe)(struct device *dev); int (*remove)(struct device *dev); + void (*notify)(struct device *dev, enum nvdimm_event event); }; static inline struct nd_device_driver *to_nd_device_driver( @@ -144,6 +149,8 @@ static inline int nvdimm_write_bytes(struct nd_namespace_common *ndns, MODULE_ALIAS("nd:t" __stringify(type) "*") #define ND_DEVICE_MODALIAS_FMT "nd:t%d" +struct nd_region; +void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event); int __must_check __nd_driver_register(struct nd_device_driver *nd_drv, struct module *module, const char *mod_name); #define nd_driver_register(driver) \ diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 7c3d11a6b4ad..3ec5309e29f3 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -58,6 +58,11 @@ static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size) { BUG(); } + +static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) +{ + BUG(); +} #endif /* @@ -186,6 +191,20 @@ static inline void clear_pmem(void __pmem *addr, size_t size) } /** + * invalidate_pmem - flush a pmem range from the cache hierarchy + * @addr: virtual start address + * @size: bytes to invalidate (internally aligned to cache line size) + * + * For platforms that support clearing poison this flushes any poisoned + * ranges out of the cache + */ +static inline void invalidate_pmem(void __pmem *addr, size_t size) +{ + if (arch_has_pmem_api()) + arch_invalidate_pmem(addr, size); +} + +/** * wb_cache_pmem - write back processor cache for PMEM memory range * @addr: virtual start address * @size: number of bytes to write back diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index cc68b92124d4..7cc28ab05b87 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -98,6 +98,14 @@ struct nd_cmd_ars_status { } __packed records[0]; } __packed; +struct nd_cmd_clear_error { + __u64 address; + __u64 length; + __u32 status; + __u8 reserved[4]; + __u64 cleared; +} __packed; + enum { ND_CMD_IMPLEMENTED = 0, @@ -105,6 +113,7 @@ enum { ND_CMD_ARS_CAP = 1, ND_CMD_ARS_START = 2, ND_CMD_ARS_STATUS = 3, + ND_CMD_CLEAR_ERROR = 4, /* per-dimm commands */ ND_CMD_SMART = 1, @@ -129,6 +138,7 @@ static inline const char *nvdimm_bus_cmd_name(unsigned cmd) [ND_CMD_ARS_CAP] = "ars_cap", [ND_CMD_ARS_START] = "ars_start", [ND_CMD_ARS_STATUS] = "ars_status", + [ND_CMD_CLEAR_ERROR] = "clear_error", }; if (cmd < ARRAY_SIZE(names) && names[cmd]) @@ -187,6 +197,9 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_IOCTL_ARS_STATUS _IOWR(ND_IOCTL, ND_CMD_ARS_STATUS,\ struct nd_cmd_ars_status) +#define ND_IOCTL_CLEAR_ERROR _IOWR(ND_IOCTL, ND_CMD_CLEAR_ERROR,\ + struct nd_cmd_clear_error) + #define ND_DEVICE_DIMM 1 /* nd_dimm: container for "config data" */ #define ND_DEVICE_REGION_PMEM 2 /* nd_region: (parent of PMEM namespaces) */ #define ND_DEVICE_REGION_BLK 3 /* nd_region: (parent of BLK namespaces) */ diff --git a/kernel/resource.c b/kernel/resource.c index 4d466052426b..2e78ead30934 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -233,9 +233,9 @@ static struct resource * __request_resource(struct resource *root, struct resour } } -static int __release_resource(struct resource *old) +static int __release_resource(struct resource *old, bool release_child) { - struct resource *tmp, **p; + struct resource *tmp, **p, *chd; p = &old->parent->child; for (;;) { @@ -243,7 +243,17 @@ static int __release_resource(struct resource *old) if (!tmp) break; if (tmp == old) { - *p = tmp->sibling; + if (release_child || !(tmp->child)) { + *p = tmp->sibling; + } else { + for (chd = tmp->child;; chd = chd->sibling) { + chd->parent = tmp->parent; + if (!(chd->sibling)) + break; + } + *p = tmp->child; + chd->sibling = tmp->sibling; + } old->parent = NULL; return 0; } @@ -325,7 +335,7 @@ int release_resource(struct resource *old) int retval; write_lock(&resource_lock); - retval = __release_resource(old); + retval = __release_resource(old, true); write_unlock(&resource_lock); return retval; } @@ -679,7 +689,7 @@ static int reallocate_resource(struct resource *root, struct resource *old, old->start = new.start; old->end = new.end; } else { - __release_resource(old); + __release_resource(old, true); *old = new; conflict = __request_resource(root, old); BUG_ON(conflict); @@ -825,6 +835,9 @@ static struct resource * __insert_resource(struct resource *parent, struct resou * entirely fit within the range of the new resource, then the new * resource is inserted and the conflicting resources become children of * the new resource. + * + * This function is intended for producers of resources, such as FW modules + * and bus drivers. */ struct resource *insert_resource_conflict(struct resource *parent, struct resource *new) { @@ -842,6 +855,9 @@ struct resource *insert_resource_conflict(struct resource *parent, struct resour * @new: new resource to insert * * Returns 0 on success, -EBUSY if the resource can't be inserted. + * + * This function is intended for producers of resources, such as FW modules + * and bus drivers. */ int insert_resource(struct resource *parent, struct resource *new) { @@ -850,6 +866,7 @@ int insert_resource(struct resource *parent, struct resource *new) conflict = insert_resource_conflict(parent, new); return conflict ? -EBUSY : 0; } +EXPORT_SYMBOL_GPL(insert_resource); /** * insert_resource_expand_to_fit - Insert a resource into the resource tree @@ -885,6 +902,32 @@ void insert_resource_expand_to_fit(struct resource *root, struct resource *new) write_unlock(&resource_lock); } +/** + * remove_resource - Remove a resource in the resource tree + * @old: resource to remove + * + * Returns 0 on success, -EINVAL if the resource is not valid. + * + * This function removes a resource previously inserted by insert_resource() + * or insert_resource_conflict(), and moves the children (if any) up to + * where they were before. insert_resource() and insert_resource_conflict() + * insert a new resource, and move any conflicting resources down to the + * children of the new resource. + * + * insert_resource(), insert_resource_conflict() and remove_resource() are + * intended for producers of resources, such as FW modules and bus drivers. + */ +int remove_resource(struct resource *old) +{ + int retval; + + write_lock(&resource_lock); + retval = __release_resource(old, false); + write_unlock(&resource_lock); + return retval; +} +EXPORT_SYMBOL_GPL(remove_resource); + static int __adjust_resource(struct resource *res, resource_size_t start, resource_size_t size) { @@ -1085,15 +1128,16 @@ struct resource * __request_region(struct resource *parent, res->name = name; res->start = start; res->end = start + n - 1; - res->flags = resource_type(parent) | resource_ext_type(parent); - res->flags |= IORESOURCE_BUSY | flags; - res->desc = IORES_DESC_NONE; write_lock(&resource_lock); for (;;) { struct resource *conflict; + res->flags = resource_type(parent) | resource_ext_type(parent); + res->flags |= IORESOURCE_BUSY | flags; + res->desc = parent->desc; + conflict = __request_resource(parent, res); if (!conflict) break; diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index b3281dcd4a5d..3187322eeed7 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -151,6 +151,11 @@ struct nfit_test { int (*alloc)(struct nfit_test *t); void (*setup)(struct nfit_test *t); int setup_hotplug; + struct ars_state { + struct nd_cmd_ars_status *ars_status; + unsigned long deadline; + spinlock_t lock; + } ars_state; }; static struct nfit_test *to_nfit_test(struct device *dev) @@ -218,6 +223,7 @@ static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd, } #define NFIT_TEST_ARS_RECORDS 4 +#define NFIT_TEST_CLEAR_ERR_UNIT 256 static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd, unsigned int buf_len) @@ -228,44 +234,113 @@ static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd, nd_cmd->max_ars_out = sizeof(struct nd_cmd_ars_status) + NFIT_TEST_ARS_RECORDS * sizeof(struct nd_ars_record); nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16; + nd_cmd->clear_err_unit = NFIT_TEST_CLEAR_ERR_UNIT; return 0; } -static int nfit_test_cmd_ars_start(struct nd_cmd_ars_start *nd_cmd, - unsigned int buf_len) +/* + * Initialize the ars_state to return an ars_result 1 second in the future with + * a 4K error range in the middle of the requested address range. + */ +static void post_ars_status(struct ars_state *ars_state, u64 addr, u64 len) { - if (buf_len < sizeof(*nd_cmd)) + struct nd_cmd_ars_status *ars_status; + struct nd_ars_record *ars_record; + + ars_state->deadline = jiffies + 1*HZ; + ars_status = ars_state->ars_status; + ars_status->status = 0; + ars_status->out_length = sizeof(struct nd_cmd_ars_status) + + sizeof(struct nd_ars_record); + ars_status->address = addr; + ars_status->length = len; + ars_status->type = ND_ARS_PERSISTENT; + ars_status->num_records = 1; + ars_record = &ars_status->records[0]; + ars_record->handle = 0; + ars_record->err_address = addr + len / 2; + ars_record->length = SZ_4K; +} + +static int nfit_test_cmd_ars_start(struct ars_state *ars_state, + struct nd_cmd_ars_start *ars_start, unsigned int buf_len, + int *cmd_rc) +{ + if (buf_len < sizeof(*ars_start)) return -EINVAL; - nd_cmd->status = 0; + spin_lock(&ars_state->lock); + if (time_before(jiffies, ars_state->deadline)) { + ars_start->status = NFIT_ARS_START_BUSY; + *cmd_rc = -EBUSY; + } else { + ars_start->status = 0; + ars_start->scrub_time = 1; + post_ars_status(ars_state, ars_start->address, + ars_start->length); + *cmd_rc = 0; + } + spin_unlock(&ars_state->lock); return 0; } -static int nfit_test_cmd_ars_status(struct nd_cmd_ars_status *nd_cmd, - unsigned int buf_len) +static int nfit_test_cmd_ars_status(struct ars_state *ars_state, + struct nd_cmd_ars_status *ars_status, unsigned int buf_len, + int *cmd_rc) { - if (buf_len < sizeof(*nd_cmd)) + if (buf_len < ars_state->ars_status->out_length) return -EINVAL; - nd_cmd->out_length = sizeof(struct nd_cmd_ars_status); - /* TODO: emit error records */ - nd_cmd->num_records = 0; - nd_cmd->address = 0; - nd_cmd->length = -1ULL; - nd_cmd->status = 0; + spin_lock(&ars_state->lock); + if (time_before(jiffies, ars_state->deadline)) { + memset(ars_status, 0, buf_len); + ars_status->status = NFIT_ARS_STATUS_BUSY; + ars_status->out_length = sizeof(*ars_status); + *cmd_rc = -EBUSY; + } else { + memcpy(ars_status, ars_state->ars_status, + ars_state->ars_status->out_length); + *cmd_rc = 0; + } + spin_unlock(&ars_state->lock); + return 0; +} +static int nfit_test_cmd_clear_error(struct nd_cmd_clear_error *clear_err, + unsigned int buf_len, int *cmd_rc) +{ + const u64 mask = NFIT_TEST_CLEAR_ERR_UNIT - 1; + if (buf_len < sizeof(*clear_err)) + return -EINVAL; + + if ((clear_err->address & mask) || (clear_err->length & mask)) + return -EINVAL; + + /* + * Report 'all clear' success for all commands even though a new + * scrub will find errors again. This is enough to have the + * error removed from the 'badblocks' tracking in the pmem + * driver. + */ + clear_err->status = 0; + clear_err->cleared = clear_err->length; + *cmd_rc = 0; return 0; } static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, - unsigned int buf_len) + unsigned int buf_len, int *cmd_rc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc); - int i, rc = 0; + int i, rc = 0, __cmd_rc; + + if (!cmd_rc) + cmd_rc = &__cmd_rc; + *cmd_rc = 0; if (nvdimm) { struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); @@ -297,6 +372,8 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, return -ENOTTY; } } else { + struct ars_state *ars_state = &t->ars_state; + if (!nd_desc || !test_bit(cmd, &nd_desc->dsm_mask)) return -ENOTTY; @@ -305,10 +382,15 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, rc = nfit_test_cmd_ars_cap(buf, buf_len); break; case ND_CMD_ARS_START: - rc = nfit_test_cmd_ars_start(buf, buf_len); + rc = nfit_test_cmd_ars_start(ars_state, buf, buf_len, + cmd_rc); break; case ND_CMD_ARS_STATUS: - rc = nfit_test_cmd_ars_status(buf, buf_len); + rc = nfit_test_cmd_ars_status(ars_state, buf, buf_len, + cmd_rc); + break; + case ND_CMD_CLEAR_ERROR: + rc = nfit_test_cmd_clear_error(buf, buf_len, cmd_rc); break; default: return -ENOTTY; @@ -424,11 +506,25 @@ static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr) return NULL; } +static int ars_state_init(struct device *dev, struct ars_state *ars_state) +{ + ars_state->ars_status = devm_kzalloc(dev, + sizeof(struct nd_cmd_ars_status) + + sizeof(struct nd_ars_record) * NFIT_TEST_ARS_RECORDS, + GFP_KERNEL); + if (!ars_state->ars_status) + return -ENOMEM; + spin_lock_init(&ars_state->lock); + return 0; +} + static int nfit_test0_alloc(struct nfit_test *t) { size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA + sizeof(struct acpi_nfit_memory_map) * NUM_MEM + sizeof(struct acpi_nfit_control_region) * NUM_DCR + + offsetof(struct acpi_nfit_control_region, + window_size) * NUM_DCR + sizeof(struct acpi_nfit_data_region) * NUM_BDW + sizeof(struct acpi_nfit_flush_address) * NUM_DCR; int i; @@ -471,14 +567,14 @@ static int nfit_test0_alloc(struct nfit_test *t) return -ENOMEM; } - return 0; + return ars_state_init(&t->pdev.dev, &t->ars_state); } static int nfit_test1_alloc(struct nfit_test *t) { size_t nfit_size = sizeof(struct acpi_nfit_system_address) + sizeof(struct acpi_nfit_memory_map) - + sizeof(struct acpi_nfit_control_region); + + offsetof(struct acpi_nfit_control_region, window_size); t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); if (!t->nfit_buf) @@ -489,12 +585,11 @@ static int nfit_test1_alloc(struct nfit_test *t) if (!t->spa_set[0]) return -ENOMEM; - return 0; + return ars_state_init(&t->pdev.dev, &t->ars_state); } static void nfit_test0_setup(struct nfit_test *t) { - struct nvdimm_bus_descriptor *nd_desc; struct acpi_nfit_desc *acpi_desc; struct acpi_nfit_memory_map *memdev; void *nfit_buf = t->nfit_buf; @@ -611,7 +706,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 0; memdev->region_id = 0; memdev->range_index = 0+1; - memdev->region_index = 0+1; + memdev->region_index = 4+1; memdev->region_size = SPA0_SIZE/2; memdev->region_offset = t->spa_set_dma[0]; memdev->address = 0; @@ -626,7 +721,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 1; memdev->region_id = 0; memdev->range_index = 0+1; - memdev->region_index = 1+1; + memdev->region_index = 5+1; memdev->region_size = SPA0_SIZE/2; memdev->region_offset = t->spa_set_dma[0] + SPA0_SIZE/2; memdev->address = 0; @@ -641,7 +736,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 0; memdev->region_id = 1; memdev->range_index = 1+1; - memdev->region_index = 0+1; + memdev->region_index = 4+1; memdev->region_size = SPA1_SIZE/4; memdev->region_offset = t->spa_set_dma[1]; memdev->address = SPA0_SIZE/2; @@ -656,7 +751,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 1; memdev->region_id = 1; memdev->range_index = 1+1; - memdev->region_index = 1+1; + memdev->region_index = 5+1; memdev->region_size = SPA1_SIZE/4; memdev->region_offset = t->spa_set_dma[1] + SPA1_SIZE/4; memdev->address = SPA0_SIZE/2; @@ -671,7 +766,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 2; memdev->region_id = 0; memdev->range_index = 1+1; - memdev->region_index = 2+1; + memdev->region_index = 6+1; memdev->region_size = SPA1_SIZE/4; memdev->region_offset = t->spa_set_dma[1] + 2*SPA1_SIZE/4; memdev->address = SPA0_SIZE/2; @@ -686,7 +781,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 3; memdev->region_id = 0; memdev->range_index = 1+1; - memdev->region_index = 3+1; + memdev->region_index = 7+1; memdev->region_size = SPA1_SIZE/4; memdev->region_offset = t->spa_set_dma[1] + 3*SPA1_SIZE/4; memdev->address = SPA0_SIZE/2; @@ -814,7 +909,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_ways = 1; offset = offset + sizeof(struct acpi_nfit_memory_map) * 14; - /* dcr-descriptor0 */ + /* dcr-descriptor0: blk */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = sizeof(struct acpi_nfit_control_region); @@ -823,6 +918,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->device_id = 0; dcr->revision_id = 1; dcr->serial_number = ~handle[0]; + dcr->code = NFIT_FIC_BLK; dcr->windows = 1; dcr->window_size = DCR_SIZE; dcr->command_offset = 0; @@ -830,7 +926,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->status_offset = 8; dcr->status_size = 4; - /* dcr-descriptor1 */ + /* dcr-descriptor1: blk */ dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region); dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = sizeof(struct acpi_nfit_control_region); @@ -839,6 +935,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->device_id = 0; dcr->revision_id = 1; dcr->serial_number = ~handle[1]; + dcr->code = NFIT_FIC_BLK; dcr->windows = 1; dcr->window_size = DCR_SIZE; dcr->command_offset = 0; @@ -846,7 +943,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->status_offset = 8; dcr->status_size = 4; - /* dcr-descriptor2 */ + /* dcr-descriptor2: blk */ dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = sizeof(struct acpi_nfit_control_region); @@ -855,6 +952,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->device_id = 0; dcr->revision_id = 1; dcr->serial_number = ~handle[2]; + dcr->code = NFIT_FIC_BLK; dcr->windows = 1; dcr->window_size = DCR_SIZE; dcr->command_offset = 0; @@ -862,7 +960,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->status_offset = 8; dcr->status_size = 4; - /* dcr-descriptor3 */ + /* dcr-descriptor3: blk */ dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = sizeof(struct acpi_nfit_control_region); @@ -871,6 +969,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->device_id = 0; dcr->revision_id = 1; dcr->serial_number = ~handle[3]; + dcr->code = NFIT_FIC_BLK; dcr->windows = 1; dcr->window_size = DCR_SIZE; dcr->command_offset = 0; @@ -879,6 +978,63 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->status_size = 4; offset = offset + sizeof(struct acpi_nfit_control_region) * 4; + /* dcr-descriptor0: pmem */ + dcr = nfit_buf + offset; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = offsetof(struct acpi_nfit_control_region, + window_size); + dcr->region_index = 4+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[0]; + dcr->code = NFIT_FIC_BYTEN; + dcr->windows = 0; + + /* dcr-descriptor1: pmem */ + dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, + window_size); + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = offsetof(struct acpi_nfit_control_region, + window_size); + dcr->region_index = 5+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[1]; + dcr->code = NFIT_FIC_BYTEN; + dcr->windows = 0; + + /* dcr-descriptor2: pmem */ + dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, + window_size) * 2; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = offsetof(struct acpi_nfit_control_region, + window_size); + dcr->region_index = 6+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[2]; + dcr->code = NFIT_FIC_BYTEN; + dcr->windows = 0; + + /* dcr-descriptor3: pmem */ + dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, + window_size) * 3; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = offsetof(struct acpi_nfit_control_region, + window_size); + dcr->region_index = 7+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[3]; + dcr->code = NFIT_FIC_BYTEN; + dcr->windows = 0; + + offset = offset + offsetof(struct acpi_nfit_control_region, + window_size) * 4; /* bdw0 (spa/dcr0, dimm0) */ bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; @@ -958,15 +1114,16 @@ static void nfit_test0_setup(struct nfit_test *t) if (t->setup_hotplug) { offset = offset + sizeof(struct acpi_nfit_flush_address) * 4; - /* dcr-descriptor4 */ + /* dcr-descriptor4: blk */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = sizeof(struct acpi_nfit_control_region); - dcr->region_index = 4+1; + dcr->region_index = 8+1; dcr->vendor_id = 0xabcd; dcr->device_id = 0; dcr->revision_id = 1; dcr->serial_number = ~handle[4]; + dcr->code = NFIT_FIC_BLK; dcr->windows = 1; dcr->window_size = DCR_SIZE; dcr->command_offset = 0; @@ -975,11 +1132,26 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->status_size = 4; offset = offset + sizeof(struct acpi_nfit_control_region); + /* dcr-descriptor4: pmem */ + dcr = nfit_buf + offset; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = offsetof(struct acpi_nfit_control_region, + window_size); + dcr->region_index = 9+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[4]; + dcr->code = NFIT_FIC_BYTEN; + dcr->windows = 0; + + offset = offset + offsetof(struct acpi_nfit_control_region, + window_size); /* bdw4 (spa/dcr4, dimm4) */ bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; bdw->header.length = sizeof(struct acpi_nfit_data_region); - bdw->region_index = 4+1; + bdw->region_index = 8+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; @@ -1027,7 +1199,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 4; memdev->region_id = 0; memdev->range_index = 10+1; - memdev->region_index = 4+1; + memdev->region_index = 8+1; memdev->region_size = 0; memdev->region_offset = 0; memdev->address = 0; @@ -1043,14 +1215,14 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 4; memdev->region_id = 0; memdev->range_index = 11+1; - memdev->region_index = 4+1; + memdev->region_index = 9+1; memdev->region_size = SPA0_SIZE; memdev->region_offset = t->spa_set_dma[2]; memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; - /* mem-region16 (spa/dcr4, dimm4) */ + /* mem-region16 (spa/bdw4, dimm4) */ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; @@ -1059,7 +1231,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->physical_id = 4; memdev->region_id = 0; memdev->range_index = 12+1; - memdev->region_index = 4+1; + memdev->region_index = 8+1; memdev->region_size = 0; memdev->region_offset = 0; memdev->address = 0; @@ -1076,6 +1248,8 @@ static void nfit_test0_setup(struct nfit_test *t) flush->hint_address[0] = t->flush_dma[4]; } + post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE); + acpi_desc = &t->acpi_desc; set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); @@ -1083,8 +1257,7 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); - nd_desc = &acpi_desc->nd_desc; - nd_desc->ndctl = nfit_test_ctl; + set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en); } static void nfit_test1_setup(struct nfit_test *t) @@ -1094,7 +1267,6 @@ static void nfit_test1_setup(struct nfit_test *t) struct acpi_nfit_memory_map *memdev; struct acpi_nfit_control_region *dcr; struct acpi_nfit_system_address *spa; - struct nvdimm_bus_descriptor *nd_desc; struct acpi_nfit_desc *acpi_desc; offset = 0; @@ -1130,26 +1302,23 @@ static void nfit_test1_setup(struct nfit_test *t) /* dcr-descriptor0 */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = offsetof(struct acpi_nfit_control_region, + window_size); dcr->region_index = 0+1; dcr->vendor_id = 0xabcd; dcr->device_id = 0; dcr->revision_id = 1; dcr->serial_number = ~0; - dcr->code = 0x201; + dcr->code = NFIT_FIC_BYTE; dcr->windows = 0; - dcr->window_size = 0; - dcr->command_offset = 0; - dcr->command_size = 0; - dcr->status_offset = 0; - dcr->status_size = 0; + + post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA2_SIZE); acpi_desc = &t->acpi_desc; set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); - nd_desc = &acpi_desc->nd_desc; - nd_desc->ndctl = nfit_test_ctl; + set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, @@ -1232,26 +1401,16 @@ static int nfit_test_probe(struct platform_device *pdev) nfit_test->setup(nfit_test); acpi_desc = &nfit_test->acpi_desc; - acpi_desc->dev = &pdev->dev; + acpi_nfit_desc_init(acpi_desc, &pdev->dev); acpi_desc->nfit = nfit_test->nfit_buf; acpi_desc->blk_do_io = nfit_test_blk_do_io; nd_desc = &acpi_desc->nd_desc; - nd_desc->attr_groups = acpi_nfit_attribute_groups; + nd_desc->provider_name = NULL; + nd_desc->ndctl = nfit_test_ctl; acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc); if (!acpi_desc->nvdimm_bus) return -ENXIO; - INIT_LIST_HEAD(&acpi_desc->spa_maps); - INIT_LIST_HEAD(&acpi_desc->spas); - INIT_LIST_HEAD(&acpi_desc->dcrs); - INIT_LIST_HEAD(&acpi_desc->bdws); - INIT_LIST_HEAD(&acpi_desc->idts); - INIT_LIST_HEAD(&acpi_desc->flushes); - INIT_LIST_HEAD(&acpi_desc->memdevs); - INIT_LIST_HEAD(&acpi_desc->dimms); - mutex_init(&acpi_desc->spa_map_mutex); - mutex_init(&acpi_desc->init_mutex); - rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size); if (rc) { nvdimm_bus_unregister(acpi_desc->nvdimm_bus); |