diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/acpi/nfit/core.c | 210 | ||||
-rw-r--r-- | drivers/acpi/nfit/mce.c | 24 | ||||
-rw-r--r-- | drivers/acpi/nfit/nfit.h | 17 | ||||
-rw-r--r-- | drivers/nvdimm/bus.c | 2 | ||||
-rw-r--r-- | drivers/nvdimm/core.c | 73 | ||||
-rw-r--r-- | drivers/nvdimm/dimm.c | 11 | ||||
-rw-r--r-- | drivers/nvdimm/dimm_devs.c | 226 | ||||
-rw-r--r-- | drivers/nvdimm/label.c | 192 | ||||
-rw-r--r-- | drivers/nvdimm/namespace_devs.c | 792 | ||||
-rw-r--r-- | drivers/nvdimm/nd-core.h | 24 | ||||
-rw-r--r-- | drivers/nvdimm/nd.h | 29 | ||||
-rw-r--r-- | drivers/nvdimm/pmem.c | 28 | ||||
-rw-r--r-- | drivers/nvdimm/region_devs.c | 58 |
13 files changed, 1251 insertions, 435 deletions
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index e1d5ea6d5e40..71a7d07c28c9 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -886,6 +886,58 @@ static ssize_t revision_show(struct device *dev, } static DEVICE_ATTR_RO(revision); +static ssize_t hw_error_scrub_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + + return sprintf(buf, "%d\n", acpi_desc->scrub_mode); +} + +/* + * The 'hw_error_scrub' attribute can have the following values written to it: + * '0': Switch to the default mode where an exception will only insert + * the address of the memory error into the poison and badblocks lists. + * '1': Enable a full scrub to happen if an exception for a memory error is + * received. + */ +static ssize_t hw_error_scrub_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + struct nvdimm_bus_descriptor *nd_desc; + ssize_t rc; + long val; + + rc = kstrtol(buf, 0, &val); + if (rc) + return rc; + + device_lock(dev); + nd_desc = dev_get_drvdata(dev); + if (nd_desc) { + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + + switch (val) { + case HW_ERROR_SCRUB_ON: + acpi_desc->scrub_mode = HW_ERROR_SCRUB_ON; + break; + case HW_ERROR_SCRUB_OFF: + acpi_desc->scrub_mode = HW_ERROR_SCRUB_OFF; + break; + default: + rc = -EINVAL; + break; + } + } + device_unlock(dev); + if (rc) + return rc; + return size; +} +static DEVICE_ATTR_RW(hw_error_scrub); + /* * This shows the number of full Address Range Scrubs that have been * completed since driver load time. Userspace can wait on this using @@ -958,6 +1010,7 @@ static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n) static struct attribute *acpi_nfit_attributes[] = { &dev_attr_revision.attr, &dev_attr_scrub.attr, + &dev_attr_hw_error_scrub.attr, NULL, }; @@ -1256,6 +1309,44 @@ static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc, return NULL; } +void __acpi_nvdimm_notify(struct device *dev, u32 event) +{ + struct nfit_mem *nfit_mem; + struct acpi_nfit_desc *acpi_desc; + + dev_dbg(dev->parent, "%s: %s: event: %d\n", dev_name(dev), __func__, + event); + + if (event != NFIT_NOTIFY_DIMM_HEALTH) { + dev_dbg(dev->parent, "%s: unknown event: %d\n", dev_name(dev), + event); + return; + } + + acpi_desc = dev_get_drvdata(dev->parent); + if (!acpi_desc) + return; + + /* + * If we successfully retrieved acpi_desc, then we know nfit_mem data + * is still valid. + */ + nfit_mem = dev_get_drvdata(dev); + if (nfit_mem && nfit_mem->flags_attr) + sysfs_notify_dirent(nfit_mem->flags_attr); +} +EXPORT_SYMBOL_GPL(__acpi_nvdimm_notify); + +static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data) +{ + struct acpi_device *adev = data; + struct device *dev = &adev->dev; + + device_lock(dev->parent); + __acpi_nvdimm_notify(dev, event); + device_unlock(dev->parent); +} + static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, struct nfit_mem *nfit_mem, u32 device_handle) { @@ -1280,6 +1371,13 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, return force_enable_dimms ? 0 : -ENODEV; } + if (ACPI_FAILURE(acpi_install_notify_handler(adev_dimm->handle, + ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify, adev_dimm))) { + dev_err(dev, "%s: notification registration failed\n", + dev_name(&adev_dimm->dev)); + return -ENXIO; + } + /* * Until standardization materializes we need to consider 4 * different command sets. Note, that checking for function0 (bit0) @@ -1318,18 +1416,41 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, return 0; } +static void shutdown_dimm_notify(void *data) +{ + struct acpi_nfit_desc *acpi_desc = data; + struct nfit_mem *nfit_mem; + + mutex_lock(&acpi_desc->init_mutex); + /* + * Clear out the nfit_mem->flags_attr and shut down dimm event + * notifications. + */ + list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) { + struct acpi_device *adev_dimm = nfit_mem->adev; + + if (nfit_mem->flags_attr) { + sysfs_put(nfit_mem->flags_attr); + nfit_mem->flags_attr = NULL; + } + if (adev_dimm) + acpi_remove_notify_handler(adev_dimm->handle, + ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify); + } + mutex_unlock(&acpi_desc->init_mutex); +} + static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) { struct nfit_mem *nfit_mem; - int dimm_count = 0; + int dimm_count = 0, rc; + struct nvdimm *nvdimm; list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) { struct acpi_nfit_flush_address *flush; unsigned long flags = 0, cmd_mask; - struct nvdimm *nvdimm; u32 device_handle; u16 mem_flags; - int rc; device_handle = __to_nfit_memdev(nfit_mem)->device_handle; nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle); @@ -1382,7 +1503,30 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) } - return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count); + rc = nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count); + if (rc) + return rc; + + /* + * Now that dimms are successfully registered, and async registration + * is flushed, attempt to enable event notification. + */ + list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) { + struct kernfs_node *nfit_kernfs; + + nvdimm = nfit_mem->nvdimm; + nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit"); + if (nfit_kernfs) + nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs, + "flags"); + sysfs_put(nfit_kernfs); + if (!nfit_mem->flags_attr) + dev_warn(acpi_desc->dev, "%s: notifications disabled\n", + nvdimm_name(nvdimm)); + } + + return devm_add_action_or_reset(acpi_desc->dev, shutdown_dimm_notify, + acpi_desc); } static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) @@ -1491,9 +1635,9 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, if (!info) return -ENOMEM; for (i = 0; i < nr; i++) { - struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i]; + struct nd_mapping_desc *mapping = &ndr_desc->mapping[i]; struct nfit_set_info_map *map = &info->mapping[i]; - struct nvdimm *nvdimm = nd_mapping->nvdimm; + struct nvdimm *nvdimm = mapping->nvdimm; struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc, spa->range_index, i); @@ -1917,7 +2061,7 @@ static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc, } static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, - struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc, + struct nd_mapping_desc *mapping, struct nd_region_desc *ndr_desc, struct acpi_nfit_memory_map *memdev, struct nfit_spa *nfit_spa) { @@ -1934,12 +2078,12 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, return -ENODEV; } - nd_mapping->nvdimm = nvdimm; + mapping->nvdimm = nvdimm; switch (nfit_spa_type(spa)) { case NFIT_SPA_PM: case NFIT_SPA_VOLATILE: - nd_mapping->start = memdev->address; - nd_mapping->size = memdev->region_size; + mapping->start = memdev->address; + mapping->size = memdev->region_size; break; case NFIT_SPA_DCR: nfit_mem = nvdimm_provider_data(nvdimm); @@ -1947,13 +2091,13 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n", spa->range_index, nvdimm_name(nvdimm)); } else { - nd_mapping->size = nfit_mem->bdw->capacity; - nd_mapping->start = nfit_mem->bdw->start_address; + mapping->size = nfit_mem->bdw->capacity; + mapping->start = nfit_mem->bdw->start_address; ndr_desc->num_lanes = nfit_mem->bdw->windows; blk_valid = 1; } - ndr_desc->nd_mapping = nd_mapping; + ndr_desc->mapping = mapping; ndr_desc->num_mappings = blk_valid; ndbr_desc = to_blk_region_desc(ndr_desc); ndbr_desc->enable = acpi_nfit_blk_region_enable; @@ -1979,7 +2123,7 @@ static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa) static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa) { - static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS]; + static struct nd_mapping_desc mappings[ND_MAX_MAPPINGS]; struct acpi_nfit_system_address *spa = nfit_spa->spa; struct nd_blk_region_desc ndbr_desc; struct nd_region_desc *ndr_desc; @@ -1998,7 +2142,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, } memset(&res, 0, sizeof(res)); - memset(&nd_mappings, 0, sizeof(nd_mappings)); + memset(&mappings, 0, sizeof(mappings)); memset(&ndbr_desc, 0, sizeof(ndbr_desc)); res.start = spa->address; res.end = res.start + spa->length - 1; @@ -2014,7 +2158,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev; - struct nd_mapping *nd_mapping; + struct nd_mapping_desc *mapping; if (memdev->range_index != spa->range_index) continue; @@ -2023,14 +2167,14 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, spa->range_index, ND_MAX_MAPPINGS); return -ENXIO; } - nd_mapping = &nd_mappings[count++]; - rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc, + mapping = &mappings[count++]; + rc = acpi_nfit_init_mapping(acpi_desc, mapping, ndr_desc, memdev, nfit_spa); if (rc) goto out; } - ndr_desc->nd_mapping = nd_mappings; + ndr_desc->mapping = mappings; ndr_desc->num_mappings = count; rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa); if (rc) @@ -2678,29 +2822,30 @@ static int acpi_nfit_remove(struct acpi_device *adev) return 0; } -static void acpi_nfit_notify(struct acpi_device *adev, u32 event) +void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event) { - struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); + struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev); struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; - struct device *dev = &adev->dev; union acpi_object *obj; acpi_status status; int ret; dev_dbg(dev, "%s: event: %d\n", __func__, event); - device_lock(dev); + if (event != NFIT_NOTIFY_UPDATE) + return; + if (!dev->driver) { /* dev->driver may be null if we're being removed */ dev_dbg(dev, "%s: no driver found for dev\n", __func__); - goto out_unlock; + return; } if (!acpi_desc) { acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); if (!acpi_desc) - goto out_unlock; - acpi_nfit_desc_init(acpi_desc, &adev->dev); + return; + acpi_nfit_desc_init(acpi_desc, dev); } else { /* * Finish previous registration before considering new @@ -2710,10 +2855,10 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event) } /* Evaluate _FIT */ - status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf); + status = acpi_evaluate_object(handle, "_FIT", NULL, &buf); if (ACPI_FAILURE(status)) { dev_err(dev, "failed to evaluate _FIT\n"); - goto out_unlock; + return; } obj = buf.pointer; @@ -2725,9 +2870,14 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event) } else dev_err(dev, "Invalid _FIT\n"); kfree(buf.pointer); +} +EXPORT_SYMBOL_GPL(__acpi_nfit_notify); - out_unlock: - device_unlock(dev); +static void acpi_nfit_notify(struct acpi_device *adev, u32 event) +{ + device_lock(&adev->dev); + __acpi_nfit_notify(&adev->dev, adev->handle, event); + device_unlock(&adev->dev); } static const struct acpi_device_id acpi_nfit_ids[] = { diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c index 161f91539ae6..e5ce81c38eed 100644 --- a/drivers/acpi/nfit/mce.c +++ b/drivers/acpi/nfit/mce.c @@ -14,6 +14,7 @@ */ #include <linux/notifier.h> #include <linux/acpi.h> +#include <linux/nd.h> #include <asm/mce.h> #include "nfit.h" @@ -62,12 +63,25 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, } mutex_unlock(&acpi_desc->init_mutex); - /* - * We can ignore an -EBUSY here because if an ARS is already - * in progress, just let that be the last authoritative one - */ - if (found_match) + if (!found_match) + continue; + + /* If this fails due to an -ENOMEM, there is little we can do */ + nvdimm_bus_add_poison(acpi_desc->nvdimm_bus, + ALIGN(mce->addr, L1_CACHE_BYTES), + L1_CACHE_BYTES); + nvdimm_region_notify(nfit_spa->nd_region, + NVDIMM_REVALIDATE_POISON); + + if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) { + /* + * We can ignore an -EBUSY here because if an ARS is + * already in progress, just let that be the last + * authoritative one + */ acpi_nfit_ars_rescan(acpi_desc); + } + break; } mutex_unlock(&acpi_desc_lock); diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index e894ded24d99..14296f5267c8 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -78,6 +78,14 @@ enum { NFIT_ARS_TIMEOUT = 90, }; +enum nfit_root_notifiers { + NFIT_NOTIFY_UPDATE = 0x80, +}; + +enum nfit_dimm_notifiers { + NFIT_NOTIFY_DIMM_HEALTH = 0x81, +}; + struct nfit_spa { struct list_head list; struct nd_region *nd_region; @@ -124,6 +132,7 @@ struct nfit_mem { struct acpi_nfit_system_address *spa_bdw; struct acpi_nfit_interleave *idt_dcr; struct acpi_nfit_interleave *idt_bdw; + struct kernfs_node *flags_attr; struct nfit_flush *nfit_flush; struct list_head list; struct acpi_device *adev; @@ -152,6 +161,7 @@ struct acpi_nfit_desc { struct list_head list; struct kernfs_node *scrub_count_state; unsigned int scrub_count; + unsigned int scrub_mode; unsigned int cancel:1; unsigned long dimm_cmd_force_en; unsigned long bus_cmd_force_en; @@ -159,6 +169,11 @@ struct acpi_nfit_desc { void *iobuf, u64 len, int rw); }; +enum scrub_mode { + HW_ERROR_SCRUB_OFF, + HW_ERROR_SCRUB_ON, +}; + enum nd_blk_mmio_selector { BDW, DCR, @@ -223,5 +238,7 @@ static inline struct acpi_nfit_desc *to_acpi_desc( const u8 *to_nfit_uuid(enum nfit_uuids id); int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz); +void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event); +void __acpi_nvdimm_notify(struct device *dev, u32 event); void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev); #endif /* __NFIT_H__ */ diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 935866fe5ec2..a8b6949a8778 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -217,6 +217,8 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, return rc; if (cmd_rc < 0) return cmd_rc; + + nvdimm_clear_from_poison_list(nvdimm_bus, phys, len); return clear_err.cleared; } EXPORT_SYMBOL_GPL(nvdimm_clear_poison); diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 4d7bbd2df5c0..7ceba08774b6 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -547,11 +547,12 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region, } EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate); -static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) +static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length, + gfp_t flags) { struct nd_poison *pl; - pl = kzalloc(sizeof(*pl), GFP_KERNEL); + pl = kzalloc(sizeof(*pl), flags); if (!pl) return -ENOMEM; @@ -567,7 +568,7 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) struct nd_poison *pl; if (list_empty(&nvdimm_bus->poison_list)) - return add_poison(nvdimm_bus, addr, length); + return add_poison(nvdimm_bus, addr, length, GFP_KERNEL); /* * There is a chance this is a duplicate, check for those first. @@ -587,7 +588,7 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) * as any overlapping ranges will get resolved when the list is consumed * and converted to badblocks */ - return add_poison(nvdimm_bus, addr, length); + return add_poison(nvdimm_bus, addr, length, GFP_KERNEL); } int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) @@ -602,6 +603,70 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) } EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison); +void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus, + phys_addr_t start, unsigned int len) +{ + struct list_head *poison_list = &nvdimm_bus->poison_list; + u64 clr_end = start + len - 1; + struct nd_poison *pl, *next; + + nvdimm_bus_lock(&nvdimm_bus->dev); + WARN_ON_ONCE(list_empty(poison_list)); + + /* + * [start, clr_end] is the poison interval being cleared. + * [pl->start, pl_end] is the poison_list entry we're comparing + * the above interval against. The poison list entry may need + * to be modified (update either start or length), deleted, or + * split into two based on the overlap characteristics + */ + + list_for_each_entry_safe(pl, next, poison_list, list) { + u64 pl_end = pl->start + pl->length - 1; + + /* Skip intervals with no intersection */ + if (pl_end < start) + continue; + if (pl->start > clr_end) + continue; + /* Delete completely overlapped poison entries */ + if ((pl->start >= start) && (pl_end <= clr_end)) { + list_del(&pl->list); + kfree(pl); + continue; + } + /* Adjust start point of partially cleared entries */ + if ((start <= pl->start) && (clr_end > pl->start)) { + pl->length -= clr_end - pl->start + 1; + pl->start = clr_end + 1; + continue; + } + /* Adjust pl->length for partial clearing at the tail end */ + if ((pl->start < start) && (pl_end <= clr_end)) { + /* pl->start remains the same */ + pl->length = start - pl->start; + continue; + } + /* + * If clearing in the middle of an entry, we split it into + * two by modifying the current entry to represent one half of + * the split, and adding a new entry for the second half. + */ + if ((pl->start < start) && (pl_end > clr_end)) { + u64 new_start = clr_end + 1; + u64 new_len = pl_end - new_start + 1; + + /* Add new entry covering the right half */ + add_poison(nvdimm_bus, new_start, new_len, GFP_NOIO); + /* Adjust this entry to cover the left half */ + pl->length = start - pl->start; + continue; + } + } + nvdimm_bus_unlock(&nvdimm_bus->dev); +} +EXPORT_SYMBOL_GPL(nvdimm_clear_from_poison_list); + #ifdef CONFIG_BLK_DEV_INTEGRITY int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) { diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c index 71d12bb67339..619834e144d1 100644 --- a/drivers/nvdimm/dimm.c +++ b/drivers/nvdimm/dimm.c @@ -26,6 +26,14 @@ static int nvdimm_probe(struct device *dev) struct nvdimm_drvdata *ndd; int rc; + rc = nvdimm_check_config_data(dev); + if (rc) { + /* not required for non-aliased nvdimm, ex. NVDIMM-N */ + if (rc == -ENOTTY) + rc = 0; + return rc; + } + ndd = kzalloc(sizeof(*ndd), GFP_KERNEL); if (!ndd) return -ENOMEM; @@ -72,6 +80,9 @@ static int nvdimm_remove(struct device *dev) { struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); + if (!ndd) + return 0; + nvdimm_bus_lock(dev); dev_set_drvdata(dev, NULL); nvdimm_bus_unlock(dev); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index d9bba5edd8dc..d614493ad5ac 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -28,28 +28,30 @@ static DEFINE_IDA(dimm_ida); * Retrieve bus and dimm handle and return if this bus supports * get_config_data commands */ -static int __validate_dimm(struct nvdimm_drvdata *ndd) +int nvdimm_check_config_data(struct device *dev) { - struct nvdimm *nvdimm; - - if (!ndd) - return -EINVAL; - - nvdimm = to_nvdimm(ndd->dev); + struct nvdimm *nvdimm = to_nvdimm(dev); - if (!nvdimm->cmd_mask) - return -ENXIO; - if (!test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) - return -ENXIO; + if (!nvdimm->cmd_mask || + !test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) { + if (nvdimm->flags & NDD_ALIASING) + return -ENXIO; + else + return -ENOTTY; + } return 0; } static int validate_dimm(struct nvdimm_drvdata *ndd) { - int rc = __validate_dimm(ndd); + int rc; - if (rc && ndd) + if (!ndd) + return -EINVAL; + + rc = nvdimm_check_config_data(ndd->dev); + if (rc) dev_dbg(ndd->dev, "%pf: %s error: %d\n", __builtin_return_address(0), __func__, rc); return rc; @@ -263,6 +265,12 @@ const char *nvdimm_name(struct nvdimm *nvdimm) } EXPORT_SYMBOL_GPL(nvdimm_name); +struct kobject *nvdimm_kobj(struct nvdimm *nvdimm) +{ + return &nvdimm->dev.kobj; +} +EXPORT_SYMBOL_GPL(nvdimm_kobj); + unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm) { return nvdimm->cmd_mask; @@ -378,40 +386,166 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, } EXPORT_SYMBOL_GPL(nvdimm_create); +int alias_dpa_busy(struct device *dev, void *data) +{ + resource_size_t map_end, blk_start, new, busy; + struct blk_alloc_info *info = data; + struct nd_mapping *nd_mapping; + struct nd_region *nd_region; + struct nvdimm_drvdata *ndd; + struct resource *res; + int i; + + if (!is_nd_pmem(dev)) + return 0; + + nd_region = to_nd_region(dev); + for (i = 0; i < nd_region->ndr_mappings; i++) { + nd_mapping = &nd_region->mapping[i]; + if (nd_mapping->nvdimm == info->nd_mapping->nvdimm) + break; + } + + if (i >= nd_region->ndr_mappings) + return 0; + + ndd = to_ndd(nd_mapping); + map_end = nd_mapping->start + nd_mapping->size - 1; + blk_start = nd_mapping->start; + + /* + * In the allocation case ->res is set to free space that we are + * looking to validate against PMEM aliasing collision rules + * (i.e. BLK is allocated after all aliased PMEM). + */ + if (info->res) { + if (info->res->start >= nd_mapping->start + && info->res->start < map_end) + /* pass */; + else + return 0; + } + + retry: + /* + * Find the free dpa from the end of the last pmem allocation to + * the end of the interleave-set mapping that is not already + * covered by a blk allocation. + */ + busy = 0; + for_each_dpa_resource(ndd, res) { + if ((res->start >= blk_start && res->start < map_end) + || (res->end >= blk_start + && res->end <= map_end)) { + if (strncmp(res->name, "pmem", 4) == 0) { + new = max(blk_start, min(map_end + 1, + res->end + 1)); + if (new != blk_start) { + blk_start = new; + goto retry; + } + } else + busy += min(map_end, res->end) + - max(nd_mapping->start, res->start) + 1; + } else if (nd_mapping->start > res->start + && map_end < res->end) { + /* total eclipse of the PMEM region mapping */ + busy += nd_mapping->size; + break; + } + } + + /* update the free space range with the probed blk_start */ + if (info->res && blk_start > info->res->start) { + info->res->start = max(info->res->start, blk_start); + if (info->res->start > info->res->end) + info->res->end = info->res->start - 1; + return 1; + } + + info->available -= blk_start - nd_mapping->start + busy; + + return 0; +} + +static int blk_dpa_busy(struct device *dev, void *data) +{ + struct blk_alloc_info *info = data; + struct nd_mapping *nd_mapping; + struct nd_region *nd_region; + resource_size_t map_end; + int i; + + if (!is_nd_pmem(dev)) + return 0; + + nd_region = to_nd_region(dev); + for (i = 0; i < nd_region->ndr_mappings; i++) { + nd_mapping = &nd_region->mapping[i]; + if (nd_mapping->nvdimm == info->nd_mapping->nvdimm) + break; + } + + if (i >= nd_region->ndr_mappings) + return 0; + + map_end = nd_mapping->start + nd_mapping->size - 1; + if (info->res->start >= nd_mapping->start + && info->res->start < map_end) { + if (info->res->end <= map_end) { + info->busy = 0; + return 1; + } else { + info->busy -= info->res->end - map_end; + return 0; + } + } else if (info->res->end >= nd_mapping->start + && info->res->end <= map_end) { + info->busy -= nd_mapping->start - info->res->start; + return 0; + } else { + info->busy -= nd_mapping->size; + return 0; + } +} + /** * nd_blk_available_dpa - account the unused dpa of BLK region * @nd_mapping: container of dpa-resource-root + labels * - * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges. + * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges, but + * we arrange for them to never start at an lower dpa than the last + * PMEM allocation in an aliased region. */ -resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping) +resource_size_t nd_blk_available_dpa(struct nd_region *nd_region) { + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); - resource_size_t map_end, busy = 0, available; + struct blk_alloc_info info = { + .nd_mapping = nd_mapping, + .available = nd_mapping->size, + .res = NULL, + }; struct resource *res; if (!ndd) return 0; - map_end = nd_mapping->start + nd_mapping->size - 1; - for_each_dpa_resource(ndd, res) - if (res->start >= nd_mapping->start && res->start < map_end) { - resource_size_t end = min(map_end, res->end); + device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy); - busy += end - res->start + 1; - } else if (res->end >= nd_mapping->start - && res->end <= map_end) { - busy += res->end - nd_mapping->start; - } else if (nd_mapping->start > res->start - && nd_mapping->start < res->end) { - /* total eclipse of the BLK region mapping */ - busy += nd_mapping->size; - } + /* now account for busy blk allocations in unaliased dpa */ + for_each_dpa_resource(ndd, res) { + if (strncmp(res->name, "blk", 3) != 0) + continue; - available = map_end - nd_mapping->start + 1; - if (busy < available) - return available - busy; - return 0; + info.res = res; + info.busy = resource_size(res); + device_for_each_child(&nvdimm_bus->dev, &info, blk_dpa_busy); + info.available -= info.busy; + } + + return info.available; } /** @@ -443,21 +577,16 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, map_start = nd_mapping->start; map_end = map_start + nd_mapping->size - 1; blk_start = max(map_start, map_end + 1 - *overlap); - for_each_dpa_resource(ndd, res) + for_each_dpa_resource(ndd, res) { if (res->start >= map_start && res->start < map_end) { if (strncmp(res->name, "blk", 3) == 0) - blk_start = min(blk_start, res->start); - else if (res->start != map_start) { + blk_start = min(blk_start, + max(map_start, res->start)); + else if (res->end > map_end) { reason = "misaligned to iset"; goto err; - } else { - if (busy) { - reason = "duplicate overlapping PMEM reservations?"; - goto err; - } + } else busy += resource_size(res); - continue; - } } else if (res->end >= map_start && res->end <= map_end) { if (strncmp(res->name, "blk", 3) == 0) { /* @@ -466,15 +595,14 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, * be used for BLK. */ blk_start = map_start; - } else { - reason = "misaligned to iset"; - goto err; - } + } else + busy += resource_size(res); } else if (map_start > res->start && map_start < res->end) { /* total eclipse of the mapping */ busy += nd_mapping->size; blk_start = map_start; } + } *overlap = map_end + 1 - blk_start; available = blk_start - map_start; @@ -483,10 +611,6 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, return 0; err: - /* - * Something is wrong, PMEM must align with the start of the - * interleave set, and there can only be one allocation per set. - */ nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason); return 0; } diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 96526dcfdd37..fac7cabe8f56 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -494,11 +494,13 @@ static int __pmem_label_update(struct nd_region *nd_region, struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm, int pos) { - u64 cookie = nd_region_interleave_set_cookie(nd_region), rawsize; + u64 cookie = nd_region_interleave_set_cookie(nd_region); struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); - struct nd_namespace_label *victim_label; + struct nd_label_ent *label_ent, *victim = NULL; struct nd_namespace_label *nd_label; struct nd_namespace_index *nsindex; + struct nd_label_id label_id; + struct resource *res; unsigned long *free; u32 nslot, slot; size_t offset; @@ -507,6 +509,16 @@ static int __pmem_label_update(struct nd_region *nd_region, if (!preamble_next(ndd, &nsindex, &free, &nslot)) return -ENXIO; + nd_label_gen_id(&label_id, nspm->uuid, 0); + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id.id) == 0) + break; + + if (!res) { + WARN_ON_ONCE(1); + return -ENXIO; + } + /* allocate and write the label to the staging (next) index */ slot = nd_label_alloc_slot(ndd); if (slot == UINT_MAX) @@ -522,11 +534,10 @@ static int __pmem_label_update(struct nd_region *nd_region, nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings); nd_label->position = __cpu_to_le16(pos); nd_label->isetcookie = __cpu_to_le64(cookie); - rawsize = div_u64(resource_size(&nspm->nsio.res), - nd_region->ndr_mappings); - nd_label->rawsize = __cpu_to_le64(rawsize); - nd_label->dpa = __cpu_to_le64(nd_mapping->start); + nd_label->rawsize = __cpu_to_le64(resource_size(res)); + nd_label->dpa = __cpu_to_le64(res->start); nd_label->slot = __cpu_to_le32(slot); + nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__); /* update label */ offset = nd_label_offset(ndd, nd_label); @@ -536,38 +547,43 @@ static int __pmem_label_update(struct nd_region *nd_region, return rc; /* Garbage collect the previous label */ - victim_label = nd_mapping->labels[0]; - if (victim_label) { - slot = to_slot(ndd, victim_label); - nd_label_free_slot(ndd, slot); + mutex_lock(&nd_mapping->lock); + list_for_each_entry(label_ent, &nd_mapping->labels, list) { + if (!label_ent->label) + continue; + if (memcmp(nspm->uuid, label_ent->label->uuid, + NSLABEL_UUID_LEN) != 0) + continue; + victim = label_ent; + list_move_tail(&victim->list, &nd_mapping->labels); + break; + } + if (victim) { dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot); + slot = to_slot(ndd, victim->label); + nd_label_free_slot(ndd, slot); + victim->label = NULL; } /* update index */ rc = nd_label_write_index(ndd, ndd->ns_next, nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0); - if (rc < 0) - return rc; - - nd_mapping->labels[0] = nd_label; - - return 0; -} - -static void del_label(struct nd_mapping *nd_mapping, int l) -{ - struct nd_namespace_label *next_label, *nd_label; - struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); - unsigned int slot; - int j; - - nd_label = nd_mapping->labels[l]; - slot = to_slot(ndd, nd_label); - dev_vdbg(ndd->dev, "%s: clear: %d\n", __func__, slot); + if (rc == 0) { + list_for_each_entry(label_ent, &nd_mapping->labels, list) + if (!label_ent->label) { + label_ent->label = nd_label; + nd_label = NULL; + break; + } + dev_WARN_ONCE(&nspm->nsio.common.dev, nd_label, + "failed to track label: %d\n", + to_slot(ndd, nd_label)); + if (nd_label) + rc = -ENXIO; + } + mutex_unlock(&nd_mapping->lock); - for (j = l; (next_label = nd_mapping->labels[j + 1]); j++) - nd_mapping->labels[j] = next_label; - nd_mapping->labels[j] = NULL; + return rc; } static bool is_old_resource(struct resource *res, struct resource **list, int n) @@ -607,14 +623,16 @@ static int __blk_label_update(struct nd_region *nd_region, struct nd_mapping *nd_mapping, struct nd_namespace_blk *nsblk, int num_labels) { - int i, l, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO; + int i, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct nd_namespace_label *nd_label; + struct nd_label_ent *label_ent, *e; struct nd_namespace_index *nsindex; unsigned long *free, *victim_map = NULL; struct resource *res, **old_res_list; struct nd_label_id label_id; u8 uuid[NSLABEL_UUID_LEN]; + LIST_HEAD(list); u32 nslot, slot; if (!preamble_next(ndd, &nsindex, &free, &nslot)) @@ -736,15 +754,22 @@ static int __blk_label_update(struct nd_region *nd_region, * entries in nd_mapping->labels */ nlabel = 0; - for_each_label(l, nd_label, nd_mapping->labels) { + mutex_lock(&nd_mapping->lock); + list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) { + nd_label = label_ent->label; + if (!nd_label) + continue; nlabel++; memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0) continue; nlabel--; - del_label(nd_mapping, l); - l--; /* retry with the new label at this index */ + list_move(&label_ent->list, &list); + label_ent->label = NULL; } + list_splice_tail_init(&list, &nd_mapping->labels); + mutex_unlock(&nd_mapping->lock); + if (nlabel + nsblk->num_resources > num_labels) { /* * Bug, we can't end up with more resources than @@ -755,6 +780,15 @@ static int __blk_label_update(struct nd_region *nd_region, goto out; } + mutex_lock(&nd_mapping->lock); + label_ent = list_first_entry_or_null(&nd_mapping->labels, + typeof(*label_ent), list); + if (!label_ent) { + WARN_ON(1); + mutex_unlock(&nd_mapping->lock); + rc = -ENXIO; + goto out; + } for_each_clear_bit_le(slot, free, nslot) { nd_label = nd_label_base(ndd) + slot; memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); @@ -762,11 +796,19 @@ static int __blk_label_update(struct nd_region *nd_region, continue; res = to_resource(ndd, nd_label); res->flags &= ~DPA_RESOURCE_ADJUSTED; - dev_vdbg(&nsblk->common.dev, "assign label[%d] slot: %d\n", - l, slot); - nd_mapping->labels[l++] = nd_label; + dev_vdbg(&nsblk->common.dev, "assign label slot: %d\n", slot); + list_for_each_entry_from(label_ent, &nd_mapping->labels, list) { + if (label_ent->label) + continue; + label_ent->label = nd_label; + nd_label = NULL; + break; + } + if (nd_label) + dev_WARN(&nsblk->common.dev, + "failed to track label slot%d\n", slot); } - nd_mapping->labels[l] = NULL; + mutex_unlock(&nd_mapping->lock); out: kfree(old_res_list); @@ -788,32 +830,28 @@ static int __blk_label_update(struct nd_region *nd_region, static int init_labels(struct nd_mapping *nd_mapping, int num_labels) { - int i, l, old_num_labels = 0; + int i, old_num_labels = 0; + struct nd_label_ent *label_ent; struct nd_namespace_index *nsindex; - struct nd_namespace_label *nd_label; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); - size_t size = (num_labels + 1) * sizeof(struct nd_namespace_label *); - for_each_label(l, nd_label, nd_mapping->labels) + mutex_lock(&nd_mapping->lock); + list_for_each_entry(label_ent, &nd_mapping->labels, list) old_num_labels++; + mutex_unlock(&nd_mapping->lock); /* * We need to preserve all the old labels for the mapping so * they can be garbage collected after writing the new labels. */ - if (num_labels > old_num_labels) { - struct nd_namespace_label **labels; - - labels = krealloc(nd_mapping->labels, size, GFP_KERNEL); - if (!labels) + for (i = old_num_labels; i < num_labels; i++) { + label_ent = kzalloc(sizeof(*label_ent), GFP_KERNEL); + if (!label_ent) return -ENOMEM; - nd_mapping->labels = labels; + mutex_lock(&nd_mapping->lock); + list_add_tail(&label_ent->list, &nd_mapping->labels); + mutex_unlock(&nd_mapping->lock); } - if (!nd_mapping->labels) - return -ENOMEM; - - for (i = old_num_labels; i <= num_labels; i++) - nd_mapping->labels[i] = NULL; if (ndd->ns_current == -1 || ndd->ns_next == -1) /* pass */; @@ -837,42 +875,45 @@ static int init_labels(struct nd_mapping *nd_mapping, int num_labels) static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid) { struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); - struct nd_namespace_label *nd_label; + struct nd_label_ent *label_ent, *e; struct nd_namespace_index *nsindex; u8 label_uuid[NSLABEL_UUID_LEN]; - int l, num_freed = 0; unsigned long *free; + LIST_HEAD(list); u32 nslot, slot; + int active = 0; if (!uuid) return 0; /* no index || no labels == nothing to delete */ - if (!preamble_next(ndd, &nsindex, &free, &nslot) - || !nd_mapping->labels) + if (!preamble_next(ndd, &nsindex, &free, &nslot)) return 0; - for_each_label(l, nd_label, nd_mapping->labels) { + mutex_lock(&nd_mapping->lock); + list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) { + struct nd_namespace_label *nd_label = label_ent->label; + + if (!nd_label) + continue; + active++; memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN); if (memcmp(label_uuid, uuid, NSLABEL_UUID_LEN) != 0) continue; + active--; slot = to_slot(ndd, nd_label); nd_label_free_slot(ndd, slot); dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot); - del_label(nd_mapping, l); - num_freed++; - l--; /* retry with new label at this index */ + list_move_tail(&label_ent->list, &list); + label_ent->label = NULL; } + list_splice_tail_init(&list, &nd_mapping->labels); - if (num_freed > l) { - /* - * num_freed will only ever be > l when we delete the last - * label - */ - kfree(nd_mapping->labels); - nd_mapping->labels = NULL; - dev_dbg(ndd->dev, "%s: no more labels\n", __func__); + if (active == 0) { + nd_mapping_free_labels(nd_mapping); + dev_dbg(ndd->dev, "%s: no more active labels\n", __func__); } + mutex_unlock(&nd_mapping->lock); return nd_label_write_index(ndd, ndd->ns_next, nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0); @@ -885,7 +926,9 @@ int nd_pmem_namespace_label_update(struct nd_region *nd_region, for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; - int rc; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + int rc, count = 0; if (size == 0) { rc = del_labels(nd_mapping, nspm->uuid); @@ -894,7 +937,12 @@ int nd_pmem_namespace_label_update(struct nd_region *nd_region, continue; } - rc = init_labels(nd_mapping, 1); + for_each_dpa_resource(ndd, res) + if (strncmp(res->name, "pmem", 3) == 0) + count++; + WARN_ON_ONCE(!count); + + rc = init_labels(nd_mapping, count); if (rc < 0) return rc; diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index c5e3196c45b0..3509cff68ef9 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -12,8 +12,10 @@ */ #include <linux/module.h> #include <linux/device.h> +#include <linux/sort.h> #include <linux/slab.h> #include <linux/pmem.h> +#include <linux/list.h> #include <linux/nd.h> #include "nd-core.h" #include "nd.h" @@ -28,7 +30,10 @@ static void namespace_io_release(struct device *dev) static void namespace_pmem_release(struct device *dev) { struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + struct nd_region *nd_region = to_nd_region(dev->parent); + if (nspm->id >= 0) + ida_simple_remove(&nd_region->ns_ida, nspm->id); kfree(nspm->alt_name); kfree(nspm->uuid); kfree(nspm); @@ -62,17 +67,17 @@ static struct device_type namespace_blk_device_type = { .release = namespace_blk_release, }; -static bool is_namespace_pmem(struct device *dev) +static bool is_namespace_pmem(const struct device *dev) { return dev ? dev->type == &namespace_pmem_device_type : false; } -static bool is_namespace_blk(struct device *dev) +static bool is_namespace_blk(const struct device *dev) { return dev ? dev->type == &namespace_blk_device_type : false; } -static bool is_namespace_io(struct device *dev) +static bool is_namespace_io(const struct device *dev) { return dev ? dev->type == &namespace_io_device_type : false; } @@ -168,7 +173,21 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, suffix = "s"; if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) { - sprintf(name, "pmem%d%s", nd_region->id, suffix ? suffix : ""); + int nsidx = 0; + + if (is_namespace_pmem(&ndns->dev)) { + struct nd_namespace_pmem *nspm; + + nspm = to_nd_namespace_pmem(&ndns->dev); + nsidx = nspm->id; + } + + if (nsidx) + sprintf(name, "pmem%d.%d%s", nd_region->id, nsidx, + suffix ? suffix : ""); + else + sprintf(name, "pmem%d%s", nd_region->id, + suffix ? suffix : ""); } else if (is_namespace_blk(&ndns->dev)) { struct nd_namespace_blk *nsblk; @@ -294,7 +313,7 @@ static bool __nd_namespace_blk_validate(struct nd_namespace_blk *nsblk) if (strcmp(res->name, label_id.id) != 0) continue; /* - * Resources with unacknoweldged adjustments indicate a + * Resources with unacknowledged adjustments indicate a * failure to update labels */ if (res->flags & DPA_RESOURCE_ADJUSTED) @@ -510,19 +529,68 @@ static resource_size_t init_dpa_allocation(struct nd_label_id *label_id, return rc ? n : 0; } -static bool space_valid(bool is_pmem, bool is_reserve, - struct nd_label_id *label_id, struct resource *res) + +/** + * space_valid() - validate free dpa space against constraints + * @nd_region: hosting region of the free space + * @ndd: dimm device data for debug + * @label_id: namespace id to allocate space + * @prev: potential allocation that precedes free space + * @next: allocation that follows the given free space range + * @exist: first allocation with same id in the mapping + * @n: range that must satisfied for pmem allocations + * @valid: free space range to validate + * + * BLK-space is valid as long as it does not precede a PMEM + * allocation in a given region. PMEM-space must be contiguous + * and adjacent to an existing existing allocation (if one + * exists). If reserving PMEM any space is valid. + */ +static void space_valid(struct nd_region *nd_region, struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id, struct resource *prev, + struct resource *next, struct resource *exist, + resource_size_t n, struct resource *valid) { - /* - * For BLK-space any space is valid, for PMEM-space, it must be - * contiguous with an existing allocation unless we are - * reserving pmem. - */ - if (is_reserve || !is_pmem) - return true; - if (!res || strcmp(res->name, label_id->id) == 0) - return true; - return false; + bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0; + bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0; + + if (valid->start >= valid->end) + goto invalid; + + if (is_reserve) + return; + + if (!is_pmem) { + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct nvdimm_bus *nvdimm_bus; + struct blk_alloc_info info = { + .nd_mapping = nd_mapping, + .available = nd_mapping->size, + .res = valid, + }; + + WARN_ON(!is_nd_blk(&nd_region->dev)); + nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); + device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy); + return; + } + + /* allocation needs to be contiguous, so this is all or nothing */ + if (resource_size(valid) < n) + goto invalid; + + /* we've got all the space we need and no existing allocation */ + if (!exist) + return; + + /* allocation needs to be contiguous with the existing namespace */ + if (valid->start == exist->end + 1 + || valid->end == exist->start - 1) + return; + + invalid: + /* truncate @valid size to 0 */ + valid->end = valid->start - 1; } enum alloc_loc { @@ -534,18 +602,24 @@ static resource_size_t scan_allocate(struct nd_region *nd_region, resource_size_t n) { resource_size_t mapping_end = nd_mapping->start + nd_mapping->size - 1; - bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0; bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res, *exist = NULL, valid; const resource_size_t to_allocate = n; - struct resource *res; int first; + for_each_dpa_resource(ndd, res) + if (strcmp(label_id->id, res->name) == 0) + exist = res; + + valid.start = nd_mapping->start; + valid.end = mapping_end; + valid.name = "free space"; retry: first = 0; for_each_dpa_resource(ndd, res) { - resource_size_t allocate, available = 0, free_start, free_end; struct resource *next = res->sibling, *new_res = NULL; + resource_size_t allocate, available = 0; enum alloc_loc loc = ALLOC_ERR; const char *action; int rc = 0; @@ -558,32 +632,35 @@ static resource_size_t scan_allocate(struct nd_region *nd_region, /* space at the beginning of the mapping */ if (!first++ && res->start > nd_mapping->start) { - free_start = nd_mapping->start; - available = res->start - free_start; - if (space_valid(is_pmem, is_reserve, label_id, NULL)) + valid.start = nd_mapping->start; + valid.end = res->start - 1; + space_valid(nd_region, ndd, label_id, NULL, next, exist, + to_allocate, &valid); + available = resource_size(&valid); + if (available) loc = ALLOC_BEFORE; } /* space between allocations */ if (!loc && next) { - free_start = res->start + resource_size(res); - free_end = min(mapping_end, next->start - 1); - if (space_valid(is_pmem, is_reserve, label_id, res) - && free_start < free_end) { - available = free_end + 1 - free_start; + valid.start = res->start + resource_size(res); + valid.end = min(mapping_end, next->start - 1); + space_valid(nd_region, ndd, label_id, res, next, exist, + to_allocate, &valid); + available = resource_size(&valid); + if (available) loc = ALLOC_MID; - } } /* space at the end of the mapping */ if (!loc && !next) { - free_start = res->start + resource_size(res); - free_end = mapping_end; - if (space_valid(is_pmem, is_reserve, label_id, res) - && free_start < free_end) { - available = free_end + 1 - free_start; + valid.start = res->start + resource_size(res); + valid.end = mapping_end; + space_valid(nd_region, ndd, label_id, res, next, exist, + to_allocate, &valid); + available = resource_size(&valid); + if (available) loc = ALLOC_AFTER; - } } if (!loc || !available) @@ -593,8 +670,6 @@ static resource_size_t scan_allocate(struct nd_region *nd_region, case ALLOC_BEFORE: if (strcmp(res->name, label_id->id) == 0) { /* adjust current resource up */ - if (is_pmem && !is_reserve) - return n; rc = adjust_resource(res, res->start - allocate, resource_size(res) + allocate); action = "cur grow up"; @@ -604,8 +679,6 @@ static resource_size_t scan_allocate(struct nd_region *nd_region, case ALLOC_MID: if (strcmp(next->name, label_id->id) == 0) { /* adjust next resource up */ - if (is_pmem && !is_reserve) - return n; rc = adjust_resource(next, next->start - allocate, resource_size(next) + allocate); @@ -629,12 +702,10 @@ static resource_size_t scan_allocate(struct nd_region *nd_region, if (strcmp(action, "allocate") == 0) { /* BLK allocate bottom up */ if (!is_pmem) - free_start += available - allocate; - else if (!is_reserve && free_start != nd_mapping->start) - return n; + valid.start += available - allocate; new_res = nvdimm_allocate_dpa(ndd, label_id, - free_start, allocate); + valid.start, allocate); if (!new_res) rc = -EBUSY; } else if (strcmp(action, "grow down") == 0) { @@ -832,13 +903,45 @@ static int grow_dpa_allocation(struct nd_region *nd_region, return 0; } -static void nd_namespace_pmem_set_size(struct nd_region *nd_region, +static void nd_namespace_pmem_set_resource(struct nd_region *nd_region, struct nd_namespace_pmem *nspm, resource_size_t size) { struct resource *res = &nspm->nsio.res; + resource_size_t offset = 0; - res->start = nd_region->ndr_start; - res->end = nd_region->ndr_start + size - 1; + if (size && !nspm->uuid) { + WARN_ON_ONCE(1); + size = 0; + } + + if (size && nspm->uuid) { + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_label_id label_id; + struct resource *res; + + if (!ndd) { + size = 0; + goto out; + } + + nd_label_gen_id(&label_id, nspm->uuid, 0); + + /* calculate a spa offset from the dpa allocation offset */ + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id.id) == 0) { + offset = (res->start - nd_mapping->start) + * nd_region->ndr_mappings; + goto out; + } + + WARN_ON_ONCE(1); + size = 0; + } + + out: + res->start = nd_region->ndr_start + offset; + res->end = res->start + size - 1; } static bool uuid_not_set(const u8 *uuid, struct device *dev, const char *where) @@ -929,7 +1032,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val) if (is_namespace_pmem(dev)) { struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); - nd_namespace_pmem_set_size(nd_region, nspm, + nd_namespace_pmem_set_resource(nd_region, nspm, val * nd_region->ndr_mappings); } else if (is_namespace_blk(dev)) { struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); @@ -1031,22 +1134,27 @@ static ssize_t size_show(struct device *dev, } static DEVICE_ATTR(size, S_IRUGO, size_show, size_store); -static ssize_t uuid_show(struct device *dev, - struct device_attribute *attr, char *buf) +static u8 *namespace_to_uuid(struct device *dev) { - u8 *uuid; - if (is_namespace_pmem(dev)) { struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); - uuid = nspm->uuid; + return nspm->uuid; } else if (is_namespace_blk(dev)) { struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); - uuid = nsblk->uuid; + return nsblk->uuid; } else - return -ENXIO; + return ERR_PTR(-ENXIO); +} +static ssize_t uuid_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u8 *uuid = namespace_to_uuid(dev); + + if (IS_ERR(uuid)) + return PTR_ERR(uuid); if (uuid) return sprintf(buf, "%pUb\n", uuid); return sprintf(buf, "\n"); @@ -1089,7 +1197,7 @@ static int namespace_update_uuid(struct nd_region *nd_region, * * FIXME: can we delete uuid with zero dpa allocated? */ - if (nd_mapping->labels) + if (list_empty(&nd_mapping->labels)) return -EBUSY; } @@ -1491,14 +1599,19 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid, for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; - struct nd_namespace_label *nd_label; + struct nd_label_ent *label_ent; bool found_uuid = false; - int l; - for_each_label(l, nd_label, nd_mapping->labels) { - u64 isetcookie = __le64_to_cpu(nd_label->isetcookie); - u16 position = __le16_to_cpu(nd_label->position); - u16 nlabel = __le16_to_cpu(nd_label->nlabel); + list_for_each_entry(label_ent, &nd_mapping->labels, list) { + struct nd_namespace_label *nd_label = label_ent->label; + u16 position, nlabel; + u64 isetcookie; + + if (!nd_label) + continue; + isetcookie = __le64_to_cpu(nd_label->isetcookie); + position = __le16_to_cpu(nd_label->position); + nlabel = __le16_to_cpu(nd_label->nlabel); if (isetcookie != cookie) continue; @@ -1528,7 +1641,6 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid, static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id) { - struct nd_namespace_label *select = NULL; int i; if (!pmem_id) @@ -1536,90 +1648,106 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id) for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; - struct nd_namespace_label *nd_label; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_namespace_label *nd_label = NULL; u64 hw_start, hw_end, pmem_start, pmem_end; - int l; + struct nd_label_ent *label_ent; - for_each_label(l, nd_label, nd_mapping->labels) + WARN_ON(!mutex_is_locked(&nd_mapping->lock)); + list_for_each_entry(label_ent, &nd_mapping->labels, list) { + nd_label = label_ent->label; + if (!nd_label) + continue; if (memcmp(nd_label->uuid, pmem_id, NSLABEL_UUID_LEN) == 0) break; + nd_label = NULL; + } if (!nd_label) { WARN_ON(1); return -EINVAL; } - select = nd_label; /* * Check that this label is compliant with the dpa * range published in NFIT */ hw_start = nd_mapping->start; hw_end = hw_start + nd_mapping->size; - pmem_start = __le64_to_cpu(select->dpa); - pmem_end = pmem_start + __le64_to_cpu(select->rawsize); - if (pmem_start == hw_start && pmem_end <= hw_end) + pmem_start = __le64_to_cpu(nd_label->dpa); + pmem_end = pmem_start + __le64_to_cpu(nd_label->rawsize); + if (pmem_start >= hw_start && pmem_start < hw_end + && pmem_end <= hw_end && pmem_end > hw_start) /* pass */; - else + else { + dev_dbg(&nd_region->dev, "%s invalid label for %pUb\n", + dev_name(ndd->dev), nd_label->uuid); return -EINVAL; + } - nd_mapping->labels[0] = select; - nd_mapping->labels[1] = NULL; + /* move recently validated label to the front of the list */ + list_move(&label_ent->list, &nd_mapping->labels); } return 0; } /** - * find_pmem_label_set - validate interleave set labelling, retrieve label0 + * create_namespace_pmem - validate interleave set labelling, retrieve label0 * @nd_region: region with mappings to validate + * @nspm: target namespace to create + * @nd_label: target pmem namespace label to evaluate */ -static int find_pmem_label_set(struct nd_region *nd_region, - struct nd_namespace_pmem *nspm) +struct device *create_namespace_pmem(struct nd_region *nd_region, + struct nd_namespace_label *nd_label) { u64 cookie = nd_region_interleave_set_cookie(nd_region); - struct nd_namespace_label *nd_label; - u8 select_id[NSLABEL_UUID_LEN]; + struct nd_label_ent *label_ent; + struct nd_namespace_pmem *nspm; + struct nd_mapping *nd_mapping; resource_size_t size = 0; - u8 *pmem_id = NULL; - int rc = -ENODEV, l; + struct resource *res; + struct device *dev; + int rc = 0; u16 i; - if (cookie == 0) - return -ENXIO; + if (cookie == 0) { + dev_dbg(&nd_region->dev, "invalid interleave-set-cookie\n"); + return ERR_PTR(-ENXIO); + } - /* - * Find a complete set of labels by uuid. By definition we can start - * with any mapping as the reference label - */ - for_each_label(l, nd_label, nd_region->mapping[0].labels) { - u64 isetcookie = __le64_to_cpu(nd_label->isetcookie); + if (__le64_to_cpu(nd_label->isetcookie) != cookie) { + dev_dbg(&nd_region->dev, "invalid cookie in label: %pUb\n", + nd_label->uuid); + return ERR_PTR(-EAGAIN); + } - if (isetcookie != cookie) - continue; + nspm = kzalloc(sizeof(*nspm), GFP_KERNEL); + if (!nspm) + return ERR_PTR(-ENOMEM); - for (i = 0; nd_region->ndr_mappings; i++) - if (!has_uuid_at_pos(nd_region, nd_label->uuid, - cookie, i)) - break; - if (i < nd_region->ndr_mappings) { - /* - * Give up if we don't find an instance of a - * uuid at each position (from 0 to - * nd_region->ndr_mappings - 1), or if we find a - * dimm with two instances of the same uuid. - */ - rc = -EINVAL; - goto err; - } else if (pmem_id) { - /* - * If there is more than one valid uuid set, we - * need userspace to clean this up. - */ - rc = -EBUSY; - goto err; - } - memcpy(select_id, nd_label->uuid, NSLABEL_UUID_LEN); - pmem_id = select_id; + nspm->id = -1; + dev = &nspm->nsio.common.dev; + dev->type = &namespace_pmem_device_type; + dev->parent = &nd_region->dev; + res = &nspm->nsio.res; + res->name = dev_name(&nd_region->dev); + res->flags = IORESOURCE_MEM; + + for (i = 0; i < nd_region->ndr_mappings; i++) + if (!has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i)) + break; + if (i < nd_region->ndr_mappings) { + struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]); + + /* + * Give up if we don't find an instance of a uuid at each + * position (from 0 to nd_region->ndr_mappings - 1), or if we + * find a dimm with two instances of the same uuid. + */ + dev_err(&nd_region->dev, "%s missing label for %pUb\n", + dev_name(ndd->dev), nd_label->uuid); + rc = -EINVAL; + goto err; } /* @@ -1630,14 +1758,23 @@ static int find_pmem_label_set(struct nd_region *nd_region, * the dimm being enabled (i.e. nd_label_reserve_dpa() * succeeded). */ - rc = select_pmem_id(nd_region, pmem_id); + rc = select_pmem_id(nd_region, nd_label->uuid); if (rc) goto err; /* Calculate total size and populate namespace properties from label0 */ for (i = 0; i < nd_region->ndr_mappings; i++) { - struct nd_mapping *nd_mapping = &nd_region->mapping[i]; - struct nd_namespace_label *label0 = nd_mapping->labels[0]; + struct nd_namespace_label *label0; + + nd_mapping = &nd_region->mapping[i]; + label_ent = list_first_entry_or_null(&nd_mapping->labels, + typeof(*label_ent), list); + label0 = label_ent ? label_ent->label : 0; + + if (!label0) { + WARN_ON(1); + continue; + } size += __le64_to_cpu(label0->rawsize); if (__le16_to_cpu(label0->position) != 0) @@ -1654,10 +1791,11 @@ static int find_pmem_label_set(struct nd_region *nd_region, goto err; } - nd_namespace_pmem_set_size(nd_region, nspm, size); + nd_namespace_pmem_set_resource(nd_region, nspm, size); - return 0; + return dev; err: + namespace_pmem_release(dev); switch (rc) { case -EINVAL: dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__); @@ -1670,55 +1808,7 @@ static int find_pmem_label_set(struct nd_region *nd_region, __func__, rc); break; } - return rc; -} - -static struct device **create_namespace_pmem(struct nd_region *nd_region) -{ - struct nd_namespace_pmem *nspm; - struct device *dev, **devs; - struct resource *res; - int rc; - - nspm = kzalloc(sizeof(*nspm), GFP_KERNEL); - if (!nspm) - return NULL; - - dev = &nspm->nsio.common.dev; - dev->type = &namespace_pmem_device_type; - dev->parent = &nd_region->dev; - res = &nspm->nsio.res; - res->name = dev_name(&nd_region->dev); - res->flags = IORESOURCE_MEM; - rc = find_pmem_label_set(nd_region, nspm); - if (rc == -ENODEV) { - int i; - - /* Pass, try to permit namespace creation... */ - for (i = 0; i < nd_region->ndr_mappings; i++) { - struct nd_mapping *nd_mapping = &nd_region->mapping[i]; - - kfree(nd_mapping->labels); - nd_mapping->labels = NULL; - } - - /* Publish a zero-sized namespace for userspace to configure. */ - nd_namespace_pmem_set_size(nd_region, nspm, 0); - - rc = 0; - } else if (rc) - goto err; - - devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL); - if (!devs) - goto err; - - devs[0] = dev; - return devs; - - err: - namespace_pmem_release(&nspm->nsio.common.dev); - return NULL; + return ERR_PTR(rc); } struct resource *nsblk_add_resource(struct nd_region *nd_region, @@ -1770,16 +1860,58 @@ static struct device *nd_namespace_blk_create(struct nd_region *nd_region) return &nsblk->common.dev; } -void nd_region_create_blk_seed(struct nd_region *nd_region) +static struct device *nd_namespace_pmem_create(struct nd_region *nd_region) +{ + struct nd_namespace_pmem *nspm; + struct resource *res; + struct device *dev; + + if (!is_nd_pmem(&nd_region->dev)) + return NULL; + + nspm = kzalloc(sizeof(*nspm), GFP_KERNEL); + if (!nspm) + return NULL; + + dev = &nspm->nsio.common.dev; + dev->type = &namespace_pmem_device_type; + dev->parent = &nd_region->dev; + res = &nspm->nsio.res; + res->name = dev_name(&nd_region->dev); + res->flags = IORESOURCE_MEM; + + nspm->id = ida_simple_get(&nd_region->ns_ida, 0, 0, GFP_KERNEL); + if (nspm->id < 0) { + kfree(nspm); + return NULL; + } + dev_set_name(dev, "namespace%d.%d", nd_region->id, nspm->id); + dev->parent = &nd_region->dev; + dev->groups = nd_namespace_attribute_groups; + nd_namespace_pmem_set_resource(nd_region, nspm, 0); + + return dev; +} + +void nd_region_create_ns_seed(struct nd_region *nd_region) { WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); - nd_region->ns_seed = nd_namespace_blk_create(nd_region); + + if (nd_region_to_nstype(nd_region) == ND_DEVICE_NAMESPACE_IO) + return; + + if (is_nd_blk(&nd_region->dev)) + nd_region->ns_seed = nd_namespace_blk_create(nd_region); + else + nd_region->ns_seed = nd_namespace_pmem_create(nd_region); + /* * Seed creation failures are not fatal, provisioning is simply * disabled until memory becomes available */ if (!nd_region->ns_seed) - dev_err(&nd_region->dev, "failed to create blk namespace\n"); + dev_err(&nd_region->dev, "failed to create %s namespace\n", + is_nd_blk(&nd_region->dev) ? "blk" : "pmem"); else nd_device_register(nd_region->ns_seed); } @@ -1820,43 +1952,137 @@ void nd_region_create_btt_seed(struct nd_region *nd_region) dev_err(&nd_region->dev, "failed to create btt namespace\n"); } -static struct device **create_namespace_blk(struct nd_region *nd_region) +static int add_namespace_resource(struct nd_region *nd_region, + struct nd_namespace_label *nd_label, struct device **devs, + int count) { struct nd_mapping *nd_mapping = &nd_region->mapping[0]; - struct nd_namespace_label *nd_label; - struct device *dev, **devs = NULL; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + int i; + + for (i = 0; i < count; i++) { + u8 *uuid = namespace_to_uuid(devs[i]); + struct resource *res; + + if (IS_ERR_OR_NULL(uuid)) { + WARN_ON(1); + continue; + } + + if (memcmp(uuid, nd_label->uuid, NSLABEL_UUID_LEN) != 0) + continue; + if (is_namespace_blk(devs[i])) { + res = nsblk_add_resource(nd_region, ndd, + to_nd_namespace_blk(devs[i]), + __le64_to_cpu(nd_label->dpa)); + if (!res) + return -ENXIO; + nd_dbg_dpa(nd_region, ndd, res, "%d assign\n", count); + } else { + dev_err(&nd_region->dev, + "error: conflicting extents for uuid: %pUb\n", + nd_label->uuid); + return -ENXIO; + } + break; + } + + return i; +} + +struct device *create_namespace_blk(struct nd_region *nd_region, + struct nd_namespace_label *nd_label, int count) +{ + + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct nd_namespace_blk *nsblk; - struct nvdimm_drvdata *ndd; - int i, l, count = 0; + char *name[NSLABEL_NAME_LEN]; + struct device *dev = NULL; struct resource *res; - if (nd_region->ndr_mappings == 0) - return NULL; + nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL); + if (!nsblk) + return ERR_PTR(-ENOMEM); + dev = &nsblk->common.dev; + dev->type = &namespace_blk_device_type; + dev->parent = &nd_region->dev; + nsblk->id = -1; + nsblk->lbasize = __le64_to_cpu(nd_label->lbasize); + nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN, + GFP_KERNEL); + if (!nsblk->uuid) + goto blk_err; + memcpy(name, nd_label->name, NSLABEL_NAME_LEN); + if (name[0]) + nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN, + GFP_KERNEL); + res = nsblk_add_resource(nd_region, ndd, nsblk, + __le64_to_cpu(nd_label->dpa)); + if (!res) + goto blk_err; + nd_dbg_dpa(nd_region, ndd, res, "%d: assign\n", count); + return dev; + blk_err: + namespace_blk_release(dev); + return ERR_PTR(-ENXIO); +} + +static int cmp_dpa(const void *a, const void *b) +{ + const struct device *dev_a = *(const struct device **) a; + const struct device *dev_b = *(const struct device **) b; + struct nd_namespace_blk *nsblk_a, *nsblk_b; + struct nd_namespace_pmem *nspm_a, *nspm_b; + + if (is_namespace_io(dev_a)) + return 0; + + if (is_namespace_blk(dev_a)) { + nsblk_a = to_nd_namespace_blk(dev_a); + nsblk_b = to_nd_namespace_blk(dev_b); + + return memcmp(&nsblk_a->res[0]->start, &nsblk_b->res[0]->start, + sizeof(resource_size_t)); + } + + nspm_a = to_nd_namespace_pmem(dev_a); + nspm_b = to_nd_namespace_pmem(dev_b); + + return memcmp(&nspm_a->nsio.res.start, &nspm_b->nsio.res.start, + sizeof(resource_size_t)); +} - ndd = to_ndd(nd_mapping); - for_each_label(l, nd_label, nd_mapping->labels) { - u32 flags = __le32_to_cpu(nd_label->flags); - char *name[NSLABEL_NAME_LEN]; +static struct device **scan_labels(struct nd_region *nd_region) +{ + int i, count = 0; + struct device *dev, **devs = NULL; + struct nd_label_ent *label_ent, *e; + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + resource_size_t map_end = nd_mapping->start + nd_mapping->size - 1; + + /* "safe" because create_namespace_pmem() might list_move() label_ent */ + list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) { + struct nd_namespace_label *nd_label = label_ent->label; struct device **__devs; + u32 flags; - if (flags & NSLABEL_FLAG_LOCAL) - /* pass */; + if (!nd_label) + continue; + flags = __le32_to_cpu(nd_label->flags); + if (is_nd_blk(&nd_region->dev) + == !!(flags & NSLABEL_FLAG_LOCAL)) + /* pass, region matches label type */; else continue; - for (i = 0; i < count; i++) { - nsblk = to_nd_namespace_blk(devs[i]); - if (memcmp(nsblk->uuid, nd_label->uuid, - NSLABEL_UUID_LEN) == 0) { - res = nsblk_add_resource(nd_region, ndd, nsblk, - __le64_to_cpu(nd_label->dpa)); - if (!res) - goto err; - nd_dbg_dpa(nd_region, ndd, res, "%s assign\n", - dev_name(&nsblk->common.dev)); - break; - } - } + /* skip labels that describe extents outside of the region */ + if (nd_label->dpa < nd_mapping->start || nd_label->dpa > map_end) + continue; + + i = add_namespace_resource(nd_region, nd_label, devs, count); + if (i < 0) + goto err; if (i < count) continue; __devs = kcalloc(count + 2, sizeof(dev), GFP_KERNEL); @@ -1866,67 +2092,126 @@ static struct device **create_namespace_blk(struct nd_region *nd_region) kfree(devs); devs = __devs; - nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL); - if (!nsblk) - goto err; - dev = &nsblk->common.dev; - dev->type = &namespace_blk_device_type; - dev->parent = &nd_region->dev; - dev_set_name(dev, "namespace%d.%d", nd_region->id, count); - devs[count++] = dev; - nsblk->id = -1; - nsblk->lbasize = __le64_to_cpu(nd_label->lbasize); - nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN, - GFP_KERNEL); - if (!nsblk->uuid) - goto err; - memcpy(name, nd_label->name, NSLABEL_NAME_LEN); - if (name[0]) - nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN, - GFP_KERNEL); - res = nsblk_add_resource(nd_region, ndd, nsblk, - __le64_to_cpu(nd_label->dpa)); - if (!res) - goto err; - nd_dbg_dpa(nd_region, ndd, res, "%s assign\n", - dev_name(&nsblk->common.dev)); + if (is_nd_blk(&nd_region->dev)) { + dev = create_namespace_blk(nd_region, nd_label, count); + if (IS_ERR(dev)) + goto err; + devs[count++] = dev; + } else { + dev = create_namespace_pmem(nd_region, nd_label); + if (IS_ERR(dev)) { + switch (PTR_ERR(dev)) { + case -EAGAIN: + /* skip invalid labels */ + continue; + case -ENODEV: + /* fallthrough to seed creation */ + break; + default: + goto err; + } + } else + devs[count++] = dev; + } } - dev_dbg(&nd_region->dev, "%s: discovered %d blk namespace%s\n", - __func__, count, count == 1 ? "" : "s"); + dev_dbg(&nd_region->dev, "%s: discovered %d %s namespace%s\n", + __func__, count, is_nd_blk(&nd_region->dev) + ? "blk" : "pmem", count == 1 ? "" : "s"); if (count == 0) { /* Publish a zero-sized namespace for userspace to configure. */ - for (i = 0; i < nd_region->ndr_mappings; i++) { - struct nd_mapping *nd_mapping = &nd_region->mapping[i]; - - kfree(nd_mapping->labels); - nd_mapping->labels = NULL; - } + nd_mapping_free_labels(nd_mapping); devs = kcalloc(2, sizeof(dev), GFP_KERNEL); if (!devs) goto err; - nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL); - if (!nsblk) - goto err; - dev = &nsblk->common.dev; - dev->type = &namespace_blk_device_type; + if (is_nd_blk(&nd_region->dev)) { + struct nd_namespace_blk *nsblk; + + nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL); + if (!nsblk) + goto err; + dev = &nsblk->common.dev; + dev->type = &namespace_blk_device_type; + } else { + struct nd_namespace_pmem *nspm; + + nspm = kzalloc(sizeof(*nspm), GFP_KERNEL); + if (!nspm) + goto err; + dev = &nspm->nsio.common.dev; + dev->type = &namespace_pmem_device_type; + nd_namespace_pmem_set_resource(nd_region, nspm, 0); + } dev->parent = &nd_region->dev; devs[count++] = dev; + } else if (is_nd_pmem(&nd_region->dev)) { + /* clean unselected labels */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct list_head *l, *e; + LIST_HEAD(list); + int j; + + nd_mapping = &nd_region->mapping[i]; + if (list_empty(&nd_mapping->labels)) { + WARN_ON(1); + continue; + } + + j = count; + list_for_each_safe(l, e, &nd_mapping->labels) { + if (!j--) + break; + list_move_tail(l, &list); + } + nd_mapping_free_labels(nd_mapping); + list_splice_init(&list, &nd_mapping->labels); + } } + if (count > 1) + sort(devs, count, sizeof(struct device *), cmp_dpa, NULL); + return devs; -err: - for (i = 0; i < count; i++) { - nsblk = to_nd_namespace_blk(devs[i]); - namespace_blk_release(&nsblk->common.dev); - } + err: + for (i = 0; devs[i]; i++) + if (is_nd_blk(&nd_region->dev)) + namespace_blk_release(devs[i]); + else + namespace_pmem_release(devs[i]); kfree(devs); return NULL; } +static struct device **create_namespaces(struct nd_region *nd_region) +{ + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct device **devs; + int i; + + if (nd_region->ndr_mappings == 0) + return NULL; + + /* lock down all mappings while we scan labels */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + nd_mapping = &nd_region->mapping[i]; + mutex_lock_nested(&nd_mapping->lock, i); + } + + devs = scan_labels(nd_region); + + for (i = 0; i < nd_region->ndr_mappings; i++) { + int reverse = nd_region->ndr_mappings - 1 - i; + + nd_mapping = &nd_region->mapping[reverse]; + mutex_unlock(&nd_mapping->lock); + } + + return devs; +} + static int init_active_labels(struct nd_region *nd_region) { int i; @@ -1935,6 +2220,7 @@ static int init_active_labels(struct nd_region *nd_region) struct nd_mapping *nd_mapping = &nd_region->mapping[i]; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct nvdimm *nvdimm = nd_mapping->nvdimm; + struct nd_label_ent *label_ent; int count, j; /* @@ -1956,16 +2242,27 @@ static int init_active_labels(struct nd_region *nd_region) dev_dbg(ndd->dev, "%s: %d\n", __func__, count); if (!count) continue; - nd_mapping->labels = kcalloc(count + 1, sizeof(void *), - GFP_KERNEL); - if (!nd_mapping->labels) - return -ENOMEM; for (j = 0; j < count; j++) { struct nd_namespace_label *label; + label_ent = kzalloc(sizeof(*label_ent), GFP_KERNEL); + if (!label_ent) + break; label = nd_label_active(ndd, j); - nd_mapping->labels[j] = label; + label_ent->label = label; + + mutex_lock(&nd_mapping->lock); + list_add_tail(&label_ent->list, &nd_mapping->labels); + mutex_unlock(&nd_mapping->lock); } + + if (j >= count) + continue; + + mutex_lock(&nd_mapping->lock); + nd_mapping_free_labels(nd_mapping); + mutex_unlock(&nd_mapping->lock); + return -ENOMEM; } return 0; @@ -1990,10 +2287,8 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err) devs = create_namespace_io(nd_region); break; case ND_DEVICE_NAMESPACE_PMEM: - devs = create_namespace_pmem(nd_region); - break; case ND_DEVICE_NAMESPACE_BLK: - devs = create_namespace_blk(nd_region); + devs = create_namespaces(nd_region); break; default: break; @@ -2014,6 +2309,13 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err) id = ida_simple_get(&nd_region->ns_ida, 0, 0, GFP_KERNEL); nsblk->id = id; + } else if (type == ND_DEVICE_NAMESPACE_PMEM) { + struct nd_namespace_pmem *nspm; + + nspm = to_nd_namespace_pmem(dev); + id = ida_simple_get(&nd_region->ns_ida, 0, 0, + GFP_KERNEL); + nspm->id = id; } else id = i; diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 38ce6bbbc170..8623e57c2ce3 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -44,6 +44,23 @@ struct nvdimm { struct resource *flush_wpq; }; +/** + * struct blk_alloc_info - tracking info for BLK dpa scanning + * @nd_mapping: blk region mapping boundaries + * @available: decremented in alias_dpa_busy as aliased PMEM is scanned + * @busy: decremented in blk_dpa_busy to account for ranges already + * handled by alias_dpa_busy + * @res: alias_dpa_busy interprets this a free space range that needs to + * be truncated to the valid BLK allocation starting DPA, blk_dpa_busy + * treats it as a busy range that needs the aliased PMEM ranges + * truncated. + */ +struct blk_alloc_info { + struct nd_mapping *nd_mapping; + resource_size_t available, busy; + struct resource *res; +}; + bool is_nvdimm(struct device *dev); bool is_nd_pmem(struct device *dev); bool is_nd_blk(struct device *dev); @@ -54,7 +71,7 @@ void nvdimm_devs_exit(void); void nd_region_devs_exit(void); void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev); struct nd_region; -void nd_region_create_blk_seed(struct nd_region *nd_region); +void nd_region_create_ns_seed(struct nd_region *nd_region); void nd_region_create_btt_seed(struct nd_region *nd_region); void nd_region_create_pfn_seed(struct nd_region *nd_region); void nd_region_create_dax_seed(struct nd_region *nd_region); @@ -73,13 +90,14 @@ bool nd_is_uuid_unique(struct device *dev, u8 *uuid); struct nd_region; struct nvdimm_drvdata; struct nd_mapping; +void nd_mapping_free_labels(struct nd_mapping *nd_mapping); resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, struct nd_mapping *nd_mapping, resource_size_t *overlap); -resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping); +resource_size_t nd_blk_available_dpa(struct nd_region *nd_region); resource_size_t nd_region_available_dpa(struct nd_region *nd_region); resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, struct nd_label_id *label_id); -struct nd_mapping; +int alias_dpa_busy(struct device *dev, void *data); struct resource *nsblk_add_resource(struct nd_region *nd_region, struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk, resource_size_t start); diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 0b78a8211f4a..d3b2fca8deec 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -101,9 +101,6 @@ static inline struct nd_namespace_index *to_next_namespace_index( (unsigned long long) (res ? resource_size(res) : 0), \ (unsigned long long) (res ? res->start : 0), ##arg) -#define for_each_label(l, label, labels) \ - for (l = 0; (label = labels ? labels[l] : NULL); l++) - #define for_each_dpa_resource(ndd, res) \ for (res = (ndd)->dpa.child; res; res = res->sibling) @@ -116,6 +113,31 @@ struct nd_percpu_lane { spinlock_t lock; }; +struct nd_label_ent { + struct list_head list; + struct nd_namespace_label *label; +}; + +enum nd_mapping_lock_class { + ND_MAPPING_CLASS0, + ND_MAPPING_UUID_SCAN, +}; + +struct nd_mapping { + struct nvdimm *nvdimm; + u64 start; + u64 size; + struct list_head labels; + struct mutex lock; + /* + * @ndd is for private use at region enable / disable time for + * get_ndd() + put_ndd(), all other nd_mapping to ndd + * conversions use to_ndd() which respects enabled state of the + * nvdimm. + */ + struct nvdimm_drvdata *ndd; +}; + struct nd_region { struct device dev; struct ida ns_ida; @@ -209,6 +231,7 @@ void nvdimm_exit(void); void nd_region_exit(void); struct nvdimm; struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping); +int nvdimm_check_config_data(struct device *dev); int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 571a6c7ee2fc..42b3a8217073 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -66,13 +66,32 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, invalidate_pmem(pmem->virt_addr + offset, len); } +static void write_pmem(void *pmem_addr, struct page *page, + unsigned int off, unsigned int len) +{ + void *mem = kmap_atomic(page); + + memcpy_to_pmem(pmem_addr, mem + off, len); + kunmap_atomic(mem); +} + +static int read_pmem(struct page *page, unsigned int off, + void *pmem_addr, unsigned int len) +{ + int rc; + void *mem = kmap_atomic(page); + + rc = memcpy_from_pmem(mem + off, pmem_addr, len); + kunmap_atomic(mem); + return rc; +} + static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, unsigned int len, unsigned int off, bool is_write, sector_t sector) { int rc = 0; bool bad_pmem = false; - void *mem = kmap_atomic(page); phys_addr_t pmem_off = sector * 512 + pmem->data_offset; void *pmem_addr = pmem->virt_addr + pmem_off; @@ -83,7 +102,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, if (unlikely(bad_pmem)) rc = -EIO; else { - rc = memcpy_from_pmem(mem + off, pmem_addr, len); + rc = read_pmem(page, off, pmem_addr, len); flush_dcache_page(page); } } else { @@ -102,14 +121,13 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, * after clear poison. */ flush_dcache_page(page); - memcpy_to_pmem(pmem_addr, mem + off, len); + write_pmem(pmem_addr, page, off, len); if (unlikely(bad_pmem)) { pmem_clear_poison(pmem, pmem_off, len); - memcpy_to_pmem(pmem_addr, mem + off, len); + write_pmem(pmem_addr, page, off, len); } } - kunmap_atomic(mem); return rc; } diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index f9d58c2b5341..6af5e629140c 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -313,9 +313,8 @@ resource_size_t nd_region_available_dpa(struct nd_region *nd_region) blk_max_overlap = overlap; goto retry; } - } else if (is_nd_blk(&nd_region->dev)) { - available += nd_blk_available_dpa(nd_mapping); - } + } else if (is_nd_blk(&nd_region->dev)) + available += nd_blk_available_dpa(nd_region); } return available; @@ -506,6 +505,17 @@ u64 nd_region_interleave_set_cookie(struct nd_region *nd_region) return 0; } +void nd_mapping_free_labels(struct nd_mapping *nd_mapping) +{ + struct nd_label_ent *label_ent, *e; + + WARN_ON(!mutex_is_locked(&nd_mapping->lock)); + list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) { + list_del(&label_ent->list); + kfree(label_ent); + } +} + /* * Upon successful probe/remove, take/release a reference on the * associated interleave set (if present), and plant new btt + namespace @@ -526,8 +536,10 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, struct nvdimm_drvdata *ndd = nd_mapping->ndd; struct nvdimm *nvdimm = nd_mapping->nvdimm; - kfree(nd_mapping->labels); - nd_mapping->labels = NULL; + mutex_lock(&nd_mapping->lock); + nd_mapping_free_labels(nd_mapping); + mutex_unlock(&nd_mapping->lock); + put_ndd(ndd); nd_mapping->ndd = NULL; if (ndd) @@ -537,11 +549,12 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, if (is_nd_pmem(dev)) return; } - if (dev->parent && is_nd_blk(dev->parent) && probe) { + if (dev->parent && (is_nd_blk(dev->parent) || is_nd_pmem(dev->parent)) + && probe) { nd_region = to_nd_region(dev->parent); nvdimm_bus_lock(dev); if (nd_region->ns_seed == dev) - nd_region_create_blk_seed(nd_region); + nd_region_create_ns_seed(nd_region); nvdimm_bus_unlock(dev); } if (is_nd_btt(dev) && probe) { @@ -551,23 +564,30 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, nvdimm_bus_lock(dev); if (nd_region->btt_seed == dev) nd_region_create_btt_seed(nd_region); - if (nd_region->ns_seed == &nd_btt->ndns->dev && - is_nd_blk(dev->parent)) - nd_region_create_blk_seed(nd_region); + if (nd_region->ns_seed == &nd_btt->ndns->dev) + nd_region_create_ns_seed(nd_region); nvdimm_bus_unlock(dev); } if (is_nd_pfn(dev) && probe) { + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + nd_region = to_nd_region(dev->parent); nvdimm_bus_lock(dev); if (nd_region->pfn_seed == dev) nd_region_create_pfn_seed(nd_region); + if (nd_region->ns_seed == &nd_pfn->ndns->dev) + nd_region_create_ns_seed(nd_region); nvdimm_bus_unlock(dev); } if (is_nd_dax(dev) && probe) { + struct nd_dax *nd_dax = to_nd_dax(dev); + nd_region = to_nd_region(dev->parent); nvdimm_bus_lock(dev); if (nd_region->dax_seed == dev) nd_region_create_dax_seed(nd_region); + if (nd_region->ns_seed == &nd_dax->nd_pfn.ndns->dev) + nd_region_create_ns_seed(nd_region); nvdimm_bus_unlock(dev); } } @@ -774,10 +794,10 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, int ro = 0; for (i = 0; i < ndr_desc->num_mappings; i++) { - struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i]; - struct nvdimm *nvdimm = nd_mapping->nvdimm; + struct nd_mapping_desc *mapping = &ndr_desc->mapping[i]; + struct nvdimm *nvdimm = mapping->nvdimm; - if ((nd_mapping->start | nd_mapping->size) % SZ_4K) { + if ((mapping->start | mapping->size) % SZ_4K) { dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not 4K aligned\n", caller, dev_name(&nvdimm->dev), i); @@ -828,11 +848,15 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, ndl->count = 0; } - memcpy(nd_region->mapping, ndr_desc->nd_mapping, - sizeof(struct nd_mapping) * ndr_desc->num_mappings); for (i = 0; i < ndr_desc->num_mappings; i++) { - struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i]; - struct nvdimm *nvdimm = nd_mapping->nvdimm; + struct nd_mapping_desc *mapping = &ndr_desc->mapping[i]; + struct nvdimm *nvdimm = mapping->nvdimm; + + nd_region->mapping[i].nvdimm = nvdimm; + nd_region->mapping[i].start = mapping->start; + nd_region->mapping[i].size = mapping->size; + INIT_LIST_HEAD(&nd_region->mapping[i].labels); + mutex_init(&nd_region->mapping[i].lock); get_device(&nvdimm->dev); } |