From 62232e45f4a265abb43f0acf16e58f5d0b6e1ec9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 8 Jun 2015 14:27:06 -0400 Subject: libnvdimm: control (ioctl) messages for nvdimm_bus and nvdimm devices Most discovery/configuration of the nvdimm-subsystem is done via sysfs attributes. However, some nvdimm_bus instances, particularly the ACPI.NFIT bus, define a small set of messages that can be passed to the platform. For convenience we derive the initial libnvdimm-ioctl command formats directly from the NFIT DSM Interface Example formats. ND_CMD_SMART: media health and diagnostics ND_CMD_GET_CONFIG_SIZE: size of the label space ND_CMD_GET_CONFIG_DATA: read label space ND_CMD_SET_CONFIG_DATA: write label space ND_CMD_VENDOR: vendor-specific command passthrough ND_CMD_ARS_CAP: report address-range-scrubbing capabilities ND_CMD_ARS_START: initiate scrubbing ND_CMD_ARS_STATUS: report on scrubbing state ND_CMD_SMART_THRESHOLD: configure alarm thresholds for smart events If a platform later defines different commands than this set it is straightforward to extend support to those formats. Most of the commands target a specific dimm. However, the address-range-scrubbing commands target the bus. The 'commands' attribute in sysfs of an nvdimm_bus, or nvdimm, enumerate the supported commands for that object. Cc: Cc: Robert Moore Cc: Rafael J. Wysocki Reported-by: Nicholas Moulin Acked-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/uapi/linux/Kbuild | 1 + include/uapi/linux/ndctl.h | 178 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+) create mode 100644 include/uapi/linux/ndctl.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 1a0006a76b00..200cc5ea2998 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -271,6 +271,7 @@ header-y += ncp_fs.h header-y += ncp.h header-y += ncp_mount.h header-y += ncp_no.h +header-y += ndctl.h header-y += neighbour.h header-y += netconf.h header-y += netdevice.h diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h new file mode 100644 index 000000000000..ff13c23b26df --- /dev/null +++ b/include/uapi/linux/ndctl.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2014-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU Lesser General Public License, + * version 2.1, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + */ +#ifndef __NDCTL_H__ +#define __NDCTL_H__ + +#include + +struct nd_cmd_smart { + __u32 status; + __u8 data[128]; +} __packed; + +struct nd_cmd_smart_threshold { + __u32 status; + __u8 data[8]; +} __packed; + +struct nd_cmd_dimm_flags { + __u32 status; + __u32 flags; +} __packed; + +struct nd_cmd_get_config_size { + __u32 status; + __u32 config_size; + __u32 max_xfer; +} __packed; + +struct nd_cmd_get_config_data_hdr { + __u32 in_offset; + __u32 in_length; + __u32 status; + __u8 out_buf[0]; +} __packed; + +struct nd_cmd_set_config_hdr { + __u32 in_offset; + __u32 in_length; + __u8 in_buf[0]; +} __packed; + +struct nd_cmd_vendor_hdr { + __u32 opcode; + __u32 in_length; + __u8 in_buf[0]; +} __packed; + +struct nd_cmd_vendor_tail { + __u32 status; + __u32 out_length; + __u8 out_buf[0]; +} __packed; + +struct nd_cmd_ars_cap { + __u64 address; + __u64 length; + __u32 status; + __u32 max_ars_out; +} __packed; + +struct nd_cmd_ars_start { + __u64 address; + __u64 length; + __u16 type; + __u8 reserved[6]; + __u32 status; +} __packed; + +struct nd_cmd_ars_status { + __u32 status; + __u32 out_length; + __u64 address; + __u64 length; + __u16 type; + __u32 num_records; + struct nd_ars_record { + __u32 handle; + __u32 flags; + __u64 err_address; + __u64 mask; + } __packed records[0]; +} __packed; + +enum { + ND_CMD_IMPLEMENTED = 0, + + /* bus commands */ + ND_CMD_ARS_CAP = 1, + ND_CMD_ARS_START = 2, + ND_CMD_ARS_STATUS = 3, + + /* per-dimm commands */ + ND_CMD_SMART = 1, + ND_CMD_SMART_THRESHOLD = 2, + ND_CMD_DIMM_FLAGS = 3, + ND_CMD_GET_CONFIG_SIZE = 4, + ND_CMD_GET_CONFIG_DATA = 5, + ND_CMD_SET_CONFIG_DATA = 6, + ND_CMD_VENDOR_EFFECT_LOG_SIZE = 7, + ND_CMD_VENDOR_EFFECT_LOG = 8, + ND_CMD_VENDOR = 9, +}; + +static inline const char *nvdimm_bus_cmd_name(unsigned cmd) +{ + static const char * const names[] = { + [ND_CMD_ARS_CAP] = "ars_cap", + [ND_CMD_ARS_START] = "ars_start", + [ND_CMD_ARS_STATUS] = "ars_status", + }; + + if (cmd < ARRAY_SIZE(names) && names[cmd]) + return names[cmd]; + return "unknown"; +} + +static inline const char *nvdimm_cmd_name(unsigned cmd) +{ + static const char * const names[] = { + [ND_CMD_SMART] = "smart", + [ND_CMD_SMART_THRESHOLD] = "smart_thresh", + [ND_CMD_DIMM_FLAGS] = "flags", + [ND_CMD_GET_CONFIG_SIZE] = "get_size", + [ND_CMD_GET_CONFIG_DATA] = "get_data", + [ND_CMD_SET_CONFIG_DATA] = "set_data", + [ND_CMD_VENDOR_EFFECT_LOG_SIZE] = "effect_size", + [ND_CMD_VENDOR_EFFECT_LOG] = "effect_log", + [ND_CMD_VENDOR] = "vendor", + }; + + if (cmd < ARRAY_SIZE(names) && names[cmd]) + return names[cmd]; + return "unknown"; +} + +#define ND_IOCTL 'N' + +#define ND_IOCTL_SMART _IOWR(ND_IOCTL, ND_CMD_SMART,\ + struct nd_cmd_smart) + +#define ND_IOCTL_SMART_THRESHOLD _IOWR(ND_IOCTL, ND_CMD_SMART_THRESHOLD,\ + struct nd_cmd_smart_threshold) + +#define ND_IOCTL_DIMM_FLAGS _IOWR(ND_IOCTL, ND_CMD_DIMM_FLAGS,\ + struct nd_cmd_dimm_flags) + +#define ND_IOCTL_GET_CONFIG_SIZE _IOWR(ND_IOCTL, ND_CMD_GET_CONFIG_SIZE,\ + struct nd_cmd_get_config_size) + +#define ND_IOCTL_GET_CONFIG_DATA _IOWR(ND_IOCTL, ND_CMD_GET_CONFIG_DATA,\ + struct nd_cmd_get_config_data_hdr) + +#define ND_IOCTL_SET_CONFIG_DATA _IOWR(ND_IOCTL, ND_CMD_SET_CONFIG_DATA,\ + struct nd_cmd_set_config_hdr) + +#define ND_IOCTL_VENDOR _IOWR(ND_IOCTL, ND_CMD_VENDOR,\ + struct nd_cmd_vendor_hdr) + +#define ND_IOCTL_ARS_CAP _IOWR(ND_IOCTL, ND_CMD_ARS_CAP,\ + struct nd_cmd_ars_cap) + +#define ND_IOCTL_ARS_START _IOWR(ND_IOCTL, ND_CMD_ARS_START,\ + struct nd_cmd_ars_start) + +#define ND_IOCTL_ARS_STATUS _IOWR(ND_IOCTL, ND_CMD_ARS_STATUS,\ + struct nd_cmd_ars_status) + +#endif /* __NDCTL_H__ */ -- cgit v1.2.3 From 4d88a97aa9e8cfa6460aab119c5da60ad2267423 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 31 May 2015 14:41:48 -0400 Subject: libnvdimm, nvdimm: dimm driver and base libnvdimm device-driver infrastructure * Implement the device-model infrastructure for loading modules and attaching drivers to nvdimm devices. This is a simple association of a nd-device-type number with a driver that has a bitmask of supported device types. To facilitate userspace bind/unbind operations 'modalias' and 'devtype', that also appear in the uevent, are added as generic sysfs attributes for all nvdimm devices. The reason for the device-type number is to support sub-types within a given parent devtype, be it a vendor-specific sub-type or otherwise. * The first consumer of this infrastructure is the driver for dimm devices. It simply uses control messages to retrieve and store the configuration-data image (label set) from each dimm. Note: nd_device_register() arranges for asynchronous registration of nvdimm bus devices by default. Cc: Greg KH Cc: Neil Brown Acked-by: Christoph Hellwig Tested-by: Toshi Kani Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 13 +++- drivers/nvdimm/Makefile | 1 + drivers/nvdimm/bus.c | 168 ++++++++++++++++++++++++++++++++++++++++++++- drivers/nvdimm/core.c | 43 +++++++++++- drivers/nvdimm/dimm.c | 92 +++++++++++++++++++++++++ drivers/nvdimm/dimm_devs.c | 136 ++++++++++++++++++++++++++++++++++-- drivers/nvdimm/nd-core.h | 6 +- drivers/nvdimm/nd.h | 36 ++++++++++ include/linux/libnvdimm.h | 2 + include/linux/nd.h | 39 +++++++++++ include/uapi/linux/ndctl.h | 6 ++ 11 files changed, 527 insertions(+), 15 deletions(-) create mode 100644 drivers/nvdimm/dimm.c create mode 100644 drivers/nvdimm/nd.h create mode 100644 include/linux/nd.h (limited to 'include/uapi/linux') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 9112a6210a4b..c4ccec1bc60b 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -18,6 +18,10 @@ #include #include "nfit.h" +static bool force_enable_dimms; +module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status"); + static u8 nfit_uuid[NFIT_UUID_MAX][16]; static const u8 *to_nfit_uuid(enum nfit_uuids id) @@ -633,6 +637,7 @@ static struct attribute_group acpi_nfit_dimm_attribute_group = { static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = { &nvdimm_attribute_group, + &nd_device_attribute_group, &acpi_nfit_dimm_attribute_group, NULL, }; @@ -669,7 +674,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, if (!adev_dimm) { dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n", device_handle); - return -ENODEV; + return force_enable_dimms ? 0 : -ENODEV; } status = acpi_evaluate_integer(adev_dimm->handle, "_STA", NULL, &sta); @@ -690,12 +695,13 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i)) set_bit(i, &nfit_mem->dsm_mask); - return rc; + return force_enable_dimms ? 0 : rc; } static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) { struct nfit_mem *nfit_mem; + int dimm_count = 0; list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) { struct nvdimm *nvdimm; @@ -729,9 +735,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) return -ENOMEM; nfit_mem->nvdimm = nvdimm; + dimm_count++; } - return 0; + return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count); } static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 5b68738ba406..d44b5c1fcd3b 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o libnvdimm-y := core.o libnvdimm-y += bus.o libnvdimm-y += dimm_devs.o +libnvdimm-y += dimm.o diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 15f3a3ddc225..a0308f1872bf 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -16,19 +16,183 @@ #include #include #include +#include #include #include #include #include +#include #include "nd-core.h" +#include "nd.h" int nvdimm_major; static int nvdimm_bus_major; static struct class *nd_class; -struct bus_type nvdimm_bus_type = { +static int to_nd_device_type(struct device *dev) +{ + if (is_nvdimm(dev)) + return ND_DEVICE_DIMM; + + return 0; +} + +static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT, + to_nd_device_type(dev)); +} + +static int nvdimm_bus_match(struct device *dev, struct device_driver *drv) +{ + struct nd_device_driver *nd_drv = to_nd_device_driver(drv); + + return test_bit(to_nd_device_type(dev), &nd_drv->type); +} + +static int nvdimm_bus_probe(struct device *dev) +{ + struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + int rc; + + rc = nd_drv->probe(dev); + dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name, + dev_name(dev), rc); + return rc; +} + +static int nvdimm_bus_remove(struct device *dev) +{ + struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + int rc; + + rc = nd_drv->remove(dev); + dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name, + dev_name(dev), rc); + return rc; +} + +static struct bus_type nvdimm_bus_type = { .name = "nd", + .uevent = nvdimm_bus_uevent, + .match = nvdimm_bus_match, + .probe = nvdimm_bus_probe, + .remove = nvdimm_bus_remove, +}; + +static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain); + +void nd_synchronize(void) +{ + async_synchronize_full_domain(&nd_async_domain); +} +EXPORT_SYMBOL_GPL(nd_synchronize); + +static void nd_async_device_register(void *d, async_cookie_t cookie) +{ + struct device *dev = d; + + if (device_add(dev) != 0) { + dev_err(dev, "%s: failed\n", __func__); + put_device(dev); + } + put_device(dev); +} + +static void nd_async_device_unregister(void *d, async_cookie_t cookie) +{ + struct device *dev = d; + + device_unregister(dev); + put_device(dev); +} + +void nd_device_register(struct device *dev) +{ + dev->bus = &nvdimm_bus_type; + device_initialize(dev); + get_device(dev); + async_schedule_domain(nd_async_device_register, dev, + &nd_async_domain); +} +EXPORT_SYMBOL(nd_device_register); + +void nd_device_unregister(struct device *dev, enum nd_async_mode mode) +{ + switch (mode) { + case ND_ASYNC: + get_device(dev); + async_schedule_domain(nd_async_device_unregister, dev, + &nd_async_domain); + break; + case ND_SYNC: + nd_synchronize(); + device_unregister(dev); + break; + } +} +EXPORT_SYMBOL(nd_device_unregister); + +/** + * __nd_driver_register() - register a region or a namespace driver + * @nd_drv: driver to register + * @owner: automatically set by nd_driver_register() macro + * @mod_name: automatically set by nd_driver_register() macro + */ +int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner, + const char *mod_name) +{ + struct device_driver *drv = &nd_drv->drv; + + if (!nd_drv->type) { + pr_debug("driver type bitmask not set (%pf)\n", + __builtin_return_address(0)); + return -EINVAL; + } + + if (!nd_drv->probe || !nd_drv->remove) { + pr_debug("->probe() and ->remove() must be specified\n"); + return -EINVAL; + } + + drv->bus = &nvdimm_bus_type; + drv->owner = owner; + drv->mod_name = mod_name; + + return driver_register(drv); +} +EXPORT_SYMBOL(__nd_driver_register); + +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, ND_DEVICE_MODALIAS_FMT "\n", + to_nd_device_type(dev)); +} +static DEVICE_ATTR_RO(modalias); + +static ssize_t devtype_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%s\n", dev->type->name); +} +static DEVICE_ATTR_RO(devtype); + +static struct attribute *nd_device_attributes[] = { + &dev_attr_modalias.attr, + &dev_attr_devtype.attr, + NULL, +}; + +/** + * nd_device_attribute_group - generic attributes for all devices on an nd bus + */ +struct attribute_group nd_device_attribute_group = { + .attrs = nd_device_attributes, }; +EXPORT_SYMBOL_GPL(nd_device_attribute_group); int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus) { @@ -404,7 +568,7 @@ int __init nvdimm_bus_init(void) return rc; } -void __exit nvdimm_bus_exit(void) +void nvdimm_bus_exit(void) { class_destroy(nd_class); unregister_chrdev(nvdimm_bus_major, "ndctl"); diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 1ce159095c52..50ab880f0dc0 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -18,6 +18,7 @@ #include #include #include "nd-core.h" +#include "nd.h" LIST_HEAD(nvdimm_bus_list); DEFINE_MUTEX(nvdimm_bus_list_mutex); @@ -98,8 +99,33 @@ static ssize_t provider_show(struct device *dev, } static DEVICE_ATTR_RO(provider); +static int flush_namespaces(struct device *dev, void *data) +{ + device_lock(dev); + device_unlock(dev); + return 0; +} + +static int flush_regions_dimms(struct device *dev, void *data) +{ + device_lock(dev); + device_unlock(dev); + device_for_each_child(dev, NULL, flush_namespaces); + return 0; +} + +static ssize_t wait_probe_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + nd_synchronize(); + device_for_each_child(dev, NULL, flush_regions_dimms); + return sprintf(buf, "1\n"); +} +static DEVICE_ATTR_RO(wait_probe); + static struct attribute *nvdimm_bus_attributes[] = { &dev_attr_commands.attr, + &dev_attr_wait_probe.attr, &dev_attr_provider.attr, NULL, }; @@ -161,7 +187,7 @@ static int child_unregister(struct device *dev, void *data) if (dev->class) /* pass */; else - device_unregister(dev); + nd_device_unregister(dev, ND_SYNC); return 0; } @@ -174,6 +200,7 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus) list_del_init(&nvdimm_bus->list); mutex_unlock(&nvdimm_bus_list_mutex); + nd_synchronize(); device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); nvdimm_bus_destroy_ndctl(nvdimm_bus); @@ -183,12 +210,24 @@ EXPORT_SYMBOL_GPL(nvdimm_bus_unregister); static __init int libnvdimm_init(void) { - return nvdimm_bus_init(); + int rc; + + rc = nvdimm_bus_init(); + if (rc) + return rc; + rc = nvdimm_init(); + if (rc) + goto err_dimm; + return 0; + err_dimm: + nvdimm_bus_exit(); + return rc; } static __exit void libnvdimm_exit(void) { WARN_ON(!list_empty(&nvdimm_bus_list)); + nvdimm_exit(); nvdimm_bus_exit(); } diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c new file mode 100644 index 000000000000..28001a6ccd4e --- /dev/null +++ b/drivers/nvdimm/dimm.c @@ -0,0 +1,92 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "nd.h" + +static void free_data(struct nvdimm_drvdata *ndd) +{ + if (!ndd) + return; + + if (ndd->data && is_vmalloc_addr(ndd->data)) + vfree(ndd->data); + else + kfree(ndd->data); + kfree(ndd); +} + +static int nvdimm_probe(struct device *dev) +{ + struct nvdimm_drvdata *ndd; + int rc; + + ndd = kzalloc(sizeof(*ndd), GFP_KERNEL); + if (!ndd) + return -ENOMEM; + + dev_set_drvdata(dev, ndd); + ndd->dev = dev; + + rc = nvdimm_init_nsarea(ndd); + if (rc) + goto err; + + rc = nvdimm_init_config_data(ndd); + if (rc) + goto err; + + dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size); + + return 0; + + err: + free_data(ndd); + return rc; +} + +static int nvdimm_remove(struct device *dev) +{ + struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); + + free_data(ndd); + + return 0; +} + +static struct nd_device_driver nvdimm_driver = { + .probe = nvdimm_probe, + .remove = nvdimm_remove, + .drv = { + .name = "nvdimm", + }, + .type = ND_DRIVER_DIMM, +}; + +int __init nvdimm_init(void) +{ + return nd_driver_register(&nvdimm_driver); +} + +void __exit nvdimm_exit(void) +{ + driver_unregister(&nvdimm_driver.drv); +} + +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DIMM); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index c3dd7227d1bb..b3ae86f2e1da 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -11,6 +11,7 @@ * General Public License for more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -18,9 +19,115 @@ #include #include #include "nd-core.h" +#include "nd.h" static DEFINE_IDA(dimm_ida); +/* + * Retrieve bus and dimm handle and return if this bus supports + * get_config_data commands + */ +static int __validate_dimm(struct nvdimm_drvdata *ndd) +{ + struct nvdimm *nvdimm; + + if (!ndd) + return -EINVAL; + + nvdimm = to_nvdimm(ndd->dev); + + if (!nvdimm->dsm_mask) + return -ENXIO; + if (!test_bit(ND_CMD_GET_CONFIG_DATA, nvdimm->dsm_mask)) + return -ENXIO; + + return 0; +} + +static int validate_dimm(struct nvdimm_drvdata *ndd) +{ + int rc = __validate_dimm(ndd); + + if (rc && ndd) + dev_dbg(ndd->dev, "%pf: %s error: %d\n", + __builtin_return_address(0), __func__, rc); + return rc; +} + +/** + * nvdimm_init_nsarea - determine the geometry of a dimm's namespace area + * @nvdimm: dimm to initialize + */ +int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) +{ + struct nd_cmd_get_config_size *cmd = &ndd->nsarea; + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + struct nvdimm_bus_descriptor *nd_desc; + int rc = validate_dimm(ndd); + + if (rc) + return rc; + + if (cmd->config_size) + return 0; /* already valid */ + + memset(cmd, 0, sizeof(*cmd)); + nd_desc = nvdimm_bus->nd_desc; + return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), + ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd)); +} + +int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + struct nd_cmd_get_config_data_hdr *cmd; + struct nvdimm_bus_descriptor *nd_desc; + int rc = validate_dimm(ndd); + u32 max_cmd_size, config_size; + size_t offset; + + if (rc) + return rc; + + if (ndd->data) + return 0; + + if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0) + return -ENXIO; + + ndd->data = kmalloc(ndd->nsarea.config_size, GFP_KERNEL); + if (!ndd->data) + ndd->data = vmalloc(ndd->nsarea.config_size); + + if (!ndd->data) + return -ENOMEM; + + max_cmd_size = min_t(u32, PAGE_SIZE, ndd->nsarea.max_xfer); + cmd = kzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + nd_desc = nvdimm_bus->nd_desc; + for (config_size = ndd->nsarea.config_size, offset = 0; + config_size; config_size -= cmd->in_length, + offset += cmd->in_length) { + cmd->in_length = min(config_size, max_cmd_size); + cmd->in_offset = offset; + rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), + ND_CMD_GET_CONFIG_DATA, cmd, + cmd->in_length + sizeof(*cmd)); + if (rc || cmd->status) { + rc = -ENXIO; + break; + } + memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length); + } + dev_dbg(ndd->dev, "%s: len: %zu rc: %d\n", __func__, offset, rc); + kfree(cmd); + + return rc; +} + static void nvdimm_release(struct device *dev) { struct nvdimm *nvdimm = to_nvdimm(dev); @@ -111,14 +218,33 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, dev_set_name(dev, "nmem%d", nvdimm->id); dev->parent = &nvdimm_bus->dev; dev->type = &nvdimm_device_type; - dev->bus = &nvdimm_bus_type; dev->devt = MKDEV(nvdimm_major, nvdimm->id); dev->groups = groups; - if (device_register(dev) != 0) { - put_device(dev); - return NULL; - } + nd_device_register(dev); return nvdimm; } EXPORT_SYMBOL_GPL(nvdimm_create); + +static int count_dimms(struct device *dev, void *c) +{ + int *count = c; + + if (is_nvdimm(dev)) + (*count)++; + return 0; +} + +int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count) +{ + int count = 0; + /* Flush any possible dimm registration failures */ + nd_synchronize(); + + device_for_each_child(&nvdimm_bus->dev, &count, count_dimms); + dev_dbg(&nvdimm_bus->dev, "%s: count: %d\n", __func__, count); + if (count != dimm_count) + return -ENXIO; + return 0; +} +EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 59528b3c9de8..f2004b790874 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -17,7 +17,6 @@ extern struct list_head nvdimm_bus_list; extern struct mutex nvdimm_bus_list_mutex; -extern struct bus_type nvdimm_bus_type; extern int nvdimm_major; struct nvdimm_bus { @@ -35,10 +34,11 @@ struct nvdimm { int id; }; -bool is_nvdimm(struct device *dev); struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev); int __init nvdimm_bus_init(void); -void __exit nvdimm_bus_exit(void); +void nvdimm_bus_exit(void); int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus); void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus); +void nd_synchronize(void); +bool is_nvdimm(struct device *dev); #endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h new file mode 100644 index 000000000000..1f7f6ecab0fc --- /dev/null +++ b/drivers/nvdimm/nd.h @@ -0,0 +1,36 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __ND_H__ +#define __ND_H__ +#include +#include +#include + +struct nvdimm_drvdata { + struct device *dev; + struct nd_cmd_get_config_size nsarea; + void *data; +}; + +enum nd_async_mode { + ND_SYNC, + ND_ASYNC, +}; + +void nd_device_register(struct device *dev); +void nd_device_unregister(struct device *dev, enum nd_async_mode mode); +int __init nvdimm_init(void); +void nvdimm_exit(void); +int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); +int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); +#endif /* __ND_H__ */ diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index a39235819af3..d3ebccf4ea8b 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -30,6 +30,7 @@ enum { extern struct attribute_group nvdimm_bus_attribute_group; extern struct attribute_group nvdimm_attribute_group; +extern struct attribute_group nd_device_attribute_group; struct nvdimm; struct nvdimm_bus_descriptor; @@ -71,4 +72,5 @@ u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd, const struct nd_cmd_desc *desc, int idx, const u32 *in_field, const u32 *out_field); +int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count); #endif /* __LIBNVDIMM_H__ */ diff --git a/include/linux/nd.h b/include/linux/nd.h new file mode 100644 index 000000000000..e074f67e53a3 --- /dev/null +++ b/include/linux/nd.h @@ -0,0 +1,39 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __LINUX_ND_H__ +#define __LINUX_ND_H__ +#include +#include + +struct nd_device_driver { + struct device_driver drv; + unsigned long type; + int (*probe)(struct device *dev); + int (*remove)(struct device *dev); +}; + +static inline struct nd_device_driver *to_nd_device_driver( + struct device_driver *drv) +{ + return container_of(drv, struct nd_device_driver, drv); +} + +#define MODULE_ALIAS_ND_DEVICE(type) \ + MODULE_ALIAS("nd:t" __stringify(type) "*") +#define ND_DEVICE_MODALIAS_FMT "nd:t%d" + +int __must_check __nd_driver_register(struct nd_device_driver *nd_drv, + struct module *module, const char *mod_name); +#define nd_driver_register(driver) \ + __nd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) +#endif /* __LINUX_ND_H__ */ diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index ff13c23b26df..37640916d146 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -175,4 +175,10 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_IOCTL_ARS_STATUS _IOWR(ND_IOCTL, ND_CMD_ARS_STATUS,\ struct nd_cmd_ars_status) + +#define ND_DEVICE_DIMM 1 /* nd_dimm: container for "config data" */ + +enum nd_driver_flags { + ND_DRIVER_DIMM = 1 << ND_DEVICE_DIMM, +}; #endif /* __NDCTL_H__ */ -- cgit v1.2.3 From 3d88002e4a7bd40f355550284c6cd140e6fe29dc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 31 May 2015 15:02:11 -0400 Subject: libnvdimm: support for legacy (non-aliasing) nvdimms The libnvdimm region driver is an intermediary driver that translates non-volatile "region"s into "namespace" sub-devices that are surfaced by persistent memory block-device drivers (PMEM and BLK). ACPI 6 introduces the concept that a given nvdimm may simultaneously offer multiple access modes to its media through direct PMEM load/store access, or windowed BLK mode. Existing nvdimms mostly implement a PMEM interface, some offer a BLK-like mode, but never both as ACPI 6 defines. If an nvdimm is single interfaced, then there is no need for dimm metadata labels. For these devices we can take the region boundaries directly to create a child namespace device (nd_namespace_io). Acked-by: Christoph Hellwig Tested-by: Toshi Kani Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 1 + drivers/nvdimm/Makefile | 2 + drivers/nvdimm/bus.c | 26 ++++++++++ drivers/nvdimm/core.c | 44 ++++++++++++++-- drivers/nvdimm/dimm.c | 2 +- drivers/nvdimm/namespace_devs.c | 111 ++++++++++++++++++++++++++++++++++++++++ drivers/nvdimm/nd-core.h | 6 ++- drivers/nvdimm/nd.h | 13 +++++ drivers/nvdimm/region.c | 93 +++++++++++++++++++++++++++++++++ drivers/nvdimm/region_devs.c | 66 +++++++++++++++++++++++- include/linux/libnvdimm.h | 7 ++- include/linux/nd.h | 10 ++++ include/uapi/linux/ndctl.h | 10 ++++ 13 files changed, 383 insertions(+), 8 deletions(-) create mode 100644 drivers/nvdimm/namespace_devs.c create mode 100644 drivers/nvdimm/region.c (limited to 'include/uapi/linux') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 068f69d70c9e..ce290748fe36 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -780,6 +780,7 @@ static struct attribute_group acpi_nfit_region_attribute_group = { static const struct attribute_group *acpi_nfit_region_attribute_groups[] = { &nd_region_attribute_group, &nd_mapping_attribute_group, + &nd_device_attribute_group, &acpi_nfit_region_attribute_group, NULL, }; diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 88afd0d849c3..af5e2760ddbd 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -5,3 +5,5 @@ libnvdimm-y += bus.o libnvdimm-y += dimm_devs.o libnvdimm-y += dimm.o libnvdimm-y += region_devs.o +libnvdimm-y += region.o +libnvdimm-y += namespace_devs.o diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index a0308f1872bf..4b77665a6cc8 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -13,6 +13,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include +#include #include #include #include @@ -33,6 +34,12 @@ static int to_nd_device_type(struct device *dev) { if (is_nvdimm(dev)) return ND_DEVICE_DIMM; + else if (is_nd_pmem(dev)) + return ND_DEVICE_REGION_PMEM; + else if (is_nd_blk(dev)) + return ND_DEVICE_REGION_BLK; + else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent)) + return nd_region_to_nstype(to_nd_region(dev->parent)); return 0; } @@ -50,27 +57,46 @@ static int nvdimm_bus_match(struct device *dev, struct device_driver *drv) return test_bit(to_nd_device_type(dev), &nd_drv->type); } +static struct module *to_bus_provider(struct device *dev) +{ + /* pin bus providers while regions are enabled */ + if (is_nd_pmem(dev) || is_nd_blk(dev)) { + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + return nvdimm_bus->module; + } + return NULL; +} + static int nvdimm_bus_probe(struct device *dev) { struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); + struct module *provider = to_bus_provider(dev); struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); int rc; + if (!try_module_get(provider)) + return -ENXIO; + rc = nd_drv->probe(dev); dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name, dev_name(dev), rc); + if (rc != 0) + module_put(provider); return rc; } static int nvdimm_bus_remove(struct device *dev) { struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); + struct module *provider = to_bus_provider(dev); struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); int rc; rc = nd_drv->remove(dev); dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name, dev_name(dev), rc); + module_put(provider); return rc; } diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 50ab880f0dc0..1b6b15d11f54 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -24,6 +24,36 @@ LIST_HEAD(nvdimm_bus_list); DEFINE_MUTEX(nvdimm_bus_list_mutex); static DEFINE_IDA(nd_ida); +void nvdimm_bus_lock(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return; + mutex_lock(&nvdimm_bus->reconfig_mutex); +} +EXPORT_SYMBOL(nvdimm_bus_lock); + +void nvdimm_bus_unlock(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return; + mutex_unlock(&nvdimm_bus->reconfig_mutex); +} +EXPORT_SYMBOL(nvdimm_bus_unlock); + +bool is_nvdimm_bus_locked(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return false; + return mutex_is_locked(&nvdimm_bus->reconfig_mutex); +} +EXPORT_SYMBOL(is_nvdimm_bus_locked); + static void nvdimm_bus_release(struct device *dev) { struct nvdimm_bus *nvdimm_bus; @@ -135,8 +165,8 @@ struct attribute_group nvdimm_bus_attribute_group = { }; EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group); -struct nvdimm_bus *nvdimm_bus_register(struct device *parent, - struct nvdimm_bus_descriptor *nd_desc) +struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, + struct nvdimm_bus_descriptor *nd_desc, struct module *module) { struct nvdimm_bus *nvdimm_bus; int rc; @@ -146,11 +176,13 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, return NULL; INIT_LIST_HEAD(&nvdimm_bus->list); nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); + mutex_init(&nvdimm_bus->reconfig_mutex); if (nvdimm_bus->id < 0) { kfree(nvdimm_bus); return NULL; } nvdimm_bus->nd_desc = nd_desc; + nvdimm_bus->module = module; nvdimm_bus->dev.parent = parent; nvdimm_bus->dev.release = nvdimm_bus_release; nvdimm_bus->dev.groups = nd_desc->attr_groups; @@ -174,7 +206,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, put_device(&nvdimm_bus->dev); return NULL; } -EXPORT_SYMBOL_GPL(nvdimm_bus_register); +EXPORT_SYMBOL_GPL(__nvdimm_bus_register); static int child_unregister(struct device *dev, void *data) { @@ -218,7 +250,12 @@ static __init int libnvdimm_init(void) rc = nvdimm_init(); if (rc) goto err_dimm; + rc = nd_region_init(); + if (rc) + goto err_region; return 0; + err_region: + nvdimm_exit(); err_dimm: nvdimm_bus_exit(); return rc; @@ -227,6 +264,7 @@ static __init int libnvdimm_init(void) static __exit void libnvdimm_exit(void) { WARN_ON(!list_empty(&nvdimm_bus_list)); + nd_region_exit(); nvdimm_exit(); nvdimm_bus_exit(); } diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c index 28001a6ccd4e..eb20fc2df32b 100644 --- a/drivers/nvdimm/dimm.c +++ b/drivers/nvdimm/dimm.c @@ -84,7 +84,7 @@ int __init nvdimm_init(void) return nd_driver_register(&nvdimm_driver); } -void __exit nvdimm_exit(void) +void nvdimm_exit(void) { driver_unregister(&nvdimm_driver.drv); } diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c new file mode 100644 index 000000000000..4f653d1e61ad --- /dev/null +++ b/drivers/nvdimm/namespace_devs.c @@ -0,0 +1,111 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include "nd.h" + +static void namespace_io_release(struct device *dev) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + kfree(nsio); +} + +static struct device_type namespace_io_device_type = { + .name = "nd_namespace_io", + .release = namespace_io_release, +}; + +static ssize_t nstype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + + return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region)); +} +static DEVICE_ATTR_RO(nstype); + +static struct attribute *nd_namespace_attributes[] = { + &dev_attr_nstype.attr, + NULL, +}; + +static struct attribute_group nd_namespace_attribute_group = { + .attrs = nd_namespace_attributes, +}; + +static const struct attribute_group *nd_namespace_attribute_groups[] = { + &nd_device_attribute_group, + &nd_namespace_attribute_group, + NULL, +}; + +static struct device **create_namespace_io(struct nd_region *nd_region) +{ + struct nd_namespace_io *nsio; + struct device *dev, **devs; + struct resource *res; + + nsio = kzalloc(sizeof(*nsio), GFP_KERNEL); + if (!nsio) + return NULL; + + devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL); + if (!devs) { + kfree(nsio); + return NULL; + } + + dev = &nsio->dev; + dev->type = &namespace_io_device_type; + dev->parent = &nd_region->dev; + res = &nsio->res; + res->name = dev_name(&nd_region->dev); + res->flags = IORESOURCE_MEM; + res->start = nd_region->ndr_start; + res->end = res->start + nd_region->ndr_size - 1; + + devs[0] = dev; + return devs; +} + +int nd_region_register_namespaces(struct nd_region *nd_region, int *err) +{ + struct device **devs = NULL; + int i; + + *err = 0; + switch (nd_region_to_nstype(nd_region)) { + case ND_DEVICE_NAMESPACE_IO: + devs = create_namespace_io(nd_region); + break; + default: + break; + } + + if (!devs) + return -ENODEV; + + for (i = 0; devs[i]; i++) { + struct device *dev = devs[i]; + + dev_set_name(dev, "namespace%d.%d", nd_region->id, i); + dev->groups = nd_namespace_attribute_groups; + nd_device_register(dev); + } + kfree(devs); + + return i; +} diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 1d760bf24857..0e9b41fd2546 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -21,9 +21,11 @@ extern int nvdimm_major; struct nvdimm_bus { struct nvdimm_bus_descriptor *nd_desc; + struct module *module; struct list_head list; struct device dev; int id; + struct mutex reconfig_mutex; }; struct nvdimm { @@ -34,6 +36,9 @@ struct nvdimm { int id; }; +bool is_nvdimm(struct device *dev); +bool is_nd_blk(struct device *dev); +bool is_nd_pmem(struct device *dev); struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev); int __init nvdimm_bus_init(void); void nvdimm_bus_exit(void); @@ -43,5 +48,4 @@ void nd_synchronize(void); int nvdimm_bus_register_dimms(struct nvdimm_bus *nvdimm_bus); int nvdimm_bus_register_regions(struct nvdimm_bus *nvdimm_bus); int nd_match_dimm(struct device *dev, void *data); -bool is_nvdimm(struct device *dev); #endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index ea0cca337aa6..bc5a08e36a25 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -23,6 +23,11 @@ struct nvdimm_drvdata { void *data; }; +struct nd_region_namespaces { + int count; + int active; +}; + struct nd_region { struct device dev; u16 ndr_mappings; @@ -41,7 +46,15 @@ enum nd_async_mode { void nd_device_register(struct device *dev); void nd_device_unregister(struct device *dev, enum nd_async_mode mode); int __init nvdimm_init(void); +int __init nd_region_init(void); void nvdimm_exit(void); +void nd_region_exit(void); int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); +struct nd_region *to_nd_region(struct device *dev); +int nd_region_to_nstype(struct nd_region *nd_region); +int nd_region_register_namespaces(struct nd_region *nd_region, int *err); +void nvdimm_bus_lock(struct device *dev); +void nvdimm_bus_unlock(struct device *dev); +bool is_nvdimm_bus_locked(struct device *dev); #endif /* __ND_H__ */ diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c new file mode 100644 index 000000000000..ade3dba81afd --- /dev/null +++ b/drivers/nvdimm/region.c @@ -0,0 +1,93 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include "nd.h" + +static int nd_region_probe(struct device *dev) +{ + int err; + struct nd_region_namespaces *num_ns; + struct nd_region *nd_region = to_nd_region(dev); + int rc = nd_region_register_namespaces(nd_region, &err); + + num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL); + if (!num_ns) + return -ENOMEM; + + if (rc < 0) + return rc; + + num_ns->active = rc; + num_ns->count = rc + err; + dev_set_drvdata(dev, num_ns); + + if (err == 0) + return 0; + + if (rc == err) + return -ENODEV; + + /* + * Given multiple namespaces per region, we do not want to + * disable all the successfully registered peer namespaces upon + * a single registration failure. If userspace is missing a + * namespace that it expects it can disable/re-enable the region + * to retry discovery after correcting the failure. + * /namespaces returns the current + * "/" namespace count. + */ + dev_err(dev, "failed to register %d namespace%s, continuing...\n", + err, err == 1 ? "" : "s"); + return 0; +} + +static int child_unregister(struct device *dev, void *data) +{ + nd_device_unregister(dev, ND_SYNC); + return 0; +} + +static int nd_region_remove(struct device *dev) +{ + /* flush attribute readers and disable */ + nvdimm_bus_lock(dev); + dev_set_drvdata(dev, NULL); + nvdimm_bus_unlock(dev); + + device_for_each_child(dev, NULL, child_unregister); + return 0; +} + +static struct nd_device_driver nd_region_driver = { + .probe = nd_region_probe, + .remove = nd_region_remove, + .drv = { + .name = "nd_region", + }, + .type = ND_DRIVER_REGION_BLK | ND_DRIVER_REGION_PMEM, +}; + +int __init nd_region_init(void) +{ + return nd_driver_register(&nd_region_driver); +} + +void nd_region_exit(void) +{ + driver_unregister(&nd_region_driver.drv); +} + +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_PMEM); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_BLK); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 4bda2e0df8f7..b5c5b9095b28 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -47,11 +47,16 @@ static struct device_type nd_volatile_device_type = { .release = nd_region_release, }; -static bool is_nd_pmem(struct device *dev) +bool is_nd_pmem(struct device *dev) { return dev ? dev->type == &nd_pmem_device_type : false; } +bool is_nd_blk(struct device *dev) +{ + return dev ? dev->type == &nd_blk_device_type : false; +} + struct nd_region *to_nd_region(struct device *dev) { struct nd_region *nd_region = container_of(dev, struct nd_region, dev); @@ -61,6 +66,37 @@ struct nd_region *to_nd_region(struct device *dev) } EXPORT_SYMBOL_GPL(to_nd_region); +/** + * nd_region_to_nstype() - region to an integer namespace type + * @nd_region: region-device to interrogate + * + * This is the 'nstype' attribute of a region as well, an input to the + * MODALIAS for namespace devices, and bit number for a nvdimm_bus to match + * namespace devices with namespace drivers. + */ +int nd_region_to_nstype(struct nd_region *nd_region) +{ + if (is_nd_pmem(&nd_region->dev)) { + u16 i, alias; + + for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + if (nvdimm->flags & NDD_ALIASING) + alias++; + } + if (alias) + return ND_DEVICE_NAMESPACE_PMEM; + else + return ND_DEVICE_NAMESPACE_IO; + } else if (is_nd_blk(&nd_region->dev)) { + return ND_DEVICE_NAMESPACE_BLK; + } + + return 0; +} + static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -88,9 +124,37 @@ static ssize_t mappings_show(struct device *dev, } static DEVICE_ATTR_RO(mappings); +static ssize_t nstype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + + return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region)); +} +static DEVICE_ATTR_RO(nstype); + +static ssize_t init_namespaces_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region_namespaces *num_ns = dev_get_drvdata(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (num_ns) + rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count); + else + rc = -ENXIO; + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(init_namespaces); + static struct attribute *nd_region_attributes[] = { &dev_attr_size.attr, + &dev_attr_nstype.attr, &dev_attr_mappings.attr, + &dev_attr_init_namespaces.attr, NULL, }; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 39e7e606092a..37f966aff386 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -71,8 +71,11 @@ struct nd_region_desc { struct nvdimm_bus; struct device; -struct nvdimm_bus *nvdimm_bus_register(struct device *parent, - struct nvdimm_bus_descriptor *nfit_desc); +struct module; +struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, + struct nvdimm_bus_descriptor *nfit_desc, struct module *module); +#define nvdimm_bus_register(parent, desc) \ + __nvdimm_bus_register(parent, desc, THIS_MODULE) void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); struct nvdimm_bus *to_nvdimm_bus(struct device *dev); struct nvdimm *to_nvdimm(struct device *dev); diff --git a/include/linux/nd.h b/include/linux/nd.h index e074f67e53a3..da70e9962197 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -26,6 +26,16 @@ static inline struct nd_device_driver *to_nd_device_driver( struct device_driver *drv) { return container_of(drv, struct nd_device_driver, drv); +}; + +struct nd_namespace_io { + struct device dev; + struct resource res; +}; + +static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev) +{ + return container_of(dev, struct nd_namespace_io, dev); } #define MODULE_ALIAS_ND_DEVICE(type) \ diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 37640916d146..174b6371dcc1 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -177,8 +177,18 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_DEVICE_DIMM 1 /* nd_dimm: container for "config data" */ +#define ND_DEVICE_REGION_PMEM 2 /* nd_region: (parent of PMEM namespaces) */ +#define ND_DEVICE_REGION_BLK 3 /* nd_region: (parent of BLK namespaces) */ +#define ND_DEVICE_NAMESPACE_IO 4 /* legacy persistent memory */ +#define ND_DEVICE_NAMESPACE_PMEM 5 /* PMEM namespace (may alias with BLK) */ +#define ND_DEVICE_NAMESPACE_BLK 6 /* BLK namespace (may alias with PMEM) */ enum nd_driver_flags { ND_DRIVER_DIMM = 1 << ND_DEVICE_DIMM, + ND_DRIVER_REGION_PMEM = 1 << ND_DEVICE_REGION_PMEM, + ND_DRIVER_REGION_BLK = 1 << ND_DEVICE_REGION_BLK, + ND_DRIVER_NAMESPACE_IO = 1 << ND_DEVICE_NAMESPACE_IO, + ND_DRIVER_NAMESPACE_PMEM = 1 << ND_DEVICE_NAMESPACE_PMEM, + ND_DRIVER_NAMESPACE_BLK = 1 << ND_DEVICE_NAMESPACE_BLK, }; #endif /* __NDCTL_H__ */ -- cgit v1.2.3 From 4a826c83db4edc040da3a66dbefd53f0cfcf457d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 9 Jun 2015 16:09:36 -0400 Subject: libnvdimm: namespace indices: read and validate This on media label format [1] consists of two index blocks followed by an array of labels. None of these structures are ever updated in place. A sequence number tracks the current active index and the next one to write, while labels are written to free slots. +------------+ | | | nsindex0 | | | +------------+ | | | nsindex1 | | | +------------+ | label0 | +------------+ | label1 | +------------+ | | ....nslot... | | +------------+ | labelN | +------------+ After reading valid labels, store the dpa ranges they claim into per-dimm resource trees. [1]: http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf Cc: Neil Brown Acked-by: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/nvdimm/Makefile | 1 + drivers/nvdimm/dimm.c | 23 ++++ drivers/nvdimm/dimm_devs.c | 30 ++++- drivers/nvdimm/label.c | 290 +++++++++++++++++++++++++++++++++++++++++++++ drivers/nvdimm/label.h | 128 ++++++++++++++++++++ drivers/nvdimm/nd.h | 49 ++++++++ include/uapi/linux/ndctl.h | 1 - 7 files changed, 520 insertions(+), 2 deletions(-) create mode 100644 drivers/nvdimm/label.c create mode 100644 drivers/nvdimm/label.h (limited to 'include/uapi/linux') diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 4d2a27f52faa..abce98f87f16 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -10,3 +10,4 @@ libnvdimm-y += dimm.o libnvdimm-y += region_devs.o libnvdimm-y += region.o libnvdimm-y += namespace_devs.o +libnvdimm-y += label.o diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c index eb20fc2df32b..2df97c3c3b34 100644 --- a/drivers/nvdimm/dimm.c +++ b/drivers/nvdimm/dimm.c @@ -18,6 +18,7 @@ #include #include #include +#include "label.h" #include "nd.h" static void free_data(struct nvdimm_drvdata *ndd) @@ -42,6 +43,11 @@ static int nvdimm_probe(struct device *dev) return -ENOMEM; dev_set_drvdata(dev, ndd); + ndd->dpa.name = dev_name(dev); + ndd->ns_current = -1; + ndd->ns_next = -1; + ndd->dpa.start = 0; + ndd->dpa.end = -1; ndd->dev = dev; rc = nvdimm_init_nsarea(ndd); @@ -54,6 +60,17 @@ static int nvdimm_probe(struct device *dev) dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size); + nvdimm_bus_lock(dev); + ndd->ns_current = nd_label_validate(ndd); + ndd->ns_next = nd_label_next_nsindex(ndd->ns_current); + nd_label_copy(ndd, to_next_namespace_index(ndd), + to_current_namespace_index(ndd)); + rc = nd_label_reserve_dpa(ndd); + nvdimm_bus_unlock(dev); + + if (rc) + goto err; + return 0; err: @@ -64,7 +81,13 @@ static int nvdimm_probe(struct device *dev) static int nvdimm_remove(struct device *dev) { struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); + struct resource *res, *_r; + nvdimm_bus_lock(dev); + dev_set_drvdata(dev, NULL); + for_each_dpa_resource_safe(ndd, res, _r) + nvdimm_free_dpa(ndd, res); + nvdimm_bus_unlock(dev); free_data(ndd); return 0; diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index bdf8241b6525..d2ef02e4be6c 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -92,8 +92,12 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) if (ndd->data) return 0; - if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0) + if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0 + || ndd->nsarea.config_size < ND_LABEL_MIN_SIZE) { + dev_dbg(ndd->dev, "failed to init config data area: (%d:%d)\n", + ndd->nsarea.max_xfer, ndd->nsarea.config_size); return -ENXIO; + } ndd->data = kmalloc(ndd->nsarea.config_size, GFP_KERNEL); if (!ndd->data) @@ -243,6 +247,30 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, } EXPORT_SYMBOL_GPL(nvdimm_create); +void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res) +{ + WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev)); + kfree(res->name); + __release_region(&ndd->dpa, res->start, resource_size(res)); +} + +struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id, resource_size_t start, + resource_size_t n) +{ + char *name = kmemdup(label_id, sizeof(*label_id), GFP_KERNEL); + struct resource *res; + + if (!name) + return NULL; + + WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev)); + res = __request_region(&ndd->dpa, start, n, name, 0); + if (!res) + kfree(name); + return res; +} + static int count_dimms(struct device *dev, void *c) { int *count = c; diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c new file mode 100644 index 000000000000..db5d7492dc8d --- /dev/null +++ b/drivers/nvdimm/label.c @@ -0,0 +1,290 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include "nd-core.h" +#include "label.h" +#include "nd.h" + +static u32 best_seq(u32 a, u32 b) +{ + a &= NSINDEX_SEQ_MASK; + b &= NSINDEX_SEQ_MASK; + + if (a == 0 || a == b) + return b; + else if (b == 0) + return a; + else if (nd_inc_seq(a) == b) + return b; + else + return a; +} + +size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) +{ + u32 index_span; + + if (ndd->nsindex_size) + return ndd->nsindex_size; + + /* + * The minimum index space is 512 bytes, with that amount of + * index we can describe ~1400 labels which is less than a byte + * of overhead per label. Round up to a byte of overhead per + * label and determine the size of the index region. Yes, this + * starts to waste space at larger config_sizes, but it's + * unlikely we'll ever see anything but 128K. + */ + index_span = ndd->nsarea.config_size / 129; + index_span /= NSINDEX_ALIGN * 2; + ndd->nsindex_size = index_span * NSINDEX_ALIGN; + + return ndd->nsindex_size; +} + +int nd_label_validate(struct nvdimm_drvdata *ndd) +{ + /* + * On media label format consists of two index blocks followed + * by an array of labels. None of these structures are ever + * updated in place. A sequence number tracks the current + * active index and the next one to write, while labels are + * written to free slots. + * + * +------------+ + * | | + * | nsindex0 | + * | | + * +------------+ + * | | + * | nsindex1 | + * | | + * +------------+ + * | label0 | + * +------------+ + * | label1 | + * +------------+ + * | | + * ....nslot... + * | | + * +------------+ + * | labelN | + * +------------+ + */ + struct nd_namespace_index *nsindex[] = { + to_namespace_index(ndd, 0), + to_namespace_index(ndd, 1), + }; + const int num_index = ARRAY_SIZE(nsindex); + struct device *dev = ndd->dev; + bool valid[2] = { 0 }; + int i, num_valid = 0; + u32 seq; + + for (i = 0; i < num_index; i++) { + u32 nslot; + u8 sig[NSINDEX_SIG_LEN]; + u64 sum_save, sum, size; + + memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN); + if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) { + dev_dbg(dev, "%s: nsindex%d signature invalid\n", + __func__, i); + continue; + } + sum_save = __le64_to_cpu(nsindex[i]->checksum); + nsindex[i]->checksum = __cpu_to_le64(0); + sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1); + nsindex[i]->checksum = __cpu_to_le64(sum_save); + if (sum != sum_save) { + dev_dbg(dev, "%s: nsindex%d checksum invalid\n", + __func__, i); + continue; + } + + seq = __le32_to_cpu(nsindex[i]->seq); + if ((seq & NSINDEX_SEQ_MASK) == 0) { + dev_dbg(dev, "%s: nsindex%d sequence: %#x invalid\n", + __func__, i, seq); + continue; + } + + /* sanity check the index against expected values */ + if (__le64_to_cpu(nsindex[i]->myoff) + != i * sizeof_namespace_index(ndd)) { + dev_dbg(dev, "%s: nsindex%d myoff: %#llx invalid\n", + __func__, i, (unsigned long long) + __le64_to_cpu(nsindex[i]->myoff)); + continue; + } + if (__le64_to_cpu(nsindex[i]->otheroff) + != (!i) * sizeof_namespace_index(ndd)) { + dev_dbg(dev, "%s: nsindex%d otheroff: %#llx invalid\n", + __func__, i, (unsigned long long) + __le64_to_cpu(nsindex[i]->otheroff)); + continue; + } + + size = __le64_to_cpu(nsindex[i]->mysize); + if (size > sizeof_namespace_index(ndd) + || size < sizeof(struct nd_namespace_index)) { + dev_dbg(dev, "%s: nsindex%d mysize: %#llx invalid\n", + __func__, i, size); + continue; + } + + nslot = __le32_to_cpu(nsindex[i]->nslot); + if (nslot * sizeof(struct nd_namespace_label) + + 2 * sizeof_namespace_index(ndd) + > ndd->nsarea.config_size) { + dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n", + __func__, i, nslot, + ndd->nsarea.config_size); + continue; + } + valid[i] = true; + num_valid++; + } + + switch (num_valid) { + case 0: + break; + case 1: + for (i = 0; i < num_index; i++) + if (valid[i]) + return i; + /* can't have num_valid > 0 but valid[] = { false, false } */ + WARN_ON(1); + break; + default: + /* pick the best index... */ + seq = best_seq(__le32_to_cpu(nsindex[0]->seq), + __le32_to_cpu(nsindex[1]->seq)); + if (seq == (__le32_to_cpu(nsindex[1]->seq) & NSINDEX_SEQ_MASK)) + return 1; + else + return 0; + break; + } + + return -1; +} + +void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, + struct nd_namespace_index *src) +{ + if (dst && src) + /* pass */; + else + return; + + memcpy(dst, src, sizeof_namespace_index(ndd)); +} + +static struct nd_namespace_label *nd_label_base(struct nvdimm_drvdata *ndd) +{ + void *base = to_namespace_index(ndd, 0); + + return base + 2 * sizeof_namespace_index(ndd); +} + +#define for_each_clear_bit_le(bit, addr, size) \ + for ((bit) = find_next_zero_bit_le((addr), (size), 0); \ + (bit) < (size); \ + (bit) = find_next_zero_bit_le((addr), (size), (bit) + 1)) + +/** + * preamble_current - common variable initialization for nd_label_* routines + * @ndd: dimm container for the relevant label set + * @nsindex_out: on return set to the currently active namespace index + * @free: on return set to the free label bitmap in the index + * @nslot: on return set to the number of slots in the label space + */ +static bool preamble_current(struct nvdimm_drvdata *ndd, + struct nd_namespace_index **nsindex_out, + unsigned long **free, u32 *nslot) +{ + struct nd_namespace_index *nsindex; + + nsindex = to_current_namespace_index(ndd); + if (nsindex == NULL) + return false; + + *free = (unsigned long *) nsindex->free; + *nslot = __le32_to_cpu(nsindex->nslot); + *nsindex_out = nsindex; + + return true; +} + +static char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags) +{ + if (!label_id || !uuid) + return NULL; + snprintf(label_id->id, ND_LABEL_ID_SIZE, "%s-%pUb", + flags & NSLABEL_FLAG_LOCAL ? "blk" : "pmem", uuid); + return label_id->id; +} + +static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot) +{ + /* check that we are written where we expect to be written */ + if (slot != __le32_to_cpu(nd_label->slot)) + return false; + + /* check that DPA allocations are page aligned */ + if ((__le64_to_cpu(nd_label->dpa) + | __le64_to_cpu(nd_label->rawsize)) % SZ_4K) + return false; + + return true; +} + +int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + + if (!preamble_current(ndd, &nsindex, &free, &nslot)) + return 0; /* no label, nothing to reserve */ + + for_each_clear_bit_le(slot, free, nslot) { + struct nd_namespace_label *nd_label; + struct nd_region *nd_region = NULL; + u8 label_uuid[NSLABEL_UUID_LEN]; + struct nd_label_id label_id; + struct resource *res; + u32 flags; + + nd_label = nd_label_base(ndd) + slot; + + if (!slot_valid(nd_label, slot)) + continue; + + memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN); + flags = __le32_to_cpu(nd_label->flags); + nd_label_gen_id(&label_id, label_uuid, flags); + res = nvdimm_allocate_dpa(ndd, &label_id, + __le64_to_cpu(nd_label->dpa), + __le64_to_cpu(nd_label->rawsize)); + nd_dbg_dpa(nd_region, ndd, res, "reserve\n"); + if (!res) + return -EBUSY; + } + + return 0; +} diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h new file mode 100644 index 000000000000..d6aa0d5c6b4e --- /dev/null +++ b/drivers/nvdimm/label.h @@ -0,0 +1,128 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __LABEL_H__ +#define __LABEL_H__ + +#include +#include +#include + +enum { + NSINDEX_SIG_LEN = 16, + NSINDEX_ALIGN = 256, + NSINDEX_SEQ_MASK = 0x3, + NSLABEL_UUID_LEN = 16, + NSLABEL_NAME_LEN = 64, + NSLABEL_FLAG_ROLABEL = 0x1, /* read-only label */ + NSLABEL_FLAG_LOCAL = 0x2, /* DIMM-local namespace */ + NSLABEL_FLAG_BTT = 0x4, /* namespace contains a BTT */ + NSLABEL_FLAG_UPDATING = 0x8, /* label being updated */ + BTT_ALIGN = 4096, /* all btt structures */ + BTTINFO_SIG_LEN = 16, + BTTINFO_UUID_LEN = 16, + BTTINFO_FLAG_ERROR = 0x1, /* error state (read-only) */ + BTTINFO_MAJOR_VERSION = 1, + ND_LABEL_MIN_SIZE = 512 * 129, /* see sizeof_namespace_index() */ + ND_LABEL_ID_SIZE = 50, +}; + +static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0"; + +/** + * struct nd_namespace_index - label set superblock + * @sig: NAMESPACE_INDEX\0 + * @flags: placeholder + * @seq: sequence number for this index + * @myoff: offset of this index in label area + * @mysize: size of this index struct + * @otheroff: offset of other index + * @labeloff: offset of first label slot + * @nslot: total number of label slots + * @major: label area major version + * @minor: label area minor version + * @checksum: fletcher64 of all fields + * @free[0]: bitmap, nlabel bits + * + * The size of free[] is rounded up so the total struct size is a + * multiple of NSINDEX_ALIGN bytes. Any bits this allocates beyond + * nlabel bits must be zero. + */ +struct nd_namespace_index { + u8 sig[NSINDEX_SIG_LEN]; + __le32 flags; + __le32 seq; + __le64 myoff; + __le64 mysize; + __le64 otheroff; + __le64 labeloff; + __le32 nslot; + __le16 major; + __le16 minor; + __le64 checksum; + u8 free[0]; +}; + +/** + * struct nd_namespace_label - namespace superblock + * @uuid: UUID per RFC 4122 + * @name: optional name (NULL-terminated) + * @flags: see NSLABEL_FLAG_* + * @nlabel: num labels to describe this ns + * @position: labels position in set + * @isetcookie: interleave set cookie + * @lbasize: LBA size in bytes or 0 for pmem + * @dpa: DPA of NVM range on this DIMM + * @rawsize: size of namespace + * @slot: slot of this label in label area + * @unused: must be zero + */ +struct nd_namespace_label { + u8 uuid[NSLABEL_UUID_LEN]; + u8 name[NSLABEL_NAME_LEN]; + __le32 flags; + __le16 nlabel; + __le16 position; + __le64 isetcookie; + __le64 lbasize; + __le64 dpa; + __le64 rawsize; + __le32 slot; + __le32 unused; +}; + +/** + * struct nd_label_id - identifier string for dpa allocation + * @id: "{blk|pmem}-" + */ +struct nd_label_id { + char id[ND_LABEL_ID_SIZE]; +}; + +/* + * If the 'best' index is invalid, so is the 'next' index. Otherwise, + * the next index is MOD(index+1, 2) + */ +static inline int nd_label_next_nsindex(int index) +{ + if (index < 0) + return -1; + + return (index + 1) % 2; +} + +struct nvdimm_drvdata; +int nd_label_validate(struct nvdimm_drvdata *ndd); +void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, + struct nd_namespace_index *src); +size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd); +#endif /* __LABEL_H__ */ diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 0285e4588b03..401fa0d5b6ea 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -16,11 +16,15 @@ #include #include #include +#include "label.h" struct nvdimm_drvdata { struct device *dev; + int nsindex_size; struct nd_cmd_get_config_size nsarea; void *data; + int ns_current, ns_next; + struct resource dpa; }; struct nd_region_namespaces { @@ -28,6 +32,37 @@ struct nd_region_namespaces { int active; }; +static inline struct nd_namespace_index *to_namespace_index( + struct nvdimm_drvdata *ndd, int i) +{ + if (i < 0) + return NULL; + + return ndd->data + sizeof_namespace_index(ndd) * i; +} + +static inline struct nd_namespace_index *to_current_namespace_index( + struct nvdimm_drvdata *ndd) +{ + return to_namespace_index(ndd, ndd->ns_current); +} + +static inline struct nd_namespace_index *to_next_namespace_index( + struct nvdimm_drvdata *ndd) +{ + return to_namespace_index(ndd, ndd->ns_next); +} + +#define nd_dbg_dpa(r, d, res, fmt, arg...) \ + dev_dbg((r) ? &(r)->dev : (d)->dev, "%s: %.13s: %#llx @ %#llx " fmt, \ + (r) ? dev_name((d)->dev) : "", res ? res->name : "null", \ + (unsigned long long) (res ? resource_size(res) : 0), \ + (unsigned long long) (res ? res->start : 0), ##arg) + +#define for_each_dpa_resource_safe(ndd, res, next) \ + for (res = (ndd)->dpa.child, next = res ? res->sibling : NULL; \ + res; res = next, next = next ? next->sibling : NULL) + struct nd_region { struct device dev; u16 ndr_mappings; @@ -39,6 +74,15 @@ struct nd_region { struct nd_mapping mapping[0]; }; +/* + * Lookup next in the repeating sequence of 01, 10, and 11. + */ +static inline unsigned nd_inc_seq(unsigned seq) +{ + static const unsigned next[] = { 0, 2, 3, 1 }; + + return next[seq & 3]; +} enum nd_async_mode { ND_SYNC, ND_ASYNC, @@ -58,4 +102,9 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err); void nvdimm_bus_lock(struct device *dev); void nvdimm_bus_unlock(struct device *dev); bool is_nvdimm_bus_locked(struct device *dev); +int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd); +void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res); +struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id, resource_size_t start, + resource_size_t n); #endif /* __ND_H__ */ diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 174b6371dcc1..1357a87b8714 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -175,7 +175,6 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_IOCTL_ARS_STATUS _IOWR(ND_IOCTL, ND_CMD_ARS_STATUS,\ struct nd_cmd_ars_status) - #define ND_DEVICE_DIMM 1 /* nd_dimm: container for "config data" */ #define ND_DEVICE_REGION_PMEM 2 /* nd_region: (parent of PMEM namespaces) */ #define ND_DEVICE_REGION_BLK 3 /* nd_region: (parent of BLK namespaces) */ -- cgit v1.2.3 From bf9bccc14c05dae8caba29df6187c731710f5380 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 17 Jun 2015 17:14:46 -0400 Subject: libnvdimm: pmem label sets and namespace instantiation. A complete label set is a PMEM-label per-dimm per-interleave-set where all the UUIDs match and the interleave set cookie matches the hosting interleave set. Present sysfs attributes for manipulation of a PMEM-namespace's 'alt_name', 'uuid', and 'size' attributes. A later patch will make these settings persistent by writing back the label. Note that PMEM allocations grow forwards from the start of an interleave set (lowest dimm-physical-address (DPA)). BLK-namespaces that alias with a PMEM interleave set will grow allocations backward from the highest DPA. Cc: Greg KH Cc: Neil Brown Acked-by: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 8 +- drivers/nvdimm/core.c | 64 +++ drivers/nvdimm/dimm.c | 21 +- drivers/nvdimm/dimm_devs.c | 137 ++++++ drivers/nvdimm/label.c | 55 ++- drivers/nvdimm/label.h | 2 + drivers/nvdimm/namespace_devs.c | 1002 ++++++++++++++++++++++++++++++++++++++- drivers/nvdimm/nd-core.h | 12 + drivers/nvdimm/nd.h | 17 + drivers/nvdimm/pmem.c | 20 +- drivers/nvdimm/region.c | 3 + drivers/nvdimm/region_devs.c | 158 +++++- include/linux/libnvdimm.h | 10 + include/linux/nd.h | 24 + include/uapi/linux/ndctl.h | 4 + 15 files changed, 1506 insertions(+), 31 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index ffb43cada625..fddc3f2a8f80 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -97,6 +97,8 @@ static int nvdimm_bus_probe(struct device *dev) rc = nd_drv->probe(dev); if (rc == 0) nd_region_probe_success(nvdimm_bus, dev); + else + nd_region_disable(nvdimm_bus, dev); nvdimm_bus_probe_end(nvdimm_bus); dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name, @@ -381,8 +383,10 @@ u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd, } EXPORT_SYMBOL_GPL(nd_cmd_out_size); -static void wait_nvdimm_bus_probe_idle(struct nvdimm_bus *nvdimm_bus) +void wait_nvdimm_bus_probe_idle(struct device *dev) { + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + do { if (nvdimm_bus->probe_active == 0) break; @@ -402,7 +406,7 @@ static int nd_cmd_clear_to_send(struct nvdimm *nvdimm, unsigned int cmd) return 0; nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev); - wait_nvdimm_bus_probe_idle(nvdimm_bus); + wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev); if (atomic_read(&nvdimm->busy)) return -EBUSY; diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 7806eaaf4707..cf99cce8ef33 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -109,6 +110,69 @@ struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev) return NULL; } +static bool is_uuid_sep(char sep) +{ + if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0') + return true; + return false; +} + +static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf, + size_t len) +{ + const char *str = buf; + u8 uuid[16]; + int i; + + for (i = 0; i < 16; i++) { + if (!isxdigit(str[0]) || !isxdigit(str[1])) { + dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n", + __func__, i, str - buf, str[0], + str + 1 - buf, str[1]); + return -EINVAL; + } + + uuid[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]); + str += 2; + if (is_uuid_sep(*str)) + str++; + } + + memcpy(uuid_out, uuid, sizeof(uuid)); + return 0; +} + +/** + * nd_uuid_store: common implementation for writing 'uuid' sysfs attributes + * @dev: container device for the uuid property + * @uuid_out: uuid buffer to replace + * @buf: raw sysfs buffer to parse + * + * Enforce that uuids can only be changed while the device is disabled + * (driver detached) + * LOCKING: expects device_lock() is held on entry + */ +int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, + size_t len) +{ + u8 uuid[16]; + int rc; + + if (dev->driver) + return -EBUSY; + + rc = nd_uuid_parse(dev, uuid, buf, len); + if (rc) + return rc; + + kfree(*uuid_out); + *uuid_out = kmemdup(uuid, sizeof(uuid), GFP_KERNEL); + if (!(*uuid_out)) + return -ENOMEM; + + return 0; +} + static ssize_t commands_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c index 2df97c3c3b34..71d12bb67339 100644 --- a/drivers/nvdimm/dimm.c +++ b/drivers/nvdimm/dimm.c @@ -21,18 +21,6 @@ #include "label.h" #include "nd.h" -static void free_data(struct nvdimm_drvdata *ndd) -{ - if (!ndd) - return; - - if (ndd->data && is_vmalloc_addr(ndd->data)) - vfree(ndd->data); - else - kfree(ndd->data); - kfree(ndd); -} - static int nvdimm_probe(struct device *dev) { struct nvdimm_drvdata *ndd; @@ -49,6 +37,8 @@ static int nvdimm_probe(struct device *dev) ndd->dpa.start = 0; ndd->dpa.end = -1; ndd->dev = dev; + get_device(dev); + kref_init(&ndd->kref); rc = nvdimm_init_nsarea(ndd); if (rc) @@ -74,21 +64,18 @@ static int nvdimm_probe(struct device *dev) return 0; err: - free_data(ndd); + put_ndd(ndd); return rc; } static int nvdimm_remove(struct device *dev) { struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); - struct resource *res, *_r; nvdimm_bus_lock(dev); dev_set_drvdata(dev, NULL); - for_each_dpa_resource_safe(ndd, res, _r) - nvdimm_free_dpa(ndd, res); nvdimm_bus_unlock(dev); - free_data(ndd); + put_ndd(ndd); return 0; } diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index d2ef02e4be6c..b55acef179ba 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -159,6 +159,48 @@ struct nvdimm *to_nvdimm(struct device *dev) } EXPORT_SYMBOL_GPL(to_nvdimm); +struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping) +{ + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm->dev)); + + return dev_get_drvdata(&nvdimm->dev); +} +EXPORT_SYMBOL(to_ndd); + +void nvdimm_drvdata_release(struct kref *kref) +{ + struct nvdimm_drvdata *ndd = container_of(kref, typeof(*ndd), kref); + struct device *dev = ndd->dev; + struct resource *res, *_r; + + dev_dbg(dev, "%s\n", __func__); + + nvdimm_bus_lock(dev); + for_each_dpa_resource_safe(ndd, res, _r) + nvdimm_free_dpa(ndd, res); + nvdimm_bus_unlock(dev); + + if (ndd->data && is_vmalloc_addr(ndd->data)) + vfree(ndd->data); + else + kfree(ndd->data); + kfree(ndd); + put_device(dev); +} + +void get_ndd(struct nvdimm_drvdata *ndd) +{ + kref_get(&ndd->kref); +} + +void put_ndd(struct nvdimm_drvdata *ndd) +{ + if (ndd) + kref_put(&ndd->kref, nvdimm_drvdata_release); +} + const char *nvdimm_name(struct nvdimm *nvdimm) { return dev_name(&nvdimm->dev); @@ -247,6 +289,83 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, } EXPORT_SYMBOL_GPL(nvdimm_create); +/** + * nd_pmem_available_dpa - for the given dimm+region account unallocated dpa + * @nd_mapping: container of dpa-resource-root + labels + * @nd_region: constrain available space check to this reference region + * @overlap: calculate available space assuming this level of overlap + * + * Validate that a PMEM label, if present, aligns with the start of an + * interleave set and truncate the available size at the lowest BLK + * overlap point. + * + * The expectation is that this routine is called multiple times as it + * probes for the largest BLK encroachment for any single member DIMM of + * the interleave set. Once that value is determined the PMEM-limit for + * the set can be established. + */ +resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, resource_size_t *overlap) +{ + resource_size_t map_start, map_end, busy = 0, available, blk_start; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + const char *reason; + + if (!ndd) + return 0; + + map_start = nd_mapping->start; + map_end = map_start + nd_mapping->size - 1; + blk_start = max(map_start, map_end + 1 - *overlap); + for_each_dpa_resource(ndd, res) + if (res->start >= map_start && res->start < map_end) { + if (strncmp(res->name, "blk", 3) == 0) + blk_start = min(blk_start, res->start); + else if (res->start != map_start) { + reason = "misaligned to iset"; + goto err; + } else { + if (busy) { + reason = "duplicate overlapping PMEM reservations?"; + goto err; + } + busy += resource_size(res); + continue; + } + } else if (res->end >= map_start && res->end <= map_end) { + if (strncmp(res->name, "blk", 3) == 0) { + /* + * If a BLK allocation overlaps the start of + * PMEM the entire interleave set may now only + * be used for BLK. + */ + blk_start = map_start; + } else { + reason = "misaligned to iset"; + goto err; + } + } else if (map_start > res->start && map_start < res->end) { + /* total eclipse of the mapping */ + busy += nd_mapping->size; + blk_start = map_start; + } + + *overlap = map_end + 1 - blk_start; + available = blk_start - map_start; + if (busy < available) + return available - busy; + return 0; + + err: + /* + * Something is wrong, PMEM must align with the start of the + * interleave set, and there can only be one allocation per set. + */ + nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason); + return 0; +} + void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res) { WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev)); @@ -271,6 +390,24 @@ struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, return res; } +/** + * nvdimm_allocated_dpa - sum up the dpa currently allocated to this label_id + * @nvdimm: container of dpa-resource-root + labels + * @label_id: dpa resource name of the form {pmem|blk}- + */ +resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id) +{ + resource_size_t allocated = 0; + struct resource *res; + + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id->id) == 0) + allocated += resource_size(res); + + return allocated; +} + static int count_dimms(struct device *dev, void *c) { int *count = c; diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index db5d7492dc8d..1a3bcd27a57a 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -230,7 +230,7 @@ static bool preamble_current(struct nvdimm_drvdata *ndd, return true; } -static char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags) +char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags) { if (!label_id || !uuid) return NULL; @@ -288,3 +288,56 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) return 0; } + +int nd_label_active_count(struct nvdimm_drvdata *ndd) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + int count = 0; + + if (!preamble_current(ndd, &nsindex, &free, &nslot)) + return 0; + + for_each_clear_bit_le(slot, free, nslot) { + struct nd_namespace_label *nd_label; + + nd_label = nd_label_base(ndd) + slot; + + if (!slot_valid(nd_label, slot)) { + u32 label_slot = __le32_to_cpu(nd_label->slot); + u64 size = __le64_to_cpu(nd_label->rawsize); + u64 dpa = __le64_to_cpu(nd_label->dpa); + + dev_dbg(ndd->dev, + "%s: slot%d invalid slot: %d dpa: %llx size: %llx\n", + __func__, slot, label_slot, dpa, size); + continue; + } + count++; + } + return count; +} + +struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + + if (!preamble_current(ndd, &nsindex, &free, &nslot)) + return NULL; + + for_each_clear_bit_le(slot, free, nslot) { + struct nd_namespace_label *nd_label; + + nd_label = nd_label_base(ndd) + slot; + if (!slot_valid(nd_label, slot)) + continue; + + if (n-- == 0) + return nd_label_base(ndd) + slot; + } + + return NULL; +} diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h index d6aa0d5c6b4e..8ee1376526c7 100644 --- a/drivers/nvdimm/label.h +++ b/drivers/nvdimm/label.h @@ -125,4 +125,6 @@ int nd_label_validate(struct nvdimm_drvdata *ndd); void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, struct nd_namespace_index *src); size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd); +int nd_label_active_count(struct nvdimm_drvdata *ndd); +struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n); #endif /* __LABEL_H__ */ diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 4f653d1e61ad..5d81032fcfc5 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -14,6 +14,7 @@ #include #include #include +#include "nd-core.h" #include "nd.h" static void namespace_io_release(struct device *dev) @@ -23,11 +24,50 @@ static void namespace_io_release(struct device *dev) kfree(nsio); } +static void namespace_pmem_release(struct device *dev) +{ + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + kfree(nspm->alt_name); + kfree(nspm->uuid); + kfree(nspm); +} + +static void namespace_blk_release(struct device *dev) +{ + /* TODO: blk namespace support */ +} + static struct device_type namespace_io_device_type = { .name = "nd_namespace_io", .release = namespace_io_release, }; +static struct device_type namespace_pmem_device_type = { + .name = "nd_namespace_pmem", + .release = namespace_pmem_release, +}; + +static struct device_type namespace_blk_device_type = { + .name = "nd_namespace_blk", + .release = namespace_blk_release, +}; + +static bool is_namespace_pmem(struct device *dev) +{ + return dev ? dev->type == &namespace_pmem_device_type : false; +} + +static bool is_namespace_blk(struct device *dev) +{ + return dev ? dev->type == &namespace_blk_device_type : false; +} + +static bool is_namespace_io(struct device *dev) +{ + return dev ? dev->type == &namespace_io_device_type : false; +} + static ssize_t nstype_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -37,13 +77,676 @@ static ssize_t nstype_show(struct device *dev, } static DEVICE_ATTR_RO(nstype); +static ssize_t __alt_name_store(struct device *dev, const char *buf, + const size_t len) +{ + char *input, *pos, *alt_name, **ns_altname; + ssize_t rc; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + ns_altname = &nspm->alt_name; + } else if (is_namespace_blk(dev)) { + /* TODO: blk namespace support */ + return -ENXIO; + } else + return -ENXIO; + + if (dev->driver) + return -EBUSY; + + input = kmemdup(buf, len + 1, GFP_KERNEL); + if (!input) + return -ENOMEM; + + input[len] = '\0'; + pos = strim(input); + if (strlen(pos) + 1 > NSLABEL_NAME_LEN) { + rc = -EINVAL; + goto out; + } + + alt_name = kzalloc(NSLABEL_NAME_LEN, GFP_KERNEL); + if (!alt_name) { + rc = -ENOMEM; + goto out; + } + kfree(*ns_altname); + *ns_altname = alt_name; + sprintf(*ns_altname, "%s", pos); + rc = len; + +out: + kfree(input); + return rc; +} + +static ssize_t alt_name_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + ssize_t rc; + + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + rc = __alt_name_store(dev, buf, len); + dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc; +} + +static ssize_t alt_name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + char *ns_altname; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + ns_altname = nspm->alt_name; + } else if (is_namespace_blk(dev)) { + /* TODO: blk namespace support */ + return -ENXIO; + } else + return -ENXIO; + + return sprintf(buf, "%s\n", ns_altname ? ns_altname : ""); +} +static DEVICE_ATTR_RW(alt_name); + +static int scan_free(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_label_id *label_id, + resource_size_t n) +{ + bool is_blk = strncmp(label_id->id, "blk", 3) == 0; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + int rc = 0; + + while (n) { + struct resource *res, *last; + resource_size_t new_start; + + last = NULL; + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id->id) == 0) + last = res; + res = last; + if (!res) + return 0; + + if (n >= resource_size(res)) { + n -= resource_size(res); + nd_dbg_dpa(nd_region, ndd, res, "delete %d\n", rc); + nvdimm_free_dpa(ndd, res); + /* retry with last resource deleted */ + continue; + } + + /* + * Keep BLK allocations relegated to high DPA as much as + * possible + */ + if (is_blk) + new_start = res->start + n; + else + new_start = res->start; + + rc = adjust_resource(res, new_start, resource_size(res) - n); + nd_dbg_dpa(nd_region, ndd, res, "shrink %d\n", rc); + break; + } + + return rc; +} + +/** + * shrink_dpa_allocation - for each dimm in region free n bytes for label_id + * @nd_region: the set of dimms to reclaim @n bytes from + * @label_id: unique identifier for the namespace consuming this dpa range + * @n: number of bytes per-dimm to release + * + * Assumes resources are ordered. Starting from the end try to + * adjust_resource() the allocation to @n, but if @n is larger than the + * allocation delete it and find the 'new' last allocation in the label + * set. + */ +static int shrink_dpa_allocation(struct nd_region *nd_region, + struct nd_label_id *label_id, resource_size_t n) +{ + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + int rc; + + rc = scan_free(nd_region, nd_mapping, label_id, n); + if (rc) + return rc; + } + + return 0; +} + +static resource_size_t init_dpa_allocation(struct nd_label_id *label_id, + struct nd_region *nd_region, struct nd_mapping *nd_mapping, + resource_size_t n) +{ + bool is_blk = strncmp(label_id->id, "blk", 3) == 0; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + resource_size_t first_dpa; + struct resource *res; + int rc = 0; + + /* allocate blk from highest dpa first */ + if (is_blk) + first_dpa = nd_mapping->start + nd_mapping->size - n; + else + first_dpa = nd_mapping->start; + + /* first resource allocation for this label-id or dimm */ + res = nvdimm_allocate_dpa(ndd, label_id, first_dpa, n); + if (!res) + rc = -EBUSY; + + nd_dbg_dpa(nd_region, ndd, res, "init %d\n", rc); + return rc ? n : 0; +} + +static bool space_valid(bool is_pmem, struct nd_label_id *label_id, + struct resource *res) +{ + /* + * For BLK-space any space is valid, for PMEM-space, it must be + * contiguous with an existing allocation. + */ + if (!is_pmem) + return true; + if (!res || strcmp(res->name, label_id->id) == 0) + return true; + return false; +} + +enum alloc_loc { + ALLOC_ERR = 0, ALLOC_BEFORE, ALLOC_MID, ALLOC_AFTER, +}; + +static resource_size_t scan_allocate(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_label_id *label_id, + resource_size_t n) +{ + resource_size_t mapping_end = nd_mapping->start + nd_mapping->size - 1; + bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + const resource_size_t to_allocate = n; + struct resource *res; + int first; + + retry: + first = 0; + for_each_dpa_resource(ndd, res) { + resource_size_t allocate, available = 0, free_start, free_end; + struct resource *next = res->sibling, *new_res = NULL; + enum alloc_loc loc = ALLOC_ERR; + const char *action; + int rc = 0; + + /* ignore resources outside this nd_mapping */ + if (res->start > mapping_end) + continue; + if (res->end < nd_mapping->start) + continue; + + /* space at the beginning of the mapping */ + if (!first++ && res->start > nd_mapping->start) { + free_start = nd_mapping->start; + available = res->start - free_start; + if (space_valid(is_pmem, label_id, NULL)) + loc = ALLOC_BEFORE; + } + + /* space between allocations */ + if (!loc && next) { + free_start = res->start + resource_size(res); + free_end = min(mapping_end, next->start - 1); + if (space_valid(is_pmem, label_id, res) + && free_start < free_end) { + available = free_end + 1 - free_start; + loc = ALLOC_MID; + } + } + + /* space at the end of the mapping */ + if (!loc && !next) { + free_start = res->start + resource_size(res); + free_end = mapping_end; + if (space_valid(is_pmem, label_id, res) + && free_start < free_end) { + available = free_end + 1 - free_start; + loc = ALLOC_AFTER; + } + } + + if (!loc || !available) + continue; + allocate = min(available, n); + switch (loc) { + case ALLOC_BEFORE: + if (strcmp(res->name, label_id->id) == 0) { + /* adjust current resource up */ + if (is_pmem) + return n; + rc = adjust_resource(res, res->start - allocate, + resource_size(res) + allocate); + action = "cur grow up"; + } else + action = "allocate"; + break; + case ALLOC_MID: + if (strcmp(next->name, label_id->id) == 0) { + /* adjust next resource up */ + if (is_pmem) + return n; + rc = adjust_resource(next, next->start + - allocate, resource_size(next) + + allocate); + new_res = next; + action = "next grow up"; + } else if (strcmp(res->name, label_id->id) == 0) { + action = "grow down"; + } else + action = "allocate"; + break; + case ALLOC_AFTER: + if (strcmp(res->name, label_id->id) == 0) + action = "grow down"; + else + action = "allocate"; + break; + default: + return n; + } + + if (strcmp(action, "allocate") == 0) { + /* BLK allocate bottom up */ + if (!is_pmem) + free_start += available - allocate; + else if (free_start != nd_mapping->start) + return n; + + new_res = nvdimm_allocate_dpa(ndd, label_id, + free_start, allocate); + if (!new_res) + rc = -EBUSY; + } else if (strcmp(action, "grow down") == 0) { + /* adjust current resource down */ + rc = adjust_resource(res, res->start, resource_size(res) + + allocate); + } + + if (!new_res) + new_res = res; + + nd_dbg_dpa(nd_region, ndd, new_res, "%s(%d) %d\n", + action, loc, rc); + + if (rc) + return n; + + n -= allocate; + if (n) { + /* + * Retry scan with newly inserted resources. + * For example, if we did an ALLOC_BEFORE + * insertion there may also have been space + * available for an ALLOC_AFTER insertion, so we + * need to check this same resource again + */ + goto retry; + } else + return 0; + } + + if (is_pmem && n == to_allocate) + return init_dpa_allocation(label_id, nd_region, nd_mapping, n); + return n; +} + +/** + * grow_dpa_allocation - for each dimm allocate n bytes for @label_id + * @nd_region: the set of dimms to allocate @n more bytes from + * @label_id: unique identifier for the namespace consuming this dpa range + * @n: number of bytes per-dimm to add to the existing allocation + * + * Assumes resources are ordered. For BLK regions, first consume + * BLK-only available DPA free space, then consume PMEM-aliased DPA + * space starting at the highest DPA. For PMEM regions start + * allocations from the start of an interleave set and end at the first + * BLK allocation or the end of the interleave set, whichever comes + * first. + */ +static int grow_dpa_allocation(struct nd_region *nd_region, + struct nd_label_id *label_id, resource_size_t n) +{ + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + int rc; + + rc = scan_allocate(nd_region, nd_mapping, label_id, n); + if (rc) + return rc; + } + + return 0; +} + +static void nd_namespace_pmem_set_size(struct nd_region *nd_region, + struct nd_namespace_pmem *nspm, resource_size_t size) +{ + struct resource *res = &nspm->nsio.res; + + res->start = nd_region->ndr_start; + res->end = nd_region->ndr_start + size - 1; +} + +static ssize_t __size_store(struct device *dev, unsigned long long val) +{ + resource_size_t allocated = 0, available = 0; + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_mapping *nd_mapping; + struct nvdimm_drvdata *ndd; + struct nd_label_id label_id; + u32 flags = 0, remainder; + u8 *uuid = NULL; + int rc, i; + + if (dev->driver) + return -EBUSY; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = nspm->uuid; + } else if (is_namespace_blk(dev)) { + /* TODO: blk namespace support */ + return -ENXIO; + } + + /* + * We need a uuid for the allocation-label and dimm(s) on which + * to store the label. + */ + if (!uuid || nd_region->ndr_mappings == 0) + return -ENXIO; + + div_u64_rem(val, SZ_4K * nd_region->ndr_mappings, &remainder); + if (remainder) { + dev_dbg(dev, "%llu is not %dK aligned\n", val, + (SZ_4K * nd_region->ndr_mappings) / SZ_1K); + return -EINVAL; + } + + nd_label_gen_id(&label_id, uuid, flags); + for (i = 0; i < nd_region->ndr_mappings; i++) { + nd_mapping = &nd_region->mapping[i]; + ndd = to_ndd(nd_mapping); + + /* + * All dimms in an interleave set, or the base dimm for a blk + * region, need to be enabled for the size to be changed. + */ + if (!ndd) + return -ENXIO; + + allocated += nvdimm_allocated_dpa(ndd, &label_id); + } + available = nd_region_available_dpa(nd_region); + + if (val > available + allocated) + return -ENOSPC; + + if (val == allocated) + return 0; + + val = div_u64(val, nd_region->ndr_mappings); + allocated = div_u64(allocated, nd_region->ndr_mappings); + if (val < allocated) + rc = shrink_dpa_allocation(nd_region, &label_id, + allocated - val); + else + rc = grow_dpa_allocation(nd_region, &label_id, val - allocated); + + if (rc) + return rc; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + nd_namespace_pmem_set_size(nd_region, nspm, + val * nd_region->ndr_mappings); + } + + return rc; +} + +static ssize_t size_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + unsigned long long val; + u8 **uuid = NULL; + int rc; + + rc = kstrtoull(buf, 0, &val); + if (rc) + return rc; + + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + rc = __size_store(dev, val); + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = &nspm->uuid; + } else if (is_namespace_blk(dev)) { + /* TODO: blk namespace support */ + rc = -ENXIO; + } + + if (rc == 0 && val == 0 && uuid) { + /* setting size zero == 'delete namespace' */ + kfree(*uuid); + *uuid = NULL; + } + + dev_dbg(dev, "%s: %llx %s (%d)\n", __func__, val, rc < 0 + ? "fail" : "success", rc); + + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc ? rc : len; +} + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + return sprintf(buf, "%llu\n", (unsigned long long) + resource_size(&nspm->nsio.res)); + } else if (is_namespace_blk(dev)) { + /* TODO: blk namespace support */ + return -ENXIO; + } else if (is_namespace_io(dev)) { + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + return sprintf(buf, "%llu\n", (unsigned long long) + resource_size(&nsio->res)); + } else + return -ENXIO; +} +static DEVICE_ATTR(size, S_IRUGO, size_show, size_store); + +static ssize_t uuid_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u8 *uuid; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = nspm->uuid; + } else if (is_namespace_blk(dev)) { + /* TODO: blk namespace support */ + return -ENXIO; + } else + return -ENXIO; + + if (uuid) + return sprintf(buf, "%pUb\n", uuid); + return sprintf(buf, "\n"); +} + +/** + * namespace_update_uuid - check for a unique uuid and whether we're "renaming" + * @nd_region: parent region so we can updates all dimms in the set + * @dev: namespace type for generating label_id + * @new_uuid: incoming uuid + * @old_uuid: reference to the uuid storage location in the namespace object + */ +static int namespace_update_uuid(struct nd_region *nd_region, + struct device *dev, u8 *new_uuid, u8 **old_uuid) +{ + u32 flags = is_namespace_blk(dev) ? NSLABEL_FLAG_LOCAL : 0; + struct nd_label_id old_label_id; + struct nd_label_id new_label_id; + int i, rc; + + rc = nd_is_uuid_unique(dev, new_uuid) ? 0 : -EINVAL; + if (rc) { + kfree(new_uuid); + return rc; + } + + if (*old_uuid == NULL) + goto out; + + nd_label_gen_id(&old_label_id, *old_uuid, flags); + nd_label_gen_id(&new_label_id, new_uuid, flags); + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, old_label_id.id) == 0) + sprintf((void *) res->name, "%s", + new_label_id.id); + } + kfree(*old_uuid); + out: + *old_uuid = new_uuid; + return 0; +} + +static ssize_t uuid_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + u8 *uuid = NULL; + u8 **ns_uuid; + ssize_t rc; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + ns_uuid = &nspm->uuid; + } else if (is_namespace_blk(dev)) { + /* TODO: blk namespace support */ + return -ENXIO; + } else + return -ENXIO; + + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + rc = nd_uuid_store(dev, &uuid, buf, len); + if (rc >= 0) + rc = namespace_update_uuid(nd_region, dev, uuid, ns_uuid); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc ? rc : len; +} +static DEVICE_ATTR_RW(uuid); + +static ssize_t resource_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct resource *res; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + res = &nspm->nsio.res; + } else if (is_namespace_io(dev)) { + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + res = &nsio->res; + } else + return -ENXIO; + + /* no address to convey if the namespace has no allocation */ + if (resource_size(res) == 0) + return -ENXIO; + return sprintf(buf, "%#llx\n", (unsigned long long) res->start); +} +static DEVICE_ATTR_RO(resource); + static struct attribute *nd_namespace_attributes[] = { &dev_attr_nstype.attr, + &dev_attr_size.attr, + &dev_attr_uuid.attr, + &dev_attr_resource.attr, + &dev_attr_alt_name.attr, NULL, }; +static umode_t namespace_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + + if (a == &dev_attr_resource.attr) { + if (is_namespace_blk(dev)) + return 0; + return a->mode; + } + + if (is_namespace_pmem(dev) || is_namespace_blk(dev)) { + if (a == &dev_attr_size.attr) + return S_IWUSR | S_IRUGO; + return a->mode; + } + + if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr) + return a->mode; + + return 0; +} + static struct attribute_group nd_namespace_attribute_group = { .attrs = nd_namespace_attributes, + .is_visible = namespace_visible, }; static const struct attribute_group *nd_namespace_attribute_groups[] = { @@ -81,23 +784,318 @@ static struct device **create_namespace_io(struct nd_region *nd_region) return devs; } +static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid, + u64 cookie, u16 pos) +{ + struct nd_namespace_label *found = NULL; + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nd_namespace_label *nd_label; + bool found_uuid = false; + int l; + + for_each_label(l, nd_label, nd_mapping->labels) { + u64 isetcookie = __le64_to_cpu(nd_label->isetcookie); + u16 position = __le16_to_cpu(nd_label->position); + u16 nlabel = __le16_to_cpu(nd_label->nlabel); + + if (isetcookie != cookie) + continue; + + if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0) + continue; + + if (found_uuid) { + dev_dbg(to_ndd(nd_mapping)->dev, + "%s duplicate entry for uuid\n", + __func__); + return false; + } + found_uuid = true; + if (nlabel != nd_region->ndr_mappings) + continue; + if (position != pos) + continue; + found = nd_label; + break; + } + if (found) + break; + } + return found != NULL; +} + +static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id) +{ + struct nd_namespace_label *select = NULL; + int i; + + if (!pmem_id) + return -ENODEV; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nd_namespace_label *nd_label; + u64 hw_start, hw_end, pmem_start, pmem_end; + int l; + + for_each_label(l, nd_label, nd_mapping->labels) + if (memcmp(nd_label->uuid, pmem_id, NSLABEL_UUID_LEN) == 0) + break; + + if (!nd_label) { + WARN_ON(1); + return -EINVAL; + } + + select = nd_label; + /* + * Check that this label is compliant with the dpa + * range published in NFIT + */ + hw_start = nd_mapping->start; + hw_end = hw_start + nd_mapping->size; + pmem_start = __le64_to_cpu(select->dpa); + pmem_end = pmem_start + __le64_to_cpu(select->rawsize); + if (pmem_start == hw_start && pmem_end <= hw_end) + /* pass */; + else + return -EINVAL; + + nd_mapping->labels[0] = select; + nd_mapping->labels[1] = NULL; + } + return 0; +} + +/** + * find_pmem_label_set - validate interleave set labelling, retrieve label0 + * @nd_region: region with mappings to validate + */ +static int find_pmem_label_set(struct nd_region *nd_region, + struct nd_namespace_pmem *nspm) +{ + u64 cookie = nd_region_interleave_set_cookie(nd_region); + struct nd_namespace_label *nd_label; + u8 select_id[NSLABEL_UUID_LEN]; + resource_size_t size = 0; + u8 *pmem_id = NULL; + int rc = -ENODEV, l; + u16 i; + + if (cookie == 0) + return -ENXIO; + + /* + * Find a complete set of labels by uuid. By definition we can start + * with any mapping as the reference label + */ + for_each_label(l, nd_label, nd_region->mapping[0].labels) { + u64 isetcookie = __le64_to_cpu(nd_label->isetcookie); + + if (isetcookie != cookie) + continue; + + for (i = 0; nd_region->ndr_mappings; i++) + if (!has_uuid_at_pos(nd_region, nd_label->uuid, + cookie, i)) + break; + if (i < nd_region->ndr_mappings) { + /* + * Give up if we don't find an instance of a + * uuid at each position (from 0 to + * nd_region->ndr_mappings - 1), or if we find a + * dimm with two instances of the same uuid. + */ + rc = -EINVAL; + goto err; + } else if (pmem_id) { + /* + * If there is more than one valid uuid set, we + * need userspace to clean this up. + */ + rc = -EBUSY; + goto err; + } + memcpy(select_id, nd_label->uuid, NSLABEL_UUID_LEN); + pmem_id = select_id; + } + + /* + * Fix up each mapping's 'labels' to have the validated pmem label for + * that position at labels[0], and NULL at labels[1]. In the process, + * check that the namespace aligns with interleave-set. We know + * that it does not overlap with any blk namespaces by virtue of + * the dimm being enabled (i.e. nd_label_reserve_dpa() + * succeeded). + */ + rc = select_pmem_id(nd_region, pmem_id); + if (rc) + goto err; + + /* Calculate total size and populate namespace properties from label0 */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nd_namespace_label *label0 = nd_mapping->labels[0]; + + size += __le64_to_cpu(label0->rawsize); + if (__le16_to_cpu(label0->position) != 0) + continue; + WARN_ON(nspm->alt_name || nspm->uuid); + nspm->alt_name = kmemdup((void __force *) label0->name, + NSLABEL_NAME_LEN, GFP_KERNEL); + nspm->uuid = kmemdup((void __force *) label0->uuid, + NSLABEL_UUID_LEN, GFP_KERNEL); + } + + if (!nspm->alt_name || !nspm->uuid) { + rc = -ENOMEM; + goto err; + } + + nd_namespace_pmem_set_size(nd_region, nspm, size); + + return 0; + err: + switch (rc) { + case -EINVAL: + dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__); + break; + case -ENODEV: + dev_dbg(&nd_region->dev, "%s: label not found\n", __func__); + break; + default: + dev_dbg(&nd_region->dev, "%s: unexpected err: %d\n", + __func__, rc); + break; + } + return rc; +} + +static struct device **create_namespace_pmem(struct nd_region *nd_region) +{ + struct nd_namespace_pmem *nspm; + struct device *dev, **devs; + struct resource *res; + int rc; + + nspm = kzalloc(sizeof(*nspm), GFP_KERNEL); + if (!nspm) + return NULL; + + dev = &nspm->nsio.dev; + dev->type = &namespace_pmem_device_type; + dev->parent = &nd_region->dev; + res = &nspm->nsio.res; + res->name = dev_name(&nd_region->dev); + res->flags = IORESOURCE_MEM; + rc = find_pmem_label_set(nd_region, nspm); + if (rc == -ENODEV) { + int i; + + /* Pass, try to permit namespace creation... */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + + kfree(nd_mapping->labels); + nd_mapping->labels = NULL; + } + + /* Publish a zero-sized namespace for userspace to configure. */ + nd_namespace_pmem_set_size(nd_region, nspm, 0); + + rc = 0; + } else if (rc) + goto err; + + devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL); + if (!devs) + goto err; + + devs[0] = dev; + return devs; + + err: + namespace_pmem_release(&nspm->nsio.dev); + return NULL; +} + +static int init_active_labels(struct nd_region *nd_region) +{ + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nvdimm *nvdimm = nd_mapping->nvdimm; + int count, j; + + /* + * If the dimm is disabled then prevent the region from + * being activated if it aliases DPA. + */ + if (!ndd) { + if ((nvdimm->flags & NDD_ALIASING) == 0) + return 0; + dev_dbg(&nd_region->dev, "%s: is disabled, failing probe\n", + dev_name(&nd_mapping->nvdimm->dev)); + return -ENXIO; + } + nd_mapping->ndd = ndd; + atomic_inc(&nvdimm->busy); + get_ndd(ndd); + + count = nd_label_active_count(ndd); + dev_dbg(ndd->dev, "%s: %d\n", __func__, count); + if (!count) + continue; + nd_mapping->labels = kcalloc(count + 1, sizeof(void *), + GFP_KERNEL); + if (!nd_mapping->labels) + return -ENOMEM; + for (j = 0; j < count; j++) { + struct nd_namespace_label *label; + + label = nd_label_active(ndd, j); + nd_mapping->labels[j] = label; + } + } + + return 0; +} + int nd_region_register_namespaces(struct nd_region *nd_region, int *err) { struct device **devs = NULL; - int i; + int i, rc = 0, type; *err = 0; - switch (nd_region_to_nstype(nd_region)) { + nvdimm_bus_lock(&nd_region->dev); + rc = init_active_labels(nd_region); + if (rc) { + nvdimm_bus_unlock(&nd_region->dev); + return rc; + } + + type = nd_region_to_nstype(nd_region); + switch (type) { case ND_DEVICE_NAMESPACE_IO: devs = create_namespace_io(nd_region); break; + case ND_DEVICE_NAMESPACE_PMEM: + devs = create_namespace_pmem(nd_region); + break; default: break; } + nvdimm_bus_unlock(&nd_region->dev); if (!devs) return -ENODEV; + nd_region->ns_seed = devs[0]; for (i = 0; devs[i]; i++) { struct device *dev = devs[i]; diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 6a4b2c066ee7..c6c889292bab 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -56,4 +56,16 @@ int nvdimm_bus_register_dimms(struct nvdimm_bus *nvdimm_bus); int nvdimm_bus_register_regions(struct nvdimm_bus *nvdimm_bus); int nvdimm_bus_init_interleave_sets(struct nvdimm_bus *nvdimm_bus); int nd_match_dimm(struct device *dev, void *data); +struct nd_label_id; +char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags); +bool nd_is_uuid_unique(struct device *dev, u8 *uuid); +struct nd_region; +struct nvdimm_drvdata; +struct nd_mapping; +resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, resource_size_t *overlap); +resource_size_t nd_region_available_dpa(struct nd_region *nd_region); +resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id); +void get_ndd(struct nvdimm_drvdata *ndd); #endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 401fa0d5b6ea..03e610cd9f43 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -16,6 +16,7 @@ #include #include #include +#include #include "label.h" struct nvdimm_drvdata { @@ -25,6 +26,7 @@ struct nvdimm_drvdata { void *data; int ns_current, ns_next; struct resource dpa; + struct kref kref; }; struct nd_region_namespaces { @@ -59,12 +61,19 @@ static inline struct nd_namespace_index *to_next_namespace_index( (unsigned long long) (res ? resource_size(res) : 0), \ (unsigned long long) (res ? res->start : 0), ##arg) +#define for_each_label(l, label, labels) \ + for (l = 0; (label = labels ? labels[l] : NULL); l++) + +#define for_each_dpa_resource(ndd, res) \ + for (res = (ndd)->dpa.child; res; res = res->sibling) + #define for_each_dpa_resource_safe(ndd, res, next) \ for (res = (ndd)->dpa.child, next = res ? res->sibling : NULL; \ res; res = next, next = next ? next->sibling : NULL) struct nd_region { struct device dev; + struct device *ns_seed; u16 ndr_mappings; u64 ndr_size; u64 ndr_start; @@ -88,20 +97,28 @@ enum nd_async_mode { ND_ASYNC, }; +void wait_nvdimm_bus_probe_idle(struct device *dev); void nd_device_register(struct device *dev); void nd_device_unregister(struct device *dev, enum nd_async_mode mode); +int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, + size_t len); int __init nvdimm_init(void); int __init nd_region_init(void); void nvdimm_exit(void); void nd_region_exit(void); +struct nvdimm; +struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping); int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); struct nd_region *to_nd_region(struct device *dev); int nd_region_to_nstype(struct nd_region *nd_region); int nd_region_register_namespaces(struct nd_region *nd_region, int *err); +u64 nd_region_interleave_set_cookie(struct nd_region *nd_region); void nvdimm_bus_lock(struct device *dev); void nvdimm_bus_unlock(struct device *dev); bool is_nvdimm_bus_locked(struct device *dev); +void nvdimm_drvdata_release(struct kref *kref); +void put_ndd(struct nvdimm_drvdata *ndd); int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd); void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res); struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index d46975ed9e40..90902a142e35 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -203,6 +203,23 @@ static int nd_pmem_probe(struct device *dev) struct nd_namespace_io *nsio = to_nd_namespace_io(dev); struct pmem_device *pmem; + if (resource_size(&nsio->res) < ND_MIN_NAMESPACE_SIZE) { + resource_size_t size = resource_size(&nsio->res); + + dev_dbg(dev, "%s: size: %pa, too small must be at least %#x\n", + __func__, &size, ND_MIN_NAMESPACE_SIZE); + return -ENODEV; + } + + if (nd_region_to_nstype(nd_region) == ND_DEVICE_NAMESPACE_PMEM) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + if (!nspm->uuid) { + dev_dbg(dev, "%s: uuid not set\n", __func__); + return -ENODEV; + } + } + pmem = pmem_alloc(dev, &nsio->res, nd_region->id); if (IS_ERR(pmem)) return PTR_ERR(pmem); @@ -222,13 +239,14 @@ static int nd_pmem_remove(struct device *dev) MODULE_ALIAS("pmem"); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM); static struct nd_device_driver nd_pmem_driver = { .probe = nd_pmem_probe, .remove = nd_pmem_remove, .drv = { .name = "nd_pmem", }, - .type = ND_DRIVER_NAMESPACE_IO, + .type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM, }; static int __init pmem_init(void) diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index ade3dba81afd..9aba44e483e0 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -61,8 +61,11 @@ static int child_unregister(struct device *dev, void *data) static int nd_region_remove(struct device *dev) { + struct nd_region *nd_region = to_nd_region(dev); + /* flush attribute readers and disable */ nvdimm_bus_lock(dev); + nd_region->ns_seed = NULL; dev_set_drvdata(dev, NULL); nvdimm_bus_unlock(dev); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 1571424578f0..b45806f7176d 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "nd-core.h" #include "nd.h" @@ -99,6 +100,58 @@ int nd_region_to_nstype(struct nd_region *nd_region) return 0; } +EXPORT_SYMBOL(nd_region_to_nstype); + +static int is_uuid_busy(struct device *dev, void *data) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + u8 *uuid = data; + + switch (nd_region_to_nstype(nd_region)) { + case ND_DEVICE_NAMESPACE_PMEM: { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + if (!nspm->uuid) + break; + if (memcmp(uuid, nspm->uuid, NSLABEL_UUID_LEN) == 0) + return -EBUSY; + break; + } + case ND_DEVICE_NAMESPACE_BLK: { + /* TODO: blk namespace support */ + break; + } + default: + break; + } + + return 0; +} + +static int is_namespace_uuid_busy(struct device *dev, void *data) +{ + if (is_nd_pmem(dev) || is_nd_blk(dev)) + return device_for_each_child(dev, data, is_uuid_busy); + return 0; +} + +/** + * nd_is_uuid_unique - verify that no other namespace has @uuid + * @dev: any device on a nvdimm_bus + * @uuid: uuid to check + */ +bool nd_is_uuid_unique(struct device *dev, u8 *uuid) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return false; + WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm_bus->dev)); + if (device_for_each_child(&nvdimm_bus->dev, uuid, + is_namespace_uuid_busy) != 0) + return false; + return true; +} static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -151,6 +204,60 @@ static ssize_t set_cookie_show(struct device *dev, } static DEVICE_ATTR_RO(set_cookie); +resource_size_t nd_region_available_dpa(struct nd_region *nd_region) +{ + resource_size_t blk_max_overlap = 0, available, overlap; + int i; + + WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); + + retry: + available = 0; + overlap = blk_max_overlap; + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + + /* if a dimm is disabled the available capacity is zero */ + if (!ndd) + return 0; + + if (is_nd_pmem(&nd_region->dev)) { + available += nd_pmem_available_dpa(nd_region, + nd_mapping, &overlap); + if (overlap > blk_max_overlap) { + blk_max_overlap = overlap; + goto retry; + } + } else if (is_nd_blk(&nd_region->dev)) { + /* TODO: BLK Namespace support */ + } + } + + return available; +} + +static ssize_t available_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + unsigned long long available = 0; + + /* + * Flush in-flight updates and grab a snapshot of the available + * size. Of course, this value is potentially invalidated the + * memory nvdimm_bus_lock() is dropped, but that's userspace's + * problem to not race itself. + */ + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + available = nd_region_available_dpa(nd_region); + nvdimm_bus_unlock(dev); + + return sprintf(buf, "%llu\n", available); +} +static DEVICE_ATTR_RO(available_size); + static ssize_t init_namespaces_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -168,11 +275,29 @@ static ssize_t init_namespaces_show(struct device *dev, } static DEVICE_ATTR_RO(init_namespaces); +static ssize_t namespace_seed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (nd_region->ns_seed) + rc = sprintf(buf, "%s\n", dev_name(nd_region->ns_seed)); + else + rc = sprintf(buf, "\n"); + nvdimm_bus_unlock(dev); + return rc; +} +static DEVICE_ATTR_RO(namespace_seed); + static struct attribute *nd_region_attributes[] = { &dev_attr_size.attr, &dev_attr_nstype.attr, &dev_attr_mappings.attr, &dev_attr_set_cookie.attr, + &dev_attr_available_size.attr, + &dev_attr_namespace_seed.attr, &dev_attr_init_namespaces.attr, NULL, }; @@ -182,12 +307,18 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) struct device *dev = container_of(kobj, typeof(*dev), kobj); struct nd_region *nd_region = to_nd_region(dev); struct nd_interleave_set *nd_set = nd_region->nd_set; + int type = nd_region_to_nstype(nd_region); - if (a != &dev_attr_set_cookie.attr) + if (a != &dev_attr_set_cookie.attr + && a != &dev_attr_available_size.attr) return a->mode; - if (is_nd_pmem(dev) && nd_set) - return a->mode; + if ((type == ND_DEVICE_NAMESPACE_PMEM + || type == ND_DEVICE_NAMESPACE_BLK) + && a == &dev_attr_available_size.attr) + return a->mode; + else if (is_nd_pmem(dev) && nd_set) + return a->mode; return 0; } @@ -198,6 +329,15 @@ struct attribute_group nd_region_attribute_group = { }; EXPORT_SYMBOL_GPL(nd_region_attribute_group); +u64 nd_region_interleave_set_cookie(struct nd_region *nd_region) +{ + struct nd_interleave_set *nd_set = nd_region->nd_set; + + if (nd_set) + return nd_set->cookie; + return 0; +} + /* * Upon successful probe/remove, take/release a reference on the * associated interleave set (if present) @@ -205,18 +345,20 @@ EXPORT_SYMBOL_GPL(nd_region_attribute_group); static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, struct device *dev, bool probe) { - if (is_nd_pmem(dev) || is_nd_blk(dev)) { + if (!probe && (is_nd_pmem(dev) || is_nd_blk(dev))) { struct nd_region *nd_region = to_nd_region(dev); int i; for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = nd_mapping->ndd; struct nvdimm *nvdimm = nd_mapping->nvdimm; - if (probe) - atomic_inc(&nvdimm->busy); - else - atomic_dec(&nvdimm->busy); + kfree(nd_mapping->labels); + nd_mapping->labels = NULL; + put_ndd(ndd); + nd_mapping->ndd = NULL; + atomic_dec(&nvdimm->busy); } } } diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 1b627b109360..c130972e08c4 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -41,10 +41,20 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len); +struct nd_namespace_label; +struct nvdimm_drvdata; struct nd_mapping { struct nvdimm *nvdimm; + struct nd_namespace_label **labels; u64 start; u64 size; + /* + * @ndd is for private use at region enable / disable time for + * get_ndd() + put_ndd(), all other nd_mapping to ndd + * conversions use to_ndd() which respects enabled state of the + * nvdimm. + */ + struct nvdimm_drvdata *ndd; }; struct nvdimm_bus_descriptor { diff --git a/include/linux/nd.h b/include/linux/nd.h index da70e9962197..255c38a83083 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -28,16 +28,40 @@ static inline struct nd_device_driver *to_nd_device_driver( return container_of(drv, struct nd_device_driver, drv); }; +/** + * struct nd_namespace_io - infrastructure for loading an nd_pmem instance + * @dev: namespace device created by the nd region driver + * @res: struct resource conversion of a NFIT SPA table + */ struct nd_namespace_io { struct device dev; struct resource res; }; +/** + * struct nd_namespace_pmem - namespace device for dimm-backed interleaved memory + * @nsio: device and system physical address range to drive + * @alt_name: namespace name supplied in the dimm label + * @uuid: namespace name supplied in the dimm label + */ +struct nd_namespace_pmem { + struct nd_namespace_io nsio; + char *alt_name; + u8 *uuid; +}; + static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev) { return container_of(dev, struct nd_namespace_io, dev); } +static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + return container_of(nsio, struct nd_namespace_pmem, nsio); +} + #define MODULE_ALIAS_ND_DEVICE(type) \ MODULE_ALIAS("nd:t" __stringify(type) "*") #define ND_DEVICE_MODALIAS_FMT "nd:t%d" diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 1357a87b8714..2b94ea2287bb 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -190,4 +190,8 @@ enum nd_driver_flags { ND_DRIVER_NAMESPACE_PMEM = 1 << ND_DEVICE_NAMESPACE_PMEM, ND_DRIVER_NAMESPACE_BLK = 1 << ND_DEVICE_NAMESPACE_BLK, }; + +enum { + ND_MIN_NAMESPACE_SIZE = 0x00400000, +}; #endif /* __NDCTL_H__ */ -- cgit v1.2.3