summaryrefslogtreecommitdiff
path: root/drivers/nvdimm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-07 19:44:06 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-07 19:44:06 +0300
commitb6ffe9ba46016f8351896ccee33bebcd0e5ea7c0 (patch)
tree839a5a070eabe8851797330ea77ca7eb7c93bcc1 /drivers/nvdimm
parent9f45efb9286268e01d5022d34a58a68f53ca3072 (diff)
parent9d92573fff3ec70785ef1815cc80573f70e7a921 (diff)
downloadlinux-b6ffe9ba46016f8351896ccee33bebcd0e5ea7c0.tar.xz
Merge tag 'libnvdimm-for-4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams: "libnvdimm updates for the latest ACPI and UEFI specifications. This pull request also includes new 'struct dax_operations' enabling to undo the abuse of copy_user_nocache() for copy operations to pmem. The dax work originally missed 4.12 to address concerns raised by Al. Summary: - Introduce the _flushcache() family of memory copy helpers and use them for persistent memory write operations on x86. The _flushcache() semantic indicates that the cache is either bypassed for the copy operation (movnt) or any lines dirtied by the copy operation are written back (clwb, clflushopt, or clflush). - Extend dax_operations with ->copy_from_iter() and ->flush() operations. These operations and other infrastructure updates allow all persistent memory specific dax functionality to be pushed into libnvdimm and the pmem driver directly. It also allows dax-specific sysfs attributes to be linked to a host device, for example: /sys/block/pmem0/dax/write_cache - Add support for the new NVDIMM platform/firmware mechanisms introduced in ACPI 6.2 and UEFI 2.7. This support includes the v1.2 namespace label format, extensions to the address-range-scrub command set, new error injection commands, and a new BTT (block-translation-table) layout. These updates support inter-OS and pre-OS compatibility. - Fix a longstanding memory corruption bug in nfit_test. - Make the pmem and nvdimm-region 'badblocks' sysfs files poll(2) capable. - Miscellaneous fixes and small updates across libnvdimm and the nfit driver. Acknowledgements that came after the branch was pushed: commit 6aa734a2f38e ("libnvdimm, region, pmem: fix 'badblocks' sysfs_get_dirent() reference lifetime") was reviewed by Toshi Kani <toshi.kani@hpe.com>" * tag 'libnvdimm-for-4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (42 commits) libnvdimm, namespace: record 'lbasize' for pmem namespaces acpi/nfit: Issue Start ARS to retrieve existing records libnvdimm: New ACPI 6.2 DSM functions acpi, nfit: Show bus_dsm_mask in sysfs libnvdimm, acpi, nfit: Add bus level dsm mask for pass thru. acpi, nfit: Enable DSM pass thru for root functions. libnvdimm: passthru functions clear to send libnvdimm, btt: convert some info messages to warn/err libnvdimm, region, pmem: fix 'badblocks' sysfs_get_dirent() reference lifetime libnvdimm: fix the clear-error check in nsio_rw_bytes libnvdimm, btt: fix btt_rw_page not returning errors acpi, nfit: quiet invalid block-aperture-region warnings libnvdimm, btt: BTT updates for UEFI 2.7 format acpi, nfit: constify *_attribute_group libnvdimm, pmem: disable dax flushing when pmem is fronting a volatile region libnvdimm, pmem, dax: export a cache control attribute dax: convert to bitmask for flags dax: remove default copy_from_iter fallback libnvdimm, nfit: enable support for volatile ranges libnvdimm, pmem: fix persistence warning ...
Diffstat (limited to 'drivers/nvdimm')
-rw-r--r--drivers/nvdimm/btt.c45
-rw-r--r--drivers/nvdimm/btt.h2
-rw-r--r--drivers/nvdimm/btt_devs.c54
-rw-r--r--drivers/nvdimm/bus.c15
-rw-r--r--drivers/nvdimm/claim.c38
-rw-r--r--drivers/nvdimm/core.c5
-rw-r--r--drivers/nvdimm/dax_devs.c10
-rw-r--r--drivers/nvdimm/dimm_devs.c10
-rw-r--r--drivers/nvdimm/label.c251
-rw-r--r--drivers/nvdimm/label.h21
-rw-r--r--drivers/nvdimm/namespace_devs.c282
-rw-r--r--drivers/nvdimm/nd-core.h9
-rw-r--r--drivers/nvdimm/nd.h17
-rw-r--r--drivers/nvdimm/pfn_devs.c12
-rw-r--r--drivers/nvdimm/pmem.c63
-rw-r--r--drivers/nvdimm/pmem.h15
-rw-r--r--drivers/nvdimm/region.c17
-rw-r--r--drivers/nvdimm/region_devs.c88
18 files changed, 815 insertions, 139 deletions
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index b6ba0618ea46..64216dea5278 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -37,8 +37,8 @@ static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
struct nd_btt *nd_btt = arena->nd_btt;
struct nd_namespace_common *ndns = nd_btt->ndns;
- /* arena offsets are 4K from the base of the device */
- offset += SZ_4K;
+ /* arena offsets may be shifted from the base of the device */
+ offset += arena->nd_btt->initial_offset;
return nvdimm_read_bytes(ndns, offset, buf, n, flags);
}
@@ -48,8 +48,8 @@ static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
struct nd_btt *nd_btt = arena->nd_btt;
struct nd_namespace_common *ndns = nd_btt->ndns;
- /* arena offsets are 4K from the base of the device */
- offset += SZ_4K;
+ /* arena offsets may be shifted from the base of the device */
+ offset += arena->nd_btt->initial_offset;
return nvdimm_write_bytes(ndns, offset, buf, n, flags);
}
@@ -323,7 +323,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
old_ent = btt_log_get_old(log);
if (old_ent < 0 || old_ent > 1) {
- dev_info(to_dev(arena),
+ dev_err(to_dev(arena),
"log corruption (%d): lane %d seq [%d, %d]\n",
old_ent, lane, log[0].seq, log[1].seq);
/* TODO set error state? */
@@ -576,8 +576,8 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
arena->internal_lbasize = roundup(arena->external_lbasize,
INT_LBASIZE_ALIGNMENT);
arena->nfree = BTT_DEFAULT_NFREE;
- arena->version_major = 1;
- arena->version_minor = 1;
+ arena->version_major = btt->nd_btt->version_major;
+ arena->version_minor = btt->nd_btt->version_minor;
if (available % BTT_PG_SIZE)
available -= (available % BTT_PG_SIZE);
@@ -684,7 +684,7 @@ static int discover_arenas(struct btt *btt)
dev_info(to_dev(arena), "No existing arenas\n");
goto out;
} else {
- dev_info(to_dev(arena),
+ dev_err(to_dev(arena),
"Found corrupted metadata!\n");
ret = -ENODEV;
goto out;
@@ -1227,7 +1227,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
op_is_write(bio_op(bio)), iter.bi_sector);
if (err) {
- dev_info(&btt->nd_btt->dev,
+ dev_err(&btt->nd_btt->dev,
"io error in %s sector %lld, len %d,\n",
(op_is_write(bio_op(bio))) ? "WRITE" :
"READ",
@@ -1248,10 +1248,13 @@ static int btt_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, bool is_write)
{
struct btt *btt = bdev->bd_disk->private_data;
+ int rc;
- btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector);
- page_endio(page, is_write, 0);
- return 0;
+ rc = btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector);
+ if (rc == 0)
+ page_endio(page, is_write, 0);
+
+ return rc;
}
@@ -1369,7 +1372,7 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
}
if (btt->init_state != INIT_READY && nd_region->ro) {
- dev_info(dev, "%s is read-only, unable to init btt metadata\n",
+ dev_warn(dev, "%s is read-only, unable to init btt metadata\n",
dev_name(&nd_region->dev));
return NULL;
} else if (btt->init_state != INIT_READY) {
@@ -1424,6 +1427,7 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
{
struct nd_btt *nd_btt = to_nd_btt(ndns->claim);
struct nd_region *nd_region;
+ struct btt_sb *btt_sb;
struct btt *btt;
size_t rawsize;
@@ -1432,10 +1436,21 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
return -ENODEV;
}
- rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K;
+ btt_sb = devm_kzalloc(&nd_btt->dev, sizeof(*btt_sb), GFP_KERNEL);
+
+ /*
+ * If this returns < 0, that is ok as it just means there wasn't
+ * an existing BTT, and we're creating a new one. We still need to
+ * call this as we need the version dependent fields in nd_btt to be
+ * set correctly based on the holder class
+ */
+ nd_btt_version(nd_btt, ndns, btt_sb);
+
+ rawsize = nvdimm_namespace_capacity(ndns) - nd_btt->initial_offset;
if (rawsize < ARENA_MIN_SIZE) {
dev_dbg(&nd_btt->dev, "%s must be at least %ld bytes\n",
- dev_name(&ndns->dev), ARENA_MIN_SIZE + SZ_4K);
+ dev_name(&ndns->dev),
+ ARENA_MIN_SIZE + nd_btt->initial_offset);
return -ENXIO;
}
nd_region = to_nd_region(nd_btt->dev.parent);
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index b2f8651e5395..888e862907a0 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -184,5 +184,7 @@ struct btt {
};
bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super);
+int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns,
+ struct btt_sb *btt_sb);
#endif
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 4c989bb9a8a0..3e359d282f8e 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -260,20 +260,55 @@ bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super)
}
EXPORT_SYMBOL(nd_btt_arena_is_valid);
+int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns,
+ struct btt_sb *btt_sb)
+{
+ if (ndns->claim_class == NVDIMM_CCLASS_BTT2) {
+ /* Probe/setup for BTT v2.0 */
+ nd_btt->initial_offset = 0;
+ nd_btt->version_major = 2;
+ nd_btt->version_minor = 0;
+ if (nvdimm_read_bytes(ndns, 0, btt_sb, sizeof(*btt_sb), 0))
+ return -ENXIO;
+ if (!nd_btt_arena_is_valid(nd_btt, btt_sb))
+ return -ENODEV;
+ if ((le16_to_cpu(btt_sb->version_major) != 2) ||
+ (le16_to_cpu(btt_sb->version_minor) != 0))
+ return -ENODEV;
+ } else {
+ /*
+ * Probe/setup for BTT v1.1 (NVDIMM_CCLASS_NONE or
+ * NVDIMM_CCLASS_BTT)
+ */
+ nd_btt->initial_offset = SZ_4K;
+ nd_btt->version_major = 1;
+ nd_btt->version_minor = 1;
+ if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb), 0))
+ return -ENXIO;
+ if (!nd_btt_arena_is_valid(nd_btt, btt_sb))
+ return -ENODEV;
+ if ((le16_to_cpu(btt_sb->version_major) != 1) ||
+ (le16_to_cpu(btt_sb->version_minor) != 1))
+ return -ENODEV;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(nd_btt_version);
+
static int __nd_btt_probe(struct nd_btt *nd_btt,
struct nd_namespace_common *ndns, struct btt_sb *btt_sb)
{
+ int rc;
+
if (!btt_sb || !ndns || !nd_btt)
return -ENODEV;
- if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb), 0))
- return -ENXIO;
-
if (nvdimm_namespace_capacity(ndns) < SZ_16M)
return -ENXIO;
- if (!nd_btt_arena_is_valid(nd_btt, btt_sb))
- return -ENODEV;
+ rc = nd_btt_version(nd_btt, ndns, btt_sb);
+ if (rc < 0)
+ return rc;
nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize);
nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL);
@@ -295,6 +330,15 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns)
if (ndns->force_raw)
return -ENODEV;
+ switch (ndns->claim_class) {
+ case NVDIMM_CCLASS_NONE:
+ case NVDIMM_CCLASS_BTT:
+ case NVDIMM_CCLASS_BTT2:
+ break;
+ default:
+ return -ENODEV;
+ }
+
nvdimm_bus_lock(&ndns->dev);
btt_dev = __nd_btt_create(nd_region, 0, NULL, ndns);
nvdimm_bus_unlock(&ndns->dev);
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index e9361bffe5ee..937fafa1886a 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -38,13 +38,13 @@ static int to_nd_device_type(struct device *dev)
{
if (is_nvdimm(dev))
return ND_DEVICE_DIMM;
- else if (is_nd_pmem(dev))
+ else if (is_memory(dev))
return ND_DEVICE_REGION_PMEM;
else if (is_nd_blk(dev))
return ND_DEVICE_REGION_BLK;
else if (is_nd_dax(dev))
return ND_DEVICE_DAX_PMEM;
- else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent))
+ else if (is_nd_region(dev->parent))
return nd_region_to_nstype(to_nd_region(dev->parent));
return 0;
@@ -56,7 +56,7 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
* Ensure that region devices always have their numa node set as
* early as possible.
*/
- if (is_nd_pmem(dev) || is_nd_blk(dev))
+ if (is_nd_region(dev))
set_dev_node(dev, to_nd_region(dev)->numa_node);
return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT,
to_nd_device_type(dev));
@@ -65,7 +65,7 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
static struct module *to_bus_provider(struct device *dev)
{
/* pin bus providers while regions are enabled */
- if (is_nd_pmem(dev) || is_nd_blk(dev)) {
+ if (is_nd_region(dev)) {
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
return nvdimm_bus->nd_desc->module;
@@ -198,6 +198,9 @@ static int nvdimm_clear_badblocks_region(struct device *dev, void *data)
sector = (ctx->phys - nd_region->ndr_start) / 512;
badblocks_clear(&nd_region->bb, sector, ctx->cleared / 512);
+ if (nd_region->bb_state)
+ sysfs_notify_dirent(nd_region->bb_state);
+
return 0;
}
@@ -907,6 +910,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
static char in_env[ND_CMD_MAX_ENVELOPE];
const struct nd_cmd_desc *desc = NULL;
unsigned int cmd = _IOC_NR(ioctl_cmd);
+ unsigned int func = cmd;
void __user *p = (void __user *) arg;
struct device *dev = &nvdimm_bus->dev;
struct nd_cmd_pkg pkg;
@@ -972,6 +976,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
}
if (cmd == ND_CMD_CALL) {
+ func = pkg.nd_command;
dev_dbg(dev, "%s:%s, idx: %llu, in: %zu, out: %zu, len %zu\n",
__func__, dimm_name, pkg.nd_command,
in_len, out_len, buf_len);
@@ -1020,7 +1025,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
}
nvdimm_bus_lock(&nvdimm_bus->dev);
- rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, cmd, buf);
+ rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf);
if (rc)
goto out_unlock;
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 7ceb5fa4f2a1..47770460f3d3 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -12,8 +12,8 @@
*/
#include <linux/device.h>
#include <linux/sizes.h>
-#include <linux/pmem.h>
#include "nd-core.h"
+#include "pmem.h"
#include "pfn.h"
#include "btt.h"
#include "nd.h"
@@ -184,6 +184,35 @@ ssize_t nd_namespace_store(struct device *dev,
}
ndns = to_ndns(found);
+
+ switch (ndns->claim_class) {
+ case NVDIMM_CCLASS_NONE:
+ break;
+ case NVDIMM_CCLASS_BTT:
+ case NVDIMM_CCLASS_BTT2:
+ if (!is_nd_btt(dev)) {
+ len = -EBUSY;
+ goto out_attach;
+ }
+ break;
+ case NVDIMM_CCLASS_PFN:
+ if (!is_nd_pfn(dev)) {
+ len = -EBUSY;
+ goto out_attach;
+ }
+ break;
+ case NVDIMM_CCLASS_DAX:
+ if (!is_nd_dax(dev)) {
+ len = -EBUSY;
+ goto out_attach;
+ }
+ break;
+ default:
+ len = -EBUSY;
+ goto out_attach;
+ break;
+ }
+
if (__nvdimm_namespace_capacity(ndns) < SZ_16M) {
dev_dbg(dev, "%s too small to host\n", name);
len = -ENXIO;
@@ -260,8 +289,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
* work around this collision.
*/
if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)
- && !(flags & NVDIMM_IO_ATOMIC)
- && !ndns->claim) {
+ && !(flags & NVDIMM_IO_ATOMIC)) {
long cleared;
cleared = nvdimm_clear_poison(&ndns->dev,
@@ -272,12 +300,12 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
cleared /= 512;
badblocks_clear(&nsio->bb, sector, cleared);
}
- invalidate_pmem(nsio->addr + offset, size);
+ arch_invalidate_pmem(nsio->addr + offset, size);
} else
rc = -EIO;
}
- memcpy_to_pmem(nsio->addr + offset, buf, size);
+ memcpy_flushcache(nsio->addr + offset, buf, size);
nvdimm_flush(to_nd_region(ndns->dev.parent));
return rc;
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 2dee908e4bae..7cd99b1f8596 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -504,7 +504,7 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region,
struct nvdimm_bus *nvdimm_bus;
struct list_head *poison_list;
- if (!is_nd_pmem(&nd_region->dev)) {
+ if (!is_memory(&nd_region->dev)) {
dev_WARN_ONCE(&nd_region->dev, 1,
"%s only valid for pmem regions\n", __func__);
return;
@@ -699,6 +699,9 @@ static __init int libnvdimm_init(void)
rc = nd_region_init();
if (rc)
goto err_region;
+
+ nd_label_init();
+
return 0;
err_region:
nvdimm_exit();
diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c
index c1b6556aea6e..1bf2bd318371 100644
--- a/drivers/nvdimm/dax_devs.c
+++ b/drivers/nvdimm/dax_devs.c
@@ -89,7 +89,7 @@ struct device *nd_dax_create(struct nd_region *nd_region)
struct device *dev = NULL;
struct nd_dax *nd_dax;
- if (!is_nd_pmem(&nd_region->dev))
+ if (!is_memory(&nd_region->dev))
return NULL;
nd_dax = nd_dax_alloc(nd_region);
@@ -111,6 +111,14 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns)
if (ndns->force_raw)
return -ENODEV;
+ switch (ndns->claim_class) {
+ case NVDIMM_CCLASS_NONE:
+ case NVDIMM_CCLASS_DAX:
+ break;
+ default:
+ return -ENODEV;
+ }
+
nvdimm_bus_lock(&ndns->dev);
nd_dax = nd_dax_alloc(nd_region);
nd_pfn = &nd_dax->nd_pfn;
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 9852a3355509..f0d1b7e5de01 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -20,6 +20,7 @@
#include <linux/mm.h>
#include "nd-core.h"
#include "label.h"
+#include "pmem.h"
#include "nd.h"
static DEFINE_IDA(dimm_ida);
@@ -235,6 +236,13 @@ struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr)
}
EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm);
+unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr)
+{
+ /* pmem mapping properties are private to libnvdimm */
+ return ARCH_MEMREMAP_PMEM;
+}
+EXPORT_SYMBOL_GPL(nd_blk_memremap_flags);
+
struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping)
{
struct nvdimm *nvdimm = nd_mapping->nvdimm;
@@ -411,7 +419,7 @@ int alias_dpa_busy(struct device *dev, void *data)
struct resource *res;
int i;
- if (!is_nd_pmem(dev))
+ if (!is_memory(dev))
return 0;
nd_region = to_nd_region(dev);
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index dd615345699f..87796f840777 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -12,6 +12,7 @@
*/
#include <linux/device.h>
#include <linux/ndctl.h>
+#include <linux/uuid.h>
#include <linux/slab.h>
#include <linux/io.h>
#include <linux/nd.h>
@@ -19,6 +20,11 @@
#include "label.h"
#include "nd.h"
+static guid_t nvdimm_btt_guid;
+static guid_t nvdimm_btt2_guid;
+static guid_t nvdimm_pfn_guid;
+static guid_t nvdimm_dax_guid;
+
static u32 best_seq(u32 a, u32 b)
{
a &= NSINDEX_SEQ_MASK;
@@ -34,6 +40,11 @@ static u32 best_seq(u32 a, u32 b)
return a;
}
+unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd)
+{
+ return ndd->nslabel_size;
+}
+
size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
{
u32 index_span;
@@ -49,7 +60,7 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
* starts to waste space at larger config_sizes, but it's
* unlikely we'll ever see anything but 128K.
*/
- index_span = ndd->nsarea.config_size / 129;
+ index_span = ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1);
index_span /= NSINDEX_ALIGN * 2;
ndd->nsindex_size = index_span * NSINDEX_ALIGN;
@@ -58,10 +69,10 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd)
{
- return ndd->nsarea.config_size / 129;
+ return ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1);
}
-int nd_label_validate(struct nvdimm_drvdata *ndd)
+static int __nd_label_validate(struct nvdimm_drvdata *ndd)
{
/*
* On media label format consists of two index blocks followed
@@ -104,6 +115,7 @@ int nd_label_validate(struct nvdimm_drvdata *ndd)
u32 nslot;
u8 sig[NSINDEX_SIG_LEN];
u64 sum_save, sum, size;
+ unsigned int version, labelsize;
memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN);
if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) {
@@ -111,6 +123,21 @@ int nd_label_validate(struct nvdimm_drvdata *ndd)
__func__, i);
continue;
}
+
+ /* label sizes larger than 128 arrived with v1.2 */
+ version = __le16_to_cpu(nsindex[i]->major) * 100
+ + __le16_to_cpu(nsindex[i]->minor);
+ if (version >= 102)
+ labelsize = 1 << (7 + nsindex[i]->labelsize);
+ else
+ labelsize = 128;
+
+ if (labelsize != sizeof_namespace_label(ndd)) {
+ dev_dbg(dev, "%s: nsindex%d labelsize %d invalid\n",
+ __func__, i, nsindex[i]->labelsize);
+ continue;
+ }
+
sum_save = __le64_to_cpu(nsindex[i]->checksum);
nsindex[i]->checksum = __cpu_to_le64(0);
sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1);
@@ -153,7 +180,7 @@ int nd_label_validate(struct nvdimm_drvdata *ndd)
}
nslot = __le32_to_cpu(nsindex[i]->nslot);
- if (nslot * sizeof(struct nd_namespace_label)
+ if (nslot * sizeof_namespace_label(ndd)
+ 2 * sizeof_namespace_index(ndd)
> ndd->nsarea.config_size) {
dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n",
@@ -189,6 +216,29 @@ int nd_label_validate(struct nvdimm_drvdata *ndd)
return -1;
}
+int nd_label_validate(struct nvdimm_drvdata *ndd)
+{
+ /*
+ * In order to probe for and validate namespace index blocks we
+ * need to know the size of the labels, and we can't trust the
+ * size of the labels until we validate the index blocks.
+ * Resolve this dependency loop by probing for known label
+ * sizes, but default to v1.2 256-byte namespace labels if
+ * discovery fails.
+ */
+ int label_size[] = { 128, 256 };
+ int i, rc;
+
+ for (i = 0; i < ARRAY_SIZE(label_size); i++) {
+ ndd->nslabel_size = label_size[i];
+ rc = __nd_label_validate(ndd);
+ if (rc >= 0)
+ return rc;
+ }
+
+ return -1;
+}
+
void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
struct nd_namespace_index *src)
{
@@ -210,7 +260,22 @@ static struct nd_namespace_label *nd_label_base(struct nvdimm_drvdata *ndd)
static int to_slot(struct nvdimm_drvdata *ndd,
struct nd_namespace_label *nd_label)
{
- return nd_label - nd_label_base(ndd);
+ unsigned long label, base;
+
+ label = (unsigned long) nd_label;
+ base = (unsigned long) nd_label_base(ndd);
+
+ return (label - base) / sizeof_namespace_label(ndd);
+}
+
+static struct nd_namespace_label *to_label(struct nvdimm_drvdata *ndd, int slot)
+{
+ unsigned long label, base;
+
+ base = (unsigned long) nd_label_base(ndd);
+ label = base + sizeof_namespace_label(ndd) * slot;
+
+ return (struct nd_namespace_label *) label;
}
#define for_each_clear_bit_le(bit, addr, size) \
@@ -268,7 +333,8 @@ static bool preamble_next(struct nvdimm_drvdata *ndd,
free, nslot);
}
-static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot)
+static bool slot_valid(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label, u32 slot)
{
/* check that we are written where we expect to be written */
if (slot != __le32_to_cpu(nd_label->slot))
@@ -279,6 +345,21 @@ static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot)
| __le64_to_cpu(nd_label->rawsize)) % SZ_4K)
return false;
+ /* check checksum */
+ if (namespace_label_has(ndd, checksum)) {
+ u64 sum, sum_save;
+
+ sum_save = __le64_to_cpu(nd_label->checksum);
+ nd_label->checksum = __cpu_to_le64(0);
+ sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
+ nd_label->checksum = __cpu_to_le64(sum_save);
+ if (sum != sum_save) {
+ dev_dbg(ndd->dev, "%s fail checksum. slot: %d expect: %#llx\n",
+ __func__, slot, sum);
+ return false;
+ }
+ }
+
return true;
}
@@ -299,9 +380,9 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
struct resource *res;
u32 flags;
- nd_label = nd_label_base(ndd) + slot;
+ nd_label = to_label(ndd, slot);
- if (!slot_valid(nd_label, slot))
+ if (!slot_valid(ndd, nd_label, slot))
continue;
memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
@@ -331,9 +412,9 @@ int nd_label_active_count(struct nvdimm_drvdata *ndd)
for_each_clear_bit_le(slot, free, nslot) {
struct nd_namespace_label *nd_label;
- nd_label = nd_label_base(ndd) + slot;
+ nd_label = to_label(ndd, slot);
- if (!slot_valid(nd_label, slot)) {
+ if (!slot_valid(ndd, nd_label, slot)) {
u32 label_slot = __le32_to_cpu(nd_label->slot);
u64 size = __le64_to_cpu(nd_label->rawsize);
u64 dpa = __le64_to_cpu(nd_label->dpa);
@@ -360,12 +441,12 @@ struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n)
for_each_clear_bit_le(slot, free, nslot) {
struct nd_namespace_label *nd_label;
- nd_label = nd_label_base(ndd) + slot;
- if (!slot_valid(nd_label, slot))
+ nd_label = to_label(ndd, slot);
+ if (!slot_valid(ndd, nd_label, slot))
continue;
if (n-- == 0)
- return nd_label_base(ndd) + slot;
+ return to_label(ndd, slot);
}
return NULL;
@@ -437,7 +518,8 @@ static int nd_label_write_index(struct nvdimm_drvdata *ndd, int index, u32 seq,
nslot = __le32_to_cpu(nsindex->nslot);
memcpy(nsindex->sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN);
- nsindex->flags = __cpu_to_le32(0);
+ memset(&nsindex->flags, 0, 3);
+ nsindex->labelsize = sizeof_namespace_label(ndd) >> 8;
nsindex->seq = __cpu_to_le32(seq);
offset = (unsigned long) nsindex
- (unsigned long) to_namespace_index(ndd, 0);
@@ -452,7 +534,10 @@ static int nd_label_write_index(struct nvdimm_drvdata *ndd, int index, u32 seq,
nsindex->labeloff = __cpu_to_le64(offset);
nsindex->nslot = __cpu_to_le32(nslot);
nsindex->major = __cpu_to_le16(1);
- nsindex->minor = __cpu_to_le16(1);
+ if (sizeof_namespace_label(ndd) < 256)
+ nsindex->minor = __cpu_to_le16(1);
+ else
+ nsindex->minor = __cpu_to_le16(2);
nsindex->checksum = __cpu_to_le64(0);
if (flags & ND_NSINDEX_INIT) {
unsigned long *free = (unsigned long *) nsindex->free;
@@ -490,11 +575,49 @@ static unsigned long nd_label_offset(struct nvdimm_drvdata *ndd,
- (unsigned long) to_namespace_index(ndd, 0);
}
+enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid)
+{
+ if (guid_equal(guid, &nvdimm_btt_guid))
+ return NVDIMM_CCLASS_BTT;
+ else if (guid_equal(guid, &nvdimm_btt2_guid))
+ return NVDIMM_CCLASS_BTT2;
+ else if (guid_equal(guid, &nvdimm_pfn_guid))
+ return NVDIMM_CCLASS_PFN;
+ else if (guid_equal(guid, &nvdimm_dax_guid))
+ return NVDIMM_CCLASS_DAX;
+ else if (guid_equal(guid, &guid_null))
+ return NVDIMM_CCLASS_NONE;
+
+ return NVDIMM_CCLASS_UNKNOWN;
+}
+
+static const guid_t *to_abstraction_guid(enum nvdimm_claim_class claim_class,
+ guid_t *target)
+{
+ if (claim_class == NVDIMM_CCLASS_BTT)
+ return &nvdimm_btt_guid;
+ else if (claim_class == NVDIMM_CCLASS_BTT2)
+ return &nvdimm_btt2_guid;
+ else if (claim_class == NVDIMM_CCLASS_PFN)
+ return &nvdimm_pfn_guid;
+ else if (claim_class == NVDIMM_CCLASS_DAX)
+ return &nvdimm_dax_guid;
+ else if (claim_class == NVDIMM_CCLASS_UNKNOWN) {
+ /*
+ * If we're modifying a namespace for which we don't
+ * know the claim_class, don't touch the existing guid.
+ */
+ return target;
+ } else
+ return &guid_null;
+}
+
static int __pmem_label_update(struct nd_region *nd_region,
struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
int pos)
{
- u64 cookie = nd_region_interleave_set_cookie(nd_region);
+ struct nd_namespace_common *ndns = &nspm->nsio.common;
+ struct nd_interleave_set *nd_set = nd_region->nd_set;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_label_ent *label_ent, *victim = NULL;
struct nd_namespace_label *nd_label;
@@ -504,11 +627,13 @@ static int __pmem_label_update(struct nd_region *nd_region,
unsigned long *free;
u32 nslot, slot;
size_t offset;
+ u64 cookie;
int rc;
if (!preamble_next(ndd, &nsindex, &free, &nslot))
return -ENXIO;
+ cookie = nd_region_interleave_set_cookie(nd_region, nsindex);
nd_label_gen_id(&label_id, nspm->uuid, 0);
for_each_dpa_resource(ndd, res)
if (strcmp(res->name, label_id.id) == 0)
@@ -525,8 +650,8 @@ static int __pmem_label_update(struct nd_region *nd_region,
return -ENXIO;
dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
- nd_label = nd_label_base(ndd) + slot;
- memset(nd_label, 0, sizeof(struct nd_namespace_label));
+ nd_label = to_label(ndd, slot);
+ memset(nd_label, 0, sizeof_namespace_label(ndd));
memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN);
if (nspm->alt_name)
memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN);
@@ -535,14 +660,28 @@ static int __pmem_label_update(struct nd_region *nd_region,
nd_label->position = __cpu_to_le16(pos);
nd_label->isetcookie = __cpu_to_le64(cookie);
nd_label->rawsize = __cpu_to_le64(resource_size(res));
+ nd_label->lbasize = __cpu_to_le64(nspm->lbasize);
nd_label->dpa = __cpu_to_le64(res->start);
nd_label->slot = __cpu_to_le32(slot);
+ if (namespace_label_has(ndd, type_guid))
+ guid_copy(&nd_label->type_guid, &nd_set->type_guid);
+ if (namespace_label_has(ndd, abstraction_guid))
+ guid_copy(&nd_label->abstraction_guid,
+ to_abstraction_guid(ndns->claim_class,
+ &nd_label->abstraction_guid));
+ if (namespace_label_has(ndd, checksum)) {
+ u64 sum;
+
+ nd_label->checksum = __cpu_to_le64(0);
+ sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
+ nd_label->checksum = __cpu_to_le64(sum);
+ }
nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__);
/* update label */
offset = nd_label_offset(ndd, nd_label);
rc = nvdimm_set_config_data(ndd, offset, nd_label,
- sizeof(struct nd_namespace_label));
+ sizeof_namespace_label(ndd));
if (rc < 0)
return rc;
@@ -624,6 +763,8 @@ static int __blk_label_update(struct nd_region *nd_region,
int num_labels)
{
int i, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO;
+ struct nd_interleave_set *nd_set = nd_region->nd_set;
+ struct nd_namespace_common *ndns = &nsblk->common;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_namespace_label *nd_label;
struct nd_label_ent *label_ent, *e;
@@ -632,6 +773,7 @@ static int __blk_label_update(struct nd_region *nd_region,
struct resource *res, **old_res_list;
struct nd_label_id label_id;
u8 uuid[NSLABEL_UUID_LEN];
+ int min_dpa_idx = 0;
LIST_HEAD(list);
u32 nslot, slot;
@@ -668,7 +810,7 @@ static int __blk_label_update(struct nd_region *nd_region,
/* mark unused labels for garbage collection */
for_each_clear_bit_le(slot, free, nslot) {
- nd_label = nd_label_base(ndd) + slot;
+ nd_label = to_label(ndd, slot);
memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
continue;
@@ -703,6 +845,18 @@ static int __blk_label_update(struct nd_region *nd_region,
}
}
+ /*
+ * Find the resource associated with the first label in the set
+ * per the v1.2 namespace specification.
+ */
+ for (i = 0; i < nsblk->num_resources; i++) {
+ struct resource *min = nsblk->res[min_dpa_idx];
+
+ res = nsblk->res[i];
+ if (res->start < min->start)
+ min_dpa_idx = i;
+ }
+
for (i = 0; i < nsblk->num_resources; i++) {
size_t offset;
@@ -714,25 +868,58 @@ static int __blk_label_update(struct nd_region *nd_region,
goto abort;
dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
- nd_label = nd_label_base(ndd) + slot;
- memset(nd_label, 0, sizeof(struct nd_namespace_label));
+ nd_label = to_label(ndd, slot);
+ memset(nd_label, 0, sizeof_namespace_label(ndd));
memcpy(nd_label->uuid, nsblk->uuid, NSLABEL_UUID_LEN);
if (nsblk->alt_name)
memcpy(nd_label->name, nsblk->alt_name,
NSLABEL_NAME_LEN);
nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_LOCAL);
- nd_label->nlabel = __cpu_to_le16(0); /* N/A */
- nd_label->position = __cpu_to_le16(0); /* N/A */
- nd_label->isetcookie = __cpu_to_le64(0); /* N/A */
+
+ /*
+ * Use the presence of the type_guid as a flag to
+ * determine isetcookie usage and nlabel + position
+ * policy for blk-aperture namespaces.
+ */
+ if (namespace_label_has(ndd, type_guid)) {
+ if (i == min_dpa_idx) {
+ nd_label->nlabel = __cpu_to_le16(nsblk->num_resources);
+ nd_label->position = __cpu_to_le16(0);
+ } else {
+ nd_label->nlabel = __cpu_to_le16(0xffff);
+ nd_label->position = __cpu_to_le16(0xffff);
+ }
+ nd_label->isetcookie = __cpu_to_le64(nd_set->cookie2);
+ } else {
+ nd_label->nlabel = __cpu_to_le16(0); /* N/A */
+ nd_label->position = __cpu_to_le16(0); /* N/A */
+ nd_label->isetcookie = __cpu_to_le64(0); /* N/A */
+ }
+
nd_label->dpa = __cpu_to_le64(res->start);
nd_label->rawsize = __cpu_to_le64(resource_size(res));
nd_label->lbasize = __cpu_to_le64(nsblk->lbasize);
nd_label->slot = __cpu_to_le32(slot);
+ if (namespace_label_has(ndd, type_guid))
+ guid_copy(&nd_label->type_guid, &nd_set->type_guid);
+ if (namespace_label_has(ndd, abstraction_guid))
+ guid_copy(&nd_label->abstraction_guid,
+ to_abstraction_guid(ndns->claim_class,
+ &nd_label->abstraction_guid));
+
+ if (namespace_label_has(ndd, checksum)) {
+ u64 sum;
+
+ nd_label->checksum = __cpu_to_le64(0);
+ sum = nd_fletcher64(nd_label,
+ sizeof_namespace_label(ndd), 1);
+ nd_label->checksum = __cpu_to_le64(sum);
+ }
/* update label */
offset = nd_label_offset(ndd, nd_label);
rc = nvdimm_set_config_data(ndd, offset, nd_label,
- sizeof(struct nd_namespace_label));
+ sizeof_namespace_label(ndd));
if (rc < 0)
goto abort;
}
@@ -790,7 +977,7 @@ static int __blk_label_update(struct nd_region *nd_region,
goto out;
}
for_each_clear_bit_le(slot, free, nslot) {
- nd_label = nd_label_base(ndd) + slot;
+ nd_label = to_label(ndd, slot);
memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
continue;
@@ -973,3 +1160,13 @@ int nd_blk_namespace_label_update(struct nd_region *nd_region,
return __blk_label_update(nd_region, nd_mapping, nsblk, count);
}
+
+int __init nd_label_init(void)
+{
+ WARN_ON(guid_parse(NVDIMM_BTT_GUID, &nvdimm_btt_guid));
+ WARN_ON(guid_parse(NVDIMM_BTT2_GUID, &nvdimm_btt2_guid));
+ WARN_ON(guid_parse(NVDIMM_PFN_GUID, &nvdimm_pfn_guid));
+ WARN_ON(guid_parse(NVDIMM_DAX_GUID, &nvdimm_dax_guid));
+
+ return 0;
+}
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index a59ef6eef2a3..1ebf4d3d01ba 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -15,6 +15,7 @@
#include <linux/ndctl.h>
#include <linux/sizes.h>
+#include <linux/uuid.h>
#include <linux/io.h>
enum {
@@ -60,7 +61,8 @@ static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0";
*/
struct nd_namespace_index {
u8 sig[NSINDEX_SIG_LEN];
- __le32 flags;
+ u8 flags[3];
+ u8 labelsize;
__le32 seq;
__le64 myoff;
__le64 mysize;
@@ -98,9 +100,23 @@ struct nd_namespace_label {
__le64 dpa;
__le64 rawsize;
__le32 slot;
- __le32 unused;
+ /*
+ * Accessing fields past this point should be gated by a
+ * namespace_label_has() check.
+ */
+ u8 align;
+ u8 reserved[3];
+ guid_t type_guid;
+ guid_t abstraction_guid;
+ u8 reserved2[88];
+ __le64 checksum;
};
+#define NVDIMM_BTT_GUID "8aed63a2-29a2-4c66-8b12-f05d15d3922a"
+#define NVDIMM_BTT2_GUID "18633bfc-1735-4217-8ac9-17239282d3f8"
+#define NVDIMM_PFN_GUID "266400ba-fb9f-4677-bcb0-968f11d0d225"
+#define NVDIMM_DAX_GUID "97a86d9c-3cdd-4eda-986f-5068b4f80088"
+
/**
* struct nd_label_id - identifier string for dpa allocation
* @id: "{blk|pmem}-<namespace uuid>"
@@ -131,6 +147,7 @@ struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n);
u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd);
bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot);
u32 nd_label_nfree(struct nvdimm_drvdata *ndd);
+enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid);
struct nd_region;
struct nd_namespace_pmem;
struct nd_namespace_blk;
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 2f9dfbd2dbec..5f1c6756e57c 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -14,10 +14,10 @@
#include <linux/device.h>
#include <linux/sort.h>
#include <linux/slab.h>
-#include <linux/pmem.h>
#include <linux/list.h>
#include <linux/nd.h>
#include "nd-core.h"
+#include "pmem.h"
#include "nd.h"
static void namespace_io_release(struct device *dev)
@@ -112,7 +112,7 @@ static int is_uuid_busy(struct device *dev, void *data)
static int is_namespace_uuid_busy(struct device *dev, void *data)
{
- if (is_nd_pmem(dev) || is_nd_blk(dev))
+ if (is_nd_region(dev))
return device_for_each_child(dev, data, is_uuid_busy);
return 0;
}
@@ -155,14 +155,33 @@ bool pmem_should_map_pages(struct device *dev)
IORES_DESC_NONE) == REGION_MIXED)
return false;
-#ifdef ARCH_MEMREMAP_PMEM
return ARCH_MEMREMAP_PMEM == MEMREMAP_WB;
-#else
- return false;
-#endif
}
EXPORT_SYMBOL(pmem_should_map_pages);
+unsigned int pmem_sector_size(struct nd_namespace_common *ndns)
+{
+ if (is_namespace_pmem(&ndns->dev)) {
+ struct nd_namespace_pmem *nspm;
+
+ nspm = to_nd_namespace_pmem(&ndns->dev);
+ if (nspm->lbasize == 0 || nspm->lbasize == 512)
+ /* default */;
+ else if (nspm->lbasize == 4096)
+ return 4096;
+ else
+ dev_WARN(&ndns->dev, "unsupported sector size: %ld\n",
+ nspm->lbasize);
+ }
+
+ /*
+ * There is no namespace label (is_namespace_io()), or the label
+ * indicates the default sector size.
+ */
+ return 512;
+}
+EXPORT_SYMBOL(pmem_sector_size);
+
const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
char *name)
{
@@ -787,7 +806,7 @@ static int __reserve_free_pmem(struct device *dev, void *data)
struct nd_label_id label_id;
int i;
- if (!is_nd_pmem(dev))
+ if (!is_memory(dev))
return 0;
nd_region = to_nd_region(dev);
@@ -1283,28 +1302,49 @@ static ssize_t resource_show(struct device *dev,
}
static DEVICE_ATTR_RO(resource);
-static const unsigned long ns_lbasize_supported[] = { 512, 520, 528,
+static const unsigned long blk_lbasize_supported[] = { 512, 520, 528,
4096, 4104, 4160, 4224, 0 };
+static const unsigned long pmem_lbasize_supported[] = { 512, 4096, 0 };
+
static ssize_t sector_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+ if (is_namespace_blk(dev)) {
+ struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
- if (!is_namespace_blk(dev))
- return -ENXIO;
+ return nd_sector_size_show(nsblk->lbasize,
+ blk_lbasize_supported, buf);
+ }
+
+ if (is_namespace_pmem(dev)) {
+ struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
- return nd_sector_size_show(nsblk->lbasize, ns_lbasize_supported, buf);
+ return nd_sector_size_show(nspm->lbasize,
+ pmem_lbasize_supported, buf);
+ }
+ return -ENXIO;
}
static ssize_t sector_size_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
struct nd_region *nd_region = to_nd_region(dev->parent);
+ const unsigned long *supported;
+ unsigned long *lbasize;
ssize_t rc = 0;
- if (!is_namespace_blk(dev))
+ if (is_namespace_blk(dev)) {
+ struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+ lbasize = &nsblk->lbasize;
+ supported = blk_lbasize_supported;
+ } else if (is_namespace_pmem(dev)) {
+ struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+ lbasize = &nspm->lbasize;
+ supported = pmem_lbasize_supported;
+ } else
return -ENXIO;
device_lock(dev);
@@ -1312,8 +1352,7 @@ static ssize_t sector_size_store(struct device *dev,
if (to_ndns(dev)->claim)
rc = -EBUSY;
if (rc >= 0)
- rc = nd_sector_size_store(dev, buf, &nsblk->lbasize,
- ns_lbasize_supported);
+ rc = nd_sector_size_store(dev, buf, lbasize, supported);
if (rc >= 0)
rc = nd_namespace_label_update(nd_region, dev);
dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__,
@@ -1368,6 +1407,58 @@ static ssize_t dpa_extents_show(struct device *dev,
}
static DEVICE_ATTR_RO(dpa_extents);
+static int btt_claim_class(struct device *dev)
+{
+ struct nd_region *nd_region = to_nd_region(dev->parent);
+ int i, loop_bitmask = 0;
+
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+ struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+ struct nd_namespace_index *nsindex;
+
+ nsindex = to_namespace_index(ndd, ndd->ns_current);
+ if (nsindex == NULL)
+ loop_bitmask |= 1;
+ else {
+ /* check whether existing labels are v1.1 or v1.2 */
+ if (__le16_to_cpu(nsindex->major) == 1
+ && __le16_to_cpu(nsindex->minor) == 1)
+ loop_bitmask |= 2;
+ else
+ loop_bitmask |= 4;
+ }
+ }
+ /*
+ * If nsindex is null loop_bitmask's bit 0 will be set, and if an index
+ * block is found, a v1.1 label for any mapping will set bit 1, and a
+ * v1.2 label will set bit 2.
+ *
+ * At the end of the loop, at most one of the three bits must be set.
+ * If multiple bits were set, it means the different mappings disagree
+ * about their labels, and this must be cleaned up first.
+ *
+ * If all the label index blocks are found to agree, nsindex of NULL
+ * implies labels haven't been initialized yet, and when they will,
+ * they will be of the 1.2 format, so we can assume BTT2.0
+ *
+ * If 1.1 labels are found, we enforce BTT1.1, and if 1.2 labels are
+ * found, we enforce BTT2.0
+ *
+ * If the loop was never entered, default to BTT1.1 (legacy namespaces)
+ */
+ switch (loop_bitmask) {
+ case 0:
+ case 2:
+ return NVDIMM_CCLASS_BTT;
+ case 1:
+ case 4:
+ return NVDIMM_CCLASS_BTT2;
+ default:
+ return -ENXIO;
+ }
+}
+
static ssize_t holder_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -1382,6 +1473,74 @@ static ssize_t holder_show(struct device *dev,
}
static DEVICE_ATTR_RO(holder);
+static ssize_t __holder_class_store(struct device *dev, const char *buf)
+{
+ struct nd_namespace_common *ndns = to_ndns(dev);
+
+ if (dev->driver || ndns->claim)
+ return -EBUSY;
+
+ if (strcmp(buf, "btt") == 0 || strcmp(buf, "btt\n") == 0)
+ ndns->claim_class = btt_claim_class(dev);
+ else if (strcmp(buf, "pfn") == 0 || strcmp(buf, "pfn\n") == 0)
+ ndns->claim_class = NVDIMM_CCLASS_PFN;
+ else if (strcmp(buf, "dax") == 0 || strcmp(buf, "dax\n") == 0)
+ ndns->claim_class = NVDIMM_CCLASS_DAX;
+ else if (strcmp(buf, "") == 0 || strcmp(buf, "\n") == 0)
+ ndns->claim_class = NVDIMM_CCLASS_NONE;
+ else
+ return -EINVAL;
+
+ /* btt_claim_class() could've returned an error */
+ if (ndns->claim_class < 0)
+ return ndns->claim_class;
+
+ return 0;
+}
+
+static ssize_t holder_class_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct nd_region *nd_region = to_nd_region(dev->parent);
+ ssize_t rc;
+
+ device_lock(dev);
+ nvdimm_bus_lock(dev);
+ wait_nvdimm_bus_probe_idle(dev);
+ rc = __holder_class_store(dev, buf);
+ if (rc >= 0)
+ rc = nd_namespace_label_update(nd_region, dev);
+ dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc);
+ nvdimm_bus_unlock(dev);
+ device_unlock(dev);
+
+ return rc < 0 ? rc : len;
+}
+
+static ssize_t holder_class_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nd_namespace_common *ndns = to_ndns(dev);
+ ssize_t rc;
+
+ device_lock(dev);
+ if (ndns->claim_class == NVDIMM_CCLASS_NONE)
+ rc = sprintf(buf, "\n");
+ else if ((ndns->claim_class == NVDIMM_CCLASS_BTT) ||
+ (ndns->claim_class == NVDIMM_CCLASS_BTT2))
+ rc = sprintf(buf, "btt\n");
+ else if (ndns->claim_class == NVDIMM_CCLASS_PFN)
+ rc = sprintf(buf, "pfn\n");
+ else if (ndns->claim_class == NVDIMM_CCLASS_DAX)
+ rc = sprintf(buf, "dax\n");
+ else
+ rc = sprintf(buf, "<unknown>\n");
+ device_unlock(dev);
+
+ return rc;
+}
+static DEVICE_ATTR_RW(holder_class);
+
static ssize_t mode_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -1440,6 +1599,7 @@ static struct attribute *nd_namespace_attributes[] = {
&dev_attr_force_raw.attr,
&dev_attr_sector_size.attr,
&dev_attr_dpa_extents.attr,
+ &dev_attr_holder_class.attr,
NULL,
};
@@ -1458,14 +1618,12 @@ static umode_t namespace_visible(struct kobject *kobj,
if (a == &dev_attr_size.attr)
return 0644;
- if (is_namespace_pmem(dev) && a == &dev_attr_sector_size.attr)
- return 0;
-
return a->mode;
}
if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr
|| a == &dev_attr_holder.attr
+ || a == &dev_attr_holder_class.attr
|| a == &dev_attr_force_raw.attr
|| a == &dev_attr_mode.attr)
return a->mode;
@@ -1599,6 +1757,8 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+ struct nd_interleave_set *nd_set = nd_region->nd_set;
+ struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_label_ent *label_ent;
bool found_uuid = false;
@@ -1619,8 +1779,17 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0)
continue;
+ if (namespace_label_has(ndd, type_guid)
+ && !guid_equal(&nd_set->type_guid,
+ &nd_label->type_guid)) {
+ dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
+ nd_set->type_guid.b,
+ nd_label->type_guid.b);
+ continue;
+ }
+
if (found_uuid) {
- dev_dbg(to_ndd(nd_mapping)->dev,
+ dev_dbg(ndd->dev,
"%s duplicate entry for uuid\n",
__func__);
return false;
@@ -1698,10 +1867,11 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
* @nd_label: target pmem namespace label to evaluate
*/
struct device *create_namespace_pmem(struct nd_region *nd_region,
+ struct nd_namespace_index *nsindex,
struct nd_namespace_label *nd_label)
{
+ u64 cookie = nd_region_interleave_set_cookie(nd_region, nsindex);
u64 altcookie = nd_region_interleave_set_altcookie(nd_region);
- u64 cookie = nd_region_interleave_set_cookie(nd_region);
struct nd_label_ent *label_ent;
struct nd_namespace_pmem *nspm;
struct nd_mapping *nd_mapping;
@@ -1775,6 +1945,7 @@ struct device *create_namespace_pmem(struct nd_region *nd_region,
/* Calculate total size and populate namespace properties from label0 */
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_namespace_label *label0;
+ struct nvdimm_drvdata *ndd;
nd_mapping = &nd_region->mapping[i];
label_ent = list_first_entry_or_null(&nd_mapping->labels,
@@ -1794,6 +1965,12 @@ struct device *create_namespace_pmem(struct nd_region *nd_region,
NSLABEL_NAME_LEN, GFP_KERNEL);
nspm->uuid = kmemdup((void __force *) label0->uuid,
NSLABEL_UUID_LEN, GFP_KERNEL);
+ nspm->lbasize = __le64_to_cpu(label0->lbasize);
+ ndd = to_ndd(nd_mapping);
+ if (namespace_label_has(ndd, abstraction_guid))
+ nspm->nsio.common.claim_class
+ = to_nvdimm_cclass(&label0->abstraction_guid);
+
}
if (!nspm->alt_name || !nspm->uuid) {
@@ -1876,7 +2053,7 @@ static struct device *nd_namespace_pmem_create(struct nd_region *nd_region)
struct resource *res;
struct device *dev;
- if (!is_nd_pmem(&nd_region->dev))
+ if (!is_memory(&nd_region->dev))
return NULL;
nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
@@ -2005,12 +2182,29 @@ struct device *create_namespace_blk(struct nd_region *nd_region,
{
struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+ struct nd_interleave_set *nd_set = nd_region->nd_set;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_namespace_blk *nsblk;
char name[NSLABEL_NAME_LEN];
struct device *dev = NULL;
struct resource *res;
+ if (namespace_label_has(ndd, type_guid)) {
+ if (!guid_equal(&nd_set->type_guid, &nd_label->type_guid)) {
+ dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
+ nd_set->type_guid.b,
+ nd_label->type_guid.b);
+ return ERR_PTR(-EAGAIN);
+ }
+
+ if (nd_label->isetcookie != __cpu_to_le64(nd_set->cookie2)) {
+ dev_dbg(ndd->dev, "expect cookie %#llx got %#llx\n",
+ nd_set->cookie2,
+ __le64_to_cpu(nd_label->isetcookie));
+ return ERR_PTR(-EAGAIN);
+ }
+ }
+
nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
if (!nsblk)
return ERR_PTR(-ENOMEM);
@@ -2021,6 +2215,9 @@ struct device *create_namespace_blk(struct nd_region *nd_region,
nsblk->lbasize = __le64_to_cpu(nd_label->lbasize);
nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN,
GFP_KERNEL);
+ if (namespace_label_has(ndd, abstraction_guid))
+ nsblk->common.claim_class
+ = to_nvdimm_cclass(&nd_label->abstraction_guid);
if (!nsblk->uuid)
goto blk_err;
memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
@@ -2102,27 +2299,30 @@ static struct device **scan_labels(struct nd_region *nd_region)
kfree(devs);
devs = __devs;
- if (is_nd_blk(&nd_region->dev)) {
+ if (is_nd_blk(&nd_region->dev))
dev = create_namespace_blk(nd_region, nd_label, count);
- if (IS_ERR(dev))
+ else {
+ struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+ struct nd_namespace_index *nsindex;
+
+ nsindex = to_namespace_index(ndd, ndd->ns_current);
+ dev = create_namespace_pmem(nd_region, nsindex, nd_label);
+ }
+
+ if (IS_ERR(dev)) {
+ switch (PTR_ERR(dev)) {
+ case -EAGAIN:
+ /* skip invalid labels */
+ continue;
+ case -ENODEV:
+ /* fallthrough to seed creation */
+ break;
+ default:
goto err;
+ }
+ } else
devs[count++] = dev;
- } else {
- dev = create_namespace_pmem(nd_region, nd_label);
- if (IS_ERR(dev)) {
- switch (PTR_ERR(dev)) {
- case -EAGAIN:
- /* skip invalid labels */
- continue;
- case -ENODEV:
- /* fallthrough to seed creation */
- break;
- default:
- goto err;
- }
- } else
- devs[count++] = dev;
- }
+
}
dev_dbg(&nd_region->dev, "%s: discovered %d %s namespace%s\n",
@@ -2156,7 +2356,7 @@ static struct device **scan_labels(struct nd_region *nd_region)
}
dev->parent = &nd_region->dev;
devs[count++] = dev;
- } else if (is_nd_pmem(&nd_region->dev)) {
+ } else if (is_memory(&nd_region->dev)) {
/* clean unselected labels */
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct list_head *l, *e;
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 4c4bd209e725..86bc19ae30da 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -64,7 +64,16 @@ struct blk_alloc_info {
bool is_nvdimm(struct device *dev);
bool is_nd_pmem(struct device *dev);
+bool is_nd_volatile(struct device *dev);
bool is_nd_blk(struct device *dev);
+static inline bool is_nd_region(struct device *dev)
+{
+ return is_nd_pmem(dev) || is_nd_blk(dev) || is_nd_volatile(dev);
+}
+static inline bool is_memory(struct device *dev)
+{
+ return is_nd_pmem(dev) || is_nd_volatile(dev);
+}
struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
int __init nvdimm_bus_init(void);
void nvdimm_bus_exit(void);
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 03852d738eec..e1b5715bd91f 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -42,7 +42,7 @@ struct nd_poison {
struct nvdimm_drvdata {
struct device *dev;
- int nsindex_size;
+ int nsindex_size, nslabel_size;
struct nd_cmd_get_config_size nsarea;
void *data;
int ns_current, ns_next;
@@ -96,6 +96,12 @@ static inline struct nd_namespace_index *to_next_namespace_index(
return to_namespace_index(ndd, ndd->ns_next);
}
+unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd);
+
+#define namespace_label_has(ndd, field) \
+ (offsetof(struct nd_namespace_label, field) \
+ < sizeof_namespace_label(ndd))
+
#define nd_dbg_dpa(r, d, res, fmt, arg...) \
dev_dbg((r) ? &(r)->dev : (d)->dev, "%s: %.13s: %#llx @ %#llx " fmt, \
(r) ? dev_name((d)->dev) : "", res ? res->name : "null", \
@@ -155,6 +161,7 @@ struct nd_region {
u64 ndr_start;
int id, num_lanes, ro, numa_node;
void *provider_data;
+ struct kernfs_node *bb_state;
struct badblocks bb;
struct nd_interleave_set *nd_set;
struct nd_percpu_lane __percpu *lane;
@@ -188,6 +195,9 @@ struct nd_btt {
u64 size;
u8 *uuid;
int id;
+ int initial_offset;
+ u16 version_major;
+ u16 version_minor;
};
enum nd_pfn_mode {
@@ -229,6 +239,7 @@ ssize_t nd_sector_size_store(struct device *dev, const char *buf,
unsigned long *current_lbasize, const unsigned long *supported);
int __init nvdimm_init(void);
int __init nd_region_init(void);
+int __init nd_label_init(void);
void nvdimm_exit(void);
void nd_region_exit(void);
struct nvdimm;
@@ -330,7 +341,8 @@ static inline struct device *nd_dax_create(struct nd_region *nd_region)
struct nd_region *to_nd_region(struct device *dev);
int nd_region_to_nstype(struct nd_region *nd_region);
int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
-u64 nd_region_interleave_set_cookie(struct nd_region *nd_region);
+u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
+ struct nd_namespace_index *nsindex);
u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region);
void nvdimm_bus_lock(struct device *dev);
void nvdimm_bus_unlock(struct device *dev);
@@ -349,6 +361,7 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns);
int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt);
const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
char *name);
+unsigned int pmem_sector_size(struct nd_namespace_common *ndns);
void nvdimm_badblocks_populate(struct nd_region *nd_region,
struct badblocks *bb, const struct resource *res);
#if IS_ENABLED(CONFIG_ND_CLAIM)
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index a6c403600d19..5fcb6f5b22a2 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -331,7 +331,7 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
struct nd_pfn *nd_pfn;
struct device *dev;
- if (!is_nd_pmem(&nd_region->dev))
+ if (!is_memory(&nd_region->dev))
return NULL;
nd_pfn = nd_pfn_alloc(nd_region);
@@ -354,7 +354,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
if (!pfn_sb || !ndns)
return -ENODEV;
- if (!is_nd_pmem(nd_pfn->dev.parent))
+ if (!is_memory(nd_pfn->dev.parent))
return -ENODEV;
if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0))
@@ -471,6 +471,14 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
if (ndns->force_raw)
return -ENODEV;
+ switch (ndns->claim_class) {
+ case NVDIMM_CCLASS_NONE:
+ case NVDIMM_CCLASS_PFN:
+ break;
+ default:
+ return -ENODEV;
+ }
+
nvdimm_bus_lock(&ndns->dev);
nd_pfn = nd_pfn_alloc(nd_region);
pfn_dev = nd_pfn_devinit(nd_pfn, ndns);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 6b577afb1d44..f7099adaabc0 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -28,7 +28,7 @@
#include <linux/blk-mq.h>
#include <linux/pfn_t.h>
#include <linux/slab.h>
-#include <linux/pmem.h>
+#include <linux/uio.h>
#include <linux/dax.h>
#include <linux/nd.h>
#include "pmem.h"
@@ -68,9 +68,11 @@ static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
(unsigned long long) sector, cleared,
cleared > 1 ? "s" : "");
badblocks_clear(&pmem->bb, sector, cleared);
+ if (pmem->bb_state)
+ sysfs_notify_dirent(pmem->bb_state);
}
- invalidate_pmem(pmem->virt_addr + offset, len);
+ arch_invalidate_pmem(pmem->virt_addr + offset, len);
return rc;
}
@@ -80,7 +82,7 @@ static void write_pmem(void *pmem_addr, struct page *page,
{
void *mem = kmap_atomic(page);
- memcpy_to_pmem(pmem_addr, mem + off, len);
+ memcpy_flushcache(pmem_addr, mem + off, len);
kunmap_atomic(mem);
}
@@ -235,8 +237,27 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
}
+static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+ void *addr, size_t bytes, struct iov_iter *i)
+{
+ return copy_from_iter_flushcache(addr, bytes, i);
+}
+
+static void pmem_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff,
+ void *addr, size_t size)
+{
+ arch_wb_cache_pmem(addr, size);
+}
+
static const struct dax_operations pmem_dax_ops = {
.direct_access = pmem_dax_direct_access,
+ .copy_from_iter = pmem_copy_from_iter,
+ .flush = pmem_dax_flush,
+};
+
+static const struct attribute_group *pmem_attribute_groups[] = {
+ &dax_attribute_group,
+ NULL,
};
static void pmem_release_queue(void *q)
@@ -265,14 +286,15 @@ static int pmem_attach_disk(struct device *dev,
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
struct nd_region *nd_region = to_nd_region(dev->parent);
struct vmem_altmap __altmap, *altmap = NULL;
+ int nid = dev_to_node(dev), fua, wbc;
struct resource *res = &nsio->res;
struct nd_pfn *nd_pfn = NULL;
struct dax_device *dax_dev;
- int nid = dev_to_node(dev);
struct nd_pfn_sb *pfn_sb;
struct pmem_device *pmem;
struct resource pfn_res;
struct request_queue *q;
+ struct device *gendev;
struct gendisk *disk;
void *addr;
@@ -294,8 +316,12 @@ static int pmem_attach_disk(struct device *dev,
dev_set_drvdata(dev, pmem);
pmem->phys_addr = res->start;
pmem->size = resource_size(res);
- if (nvdimm_has_flush(nd_region) < 0)
+ fua = nvdimm_has_flush(nd_region);
+ if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) || fua < 0) {
dev_warn(dev, "unable to guarantee persistence of writes\n");
+ fua = 0;
+ }
+ wbc = nvdimm_has_cache(nd_region);
if (!devm_request_mem_region(dev, res->start, resource_size(res),
dev_name(&ndns->dev))) {
@@ -339,9 +365,10 @@ static int pmem_attach_disk(struct device *dev,
return PTR_ERR(addr);
pmem->virt_addr = addr;
- blk_queue_write_cache(q, true, true);
+ blk_queue_write_cache(q, wbc, fua);
blk_queue_make_request(q, pmem_make_request);
blk_queue_physical_block_size(q, PAGE_SIZE);
+ blk_queue_logical_block_size(q, pmem_sector_size(ndns));
blk_queue_max_hw_sectors(q, UINT_MAX);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
@@ -368,14 +395,23 @@ static int pmem_attach_disk(struct device *dev,
put_disk(disk);
return -ENOMEM;
}
+ dax_write_cache(dax_dev, wbc);
pmem->dax_dev = dax_dev;
+ gendev = disk_to_dev(disk);
+ gendev->groups = pmem_attribute_groups;
+
device_add_disk(dev, disk);
if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
return -ENOMEM;
revalidate_disk(disk);
+ pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd,
+ "badblocks");
+ if (!pmem->bb_state)
+ dev_warn(dev, "'badblocks' notification disabled\n");
+
return 0;
}
@@ -407,8 +443,18 @@ static int nd_pmem_probe(struct device *dev)
static int nd_pmem_remove(struct device *dev)
{
+ struct pmem_device *pmem = dev_get_drvdata(dev);
+
if (is_nd_btt(dev))
nvdimm_namespace_detach_btt(to_nd_btt(dev));
+ else {
+ /*
+ * Note, this assumes device_lock() context to not race
+ * nd_pmem_notify()
+ */
+ sysfs_put(pmem->bb_state);
+ pmem->bb_state = NULL;
+ }
nvdimm_flush(to_nd_region(dev->parent));
return 0;
@@ -427,6 +473,7 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
struct nd_namespace_io *nsio;
struct resource res;
struct badblocks *bb;
+ struct kernfs_node *bb_state;
if (event != NVDIMM_REVALIDATE_POISON)
return;
@@ -438,11 +485,13 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
nd_region = to_nd_region(ndns->dev.parent);
nsio = to_nd_namespace_io(&ndns->dev);
bb = &nsio->bb;
+ bb_state = NULL;
} else {
struct pmem_device *pmem = dev_get_drvdata(dev);
nd_region = to_region(pmem);
bb = &pmem->bb;
+ bb_state = pmem->bb_state;
if (is_nd_pfn(dev)) {
struct nd_pfn *nd_pfn = to_nd_pfn(dev);
@@ -462,6 +511,8 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
res.start = nsio->res.start + offset;
res.end = nsio->res.end - end_trunc;
nvdimm_badblocks_populate(nd_region, bb, &res);
+ if (bb_state)
+ sysfs_notify_dirent(bb_state);
}
MODULE_ALIAS("pmem");
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
index 7f4dbd72a90a..5434321cad67 100644
--- a/drivers/nvdimm/pmem.h
+++ b/drivers/nvdimm/pmem.h
@@ -5,6 +5,20 @@
#include <linux/pfn_t.h>
#include <linux/fs.h>
+#ifdef CONFIG_ARCH_HAS_PMEM_API
+#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
+void arch_wb_cache_pmem(void *addr, size_t size);
+void arch_invalidate_pmem(void *addr, size_t size);
+#else
+#define ARCH_MEMREMAP_PMEM MEMREMAP_WT
+static inline void arch_wb_cache_pmem(void *addr, size_t size)
+{
+}
+static inline void arch_invalidate_pmem(void *addr, size_t size)
+{
+}
+#endif
+
/* this definition is in it's own header for tools/testing/nvdimm to consume */
struct pmem_device {
/* One contiguous memory region per device */
@@ -17,6 +31,7 @@ struct pmem_device {
size_t size;
/* trim size when namespace capacity has been section aligned */
u32 pfn_pad;
+ struct kernfs_node *bb_state;
struct badblocks bb;
struct dax_device *dax_dev;
struct gendisk *disk;
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 869a886c292e..034f0a07d627 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -58,10 +58,14 @@ static int nd_region_probe(struct device *dev)
if (devm_init_badblocks(dev, &nd_region->bb))
return -ENODEV;
+ nd_region->bb_state = sysfs_get_dirent(nd_region->dev.kobj.sd,
+ "badblocks");
+ if (!nd_region->bb_state)
+ dev_warn(&nd_region->dev,
+ "'badblocks' notification disabled\n");
ndr_res.start = nd_region->ndr_start;
ndr_res.end = nd_region->ndr_start + nd_region->ndr_size - 1;
- nvdimm_badblocks_populate(nd_region,
- &nd_region->bb, &ndr_res);
+ nvdimm_badblocks_populate(nd_region, &nd_region->bb, &ndr_res);
}
nd_region->btt_seed = nd_btt_create(nd_region);
@@ -105,6 +109,13 @@ static int nd_region_remove(struct device *dev)
dev_set_drvdata(dev, NULL);
nvdimm_bus_unlock(dev);
+ /*
+ * Note, this assumes device_lock() context to not race
+ * nd_region_notify()
+ */
+ sysfs_put(nd_region->bb_state);
+ nd_region->bb_state = NULL;
+
return 0;
}
@@ -126,6 +137,8 @@ static void nd_region_notify(struct device *dev, enum nvdimm_event event)
nd_region->ndr_size - 1;
nvdimm_badblocks_populate(nd_region,
&nd_region->bb, &res);
+ if (nd_region->bb_state)
+ sysfs_notify_dirent(nd_region->bb_state);
}
}
device_for_each_child(dev, &event, child_notify);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index b550edf2571f..5954cfbea3fc 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -15,7 +15,6 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/hash.h>
-#include <linux/pmem.h>
#include <linux/sort.h>
#include <linux/io.h>
#include <linux/nd.h>
@@ -169,6 +168,11 @@ bool is_nd_blk(struct device *dev)
return dev ? dev->type == &nd_blk_device_type : false;
}
+bool is_nd_volatile(struct device *dev)
+{
+ return dev ? dev->type == &nd_volatile_device_type : false;
+}
+
struct nd_region *to_nd_region(struct device *dev)
{
struct nd_region *nd_region = container_of(dev, struct nd_region, dev);
@@ -215,7 +219,7 @@ EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data);
*/
int nd_region_to_nstype(struct nd_region *nd_region)
{
- if (is_nd_pmem(&nd_region->dev)) {
+ if (is_memory(&nd_region->dev)) {
u16 i, alias;
for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) {
@@ -243,7 +247,7 @@ static ssize_t size_show(struct device *dev,
struct nd_region *nd_region = to_nd_region(dev);
unsigned long long size = 0;
- if (is_nd_pmem(dev)) {
+ if (is_memory(dev)) {
size = nd_region->ndr_size;
} else if (nd_region->ndr_mappings == 1) {
struct nd_mapping *nd_mapping = &nd_region->mapping[0];
@@ -307,13 +311,41 @@ static ssize_t set_cookie_show(struct device *dev,
{
struct nd_region *nd_region = to_nd_region(dev);
struct nd_interleave_set *nd_set = nd_region->nd_set;
+ ssize_t rc = 0;
- if (is_nd_pmem(dev) && nd_set)
+ if (is_memory(dev) && nd_set)
/* pass, should be precluded by region_visible */;
else
return -ENXIO;
- return sprintf(buf, "%#llx\n", nd_set->cookie);
+ /*
+ * The cookie to show depends on which specification of the
+ * labels we are using. If there are not labels then default to
+ * the v1.1 namespace label cookie definition. To read all this
+ * data we need to wait for probing to settle.
+ */
+ device_lock(dev);
+ nvdimm_bus_lock(dev);
+ wait_nvdimm_bus_probe_idle(dev);
+ if (nd_region->ndr_mappings) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+ struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+
+ if (ndd) {
+ struct nd_namespace_index *nsindex;
+
+ nsindex = to_namespace_index(ndd, ndd->ns_current);
+ rc = sprintf(buf, "%#llx\n",
+ nd_region_interleave_set_cookie(nd_region,
+ nsindex));
+ }
+ }
+ nvdimm_bus_unlock(dev);
+ device_unlock(dev);
+
+ if (rc)
+ return rc;
+ return sprintf(buf, "%#llx\n", nd_set->cookie1);
}
static DEVICE_ATTR_RO(set_cookie);
@@ -335,7 +367,7 @@ resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
if (!ndd)
return 0;
- if (is_nd_pmem(&nd_region->dev)) {
+ if (is_memory(&nd_region->dev)) {
available += nd_pmem_available_dpa(nd_region,
nd_mapping, &overlap);
if (overlap > blk_max_overlap) {
@@ -521,10 +553,10 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
struct nd_interleave_set *nd_set = nd_region->nd_set;
int type = nd_region_to_nstype(nd_region);
- if (!is_nd_pmem(dev) && a == &dev_attr_pfn_seed.attr)
+ if (!is_memory(dev) && a == &dev_attr_pfn_seed.attr)
return 0;
- if (!is_nd_pmem(dev) && a == &dev_attr_dax_seed.attr)
+ if (!is_memory(dev) && a == &dev_attr_dax_seed.attr)
return 0;
if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr)
@@ -552,7 +584,7 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
|| type == ND_DEVICE_NAMESPACE_BLK)
&& a == &dev_attr_available_size.attr)
return a->mode;
- else if (is_nd_pmem(dev) && nd_set)
+ else if (is_memory(dev) && nd_set)
return a->mode;
return 0;
@@ -564,13 +596,18 @@ struct attribute_group nd_region_attribute_group = {
};
EXPORT_SYMBOL_GPL(nd_region_attribute_group);
-u64 nd_region_interleave_set_cookie(struct nd_region *nd_region)
+u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
+ struct nd_namespace_index *nsindex)
{
struct nd_interleave_set *nd_set = nd_region->nd_set;
- if (nd_set)
- return nd_set->cookie;
- return 0;
+ if (!nd_set)
+ return 0;
+
+ if (nsindex && __le16_to_cpu(nsindex->major) == 1
+ && __le16_to_cpu(nsindex->minor) == 1)
+ return nd_set->cookie1;
+ return nd_set->cookie2;
}
u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region)
@@ -604,7 +641,7 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
{
struct nd_region *nd_region;
- if (!probe && (is_nd_pmem(dev) || is_nd_blk(dev))) {
+ if (!probe && is_nd_region(dev)) {
int i;
nd_region = to_nd_region(dev);
@@ -622,12 +659,8 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
if (ndd)
atomic_dec(&nvdimm->busy);
}
-
- if (is_nd_pmem(dev))
- return;
}
- if (dev->parent && (is_nd_blk(dev->parent) || is_nd_pmem(dev->parent))
- && probe) {
+ if (dev->parent && is_nd_region(dev->parent) && probe) {
nd_region = to_nd_region(dev->parent);
nvdimm_bus_lock(dev);
if (nd_region->ns_seed == dev)
@@ -800,7 +833,7 @@ int nd_blk_region_init(struct nd_region *nd_region)
return 0;
if (nd_region->ndr_mappings < 1) {
- dev_err(dev, "invalid BLK region\n");
+ dev_dbg(dev, "invalid BLK region\n");
return -ENXIO;
}
@@ -1015,8 +1048,8 @@ void nvdimm_flush(struct nd_region *nd_region)
* The first wmb() is needed to 'sfence' all previous writes
* such that they are architecturally visible for the platform
* buffer flush. Note that we've already arranged for pmem
- * writes to avoid the cache via arch_memcpy_to_pmem(). The
- * final wmb() ensures ordering for the NVDIMM flush write.
+ * writes to avoid the cache via memcpy_flushcache(). The final
+ * wmb() ensures ordering for the NVDIMM flush write.
*/
wmb();
for (i = 0; i < nd_region->ndr_mappings; i++)
@@ -1038,8 +1071,9 @@ int nvdimm_has_flush(struct nd_region *nd_region)
{
int i;
- /* no nvdimm == flushing capability unknown */
- if (nd_region->ndr_mappings == 0)
+ /* no nvdimm or pmem api == flushing capability unknown */
+ if (nd_region->ndr_mappings == 0
+ || !IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API))
return -ENXIO;
for (i = 0; i < nd_region->ndr_mappings; i++) {
@@ -1059,6 +1093,12 @@ int nvdimm_has_flush(struct nd_region *nd_region)
}
EXPORT_SYMBOL_GPL(nvdimm_has_flush);
+int nvdimm_has_cache(struct nd_region *nd_region)
+{
+ return is_nd_pmem(&nd_region->dev);
+}
+EXPORT_SYMBOL_GPL(nvdimm_has_cache);
+
void __exit nd_region_devs_exit(void)
{
ida_destroy(&region_ida);