diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-29 03:22:07 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-29 03:38:16 +0300 |
commit | f0c98ebc57c2d5e535bc4f9167f35650d2ba3c90 (patch) | |
tree | ad584aa321c0a2dbdaa49e0754f6c9f233b79a48 /drivers/nvdimm/pmem.c | |
parent | d94ba9e7d8d5c821d0442f13b30b0140c1109c38 (diff) | |
parent | 0606263f24f3d64960de742c55894190b5df903b (diff) | |
download | linux-f0c98ebc57c2d5e535bc4f9167f35650d2ba3c90.tar.xz |
Merge tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
- Replace pcommit with ADR / directed-flushing.
The pcommit instruction, which has not shipped on any product, is
deprecated. Instead, the requirement is that platforms either implement
ADR or provide one or more flush addresses per nvdimm.
ADR (Asynchronous DRAM Refresh) flushes data in posted write buffers
to the memory controller on a power-fail event.
Flush addresses are defined in ACPI 6.x as an NVDIMM Firmware
Interface Table (NFIT) sub-structure: "Flush Hint Address Structure".
A flush hint is an mmio address that, when written and fenced, ensures
that all previous posted writes targeting a given dimm have been
flushed to media.
- On-demand ARS (address range scrub).
Linux uses the results of the ACPI ARS commands to track bad blocks
in pmem devices. When latent errors are detected we re-scrub the
media to refresh the bad block list; userspace can also request a
re-scrub at any time.
- Support for the Microsoft DSM (device specific method) command
format.
- Support for EDK2/OVMF virtual disk device memory ranges.
- Various fixes and cleanups across the subsystem.
* tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (41 commits)
libnvdimm-btt: Delete an unnecessary check before the function call "__nd_device_register"
nfit: do an ARS scrub on hitting a latent media error
nfit: move to nfit/ sub-directory
nfit, libnvdimm: allow an ARS scrub to be triggered on demand
libnvdimm: register nvdimm_bus devices with an nd_bus driver
pmem: clarify a debug print in pmem_clear_poison
x86/insn: remove pcommit
Revert "KVM: x86: add pcommit support"
nfit, tools/testing/nvdimm/: unify shutdown paths
libnvdimm: move ->module to struct nvdimm_bus_descriptor
nfit: cleanup acpi_nfit_init calling convention
nfit: fix _FIT evaluation memory leak + use after free
tools/testing/nvdimm: add manufacturing_{date|location} dimm properties
tools/testing/nvdimm: add virtual ramdisk range
acpi, nfit: treat virtual ramdisk SPA as pmem region
pmem: kill __pmem address space
pmem: kill wmb_pmem()
libnvdimm, pmem: use nvdimm_flush() for namespace I/O writes
fs/dax: remove wmb_pmem()
libnvdimm, pmem: flush posted-write queues on shutdown
...
Diffstat (limited to 'drivers/nvdimm/pmem.c')
-rw-r--r-- | drivers/nvdimm/pmem.c | 85 |
1 files changed, 51 insertions, 34 deletions
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 36cb39047d5b..b511099457db 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -29,27 +29,28 @@ #include <linux/slab.h> #include <linux/pmem.h> #include <linux/nd.h> +#include "pmem.h" #include "pfn.h" #include "nd.h" -struct pmem_device { - /* One contiguous memory region per device */ - phys_addr_t phys_addr; - /* when non-zero this device is hosting a 'pfn' instance */ - phys_addr_t data_offset; - u64 pfn_flags; - void __pmem *virt_addr; - /* immutable base size of the namespace */ - size_t size; - /* trim size when namespace capacity has been section aligned */ - u32 pfn_pad; - struct badblocks bb; -}; +static struct device *to_dev(struct pmem_device *pmem) +{ + /* + * nvdimm bus services need a 'dev' parameter, and we record the device + * at init in bb.dev. + */ + return pmem->bb.dev; +} + +static struct nd_region *to_region(struct pmem_device *pmem) +{ + return to_nd_region(to_dev(pmem)->parent); +} static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, unsigned int len) { - struct device *dev = pmem->bb.dev; + struct device *dev = to_dev(pmem); sector_t sector; long cleared; @@ -57,7 +58,7 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len); if (cleared > 0 && cleared / 512) { - dev_dbg(dev, "%s: %llx clear %ld sector%s\n", + dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__, (unsigned long long) sector, cleared / 512, cleared / 512 > 1 ? 
"s" : ""); badblocks_clear(&pmem->bb, sector, cleared / 512); @@ -73,7 +74,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, bool bad_pmem = false; void *mem = kmap_atomic(page); phys_addr_t pmem_off = sector * 512 + pmem->data_offset; - void __pmem *pmem_addr = pmem->virt_addr + pmem_off; + void *pmem_addr = pmem->virt_addr + pmem_off; if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) bad_pmem = true; @@ -112,6 +113,11 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, return rc; } +/* account for REQ_FLUSH rename, replace with REQ_PREFLUSH after v4.8-rc1 */ +#ifndef REQ_FLUSH +#define REQ_FLUSH REQ_PREFLUSH +#endif + static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) { int rc = 0; @@ -120,6 +126,10 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) struct bio_vec bvec; struct bvec_iter iter; struct pmem_device *pmem = q->queuedata; + struct nd_region *nd_region = to_region(pmem); + + if (bio->bi_rw & REQ_FLUSH) + nvdimm_flush(nd_region); do_acct = nd_iostat_start(bio, &start); bio_for_each_segment(bvec, bio, iter) { @@ -134,8 +144,8 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) if (do_acct) nd_iostat_end(bio, start); - if (bio_data_dir(bio)) - wmb_pmem(); + if (bio->bi_rw & REQ_FUA) + nvdimm_flush(nd_region); bio_endio(bio); return BLK_QC_T_NONE; @@ -148,8 +158,6 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, int rc; rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector); - if (rw & WRITE) - wmb_pmem(); /* * The ->rw_page interface is subtle and tricky. 
The core @@ -163,8 +171,9 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, return rc; } -static long pmem_direct_access(struct block_device *bdev, sector_t sector, - void __pmem **kaddr, pfn_t *pfn, long size) +/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */ +__weak long pmem_direct_access(struct block_device *bdev, sector_t sector, + void **kaddr, pfn_t *pfn, long size) { struct pmem_device *pmem = bdev->bd_queue->queuedata; resource_size_t offset = sector * 512 + pmem->data_offset; @@ -195,7 +204,7 @@ static void pmem_release_queue(void *q) blk_cleanup_queue(q); } -void pmem_release_disk(void *disk) +static void pmem_release_disk(void *disk) { del_gendisk(disk); put_disk(disk); @@ -205,6 +214,7 @@ static int pmem_attach_disk(struct device *dev, struct nd_namespace_common *ndns) { struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct nd_region *nd_region = to_nd_region(dev->parent); struct vmem_altmap __altmap, *altmap = NULL; struct resource *res = &nsio->res; struct nd_pfn *nd_pfn = NULL; @@ -234,7 +244,7 @@ static int pmem_attach_disk(struct device *dev, dev_set_drvdata(dev, pmem); pmem->phys_addr = res->start; pmem->size = resource_size(res); - if (!arch_has_wmb_pmem()) + if (nvdimm_has_flush(nd_region) < 0) dev_warn(dev, "unable to guarantee persistence of writes\n"); if (!devm_request_mem_region(dev, res->start, resource_size(res), @@ -269,15 +279,14 @@ static int pmem_attach_disk(struct device *dev, * At release time the queue must be dead before * devm_memremap_pages is unwound */ - if (devm_add_action(dev, pmem_release_queue, q)) { - blk_cleanup_queue(q); + if (devm_add_action_or_reset(dev, pmem_release_queue, q)) return -ENOMEM; - } if (IS_ERR(addr)) return PTR_ERR(addr); - pmem->virt_addr = (void __pmem *) addr; + pmem->virt_addr = addr; + blk_queue_write_cache(q, true, true); blk_queue_make_request(q, pmem_make_request); blk_queue_physical_block_size(q, PAGE_SIZE); blk_queue_max_hw_sectors(q, 
UINT_MAX); @@ -289,10 +298,6 @@ static int pmem_attach_disk(struct device *dev, disk = alloc_disk_node(0, nid); if (!disk) return -ENOMEM; - if (devm_add_action(dev, pmem_release_disk, disk)) { - put_disk(disk); - return -ENOMEM; - } disk->fops = &pmem_fops; disk->queue = q; @@ -302,9 +307,13 @@ static int pmem_attach_disk(struct device *dev, / 512); if (devm_init_badblocks(dev, &pmem->bb)) return -ENOMEM; - nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res); + nvdimm_badblocks_populate(nd_region, &pmem->bb, res); disk->bb = &pmem->bb; device_add_disk(dev, disk); + + if (devm_add_action_or_reset(dev, pmem_release_disk, disk)) + return -ENOMEM; + revalidate_disk(disk); return 0; @@ -340,13 +349,20 @@ static int nd_pmem_remove(struct device *dev) { if (is_nd_btt(dev)) nvdimm_namespace_detach_btt(to_nd_btt(dev)); + nvdimm_flush(to_nd_region(dev->parent)); + return 0; } +static void nd_pmem_shutdown(struct device *dev) +{ + nvdimm_flush(to_nd_region(dev->parent)); +} + static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) { - struct nd_region *nd_region = to_nd_region(dev->parent); struct pmem_device *pmem = dev_get_drvdata(dev); + struct nd_region *nd_region = to_region(pmem); resource_size_t offset = 0, end_trunc = 0; struct nd_namespace_common *ndns; struct nd_namespace_io *nsio; @@ -382,6 +398,7 @@ static struct nd_device_driver nd_pmem_driver = { .probe = nd_pmem_probe, .remove = nd_pmem_remove, .notify = nd_pmem_notify, + .shutdown = nd_pmem_shutdown, .drv = { .name = "nd_pmem", }, |