diff options
Diffstat (limited to 'drivers/nvdimm/pmem.c')
-rw-r--r-- | drivers/nvdimm/pmem.c | 92 |
1 files changed, 71 insertions, 21 deletions
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index c544d466ea51..f7099adaabc0 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -28,7 +28,7 @@ #include <linux/blk-mq.h> #include <linux/pfn_t.h> #include <linux/slab.h> -#include <linux/pmem.h> +#include <linux/uio.h> #include <linux/dax.h> #include <linux/nd.h> #include "pmem.h" @@ -49,28 +49,30 @@ static struct nd_region *to_region(struct pmem_device *pmem) return to_nd_region(to_dev(pmem)->parent); } -static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, - unsigned int len) +static blk_status_t pmem_clear_poison(struct pmem_device *pmem, + phys_addr_t offset, unsigned int len) { struct device *dev = to_dev(pmem); sector_t sector; long cleared; - int rc = 0; + blk_status_t rc = BLK_STS_OK; sector = (offset - pmem->data_offset) / 512; cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len); if (cleared < len) - rc = -EIO; + rc = BLK_STS_IOERR; if (cleared > 0 && cleared / 512) { cleared /= 512; dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__, (unsigned long long) sector, cleared, cleared > 1 ? "s" : ""); badblocks_clear(&pmem->bb, sector, cleared); + if (pmem->bb_state) + sysfs_notify_dirent(pmem->bb_state); } - invalidate_pmem(pmem->virt_addr + offset, len); + arch_invalidate_pmem(pmem->virt_addr + offset, len); return rc; } @@ -80,11 +82,11 @@ static void write_pmem(void *pmem_addr, struct page *page, { void *mem = kmap_atomic(page); - memcpy_to_pmem(pmem_addr, mem + off, len); + memcpy_flushcache(pmem_addr, mem + off, len); kunmap_atomic(mem); } -static int read_pmem(struct page *page, unsigned int off, +static blk_status_t read_pmem(struct page *page, unsigned int off, void *pmem_addr, unsigned int len) { int rc; @@ -93,15 +95,15 @@ static int read_pmem(struct page *page, unsigned int off, rc = memcpy_mcsafe(mem + off, pmem_addr, len); kunmap_atomic(mem); if (rc) - return -EIO; - return 0; + return BLK_STS_IOERR; + return BLK_STS_OK; } -static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, +static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, unsigned int len, unsigned int off, bool is_write, sector_t sector) { - int rc = 0; + blk_status_t rc = BLK_STS_OK; bool bad_pmem = false; phys_addr_t pmem_off = sector * 512 + pmem->data_offset; void *pmem_addr = pmem->virt_addr + pmem_off; @@ -111,7 +113,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, if (!is_write) { if (unlikely(bad_pmem)) - rc = -EIO; + rc = BLK_STS_IOERR; else { rc = read_pmem(page, off, pmem_addr, len); flush_dcache_page(page); @@ -149,7 +151,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) { - int rc = 0; + blk_status_t rc = 0; bool do_acct; unsigned long start; struct bio_vec bvec; @@ -166,7 +168,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) bvec.bv_offset, op_is_write(bio_op(bio)), iter.bi_sector); if (rc) { - bio->bi_error = rc; + bio->bi_status = rc; break; } } @@ -184,7 +186,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, struct page *page, bool is_write) { struct pmem_device *pmem = bdev->bd_queue->queuedata; - int rc; + blk_status_t rc; rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector); @@ -197,7 +199,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, if (rc == 0) page_endio(page, is_write, 0); - return rc; + return blk_status_to_errno(rc); } /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */ @@ -235,8 +237,27 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev, return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn); } +static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_from_iter_flushcache(addr, bytes, i); +} + +static void pmem_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t size) +{ + arch_wb_cache_pmem(addr, size); +} + static const struct dax_operations pmem_dax_ops = { .direct_access = pmem_dax_direct_access, + .copy_from_iter = pmem_copy_from_iter, + .flush = pmem_dax_flush, +}; + +static const struct attribute_group *pmem_attribute_groups[] = { + &dax_attribute_group, + NULL, }; static void pmem_release_queue(void *q) @@ -265,14 +286,15 @@ static int pmem_attach_disk(struct device *dev, struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); struct nd_region *nd_region = to_nd_region(dev->parent); struct vmem_altmap __altmap, *altmap = NULL; + int nid = dev_to_node(dev), fua, wbc; struct resource *res = &nsio->res; struct nd_pfn *nd_pfn = NULL; struct dax_device *dax_dev; - int nid = dev_to_node(dev); struct nd_pfn_sb *pfn_sb; struct pmem_device *pmem; struct resource pfn_res; struct request_queue *q; + struct device *gendev; struct gendisk *disk; void *addr; @@ -294,8 +316,12 @@ static int pmem_attach_disk(struct device *dev, dev_set_drvdata(dev, pmem); pmem->phys_addr = res->start; pmem->size = resource_size(res); - if (nvdimm_has_flush(nd_region) < 0) + fua = nvdimm_has_flush(nd_region); + if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) || fua < 0) { dev_warn(dev, "unable to guarantee persistence of writes\n"); + fua = 0; + } + wbc = nvdimm_has_cache(nd_region); if (!devm_request_mem_region(dev, res->start, resource_size(res), dev_name(&ndns->dev))) { @@ -339,11 +365,11 @@ static int pmem_attach_disk(struct device *dev, return PTR_ERR(addr); pmem->virt_addr = addr; - blk_queue_write_cache(q, true, true); + blk_queue_write_cache(q, wbc, fua); blk_queue_make_request(q, pmem_make_request); blk_queue_physical_block_size(q, PAGE_SIZE); + blk_queue_logical_block_size(q, pmem_sector_size(ndns)); blk_queue_max_hw_sectors(q, UINT_MAX); - blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); queue_flag_set_unlocked(QUEUE_FLAG_DAX, q); q->queuedata = pmem; @@ -369,14 +395,23 @@ static int pmem_attach_disk(struct device *dev, put_disk(disk); return -ENOMEM; } + dax_write_cache(dax_dev, wbc); pmem->dax_dev = dax_dev; + gendev = disk_to_dev(disk); + gendev->groups = pmem_attribute_groups; + device_add_disk(dev, disk); if (devm_add_action_or_reset(dev, pmem_release_disk, pmem)) return -ENOMEM; revalidate_disk(disk); + pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd, + "badblocks"); + if (!pmem->bb_state) + dev_warn(dev, "'badblocks' notification disabled\n"); + return 0; } @@ -408,8 +443,18 @@ static int nd_pmem_probe(struct device *dev) static int nd_pmem_remove(struct device *dev) { + struct pmem_device *pmem = dev_get_drvdata(dev); + if (is_nd_btt(dev)) nvdimm_namespace_detach_btt(to_nd_btt(dev)); + else { + /* + * Note, this assumes device_lock() context to not race + * nd_pmem_notify() + */ + sysfs_put(pmem->bb_state); + pmem->bb_state = NULL; + } nvdimm_flush(to_nd_region(dev->parent)); return 0; @@ -428,6 +473,7 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) struct nd_namespace_io *nsio; struct resource res; struct badblocks *bb; + struct kernfs_node *bb_state; if (event != NVDIMM_REVALIDATE_POISON) return; @@ -439,11 +485,13 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) nd_region = to_nd_region(ndns->dev.parent); nsio = to_nd_namespace_io(&ndns->dev); bb = &nsio->bb; + bb_state = NULL; } else { struct pmem_device *pmem = dev_get_drvdata(dev); nd_region = to_region(pmem); bb = &pmem->bb; + bb_state = pmem->bb_state; if (is_nd_pfn(dev)) { struct nd_pfn *nd_pfn = to_nd_pfn(dev); @@ -463,6 +511,8 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) res.start = nsio->res.start + offset; res.end = nsio->res.end - end_trunc; nvdimm_badblocks_populate(nd_region, bb, &res); + if (bb_state) + sysfs_notify_dirent(bb_state); } MODULE_ALIAS("pmem"); |