From 6b47496a6fc81816e7edaf8224dfb88e402a05f5 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Thu, 23 Jul 2015 11:58:48 -0600 Subject: libnvdimm, pmem: Change pmem physical sector size to PAGE_SIZE Based on a patch: c8fa317 brd: Request from fdisk 4k alignment by Boaz Harrosh, allow fdisk to create properly aligned partitions for DAX. This will also cause mkfs.ext4 to emit a warning if using a file system block size of less than PAGE_SIZE. Cc: Dan Williams Cc: Ross Zwisler Cc: Matthew Wilcox Cc: Christoph Hellwig Cc: Elliott, Robert Signed-off-by: Vishal Verma Acked-by: Boaz Harrosh Acked-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvdimm/pmem.c') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index ade9eb917a4d..bcf48f133443 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -162,6 +162,7 @@ static int pmem_attach_disk(struct nd_namespace_common *ndns, return -ENOMEM; blk_queue_make_request(pmem->pmem_queue, pmem_make_request); + blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE); blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX); blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue); -- cgit v1.2.3 From 708ab62bef1ed3a3cf065a4138bd87f5d083cfeb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Aug 2015 23:07:08 -0400 Subject: pmem: switch to devm_ allocations Signed-off-by: Christoph Hellwig [djbw: tools/testing/nvdimm/ and memunmap_pmem support] Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 36 ++++++++++-------------------- include/linux/pmem.h | 14 +++++++----- tools/testing/nvdimm/Kbuild | 4 ++-- tools/testing/nvdimm/test/iomap.c | 46 ++++++++++++++++++++++----------------- 4 files changed, 47 insertions(+), 53 deletions(-) (limited to 'drivers/nvdimm/pmem.c') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index bcf48f133443..eb7552d939e1 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -119,7 +119,7 @@ static struct pmem_device *pmem_alloc(struct device *dev, { struct pmem_device *pmem; - pmem = kzalloc(sizeof(*pmem), GFP_KERNEL); + pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); if (!pmem) return ERR_PTR(-ENOMEM); @@ -128,19 +128,16 @@ static struct pmem_device *pmem_alloc(struct device *dev, if (!arch_has_pmem_api()) dev_warn(dev, "unable to guarantee persistence of writes\n"); - if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) { + if (!devm_request_mem_region(dev, pmem->phys_addr, pmem->size, + dev_name(dev))) { dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", &pmem->phys_addr, pmem->size); - kfree(pmem); return ERR_PTR(-EBUSY); } - pmem->virt_addr = memremap_pmem(pmem->phys_addr, pmem->size); - if (!pmem->virt_addr) { - release_mem_region(pmem->phys_addr, pmem->size); - kfree(pmem); + pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr, pmem->size); + if (!pmem->virt_addr) return ERR_PTR(-ENXIO); - } return pmem; } @@ -210,20 +207,12 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns, return 0; } -static void pmem_free(struct pmem_device *pmem) -{ - memunmap_pmem(pmem->virt_addr); - release_mem_region(pmem->phys_addr, pmem->size); - kfree(pmem); -} - static int nd_pmem_probe(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); struct nd_namespace_common *ndns; struct nd_namespace_io *nsio; struct pmem_device *pmem; - int rc; ndns = nvdimm_namespace_common_probe(dev); if (IS_ERR(ndns)) @@ -236,16 +225,14 @@ static int nd_pmem_probe(struct device *dev) dev_set_drvdata(dev, pmem); ndns->rw_bytes = pmem_rw_bytes; + if (is_nd_btt(dev)) - rc = nvdimm_namespace_attach_btt(ndns); - else if (nd_btt_probe(ndns, pmem) == 0) { + return nvdimm_namespace_attach_btt(ndns); + + if (nd_btt_probe(ndns, pmem) == 0) /* we'll come back as btt-pmem */ - rc = -ENXIO; - } else - rc = pmem_attach_disk(ndns, pmem); - if (rc) - pmem_free(pmem); - return rc; + return -ENXIO; + return pmem_attach_disk(ndns, pmem); } static int nd_pmem_remove(struct device *dev) @@ -256,7 +243,6 @@ static int nd_pmem_remove(struct device *dev) nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); else pmem_detach_disk(pmem); - pmem_free(pmem); return 0; } diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 093c35ecefcc..20c367cd76e6 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -46,9 +46,9 @@ static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t si memcpy(dst, (void __force const *) src, size); } -static inline void memunmap_pmem(void __pmem *addr) +static inline void memunmap_pmem(struct device *dev, void __pmem *addr) { - memunmap((void __force *) addr); + devm_memunmap(dev, (void __force *) addr); } /** @@ -97,13 +97,15 @@ static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src, * wmb_pmem() arrange for the data to be written through the * cache to persistent media. */ -static inline void __pmem *memremap_pmem(resource_size_t offset, - unsigned long size) +static inline void __pmem *memremap_pmem(struct device *dev, + resource_size_t offset, unsigned long size) { #ifdef ARCH_MEMREMAP_PMEM - return (void __pmem *) memremap(offset, size, ARCH_MEMREMAP_PMEM); + return (void __pmem *) devm_memremap(dev, offset, size, + ARCH_MEMREMAP_PMEM); #else - return (void __pmem *) memremap(offset, size, MEMREMAP_WT); + return (void __pmem *) devm_memremap(dev, offset, size, + MEMREMAP_WT); #endif } diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 8032a49f7873..04c5fc09576d 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -1,9 +1,9 @@ ldflags-y += --wrap=ioremap_wc ldflags-y += --wrap=devm_ioremap_nocache -ldflags-y += --wrap=memremap -ldflags-y += --wrap=memunmap +ldflags-y += --wrap=devm_memremap ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap +ldflags-y += --wrap=__devm_request_region ldflags-y += --wrap=__request_region ldflags-y += --wrap=__release_region diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 21288f34a5ca..ff1e00458864 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -80,8 +80,8 @@ void __iomem *__wrap_devm_ioremap_nocache(struct device *dev, } EXPORT_SYMBOL(__wrap_devm_ioremap_nocache); -void *__wrap_memremap(resource_size_t offset, size_t size, - unsigned long flags) +void *__wrap_devm_memremap(struct device *dev, resource_size_t offset, + size_t size, unsigned long flags) { struct nfit_test_resource *nfit_res; @@ -91,9 +91,9 @@ void *__wrap_memremap(resource_size_t offset, size_t size, if (nfit_res) return (void __iomem *) nfit_res->buf + offset - nfit_res->res->start; - return memremap(offset, size, flags); + return devm_memremap(dev, offset, size, flags); } -EXPORT_SYMBOL(__wrap_memremap); +EXPORT_SYMBOL(__wrap_devm_memremap); void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size) { @@ -120,22 +120,9 @@ void __wrap_iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(__wrap_iounmap); -void __wrap_memunmap(void *addr) -{ - struct nfit_test_resource *nfit_res; - - rcu_read_lock(); - nfit_res = get_nfit_res((unsigned long) addr); - rcu_read_unlock(); - if (nfit_res) - return; - return memunmap(addr); -} -EXPORT_SYMBOL(__wrap_memunmap); - -struct resource *__wrap___request_region(struct resource *parent, - resource_size_t start, resource_size_t n, const char *name, - int flags) +static struct resource *nfit_test_request_region(struct device *dev, + struct resource *parent, resource_size_t start, + resource_size_t n, const char *name, int flags) { struct nfit_test_resource *nfit_res; @@ -163,10 +150,29 @@ struct resource *__wrap___request_region(struct resource *parent, return res; } } + if (dev) + return __devm_request_region(dev, parent, start, n, name); return __request_region(parent, start, n, name, flags); } + +struct resource *__wrap___request_region(struct resource *parent, + resource_size_t start, resource_size_t n, const char *name, + int flags) +{ + return nfit_test_request_region(NULL, parent, start, n, name, flags); +} EXPORT_SYMBOL(__wrap___request_region); +struct resource *__wrap___devm_request_region(struct device *dev, + struct resource *parent, resource_size_t start, + resource_size_t n, const char *name) +{ + if (!dev) + return NULL; + return nfit_test_request_region(dev, parent, start, n, name, 0); +} +EXPORT_SYMBOL(__wrap___devm_request_region); + void __wrap___release_region(struct resource *parent, resource_size_t start, resource_size_t n) { -- cgit v1.2.3 From e2e05394e4a3420dab96f728df4531893494e15d Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 18 Aug 2015 13:55:41 -0600 Subject: pmem, dax: have direct_access use __pmem annotation Update the annotation for the kaddr pointer returned by direct_access() so that it is a __pmem pointer. This is consistent with the PMEM driver and with how this direct_access() pointer is used in the DAX code. Signed-off-by: Ross Zwisler Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams --- Documentation/filesystems/Locking | 3 ++- arch/powerpc/sysdev/axonram.c | 7 ++++--- drivers/block/brd.c | 4 ++-- drivers/nvdimm/pmem.c | 4 ++-- drivers/s390/block/dcssblk.c | 10 ++++++---- fs/block_dev.c | 2 +- fs/dax.c | 37 ++++++++++++++++++++----------------- include/linux/blkdev.h | 8 ++++---- 8 files changed, 41 insertions(+), 34 deletions(-) (limited to 'drivers/nvdimm/pmem.c') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 6a34a0f4d37c..06d443450f21 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -397,7 +397,8 @@ prototypes: int (*release) (struct gendisk *, fmode_t); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); - int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *); + int (*direct_access) (struct block_device *, sector_t, void __pmem **, + unsigned long *); int (*media_changed) (struct gendisk *); void (*unlock_native_capacity) (struct gendisk *); int (*revalidate_disk) (struct gendisk *); diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index ee90db17b097..a2be2a66dab6 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -141,13 +141,14 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) */ static long axon_ram_direct_access(struct block_device *device, sector_t sector, - void **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn, long size) { struct axon_ram_bank *bank = device->bd_disk->private_data; loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT; + void *addr = (void *)(bank->ph_addr + offset); - *kaddr = (void *)(bank->ph_addr + offset); - *pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT; + *kaddr = (void __pmem *)addr; + *pfn = virt_to_phys(addr) >> PAGE_SHIFT; return bank->size - offset; } diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 64ab4951e9d6..c96402fd1560 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -371,7 +371,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector, #ifdef CONFIG_BLK_DEV_RAM_DAX static long brd_direct_access(struct block_device *bdev, sector_t sector, - void **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn, long size) { struct brd_device *brd = bdev->bd_disk->private_data; struct page *page; @@ -381,7 +381,7 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector, page = brd_insert_page(brd, sector); if (!page) return -ENOSPC; - *kaddr = page_address(page); + *kaddr = (void __pmem *)page_address(page); *pfn = page_to_pfn(page); /* diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index eb7552d939e1..f3b629779266 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -92,7 +92,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, } static long pmem_direct_access(struct block_device *bdev, sector_t sector, - void **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn, long size) { struct pmem_device *pmem = bdev->bd_disk->private_data; size_t offset = sector << 9; @@ -101,7 +101,7 @@ static long pmem_direct_access(struct block_device *bdev, sector_t sector, return -ENODEV; /* FIXME convert DAX to comprehend that this mapping has a lifetime */ - *kaddr = (void __force *) pmem->virt_addr + offset; + *kaddr = pmem->virt_addr + offset; *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT; return pmem->size - offset; diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index da212813f2d5..2c5a397b9f3e 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -29,7 +29,7 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode); static void dcssblk_release(struct gendisk *disk, fmode_t mode); static void dcssblk_make_request(struct request_queue *q, struct bio *bio); static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum, - void **kaddr, unsigned long *pfn, long size); + void __pmem **kaddr, unsigned long *pfn, long size); static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; @@ -879,18 +879,20 @@ fail: static long dcssblk_direct_access (struct block_device *bdev, sector_t secnum, - void **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn, long size) { struct dcssblk_dev_info *dev_info; unsigned long offset, dev_sz; + void *addr; dev_info = bdev->bd_disk->private_data; if (!dev_info) return -ENODEV; dev_sz = dev_info->end - dev_info->start; offset = secnum * 512; - *kaddr = (void *) (dev_info->start + offset); - *pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT; + addr = (void *) (dev_info->start + offset); + *pfn = virt_to_phys(addr) >> PAGE_SHIFT; + *kaddr = (void __pmem *) addr; return dev_sz - offset; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 198243717da5..2345a9870e2c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -441,7 +441,7 @@ EXPORT_SYMBOL_GPL(bdev_write_page); * accessible at this address. */ long bdev_direct_access(struct block_device *bdev, sector_t sector, - void **addr, unsigned long *pfn, long size) + void __pmem **addr, unsigned long *pfn, long size) { long avail; const struct block_device_operations *ops = bdev->bd_disk->fops; diff --git a/fs/dax.c b/fs/dax.c index e07fecc93f80..7c634ac797b1 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -35,7 +35,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size) might_sleep(); do { - void *addr; + void __pmem *addr; unsigned long pfn; long count; @@ -47,7 +47,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size) unsigned pgsz = PAGE_SIZE - offset_in_page(addr); if (pgsz > count) pgsz = count; - clear_pmem((void __pmem *)addr, pgsz); + clear_pmem(addr, pgsz); addr += pgsz; size -= pgsz; count -= pgsz; @@ -62,7 +62,8 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size) } EXPORT_SYMBOL_GPL(dax_clear_blocks); -static long dax_get_addr(struct buffer_head *bh, void **addr, unsigned blkbits) +static long dax_get_addr(struct buffer_head *bh, void __pmem **addr, + unsigned blkbits) { unsigned long pfn; sector_t sector = bh->b_blocknr << (blkbits - 9); @@ -70,15 +71,15 @@ static long dax_get_addr(struct buffer_head *bh, void **addr, unsigned blkbits) } /* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */ -static void dax_new_buf(void *addr, unsigned size, unsigned first, loff_t pos, - loff_t end) +static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first, + loff_t pos, loff_t end) { loff_t final = end - pos + first; /* The final byte of the buffer */ if (first > 0) - clear_pmem((void __pmem *)addr, first); + clear_pmem(addr, first); if (final < size) - clear_pmem((void __pmem *)addr + final, size - final); + clear_pmem(addr + final, size - final); } static bool buffer_written(struct buffer_head *bh) @@ -106,7 +107,7 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, loff_t pos = start; loff_t max = start; loff_t bh_max = start; - void *addr; + void __pmem *addr; bool hole = false; bool need_wmb = false; @@ -158,11 +159,11 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, } if (iov_iter_rw(iter) == WRITE) { - len = copy_from_iter_pmem((void __pmem *)addr, - max - pos, iter); + len = copy_from_iter_pmem(addr, max - pos, iter); need_wmb = true; } else if (!hole) - len = copy_to_iter(addr, max - pos, iter); + len = copy_to_iter((void __force *)addr, max - pos, + iter); else len = iov_iter_zero(max - pos, iter); @@ -268,11 +269,13 @@ static int dax_load_hole(struct address_space *mapping, struct page *page, static int copy_user_bh(struct page *to, struct buffer_head *bh, unsigned blkbits, unsigned long vaddr) { - void *vfrom, *vto; + void __pmem *vfrom; + void *vto; + if (dax_get_addr(bh, &vfrom, blkbits) < 0) return -EIO; vto = kmap_atomic(to); - copy_user_page(vto, vfrom, vaddr, to); + copy_user_page(vto, (void __force *)vfrom, vaddr, to); kunmap_atomic(vto); return 0; } @@ -283,7 +286,7 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, struct address_space *mapping = inode->i_mapping; sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9); unsigned long vaddr = (unsigned long)vmf->virtual_address; - void *addr; + void __pmem *addr; unsigned long pfn; pgoff_t size; int error; @@ -312,7 +315,7 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, } if (buffer_unwritten(bh) || buffer_new(bh)) { - clear_pmem((void __pmem *)addr, PAGE_SIZE); + clear_pmem(addr, PAGE_SIZE); wmb_pmem(); } @@ -548,11 +551,11 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length, if (err < 0) return err; if (buffer_written(&bh)) { - void *addr; + void __pmem *addr; err = dax_get_addr(&bh, &addr, inode->i_blkbits); if (err < 0) return err; - clear_pmem((void __pmem *)addr + offset, length); + clear_pmem(addr + offset, length); wmb_pmem(); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d4068c17d0df..c401ecdff9cb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1555,8 +1555,8 @@ struct block_device_operations { int (*rw_page)(struct block_device *, sector_t, struct page *, int rw); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); - long (*direct_access)(struct block_device *, sector_t, - void **, unsigned long *pfn, long size); + long (*direct_access)(struct block_device *, sector_t, void __pmem **, + unsigned long *pfn, long size); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); /* ->media_changed() is DEPRECATED, use ->check_events() instead */ @@ -1574,8 +1574,8 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); -extern long bdev_direct_access(struct block_device *, sector_t, void **addr, - unsigned long *pfn, long size); +extern long bdev_direct_access(struct block_device *, sector_t, + void __pmem **addr, unsigned long *pfn, long size); #else /* CONFIG_BLOCK */ struct block_device; -- cgit v1.2.3 From cb389b9c0e00c30c9daf20287f7d91e2466edbb1 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 7 Aug 2015 17:41:00 -0400 Subject: dax: drop size parameter to ->direct_access() None of the implementations currently use it. The common bdev_direct_access() entry point handles all the size checks before calling ->direct_access(). Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/powerpc/sysdev/axonram.c | 2 +- drivers/block/brd.c | 6 +----- drivers/nvdimm/pmem.c | 2 +- drivers/s390/block/dcssblk.c | 4 ++-- fs/block_dev.c | 2 +- include/linux/blkdev.h | 2 +- 6 files changed, 7 insertions(+), 11 deletions(-) (limited to 'drivers/nvdimm/pmem.c') diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index a2be2a66dab6..4419c84ac15a 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -141,7 +141,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) */ static long axon_ram_direct_access(struct block_device *device, sector_t sector, - void __pmem **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn) { struct axon_ram_bank *bank = device->bd_disk->private_data; loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index c96402fd1560..03c45c41bdfa 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -371,7 +371,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector, #ifdef CONFIG_BLK_DEV_RAM_DAX static long brd_direct_access(struct block_device *bdev, sector_t sector, - void __pmem **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn) { struct brd_device *brd = bdev->bd_disk->private_data; struct page *page; @@ -384,10 +384,6 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector, *kaddr = (void __pmem *)page_address(page); *pfn = page_to_pfn(page); - /* - * TODO: If size > PAGE_SIZE, we could look to see if the next page in - * the file happens to be mapped to the next page of physical RAM. - */ return PAGE_SIZE; } #else diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index f3b629779266..3b5b9cb758b6 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -92,7 +92,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, } static long pmem_direct_access(struct block_device *bdev, sector_t sector, - void __pmem **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn) { struct pmem_device *pmem = bdev->bd_disk->private_data; size_t offset = sector << 9; diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 2c5a397b9f3e..8c027a9e4e8a 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -29,7 +29,7 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode); static void dcssblk_release(struct gendisk *disk, fmode_t mode); static void dcssblk_make_request(struct request_queue *q, struct bio *bio); static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum, - void __pmem **kaddr, unsigned long *pfn, long size); + void __pmem **kaddr, unsigned long *pfn); static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; @@ -879,7 +879,7 @@ fail: static long dcssblk_direct_access (struct block_device *bdev, sector_t secnum, - void __pmem **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn) { struct dcssblk_dev_info *dev_info; unsigned long offset, dev_sz; diff --git a/fs/block_dev.c b/fs/block_dev.c index 2345a9870e2c..3831e5691b32 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -462,7 +462,7 @@ long bdev_direct_access(struct block_device *bdev, sector_t sector, sector += get_start_sect(bdev); if (sector % (PAGE_SIZE / 512)) return -EINVAL; - avail = ops->direct_access(bdev, sector, addr, pfn, size); + avail = ops->direct_access(bdev, sector, addr, pfn); if (!avail) return -ERANGE; return min(avail, size); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c401ecdff9cb..c22064f326b2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1556,7 +1556,7 @@ struct block_device_operations { int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); long (*direct_access)(struct block_device *, sector_t, void __pmem **, - unsigned long *pfn, long size); + unsigned long *pfn); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); /* ->media_changed() is DEPRECATED, use ->check_events() instead */ -- cgit v1.2.3 From 96601adb745186ccbcf5b078d4756f13381ec2af Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 24 Aug 2015 18:29:38 -0400 Subject: x86, pmem: clarify that ARCH_HAS_PMEM_API implies PMEM mapped WB Given that a write-back (WB) mapping plus non-temporal stores is expected to be the most efficient way to access PMEM, update the definition of ARCH_HAS_PMEM_API to imply arch support for WB-mapped-PMEM. This is needed as a pre-requisite for adding PMEM to the direct map and mapping it with struct page. The above clarification for X86_64 means that memcpy_to_pmem() is permitted to use the non-temporal arch_memcpy_to_pmem() rather than needlessly fall back to default_memcpy_to_pmem() when the pcommit instruction is not available. When arch_memcpy_to_pmem() is not guaranteed to flush writes out of cache, i.e. on older X86_32 implementations where non-temporal stores may just dirty cache, ARCH_HAS_PMEM_API is simply disabled. The default fall back for persistent memory handling remains. Namely, map it with the WT (write-through) cache-type and hope for the best. arch_has_pmem_api() is updated to only indicate whether the arch provides the proper helpers to meet the minimum "writes are visible outside the cache hierarchy after memcpy_to_pmem() + wmb_pmem()". Code that cares whether wmb_pmem() actually flushes writes to pmem must now call arch_has_wmb_pmem() directly. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Reviewed-by: Ross Zwisler [hch: set ARCH_HAS_PMEM_API=n on x86_32] Reviewed-by: Christoph Hellwig [toshi: x86_32 compile fixes] Signed-off-by: Toshi Kani Signed-off-by: Dan Williams --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/pmem.h | 9 +-------- drivers/acpi/nfit.c | 3 ++- drivers/nvdimm/pmem.c | 2 +- include/linux/pmem.h | 36 ++++++++++++++++++++++-------------- 5 files changed, 27 insertions(+), 25 deletions(-) (limited to 'drivers/nvdimm/pmem.c') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 03ab6122325a..ef4c6bbb3af1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -27,7 +27,7 @@ config X86 select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_GCOV_PROFILE_ALL - select ARCH_HAS_PMEM_API + select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_MMIO_FLUSH select ARCH_HAS_SG_CHAIN select ARCH_HAVE_NMI_SAFE_CMPXCHG diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index bb026c5adf8a..d8ce3ec816ab 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -18,8 +18,6 @@ #include #include -#define ARCH_MEMREMAP_PMEM MEMREMAP_WB - #ifdef CONFIG_ARCH_HAS_PMEM_API /** * arch_memcpy_to_pmem - copy data to persistent memory @@ -143,18 +141,13 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size) __arch_wb_cache_pmem(vaddr, size); } -static inline bool arch_has_wmb_pmem(void) +static inline bool __arch_has_wmb_pmem(void) { -#ifdef CONFIG_X86_64 /* * We require that wmb() be an 'sfence', that is only guaranteed on * 64-bit builds */ return static_cpu_has(X86_FEATURE_PCOMMIT); -#else - return false; -#endif } #endif /* CONFIG_ARCH_HAS_PMEM_API */ - #endif /* __ASM_X86_PMEM_H__ */ diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 56fff0141636..f61e69fa2ad1 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "nfit.h" /* @@ -1371,7 +1372,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, return -ENOMEM; } - if (!arch_has_pmem_api() && !nfit_blk->nvdimm_flush) + if (!arch_has_wmb_pmem() && !nfit_blk->nvdimm_flush) dev_warn(dev, "unable to guarantee persistence of writes\n"); if (mmio->line_size == 0) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 3b5b9cb758b6..20bf122328da 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -125,7 +125,7 @@ static struct pmem_device *pmem_alloc(struct device *dev, pmem->phys_addr = res->start; pmem->size = resource_size(res); - if (!arch_has_pmem_api()) + if (!arch_has_wmb_pmem()) dev_warn(dev, "unable to guarantee persistence of writes\n"); if (!devm_request_mem_region(dev, pmem->phys_addr, pmem->size, diff --git a/include/linux/pmem.h b/include/linux/pmem.h index a9d84bf335ee..85f810b33917 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -17,16 +17,23 @@ #include #ifdef CONFIG_ARCH_HAS_PMEM_API +#define ARCH_MEMREMAP_PMEM MEMREMAP_WB #include #else -static inline void arch_wmb_pmem(void) +#define ARCH_MEMREMAP_PMEM MEMREMAP_WT +/* + * These are simply here to enable compilation, all call sites gate + * calling these symbols with arch_has_pmem_api() and redirect to the + * implementation in asm/pmem.h. + */ +static inline bool __arch_has_wmb_pmem(void) { - BUG(); + return false; } -static inline bool arch_has_wmb_pmem(void) +static inline void arch_wmb_pmem(void) { - return false; + BUG(); } static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, @@ -53,7 +60,6 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size) * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(), * arch_copy_from_iter_pmem(), arch_clear_pmem() and arch_has_wmb_pmem(). */ - static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size) { memcpy(dst, (void __force const *) src, size); @@ -64,8 +70,13 @@ static inline void memunmap_pmem(struct device *dev, void __pmem *addr) devm_memunmap(dev, (void __force *) addr); } +static inline bool arch_has_pmem_api(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); +} + /** - * arch_has_pmem_api - true if wmb_pmem() ensures durability + * arch_has_wmb_pmem - true if wmb_pmem() ensures durability * * For a given cpu implementation within an architecture it is possible * that wmb_pmem() resolves to a nop. In the case this returns @@ -73,9 +84,9 @@ static inline void memunmap_pmem(struct device *dev, void __pmem *addr) * fall back to a different data consistency model, or otherwise notify * the user. */ -static inline bool arch_has_pmem_api(void) +static inline bool arch_has_wmb_pmem(void) { - return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && arch_has_wmb_pmem(); + return arch_has_pmem_api() && __arch_has_wmb_pmem(); } /* @@ -120,13 +131,8 @@ static inline void default_clear_pmem(void __pmem *addr, size_t size) static inline void __pmem *memremap_pmem(struct device *dev, resource_size_t offset, unsigned long size) { -#ifdef ARCH_MEMREMAP_PMEM return (void __pmem *) devm_memremap(dev, offset, size, ARCH_MEMREMAP_PMEM); -#else - return (void __pmem *) devm_memremap(dev, offset, size, - MEMREMAP_WT); -#endif } /** @@ -158,8 +164,10 @@ static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n) */ static inline void wmb_pmem(void) { - if (arch_has_pmem_api()) + if (arch_has_wmb_pmem()) arch_wmb_pmem(); + else + wmb(); } /** -- cgit v1.2.3 From 32ab0a3f51701cb37ab960635254d5f84ec3de0a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 1 Aug 2015 02:16:37 -0400 Subject: libnvdimm, pmem: 'struct page' for pmem Enable the pmem driver to handle PFN device instances. Attaching a pmem namespace to a pfn device triggers the driver to allocate and initialize struct page entries for pmem. Memory capacity for this allocation comes exclusively from RAM for now which is suitable for low PMEM to RAM ratios. This mechanism will be expanded later for setting an "allocate from PMEM" policy. Cc: Boaz Harrosh Cc: Ross Zwisler Cc: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/nvdimm/Kconfig | 1 + drivers/nvdimm/nd.h | 6 ++ drivers/nvdimm/pfn_devs.c | 9 +- drivers/nvdimm/pmem.c | 203 +++++++++++++++++++++++++++++++++++--- tools/testing/nvdimm/Kbuild | 1 + tools/testing/nvdimm/test/iomap.c | 13 +++ 6 files changed, 216 insertions(+), 17 deletions(-) (limited to 'drivers/nvdimm/pmem.c') diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index ace25b53b755..53c11621d5b1 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -76,6 +76,7 @@ config ND_PFN config NVDIMM_PFN bool "PFN: Map persistent (device) memory" default LIBNVDIMM + depends on ZONE_DEVICE select ND_CLAIM help Map persistent memory, i.e. advertise it to the memory diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 8da2be1cecb8..83e5d0935012 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -217,6 +217,7 @@ struct nd_pfn *to_nd_pfn(struct device *dev); int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata); bool is_nd_pfn(struct device *dev); struct device *nd_pfn_create(struct nd_region *nd_region); +int nd_pfn_validate(struct nd_pfn *nd_pfn); #else static inline int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata) { @@ -232,6 +233,11 @@ static inline struct device *nd_pfn_create(struct nd_region *nd_region) { return NULL; } + +static inline int nd_pfn_validate(struct nd_pfn *nd_pfn) +{ + return -ENODEV; +} #endif struct nd_region *to_nd_region(struct device *dev); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index f708d63709a5..3fd7d0d81a47 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -228,7 +228,7 @@ struct device *nd_pfn_create(struct nd_region *nd_region) return dev; } -static int nd_pfn_validate(struct nd_pfn *nd_pfn) +int nd_pfn_validate(struct nd_pfn *nd_pfn) { struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; @@ -286,10 +286,10 @@ static int nd_pfn_validate(struct nd_pfn *nd_pfn) */ offset = le64_to_cpu(pfn_sb->dataoff); nsio = to_nd_namespace_io(&ndns->dev); - if ((nsio->res.start + offset) & (ND_PFN_ALIGN - 1)) { + if (nsio->res.start & ND_PFN_MASK) { dev_err(&nd_pfn->dev, - "init failed: %s with offset %#llx not section aligned\n", - dev_name(&ndns->dev), offset); + "init failed: %s not section aligned\n", + dev_name(&ndns->dev)); return -EBUSY; } else if (offset >= resource_size(&nsio->res)) { dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n", @@ -299,6 +299,7 @@ static int nd_pfn_validate(struct nd_pfn *nd_pfn) return 0; } +EXPORT_SYMBOL(nd_pfn_validate); int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata) { diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 20bf122328da..2f885e5d9c36 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -21,18 +21,24 @@ #include #include #include +#include #include +#include #include #include #include +#include "pfn.h" #include "nd.h" struct pmem_device { struct request_queue *pmem_queue; struct gendisk *pmem_disk; + struct nd_namespace_common *ndns; /* One contiguous memory region per device */ phys_addr_t phys_addr; + /* when non-zero this device is hosting a 'pfn' instance */ + phys_addr_t data_offset; void __pmem *virt_addr; size_t size; }; @@ -44,7 +50,7 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, sector_t sector) { void *mem = kmap_atomic(page); - size_t pmem_off = sector << 9; + phys_addr_t pmem_off = sector * 512 + pmem->data_offset; void __pmem *pmem_addr = pmem->virt_addr + pmem_off; if (rw == READ) { @@ -95,16 +101,23 @@ static long pmem_direct_access(struct block_device *bdev, sector_t sector, void __pmem **kaddr, unsigned long *pfn) { struct pmem_device *pmem = bdev->bd_disk->private_data; - size_t offset = sector << 9; - - if (!pmem) - return -ENODEV; + resource_size_t offset = sector * 512 + pmem->data_offset; + resource_size_t size; + + if (pmem->data_offset) { + /* + * Limit the direct_access() size to what is covered by + * the memmap + */ + size = (pmem->size - offset) & ~ND_PFN_MASK; + } else + size = pmem->size - offset; /* FIXME convert DAX to comprehend that this mapping has a lifetime */ *kaddr = pmem->virt_addr + offset; *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT; - return pmem->size - offset; + return size; } static const struct block_device_operations pmem_fops = { @@ -144,13 +157,16 @@ static struct pmem_device *pmem_alloc(struct device *dev, static void pmem_detach_disk(struct pmem_device *pmem) { + if (!pmem->pmem_disk) + return; + del_gendisk(pmem->pmem_disk); put_disk(pmem->pmem_disk); blk_cleanup_queue(pmem->pmem_queue); } -static int pmem_attach_disk(struct nd_namespace_common *ndns, - struct pmem_device *pmem) +static int pmem_attach_disk(struct device *dev, + struct nd_namespace_common *ndns, struct pmem_device *pmem) { struct gendisk *disk; @@ -177,8 +193,8 @@ static int pmem_attach_disk(struct nd_namespace_common *ndns, disk->queue = pmem->pmem_queue; disk->flags = GENHD_FL_EXT_DEVT; nvdimm_namespace_disk_name(ndns, disk->disk_name); - disk->driverfs_dev = &ndns->dev; - set_capacity(disk, pmem->size >> 9); + disk->driverfs_dev = dev; + set_capacity(disk, (pmem->size - pmem->data_offset) / 512); pmem->pmem_disk = disk; add_disk(disk); @@ -207,6 +223,154 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns, return 0; } +static int nd_pfn_init(struct nd_pfn *nd_pfn) +{ + struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL); + struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev); + struct nd_namespace_common *ndns = nd_pfn->ndns; + struct nd_region *nd_region; + unsigned long npfns; + phys_addr_t offset; + u64 checksum; + int rc; + + if (!pfn_sb) + return -ENOMEM; + + nd_pfn->pfn_sb = pfn_sb; + rc = nd_pfn_validate(nd_pfn); + if (rc == 0 || rc == -EBUSY) + return rc; + + /* section alignment for simple hotplug */ + if (nvdimm_namespace_capacity(ndns) < ND_PFN_ALIGN + || pmem->phys_addr & ND_PFN_MASK) + return -ENODEV; + + nd_region = to_nd_region(nd_pfn->dev.parent); + if (nd_region->ro) { + dev_info(&nd_pfn->dev, + "%s is read-only, unable to init metadata\n", + dev_name(&nd_region->dev)); + goto err; + } + + memset(pfn_sb, 0, sizeof(*pfn_sb)); + npfns = (pmem->size - SZ_8K) / SZ_4K; + /* + * Note, we use 64 here for the standard size of struct page, + * debugging options may cause it to be larger in which case the + * implementation will limit the pfns advertised through + * ->direct_access() to those that are included in the memmap. + */ + if (nd_pfn->mode == PFN_MODE_PMEM) + offset = ALIGN(SZ_8K + 64 * npfns, PMD_SIZE); + else if (nd_pfn->mode == PFN_MODE_RAM) + offset = SZ_8K; + else + goto err; + + npfns = (pmem->size - offset) / SZ_4K; + pfn_sb->mode = cpu_to_le32(nd_pfn->mode); + pfn_sb->dataoff = cpu_to_le64(offset); + pfn_sb->npfns = cpu_to_le64(npfns); + memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN); + memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); + pfn_sb->version_major = cpu_to_le16(1); + checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); + pfn_sb->checksum = cpu_to_le64(checksum); + + rc = nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb)); + if (rc) + goto err; + + return 0; + err: + nd_pfn->pfn_sb = NULL; + kfree(pfn_sb); + return -ENXIO; +} + +static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); + struct pmem_device *pmem; + + /* free pmem disk */ + pmem = dev_get_drvdata(&nd_pfn->dev); + pmem_detach_disk(pmem); + + /* release nd_pfn resources */ + kfree(nd_pfn->pfn_sb); + nd_pfn->pfn_sb = NULL; + + return 0; +} + +static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); + struct device *dev = &nd_pfn->dev; + struct vmem_altmap *altmap; + struct nd_region *nd_region; + struct nd_pfn_sb *pfn_sb; + struct pmem_device *pmem; + phys_addr_t offset; + int rc; + + if (!nd_pfn->uuid || !nd_pfn->ndns) + return -ENODEV; + + nd_region = to_nd_region(dev->parent); + rc = nd_pfn_init(nd_pfn); + if (rc) + return rc; + + if (PAGE_SIZE != SZ_4K) { + dev_err(dev, "only supported on systems with 4K PAGE_SIZE\n"); + return -ENXIO; + } + if (nsio->res.start & ND_PFN_MASK) { + dev_err(dev, "%s not memory hotplug section aligned\n", + dev_name(&ndns->dev)); + return -ENXIO; + } + + pfn_sb = nd_pfn->pfn_sb; + offset = le64_to_cpu(pfn_sb->dataoff); + nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode); + if (nd_pfn->mode == PFN_MODE_RAM) { + if (offset != SZ_8K) + return -EINVAL; + nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); + altmap = NULL; + } else { + rc = -ENXIO; + goto err; + } + + /* establish pfn range for lookup, and switch to direct map */ + pmem = dev_get_drvdata(dev); + memunmap_pmem(dev, pmem->virt_addr); + pmem->virt_addr = (void __pmem *)devm_memremap_pages(dev, &nsio->res); + if (IS_ERR(pmem->virt_addr)) { + rc = PTR_ERR(pmem->virt_addr); + goto err; + } + + /* attach pmem disk in "pfn-mode" */ + pmem->data_offset = offset; + rc = pmem_attach_disk(dev, ndns, pmem); + if (rc) + goto err; + + return rc; + err: + nvdimm_namespace_detach_pfn(ndns); + return rc; +} + static int nd_pmem_probe(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); @@ -223,16 +387,27 @@ static int nd_pmem_probe(struct device *dev) if (IS_ERR(pmem)) return PTR_ERR(pmem); + pmem->ndns = ndns; dev_set_drvdata(dev, pmem); ndns->rw_bytes = pmem_rw_bytes; if (is_nd_btt(dev)) return nvdimm_namespace_attach_btt(ndns); - if (nd_btt_probe(ndns, pmem) == 0) + if (is_nd_pfn(dev)) + return nvdimm_namespace_attach_pfn(ndns); + + if (nd_btt_probe(ndns, pmem) == 0) { /* we'll come back as btt-pmem */ return -ENXIO; - return pmem_attach_disk(ndns, pmem); + } + + if (nd_pfn_probe(ndns, pmem) == 0) { + /* we'll come back as pfn-pmem */ + return -ENXIO; + } + + return pmem_attach_disk(dev, ndns, pmem); } static int nd_pmem_remove(struct device *dev) @@ -240,7 +415,9 @@ static int nd_pmem_remove(struct device *dev) struct pmem_device *pmem = dev_get_drvdata(dev); if (is_nd_btt(dev)) - nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); + nvdimm_namespace_detach_btt(pmem->ndns); + else if (is_nd_pfn(dev)) + nvdimm_namespace_detach_pfn(pmem->ndns); else pmem_detach_disk(pmem); diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 99e70f0729be..38b00ecb2ed5 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -2,6 +2,7 @@ ldflags-y += --wrap=ioremap_wc ldflags-y += --wrap=memremap ldflags-y += --wrap=devm_ioremap_nocache ldflags-y += --wrap=devm_memremap +ldflags-y += --wrap=devm_memunmap ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap ldflags-y += --wrap=memunmap diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 179d2289f3a8..b7251314bbc0 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -108,6 +108,19 @@ void *__wrap_memremap(resource_size_t offset, size_t size, } EXPORT_SYMBOL(__wrap_memremap); +void __wrap_devm_memunmap(struct device *dev, void *addr) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res((unsigned long) addr); + rcu_read_unlock(); + if (nfit_res) + return; + return devm_memunmap(dev, addr); +} +EXPORT_SYMBOL(__wrap_devm_memunmap); + void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size) { return __nfit_test_ioremap(offset, size, ioremap_nocache); -- cgit v1.2.3 From 004f1afbe199e6ab20805b95aefd83ccd24bc5c7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 24 Aug 2015 19:20:23 -0400 Subject: libnvdimm, pmem: direct map legacy pmem by default The expectation is that the legacy / non-standard pmem discovery method (e820 type-12) will only ever be used to describe small quantities of persistent memory. Larger capacities will be described via the ACPI NFIT. When "allocate struct page from pmem" support is added this default policy can be overridden by assigning a legacy pmem namespace to a pfn device, however this would be only be necessary if a platform used the legacy mechanism to define a very large range. Cc: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/nvdimm/e820.c | 1 + drivers/nvdimm/namespace_devs.c | 35 ++++++++++++++++++++++++++++++----- drivers/nvdimm/nd.h | 2 ++ drivers/nvdimm/pmem.c | 15 ++++++++++++--- drivers/nvdimm/region_devs.c | 1 + include/linux/libnvdimm.h | 4 ++++ 6 files changed, 50 insertions(+), 8 deletions(-) (limited to 'drivers/nvdimm/pmem.c') diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c index 1b5743ad92db..8282db2ef99e 100644 --- a/drivers/nvdimm/e820.c +++ b/drivers/nvdimm/e820.c @@ -49,6 +49,7 @@ static int e820_pmem_probe(struct platform_device *pdev) ndr_desc.res = p; ndr_desc.attr_groups = e820_pmem_region_attribute_groups; ndr_desc.numa_node = NUMA_NO_NODE; + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc)) goto err; } diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 9303ca29be9b..0955b2cb10fe 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "nd-core.h" #include "nd.h" @@ -76,11 +77,32 @@ static bool is_namespace_io(struct device *dev) return dev ? dev->type == &namespace_io_device_type : false; } +bool pmem_should_map_pages(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + + if (!IS_ENABLED(CONFIG_ZONE_DEVICE)) + return false; + + if (!test_bit(ND_REGION_PAGEMAP, &nd_region->flags)) + return false; + + if (is_nd_pfn(dev) || is_nd_btt(dev)) + return false; + +#ifdef ARCH_MEMREMAP_PMEM + return ARCH_MEMREMAP_PMEM == MEMREMAP_WB; +#else + return false; +#endif +} +EXPORT_SYMBOL(pmem_should_map_pages); + const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, char *name) { struct nd_region *nd_region = to_nd_region(ndns->dev.parent); - const char *suffix = ""; + const char *suffix = NULL; if (ndns->claim) { if (is_nd_btt(ndns->claim)) @@ -93,13 +115,16 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, dev_name(ndns->claim)); } - if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) - sprintf(name, "pmem%d%s", nd_region->id, suffix); - else if (is_namespace_blk(&ndns->dev)) { + if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) { + if (!suffix && pmem_should_map_pages(&ndns->dev)) + suffix = "m"; + sprintf(name, "pmem%d%s", nd_region->id, suffix ? suffix : ""); + } else if (is_namespace_blk(&ndns->dev)) { struct nd_namespace_blk *nsblk; nsblk = to_nd_namespace_blk(&ndns->dev); - sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id, suffix); + sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id, + suffix ? suffix : ""); } else { return NULL; } diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 83e5d0935012..417e521d299c 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -100,6 +100,7 @@ struct nd_region { struct ida ns_ida; struct ida btt_ida; struct ida pfn_ida; + unsigned long flags; struct device *ns_seed; struct device *btt_seed; struct device *pfn_seed; @@ -276,4 +277,5 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) void nd_iostat_end(struct bio *bio, unsigned long start); resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk); const u8 *nd_dev_to_uuid(struct device *dev); +bool pmem_should_map_pages(struct device *dev); #endif /* __ND_H__ */ diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 2f885e5d9c36..c5ae2e579288 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -148,9 +148,18 @@ static struct pmem_device *pmem_alloc(struct device *dev, return ERR_PTR(-EBUSY); } - pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr, pmem->size); - if (!pmem->virt_addr) - return ERR_PTR(-ENXIO); + if (pmem_should_map_pages(dev)) { + void *addr = devm_memremap_pages(dev, res); + + if (IS_ERR(addr)) + return addr; + pmem->virt_addr = (void __pmem *) addr; + } else { + pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr, + pmem->size); + if (!pmem->virt_addr) + return ERR_PTR(-ENXIO); + } return pmem; } diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index da4338154ad2..529f3f02e7b2 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -758,6 +758,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, nd_region->provider_data = ndr_desc->provider_data; nd_region->nd_set = ndr_desc->nd_set; nd_region->num_lanes = ndr_desc->num_lanes; + nd_region->flags = ndr_desc->flags; nd_region->ro = ro; nd_region->numa_node = ndr_desc->numa_node; ida_init(&nd_region->ns_ida); diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 75e3af01ee32..3f021dc5da8c 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -31,6 +31,9 @@ enum { ND_CMD_ARS_STATUS_MAX = SZ_4K, ND_MAX_MAPPINGS = 32, + /* region flag indicating to direct-map persistent memory by default */ + ND_REGION_PAGEMAP = 0, + /* mark newly adjusted resources as requiring a label update */ DPA_RESOURCE_ADJUSTED = 1 << 0, }; @@ -91,6 +94,7 @@ struct nd_region_desc { void *provider_data; int num_lanes; int numa_node; + unsigned long flags; }; struct nvdimm_bus; -- cgit v1.2.3