diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-03-04 01:32:00 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-03-04 01:32:00 +0300 |
commit | 20f14172cbb8d711a47d336355fa947631444f60 (patch) | |
tree | 4991434323d5568d72037e7d9557ff4a0cdc70e3 | |
parent | 0eb3412a68c354c0aa4adc4a24ec533154bd2172 (diff) | |
parent | 949b93250a566cc7a578b4f829cf76b70d19a62c (diff) | |
download | linux-20f14172cbb8d711a47d336355fa947631444f60.tar.xz |
Merge branch 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm fixes from Dan Williams:
"A 4.16 regression fix, three fixes for -stable, and a cleanup fix:
- During the merge window support for the new ACPI NVDIMM Platform
Capabilities structure disabled support for "deep flush", a
force-unit- access like mechanism for persistent memory. Restore
that mechanism.
- VFIO like RDMA is yet one more memory registration / pinning
interface that is incompatible with Filesystem-DAX. Disable long
term pins of Filesystem-DAX mappings via VFIO.
- The Filesystem-DAX detection to prevent long terms pins mistakenly
also disabled Device-DAX pins which are not subject to the same
block- map collision concerns.
- Similar to the setup path, softlockup warnings can trigger in the
shutdown path for large persistent memory namespaces. Teach
for_each_device_pfn() to perform cond_resched() in all cases.
- Boaz noticed that the might_sleep() in dax_direct_access() is stale
as of the v4.15 kernel.
These have received a build success notification from the 0day robot,
and the longterm pin fixes have appeared in -next. However, I recently
rebased the tree to remove some other fixes that need to be reworked
after review feedback.
* 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
memremap: fix softlockup reports at teardown
libnvdimm: re-enable deep flush for pmem devices via fsync()
vfio: disable filesystem-dax page pinning
dax: fix vma_is_fsdax() helper
dax: ->direct_access does not sleep anymore
-rw-r--r-- | drivers/dax/super.c | 6 | ||||
-rw-r--r-- | drivers/nvdimm/pmem.c | 3 | ||||
-rw-r--r-- | drivers/vfio/vfio_iommu_type1.c | 18 | ||||
-rw-r--r-- | include/linux/fs.h | 2 | ||||
-rw-r--r-- | kernel/memremap.c | 15 |
5 files changed, 27 insertions, 17 deletions
diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 473af694ad1c..ecdc292aa4e4 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -246,12 +246,6 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, { long avail; - /* - * The device driver is allowed to sleep, in order to make the - * memory directly accessible. - */ - might_sleep(); - if (!dax_dev) return -EOPNOTSUPP; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 10041ac4032c..06f8dcc52ca6 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -335,8 +335,7 @@ static int pmem_attach_disk(struct device *dev, dev_warn(dev, "unable to guarantee persistence of writes\n"); fua = 0; } - wbc = nvdimm_has_cache(nd_region) && - !test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags); + wbc = nvdimm_has_cache(nd_region); if (!devm_request_mem_region(dev, res->start, resource_size(res), dev_name(&ndns->dev))) { diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index e30e29ae4819..45657e2b1ff7 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -338,11 +338,12 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, { struct page *page[1]; struct vm_area_struct *vma; + struct vm_area_struct *vmas[1]; int ret; if (mm == current->mm) { - ret = get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), - page); + ret = get_user_pages_longterm(vaddr, 1, !!(prot & IOMMU_WRITE), + page, vmas); } else { unsigned int flags = 0; @@ -351,7 +352,18 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, down_read(&mm->mmap_sem); ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags, page, - NULL, NULL); + vmas, NULL); + /* + * The lifetime of a vaddr_get_pfn() page pin is + * userspace-controlled. In the fs-dax case this could + * lead to indefinite stalls in filesystem operations. + * Disallow attempts to pin fs-dax pages via this + * interface. + */ + if (ret > 0 && vma_is_fsdax(vmas[0])) { + ret = -EOPNOTSUPP; + put_page(page[0]); + } up_read(&mm->mmap_sem); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 2a815560fda0..79c413985305 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3198,7 +3198,7 @@ static inline bool vma_is_fsdax(struct vm_area_struct *vma) if (!vma_is_dax(vma)) return false; inode = file_inode(vma->vm_file); - if (inode->i_mode == S_IFCHR) + if (S_ISCHR(inode->i_mode)) return false; /* device-dax */ return true; } diff --git a/kernel/memremap.c b/kernel/memremap.c index 4849be5f9b3c..4dd4274cabe2 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -275,8 +275,15 @@ static unsigned long pfn_end(struct dev_pagemap *pgmap) return (res->start + resource_size(res)) >> PAGE_SHIFT; } +static unsigned long pfn_next(unsigned long pfn) +{ + if (pfn % 1024 == 0) + cond_resched(); + return pfn + 1; +} + #define for_each_device_pfn(pfn, map) \ - for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++) + for (pfn = pfn_first(map); pfn < pfn_end(map); pfn = pfn_next(pfn)) static void devm_memremap_pages_release(void *data) { @@ -337,10 +344,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) resource_size_t align_start, align_size, align_end; struct vmem_altmap *altmap = pgmap->altmap_valid ? &pgmap->altmap : NULL; + struct resource *res = &pgmap->res; unsigned long pfn, pgoff, order; pgprot_t pgprot = PAGE_KERNEL; - int error, nid, is_ram, i = 0; - struct resource *res = &pgmap->res; + int error, nid, is_ram; align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) @@ -409,8 +416,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) list_del(&page->lru); page->pgmap = pgmap; percpu_ref_get(pgmap->ref); - if (!(++i % 1024)) - cond_resched(); } devm_add_action(dev, devm_memremap_pages_release, pgmap); |