diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-15 21:37:06 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-15 21:37:06 +0300 |
commit | f56caedaf94f9ced5dbfcdb0060a3e788d2078af (patch) | |
tree | e213532d1b3d32f9f0e81948f3b23804baff287d /drivers/dax | |
parent | a33f5c380c4bd3fa5278d690421b72052456d9fe (diff) | |
parent | 76fd0285b447991267e838842c0be7395eb454bb (diff) | |
download | linux-f56caedaf94f9ced5dbfcdb0060a3e788d2078af.tar.xz |
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:
"146 patches.
Subsystems affected by this patch series: kthread, ia64, scripts,
ntfs, squashfs, ocfs2, vfs, and mm (slab-generic, slab, kmemleak,
dax, kasan, debug, pagecache, gup, shmem, frontswap, memremap,
memcg, selftests, pagemap, dma, vmalloc, memory-failure, hugetlb,
userfaultfd, vmscan, mempolicy, oom-kill, hugetlbfs, migration, thp,
ksm, page-poison, percpu, rmap, zswap, zram, cleanups, hmm, and
damon)"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (146 commits)
mm/damon: hide kernel pointer from tracepoint event
mm/damon/vaddr: hide kernel pointer from damon_va_three_regions() failure log
mm/damon/vaddr: use pr_debug() for damon_va_three_regions() failure logging
mm/damon/dbgfs: remove an unnecessary variable
mm/damon: move the implementation of damon_insert_region to damon.h
mm/damon: add access checking for hugetlb pages
Docs/admin-guide/mm/damon/usage: update for schemes statistics
mm/damon/dbgfs: support all DAMOS stats
Docs/admin-guide/mm/damon/reclaim: document statistics parameters
mm/damon/reclaim: provide reclamation statistics
mm/damon/schemes: account how many times quota limit has exceeded
mm/damon/schemes: account scheme actions that successfully applied
mm/damon: remove a mistakenly added comment for a future feature
Docs/admin-guide/mm/damon/usage: update for kdamond_pid and (mk|rm)_contexts
Docs/admin-guide/mm/damon/usage: mention tracepoint at the beginning
Docs/admin-guide/mm/damon/usage: remove redundant information
Docs/admin-guide/mm/damon/usage: update for scheme quotas and watermarks
mm/damon: convert macro functions to static inline functions
mm/damon: modify damon_rand() macro to static inline function
mm/damon: move damon_rand() definition into damon.h
...
Diffstat (limited to 'drivers/dax')
-rw-r--r-- | drivers/dax/bus.c | 32 | ||||
-rw-r--r-- | drivers/dax/bus.h | 1 | ||||
-rw-r--r-- | drivers/dax/device.c | 126 |
3 files changed, 109 insertions, 50 deletions
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index ee4568ef757c..1dad813ee4a6 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -127,11 +127,35 @@ ATTRIBUTE_GROUPS(dax_drv); static int dax_bus_match(struct device *dev, struct device_driver *drv); +/* + * Static dax regions are regions created by an external subsystem + * nvdimm where a single range is assigned. Its boundaries are by the external + * subsystem and are usually limited to one physical memory range. For example, + * for PMEM it is usually defined by NVDIMM Namespace boundaries (i.e. a + * single contiguous range) + * + * On dynamic dax regions, the assigned region can be partitioned by dax core + * into multiple subdivisions. A subdivision is represented into one + * /dev/daxN.M device composed by one or more potentially discontiguous ranges. + * + * When allocating a dax region, drivers must set whether it's static + * (IORESOURCE_DAX_STATIC). On static dax devices, the @pgmap is pre-assigned + * to dax core when calling devm_create_dev_dax(), whereas in dynamic dax + * devices it is NULL but afterwards allocated by dax core on device ->probe(). + * Care is needed to make sure that dynamic dax devices are torn down with a + * cleared @pgmap field (see kill_dev_dax()). + */ static bool is_static(struct dax_region *dax_region) { return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0; } +bool static_dev_dax(struct dev_dax *dev_dax) +{ + return is_static(dev_dax->region); +} +EXPORT_SYMBOL_GPL(static_dev_dax); + static u64 dev_dax_size(struct dev_dax *dev_dax) { u64 size = 0; @@ -361,6 +385,14 @@ void kill_dev_dax(struct dev_dax *dev_dax) kill_dax(dax_dev); unmap_mapping_range(inode->i_mapping, 0, 0, 1); + + /* + * Dynamic dax region have the pgmap allocated via dev_kzalloc() + * and thus freed by devm. Clear the pgmap to not have stale pgmap + * ranges on probe() from previous reconfigurations of region devices. + */ + if (!static_dev_dax(dev_dax)) + dev_dax->pgmap = NULL; } EXPORT_SYMBOL_GPL(kill_dev_dax); diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index 381cec9ff05c..fbb940293d6d 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -39,6 +39,7 @@ int __dax_driver_register(struct dax_device_driver *dax_drv, __dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) void dax_driver_unregister(struct dax_device_driver *dax_drv); void kill_dev_dax(struct dev_dax *dev_dax); +bool static_dev_dax(struct dev_dax *dev_dax); /* * While run_dax() is potentially a generic operation that could be diff --git a/drivers/dax/device.c b/drivers/dax/device.c index e58d597f0415..d33a0613ed0c 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -73,11 +73,39 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, return -1; } +static void dax_set_mapping(struct vm_fault *vmf, pfn_t pfn, + unsigned long fault_size) +{ + unsigned long i, nr_pages = fault_size / PAGE_SIZE; + struct file *filp = vmf->vma->vm_file; + struct dev_dax *dev_dax = filp->private_data; + pgoff_t pgoff; + + /* mapping is only set on the head */ + if (dev_dax->pgmap->vmemmap_shift) + nr_pages = 1; + + pgoff = linear_page_index(vmf->vma, + ALIGN(vmf->address, fault_size)); + + for (i = 0; i < nr_pages; i++) { + struct page *page = pfn_to_page(pfn_t_to_pfn(pfn) + i); + + page = compound_head(page); + if (page->mapping) + continue; + + page->mapping = filp->f_mapping; + page->index = pgoff + i; + } +} + static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax, - struct vm_fault *vmf, pfn_t *pfn) + struct vm_fault *vmf) { struct device *dev = &dev_dax->dev; phys_addr_t phys; + pfn_t pfn; unsigned int fault_size = PAGE_SIZE; if (check_vma(dev_dax, vmf->vma, __func__)) @@ -98,18 +126,21 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax, return VM_FAULT_SIGBUS; } - *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); + pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); - return vmf_insert_mixed(vmf->vma, vmf->address, *pfn); + dax_set_mapping(vmf, pfn, fault_size); + + return vmf_insert_mixed(vmf->vma, vmf->address, pfn); } static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax, - struct vm_fault *vmf, pfn_t *pfn) + struct vm_fault *vmf) { unsigned long pmd_addr = vmf->address & PMD_MASK; struct device *dev = &dev_dax->dev; phys_addr_t phys; pgoff_t pgoff; + pfn_t pfn; unsigned int fault_size = PMD_SIZE; if (check_vma(dev_dax, vmf->vma, __func__)) @@ -138,19 +169,22 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax, return VM_FAULT_SIGBUS; } - *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); + pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); - return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE); + dax_set_mapping(vmf, pfn, fault_size); + + return vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE); } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, - struct vm_fault *vmf, pfn_t *pfn) + struct vm_fault *vmf) { unsigned long pud_addr = vmf->address & PUD_MASK; struct device *dev = &dev_dax->dev; phys_addr_t phys; pgoff_t pgoff; + pfn_t pfn; unsigned int fault_size = PUD_SIZE; @@ -180,13 +214,15 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, return VM_FAULT_SIGBUS; } - *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); + pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); - return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE); + dax_set_mapping(vmf, pfn, fault_size); + + return vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE); } #else static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, - struct vm_fault *vmf, pfn_t *pfn) + struct vm_fault *vmf) { return VM_FAULT_FALLBACK; } @@ -196,10 +232,8 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, enum page_entry_size pe_size) { struct file *filp = vmf->vma->vm_file; - unsigned long fault_size; vm_fault_t rc = VM_FAULT_SIGBUS; int id; - pfn_t pfn; struct dev_dax *dev_dax = filp->private_data; dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm, @@ -209,43 +243,18 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, id = dax_read_lock(); switch (pe_size) { case PE_SIZE_PTE: - fault_size = PAGE_SIZE; - rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn); + rc = __dev_dax_pte_fault(dev_dax, vmf); break; case PE_SIZE_PMD: - fault_size = PMD_SIZE; - rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn); + rc = __dev_dax_pmd_fault(dev_dax, vmf); break; case PE_SIZE_PUD: - fault_size = PUD_SIZE; - rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn); + rc = __dev_dax_pud_fault(dev_dax, vmf); break; default: rc = VM_FAULT_SIGBUS; } - if (rc == VM_FAULT_NOPAGE) { - unsigned long i; - pgoff_t pgoff; - - /* - * In the device-dax case the only possibility for a - * VM_FAULT_NOPAGE result is when device-dax capacity is - * mapped. No need to consider the zero page, or racing - * conflicting mappings. - */ - pgoff = linear_page_index(vmf->vma, vmf->address - & ~(fault_size - 1)); - for (i = 0; i < fault_size / PAGE_SIZE; i++) { - struct page *page; - - page = pfn_to_page(pfn_t_to_pfn(pfn) + i); - if (page->mapping) - continue; - page->mapping = filp->f_mapping; - page->index = pgoff + i; - } - } dax_read_unlock(id); return rc; @@ -398,17 +407,34 @@ int dev_dax_probe(struct dev_dax *dev_dax) void *addr; int rc, i; - pgmap = dev_dax->pgmap; - if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1, - "static pgmap / multi-range device conflict\n")) - return -EINVAL; + if (static_dev_dax(dev_dax)) { + if (dev_dax->nr_range > 1) { + dev_warn(dev, + "static pgmap / multi-range device conflict\n"); + return -EINVAL; + } - if (!pgmap) { - pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range) - * (dev_dax->nr_range - 1), GFP_KERNEL); + pgmap = dev_dax->pgmap; + } else { + if (dev_dax->pgmap) { + dev_warn(dev, + "dynamic-dax with pre-populated page map\n"); + return -EINVAL; + } + + pgmap = devm_kzalloc(dev, + struct_size(pgmap, ranges, dev_dax->nr_range - 1), + GFP_KERNEL); if (!pgmap) return -ENOMEM; + pgmap->nr_range = dev_dax->nr_range; + dev_dax->pgmap = pgmap; + + for (i = 0; i < dev_dax->nr_range; i++) { + struct range *range = &dev_dax->ranges[i].range; + pgmap->ranges[i] = *range; + } } for (i = 0; i < dev_dax->nr_range; i++) { @@ -420,12 +446,12 @@ int dev_dax_probe(struct dev_dax *dev_dax) i, range->start, range->end); return -EBUSY; } - /* don't update the range for static pgmap */ - if (!dev_dax->pgmap) - pgmap->ranges[i] = *range; } pgmap->type = MEMORY_DEVICE_GENERIC; + if (dev_dax->align > PAGE_SIZE) + pgmap->vmemmap_shift = + order_base_2(dev_dax->align >> PAGE_SHIFT); addr = devm_memremap_pages(dev, pgmap); if (IS_ERR(addr)) return PTR_ERR(addr); |