Diffstat (limited to 'fs/dax.c')
-rw-r--r--  fs/dax.c  147
1 file changed, 52 insertions, 95 deletions
@@ -20,12 +20,11 @@
 #include <linux/sched/signal.h>
 #include <linux/uio.h>
 #include <linux/vmstat.h>
-#include <linux/pfn_t.h>
 #include <linux/sizes.h>
 #include <linux/mmu_notifier.h>
 #include <linux/iomap.h>
 #include <linux/rmap.h>
-#include <asm/pgalloc.h>
+#include <linux/pgalloc.h>

 #define CREATE_TRACE_POINTS
 #include <trace/events/fs_dax.h>
@@ -76,9 +75,9 @@ static struct folio *dax_to_folio(void *entry)
 	return page_folio(pfn_to_page(dax_to_pfn(entry)));
 }

-static void *dax_make_entry(pfn_t pfn, unsigned long flags)
+static void *dax_make_entry(unsigned long pfn, unsigned long flags)
 {
-	return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
+	return xa_mk_value(flags | (pfn << DAX_SHIFT));
 }

 static bool dax_is_locked(void *entry)
@@ -449,9 +448,6 @@ static void dax_associate_entry(void *entry, struct address_space *mapping,
 	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
 		return;

-	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
-		return;
-
 	index = linear_page_index(vma, address & ~(size - 1));
 	if (shared && (folio->mapping || dax_folio_is_shared(folio))) {
 		if (folio->mapping)
@@ -474,9 +470,6 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
 {
 	struct folio *folio = dax_to_folio(entry);

-	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
-		return;
-
 	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
 		return;

@@ -719,7 +712,7 @@ retry:
 		if (order > 0)
 			flags |= DAX_PMD;
-		entry = dax_make_entry(pfn_to_pfn_t(0), flags);
+		entry = dax_make_entry(0, flags);
 		dax_lock_entry(xas, entry);
 		if (xas_error(xas))
 			goto out_unlock;
@@ -768,12 +761,6 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping,
 	pgoff_t end_idx;
 	XA_STATE(xas, &mapping->i_pages, start_idx);

-	/*
-	 * In the 'limited' case get_user_pages() for dax is disabled.
-	 */
-	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
-		return NULL;
-
 	if (!dax_mapping(mapping))
 		return NULL;

@@ -1053,7 +1040,7 @@ static bool dax_fault_is_synchronous(const struct iomap_iter *iter,
  * appropriate.
  */
 static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
-		const struct iomap_iter *iter, void *entry, pfn_t pfn,
+		const struct iomap_iter *iter, void *entry, unsigned long pfn,
 		unsigned long flags)
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
@@ -1251,7 +1238,7 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);

 static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos,
-		size_t size, void **kaddr, pfn_t *pfnp)
+		size_t size, void **kaddr, unsigned long *pfnp)
 {
 	pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
 	int id, rc = 0;
@@ -1269,7 +1256,7 @@ static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos,
 	rc = -EINVAL;
 	if (PFN_PHYS(length) < size)
 		goto out;
-	if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
+	if (*pfnp & (PHYS_PFN(size)-1))
 		goto out;
 	rc = 0;

@@ -1373,12 +1360,12 @@ static vm_fault_t dax_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 {
 	struct inode *inode = iter->inode;
 	unsigned long vaddr = vmf->address;
-	pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
+	unsigned long pfn = my_zero_pfn(vaddr);
 	vm_fault_t ret;

 	*entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, DAX_ZERO_PAGE);

-	ret = vmf_insert_page_mkwrite(vmf, pfn_t_to_page(pfn), false);
+	ret = vmf_insert_page_mkwrite(vmf, pfn_to_page(pfn), false);
 	trace_dax_load_hole(inode, vmf, ret);
 	return ret;
 }
@@ -1388,51 +1375,24 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 		const struct iomap_iter *iter, void **entry)
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
-	unsigned long pmd_addr = vmf->address & PMD_MASK;
-	struct vm_area_struct *vma = vmf->vma;
 	struct inode *inode = mapping->host;
-	pgtable_t pgtable = NULL;
 	struct folio *zero_folio;
-	spinlock_t *ptl;
-	pmd_t pmd_entry;
-	pfn_t pfn;
+	vm_fault_t ret;

 	zero_folio = mm_get_huge_zero_folio(vmf->vma->vm_mm);

-	if (unlikely(!zero_folio))
-		goto fallback;
-
-	pfn = page_to_pfn_t(&zero_folio->page);
-	*entry = dax_insert_entry(xas, vmf, iter, *entry, pfn,
-			DAX_PMD | DAX_ZERO_PAGE);
-
-	if (arch_needs_pgtable_deposit()) {
-		pgtable = pte_alloc_one(vma->vm_mm);
-		if (!pgtable)
-			return VM_FAULT_OOM;
-	}
-
-	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
-	if (!pmd_none(*(vmf->pmd))) {
-		spin_unlock(ptl);
-		goto fallback;
+	if (unlikely(!zero_folio)) {
+		trace_dax_pmd_load_hole_fallback(inode, vmf, zero_folio, *entry);
+		return VM_FAULT_FALLBACK;
 	}

-	if (pgtable) {
-		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
-		mm_inc_nr_ptes(vma->vm_mm);
-	}
-	pmd_entry = folio_mk_pmd(zero_folio, vmf->vma->vm_page_prot);
-	set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
-	spin_unlock(ptl);
-	trace_dax_pmd_load_hole(inode, vmf, zero_folio, *entry);
-	return VM_FAULT_NOPAGE;
+	*entry = dax_insert_entry(xas, vmf, iter, *entry, folio_pfn(zero_folio),
+			DAX_PMD | DAX_ZERO_PAGE);

-fallback:
-	if (pgtable)
-		pte_free(vma->vm_mm, pgtable);
-	trace_dax_pmd_load_hole_fallback(inode, vmf, zero_folio, *entry);
-	return VM_FAULT_FALLBACK;
+	ret = vmf_insert_folio_pmd(vmf, zero_folio, false);
+	if (ret == VM_FAULT_NOPAGE)
+		trace_dax_pmd_load_hole(inode, vmf, zero_folio, *entry);
+	return ret;
 }
 #else
 static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
@@ -1547,7 +1507,7 @@ static int dax_zero_iter(struct iomap_iter *iter, bool *did_zero)

 	/* already zeroed? we're done. */
 	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
-		return iomap_iter_advance(iter, &length);
+		return iomap_iter_advance(iter, length);

 	/*
 	 * invalidate the pages whose sharing state is to be changed
@@ -1576,10 +1536,10 @@ static int dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
 		if (ret < 0)
 			return ret;

-		ret = iomap_iter_advance(iter, &length);
+		ret = iomap_iter_advance(iter, length);
 		if (ret)
 			return ret;
-	} while (length > 0);
+	} while ((length = iomap_length(iter)) > 0);

 	if (did_zero)
 		*did_zero = true;
@@ -1637,7 +1597,7 @@ static int dax_iomap_iter(struct iomap_iter *iomi, struct iov_iter *iter)

 		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN) {
 			done = iov_iter_zero(min(length, end - pos), iter);
-			return iomap_iter_advance(iomi, &done);
+			return iomap_iter_advance(iomi, done);
 		}
 	}

@@ -1721,12 +1681,12 @@ static int dax_iomap_iter(struct iomap_iter *iomi, struct iov_iter *iter)
 			xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
 					map_len, iter);

-		length = xfer;
-		ret = iomap_iter_advance(iomi, &length);
+		ret = iomap_iter_advance(iomi, xfer);
 		if (!ret && xfer == 0)
 			ret = -EFAULT;
 		if (xfer < map_len)
 			break;
+		length = iomap_length(iomi);
 	}
 	dax_read_unlock(id);

@@ -1756,13 +1716,16 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 	loff_t done = 0;
 	int ret;

+	if (WARN_ON_ONCE(iocb->ki_flags & IOCB_ATOMIC))
+		return -EIO;
+
 	if (!iomi.len)
 		return 0;

 	if (iov_iter_rw(iter) == WRITE) {
 		lockdep_assert_held_write(&iomi.inode->i_rwsem);
 		iomi.flags |= IOMAP_WRITE;
-	} else {
+	} else if (!sb_rdonly(iomi.inode->i_sb)) {
 		lockdep_assert_held(&iomi.inode->i_rwsem);
 	}

@@ -1791,7 +1754,8 @@ static vm_fault_t dax_fault_return(int error)
  * insertion for now and return the pfn so that caller can insert it after the
  * fsync is done.
  */
-static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn)
+static vm_fault_t dax_fault_synchronous_pfnp(unsigned long *pfnp,
+		unsigned long pfn)
 {
 	if (WARN_ON_ONCE(!pfnp))
 		return VM_FAULT_SIGBUS;
@@ -1839,7 +1803,7 @@ static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf,
 * @pmd: distinguish whether it is a pmd fault
 */
 static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
-		const struct iomap_iter *iter, pfn_t *pfnp,
+		const struct iomap_iter *iter, unsigned long *pfnp,
 		struct xa_state *xas, void **entry, bool pmd)
 {
 	const struct iomap *iomap = &iter->iomap;
@@ -1850,7 +1814,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
 	unsigned long entry_flags = pmd ? DAX_PMD : 0;
 	struct folio *folio;
 	int ret, err = 0;
-	pfn_t pfn;
+	unsigned long pfn;
 	void *kaddr;

 	if (!pmd && vmf->cow_page)
@@ -1887,16 +1851,15 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,

 	folio_ref_inc(folio);
 	if (pmd)
-		ret = vmf_insert_folio_pmd(vmf, pfn_folio(pfn_t_to_pfn(pfn)),
-				write);
+		ret = vmf_insert_folio_pmd(vmf, pfn_folio(pfn), write);
 	else
-		ret = vmf_insert_page_mkwrite(vmf, pfn_t_to_page(pfn), write);
+		ret = vmf_insert_page_mkwrite(vmf, pfn_to_page(pfn), write);
 	folio_put(folio);

 	return ret;
 }

-static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
+static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, unsigned long *pfnp,
 		int *iomap_errp, const struct iomap_ops *ops)
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
@@ -1937,7 +1900,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	 * the PTE we need to set up. If so just return and the fault will be
 	 * retried.
 	 */
-	if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
+	if (pmd_trans_huge(*vmf->pmd)) {
 		ret = VM_FAULT_NOPAGE;
 		goto unlock_entry;
 	}
@@ -1956,10 +1919,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 			ret |= VM_FAULT_MAJOR;
 		}

-		if (!(ret & VM_FAULT_ERROR)) {
-			u64 length = PAGE_SIZE;
-			iter.status = iomap_iter_advance(&iter, &length);
-		}
+		if (!(ret & VM_FAULT_ERROR))
+			iter.status = iomap_iter_advance(&iter, PAGE_SIZE);
 	}

 	if (iomap_errp)
@@ -2008,7 +1969,7 @@ static bool dax_fault_check_fallback(struct vm_fault *vmf, struct xa_state *xas,
 	return false;
 }

-static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
+static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, unsigned long *pfnp,
 		const struct iomap_ops *ops)
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
@@ -2060,8 +2021,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	 * the PMD we need to set up. If so just return and the fault will be
 	 * retried.
 	 */
-	if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
-			!pmd_devmap(*vmf->pmd)) {
+	if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd)) {
 		ret = 0;
 		goto unlock_entry;
 	}
@@ -2072,10 +2032,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 			continue; /* actually breaks out of the loop */

 		ret = dax_fault_iter(vmf, &iter, pfnp, &xas, &entry, true);
-		if (ret != VM_FAULT_FALLBACK) {
-			u64 length = PMD_SIZE;
-			iter.status = iomap_iter_advance(&iter, &length);
-		}
+		if (ret != VM_FAULT_FALLBACK)
+			iter.status = iomap_iter_advance(&iter, PMD_SIZE);
 	}

 unlock_entry:
@@ -2090,7 +2048,7 @@ out:
 	return ret;
 }
 #else
-static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
+static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, unsigned long *pfnp,
 		const struct iomap_ops *ops)
 {
 	return VM_FAULT_FALLBACK;
@@ -2111,7 +2069,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 * successfully.
 */
 vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order,
-		pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops)
+		unsigned long *pfnp, int *iomap_errp,
+		const struct iomap_ops *ops)
 {
 	if (order == 0)
 		return dax_iomap_pte_fault(vmf, pfnp, iomap_errp, ops);
@@ -2131,8 +2090,8 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault);
 * This function inserts a writeable PTE or PMD entry into the page tables
 * for an mmaped DAX file. It also marks the page cache entry as dirty.
 */
-static vm_fault_t
-dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
+static vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf,
+		unsigned long pfn, unsigned int order)
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
 	XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
@@ -2154,7 +2113,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
 	xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
 	dax_lock_entry(&xas, entry);
 	xas_unlock_irq(&xas);
-	folio = pfn_folio(pfn_t_to_pfn(pfn));
+	folio = pfn_folio(pfn);
 	folio_ref_inc(folio);
 	if (order == 0)
 		ret = vmf_insert_page_mkwrite(vmf, &folio->page, true);
@@ -2181,7 +2140,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
 * table entry.
 */
 vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, unsigned int order,
-		pfn_t pfn)
+		unsigned long pfn)
 {
 	int err;
 	loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
@@ -2200,7 +2159,6 @@ static int dax_range_compare_iter(struct iomap_iter *it_src,
 	const struct iomap *smap = &it_src->iomap;
 	const struct iomap *dmap = &it_dest->iomap;
 	loff_t pos1 = it_src->pos, pos2 = it_dest->pos;
-	u64 dest_len;
 	void *saddr, *daddr;
 	int id, ret;

@@ -2233,10 +2191,9 @@ static int dax_range_compare_iter(struct iomap_iter *it_src,
 	dax_read_unlock(id);

advance:
-	dest_len = len;
-	ret = iomap_iter_advance(it_src, &len);
+	ret = iomap_iter_advance(it_src, len);
 	if (!ret)
-		ret = iomap_iter_advance(it_dest, &dest_len);
+		ret = iomap_iter_advance(it_dest, len);
 	return ret;

 out_unlock:
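
For context on the pfn_t removal above, the entry packing that survives the change can be shown with a small standalone sketch. The names and constants below (SKETCH_DAX_SHIFT, the flag value) are illustrative stand-ins rather than the fs/dax.c definitions, and plain integer arithmetic stands in for xa_mk_value()/xa_to_value(); the point is only that a raw unsigned long pfn shifted above the flag bits round-trips cleanly, which is why dax_make_entry() and its callers no longer need the pfn_t wrapper.

/* Minimal sketch of pfn-in-value-entry packing; constants are illustrative. */
#include <assert.h>
#include <stdio.h>

#define SKETCH_DAX_SHIFT	4		/* assumed size of the low flag space */
#define SKETCH_DAX_PMD		(1UL << 1)	/* assumed flag bit */

/* Pack a raw pfn plus flags, roughly as dax_make_entry() now does. */
static unsigned long sketch_make_entry(unsigned long pfn, unsigned long flags)
{
	return flags | (pfn << SKETCH_DAX_SHIFT);
}

/* Recover the pfn, roughly as dax_to_pfn() does by shifting back down. */
static unsigned long sketch_to_pfn(unsigned long entry)
{
	return entry >> SKETCH_DAX_SHIFT;
}

int main(void)
{
	unsigned long pfn = 0x12345;
	unsigned long entry = sketch_make_entry(pfn, SKETCH_DAX_PMD);

	assert(sketch_to_pfn(entry) == pfn);	/* flags stay below the shift */
	printf("pfn 0x%lx -> entry 0x%lx -> pfn 0x%lx\n",
	       pfn, entry, sketch_to_pfn(entry));
	return 0;
}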
