summaryrefslogtreecommitdiff
path: root/fs/dax.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dax.c')
-rw-r--r--fs/dax.c147
1 files changed, 52 insertions, 95 deletions
diff --git a/fs/dax.c b/fs/dax.c
index ea0c35794bf9..289e6254aa30 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -20,12 +20,11 @@
#include <linux/sched/signal.h>
#include <linux/uio.h>
#include <linux/vmstat.h>
-#include <linux/pfn_t.h>
#include <linux/sizes.h>
#include <linux/mmu_notifier.h>
#include <linux/iomap.h>
#include <linux/rmap.h>
-#include <asm/pgalloc.h>
+#include <linux/pgalloc.h>
#define CREATE_TRACE_POINTS
#include <trace/events/fs_dax.h>
@@ -76,9 +75,9 @@ static struct folio *dax_to_folio(void *entry)
return page_folio(pfn_to_page(dax_to_pfn(entry)));
}
-static void *dax_make_entry(pfn_t pfn, unsigned long flags)
+static void *dax_make_entry(unsigned long pfn, unsigned long flags)
{
- return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
+ return xa_mk_value(flags | (pfn << DAX_SHIFT));
}
static bool dax_is_locked(void *entry)
@@ -449,9 +448,6 @@ static void dax_associate_entry(void *entry, struct address_space *mapping,
if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
return;
- if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
- return;
-
index = linear_page_index(vma, address & ~(size - 1));
if (shared && (folio->mapping || dax_folio_is_shared(folio))) {
if (folio->mapping)
@@ -474,9 +470,6 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
{
struct folio *folio = dax_to_folio(entry);
- if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
- return;
-
if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
return;
@@ -719,7 +712,7 @@ retry:
if (order > 0)
flags |= DAX_PMD;
- entry = dax_make_entry(pfn_to_pfn_t(0), flags);
+ entry = dax_make_entry(0, flags);
dax_lock_entry(xas, entry);
if (xas_error(xas))
goto out_unlock;
@@ -768,12 +761,6 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping,
pgoff_t end_idx;
XA_STATE(xas, &mapping->i_pages, start_idx);
- /*
- * In the 'limited' case get_user_pages() for dax is disabled.
- */
- if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
- return NULL;
-
if (!dax_mapping(mapping))
return NULL;
@@ -1053,7 +1040,7 @@ static bool dax_fault_is_synchronous(const struct iomap_iter *iter,
* appropriate.
*/
static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
- const struct iomap_iter *iter, void *entry, pfn_t pfn,
+ const struct iomap_iter *iter, void *entry, unsigned long pfn,
unsigned long flags)
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
@@ -1251,7 +1238,7 @@ int dax_writeback_mapping_range(struct address_space *mapping,
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos,
- size_t size, void **kaddr, pfn_t *pfnp)
+ size_t size, void **kaddr, unsigned long *pfnp)
{
pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
int id, rc = 0;
@@ -1269,7 +1256,7 @@ static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos,
rc = -EINVAL;
if (PFN_PHYS(length) < size)
goto out;
- if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
+ if (*pfnp & (PHYS_PFN(size)-1))
goto out;
rc = 0;
@@ -1373,12 +1360,12 @@ static vm_fault_t dax_load_hole(struct xa_state *xas, struct vm_fault *vmf,
{
struct inode *inode = iter->inode;
unsigned long vaddr = vmf->address;
- pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
+ unsigned long pfn = my_zero_pfn(vaddr);
vm_fault_t ret;
*entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, DAX_ZERO_PAGE);
- ret = vmf_insert_page_mkwrite(vmf, pfn_t_to_page(pfn), false);
+ ret = vmf_insert_page_mkwrite(vmf, pfn_to_page(pfn), false);
trace_dax_load_hole(inode, vmf, ret);
return ret;
}
@@ -1388,51 +1375,24 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
const struct iomap_iter *iter, void **entry)
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
- unsigned long pmd_addr = vmf->address & PMD_MASK;
- struct vm_area_struct *vma = vmf->vma;
struct inode *inode = mapping->host;
- pgtable_t pgtable = NULL;
struct folio *zero_folio;
- spinlock_t *ptl;
- pmd_t pmd_entry;
- pfn_t pfn;
+ vm_fault_t ret;
zero_folio = mm_get_huge_zero_folio(vmf->vma->vm_mm);
- if (unlikely(!zero_folio))
- goto fallback;
-
- pfn = page_to_pfn_t(&zero_folio->page);
- *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn,
- DAX_PMD | DAX_ZERO_PAGE);
-
- if (arch_needs_pgtable_deposit()) {
- pgtable = pte_alloc_one(vma->vm_mm);
- if (!pgtable)
- return VM_FAULT_OOM;
- }
-
- ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
- if (!pmd_none(*(vmf->pmd))) {
- spin_unlock(ptl);
- goto fallback;
+ if (unlikely(!zero_folio)) {
+ trace_dax_pmd_load_hole_fallback(inode, vmf, zero_folio, *entry);
+ return VM_FAULT_FALLBACK;
}
- if (pgtable) {
- pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
- mm_inc_nr_ptes(vma->vm_mm);
- }
- pmd_entry = folio_mk_pmd(zero_folio, vmf->vma->vm_page_prot);
- set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
- spin_unlock(ptl);
- trace_dax_pmd_load_hole(inode, vmf, zero_folio, *entry);
- return VM_FAULT_NOPAGE;
+ *entry = dax_insert_entry(xas, vmf, iter, *entry, folio_pfn(zero_folio),
+ DAX_PMD | DAX_ZERO_PAGE);
-fallback:
- if (pgtable)
- pte_free(vma->vm_mm, pgtable);
- trace_dax_pmd_load_hole_fallback(inode, vmf, zero_folio, *entry);
- return VM_FAULT_FALLBACK;
+ ret = vmf_insert_folio_pmd(vmf, zero_folio, false);
+ if (ret == VM_FAULT_NOPAGE)
+ trace_dax_pmd_load_hole(inode, vmf, zero_folio, *entry);
+ return ret;
}
#else
static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
@@ -1547,7 +1507,7 @@ static int dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
/* already zeroed? we're done. */
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
- return iomap_iter_advance(iter, &length);
+ return iomap_iter_advance(iter, length);
/*
* invalidate the pages whose sharing state is to be changed
@@ -1576,10 +1536,10 @@ static int dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
if (ret < 0)
return ret;
- ret = iomap_iter_advance(iter, &length);
+ ret = iomap_iter_advance(iter, length);
if (ret)
return ret;
- } while (length > 0);
+ } while ((length = iomap_length(iter)) > 0);
if (did_zero)
*did_zero = true;
@@ -1637,7 +1597,7 @@ static int dax_iomap_iter(struct iomap_iter *iomi, struct iov_iter *iter)
if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN) {
done = iov_iter_zero(min(length, end - pos), iter);
- return iomap_iter_advance(iomi, &done);
+ return iomap_iter_advance(iomi, done);
}
}
@@ -1721,12 +1681,12 @@ static int dax_iomap_iter(struct iomap_iter *iomi, struct iov_iter *iter)
xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
map_len, iter);
- length = xfer;
- ret = iomap_iter_advance(iomi, &length);
+ ret = iomap_iter_advance(iomi, xfer);
if (!ret && xfer == 0)
ret = -EFAULT;
if (xfer < map_len)
break;
+ length = iomap_length(iomi);
}
dax_read_unlock(id);
@@ -1756,13 +1716,16 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
loff_t done = 0;
int ret;
+ if (WARN_ON_ONCE(iocb->ki_flags & IOCB_ATOMIC))
+ return -EIO;
+
if (!iomi.len)
return 0;
if (iov_iter_rw(iter) == WRITE) {
lockdep_assert_held_write(&iomi.inode->i_rwsem);
iomi.flags |= IOMAP_WRITE;
- } else {
+ } else if (!sb_rdonly(iomi.inode->i_sb)) {
lockdep_assert_held(&iomi.inode->i_rwsem);
}
@@ -1791,7 +1754,8 @@ static vm_fault_t dax_fault_return(int error)
* insertion for now and return the pfn so that caller can insert it after the
* fsync is done.
*/
-static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn)
+static vm_fault_t dax_fault_synchronous_pfnp(unsigned long *pfnp,
+ unsigned long pfn)
{
if (WARN_ON_ONCE(!pfnp))
return VM_FAULT_SIGBUS;
@@ -1839,7 +1803,7 @@ static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf,
* @pmd: distinguish whether it is a pmd fault
*/
static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
- const struct iomap_iter *iter, pfn_t *pfnp,
+ const struct iomap_iter *iter, unsigned long *pfnp,
struct xa_state *xas, void **entry, bool pmd)
{
const struct iomap *iomap = &iter->iomap;
@@ -1850,7 +1814,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
unsigned long entry_flags = pmd ? DAX_PMD : 0;
struct folio *folio;
int ret, err = 0;
- pfn_t pfn;
+ unsigned long pfn;
void *kaddr;
if (!pmd && vmf->cow_page)
@@ -1887,16 +1851,15 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
folio_ref_inc(folio);
if (pmd)
- ret = vmf_insert_folio_pmd(vmf, pfn_folio(pfn_t_to_pfn(pfn)),
- write);
+ ret = vmf_insert_folio_pmd(vmf, pfn_folio(pfn), write);
else
- ret = vmf_insert_page_mkwrite(vmf, pfn_t_to_page(pfn), write);
+ ret = vmf_insert_page_mkwrite(vmf, pfn_to_page(pfn), write);
folio_put(folio);
return ret;
}
-static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
+static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, unsigned long *pfnp,
int *iomap_errp, const struct iomap_ops *ops)
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
@@ -1937,7 +1900,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
* the PTE we need to set up. If so just return and the fault will be
* retried.
*/
- if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
+ if (pmd_trans_huge(*vmf->pmd)) {
ret = VM_FAULT_NOPAGE;
goto unlock_entry;
}
@@ -1956,10 +1919,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
ret |= VM_FAULT_MAJOR;
}
- if (!(ret & VM_FAULT_ERROR)) {
- u64 length = PAGE_SIZE;
- iter.status = iomap_iter_advance(&iter, &length);
- }
+ if (!(ret & VM_FAULT_ERROR))
+ iter.status = iomap_iter_advance(&iter, PAGE_SIZE);
}
if (iomap_errp)
@@ -2008,7 +1969,7 @@ static bool dax_fault_check_fallback(struct vm_fault *vmf, struct xa_state *xas,
return false;
}
-static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
+static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, unsigned long *pfnp,
const struct iomap_ops *ops)
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
@@ -2060,8 +2021,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
* the PMD we need to set up. If so just return and the fault will be
* retried.
*/
- if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
- !pmd_devmap(*vmf->pmd)) {
+ if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd)) {
ret = 0;
goto unlock_entry;
}
@@ -2072,10 +2032,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
continue; /* actually breaks out of the loop */
ret = dax_fault_iter(vmf, &iter, pfnp, &xas, &entry, true);
- if (ret != VM_FAULT_FALLBACK) {
- u64 length = PMD_SIZE;
- iter.status = iomap_iter_advance(&iter, &length);
- }
+ if (ret != VM_FAULT_FALLBACK)
+ iter.status = iomap_iter_advance(&iter, PMD_SIZE);
}
unlock_entry:
@@ -2090,7 +2048,7 @@ out:
return ret;
}
#else
-static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
+static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, unsigned long *pfnp,
const struct iomap_ops *ops)
{
return VM_FAULT_FALLBACK;
@@ -2111,7 +2069,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
* successfully.
*/
vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order,
- pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops)
+ unsigned long *pfnp, int *iomap_errp,
+ const struct iomap_ops *ops)
{
if (order == 0)
return dax_iomap_pte_fault(vmf, pfnp, iomap_errp, ops);
@@ -2131,8 +2090,8 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault);
* This function inserts a writeable PTE or PMD entry into the page tables
* for an mmaped DAX file. It also marks the page cache entry as dirty.
*/
-static vm_fault_t
-dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
+static vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf,
+ unsigned long pfn, unsigned int order)
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
@@ -2154,7 +2113,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
dax_lock_entry(&xas, entry);
xas_unlock_irq(&xas);
- folio = pfn_folio(pfn_t_to_pfn(pfn));
+ folio = pfn_folio(pfn);
folio_ref_inc(folio);
if (order == 0)
ret = vmf_insert_page_mkwrite(vmf, &folio->page, true);
@@ -2181,7 +2140,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
* table entry.
*/
vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, unsigned int order,
- pfn_t pfn)
+ unsigned long pfn)
{
int err;
loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
@@ -2200,7 +2159,6 @@ static int dax_range_compare_iter(struct iomap_iter *it_src,
const struct iomap *smap = &it_src->iomap;
const struct iomap *dmap = &it_dest->iomap;
loff_t pos1 = it_src->pos, pos2 = it_dest->pos;
- u64 dest_len;
void *saddr, *daddr;
int id, ret;
@@ -2233,10 +2191,9 @@ static int dax_range_compare_iter(struct iomap_iter *it_src,
dax_read_unlock(id);
advance:
- dest_len = len;
- ret = iomap_iter_advance(it_src, &len);
+ ret = iomap_iter_advance(it_src, len);
if (!ret)
- ret = iomap_iter_advance(it_dest, &dest_len);
+ ret = iomap_iter_advance(it_dest, len);
return ret;
out_unlock: