diff options
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/mm.h | 9 | ||||
| -rw-r--r-- | include/linux/rmap.h | 66 |
2 files changed, 68 insertions, 7 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index e98ef2cb1176..8a5ad9d050bf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2999,8 +2999,8 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) * PageAnonExclusive() has to protect against concurrent GUP: * * Ordinary GUP: Using the PT lock * * GUP-fast and fork(): mm->write_protect_seq - * * GUP-fast and KSM or temporary unmapping (swap, migration): - * clear/invalidate+flush of the page table entry + * * GUP-fast and KSM or temporary unmapping (swap, migration): see + * page_try_share_anon_rmap() * * Must be called with the (sub)page that's actually referenced via the * page table entry, which might not necessarily be the head page for a @@ -3021,6 +3021,11 @@ static inline bool gup_must_unshare(unsigned int flags, struct page *page) */ if (!PageAnon(page)) return false; + + /* Paired with a memory barrier in page_try_share_anon_rmap(). */ + if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + smp_rmb(); + /* * Note that PageKsm() pages cannot be exclusive, and consequently, * cannot get pinned. diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bf80adca980b..72b2bcc37f73 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -267,7 +267,7 @@ dup: * @page: the exclusive anonymous page to try marking possibly shared * * The caller needs to hold the PT lock and has to have the page table entry - * cleared/invalidated+flushed, to properly sync against GUP-fast. + * cleared/invalidated. * * This is similar to page_try_dup_anon_rmap(), however, not used during fork() * to duplicate a mapping, but instead to prepare for KSM or temporarily @@ -283,12 +283,68 @@ static inline int page_try_share_anon_rmap(struct page *page) { VM_BUG_ON_PAGE(!PageAnon(page) || !PageAnonExclusive(page), page); - /* See page_try_dup_anon_rmap(). */ - if (likely(!is_device_private_page(page) && - unlikely(page_maybe_dma_pinned(page)))) - return -EBUSY; + /* device private pages cannot get pinned via GUP. */ + if (unlikely(is_device_private_page(page))) { + ClearPageAnonExclusive(page); + return 0; + } + /* + * We have to make sure that when we clear PageAnonExclusive, that + * the page is not pinned and that concurrent GUP-fast won't succeed in + * concurrently pinning the page. + * + * Conceptually, PageAnonExclusive clearing consists of: + * (A1) Clear PTE + * (A2) Check if the page is pinned; back off if so. + * (A3) Clear PageAnonExclusive + * (A4) Restore PTE (optional, but certainly not writable) + * + * When clearing PageAnonExclusive, we cannot possibly map the page + * writable again, because anon pages that may be shared must never + * be writable. So in any case, if the PTE was writable it cannot + * be writable anymore afterwards and there would be a PTE change. Only + * if the PTE wasn't writable, there might not be a PTE change. + * + * Conceptually, GUP-fast pinning of an anon page consists of: + * (B1) Read the PTE + * (B2) FOLL_WRITE: check if the PTE is not writable; back off if so. + * (B3) Pin the mapped page + * (B4) Check if the PTE changed by re-reading it; back off if so. + * (B5) If the original PTE is not writable, check if + * PageAnonExclusive is not set; back off if so. + * + * If the PTE was writable, we only have to make sure that GUP-fast + * observes a PTE change and properly backs off. + * + * If the PTE was not writable, we have to make sure that GUP-fast either + * detects a (temporary) PTE change or that PageAnonExclusive is cleared + * and properly backs off. + * + * Consequently, when clearing PageAnonExclusive(), we have to make + * sure that (A1), (A2)/(A3) and (A4) happen in the right memory + * order. In GUP-fast pinning code, we have to make sure that (B3),(B4) + * and (B5) happen in the right memory order. + * + * We assume that there might not be a memory barrier after + * clearing/invalidating the PTE (A1) and before restoring the PTE (A4), + * so we use explicit ones here. + */ + + /* Paired with the memory barrier in try_grab_folio(). */ + if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + smp_mb(); + + if (unlikely(page_maybe_dma_pinned(page))) + return -EBUSY; ClearPageAnonExclusive(page); + + /* + * This is conceptually a smp_wmb() paired with the smp_rmb() in + * gup_must_unshare(). + */ + if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + smp_mb__after_atomic(); return 0; } |
