summaryrefslogtreecommitdiff
path: root/fs/userfaultfd.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/userfaultfd.c')
-rw-r--r--fs/userfaultfd.c107
1 files changed, 89 insertions, 18 deletions
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 886085b47c75..1c713fd5b3e6 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -178,7 +178,8 @@ static inline void msg_init(struct uffd_msg *msg)
static inline struct uffd_msg userfault_msg(unsigned long address,
unsigned int flags,
- unsigned long reason)
+ unsigned long reason,
+ unsigned int features)
{
struct uffd_msg msg;
msg_init(&msg);
@@ -202,6 +203,8 @@ static inline struct uffd_msg userfault_msg(unsigned long address,
* write protect fault.
*/
msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WP;
+ if (features & UFFD_FEATURE_THREAD_ID)
+ msg.arg.pagefault.feat.ptid = task_pid_vnr(current);
return msg;
}
@@ -370,13 +373,34 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
VM_BUG_ON(reason & ~(VM_UFFD_MISSING|VM_UFFD_WP));
VM_BUG_ON(!(reason & VM_UFFD_MISSING) ^ !!(reason & VM_UFFD_WP));
+ if (ctx->features & UFFD_FEATURE_SIGBUS)
+ goto out;
+
/*
* If it's already released don't get it. This avoids to loop
* in __get_user_pages if userfaultfd_release waits on the
* caller of handle_userfault to release the mmap_sem.
*/
- if (unlikely(ACCESS_ONCE(ctx->released)))
+ if (unlikely(ACCESS_ONCE(ctx->released))) {
+ /*
+ * Don't return VM_FAULT_SIGBUS in this case, so a non
+ * cooperative manager can close the uffd after the
+ * last UFFDIO_COPY, without risking to trigger an
+ * involuntary SIGBUS if the process was starting the
+ * userfaultfd while the userfaultfd was still armed
+ * (but after the last UFFDIO_COPY). If the uffd
+ * wasn't already closed when the userfault reached
+ * this point, that would normally be solved by
+ * userfaultfd_must_wait returning 'false'.
+ *
+ * If we were to return VM_FAULT_SIGBUS here, the non
+ * cooperative manager would be instead forced to
+ * always call UFFDIO_UNREGISTER before it can safely
+ * close the uffd.
+ */
+ ret = VM_FAULT_NOPAGE;
goto out;
+ }
/*
* Check that we can return VM_FAULT_RETRY.
@@ -419,7 +443,8 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
uwq.wq.private = current;
- uwq.msg = userfault_msg(vmf->address, vmf->flags, reason);
+ uwq.msg = userfault_msg(vmf->address, vmf->flags, reason,
+ ctx->features);
uwq.ctx = ctx;
uwq.waken = false;
@@ -563,6 +588,12 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
break;
if (ACCESS_ONCE(ctx->released) ||
fatal_signal_pending(current)) {
+ /*
+ * &ewq->wq may be queued in fork_event, but
+ * __remove_wait_queue ignores the head
+ * parameter. It would be a problem if it
+ * didn't.
+ */
__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
if (ewq->msg.event == UFFD_EVENT_FORK) {
struct userfaultfd_ctx *new;
@@ -1036,6 +1067,12 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
(unsigned long)
uwq->msg.arg.reserved.reserved1;
list_move(&uwq->wq.entry, &fork_event);
+ /*
+ * fork_nctx can be freed as soon as
+ * we drop the lock, unless we take a
+ * reference on it.
+ */
+ userfaultfd_ctx_get(fork_nctx);
spin_unlock(&ctx->event_wqh.lock);
ret = 0;
break;
@@ -1066,19 +1103,53 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
if (!ret && msg->event == UFFD_EVENT_FORK) {
ret = resolve_userfault_fork(ctx, fork_nctx, msg);
+ spin_lock(&ctx->event_wqh.lock);
+ if (!list_empty(&fork_event)) {
+ /*
+ * The fork thread didn't abort, so we can
+ * drop the temporary refcount.
+ */
+ userfaultfd_ctx_put(fork_nctx);
- if (!ret) {
- spin_lock(&ctx->event_wqh.lock);
- if (!list_empty(&fork_event)) {
- uwq = list_first_entry(&fork_event,
- typeof(*uwq),
- wq.entry);
- list_del(&uwq->wq.entry);
- __add_wait_queue(&ctx->event_wqh, &uwq->wq);
+ uwq = list_first_entry(&fork_event,
+ typeof(*uwq),
+ wq.entry);
+ /*
+ * If fork_event list wasn't empty and in turn
+ * the event wasn't already released by fork
+ * (the event is allocated on fork kernel
+ * stack), put the event back to its place in
+ * the event_wq. fork_event head will be freed
+ * as soon as we return so the event cannot
+ * stay queued there no matter the current
+ * "ret" value.
+ */
+ list_del(&uwq->wq.entry);
+ __add_wait_queue(&ctx->event_wqh, &uwq->wq);
+
+ /*
+ * Leave the event in the waitqueue and report
+ * error to userland if we failed to resolve
+ * the userfault fork.
+ */
+ if (likely(!ret))
userfaultfd_event_complete(ctx, uwq);
- }
- spin_unlock(&ctx->event_wqh.lock);
+ } else {
+ /*
+ * Here the fork thread aborted and the
+ * refcount from the fork thread on fork_nctx
+ * has already been released. We still hold
+ * the reference we took before releasing the
+ * lock above. If resolve_userfault_fork
+ * failed we've to drop it because the
+ * fork_nctx has to be freed in such case. If
+ * it succeeded we'll hold it because the new
+ * uffd references it.
+ */
+ if (ret)
+ userfaultfd_ctx_put(fork_nctx);
}
+ spin_unlock(&ctx->event_wqh.lock);
}
return ret;
@@ -1194,7 +1265,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
struct uffdio_register __user *user_uffdio_register;
unsigned long vm_flags, new_flags;
bool found;
- bool non_anon_pages;
+ bool basic_ioctls;
unsigned long start, end, vma_end;
user_uffdio_register = (struct uffdio_register __user *) arg;
@@ -1260,7 +1331,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
* Search for not compatible vmas.
*/
found = false;
- non_anon_pages = false;
+ basic_ioctls = false;
for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) {
cond_resched();
@@ -1299,8 +1370,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
/*
* Note vmas containing huge pages
*/
- if (is_vm_hugetlb_page(cur) || vma_is_shmem(cur))
- non_anon_pages = true;
+ if (is_vm_hugetlb_page(cur))
+ basic_ioctls = true;
found = true;
}
@@ -1371,7 +1442,7 @@ out_unlock:
* userland which ioctls methods are guaranteed to
* succeed on this range.
*/
- if (put_user(non_anon_pages ? UFFD_API_RANGE_IOCTLS_BASIC :
+ if (put_user(basic_ioctls ? UFFD_API_RANGE_IOCTLS_BASIC :
UFFD_API_RANGE_IOCTLS,
&user_uffdio_register->ioctls))
ret = -EFAULT;