diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-15 04:25:18 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-15 04:25:18 +0300 |
commit | a57cb1c1d7974c62a5c80f7869e35b492ace12cd (patch) | |
tree | 5a42ee9a668f171143464bc86013954c1bbe94ad /ipc/sem.c | |
parent | cf1b3341afab9d3ad02a76b3a619ea027dcf4e28 (diff) | |
parent | e1e14ab8411df344a17687821f8f78f0a1e73cbb (diff) | |
download | linux-a57cb1c1d7974c62a5c80f7869e35b492ace12cd.tar.xz |
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
- a few misc things
- kexec updates
- DMA-mapping updates to better support networking DMA operations
- IPC updates
- various MM changes to improve DAX fault handling
- lots of radix-tree changes, mainly to the test suite. All leading up
to reimplementing the IDA/IDR code to be a wrapper layer over the
radix-tree. However the final trigger-pulling patch is held off for
4.11.
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (114 commits)
radix tree test suite: delete unused rcupdate.c
radix tree test suite: add new tag check
radix-tree: ensure counts are initialised
radix tree test suite: cache recently freed objects
radix tree test suite: add some more functionality
idr: reduce the number of bits per level from 8 to 6
rxrpc: abstract away knowledge of IDR internals
tpm: use idr_find(), not idr_find_slowpath()
idr: add ida_is_empty
radix tree test suite: check multiorder iteration
radix-tree: fix replacement for multiorder entries
radix-tree: add radix_tree_split_preload()
radix-tree: add radix_tree_split
radix-tree: add radix_tree_join
radix-tree: delete radix_tree_range_tag_if_tagged()
radix-tree: delete radix_tree_locate_item()
radix-tree: improve multiorder iterators
btrfs: fix race in btrfs_free_dummy_fs_info()
radix-tree: improve dump output
radix-tree: make radix_tree_find_next_bit more useful
...
Diffstat (limited to 'ipc/sem.c')
-rw-r--r-- | ipc/sem.c | 512 |
1 files changed, 236 insertions, 276 deletions
diff --git a/ipc/sem.c b/ipc/sem.c index 10b94bc59d4a..e08b94851922 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -11,6 +11,7 @@ * (c) 2001 Red Hat Inc * Lockless wakeup * (c) 2003 Manfred Spraul <manfred@colorfullife.com> + * (c) 2016 Davidlohr Bueso <dave@stgolabs.net> * Further wakeup optimizations, documentation * (c) 2010 Manfred Spraul <manfred@colorfullife.com> * @@ -53,15 +54,11 @@ * Semaphores are actively given to waiting tasks (necessary for FIFO). * (see update_queue()) * - To improve the scalability, the actual wake-up calls are performed after - * dropping all locks. (see wake_up_sem_queue_prepare(), - * wake_up_sem_queue_do()) + * dropping all locks. (see wake_up_sem_queue_prepare()) * - All work is done by the waker, the woken up task does not have to do * anything - not even acquiring a lock or dropping a refcount. * - A woken up task may not even touch the semaphore array anymore, it may * have been destroyed already by a semctl(RMID). - * - The synchronizations between wake-ups due to a timeout/signal and a - * wake-up due to a completed semaphore operation is achieved by using an - * intermediate state (IN_WAKEUP). * - UNDO values are stored in an array (one per process and per * semaphore array, lazily allocated). For backwards compatibility, multiple * modes for the UNDO variables are supported (per process, per thread) @@ -118,7 +115,8 @@ struct sem_queue { struct sembuf *sops; /* array of pending operations */ struct sembuf *blocking; /* the operation that blocked */ int nsops; /* number of operations */ - int alter; /* does *sops alter the array? */ + bool alter; /* does *sops alter the array? */ + bool dupsop; /* sops on more than one sem_num */ }; /* Each task has a list of undo requests. They are executed automatically @@ -416,29 +414,6 @@ static inline void sem_unlock(struct sem_array *sma, int locknum) * * The caller holds the RCU read lock. */ -static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns, - int id, struct sembuf *sops, int nsops, int *locknum) -{ - struct kern_ipc_perm *ipcp; - struct sem_array *sma; - - ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); - if (IS_ERR(ipcp)) - return ERR_CAST(ipcp); - - sma = container_of(ipcp, struct sem_array, sem_perm); - *locknum = sem_lock(sma, sops, nsops); - - /* ipc_rmid() may have already freed the ID while sem_lock - * was spinning: verify that the structure is still valid - */ - if (ipc_valid_object(ipcp)) - return container_of(ipcp, struct sem_array, sem_perm); - - sem_unlock(sma, *locknum); - return ERR_PTR(-EINVAL); -} - static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) { struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); @@ -471,40 +446,6 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) ipc_rmid(&sem_ids(ns), &s->sem_perm); } -/* - * Lockless wakeup algorithm: - * Without the check/retry algorithm a lockless wakeup is possible: - * - queue.status is initialized to -EINTR before blocking. - * - wakeup is performed by - * * unlinking the queue entry from the pending list - * * setting queue.status to IN_WAKEUP - * This is the notification for the blocked thread that a - * result value is imminent. - * * call wake_up_process - * * set queue.status to the final value. - * - the previously blocked thread checks queue.status: - * * if it's IN_WAKEUP, then it must wait until the value changes - * * if it's not -EINTR, then the operation was completed by - * update_queue. semtimedop can return queue.status without - * performing any operation on the sem array. - * * otherwise it must acquire the spinlock and check what's up. - * - * The two-stage algorithm is necessary to protect against the following - * races: - * - if queue.status is set after wake_up_process, then the woken up idle - * thread could race forward and try (and fail) to acquire sma->lock - * before update_queue had a chance to set queue.status - * - if queue.status is written before wake_up_process and if the - * blocked process is woken up by a signal between writing - * queue.status and the wake_up_process, then the woken up - * process could return from semtimedop and die by calling - * sys_exit before wake_up_process is called. Then wake_up_process - * will oops, because the task structure is already invalid. - * (yes, this happened on s390 with sysv msg). - * - */ -#define IN_WAKEUP 1 - /** * newary - Create a new semaphore set * @ns: namespace @@ -624,15 +565,23 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) } /** - * perform_atomic_semop - Perform (if possible) a semaphore operation + * perform_atomic_semop[_slow] - Attempt to perform semaphore + * operations on a given array. * @sma: semaphore array * @q: struct sem_queue that describes the operation * + * Caller blocking are as follows, based the value + * indicated by the semaphore operation (sem_op): + * + * (1) >0 never blocks. + * (2) 0 (wait-for-zero operation): semval is non-zero. + * (3) <0 attempting to decrement semval to a value smaller than zero. + * * Returns 0 if the operation was possible. * Returns 1 if the operation is impossible, the caller must sleep. - * Negative values are error codes. + * Returns <0 for error codes. */ -static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) +static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q) { int result, sem_op, nsops, pid; struct sembuf *sop; @@ -703,51 +652,84 @@ undo: return result; } -/** wake_up_sem_queue_prepare(q, error): Prepare wake-up - * @q: queue entry that must be signaled - * @error: Error value for the signal - * - * Prepare the wake-up of the queue entry q. - */ -static void wake_up_sem_queue_prepare(struct list_head *pt, - struct sem_queue *q, int error) +static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) { - if (list_empty(pt)) { - /* - * Hold preempt off so that we don't get preempted and have the - * wakee busy-wait until we're scheduled back on. - */ - preempt_disable(); + int result, sem_op, nsops; + struct sembuf *sop; + struct sem *curr; + struct sembuf *sops; + struct sem_undo *un; + + sops = q->sops; + nsops = q->nsops; + un = q->undo; + + if (unlikely(q->dupsop)) + return perform_atomic_semop_slow(sma, q); + + /* + * We scan the semaphore set twice, first to ensure that the entire + * operation can succeed, therefore avoiding any pointless writes + * to shared memory and having to undo such changes in order to block + * until the operations can go through. + */ + for (sop = sops; sop < sops + nsops; sop++) { + curr = sma->sem_base + sop->sem_num; + sem_op = sop->sem_op; + result = curr->semval; + + if (!sem_op && result) + goto would_block; /* wait-for-zero */ + + result += sem_op; + if (result < 0) + goto would_block; + + if (result > SEMVMX) + return -ERANGE; + + if (sop->sem_flg & SEM_UNDO) { + int undo = un->semadj[sop->sem_num] - sem_op; + + /* Exceeding the undo range is an error. */ + if (undo < (-SEMAEM - 1) || undo > SEMAEM) + return -ERANGE; + } + } + + for (sop = sops; sop < sops + nsops; sop++) { + curr = sma->sem_base + sop->sem_num; + sem_op = sop->sem_op; + result = curr->semval; + + if (sop->sem_flg & SEM_UNDO) { + int undo = un->semadj[sop->sem_num] - sem_op; + + un->semadj[sop->sem_num] = undo; + } + curr->semval += sem_op; + curr->sempid = q->pid; } - q->status = IN_WAKEUP; - q->pid = error; - list_add_tail(&q->list, pt); + return 0; + +would_block: + q->blocking = sop; + return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1; } -/** - * wake_up_sem_queue_do - do the actual wake-up - * @pt: list of tasks to be woken up - * - * Do the actual wake-up. - * The function is called without any locks held, thus the semaphore array - * could be destroyed already and the tasks can disappear as soon as the - * status is set to the actual return code. - */ -static void wake_up_sem_queue_do(struct list_head *pt) +static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error, + struct wake_q_head *wake_q) { - struct sem_queue *q, *t; - int did_something; - - did_something = !list_empty(pt); - list_for_each_entry_safe(q, t, pt, list) { - wake_up_process(q->sleeper); - /* q can disappear immediately after writing q->status. */ - smp_wmb(); - q->status = q->pid; - } - if (did_something) - preempt_enable(); + wake_q_add(wake_q, q->sleeper); + /* + * Rely on the above implicit barrier, such that we can + * ensure that we hold reference to the task before setting + * q->status. Otherwise we could race with do_exit if the + * task is awoken by an external event before calling + * wake_up_process(). + */ + WRITE_ONCE(q->status, error); } static void unlink_queue(struct sem_array *sma, struct sem_queue *q) @@ -767,7 +749,7 @@ static void unlink_queue(struct sem_array *sma, struct sem_queue *q) * modified the array. * Note that wait-for-zero operations are handled without restart. */ -static int check_restart(struct sem_array *sma, struct sem_queue *q) +static inline int check_restart(struct sem_array *sma, struct sem_queue *q) { /* pending complex alter operations are too difficult to analyse */ if (!list_empty(&sma->pending_alter)) @@ -795,21 +777,20 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q) * wake_const_ops - wake up non-alter tasks * @sma: semaphore array. * @semnum: semaphore that was modified. - * @pt: list head for the tasks that must be woken up. + * @wake_q: lockless wake-queue head. * * wake_const_ops must be called after a semaphore in a semaphore array * was set to 0. If complex const operations are pending, wake_const_ops must * be called with semnum = -1, as well as with the number of each modified * semaphore. - * The tasks that must be woken up are added to @pt. The return code + * The tasks that must be woken up are added to @wake_q. The return code * is stored in q->pid. * The function returns 1 if at least one operation was completed successfully. */ static int wake_const_ops(struct sem_array *sma, int semnum, - struct list_head *pt) + struct wake_q_head *wake_q) { - struct sem_queue *q; - struct list_head *walk; + struct sem_queue *q, *tmp; struct list_head *pending_list; int semop_completed = 0; @@ -818,25 +799,19 @@ static int wake_const_ops(struct sem_array *sma, int semnum, else pending_list = &sma->sem_base[semnum].pending_const; - walk = pending_list->next; - while (walk != pending_list) { - int error; - - q = container_of(walk, struct sem_queue, list); - walk = walk->next; - - error = perform_atomic_semop(sma, q); - - if (error <= 0) { - /* operation completed, remove from queue & wakeup */ + list_for_each_entry_safe(q, tmp, pending_list, list) { + int error = perform_atomic_semop(sma, q); - unlink_queue(sma, q); + if (error > 0) + continue; + /* operation completed, remove from queue & wakeup */ + unlink_queue(sma, q); - wake_up_sem_queue_prepare(pt, q, error); - if (error == 0) - semop_completed = 1; - } + wake_up_sem_queue_prepare(q, error, wake_q); + if (error == 0) + semop_completed = 1; } + return semop_completed; } @@ -845,14 +820,14 @@ static int wake_const_ops(struct sem_array *sma, int semnum, * @sma: semaphore array * @sops: operations that were performed * @nsops: number of operations - * @pt: list head of the tasks that must be woken up. + * @wake_q: lockless wake-queue head * * Checks all required queue for wait-for-zero operations, based * on the actual changes that were performed on the semaphore array. * The function returns 1 if at least one operation was completed successfully. */ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, - int nsops, struct list_head *pt) + int nsops, struct wake_q_head *wake_q) { int i; int semop_completed = 0; @@ -865,7 +840,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, if (sma->sem_base[num].semval == 0) { got_zero = 1; - semop_completed |= wake_const_ops(sma, num, pt); + semop_completed |= wake_const_ops(sma, num, wake_q); } } } else { @@ -876,7 +851,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, for (i = 0; i < sma->sem_nsems; i++) { if (sma->sem_base[i].semval == 0) { got_zero = 1; - semop_completed |= wake_const_ops(sma, i, pt); + semop_completed |= wake_const_ops(sma, i, wake_q); } } } @@ -885,7 +860,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, * then check the global queue, too. */ if (got_zero) - semop_completed |= wake_const_ops(sma, -1, pt); + semop_completed |= wake_const_ops(sma, -1, wake_q); return semop_completed; } @@ -895,22 +870,21 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, * update_queue - look for tasks that can be completed. * @sma: semaphore array. * @semnum: semaphore that was modified. - * @pt: list head for the tasks that must be woken up. + * @wake_q: lockless wake-queue head. * * update_queue must be called after a semaphore in a semaphore array * was modified. If multiple semaphores were modified, update_queue must * be called with semnum = -1, as well as with the number of each modified * semaphore. - * The tasks that must be woken up are added to @pt. The return code + * The tasks that must be woken up are added to @wake_q. The return code * is stored in q->pid. * The function internally checks if const operations can now succeed. * * The function return 1 if at least one semop was completed successfully. */ -static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) +static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q) { - struct sem_queue *q; - struct list_head *walk; + struct sem_queue *q, *tmp; struct list_head *pending_list; int semop_completed = 0; @@ -920,13 +894,9 @@ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) pending_list = &sma->sem_base[semnum].pending_alter; again: - walk = pending_list->next; - while (walk != pending_list) { + list_for_each_entry_safe(q, tmp, pending_list, list) { int error, restart; - q = container_of(walk, struct sem_queue, list); - walk = walk->next; - /* If we are scanning the single sop, per-semaphore list of * one semaphore and that semaphore is 0, then it is not * necessary to scan further: simple increments @@ -949,11 +919,11 @@ again: restart = 0; } else { semop_completed = 1; - do_smart_wakeup_zero(sma, q->sops, q->nsops, pt); + do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q); restart = check_restart(sma, q); } - wake_up_sem_queue_prepare(pt, q, error); + wake_up_sem_queue_prepare(q, error, wake_q); if (restart) goto again; } @@ -984,24 +954,24 @@ static void set_semotime(struct sem_array *sma, struct sembuf *sops) * @sops: operations that were performed * @nsops: number of operations * @otime: force setting otime - * @pt: list head of the tasks that must be woken up. + * @wake_q: lockless wake-queue head * * do_smart_update() does the required calls to update_queue and wakeup_zero, * based on the actual changes that were performed on the semaphore array. * Note that the function does not do the actual wake-up: the caller is - * responsible for calling wake_up_sem_queue_do(@pt). + * responsible for calling wake_up_q(). * It is safe to perform this call after dropping all locks. */ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops, - int otime, struct list_head *pt) + int otime, struct wake_q_head *wake_q) { int i; - otime |= do_smart_wakeup_zero(sma, sops, nsops, pt); + otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q); if (!list_empty(&sma->pending_alter)) { /* semaphore array uses the global queue - just process it. */ - otime |= update_queue(sma, -1, pt); + otime |= update_queue(sma, -1, wake_q); } else { if (!sops) { /* @@ -1009,7 +979,7 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop * known. Check all. */ for (i = 0; i < sma->sem_nsems; i++) - otime |= update_queue(sma, i, pt); + otime |= update_queue(sma, i, wake_q); } else { /* * Check the semaphores that were increased: @@ -1023,7 +993,7 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop for (i = 0; i < nsops; i++) { if (sops[i].sem_op > 0) { otime |= update_queue(sma, - sops[i].sem_num, pt); + sops[i].sem_num, wake_q); } } } @@ -1111,8 +1081,8 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) struct sem_undo *un, *tu; struct sem_queue *q, *tq; struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); - struct list_head tasks; int i; + DEFINE_WAKE_Q(wake_q); /* Free the existing undo structures for this semaphore set. */ ipc_assert_locked_object(&sma->sem_perm); @@ -1126,25 +1096,24 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) } /* Wake up all pending processes and let them fail with EIDRM. */ - INIT_LIST_HEAD(&tasks); list_for_each_entry_safe(q, tq, &sma->pending_const, list) { unlink_queue(sma, q); - wake_up_sem_queue_prepare(&tasks, q, -EIDRM); + wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); } list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { unlink_queue(sma, q); - wake_up_sem_queue_prepare(&tasks, q, -EIDRM); + wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); } for (i = 0; i < sma->sem_nsems; i++) { struct sem *sem = sma->sem_base + i; list_for_each_entry_safe(q, tq, &sem->pending_const, list) { unlink_queue(sma, q); - wake_up_sem_queue_prepare(&tasks, q, -EIDRM); + wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); } list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { unlink_queue(sma, q); - wake_up_sem_queue_prepare(&tasks, q, -EIDRM); + wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); } } @@ -1153,7 +1122,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) sem_unlock(sma, -1); rcu_read_unlock(); - wake_up_sem_queue_do(&tasks); + wake_up_q(&wake_q); ns->used_sems -= sma->sem_nsems; ipc_rcu_putref(sma, sem_rcu_free); } @@ -1292,9 +1261,9 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, struct sem_undo *un; struct sem_array *sma; struct sem *curr; - int err; - struct list_head tasks; - int val; + int err, val; + DEFINE_WAKE_Q(wake_q); + #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) /* big-endian 64bit */ val = arg >> 32; @@ -1306,8 +1275,6 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, if (val > SEMVMX || val < 0) return -ERANGE; - INIT_LIST_HEAD(&tasks); - rcu_read_lock(); sma = sem_obtain_object_check(ns, semid); if (IS_ERR(sma)) { @@ -1350,10 +1317,10 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, curr->sempid = task_tgid_vnr(current); sma->sem_ctime = get_seconds(); /* maybe some queued-up processes were waiting for this */ - do_smart_update(sma, NULL, 0, 0, &tasks); + do_smart_update(sma, NULL, 0, 0, &wake_q); sem_unlock(sma, -1); rcu_read_unlock(); - wake_up_sem_queue_do(&tasks); + wake_up_q(&wake_q); return 0; } @@ -1365,9 +1332,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, int err, nsems; ushort fast_sem_io[SEMMSL_FAST]; ushort *sem_io = fast_sem_io; - struct list_head tasks; - - INIT_LIST_HEAD(&tasks); + DEFINE_WAKE_Q(wake_q); rcu_read_lock(); sma = sem_obtain_object_check(ns, semid); @@ -1478,7 +1443,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, } sma->sem_ctime = get_seconds(); /* maybe some queued-up processes were waiting for this */ - do_smart_update(sma, NULL, 0, 0, &tasks); + do_smart_update(sma, NULL, 0, 0, &wake_q); err = 0; goto out_unlock; } @@ -1514,7 +1479,7 @@ out_unlock: sem_unlock(sma, -1); out_rcu_wakeup: rcu_read_unlock(); - wake_up_sem_queue_do(&tasks); + wake_up_q(&wake_q); out_free: if (sem_io != fast_sem_io) ipc_free(sem_io); @@ -1787,32 +1752,6 @@ out: return un; } - -/** - * get_queue_result - retrieve the result code from sem_queue - * @q: Pointer to queue structure - * - * Retrieve the return code from the pending queue. If IN_WAKEUP is found in - * q->status, then we must loop until the value is replaced with the final - * value: This may happen if a task is woken up by an unrelated event (e.g. - * signal) and in parallel the task is woken up by another task because it got - * the requested semaphores. - * - * The function can be called with or without holding the semaphore spinlock. - */ -static int get_queue_result(struct sem_queue *q) -{ - int error; - - error = q->status; - while (unlikely(error == IN_WAKEUP)) { - cpu_relax(); - error = q->status; - } - - return error; -} - SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, unsigned, nsops, const struct timespec __user *, timeout) { @@ -1821,11 +1760,11 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, struct sembuf fast_sops[SEMOPM_FAST]; struct sembuf *sops = fast_sops, *sop; struct sem_undo *un; - int undos = 0, alter = 0, max, locknum; + int max, locknum; + bool undos = false, alter = false, dupsop = false; struct sem_queue queue; - unsigned long jiffies_left = 0; + unsigned long dup = 0, jiffies_left = 0; struct ipc_namespace *ns; - struct list_head tasks; ns = current->nsproxy->ipc_ns; @@ -1838,10 +1777,12 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, if (sops == NULL) return -ENOMEM; } + if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) { error = -EFAULT; goto out_free; } + if (timeout) { struct timespec _timeout; if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) { @@ -1855,18 +1796,30 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, } jiffies_left = timespec_to_jiffies(&_timeout); } + max = 0; for (sop = sops; sop < sops + nsops; sop++) { + unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG); + if (sop->sem_num >= max) max = sop->sem_num; if (sop->sem_flg & SEM_UNDO) - undos = 1; - if (sop->sem_op != 0) - alter = 1; + undos = true; + if (dup & mask) { + /* + * There was a previous alter access that appears + * to have accessed the same semaphore, thus use + * the dupsop logic. "appears", because the detection + * can only check % BITS_PER_LONG. + */ + dupsop = true; + } + if (sop->sem_op != 0) { + alter = true; + dup |= mask; + } } - INIT_LIST_HEAD(&tasks); - if (undos) { /* On success, find_alloc_undo takes the rcu_read_lock */ un = find_alloc_undo(ns, semid); @@ -1887,16 +1840,22 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, } error = -EFBIG; - if (max >= sma->sem_nsems) - goto out_rcu_wakeup; + if (max >= sma->sem_nsems) { + rcu_read_unlock(); + goto out_free; + } error = -EACCES; - if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) - goto out_rcu_wakeup; + if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) { + rcu_read_unlock(); + goto out_free; + } error = security_sem_semop(sma, sops, nsops, alter); - if (error) - goto out_rcu_wakeup; + if (error) { + rcu_read_unlock(); + goto out_free; + } error = -EIDRM; locknum = sem_lock(sma, sops, nsops); @@ -1925,24 +1884,34 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, queue.undo = un; queue.pid = task_tgid_vnr(current); queue.alter = alter; + queue.dupsop = dupsop; error = perform_atomic_semop(sma, &queue); - if (error == 0) { - /* If the operation was successful, then do + if (error == 0) { /* non-blocking succesfull path */ + DEFINE_WAKE_Q(wake_q); + + /* + * If the operation was successful, then do * the required updates. */ if (alter) - do_smart_update(sma, sops, nsops, 1, &tasks); + do_smart_update(sma, sops, nsops, 1, &wake_q); else set_semotime(sma, sops); + + sem_unlock(sma, locknum); + rcu_read_unlock(); + wake_up_q(&wake_q); + + goto out_free; } - if (error <= 0) + if (error < 0) /* non-blocking error path */ goto out_unlock_free; - /* We need to sleep on this operation, so we put the current + /* + * We need to sleep on this operation, so we put the current * task into the pending queue and go to sleep. */ - if (nsops == 1) { struct sem *curr; curr = &sma->sem_base[sops->sem_num]; @@ -1971,77 +1940,69 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, sma->complex_count++; } - queue.status = -EINTR; - queue.sleeper = current; + do { + queue.status = -EINTR; + queue.sleeper = current; -sleep_again: - __set_current_state(TASK_INTERRUPTIBLE); - sem_unlock(sma, locknum); - rcu_read_unlock(); - - if (timeout) - jiffies_left = schedule_timeout(jiffies_left); - else - schedule(); + __set_current_state(TASK_INTERRUPTIBLE); + sem_unlock(sma, locknum); + rcu_read_unlock(); - error = get_queue_result(&queue); + if (timeout) + jiffies_left = schedule_timeout(jiffies_left); + else + schedule(); - if (error != -EINTR) { - /* fast path: update_queue already obtained all requested - * resources. - * Perform a smp_mb(): User space could assume that semop() - * is a memory barrier: Without the mb(), the cpu could - * speculatively read in user space stale data that was - * overwritten by the previous owner of the semaphore. + /* + * fastpath: the semop has completed, either successfully or + * not, from the syscall pov, is quite irrelevant to us at this + * point; we're done. + * + * We _do_ care, nonetheless, about being awoken by a signal or + * spuriously. The queue.status is checked again in the + * slowpath (aka after taking sem_lock), such that we can detect + * scenarios where we were awakened externally, during the + * window between wake_q_add() and wake_up_q(). */ - smp_mb(); - - goto out_free; - } - - rcu_read_lock(); - sma = sem_obtain_lock(ns, semid, sops, nsops, &locknum); - - /* - * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing. - */ - error = get_queue_result(&queue); + error = READ_ONCE(queue.status); + if (error != -EINTR) { + /* + * User space could assume that semop() is a memory + * barrier: Without the mb(), the cpu could + * speculatively read in userspace stale data that was + * overwritten by the previous owner of the semaphore. + */ + smp_mb(); + goto out_free; + } - /* - * Array removed? If yes, leave without sem_unlock(). - */ - if (IS_ERR(sma)) { - rcu_read_unlock(); - goto out_free; - } + rcu_read_lock(); + sem_lock(sma, sops, nsops); + if (!ipc_valid_object(&sma->sem_perm)) + goto out_unlock_free; - /* - * If queue.status != -EINTR we are woken up by another process. - * Leave without unlink_queue(), but with sem_unlock(). - */ - if (error != -EINTR) - goto out_unlock_free; + error = READ_ONCE(queue.status); - /* - * If an interrupt occurred we have to clean up the queue - */ - if (timeout && jiffies_left == 0) - error = -EAGAIN; + /* + * If queue.status != -EINTR we are woken up by another process. + * Leave without unlink_queue(), but with sem_unlock(). + */ + if (error != -EINTR) + goto out_unlock_free; - /* - * If the wakeup was spurious, just retry - */ - if (error == -EINTR && !signal_pending(current)) - goto sleep_again; + /* + * If an interrupt occurred we have to clean up the queue. + */ + if (timeout && jiffies_left == 0) + error = -EAGAIN; + } while (error == -EINTR && !signal_pending(current)); /* spurious */ unlink_queue(sma, &queue); out_unlock_free: sem_unlock(sma, locknum); -out_rcu_wakeup: rcu_read_unlock(); - wake_up_sem_queue_do(&tasks); out_free: if (sops != fast_sops) kfree(sops); @@ -2102,8 +2063,8 @@ void exit_sem(struct task_struct *tsk) for (;;) { struct sem_array *sma; struct sem_undo *un; - struct list_head tasks; int semid, i; + DEFINE_WAKE_Q(wake_q); cond_resched(); @@ -2191,11 +2152,10 @@ void exit_sem(struct task_struct *tsk) } } /* maybe some queued-up processes were waiting for this */ - INIT_LIST_HEAD(&tasks); - do_smart_update(sma, NULL, 0, 1, &tasks); + do_smart_update(sma, NULL, 0, 1, &wake_q); sem_unlock(sma, -1); rcu_read_unlock(); - wake_up_sem_queue_do(&tasks); + wake_up_q(&wake_q); kfree_rcu(un, rcu); } |