diff options
author | Jiri Kosina <jkosina@suse.cz> | 2017-05-02 12:02:41 +0300 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2017-05-02 12:02:41 +0300 |
commit | 4d6ca227c768b50b05cf183974b40abe444e9d0c (patch) | |
tree | bf953d8e895281053548b9967a2c4b58d641df00 /ipc | |
parent | 800f3eef8ebc1264e9c135bfa892c8ae41fa4792 (diff) | |
parent | af22a610bc38508d5ea760507d31be6b6983dfa8 (diff) | |
download | linux-4d6ca227c768b50b05cf183974b40abe444e9d0c.tar.xz |
Merge branch 'for-4.12/asus' into for-linus
Diffstat (limited to 'ipc')
-rw-r--r-- | ipc/mqueue.c | 4 | ||||
-rw-r--r-- | ipc/msg.c | 2 | ||||
-rw-r--r-- | ipc/namespace.c | 2 | ||||
-rw-r--r-- | ipc/sem.c | 110 | ||||
-rw-r--r-- | ipc/shm.c | 31 |
5 files changed, 89 insertions, 60 deletions
diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 7a2d8f0c8ae5..e8d41ff57241 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -35,6 +35,9 @@ #include <linux/ipc_namespace.h> #include <linux/user_namespace.h> #include <linux/slab.h> +#include <linux/sched/wake_q.h> +#include <linux/sched/signal.h> +#include <linux/sched/user.h> #include <net/sock.h> #include "util.h" @@ -558,6 +561,7 @@ static void wq_add(struct mqueue_inode_info *info, int sr, */ static int wq_sleep(struct mqueue_inode_info *info, int sr, ktime_t *timeout, struct ext_wait_queue *ewp) + __releases(&info->lock) { int retval; signed long time; diff --git a/ipc/msg.c b/ipc/msg.c index e3e52ce01123..104926dc72be 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -30,7 +30,7 @@ #include <linux/proc_fs.h> #include <linux/list.h> #include <linux/security.h> -#include <linux/sched.h> +#include <linux/sched/wake_q.h> #include <linux/syscalls.h> #include <linux/audit.h> #include <linux/seq_file.h> diff --git a/ipc/namespace.c b/ipc/namespace.c index 0abdea496493..b4d80f9f7246 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -9,10 +9,12 @@ #include <linux/rcupdate.h> #include <linux/nsproxy.h> #include <linux/slab.h> +#include <linux/cred.h> #include <linux/fs.h> #include <linux/mount.h> #include <linux/user_namespace.h> #include <linux/proc_ns.h> +#include <linux/sched/task.h> #include "util.h" diff --git a/ipc/sem.c b/ipc/sem.c index 3ec5742b5640..947dc2348271 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -82,6 +82,7 @@ #include <linux/rwsem.h> #include <linux/nsproxy.h> #include <linux/ipc_namespace.h> +#include <linux/sched/wake_q.h> #include <linux/uaccess.h> #include "util.h" @@ -159,22 +160,42 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it); #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ /* + * Switching from the mode suitable for simple ops + * to the mode for complex ops is costly. Therefore: + * use some hysteresis + */ +#define USE_GLOBAL_LOCK_HYSTERESIS 10 + +/* * Locking: * a) global sem_lock() for read/write * sem_undo.id_next, * sem_array.complex_count, - * sem_array.complex_mode * sem_array.pending{_alter,_const}, * sem_array.sem_undo * * b) global or semaphore sem_lock() for read/write: * sem_array.sem_base[i].pending_{const,alter}: - * sem_array.complex_mode (for read) * * c) special: * sem_undo_list.list_proc: * * undo_list->lock for write * * rcu for read + * use_global_lock: + * * global sem_lock() for write + * * either local or global sem_lock() for read. + * + * Memory ordering: + * Most ordering is enforced by using spin_lock() and spin_unlock(). + * The special case is use_global_lock: + * Setting it from non-zero to 0 is a RELEASE, this is ensured by + * using smp_store_release(). + * Testing if it is non-zero is an ACQUIRE, this is ensured by using + * smp_load_acquire(). + * Setting it from 0 to non-zero must be ordered with regards to + * this smp_load_acquire(), this is guaranteed because the smp_load_acquire() + * is inside a spin_lock() and after a write from 0 to non-zero a + * spin_lock()+spin_unlock() is done. */ #define sc_semmsl sem_ctls[0] @@ -273,29 +294,22 @@ static void complexmode_enter(struct sem_array *sma) int i; struct sem *sem; - if (sma->complex_mode) { - /* We are already in complex_mode. Nothing to do */ + if (sma->use_global_lock > 0) { + /* + * We are already in global lock mode. + * Nothing to do, just reset the + * counter until we return to simple mode. + */ + sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; return; } - - /* We need a full barrier after seting complex_mode: - * The write to complex_mode must be visible - * before we read the first sem->lock spinlock state. - */ - smp_store_mb(sma->complex_mode, true); + sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; for (i = 0; i < sma->sem_nsems; i++) { sem = sma->sem_base + i; - spin_unlock_wait(&sem->lock); + spin_lock(&sem->lock); + spin_unlock(&sem->lock); } - /* - * spin_unlock_wait() is not a memory barriers, it is only a - * control barrier. The code must pair with spin_unlock(&sem->lock), - * thus just the control barrier is insufficient. - * - * smp_rmb() is sufficient, as writes cannot pass the control barrier. - */ - smp_rmb(); } /* @@ -310,13 +324,17 @@ static void complexmode_tryleave(struct sem_array *sma) */ return; } - /* - * Immediately after setting complex_mode to false, - * a simple op can start. Thus: all memory writes - * performed by the current operation must be visible - * before we set complex_mode to false. - */ - smp_store_release(&sma->complex_mode, false); + if (sma->use_global_lock == 1) { + /* + * Immediately after setting use_global_lock to 0, + * a simple op can start. Thus: all memory writes + * performed by the current operation must be visible + * before we set use_global_lock to 0. + */ + smp_store_release(&sma->use_global_lock, 0); + } else { + sma->use_global_lock--; + } } #define SEM_GLOBAL_LOCK (-1) @@ -346,30 +364,23 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, * Optimized locking is possible if no complex operation * is either enqueued or processed right now. * - * Both facts are tracked by complex_mode. + * Both facts are tracked by use_global_mode. */ sem = sma->sem_base + sops->sem_num; /* - * Initial check for complex_mode. Just an optimization, + * Initial check for use_global_lock. Just an optimization, * no locking, no memory barrier. */ - if (!sma->complex_mode) { + if (!sma->use_global_lock) { /* * It appears that no complex operation is around. * Acquire the per-semaphore lock. */ spin_lock(&sem->lock); - /* - * See 51d7d5205d33 - * ("powerpc: Add smp_mb() to arch_spin_is_locked()"): - * A full barrier is required: the write of sem->lock - * must be visible before the read is executed - */ - smp_mb(); - - if (!smp_load_acquire(&sma->complex_mode)) { + /* pairs with smp_store_release() */ + if (!smp_load_acquire(&sma->use_global_lock)) { /* fast path successful! */ return sops->sem_num; } @@ -379,19 +390,26 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, /* slow path: acquire the full lock */ ipc_lock_object(&sma->sem_perm); - if (sma->complex_count == 0) { - /* False alarm: - * There is no complex operation, thus we can switch - * back to the fast path. + if (sma->use_global_lock == 0) { + /* + * The use_global_lock mode ended while we waited for + * sma->sem_perm.lock. Thus we must switch to locking + * with sem->lock. + * Unlike in the fast path, there is no need to recheck + * sma->use_global_lock after we have acquired sem->lock: + * We own sma->sem_perm.lock, thus use_global_lock cannot + * change. */ spin_lock(&sem->lock); + ipc_unlock_object(&sma->sem_perm); return sops->sem_num; } else { - /* Not a false alarm, thus complete the sequence for a - * full lock. + /* + * Not a false alarm, thus continue to use the global lock + * mode. No need for complexmode_enter(), this was done by + * the caller that has set use_global_mode to non-zero. */ - complexmode_enter(sma); return SEM_GLOBAL_LOCK; } } @@ -495,7 +513,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) } sma->complex_count = 0; - sma->complex_mode = true; /* dropped by sem_unlock below */ + sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; INIT_LIST_HEAD(&sma->pending_alter); INIT_LIST_HEAD(&sma->pending_const); INIT_LIST_HEAD(&sma->list_id); diff --git a/ipc/shm.c b/ipc/shm.c index 81203e8ba013..481d2a9c298a 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -374,12 +374,12 @@ void exit_shm(struct task_struct *task) up_write(&shm_ids(ns).rwsem); } -static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int shm_fault(struct vm_fault *vmf) { - struct file *file = vma->vm_file; + struct file *file = vmf->vma->vm_file; struct shm_file_data *sfd = shm_file_data(file); - return sfd->vm_ops->fault(vma, vmf); + return sfd->vm_ops->fault(vmf); } #ifdef CONFIG_NUMA @@ -423,7 +423,7 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) if (ret) return ret; - ret = sfd->file->f_op->mmap(sfd->file, vma); + ret = call_mmap(sfd->file, vma); if (ret) { shm_close(vma); return ret; @@ -452,7 +452,7 @@ static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync) if (!sfd->file->f_op->fsync) return -EINVAL; - return sfd->file->f_op->fsync(sfd->file, start, end, datasync); + return call_fsync(sfd->file, start, end, datasync); } static long shm_fallocate(struct file *file, int mode, loff_t offset, @@ -1091,8 +1091,8 @@ out_unlock1: * "raddr" thing points to kernel space, and there has to be a wrapper around * this. */ -long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, - unsigned long shmlba) +long do_shmat(int shmid, char __user *shmaddr, int shmflg, + ulong *raddr, unsigned long shmlba) { struct shmid_kernel *shp; unsigned long addr; @@ -1113,8 +1113,13 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, goto out; else if ((addr = (ulong)shmaddr)) { if (addr & (shmlba - 1)) { - if (shmflg & SHM_RND) - addr &= ~(shmlba - 1); /* round down */ + /* + * Round down to the nearest multiple of shmlba. + * For sane do_mmap_pgoff() parameters, avoid + * round downs that trigger nil-page and MAP_FIXED. + */ + if ((shmflg & SHM_RND) && addr >= shmlba) + addr &= ~(shmlba - 1); else #ifndef __ARCH_FORCE_SHMLBA if (addr & ~PAGE_MASK) @@ -1222,7 +1227,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, goto invalid; } - addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate); + addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL); *raddr = addr; err = 0; if (IS_ERR_VALUE(addr)) @@ -1329,7 +1334,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) */ file = vma->vm_file; size = i_size_read(file_inode(vma->vm_file)); - do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); /* * We discovered the size of the shm segment, so * break out of here and fall through to the next @@ -1356,7 +1361,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) if ((vma->vm_ops == &shm_vm_ops) && ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) && (vma->vm_file == file)) - do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); vma = next; } @@ -1365,7 +1370,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) * given */ if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { - do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); retval = 0; } |