From 2926a2aa5c14fb2add75e6584845b1c03022235f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 14 Aug 2017 17:19:26 +0200 Subject: iommu: Fix wrong freeing of iommu_device->dev The struct iommu_device has a 'struct device' embedded into it, not as a pointer, but the whole struct. In the conversion of the iommu drivers to use struct iommu_device it was forgotten that the relase function for that struct device simply calls kfree() on the pointer. This frees memory that was never allocated and causes memory corruption. To fix this issue, use a pointer to struct device instead of embedding the whole struct. This needs some updates in the iommu sysfs code as well as the Intel VT-d and AMD IOMMU driver. Reported-by: Sebastian Ott Fixes: 39ab9555c241 ('iommu: Add sysfs bindings for struct iommu_device') Cc: stable@vger.kernel.org # >= v4.11 Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2cb54adc4a33..176f7569d874 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -240,7 +240,7 @@ struct iommu_device { struct list_head list; const struct iommu_ops *ops; struct fwnode_handle *fwnode; - struct device dev; + struct device *dev; }; int iommu_device_register(struct iommu_device *iommu); @@ -265,6 +265,11 @@ static inline void iommu_device_set_fwnode(struct iommu_device *iommu, iommu->fwnode = fwnode; } +static inline struct iommu_device *dev_to_iommu_device(struct device *dev) +{ + return (struct iommu_device *)dev_get_drvdata(dev); +} + #define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */ #define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */ #define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */ @@ -589,6 +594,11 @@ static inline void iommu_device_set_fwnode(struct iommu_device *iommu, { } +static inline struct iommu_device *dev_to_iommu_device(struct device *dev) +{ + return NULL; +} + static inline void iommu_device_unregister(struct iommu_device *iommu) { } -- cgit v1.2.3 From 81fbfe8adaf38d4f5a98c19bebfd41c5d6acaee8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Aug 2017 10:36:47 -0700 Subject: ptr_ring: use kmalloc_array() As found by syzkaller, malicious users can set whatever tx_queue_len on a tun device and eventually crash the kernel. Lets remove the ALIGN(XXX, SMP_CACHE_BYTES) thing since a small ring buffer is not fast anyway. Fixes: 2e0ab8ca83c1 ("ptr_ring: array based FIFO for pointers") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Michael S. Tsirkin Cc: Jason Wang Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 9 +++++---- include/linux/skb_array.h | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index d8c97ec8a8e6..37b4bb2545b3 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -436,9 +436,9 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, __PTR_RING_PEEK_CALL_v; \ }) -static inline void **__ptr_ring_init_queue_alloc(int size, gfp_t gfp) +static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) { - return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp); + return kcalloc(size, sizeof(void *), gfp); } static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) @@ -582,7 +582,8 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, * In particular if you consume ring in interrupt or BH context, you must * disable interrupts/BH when doing so. */ -static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings, +static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, + unsigned int nrings, int size, gfp_t gfp, void (*destroy)(void *)) { @@ -590,7 +591,7 @@ static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings, void ***queues; int i; - queues = kmalloc(nrings * sizeof *queues, gfp); + queues = kmalloc_array(nrings, sizeof(*queues), gfp); if (!queues) goto noqueues; diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index 35226cd4efb0..8621ffdeecbf 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -193,7 +193,8 @@ static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) } static inline int skb_array_resize_multiple(struct skb_array **rings, - int nrings, int size, gfp_t gfp) + int nrings, unsigned int size, + gfp_t gfp) { BUILD_BUG_ON(offsetof(struct skb_array, ring)); return ptr_ring_resize_multiple((struct ptr_ring **)rings, -- cgit v1.2.3 From f00fd7ae4f409abb7b2e5d099248832548199f0c Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sun, 30 Jul 2017 16:14:42 -0600 Subject: PATCH] iio: Fix some documentation warnings The kerneldoc description for the trig_readonly field of struct iio_dev lacked a colon, leading to this doc build warning: ./include/linux/iio/iio.h:603: warning: No description found for parameter 'trig_readonly' A similar issue for iio_trigger_set_immutable() in trigger.h yielded: ./include/linux/iio/trigger.h:151: warning: No description found for parameter 'indio_dev' ./include/linux/iio/trigger.h:151: warning: No description found for parameter 'trig' Fix the formatting and silence the warnings. Signed-off-by: Jonathan Corbet Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 +- include/linux/iio/trigger.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index d68bec297a45..c380daa40c0e 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -535,7 +535,7 @@ struct iio_buffer_setup_ops { * @scan_timestamp: [INTERN] set if any buffers have requested timestamp * @scan_index_timestamp:[INTERN] cache of the index to the timestamp * @trig: [INTERN] current device trigger (buffer modes) - * @trig_readonly [INTERN] mark the current trigger immutable + * @trig_readonly: [INTERN] mark the current trigger immutable * @pollfunc: [DRIVER] function run on trigger being received * @pollfunc_event: [DRIVER] function run on events trigger being received * @channels: [DRIVER] channel specification structure table diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index ea08302f2d7b..7142d8d6e470 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -144,8 +144,8 @@ void devm_iio_trigger_unregister(struct device *dev, /** * iio_trigger_set_immutable() - set an immutable trigger on destination * - * @indio_dev - IIO device structure containing the device - * @trig - trigger to assign to device + * @indio_dev: IIO device structure containing the device + * @trig: trigger to assign to device * **/ int iio_trigger_set_immutable(struct iio_dev *indio_dev, struct iio_trigger *trig); -- cgit v1.2.3 From dd1c1f2f2028a7b851f701fc6a8ebe39dcb95e7c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Aug 2017 17:35:02 +0200 Subject: pids: make task_tgid_nr_ns() safe This was reported many times, and this was even mentioned in commit 52ee2dfdd4f5 ("pids: refactor vnr/nr_ns helpers to make them safe") but somehow nobody bothered to fix the obvious problem: task_tgid_nr_ns() is not safe because task->group_leader points to nowhere after the exiting task passes exit_notify(), rcu_read_lock() can not help. We really need to change __unhash_process() to nullify group_leader, parent, and real_parent, but this needs some cleanups. Until then we can turn task_tgid_nr_ns() into another user of __task_pid_nr_ns() and fix the problem. Reported-by: Troy Kensinger Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- include/linux/pid.h | 4 +++- include/linux/sched.h | 51 +++++++++++++++++++++++++++------------------------ kernel/pid.c | 11 ++++------- 3 files changed, 34 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index 4d179316e431..719582744a2e 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -8,7 +8,9 @@ enum pid_type PIDTYPE_PID, PIDTYPE_PGID, PIDTYPE_SID, - PIDTYPE_MAX + PIDTYPE_MAX, + /* only valid to __task_pid_nr_ns() */ + __PIDTYPE_TGID }; /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 8337e2db0bb2..c05ac5f5aa03 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1163,13 +1163,6 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk) return tsk->tgid; } -extern pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); - -static inline pid_t task_tgid_vnr(struct task_struct *tsk) -{ - return pid_vnr(task_tgid(tsk)); -} - /** * pid_alive - check that a task structure is not stale * @p: Task structure to be checked. @@ -1185,23 +1178,6 @@ static inline int pid_alive(const struct task_struct *p) return p->pids[PIDTYPE_PID].pid != NULL; } -static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) -{ - pid_t pid = 0; - - rcu_read_lock(); - if (pid_alive(tsk)) - pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); - rcu_read_unlock(); - - return pid; -} - -static inline pid_t task_ppid_nr(const struct task_struct *tsk) -{ - return task_ppid_nr_ns(tsk, &init_pid_ns); -} - static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); @@ -1223,6 +1199,33 @@ static inline pid_t task_session_vnr(struct task_struct *tsk) return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); } +static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, ns); +} + +static inline pid_t task_tgid_vnr(struct task_struct *tsk) +{ + return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, NULL); +} + +static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) +{ + pid_t pid = 0; + + rcu_read_lock(); + if (pid_alive(tsk)) + pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); + rcu_read_unlock(); + + return pid; +} + +static inline pid_t task_ppid_nr(const struct task_struct *tsk) +{ + return task_ppid_nr_ns(tsk, &init_pid_ns); +} + /* Obsolete, do not use: */ static inline pid_t task_pgrp_nr(struct task_struct *tsk) { diff --git a/kernel/pid.c b/kernel/pid.c index c69c30d827e5..020dedbdf066 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -527,8 +527,11 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, if (!ns) ns = task_active_pid_ns(current); if (likely(pid_alive(task))) { - if (type != PIDTYPE_PID) + if (type != PIDTYPE_PID) { + if (type == __PIDTYPE_TGID) + type = PIDTYPE_PID; task = task->group_leader; + } nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns); } rcu_read_unlock(); @@ -537,12 +540,6 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, } EXPORT_SYMBOL(__task_pid_nr_ns); -pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) -{ - return pid_nr_ns(task_tgid(tsk), ns); -} -EXPORT_SYMBOL(task_tgid_nr_ns); - struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) { return ns_of_pid(task_pid(tsk)); -- cgit v1.2.3 From 143c97cc652949893c8056c679012f0aeccb80e5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 23 Aug 2017 18:16:11 -0700 Subject: Revert "pty: fix the cached path of the pty slave file descriptor in the master" This reverts commit c8c03f1858331e85d397bacccd34ef409aae993c. It turns out that while fixing the ptmx file descriptor to have the correct 'struct path' to the associated slave pty is a really good thing, it breaks some user space tools for a very annoying reason. The problem is that /dev/ptmx and its associated slave pty (/dev/pts/X) are on different mounts. That was what caused us to have the wrong path in the first place (we would mix up the vfsmount of the 'ptmx' node, with the dentry of the pty slave node), but it also means that now while we use the right vfsmount, having the pty master open also keeps the pts mount busy. And it turn sout that that makes 'pbuilder' very unhappy, as noted by Stefan Lippers-Hollmann: "This patch introduces a regression for me when using pbuilder 0.228.7[2] (a helper to build Debian packages in a chroot and to create and update its chroots) when trying to umount /dev/ptmx (inside the chroot) on Debian/ unstable (full log and pbuilder configuration file[3] attached). [...] Setting up build-essential (12.3) ... Processing triggers for libc-bin (2.24-15) ... I: unmounting dev/ptmx filesystem W: Could not unmount dev/ptmx: umount: /var/cache/pbuilder/build/1340/dev/ptmx: target is busy (In some cases useful info about processes that use the device is found by lsof(8) or fuser(1).)" apparently pbuilder tries to unmount the /dev/pts filesystem while still holding at least one master node open, which is arguably not very nice, but we don't break user space even when fixing other bugs. So this commit has to be reverted. I'll try to figure out a way to avoid caching the path to the slave pty in the master pty. The only thing that actually wants that slave pty path is the "TIOCGPTPEER" ioctl, and I think we could just recreate the path at that time. Reported-by: Stefan Lippers-Hollmann Cc: Eric W Biederman Cc: Christian Brauner Cc: Al Viro Signed-off-by: Linus Torvalds --- drivers/tty/pty.c | 7 ++----- fs/devpts/inode.c | 4 +--- include/linux/devpts_fs.h | 2 +- 3 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 1fc80ea87c13..284749fb0f6b 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -793,7 +793,6 @@ static int ptmx_open(struct inode *inode, struct file *filp) struct tty_struct *tty; struct path *pts_path; struct dentry *dentry; - struct vfsmount *mnt; int retval; int index; @@ -806,7 +805,7 @@ static int ptmx_open(struct inode *inode, struct file *filp) if (retval) return retval; - fsi = devpts_acquire(filp, &mnt); + fsi = devpts_acquire(filp); if (IS_ERR(fsi)) { retval = PTR_ERR(fsi); goto out_free_file; @@ -850,7 +849,7 @@ static int ptmx_open(struct inode *inode, struct file *filp) pts_path = kmalloc(sizeof(struct path), GFP_KERNEL); if (!pts_path) goto err_release; - pts_path->mnt = mnt; + pts_path->mnt = filp->f_path.mnt; pts_path->dentry = dentry; path_get(pts_path); tty->link->driver_data = pts_path; @@ -867,7 +866,6 @@ err_path_put: path_put(pts_path); kfree(pts_path); err_release: - mntput(mnt); tty_unlock(tty); // This will also put-ref the fsi tty_release(inode, filp); @@ -876,7 +874,6 @@ out: devpts_kill_index(fsi, index); out_put_fsi: devpts_release(fsi); - mntput(mnt); out_free_file: tty_free_file(filp); return retval; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 44dfbca9306f..108df2e3602c 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -133,7 +133,7 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) return sb->s_fs_info; } -struct pts_fs_info *devpts_acquire(struct file *filp, struct vfsmount **ptsmnt) +struct pts_fs_info *devpts_acquire(struct file *filp) { struct pts_fs_info *result; struct path path; @@ -142,7 +142,6 @@ struct pts_fs_info *devpts_acquire(struct file *filp, struct vfsmount **ptsmnt) path = filp->f_path; path_get(&path); - *ptsmnt = NULL; /* Has the devpts filesystem already been found? */ sb = path.mnt->mnt_sb; @@ -166,7 +165,6 @@ struct pts_fs_info *devpts_acquire(struct file *filp, struct vfsmount **ptsmnt) * pty code needs to hold extra references in case of last /dev/tty close */ atomic_inc(&sb->s_active); - *ptsmnt = mntget(path.mnt); result = DEVPTS_SB(sb); out: diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 7883e901f65c..277ab9af9ac2 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -19,7 +19,7 @@ struct pts_fs_info; -struct pts_fs_info *devpts_acquire(struct file *, struct vfsmount **ptsmnt); +struct pts_fs_info *devpts_acquire(struct file *); void devpts_release(struct pts_fs_info *); int devpts_new_index(struct pts_fs_info *); -- cgit v1.2.3 From 50b4d485528d1dbe0bd249f2073140e3444f4a7b Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Thu, 24 Aug 2017 01:57:56 +0200 Subject: bsg-lib: fix kernel panic resulting from missing allocation of reply-buffer Since we split the scsi_request out of struct request bsg fails to provide a reply-buffer for the drivers. This was done via the pointer for sense-data, that is not preallocated anymore. Failing to allocate/assign it results in illegal dereferences because LLDs use this pointer unquestioned. An example panic on s390x, using the zFCP driver, looks like this (I had debugging on, otherwise NULL-pointer dereferences wouldn't even panic on s390x): Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 6b6b6b6b6b6b6000 TEID: 6b6b6b6b6b6b6403 Fault in home space mode while using kernel ASCE. AS:0000000001590007 R3:0000000000000024 Oops: 0038 ilc:2 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.12.0-bsg-regression+ #3 Hardware name: IBM 2964 N96 702 (z/VM 6.4.0) task: 0000000065cb0100 task.stack: 0000000065cb4000 Krnl PSW : 0704e00180000000 000003ff801e4156 (zfcp_fc_ct_els_job_handler+0x16/0x58 [zfcp]) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 Krnl GPRS: 0000000000000001 000000005fa9d0d0 000000005fa9d078 0000000000e16866 000003ff00000290 6b6b6b6b6b6b6b6b 0000000059f78f00 000000000000000f 00000000593a0958 00000000593a0958 0000000060d88800 000000005ddd4c38 0000000058b50100 07000000659cba08 000003ff801e8556 00000000659cb9a8 Krnl Code: 000003ff801e4146: e31020500004 lg %r1,80(%r2) 000003ff801e414c: 58402040 l %r4,64(%r2) #000003ff801e4150: e35020200004 lg %r5,32(%r2) >000003ff801e4156: 50405004 st %r4,4(%r5) 000003ff801e415a: e54c50080000 mvhi 8(%r5),0 000003ff801e4160: e33010280012 lt %r3,40(%r1) 000003ff801e4166: a718fffb lhi %r1,-5 000003ff801e416a: 1803 lr %r0,%r3 Call Trace: ([<000003ff801e8556>] zfcp_fsf_req_complete+0x726/0x768 [zfcp]) [<000003ff801ea82a>] zfcp_fsf_reqid_check+0x102/0x180 [zfcp] [<000003ff801eb980>] zfcp_qdio_int_resp+0x230/0x278 [zfcp] [<00000000009b91b6>] qdio_kick_handler+0x2ae/0x2c8 [<00000000009b9e3e>] __tiqdio_inbound_processing+0x406/0xc10 [<00000000001684c2>] tasklet_action+0x15a/0x1d8 [<0000000000bd28ec>] __do_softirq+0x3ec/0x848 [<00000000001675a4>] irq_exit+0x74/0xf8 [<000000000010dd6a>] do_IRQ+0xba/0xf0 [<0000000000bd19e8>] io_int_handler+0x104/0x2d4 [<00000000001033b6>] enabled_wait+0xb6/0x188 ([<000000000010339e>] enabled_wait+0x9e/0x188) [<000000000010396a>] arch_cpu_idle+0x32/0x50 [<0000000000bd0112>] default_idle_call+0x52/0x68 [<00000000001cd0fa>] do_idle+0x102/0x188 [<00000000001cd41e>] cpu_startup_entry+0x3e/0x48 [<0000000000118c64>] smp_start_secondary+0x11c/0x130 [<0000000000bd2016>] restart_int_handler+0x62/0x78 [<0000000000000000>] (null) INFO: lockdep is turned off. Last Breaking-Event-Address: [<000003ff801e41d6>] zfcp_fc_ct_job_handler+0x3e/0x48 [zfcp] Kernel panic - not syncing: Fatal exception in interrupt This patch moves bsg-lib to allocate and setup struct bsg_job ahead of time, including the allocation of a buffer for the reply-data. This means, struct bsg_job is not allocated separately anymore, but as part of struct request allocation - similar to struct scsi_cmd. Reflect this in the function names that used to handle creation/destruction of struct bsg_job. Reported-by: Steffen Maier Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Signed-off-by: Benjamin Block Fixes: 82ed4db499b8 ("block: split scsi_request out of struct request") Cc: #4.11+ Signed-off-by: Jens Axboe --- block/bsg-lib.c | 74 +++++++++++++++++++++++++++++-------------------- include/linux/blkdev.h | 1 - include/linux/bsg-lib.h | 2 ++ 3 files changed, 46 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/block/bsg-lib.c b/block/bsg-lib.c index c4513b23f57a..dd56d7460cb9 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -29,26 +29,25 @@ #include /** - * bsg_destroy_job - routine to teardown/delete a bsg job + * bsg_teardown_job - routine to teardown a bsg job * @job: bsg_job that is to be torn down */ -static void bsg_destroy_job(struct kref *kref) +static void bsg_teardown_job(struct kref *kref) { struct bsg_job *job = container_of(kref, struct bsg_job, kref); struct request *rq = job->req; - blk_end_request_all(rq, BLK_STS_OK); - put_device(job->dev); /* release reference for the request */ kfree(job->request_payload.sg_list); kfree(job->reply_payload.sg_list); - kfree(job); + + blk_end_request_all(rq, BLK_STS_OK); } void bsg_job_put(struct bsg_job *job) { - kref_put(&job->kref, bsg_destroy_job); + kref_put(&job->kref, bsg_teardown_job); } EXPORT_SYMBOL_GPL(bsg_job_put); @@ -100,7 +99,7 @@ EXPORT_SYMBOL_GPL(bsg_job_done); */ static void bsg_softirq_done(struct request *rq) { - struct bsg_job *job = rq->special; + struct bsg_job *job = blk_mq_rq_to_pdu(rq); bsg_job_put(job); } @@ -122,33 +121,20 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) } /** - * bsg_create_job - create the bsg_job structure for the bsg request + * bsg_prepare_job - create the bsg_job structure for the bsg request * @dev: device that is being sent the bsg request * @req: BSG request that needs a job structure */ -static int bsg_create_job(struct device *dev, struct request *req) +static int bsg_prepare_job(struct device *dev, struct request *req) { struct request *rsp = req->next_rq; - struct request_queue *q = req->q; struct scsi_request *rq = scsi_req(req); - struct bsg_job *job; + struct bsg_job *job = blk_mq_rq_to_pdu(req); int ret; - BUG_ON(req->special); - - job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL); - if (!job) - return -ENOMEM; - - req->special = job; - job->req = req; - if (q->bsg_job_size) - job->dd_data = (void *)&job[1]; job->request = rq->cmd; job->request_len = rq->cmd_len; - job->reply = rq->sense; - job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer - * allocated */ + if (req->bio) { ret = bsg_map_buffer(&job->request_payload, req); if (ret) @@ -187,7 +173,6 @@ static void bsg_request_fn(struct request_queue *q) { struct device *dev = q->queuedata; struct request *req; - struct bsg_job *job; int ret; if (!get_device(dev)) @@ -199,7 +184,7 @@ static void bsg_request_fn(struct request_queue *q) break; spin_unlock_irq(q->queue_lock); - ret = bsg_create_job(dev, req); + ret = bsg_prepare_job(dev, req); if (ret) { scsi_req(req)->result = ret; blk_end_request_all(req, BLK_STS_OK); @@ -207,8 +192,7 @@ static void bsg_request_fn(struct request_queue *q) continue; } - job = req->special; - ret = q->bsg_job_fn(job); + ret = q->bsg_job_fn(blk_mq_rq_to_pdu(req)); spin_lock_irq(q->queue_lock); if (ret) break; @@ -219,6 +203,35 @@ static void bsg_request_fn(struct request_queue *q) spin_lock_irq(q->queue_lock); } +static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(req); + struct scsi_request *sreq = &job->sreq; + + memset(job, 0, sizeof(*job)); + + scsi_req_init(sreq); + sreq->sense_len = SCSI_SENSE_BUFFERSIZE; + sreq->sense = kzalloc(sreq->sense_len, gfp); + if (!sreq->sense) + return -ENOMEM; + + job->req = req; + job->reply = sreq->sense; + job->reply_len = sreq->sense_len; + job->dd_data = job + 1; + + return 0; +} + +static void bsg_exit_rq(struct request_queue *q, struct request *req) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(req); + struct scsi_request *sreq = &job->sreq; + + kfree(sreq->sense); +} + /** * bsg_setup_queue - Create and add the bsg hooks so we can receive requests * @dev: device to attach bsg device to @@ -235,7 +248,9 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name, q = blk_alloc_queue(GFP_KERNEL); if (!q) return ERR_PTR(-ENOMEM); - q->cmd_size = sizeof(struct scsi_request); + q->cmd_size = sizeof(struct bsg_job) + dd_job_size; + q->init_rq_fn = bsg_init_rq; + q->exit_rq_fn = bsg_exit_rq; q->request_fn = bsg_request_fn; ret = blk_init_allocated_queue(q); @@ -243,7 +258,6 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name, goto out_cleanup_queue; q->queuedata = dev; - q->bsg_job_size = dd_job_size; q->bsg_job_fn = job_fn; queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25f6a0cb27d3..2a5d52fa90f5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -568,7 +568,6 @@ struct request_queue { #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; - int bsg_job_size; struct bsg_class_device bsg_dev; #endif diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index e34dde2da0ef..637a20cfb237 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -24,6 +24,7 @@ #define _BLK_BSG_ #include +#include struct request; struct device; @@ -37,6 +38,7 @@ struct bsg_buffer { }; struct bsg_job { + struct scsi_request sreq; struct device *dev; struct request *req; -- cgit v1.2.3 From 311fc65c9fb9c966bca8e6f3ff8132ce57344ab9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 24 Aug 2017 15:13:29 -0500 Subject: pty: Repair TIOCGPTPEER The implementation of TIOCGPTPEER has two issues. When /dev/ptmx (as opposed to /dev/pts/ptmx) is opened the wrong vfsmount is passed to dentry_open. Which results in the kernel displaying the wrong pathname for the peer. The second is simply by caching the vfsmount and dentry of the peer it leaves them open, in a way they were not previously Which because of the inreased reference counts can cause unnecessary behaviour differences resulting in regressions. To fix these move the ioctl into tty_io.c at a generic level allowing the ioctl to have access to the struct file on which the ioctl is being called. This allows the path of the slave to be derived when opening the slave through TIOCGPTPEER instead of requiring the path to the slave be cached. Thus removing the need for caching the path. A new function devpts_ptmx_path is factored out of devpts_acquire and used to implement a function devpts_mntget. The new function devpts_mntget takes a filp to perform the lookup on and fsi so that it can confirm that the superblock that is found by devpts_ptmx_path is the proper superblock. v2: Lots of fixes to make the code actually work v3: Suggestions by Linus - Removed the unnecessary initialization of filp in ptm_open_peer - Simplified devpts_ptmx_path as gotos are no longer required [ This is the fix for the issue that was reverted in commit 143c97cc6529, but this time without breaking 'pbuilder' due to increased reference counts - Linus ] Fixes: 54ebbfb16034 ("tty: add TIOCGPTPEER ioctl") Reported-by: Christian Brauner Reported-and-tested-by: Stefan Lippers-Hollmann Signed-off-by: "Eric W. Biederman" Signed-off-by: Linus Torvalds --- drivers/tty/pty.c | 64 ++++++++++++++++++++-------------------------- drivers/tty/tty_io.c | 3 +++ fs/devpts/inode.c | 65 +++++++++++++++++++++++++++++++++++------------ include/linux/devpts_fs.h | 10 ++++++++ 4 files changed, 89 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 284749fb0f6b..a6d5164c33a9 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -69,13 +69,8 @@ static void pty_close(struct tty_struct *tty, struct file *filp) #ifdef CONFIG_UNIX98_PTYS if (tty->driver == ptm_driver) { mutex_lock(&devpts_mutex); - if (tty->link->driver_data) { - struct path *path = tty->link->driver_data; - - devpts_pty_kill(path->dentry); - path_put(path); - kfree(path); - } + if (tty->link->driver_data) + devpts_pty_kill(tty->link->driver_data); mutex_unlock(&devpts_mutex); } #endif @@ -607,25 +602,24 @@ static inline void legacy_pty_init(void) { } static struct cdev ptmx_cdev; /** - * pty_open_peer - open the peer of a pty - * @tty: the peer of the pty being opened + * ptm_open_peer - open the peer of a pty + * @master: the open struct file of the ptmx device node + * @tty: the master of the pty being opened + * @flags: the flags for open * - * Open the cached dentry in tty->link, providing a safe way for userspace - * to get the slave end of a pty (where they have the master fd and cannot - * access or trust the mount namespace /dev/pts was mounted inside). + * Provide a race free way for userspace to open the slave end of a pty + * (where they have the master fd and cannot access or trust the mount + * namespace /dev/pts was mounted inside). */ -static struct file *pty_open_peer(struct tty_struct *tty, int flags) -{ - if (tty->driver->subtype != PTY_TYPE_MASTER) - return ERR_PTR(-EIO); - return dentry_open(tty->link->driver_data, flags, current_cred()); -} - -static int pty_get_peer(struct tty_struct *tty, int flags) +int ptm_open_peer(struct file *master, struct tty_struct *tty, int flags) { int fd = -1; - struct file *filp = NULL; + struct file *filp; int retval = -EINVAL; + struct path path; + + if (tty->driver != ptm_driver) + return -EIO; fd = get_unused_fd_flags(0); if (fd < 0) { @@ -633,7 +627,16 @@ static int pty_get_peer(struct tty_struct *tty, int flags) goto err; } - filp = pty_open_peer(tty, flags); + /* Compute the slave's path */ + path.mnt = devpts_mntget(master, tty->driver_data); + if (IS_ERR(path.mnt)) { + retval = PTR_ERR(path.mnt); + goto err_put; + } + path.dentry = tty->link->driver_data; + + filp = dentry_open(&path, flags, current_cred()); + mntput(path.mnt); if (IS_ERR(filp)) { retval = PTR_ERR(filp); goto err_put; @@ -662,8 +665,6 @@ static int pty_unix98_ioctl(struct tty_struct *tty, return pty_get_pktmode(tty, (int __user *)arg); case TIOCGPTN: /* Get PT Number */ return put_user(tty->index, (unsigned int __user *)arg); - case TIOCGPTPEER: /* Open the other end */ - return pty_get_peer(tty, (int) arg); case TIOCSIG: /* Send signal to other side of pty */ return pty_signal(tty, (int) arg); } @@ -791,7 +792,6 @@ static int ptmx_open(struct inode *inode, struct file *filp) { struct pts_fs_info *fsi; struct tty_struct *tty; - struct path *pts_path; struct dentry *dentry; int retval; int index; @@ -845,26 +845,16 @@ static int ptmx_open(struct inode *inode, struct file *filp) retval = PTR_ERR(dentry); goto err_release; } - /* We need to cache a fake path for TIOCGPTPEER. */ - pts_path = kmalloc(sizeof(struct path), GFP_KERNEL); - if (!pts_path) - goto err_release; - pts_path->mnt = filp->f_path.mnt; - pts_path->dentry = dentry; - path_get(pts_path); - tty->link->driver_data = pts_path; + tty->link->driver_data = dentry; retval = ptm_driver->ops->open(tty, filp); if (retval) - goto err_path_put; + goto err_release; tty_debug_hangup(tty, "opening (count=%d)\n", tty->count); tty_unlock(tty); return 0; -err_path_put: - path_put(pts_path); - kfree(pts_path); err_release: tty_unlock(tty); // This will also put-ref the fsi diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 974b13d24401..10c4038c0e8d 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2518,6 +2518,9 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case TIOCSSERIAL: tty_warn_deprecated_flags(p); break; + case TIOCGPTPEER: + /* Special because the struct file is needed */ + return ptm_open_peer(file, tty, (int)arg); default: retval = tty_jobctrl_ioctl(tty, real_tty, file, cmd, arg); if (retval != -ENOIOCTLCMD) diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 108df2e3602c..7eae33ffa3fc 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -133,6 +133,50 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) return sb->s_fs_info; } +static int devpts_ptmx_path(struct path *path) +{ + struct super_block *sb; + int err; + + /* Has the devpts filesystem already been found? */ + if (path->mnt->mnt_sb->s_magic == DEVPTS_SUPER_MAGIC) + return 0; + + /* Is a devpts filesystem at "pts" in the same directory? */ + err = path_pts(path); + if (err) + return err; + + /* Is the path the root of a devpts filesystem? */ + sb = path->mnt->mnt_sb; + if ((sb->s_magic != DEVPTS_SUPER_MAGIC) || + (path->mnt->mnt_root != sb->s_root)) + return -ENODEV; + + return 0; +} + +struct vfsmount *devpts_mntget(struct file *filp, struct pts_fs_info *fsi) +{ + struct path path; + int err; + + path = filp->f_path; + path_get(&path); + + err = devpts_ptmx_path(&path); + dput(path.dentry); + if (err) { + mntput(path.mnt); + path.mnt = ERR_PTR(err); + } + if (DEVPTS_SB(path.mnt->mnt_sb) != fsi) { + mntput(path.mnt); + path.mnt = ERR_PTR(-ENODEV); + } + return path.mnt; +} + struct pts_fs_info *devpts_acquire(struct file *filp) { struct pts_fs_info *result; @@ -143,27 +187,16 @@ struct pts_fs_info *devpts_acquire(struct file *filp) path = filp->f_path; path_get(&path); - /* Has the devpts filesystem already been found? */ - sb = path.mnt->mnt_sb; - if (sb->s_magic != DEVPTS_SUPER_MAGIC) { - /* Is a devpts filesystem at "pts" in the same directory? */ - err = path_pts(&path); - if (err) { - result = ERR_PTR(err); - goto out; - } - - /* Is the path the root of a devpts filesystem? */ - result = ERR_PTR(-ENODEV); - sb = path.mnt->mnt_sb; - if ((sb->s_magic != DEVPTS_SUPER_MAGIC) || - (path.mnt->mnt_root != sb->s_root)) - goto out; + err = devpts_ptmx_path(&path); + if (err) { + result = ERR_PTR(err); + goto out; } /* * pty code needs to hold extra references in case of last /dev/tty close */ + sb = path.mnt->mnt_sb; atomic_inc(&sb->s_active); result = DEVPTS_SB(sb); diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 277ab9af9ac2..100cb4343763 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -19,6 +19,7 @@ struct pts_fs_info; +struct vfsmount *devpts_mntget(struct file *, struct pts_fs_info *); struct pts_fs_info *devpts_acquire(struct file *); void devpts_release(struct pts_fs_info *); @@ -32,6 +33,15 @@ void *devpts_get_priv(struct dentry *); /* unlink */ void devpts_pty_kill(struct dentry *); +/* in pty.c */ +int ptm_open_peer(struct file *master, struct tty_struct *tty, int flags); + +#else +static inline int +ptm_open_peer(struct file *master, struct tty_struct *tty, int flags) +{ + return -EIO; +} #endif -- cgit v1.2.3 From 0cc3b0ec23ce4c69e1e890ed2b8d2fa932b14aad Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Aug 2017 12:12:25 -0700 Subject: Clarify (and fix) MAX_LFS_FILESIZE macros We have a MAX_LFS_FILESIZE macro that is meant to be filled in by filesystems (and other IO targets) that know they are 64-bit clean and don't have any 32-bit limits in their IO path. It turns out that our 32-bit value for that limit was bogus. On 32-bit, the VM layer is limited by the page cache to only 32-bit index values, but our logic for that was confusing and actually wrong. We used to define that value to (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1) which is actually odd in several ways: it limits the index to 31 bits, and then it limits files so that they can't have data in that last byte of a page that has the highest 31-bit index (ie page index 0x7fffffff). Neither of those limitations make sense. The index is actually the full 32 bit unsigned value, and we can use that whole full page. So the maximum size of the file would logically be "PAGE_SIZE << BITS_PER_LONG". However, we do wan tto avoid the maximum index, because we have code that iterates over the page indexes, and we don't want that code to overflow. So the maximum size of a file on a 32-bit host should actually be one page less than the full 32-bit index. So the actual limit is ULONG_MAX << PAGE_SHIFT. That means that we will not actually be using the page of that last index (ULONG_MAX), but we can grow a file up to that limit. The wrong value of MAX_LFS_FILESIZE actually caused problems for Doug Nazar, who was still using a 32-bit host, but with a 9.7TB 2 x RAID5 volume. It turns out that our old MAX_LFS_FILESIZE was 8TiB (well, one byte less), but the actual true VM limit is one page less than 16TiB. This was invisible until commit c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()"), which started applying that MAX_LFS_FILESIZE limit to block devices too. NOTE! On 64-bit, the page index isn't a limiter at all, and the limit is actually just the offset type itself (loff_t), which is signed. But for clarity, on 64-bit, just use the maximum signed value, and don't make people have to count the number of 'f' characters in the hex constant. So just use LLONG_MAX for the 64-bit case. That was what the value had been before too, just written out as a hex constant. Fixes: c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()") Reported-and-tested-by: Doug Nazar Cc: Andreas Dilger Cc: Mark Fasheh Cc: Joel Becker Cc: Dave Kleikamp Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6e1fd5d21248..cbfe127bccf8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -907,9 +907,9 @@ static inline struct file *get_file(struct file *f) /* Page cache limit. The filesystems should put that into their s_maxbytes limits, otherwise bad things can happen in VM. */ #if BITS_PER_LONG==32 -#define MAX_LFS_FILESIZE (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1) +#define MAX_LFS_FILESIZE ((loff_t)ULONG_MAX << PAGE_SHIFT) #elif BITS_PER_LONG==64 -#define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL) +#define MAX_LFS_FILESIZE ((loff_t)LLONG_MAX) #endif #define FL_POSIX 1 -- cgit v1.2.3