From 85a8ce62c2eabe28b9d76ca4eecf37922402df93 Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Sat, 28 Dec 2019 07:05:48 +0800
Subject: block: add bio_truncate to fix guard_bio_eod

Some filesystems, such as vfat, may send a bio which crosses the device
boundary, and worse, such an IO request starting within device bounds
can contain more than one segment past EOD.

Commit dce30ca9e3b6 ("fs: fix guard_bio_eod to check for real EOD
errors") tries to fix this issue by returning -EIO for this situation.
However, that deprives fs user code of the chance to handle -EIO, and
sync_inodes_sb() may then hang forever.

Also, the current truncation of the last segment is dangerous: it
updates the last bvec, so the bvec table is no longer immutable, and fs
bio users may not retrieve the truncated pages via
bio_for_each_segment_all() in their .end_io callbacks.

Fix this issue by supporting multi-segment truncation. The approach is
simpler:

- just update the bio size, since the block layer can build correct
  bvecs from the updated bio size; the bvec table then becomes truly
  immutable

- zero all truncated segments for read bios

Cc: Carlos Maiolino
Cc: linux-fsdevel@vger.kernel.org
Fixed-by: dce30ca9e3b6 ("fs: fix guard_bio_eod to check for real EOD errors")
Reported-by: syzbot+2b9e54155c8c25d8d165@syzkaller.appspotmail.com
Signed-off-by: Ming Lei
Signed-off-by: Jens Axboe
---
 fs/buffer.c | 25 +------------------------
 1 file changed, 1 insertion(+), 24 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index d8c7242426bb..e94a6619464c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3034,8 +3034,6 @@ static void end_bio_bh_io_sync(struct bio *bio)
 void guard_bio_eod(int op, struct bio *bio)
 {
         sector_t maxsector;
-        struct bio_vec *bvec = bio_last_bvec_all(bio);
-        unsigned truncated_bytes;
         struct hd_struct *part;
 
         rcu_read_lock();
@@ -3061,28 +3059,7 @@ void guard_bio_eod(int op, struct bio *bio)
         if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
                 return;
 
-        /* Uhhuh. We've got a bio that straddles the device size! */
-        truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9);
-
-        /*
-         * The bio contains more than one segment which spans EOD, just return
-         * and let IO layer turn it into an EIO
-         */
-        if (truncated_bytes > bvec->bv_len)
-                return;
-
-        /* Truncate the bio.. */
-        bio->bi_iter.bi_size -= truncated_bytes;
-        bvec->bv_len -= truncated_bytes;
-
-        /* ..and clear the end of the buffer for reads */
-        if (op == REQ_OP_READ) {
-                struct bio_vec bv;
-
-                mp_bvec_last_segment(bvec, &bv);
-                zero_user(bv.bv_page, bv.bv_offset + bv.bv_len,
-                                truncated_bytes);
-        }
+        bio_truncate(bio, maxsector << 9);
 }
 
 static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
--
cgit v1.2.3
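Since this log is limited to commits touching fs/buffer.c, the bio_truncate()
helper the patch above adds to block/bio.c is not shown here (its kerneldoc and
a follow-up tweak appear in the next patch). As a rough illustration of the
multi-segment truncation the commit message describes -- shrink only bi_size
and zero everything past the new size for reads -- a sketch might look like
this; the helper name is invented and details differ from the upstream
implementation:

#include <linux/bio.h>          /* bio_for_each_segment(), bio_op() */
#include <linux/highmem.h>      /* zero_user() */

/* Illustrative sketch only, not the upstream block/bio.c code. */
static void sketch_bio_truncate(struct bio *bio, unsigned int new_size)
{
        struct bio_vec bv;
        struct bvec_iter iter;
        unsigned int done = 0;

        if (new_size >= bio->bi_iter.bi_size)
                return;

        if (bio_op(bio) == REQ_OP_READ) {
                /* Zero every byte past @new_size, across all segments. */
                bio_for_each_segment(bv, bio, iter) {
                        if (done + bv.bv_len > new_size) {
                                unsigned int off = new_size > done ?
                                                   new_size - done : 0;

                                zero_user(bv.bv_page, bv.bv_offset + off,
                                          bv.bv_len - off);
                        }
                        done += bv.bv_len;
                }
        }

        /* The bvec table stays untouched; only the logical size shrinks. */
        bio->bi_iter.bi_size = new_size;
}

Note that deciding the direction via bio_op() only works once the op has
actually been set on the bio -- which is exactly what the next patch in this
log addresses.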
From 83c9c547168e8b914ea6398430473a4de68c52cc Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Sun, 5 Jan 2020 09:41:14 +0800
Subject: fs: move guard_bio_eod() after bio_set_op_attrs

Commit 85a8ce62c2ea ("block: add bio_truncate to fix guard_bio_eod")
adds bio_truncate() for handling bio EOD. However, bio_truncate()
doesn't use the 'op' parameter passed in by guard_bio_eod's callers, so
bio_truncate() may retrieve the wrong 'op', and zeroing pages may not
be done for READ bios.

Fix this issue by moving guard_bio_eod() after bio_set_op_attrs() in
submit_bh_wbc() so that bio_truncate() can always retrieve the correct
op info. Meanwhile, remove the 'op' parameter from guard_bio_eod()
because it isn't used any more.

Cc: Carlos Maiolino
Cc: linux-fsdevel@vger.kernel.org
Fixes: 85a8ce62c2ea ("block: add bio_truncate to fix guard_bio_eod")
Signed-off-by: Ming Lei

Fold in kerneldoc and bio_op() change.

Signed-off-by: Jens Axboe
---
 block/bio.c   | 12 +++++++++++-
 fs/buffer.c   |  8 ++++----
 fs/internal.h |  2 +-
 fs/mpage.c    |  2 +-
 4 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/block/bio.c b/block/bio.c
index 006bcc52a77e..94d697217887 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -538,6 +538,16 @@ void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
 }
 EXPORT_SYMBOL(zero_fill_bio_iter);
 
+/**
+ * bio_truncate - truncate the bio to small size of @new_size
+ * @bio:	the bio to be truncated
+ * @new_size:	new size for truncating the bio
+ *
+ * Description:
+ *   Truncate the bio to new size of @new_size. If bio_op(bio) is
+ *   REQ_OP_READ, zero the truncated part. This function should only
+ *   be used for handling corner cases, such as bio eod.
+ */
 void bio_truncate(struct bio *bio, unsigned new_size)
 {
         struct bio_vec bv;
@@ -548,7 +558,7 @@ void bio_truncate(struct bio *bio, unsigned new_size)
         if (new_size >= bio->bi_iter.bi_size)
                 return;
 
-        if (bio_data_dir(bio) != READ)
+        if (bio_op(bio) != REQ_OP_READ)
                 goto exit;
 
         bio_for_each_segment(bv, bio, iter) {
diff --git a/fs/buffer.c b/fs/buffer.c
index e94a6619464c..18a87ec8a465 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3031,7 +3031,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
  * errors, this only handles the "we need to be able to
  * do IO at the final sector" case.
  */
-void guard_bio_eod(int op, struct bio *bio)
+void guard_bio_eod(struct bio *bio)
 {
         sector_t maxsector;
         struct hd_struct *part;
@@ -3095,15 +3095,15 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
         bio->bi_end_io = end_bio_bh_io_sync;
         bio->bi_private = bh;
 
-        /* Take care of bh's that straddle the end of the device */
-        guard_bio_eod(op, bio);
-
         if (buffer_meta(bh))
                 op_flags |= REQ_META;
         if (buffer_prio(bh))
                 op_flags |= REQ_PRIO;
         bio_set_op_attrs(bio, op, op_flags);
 
+        /* Take care of bh's that straddle the end of the device */
+        guard_bio_eod(bio);
+
         if (wbc) {
                 wbc_init_bio(wbc, bio);
                 wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size);
diff --git a/fs/internal.h b/fs/internal.h
index 4a7da1df573d..e3fa69544b66 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -38,7 +38,7 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
 /*
  * buffer.c
  */
-extern void guard_bio_eod(int rw, struct bio *bio);
+extern void guard_bio_eod(struct bio *bio);
 extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
                 get_block_t *get_block, struct iomap *iomap);
 
diff --git a/fs/mpage.c b/fs/mpage.c
index a63620cdb73a..ccba3c4c4479 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -62,7 +62,7 @@ static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio)
 {
         bio->bi_end_io = mpage_end_io;
         bio_set_op_attrs(bio, op, op_flags);
-        guard_bio_eod(op, bio);
+        guard_bio_eod(bio);
         submit_bio(bio);
         return NULL;
 }
--
cgit v1.2.3
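To make the ordering fix concrete: bio_truncate() keys off bio_op(bio), so the
op must be committed to the bio before guard_bio_eod() runs. A hypothetical
read path, loosely modeled on submit_bh_wbc() above (the function name is
invented, completion handling is omitted, and guard_bio_eod() is assumed
visible as in fs-internal code):

#include <linux/bio.h>
#include <linux/buffer_head.h>

/* Hypothetical sketch, loosely modeled on submit_bh_wbc(). */
static void sketch_submit_bh_read(struct buffer_head *bh)
{
        struct bio *bio = bio_alloc(GFP_NOIO, 1);

        bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
        bio_set_dev(bio, bh->b_bdev);
        bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));

        bio_set_op_attrs(bio, REQ_OP_READ, 0); /* 1) op is now visible ...  */
        guard_bio_eod(bio);                    /* 2) ... so bio_op() is     */
                                               /*    correct if truncating  */
        submit_bio(bio);
}

With the old ordering, guard_bio_eod() ran before step 1, so a read bio
crossing EOD could be truncated without its pages being zeroed.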
From cb923159bbb8cc8fe09c19a3435ee11fd546f3d3 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Fri, 17 Jan 2020 10:01:37 +0100
Subject: smp: Remove allocation mask from on_each_cpu_cond.*()

The allocation mask is no longer used by on_each_cpu_cond() and
on_each_cpu_cond_mask() and can be removed.

Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Thomas Gleixner
Acked-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20200117090137.1205765-4-bigeasy@linutronix.de
---
 arch/x86/mm/tlb.c   |  2 +-
 fs/buffer.c         |  2 +-
 include/linux/smp.h |  5 ++---
 kernel/smp.c        | 13 +++----------
 kernel/up.c         |  7 +++----
 mm/slub.c           |  2 +-
 6 files changed, 11 insertions(+), 20 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index e6a9edc5baaf..66f96f21a7b6 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -708,7 +708,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
                                (void *)info, 1);
         else
                 on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote,
-                                (void *)info, 1, GFP_ATOMIC, cpumask);
+                                (void *)info, 1, cpumask);
 }
 
 /*
diff --git a/fs/buffer.c b/fs/buffer.c
index 18a87ec8a465..b8d28370cfd7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1433,7 +1433,7 @@ static bool has_bh_in_lru(int cpu, void *dummy)
 
 void invalidate_bh_lrus(void)
 {
-        on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
+        on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1);
 }
 EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
 
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 4734416855aa..cbc9162689d0 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -51,11 +51,10 @@ void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
  * processor.
  */
 void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
-                      void *info, bool wait, gfp_t gfp_flags);
+                      void *info, bool wait);
 
 void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
-                           void *info, bool wait, gfp_t gfp_flags,
-                           const struct cpumask *mask);
+                           void *info, bool wait, const struct cpumask *mask);
 
 int smp_call_function_single_async(int cpu, call_single_data_t *csd);
 
diff --git a/kernel/smp.c b/kernel/smp.c
index e17e6344ab54..3b7bedc97af3 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -679,11 +679,6 @@ EXPORT_SYMBOL(on_each_cpu_mask);
  * @info:	An arbitrary pointer to pass to both functions.
  * @wait:	If true, wait (atomically) until function has
  *		completed on other CPUs.
- * @gfp_flags:	GFP flags to use when allocating the cpumask
- *		used internally by the function.
- *
- * The function might sleep if the GFP flags indicates a non
- * atomic allocation is allowed.
  *
  * Preemption is disabled to protect against CPUs going offline but not online.
  * CPUs going online during the call will not be seen or sent an IPI.
 *
 * You must not call this function with disabled interrupts or
 * from a hardware interrupt handler or from a bottom half handler.
 */
 void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
-                           void *info, bool wait, gfp_t gfp_flags,
-                           const struct cpumask *mask)
+                           void *info, bool wait, const struct cpumask *mask)
 {
         int cpu = get_cpu();
 
@@ -710,10 +704,9 @@ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
 EXPORT_SYMBOL(on_each_cpu_cond_mask);
 
 void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
-                      void *info, bool wait, gfp_t gfp_flags)
+                      void *info, bool wait)
 {
-        on_each_cpu_cond_mask(cond_func, func, info, wait, gfp_flags,
-                              cpu_online_mask);
+        on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
 }
 EXPORT_SYMBOL(on_each_cpu_cond);
 
diff --git a/kernel/up.c b/kernel/up.c
index 5c0d4f2bece2..53144d056252 100644
--- a/kernel/up.c
+++ b/kernel/up.c
@@ -69,8 +69,7 @@ EXPORT_SYMBOL(on_each_cpu_mask);
  * same condtions in UP and SMP.
  */
 void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
-                           void *info, bool wait, gfp_t gfp_flags,
-                           const struct cpumask *mask)
+                           void *info, bool wait, const struct cpumask *mask)
 {
         unsigned long flags;
 
@@ -85,9 +84,9 @@ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
 EXPORT_SYMBOL(on_each_cpu_cond_mask);
 
 void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
-                      void *info, bool wait, gfp_t gfp_flags)
+                      void *info, bool wait)
 {
-        on_each_cpu_cond_mask(cond_func, func, info, wait, gfp_flags, NULL);
+        on_each_cpu_cond_mask(cond_func, func, info, wait, NULL);
 }
 EXPORT_SYMBOL(on_each_cpu_cond);
 
diff --git a/mm/slub.c b/mm/slub.c
index 8eafccf75940..2e1a57723f8e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2341,7 +2341,7 @@ static bool has_cpu_slab(int cpu, void *info)
 
 static void flush_all(struct kmem_cache *s)
 {
-        on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
+        on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
 }
 
 /*
--
cgit v1.2.3
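With the gfp_t parameter gone, conditional cross-CPU calls need no allocation
mask at all. A hypothetical caller of the simplified API (the function names
and the per-CPU flag below are invented for illustration) now reads:

#include <linux/percpu.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(int, pending_work);       /* hypothetical flag */

static bool cpu_has_pending(int cpu, void *info)
{
        return per_cpu(pending_work, cpu) != 0;
}

static void drain_pending(void *info)
{
        /* Runs only on CPUs where cpu_has_pending() returned true. */
        this_cpu_write(pending_work, 0);
}

static void drain_all_cpus(void)
{
        /* No GFP mask argument any more, and no might-sleep caveat. */
        on_each_cpu_cond(cpu_has_pending, drain_pending, NULL, true);
}

Callers such as invalidate_bh_lrus() and flush_all() in the diff above follow
the same pattern.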