From 77f88796cee819b9c4562b0b6b44691b3b7755b1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 16 Mar 2017 16:54:24 -0400 Subject: cgroup, kthread: close race window where new kthreads can be migrated to non-root cgroups Creation of a kthread goes through a couple interlocked stages between the kthread itself and its creator. Once the new kthread starts running, it initializes itself and wakes up the creator. The creator then can further configure the kthread and then let it start doing its job by waking it up. In this configuration-by-creator stage, the creator is the only one that can wake it up but the kthread is visible to userland. When altering the kthread's attributes from userland is allowed, this is fine; however, for cases where CPU affinity is critical, kthread_bind() is used to first disable affinity changes from userland and then set the affinity. This also prevents the kthread from being migrated into non-root cgroups as that can affect the CPU affinity and many other things. Unfortunately, the cgroup side of protection is racy. While the PF_NO_SETAFFINITY flag prevents further migrations, userland can win the race before the creator sets the flag with kthread_bind() and put the kthread in a non-root cgroup, which can lead to all sorts of problems including incorrect CPU affinity and starvation. This bug got triggered by userland which periodically tries to migrate all processes in the root cpuset cgroup to a non-root one. Per-cpu workqueue workers got caught while being created and ended up with incorrected CPU affinity breaking concurrency management and sometimes stalling workqueue execution. This patch adds task->no_cgroup_migration which disallows the task to be migrated by userland. kthreadd starts with the flag set making every child kthread start in the root cgroup with migration disallowed. The flag is cleared after the kthread finishes initialization by which time PF_NO_SETAFFINITY is set if the kthread should stay in the root cgroup. It'd be better to wait for the initialization instead of failing but I couldn't think of a way of implementing that without adding either a new PF flag, or sleeping and retrying from waiting side. Even if userland depends on changing cgroup membership of a kthread, it either has to be synchronized with kthread_create() or periodically repeat, so it's unlikely that this would break anything. v2: Switch to a simpler implementation using a new task_struct bit field suggested by Oleg. Signed-off-by: Tejun Heo Suggested-by: Oleg Nesterov Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra (Intel) Cc: Thomas Gleixner Reported-and-debugged-by: Chris Mason Cc: stable@vger.kernel.org # v4.3+ (we can't close the race on < v4.3) Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 21 +++++++++++++++++++++ include/linux/sched.h | 4 ++++ 2 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f6b43fbb141c..af9c86e958bd 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -570,6 +570,25 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) pr_cont_kernfs_path(cgrp->kn); } +static inline void cgroup_init_kthreadd(void) +{ + /* + * kthreadd is inherited by all kthreads, keep it in the root so + * that the new kthreads are guaranteed to stay in the root until + * initialization is finished. + */ + current->no_cgroup_migration = 1; +} + +static inline void cgroup_kthread_ready(void) +{ + /* + * This kthread finished initialization. The creator should have + * set PF_NO_SETAFFINITY if this kthread should stay in the root. + */ + current->no_cgroup_migration = 0; +} + #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; @@ -590,6 +609,8 @@ static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } +static inline void cgroup_init_kthreadd(void) {} +static inline void cgroup_kthread_ready(void) {} static inline bool task_under_cgroup_hierarchy(struct task_struct *task, struct cgroup *ancestor) diff --git a/include/linux/sched.h b/include/linux/sched.h index d67eee84fd43..4cf9a59a4d08 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -604,6 +604,10 @@ struct task_struct { #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif +#ifdef CONFIG_CGROUPS + /* disallow userland-initiated cgroup migration */ + unsigned no_cgroup_migration:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ -- cgit v1.2.3 From 698eff6355f735d46d1b7113df8b422874cd7988 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Mar 2017 12:48:18 +0100 Subject: sched/clock, x86/perf: Fix "perf test tsc" People reported that commit: 5680d8094ffa ("sched/clock: Provide better clock continuity") broke "perf test tsc". That commit added another offset to the reported clock value; so take that into account when computing the provided offset values. Reported-by: Adrian Hunter Reported-by: Arnaldo Carvalho de Melo Tested-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 5680d8094ffa ("sched/clock: Provide better clock continuity") Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 9 ++++++--- arch/x86/include/asm/timer.h | 2 ++ arch/x86/kernel/tsc.c | 4 ++-- include/linux/sched/clock.h | 13 +++++++------ kernel/sched/clock.c | 22 +++++++++++----------- 5 files changed, 28 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 2aa1ad194db2..580b60f5ac83 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2256,6 +2256,7 @@ void arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now) { struct cyc2ns_data *data; + u64 offset; userpg->cap_user_time = 0; userpg->cap_user_time_zero = 0; @@ -2263,11 +2264,13 @@ void arch_perf_update_userpage(struct perf_event *event, !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED); userpg->pmc_width = x86_pmu.cntval_bits; - if (!sched_clock_stable()) + if (!using_native_sched_clock() || !sched_clock_stable()) return; data = cyc2ns_read_begin(); + offset = data->cyc2ns_offset + __sched_clock_offset; + /* * Internal timekeeping for enabled/running/stopped times * is always in the local_clock domain. @@ -2275,7 +2278,7 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time = 1; userpg->time_mult = data->cyc2ns_mul; userpg->time_shift = data->cyc2ns_shift; - userpg->time_offset = data->cyc2ns_offset - now; + userpg->time_offset = offset - now; /* * cap_user_time_zero doesn't make sense when we're using a different @@ -2283,7 +2286,7 @@ void arch_perf_update_userpage(struct perf_event *event, */ if (!event->attr.use_clockid) { userpg->cap_user_time_zero = 1; - userpg->time_zero = data->cyc2ns_offset; + userpg->time_zero = offset; } cyc2ns_read_end(data); diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index a04eabd43d06..27e9f9d769b8 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -12,6 +12,8 @@ extern int recalibrate_cpu_khz(void); extern int no_timer_check; +extern bool using_native_sched_clock(void); + /* * We use the full linear equation: f(x) = a + b*x, in order to allow * a continuous function in the face of dynamic freq changes. diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c73a7f9e881a..714dfba6a1e7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -328,7 +328,7 @@ unsigned long long sched_clock(void) return paravirt_sched_clock(); } -static inline bool using_native_sched_clock(void) +bool using_native_sched_clock(void) { return pv_time_ops.sched_clock == native_sched_clock; } @@ -336,7 +336,7 @@ static inline bool using_native_sched_clock(void) unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); -static inline bool using_native_sched_clock(void) { return true; } +bool using_native_sched_clock(void) { return true; } #endif int check_tsc_unstable(void) diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index 4a68c6791207..34fe92ce1ebd 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -54,15 +54,16 @@ static inline u64 local_clock(void) } #else extern void sched_clock_init_late(void); -/* - * Architectures can set this to 1 if they have specified - * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, - * but then during bootup it turns out that sched_clock() - * is reliable after all: - */ extern int sched_clock_stable(void); extern void clear_sched_clock_stable(void); +/* + * When sched_clock_stable(), __sched_clock_offset provides the offset + * between local_clock() and sched_clock(). + */ +extern u64 __sched_clock_offset; + + extern void sched_clock_tick(void); extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index fec0f58c8dee..24a3e01bf8cb 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -96,10 +96,10 @@ static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable); static int __sched_clock_stable_early = 1; /* - * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset + * We want: ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset */ -static __read_mostly u64 raw_offset; -static __read_mostly u64 gtod_offset; +__read_mostly u64 __sched_clock_offset; +static __read_mostly u64 __gtod_offset; struct sched_clock_data { u64 tick_raw; @@ -131,11 +131,11 @@ static void __set_sched_clock_stable(void) /* * Attempt to make the (initial) unstable->stable transition continuous. */ - raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw); + __sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw); printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n", - scd->tick_gtod, gtod_offset, - scd->tick_raw, raw_offset); + scd->tick_gtod, __gtod_offset, + scd->tick_raw, __sched_clock_offset); static_branch_enable(&__sched_clock_stable); tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); @@ -161,11 +161,11 @@ static void __clear_sched_clock_stable(void) * * Still do what we can. */ - gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod); + __gtod_offset = (scd->tick_raw + __sched_clock_offset) - (scd->tick_gtod); printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n", - scd->tick_gtod, gtod_offset, - scd->tick_raw, raw_offset); + scd->tick_gtod, __gtod_offset, + scd->tick_raw, __sched_clock_offset); tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE); @@ -238,7 +238,7 @@ again: * scd->tick_gtod + TICK_NSEC); */ - clock = scd->tick_gtod + gtod_offset + delta; + clock = scd->tick_gtod + __gtod_offset + delta; min_clock = wrap_max(scd->tick_gtod, old_clock); max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC); @@ -324,7 +324,7 @@ u64 sched_clock_cpu(int cpu) u64 clock; if (sched_clock_stable()) - return sched_clock() + raw_offset; + return sched_clock() + __sched_clock_offset; if (unlikely(!sched_clock_running)) return 0ull; -- cgit v1.2.3 From 07de36b378a58f1d1426829acf0ab7cf86f651f3 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Wed, 22 Mar 2017 17:32:49 +0300 Subject: clockevents: Fix syntax error in clkevt-of macro The patch fix syntax errors introduced by commit 0c8893c9095d ("clockevents: Add a clkevt-of mechanism like clksrc-of"). Fixes: 0c8893c9095d ("clockevents: Add a clkevt-of mechanism like clksrc-of") Signed-off-by: Alexander Kochetkov Signed-off-by: Daniel Lezcano --- drivers/clocksource/clkevt-probe.c | 2 +- include/linux/clockchips.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/clocksource/clkevt-probe.c b/drivers/clocksource/clkevt-probe.c index 8c30fec86094..eb89b502acbd 100644 --- a/drivers/clocksource/clkevt-probe.c +++ b/drivers/clocksource/clkevt-probe.c @@ -17,7 +17,7 @@ #include #include -#include +#include extern struct of_device_id __clkevt_of_table[]; diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 5d3053c34fb3..6d7edc3082f9 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -229,7 +229,7 @@ static inline void tick_setup_hrtimer_broadcast(void) { } #ifdef CONFIG_CLKEVT_PROBE extern int clockevent_probe(void); -#els +#else static inline int clockevent_probe(void) { return 0; } #endif -- cgit v1.2.3 From 597b7305dd8bafdb3aef4957d97128bc90af8e9f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 31 Mar 2017 15:11:47 -0700 Subject: mm: move mm_percpu_wq initialization earlier Yang Li has reported that drain_all_pages triggers a WARN_ON which means that this function is called earlier than the mm_percpu_wq is initialized on arm64 with CMA configured: WARNING: CPU: 2 PID: 1 at mm/page_alloc.c:2423 drain_all_pages+0x244/0x25c Modules linked in: CPU: 2 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc1-next-20170310-00027-g64dfbc5 #127 Hardware name: Freescale Layerscape 2088A RDB Board (DT) task: ffffffc07c4a6d00 task.stack: ffffffc07c4a8000 PC is at drain_all_pages+0x244/0x25c LR is at start_isolate_page_range+0x14c/0x1f0 [...] drain_all_pages+0x244/0x25c start_isolate_page_range+0x14c/0x1f0 alloc_contig_range+0xec/0x354 cma_alloc+0x100/0x1fc dma_alloc_from_contiguous+0x3c/0x44 atomic_pool_init+0x7c/0x208 arm64_dma_init+0x44/0x4c do_one_initcall+0x38/0x128 kernel_init_freeable+0x1a0/0x240 kernel_init+0x10/0xfc ret_from_fork+0x10/0x20 Fix this by moving the whole setup_vmstat which is an initcall right now to init_mm_internals which will be called right after the WQ subsystem is initialized. Link: http://lkml.kernel.org/r/20170315164021.28532-1-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Yang Li Tested-by: Yang Li Tested-by: Xiaolong Ye Cc: Mel Gorman Cc: Vlastimil Babka Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ init/main.c | 2 ++ mm/vmstat.c | 4 +--- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5f01c88f0800..00a8fa7e366a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -32,6 +32,8 @@ struct user_struct; struct writeback_control; struct bdi_writeback; +void init_mm_internals(void); + #ifndef CONFIG_NEED_MULTIPLE_NODES /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; diff --git a/init/main.c b/init/main.c index f9c9d9948203..b0c11cbf5ddf 100644 --- a/init/main.c +++ b/init/main.c @@ -1022,6 +1022,8 @@ static noinline void __init kernel_init_freeable(void) workqueue_init(); + init_mm_internals(); + do_pre_smp_initcalls(); lockup_detector_init(); diff --git a/mm/vmstat.c b/mm/vmstat.c index b1947f0cbee2..89f95396ec46 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1764,7 +1764,7 @@ static int vmstat_cpu_dead(unsigned int cpu) #endif -static int __init setup_vmstat(void) +void __init init_mm_internals(void) { #ifdef CONFIG_SMP int ret; @@ -1792,9 +1792,7 @@ static int __init setup_vmstat(void) proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations); proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations); #endif - return 0; } -module_init(setup_vmstat) #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION) -- cgit v1.2.3 From 553af430e7c981e6e8fa5007c5b7b5773acc63dd Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 31 Mar 2017 15:11:50 -0700 Subject: mm: rmap: fix huge file mmap accounting in the memcg stats Huge pages are accounted as single units in the memcg's "file_mapped" counter. Account the correct number of base pages, like we do in the corresponding node counter. Link: http://lkml.kernel.org/r/20170322005111.3156-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++++ mm/rmap.c | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5af377303880..bb7250c45cb8 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -740,6 +740,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) return false; } +static inline void mem_cgroup_update_page_stat(struct page *page, + enum mem_cgroup_stat_index idx, + int nr) +{ +} + static inline void mem_cgroup_inc_page_stat(struct page *page, enum mem_cgroup_stat_index idx) { diff --git a/mm/rmap.c b/mm/rmap.c index 49ed681ccc7b..f6838015810f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1159,7 +1159,7 @@ void page_add_file_rmap(struct page *page, bool compound) goto out; } __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr); - mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); + mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, nr); out: unlock_page_memcg(page); } @@ -1199,7 +1199,7 @@ static void page_remove_file_rmap(struct page *page, bool compound) * pte lock(a spinlock) is held, which implies preemption disabled. */ __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr); - mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); + mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, -nr); if (unlikely(PageMlocked(page))) clear_page_mlock(page); -- cgit v1.2.3 From b0845ce58379d11dcad4cdb6824a6410de260216 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 31 Mar 2017 15:12:04 -0700 Subject: kasan: report only the first error by default Disable kasan after the first report. There are several reasons for this: - Single bug quite often has multiple invalid memory accesses causing storm in the dmesg. - Write OOB access might corrupt metadata so the next report will print bogus alloc/free stacktraces. - Reports after the first easily could be not bugs by itself but just side effects of the first one. Given that multiple reports usually only do harm, it makes sense to disable kasan after the first one. If user wants to see all the reports, the boot-time parameter kasan_multi_shot must be used. [aryabinin@virtuozzo.com: wrote changelog and doc, added missing include] Link: http://lkml.kernel.org/r/20170323154416.30257-1-aryabinin@virtuozzo.com Signed-off-by: Mark Rutland Signed-off-by: Andrey Ryabinin Cc: Andrey Konovalov Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kernel-parameters.txt | 6 +++++ include/linux/kasan.h | 3 +++ lib/test_kasan.c | 10 +++++++ mm/kasan/kasan.h | 5 ---- mm/kasan/report.c | 36 +++++++++++++++++++++++++ 5 files changed, 55 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2ba45caabada..facc20a3f962 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1725,6 +1725,12 @@ kernel and module base offset ASLR (Address Space Layout Randomization). + kasan_multi_shot + [KNL] Enforce KASAN (Kernel Address Sanitizer) to print + report on every invalid memory access. Without this + parameter KASAN will print report only for the first + invalid access. + keepinitrd [HW,ARM] kernelcore= [KNL,X86,IA-64,PPC] diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5734480c9590..a5c7046f26b4 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -76,6 +76,9 @@ size_t ksize(const void *); static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); } size_t kasan_metadata_size(struct kmem_cache *cache); +bool kasan_save_enable_multi_shot(void); +void kasan_restore_multi_shot(bool enabled); + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 0b1d3140fbb8..a25c9763fce1 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -20,6 +20,7 @@ #include #include #include +#include /* * Note: test functions are marked noinline so that their names appear in @@ -474,6 +475,12 @@ static noinline void __init use_after_scope_test(void) static int __init kmalloc_tests_init(void) { + /* + * Temporarily enable multi-shot mode. Otherwise, we'd only get a + * report for the first case. + */ + bool multishot = kasan_save_enable_multi_shot(); + kmalloc_oob_right(); kmalloc_oob_left(); kmalloc_node_oob_right(); @@ -499,6 +506,9 @@ static int __init kmalloc_tests_init(void) ksize_unpoisons_memory(); copy_user_test(); use_after_scope_test(); + + kasan_restore_multi_shot(multishot); + return -EAGAIN; } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 1c260e6b3b3c..dd2dea8eb077 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -96,11 +96,6 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) << KASAN_SHADOW_SCALE_SHIFT); } -static inline bool kasan_report_enabled(void) -{ - return !current->kasan_depth; -} - void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); void kasan_report_double_free(struct kmem_cache *cache, void *object, diff --git a/mm/kasan/report.c b/mm/kasan/report.c index f479365530b6..ab42a0803f16 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -13,7 +13,9 @@ * */ +#include #include +#include #include #include #include @@ -293,6 +295,40 @@ static void kasan_report_error(struct kasan_access_info *info) kasan_end_report(&flags); } +static unsigned long kasan_flags; + +#define KASAN_BIT_REPORTED 0 +#define KASAN_BIT_MULTI_SHOT 1 + +bool kasan_save_enable_multi_shot(void) +{ + return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); +} +EXPORT_SYMBOL_GPL(kasan_save_enable_multi_shot); + +void kasan_restore_multi_shot(bool enabled) +{ + if (!enabled) + clear_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); +} +EXPORT_SYMBOL_GPL(kasan_restore_multi_shot); + +static int __init kasan_set_multi_shot(char *str) +{ + set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); + return 1; +} +__setup("kasan_multi_shot", kasan_set_multi_shot); + +static inline bool kasan_report_enabled(void) +{ + if (current->kasan_depth) + return false; + if (test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) + return true; + return !test_and_set_bit(KASAN_BIT_REPORTED, &kasan_flags); +} + void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip) { -- cgit v1.2.3 From 27c0e3748e41ca79171ffa3e97415a20af6facd0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Feb 2017 18:42:24 -0500 Subject: [iov_iter] new privimitive: iov_iter_revert() opposite to iov_iter_advance(); the caller is responsible for never using it to move back past the initial position. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- include/linux/uio.h | 6 ++++- lib/iov_iter.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 804e34c6f981..f2d36a3d3005 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -39,7 +39,10 @@ struct iov_iter { }; union { unsigned long nr_segs; - int idx; + struct { + int idx; + int start_idx; + }; }; }; @@ -81,6 +84,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to); size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); +void iov_iter_revert(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, diff --git a/lib/iov_iter.c b/lib/iov_iter.c index e68604ae3ced..60abc44385b7 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -786,6 +786,68 @@ void iov_iter_advance(struct iov_iter *i, size_t size) } EXPORT_SYMBOL(iov_iter_advance); +void iov_iter_revert(struct iov_iter *i, size_t unroll) +{ + if (!unroll) + return; + i->count += unroll; + if (unlikely(i->type & ITER_PIPE)) { + struct pipe_inode_info *pipe = i->pipe; + int idx = i->idx; + size_t off = i->iov_offset; + while (1) { + size_t n = off - pipe->bufs[idx].offset; + if (unroll < n) { + off -= (n - unroll); + break; + } + unroll -= n; + if (!unroll && idx == i->start_idx) { + off = 0; + break; + } + if (!idx--) + idx = pipe->buffers - 1; + off = pipe->bufs[idx].offset + pipe->bufs[idx].len; + } + i->iov_offset = off; + i->idx = idx; + pipe_truncate(i); + return; + } + if (unroll <= i->iov_offset) { + i->iov_offset -= unroll; + return; + } + unroll -= i->iov_offset; + if (i->type & ITER_BVEC) { + const struct bio_vec *bvec = i->bvec; + while (1) { + size_t n = (--bvec)->bv_len; + i->nr_segs++; + if (unroll <= n) { + i->bvec = bvec; + i->iov_offset = n - unroll; + return; + } + unroll -= n; + } + } else { /* same logics for iovec and kvec */ + const struct iovec *iov = i->iov; + while (1) { + size_t n = (--iov)->iov_len; + i->nr_segs++; + if (unroll <= n) { + i->iov = iov; + i->iov_offset = n - unroll; + return; + } + unroll -= n; + } + } +} +EXPORT_SYMBOL(iov_iter_revert); + /* * Return the count of just the current iov_iter segment. */ @@ -839,6 +901,7 @@ void iov_iter_pipe(struct iov_iter *i, int direction, i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); i->iov_offset = 0; i->count = count; + i->start_idx = i->idx; } EXPORT_SYMBOL(iov_iter_pipe); -- cgit v1.2.3 From 3209f68b3ca4667069923a325c88b21131bfdf9f Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 31 Mar 2017 18:32:17 +0100 Subject: statx: Include a mask for stx_attributes in struct statx Include a mask in struct stat to indicate which bits of stx_attributes the filesystem actually supports. This would also be useful if we add another system call that allows you to do a 'bulk attribute set' and pass in a statx struct with the masks appropriately set to say what you want to set. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/ext4/inode.c | 6 ++++++ fs/stat.c | 1 + include/linux/stat.h | 1 + include/uapi/linux/stat.h | 4 ++-- samples/statx/test-statx.c | 12 ++++++++---- 5 files changed, 18 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5d02b922afa3..b9ffa9f4191f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5413,6 +5413,12 @@ int ext4_getattr(const struct path *path, struct kstat *stat, if (flags & EXT4_NODUMP_FL) stat->attributes |= STATX_ATTR_NODUMP; + stat->attributes_mask |= (STATX_ATTR_APPEND | + STATX_ATTR_COMPRESSED | + STATX_ATTR_ENCRYPTED | + STATX_ATTR_IMMUTABLE | + STATX_ATTR_NODUMP); + generic_fillattr(inode, stat); return 0; } diff --git a/fs/stat.c b/fs/stat.c index 0c7e6cdc435c..c6c963b2546b 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -527,6 +527,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) tmp.stx_ino = stat->ino; tmp.stx_size = stat->size; tmp.stx_blocks = stat->blocks; + tmp.stx_attributes_mask = stat->attributes_mask; tmp.stx_atime.tv_sec = stat->atime.tv_sec; tmp.stx_atime.tv_nsec = stat->atime.tv_nsec; tmp.stx_btime.tv_sec = stat->btime.tv_sec; diff --git a/include/linux/stat.h b/include/linux/stat.h index c76e524fb34b..64b6b3aece21 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -26,6 +26,7 @@ struct kstat { unsigned int nlink; uint32_t blksize; /* Preferred I/O size */ u64 attributes; + u64 attributes_mask; #define KSTAT_ATTR_FS_IOC_FLAGS \ (STATX_ATTR_COMPRESSED | \ STATX_ATTR_IMMUTABLE | \ diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 0869b9eaa8ce..d538897b8e08 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -114,7 +114,7 @@ struct statx { __u64 stx_ino; /* Inode number */ __u64 stx_size; /* File size */ __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 __spare1[1]; + __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ /* 0x40 */ struct statx_timestamp stx_atime; /* Last access time */ struct statx_timestamp stx_btime; /* File creation time */ @@ -155,7 +155,7 @@ struct statx { #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ /* - * Attributes to be found in stx_attributes + * Attributes to be found in stx_attributes and masked in stx_attributes_mask. * * These give information about the features or the state of a file that might * be of use to ordinary userspace programs such as GUIs or ls rather than diff --git a/samples/statx/test-statx.c b/samples/statx/test-statx.c index 8571d766331d..d4d77b09412c 100644 --- a/samples/statx/test-statx.c +++ b/samples/statx/test-statx.c @@ -141,8 +141,8 @@ static void dump_statx(struct statx *stx) if (stx->stx_mask & STATX_BTIME) print_time(" Birth: ", &stx->stx_btime); - if (stx->stx_attributes) { - unsigned char bits; + if (stx->stx_attributes_mask) { + unsigned char bits, mbits; int loop, byte; static char attr_representation[64 + 1] = @@ -160,14 +160,18 @@ static void dump_statx(struct statx *stx) printf("Attributes: %016llx (", stx->stx_attributes); for (byte = 64 - 8; byte >= 0; byte -= 8) { bits = stx->stx_attributes >> byte; + mbits = stx->stx_attributes_mask >> byte; for (loop = 7; loop >= 0; loop--) { int bit = byte + loop; - if (bits & 0x80) + if (!(mbits & 0x80)) + putchar('.'); /* Not supported */ + else if (bits & 0x80) putchar(attr_representation[63 - bit]); else - putchar('-'); + putchar('-'); /* Not set */ bits <<= 1; + mbits <<= 1; } if (byte) putchar(' '); -- cgit v1.2.3 From 6d56111c92d247bb64301029fe88365aa4caf16e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 21 Mar 2017 22:05:22 +0100 Subject: KVM: arm/arm64: vgic: Fix GICC_PMR uaccess on GICv3 and clarify ABI As an oversight, for GICv2, we accidentally export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask field in the lower 5 bits of a word, meaning that userspace must always use the lower 5 bits to communicate with the KVM device and must shift the value left by 3 places to obtain the actual priority mask level. Since GICv3 supports the full 8 bits of priority masking in the ICH_VMCR, we have to fix the value we export when emulating a GICv2 on top of a hardware GICv3 and exporting the emulated GICv2 state to userspace. Take the chance to clarify this aspect of the ABI. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/devices/arm-vgic.txt | 6 ++++++ include/linux/irqchip/arm-gic.h | 3 +++ virt/kvm/arm/vgic/vgic-mmio-v2.c | 20 ++++++++++++++++++-- virt/kvm/arm/vgic/vgic-v2.c | 8 ++++---- virt/kvm/arm/vgic/vgic.h | 9 ++++++++- 5 files changed, 39 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index 76e61c883347..b2f60ca8b60c 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -83,6 +83,12 @@ Groups: Bits for undefined preemption levels are RAZ/WI. + For historical reasons and to provide ABI compatibility with userspace we + export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask + field in the lower 5 bits of a word, meaning that userspace must always + use the lower 5 bits to communicate with the KVM device and must shift the + value left by 3 places to obtain the actual priority mask level. + Limitations: - Priorities are not implemented, and registers are RAZ/WI - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2. diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index eafc965b3eb8..dc30f3d057eb 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -96,6 +96,9 @@ #define GICH_MISR_EOI (1 << 0) #define GICH_MISR_U (1 << 1) +#define GICV_PMR_PRIORITY_SHIFT 3 +#define GICV_PMR_PRIORITY_MASK (0x1f << GICV_PMR_PRIORITY_SHIFT) + #ifndef __ASSEMBLY__ #include diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index a3ad7ff95c9b..0a4283ed9aa7 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -229,7 +229,15 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu, val = vmcr.ctlr; break; case GIC_CPU_PRIMASK: - val = vmcr.pmr; + /* + * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the + * the PMR field as GICH_VMCR.VMPriMask rather than + * GICC_PMR.Priority, so we expose the upper five bits of + * priority mask to userspace using the lower bits in the + * unsigned long. + */ + val = (vmcr.pmr & GICV_PMR_PRIORITY_MASK) >> + GICV_PMR_PRIORITY_SHIFT; break; case GIC_CPU_BINPOINT: val = vmcr.bpr; @@ -262,7 +270,15 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu, vmcr.ctlr = val; break; case GIC_CPU_PRIMASK: - vmcr.pmr = val; + /* + * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the + * the PMR field as GICH_VMCR.VMPriMask rather than + * GICC_PMR.Priority, so we expose the upper five bits of + * priority mask to userspace using the lower bits in the + * unsigned long. + */ + vmcr.pmr = (val << GICV_PMR_PRIORITY_SHIFT) & + GICV_PMR_PRIORITY_MASK; break; case GIC_CPU_BINPOINT: vmcr.bpr = val; diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 94cf4b9b6471..b637d9c7afe3 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -206,8 +206,8 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) GICH_VMCR_ALIAS_BINPOINT_MASK; vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; - vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & - GICH_VMCR_PRIMASK_MASK; + vmcr |= ((vmcrp->pmr >> GICV_PMR_PRIORITY_SHIFT) << + GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; } @@ -222,8 +222,8 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) GICH_VMCR_ALIAS_BINPOINT_SHIFT; vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; - vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> - GICH_VMCR_PRIMASK_SHIFT; + vmcrp->pmr = ((vmcr & GICH_VMCR_PRIMASK_MASK) >> + GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT; } void vgic_v2_enable(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 91566f5aac9b..6cf557e9f718 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -81,11 +81,18 @@ static inline bool irq_is_pending(struct vgic_irq *irq) return irq->pending_latch || irq->line_level; } +/* + * This struct provides an intermediate representation of the fields contained + * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC + * state to userspace can generate either GICv2 or GICv3 CPU interface + * registers regardless of the hardware backed GIC used. + */ struct vgic_vmcr { u32 ctlr; u32 abpr; u32 bpr; - u32 pmr; + u32 pmr; /* Priority mask field in the GICC_PMR and + * ICC_PMR_EL1 priority field format */ /* Below member variable are valid only for GICv3 */ u32 grpen0; u32 grpen1; -- cgit v1.2.3 From 62e24c5775ecb387a3eb33701378ccfa6dbc98ee Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 5 Feb 2016 13:41:39 +0100 Subject: reset: add exported __reset_control_get, return NULL if optional Rename the internal __reset_control_get/put functions to __reset_control_get/put_internal and add an exported __reset_control_get equivalent to __of_reset_control_get that takes a struct device parameter. This avoids the confusing call to __of_reset_control_get in the non-DT case and fixes the devm_reset_control_get_optional function to return NULL if RESET_CONTROLLER is enabled but dev->of_node == NULL. Fixes: bb475230b8e5 ("reset: make optional functions really optional") Reported-by: Andy Shevchenko Tested-by: Andy Shevchenko Cc: Ramiro Oliveira Signed-off-by: Philipp Zabel --- drivers/reset/core.c | 22 ++++++++++++++++------ include/linux/reset.h | 22 ++++++++++++++-------- 2 files changed, 30 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/reset/core.c b/drivers/reset/core.c index f1e5e65388bb..cd739d2fa160 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -275,7 +275,7 @@ int reset_control_status(struct reset_control *rstc) } EXPORT_SYMBOL_GPL(reset_control_status); -static struct reset_control *__reset_control_get( +static struct reset_control *__reset_control_get_internal( struct reset_controller_dev *rcdev, unsigned int index, bool shared) { @@ -308,7 +308,7 @@ static struct reset_control *__reset_control_get( return rstc; } -static void __reset_control_put(struct reset_control *rstc) +static void __reset_control_put_internal(struct reset_control *rstc) { lockdep_assert_held(&reset_list_mutex); @@ -377,7 +377,7 @@ struct reset_control *__of_reset_control_get(struct device_node *node, } /* reset_list_mutex also protects the rcdev's reset_control list */ - rstc = __reset_control_get(rcdev, rstc_id, shared); + rstc = __reset_control_get_internal(rcdev, rstc_id, shared); mutex_unlock(&reset_list_mutex); @@ -385,6 +385,17 @@ struct reset_control *__of_reset_control_get(struct device_node *node, } EXPORT_SYMBOL_GPL(__of_reset_control_get); +struct reset_control *__reset_control_get(struct device *dev, const char *id, + int index, bool shared, bool optional) +{ + if (dev->of_node) + return __of_reset_control_get(dev->of_node, id, index, shared, + optional); + + return optional ? NULL : ERR_PTR(-EINVAL); +} +EXPORT_SYMBOL_GPL(__reset_control_get); + /** * reset_control_put - free the reset controller * @rstc: reset controller @@ -396,7 +407,7 @@ void reset_control_put(struct reset_control *rstc) return; mutex_lock(&reset_list_mutex); - __reset_control_put(rstc); + __reset_control_put_internal(rstc); mutex_unlock(&reset_list_mutex); } EXPORT_SYMBOL_GPL(reset_control_put); @@ -417,8 +428,7 @@ struct reset_control *__devm_reset_control_get(struct device *dev, if (!ptr) return ERR_PTR(-ENOMEM); - rstc = __of_reset_control_get(dev ? dev->of_node : NULL, - id, index, shared, optional); + rstc = __reset_control_get(dev, id, index, shared, optional); if (!IS_ERR(rstc)) { *ptr = rstc; devres_add(dev, ptr); diff --git a/include/linux/reset.h b/include/linux/reset.h index 96fb139bdd08..13d8681210d5 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -15,6 +15,9 @@ int reset_control_status(struct reset_control *rstc); struct reset_control *__of_reset_control_get(struct device_node *node, const char *id, int index, bool shared, bool optional); +struct reset_control *__reset_control_get(struct device *dev, const char *id, + int index, bool shared, + bool optional); void reset_control_put(struct reset_control *rstc); struct reset_control *__devm_reset_control_get(struct device *dev, const char *id, int index, bool shared, @@ -72,6 +75,13 @@ static inline struct reset_control *__of_reset_control_get( return optional ? NULL : ERR_PTR(-ENOTSUPP); } +static inline struct reset_control *__reset_control_get( + struct device *dev, const char *id, + int index, bool shared, bool optional) +{ + return optional ? NULL : ERR_PTR(-ENOTSUPP); +} + static inline struct reset_control *__devm_reset_control_get( struct device *dev, const char *id, int index, bool shared, bool optional) @@ -102,8 +112,7 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id) #ifndef CONFIG_RESET_CONTROLLER WARN_ON(1); #endif - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false, - false); + return __reset_control_get(dev, id, 0, false, false); } /** @@ -131,22 +140,19 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id) static inline struct reset_control *reset_control_get_shared( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true, - false); + return __reset_control_get(dev, id, 0, true, false); } static inline struct reset_control *reset_control_get_optional_exclusive( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false, - true); + return __reset_control_get(dev, id, 0, false, true); } static inline struct reset_control *reset_control_get_optional_shared( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true, - true); + return __reset_control_get(dev, id, 0, true, true); } /** -- cgit v1.2.3 From b2376407f98920c9b0c411948675f58a9640be35 Mon Sep 17 00:00:00 2001 From: Vic Yang Date: Fri, 24 Mar 2017 18:44:01 +0100 Subject: mfd: cros-ec: Fix host command buffer size For SPI, we can get up to 32 additional bytes for response preamble. The current overhead (2 bytes) may cause problems when we try to receive a big response. Update it to 32 bytes. Without this fix we could see a kernel BUG when we receive a big response from the Chrome EC when is connected via SPI. Signed-off-by: Vic Yang Tested-by: Enric Balletbo i Serra Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 7a01c94496f1..3eef9fb9968a 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -35,10 +35,11 @@ * Max bus-specific overhead incurred by request/responses. * I2C requires 1 additional byte for requests. * I2C requires 2 additional bytes for responses. + * SPI requires up to 32 additional bytes for responses. * */ #define EC_PROTO_VERSION_UNKNOWN 0 #define EC_MAX_REQUEST_OVERHEAD 1 -#define EC_MAX_RESPONSE_OVERHEAD 2 +#define EC_MAX_RESPONSE_OVERHEAD 32 /* * Command interface between EC and AP, for LPC, I2C and SPI interfaces. -- cgit v1.2.3 From 6118714275f0a313ecc296a87ed1af32d9691bed Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 30 Mar 2017 09:16:39 -0700 Subject: pinctrl: core: Fix pinctrl_register_and_init() with pinctrl_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent pinctrl changes to allow dynamic allocation of pins exposed one more issue with the pinctrl pins claimed early by the controller itself. This caused a regression for IMX6 pinctrl hogs. Before enabling the pin controller driver we need to wait until it has been properly initialized, then claim the hogs, and only then enable it. To fix the regression, split the code into pinctrl_claim_hogs() and pinctrl_enable(). And then let's require that pinctrl_enable() is always called by the pin controller driver when ready after calling pinctrl_register_and_init(). Depends-on: 950b0d91dc10 ("pinctrl: core: Fix regression caused by delayed work for hogs") Fixes: df61b366af26 ("pinctrl: core: Use delayed work for hogs") Fixes: e566fc11ea76 ("pinctrl: imx: use generic pinctrl helpers for managing groups") Cc: Haojian Zhuang Cc: Masahiro Yamada Cc: Mika Penttilä Cc: Mika Westerberg Cc: Nishanth Menon Cc: Shawn Guo Cc: Stefan Agner Tested-by: Geert Uytterhoeven Tested-by: Gary Bisson Tested-by: Fabio Estevam Signed-off-by: Tony Lindgren Signed-off-by: Linus Walleij --- Documentation/pinctrl.txt | 8 ++- drivers/pinctrl/core.c | 97 +++++++++++++++++++++------------ drivers/pinctrl/freescale/pinctrl-imx.c | 2 +- drivers/pinctrl/pinctrl-single.c | 2 +- drivers/pinctrl/sh-pfc/pinctrl.c | 11 +++- drivers/pinctrl/ti/pinctrl-ti-iodelay.c | 2 + include/linux/pinctrl/pinctrl.h | 3 +- 7 files changed, 83 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt index 54bd5faa8782..f2af35f6d6b2 100644 --- a/Documentation/pinctrl.txt +++ b/Documentation/pinctrl.txt @@ -77,9 +77,15 @@ static struct pinctrl_desc foo_desc = { int __init foo_probe(void) { + int error; + struct pinctrl_dev *pctl; - return pinctrl_register_and_init(&foo_desc, , NULL, &pctl); + error = pinctrl_register_and_init(&foo_desc, , NULL, &pctl); + if (error) + return error; + + return pinctrl_enable(pctl); } To enable the pinctrl subsystem and the subgroups for PINMUX and PINCONF and diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index d69046537b75..32822b0d9cd0 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -2010,29 +2010,57 @@ out_err: return ERR_PTR(ret); } -static int pinctrl_create_and_start(struct pinctrl_dev *pctldev) +static int pinctrl_claim_hogs(struct pinctrl_dev *pctldev) { pctldev->p = create_pinctrl(pctldev->dev, pctldev); - if (!IS_ERR(pctldev->p)) { - kref_get(&pctldev->p->users); - pctldev->hog_default = - pinctrl_lookup_state(pctldev->p, PINCTRL_STATE_DEFAULT); - if (IS_ERR(pctldev->hog_default)) { - dev_dbg(pctldev->dev, - "failed to lookup the default state\n"); - } else { - if (pinctrl_select_state(pctldev->p, - pctldev->hog_default)) - dev_err(pctldev->dev, - "failed to select default state\n"); - } + if (PTR_ERR(pctldev->p) == -ENODEV) { + dev_dbg(pctldev->dev, "no hogs found\n"); - pctldev->hog_sleep = - pinctrl_lookup_state(pctldev->p, - PINCTRL_STATE_SLEEP); - if (IS_ERR(pctldev->hog_sleep)) - dev_dbg(pctldev->dev, - "failed to lookup the sleep state\n"); + return 0; + } + + if (IS_ERR(pctldev->p)) { + dev_err(pctldev->dev, "error claiming hogs: %li\n", + PTR_ERR(pctldev->p)); + + return PTR_ERR(pctldev->p); + } + + kref_get(&pctldev->p->users); + pctldev->hog_default = + pinctrl_lookup_state(pctldev->p, PINCTRL_STATE_DEFAULT); + if (IS_ERR(pctldev->hog_default)) { + dev_dbg(pctldev->dev, + "failed to lookup the default state\n"); + } else { + if (pinctrl_select_state(pctldev->p, + pctldev->hog_default)) + dev_err(pctldev->dev, + "failed to select default state\n"); + } + + pctldev->hog_sleep = + pinctrl_lookup_state(pctldev->p, + PINCTRL_STATE_SLEEP); + if (IS_ERR(pctldev->hog_sleep)) + dev_dbg(pctldev->dev, + "failed to lookup the sleep state\n"); + + return 0; +} + +int pinctrl_enable(struct pinctrl_dev *pctldev) +{ + int error; + + error = pinctrl_claim_hogs(pctldev); + if (error) { + dev_err(pctldev->dev, "could not claim hogs: %i\n", + error); + mutex_destroy(&pctldev->mutex); + kfree(pctldev); + + return error; } mutex_lock(&pinctrldev_list_mutex); @@ -2043,6 +2071,7 @@ static int pinctrl_create_and_start(struct pinctrl_dev *pctldev) return 0; } +EXPORT_SYMBOL_GPL(pinctrl_enable); /** * pinctrl_register() - register a pin controller device @@ -2065,25 +2094,30 @@ struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc, if (IS_ERR(pctldev)) return pctldev; - error = pinctrl_create_and_start(pctldev); - if (error) { - mutex_destroy(&pctldev->mutex); - kfree(pctldev); - + error = pinctrl_enable(pctldev); + if (error) return ERR_PTR(error); - } return pctldev; } EXPORT_SYMBOL_GPL(pinctrl_register); +/** + * pinctrl_register_and_init() - register and init pin controller device + * @pctldesc: descriptor for this pin controller + * @dev: parent device for this pin controller + * @driver_data: private pin controller data for this pin controller + * @pctldev: pin controller device + * + * Note that pinctrl_enable() still needs to be manually called after + * this once the driver is ready. + */ int pinctrl_register_and_init(struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data, struct pinctrl_dev **pctldev) { struct pinctrl_dev *p; - int error; p = pinctrl_init_controller(pctldesc, dev, driver_data); if (IS_ERR(p)) @@ -2097,15 +2131,6 @@ int pinctrl_register_and_init(struct pinctrl_desc *pctldesc, */ *pctldev = p; - error = pinctrl_create_and_start(p); - if (error) { - mutex_destroy(&p->mutex); - kfree(p); - *pctldev = NULL; - - return error; - } - return 0; } EXPORT_SYMBOL_GPL(pinctrl_register_and_init); diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c index a7ace9e1ad81..74bd90dfd7b1 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx.c +++ b/drivers/pinctrl/freescale/pinctrl-imx.c @@ -790,7 +790,7 @@ int imx_pinctrl_probe(struct platform_device *pdev, dev_info(&pdev->dev, "initialized IMX pinctrl driver\n"); - return 0; + return pinctrl_enable(ipctl->pctl); free: imx_free_resources(ipctl); diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 8b2d45e85bae..9c267dcda094 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -1781,7 +1781,7 @@ static int pcs_probe(struct platform_device *pdev) dev_info(pcs->dev, "%i pins at pa %p size %u\n", pcs->desc.npins, pcs->base, pcs->size); - return 0; + return pinctrl_enable(pcs->pctl); free: pcs_free_resources(pcs); diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c index 08150a321be6..a70157f0acf4 100644 --- a/drivers/pinctrl/sh-pfc/pinctrl.c +++ b/drivers/pinctrl/sh-pfc/pinctrl.c @@ -816,6 +816,13 @@ int sh_pfc_register_pinctrl(struct sh_pfc *pfc) pmx->pctl_desc.pins = pmx->pins; pmx->pctl_desc.npins = pfc->info->nr_pins; - return devm_pinctrl_register_and_init(pfc->dev, &pmx->pctl_desc, pmx, - &pmx->pctl); + ret = devm_pinctrl_register_and_init(pfc->dev, &pmx->pctl_desc, pmx, + &pmx->pctl); + if (ret) { + dev_err(pfc->dev, "could not register: %i\n", ret); + + return ret; + } + + return pinctrl_enable(pmx->pctl); } diff --git a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c index 717e3404900c..362c50918c13 100644 --- a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c +++ b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c @@ -893,6 +893,8 @@ static int ti_iodelay_probe(struct platform_device *pdev) platform_set_drvdata(pdev, iod); + return pinctrl_enable(iod->pctl); + exit_out: of_node_put(np); return ret; diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 8ce2d87a238b..5e45385c5bdc 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -145,8 +145,9 @@ struct pinctrl_desc { extern int pinctrl_register_and_init(struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data, struct pinctrl_dev **pctldev); +extern int pinctrl_enable(struct pinctrl_dev *pctldev); -/* Please use pinctrl_register_and_init() instead */ +/* Please use pinctrl_register_and_init() and pinctrl_enable() instead */ extern struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data); -- cgit v1.2.3 From 404123c2db798027e852480ed9c4accef9f1d9e6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 29 Mar 2017 19:06:20 +0300 Subject: virtio: allow drivers to validate features Some drivers can't support all features in all configurations. At the moment we blindly set FEATURES_OK and later FAILED. Support this better by adding a callback drivers can use to do some early checks. Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio.c | 6 ++++++ include/linux/virtio.h | 1 + 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 400d70b69379..48230a5e12f2 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -232,6 +232,12 @@ static int virtio_dev_probe(struct device *_d) if (device_features & (1ULL << i)) __virtio_set_bit(dev, i); + if (drv->validate) { + err = drv->validate(dev); + if (err) + goto err; + } + err = virtio_finalize_features(dev); if (err) goto err; diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 04b0d3f95043..7edfbdb55a99 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -167,6 +167,7 @@ struct virtio_driver { unsigned int feature_table_size; const unsigned int *feature_table_legacy; unsigned int feature_table_size_legacy; + int (*validate)(struct virtio_device *dev); int (*probe)(struct virtio_device *dev); void (*scan)(struct virtio_device *dev); void (*remove)(struct virtio_device *dev); -- cgit v1.2.3 From c0c379e2931b05facef538e53bf3b21f283d9a0b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 13 Apr 2017 14:56:23 -0700 Subject: mm: drop unused pmdp_huge_get_and_clear_notify() Dave noticed that after fixing MADV_DONTNEED vs numa balancing race the last pmdp_huge_get_and_clear_notify() user is gone. Let's drop the helper. Link: http://lkml.kernel.org/r/20170306112047.24809-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Dave Hansen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmu_notifier.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 51891fb0d3ce..c91b3bcd158f 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -394,18 +394,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) ___pud; \ }) -#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \ -({ \ - unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ - pmd_t ___pmd; \ - \ - ___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd); \ - mmu_notifier_invalidate_range(__mm, ___haddr, \ - ___haddr + HPAGE_PMD_SIZE); \ - \ - ___pmd; \ -}) - /* * set_pte_at_notify() sets the pte _after_ running the notifier. * This is safe to start by updating the secondary MMUs, because the primary MMU @@ -489,7 +477,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) #define ptep_clear_flush_notify ptep_clear_flush #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush #define pudp_huge_clear_flush_notify pudp_huge_clear_flush -#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear #define set_pte_at_notify set_pte_at #endif /* CONFIG_MMU_NOTIFIER */ -- cgit v1.2.3 From 5a8d75a1b8c99bdc926ba69b7b7dbe4fae81a5af Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 14 Apr 2017 13:58:29 -0600 Subject: block: fix bio_will_gap() for first bvec with offset Commit 729204ef49ec("block: relax check on sg gap") allows us to merge bios, if both are physically contiguous. This change can merge a huge number of small bios, through mkfs for example, mkfs.ntfs running time can be decreased to ~1/10. But if one rq starts with a non-aligned buffer (the 1st bvec's bv_offset is non-zero) and if we allow the merge, it is quite difficult to respect sg gap limit, especially the max segment size, or we risk having an unaligned virtual boundary. This patch tries to avoid the issue by disallowing a merge, if the req starts with an unaligned buffer. Also add comments to explain why the merged segment can't end in unaligned virt boundary. Fixes: 729204ef49ec ("block: relax check on sg gap") Tested-by: Johannes Thumshirn Reviewed-by: Johannes Thumshirn Signed-off-by: Ming Lei Rewrote parts of the commit message and comments. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7548f332121a..01a696b0a4d3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1672,12 +1672,36 @@ static inline bool bios_segs_mergeable(struct request_queue *q, return true; } -static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, - struct bio *next) +static inline bool bio_will_gap(struct request_queue *q, + struct request *prev_rq, + struct bio *prev, + struct bio *next) { if (bio_has_data(prev) && queue_virt_boundary(q)) { struct bio_vec pb, nb; + /* + * don't merge if the 1st bio starts with non-zero + * offset, otherwise it is quite difficult to respect + * sg gap limit. We work hard to merge a huge number of small + * single bios in case of mkfs. + */ + if (prev_rq) + bio_get_first_bvec(prev_rq->bio, &pb); + else + bio_get_first_bvec(prev, &pb); + if (pb.bv_offset) + return true; + + /* + * We don't need to worry about the situation that the + * merged segment ends in unaligned virt boundary: + * + * - if 'pb' ends aligned, the merged segment ends aligned + * - if 'pb' ends unaligned, the next bio must include + * one single bvec of 'nb', otherwise the 'nb' can't + * merge with 'pb' + */ bio_get_last_bvec(prev, &pb); bio_get_first_bvec(next, &nb); @@ -1690,12 +1714,12 @@ static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, static inline bool req_gap_back_merge(struct request *req, struct bio *bio) { - return bio_will_gap(req->q, req->biotail, bio); + return bio_will_gap(req->q, req, req->biotail, bio); } static inline bool req_gap_front_merge(struct request *req, struct bio *bio) { - return bio_will_gap(req->q, bio, req->bio); + return bio_will_gap(req->q, NULL, bio, req->bio); } int kblockd_schedule_work(struct work_struct *work); -- cgit v1.2.3 From 5ef1ecf060f28ecef313b5723f1fd39bf5a35f56 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 29 Mar 2017 20:54:37 +0200 Subject: mmc: sdio: fix alignment issue in struct sdio_func Certain 64-bit systems (e.g. Amlogic Meson GX) require buffers to be used for DMA to be 8-byte-aligned. struct sdio_func has an embedded small DMA buffer not meeting this requirement. When testing switching to descriptor chain mode in meson-gx driver SDIO is broken therefore. Fix this by allocating the small DMA buffer separately as kmalloc ensures that the returned memory area is properly aligned for every basic data type. Signed-off-by: Heiner Kallweit Tested-by: Helmut Klein Signed-off-by: Ulf Hansson --- drivers/mmc/core/sdio_bus.c | 12 +++++++++++- include/linux/mmc/sdio_func.h | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index e992a7f8a16f..2b32b88949ba 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -267,7 +267,7 @@ static void sdio_release_func(struct device *dev) sdio_free_func_cis(func); kfree(func->info); - + kfree(func->tmpbuf); kfree(func); } @@ -282,6 +282,16 @@ struct sdio_func *sdio_alloc_func(struct mmc_card *card) if (!func) return ERR_PTR(-ENOMEM); + /* + * allocate buffer separately to make sure it's properly aligned for + * DMA usage (incl. 64 bit DMA) + */ + func->tmpbuf = kmalloc(4, GFP_KERNEL); + if (!func->tmpbuf) { + kfree(func); + return ERR_PTR(-ENOMEM); + } + func->card = card; device_initialize(&func->dev); diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index aab032a6ae61..97ca105347a6 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -53,7 +53,7 @@ struct sdio_func { unsigned int state; /* function state */ #define SDIO_STATE_PRESENT (1<<0) /* present in sysfs */ - u8 tmpbuf[4]; /* DMA:able scratch buffer */ + u8 *tmpbuf; /* DMA:able scratch buffer */ unsigned num_info; /* number of info strings */ const char **info; /* info strings */ -- cgit v1.2.3