diff options
98 files changed, 1256 insertions, 450 deletions
diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt index 9f9ec9f76039..4e4b6f10d841 100644 --- a/Documentation/watchdog/watchdog-parameters.txt +++ b/Documentation/watchdog/watchdog-parameters.txt @@ -400,3 +400,7 @@ wm8350_wdt: nowayout: Watchdog cannot be stopped once started (default=kernel config parameter) ------------------------------------------------- +sun4v_wdt: +timeout_ms: Watchdog timeout in milliseconds 1..180000, default=60000) +nowayout: Watchdog cannot be stopped once started +------------------------------------------------- diff --git a/MAINTAINERS b/MAINTAINERS index da3e4d8016d0..f5e6a535bc34 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4518,6 +4518,12 @@ L: linuxppc-dev@lists.ozlabs.org S: Maintained F: drivers/dma/fsldma.* +FREESCALE GPMI NAND DRIVER +M: Han Xu <han.xu@nxp.com> +L: linux-mtd@lists.infradead.org +S: Maintained +F: drivers/mtd/nand/gpmi-nand/* + FREESCALE I2C CPM DRIVER M: Jochen Friedrich <jochen@scram.de> L: linuxppc-dev@lists.ozlabs.org @@ -4534,7 +4540,7 @@ F: include/linux/platform_data/video-imxfb.h F: drivers/video/fbdev/imxfb.c FREESCALE QUAD SPI DRIVER -M: Han Xu <han.xu@freescale.com> +M: Han Xu <han.xu@nxp.com> L: linux-mtd@lists.infradead.org S: Maintained F: drivers/mtd/spi-nor/fsl-quadspi.c diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 5fa69d7bae58..99361f11354a 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -161,7 +161,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 val; val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; } static unsigned long num_core_regs(void) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index bf464de33f52..f50608674580 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -34,13 +34,13 @@ /* * VMALLOC and SPARSEMEM_VMEMMAP ranges. * - * VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array + * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array * (rounded up to PUD_SIZE). * VMALLOC_START: beginning of the kernel VA space * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space, * fixed mappings and modules */ -#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE) +#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT - 1)) * sizeof(struct page), PUD_SIZE) #ifndef CONFIG_KASAN #define VMALLOC_START (VA_START) @@ -51,7 +51,8 @@ #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) -#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) +#define VMEMMAP_START (VMALLOC_END + SZ_64K) +#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) #define FIRST_USER_ADDRESS 0UL diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index fcb778899a38..9e54ad7c240a 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -194,7 +194,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 val; val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; } /** diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f3b061e67bfe..7802f216a67a 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -319,8 +319,8 @@ void __init mem_init(void) #endif MLG(VMALLOC_START, VMALLOC_END), #ifdef CONFIG_SPARSEMEM_VMEMMAP - MLG((unsigned long)vmemmap, - (unsigned long)vmemmap + VMEMMAP_SIZE), + MLG(VMEMMAP_START, + VMEMMAP_START + VMEMMAP_SIZE), MLM((unsigned long)virt_to_page(PAGE_OFFSET), (unsigned long)virt_to_page(high_memory)), #endif diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 8bc3977576e6..3110447ab1e9 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -702,7 +702,7 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { void __user *uaddr = (void __user *)(long)reg->addr; - return copy_to_user(uaddr, vs, 16); + return copy_to_user(uaddr, vs, 16) ? -EFAULT : 0; } else { return -EINVAL; } @@ -732,7 +732,7 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { void __user *uaddr = (void __user *)(long)reg->addr; - return copy_from_user(vs, uaddr, 16); + return copy_from_user(vs, uaddr, 16) ? -EFAULT : 0; } else { return -EINVAL; } diff --git a/arch/parisc/include/asm/floppy.h b/arch/parisc/include/asm/floppy.h index f84ff12574b7..6d8276cd25ca 100644 --- a/arch/parisc/include/asm/floppy.h +++ b/arch/parisc/include/asm/floppy.h @@ -33,7 +33,7 @@ * floppy accesses go through the track buffer. */ #define _CROSS_64KB(a,s,vdma) \ -(!vdma && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64)) +(!(vdma) && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64)) #define CROSS_64KB(a,s) _CROSS_64KB(a,s,use_virtual_dma & 1) diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h index 35bdccbb2036..b75039f92116 100644 --- a/arch/parisc/include/uapi/asm/unistd.h +++ b/arch/parisc/include/uapi/asm/unistd.h @@ -361,8 +361,9 @@ #define __NR_membarrier (__NR_Linux + 343) #define __NR_userfaultfd (__NR_Linux + 344) #define __NR_mlock2 (__NR_Linux + 345) +#define __NR_copy_file_range (__NR_Linux + 346) -#define __NR_Linux_syscalls (__NR_mlock2 + 1) +#define __NR_Linux_syscalls (__NR_copy_file_range + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 9585c81f755f..ce0b2b4075c7 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -269,14 +269,19 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, long do_syscall_trace_enter(struct pt_regs *regs) { - long ret = 0; - /* Do the secure computing check first. */ secure_computing_strict(regs->gr[20]); if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) - ret = -1L; + tracehook_report_syscall_entry(regs)) { + /* + * Tracing decided this syscall should not happen or the + * debugger stored an invalid system call number. Skip + * the system call and the system call restart handling. + */ + regs->gr[20] = -1UL; + goto out; + } #ifdef CONFIG_64BIT if (!is_compat_task()) @@ -290,7 +295,8 @@ long do_syscall_trace_enter(struct pt_regs *regs) regs->gr[24] & 0xffffffff, regs->gr[23] & 0xffffffff); - return ret ? : regs->gr[20]; +out: + return regs->gr[20]; } void do_syscall_trace_exit(struct pt_regs *regs) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 3fbd7252a4b2..fbafa0d0e2bf 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -343,7 +343,7 @@ tracesys_next: #endif comiclr,>>= __NR_Linux_syscalls, %r20, %r0 - b,n .Lsyscall_nosys + b,n .Ltracesys_nosys LDREGX %r20(%r19), %r19 @@ -359,6 +359,9 @@ tracesys_next: be 0(%sr7,%r19) ldo R%tracesys_exit(%r2),%r2 +.Ltracesys_nosys: + ldo -ENOSYS(%r0),%r28 /* set errno */ + /* Do *not* call this function on the gateway page, because it makes a direct call to syscall_trace. */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index d4ffcfbc9885..585d50fc75c0 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -441,6 +441,7 @@ ENTRY_SAME(membarrier) ENTRY_SAME(userfaultfd) ENTRY_SAME(mlock2) /* 345 */ + ENTRY_SAME(copy_file_range) .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b)) diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index eaee14637d93..8496a074bd0e 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -24,7 +24,13 @@ LDFLAGS := -m elf32_sparc export BITS := 32 UTS_MACHINE := sparc +# We are adding -Wa,-Av8 to KBUILD_CFLAGS to deal with a specs bug in some +# versions of gcc. Some gcc versions won't pass -Av8 to binutils when you +# give -mcpu=v8. This silently worked with older bintutils versions but +# does not any more. KBUILD_CFLAGS += -m32 -mcpu=v8 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7 +KBUILD_CFLAGS += -Wa,-Av8 + KBUILD_AFLAGS += -m32 -Wa,-Av8 else diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index 1c26d440d288..b6de8b10a55b 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -422,8 +422,9 @@ #define __NR_listen 354 #define __NR_setsockopt 355 #define __NR_mlock2 356 +#define __NR_copy_file_range 357 -#define NR_syscalls 357 +#define NR_syscalls 358 /* Bitmask values returned from kern_features system call. */ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index 33c02b15f478..a83707c83be8 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -948,7 +948,24 @@ linux_syscall_trace: cmp %o0, 0 bne 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. */ + ld [%sp + STACKFRAME_SZ + PT_G1], %g1 + sethi %hi(sys_call_table), %l7 + ld [%sp + STACKFRAME_SZ + PT_I0], %i0 + or %l7, %lo(sys_call_table), %l7 + ld [%sp + STACKFRAME_SZ + PT_I1], %i1 + ld [%sp + STACKFRAME_SZ + PT_I2], %i2 + ld [%sp + STACKFRAME_SZ + PT_I3], %i3 + ld [%sp + STACKFRAME_SZ + PT_I4], %i4 + ld [%sp + STACKFRAME_SZ + PT_I5], %i5 + cmp %g1, NR_syscalls + bgeu 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 mov %i0, %o0 + ld [%l7 + %l4], %l7 mov %i1, %o1 mov %i2, %o2 mov %i3, %o3 diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index afbaba52d2f1..d127130bf424 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S @@ -338,8 +338,9 @@ ENTRY(sun4v_mach_set_watchdog) mov %o1, %o4 mov HV_FAST_MACH_SET_WATCHDOG, %o5 ta HV_FAST_TRAP + brnz,a,pn %o4, 0f stx %o1, [%o4] - retl +0: retl nop ENDPROC(sun4v_mach_set_watchdog) diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index d88beff47bab..39aaec173f66 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -52,7 +52,7 @@ asmlinkage void sparc64_set_context(struct pt_regs *regs) unsigned char fenab; int err; - flush_user_windows(); + synchronize_user_stack(); if (get_thread_wsaved() || (((unsigned long)ucp) & (sizeof(unsigned long)-1)) || (!__access_ok(ucp, sizeof(*ucp)))) diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c index a92d5d2c46a3..9e034f29dcc5 100644 --- a/arch/sparc/kernel/sparc_ksyms_64.c +++ b/arch/sparc/kernel/sparc_ksyms_64.c @@ -37,6 +37,7 @@ EXPORT_SYMBOL(sun4v_niagara_getperf); EXPORT_SYMBOL(sun4v_niagara_setperf); EXPORT_SYMBOL(sun4v_niagara2_getperf); EXPORT_SYMBOL(sun4v_niagara2_setperf); +EXPORT_SYMBOL(sun4v_mach_set_watchdog); /* from hweight.S */ EXPORT_SYMBOL(__arch_hweight8); diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S index bb0008927598..c4a1b5c40e4e 100644 --- a/arch/sparc/kernel/syscalls.S +++ b/arch/sparc/kernel/syscalls.S @@ -158,7 +158,25 @@ linux_syscall_trace32: add %sp, PTREGS_OFF, %o0 brnz,pn %o0, 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. */ + ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 + sethi %hi(sys_call_table32), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 + or %l7, %lo(sys_call_table32), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1 + ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2 + ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3 + ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4 + ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5 + + cmp %g1, NR_syscalls + bgeu,pn %xcc, 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 srl %i0, 0, %o0 + lduw [%l7 + %l4], %l7 srl %i4, 0, %o4 srl %i1, 0, %o1 srl %i2, 0, %o2 @@ -170,7 +188,25 @@ linux_syscall_trace: add %sp, PTREGS_OFF, %o0 brnz,pn %o0, 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. */ + ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 + sethi %hi(sys_call_table64), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 + or %l7, %lo(sys_call_table64), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1 + ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2 + ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3 + ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4 + ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5 + + cmp %g1, NR_syscalls + bgeu,pn %xcc, 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 mov %i0, %o0 + lduw [%l7 + %l4], %l7 mov %i1, %o1 mov %i2, %o2 mov %i3, %o3 diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index e663b6c78de2..6c3dd6c52f8b 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -88,4 +88,4 @@ sys_call_table: /*340*/ .long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr /*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen -/*355*/ .long sys_setsockopt, sys_mlock2 +/*355*/ .long sys_setsockopt, sys_mlock2, sys_copy_file_range diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 1557121f4cdc..12b524cfcfa0 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -89,7 +89,7 @@ sys_call_table32: /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr .word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen - .word compat_sys_setsockopt, sys_mlock2 + .word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range #endif /* CONFIG_COMPAT */ @@ -170,4 +170,4 @@ sys_call_table: /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen - .word sys_setsockopt, sys_mlock2 + .word sys_setsockopt, sys_mlock2, sys_copy_file_range diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index d1daead5fcdd..adb3eaf8fe2a 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -16,6 +16,7 @@ #include <asm/cacheflush.h> #include <asm/realmode.h> +#include <linux/ftrace.h> #include "../../realmode/rm/wakeup.h" #include "sleep.h" @@ -107,7 +108,13 @@ int x86_acpi_suspend_lowlevel(void) saved_magic = 0x123456789abcdef0L; #endif /* CONFIG_64BIT */ + /* + * Pause/unpause graph tracing around do_suspend_lowlevel as it has + * inconsistent call/return info after it jumps to the wakeup vector. + */ + pause_graph_tracing(); do_suspend_lowlevel(); + unpause_graph_tracing(); return 0; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e2951b6edbbc..0ff453749a90 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -596,6 +596,8 @@ struct vcpu_vmx { /* Support for PML */ #define PML_ENTITY_NUM 512 struct page *pml_pg; + + u64 current_tsc_ratio; }; enum segment_cache_field { @@ -2127,14 +2129,16 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ - /* Setup TSC multiplier */ - if (cpu_has_vmx_tsc_scaling()) - vmcs_write64(TSC_MULTIPLIER, - vcpu->arch.tsc_scaling_ratio); - vmx->loaded_vmcs->cpu = cpu; } + /* Setup TSC multiplier */ + if (kvm_has_tsc_control && + vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) { + vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio; + vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); + } + vmx_vcpu_pi_load(vcpu, cpu); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f4891f2ece23..eaf6ee8c28b8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2752,7 +2752,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); - vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -6619,12 +6618,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * KVM_DEBUGREG_WONT_EXIT again. */ if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) { - int i; - WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP); kvm_x86_ops->sync_dirty_debug_regs(vcpu); - for (i = 0; i < KVM_NR_DB_REGS; i++) - vcpu->arch.eff_db[i] = vcpu->arch.db[i]; + kvm_update_dr0123(vcpu); + kvm_update_dr6(vcpu); + kvm_update_dr7(vcpu); + vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } /* diff --git a/block/blk-map.c b/block/blk-map.c index f565e11f465a..a54f0543b956 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -57,6 +57,49 @@ static int __blk_rq_unmap_user(struct bio *bio) return ret; } +static int __blk_rq_map_user_iov(struct request *rq, + struct rq_map_data *map_data, struct iov_iter *iter, + gfp_t gfp_mask, bool copy) +{ + struct request_queue *q = rq->q; + struct bio *bio, *orig_bio; + int ret; + + if (copy) + bio = bio_copy_user_iov(q, map_data, iter, gfp_mask); + else + bio = bio_map_user_iov(q, iter, gfp_mask); + + if (IS_ERR(bio)) + return PTR_ERR(bio); + + if (map_data && map_data->null_mapped) + bio_set_flag(bio, BIO_NULL_MAPPED); + + iov_iter_advance(iter, bio->bi_iter.bi_size); + if (map_data) + map_data->offset += bio->bi_iter.bi_size; + + orig_bio = bio; + blk_queue_bounce(q, &bio); + + /* + * We link the bounce buffer in and could have to traverse it + * later so we have to get a ref to prevent it from being freed + */ + bio_get(bio); + + ret = blk_rq_append_bio(q, rq, bio); + if (ret) { + bio_endio(bio); + __blk_rq_unmap_user(orig_bio); + bio_put(bio); + return ret; + } + + return 0; +} + /** * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage * @q: request queue where request should be inserted @@ -82,10 +125,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, struct rq_map_data *map_data, const struct iov_iter *iter, gfp_t gfp_mask) { - struct bio *bio; - int unaligned = 0; - struct iov_iter i; struct iovec iov, prv = {.iov_base = NULL, .iov_len = 0}; + bool copy = (q->dma_pad_mask & iter->count) || map_data; + struct bio *bio = NULL; + struct iov_iter i; + int ret; if (!iter || !iter->count) return -EINVAL; @@ -101,42 +145,29 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, */ if ((uaddr & queue_dma_alignment(q)) || iovec_gap_to_prv(q, &prv, &iov)) - unaligned = 1; + copy = true; prv.iov_base = iov.iov_base; prv.iov_len = iov.iov_len; } - if (unaligned || (q->dma_pad_mask & iter->count) || map_data) - bio = bio_copy_user_iov(q, map_data, iter, gfp_mask); - else - bio = bio_map_user_iov(q, iter, gfp_mask); - - if (IS_ERR(bio)) - return PTR_ERR(bio); - - if (map_data && map_data->null_mapped) - bio_set_flag(bio, BIO_NULL_MAPPED); - - if (bio->bi_iter.bi_size != iter->count) { - /* - * Grab an extra reference to this bio, as bio_unmap_user() - * expects to be able to drop it twice as it happens on the - * normal IO completion path - */ - bio_get(bio); - bio_endio(bio); - __blk_rq_unmap_user(bio); - return -EINVAL; - } + i = *iter; + do { + ret =__blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy); + if (ret) + goto unmap_rq; + if (!bio) + bio = rq->bio; + } while (iov_iter_count(&i)); if (!bio_flagged(bio, BIO_USER_MAPPED)) rq->cmd_flags |= REQ_COPY_USER; - - blk_queue_bounce(q, &bio); - bio_get(bio); - blk_rq_bio_prep(q, rq, bio); return 0; + +unmap_rq: + __blk_rq_unmap_user(bio); + rq->bio = NULL; + return -EINVAL; } EXPORT_SYMBOL(blk_rq_map_user_iov); diff --git a/block/blk-merge.c b/block/blk-merge.c index 888a7fec81f7..261353166dcf 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -304,7 +304,6 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, struct bio *nxt) { struct bio_vec end_bv = { NULL }, nxt_bv; - struct bvec_iter iter; if (!blk_queue_cluster(q)) return 0; @@ -316,11 +315,8 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, if (!bio_has_data(bio)) return 1; - bio_for_each_segment(end_bv, bio, iter) - if (end_bv.bv_len == iter.bi_size) - break; - - nxt_bv = bio_iovec(nxt); + bio_get_last_bvec(bio, &end_bv); + bio_get_first_bvec(nxt, &nxt_bv); if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv)) return 0; diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 546a3692774f..146dc0b8ec61 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -367,15 +367,21 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Lewisburg AHCI*/ { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/ { PCI_VDEVICE(INTEL, 0xa184), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa18e), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa1d2), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa1d6), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/ { PCI_VDEVICE(INTEL, 0xa204), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa20e), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, @@ -1325,6 +1331,44 @@ static inline void ahci_gtf_filter_workaround(struct ata_host *host) {} #endif +#ifdef CONFIG_ARM64 +/* + * Due to ERRATA#22536, ThunderX needs to handle HOST_IRQ_STAT differently. + * Workaround is to make sure all pending IRQs are served before leaving + * handler. + */ +static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance) +{ + struct ata_host *host = dev_instance; + struct ahci_host_priv *hpriv; + unsigned int rc = 0; + void __iomem *mmio; + u32 irq_stat, irq_masked; + unsigned int handled = 1; + + VPRINTK("ENTER\n"); + hpriv = host->private_data; + mmio = hpriv->mmio; + irq_stat = readl(mmio + HOST_IRQ_STAT); + if (!irq_stat) + return IRQ_NONE; + + do { + irq_masked = irq_stat & hpriv->port_map; + spin_lock(&host->lock); + rc = ahci_handle_port_intr(host, irq_masked); + if (!rc) + handled = 0; + writel(irq_stat, mmio + HOST_IRQ_STAT); + irq_stat = readl(mmio + HOST_IRQ_STAT); + spin_unlock(&host->lock); + } while (irq_stat); + VPRINTK("EXIT\n"); + + return IRQ_RETVAL(handled); +} +#endif + /* * ahci_init_msix() - optionally enable per-port MSI-X otherwise defer * to single msi. @@ -1560,6 +1604,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (ahci_broken_devslp(pdev)) hpriv->flags |= AHCI_HFLAG_NO_DEVSLP; +#ifdef CONFIG_ARM64 + if (pdev->vendor == 0x177d && pdev->device == 0xa01c) + hpriv->irq_handler = ahci_thunderx_irq_handler; +#endif + /* save initial config */ ahci_pci_save_initial_config(pdev, hpriv); diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index a44c75d4c284..167ba7e3b92e 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -240,8 +240,7 @@ enum { error-handling stage) */ AHCI_HFLAG_NO_DEVSLP = (1 << 17), /* no device sleep */ AHCI_HFLAG_NO_FBS = (1 << 18), /* no FBS */ - AHCI_HFLAG_EDGE_IRQ = (1 << 19), /* HOST_IRQ_STAT behaves as - Edge Triggered */ + #ifdef CONFIG_PCI_MSI AHCI_HFLAG_MULTI_MSI = (1 << 20), /* multiple PCI MSIs */ AHCI_HFLAG_MULTI_MSIX = (1 << 21), /* per-port MSI-X */ @@ -361,6 +360,7 @@ struct ahci_host_priv { * be overridden anytime before the host is activated. */ void (*start_engine)(struct ata_port *ap); + irqreturn_t (*irq_handler)(int irq, void *dev_instance); }; #ifdef CONFIG_PCI_MSI @@ -424,6 +424,7 @@ int ahci_reset_em(struct ata_host *host); void ahci_print_info(struct ata_host *host, const char *scc_s); int ahci_host_activate(struct ata_host *host, struct scsi_host_template *sht); void ahci_error_handler(struct ata_port *ap); +u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked); static inline void __iomem *__ahci_port_base(struct ata_host *host, unsigned int port_no) diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c index e2c6d9e0c5ac..8e3f7faf00d3 100644 --- a/drivers/ata/ahci_xgene.c +++ b/drivers/ata/ahci_xgene.c @@ -548,6 +548,88 @@ softreset_retry: return rc; } +/** + * xgene_ahci_handle_broken_edge_irq - Handle the broken irq. + * @ata_host: Host that recieved the irq + * @irq_masked: HOST_IRQ_STAT value + * + * For hardware with broken edge trigger latch + * the HOST_IRQ_STAT register misses the edge interrupt + * when clearing of HOST_IRQ_STAT register and hardware + * reporting the PORT_IRQ_STAT register at the + * same clock cycle. + * As such, the algorithm below outlines the workaround. + * + * 1. Read HOST_IRQ_STAT register and save the state. + * 2. Clear the HOST_IRQ_STAT register. + * 3. Read back the HOST_IRQ_STAT register. + * 4. If HOST_IRQ_STAT register equals to zero, then + * traverse the rest of port's PORT_IRQ_STAT register + * to check if an interrupt is triggered at that point else + * go to step 6. + * 5. If PORT_IRQ_STAT register of rest ports is not equal to zero + * then update the state of HOST_IRQ_STAT saved in step 1. + * 6. Handle port interrupts. + * 7. Exit + */ +static int xgene_ahci_handle_broken_edge_irq(struct ata_host *host, + u32 irq_masked) +{ + struct ahci_host_priv *hpriv = host->private_data; + void __iomem *port_mmio; + int i; + + if (!readl(hpriv->mmio + HOST_IRQ_STAT)) { + for (i = 0; i < host->n_ports; i++) { + if (irq_masked & (1 << i)) + continue; + + port_mmio = ahci_port_base(host->ports[i]); + if (readl(port_mmio + PORT_IRQ_STAT)) + irq_masked |= (1 << i); + } + } + + return ahci_handle_port_intr(host, irq_masked); +} + +static irqreturn_t xgene_ahci_irq_intr(int irq, void *dev_instance) +{ + struct ata_host *host = dev_instance; + struct ahci_host_priv *hpriv; + unsigned int rc = 0; + void __iomem *mmio; + u32 irq_stat, irq_masked; + + VPRINTK("ENTER\n"); + + hpriv = host->private_data; + mmio = hpriv->mmio; + + /* sigh. 0xffffffff is a valid return from h/w */ + irq_stat = readl(mmio + HOST_IRQ_STAT); + if (!irq_stat) + return IRQ_NONE; + + irq_masked = irq_stat & hpriv->port_map; + + spin_lock(&host->lock); + + /* + * HOST_IRQ_STAT behaves as edge triggered latch meaning that + * it should be cleared before all the port events are cleared. + */ + writel(irq_stat, mmio + HOST_IRQ_STAT); + + rc = xgene_ahci_handle_broken_edge_irq(host, irq_masked); + + spin_unlock(&host->lock); + + VPRINTK("EXIT\n"); + + return IRQ_RETVAL(rc); +} + static struct ata_port_operations xgene_ahci_v1_ops = { .inherits = &ahci_ops, .host_stop = xgene_ahci_host_stop, @@ -779,7 +861,8 @@ skip_clk_phy: hpriv->flags = AHCI_HFLAG_NO_NCQ; break; case XGENE_AHCI_V2: - hpriv->flags |= AHCI_HFLAG_YES_FBS | AHCI_HFLAG_EDGE_IRQ; + hpriv->flags |= AHCI_HFLAG_YES_FBS; + hpriv->irq_handler = xgene_ahci_irq_intr; break; default: break; diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 402967902cbe..85ea5142a095 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -113,6 +113,7 @@ static ssize_t ahci_store_em_buffer(struct device *dev, const char *buf, size_t size); static ssize_t ahci_show_em_supported(struct device *dev, struct device_attribute *attr, char *buf); +static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance); static DEVICE_ATTR(ahci_host_caps, S_IRUGO, ahci_show_host_caps, NULL); static DEVICE_ATTR(ahci_host_cap2, S_IRUGO, ahci_show_host_cap2, NULL); @@ -512,6 +513,9 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv) if (!hpriv->start_engine) hpriv->start_engine = ahci_start_engine; + + if (!hpriv->irq_handler) + hpriv->irq_handler = ahci_single_level_irq_intr; } EXPORT_SYMBOL_GPL(ahci_save_initial_config); @@ -1164,8 +1168,7 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap, /* mark esata ports */ tmp = readl(port_mmio + PORT_CMD); - if ((tmp & PORT_CMD_HPCP) || - ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS))) + if ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS)) ap->pflags |= ATA_PFLAG_EXTERNAL; } @@ -1846,7 +1849,7 @@ static irqreturn_t ahci_multi_irqs_intr_hard(int irq, void *dev_instance) return IRQ_HANDLED; } -static u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked) +u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked) { unsigned int i, handled = 0; @@ -1872,43 +1875,7 @@ static u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked) return handled; } - -static irqreturn_t ahci_single_edge_irq_intr(int irq, void *dev_instance) -{ - struct ata_host *host = dev_instance; - struct ahci_host_priv *hpriv; - unsigned int rc = 0; - void __iomem *mmio; - u32 irq_stat, irq_masked; - - VPRINTK("ENTER\n"); - - hpriv = host->private_data; - mmio = hpriv->mmio; - - /* sigh. 0xffffffff is a valid return from h/w */ - irq_stat = readl(mmio + HOST_IRQ_STAT); - if (!irq_stat) - return IRQ_NONE; - - irq_masked = irq_stat & hpriv->port_map; - - spin_lock(&host->lock); - - /* - * HOST_IRQ_STAT behaves as edge triggered latch meaning that - * it should be cleared before all the port events are cleared. - */ - writel(irq_stat, mmio + HOST_IRQ_STAT); - - rc = ahci_handle_port_intr(host, irq_masked); - - spin_unlock(&host->lock); - - VPRINTK("EXIT\n"); - - return IRQ_RETVAL(rc); -} +EXPORT_SYMBOL_GPL(ahci_handle_port_intr); static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance) { @@ -2535,14 +2502,18 @@ int ahci_host_activate(struct ata_host *host, struct scsi_host_template *sht) int irq = hpriv->irq; int rc; - if (hpriv->flags & (AHCI_HFLAG_MULTI_MSI | AHCI_HFLAG_MULTI_MSIX)) + if (hpriv->flags & (AHCI_HFLAG_MULTI_MSI | AHCI_HFLAG_MULTI_MSIX)) { + if (hpriv->irq_handler) + dev_warn(host->dev, "both AHCI_HFLAG_MULTI_MSI flag set \ + and custom irq handler implemented\n"); + rc = ahci_host_activate_multi_irqs(host, sht); - else if (hpriv->flags & AHCI_HFLAG_EDGE_IRQ) - rc = ata_host_activate(host, irq, ahci_single_edge_irq_intr, - IRQF_SHARED, sht); - else - rc = ata_host_activate(host, irq, ahci_single_level_irq_intr, + } else { + rc = ata_host_activate(host, irq, hpriv->irq_handler, IRQF_SHARED, sht); + } + + return rc; } EXPORT_SYMBOL_GPL(ahci_host_activate); diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 7e959f90c020..e417e1a1d02c 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -675,19 +675,18 @@ static int ata_ioc32(struct ata_port *ap) int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev, int cmd, void __user *arg) { - int val = -EINVAL, rc = -EINVAL; + unsigned long val; + int rc = -EINVAL; unsigned long flags; switch (cmd) { - case ATA_IOC_GET_IO32: + case HDIO_GET_32BIT: spin_lock_irqsave(ap->lock, flags); val = ata_ioc32(ap); spin_unlock_irqrestore(ap->lock, flags); - if (copy_to_user(arg, &val, 1)) - return -EFAULT; - return 0; + return put_user(val, (unsigned long __user *)arg); - case ATA_IOC_SET_IO32: + case HDIO_SET_32BIT: val = (unsigned long) arg; rc = 0; spin_lock_irqsave(ap->lock, flags); diff --git a/drivers/ata/pata_rb532_cf.c b/drivers/ata/pata_rb532_cf.c index 12fe0f3bb7e9..c8b6a780a290 100644 --- a/drivers/ata/pata_rb532_cf.c +++ b/drivers/ata/pata_rb532_cf.c @@ -32,6 +32,8 @@ #include <linux/libata.h> #include <scsi/scsi_host.h> +#include <asm/mach-rc32434/rb.h> + #define DRV_NAME "pata-rb532-cf" #define DRV_VERSION "0.1.0" #define DRV_DESC "PATA driver for RouterBOARD 532 Compact Flash" @@ -107,6 +109,7 @@ static int rb532_pata_driver_probe(struct platform_device *pdev) int gpio; struct resource *res; struct ata_host *ah; + struct cf_device *pdata; struct rb532_cf_info *info; int ret; @@ -122,7 +125,13 @@ static int rb532_pata_driver_probe(struct platform_device *pdev) return -ENOENT; } - gpio = irq_to_gpio(irq); + pdata = dev_get_platdata(&pdev->dev); + if (!pdata) { + dev_err(&pdev->dev, "no platform data specified\n"); + return -EINVAL; + } + + gpio = pdata->gpio_pin; if (gpio < 0) { dev_err(&pdev->dev, "no GPIO found for irq%d\n", irq); return -ENOENT; diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 659879a56dba..f93511031177 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -296,6 +296,7 @@ endif config QORIQ_CPUFREQ tristate "CPU frequency scaling driver for Freescale QorIQ SoCs" depends on OF && COMMON_CLK && (PPC_E500MC || ARM) + depends on !CPU_THERMAL || THERMAL select CLK_QORIQ help This adds the CPUFreq driver support for Freescale QorIQ SoCs diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 0031069b64c9..14b1f9393b05 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -84,10 +84,10 @@ config ARM_KIRKWOOD_CPUFREQ SoCs. config ARM_MT8173_CPUFREQ - bool "Mediatek MT8173 CPUFreq support" + tristate "Mediatek MT8173 CPUFreq support" depends on ARCH_MEDIATEK && REGULATOR depends on ARM64 || (ARM_CPU_TOPOLOGY && COMPILE_TEST) - depends on !CPU_THERMAL || THERMAL=y + depends on !CPU_THERMAL || THERMAL select PM_OPP help This adds the CPUFreq driver support for Mediatek MT8173 SoC. diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c index 1efba340456d..2058e6d292ce 100644 --- a/drivers/cpufreq/mt8173-cpufreq.c +++ b/drivers/cpufreq/mt8173-cpufreq.c @@ -17,6 +17,7 @@ #include <linux/cpu_cooling.h> #include <linux/cpufreq.h> #include <linux/cpumask.h> +#include <linux/module.h> #include <linux/of.h> #include <linux/platform_device.h> #include <linux/pm_opp.h> diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index cf41440aff91..d9ab0cd1d205 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -196,6 +196,44 @@ static int gpio_rcar_irq_set_wake(struct irq_data *d, unsigned int on) return 0; } +static void gpio_rcar_irq_bus_lock(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct gpio_rcar_priv *p = gpiochip_get_data(gc); + + pm_runtime_get_sync(&p->pdev->dev); +} + +static void gpio_rcar_irq_bus_sync_unlock(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct gpio_rcar_priv *p = gpiochip_get_data(gc); + + pm_runtime_put(&p->pdev->dev); +} + + +static int gpio_rcar_irq_request_resources(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct gpio_rcar_priv *p = gpiochip_get_data(gc); + int error; + + error = pm_runtime_get_sync(&p->pdev->dev); + if (error < 0) + return error; + + return 0; +} + +static void gpio_rcar_irq_release_resources(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct gpio_rcar_priv *p = gpiochip_get_data(gc); + + pm_runtime_put(&p->pdev->dev); +} + static irqreturn_t gpio_rcar_irq_handler(int irq, void *dev_id) { struct gpio_rcar_priv *p = dev_id; @@ -450,6 +488,10 @@ static int gpio_rcar_probe(struct platform_device *pdev) irq_chip->irq_unmask = gpio_rcar_irq_enable; irq_chip->irq_set_type = gpio_rcar_irq_set_type; irq_chip->irq_set_wake = gpio_rcar_irq_set_wake; + irq_chip->irq_bus_lock = gpio_rcar_irq_bus_lock; + irq_chip->irq_bus_sync_unlock = gpio_rcar_irq_bus_sync_unlock; + irq_chip->irq_request_resources = gpio_rcar_irq_request_resources; + irq_chip->irq_release_resources = gpio_rcar_irq_release_resources; irq_chip->flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_MASK_ON_SUSPEND; ret = gpiochip_add_data(gpio_chip, p); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 89c3dd62ba21..119cdc2c43e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -77,7 +77,7 @@ void amdgpu_connector_hotplug(struct drm_connector *connector) } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { /* Don't try to start link training before we * have the dpcd */ - if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) + if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) return; /* set it to OFF so that drm_helper_connector_dpms() diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 66855b62a603..95a4a25d8df9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -649,9 +649,6 @@ force: /* update display watermarks based on new power state */ amdgpu_display_bandwidth_update(adev); - adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs; - adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count; - /* wait for the rings to drain */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; @@ -670,6 +667,9 @@ force: /* update displays */ amdgpu_dpm_display_configuration_changed(adev); + adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs; + adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count; + if (adev->pm.funcs->force_performance_level) { if (adev->pm.dpm.thermal_active) { enum amdgpu_dpm_forced_level level = adev->pm.dpm.forced_level; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index b9d0d55f6b47..3cb6d6c413c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -143,8 +143,10 @@ static int amdgpu_pp_late_init(void *handle) adev->powerplay.pp_handle); #ifdef CONFIG_DRM_AMD_POWERPLAY - if (adev->pp_enabled) + if (adev->pp_enabled) { amdgpu_pm_sysfs_init(adev); + amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_COMPLETE_INIT, NULL, NULL); + } #endif return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c index 9056355309d1..e7ef2261ff4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c @@ -2202,8 +2202,7 @@ static void cz_dpm_powergate_vce(struct amdgpu_device *adev, bool gate) AMD_PG_STATE_GATE); cz_enable_vce_dpm(adev, false); - /* TODO: to figure out why vce can't be poweroff. */ - /* cz_send_msg_to_smc(adev, PPSMC_MSG_VCEPowerOFF); */ + cz_send_msg_to_smc(adev, PPSMC_MSG_VCEPowerOFF); pi->vce_power_gated = true; } else { cz_send_msg_to_smc(adev, PPSMC_MSG_VCEPowerON); @@ -2226,10 +2225,8 @@ static void cz_dpm_powergate_vce(struct amdgpu_device *adev, bool gate) } } else { /*pi->caps_vce_pg*/ cz_update_vce_dpm(adev); - cz_enable_vce_dpm(adev, true); + cz_enable_vce_dpm(adev, !gate); } - - return; } const struct amd_ip_funcs cz_dpm_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 7732059ae30f..06602df707f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -3628,6 +3628,19 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); + amdgpu_ring_write(ring, seq); + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, 4); /* poll interval */ + if (usepfp) { /* synce CE with ME to prevent CE fetch CEIB before context switch done */ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 1c40bd90afbb..7086ac17abee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4809,7 +4809,8 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ - WAIT_REG_MEM_FUNCTION(3))); /* equal */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); amdgpu_ring_write(ring, seq); diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index aa67244a77ae..589599f66fcc 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -402,8 +402,11 @@ int pp_dpm_dispatch_tasks(void *handle, enum amd_pp_event event_id, void *input, data.requested_ui_label = power_state_convert(ps); ret = pem_handle_event(pp_handle->eventmgr, event_id, &data); + break; } - break; + case AMD_PP_EVENT_COMPLETE_INIT: + ret = pem_handle_event(pp_handle->eventmgr, event_id, &data); + break; default: break; } diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c index 83be3cf210e0..6b52c78cb404 100644 --- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c +++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c @@ -165,6 +165,7 @@ const struct action_chain resume_action_chain = { }; static const pem_event_action *complete_init_event[] = { + unblock_adjust_power_state_tasks, adjust_power_state_tasks, enable_gfx_clock_gating_tasks, enable_gfx_voltage_island_power_gating_tasks, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c index ad7700822a1c..ff08ce41bde9 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c @@ -226,7 +226,7 @@ int cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) } } else { cz_dpm_update_vce_dpm(hwmgr); - cz_enable_disable_vce_dpm(hwmgr, true); + cz_enable_disable_vce_dpm(hwmgr, !bgate); return 0; } diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 9759009d1da3..b1480acbb3c3 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -227,7 +227,7 @@ static int ast_get_dram_info(struct drm_device *dev) } while (ast_read32(ast, 0x10000) != 0x01); data = ast_read32(ast, 0x10004); - if (data & 0x400) + if (data & 0x40) ast->dram_bus_width = 16; else ast->dram_bus_width = 32; diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 678ed3475d7e..4f43d9b32e66 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -2303,15 +2303,15 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) */ void intel_power_domains_suspend(struct drm_i915_private *dev_priv) { - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) - skl_display_core_uninit(dev_priv); - /* * Even if power well support was disabled we still want to disable * power wells while we are system suspended. */ if (!i915.disable_power_well) intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + skl_display_core_uninit(dev_priv); } /** @@ -2349,22 +2349,20 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv) { struct drm_device *dev = dev_priv->dev; struct device *device = &dev->pdev->dev; - int ret; - - if (!IS_ENABLED(CONFIG_PM)) - return true; - ret = pm_runtime_get_if_in_use(device); + if (IS_ENABLED(CONFIG_PM)) { + int ret = pm_runtime_get_if_in_use(device); - /* - * In cases runtime PM is disabled by the RPM core and we get an - * -EINVAL return value we are not supposed to call this function, - * since the power state is undefined. This applies atm to the - * late/early system suspend/resume handlers. - */ - WARN_ON_ONCE(ret < 0); - if (ret <= 0) - return false; + /* + * In cases runtime PM is disabled by the RPM core and we get + * an -EINVAL return value we are not supposed to call this + * function, since the power state is undefined. This applies + * atm to the late/early system suspend/resume handlers. + */ + WARN_ON_ONCE(ret < 0); + if (ret <= 0) + return false; + } atomic_inc(&dev_priv->pm.wakeref_count); assert_rpm_wakelock_held(dev_priv); diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index ca3be90a3bb4..0f14d897baf9 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1080,10 +1080,6 @@ force: /* update display watermarks based on new power state */ radeon_bandwidth_update(rdev); - rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs; - rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count; - rdev->pm.dpm.single_display = single_display; - /* wait for the rings to drain */ for (i = 0; i < RADEON_NUM_RINGS; i++) { struct radeon_ring *ring = &rdev->ring[i]; @@ -1102,6 +1098,10 @@ force: /* update displays */ radeon_dpm_display_configuration_changed(rdev); + rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs; + rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count; + rdev->pm.dpm.single_display = single_display; + if (rdev->asic->dpm.force_performance_level) { if (rdev->pm.dpm.thermal_active) { enum radeon_dpm_forced_level level = rdev->pm.dpm.forced_level; diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index da462afcb225..dd2dbb9746ce 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -18,6 +18,7 @@ #include <linux/host1x.h> #include <linux/of.h> #include <linux/slab.h> +#include <linux/of_device.h> #include "bus.h" #include "dev.h" @@ -394,6 +395,7 @@ static int host1x_device_add(struct host1x *host1x, device->dev.coherent_dma_mask = host1x->dev->coherent_dma_mask; device->dev.dma_mask = &device->dev.coherent_dma_mask; dev_set_name(&device->dev, "%s", driver->driver.name); + of_dma_configure(&device->dev, host1x->dev->of_node); device->dev.release = host1x_device_release; device->dev.bus = &host1x_bus_type; device->dev.parent = host1x->dev; diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 314bf3718cc7..ff348690df94 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -23,6 +23,7 @@ #include <linux/of_device.h> #include <linux/clk.h> #include <linux/io.h> +#include <linux/dma-mapping.h> #define CREATE_TRACE_POINTS #include <trace/events/host1x.h> @@ -68,6 +69,7 @@ static const struct host1x_info host1x01_info = { .nb_bases = 8, .init = host1x01_init, .sync_offset = 0x3000, + .dma_mask = DMA_BIT_MASK(32), }; static const struct host1x_info host1x02_info = { @@ -77,6 +79,7 @@ static const struct host1x_info host1x02_info = { .nb_bases = 12, .init = host1x02_init, .sync_offset = 0x3000, + .dma_mask = DMA_BIT_MASK(32), }; static const struct host1x_info host1x04_info = { @@ -86,6 +89,7 @@ static const struct host1x_info host1x04_info = { .nb_bases = 64, .init = host1x04_init, .sync_offset = 0x2100, + .dma_mask = DMA_BIT_MASK(34), }; static const struct host1x_info host1x05_info = { @@ -95,6 +99,7 @@ static const struct host1x_info host1x05_info = { .nb_bases = 64, .init = host1x05_init, .sync_offset = 0x2100, + .dma_mask = DMA_BIT_MASK(34), }; static struct of_device_id host1x_of_match[] = { @@ -148,6 +153,8 @@ static int host1x_probe(struct platform_device *pdev) if (IS_ERR(host->regs)) return PTR_ERR(host->regs); + dma_set_mask_and_coherent(host->dev, host->info->dma_mask); + if (host->info->init) { err = host->info->init(host); if (err) diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index 0b6e8e9629c5..dace124994bb 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -96,6 +96,7 @@ struct host1x_info { int nb_mlocks; /* host1x: number of mlocks */ int (*init)(struct host1x *); /* initialize per SoC ops */ int sync_offset; + u64 dma_mask; /* mask of addressable memory */ }; struct host1x { diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 00da80e02154..94b80a51ab68 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -358,6 +358,7 @@ int ib_register_device(struct ib_device *device, ret = device->query_device(device, &device->attrs, &uhw); if (ret) { printk(KERN_WARNING "Couldn't query the device attributes\n"); + ib_cache_cleanup_one(device); goto out; } diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index f334090bb612..1e37f3515d98 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1071,7 +1071,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, } } - if (rec->hop_limit > 1 || use_roce) { + if (rec->hop_limit > 0 || use_roce) { ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.dgid = rec->dgid; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 6ffc9c4e93af..6c6fbff19752 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1970,7 +1970,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, resp_size); INIT_UDATA(&uhw, buf + sizeof(cmd), (unsigned long)cmd.response + resp_size, - in_len - sizeof(cmd), out_len - resp_size); + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - resp_size); memset(&cmd_ex, 0, sizeof(cmd_ex)); cmd_ex.user_handle = cmd.user_handle; @@ -3413,7 +3414,8 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof resp); ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata); if (ret) @@ -3439,7 +3441,8 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof resp); ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata); if (ret) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 4659256cd95e..3b2ddd64a371 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -75,7 +75,8 @@ static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type) static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, struct mlx5_create_srq_mbox_in **in, - struct ib_udata *udata, int buf_size, int *inlen) + struct ib_udata *udata, int buf_size, int *inlen, + int is_xrc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_create_srq ucmd = {}; @@ -87,13 +88,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, int ncont; u32 offset; u32 uidx = MLX5_IB_DEFAULT_UIDX; - int drv_data = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); - if (drv_data < 0) - return -EINVAL; - - ucmdlen = (drv_data < sizeof(ucmd)) ? - drv_data : sizeof(ucmd); + ucmdlen = min(udata->inlen, sizeof(ucmd)); if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) { mlx5_ib_dbg(dev, "failed copy udata\n"); @@ -103,15 +99,17 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, if (ucmd.reserved0 || ucmd.reserved1) return -EINVAL; - if (drv_data > sizeof(ucmd) && + if (udata->inlen > sizeof(ucmd) && !ib_is_udata_cleared(udata, sizeof(ucmd), - drv_data - sizeof(ucmd))) + udata->inlen - sizeof(ucmd))) return -EINVAL; - err = get_srq_user_index(to_mucontext(pd->uobject->context), - &ucmd, udata->inlen, &uidx); - if (err) - return err; + if (is_xrc) { + err = get_srq_user_index(to_mucontext(pd->uobject->context), + &ucmd, udata->inlen, &uidx); + if (err) + return err; + } srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); @@ -151,7 +149,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26); - if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) { + if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) && + is_xrc){ xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in, xrc_srq_context_entry); MLX5_SET(xrc_srqc, xsrqc, user_index, uidx); @@ -170,7 +169,7 @@ err_umem: static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, struct mlx5_create_srq_mbox_in **in, int buf_size, - int *inlen) + int *inlen, int is_xrc) { int err; int i; @@ -224,7 +223,8 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; - if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) { + if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) && + is_xrc){ xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in, xrc_srq_context_entry); /* 0xffffff means we ask to work with cqe version 0 */ @@ -302,10 +302,14 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, srq->msrq.max_avail_gather); + is_xrc = (init_attr->srq_type == IB_SRQT_XRC); + if (pd->uobject) - err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen); + err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen, + is_xrc); else - err = create_srq_kernel(dev, srq, &in, buf_size, &inlen); + err = create_srq_kernel(dev, srq, &in, buf_size, &inlen, + is_xrc); if (err) { mlx5_ib_warn(dev, "create srq %s failed, err %d\n", @@ -313,7 +317,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, goto err_srq; } - is_xrc = (init_attr->srq_type == IB_SRQT_XRC); in->ctx.state_log_sz = ilog2(srq->msrq.max); flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24; xrcdn = 0; diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index e5e223938eec..374c129219ef 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -114,6 +114,7 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static int protection_domain_init(struct protection_domain *domain); +static void detach_device(struct device *dev); /* * For dynamic growth the aperture size is split into ranges of 128MB of @@ -384,6 +385,9 @@ static void iommu_uninit_device(struct device *dev) if (!dev_data) return; + if (dev_data->domain) + detach_device(dev); + iommu_device_unlink(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev, dev); diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 013bdfff2d4d..bf4959f4225b 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -228,6 +228,10 @@ static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(void); +static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu, + u8 bank, u8 cntr, u8 fxn, + u64 *value, bool is_write); + static inline void update_last_devid(u16 devid) { if (devid > amd_iommu_last_bdf) @@ -1016,6 +1020,34 @@ static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) } /* + * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission) + * Workaround: + * BIOS should enable ATS write permission check by setting + * L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b + */ +static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) +{ + u32 value; + + if ((boot_cpu_data.x86 != 0x15) || + (boot_cpu_data.x86_model < 0x30) || + (boot_cpu_data.x86_model > 0x3f)) + return; + + /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */ + value = iommu_read_l2(iommu, 0x47); + + if (value & BIT(0)) + return; + + /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */ + iommu_write_l2(iommu, 0x47, value | BIT(0)); + + pr_info("AMD-Vi: Applying ATS write check workaround for IOMMU at %s\n", + dev_name(&iommu->dev->dev)); +} + +/* * This function clues the initialization function for one IOMMU * together and also allocates the command buffer and programs the * hardware. It does NOT enable the IOMMU. This is done afterwards. @@ -1142,8 +1174,8 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) amd_iommu_pc_present = true; /* Check if the performance counters can be written to */ - if ((0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val, true)) || - (0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val2, false)) || + if ((0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true)) || + (0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val2, false)) || (val != val2)) { pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n"); amd_iommu_pc_present = false; @@ -1284,6 +1316,7 @@ static int iommu_init_pci(struct amd_iommu *iommu) } amd_iommu_erratum_746_workaround(iommu); + amd_iommu_ats_write_check_workaround(iommu); iommu->iommu_dev = iommu_device_create(&iommu->dev->dev, iommu, amd_iommu_groups, "ivhd%d", @@ -2283,22 +2316,15 @@ u8 amd_iommu_pc_get_max_counters(u16 devid) } EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); -int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, +static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu, + u8 bank, u8 cntr, u8 fxn, u64 *value, bool is_write) { - struct amd_iommu *iommu; u32 offset; u32 max_offset_lim; - /* Make sure the IOMMU PC resource is available */ - if (!amd_iommu_pc_present) - return -ENODEV; - - /* Locate the iommu associated with the device ID */ - iommu = amd_iommu_rlookup_table[devid]; - /* Check for valid iommu and pc register indexing */ - if (WARN_ON((iommu == NULL) || (fxn > 0x28) || (fxn & 7))) + if (WARN_ON((fxn > 0x28) || (fxn & 7))) return -ENODEV; offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn); @@ -2322,3 +2348,16 @@ int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, return 0; } EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val); + +int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, + u64 *value, bool is_write) +{ + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + /* Make sure the IOMMU PC resource is available */ + if (!amd_iommu_pc_present || iommu == NULL) + return -ENODEV; + + return iommu_pc_get_set_reg_val(iommu, bank, cntr, fxn, + value, is_write); +} diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index fb092f3f11cb..8ffd7568fc91 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -329,7 +329,8 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, /* Only care about add/remove events for physical functions */ if (pdev->is_virtfn) return NOTIFY_DONE; - if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_DEL_DEVICE) + if (action != BUS_NOTIFY_ADD_DEVICE && + action != BUS_NOTIFY_REMOVED_DEVICE) return NOTIFY_DONE; info = dmar_alloc_pci_notify_info(pdev, action); @@ -339,7 +340,7 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, down_write(&dmar_global_lock); if (action == BUS_NOTIFY_ADD_DEVICE) dmar_pci_bus_add_dev(info); - else if (action == BUS_NOTIFY_DEL_DEVICE) + else if (action == BUS_NOTIFY_REMOVED_DEVICE) dmar_pci_bus_del_dev(info); up_write(&dmar_global_lock); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 986a53e3eb96..a2e1b7f14df2 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4367,7 +4367,7 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) rmrru->devices_cnt); if(ret < 0) return ret; - } else if (info->event == BUS_NOTIFY_DEL_DEVICE) { + } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { dmar_remove_dev_scope(info, rmrr->segment, rmrru->devices, rmrru->devices_cnt); } @@ -4387,7 +4387,7 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) break; else if(ret < 0) return ret; - } else if (info->event == BUS_NOTIFY_DEL_DEVICE) { + } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { if (dmar_remove_dev_scope(info, atsr->segment, atsru->devices, atsru->devices_cnt)) break; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3cd921e6121e..03c46412fff4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -55,8 +55,9 @@ static void nvme_free_ns(struct kref *kref) ns->disk->private_data = NULL; spin_unlock(&dev_list_lock); - nvme_put_ctrl(ns->ctrl); put_disk(ns->disk); + ida_simple_remove(&ns->ctrl->ns_ida, ns->instance); + nvme_put_ctrl(ns->ctrl); kfree(ns); } @@ -183,7 +184,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, goto out_unmap; } - if (meta_buffer) { + if (meta_buffer && meta_len) { struct bio_integrity_payload *bip; meta = kmalloc(meta_len, GFP_KERNEL); @@ -373,6 +374,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) if (copy_from_user(&io, uio, sizeof(io))) return -EFAULT; + if (io.flags) + return -EINVAL; switch (io.opcode) { case nvme_cmd_write: @@ -424,6 +427,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return -EACCES; if (copy_from_user(&cmd, ucmd, sizeof(cmd))) return -EFAULT; + if (cmd.flags) + return -EINVAL; memset(&c, 0, sizeof(c)); c.common.opcode = cmd.opcode; @@ -556,6 +561,10 @@ static int nvme_revalidate_disk(struct gendisk *disk) u16 old_ms; unsigned short bs; + if (test_bit(NVME_NS_DEAD, &ns->flags)) { + set_capacity(disk, 0); + return -ENODEV; + } if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) { dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n", __func__, ns->ctrl->instance, ns->ns_id); @@ -831,6 +840,23 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) return ret; } +static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, + struct request_queue *q) +{ + if (ctrl->max_hw_sectors) { + u32 max_segments = + (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1; + + blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); + blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); + } + if (ctrl->stripe_size) + blk_queue_chunk_sectors(q, ctrl->stripe_size >> 9); + if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) + blk_queue_flush(q, REQ_FLUSH | REQ_FUA); + blk_queue_virt_boundary(q, ctrl->page_size - 1); +} + /* * Initialize the cached copies of the Identify data and various controller * register in our nvme_ctrl structure. This should be called as soon as @@ -888,6 +914,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) } } + nvme_set_queue_limits(ctrl, ctrl->admin_q); + kfree(id); return 0; } @@ -1118,9 +1146,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (!ns) return; + ns->instance = ida_simple_get(&ctrl->ns_ida, 1, 0, GFP_KERNEL); + if (ns->instance < 0) + goto out_free_ns; + ns->queue = blk_mq_init_queue(ctrl->tagset); if (IS_ERR(ns->queue)) - goto out_free_ns; + goto out_release_instance; queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); ns->queue->queuedata = ns; ns->ctrl = ctrl; @@ -1134,17 +1166,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns->disk = disk; ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */ + blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); - if (ctrl->max_hw_sectors) { - blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors); - blk_queue_max_segments(ns->queue, - (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1); - } - if (ctrl->stripe_size) - blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9); - if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) - blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA); - blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1); + nvme_set_queue_limits(ctrl, ns->queue); disk->major = nvme_major; disk->first_minor = 0; @@ -1153,7 +1177,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) disk->queue = ns->queue; disk->driverfs_dev = ctrl->device; disk->flags = GENHD_FL_EXT_DEVT; - sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, nsid); + sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, ns->instance); if (nvme_revalidate_disk(ns->disk)) goto out_free_disk; @@ -1173,40 +1197,29 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) kfree(disk); out_free_queue: blk_cleanup_queue(ns->queue); + out_release_instance: + ida_simple_remove(&ctrl->ns_ida, ns->instance); out_free_ns: kfree(ns); } static void nvme_ns_remove(struct nvme_ns *ns) { - bool kill = nvme_io_incapable(ns->ctrl) && - !blk_queue_dying(ns->queue); - - lockdep_assert_held(&ns->ctrl->namespaces_mutex); - - if (kill) { - blk_set_queue_dying(ns->queue); + if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) + return; - /* - * The controller was shutdown first if we got here through - * device removal. The shutdown may requeue outstanding - * requests. These need to be aborted immediately so - * del_gendisk doesn't block indefinitely for their completion. - */ - blk_mq_abort_requeue_list(ns->queue); - } if (ns->disk->flags & GENHD_FL_UP) { if (blk_get_integrity(ns->disk)) blk_integrity_unregister(ns->disk); sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, &nvme_ns_attr_group); del_gendisk(ns->disk); - } - if (kill || !blk_queue_dying(ns->queue)) { blk_mq_abort_requeue_list(ns->queue); blk_cleanup_queue(ns->queue); } + mutex_lock(&ns->ctrl->namespaces_mutex); list_del_init(&ns->list); + mutex_unlock(&ns->ctrl->namespaces_mutex); nvme_put_ns(ns); } @@ -1300,10 +1313,8 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) { struct nvme_ns *ns, *next; - mutex_lock(&ctrl->namespaces_mutex); list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) nvme_ns_remove(ns); - mutex_unlock(&ctrl->namespaces_mutex); } static DEFINE_IDA(nvme_instance_ida); @@ -1350,6 +1361,7 @@ static void nvme_free_ctrl(struct kref *kref) put_device(ctrl->device); nvme_release_instance(ctrl); + ida_destroy(&ctrl->ns_ida); ctrl->ops->free_ctrl(ctrl); } @@ -1390,6 +1402,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, } get_device(ctrl->device); dev_set_drvdata(ctrl->device, ctrl); + ida_init(&ctrl->ns_ida); spin_lock(&dev_list_lock); list_add_tail(&ctrl->node, &nvme_ctrl_list); @@ -1402,6 +1415,38 @@ out: return ret; } +/** + * nvme_kill_queues(): Ends all namespace queues + * @ctrl: the dead controller that needs to end + * + * Call this function when the driver determines it is unable to get the + * controller in a state capable of servicing IO. + */ +void nvme_kill_queues(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) { + if (!kref_get_unless_zero(&ns->kref)) + continue; + + /* + * Revalidating a dead namespace sets capacity to 0. This will + * end buffered writers dirtying pages that can't be synced. + */ + if (!test_and_set_bit(NVME_NS_DEAD, &ns->flags)) + revalidate_disk(ns->disk); + + blk_set_queue_dying(ns->queue); + blk_mq_abort_requeue_list(ns->queue); + blk_mq_start_stopped_hw_queues(ns->queue, true); + + nvme_put_ns(ns); + } + mutex_unlock(&ctrl->namespaces_mutex); +} + void nvme_stop_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9664d07d807d..fb15ba5f5d19 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -72,6 +72,7 @@ struct nvme_ctrl { struct mutex namespaces_mutex; struct device *device; /* char device */ struct list_head node; + struct ida ns_ida; char name[12]; char serial[20]; @@ -102,6 +103,7 @@ struct nvme_ns { struct request_queue *queue; struct gendisk *disk; struct kref kref; + int instance; u8 eui[8]; u8 uuid[16]; @@ -112,6 +114,11 @@ struct nvme_ns { bool ext; u8 pi_type; int type; + unsigned long flags; + +#define NVME_NS_REMOVING 0 +#define NVME_NS_DEAD 1 + u64 mode_select_num_blocks; u32 mode_select_block_len; }; @@ -240,6 +247,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl); void nvme_stop_queues(struct nvme_ctrl *ctrl); void nvme_start_queues(struct nvme_ctrl *ctrl); +void nvme_kill_queues(struct nvme_ctrl *ctrl); struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a128672472ec..680f5780750c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -86,7 +86,6 @@ struct nvme_queue; static int nvme_reset(struct nvme_dev *dev); static void nvme_process_cq(struct nvme_queue *nvmeq); -static void nvme_remove_dead_ctrl(struct nvme_dev *dev); static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); /* @@ -120,6 +119,7 @@ struct nvme_dev { unsigned long flags; #define NVME_CTRL_RESETTING 0 +#define NVME_CTRL_REMOVING 1 struct nvme_ctrl ctrl; struct completion ioq_wait; @@ -286,6 +286,17 @@ static int nvme_init_request(void *data, struct request *req, return 0; } +static void nvme_queue_scan(struct nvme_dev *dev) +{ + /* + * Do not queue new scan work when a controller is reset during + * removal. + */ + if (test_bit(NVME_CTRL_REMOVING, &dev->flags)) + return; + queue_work(nvme_workq, &dev->scan_work); +} + static void nvme_complete_async_event(struct nvme_dev *dev, struct nvme_completion *cqe) { @@ -300,7 +311,7 @@ static void nvme_complete_async_event(struct nvme_dev *dev, switch (result & 0xff07) { case NVME_AER_NOTICE_NS_CHANGED: dev_info(dev->dev, "rescanning\n"); - queue_work(nvme_workq, &dev->scan_work); + nvme_queue_scan(dev); default: dev_warn(dev->dev, "async event result %08x\n", result); } @@ -679,7 +690,10 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, spin_lock_irq(&nvmeq->q_lock); if (unlikely(nvmeq->cq_vector < 0)) { - ret = BLK_MQ_RQ_QUEUE_BUSY; + if (ns && !test_bit(NVME_NS_DEAD, &ns->flags)) + ret = BLK_MQ_RQ_QUEUE_BUSY; + else + ret = BLK_MQ_RQ_QUEUE_ERROR; spin_unlock_irq(&nvmeq->q_lock); goto out; } @@ -1250,6 +1264,12 @@ static struct blk_mq_ops nvme_mq_ops = { static void nvme_dev_remove_admin(struct nvme_dev *dev) { if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) { + /* + * If the controller was reset during removal, it's possible + * user requests may be waiting on a stopped queue. Start the + * queue to flush these to completion. + */ + blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true); blk_cleanup_queue(dev->ctrl.admin_q); blk_mq_free_tag_set(&dev->admin_tagset); } @@ -1690,14 +1710,14 @@ static int nvme_dev_add(struct nvme_dev *dev) return 0; dev->ctrl.tagset = &dev->tagset; } - queue_work(nvme_workq, &dev->scan_work); + nvme_queue_scan(dev); return 0; } -static int nvme_dev_map(struct nvme_dev *dev) +static int nvme_pci_enable(struct nvme_dev *dev) { u64 cap; - int bars, result = -ENOMEM; + int result = -ENOMEM; struct pci_dev *pdev = to_pci_dev(dev->dev); if (pci_enable_device_mem(pdev)) @@ -1705,24 +1725,14 @@ static int nvme_dev_map(struct nvme_dev *dev) dev->entry[0].vector = pdev->irq; pci_set_master(pdev); - bars = pci_select_bars(pdev, IORESOURCE_MEM); - if (!bars) - goto disable_pci; - - if (pci_request_selected_regions(pdev, bars, "nvme")) - goto disable_pci; if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) && dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32))) goto disable; - dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); - if (!dev->bar) - goto disable; - if (readl(dev->bar + NVME_REG_CSTS) == -1) { result = -ENODEV; - goto unmap; + goto disable; } /* @@ -1732,7 +1742,7 @@ static int nvme_dev_map(struct nvme_dev *dev) if (!pdev->irq) { result = pci_enable_msix(pdev, dev->entry, 1); if (result < 0) - goto unmap; + goto disable; } cap = lo_hi_readq(dev->bar + NVME_REG_CAP); @@ -1759,18 +1769,20 @@ static int nvme_dev_map(struct nvme_dev *dev) pci_save_state(pdev); return 0; - unmap: - iounmap(dev->bar); - dev->bar = NULL; disable: - pci_release_regions(pdev); - disable_pci: pci_disable_device(pdev); return result; } static void nvme_dev_unmap(struct nvme_dev *dev) { + if (dev->bar) + iounmap(dev->bar); + pci_release_regions(to_pci_dev(dev->dev)); +} + +static void nvme_pci_disable(struct nvme_dev *dev) +{ struct pci_dev *pdev = to_pci_dev(dev->dev); if (pdev->msi_enabled) @@ -1778,12 +1790,6 @@ static void nvme_dev_unmap(struct nvme_dev *dev) else if (pdev->msix_enabled) pci_disable_msix(pdev); - if (dev->bar) { - iounmap(dev->bar); - dev->bar = NULL; - pci_release_regions(pdev); - } - if (pci_is_enabled(pdev)) { pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); @@ -1842,7 +1848,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_dev_list_remove(dev); mutex_lock(&dev->shutdown_lock); - if (dev->bar) { + if (pci_is_enabled(to_pci_dev(dev->dev))) { nvme_stop_queues(&dev->ctrl); csts = readl(dev->bar + NVME_REG_CSTS); } @@ -1855,7 +1861,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_disable_io_queues(dev); nvme_disable_admin_queue(dev, shutdown); } - nvme_dev_unmap(dev); + nvme_pci_disable(dev); for (i = dev->queue_count - 1; i >= 0; i--) nvme_clear_queue(dev->queues[i]); @@ -1899,10 +1905,20 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) kfree(dev); } +static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status) +{ + dev_warn(dev->dev, "Removing after probe failure status: %d\n", status); + + kref_get(&dev->ctrl.kref); + nvme_dev_disable(dev, false); + if (!schedule_work(&dev->remove_work)) + nvme_put_ctrl(&dev->ctrl); +} + static void nvme_reset_work(struct work_struct *work) { struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); - int result; + int result = -ENODEV; if (WARN_ON(test_bit(NVME_CTRL_RESETTING, &dev->flags))) goto out; @@ -1911,37 +1927,37 @@ static void nvme_reset_work(struct work_struct *work) * If we're called to reset a live controller first shut it down before * moving on. */ - if (dev->bar) + if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) nvme_dev_disable(dev, false); set_bit(NVME_CTRL_RESETTING, &dev->flags); - result = nvme_dev_map(dev); + result = nvme_pci_enable(dev); if (result) goto out; result = nvme_configure_admin_queue(dev); if (result) - goto unmap; + goto out; nvme_init_queue(dev->queues[0], 0); result = nvme_alloc_admin_tags(dev); if (result) - goto disable; + goto out; result = nvme_init_identify(&dev->ctrl); if (result) - goto free_tags; + goto out; result = nvme_setup_io_queues(dev); if (result) - goto free_tags; + goto out; dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS; result = nvme_dev_list_add(dev); if (result) - goto remove; + goto out; /* * Keep the controller around but remove all namespaces if we don't have @@ -1958,19 +1974,8 @@ static void nvme_reset_work(struct work_struct *work) clear_bit(NVME_CTRL_RESETTING, &dev->flags); return; - remove: - nvme_dev_list_remove(dev); - free_tags: - nvme_dev_remove_admin(dev); - blk_put_queue(dev->ctrl.admin_q); - dev->ctrl.admin_q = NULL; - dev->queues[0]->tags = NULL; - disable: - nvme_disable_admin_queue(dev, false); - unmap: - nvme_dev_unmap(dev); out: - nvme_remove_dead_ctrl(dev); + nvme_remove_dead_ctrl(dev, result); } static void nvme_remove_dead_ctrl_work(struct work_struct *work) @@ -1978,19 +1983,12 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work) struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work); struct pci_dev *pdev = to_pci_dev(dev->dev); + nvme_kill_queues(&dev->ctrl); if (pci_get_drvdata(pdev)) pci_stop_and_remove_bus_device_locked(pdev); nvme_put_ctrl(&dev->ctrl); } -static void nvme_remove_dead_ctrl(struct nvme_dev *dev) -{ - dev_warn(dev->dev, "Removing after probe failure\n"); - kref_get(&dev->ctrl.kref); - if (!schedule_work(&dev->remove_work)) - nvme_put_ctrl(&dev->ctrl); -} - static int nvme_reset(struct nvme_dev *dev) { if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q)) @@ -2042,6 +2040,27 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .free_ctrl = nvme_pci_free_ctrl, }; +static int nvme_dev_map(struct nvme_dev *dev) +{ + int bars; + struct pci_dev *pdev = to_pci_dev(dev->dev); + + bars = pci_select_bars(pdev, IORESOURCE_MEM); + if (!bars) + return -ENODEV; + if (pci_request_selected_regions(pdev, bars, "nvme")) + return -ENODEV; + + dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); + if (!dev->bar) + goto release; + + return 0; + release: + pci_release_regions(pdev); + return -ENODEV; +} + static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int node, result = -ENOMEM; @@ -2066,6 +2085,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev->dev = get_device(&pdev->dev); pci_set_drvdata(pdev, dev); + result = nvme_dev_map(dev); + if (result) + goto free; + INIT_LIST_HEAD(&dev->node); INIT_WORK(&dev->scan_work, nvme_dev_scan); INIT_WORK(&dev->reset_work, nvme_reset_work); @@ -2089,6 +2112,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) nvme_release_prp_pools(dev); put_pci: put_device(dev->dev); + nvme_dev_unmap(dev); free: kfree(dev->queues); kfree(dev->entry); @@ -2112,10 +2136,16 @@ static void nvme_shutdown(struct pci_dev *pdev) nvme_dev_disable(dev, true); } +/* + * The driver's remove may be called on a device in a partially initialized + * state. This function must not have any dependencies on the device state in + * order to proceed. + */ static void nvme_remove(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); + set_bit(NVME_CTRL_REMOVING, &dev->flags); pci_set_drvdata(pdev, NULL); flush_work(&dev->scan_work); nvme_remove_namespaces(&dev->ctrl); @@ -2126,6 +2156,7 @@ static void nvme_remove(struct pci_dev *pdev) nvme_free_queues(dev, 0); nvme_release_cmb(dev); nvme_release_prp_pools(dev); + nvme_dev_unmap(dev); nvme_put_ctrl(&dev->ctrl); } diff --git a/drivers/pci/host/pci-keystone-dw.c b/drivers/pci/host/pci-keystone-dw.c index ed34c9520a02..6153853ca9c3 100644 --- a/drivers/pci/host/pci-keystone-dw.c +++ b/drivers/pci/host/pci-keystone-dw.c @@ -58,11 +58,6 @@ #define to_keystone_pcie(x) container_of(x, struct keystone_pcie, pp) -static inline struct pcie_port *sys_to_pcie(struct pci_sys_data *sys) -{ - return sys->private_data; -} - static inline void update_reg_offset_bit_pos(u32 offset, u32 *reg_offset, u32 *bit_pos) { @@ -108,7 +103,7 @@ static void ks_dw_pcie_msi_irq_ack(struct irq_data *d) struct pcie_port *pp; msi = irq_data_get_msi_desc(d); - pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi)); + pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi); ks_pcie = to_keystone_pcie(pp); offset = d->irq - irq_linear_revmap(pp->irq_domain, 0); update_reg_offset_bit_pos(offset, ®_offset, &bit_pos); @@ -146,7 +141,7 @@ static void ks_dw_pcie_msi_irq_mask(struct irq_data *d) u32 offset; msi = irq_data_get_msi_desc(d); - pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi)); + pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi); ks_pcie = to_keystone_pcie(pp); offset = d->irq - irq_linear_revmap(pp->irq_domain, 0); @@ -167,7 +162,7 @@ static void ks_dw_pcie_msi_irq_unmask(struct irq_data *d) u32 offset; msi = irq_data_get_msi_desc(d); - pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi)); + pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi); ks_pcie = to_keystone_pcie(pp); offset = d->irq - irq_linear_revmap(pp->irq_domain, 0); diff --git a/drivers/pci/host/pci-layerscape.c b/drivers/pci/host/pci-layerscape.c index 3923bed93c7e..f39961bcf7aa 100644 --- a/drivers/pci/host/pci-layerscape.c +++ b/drivers/pci/host/pci-layerscape.c @@ -77,6 +77,16 @@ static void ls_pcie_fix_class(struct ls_pcie *pcie) iowrite16(PCI_CLASS_BRIDGE_PCI, pcie->dbi + PCI_CLASS_DEVICE); } +/* Drop MSG TLP except for Vendor MSG */ +static void ls_pcie_drop_msg_tlp(struct ls_pcie *pcie) +{ + u32 val; + + val = ioread32(pcie->dbi + PCIE_STRFMR1); + val &= 0xDFFFFFFF; + iowrite32(val, pcie->dbi + PCIE_STRFMR1); +} + static int ls1021_pcie_link_up(struct pcie_port *pp) { u32 state; @@ -97,7 +107,7 @@ static int ls1021_pcie_link_up(struct pcie_port *pp) static void ls1021_pcie_host_init(struct pcie_port *pp) { struct ls_pcie *pcie = to_ls_pcie(pp); - u32 val, index[2]; + u32 index[2]; pcie->scfg = syscon_regmap_lookup_by_phandle(pp->dev->of_node, "fsl,pcie-scfg"); @@ -116,13 +126,7 @@ static void ls1021_pcie_host_init(struct pcie_port *pp) dw_pcie_setup_rc(pp); - /* - * LS1021A Workaround for internal TKT228622 - * to fix the INTx hang issue - */ - val = ioread32(pcie->dbi + PCIE_STRFMR1); - val &= 0xffff; - iowrite32(val, pcie->dbi + PCIE_STRFMR1); + ls_pcie_drop_msg_tlp(pcie); } static int ls_pcie_link_up(struct pcie_port *pp) @@ -147,6 +151,7 @@ static void ls_pcie_host_init(struct pcie_port *pp) iowrite32(1, pcie->dbi + PCIE_DBI_RO_WR_EN); ls_pcie_fix_class(pcie); ls_pcie_clear_multifunction(pcie); + ls_pcie_drop_msg_tlp(pcie); iowrite32(0, pcie->dbi + PCIE_DBI_RO_WR_EN); } diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 3b3e0998fa6e..d6a691e27d33 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -4002,6 +4002,7 @@ static ssize_t ipr_store_update_fw(struct device *dev, struct ipr_sglist *sglist; char fname[100]; char *src; + char *endline; int result, dnld_size; if (!capable(CAP_SYS_ADMIN)) @@ -4009,6 +4010,10 @@ static ssize_t ipr_store_update_fw(struct device *dev, snprintf(fname, sizeof(fname), "%s", buf); + endline = strchr(fname, '\n'); + if (endline) + *endline = '\0'; + if (request_firmware(&fw_entry, fname, &ioa_cfg->pdev->dev)) { dev_err(&ioa_cfg->pdev->dev, "Firmware file %s not found\n", fname); return -EIO; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index fa6b2c4eb7a2..8c6e31874171 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1344,6 +1344,7 @@ scsi_prep_return(struct request_queue *q, struct request *req, int ret) switch (ret) { case BLKPREP_KILL: + case BLKPREP_INVALID: req->errors = DID_NO_CONNECT << 16; /* release the command and kill it */ if (req->special) { diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 2760a7ba3f30..8c80a48e3233 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -446,7 +446,8 @@ static long vfio_pci_ioctl(void *device_data, info.num_regions = VFIO_PCI_NUM_REGIONS; info.num_irqs = VFIO_PCI_NUM_IRQS; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { struct pci_dev *pdev = vdev->pdev; @@ -520,7 +521,8 @@ static long vfio_pci_ioctl(void *device_data, return -EINVAL; } - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) { struct vfio_irq_info info; @@ -555,7 +557,8 @@ static long vfio_pci_ioctl(void *device_data, else info.flags |= VFIO_IRQ_INFO_NORESIZE; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_SET_IRQS) { struct vfio_irq_set hdr; diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 418cdd9ba3f4..e65b142d3422 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -219,7 +219,8 @@ static long vfio_platform_ioctl(void *device_data, info.num_regions = vdev->num_regions; info.num_irqs = vdev->num_irqs; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { struct vfio_region_info info; @@ -240,7 +241,8 @@ static long vfio_platform_ioctl(void *device_data, info.size = vdev->regions[info.index].size; info.flags = vdev->regions[info.index].flags; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) { struct vfio_irq_info info; @@ -259,7 +261,8 @@ static long vfio_platform_ioctl(void *device_data, info.flags = vdev->irqs[info.index].flags; info.count = vdev->irqs[info.index].count; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_SET_IRQS) { struct vfio_irq_set hdr; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 6f1ea3dddbad..75b24e93cedb 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -999,7 +999,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, info.iova_pgsizes = vfio_pgsize_bitmap(iommu); - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_IOMMU_MAP_DMA) { struct vfio_iommu_type1_dma_map map; @@ -1032,7 +1033,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, if (ret) return ret; - return copy_to_user((void __user *)arg, &unmap, minsz); + return copy_to_user((void __user *)arg, &unmap, minsz) ? + -EFAULT : 0; } return -ENOTTY; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ad2146a9ab2d..236553e81027 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1156,6 +1156,8 @@ int vhost_init_used(struct vhost_virtqueue *vq) { __virtio16 last_used_idx; int r; + bool is_le = vq->is_le; + if (!vq->private_data) { vq->is_le = virtio_legacy_is_little_endian(); return 0; @@ -1165,15 +1167,20 @@ int vhost_init_used(struct vhost_virtqueue *vq) r = vhost_update_used_flags(vq); if (r) - return r; + goto err; vq->signalled_used_valid = false; - if (!access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) - return -EFAULT; + if (!access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) { + r = -EFAULT; + goto err; + } r = __get_user(last_used_idx, &vq->used->idx); if (r) - return r; + goto err; vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx); return 0; +err: + vq->is_le = is_le; + return r; } EXPORT_SYMBOL_GPL(vhost_init_used); diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 92f394927f24..6e92917ba77a 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -709,6 +709,7 @@ static int con2fb_acquire_newinfo(struct vc_data *vc, struct fb_info *info, } if (!err) { + ops->cur_blink_jiffies = HZ / 5; info->fbcon_par = ops; if (vc) @@ -956,6 +957,7 @@ static const char *fbcon_startup(void) ops->currcon = -1; ops->graphics = 1; ops->cur_rotate = -1; + ops->cur_blink_jiffies = HZ / 5; info->fbcon_par = ops; p->con_rotate = initial_rotation; set_blitting_type(vc, info); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index c0c11fad4611..7760fc1a2218 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -679,7 +679,7 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) pci_read_config_dword(pci_dev, notify + offsetof(struct virtio_pci_notify_cap, - cap.length), + cap.offset), ¬ify_offset); /* We don't know how many VQs we'll map, ahead of the time. diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 0f6d8515ba4f..80825a7e8e48 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1569,6 +1569,17 @@ config WATCHDOG_RIO machines. The watchdog timeout period is normally one minute but can be changed with a boot-time parameter. +config WATCHDOG_SUN4V + tristate "Sun4v Watchdog support" + select WATCHDOG_CORE + depends on SPARC64 + help + Say Y here to support the hypervisor watchdog capability embedded + in the SPARC sun4v architecture. + + To compile this driver as a module, choose M here. The module will + be called sun4v_wdt. + # XTENSA Architecture # Xen Architecture diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index f566753256ab..f6a6a387c6c7 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -179,6 +179,7 @@ obj-$(CONFIG_SH_WDT) += shwdt.o obj-$(CONFIG_WATCHDOG_RIO) += riowd.o obj-$(CONFIG_WATCHDOG_CP1XXX) += cpwd.o +obj-$(CONFIG_WATCHDOG_SUN4V) += sun4v_wdt.o # XTENSA Architecture diff --git a/drivers/watchdog/sun4v_wdt.c b/drivers/watchdog/sun4v_wdt.c new file mode 100644 index 000000000000..1467fe50a76f --- /dev/null +++ b/drivers/watchdog/sun4v_wdt.c @@ -0,0 +1,191 @@ +/* + * sun4v watchdog timer + * (c) Copyright 2016 Oracle Corporation + * + * Implement a simple watchdog driver using the built-in sun4v hypervisor + * watchdog support. If time expires, the hypervisor stops or bounces + * the guest domain. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/watchdog.h> +#include <asm/hypervisor.h> +#include <asm/mdesc.h> + +#define WDT_TIMEOUT 60 +#define WDT_MAX_TIMEOUT 31536000 +#define WDT_MIN_TIMEOUT 1 +#define WDT_DEFAULT_RESOLUTION_MS 1000 /* 1 second */ + +static unsigned int timeout; +module_param(timeout, uint, 0); +MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds (default=" + __MODULE_STRING(WDT_TIMEOUT) ")"); + +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, S_IRUGO); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +static int sun4v_wdt_stop(struct watchdog_device *wdd) +{ + sun4v_mach_set_watchdog(0, NULL); + + return 0; +} + +static int sun4v_wdt_ping(struct watchdog_device *wdd) +{ + int hverr; + + /* + * HV watchdog timer will round up the timeout + * passed in to the nearest multiple of the + * watchdog resolution in milliseconds. + */ + hverr = sun4v_mach_set_watchdog(wdd->timeout * 1000, NULL); + if (hverr == HV_EINVAL) + return -EINVAL; + + return 0; +} + +static int sun4v_wdt_set_timeout(struct watchdog_device *wdd, + unsigned int timeout) +{ + wdd->timeout = timeout; + + return 0; +} + +static const struct watchdog_info sun4v_wdt_ident = { + .options = WDIOF_SETTIMEOUT | + WDIOF_MAGICCLOSE | + WDIOF_KEEPALIVEPING, + .identity = "sun4v hypervisor watchdog", + .firmware_version = 0, +}; + +static struct watchdog_ops sun4v_wdt_ops = { + .owner = THIS_MODULE, + .start = sun4v_wdt_ping, + .stop = sun4v_wdt_stop, + .ping = sun4v_wdt_ping, + .set_timeout = sun4v_wdt_set_timeout, +}; + +static struct watchdog_device wdd = { + .info = &sun4v_wdt_ident, + .ops = &sun4v_wdt_ops, + .min_timeout = WDT_MIN_TIMEOUT, + .max_timeout = WDT_MAX_TIMEOUT, + .timeout = WDT_TIMEOUT, +}; + +static int __init sun4v_wdt_init(void) +{ + struct mdesc_handle *handle; + u64 node; + const u64 *value; + int err = 0; + unsigned long major = 1, minor = 1; + + /* + * There are 2 properties that can be set from the control + * domain for the watchdog. + * watchdog-resolution + * watchdog-max-timeout + * + * We can expect a handle to be returned otherwise something + * serious is wrong. Correct to return -ENODEV here. + */ + + handle = mdesc_grab(); + if (!handle) + return -ENODEV; + + node = mdesc_node_by_name(handle, MDESC_NODE_NULL, "platform"); + err = -ENODEV; + if (node == MDESC_NODE_NULL) + goto out_release; + + /* + * This is a safe way to validate if we are on the right + * platform. + */ + if (sun4v_hvapi_register(HV_GRP_CORE, major, &minor)) + goto out_hv_unreg; + + /* Allow value of watchdog-resolution up to 1s (default) */ + value = mdesc_get_property(handle, node, "watchdog-resolution", NULL); + err = -EINVAL; + if (value) { + if (*value == 0 || + *value > WDT_DEFAULT_RESOLUTION_MS) + goto out_hv_unreg; + } + + value = mdesc_get_property(handle, node, "watchdog-max-timeout", NULL); + if (value) { + /* + * If the property value (in ms) is smaller than + * min_timeout, return -EINVAL. + */ + if (*value < wdd.min_timeout * 1000) + goto out_hv_unreg; + + /* + * If the property value is smaller than + * default max_timeout then set watchdog max_timeout to + * the value of the property in seconds. + */ + if (*value < wdd.max_timeout * 1000) + wdd.max_timeout = *value / 1000; + } + + watchdog_init_timeout(&wdd, timeout, NULL); + + watchdog_set_nowayout(&wdd, nowayout); + + err = watchdog_register_device(&wdd); + if (err) + goto out_hv_unreg; + + pr_info("initialized (timeout=%ds, nowayout=%d)\n", + wdd.timeout, nowayout); + + mdesc_release(handle); + + return 0; + +out_hv_unreg: + sun4v_hvapi_unregister(HV_GRP_CORE); + +out_release: + mdesc_release(handle); + return err; +} + +static void __exit sun4v_wdt_exit(void) +{ + sun4v_hvapi_unregister(HV_GRP_CORE); + watchdog_unregister_device(&wdd); +} + +module_init(sun4v_wdt_init); +module_exit(sun4v_wdt_exit); + +MODULE_AUTHOR("Wim Coekaerts <wim.coekaerts@oracle.com>"); +MODULE_DESCRIPTION("sun4v watchdog driver"); +MODULE_LICENSE("GPL"); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 7cf8509deda7..2c849b08a91b 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -310,8 +310,16 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); err = btrfs_insert_fs_root(root->fs_info, root); + /* + * The root might have been inserted already, as before we look + * for orphan roots, log replay might have happened, which + * triggers a transaction commit and qgroup accounting, which + * in turn reads and inserts fs roots while doing backref + * walking. + */ + if (err == -EEXIST) + err = 0; if (err) { - BUG_ON(err == -EEXIST); btrfs_free_fs_root(root); break; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c48ca13673e3..2eea40353e60 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1013,7 +1013,6 @@ const struct file_operations cifs_file_strict_ops = { .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .clone_file_range = cifs_clone_file_range, - .clone_file_range = cifs_clone_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 68c4547528c4..83aac8ba50b0 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -31,19 +31,15 @@ * so that it will fit. We use hash_64 to convert the value to 31 bits, and * then add 1, to ensure that we don't end up with a 0 as the value. */ -#if BITS_PER_LONG == 64 static inline ino_t cifs_uniqueid_to_ino_t(u64 fileid) { + if ((sizeof(ino_t)) < (sizeof(u64))) + return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1; + return (ino_t)fileid; + } -#else -static inline ino_t -cifs_uniqueid_to_ino_t(u64 fileid) -{ - return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1; -} -#endif extern struct file_system_type cifs_fs_type; extern const struct address_space_operations cifs_addr_ops; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 90b4f9f7de66..76fcb50295a3 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1396,11 +1396,10 @@ openRetry: * current bigbuf. */ static int -cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) +discard_remaining_data(struct TCP_Server_Info *server) { unsigned int rfclen = get_rfc1002_length(server->smallbuf); int remaining = rfclen + 4 - server->total_read; - struct cifs_readdata *rdata = mid->callback_data; while (remaining > 0) { int length; @@ -1414,10 +1413,20 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) remaining -= length; } - dequeue_mid(mid, rdata->result); return 0; } +static int +cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + int length; + struct cifs_readdata *rdata = mid->callback_data; + + length = discard_remaining_data(server); + dequeue_mid(mid, rdata->result); + return length; +} + int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) { @@ -1446,6 +1455,12 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) return length; server->total_read += length; + if (server->ops->is_status_pending && + server->ops->is_status_pending(buf, server, 0)) { + discard_remaining_data(server); + return -1; + } + /* Was the SMB read successful? */ rdata->result = server->ops->map_error(buf, false); if (rdata->result != 0) { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 10f8d5cf5681..42e1f440eb1e 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1106,21 +1106,25 @@ parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp, { char *data_offset; struct create_context *cc; - unsigned int next = 0; + unsigned int next; + unsigned int remaining; char *name; data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset); + remaining = le32_to_cpu(rsp->CreateContextsLength); cc = (struct create_context *)data_offset; - do { - cc = (struct create_context *)((char *)cc + next); + while (remaining >= sizeof(struct create_context)) { name = le16_to_cpu(cc->NameOffset) + (char *)cc; - if (le16_to_cpu(cc->NameLength) != 4 || - strncmp(name, "RqLs", 4)) { - next = le32_to_cpu(cc->Next); - continue; - } - return server->ops->parse_lease_buf(cc, epoch); - } while (next != 0); + if (le16_to_cpu(cc->NameLength) == 4 && + strncmp(name, "RqLs", 4) == 0) + return server->ops->parse_lease_buf(cc, epoch); + + next = le32_to_cpu(cc->Next); + if (!next) + break; + remaining -= next; + cc = (struct create_context *)((char *)cc + next); + } return 0; } diff --git a/fs/dcache.c b/fs/dcache.c index 92d5140de851..2398f9f94337 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -269,9 +269,6 @@ static inline int dname_external(const struct dentry *dentry) return dentry->d_name.name != dentry->d_iname; } -/* - * Make sure other CPUs see the inode attached before the type is set. - */ static inline void __d_set_inode_and_type(struct dentry *dentry, struct inode *inode, unsigned type_flags) @@ -279,28 +276,18 @@ static inline void __d_set_inode_and_type(struct dentry *dentry, unsigned flags; dentry->d_inode = inode; - smp_wmb(); flags = READ_ONCE(dentry->d_flags); flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); flags |= type_flags; WRITE_ONCE(dentry->d_flags, flags); } -/* - * Ideally, we want to make sure that other CPUs see the flags cleared before - * the inode is detached, but this is really a violation of RCU principles - * since the ordering suggests we should always set inode before flags. - * - * We should instead replace or discard the entire dentry - but that sucks - * performancewise on mass deletion/rename. - */ static inline void __d_clear_type_and_inode(struct dentry *dentry) { unsigned flags = READ_ONCE(dentry->d_flags); flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); WRITE_ONCE(dentry->d_flags, flags); - smp_wmb(); dentry->d_inode = NULL; } @@ -370,9 +357,11 @@ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; + + raw_write_seqcount_begin(&dentry->d_seq); __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); - dentry_rcuwalk_invalidate(dentry); + raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) @@ -1758,8 +1747,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) spin_lock(&dentry->d_lock); if (inode) hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); + raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); - dentry_rcuwalk_invalidate(dentry); + raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); fsnotify_d_instantiate(dentry, inode); } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 1f76d8950a57..5c46ed9f3e14 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -223,6 +223,9 @@ static void wb_wait_for_completion(struct backing_dev_info *bdi, #define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1) /* one round can affect upto 5 slots */ +static atomic_t isw_nr_in_flight = ATOMIC_INIT(0); +static struct workqueue_struct *isw_wq; + void __inode_attach_wb(struct inode *inode, struct page *page) { struct backing_dev_info *bdi = inode_to_bdi(inode); @@ -317,7 +320,6 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) struct inode_switch_wbs_context *isw = container_of(work, struct inode_switch_wbs_context, work); struct inode *inode = isw->inode; - struct super_block *sb = inode->i_sb; struct address_space *mapping = inode->i_mapping; struct bdi_writeback *old_wb = inode->i_wb; struct bdi_writeback *new_wb = isw->new_wb; @@ -424,8 +426,9 @@ skip_switch: wb_put(new_wb); iput(inode); - deactivate_super(sb); kfree(isw); + + atomic_dec(&isw_nr_in_flight); } static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head) @@ -435,7 +438,7 @@ static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head) /* needs to grab bh-unsafe locks, bounce to work item */ INIT_WORK(&isw->work, inode_switch_wbs_work_fn); - schedule_work(&isw->work); + queue_work(isw_wq, &isw->work); } /** @@ -471,20 +474,20 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) /* while holding I_WB_SWITCH, no one else can update the association */ spin_lock(&inode->i_lock); - - if (inode->i_state & (I_WB_SWITCH | I_FREEING) || - inode_to_wb(inode) == isw->new_wb) - goto out_unlock; - - if (!atomic_inc_not_zero(&inode->i_sb->s_active)) - goto out_unlock; - + if (!(inode->i_sb->s_flags & MS_ACTIVE) || + inode->i_state & (I_WB_SWITCH | I_FREEING) || + inode_to_wb(inode) == isw->new_wb) { + spin_unlock(&inode->i_lock); + goto out_free; + } inode->i_state |= I_WB_SWITCH; spin_unlock(&inode->i_lock); ihold(inode); isw->inode = inode; + atomic_inc(&isw_nr_in_flight); + /* * In addition to synchronizing among switchers, I_WB_SWITCH tells * the RCU protected stat update paths to grab the mapping's @@ -494,8 +497,6 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); return; -out_unlock: - spin_unlock(&inode->i_lock); out_free: if (isw->new_wb) wb_put(isw->new_wb); @@ -847,6 +848,33 @@ restart: wb_put(last_wb); } +/** + * cgroup_writeback_umount - flush inode wb switches for umount + * + * This function is called when a super_block is about to be destroyed and + * flushes in-flight inode wb switches. An inode wb switch goes through + * RCU and then workqueue, so the two need to be flushed in order to ensure + * that all previously scheduled switches are finished. As wb switches are + * rare occurrences and synchronize_rcu() can take a while, perform + * flushing iff wb switches are in flight. + */ +void cgroup_writeback_umount(void) +{ + if (atomic_read(&isw_nr_in_flight)) { + synchronize_rcu(); + flush_workqueue(isw_wq); + } +} + +static int __init cgroup_writeback_init(void) +{ + isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0); + if (!isw_wq) + return -ENOMEM; + return 0; +} +fs_initcall(cgroup_writeback_init); + #else /* CONFIG_CGROUP_WRITEBACK */ static struct bdi_writeback * diff --git a/fs/jffs2/README.Locking b/fs/jffs2/README.Locking index 3ea36554107f..8918ac905a3b 100644 --- a/fs/jffs2/README.Locking +++ b/fs/jffs2/README.Locking @@ -2,10 +2,6 @@ JFFS2 LOCKING DOCUMENTATION --------------------------- -At least theoretically, JFFS2 does not require the Big Kernel Lock -(BKL), which was always helpfully obtained for it by Linux 2.4 VFS -code. It has its own locking, as described below. - This document attempts to describe the existing locking rules for JFFS2. It is not expected to remain perfectly up to date, but ought to be fairly close. @@ -69,6 +65,7 @@ Ordering constraints: any f->sem held. 2. Never attempt to lock two file mutexes in one thread. No ordering rules have been made for doing so. + 3. Never lock a page cache page with f->sem held. erase_completion_lock spinlock diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index 0ae91ad6df2d..b288c8ae1236 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c @@ -50,7 +50,8 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c) static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, - struct jffs2_inode_cache *ic) + struct jffs2_inode_cache *ic, + int *dir_hardlinks) { struct jffs2_full_dirent *fd; @@ -69,19 +70,21 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n", fd->name, fd->ino, ic->ino); jffs2_mark_node_obsolete(c, fd->raw); + /* Clear the ic/raw union so it doesn't cause problems later. */ + fd->ic = NULL; continue; } + /* From this point, fd->raw is no longer used so we can set fd->ic */ + fd->ic = child_ic; + child_ic->pino_nlink++; + /* If we appear (at this stage) to have hard-linked directories, + * set a flag to trigger a scan later */ if (fd->type == DT_DIR) { - if (child_ic->pino_nlink) { - JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", - fd->name, fd->ino, ic->ino); - /* TODO: What do we do about it? */ - } else { - child_ic->pino_nlink = ic->ino; - } - } else - child_ic->pino_nlink++; + child_ic->flags |= INO_FLAGS_IS_DIR; + if (child_ic->pino_nlink > 1) + *dir_hardlinks = 1; + } dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino); /* Can't free scan_dents so far. We might need them in pass 2 */ @@ -95,8 +98,7 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, */ static int jffs2_build_filesystem(struct jffs2_sb_info *c) { - int ret; - int i; + int ret, i, dir_hardlinks = 0; struct jffs2_inode_cache *ic; struct jffs2_full_dirent *fd; struct jffs2_full_dirent *dead_fds = NULL; @@ -120,7 +122,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) /* Now scan the directory tree, increasing nlink according to every dirent found. */ for_each_inode(i, c, ic) { if (ic->scan_dents) { - jffs2_build_inode_pass1(c, ic); + jffs2_build_inode_pass1(c, ic, &dir_hardlinks); cond_resched(); } } @@ -156,6 +158,20 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) } dbg_fsbuild("pass 2a complete\n"); + + if (dir_hardlinks) { + /* If we detected directory hardlinks earlier, *hopefully* + * they are gone now because some of the links were from + * dead directories which still had some old dirents lying + * around and not yet garbage-collected, but which have + * been discarded above. So clear the pino_nlink field + * in each directory, so that the final scan below can + * print appropriate warnings. */ + for_each_inode(i, c, ic) { + if (ic->flags & INO_FLAGS_IS_DIR) + ic->pino_nlink = 0; + } + } dbg_fsbuild("freeing temporary data structures\n"); /* Finally, we can scan again and free the dirent structs */ @@ -163,6 +179,33 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) while(ic->scan_dents) { fd = ic->scan_dents; ic->scan_dents = fd->next; + /* We do use the pino_nlink field to count nlink of + * directories during fs build, so set it to the + * parent ino# now. Now that there's hopefully only + * one. */ + if (fd->type == DT_DIR) { + if (!fd->ic) { + /* We'll have complained about it and marked the coresponding + raw node obsolete already. Just skip it. */ + continue; + } + + /* We *have* to have set this in jffs2_build_inode_pass1() */ + BUG_ON(!(fd->ic->flags & INO_FLAGS_IS_DIR)); + + /* We clear ic->pino_nlink ∀ directories' ic *only* if dir_hardlinks + * is set. Otherwise, we know this should never trigger anyway, so + * we don't do the check. And ic->pino_nlink still contains the nlink + * value (which is 1). */ + if (dir_hardlinks && fd->ic->pino_nlink) { + JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u is also hard linked from dir ino #%u\n", + fd->name, fd->ino, ic->ino, fd->ic->pino_nlink); + /* Should we unlink it from its previous parent? */ + } + + /* For directories, ic->pino_nlink holds that parent inode # */ + fd->ic->pino_nlink = ic->ino; + } jffs2_free_full_dirent(fd); } ic->scan_dents = NULL; @@ -241,11 +284,7 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c, /* Reduce nlink of the child. If it's now zero, stick it on the dead_fds list to be cleaned up later. Else just free the fd */ - - if (fd->type == DT_DIR) - child_ic->pino_nlink = 0; - else - child_ic->pino_nlink--; + child_ic->pino_nlink--; if (!child_ic->pino_nlink) { dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n", diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index c5ac5944bc1b..cad86bac3453 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -137,39 +137,33 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, struct page *pg; struct inode *inode = mapping->host; struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); - struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); - struct jffs2_raw_inode ri; - uint32_t alloc_len = 0; pgoff_t index = pos >> PAGE_CACHE_SHIFT; uint32_t pageofs = index << PAGE_CACHE_SHIFT; int ret = 0; - jffs2_dbg(1, "%s()\n", __func__); - - if (pageofs > inode->i_size) { - ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, - ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); - if (ret) - return ret; - } - - mutex_lock(&f->sem); pg = grab_cache_page_write_begin(mapping, index, flags); - if (!pg) { - if (alloc_len) - jffs2_complete_reservation(c); - mutex_unlock(&f->sem); + if (!pg) return -ENOMEM; - } *pagep = pg; - if (alloc_len) { + jffs2_dbg(1, "%s()\n", __func__); + + if (pageofs > inode->i_size) { /* Make new hole frag from old EOF to new page */ + struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); + struct jffs2_raw_inode ri; struct jffs2_full_dnode *fn; + uint32_t alloc_len; jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", (unsigned int)inode->i_size, pageofs); + ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, + ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); + if (ret) + goto out_page; + + mutex_lock(&f->sem); memset(&ri, 0, sizeof(ri)); ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); @@ -196,6 +190,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, if (IS_ERR(fn)) { ret = PTR_ERR(fn); jffs2_complete_reservation(c); + mutex_unlock(&f->sem); goto out_page; } ret = jffs2_add_full_dnode_to_inode(c, f, fn); @@ -210,10 +205,12 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, jffs2_mark_node_obsolete(c, fn->raw); jffs2_free_full_dnode(fn); jffs2_complete_reservation(c); + mutex_unlock(&f->sem); goto out_page; } jffs2_complete_reservation(c); inode->i_size = pageofs; + mutex_unlock(&f->sem); } /* @@ -222,18 +219,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, * case of a short-copy. */ if (!PageUptodate(pg)) { + mutex_lock(&f->sem); ret = jffs2_do_readpage_nolock(inode, pg); + mutex_unlock(&f->sem); if (ret) goto out_page; } - mutex_unlock(&f->sem); jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); return ret; out_page: unlock_page(pg); page_cache_release(pg); - mutex_unlock(&f->sem); return ret; } diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index 5a2dec2b064c..95d5880a63ee 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c @@ -1296,14 +1296,17 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era BUG_ON(start > orig_start); } - /* First, use readpage() to read the appropriate page into the page cache */ - /* Q: What happens if we actually try to GC the _same_ page for which commit_write() - * triggered garbage collection in the first place? - * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the - * page OK. We'll actually write it out again in commit_write, which is a little - * suboptimal, but at least we're correct. - */ + /* The rules state that we must obtain the page lock *before* f->sem, so + * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's + * actually going to *change* so we're safe; we only allow reading. + * + * It is important to note that jffs2_write_begin() will ensure that its + * page is marked Uptodate before allocating space. That means that if we + * end up here trying to GC the *same* page that jffs2_write_begin() is + * trying to write out, read_cache_page() will not deadlock. */ + mutex_unlock(&f->sem); pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg); + mutex_lock(&f->sem); if (IS_ERR(pg_ptr)) { pr_warn("read_cache_page() returned error: %ld\n", diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index fa35ff79ab35..0637271f3770 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -194,6 +194,7 @@ struct jffs2_inode_cache { #define INO_STATE_CLEARING 6 /* In clear_inode() */ #define INO_FLAGS_XATTR_CHECKED 0x01 /* has no duplicate xattr_ref */ +#define INO_FLAGS_IS_DIR 0x02 /* is a directory */ #define RAWNODE_CLASS_INODE_CACHE 0 #define RAWNODE_CLASS_XATTR_DATUM 1 @@ -249,7 +250,10 @@ struct jffs2_readinode_info struct jffs2_full_dirent { - struct jffs2_raw_node_ref *raw; + union { + struct jffs2_raw_node_ref *raw; + struct jffs2_inode_cache *ic; /* Just during part of build */ + }; struct jffs2_full_dirent *next; uint32_t version; uint32_t ino; /* == zero for unlink */ diff --git a/fs/super.c b/fs/super.c index 1182af8fd5ff..74914b1bae70 100644 --- a/fs/super.c +++ b/fs/super.c @@ -415,6 +415,7 @@ void generic_shutdown_super(struct super_block *sb) sb->s_flags &= ~MS_ACTIVE; fsnotify_unmount_inodes(sb); + cgroup_writeback_umount(); evict_inodes(sb); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 50311703135b..66cdb44616d5 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -287,6 +287,12 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address, goto out; /* + * We don't do userfault handling for the final child pid update. + */ + if (current->flags & PF_EXITING) + goto out; + + /* * Check that we can return VM_FAULT_RETRY. * * NOTE: it should become possible to return VM_FAULT_RETRY diff --git a/include/linux/ata.h b/include/linux/ata.h index d2992bfa1706..c1a2f345cbe6 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -487,8 +487,8 @@ enum ata_tf_protocols { }; enum ata_ioctls { - ATA_IOC_GET_IO32 = 0x309, - ATA_IOC_SET_IO32 = 0x324, + ATA_IOC_GET_IO32 = 0x309, /* HDIO_GET_32BIT */ + ATA_IOC_SET_IO32 = 0x324, /* HDIO_SET_32BIT */ }; /* core structures */ diff --git a/include/linux/bio.h b/include/linux/bio.h index 5349e6816cbb..cb6888824108 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -310,6 +310,43 @@ static inline void bio_clear_flag(struct bio *bio, unsigned int bit) bio->bi_flags &= ~(1U << bit); } +static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv) +{ + *bv = bio_iovec(bio); +} + +static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv) +{ + struct bvec_iter iter = bio->bi_iter; + int idx; + + if (!bio_flagged(bio, BIO_CLONED)) { + *bv = bio->bi_io_vec[bio->bi_vcnt - 1]; + return; + } + + if (unlikely(!bio_multiple_segments(bio))) { + *bv = bio_iovec(bio); + return; + } + + bio_advance_iter(bio, &iter, iter.bi_size); + + if (!iter.bi_bvec_done) + idx = iter.bi_idx - 1; + else /* in the middle of bvec */ + idx = iter.bi_idx; + + *bv = bio->bi_io_vec[idx]; + + /* + * iter.bi_bvec_done records actual length of the last bvec + * if this bio ends in the middle of one io vector + */ + if (iter.bi_bvec_done) + bv->bv_len = iter.bi_bvec_done; +} + enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4571ef1a12a9..413c84fbc4ed 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -895,7 +895,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq) { struct request_queue *q = rq->q; - if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC)) + if (unlikely(rq->cmd_type != REQ_TYPE_FS)) return q->limits.max_hw_sectors; if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD)) @@ -1372,6 +1372,13 @@ static inline void put_dev_sector(Sector p) page_cache_release(p.v); } +static inline bool __bvec_gap_to_prev(struct request_queue *q, + struct bio_vec *bprv, unsigned int offset) +{ + return offset || + ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); +} + /* * Check if adding a bio_vec after bprv with offset would create a gap in * the SG list. Most drivers don't care about this, but some do. @@ -1381,18 +1388,22 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, { if (!queue_virt_boundary(q)) return false; - return offset || - ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); + return __bvec_gap_to_prev(q, bprv, offset); } static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, struct bio *next) { - if (!bio_has_data(prev)) - return false; + if (bio_has_data(prev) && queue_virt_boundary(q)) { + struct bio_vec pb, nb; + + bio_get_last_bvec(prev, &pb); + bio_get_first_bvec(next, &nb); - return bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1], - next->bi_io_vec[0].bv_offset); + return __bvec_gap_to_prev(q, &pb, nb.bv_offset); + } + + return false; } static inline bool req_gap_back_merge(struct request *req, struct bio *bio) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 7781ce110503..c4b5f4b3f8f8 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -409,9 +409,7 @@ static inline bool d_mountpoint(const struct dentry *dentry) */ static inline unsigned __d_entry_type(const struct dentry *dentry) { - unsigned type = READ_ONCE(dentry->d_flags); - smp_rmb(); - return type & DCACHE_ENTRY_TYPE; + return dentry->d_flags & DCACHE_ENTRY_TYPE; } static inline bool d_is_miss(const struct dentry *dentry) diff --git a/include/linux/libata.h b/include/linux/libata.h index bec2abbd7ab2..2c4ebef79d0c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -720,7 +720,7 @@ struct ata_device { union { u16 id[ATA_ID_WORDS]; /* IDENTIFY xxx DEVICE data */ u32 gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */ - }; + } ____cacheline_aligned; /* DEVSLP Timing Variables from Identify Device Data Log */ u8 devslp_timing[ATA_LOG_DEVSLP_SIZE]; diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 429fdfc3baf5..925730bc9fc1 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -568,6 +568,8 @@ enum { FILTER_DYN_STRING, FILTER_PTR_STRING, FILTER_TRACE_FN, + FILTER_COMM, + FILTER_CPU, }; extern int trace_event_raw_init(struct trace_event_call *call); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b333c945e571..d0b5ca5d4e08 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -198,6 +198,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc, void wbc_detach_inode(struct writeback_control *wbc); void wbc_account_io(struct writeback_control *wbc, struct page *page, size_t bytes); +void cgroup_writeback_umount(void); /** * inode_attach_wb - associate an inode with its wb @@ -301,6 +302,10 @@ static inline void wbc_account_io(struct writeback_control *wbc, { } +static inline void cgroup_writeback_umount(void) +{ +} + #endif /* CONFIG_CGROUP_WRITEBACK */ /* diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ab09829d3b97..05ddc0820771 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -97,16 +97,16 @@ trace_find_event_field(struct trace_event_call *call, char *name) struct ftrace_event_field *field; struct list_head *head; - field = __find_event_field(&ftrace_generic_fields, name); + head = trace_get_fields(call); + field = __find_event_field(head, name); if (field) return field; - field = __find_event_field(&ftrace_common_fields, name); + field = __find_event_field(&ftrace_generic_fields, name); if (field) return field; - head = trace_get_fields(call); - return __find_event_field(head, name); + return __find_event_field(&ftrace_common_fields, name); } static int __trace_define_field(struct list_head *head, const char *type, @@ -171,8 +171,10 @@ static int trace_define_generic_fields(void) { int ret; - __generic_field(int, cpu, FILTER_OTHER); - __generic_field(char *, comm, FILTER_PTR_STRING); + __generic_field(int, CPU, FILTER_CPU); + __generic_field(int, cpu, FILTER_CPU); + __generic_field(char *, COMM, FILTER_COMM); + __generic_field(char *, comm, FILTER_COMM); return ret; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index f93a219b18da..6816302542b2 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1043,13 +1043,14 @@ static int init_pred(struct filter_parse_state *ps, return -EINVAL; } - if (is_string_field(field)) { + if (field->filter_type == FILTER_COMM) { + filter_build_regex(pred); + fn = filter_pred_comm; + pred->regex.field_len = TASK_COMM_LEN; + } else if (is_string_field(field)) { filter_build_regex(pred); - if (!strcmp(field->name, "comm")) { - fn = filter_pred_comm; - pred->regex.field_len = TASK_COMM_LEN; - } else if (field->filter_type == FILTER_STATIC_STRING) { + if (field->filter_type == FILTER_STATIC_STRING) { fn = filter_pred_string; pred->regex.field_len = field->size; } else if (field->filter_type == FILTER_DYN_STRING) @@ -1072,7 +1073,7 @@ static int init_pred(struct filter_parse_state *ps, } pred->val = val; - if (!strcmp(field->name, "cpu")) + if (field->filter_type == FILTER_CPU) fn = filter_pred_cpu; else fn = select_comparison_fn(pred->op, field->size, |