diff options
95 files changed, 1949 insertions, 416 deletions
@@ -21,6 +21,7 @@ Andrey Ryabinin <ryabinin.a.a@gmail.com> <a.ryabinin@samsung.com> Andrew Morton <akpm@linux-foundation.org> Andrew Vasquez <andrew.vasquez@qlogic.com> Andy Adamson <andros@citi.umich.edu> +Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com> Archit Taneja <archit@ti.com> Arnaud Patard <arnaud.patard@rtp-net.org> Arnd Bergmann <arnd@arndb.de> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 053f613fc9a9..07e4cdf02407 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3025,7 +3025,7 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq), which is the maximum number of possibly pending cpu-local interrupts. -4.90 KVM_SMI +4.96 KVM_SMI Capability: KVM_CAP_X86_SMM Architectures: x86 diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index cd822d8454c0..307237cfe728 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -27,6 +27,8 @@ $(warning LSE atomics not supported by binutils) endif KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) +KBUILD_CFLAGS += -fno-asynchronous-unwind-tables +KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads) KBUILD_AFLAGS += $(lseinstr) ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 18ca9fb9e65f..86581f793e39 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -16,7 +16,6 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y -CONFIG_MEMCG_KMEM=y CONFIG_CGROUP_HUGETLB=y # CONFIG_UTS_NS is not set # CONFIG_IPC_NS is not set @@ -37,15 +36,13 @@ CONFIG_ARCH_EXYNOS7=y CONFIG_ARCH_LAYERSCAPE=y CONFIG_ARCH_HISI=y CONFIG_ARCH_MEDIATEK=y +CONFIG_ARCH_QCOM=y CONFIG_ARCH_ROCKCHIP=y CONFIG_ARCH_SEATTLE=y CONFIG_ARCH_RENESAS=y CONFIG_ARCH_R8A7795=y CONFIG_ARCH_STRATIX10=y CONFIG_ARCH_TEGRA=y -CONFIG_ARCH_TEGRA_132_SOC=y -CONFIG_ARCH_TEGRA_210_SOC=y -CONFIG_ARCH_QCOM=y CONFIG_ARCH_SPRD=y CONFIG_ARCH_THUNDER=y CONFIG_ARCH_UNIPHIER=y @@ -54,14 +51,19 @@ CONFIG_ARCH_XGENE=y CONFIG_ARCH_ZYNQMP=y CONFIG_PCI=y CONFIG_PCI_MSI=y +CONFIG_PCI_IOV=y +CONFIG_PCI_RCAR_GEN2_PCIE=y CONFIG_PCI_HOST_GENERIC=y CONFIG_PCI_XGENE=y -CONFIG_SMP=y +CONFIG_PCI_LAYERSCAPE=y +CONFIG_PCI_HISI=y +CONFIG_PCIE_QCOM=y CONFIG_SCHED_MC=y CONFIG_PREEMPT=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CMA=y +CONFIG_XEN=y CONFIG_CMDLINE="console=ttyAMA0" # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y @@ -100,7 +102,11 @@ CONFIG_PATA_OF_PLATFORM=y CONFIG_NETDEVICES=y CONFIG_TUN=y CONFIG_VIRTIO_NET=y +CONFIG_AMD_XGBE=y CONFIG_NET_XGENE=y +CONFIG_E1000E=y +CONFIG_IGB=y +CONFIG_IGBVF=y CONFIG_SKY2=y CONFIG_RAVB=y CONFIG_SMC91X=y @@ -117,25 +123,23 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_8250_MT6577=y CONFIG_SERIAL_8250_UNIPHIER=y +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y CONFIG_SERIAL_SAMSUNG=y -CONFIG_SERIAL_SAMSUNG_UARTS_4=y -CONFIG_SERIAL_SAMSUNG_UARTS=4 CONFIG_SERIAL_SAMSUNG_CONSOLE=y +CONFIG_SERIAL_TEGRA=y CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_SH_SCI_NR_UARTS=11 CONFIG_SERIAL_SH_SCI_CONSOLE=y -CONFIG_SERIAL_TEGRA=y CONFIG_SERIAL_MSM=y CONFIG_SERIAL_MSM_CONSOLE=y -CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_XILINX_PS_UART=y CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y CONFIG_VIRTIO_CONSOLE=y # CONFIG_HW_RANDOM is not set -CONFIG_I2C=y CONFIG_I2C_QUP=y +CONFIG_I2C_UNIPHIER_F=y CONFIG_I2C_RCAR=y CONFIG_SPI=y CONFIG_SPI_PL022=y @@ -176,8 +180,6 @@ CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_TEGRA=y CONFIG_MMC_SPI=y CONFIG_MMC_DW=y -CONFIG_MMC_DW_IDMAC=y -CONFIG_MMC_DW_PLTFM=y CONFIG_MMC_DW_EXYNOS=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y @@ -187,28 +189,33 @@ CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_LEDS_TRIGGER_CPU=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y +CONFIG_RTC_DRV_PL031=y CONFIG_RTC_DRV_XGENE=y CONFIG_DMADEVICES=y -CONFIG_RCAR_DMAC=y CONFIG_QCOM_BAM_DMA=y CONFIG_TEGRA20_APB_DMA=y +CONFIG_RCAR_DMAC=y +CONFIG_VFIO=y +CONFIG_VFIO_PCI=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_MMIO=y +CONFIG_XEN_GNTDEV=y +CONFIG_XEN_GRANT_DEV_ALLOC=y CONFIG_COMMON_CLK_CS2000_CP=y CONFIG_COMMON_CLK_QCOM=y CONFIG_MSM_GCC_8916=y CONFIG_HWSPINLOCK_QCOM=y -# CONFIG_IOMMU_SUPPORT is not set +CONFIG_ARM_SMMU=y CONFIG_QCOM_SMEM=y CONFIG_QCOM_SMD=y CONFIG_QCOM_SMD_RPM=y +CONFIG_ARCH_TEGRA_132_SOC=y +CONFIG_ARCH_TEGRA_210_SOC=y +CONFIG_HISILICON_IRQ_MBIGEN=y CONFIG_PHY_XGENE=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -# CONFIG_EXT3_FS_XATTR is not set -CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA=y @@ -239,6 +246,7 @@ CONFIG_LOCKUP_DETECTOR=y # CONFIG_FTRACE is not set CONFIG_MEMTEST=y CONFIG_SECURITY=y +CONFIG_CRYPTO_ECHAINIV=y CONFIG_CRYPTO_ANSI_CPRNG=y CONFIG_ARM64_CRYPTO=y CONFIG_CRYPTO_SHA1_ARM64_CE=y diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 2d545d7aa80b..bf464de33f52 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -67,11 +67,11 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) -#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC)) -#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT)) -#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL)) +#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) @@ -81,7 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) -#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY) +#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) @@ -153,6 +153,7 @@ extern struct page *empty_zero_page; #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) +#define pte_user(pte) (!!(pte_val(pte) & PTE_USER)) #ifdef CONFIG_ARM64_HW_AFDBM #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) @@ -163,8 +164,6 @@ extern struct page *empty_zero_page; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) -#define pte_valid_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) #define pte_valid_not_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) #define pte_valid_young(pte) \ @@ -278,13 +277,13 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - if (pte_valid_user(pte)) { - if (!pte_special(pte) && pte_exec(pte)) - __sync_icache_dcache(pte, addr); + if (pte_valid(pte)) { if (pte_sw_dirty(pte) && pte_write(pte)) pte_val(pte) &= ~PTE_RDONLY; else pte_val(pte) |= PTE_RDONLY; + if (pte_user(pte) && pte_exec(pte) && !pte_special(pte)) + __sync_icache_dcache(pte, addr); } /* diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index ffe9c2b6431b..917d98108b3f 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -514,9 +514,14 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems #endif /* EL2 debug */ + mrs x0, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer + sbfx x0, x0, #8, #4 + cmp x0, #1 + b.lt 4f // Skip if no PMU present mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to msr mdcr_el2, x0 // all PMU counters from EL1 +4: /* Stage-2 translation */ msr vttbr_el2, xzr diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index bc2abb8b1599..999633bd7294 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -65,6 +65,16 @@ #ifdef CONFIG_EFI /* + * Prevent the symbol aliases below from being emitted into the kallsyms + * table, by forcing them to be absolute symbols (which are conveniently + * ignored by scripts/kallsyms) rather than section relative symbols. + * The distinction is only relevant for partial linking, and only for symbols + * that are defined within a section declaration (which is not the case for + * the definitions below) so the resulting values will be identical. + */ +#define KALLSYMS_HIDE(sym) ABSOLUTE(sym) + +/* * The EFI stub has its own symbol namespace prefixed by __efistub_, to * isolate it from the kernel proper. The following symbols are legally * accessed by the stub, so provide some aliases to make them accessible. @@ -73,25 +83,25 @@ * linked at. The routines below are all implemented in assembler in a * position independent manner */ -__efistub_memcmp = __pi_memcmp; -__efistub_memchr = __pi_memchr; -__efistub_memcpy = __pi_memcpy; -__efistub_memmove = __pi_memmove; -__efistub_memset = __pi_memset; -__efistub_strlen = __pi_strlen; -__efistub_strcmp = __pi_strcmp; -__efistub_strncmp = __pi_strncmp; -__efistub___flush_dcache_area = __pi___flush_dcache_area; +__efistub_memcmp = KALLSYMS_HIDE(__pi_memcmp); +__efistub_memchr = KALLSYMS_HIDE(__pi_memchr); +__efistub_memcpy = KALLSYMS_HIDE(__pi_memcpy); +__efistub_memmove = KALLSYMS_HIDE(__pi_memmove); +__efistub_memset = KALLSYMS_HIDE(__pi_memset); +__efistub_strlen = KALLSYMS_HIDE(__pi_strlen); +__efistub_strcmp = KALLSYMS_HIDE(__pi_strcmp); +__efistub_strncmp = KALLSYMS_HIDE(__pi_strncmp); +__efistub___flush_dcache_area = KALLSYMS_HIDE(__pi___flush_dcache_area); #ifdef CONFIG_KASAN -__efistub___memcpy = __pi_memcpy; -__efistub___memmove = __pi_memmove; -__efistub___memset = __pi_memset; +__efistub___memcpy = KALLSYMS_HIDE(__pi_memcpy); +__efistub___memmove = KALLSYMS_HIDE(__pi_memmove); +__efistub___memset = KALLSYMS_HIDE(__pi_memset); #endif -__efistub__text = _text; -__efistub__end = _end; -__efistub__edata = _edata; +__efistub__text = KALLSYMS_HIDE(_text); +__efistub__end = KALLSYMS_HIDE(_end); +__efistub__edata = KALLSYMS_HIDE(_edata); #endif diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 5a22a119a74c..0adbebbc2803 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -46,7 +46,7 @@ enum address_markers_idx { PCI_START_NR, PCI_END_NR, MODULES_START_NR, - MODUELS_END_NR, + MODULES_END_NR, KERNEL_SPACE_NR, }; diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index cf038c7d9fa9..cab7a5be40aa 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -120,6 +120,7 @@ static void __init cpu_set_ttbr1(unsigned long ttbr1) void __init kasan_init(void) { struct memblock_region *reg; + int i; /* * We are going to perform proper setup of shadow memory. @@ -155,6 +156,14 @@ void __init kasan_init(void) pfn_to_nid(virt_to_pfn(start))); } + /* + * KAsan may reuse the contents of kasan_zero_pte directly, so we + * should make sure that it maps the zero page read-only. + */ + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(&kasan_zero_pte[i], + pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO)); + memset(kasan_zero_page, 0, PAGE_SIZE); cpu_set_ttbr1(__pa(swapper_pg_dir)); flush_tlb_all(); diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 3571c7309c5e..cf6240741134 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -57,6 +57,9 @@ static int change_memory_common(unsigned long addr, int numpages, if (end < MODULES_VADDR || end >= MODULES_END) return -EINVAL; + if (!numpages) + return 0; + data.set_mask = set_mask; data.clear_mask = clear_mask; diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S index 146bd99a7532..e6a30e1268a8 100644 --- a/arch/arm64/mm/proc-macros.S +++ b/arch/arm64/mm/proc-macros.S @@ -84,3 +84,15 @@ b.lo 9998b dsb \domain .endm + +/* + * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present + */ + .macro reset_pmuserenr_el0, tmpreg + mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer + sbfx \tmpreg, \tmpreg, #8, #4 + cmp \tmpreg, #1 // Skip if no PMU present + b.lt 9000f + msr pmuserenr_el0, xzr // Disable PMU access from EL0 +9000: + .endm diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index a3d867e723b4..c164d2cb35c0 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -117,7 +117,7 @@ ENTRY(cpu_do_resume) */ ubfx x11, x11, #1, #1 msr oslar_el1, x11 - msr pmuserenr_el0, xzr // Disable PMU access from EL0 + reset_pmuserenr_el0 x0 // Disable PMU access from EL0 mov x0, x12 dsb nsh // Make sure local tlb invalidation completed isb @@ -154,7 +154,7 @@ ENTRY(__cpu_setup) msr cpacr_el1, x0 // Enable FP/ASIMD mov x0, #1 << 12 // Reset mdscr_el1 and disable msr mdscr_el1, x0 // access to the DCC from EL0 - msr pmuserenr_el0, xzr // Disable PMU access from EL0 + reset_pmuserenr_el0 x0 // Disable PMU access from EL0 /* * Memory region attributes for LPAE: * diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 271fefbbe521..9d08d8cbed1a 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -38,8 +38,7 @@ #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS -#define KVM_USER_MEM_SLOTS 32 -#define KVM_MEM_SLOTS_NUM KVM_USER_MEM_SLOTS +#define KVM_USER_MEM_SLOTS 512 #ifdef CONFIG_KVM_MMIO #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 774a253ca4e1..9bf7031a67ff 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -377,15 +377,12 @@ no_seg_found: static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb) { - struct kvmppc_vcpu_book3s *vcpu_book3s; u64 esid, esid_1t; int slb_nr; struct kvmppc_slb *slbe; dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb); - vcpu_book3s = to_book3s(vcpu); - esid = GET_ESID(rb); esid_1t = GET_ESID_1T(rb); slb_nr = rb & 0xfff; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index cff207b72c46..baeddb06811d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -833,6 +833,24 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->stat.sum_exits++; + /* + * This can happen if an interrupt occurs in the last stages + * of guest entry or the first stages of guest exit (i.e. after + * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV + * and before setting it to KVM_GUEST_MODE_HOST_HV). + * That can happen due to a bug, or due to a machine check + * occurring at just the wrong time. + */ + if (vcpu->arch.shregs.msr & MSR_HV) { + printk(KERN_EMERG "KVM trap in HV mode!\n"); + printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", + vcpu->arch.trap, kvmppc_get_pc(vcpu), + vcpu->arch.shregs.msr); + kvmppc_dump_regs(vcpu); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + run->hw.hardware_exit_reason = vcpu->arch.trap; + return RESUME_HOST; + } run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; switch (vcpu->arch.trap) { diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 3c6badcd53ef..6ee26de9a1de 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2153,7 +2153,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ 2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW - rlwimi r5, r4, 1, DAWRX_WT + rlwimi r5, r4, 2, DAWRX_WT clrrdi r4, r4, 3 std r4, VCPU_DAWR(r3) std r5, VCPU_DAWRX(r3) @@ -2404,6 +2404,8 @@ machine_check_realmode: * guest as machine check causing guest to crash. */ ld r11, VCPU_MSR(r9) + rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */ + bne mc_cont /* if so, exit to host */ andi. r10, r11, MSR_RI /* check for unrecoverable exception */ beq 1f /* Deliver a machine check to guest */ ld r10, VCPU_PC(r9) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6fd2405c7f4a..a3b182dcb823 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -919,21 +919,17 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) r = -ENXIO; break; } - vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; + val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; break; case KVM_REG_PPC_VSCR: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { r = -ENXIO; break; } - vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); + val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); break; case KVM_REG_PPC_VRSAVE: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - vcpu->arch.vrsave = set_reg_val(reg->id, val); + val = get_reg_val(reg->id, vcpu->arch.vrsave); break; #endif /* CONFIG_ALTIVEC */ default: @@ -974,17 +970,21 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) r = -ENXIO; break; } - val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; + vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; break; case KVM_REG_PPC_VSCR: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { r = -ENXIO; break; } - val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); + vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); break; case KVM_REG_PPC_VRSAVE: - val = get_reg_val(reg->id, vcpu->arch.vrsave); + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vrsave = set_reg_val(reg->id, val); break; #endif /* CONFIG_ALTIVEC */ default: diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 6742414dbd6f..8959ebb6d2c9 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -546,7 +546,6 @@ struct kvm_vcpu_arch { struct kvm_s390_sie_block *sie_block; unsigned int host_acrs[NUM_ACRS]; struct fpu host_fpregs; - struct fpu guest_fpregs; struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; struct kvm_s390_pgm_info pgm; diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 5fce52cf0e57..5ea5af3c7db7 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -29,6 +29,7 @@ config KVM select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select SRCU + select KVM_VFIO ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index b3b553469650..d42fa38c2429 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -7,7 +7,7 @@ # as published by the Free Software Foundation. KVM := ../../../virt/kvm -common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 835d60bedb54..4af21c771f9b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1423,44 +1423,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } -/* - * Backs up the current FP/VX register save area on a particular - * destination. Used to switch between different register save - * areas. - */ -static inline void save_fpu_to(struct fpu *dst) -{ - dst->fpc = current->thread.fpu.fpc; - dst->regs = current->thread.fpu.regs; -} - -/* - * Switches the FP/VX register save area from which to lazy - * restore register contents. - */ -static inline void load_fpu_from(struct fpu *from) -{ - current->thread.fpu.fpc = from->fpc; - current->thread.fpu.regs = from->regs; -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { /* Save host register state */ save_fpu_regs(); - save_fpu_to(&vcpu->arch.host_fpregs); - - if (test_kvm_facility(vcpu->kvm, 129)) { - current->thread.fpu.fpc = vcpu->run->s.regs.fpc; - /* - * Use the register save area in the SIE-control block - * for register restore and save in kvm_arch_vcpu_put() - */ - current->thread.fpu.vxrs = - (__vector128 *)&vcpu->run->s.regs.vrs; - } else - load_fpu_from(&vcpu->arch.guest_fpregs); + vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; + vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; + /* Depending on MACHINE_HAS_VX, data stored to vrs either + * has vector register or floating point register format. + */ + current->thread.fpu.regs = vcpu->run->s.regs.vrs; + current->thread.fpu.fpc = vcpu->run->s.regs.fpc; if (test_fp_ctl(current->thread.fpu.fpc)) /* User space provided an invalid FPC, let's clear it */ current->thread.fpu.fpc = 0; @@ -1476,19 +1450,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); + /* Save guest register state */ save_fpu_regs(); + vcpu->run->s.regs.fpc = current->thread.fpu.fpc; - if (test_kvm_facility(vcpu->kvm, 129)) - /* - * kvm_arch_vcpu_load() set up the register save area to - * the &vcpu->run->s.regs.vrs and, thus, the vector registers - * are already saved. Only the floating-point control must be - * copied. - */ - vcpu->run->s.regs.fpc = current->thread.fpu.fpc; - else - save_fpu_to(&vcpu->arch.guest_fpregs); - load_fpu_from(&vcpu->arch.host_fpregs); + /* Restore host register state */ + current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; + current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; save_access_regs(vcpu->run->s.regs.acrs); restore_access_regs(vcpu->arch.host_acrs); @@ -1506,8 +1474,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); vcpu->arch.sie_block->gcr[0] = 0xE0UL; vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; - vcpu->arch.guest_fpregs.fpc = 0; - asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); + /* make sure the new fpc will be lazily loaded */ + save_fpu_regs(); + current->thread.fpu.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; @@ -1648,17 +1617,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; - /* - * Allocate a save area for floating-point registers. If the vector - * extension is available, register contents are saved in the SIE - * control block. The allocated save area is still required in - * particular places, for example, in kvm_s390_vcpu_store_status(). - */ - vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS, - GFP_KERNEL); - if (!vcpu->arch.guest_fpregs.fprs) - goto out_free_sie_block; - rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) goto out_free_sie_block; @@ -1879,19 +1837,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { + /* make sure the new values will be lazily loaded */ + save_fpu_regs(); if (test_fp_ctl(fpu->fpc)) return -EINVAL; - memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); - vcpu->arch.guest_fpregs.fpc = fpu->fpc; - save_fpu_regs(); - load_fpu_from(&vcpu->arch.guest_fpregs); + current->thread.fpu.fpc = fpu->fpc; + if (MACHINE_HAS_VX) + convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs); + else + memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs)); return 0; } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); - fpu->fpc = vcpu->arch.guest_fpregs.fpc; + /* make sure we have the latest values */ + save_fpu_regs(); + if (MACHINE_HAS_VX) + convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs); + else + memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs)); + fpu->fpc = current->thread.fpu.fpc; return 0; } @@ -2396,6 +2362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) { unsigned char archmode = 1; + freg_t fprs[NUM_FPRS]; unsigned int px; u64 clkcomp; int rc; @@ -2411,8 +2378,16 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) gpa = px; } else gpa -= __LC_FPREGS_SAVE_AREA; - rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, - vcpu->arch.guest_fpregs.fprs, 128); + + /* manually convert vector registers if necessary */ + if (MACHINE_HAS_VX) { + convert_vx_to_fp(fprs, current->thread.fpu.vxrs); + rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, + fprs, 128); + } else { + rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, + vcpu->run->s.regs.vrs, 128); + } rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, vcpu->run->s.regs.gprs, 128); rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, @@ -2420,7 +2395,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, &px, 4); rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, - &vcpu->arch.guest_fpregs.fpc, 4); + &vcpu->run->s.regs.fpc, 4); rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, &vcpu->arch.sie_block->todpr, 4); rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, @@ -2443,19 +2418,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) * it into the save area */ save_fpu_regs(); - if (test_kvm_facility(vcpu->kvm, 129)) { - /* - * If the vector extension is available, the vector registers - * which overlaps with floating-point registers are saved in - * the SIE-control block. Hence, extract the floating-point - * registers and the FPC value and store them in the - * guest_fpregs structure. - */ - vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc; - convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs, - current->thread.fpu.vxrs); - } else - save_fpu_to(&vcpu->arch.guest_fpregs); + vcpu->run->s.regs.fpc = current->thread.fpu.fpc; save_access_regs(vcpu->run->s.regs.acrs); return kvm_s390_store_status_unloaded(vcpu, addr); diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h index f887c6465a82..8a84e05adb2e 100644 --- a/arch/sh/include/asm/barrier.h +++ b/arch/sh/include/asm/barrier.h @@ -33,7 +33,6 @@ #endif #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) -#define smp_store_mb(var, value) __smp_store_mb(var, value) #include <asm-generic/barrier.h> diff --git a/block/blk-merge.c b/block/blk-merge.c index 1699df5b0493..888a7fec81f7 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -70,6 +70,18 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q, return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs); } +static inline unsigned get_max_io_size(struct request_queue *q, + struct bio *bio) +{ + unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector); + unsigned mask = queue_logical_block_size(q) - 1; + + /* aligned to logical block size */ + sectors &= ~(mask >> 9); + + return sectors; +} + static struct bio *blk_bio_segment_split(struct request_queue *q, struct bio *bio, struct bio_set *bs, @@ -81,6 +93,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, unsigned front_seg_size = bio->bi_seg_front_size; bool do_split = true; struct bio *new = NULL; + const unsigned max_sectors = get_max_io_size(q, bio); bio_for_each_segment(bv, bio, iter) { /* @@ -90,20 +103,19 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset)) goto split; - if (sectors + (bv.bv_len >> 9) > - blk_max_size_offset(q, bio->bi_iter.bi_sector)) { + if (sectors + (bv.bv_len >> 9) > max_sectors) { /* * Consider this a new segment if we're splitting in * the middle of this vector. */ if (nsegs < queue_max_segments(q) && - sectors < blk_max_size_offset(q, - bio->bi_iter.bi_sector)) { + sectors < max_sectors) { nsegs++; - sectors = blk_max_size_offset(q, - bio->bi_iter.bi_sector); + sectors = max_sectors; } - goto split; + if (sectors) + goto split; + /* Make this single bvec as the 1st segment */ } if (bvprvp && blk_queue_cluster(q)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 313b0cc8d676..82edf95b7740 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -2278,60 +2278,60 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e)) #define amdgpu_dpm_get_temperature(adev) \ - (adev)->pp_enabled ? \ + ((adev)->pp_enabled ? \ (adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle) : \ - (adev)->pm.funcs->get_temperature((adev)) + (adev)->pm.funcs->get_temperature((adev))) #define amdgpu_dpm_set_fan_control_mode(adev, m) \ - (adev)->pp_enabled ? \ + ((adev)->pp_enabled ? \ (adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)) : \ - (adev)->pm.funcs->set_fan_control_mode((adev), (m)) + (adev)->pm.funcs->set_fan_control_mode((adev), (m))) #define amdgpu_dpm_get_fan_control_mode(adev) \ - (adev)->pp_enabled ? \ + ((adev)->pp_enabled ? \ (adev)->powerplay.pp_funcs->get_fan_control_mode((adev)->powerplay.pp_handle) : \ - (adev)->pm.funcs->get_fan_control_mode((adev)) + (adev)->pm.funcs->get_fan_control_mode((adev))) #define amdgpu_dpm_set_fan_speed_percent(adev, s) \ - (adev)->pp_enabled ? \ + ((adev)->pp_enabled ? \ (adev)->powerplay.pp_funcs->set_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \ - (adev)->pm.funcs->set_fan_speed_percent((adev), (s)) + (adev)->pm.funcs->set_fan_speed_percent((adev), (s))) #define amdgpu_dpm_get_fan_speed_percent(adev, s) \ - (adev)->pp_enabled ? \ + ((adev)->pp_enabled ? \ (adev)->powerplay.pp_funcs->get_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \ - (adev)->pm.funcs->get_fan_speed_percent((adev), (s)) + (adev)->pm.funcs->get_fan_speed_percent((adev), (s))) #define amdgpu_dpm_get_sclk(adev, l) \ - (adev)->pp_enabled ? \ + ((adev)->pp_enabled ? \ (adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)) : \ - (adev)->pm.funcs->get_sclk((adev), (l)) + (adev)->pm.funcs->get_sclk((adev), (l))) #define amdgpu_dpm_get_mclk(adev, l) \ - (adev)->pp_enabled ? \ + ((adev)->pp_enabled ? \ (adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l)) : \ - (adev)->pm.funcs->get_mclk((adev), (l)) + (adev)->pm.funcs->get_mclk((adev), (l))) #define amdgpu_dpm_force_performance_level(adev, l) \ - (adev)->pp_enabled ? \ + ((adev)->pp_enabled ? \ (adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)) : \ - (adev)->pm.funcs->force_performance_level((adev), (l)) + (adev)->pm.funcs->force_performance_level((adev), (l))) #define amdgpu_dpm_powergate_uvd(adev, g) \ - (adev)->pp_enabled ? \ + ((adev)->pp_enabled ? \ (adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)) : \ - (adev)->pm.funcs->powergate_uvd((adev), (g)) + (adev)->pm.funcs->powergate_uvd((adev), (g))) #define amdgpu_dpm_powergate_vce(adev, g) \ - (adev)->pp_enabled ? \ + ((adev)->pp_enabled ? \ (adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)) : \ - (adev)->pm.funcs->powergate_vce((adev), (g)) + (adev)->pm.funcs->powergate_vce((adev), (g))) #define amdgpu_dpm_debugfs_print_current_performance_level(adev, m) \ - (adev)->pp_enabled ? \ + ((adev)->pp_enabled ? \ (adev)->powerplay.pp_funcs->print_current_performance_level((adev)->powerplay.pp_handle, (m)) : \ - (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m)) + (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m))) #define amdgpu_dpm_get_current_power_state(adev) \ (adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 6f89f8e034d0..b882e8175615 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -478,9 +478,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; unsigned i; - amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm); - if (!error) { + amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm); + /* Sort the buffer list from the smallest to largest buffer, * which affects the order of buffers in the LRU list. * This assures that the smallest buffers are added first diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index cfb6caad2a73..919146780a15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -333,6 +333,10 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev) if (!adev->mode_info.mode_config_initialized) return 0; + /* don't init fbdev if there are no connectors */ + if (list_empty(&adev->ddev->mode_config.connector_list)) + return 0; + /* select 8 bpp console on low vram cards */ if (adev->mc.real_vram_size <= (32*1024*1024)) bpp_sel = 8; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index c3ce103b6a33..a2a16acee34d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -399,7 +399,8 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, } if (fpfn > bo->placements[i].fpfn) bo->placements[i].fpfn = fpfn; - if (lpfn && lpfn < bo->placements[i].lpfn) + if (!bo->placements[i].lpfn || + (lpfn && lpfn < bo->placements[i].lpfn)) bo->placements[i].lpfn = lpfn; bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 5ee9a0690278..b9d0d55f6b47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -99,13 +99,24 @@ static int amdgpu_pp_early_init(void *handle) #ifdef CONFIG_DRM_AMD_POWERPLAY switch (adev->asic_type) { - case CHIP_TONGA: - case CHIP_FIJI: - adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false; - break; - default: - adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false; - break; + case CHIP_TONGA: + case CHIP_FIJI: + adev->pp_enabled = (amdgpu_powerplay == 0) ? false : true; + break; + case CHIP_CARRIZO: + case CHIP_STONEY: + adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false; + break; + /* These chips don't have powerplay implemenations */ + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: + case CHIP_KAVERI: + case CHIP_TOPAZ: + default: + adev->pp_enabled = false; + break; } #else adev->pp_enabled = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 78e9b0f14661..d1f234dd2126 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -487,7 +487,7 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data) seq_printf(m, "rptr: 0x%08x [%5d]\n", rptr, rptr); - rptr_next = ~0; + rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr); seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n", ring->wptr, ring->wptr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index aefc668e6b5d..9599f7559b3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1282,7 +1282,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) { const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, AMDGPU_VM_PTE_COUNT * 8); - unsigned pd_size, pd_entries, pts_size; + unsigned pd_size, pd_entries; int i, r; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { @@ -1300,8 +1300,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) pd_entries = amdgpu_vm_num_pdes(adev); /* allocate page table array */ - pts_size = pd_entries * sizeof(struct amdgpu_vm_pt); - vm->page_tables = kzalloc(pts_size, GFP_KERNEL); + vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt)); if (vm->page_tables == NULL) { DRM_ERROR("Cannot allocate memory for page table array\n"); return -ENOMEM; @@ -1361,7 +1360,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) amdgpu_bo_unref(&vm->page_tables[i].entry.robj); - kfree(vm->page_tables); + drm_free_large(vm->page_tables); amdgpu_bo_unref(&vm->page_directory); fence_put(vm->page_directory_fence); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 13235d84e5a6..95c0cdfbd1b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4186,7 +4186,18 @@ static int gfx_v8_0_soft_reset(void *handle) gfx_v8_0_cp_gfx_enable(adev, false); /* Disable MEC parsing/prefetching */ - /* XXX todo */ + gfx_v8_0_cp_compute_enable(adev, false); + + if (grbm_soft_reset || srbm_soft_reset) { + tmp = RREG32(mmGMCON_DEBUG); + tmp = REG_SET_FIELD(tmp, + GMCON_DEBUG, GFX_STALL, 1); + tmp = REG_SET_FIELD(tmp, + GMCON_DEBUG, GFX_CLEAR, 1); + WREG32(mmGMCON_DEBUG, tmp); + + udelay(50); + } if (grbm_soft_reset) { tmp = RREG32(mmGRBM_SOFT_RESET); @@ -4215,6 +4226,16 @@ static int gfx_v8_0_soft_reset(void *handle) WREG32(mmSRBM_SOFT_RESET, tmp); tmp = RREG32(mmSRBM_SOFT_RESET); } + + if (grbm_soft_reset || srbm_soft_reset) { + tmp = RREG32(mmGMCON_DEBUG); + tmp = REG_SET_FIELD(tmp, + GMCON_DEBUG, GFX_STALL, 0); + tmp = REG_SET_FIELD(tmp, + GMCON_DEBUG, GFX_CLEAR, 0); + WREG32(mmGMCON_DEBUG, tmp); + } + /* Wait a little for things to settle down */ udelay(50); gfx_v8_0_print_status((void *)adev); diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c index f4a1346525fe..0497784b3652 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c @@ -122,25 +122,12 @@ static int tonga_dpm_hw_fini(void *handle) static int tonga_dpm_suspend(void *handle) { - return 0; + return tonga_dpm_hw_fini(handle); } static int tonga_dpm_resume(void *handle) { - int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - mutex_lock(&adev->pm.mutex); - - ret = tonga_smu_start(adev); - if (ret) { - DRM_ERROR("SMU start failed\n"); - goto fail; - } - -fail: - mutex_unlock(&adev->pm.mutex); - return ret; + return tonga_dpm_hw_init(handle); } static int tonga_dpm_set_clockgating_state(void *handle, diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 8f5d5edcf193..aa67244a77ae 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -64,6 +64,11 @@ static int pp_sw_init(void *handle) if (ret == 0) ret = hwmgr->hwmgr_func->backend_init(hwmgr); + if (ret) + printk("amdgpu: powerplay initialization failed\n"); + else + printk("amdgpu: powerplay initialized\n"); + return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c index 873a8d264d5c..ec222c665602 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c @@ -272,6 +272,9 @@ static int cz_start_smu(struct pp_smumgr *smumgr) UCODE_ID_CP_MEC_JT1_MASK | UCODE_ID_CP_MEC_JT2_MASK; + if (smumgr->chip_id == CHIP_STONEY) + fw_to_check &= ~(UCODE_ID_SDMA1_MASK | UCODE_ID_CP_MEC_JT2_MASK); + cz_request_smu_load_fw(smumgr); cz_check_fw_load_finish(smumgr, fw_to_check); @@ -282,7 +285,7 @@ static int cz_start_smu(struct pp_smumgr *smumgr) return ret; } -static uint8_t cz_translate_firmware_enum_to_arg( +static uint8_t cz_translate_firmware_enum_to_arg(struct pp_smumgr *smumgr, enum cz_scratch_entry firmware_enum) { uint8_t ret = 0; @@ -292,7 +295,10 @@ static uint8_t cz_translate_firmware_enum_to_arg( ret = UCODE_ID_SDMA0; break; case CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1: - ret = UCODE_ID_SDMA1; + if (smumgr->chip_id == CHIP_STONEY) + ret = UCODE_ID_SDMA0; + else + ret = UCODE_ID_SDMA1; break; case CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE: ret = UCODE_ID_CP_CE; @@ -307,7 +313,10 @@ static uint8_t cz_translate_firmware_enum_to_arg( ret = UCODE_ID_CP_MEC_JT1; break; case CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2: - ret = UCODE_ID_CP_MEC_JT2; + if (smumgr->chip_id == CHIP_STONEY) + ret = UCODE_ID_CP_MEC_JT1; + else + ret = UCODE_ID_CP_MEC_JT2; break; case CZ_SCRATCH_ENTRY_UCODE_ID_GMCON_RENG: ret = UCODE_ID_GMCON_RENG; @@ -396,7 +405,7 @@ static int cz_smu_populate_single_scratch_task( struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++]; task->type = type; - task->arg = cz_translate_firmware_enum_to_arg(fw_enum); + task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum); task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count; for (i = 0; i < cz_smu->scratch_buffer_length; i++) @@ -433,7 +442,7 @@ static int cz_smu_populate_single_ucode_load_task( struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++]; task->type = TASK_TYPE_UCODE_LOAD; - task->arg = cz_translate_firmware_enum_to_arg(fw_enum); + task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum); task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count; for (i = 0; i < cz_smu->driver_buffer_length; i++) @@ -509,8 +518,14 @@ static int cz_smu_construct_toc_for_vddgfx_exit(struct pp_smumgr *smumgr) CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false); cz_smu_populate_single_ucode_load_task(smumgr, CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false); - cz_smu_populate_single_ucode_load_task(smumgr, + + if (smumgr->chip_id == CHIP_STONEY) + cz_smu_populate_single_ucode_load_task(smumgr, + CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false); + else + cz_smu_populate_single_ucode_load_task(smumgr, CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false); + cz_smu_populate_single_ucode_load_task(smumgr, CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, false); @@ -551,7 +566,11 @@ static int cz_smu_construct_toc_for_bootup(struct pp_smumgr *smumgr) cz_smu_populate_single_ucode_load_task(smumgr, CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false); - cz_smu_populate_single_ucode_load_task(smumgr, + if (smumgr->chip_id == CHIP_STONEY) + cz_smu_populate_single_ucode_load_task(smumgr, + CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false); + else + cz_smu_populate_single_ucode_load_task(smumgr, CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1, false); cz_smu_populate_single_ucode_load_task(smumgr, CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE, false); @@ -561,7 +580,11 @@ static int cz_smu_construct_toc_for_bootup(struct pp_smumgr *smumgr) CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false); cz_smu_populate_single_ucode_load_task(smumgr, CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false); - cz_smu_populate_single_ucode_load_task(smumgr, + if (smumgr->chip_id == CHIP_STONEY) + cz_smu_populate_single_ucode_load_task(smumgr, + CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false); + else + cz_smu_populate_single_ucode_load_task(smumgr, CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false); cz_smu_populate_single_ucode_load_task(smumgr, CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, true); @@ -618,7 +641,7 @@ static int cz_smu_populate_firmware_entries(struct pp_smumgr *smumgr) for (i = 0; i < sizeof(firmware_list)/sizeof(*firmware_list); i++) { - firmware_type = cz_translate_firmware_enum_to_arg( + firmware_type = cz_translate_firmware_enum_to_arg(smumgr, firmware_list[i]); ucode_id = cz_convert_fw_type_to_cgs(firmware_type); diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 57cccd68ca52..7c523060a076 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -946,9 +946,23 @@ static void wait_for_fences(struct drm_device *dev, } } -static bool framebuffer_changed(struct drm_device *dev, - struct drm_atomic_state *old_state, - struct drm_crtc *crtc) +/** + * drm_atomic_helper_framebuffer_changed - check if framebuffer has changed + * @dev: DRM device + * @old_state: atomic state object with old state structures + * @crtc: DRM crtc + * + * Checks whether the framebuffer used for this CRTC changes as a result of + * the atomic update. This is useful for drivers which cannot use + * drm_atomic_helper_wait_for_vblanks() and need to reimplement its + * functionality. + * + * Returns: + * true if the framebuffer changed. + */ +bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev, + struct drm_atomic_state *old_state, + struct drm_crtc *crtc) { struct drm_plane *plane; struct drm_plane_state *old_plane_state; @@ -965,6 +979,7 @@ static bool framebuffer_changed(struct drm_device *dev, return false; } +EXPORT_SYMBOL(drm_atomic_helper_framebuffer_changed); /** * drm_atomic_helper_wait_for_vblanks - wait for vblank on crtcs @@ -999,7 +1014,8 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev, if (old_state->legacy_cursor_update) continue; - if (!framebuffer_changed(dev, old_state, crtc)) + if (!drm_atomic_helper_framebuffer_changed(dev, + old_state, crtc)) continue; ret = drm_crtc_vblank_get(crtc); diff --git a/drivers/gpu/drm/etnaviv/common.xml.h b/drivers/gpu/drm/etnaviv/common.xml.h index 9e585d51fb78..e881482b5971 100644 --- a/drivers/gpu/drm/etnaviv/common.xml.h +++ b/drivers/gpu/drm/etnaviv/common.xml.h @@ -8,8 +8,8 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- state_vg.xml ( 5973 bytes, from 2015-03-25 11:26:01) -- common.xml ( 18437 bytes, from 2015-03-25 11:27:41) +- state_hi.xml ( 24309 bytes, from 2015-12-12 09:02:53) +- common.xml ( 18379 bytes, from 2015-12-12 09:02:53) Copyright (C) 2015 */ @@ -30,15 +30,19 @@ Copyright (C) 2015 #define ENDIAN_MODE_NO_SWAP 0x00000000 #define ENDIAN_MODE_SWAP_16 0x00000001 #define ENDIAN_MODE_SWAP_32 0x00000002 +#define chipModel_GC200 0x00000200 #define chipModel_GC300 0x00000300 #define chipModel_GC320 0x00000320 +#define chipModel_GC328 0x00000328 #define chipModel_GC350 0x00000350 #define chipModel_GC355 0x00000355 #define chipModel_GC400 0x00000400 #define chipModel_GC410 0x00000410 #define chipModel_GC420 0x00000420 +#define chipModel_GC428 0x00000428 #define chipModel_GC450 0x00000450 #define chipModel_GC500 0x00000500 +#define chipModel_GC520 0x00000520 #define chipModel_GC530 0x00000530 #define chipModel_GC600 0x00000600 #define chipModel_GC700 0x00000700 @@ -46,9 +50,16 @@ Copyright (C) 2015 #define chipModel_GC860 0x00000860 #define chipModel_GC880 0x00000880 #define chipModel_GC1000 0x00001000 +#define chipModel_GC1500 0x00001500 #define chipModel_GC2000 0x00002000 #define chipModel_GC2100 0x00002100 +#define chipModel_GC2200 0x00002200 +#define chipModel_GC2500 0x00002500 +#define chipModel_GC3000 0x00003000 #define chipModel_GC4000 0x00004000 +#define chipModel_GC5000 0x00005000 +#define chipModel_GC5200 0x00005200 +#define chipModel_GC6400 0x00006400 #define RGBA_BITS_R 0x00000001 #define RGBA_BITS_G 0x00000002 #define RGBA_BITS_B 0x00000004 @@ -160,7 +171,7 @@ Copyright (C) 2015 #define chipMinorFeatures2_UNK8 0x00000100 #define chipMinorFeatures2_UNK9 0x00000200 #define chipMinorFeatures2_UNK10 0x00000400 -#define chipMinorFeatures2_SAMPLERBASE_16 0x00000800 +#define chipMinorFeatures2_HALTI1 0x00000800 #define chipMinorFeatures2_UNK12 0x00001000 #define chipMinorFeatures2_UNK13 0x00002000 #define chipMinorFeatures2_UNK14 0x00004000 @@ -189,7 +200,7 @@ Copyright (C) 2015 #define chipMinorFeatures3_UNK5 0x00000020 #define chipMinorFeatures3_UNK6 0x00000040 #define chipMinorFeatures3_UNK7 0x00000080 -#define chipMinorFeatures3_UNK8 0x00000100 +#define chipMinorFeatures3_FAST_MSAA 0x00000100 #define chipMinorFeatures3_UNK9 0x00000200 #define chipMinorFeatures3_BUG_FIXES10 0x00000400 #define chipMinorFeatures3_UNK11 0x00000800 @@ -199,7 +210,7 @@ Copyright (C) 2015 #define chipMinorFeatures3_UNK15 0x00008000 #define chipMinorFeatures3_UNK16 0x00010000 #define chipMinorFeatures3_UNK17 0x00020000 -#define chipMinorFeatures3_UNK18 0x00040000 +#define chipMinorFeatures3_ACE 0x00040000 #define chipMinorFeatures3_UNK19 0x00080000 #define chipMinorFeatures3_UNK20 0x00100000 #define chipMinorFeatures3_UNK21 0x00200000 @@ -207,7 +218,7 @@ Copyright (C) 2015 #define chipMinorFeatures3_UNK23 0x00800000 #define chipMinorFeatures3_UNK24 0x01000000 #define chipMinorFeatures3_UNK25 0x02000000 -#define chipMinorFeatures3_UNK26 0x04000000 +#define chipMinorFeatures3_NEW_HZ 0x04000000 #define chipMinorFeatures3_UNK27 0x08000000 #define chipMinorFeatures3_UNK28 0x10000000 #define chipMinorFeatures3_UNK29 0x20000000 @@ -229,9 +240,9 @@ Copyright (C) 2015 #define chipMinorFeatures4_UNK13 0x00002000 #define chipMinorFeatures4_UNK14 0x00004000 #define chipMinorFeatures4_UNK15 0x00008000 -#define chipMinorFeatures4_UNK16 0x00010000 +#define chipMinorFeatures4_HALTI2 0x00010000 #define chipMinorFeatures4_UNK17 0x00020000 -#define chipMinorFeatures4_UNK18 0x00040000 +#define chipMinorFeatures4_SMALL_MSAA 0x00040000 #define chipMinorFeatures4_UNK19 0x00080000 #define chipMinorFeatures4_UNK20 0x00100000 #define chipMinorFeatures4_UNK21 0x00200000 @@ -245,5 +256,37 @@ Copyright (C) 2015 #define chipMinorFeatures4_UNK29 0x20000000 #define chipMinorFeatures4_UNK30 0x40000000 #define chipMinorFeatures4_UNK31 0x80000000 +#define chipMinorFeatures5_UNK0 0x00000001 +#define chipMinorFeatures5_UNK1 0x00000002 +#define chipMinorFeatures5_UNK2 0x00000004 +#define chipMinorFeatures5_UNK3 0x00000008 +#define chipMinorFeatures5_UNK4 0x00000010 +#define chipMinorFeatures5_UNK5 0x00000020 +#define chipMinorFeatures5_UNK6 0x00000040 +#define chipMinorFeatures5_UNK7 0x00000080 +#define chipMinorFeatures5_UNK8 0x00000100 +#define chipMinorFeatures5_HALTI3 0x00000200 +#define chipMinorFeatures5_UNK10 0x00000400 +#define chipMinorFeatures5_UNK11 0x00000800 +#define chipMinorFeatures5_UNK12 0x00001000 +#define chipMinorFeatures5_UNK13 0x00002000 +#define chipMinorFeatures5_UNK14 0x00004000 +#define chipMinorFeatures5_UNK15 0x00008000 +#define chipMinorFeatures5_UNK16 0x00010000 +#define chipMinorFeatures5_UNK17 0x00020000 +#define chipMinorFeatures5_UNK18 0x00040000 +#define chipMinorFeatures5_UNK19 0x00080000 +#define chipMinorFeatures5_UNK20 0x00100000 +#define chipMinorFeatures5_UNK21 0x00200000 +#define chipMinorFeatures5_UNK22 0x00400000 +#define chipMinorFeatures5_UNK23 0x00800000 +#define chipMinorFeatures5_UNK24 0x01000000 +#define chipMinorFeatures5_UNK25 0x02000000 +#define chipMinorFeatures5_UNK26 0x04000000 +#define chipMinorFeatures5_UNK27 0x08000000 +#define chipMinorFeatures5_UNK28 0x10000000 +#define chipMinorFeatures5_UNK29 0x20000000 +#define chipMinorFeatures5_UNK30 0x40000000 +#define chipMinorFeatures5_UNK31 0x80000000 #endif /* COMMON_XML */ diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 5c89ebb52fd2..e8858985f01e 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -668,7 +668,6 @@ static struct platform_driver etnaviv_platform_driver = { .probe = etnaviv_pdev_probe, .remove = etnaviv_pdev_remove, .driver = { - .owner = THIS_MODULE, .name = "etnaviv", .of_match_table = dt_match, }, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h index d6bd438bd5be..1cd6046e76b1 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h @@ -85,7 +85,7 @@ struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg); int etnaviv_gem_prime_pin(struct drm_gem_object *obj); void etnaviv_gem_prime_unpin(struct drm_gem_object *obj); -void *etnaviv_gem_vaddr(struct drm_gem_object *obj); +void *etnaviv_gem_vmap(struct drm_gem_object *obj); int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op, struct timespec *timeout); int etnaviv_gem_cpu_fini(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c index bf8fa859e8be..4a29eeadbf1e 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c @@ -201,7 +201,9 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) obj = vram->object; + mutex_lock(&obj->lock); pages = etnaviv_gem_get_pages(obj); + mutex_unlock(&obj->lock); if (pages) { int j; @@ -213,8 +215,8 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) iter.hdr->iova = cpu_to_le64(vram->iova); - vaddr = etnaviv_gem_vaddr(&obj->base); - if (vaddr && !IS_ERR(vaddr)) + vaddr = etnaviv_gem_vmap(&obj->base); + if (vaddr) memcpy(iter.data, vaddr, obj->base.size); etnaviv_core_dump_header(&iter, ETDUMP_BUF_BO, iter.data + diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 9f77c3b94cc6..4b519e4309b2 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -353,25 +353,39 @@ void etnaviv_gem_put_iova(struct etnaviv_gpu *gpu, struct drm_gem_object *obj) drm_gem_object_unreference_unlocked(obj); } -void *etnaviv_gem_vaddr(struct drm_gem_object *obj) +void *etnaviv_gem_vmap(struct drm_gem_object *obj) { struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); - mutex_lock(&etnaviv_obj->lock); - if (!etnaviv_obj->vaddr) { - struct page **pages = etnaviv_gem_get_pages(etnaviv_obj); - - if (IS_ERR(pages)) - return ERR_CAST(pages); + if (etnaviv_obj->vaddr) + return etnaviv_obj->vaddr; - etnaviv_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT, - VM_MAP, pgprot_writecombine(PAGE_KERNEL)); - } + mutex_lock(&etnaviv_obj->lock); + /* + * Need to check again, as we might have raced with another thread + * while waiting for the mutex. + */ + if (!etnaviv_obj->vaddr) + etnaviv_obj->vaddr = etnaviv_obj->ops->vmap(etnaviv_obj); mutex_unlock(&etnaviv_obj->lock); return etnaviv_obj->vaddr; } +static void *etnaviv_gem_vmap_impl(struct etnaviv_gem_object *obj) +{ + struct page **pages; + + lockdep_assert_held(&obj->lock); + + pages = etnaviv_gem_get_pages(obj); + if (IS_ERR(pages)) + return NULL; + + return vmap(pages, obj->base.size >> PAGE_SHIFT, + VM_MAP, pgprot_writecombine(PAGE_KERNEL)); +} + static inline enum dma_data_direction etnaviv_op_to_dma_dir(u32 op) { if (op & ETNA_PREP_READ) @@ -522,6 +536,7 @@ static void etnaviv_gem_shmem_release(struct etnaviv_gem_object *etnaviv_obj) static const struct etnaviv_gem_ops etnaviv_gem_shmem_ops = { .get_pages = etnaviv_gem_shmem_get_pages, .release = etnaviv_gem_shmem_release, + .vmap = etnaviv_gem_vmap_impl, }; void etnaviv_gem_free_object(struct drm_gem_object *obj) @@ -866,6 +881,7 @@ static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj) static const struct etnaviv_gem_ops etnaviv_gem_userptr_ops = { .get_pages = etnaviv_gem_userptr_get_pages, .release = etnaviv_gem_userptr_release, + .vmap = etnaviv_gem_vmap_impl, }; int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h index a300b4b3d545..ab5df8147a5f 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h @@ -78,6 +78,7 @@ struct etnaviv_gem_object *to_etnaviv_bo(struct drm_gem_object *obj) struct etnaviv_gem_ops { int (*get_pages)(struct etnaviv_gem_object *); void (*release)(struct etnaviv_gem_object *); + void *(*vmap)(struct etnaviv_gem_object *); }; static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c index e94db4f95770..4e67395f5fa1 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c @@ -31,7 +31,7 @@ struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj) void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj) { - return etnaviv_gem_vaddr(obj); + return etnaviv_gem_vmap(obj); } void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) @@ -77,9 +77,17 @@ static void etnaviv_gem_prime_release(struct etnaviv_gem_object *etnaviv_obj) drm_prime_gem_destroy(&etnaviv_obj->base, etnaviv_obj->sgt); } +static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj) +{ + lockdep_assert_held(&etnaviv_obj->lock); + + return dma_buf_vmap(etnaviv_obj->base.import_attach->dmabuf); +} + static const struct etnaviv_gem_ops etnaviv_gem_prime_ops = { /* .get_pages should never be called */ .release = etnaviv_gem_prime_release, + .vmap = etnaviv_gem_prime_vmap_impl, }; struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 056a72e6ed26..a33162cf4f4c 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -72,6 +72,14 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value) *value = gpu->identity.minor_features3; break; + case ETNAVIV_PARAM_GPU_FEATURES_5: + *value = gpu->identity.minor_features4; + break; + + case ETNAVIV_PARAM_GPU_FEATURES_6: + *value = gpu->identity.minor_features5; + break; + case ETNAVIV_PARAM_GPU_STREAM_COUNT: *value = gpu->identity.stream_count; break; @@ -112,6 +120,10 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value) *value = gpu->identity.num_constants; break; + case ETNAVIV_PARAM_GPU_NUM_VARYINGS: + *value = gpu->identity.varyings_count; + break; + default: DBG("%s: invalid param: %u", dev_name(gpu->dev), param); return -EINVAL; @@ -120,46 +132,56 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value) return 0; } + +#define etnaviv_is_model_rev(gpu, mod, rev) \ + ((gpu)->identity.model == chipModel_##mod && \ + (gpu)->identity.revision == rev) +#define etnaviv_field(val, field) \ + (((val) & field##__MASK) >> field##__SHIFT) + static void etnaviv_hw_specs(struct etnaviv_gpu *gpu) { if (gpu->identity.minor_features0 & chipMinorFeatures0_MORE_MINOR_FEATURES) { - u32 specs[2]; + u32 specs[4]; + unsigned int streams; specs[0] = gpu_read(gpu, VIVS_HI_CHIP_SPECS); specs[1] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_2); - - gpu->identity.stream_count = - (specs[0] & VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK) - >> VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT; - gpu->identity.register_max = - (specs[0] & VIVS_HI_CHIP_SPECS_REGISTER_MAX__MASK) - >> VIVS_HI_CHIP_SPECS_REGISTER_MAX__SHIFT; - gpu->identity.thread_count = - (specs[0] & VIVS_HI_CHIP_SPECS_THREAD_COUNT__MASK) - >> VIVS_HI_CHIP_SPECS_THREAD_COUNT__SHIFT; - gpu->identity.vertex_cache_size = - (specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__MASK) - >> VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__SHIFT; - gpu->identity.shader_core_count = - (specs[0] & VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__MASK) - >> VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__SHIFT; - gpu->identity.pixel_pipes = - (specs[0] & VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK) - >> VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT; + specs[2] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_3); + specs[3] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_4); + + gpu->identity.stream_count = etnaviv_field(specs[0], + VIVS_HI_CHIP_SPECS_STREAM_COUNT); + gpu->identity.register_max = etnaviv_field(specs[0], + VIVS_HI_CHIP_SPECS_REGISTER_MAX); + gpu->identity.thread_count = etnaviv_field(specs[0], + VIVS_HI_CHIP_SPECS_THREAD_COUNT); + gpu->identity.vertex_cache_size = etnaviv_field(specs[0], + VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE); + gpu->identity.shader_core_count = etnaviv_field(specs[0], + VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT); + gpu->identity.pixel_pipes = etnaviv_field(specs[0], + VIVS_HI_CHIP_SPECS_PIXEL_PIPES); gpu->identity.vertex_output_buffer_size = - (specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__MASK) - >> VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__SHIFT; - - gpu->identity.buffer_size = - (specs[1] & VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__MASK) - >> VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__SHIFT; - gpu->identity.instruction_count = - (specs[1] & VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__MASK) - >> VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__SHIFT; - gpu->identity.num_constants = - (specs[1] & VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__MASK) - >> VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__SHIFT; + etnaviv_field(specs[0], + VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE); + + gpu->identity.buffer_size = etnaviv_field(specs[1], + VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE); + gpu->identity.instruction_count = etnaviv_field(specs[1], + VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT); + gpu->identity.num_constants = etnaviv_field(specs[1], + VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS); + + gpu->identity.varyings_count = etnaviv_field(specs[2], + VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT); + + /* This overrides the value from older register if non-zero */ + streams = etnaviv_field(specs[3], + VIVS_HI_CHIP_SPECS_4_STREAM_COUNT); + if (streams) + gpu->identity.stream_count = streams; } /* Fill in the stream count if not specified */ @@ -173,7 +195,7 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu) /* Convert the register max value */ if (gpu->identity.register_max) gpu->identity.register_max = 1 << gpu->identity.register_max; - else if (gpu->identity.model == 0x0400) + else if (gpu->identity.model == chipModel_GC400) gpu->identity.register_max = 32; else gpu->identity.register_max = 64; @@ -181,10 +203,10 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu) /* Convert thread count */ if (gpu->identity.thread_count) gpu->identity.thread_count = 1 << gpu->identity.thread_count; - else if (gpu->identity.model == 0x0400) + else if (gpu->identity.model == chipModel_GC400) gpu->identity.thread_count = 64; - else if (gpu->identity.model == 0x0500 || - gpu->identity.model == 0x0530) + else if (gpu->identity.model == chipModel_GC500 || + gpu->identity.model == chipModel_GC530) gpu->identity.thread_count = 128; else gpu->identity.thread_count = 256; @@ -206,7 +228,7 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu) if (gpu->identity.vertex_output_buffer_size) { gpu->identity.vertex_output_buffer_size = 1 << gpu->identity.vertex_output_buffer_size; - } else if (gpu->identity.model == 0x0400) { + } else if (gpu->identity.model == chipModel_GC400) { if (gpu->identity.revision < 0x4000) gpu->identity.vertex_output_buffer_size = 512; else if (gpu->identity.revision < 0x4200) @@ -219,9 +241,8 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu) switch (gpu->identity.instruction_count) { case 0: - if ((gpu->identity.model == 0x2000 && - gpu->identity.revision == 0x5108) || - gpu->identity.model == 0x880) + if (etnaviv_is_model_rev(gpu, GC2000, 0x5108) || + gpu->identity.model == chipModel_GC880) gpu->identity.instruction_count = 512; else gpu->identity.instruction_count = 256; @@ -242,6 +263,30 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu) if (gpu->identity.num_constants == 0) gpu->identity.num_constants = 168; + + if (gpu->identity.varyings_count == 0) { + if (gpu->identity.minor_features1 & chipMinorFeatures1_HALTI0) + gpu->identity.varyings_count = 12; + else + gpu->identity.varyings_count = 8; + } + + /* + * For some cores, two varyings are consumed for position, so the + * maximum varying count needs to be reduced by one. + */ + if (etnaviv_is_model_rev(gpu, GC5000, 0x5434) || + etnaviv_is_model_rev(gpu, GC4000, 0x5222) || + etnaviv_is_model_rev(gpu, GC4000, 0x5245) || + etnaviv_is_model_rev(gpu, GC4000, 0x5208) || + etnaviv_is_model_rev(gpu, GC3000, 0x5435) || + etnaviv_is_model_rev(gpu, GC2200, 0x5244) || + etnaviv_is_model_rev(gpu, GC2100, 0x5108) || + etnaviv_is_model_rev(gpu, GC2000, 0x5108) || + etnaviv_is_model_rev(gpu, GC1500, 0x5246) || + etnaviv_is_model_rev(gpu, GC880, 0x5107) || + etnaviv_is_model_rev(gpu, GC880, 0x5106)) + gpu->identity.varyings_count -= 1; } static void etnaviv_hw_identify(struct etnaviv_gpu *gpu) @@ -251,12 +296,10 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu) chipIdentity = gpu_read(gpu, VIVS_HI_CHIP_IDENTITY); /* Special case for older graphic cores. */ - if (((chipIdentity & VIVS_HI_CHIP_IDENTITY_FAMILY__MASK) - >> VIVS_HI_CHIP_IDENTITY_FAMILY__SHIFT) == 0x01) { - gpu->identity.model = 0x500; /* gc500 */ - gpu->identity.revision = - (chipIdentity & VIVS_HI_CHIP_IDENTITY_REVISION__MASK) - >> VIVS_HI_CHIP_IDENTITY_REVISION__SHIFT; + if (etnaviv_field(chipIdentity, VIVS_HI_CHIP_IDENTITY_FAMILY) == 0x01) { + gpu->identity.model = chipModel_GC500; + gpu->identity.revision = etnaviv_field(chipIdentity, + VIVS_HI_CHIP_IDENTITY_REVISION); } else { gpu->identity.model = gpu_read(gpu, VIVS_HI_CHIP_MODEL); @@ -269,13 +312,12 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu) * same. Only for GC400 family. */ if ((gpu->identity.model & 0xff00) == 0x0400 && - gpu->identity.model != 0x0420) { + gpu->identity.model != chipModel_GC420) { gpu->identity.model = gpu->identity.model & 0x0400; } /* Another special case */ - if (gpu->identity.model == 0x300 && - gpu->identity.revision == 0x2201) { + if (etnaviv_is_model_rev(gpu, GC300, 0x2201)) { u32 chipDate = gpu_read(gpu, VIVS_HI_CHIP_DATE); u32 chipTime = gpu_read(gpu, VIVS_HI_CHIP_TIME); @@ -295,11 +337,13 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu) gpu->identity.features = gpu_read(gpu, VIVS_HI_CHIP_FEATURE); /* Disable fast clear on GC700. */ - if (gpu->identity.model == 0x700) + if (gpu->identity.model == chipModel_GC700) gpu->identity.features &= ~chipFeatures_FAST_CLEAR; - if ((gpu->identity.model == 0x500 && gpu->identity.revision < 2) || - (gpu->identity.model == 0x300 && gpu->identity.revision < 0x2000)) { + if ((gpu->identity.model == chipModel_GC500 && + gpu->identity.revision < 2) || + (gpu->identity.model == chipModel_GC300 && + gpu->identity.revision < 0x2000)) { /* * GC500 rev 1.x and GC300 rev < 2.0 doesn't have these @@ -309,6 +353,8 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu) gpu->identity.minor_features1 = 0; gpu->identity.minor_features2 = 0; gpu->identity.minor_features3 = 0; + gpu->identity.minor_features4 = 0; + gpu->identity.minor_features5 = 0; } else gpu->identity.minor_features0 = gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_0); @@ -321,6 +367,10 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu) gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_2); gpu->identity.minor_features3 = gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_3); + gpu->identity.minor_features4 = + gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_4); + gpu->identity.minor_features5 = + gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_5); } /* GC600 idle register reports zero bits where modules aren't present */ @@ -441,10 +491,9 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu) { u16 prefetch; - if (gpu->identity.model == chipModel_GC320 && - gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400 && - (gpu->identity.revision == 0x5007 || - gpu->identity.revision == 0x5220)) { + if ((etnaviv_is_model_rev(gpu, GC320, 0x5007) || + etnaviv_is_model_rev(gpu, GC320, 0x5220)) && + gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400) { u32 mc_memory_debug; mc_memory_debug = gpu_read(gpu, VIVS_MC_DEBUG_MEMORY) & ~0xff; @@ -466,7 +515,7 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu) VIVS_HI_AXI_CONFIG_ARCACHE(2)); /* GC2000 rev 5108 needs a special bus config */ - if (gpu->identity.model == 0x2000 && gpu->identity.revision == 0x5108) { + if (etnaviv_is_model_rev(gpu, GC2000, 0x5108)) { u32 bus_config = gpu_read(gpu, VIVS_MC_BUS_CONFIG); bus_config &= ~(VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK | VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK); @@ -511,8 +560,16 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) if (gpu->identity.model == 0) { dev_err(gpu->dev, "Unknown GPU model\n"); - pm_runtime_put_autosuspend(gpu->dev); - return -ENXIO; + ret = -ENXIO; + goto fail; + } + + /* Exclude VG cores with FE2.0 */ + if (gpu->identity.features & chipFeatures_PIPE_VG && + gpu->identity.features & chipFeatures_FE20) { + dev_info(gpu->dev, "Ignoring GPU with VG and FE2.0\n"); + ret = -ENXIO; + goto fail; } ret = etnaviv_hw_reset(gpu); @@ -539,10 +596,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) goto fail; } - /* TODO: we will leak here memory - fix it! */ - gpu->mmu = etnaviv_iommu_new(gpu, iommu, version); if (!gpu->mmu) { + iommu_domain_free(iommu); ret = -ENOMEM; goto fail; } @@ -552,7 +608,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) if (!gpu->buffer) { ret = -ENOMEM; dev_err(gpu->dev, "could not create command buffer\n"); - goto fail; + goto destroy_iommu; } if (gpu->buffer->paddr - gpu->memory_base > 0x80000000) { ret = -EINVAL; @@ -582,6 +638,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) free_buffer: etnaviv_gpu_cmdbuf_free(gpu->buffer); gpu->buffer = NULL; +destroy_iommu: + etnaviv_iommu_destroy(gpu->mmu); + gpu->mmu = NULL; fail: pm_runtime_mark_last_busy(gpu->dev); pm_runtime_put_autosuspend(gpu->dev); @@ -642,6 +701,10 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m) gpu->identity.minor_features2); seq_printf(m, "\t minor_features3: 0x%08x\n", gpu->identity.minor_features3); + seq_printf(m, "\t minor_features4: 0x%08x\n", + gpu->identity.minor_features4); + seq_printf(m, "\t minor_features5: 0x%08x\n", + gpu->identity.minor_features5); seq_puts(m, "\tspecs\n"); seq_printf(m, "\t stream_count: %d\n", @@ -664,6 +727,8 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m) gpu->identity.instruction_count); seq_printf(m, "\t num_constants: %d\n", gpu->identity.num_constants); + seq_printf(m, "\t varyings_count: %d\n", + gpu->identity.varyings_count); seq_printf(m, "\taxi: 0x%08x\n", axi); seq_printf(m, "\tidle: 0x%08x\n", idle); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index c75d50359ab0..f233ac4c7c1c 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -46,6 +46,12 @@ struct etnaviv_chip_identity { /* Supported minor feature 3 fields. */ u32 minor_features3; + /* Supported minor feature 4 fields. */ + u32 minor_features4; + + /* Supported minor feature 5 fields. */ + u32 minor_features5; + /* Number of streams supported. */ u32 stream_count; @@ -75,6 +81,9 @@ struct etnaviv_chip_identity { /* Buffer size */ u32 buffer_size; + + /* Number of varyings */ + u8 varyings_count; }; struct etnaviv_event { diff --git a/drivers/gpu/drm/etnaviv/state_hi.xml.h b/drivers/gpu/drm/etnaviv/state_hi.xml.h index 0064f2640396..6a7de5f1454a 100644 --- a/drivers/gpu/drm/etnaviv/state_hi.xml.h +++ b/drivers/gpu/drm/etnaviv/state_hi.xml.h @@ -8,8 +8,8 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- state_hi.xml ( 23420 bytes, from 2015-03-25 11:47:21) -- common.xml ( 18437 bytes, from 2015-03-25 11:27:41) +- state_hi.xml ( 24309 bytes, from 2015-12-12 09:02:53) +- common.xml ( 18437 bytes, from 2015-12-12 09:02:53) Copyright (C) 2015 */ @@ -182,8 +182,25 @@ Copyright (C) 2015 #define VIVS_HI_CHIP_MINOR_FEATURE_3 0x00000088 +#define VIVS_HI_CHIP_SPECS_3 0x0000008c +#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK 0x000001f0 +#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT 4 +#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT(x) (((x) << VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK) +#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK 0x00000007 +#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT 0 +#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT(x) (((x) << VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK) + #define VIVS_HI_CHIP_MINOR_FEATURE_4 0x00000094 +#define VIVS_HI_CHIP_SPECS_4 0x0000009c +#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK 0x0001f000 +#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT 12 +#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT(x) (((x) << VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK) + +#define VIVS_HI_CHIP_MINOR_FEATURE_5 0x000000a0 + +#define VIVS_HI_CHIP_PRODUCT_ID 0x000000a8 + #define VIVS_PM 0x00000000 #define VIVS_PM_POWER_CONTROLS 0x00000100 @@ -206,6 +223,11 @@ Copyright (C) 2015 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_FE 0x00000001 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_DE 0x00000002 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PE 0x00000004 +#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SH 0x00000008 +#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PA 0x00000010 +#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SE 0x00000020 +#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_RA 0x00000040 +#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_TX 0x00000080 #define VIVS_PM_PULSE_EATER 0x0000010c diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 6bfc46369db1..367a916f364e 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -304,18 +304,10 @@ void dce6_dp_audio_set_dto(struct radeon_device *rdev, unsigned int div = (RREG32(DENTIST_DISPCLK_CNTL) & DENTIST_DPREFCLK_WDIVIDER_MASK) >> DENTIST_DPREFCLK_WDIVIDER_SHIFT; - - if (div < 128 && div >= 96) - div -= 64; - else if (div >= 64) - div = div / 2 - 16; - else if (div >= 8) - div /= 4; - else - div = 0; + div = radeon_audio_decode_dfs_div(div); if (div) - clock = rdev->clock.gpupll_outputfreq * 10 / div; + clock = clock * 100 / div; WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000); WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock); diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index 9953356fe263..3cf04a2f44bb 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -289,6 +289,16 @@ void dce4_dp_audio_set_dto(struct radeon_device *rdev, * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator */ + if (ASIC_IS_DCE41(rdev)) { + unsigned int div = (RREG32(DCE41_DENTIST_DISPCLK_CNTL) & + DENTIST_DPREFCLK_WDIVIDER_MASK) >> + DENTIST_DPREFCLK_WDIVIDER_SHIFT; + div = radeon_audio_decode_dfs_div(div); + + if (div) + clock = 100 * clock / div; + } + WREG32(DCCG_AUDIO_DTO1_PHASE, 24000); WREG32(DCCG_AUDIO_DTO1_MODULE, clock); } diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 4aa5f755572b..13b6029d65cc 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -511,6 +511,11 @@ #define DCCG_AUDIO_DTO1_CNTL 0x05cc # define DCCG_AUDIO_DTO1_USE_512FBR_DTO (1 << 3) +#define DCE41_DENTIST_DISPCLK_CNTL 0x049c +# define DENTIST_DPREFCLK_WDIVIDER(x) (((x) & 0x7f) << 24) +# define DENTIST_DPREFCLK_WDIVIDER_MASK (0x7f << 24) +# define DENTIST_DPREFCLK_WDIVIDER_SHIFT 24 + /* DCE 4.0 AFMT */ #define HDMI_CONTROL 0x7030 # define HDMI_KEEPOUT_MODE (1 << 0) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5ae6db98aa4d..78a51b3eda10 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -268,7 +268,7 @@ struct radeon_clock { uint32_t current_dispclk; uint32_t dp_extclk; uint32_t max_pixel_clock; - uint32_t gpupll_outputfreq; + uint32_t vco_freq; }; /* diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 08fc1b5effa8..de9a2ffcf5f7 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -1106,6 +1106,31 @@ union firmware_info { ATOM_FIRMWARE_INFO_V2_2 info_22; }; +union igp_info { + struct _ATOM_INTEGRATED_SYSTEM_INFO info; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8; +}; + +static void radeon_atombios_get_dentist_vco_freq(struct radeon_device *rdev) +{ + struct radeon_mode_info *mode_info = &rdev->mode_info; + int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo); + union igp_info *igp_info; + u8 frev, crev; + u16 data_offset; + + if (atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + igp_info = (union igp_info *)(mode_info->atom_context->bios + + data_offset); + rdev->clock.vco_freq = + le32_to_cpu(igp_info->info_6.ulDentistVCOFreq); + } +} + bool radeon_atom_get_clock_info(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; @@ -1257,12 +1282,18 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) rdev->mode_info.firmware_flags = le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess); - if (ASIC_IS_DCE8(rdev)) { - rdev->clock.gpupll_outputfreq = + if (ASIC_IS_DCE8(rdev)) + rdev->clock.vco_freq = le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq); - if (rdev->clock.gpupll_outputfreq == 0) - rdev->clock.gpupll_outputfreq = 360000; /* 3.6 GHz */ - } + else if (ASIC_IS_DCE5(rdev)) + rdev->clock.vco_freq = rdev->clock.current_dispclk; + else if (ASIC_IS_DCE41(rdev)) + radeon_atombios_get_dentist_vco_freq(rdev); + else + rdev->clock.vco_freq = rdev->clock.current_dispclk; + + if (rdev->clock.vco_freq == 0) + rdev->clock.vco_freq = 360000; /* 3.6 GHz */ return true; } @@ -1270,14 +1301,6 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) return false; } -union igp_info { - struct _ATOM_INTEGRATED_SYSTEM_INFO info; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8; -}; - bool radeon_atombios_sideport_present(struct radeon_device *rdev) { struct radeon_mode_info *mode_info = &rdev->mode_info; diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 2c02e99b5f95..b214663b370d 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -739,9 +739,6 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder, struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); - struct radeon_connector *radeon_connector = to_radeon_connector(connector); - struct radeon_connector_atom_dig *dig_connector = - radeon_connector->con_priv; if (!dig || !dig->afmt) return; @@ -753,10 +750,7 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder, radeon_audio_write_speaker_allocation(encoder); radeon_audio_write_sad_regs(encoder); radeon_audio_write_latency_fields(encoder, mode); - if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev)) - radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10); - else - radeon_audio_set_dto(encoder, dig_connector->dp_clock); + radeon_audio_set_dto(encoder, rdev->clock.vco_freq * 10); radeon_audio_set_audio_packet(encoder); radeon_audio_select_pin(encoder); @@ -781,3 +775,15 @@ void radeon_audio_dpms(struct drm_encoder *encoder, int mode) if (radeon_encoder->audio && radeon_encoder->audio->dpms) radeon_encoder->audio->dpms(encoder, mode == DRM_MODE_DPMS_ON); } + +unsigned int radeon_audio_decode_dfs_div(unsigned int div) +{ + if (div >= 8 && div < 64) + return (div - 8) * 25 + 200; + else if (div >= 64 && div < 96) + return (div - 64) * 50 + 1600; + else if (div >= 96 && div < 128) + return (div - 96) * 100 + 3200; + else + return 0; +} diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h index 059cc3012062..5c70cceaa4a6 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.h +++ b/drivers/gpu/drm/radeon/radeon_audio.h @@ -79,5 +79,6 @@ void radeon_audio_fini(struct radeon_device *rdev); void radeon_audio_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode); void radeon_audio_dpms(struct drm_encoder *encoder, int mode); +unsigned int radeon_audio_decode_dfs_div(unsigned int div); #endif diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index b3bb92368ae0..298ea1c453c3 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1670,8 +1670,10 @@ int radeon_modeset_init(struct radeon_device *rdev) /* setup afmt */ radeon_afmt_init(rdev); - radeon_fbdev_init(rdev); - drm_kms_helper_poll_init(rdev->ddev); + if (!list_empty(&rdev->ddev->mode_config.connector_list)) { + radeon_fbdev_init(rdev); + drm_kms_helper_poll_init(rdev->ddev); + } /* do pm late init */ ret = radeon_pm_late_init(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 3dcc5733ff69..e26c963f2e93 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -663,6 +663,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data, bo_va = radeon_vm_bo_find(&fpriv->vm, rbo); if (!bo_va) { args->operation = RADEON_VA_RESULT_ERROR; + radeon_bo_unreserve(rbo); drm_gem_object_unreference_unlocked(gobj); return -ENOENT; } diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c index 07a0d378e122..a01efe39a820 100644 --- a/drivers/gpu/drm/radeon/vce_v1_0.c +++ b/drivers/gpu/drm/radeon/vce_v1_0.c @@ -178,12 +178,12 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data) return -EINVAL; } - for (i = 0; i < sign->num; ++i) { - if (sign->val[i].chip_id == chip_id) + for (i = 0; i < le32_to_cpu(sign->num); ++i) { + if (le32_to_cpu(sign->val[i].chip_id) == chip_id) break; } - if (i == sign->num) + if (i == le32_to_cpu(sign->num)) return -EINVAL; data += (256 - 64) / 4; @@ -191,18 +191,18 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data) data[1] = sign->val[i].nonce[1]; data[2] = sign->val[i].nonce[2]; data[3] = sign->val[i].nonce[3]; - data[4] = sign->len + 64; + data[4] = cpu_to_le32(le32_to_cpu(sign->len) + 64); memset(&data[5], 0, 44); memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign)); - data += data[4] / 4; + data += le32_to_cpu(data[4]) / 4; data[0] = sign->val[i].sigval[0]; data[1] = sign->val[i].sigval[1]; data[2] = sign->val[i].sigval[2]; data[3] = sign->val[i].sigval[3]; - rdev->vce.keyselect = sign->val[i].keyselect; + rdev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect); return 0; } diff --git a/drivers/gpu/drm/rockchip/Makefile b/drivers/gpu/drm/rockchip/Makefile index d1dc0f7b01db..f6a809afceec 100644 --- a/drivers/gpu/drm/rockchip/Makefile +++ b/drivers/gpu/drm/rockchip/Makefile @@ -2,11 +2,11 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o rockchip_drm_fbdev.o \ - rockchip_drm_gem.o +rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o \ + rockchip_drm_gem.o rockchip_drm_vop.o +rockchipdrm-$(CONFIG_DRM_FBDEV_EMULATION) += rockchip_drm_fbdev.o obj-$(CONFIG_ROCKCHIP_DW_HDMI) += dw_hdmi-rockchip.o obj-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi.o -obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_drm_vop.o \ - rockchip_vop_reg.o +obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_vop_reg.o diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c index 7bfe243c6173..f8f8f29fb7c3 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c @@ -461,10 +461,11 @@ static int dw_mipi_dsi_phy_init(struct dw_mipi_dsi *dsi) static int dw_mipi_dsi_get_lane_bps(struct dw_mipi_dsi *dsi) { - unsigned int bpp, i, pre; + unsigned int i, pre; unsigned long mpclk, pllref, tmp; unsigned int m = 1, n = 1, target_mbps = 1000; unsigned int max_mbps = dptdin_map[ARRAY_SIZE(dptdin_map) - 1].max_mbps; + int bpp; bpp = mipi_dsi_pixel_format_to_bpp(dsi->format); if (bpp < 0) { diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index 8397d1b62ef9..a0d51ccb6ea4 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -55,14 +55,12 @@ int rockchip_drm_dma_attach_device(struct drm_device *drm_dev, return arm_iommu_attach_device(dev, mapping); } -EXPORT_SYMBOL_GPL(rockchip_drm_dma_attach_device); void rockchip_drm_dma_detach_device(struct drm_device *drm_dev, struct device *dev) { arm_iommu_detach_device(dev); } -EXPORT_SYMBOL_GPL(rockchip_drm_dma_detach_device); int rockchip_register_crtc_funcs(struct drm_crtc *crtc, const struct rockchip_crtc_funcs *crtc_funcs) @@ -77,7 +75,6 @@ int rockchip_register_crtc_funcs(struct drm_crtc *crtc, return 0; } -EXPORT_SYMBOL_GPL(rockchip_register_crtc_funcs); void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc) { @@ -89,7 +86,6 @@ void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc) priv->crtc_funcs[pipe] = NULL; } -EXPORT_SYMBOL_GPL(rockchip_unregister_crtc_funcs); static struct drm_crtc *rockchip_crtc_from_pipe(struct drm_device *drm, int pipe) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c index f7844883cb76..3b8f652698f8 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c @@ -39,7 +39,6 @@ struct drm_gem_object *rockchip_fb_get_gem_obj(struct drm_framebuffer *fb, return rk_fb->obj[plane]; } -EXPORT_SYMBOL_GPL(rockchip_fb_get_gem_obj); static void rockchip_drm_fb_destroy(struct drm_framebuffer *fb) { @@ -177,8 +176,23 @@ static void rockchip_crtc_wait_for_update(struct drm_crtc *crtc) crtc_funcs->wait_for_update(crtc); } +/* + * We can't use drm_atomic_helper_wait_for_vblanks() because rk3288 and rk3066 + * have hardware counters for neither vblanks nor scanlines, which results in + * a race where: + * | <-- HW vsync irq and reg take effect + * plane_commit --> | + * get_vblank and wait --> | + * | <-- handle_vblank, vblank->count + 1 + * cleanup_fb --> | + * iommu crash --> | + * | <-- HW vsync irq and reg take effect + * + * This function is equivalent but uses rockchip_crtc_wait_for_update() instead + * of waiting for vblank_count to change. + */ static void -rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state) +rockchip_atomic_wait_for_complete(struct drm_device *dev, struct drm_atomic_state *old_state) { struct drm_crtc_state *old_crtc_state; struct drm_crtc *crtc; @@ -194,6 +208,10 @@ rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state) if (!crtc->state->active) continue; + if (!drm_atomic_helper_framebuffer_changed(dev, + old_state, crtc)) + continue; + ret = drm_crtc_vblank_get(crtc); if (ret != 0) continue; @@ -241,7 +259,7 @@ rockchip_atomic_commit_complete(struct rockchip_atomic_commit *commit) drm_atomic_helper_commit_planes(dev, state, true); - rockchip_atomic_wait_for_complete(state); + rockchip_atomic_wait_for_complete(dev, state); drm_atomic_helper_cleanup_planes(dev, state); diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h index 50432e9b5b37..73718c5f5bbf 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h @@ -15,7 +15,18 @@ #ifndef _ROCKCHIP_DRM_FBDEV_H #define _ROCKCHIP_DRM_FBDEV_H +#ifdef CONFIG_DRM_FBDEV_EMULATION int rockchip_drm_fbdev_init(struct drm_device *dev); void rockchip_drm_fbdev_fini(struct drm_device *dev); +#else +static inline int rockchip_drm_fbdev_init(struct drm_device *dev) +{ + return 0; +} + +static inline void rockchip_drm_fbdev_fini(struct drm_device *dev) +{ +} +#endif #endif /* _ROCKCHIP_DRM_FBDEV_H */ diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c index d908321b94ce..18e07338c6e5 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c @@ -234,13 +234,8 @@ int rockchip_gem_dumb_create(struct drm_file *file_priv, /* * align to 64 bytes since Mali requires it. */ - min_pitch = ALIGN(min_pitch, 64); - - if (args->pitch < min_pitch) - args->pitch = min_pitch; - - if (args->size < args->pitch * args->height) - args->size = args->pitch * args->height; + args->pitch = ALIGN(min_pitch, 64); + args->size = args->pitch * args->height; rk_obj = rockchip_gem_create_with_handle(file_priv, dev, args->size, &args->handle); diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 46c2a8dfd8aa..fd370548d7d7 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -43,8 +43,8 @@ #define REG_SET(x, base, reg, v, mode) \ __REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v) -#define REG_SET_MASK(x, base, reg, v, mode) \ - __REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v) +#define REG_SET_MASK(x, base, reg, mask, v, mode) \ + __REG_SET_##mode(x, base + reg.offset, mask, reg.shift, v) #define VOP_WIN_SET(x, win, name, v) \ REG_SET(x, win->base, win->phy->name, v, RELAXED) @@ -58,16 +58,18 @@ #define VOP_INTR_GET(vop, name) \ vop_read_reg(vop, 0, &vop->data->ctrl->name) -#define VOP_INTR_SET(vop, name, v) \ - REG_SET(vop, 0, vop->data->intr->name, v, NORMAL) +#define VOP_INTR_SET(vop, name, mask, v) \ + REG_SET_MASK(vop, 0, vop->data->intr->name, mask, v, NORMAL) #define VOP_INTR_SET_TYPE(vop, name, type, v) \ do { \ - int i, reg = 0; \ + int i, reg = 0, mask = 0; \ for (i = 0; i < vop->data->intr->nintrs; i++) { \ - if (vop->data->intr->intrs[i] & type) \ + if (vop->data->intr->intrs[i] & type) { \ reg |= (v) << i; \ + mask |= 1 << i; \ + } \ } \ - VOP_INTR_SET(vop, name, reg); \ + VOP_INTR_SET(vop, name, mask, reg); \ } while (0) #define VOP_INTR_GET_TYPE(vop, name, type) \ vop_get_intr_type(vop, &vop->data->intr->name, type) diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 424d515ffcda..314ff71db978 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -144,19 +144,16 @@ int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused) } #endif /* CONFIG_DEBUG_FS */ -/* - * Asks the firmware to turn on power to the V3D engine. - * - * This may be doable with just the clocks interface, though this - * packet does some other register setup from the firmware, too. - */ int vc4_v3d_set_power(struct vc4_dev *vc4, bool on) { - if (on) - return pm_generic_poweroff(&vc4->v3d->pdev->dev); - else - return pm_generic_resume(&vc4->v3d->pdev->dev); + /* XXX: This interface is needed for GPU reset, and the way to + * do it is to turn our power domain off and back on. We + * can't just reset from within the driver, because the reset + * bits are in the power domain's register area, and get set + * during the poweron process. + */ + return 0; } static void vc4_v3d_init_hw(struct drm_device *dev) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index c49812b80dd0..24fb348a44e1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -25,6 +25,7 @@ * **************************************************************************/ #include <linux/module.h> +#include <linux/console.h> #include <drm/drmP.h> #include "vmwgfx_drv.h" @@ -1538,6 +1539,12 @@ static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent) static int __init vmwgfx_init(void) { int ret; + +#ifdef CONFIG_VGA_CONSOLE + if (vgacon_text_force()) + return -EINVAL; +#endif + ret = drm_pci_init(&driver, &vmw_pci_driver); if (ret) DRM_ERROR("Failed initializing DRM.\n"); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 36205c27c4d0..f6bed86c17f9 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -545,6 +545,7 @@ err_enable_device: static void virtio_pci_remove(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + struct device *dev = get_device(&vp_dev->vdev.dev); unregister_virtio_device(&vp_dev->vdev); @@ -554,6 +555,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) virtio_pci_modern_remove(vp_dev); pci_disable_device(pci_dev); + put_device(dev); } static struct pci_driver virtio_pci_driver = { diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h index 89d008dc08e2..fe5efada9d68 100644 --- a/include/drm/drm_atomic_helper.h +++ b/include/drm/drm_atomic_helper.h @@ -42,6 +42,10 @@ int drm_atomic_helper_commit(struct drm_device *dev, struct drm_atomic_state *state, bool async); +bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev, + struct drm_atomic_state *old_state, + struct drm_crtc *crtc); + void drm_atomic_helper_wait_for_vblanks(struct drm_device *dev, struct drm_atomic_state *old_state); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0639dcc98195..81de7123959d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -165,7 +165,6 @@ struct ftrace_ops { ftrace_func_t saved_func; int __percpu *disabled; #ifdef CONFIG_DYNAMIC_FTRACE - int nr_trampolines; struct ftrace_ops_hash local_hash; struct ftrace_ops_hash *func_hash; struct ftrace_ops_hash old_hash; diff --git a/include/trace/events/fence.h b/include/trace/events/fence.h index 98feb1b82896..d6dfa05ba322 100644 --- a/include/trace/events/fence.h +++ b/include/trace/events/fence.h @@ -17,7 +17,7 @@ TRACE_EVENT(fence_annotate_wait_on, TP_STRUCT__entry( __string(driver, fence->ops->get_driver_name(fence)) - __string(timeline, fence->ops->get_driver_name(fence)) + __string(timeline, fence->ops->get_timeline_name(fence)) __field(unsigned int, context) __field(unsigned int, seqno) diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h index 4cc989ad6851..f95e1c43c3fb 100644 --- a/include/uapi/drm/etnaviv_drm.h +++ b/include/uapi/drm/etnaviv_drm.h @@ -48,6 +48,8 @@ struct drm_etnaviv_timespec { #define ETNAVIV_PARAM_GPU_FEATURES_2 0x05 #define ETNAVIV_PARAM_GPU_FEATURES_3 0x06 #define ETNAVIV_PARAM_GPU_FEATURES_4 0x07 +#define ETNAVIV_PARAM_GPU_FEATURES_5 0x08 +#define ETNAVIV_PARAM_GPU_FEATURES_6 0x09 #define ETNAVIV_PARAM_GPU_STREAM_COUNT 0x10 #define ETNAVIV_PARAM_GPU_REGISTER_MAX 0x11 @@ -59,6 +61,7 @@ struct drm_etnaviv_timespec { #define ETNAVIV_PARAM_GPU_BUFFER_SIZE 0x17 #define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT 0x18 #define ETNAVIV_PARAM_GPU_NUM_CONSTANTS 0x19 +#define ETNAVIV_PARAM_GPU_NUM_VARYINGS 0x1a #define ETNA_MAX_PIPES 4 diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 580ac2d4024f..15a1795bbba1 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -316,24 +316,24 @@ static inline void seccomp_sync_threads(void) put_seccomp_filter(thread); smp_store_release(&thread->seccomp.filter, caller->seccomp.filter); + + /* + * Don't let an unprivileged task work around + * the no_new_privs restriction by creating + * a thread that sets it up, enters seccomp, + * then dies. + */ + if (task_no_new_privs(caller)) + task_set_no_new_privs(thread); + /* * Opt the other thread into seccomp if needed. * As threads are considered to be trust-realm * equivalent (see ptrace_may_access), it is safe to * allow one thread to transition the other. */ - if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) { - /* - * Don't let an unprivileged task work around - * the no_new_privs restriction by creating - * a thread that sets it up, enters seccomp, - * then dies. - */ - if (task_no_new_privs(caller)) - task_set_no_new_privs(thread); - + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); - } } } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 87fb9801bd9e..d9293402ee68 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1751,7 +1751,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, { __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, flags, 6, pc, regs); + ftrace_trace_stack(tr, buffer, flags, 0, pc, regs); ftrace_trace_userstack(buffer, flags, pc); } EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs); diff --git a/security/keys/key.c b/security/keys/key.c index 07a87311055c..09ef276c4bdc 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -430,7 +430,8 @@ static int __key_instantiate_and_link(struct key *key, /* and link it into the destination keyring */ if (keyring) { - set_bit(KEY_FLAG_KEEP, &key->flags); + if (test_bit(KEY_FLAG_KEEP, &keyring->flags)) + set_bit(KEY_FLAG_KEEP, &key->flags); __key_link(key, _edit); } diff --git a/sound/core/Kconfig b/sound/core/Kconfig index e3e949126a56..a2a1e24becc6 100644 --- a/sound/core/Kconfig +++ b/sound/core/Kconfig @@ -97,11 +97,11 @@ config SND_PCM_TIMER bool "PCM timer interface" if EXPERT default y help - If you disable this option, pcm timer will be inavailable, so - those stubs used pcm timer (e.g. dmix, dsnoop & co) may work + If you disable this option, pcm timer will be unavailable, so + those stubs that use pcm timer (e.g. dmix, dsnoop & co) may work incorrectlly. - For some embedded device, we may disable it to reduce memory + For some embedded devices, we may disable it to reduce memory footprint, about 20KB on x86_64 platform. config SND_SEQUENCER_OSS diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 18b8dc45bb8f..7fac3cae8abd 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -46,6 +46,13 @@ #include <sound/compress_offload.h> #include <sound/compress_driver.h> +/* struct snd_compr_codec_caps overflows the ioctl bit size for some + * architectures, so we need to disable the relevant ioctls. + */ +#if _IOC_SIZEBITS < 14 +#define COMPR_CODEC_CAPS_OVERFLOW +#endif + /* TODO: * - add substream support for multiple devices in case of * SND_DYNAMIC_MINORS is not used @@ -440,6 +447,7 @@ out: return retval; } +#ifndef COMPR_CODEC_CAPS_OVERFLOW static int snd_compr_get_codec_caps(struct snd_compr_stream *stream, unsigned long arg) { @@ -463,6 +471,7 @@ out: kfree(caps); return retval; } +#endif /* !COMPR_CODEC_CAPS_OVERFLOW */ /* revisit this with snd_pcm_preallocate_xxx */ static int snd_compr_allocate_buffer(struct snd_compr_stream *stream, @@ -801,9 +810,11 @@ static long snd_compr_ioctl(struct file *f, unsigned int cmd, unsigned long arg) case _IOC_NR(SNDRV_COMPRESS_GET_CAPS): retval = snd_compr_get_caps(stream, arg); break; +#ifndef COMPR_CODEC_CAPS_OVERFLOW case _IOC_NR(SNDRV_COMPRESS_GET_CODEC_CAPS): retval = snd_compr_get_codec_caps(stream, arg); break; +#endif case _IOC_NR(SNDRV_COMPRESS_SET_PARAMS): retval = snd_compr_set_params(stream, arg); break; diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c index b1221b29728e..6779e82b46dd 100644 --- a/sound/core/seq/oss/seq_oss_init.c +++ b/sound/core/seq/oss/seq_oss_init.c @@ -202,7 +202,7 @@ snd_seq_oss_open(struct file *file, int level) dp->index = i; if (i >= SNDRV_SEQ_OSS_MAX_CLIENTS) { - pr_err("ALSA: seq_oss: too many applications\n"); + pr_debug("ALSA: seq_oss: too many applications\n"); rc = -ENOMEM; goto _error; } diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c index 0f3b38184fe5..b16dbef04174 100644 --- a/sound/core/seq/oss/seq_oss_synth.c +++ b/sound/core/seq/oss/seq_oss_synth.c @@ -308,7 +308,7 @@ snd_seq_oss_synth_cleanup(struct seq_oss_devinfo *dp) struct seq_oss_synth *rec; struct seq_oss_synthinfo *info; - if (snd_BUG_ON(dp->max_synthdev >= SNDRV_SEQ_OSS_MAX_SYNTH_DEVS)) + if (snd_BUG_ON(dp->max_synthdev > SNDRV_SEQ_OSS_MAX_SYNTH_DEVS)) return; for (i = 0; i < dp->max_synthdev; i++) { info = &dp->synths[i]; diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index 75b74850c005..bde33308f0d6 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -87,7 +87,7 @@ MODULE_PARM_DESC(pcm_substreams, "PCM substreams # (1-128) for dummy driver."); module_param(fake_buffer, bool, 0444); MODULE_PARM_DESC(fake_buffer, "Fake buffer allocations."); #ifdef CONFIG_HIGH_RES_TIMERS -module_param(hrtimer, bool, 0644); +module_param(hrtimer, bool, 0444); MODULE_PARM_DESC(hrtimer, "Use hrtimer as the timer source."); #endif diff --git a/sound/firewire/bebob/bebob_stream.c b/sound/firewire/bebob/bebob_stream.c index 926e5dcbb66a..5022c9b97ddf 100644 --- a/sound/firewire/bebob/bebob_stream.c +++ b/sound/firewire/bebob/bebob_stream.c @@ -47,14 +47,16 @@ static const unsigned int bridgeco_freq_table[] = { [6] = 0x07, }; -static unsigned int -get_formation_index(unsigned int rate) +static int +get_formation_index(unsigned int rate, unsigned int *index) { unsigned int i; for (i = 0; i < ARRAY_SIZE(snd_bebob_rate_table); i++) { - if (snd_bebob_rate_table[i] == rate) - return i; + if (snd_bebob_rate_table[i] == rate) { + *index = i; + return 0; + } } return -EINVAL; } @@ -425,7 +427,9 @@ make_both_connections(struct snd_bebob *bebob, unsigned int rate) goto end; /* confirm params for both streams */ - index = get_formation_index(rate); + err = get_formation_index(rate, &index); + if (err < 0) + goto end; pcm_channels = bebob->tx_stream_formations[index].pcm; midi_channels = bebob->tx_stream_formations[index].midi; err = amdtp_am824_set_parameters(&bebob->tx_stream, rate, diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig index 0216475fc759..37adcc6cbe6b 100644 --- a/sound/isa/Kconfig +++ b/sound/isa/Kconfig @@ -3,6 +3,7 @@ config SND_WSS_LIB tristate select SND_PCM + select SND_TIMER config SND_SB_COMMON tristate @@ -42,6 +43,7 @@ config SND_AD1816A select SND_OPL3_LIB select SND_MPU401_UART select SND_PCM + select SND_TIMER help Say Y here to include support for Analog Devices SoundPort AD1816A or compatible sound chips. @@ -209,6 +211,7 @@ config SND_GUSCLASSIC tristate "Gravis UltraSound Classic" select SND_RAWMIDI select SND_PCM + select SND_TIMER help Say Y here to include support for Gravis UltraSound Classic soundcards. @@ -221,6 +224,7 @@ config SND_GUSEXTREME select SND_OPL3_LIB select SND_MPU401_UART select SND_PCM + select SND_TIMER help Say Y here to include support for Gravis UltraSound Extreme soundcards. diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig index 656ce39bddbc..8f6594a7d37f 100644 --- a/sound/pci/Kconfig +++ b/sound/pci/Kconfig @@ -155,6 +155,7 @@ config SND_AZT3328 select SND_PCM select SND_RAWMIDI select SND_AC97_CODEC + select SND_TIMER depends on ZONE_DMA help Say Y here to include support for Aztech AZF3328 (PCI168) @@ -463,6 +464,7 @@ config SND_EMU10K1 select SND_HWDEP select SND_RAWMIDI select SND_AC97_CODEC + select SND_TIMER depends on ZONE_DMA help Say Y to include support for Sound Blaster PCI 512, Live!, @@ -889,6 +891,7 @@ config SND_YMFPCI select SND_OPL3_LIB select SND_MPU401_UART select SND_AC97_CODEC + select SND_TIMER help Say Y here to include support for Yamaha PCI audio chips - YMF724, YMF724F, YMF740, YMF740C, YMF744, YMF754. diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 256e6cda218f..4045dca3d699 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -90,6 +90,8 @@ enum { #define NVIDIA_HDA_ENABLE_COHBIT 0x01 /* Defines for Intel SCH HDA snoop control */ +#define INTEL_HDA_CGCTL 0x48 +#define INTEL_HDA_CGCTL_MISCBDCGE (0x1 << 6) #define INTEL_SCH_HDA_DEVC 0x78 #define INTEL_SCH_HDA_DEVC_NOSNOOP (0x1<<11) @@ -534,10 +536,21 @@ static void hda_intel_init_chip(struct azx *chip, bool full_reset) { struct hdac_bus *bus = azx_bus(chip); struct pci_dev *pci = chip->pci; + u32 val; if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) snd_hdac_set_codec_wakeup(bus, true); + if (IS_BROXTON(pci)) { + pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val); + val = val & ~INTEL_HDA_CGCTL_MISCBDCGE; + pci_write_config_dword(pci, INTEL_HDA_CGCTL, val); + } azx_init_chip(chip, full_reset); + if (IS_BROXTON(pci)) { + pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val); + val = val | INTEL_HDA_CGCTL_MISCBDCGE; + pci_write_config_dword(pci, INTEL_HDA_CGCTL, val); + } if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) snd_hdac_set_codec_wakeup(bus, false); diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 426a29a1c19b..1f52b55d77c9 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -3653,6 +3653,7 @@ HDA_CODEC_ENTRY(0x10de0070, "GPU 70 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0071, "GPU 71 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0072, "GPU 72 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de007d, "GPU 7d HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0083, "GPU 83 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI", patch_nvhdmi_2ch), HDA_CODEC_ENTRY(0x11069f80, "VX900 HDMI/DP", patch_via_hdmi), HDA_CODEC_ENTRY(0x11069f81, "VX900 HDMI/DP", patch_via_hdmi), diff --git a/sound/sparc/Kconfig b/sound/sparc/Kconfig index d75deba5617d..dfcd38647606 100644 --- a/sound/sparc/Kconfig +++ b/sound/sparc/Kconfig @@ -22,6 +22,7 @@ config SND_SUN_AMD7930 config SND_SUN_CS4231 tristate "Sun CS4231" select SND_PCM + select SND_TIMER help Say Y here to include support for CS4231 sound device on Sun. diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 23ea6d800c4c..a75d9ce7d77a 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1205,8 +1205,12 @@ void snd_usb_set_interface_quirk(struct usb_device *dev) * "Playback Design" products need a 50ms delay after setting the * USB interface. */ - if (le16_to_cpu(dev->descriptor.idVendor) == 0x23ba) + switch (le16_to_cpu(dev->descriptor.idVendor)) { + case 0x23ba: /* Playback Design */ + case 0x0644: /* TEAC Corp. */ mdelay(50); + break; + } } void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, @@ -1221,6 +1225,14 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) mdelay(20); + /* + * "TEAC Corp." products need a 20ms delay after each + * class compliant request + */ + if ((le16_to_cpu(dev->descriptor.idVendor) == 0x0644) && + (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) + mdelay(20); + /* Marantz/Denon devices with USB DAC functionality need a delay * after each class compliant request */ diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h index 26b7926bda88..ba34f9e96efd 100644 --- a/tools/virtio/asm/barrier.h +++ b/tools/virtio/asm/barrier.h @@ -1,15 +1,19 @@ #if defined(__i386__) || defined(__x86_64__) #define barrier() asm volatile("" ::: "memory") -#define mb() __sync_synchronize() - -#define smp_mb() mb() -# define dma_rmb() barrier() -# define dma_wmb() barrier() -# define smp_rmb() barrier() -# define smp_wmb() barrier() +#define virt_mb() __sync_synchronize() +#define virt_rmb() barrier() +#define virt_wmb() barrier() +/* Atomic store should be enough, but gcc generates worse code in that case. */ +#define virt_store_mb(var, value) do { \ + typeof(var) virt_store_mb_value = (value); \ + __atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \ + __ATOMIC_SEQ_CST); \ + barrier(); \ +} while (0); /* Weak barriers should be used. If not - it's a bug */ -# define rmb() abort() -# define wmb() abort() +# define mb() abort() +# define rmb() abort() +# define wmb() abort() #else #error Please fill in barrier macros #endif diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h new file mode 100644 index 000000000000..845960e1cbf2 --- /dev/null +++ b/tools/virtio/linux/compiler.h @@ -0,0 +1,9 @@ +#ifndef LINUX_COMPILER_H +#define LINUX_COMPILER_H + +#define WRITE_ONCE(var, val) \ + (*((volatile typeof(val) *)(&(var))) = (val)) + +#define READ_ONCE(var) (*((volatile typeof(val) *)(&(var)))) + +#endif diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 4db7d5691ba7..033849948215 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -8,6 +8,7 @@ #include <assert.h> #include <stdarg.h> +#include <linux/compiler.h> #include <linux/types.h> #include <linux/printk.h> #include <linux/bug.h> diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile new file mode 100644 index 000000000000..feaa64ac4630 --- /dev/null +++ b/tools/virtio/ringtest/Makefile @@ -0,0 +1,22 @@ +all: + +all: ring virtio_ring_0_9 virtio_ring_poll + +CFLAGS += -Wall +CFLAGS += -pthread -O2 -ggdb +LDFLAGS += -pthread -O2 -ggdb + +main.o: main.c main.h +ring.o: ring.c main.h +virtio_ring_0_9.o: virtio_ring_0_9.c main.h +virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h +ring: ring.o main.o +virtio_ring_0_9: virtio_ring_0_9.o main.o +virtio_ring_poll: virtio_ring_poll.o main.o +clean: + -rm main.o + -rm ring.o ring + -rm virtio_ring_0_9.o virtio_ring_0_9 + -rm virtio_ring_poll.o virtio_ring_poll + +.PHONY: all clean diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README new file mode 100644 index 000000000000..34e94c46104f --- /dev/null +++ b/tools/virtio/ringtest/README @@ -0,0 +1,2 @@ +Partial implementation of various ring layouts, useful to tune virtio design. +Uses shared memory heavily. diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c new file mode 100644 index 000000000000..3a5ff438bd62 --- /dev/null +++ b/tools/virtio/ringtest/main.c @@ -0,0 +1,366 @@ +/* + * Copyright (C) 2016 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Command line processing and common functions for ring benchmarking. + */ +#define _GNU_SOURCE +#include <getopt.h> +#include <pthread.h> +#include <assert.h> +#include <sched.h> +#include "main.h" +#include <sys/eventfd.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <limits.h> + +int runcycles = 10000000; +int max_outstanding = INT_MAX; +int batch = 1; + +bool do_sleep = false; +bool do_relax = false; +bool do_exit = true; + +unsigned ring_size = 256; + +static int kickfd = -1; +static int callfd = -1; + +void notify(int fd) +{ + unsigned long long v = 1; + int r; + + vmexit(); + r = write(fd, &v, sizeof v); + assert(r == sizeof v); + vmentry(); +} + +void wait_for_notify(int fd) +{ + unsigned long long v = 1; + int r; + + vmexit(); + r = read(fd, &v, sizeof v); + assert(r == sizeof v); + vmentry(); +} + +void kick(void) +{ + notify(kickfd); +} + +void wait_for_kick(void) +{ + wait_for_notify(kickfd); +} + +void call(void) +{ + notify(callfd); +} + +void wait_for_call(void) +{ + wait_for_notify(callfd); +} + +void set_affinity(const char *arg) +{ + cpu_set_t cpuset; + int ret; + pthread_t self; + long int cpu; + char *endptr; + + if (!arg) + return; + + cpu = strtol(arg, &endptr, 0); + assert(!*endptr); + + assert(cpu >= 0 || cpu < CPU_SETSIZE); + + self = pthread_self(); + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset); + assert(!ret); +} + +static void run_guest(void) +{ + int completed_before; + int completed = 0; + int started = 0; + int bufs = runcycles; + int spurious = 0; + int r; + unsigned len; + void *buf; + int tokick = batch; + + for (;;) { + if (do_sleep) + disable_call(); + completed_before = completed; + do { + if (started < bufs && + started - completed < max_outstanding) { + r = add_inbuf(0, NULL, "Hello, world!"); + if (__builtin_expect(r == 0, true)) { + ++started; + if (!--tokick) { + tokick = batch; + if (do_sleep) + kick_available(); + } + + } + } else + r = -1; + + /* Flush out completed bufs if any */ + if (get_buf(&len, &buf)) { + ++completed; + if (__builtin_expect(completed == bufs, false)) + return; + r = 0; + } + } while (r == 0); + if (completed == completed_before) + ++spurious; + assert(completed <= bufs); + assert(started <= bufs); + if (do_sleep) { + if (enable_call()) + wait_for_call(); + } else { + poll_used(); + } + } +} + +static void run_host(void) +{ + int completed_before; + int completed = 0; + int spurious = 0; + int bufs = runcycles; + unsigned len; + void *buf; + + for (;;) { + if (do_sleep) { + if (enable_kick()) + wait_for_kick(); + } else { + poll_avail(); + } + if (do_sleep) + disable_kick(); + completed_before = completed; + while (__builtin_expect(use_buf(&len, &buf), true)) { + if (do_sleep) + call_used(); + ++completed; + if (__builtin_expect(completed == bufs, false)) + return; + } + if (completed == completed_before) + ++spurious; + assert(completed <= bufs); + if (completed == bufs) + break; + } +} + +void *start_guest(void *arg) +{ + set_affinity(arg); + run_guest(); + pthread_exit(NULL); +} + +void *start_host(void *arg) +{ + set_affinity(arg); + run_host(); + pthread_exit(NULL); +} + +static const char optstring[] = ""; +static const struct option longopts[] = { + { + .name = "help", + .has_arg = no_argument, + .val = 'h', + }, + { + .name = "host-affinity", + .has_arg = required_argument, + .val = 'H', + }, + { + .name = "guest-affinity", + .has_arg = required_argument, + .val = 'G', + }, + { + .name = "ring-size", + .has_arg = required_argument, + .val = 'R', + }, + { + .name = "run-cycles", + .has_arg = required_argument, + .val = 'C', + }, + { + .name = "outstanding", + .has_arg = required_argument, + .val = 'o', + }, + { + .name = "batch", + .has_arg = required_argument, + .val = 'b', + }, + { + .name = "sleep", + .has_arg = no_argument, + .val = 's', + }, + { + .name = "relax", + .has_arg = no_argument, + .val = 'x', + }, + { + .name = "exit", + .has_arg = no_argument, + .val = 'e', + }, + { + } +}; + +static void help(void) +{ + fprintf(stderr, "Usage: <test> [--help]" + " [--host-affinity H]" + " [--guest-affinity G]" + " [--ring-size R (default: %d)]" + " [--run-cycles C (default: %d)]" + " [--batch b]" + " [--outstanding o]" + " [--sleep]" + " [--relax]" + " [--exit]" + "\n", + ring_size, + runcycles); +} + +int main(int argc, char **argv) +{ + int ret; + pthread_t host, guest; + void *tret; + char *host_arg = NULL; + char *guest_arg = NULL; + char *endptr; + long int c; + + kickfd = eventfd(0, 0); + assert(kickfd >= 0); + callfd = eventfd(0, 0); + assert(callfd >= 0); + + for (;;) { + int o = getopt_long(argc, argv, optstring, longopts, NULL); + switch (o) { + case -1: + goto done; + case '?': + help(); + exit(2); + case 'H': + host_arg = optarg; + break; + case 'G': + guest_arg = optarg; + break; + case 'R': + ring_size = strtol(optarg, &endptr, 0); + assert(ring_size && !(ring_size & (ring_size - 1))); + assert(!*endptr); + break; + case 'C': + c = strtol(optarg, &endptr, 0); + assert(!*endptr); + assert(c > 0 && c < INT_MAX); + runcycles = c; + break; + case 'o': + c = strtol(optarg, &endptr, 0); + assert(!*endptr); + assert(c > 0 && c < INT_MAX); + max_outstanding = c; + break; + case 'b': + c = strtol(optarg, &endptr, 0); + assert(!*endptr); + assert(c > 0 && c < INT_MAX); + batch = c; + break; + case 's': + do_sleep = true; + break; + case 'x': + do_relax = true; + break; + case 'e': + do_exit = true; + break; + default: + help(); + exit(4); + break; + } + } + + /* does nothing here, used to make sure all smp APIs compile */ + smp_acquire(); + smp_release(); + smp_mb(); +done: + + if (batch > max_outstanding) + batch = max_outstanding; + + if (optind < argc) { + help(); + exit(4); + } + alloc_ring(); + + ret = pthread_create(&host, NULL, start_host, host_arg); + assert(!ret); + ret = pthread_create(&guest, NULL, start_guest, guest_arg); + assert(!ret); + + ret = pthread_join(guest, &tret); + assert(!ret); + ret = pthread_join(host, &tret); + assert(!ret); + return 0; +} diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h new file mode 100644 index 000000000000..16917acb0ade --- /dev/null +++ b/tools/virtio/ringtest/main.h @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2016 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Common macros and functions for ring benchmarking. + */ +#ifndef MAIN_H +#define MAIN_H + +#include <stdbool.h> + +extern bool do_exit; + +#if defined(__x86_64__) || defined(__i386__) +#include "x86intrin.h" + +static inline void wait_cycles(unsigned long long cycles) +{ + unsigned long long t; + + t = __rdtsc(); + while (__rdtsc() - t < cycles) {} +} + +#define VMEXIT_CYCLES 500 +#define VMENTRY_CYCLES 500 + +#else +static inline void wait_cycles(unsigned long long cycles) +{ + _Exit(5); +} +#define VMEXIT_CYCLES 0 +#define VMENTRY_CYCLES 0 +#endif + +static inline void vmexit(void) +{ + if (!do_exit) + return; + + wait_cycles(VMEXIT_CYCLES); +} +static inline void vmentry(void) +{ + if (!do_exit) + return; + + wait_cycles(VMENTRY_CYCLES); +} + +/* implemented by ring */ +void alloc_ring(void); +/* guest side */ +int add_inbuf(unsigned, void *, void *); +void *get_buf(unsigned *, void **); +void disable_call(); +bool enable_call(); +void kick_available(); +void poll_used(); +/* host side */ +void disable_kick(); +bool enable_kick(); +bool use_buf(unsigned *, void **); +void call_used(); +void poll_avail(); + +/* implemented by main */ +extern bool do_sleep; +void kick(void); +void wait_for_kick(void); +void call(void); +void wait_for_call(void); + +extern unsigned ring_size; + +/* Compiler barrier - similar to what Linux uses */ +#define barrier() asm volatile("" ::: "memory") + +/* Is there a portable way to do this? */ +#if defined(__x86_64__) || defined(__i386__) +#define cpu_relax() asm ("rep; nop" ::: "memory") +#else +#define cpu_relax() assert(0) +#endif + +extern bool do_relax; + +static inline void busy_wait(void) +{ + if (do_relax) + cpu_relax(); + else + /* prevent compiler from removing busy loops */ + barrier(); +} + +/* + * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized + * with other __ATOMIC_SEQ_CST calls. + */ +#define smp_mb() __sync_synchronize() + +/* + * This abuses the atomic builtins for thread fences, and + * adds a compiler barrier. + */ +#define smp_release() do { \ + barrier(); \ + __atomic_thread_fence(__ATOMIC_RELEASE); \ +} while (0) + +#define smp_acquire() do { \ + __atomic_thread_fence(__ATOMIC_ACQUIRE); \ + barrier(); \ +} while (0) + +#endif diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c new file mode 100644 index 000000000000..c25c8d248b6b --- /dev/null +++ b/tools/virtio/ringtest/ring.c @@ -0,0 +1,272 @@ +/* + * Copyright (C) 2016 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Simple descriptor-based ring. virtio 0.9 compatible event index is used for + * signalling, unconditionally. + */ +#define _GNU_SOURCE +#include "main.h" +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +/* Next - Where next entry will be written. + * Prev - "Next" value when event triggered previously. + * Event - Peer requested event after writing this entry. + */ +static inline bool need_event(unsigned short event, + unsigned short next, + unsigned short prev) +{ + return (unsigned short)(next - event - 1) < (unsigned short)(next - prev); +} + +/* Design: + * Guest adds descriptors with unique index values and DESC_HW in flags. + * Host overwrites used descriptors with correct len, index, and DESC_HW clear. + * Flags are always set last. + */ +#define DESC_HW 0x1 + +struct desc { + unsigned short flags; + unsigned short index; + unsigned len; + unsigned long long addr; +}; + +/* how much padding is needed to avoid false cache sharing */ +#define HOST_GUEST_PADDING 0x80 + +/* Mostly read */ +struct event { + unsigned short kick_index; + unsigned char reserved0[HOST_GUEST_PADDING - 2]; + unsigned short call_index; + unsigned char reserved1[HOST_GUEST_PADDING - 2]; +}; + +struct data { + void *buf; /* descriptor is writeable, we can't get buf from there */ + void *data; +} *data; + +struct desc *ring; +struct event *event; + +struct guest { + unsigned avail_idx; + unsigned last_used_idx; + unsigned num_free; + unsigned kicked_avail_idx; + unsigned char reserved[HOST_GUEST_PADDING - 12]; +} guest; + +struct host { + /* we do not need to track last avail index + * unless we have more than one in flight. + */ + unsigned used_idx; + unsigned called_used_idx; + unsigned char reserved[HOST_GUEST_PADDING - 4]; +} host; + +/* implemented by ring */ +void alloc_ring(void) +{ + int ret; + int i; + + ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring); + if (ret) { + perror("Unable to allocate ring buffer.\n"); + exit(3); + } + event = malloc(sizeof *event); + if (!event) { + perror("Unable to allocate event buffer.\n"); + exit(3); + } + memset(event, 0, sizeof *event); + guest.avail_idx = 0; + guest.kicked_avail_idx = -1; + guest.last_used_idx = 0; + host.used_idx = 0; + host.called_used_idx = -1; + for (i = 0; i < ring_size; ++i) { + struct desc desc = { + .index = i, + }; + ring[i] = desc; + } + guest.num_free = ring_size; + data = malloc(ring_size * sizeof *data); + if (!data) { + perror("Unable to allocate data buffer.\n"); + exit(3); + } + memset(data, 0, ring_size * sizeof *data); +} + +/* guest side */ +int add_inbuf(unsigned len, void *buf, void *datap) +{ + unsigned head, index; + + if (!guest.num_free) + return -1; + + guest.num_free--; + head = (ring_size - 1) & (guest.avail_idx++); + + /* Start with a write. On MESI architectures this helps + * avoid a shared state with consumer that is polling this descriptor. + */ + ring[head].addr = (unsigned long)(void*)buf; + ring[head].len = len; + /* read below might bypass write above. That is OK because it's just an + * optimization. If this happens, we will get the cache line in a + * shared state which is unfortunate, but probably not worth it to + * add an explicit full barrier to avoid this. + */ + barrier(); + index = ring[head].index; + data[index].buf = buf; + data[index].data = datap; + /* Barrier A (for pairing) */ + smp_release(); + ring[head].flags = DESC_HW; + + return 0; +} + +void *get_buf(unsigned *lenp, void **bufp) +{ + unsigned head = (ring_size - 1) & guest.last_used_idx; + unsigned index; + void *datap; + + if (ring[head].flags & DESC_HW) + return NULL; + /* Barrier B (for pairing) */ + smp_acquire(); + *lenp = ring[head].len; + index = ring[head].index & (ring_size - 1); + datap = data[index].data; + *bufp = data[index].buf; + data[index].buf = NULL; + data[index].data = NULL; + guest.num_free++; + guest.last_used_idx++; + return datap; +} + +void poll_used(void) +{ + unsigned head = (ring_size - 1) & guest.last_used_idx; + + while (ring[head].flags & DESC_HW) + busy_wait(); +} + +void disable_call() +{ + /* Doing nothing to disable calls might cause + * extra interrupts, but reduces the number of cache misses. + */ +} + +bool enable_call() +{ + unsigned head = (ring_size - 1) & guest.last_used_idx; + + event->call_index = guest.last_used_idx; + /* Flush call index write */ + /* Barrier D (for pairing) */ + smp_mb(); + return ring[head].flags & DESC_HW; +} + +void kick_available(void) +{ + /* Flush in previous flags write */ + /* Barrier C (for pairing) */ + smp_mb(); + if (!need_event(event->kick_index, + guest.avail_idx, + guest.kicked_avail_idx)) + return; + + guest.kicked_avail_idx = guest.avail_idx; + kick(); +} + +/* host side */ +void disable_kick() +{ + /* Doing nothing to disable kicks might cause + * extra interrupts, but reduces the number of cache misses. + */ +} + +bool enable_kick() +{ + unsigned head = (ring_size - 1) & host.used_idx; + + event->kick_index = host.used_idx; + /* Barrier C (for pairing) */ + smp_mb(); + return !(ring[head].flags & DESC_HW); +} + +void poll_avail(void) +{ + unsigned head = (ring_size - 1) & host.used_idx; + + while (!(ring[head].flags & DESC_HW)) + busy_wait(); +} + +bool use_buf(unsigned *lenp, void **bufp) +{ + unsigned head = (ring_size - 1) & host.used_idx; + + if (!(ring[head].flags & DESC_HW)) + return false; + + /* make sure length read below is not speculated */ + /* Barrier A (for pairing) */ + smp_acquire(); + + /* simple in-order completion: we don't need + * to touch index at all. This also means we + * can just modify the descriptor in-place. + */ + ring[head].len--; + /* Make sure len is valid before flags. + * Note: alternative is to write len and flags in one access - + * possible on 64 bit architectures but wmb is free on Intel anyway + * so I have no way to test whether it's a gain. + */ + /* Barrier B (for pairing) */ + smp_release(); + ring[head].flags = 0; + host.used_idx++; + return true; +} + +void call_used(void) +{ + /* Flush in previous flags write */ + /* Barrier D (for pairing) */ + smp_mb(); + if (!need_event(event->call_index, + host.used_idx, + host.called_used_idx)) + return; + + host.called_used_idx = host.used_idx; + call(); +} diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh new file mode 100755 index 000000000000..52b0f71ffa8d --- /dev/null +++ b/tools/virtio/ringtest/run-on-all.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +#use last CPU for host. Why not the first? +#many devices tend to use cpu0 by default so +#it tends to be busier +HOST_AFFINITY=$(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n|tail -1) + +#run command on all cpus +for cpu in $(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n); +do + #Don't run guest and host on same CPU + #It actually works ok if using signalling + if + (echo "$@" | grep -e "--sleep" > /dev/null) || \ + test $HOST_AFFINITY '!=' $cpu + then + echo "GUEST AFFINITY $cpu" + "$@" --host-affinity $HOST_AFFINITY --guest-affinity $cpu + fi +done +echo "NO GUEST AFFINITY" +"$@" --host-affinity $HOST_AFFINITY +echo "NO AFFINITY" +"$@" diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c new file mode 100644 index 000000000000..47c9a1a18d36 --- /dev/null +++ b/tools/virtio/ringtest/virtio_ring_0_9.c @@ -0,0 +1,316 @@ +/* + * Copyright (C) 2016 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Partial implementation of virtio 0.9. event index is used for signalling, + * unconditionally. Design roughly follows linux kernel implementation in order + * to be able to judge its performance. + */ +#define _GNU_SOURCE +#include "main.h" +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include <string.h> +#include <linux/virtio_ring.h> + +struct data { + void *data; +} *data; + +struct vring ring; + +/* enabling the below activates experimental ring polling code + * (which skips index reads on consumer in favor of looking at + * high bits of ring id ^ 0x8000). + */ +/* #ifdef RING_POLL */ + +/* how much padding is needed to avoid false cache sharing */ +#define HOST_GUEST_PADDING 0x80 + +struct guest { + unsigned short avail_idx; + unsigned short last_used_idx; + unsigned short num_free; + unsigned short kicked_avail_idx; + unsigned short free_head; + unsigned char reserved[HOST_GUEST_PADDING - 10]; +} guest; + +struct host { + /* we do not need to track last avail index + * unless we have more than one in flight. + */ + unsigned short used_idx; + unsigned short called_used_idx; + unsigned char reserved[HOST_GUEST_PADDING - 4]; +} host; + +/* implemented by ring */ +void alloc_ring(void) +{ + int ret; + int i; + void *p; + + ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000)); + if (ret) { + perror("Unable to allocate ring buffer.\n"); + exit(3); + } + memset(p, 0, vring_size(ring_size, 0x1000)); + vring_init(&ring, ring_size, p, 0x1000); + + guest.avail_idx = 0; + guest.kicked_avail_idx = -1; + guest.last_used_idx = 0; + /* Put everything in free lists. */ + guest.free_head = 0; + for (i = 0; i < ring_size - 1; i++) + ring.desc[i].next = i + 1; + host.used_idx = 0; + host.called_used_idx = -1; + guest.num_free = ring_size; + data = malloc(ring_size * sizeof *data); + if (!data) { + perror("Unable to allocate data buffer.\n"); + exit(3); + } + memset(data, 0, ring_size * sizeof *data); +} + +/* guest side */ +int add_inbuf(unsigned len, void *buf, void *datap) +{ + unsigned head, avail; + struct vring_desc *desc; + + if (!guest.num_free) + return -1; + + head = guest.free_head; + guest.num_free--; + + desc = ring.desc; + desc[head].flags = VRING_DESC_F_NEXT; + desc[head].addr = (unsigned long)(void *)buf; + desc[head].len = len; + /* We do it like this to simulate the way + * we'd have to flip it if we had multiple + * descriptors. + */ + desc[head].flags &= ~VRING_DESC_F_NEXT; + guest.free_head = desc[head].next; + + data[head].data = datap; + +#ifdef RING_POLL + /* Barrier A (for pairing) */ + smp_release(); + avail = guest.avail_idx++; + ring.avail->ring[avail & (ring_size - 1)] = + (head | (avail & ~(ring_size - 1))) ^ 0x8000; +#else + avail = (ring_size - 1) & (guest.avail_idx++); + ring.avail->ring[avail] = head; + /* Barrier A (for pairing) */ + smp_release(); +#endif + ring.avail->idx = guest.avail_idx; + return 0; +} + +void *get_buf(unsigned *lenp, void **bufp) +{ + unsigned head; + unsigned index; + void *datap; + +#ifdef RING_POLL + head = (ring_size - 1) & guest.last_used_idx; + index = ring.used->ring[head].id; + if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1)) + return NULL; + /* Barrier B (for pairing) */ + smp_acquire(); + index &= ring_size - 1; +#else + if (ring.used->idx == guest.last_used_idx) + return NULL; + /* Barrier B (for pairing) */ + smp_acquire(); + head = (ring_size - 1) & guest.last_used_idx; + index = ring.used->ring[head].id; +#endif + *lenp = ring.used->ring[head].len; + datap = data[index].data; + *bufp = (void*)(unsigned long)ring.desc[index].addr; + data[index].data = NULL; + ring.desc[index].next = guest.free_head; + guest.free_head = index; + guest.num_free++; + guest.last_used_idx++; + return datap; +} + +void poll_used(void) +{ +#ifdef RING_POLL + unsigned head = (ring_size - 1) & guest.last_used_idx; + + for (;;) { + unsigned index = ring.used->ring[head].id; + + if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1)) + busy_wait(); + else + break; + } +#else + unsigned head = guest.last_used_idx; + + while (ring.used->idx == head) + busy_wait(); +#endif +} + +void disable_call() +{ + /* Doing nothing to disable calls might cause + * extra interrupts, but reduces the number of cache misses. + */ +} + +bool enable_call() +{ + unsigned short last_used_idx; + + vring_used_event(&ring) = (last_used_idx = guest.last_used_idx); + /* Flush call index write */ + /* Barrier D (for pairing) */ + smp_mb(); +#ifdef RING_POLL + { + unsigned short head = last_used_idx & (ring_size - 1); + unsigned index = ring.used->ring[head].id; + + return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1); + } +#else + return ring.used->idx == last_used_idx; +#endif +} + +void kick_available(void) +{ + /* Flush in previous flags write */ + /* Barrier C (for pairing) */ + smp_mb(); + if (!vring_need_event(vring_avail_event(&ring), + guest.avail_idx, + guest.kicked_avail_idx)) + return; + + guest.kicked_avail_idx = guest.avail_idx; + kick(); +} + +/* host side */ +void disable_kick() +{ + /* Doing nothing to disable kicks might cause + * extra interrupts, but reduces the number of cache misses. + */ +} + +bool enable_kick() +{ + unsigned head = host.used_idx; + + vring_avail_event(&ring) = head; + /* Barrier C (for pairing) */ + smp_mb(); +#ifdef RING_POLL + { + unsigned index = ring.avail->ring[head & (ring_size - 1)]; + + return (index ^ head ^ 0x8000) & ~(ring_size - 1); + } +#else + return head == ring.avail->idx; +#endif +} + +void poll_avail(void) +{ + unsigned head = host.used_idx; +#ifdef RING_POLL + for (;;) { + unsigned index = ring.avail->ring[head & (ring_size - 1)]; + if ((index ^ head ^ 0x8000) & ~(ring_size - 1)) + busy_wait(); + else + break; + } +#else + while (ring.avail->idx == head) + busy_wait(); +#endif +} + +bool use_buf(unsigned *lenp, void **bufp) +{ + unsigned used_idx = host.used_idx; + struct vring_desc *desc; + unsigned head; + +#ifdef RING_POLL + head = ring.avail->ring[used_idx & (ring_size - 1)]; + if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1)) + return false; + /* Barrier A (for pairing) */ + smp_acquire(); + + used_idx &= ring_size - 1; + desc = &ring.desc[head & (ring_size - 1)]; +#else + if (used_idx == ring.avail->idx) + return false; + + /* Barrier A (for pairing) */ + smp_acquire(); + + used_idx &= ring_size - 1; + head = ring.avail->ring[used_idx]; + desc = &ring.desc[head]; +#endif + + *lenp = desc->len; + *bufp = (void *)(unsigned long)desc->addr; + + /* now update used ring */ + ring.used->ring[used_idx].id = head; + ring.used->ring[used_idx].len = desc->len - 1; + /* Barrier B (for pairing) */ + smp_release(); + host.used_idx++; + ring.used->idx = host.used_idx; + + return true; +} + +void call_used(void) +{ + /* Flush in previous flags write */ + /* Barrier D (for pairing) */ + smp_mb(); + if (!vring_need_event(vring_used_event(&ring), + host.used_idx, + host.called_used_idx)) + return; + + host.called_used_idx = host.used_idx; + call(); +} diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c new file mode 100644 index 000000000000..84fc2c557aaa --- /dev/null +++ b/tools/virtio/ringtest/virtio_ring_poll.c @@ -0,0 +1,2 @@ +#define RING_POLL 1 +#include "virtio_ring_0_9.c" |