diff options
232 files changed, 4931 insertions, 1535 deletions
diff --git a/Documentation/devicetree/bindings/pwm/brcm,bcm7038-pwm.txt b/Documentation/devicetree/bindings/pwm/brcm,bcm7038-pwm.txt new file mode 100644 index 000000000000..d9254a6da5ed --- /dev/null +++ b/Documentation/devicetree/bindings/pwm/brcm,bcm7038-pwm.txt @@ -0,0 +1,20 @@ +Broadcom BCM7038 PWM controller (BCM7xxx Set Top Box PWM controller) + +Required properties: + +- compatible: must be "brcm,bcm7038-pwm" +- reg: physical base address and length for this controller +- #pwm-cells: should be 2. See pwm.txt in this directory for a description + of the cells format +- clocks: a phandle to the reference clock for this block which is fed through + its internal variable clock frequency generator + + +Example: + + pwm: pwm@f0408000 { + compatible = "brcm,bcm7038-pwm"; + reg = <0xf0408000 0x28>; + #pwm-cells = <2>; + clocks = <&upg_fixed>; + }; diff --git a/Documentation/devicetree/bindings/pwm/pwm-berlin.txt b/Documentation/devicetree/bindings/pwm/pwm-berlin.txt new file mode 100644 index 000000000000..82cbe16fcbbc --- /dev/null +++ b/Documentation/devicetree/bindings/pwm/pwm-berlin.txt @@ -0,0 +1,17 @@ +Berlin PWM controller + +Required properties: +- compatible: should be "marvell,berlin-pwm" +- reg: physical base address and length of the controller's registers +- clocks: phandle to the input clock +- #pwm-cells: should be 3. See pwm.txt in this directory for a description of + the cells format. + +Example: + +pwm: pwm@f7f20000 { + compatible = "marvell,berlin-pwm"; + reg = <0xf7f20000 0x40>; + clocks = <&chip_clk CLKID_CFG>; + #pwm-cells = <3>; +} diff --git a/Documentation/devicetree/bindings/pwm/pwm-mtk-disp.txt b/Documentation/devicetree/bindings/pwm/pwm-mtk-disp.txt new file mode 100644 index 000000000000..f8f59baf6b67 --- /dev/null +++ b/Documentation/devicetree/bindings/pwm/pwm-mtk-disp.txt @@ -0,0 +1,42 @@ +MediaTek display PWM controller + +Required properties: + - compatible: should be "mediatek,<name>-disp-pwm": + - "mediatek,mt8173-disp-pwm": found on mt8173 SoC. + - "mediatek,mt6595-disp-pwm": found on mt6595 SoC. + - reg: physical base address and length of the controller's registers. + - #pwm-cells: must be 2. See pwm.txt in this directory for a description of + the cell format. + - clocks: phandle and clock specifier of the PWM reference clock. + - clock-names: must contain the following: + - "main": clock used to generate PWM signals. + - "mm": sync signals from the modules of mmsys. + - pinctrl-names: Must contain a "default" entry. + - pinctrl-0: One property must exist for each entry in pinctrl-names. + See pinctrl/pinctrl-bindings.txt for details of the property values. + +Example: + pwm0: pwm@1401e000 { + compatible = "mediatek,mt8173-disp-pwm", + "mediatek,mt6595-disp-pwm"; + reg = <0 0x1401e000 0 0x1000>; + #pwm-cells = <2>; + clocks = <&mmsys CLK_MM_DISP_PWM026M>, + <&mmsys CLK_MM_DISP_PWM0MM>; + clock-names = "main", "mm"; + pinctrl-names = "default"; + pinctrl-0 = <&disp_pwm0_pins>; + }; + + backlight_lcd: backlight_lcd { + compatible = "pwm-backlight"; + pwms = <&pwm0 0 1000000>; + brightness-levels = < + 0 16 32 48 64 80 96 112 + 128 144 160 176 192 208 224 240 + 255 + >; + default-brightness-level = <9>; + power-supply = <&mt6397_vio18_reg>; + enable-gpios = <&pio 95 GPIO_ACTIVE_HIGH>; + }; diff --git a/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt b/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt index ae0273e19506..cf6068b8e974 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt +++ b/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt @@ -3,6 +3,8 @@ Allwinner sun4i and sun7i SoC PWM controller Required properties: - compatible: should be one of: - "allwinner,sun4i-a10-pwm" + - "allwinner,sun5i-a10s-pwm" + - "allwinner,sun5i-a13-pwm" - "allwinner,sun7i-a20-pwm" - reg: physical base address and length of the controller's registers - #pwm-cells: should be 3. See pwm.txt in this directory for a description of diff --git a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt new file mode 100644 index 000000000000..0822a083fc57 --- /dev/null +++ b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt @@ -0,0 +1,26 @@ +* Renesas R-Car PWM Timer Controller + +Required Properties: +- compatible: should be "renesas,pwm-rcar" and one of the following. + - "renesas,pwm-r8a7778": for R-Car M1A + - "renesas,pwm-r8a7779": for R-Car H1 + - "renesas,pwm-r8a7790": for R-Car H2 + - "renesas,pwm-r8a7791": for R-Car M2-W + - "renesas,pwm-r8a7794": for R-Car E2 +- reg: base address and length of the registers block for the PWM. +- #pwm-cells: should be 2. See pwm.txt in this directory for a description of + the cells format. +- clocks: clock phandle and specifier pair. +- pinctrl-0: phandle, referring to a default pin configuration node. +- pinctrl-names: Set to "default". + +Example: R8A7790 (R-Car H2) PWM Timer node + + pwm0: pwm@e6e30000 { + compatible = "renesas,pwm-r8a7790", "renesas,pwm-rcar"; + reg = <0 0xe6e30000 0 0x8>; + #pwm-cells = <2>; + clocks = <&mstp5_clks R8A7790_CLK_PWM>; + pinctrl-0 = <&pwm0_pins>; + pinctrl-names = "default"; + }; diff --git a/Documentation/devicetree/bindings/sound/ak4554.c b/Documentation/devicetree/bindings/sound/ak4554.txt index 934fa02754b3..934fa02754b3 100644 --- a/Documentation/devicetree/bindings/sound/ak4554.c +++ b/Documentation/devicetree/bindings/sound/ak4554.txt diff --git a/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt index ef802de4957a..b38200d2583a 100644 --- a/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt +++ b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt @@ -12,6 +12,11 @@ Required properties: - resets : Must contain an entry for each entry in reset-names. See ../reset/reset.txt for details. - reset-names : Must include the name "tsadc-apb". +- pinctrl-names : The pin control state names; +- pinctrl-0 : The "init" pinctrl state, it will be set before device probe. +- pinctrl-1 : The "default" pinctrl state, it will be set after reset the + TSADC controller. +- pinctrl-2 : The "sleep" pinctrl state, it will be in for suspend. - #thermal-sensor-cells : Should be 1. See ./thermal.txt for a description. - rockchip,hw-tshut-temp : The hardware-controlled shutdown temperature value. - rockchip,hw-tshut-mode : The hardware-controlled shutdown mode 0:CRU 1:GPIO. @@ -27,8 +32,10 @@ tsadc: tsadc@ff280000 { clock-names = "tsadc", "apb_pclk"; resets = <&cru SRST_TSADC>; reset-names = "tsadc-apb"; - pinctrl-names = "default"; - pinctrl-0 = <&otp_out>; + pinctrl-names = "init", "default", "sleep"; + pinctrl-0 = <&otp_gpio>; + pinctrl-1 = <&otp_out>; + pinctrl-2 = <&otp_gpio>; #thermal-sensor-cells = <1>; rockchip,hw-tshut-temp = <95000>; rockchip,hw-tshut-mode = <0>; diff --git a/Documentation/devicetree/bindings/thermal/ti_soc_thermal.txt b/Documentation/devicetree/bindings/thermal/ti_soc_thermal.txt index 0c9222d27fae..6299dd8de339 100644 --- a/Documentation/devicetree/bindings/thermal/ti_soc_thermal.txt +++ b/Documentation/devicetree/bindings/thermal/ti_soc_thermal.txt @@ -10,6 +10,8 @@ to the silicon temperature. Required properties: - compatible : Should be: + - "ti,omap34xx-bandgap" : for OMAP34xx bandgap + - "ti,omap36xx-bandgap" : for OMAP36xx bandgap - "ti,omap4430-bandgap" : for OMAP4430 bandgap - "ti,omap4460-bandgap" : for OMAP4460 bandgap - "ti,omap4470-bandgap" : for OMAP4470 bandgap @@ -25,6 +27,18 @@ to each bandgap version, because the mapping may change from soc to soc, apart of depending on available features. Example: +OMAP34xx: +bandgap { + reg = <0x48002524 0x4>; + compatible = "ti,omap34xx-bandgap"; +}; + +OMAP36xx: +bandgap { + reg = <0x48002524 0x4>; + compatible = "ti,omap36xx-bandgap"; +}; + OMAP4430: bandgap { reg = <0x4a002260 0x4 0x4a00232C 0x4>; diff --git a/MAINTAINERS b/MAINTAINERS index 372ee63c0099..ecc43c255eb8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5506,7 +5506,8 @@ S: Supported F: drivers/idle/intel_idle.c INTEL PSTATE DRIVER -M: Kristen Carlson Accardi <kristen@linux.intel.com> +M: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> +M: Len Brown <lenb@kernel.org> L: linux-pm@vger.kernel.org S: Supported F: drivers/cpufreq/intel_pstate.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 851fe11c6069..9ac16a482ff1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -27,6 +27,7 @@ config ARM64 select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS select EDAC_SUPPORT + select FRAME_POINTER select GENERIC_ALLOCATOR select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index c24d6adc0420..04fb73b973f1 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -2,10 +2,6 @@ menu "Kernel hacking" source "lib/Kconfig.debug" -config FRAME_POINTER - bool - default y - config ARM64_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 2f71f9cdd39c..bdd7aa358d2a 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -224,3 +224,4 @@ CONFIG_CRYPTO_GHASH_ARM64_CE=y CONFIG_CRYPTO_AES_ARM64_CE_CCM=y CONFIG_CRYPTO_AES_ARM64_CE_BLK=y CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y +CONFIG_CRYPTO_CRC32_ARM64=y diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 74d0b8eb0799..f61c84f6ba02 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -233,7 +233,7 @@ __CMPXCHG_CASE( , , mb_8, dmb ish, , l, "memory") #undef __CMPXCHG_CASE #define __CMPXCHG_DBL(name, mb, rel, cl) \ -__LL_SC_INLINE int \ +__LL_SC_INLINE long \ __LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1, \ unsigned long old2, \ unsigned long new1, \ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 1fce7908e690..197e06afbf71 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -387,7 +387,7 @@ __CMPXCHG_CASE(x, , mb_8, al, "memory") #define __LL_SC_CMPXCHG_DBL(op) __LL_SC_CALL(__cmpxchg_double##op) #define __CMPXCHG_DBL(name, mb, cl...) \ -static inline int __cmpxchg_double##name(unsigned long old1, \ +static inline long __cmpxchg_double##name(unsigned long old1, \ unsigned long old2, \ unsigned long new1, \ unsigned long new2, \ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index f3acf421ded4..9819a9426b69 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -80,6 +80,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) #define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) +#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 0cd7b5947dfc..2d4ca4bb0dd3 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -32,7 +32,7 @@ #ifndef __ASSEMBLY__ #include <linux/psci.h> -#include <asm/types.h> +#include <linux/types.h> #include <asm/ptrace.h> #define __KVM_HAVE_GUEST_DEBUG diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 52f0d7a5a1c2..c8cf89223b5a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -696,7 +696,7 @@ static void cap_set_hwcap(const struct arm64_cpu_capabilities *cap) } /* Check if we have a particular HWCAP enabled */ -static bool cpus_have_hwcap(const struct arm64_cpu_capabilities *cap) +static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities *cap) { bool rc; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 2bbdc0e4fd14..b1adc51b2c2e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -473,7 +473,7 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, * cpu logical map array containing MPIDR values related to logical * cpus. Assumes that cpu_logical_map(0) has already been initialized. */ -void __init of_parse_and_init_cpus(void) +static void __init of_parse_and_init_cpus(void) { struct device_node *dn = NULL; diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 40f7b33a22da..fce95e17cf7f 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -41,7 +41,7 @@ void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr, * time the notifier runs debug exceptions might have been enabled already, * with HW breakpoints registers content still in an unknown state. */ -void (*hw_breakpoint_restore)(void *); +static void (*hw_breakpoint_restore)(void *); void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) { /* Prevent multiple restore hook initializations */ diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index f6fe17d88da5..b467fd0a384b 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -15,6 +15,9 @@ ccflags-y := -shared -fno-common -fno-builtin ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + # Workaround for bare-metal (ELF) toolchains that neglect to pass -shared # down to collect2, resulting in silent corruption of the vDSO image. ccflags-y += -Wl,-shared diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index c2fa6b56613c..e3f563c81c48 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -146,7 +146,7 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, if (((addr | next | phys) & ~CONT_MASK) == 0) { /* a block of CONT_PTES */ __populate_init_pte(pte, addr, next, phys, - prot | __pgprot(PTE_CONT)); + __pgprot(pgprot_val(prot) | PTE_CONT)); } else { /* * If the range being split is already inside of a @@ -165,7 +165,7 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (addr != end); } -void split_pud(pud_t *old_pud, pmd_t *pmd) +static void split_pud(pud_t *old_pud, pmd_t *pmd) { unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT; pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr); @@ -447,7 +447,7 @@ static void __init map_mem(void) memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } -void __init fixup_executable(void) +static void __init fixup_executable(void) { #ifdef CONFIG_DEBUG_RODATA /* now that we are actually fully mapped, make the start/end more fine grained */ @@ -691,7 +691,7 @@ void __set_fixmap(enum fixed_addresses idx, void *__init fixmap_remap_fdt(phys_addr_t dt_phys) { const u64 dt_virt_base = __fix_to_virt(FIX_FDT); - pgprot_t prot = PAGE_KERNEL | PTE_RDONLY; + pgprot_t prot = PAGE_KERNEL_RO; int size, offset; void *dt_virt; diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index 98a26ce82d26..aee5637ea436 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -1,7 +1,7 @@ /* * BPF JIT compiler for ARM64 * - * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com> + * Copyright (C) 2014-2015 Zi Shen Lim <zlim.lnx@gmail.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -35,6 +35,7 @@ aarch64_insn_gen_comp_branch_imm(0, offset, Rt, A64_VARIANT(sf), \ AARCH64_INSN_BRANCH_COMP_##type) #define A64_CBZ(sf, Rt, imm19) A64_COMP_BRANCH(sf, Rt, (imm19) << 2, ZERO) +#define A64_CBNZ(sf, Rt, imm19) A64_COMP_BRANCH(sf, Rt, (imm19) << 2, NONZERO) /* Conditional branch (immediate) */ #define A64_COND_BRANCH(cond, offset) \ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index a44e5293c6f5..cf3c7d4a1b58 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1,7 +1,7 @@ /* * BPF JIT compiler for ARM64 * - * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com> + * Copyright (C) 2014-2015 Zi Shen Lim <zlim.lnx@gmail.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -225,6 +225,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) u8 jmp_cond; s32 jmp_offset; +#define check_imm(bits, imm) do { \ + if ((((imm) > 0) && ((imm) >> (bits))) || \ + (((imm) < 0) && (~(imm) >> (bits)))) { \ + pr_info("[%2d] imm=%d(0x%x) out of range\n", \ + i, imm, imm); \ + return -EINVAL; \ + } \ +} while (0) +#define check_imm19(imm) check_imm(19, imm) +#define check_imm26(imm) check_imm(26, imm) + switch (code) { /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: @@ -258,15 +269,33 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: - emit(A64_UDIV(is64, dst, dst, src), ctx); - break; case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X: - ctx->tmp_used = 1; - emit(A64_UDIV(is64, tmp, dst, src), ctx); - emit(A64_MUL(is64, tmp, tmp, src), ctx); - emit(A64_SUB(is64, dst, dst, tmp), ctx); + { + const u8 r0 = bpf2a64[BPF_REG_0]; + + /* if (src == 0) return 0 */ + jmp_offset = 3; /* skip ahead to else path */ + check_imm19(jmp_offset); + emit(A64_CBNZ(is64, src, jmp_offset), ctx); + emit(A64_MOVZ(1, r0, 0, 0), ctx); + jmp_offset = epilogue_offset(ctx); + check_imm26(jmp_offset); + emit(A64_B(jmp_offset), ctx); + /* else */ + switch (BPF_OP(code)) { + case BPF_DIV: + emit(A64_UDIV(is64, dst, dst, src), ctx); + break; + case BPF_MOD: + ctx->tmp_used = 1; + emit(A64_UDIV(is64, tmp, dst, src), ctx); + emit(A64_MUL(is64, tmp, tmp, src), ctx); + emit(A64_SUB(is64, dst, dst, tmp), ctx); + break; + } break; + } case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU64 | BPF_LSH | BPF_X: emit(A64_LSLV(is64, dst, dst, src), ctx); @@ -393,17 +422,6 @@ emit_bswap_uxt: emit(A64_ASR(is64, dst, dst, imm), ctx); break; -#define check_imm(bits, imm) do { \ - if ((((imm) > 0) && ((imm) >> (bits))) || \ - (((imm) < 0) && (~(imm) >> (bits)))) { \ - pr_info("[%2d] imm=%d(0x%x) out of range\n", \ - i, imm, imm); \ - return -EINVAL; \ - } \ -} while (0) -#define check_imm19(imm) check_imm(19, imm) -#define check_imm26(imm) check_imm(26, imm) - /* JUMP off */ case BPF_JMP | BPF_JA: jmp_offset = bpf2a64_offset(i + off, i, ctx); diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index db589167838c..dd3ac75776ad 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -16,6 +16,7 @@ config H8300 select OF_EARLY_FLATTREE select HAVE_MEMBLOCK select HAVE_DMA_ATTRS + select CLKSRC_OF config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/h8300/Makefile b/arch/h8300/Makefile index 0d2d96e52d9f..e1c02ca230cb 100644 --- a/arch/h8300/Makefile +++ b/arch/h8300/Makefile @@ -22,7 +22,9 @@ KBUILD_CFLAGS += -DUTS_SYSNAME=\"uClinux\" KBUILD_AFLAGS += $(aflags-y) LDFLAGS += $(ldflags-y) +ifeq ($(CROSS_COMPILE),) CROSS_COMPILE := h8300-unknown-linux- +endif core-y += arch/$(ARCH)/kernel/ arch/$(ARCH)/mm/ ifneq '$(CONFIG_H8300_BUILTIN_DTB)' '""' diff --git a/arch/h8300/boot/compressed/Makefile b/arch/h8300/boot/compressed/Makefile index 87d03b7ee97e..d7bc3fa7f2c6 100644 --- a/arch/h8300/boot/compressed/Makefile +++ b/arch/h8300/boot/compressed/Makefile @@ -14,11 +14,12 @@ OBJECTS = $(obj)/head.o $(obj)/misc.o # in order to suppress error message. # CONFIG_MEMORY_START ?= 0x00400000 -CONFIG_BOOT_LINK_OFFSET ?= 0x00140000 +CONFIG_BOOT_LINK_OFFSET ?= 0x00280000 IMAGE_OFFSET := $(shell printf "0x%08x" $$(($(CONFIG_MEMORY_START)+$(CONFIG_BOOT_LINK_OFFSET)))) LIBGCC := $(shell $(CROSS-COMPILE)$(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) -LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -estartup $(obj)/vmlinux.lds +LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -estartup -T $(obj)/vmlinux.lds \ + --defsym output=$(CONFIG_MEMORY_START) $(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(LIBGCC) FORCE $(call if_changed,ld) diff --git a/arch/h8300/boot/compressed/head.S b/arch/h8300/boot/compressed/head.S index 74c0d8cc40ba..0436350c1df5 100644 --- a/arch/h8300/boot/compressed/head.S +++ b/arch/h8300/boot/compressed/head.S @@ -9,8 +9,8 @@ .section .text..startup,"ax" .global startup startup: + mov.l #startup, sp mov.l er0, er4 - mov.l er0, sp mov.l #__sbss, er0 mov.l #__ebss, er1 sub.l er0, er1 @@ -24,7 +24,7 @@ startup: bne 1b jsr @decompress_kernel mov.l er4, er0 - jmp @0x400000 + jmp @output .align 9 fake_headers_as_bzImage: diff --git a/arch/h8300/boot/compressed/misc.c b/arch/h8300/boot/compressed/misc.c index c4f2cfcb117b..6029c5351895 100644 --- a/arch/h8300/boot/compressed/misc.c +++ b/arch/h8300/boot/compressed/misc.c @@ -28,7 +28,7 @@ static unsigned long free_mem_end_ptr; extern char input_data[]; extern int input_len; -static unsigned char *output; +extern char output[]; #define HEAP_SIZE 0x10000 @@ -56,15 +56,10 @@ void *memcpy(void *dest, const void *src, size_t n) static void error(char *x) { - while (1) ; /* Halt */ } -#define STACK_SIZE (4096) -long user_stack[STACK_SIZE]; -long *stack_start = &user_stack[STACK_SIZE]; - void decompress_kernel(void) { free_mem_ptr = (unsigned long)&_end; diff --git a/arch/h8300/boot/compressed/vmlinux.lds b/arch/h8300/boot/compressed/vmlinux.lds index a0a3a0ed54ef..44fd209db88a 100644 --- a/arch/h8300/boot/compressed/vmlinux.lds +++ b/arch/h8300/boot/compressed/vmlinux.lds @@ -27,6 +27,6 @@ SECTIONS *(.bss*) . = ALIGN(0x4) ; __ebss = . ; - __end = . ; } + _end = . ; } diff --git a/arch/h8300/boot/dts/edosk2674.dts b/arch/h8300/boot/dts/edosk2674.dts index dfb5c102f8da..4ce9fa874a57 100644 --- a/arch/h8300/boot/dts/edosk2674.dts +++ b/arch/h8300/boot/dts/edosk2674.dts @@ -7,7 +7,7 @@ chosen { bootargs = "console=ttySC2,38400"; - stdout-path = <&sci2>; + stdout-path = &sci2; }; aliases { serial0 = &sci0; @@ -25,13 +25,13 @@ compatible = "renesas,h8s2678-pll-clock"; clocks = <&xclk>; #clock-cells = <0>; - reg = <0xfee03b 2>, <0xfee045 2>; + reg = <0xffff3b 1>, <0xffff45 1>; }; core_clk: core_clk { compatible = "renesas,h8300-div-clock"; clocks = <&pllclk>; #clock-cells = <0>; - reg = <0xfee03b 2>; + reg = <0xffff3b 1>; renesas,width = <3>; }; fclk: fclk { diff --git a/arch/h8300/include/asm/io.h b/arch/h8300/include/asm/io.h index 1d09b2f2e0fe..bb837cded268 100644 --- a/arch/h8300/include/asm/io.h +++ b/arch/h8300/include/asm/io.h @@ -36,20 +36,20 @@ static inline void ctrl_outl(unsigned long b, unsigned long addr) *(volatile unsigned long *)addr = b; } -static inline void ctrl_bclr(int b, unsigned long addr) +static inline void ctrl_bclr(int b, unsigned char *addr) { if (__builtin_constant_p(b)) - __asm__("bclr %1,%0" : : "WU"(addr), "i"(b)); + __asm__("bclr %1,%0" : "+WU"(*addr): "i"(b)); else - __asm__("bclr %w1,%0" : : "WU"(addr), "r"(b)); + __asm__("bclr %w1,%0" : "+WU"(*addr): "r"(b)); } -static inline void ctrl_bset(int b, unsigned long addr) +static inline void ctrl_bset(int b, unsigned char *addr) { if (__builtin_constant_p(b)) - __asm__("bset %1,%0" : : "WU"(addr), "i"(b)); + __asm__("bset %1,%0" : "+WU"(*addr): "i"(b)); else - __asm__("bset %w1,%0" : : "WU"(addr), "r"(b)); + __asm__("bset %w1,%0" : "+WU"(*addr): "r"(b)); } #endif /* __KERNEL__ */ diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h index 544c30785ad4..b408fe660cf8 100644 --- a/arch/h8300/include/asm/thread_info.h +++ b/arch/h8300/include/asm/thread_info.h @@ -13,6 +13,12 @@ #ifdef __KERNEL__ +/* + * Size of kernel stack for each process. This must be a power of 2... + */ +#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE 8192 /* 2 pages */ + #ifndef __ASSEMBLY__ /* @@ -46,14 +52,6 @@ struct thread_info { #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) - -/* - * Size of kernel stack for each process. This must be a power of 2... - */ -#define THREAD_SIZE_ORDER 1 -#define THREAD_SIZE 8192 /* 2 pages */ - - /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c index 0fd1fe65c0b8..c772abe6d19c 100644 --- a/arch/h8300/kernel/setup.c +++ b/arch/h8300/kernel/setup.c @@ -29,6 +29,7 @@ #include <linux/clk-provider.h> #include <linux/memblock.h> #include <linux/screen_info.h> +#include <linux/clocksource.h> #include <asm/setup.h> #include <asm/irq.h> @@ -252,4 +253,5 @@ void __init calibrate_delay(void) void __init time_init(void) { of_clk_init(NULL); + clocksource_probe(); } diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index 7c302dcf5249..cb5dfb02c88d 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -1,5 +1,6 @@ #include <asm-generic/vmlinux.lds.h> #include <asm/page.h> +#include <asm/thread_info.h> #define ROMTOP 0x000000 #define RAMTOP 0x400000 @@ -42,11 +43,10 @@ SECTIONS . = RAMTOP; _ramstart = .; #define ADDR(x) ROMEND -#else #endif _sdata = . ; __data_start = . ; - RW_DATA_SECTION(0,0,0) + RW_DATA_SECTION(0, PAGE_SIZE, THREAD_SIZE) #if defined(CONFIG_ROMKERNEL) #undef ADDR #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9c26c5a96ea2..54b45b73195f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2019,7 +2019,7 @@ static bool can_split_piggybacked_subcores(struct core_info *cip) return false; n_subcores += (cip->subcore_threads[sub] - 1) >> 1; } - if (n_subcores > 3 || large_sub < 0) + if (large_sub < 0 || !subcore_config_ok(n_subcores + 1, 2)) return false; /* diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b1dab8d1d885..3c6badcd53ef 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1749,7 +1749,8 @@ kvmppc_hdsi: beq 3f clrrdi r0, r4, 28 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ - bne 1f /* if no SLB entry found */ + li r0, BOOK3S_INTERRUPT_DATA_SEGMENT + bne 7f /* if no SLB entry found */ 4: std r4, VCPU_FAULT_DAR(r9) stw r6, VCPU_FAULT_DSISR(r9) @@ -1768,14 +1769,15 @@ kvmppc_hdsi: cmpdi r3, -2 /* MMIO emulation; need instr word */ beq 2f - /* Synthesize a DSI for the guest */ + /* Synthesize a DSI (or DSegI) for the guest */ ld r4, VCPU_FAULT_DAR(r9) mr r6, r3 -1: mtspr SPRN_DAR, r4 +1: li r0, BOOK3S_INTERRUPT_DATA_STORAGE mtspr SPRN_DSISR, r6 +7: mtspr SPRN_DAR, r4 mtspr SPRN_SRR0, r10 mtspr SPRN_SRR1, r11 - li r10, BOOK3S_INTERRUPT_DATA_STORAGE + mr r10, r0 bl kvmppc_msr_interrupt fast_interrupt_c_return: 6: ld r7, VCPU_CTR(r9) @@ -1823,7 +1825,8 @@ kvmppc_hisi: beq 3f clrrdi r0, r10, 28 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ - bne 1f /* if no SLB entry found */ + li r0, BOOK3S_INTERRUPT_INST_SEGMENT + bne 7f /* if no SLB entry found */ 4: /* Search the hash table. */ mr r3, r9 /* vcpu pointer */ @@ -1840,11 +1843,12 @@ kvmppc_hisi: cmpdi r3, -1 /* handle in kernel mode */ beq guest_exit_cont - /* Synthesize an ISI for the guest */ + /* Synthesize an ISI (or ISegI) for the guest */ mr r11, r3 -1: mtspr SPRN_SRR0, r10 +1: li r0, BOOK3S_INTERRUPT_INST_STORAGE +7: mtspr SPRN_SRR0, r10 mtspr SPRN_SRR1, r11 - li r10, BOOK3S_INTERRUPT_INST_STORAGE + mr r10, r0 bl kvmppc_msr_interrupt b fast_interrupt_c_return diff --git a/arch/unicore32/kernel/puv3-nb0916.c b/arch/unicore32/kernel/puv3-nb0916.c index 46ebfdccbc31..aab5f341dec0 100644 --- a/arch/unicore32/kernel/puv3-nb0916.c +++ b/arch/unicore32/kernel/puv3-nb0916.c @@ -19,6 +19,7 @@ #include <linux/reboot.h> #include <linux/interrupt.h> #include <linux/i2c.h> +#include <linux/pwm.h> #include <linux/pwm_backlight.h> #include <linux/gpio.h> #include <linux/gpio_keys.h> @@ -49,11 +50,14 @@ static struct resource puv3_i2c_resources[] = { } }; +static struct pwm_lookup nb0916_pwm_lookup[] = { + PWM_LOOKUP("PKUnity-v3-PWM", 0, "pwm-backlight", NULL, 70 * 1024, + PWM_POLARITY_NORMAL), +}; + static struct platform_pwm_backlight_data nb0916_backlight_data = { - .pwm_id = 0, .max_brightness = 100, .dft_brightness = 100, - .pwm_period_ns = 70 * 1024, .enable_gpio = -1, }; @@ -112,6 +116,8 @@ int __init mach_nb0916_init(void) platform_device_register_simple("PKUnity-v3-I2C", -1, puv3_i2c_resources, ARRAY_SIZE(puv3_i2c_resources)); + pwm_add_table(nb0916_pwm_lookup, ARRAY_SIZE(nb0916_pwm_lookup)); + platform_device_register_data(NULL, "pwm-backlight", -1, &nb0916_backlight_data, sizeof(nb0916_backlight_data)); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9265196e877f..30cfd64295a0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -505,6 +505,7 @@ struct kvm_vcpu_arch { u32 virtual_tsc_mult; u32 virtual_tsc_khz; s64 ia32_tsc_adjust_msr; + u64 tsc_scaling_ratio; atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ unsigned nmi_pending; /* NMI queued after currently running handler */ @@ -777,7 +778,7 @@ struct kvm_x86_ops { void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); - void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu); + void (*update_bp_intercept)(struct kvm_vcpu *vcpu); int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); @@ -844,7 +845,7 @@ struct kvm_x86_ops { int (*get_lpage_level)(void); bool (*rdtscp_supported)(void); bool (*invpcid_supported)(void); - void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host); + void (*adjust_tsc_offset_guest)(struct kvm_vcpu *vcpu, s64 adjustment); void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); @@ -852,11 +853,9 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); - void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale); u64 (*read_tsc_offset)(struct kvm_vcpu *vcpu); void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); - u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc); void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); @@ -923,17 +922,6 @@ struct kvm_arch_async_pf { extern struct kvm_x86_ops *kvm_x86_ops; -static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, - s64 adjustment) -{ - kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, false); -} - -static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) -{ - kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, true); -} - int kvm_mmu_module_init(void); void kvm_mmu_module_exit(void); @@ -986,10 +974,12 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); /* control of guest tsc rate supported? */ extern bool kvm_has_tsc_control; -/* minimum supported tsc_khz for guests */ -extern u32 kvm_min_guest_tsc_khz; /* maximum supported tsc_khz for guests */ extern u32 kvm_max_guest_tsc_khz; +/* number of bits of the fractional part of the TSC scaling ratio */ +extern u8 kvm_tsc_scaling_ratio_frac_bits; +/* maximum allowed value of TSC scaling ratio */ +extern u64 kvm_max_tsc_scaling_ratio; enum emulation_result { EMULATE_DONE, /* no further processing */ @@ -1235,6 +1225,9 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, void kvm_define_shared_msr(unsigned index, u32 msr); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); +u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); +u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); + unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index aa336ff3e03e..14c63c7e8337 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -73,6 +73,7 @@ #define SECONDARY_EXEC_ENABLE_PML 0x00020000 #define SECONDARY_EXEC_XSAVES 0x00100000 #define SECONDARY_EXEC_PCOMMIT 0x00200000 +#define SECONDARY_EXEC_TSC_SCALING 0x02000000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 #define PIN_BASED_NMI_EXITING 0x00000008 @@ -167,6 +168,8 @@ enum vmcs_field { VMWRITE_BITMAP = 0x00002028, XSS_EXIT_BITMAP = 0x0000202C, XSS_EXIT_BITMAP_HIGH = 0x0000202D, + TSC_MULTIPLIER = 0x00002032, + TSC_MULTIPLIER_HIGH = 0x00002033, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index b5d7640abc5d..8a4add8e4639 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -100,6 +100,7 @@ { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ + { SVM_EXIT_EXCP_BASE + AC_VECTOR, "AC excp" }, \ { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ { SVM_EXIT_INTR, "interrupt" }, \ { SVM_EXIT_NMI, "nmi" }, \ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ecd4ea1d28a8..4d30b865be30 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1250,7 +1250,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) tsc_deadline = apic->lapic_timer.expired_tscdeadline; apic->lapic_timer.expired_tscdeadline = 0; - guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc()); + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ @@ -1318,7 +1318,7 @@ static void start_apic_timer(struct kvm_lapic *apic) local_irq_save(flags); now = apic->lapic_timer.timer.base->get_time(); - guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc()); + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); if (likely(tscdeadline > guest_tsc)) { ns = (tscdeadline - guest_tsc) * 1000000ULL; do_div(ns, this_tsc_khz); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7d85bcae3332..e7c2c1428a69 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3359,7 +3359,7 @@ exit: return reserved; } -int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) +int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) { u64 spte; bool reserved; @@ -3368,7 +3368,7 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) return RET_MMIO_PF_EMULATE; reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte); - if (unlikely(reserved)) + if (WARN_ON(reserved)) return RET_MMIO_PF_BUG; if (is_mmio_spte(spte)) { @@ -3392,17 +3392,7 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) */ return RET_MMIO_PF_RETRY; } -EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common); - -static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, - u32 error_code, bool direct) -{ - int ret; - - ret = handle_mmio_page_fault_common(vcpu, addr, direct); - WARN_ON(ret == RET_MMIO_PF_BUG); - return ret; -} +EXPORT_SYMBOL_GPL(handle_mmio_page_fault); static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, bool prefault) @@ -3413,7 +3403,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, gva, error_code, true); + r = handle_mmio_page_fault(vcpu, gva, true); if (likely(r != RET_MMIO_PF_INVALID)) return r; @@ -3503,7 +3493,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, gpa, error_code, true); + r = handle_mmio_page_fault(vcpu, gpa, true); if (likely(r != RET_MMIO_PF_INVALID)) return r; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index e4202e41d535..55ffb7b0f95e 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -56,13 +56,13 @@ void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); /* - * Return values of handle_mmio_page_fault_common: + * Return values of handle_mmio_page_fault: * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction * directly. * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page * fault path update the mmio spte. * RET_MMIO_PF_RETRY: let CPU fault again on the address. - * RET_MMIO_PF_BUG: bug is detected. + * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed). */ enum { RET_MMIO_PF_EMULATE = 1, @@ -71,7 +71,7 @@ enum { RET_MMIO_PF_BUG = -1 }; -int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); +int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct); void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index b41faa91a6f9..3058a22a658d 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -705,8 +705,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, addr, error_code, - mmu_is_nested(vcpu)); + r = handle_mmio_page_fault(vcpu, addr, mmu_is_nested(vcpu)); if (likely(r != RET_MMIO_PF_INVALID)) return r; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f2c8e4917688..83a1c643f9a5 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -158,8 +158,6 @@ struct vcpu_svm { unsigned long int3_rip; u32 apf_reason; - u64 tsc_ratio; - /* cached guest cpuid flags for faster access */ bool nrips_enabled : 1; }; @@ -214,7 +212,6 @@ static int nested_svm_intercept(struct vcpu_svm *svm); static int nested_svm_vmexit(struct vcpu_svm *svm); static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); -static u64 __scale_tsc(u64 ratio, u64 tsc); enum { VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, @@ -894,20 +891,9 @@ static __init int svm_hardware_setup(void) kvm_enable_efer_bits(EFER_FFXSR); if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { - u64 max; - kvm_has_tsc_control = true; - - /* - * Make sure the user can only configure tsc_khz values that - * fit into a signed integer. - * A min value is not calculated needed because it will always - * be 1 on all machines and a value of 0 is used to disable - * tsc-scaling for the vcpu. - */ - max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); - - kvm_max_guest_tsc_khz = max; + kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX; + kvm_tsc_scaling_ratio_frac_bits = 32; } if (nested) { @@ -971,68 +957,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } -static u64 __scale_tsc(u64 ratio, u64 tsc) -{ - u64 mult, frac, _tsc; - - mult = ratio >> 32; - frac = ratio & ((1ULL << 32) - 1); - - _tsc = tsc; - _tsc *= mult; - _tsc += (tsc >> 32) * frac; - _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32; - - return _tsc; -} - -static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) -{ - struct vcpu_svm *svm = to_svm(vcpu); - u64 _tsc = tsc; - - if (svm->tsc_ratio != TSC_RATIO_DEFAULT) - _tsc = __scale_tsc(svm->tsc_ratio, tsc); - - return _tsc; -} - -static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) -{ - struct vcpu_svm *svm = to_svm(vcpu); - u64 ratio; - u64 khz; - - /* Guest TSC same frequency as host TSC? */ - if (!scale) { - svm->tsc_ratio = TSC_RATIO_DEFAULT; - return; - } - - /* TSC scaling supported? */ - if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { - if (user_tsc_khz > tsc_khz) { - vcpu->arch.tsc_catchup = 1; - vcpu->arch.tsc_always_catchup = 1; - } else - WARN(1, "user requested TSC rate below hardware speed\n"); - return; - } - - khz = user_tsc_khz; - - /* TSC scaling required - calculate ratio */ - ratio = khz << 32; - do_div(ratio, tsc_khz); - - if (ratio == 0 || ratio & TSC_RATIO_RSVD) { - WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n", - user_tsc_khz); - return; - } - svm->tsc_ratio = ratio; -} - static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1059,16 +983,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } -static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) +static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) { struct vcpu_svm *svm = to_svm(vcpu); - if (host) { - if (svm->tsc_ratio != TSC_RATIO_DEFAULT) - WARN_ON(adjustment < 0); - adjustment = svm_scale_tsc(vcpu, (u64)adjustment); - } - svm->vmcb->control.tsc_offset += adjustment; if (is_guest_mode(vcpu)) svm->nested.hsave->control.tsc_offset += adjustment; @@ -1080,15 +998,6 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } -static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) -{ - u64 tsc; - - tsc = svm_scale_tsc(vcpu, rdtsc()); - - return target_tsc - tsc; -} - static void init_vmcb(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -1110,6 +1019,8 @@ static void init_vmcb(struct vcpu_svm *svm) set_exception_intercept(svm, PF_VECTOR); set_exception_intercept(svm, UD_VECTOR); set_exception_intercept(svm, MC_VECTOR); + set_exception_intercept(svm, AC_VECTOR); + set_exception_intercept(svm, DB_VECTOR); set_intercept(svm, INTERCEPT_INTR); set_intercept(svm, INTERCEPT_NMI); @@ -1235,8 +1146,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto out; } - svm->tsc_ratio = TSC_RATIO_DEFAULT; - err = kvm_vcpu_init(&svm->vcpu, kvm, id); if (err) goto free_svm; @@ -1322,10 +1231,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); - if (static_cpu_has(X86_FEATURE_TSCRATEMSR) && - svm->tsc_ratio != __this_cpu_read(current_tsc_ratio)) { - __this_cpu_write(current_tsc_ratio, svm->tsc_ratio); - wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio); + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { + u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio; + if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) { + __this_cpu_write(current_tsc_ratio, tsc_ratio); + wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio); + } } } @@ -1644,20 +1555,13 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, mark_dirty(svm->vmcb, VMCB_SEG); } -static void update_db_bp_intercept(struct kvm_vcpu *vcpu) +static void update_bp_intercept(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - clr_exception_intercept(svm, DB_VECTOR); clr_exception_intercept(svm, BP_VECTOR); - if (svm->nmi_singlestep) - set_exception_intercept(svm, DB_VECTOR); - if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { - if (vcpu->guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - set_exception_intercept(svm, DB_VECTOR); if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) set_exception_intercept(svm, BP_VECTOR); } else @@ -1763,7 +1667,6 @@ static int db_interception(struct vcpu_svm *svm) if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_bp_intercept(&svm->vcpu); } if (svm->vcpu.guest_debug & @@ -1798,6 +1701,12 @@ static int ud_interception(struct vcpu_svm *svm) return 1; } +static int ac_interception(struct vcpu_svm *svm) +{ + kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0); + return 1; +} + static void svm_fpu_activate(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3075,8 +2984,7 @@ static int cr8_write_interception(struct vcpu_svm *svm) static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) { struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); - return vmcb->control.tsc_offset + - svm_scale_tsc(vcpu, host_tsc); + return vmcb->control.tsc_offset + host_tsc; } static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) @@ -3086,7 +2994,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr_info->index) { case MSR_IA32_TSC: { msr_info->data = svm->vmcb->control.tsc_offset + - svm_scale_tsc(vcpu, rdtsc()); + kvm_scale_tsc(vcpu, rdtsc()); break; } @@ -3362,6 +3270,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, + [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, [SVM_EXIT_INTR] = intr_interception, [SVM_EXIT_NMI] = nmi_interception, [SVM_EXIT_SMI] = nop_on_interception, @@ -3745,7 +3654,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) */ svm->nmi_singlestep = true; svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_bp_intercept(vcpu); } static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -4371,7 +4279,7 @@ static struct kvm_x86_ops svm_x86_ops = { .vcpu_load = svm_vcpu_load, .vcpu_put = svm_vcpu_put, - .update_db_bp_intercept = update_db_bp_intercept, + .update_bp_intercept = update_bp_intercept, .get_msr = svm_get_msr, .set_msr = svm_set_msr, .get_segment_base = svm_get_segment_base, @@ -4443,11 +4351,9 @@ static struct kvm_x86_ops svm_x86_ops = { .has_wbinvd_exit = svm_has_wbinvd_exit, - .set_tsc_khz = svm_set_tsc_khz, .read_tsc_offset = svm_read_tsc_offset, .write_tsc_offset = svm_write_tsc_offset, - .adjust_tsc_offset = svm_adjust_tsc_offset, - .compute_tsc_offset = svm_compute_tsc_offset, + .adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest, .read_l1_tsc = svm_read_l1_tsc, .set_tdp_cr3 = set_tdp_cr3, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5eb56ed77c1f..87acc5221740 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -107,6 +107,8 @@ static u64 __read_mostly host_xss; static bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); +#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL + #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) #define KVM_VM_CR0_ALWAYS_ON \ @@ -1172,6 +1174,12 @@ static inline bool cpu_has_vmx_pml(void) return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML; } +static inline bool cpu_has_vmx_tsc_scaling(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_TSC_SCALING; +} + static inline bool report_flexpriority(void) { return flexpriority_enabled; @@ -1631,7 +1639,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) u32 eb; eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | - (1u << NM_VECTOR) | (1u << DB_VECTOR); + (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); if ((vcpu->guest_debug & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) @@ -2053,6 +2061,12 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ + + /* Setup TSC multiplier */ + if (cpu_has_vmx_tsc_scaling()) + vmcs_write64(TSC_MULTIPLIER, + vcpu->arch.tsc_scaling_ratio); + vmx->loaded_vmcs->cpu = cpu; } @@ -2357,15 +2371,16 @@ static void setup_msrs(struct vcpu_vmx *vmx) /* * reads and returns guest's timestamp counter "register" - * guest_tsc = host_tsc + tsc_offset -- 21.3 + * guest_tsc = (host_tsc * tsc multiplier) >> 48 + tsc_offset + * -- Intel TSC Scaling for Virtualization White Paper, sec 1.3 */ -static u64 guest_read_tsc(void) +static u64 guest_read_tsc(struct kvm_vcpu *vcpu) { u64 host_tsc, tsc_offset; host_tsc = rdtsc(); tsc_offset = vmcs_read64(TSC_OFFSET); - return host_tsc + tsc_offset; + return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset; } /* @@ -2382,22 +2397,6 @@ static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) return host_tsc + tsc_offset; } -/* - * Engage any workarounds for mis-matched TSC rates. Currently limited to - * software catchup for faster rates on slower CPUs. - */ -static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) -{ - if (!scale) - return; - - if (user_tsc_khz > tsc_khz) { - vcpu->arch.tsc_catchup = 1; - vcpu->arch.tsc_always_catchup = 1; - } else - WARN(1, "user requested TSC rate below hardware speed\n"); -} - static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu) { return vmcs_read64(TSC_OFFSET); @@ -2429,7 +2428,7 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) } } -static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) +static void vmx_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) { u64 offset = vmcs_read64(TSC_OFFSET); @@ -2442,11 +2441,6 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho offset + adjustment); } -static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) -{ - return target_tsc - rdtsc(); -} - static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0); @@ -2778,7 +2772,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_EFER: return kvm_get_msr_common(vcpu, msr_info); case MSR_IA32_TSC: - msr_info->data = guest_read_tsc(); + msr_info->data = guest_read_tsc(vcpu); break; case MSR_IA32_SYSENTER_CS: msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); @@ -3154,7 +3148,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_SHADOW_VMCS | SECONDARY_EXEC_XSAVES | SECONDARY_EXEC_ENABLE_PML | - SECONDARY_EXEC_PCOMMIT; + SECONDARY_EXEC_PCOMMIT | + SECONDARY_EXEC_TSC_SCALING; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) @@ -5266,6 +5261,9 @@ static int handle_exception(struct kvm_vcpu *vcpu) return handle_rmode_exception(vcpu, ex_no, error_code); switch (ex_no) { + case AC_VECTOR: + kvm_queue_exception_e(vcpu, AC_VECTOR, error_code); + return 1; case DB_VECTOR: dr6 = vmcs_readl(EXIT_QUALIFICATION); if (!(vcpu->guest_debug & @@ -5908,7 +5906,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) return 1; } - ret = handle_mmio_page_fault_common(vcpu, gpa, true); + ret = handle_mmio_page_fault(vcpu, gpa, true); if (likely(ret == RET_MMIO_PF_EMULATE)) return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == EMULATE_DONE; @@ -6199,6 +6197,12 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_apicv()) enable_apicv = 0; + if (cpu_has_vmx_tsc_scaling()) { + kvm_has_tsc_control = true; + kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; + kvm_tsc_scaling_ratio_frac_bits = 48; + } + if (enable_apicv) kvm_x86_ops->update_cr8_intercept = NULL; else { @@ -8008,6 +8012,9 @@ static void dump_vmcs(void) vmcs_read32(IDT_VECTORING_INFO_FIELD), vmcs_read32(IDT_VECTORING_ERROR_CODE)); pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET)); + if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) + pr_err("TSC Multiplier = 0x%016lx\n", + vmcs_readl(TSC_MULTIPLIER)); if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) @@ -10752,7 +10759,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .vcpu_load = vmx_vcpu_load, .vcpu_put = vmx_vcpu_put, - .update_db_bp_intercept = update_exception_bitmap, + .update_bp_intercept = update_exception_bitmap, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, @@ -10826,11 +10833,9 @@ static struct kvm_x86_ops vmx_x86_ops = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, - .set_tsc_khz = vmx_set_tsc_khz, .read_tsc_offset = vmx_read_tsc_offset, .write_tsc_offset = vmx_write_tsc_offset, - .adjust_tsc_offset = vmx_adjust_tsc_offset, - .compute_tsc_offset = vmx_compute_tsc_offset, + .adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest, .read_l1_tsc = vmx_read_l1_tsc, .set_tdp_cr3 = vmx_set_cr3, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4a6eff166fc6..00462bd63129 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -93,10 +93,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); -struct kvm_x86_ops *kvm_x86_ops; +struct kvm_x86_ops *kvm_x86_ops __read_mostly; EXPORT_SYMBOL_GPL(kvm_x86_ops); -static bool ignore_msrs = 0; +static bool __read_mostly ignore_msrs = 0; module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); unsigned int min_timer_period_us = 500; @@ -105,20 +105,25 @@ module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); static bool __read_mostly kvmclock_periodic_sync = true; module_param(kvmclock_periodic_sync, bool, S_IRUGO); -bool kvm_has_tsc_control; +bool __read_mostly kvm_has_tsc_control; EXPORT_SYMBOL_GPL(kvm_has_tsc_control); -u32 kvm_max_guest_tsc_khz; +u32 __read_mostly kvm_max_guest_tsc_khz; EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); +u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits; +EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits); +u64 __read_mostly kvm_max_tsc_scaling_ratio; +EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio); +static u64 __read_mostly kvm_default_tsc_scaling_ratio; /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ -static u32 tsc_tolerance_ppm = 250; +static u32 __read_mostly tsc_tolerance_ppm = 250; module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); /* lapic timer advance (tscdeadline mode only) in nanoseconds */ -unsigned int lapic_timer_advance_ns = 0; +unsigned int __read_mostly lapic_timer_advance_ns = 0; module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); -static bool backwards_tsc_observed = false; +static bool __read_mostly backwards_tsc_observed = false; #define KVM_NR_SHARED_MSRS 16 @@ -1249,14 +1254,53 @@ static u32 adjust_tsc_khz(u32 khz, s32 ppm) return v; } -static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) +static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) +{ + u64 ratio; + + /* Guest TSC same frequency as host TSC? */ + if (!scale) { + vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; + return 0; + } + + /* TSC scaling supported? */ + if (!kvm_has_tsc_control) { + if (user_tsc_khz > tsc_khz) { + vcpu->arch.tsc_catchup = 1; + vcpu->arch.tsc_always_catchup = 1; + return 0; + } else { + WARN(1, "user requested TSC rate below hardware speed\n"); + return -1; + } + } + + /* TSC scaling required - calculate ratio */ + ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits, + user_tsc_khz, tsc_khz); + + if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) { + WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n", + user_tsc_khz); + return -1; + } + + vcpu->arch.tsc_scaling_ratio = ratio; + return 0; +} + +static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) { u32 thresh_lo, thresh_hi; int use_scaling = 0; /* tsc_khz can be zero if TSC calibration fails */ - if (this_tsc_khz == 0) - return; + if (this_tsc_khz == 0) { + /* set tsc_scaling_ratio to a safe value */ + vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; + return -1; + } /* Compute a scale to convert nanoseconds in TSC cycles */ kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, @@ -1276,7 +1320,7 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi); use_scaling = 1; } - kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling); + return set_tsc_khz(vcpu, this_tsc_khz, use_scaling); } static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) @@ -1322,6 +1366,48 @@ static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset) vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; } +/* + * Multiply tsc by a fixed point number represented by ratio. + * + * The most significant 64-N bits (mult) of ratio represent the + * integral part of the fixed point number; the remaining N bits + * (frac) represent the fractional part, ie. ratio represents a fixed + * point number (mult + frac * 2^(-N)). + * + * N equals to kvm_tsc_scaling_ratio_frac_bits. + */ +static inline u64 __scale_tsc(u64 ratio, u64 tsc) +{ + return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits); +} + +u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) +{ + u64 _tsc = tsc; + u64 ratio = vcpu->arch.tsc_scaling_ratio; + + if (ratio != kvm_default_tsc_scaling_ratio) + _tsc = __scale_tsc(ratio, tsc); + + return _tsc; +} +EXPORT_SYMBOL_GPL(kvm_scale_tsc); + +static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) +{ + u64 tsc; + + tsc = kvm_scale_tsc(vcpu, rdtsc()); + + return target_tsc - tsc; +} + +u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) +{ + return kvm_x86_ops->read_l1_tsc(vcpu, kvm_scale_tsc(vcpu, host_tsc)); +} +EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); + void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) { struct kvm *kvm = vcpu->kvm; @@ -1333,7 +1419,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) u64 data = msr->data; raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); - offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); + offset = kvm_compute_tsc_offset(vcpu, data); ns = get_kernel_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; @@ -1390,7 +1476,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) } else { u64 delta = nsec_to_cycles(vcpu, elapsed); data += delta; - offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); + offset = kvm_compute_tsc_offset(vcpu, data); pr_debug("kvm: adjusted tsc offset by %llu\n", delta); } matched = true; @@ -1447,6 +1533,20 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) EXPORT_SYMBOL_GPL(kvm_write_tsc); +static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, + s64 adjustment) +{ + kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment); +} + +static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) +{ + if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio) + WARN_ON(adjustment < 0); + adjustment = kvm_scale_tsc(vcpu, (u64) adjustment); + kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment); +} + #ifdef CONFIG_X86_64 static cycle_t read_tsc(void) @@ -1608,7 +1708,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) static int kvm_guest_time_update(struct kvm_vcpu *v) { - unsigned long flags, this_tsc_khz; + unsigned long flags, this_tsc_khz, tgt_tsc_khz; struct kvm_vcpu_arch *vcpu = &v->arch; struct kvm_arch *ka = &v->kvm->arch; s64 kernel_ns; @@ -1645,7 +1745,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) kernel_ns = get_kernel_ns(); } - tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc); + tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); /* * We may have to catch up the TSC to match elapsed wall clock @@ -1671,7 +1771,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) return 0; if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { - kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, + tgt_tsc_khz = kvm_has_tsc_control ? + vcpu->virtual_tsc_khz : this_tsc_khz; + kvm_get_time_scale(NSEC_PER_SEC / 1000, tgt_tsc_khz, &vcpu->hv_clock.tsc_shift, &vcpu->hv_clock.tsc_to_system_mul); vcpu->hw_tsc_khz = this_tsc_khz; @@ -2617,7 +2719,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); if (check_tsc_unstable()) { - u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu, + u64 offset = kvm_compute_tsc_offset(vcpu, vcpu->arch.last_guest_tsc); kvm_x86_ops->write_tsc_offset(vcpu, offset); vcpu->arch.tsc_catchup = 1; @@ -3319,9 +3421,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (user_tsc_khz == 0) user_tsc_khz = tsc_khz; - kvm_set_tsc_khz(vcpu, user_tsc_khz); + if (!kvm_set_tsc_khz(vcpu, user_tsc_khz)) + r = 0; - r = 0; goto out; } case KVM_GET_TSC_KHZ: { @@ -6452,8 +6554,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (hw_breakpoint_active()) hw_breakpoint_restore(); - vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, - rdtsc()); + vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); @@ -7015,7 +7116,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, */ kvm_set_rflags(vcpu, rflags); - kvm_x86_ops->update_db_bp_intercept(vcpu); + kvm_x86_ops->update_bp_intercept(vcpu); r = 0; @@ -7364,6 +7465,20 @@ int kvm_arch_hardware_setup(void) if (r != 0) return r; + if (kvm_has_tsc_control) { + /* + * Make sure the user can only configure tsc_khz values that + * fit into a signed integer. + * A min value is not calculated needed because it will always + * be 1 on all machines. + */ + u64 max = min(0x7fffffffULL, + __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz)); + kvm_max_guest_tsc_khz = max; + + kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits; + } + kvm_init_msr_list(); return 0; } diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 1396ad0787fc..b4c24fe3dcfb 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -181,9 +181,14 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags) struct sock *sk2; struct alg_sock *ask2; struct hash_ctx *ctx2; + bool more; int err; - err = crypto_ahash_export(req, state); + lock_sock(sk); + more = ctx->more; + err = more ? crypto_ahash_export(req, state) : 0; + release_sock(sk); + if (err) return err; @@ -194,7 +199,10 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags) sk2 = newsock->sk; ask2 = alg_sk(sk2); ctx2 = ask2->private; - ctx2->more = 1; + ctx2->more = more; + + if (!more) + return err; err = crypto_ahash_import(&ctx2->req, state); if (err) { diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index 3000ea3b6687..021d39c0ba75 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -531,7 +531,11 @@ int x509_decode_time(time64_t *_t, size_t hdrlen, if (*p != 'Z') goto unsupported_time; - mon_len = month_lengths[mon]; + if (year < 1970 || + mon < 1 || mon > 12) + goto invalid_time; + + mon_len = month_lengths[mon - 1]; if (mon == 2) { if (year % 4 == 0) { mon_len = 29; @@ -543,14 +547,12 @@ int x509_decode_time(time64_t *_t, size_t hdrlen, } } - if (year < 1970 || - mon < 1 || mon > 12 || - day < 1 || day > mon_len || + if (day < 1 || day > mon_len || hour > 23 || min > 59 || sec > 59) goto invalid_time; - + *_t = mktime64(year, mon, day, hour, min, sec); return 0; diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index 06a67d5f2846..296b7a14893a 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -103,7 +103,12 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev) pdevinfo.res = resources; pdevinfo.num_res = count; pdevinfo.fwnode = acpi_fwnode_handle(adev); - pdevinfo.dma_mask = acpi_check_dma(adev, NULL) ? DMA_BIT_MASK(32) : 0; + + if (acpi_dma_supported(adev)) + pdevinfo.dma_mask = DMA_BIT_MASK(32); + else + pdevinfo.dma_mask = 0; + pdev = platform_device_register_full(&pdevinfo); if (IS_ERR(pdev)) dev_err(&adev->dev, "platform device creation failed: %ld\n", diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 5778e8e4313a..3405f7a41e25 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -77,6 +77,12 @@ module_param(allow_duplicates, bool, 0644); static int disable_backlight_sysfs_if = -1; module_param(disable_backlight_sysfs_if, int, 0444); +static bool device_id_scheme = false; +module_param(device_id_scheme, bool, 0444); + +static bool only_lcd = false; +module_param(only_lcd, bool, 0444); + static int register_count; static DEFINE_MUTEX(register_count_mutex); static struct mutex video_list_lock; @@ -394,6 +400,18 @@ static int video_disable_backlight_sysfs_if( return 0; } +static int video_set_device_id_scheme(const struct dmi_system_id *d) +{ + device_id_scheme = true; + return 0; +} + +static int video_enable_only_lcd(const struct dmi_system_id *d) +{ + only_lcd = true; + return 0; +} + static struct dmi_system_id video_dmi_table[] = { /* * Broken _BQC workaround http://bugzilla.kernel.org/show_bug.cgi?id=13121 @@ -455,6 +473,33 @@ static struct dmi_system_id video_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "PORTEGE R830"), }, }, + /* + * Some machine's _DOD IDs don't have bit 31(Device ID Scheme) set + * but the IDs actually follow the Device ID Scheme. + */ + { + /* https://bugzilla.kernel.org/show_bug.cgi?id=104121 */ + .callback = video_set_device_id_scheme, + .ident = "ESPRIMO Mobile M9410", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO Mobile M9410"), + }, + }, + /* + * Some machines have multiple video output devices, but only the one + * that is the type of LCD can do the backlight control so we should not + * register backlight interface for other video output devices. + */ + { + /* https://bugzilla.kernel.org/show_bug.cgi?id=104121 */ + .callback = video_enable_only_lcd, + .ident = "ESPRIMO Mobile M9410", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO Mobile M9410"), + }, + }, {} }; @@ -1003,7 +1048,7 @@ acpi_video_bus_get_one_device(struct acpi_device *device, attribute = acpi_video_get_device_attr(video, device_id); - if (attribute && attribute->device_id_scheme) { + if (attribute && (attribute->device_id_scheme || device_id_scheme)) { switch (attribute->display_type) { case ACPI_VIDEO_DISPLAY_CRT: data->flags.crt = 1; @@ -1568,15 +1613,6 @@ static void acpi_video_dev_register_backlight(struct acpi_video_device *device) static int count; char *name; - /* - * Do not create backlight device for video output - * device that is not in the enumerated list. - */ - if (!acpi_video_device_in_dod(device)) { - dev_dbg(&device->dev->dev, "not in _DOD list, ignore\n"); - return; - } - result = acpi_video_init_brightness(device); if (result) return; @@ -1657,6 +1693,22 @@ static void acpi_video_run_bcl_for_osi(struct acpi_video_bus *video) mutex_unlock(&video->device_list_lock); } +static bool acpi_video_should_register_backlight(struct acpi_video_device *dev) +{ + /* + * Do not create backlight device for video output + * device that is not in the enumerated list. + */ + if (!acpi_video_device_in_dod(dev)) { + dev_dbg(&dev->dev->dev, "not in _DOD list, ignore\n"); + return false; + } + + if (only_lcd) + return dev->flags.lcd; + return true; +} + static int acpi_video_bus_register_backlight(struct acpi_video_bus *video) { struct acpi_video_device *dev; @@ -1670,8 +1722,10 @@ static int acpi_video_bus_register_backlight(struct acpi_video_bus *video) return 0; mutex_lock(&video->device_list_lock); - list_for_each_entry(dev, &video->video_device_list, entry) - acpi_video_dev_register_backlight(dev); + list_for_each_entry(dev, &video->video_device_list, entry) { + if (acpi_video_should_register_backlight(dev)) + acpi_video_dev_register_backlight(dev); + } mutex_unlock(&video->device_list_lock); video->backlight_registered = true; diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 1470ae4f98c0..5ea5dc219f56 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -168,7 +168,7 @@ int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev) struct list_head *physnode_list; unsigned int node_id; int retval = -EINVAL; - bool coherent; + enum dev_dma_attr attr; if (has_acpi_companion(dev)) { if (acpi_dev) { @@ -225,8 +225,10 @@ int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev) if (!has_acpi_companion(dev)) ACPI_COMPANION_SET(dev, acpi_dev); - if (acpi_check_dma(acpi_dev, &coherent)) - arch_setup_dma_ops(dev, 0, 0, NULL, coherent); + attr = acpi_get_dma_attr(acpi_dev); + if (attr != DEV_DMA_NOT_SUPPORTED) + arch_setup_dma_ops(dev, 0, 0, NULL, + attr == DEV_DMA_COHERENT); acpi_physnode_link_name(physical_node_name, node_id); retval = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj, diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index daf9fc8329e6..78d5f02a073b 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1308,6 +1308,48 @@ void acpi_free_pnp_ids(struct acpi_device_pnp *pnp) kfree(pnp->unique_id); } +/** + * acpi_dma_supported - Check DMA support for the specified device. + * @adev: The pointer to acpi device + * + * Return false if DMA is not supported. Otherwise, return true + */ +bool acpi_dma_supported(struct acpi_device *adev) +{ + if (!adev) + return false; + + if (adev->flags.cca_seen) + return true; + + /* + * Per ACPI 6.0 sec 6.2.17, assume devices can do cache-coherent + * DMA on "Intel platforms". Presumably that includes all x86 and + * ia64, and other arches will set CONFIG_ACPI_CCA_REQUIRED=y. + */ + if (!IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED)) + return true; + + return false; +} + +/** + * acpi_get_dma_attr - Check the supported DMA attr for the specified device. + * @adev: The pointer to acpi device + * + * Return enum dev_dma_attr. + */ +enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) +{ + if (!acpi_dma_supported(adev)) + return DEV_DMA_NOT_SUPPORTED; + + if (adev->flags.coherent_dma) + return DEV_DMA_COHERENT; + else + return DEV_DMA_NON_COHERENT; +} + static void acpi_init_coherency(struct acpi_device *adev) { unsigned long long cca = 0; diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 30d8518b25fb..82707f9824ca 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -315,7 +315,7 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) if (crt == -1) { tz->trips.critical.flags.valid = 0; } else if (crt > 0) { - unsigned long crt_k = CELSIUS_TO_KELVIN(crt); + unsigned long crt_k = CELSIUS_TO_DECI_KELVIN(crt); /* * Allow override critical threshold */ @@ -351,7 +351,7 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) if (psv == -1) { status = AE_SUPPORT; } else if (psv > 0) { - tmp = CELSIUS_TO_KELVIN(psv); + tmp = CELSIUS_TO_DECI_KELVIN(psv); status = AE_OK; } else { status = acpi_evaluate_integer(tz->device->handle, @@ -431,7 +431,7 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) break; if (i == 1) tz->trips.active[0].temperature = - CELSIUS_TO_KELVIN(act); + CELSIUS_TO_DECI_KELVIN(act); else /* * Don't allow override higher than @@ -439,9 +439,9 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) */ tz->trips.active[i - 1].temperature = (tz->trips.active[i - 2].temperature < - CELSIUS_TO_KELVIN(act) ? + CELSIUS_TO_DECI_KELVIN(act) ? tz->trips.active[i - 2].temperature : - CELSIUS_TO_KELVIN(act)); + CELSIUS_TO_DECI_KELVIN(act)); break; } else { tz->trips.active[i].temperature = tmp; @@ -1105,7 +1105,7 @@ static int acpi_thermal_add(struct acpi_device *device) INIT_WORK(&tz->thermal_check_work, acpi_thermal_check_fn); pr_info(PREFIX "%s [%s] (%ld C)\n", acpi_device_name(device), - acpi_device_bid(device), KELVIN_TO_CELSIUS(tz->temperature)); + acpi_device_bid(device), DECI_KELVIN_TO_CELSIUS(tz->temperature)); goto end; free_memory: diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 0d3a384b508a..daaf1c4e1e0f 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -233,6 +233,15 @@ static const struct dmi_system_id video_detect_dmi_table[] = { }, }, { + /* https://bugzilla.redhat.com/show_bug.cgi?id=1272633 */ + .callback = video_detect_force_video, + .ident = "Dell XPS14 L421X", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "XPS L421X"), + }, + }, + { /* https://bugzilla.redhat.com/show_bug.cgi?id=1163574 */ .callback = video_detect_force_video, .ident = "Dell XPS15 L521X", diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 80e298870388..e03b1ad25a90 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -321,8 +321,7 @@ static int genpd_poweroff(struct generic_pm_domain *genpd, bool is_async) if (stat > PM_QOS_FLAGS_NONE) return -EBUSY; - if (pdd->dev->driver && (!pm_runtime_suspended(pdd->dev) - || pdd->dev->power.irq_safe)) + if (!pm_runtime_suspended(pdd->dev) || pdd->dev->power.irq_safe) not_suspended++; } @@ -1312,13 +1311,17 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *subdomain) { - struct gpd_link *link; + struct gpd_link *link, *itr; int ret = 0; if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain) || genpd == subdomain) return -EINVAL; + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) + return -ENOMEM; + mutex_lock(&genpd->lock); mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING); @@ -1328,18 +1331,13 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(link, &genpd->master_links, master_node) { - if (link->slave == subdomain && link->master == genpd) { + list_for_each_entry(itr, &genpd->master_links, master_node) { + if (itr->slave == subdomain && itr->master == genpd) { ret = -EINVAL; goto out; } } - link = kzalloc(sizeof(*link), GFP_KERNEL); - if (!link) { - ret = -ENOMEM; - goto out; - } link->master = genpd; list_add_tail(&link->master_node, &genpd->master_links); link->slave = subdomain; @@ -1350,7 +1348,8 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, out: mutex_unlock(&subdomain->lock); mutex_unlock(&genpd->lock); - + if (ret) + kfree(link); return ret; } EXPORT_SYMBOL_GPL(pm_genpd_add_subdomain); diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index d5c1149ff123..b8e76f75073b 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -11,6 +11,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/errno.h> #include <linux/err.h> #include <linux/slab.h> @@ -27,7 +29,7 @@ */ static LIST_HEAD(dev_opp_list); /* Lock to allow exclusive modification to the device and opp lists */ -static DEFINE_MUTEX(dev_opp_list_lock); +DEFINE_MUTEX(dev_opp_list_lock); #define opp_rcu_lockdep_assert() \ do { \ @@ -79,14 +81,18 @@ static struct device_opp *_managed_opp(const struct device_node *np) * Return: pointer to 'struct device_opp' if found, otherwise -ENODEV or * -EINVAL based on type of error. * - * Locking: This function must be called under rcu_read_lock(). device_opp - * is a RCU protected pointer. This means that device_opp is valid as long - * as we are under RCU lock. + * Locking: For readers, this function must be called under rcu_read_lock(). + * device_opp is a RCU protected pointer, which means that device_opp is valid + * as long as we are under RCU lock. + * + * For Writers, this function must be called with dev_opp_list_lock held. */ struct device_opp *_find_device_opp(struct device *dev) { struct device_opp *dev_opp; + opp_rcu_lockdep_assert(); + if (IS_ERR_OR_NULL(dev)) { pr_err("%s: Invalid parameters\n", __func__); return ERR_PTR(-EINVAL); @@ -100,7 +106,7 @@ struct device_opp *_find_device_opp(struct device *dev) } /** - * dev_pm_opp_get_voltage() - Gets the voltage corresponding to an available opp + * dev_pm_opp_get_voltage() - Gets the voltage corresponding to an opp * @opp: opp for which voltage has to be returned for * * Return: voltage in micro volt corresponding to the opp, else @@ -122,7 +128,7 @@ unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) opp_rcu_lockdep_assert(); tmp_opp = rcu_dereference(opp); - if (IS_ERR_OR_NULL(tmp_opp) || !tmp_opp->available) + if (IS_ERR_OR_NULL(tmp_opp)) pr_err("%s: Invalid parameters\n", __func__); else v = tmp_opp->u_volt; @@ -701,7 +707,7 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, } /** - * _opp_add_dynamic() - Allocate a dynamic OPP. + * _opp_add_v1() - Allocate a OPP based on v1 bindings. * @dev: device for which we do this operation * @freq: Frequency in Hz for this OPP * @u_volt: Voltage in uVolts for this OPP @@ -727,8 +733,8 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, * Duplicate OPPs (both freq and volt are same) and !opp->available * -ENOMEM Memory allocation failure */ -static int _opp_add_dynamic(struct device *dev, unsigned long freq, - long u_volt, bool dynamic) +static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt, + bool dynamic) { struct device_opp *dev_opp; struct dev_pm_opp *new_opp; @@ -770,9 +776,10 @@ unlock: } /* TODO: Support multiple regulators */ -static int opp_get_microvolt(struct dev_pm_opp *opp, struct device *dev) +static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev) { u32 microvolt[3] = {0}; + u32 val; int count, ret; /* Missing property isn't a problem, but an invalid entry is */ @@ -805,6 +812,9 @@ static int opp_get_microvolt(struct dev_pm_opp *opp, struct device *dev) opp->u_volt_min = microvolt[1]; opp->u_volt_max = microvolt[2]; + if (!of_property_read_u32(opp->np, "opp-microamp", &val)) + opp->u_amp = val; + return 0; } @@ -869,13 +879,10 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) if (!of_property_read_u32(np, "clock-latency-ns", &val)) new_opp->clock_latency_ns = val; - ret = opp_get_microvolt(new_opp, dev); + ret = opp_parse_supplies(new_opp, dev); if (ret) goto free_opp; - if (!of_property_read_u32(new_opp->np, "opp-microamp", &val)) - new_opp->u_amp = val; - ret = _opp_add(dev, new_opp, dev_opp); if (ret) goto free_opp; @@ -939,7 +946,7 @@ unlock: */ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) { - return _opp_add_dynamic(dev, freq, u_volt, true); + return _opp_add_v1(dev, freq, u_volt, true); } EXPORT_SYMBOL_GPL(dev_pm_opp_add); @@ -1172,13 +1179,17 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) struct device_opp *dev_opp; int ret = 0, count = 0; + mutex_lock(&dev_opp_list_lock); + dev_opp = _managed_opp(opp_np); if (dev_opp) { /* OPPs are already managed */ if (!_add_list_dev(dev, dev_opp)) ret = -ENOMEM; + mutex_unlock(&dev_opp_list_lock); return ret; } + mutex_unlock(&dev_opp_list_lock); /* We have opp-list node now, iterate over it and add OPPs */ for_each_available_child_of_node(opp_np, np) { @@ -1196,15 +1207,20 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) if (WARN_ON(!count)) return -ENOENT; + mutex_lock(&dev_opp_list_lock); + dev_opp = _find_device_opp(dev); if (WARN_ON(IS_ERR(dev_opp))) { ret = PTR_ERR(dev_opp); + mutex_unlock(&dev_opp_list_lock); goto free_table; } dev_opp->np = opp_np; dev_opp->shared_opp = of_property_read_bool(opp_np, "opp-shared"); + mutex_unlock(&dev_opp_list_lock); + return 0; free_table: @@ -1241,7 +1257,7 @@ static int _of_add_opp_table_v1(struct device *dev) unsigned long freq = be32_to_cpup(val++) * 1000; unsigned long volt = be32_to_cpup(val++); - if (_opp_add_dynamic(dev, freq, volt, false)) + if (_opp_add_v1(dev, freq, volt, false)) dev_warn(dev, "%s: Failed to add OPP %ld\n", __func__, freq); nr -= 2; diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c index 7654c5606307..7b445e88a0d5 100644 --- a/drivers/base/power/opp/cpu.c +++ b/drivers/base/power/opp/cpu.c @@ -10,6 +10,9 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/cpu.h> #include <linux/cpufreq.h> #include <linux/err.h> @@ -124,12 +127,12 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) struct device *dev; int cpu, ret = 0; - rcu_read_lock(); + mutex_lock(&dev_opp_list_lock); dev_opp = _find_device_opp(cpu_dev); if (IS_ERR(dev_opp)) { ret = -EINVAL; - goto out_rcu_read_unlock; + goto unlock; } for_each_cpu(cpu, cpumask) { @@ -150,10 +153,10 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) continue; } } -out_rcu_read_unlock: - rcu_read_unlock(); +unlock: + mutex_unlock(&dev_opp_list_lock); - return 0; + return ret; } EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus); diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index dcb38f78dae4..7366b2aa8997 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -21,6 +21,9 @@ #include <linux/rculist.h> #include <linux/rcupdate.h> +/* Lock to allow exclusive modification to the device and opp lists */ +extern struct mutex dev_opp_list_lock; + /* * Internal data structure organization with the OPP layer library is as * follows: diff --git a/drivers/base/property.c b/drivers/base/property.c index de40623bbd8a..1325ff225cc4 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -598,18 +598,34 @@ unsigned int device_get_child_node_count(struct device *dev) } EXPORT_SYMBOL_GPL(device_get_child_node_count); -bool device_dma_is_coherent(struct device *dev) +bool device_dma_supported(struct device *dev) { - bool coherent = false; - + /* For DT, this is always supported. + * For ACPI, this depends on CCA, which + * is determined by the acpi_dma_supported(). + */ if (IS_ENABLED(CONFIG_OF) && dev->of_node) - coherent = of_dma_is_coherent(dev->of_node); - else - acpi_check_dma(ACPI_COMPANION(dev), &coherent); + return true; + + return acpi_dma_supported(ACPI_COMPANION(dev)); +} +EXPORT_SYMBOL_GPL(device_dma_supported); - return coherent; +enum dev_dma_attr device_get_dma_attr(struct device *dev) +{ + enum dev_dma_attr attr = DEV_DMA_NOT_SUPPORTED; + + if (IS_ENABLED(CONFIG_OF) && dev->of_node) { + if (of_dma_is_coherent(dev->of_node)) + attr = DEV_DMA_COHERENT; + else + attr = DEV_DMA_NON_COHERENT; + } else + attr = acpi_get_dma_attr(ACPI_COMPANION(dev)); + + return attr; } -EXPORT_SYMBOL_GPL(device_dma_is_coherent); +EXPORT_SYMBOL_GPL(device_get_dma_attr); /** * device_get_phy_mode - Get phy mode for given device diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index f26b0ae23bea..45cc39aabeee 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -226,21 +226,23 @@ int tpm_chip_register(struct tpm_chip *chip) if (rc) goto out_err; - if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) { - rc = __compat_only_sysfs_link_entry_to_kobj(&chip->pdev->kobj, - &chip->dev.kobj, - "ppi"); - if (rc) - goto out_err; - } - /* Make the chip available. */ spin_lock(&driver_lock); - list_add_rcu(&chip->list, &tpm_chip_list); + list_add_tail_rcu(&chip->list, &tpm_chip_list); spin_unlock(&driver_lock); chip->flags |= TPM_CHIP_FLAG_REGISTERED; + if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) { + rc = __compat_only_sysfs_link_entry_to_kobj(&chip->pdev->kobj, + &chip->dev.kobj, + "ppi"); + if (rc && rc != -ENOENT) { + tpm_chip_unregister(chip); + return rc; + } + } + return 0; out_err: tpm1_chip_unregister(chip); diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index bd7039fafa8a..c12130485fc1 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -443,12 +443,13 @@ int tpm2_seal_trusted(struct tpm_chip *chip, TPM_DIGEST_SIZE); /* sensitive */ - tpm_buf_append_u16(&buf, 4 + TPM_DIGEST_SIZE + payload->key_len); + tpm_buf_append_u16(&buf, 4 + TPM_DIGEST_SIZE + payload->key_len + 1); tpm_buf_append_u16(&buf, TPM_DIGEST_SIZE); tpm_buf_append(&buf, options->blobauth, TPM_DIGEST_SIZE); - tpm_buf_append_u16(&buf, payload->key_len); + tpm_buf_append_u16(&buf, payload->key_len + 1); tpm_buf_append(&buf, payload->key, payload->key_len); + tpm_buf_append_u8(&buf, payload->migratable); /* public */ tpm_buf_append_u16(&buf, 14); @@ -573,6 +574,8 @@ static int tpm2_unseal(struct tpm_chip *chip, u32 blob_handle) { struct tpm_buf buf; + u16 data_len; + u8 *data; int rc; rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_UNSEAL); @@ -591,11 +594,13 @@ static int tpm2_unseal(struct tpm_chip *chip, rc = -EPERM; if (!rc) { - payload->key_len = be16_to_cpup( + data_len = be16_to_cpup( (__be16 *) &buf.data[TPM_HEADER_SIZE + 4]); + data = &buf.data[TPM_HEADER_SIZE + 6]; - memcpy(payload->key, &buf.data[TPM_HEADER_SIZE + 6], - payload->key_len); + memcpy(payload->key, data, data_len - 1); + payload->key_len = data_len - 1; + payload->migratable = data[data_len - 1]; } tpm_buf_destroy(&buf); diff --git a/drivers/char/tpm/tpm_of.c b/drivers/char/tpm/tpm_of.c index 1141456a4b1f..570f30c5c5f4 100644 --- a/drivers/char/tpm/tpm_of.c +++ b/drivers/char/tpm/tpm_of.c @@ -53,17 +53,18 @@ int read_log(struct tpm_bios_log *log) goto cleanup_eio; } - of_node_put(np); log->bios_event_log = kmalloc(*sizep, GFP_KERNEL); if (!log->bios_event_log) { pr_err("%s: ERROR - Not enough memory for BIOS measurements\n", __func__); + of_node_put(np); return -ENOMEM; } log->bios_event_log_end = log->bios_event_log + *sizep; memcpy(log->bios_event_log, __va(*basep), *sizep); + of_node_put(np); return 0; diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 696ef1d56b4f..65f7eecc45b0 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -645,6 +645,7 @@ static int tpm_tis_init(struct device *dev, struct tpm_info *tpm_info, { u32 vendor, intfcaps, intmask; int rc, i, irq_s, irq_e, probe; + int irq_r = -1; struct tpm_chip *chip; struct priv_data *priv; @@ -751,6 +752,7 @@ static int tpm_tis_init(struct device *dev, struct tpm_info *tpm_info, irq_s = ioread8(chip->vendor.iobase + TPM_INT_VECTOR(chip->vendor.locality)); + irq_r = irq_s; if (irq_s) { irq_e = irq_s; } else { @@ -805,6 +807,8 @@ static int tpm_tis_init(struct device *dev, struct tpm_info *tpm_info, iowrite32(intmask, chip->vendor.iobase + TPM_INT_ENABLE(chip->vendor.locality)); + + devm_free_irq(dev, i, chip); } } if (chip->vendor.irq) { @@ -831,7 +835,9 @@ static int tpm_tis_init(struct device *dev, struct tpm_info *tpm_info, chip->vendor.iobase + TPM_INT_ENABLE(chip->vendor.locality)); } - } + } else if (irq_r != -1) + iowrite8(irq_r, chip->vendor.iobase + + TPM_INT_VECTOR(chip->vendor.locality)); if (chip->flags & TPM_CHIP_FLAG_TPM2) { chip->vendor.timeout_a = msecs_to_jiffies(TPM2_TIMEOUT_A); diff --git a/drivers/clk/h8300/clk-div.c b/drivers/clk/h8300/clk-div.c index 1dd5d14d5dbe..d71d01157dbb 100644 --- a/drivers/clk/h8300/clk-div.c +++ b/drivers/clk/h8300/clk-div.c @@ -19,6 +19,7 @@ static void __init h8300_div_clk_setup(struct device_node *node) const char *parent_name; void __iomem *divcr = NULL; int width; + int offset; num_parents = of_clk_get_parent_count(node); if (num_parents < 1) { @@ -31,11 +32,14 @@ static void __init h8300_div_clk_setup(struct device_node *node) pr_err("%s: failed to map divide register", clk_name); goto error; } + offset = (unsigned long)divcr & 3; + offset = (3 - offset) * 8; + divcr = (void *)((unsigned long)divcr & ~3); parent_name = of_clk_get_parent_name(node, 0); of_property_read_u32(node, "renesas,width", &width); clk = clk_register_divider(NULL, clk_name, parent_name, - CLK_SET_RATE_GATE, divcr, 0, width, + CLK_SET_RATE_GATE, divcr, offset, width, CLK_DIVIDER_POWER_OF_TWO, &clklock); if (!IS_ERR(clk)) { of_clk_add_provider(node, of_clk_src_simple_get, clk); diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index f1e42f8ce0fc..c5d256caa664 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -149,6 +149,19 @@ bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate) __func__, cpu, old_cluster, new_cluster, new_rate); ret = clk_set_rate(clk[new_cluster], new_rate * 1000); + if (!ret) { + /* + * FIXME: clk_set_rate hasn't returned an error here however it + * may be that clk_change_rate failed due to hardware or + * firmware issues and wasn't able to report that due to the + * current design of the clk core layer. To work around this + * problem we will read back the clock rate and check it is + * correct. This needs to be removed once clk core is fixed. + */ + if (clk_get_rate(clk[new_cluster]) != new_rate * 1000) + ret = -EIO; + } + if (WARN_ON(ret)) { pr_err("clk_set_rate failed: %d, new cluster: %d\n", ret, new_cluster); @@ -189,15 +202,6 @@ bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate) mutex_unlock(&cluster_lock[old_cluster]); } - /* - * FIXME: clk_set_rate has to handle the case where clk_change_rate - * can fail due to hardware or firmware issues. Until the clk core - * layer is fixed, we can check here. In most of the cases we will - * be reading only the cached value anyway. This needs to be removed - * once clk core is fixed. - */ - if (bL_cpufreq_get_rate(cpu) != new_rate) - return -EIO; return 0; } diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 93c219fab850..e8cb334094b0 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -166,8 +166,7 @@ static int __init cppc_cpufreq_init(void) out: for_each_possible_cpu(i) - if (all_cpu_data[i]) - kfree(all_cpu_data[i]); + kfree(all_cpu_data[i]); kfree(all_cpu_data); return -ENODEV; diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 11258c4c1b17..b260576ddb12 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -171,10 +171,6 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, { int i; - mutex_lock(&cpufreq_governor_lock); - if (!policy->governor_enabled) - goto out_unlock; - if (!all_cpus) { /* * Use raw_smp_processor_id() to avoid preemptible warnings. @@ -188,9 +184,6 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, for_each_cpu(i, policy->cpus) __gov_queue_work(i, dbs_data, delay); } - -out_unlock: - mutex_unlock(&cpufreq_governor_lock); } EXPORT_SYMBOL_GPL(gov_queue_work); @@ -229,13 +222,24 @@ static void dbs_timer(struct work_struct *work) struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info, dwork.work); struct cpu_common_dbs_info *shared = cdbs->shared; - struct cpufreq_policy *policy = shared->policy; - struct dbs_data *dbs_data = policy->governor_data; + struct cpufreq_policy *policy; + struct dbs_data *dbs_data; unsigned int sampling_rate, delay; bool modify_all = true; mutex_lock(&shared->timer_mutex); + policy = shared->policy; + + /* + * Governor might already be disabled and there is no point continuing + * with the work-handler. + */ + if (!policy) + goto unlock; + + dbs_data = policy->governor_data; + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; @@ -252,6 +256,7 @@ static void dbs_timer(struct work_struct *work) delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all); gov_queue_work(dbs_data, policy, delay, modify_all); +unlock: mutex_unlock(&shared->timer_mutex); } @@ -478,9 +483,17 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy, if (!shared || !shared->policy) return -EBUSY; + /* + * Work-handler must see this updated, as it should not proceed any + * further after governor is disabled. And so timer_mutex is taken while + * updating this value. + */ + mutex_lock(&shared->timer_mutex); + shared->policy = NULL; + mutex_unlock(&shared->timer_mutex); + gov_cancel_work(dbs_data, policy); - shared->policy = NULL; mutex_destroy(&shared->timer_mutex); return 0; } diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 93a3c635ea27..2e31d097def6 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -684,8 +684,6 @@ static void __init intel_pstate_sysfs_expose_params(void) static void intel_pstate_hwp_enable(struct cpudata *cpudata) { - pr_info("intel_pstate: HWP enabled\n"); - wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); } @@ -1557,8 +1555,10 @@ static int __init intel_pstate_init(void) if (!all_cpu_data) return -ENOMEM; - if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) + if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) { + pr_info("intel_pstate: HWP enabled\n"); hwp_active++; + } if (!hwp_active && hwp_only) goto out; @@ -1593,8 +1593,10 @@ static int __init intel_pstate_setup(char *str) if (!strcmp(str, "disable")) no_load = 1; - if (!strcmp(str, "no_hwp")) + if (!strcmp(str, "no_hwp")) { + pr_info("intel_pstate: HWP disabled\n"); no_hwp = 1; + } if (!strcmp(str, "force")) force_load = 1; if (!strcmp(str, "hwp_only")) diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index 9e231f52150c..051a8a8224cd 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -212,11 +212,11 @@ static void s5pv210_set_refresh(enum s5pv210_dmc_port ch, unsigned long freq) /* Find current DRAM frequency */ tmp = s5pv210_dram_conf[ch].freq; - do_div(tmp, freq); + tmp /= freq; tmp1 = s5pv210_dram_conf[ch].refresh; - do_div(tmp1, tmp); + tmp1 /= tmp; __raw_writel(tmp1, reg); } diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c index 8b923b7e9389..01b50cb4c982 100644 --- a/drivers/crypto/ccp/ccp-platform.c +++ b/drivers/crypto/ccp/ccp-platform.c @@ -94,6 +94,7 @@ static int ccp_platform_probe(struct platform_device *pdev) struct ccp_device *ccp; struct ccp_platform *ccp_platform; struct device *dev = &pdev->dev; + enum dev_dma_attr attr; struct resource *ior; int ret; @@ -118,18 +119,24 @@ static int ccp_platform_probe(struct platform_device *pdev) } ccp->io_regs = ccp->io_map; - ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); - if (ret) { - dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret); + attr = device_get_dma_attr(dev); + if (attr == DEV_DMA_NOT_SUPPORTED) { + dev_err(dev, "DMA is not supported"); goto e_err; } - ccp_platform->coherent = device_dma_is_coherent(ccp->dev); + ccp_platform->coherent = (attr == DEV_DMA_COHERENT); if (ccp_platform->coherent) ccp->axcache = CACHE_WB_NO_ALLOC; else ccp->axcache = CACHE_NONE; + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (ret) { + dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret); + goto e_err; + } + dev_set_drvdata(dev, ccp); ret = ccp_init(ccp); diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index f51d376d10ba..c2f5117fd8cb 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -3675,6 +3675,11 @@ static int pci_probe(struct pci_dev *dev, reg_write(ohci, OHCI1394_IsoXmitIntMaskSet, ~0); ohci->it_context_support = reg_read(ohci, OHCI1394_IsoXmitIntMaskSet); + /* JMicron JMB38x often shows 0 at first read, just ignore it */ + if (!ohci->it_context_support) { + ohci_notice(ohci, "overriding IsoXmitIntMask\n"); + ohci->it_context_support = 0xf; + } reg_write(ohci, OHCI1394_IsoXmitIntMaskClear, ~0); ohci->it_context_mask = ohci->it_context_support; ohci->n_it = hweight32(ohci->it_context_mask); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index 7dd893331785..618d952c2984 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -342,6 +342,7 @@ static int xgbe_probe(struct platform_device *pdev) struct resource *res; const char *phy_mode; unsigned int i, phy_memnum, phy_irqnum; + enum dev_dma_attr attr; int ret; DBGPR("--> xgbe_probe\n"); @@ -609,7 +610,12 @@ static int xgbe_probe(struct platform_device *pdev) goto err_io; /* Set the DMA coherency values */ - pdata->coherent = device_dma_is_coherent(pdata->dev); + attr = device_get_dma_attr(dev); + if (attr == DEV_DMA_NOT_SUPPORTED) { + dev_err(dev, "DMA is not supported"); + goto err_io; + } + pdata->coherent = (attr == DEV_DMA_COHERENT); if (pdata->coherent) { pdata->axdomain = XGBE_DMA_OS_AXDOMAIN; pdata->arcache = XGBE_DMA_OS_ARCACHE; diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c index ff27177f49ed..b1449f71601c 100644 --- a/drivers/of/of_pci.c +++ b/drivers/of/of_pci.c @@ -143,26 +143,6 @@ void of_pci_check_probe_only(void) } EXPORT_SYMBOL_GPL(of_pci_check_probe_only); -/** - * of_pci_dma_configure - Setup DMA configuration - * @dev: ptr to pci_dev struct of the PCI device - * - * Function to update PCI devices's DMA configuration using the same - * info from the OF node of host bridge's parent (if any). - */ -void of_pci_dma_configure(struct pci_dev *pci_dev) -{ - struct device *dev = &pci_dev->dev; - struct device *bridge = pci_get_host_bridge_device(pci_dev); - - if (!bridge->parent) - return; - - of_dma_configure(dev, bridge->parent->of_node); - pci_put_host_bridge_device(bridge); -} -EXPORT_SYMBOL_GPL(of_pci_dma_configure); - #if defined(CONFIG_OF_ADDRESS) /** * of_pci_get_host_bridge_resources - Parse PCI host bridge resources from DT diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 62f467b8ccae..be77e75c587d 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -124,6 +124,10 @@ static int __init __reserved_mem_alloc_size(unsigned long node, align = dt_mem_next_cell(dt_root_addr_cells, &prop); } + /* Need adjust the alignment to satisfy the CMA requirement */ + if (IS_ENABLED(CONFIG_CMA) && of_flat_dt_is_compatible(node, "shared-dma-pool")) + align = max(align, (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order)); + prop = of_get_flat_dt_prop(node, "alloc-ranges", &len); if (prop) { @@ -226,10 +230,9 @@ static void __init __rmem_check_for_overlap(void) this_end = this->base + this->size; next_end = next->base + next->size; - WARN(1, - "Reserved memory: OVERLAP DETECTED!\n%s (%pa--%pa) overlaps with %s (%pa--%pa)\n", - this->name, &this->base, &this_end, - next->name, &next->base, &next_end); + pr_err("Reserved memory: OVERLAP DETECTED!\n%s (%pa--%pa) overlaps with %s (%pa--%pa)\n", + this->name, &this->base, &this_end, + next->name, &next->base, &next_end); } } } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f53b8e85f137..e735c728e3b3 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -6,6 +6,7 @@ #include <linux/delay.h> #include <linux/init.h> #include <linux/pci.h> +#include <linux/of_device.h> #include <linux/of_pci.h> #include <linux/pci_hotplug.h> #include <linux/slab.h> @@ -13,6 +14,7 @@ #include <linux/cpumask.h> #include <linux/pci-aspm.h> #include <linux/aer.h> +#include <linux/acpi.h> #include <asm-generic/pci-bridge.h> #include "pci.h" @@ -1672,6 +1674,34 @@ static void pci_set_msi_domain(struct pci_dev *dev) dev_set_msi_domain(&dev->dev, d); } +/** + * pci_dma_configure - Setup DMA configuration + * @dev: ptr to pci_dev struct of the PCI device + * + * Function to update PCI devices's DMA configuration using the same + * info from the OF node or ACPI node of host bridge's parent (if any). + */ +static void pci_dma_configure(struct pci_dev *dev) +{ + struct device *bridge = pci_get_host_bridge_device(dev); + + if (IS_ENABLED(CONFIG_OF) && dev->dev.of_node) { + if (bridge->parent) + of_dma_configure(&dev->dev, bridge->parent->of_node); + } else if (has_acpi_companion(bridge)) { + struct acpi_device *adev = to_acpi_device_node(bridge->fwnode); + enum dev_dma_attr attr = acpi_get_dma_attr(adev); + + if (attr == DEV_DMA_NOT_SUPPORTED) + dev_warn(&dev->dev, "DMA not supported.\n"); + else + arch_setup_dma_ops(&dev->dev, 0, 0, NULL, + attr == DEV_DMA_COHERENT); + } + + pci_put_host_bridge_device(bridge); +} + void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) { int ret; @@ -1685,7 +1715,7 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) dev->dev.dma_mask = &dev->dma_mask; dev->dev.dma_parms = &dev->dma_parms; dev->dev.coherent_dma_mask = 0xffffffffull; - of_pci_dma_configure(dev); + pci_dma_configure(dev); pci_set_dma_max_seg_size(dev, 65536); pci_set_dma_seg_boundary(dev, 0xffffffff); diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 1f7d80ff8cb4..e3a750224ae2 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -1320,7 +1320,7 @@ static ssize_t asus_hwmon_temp1(struct device *dev, if (err < 0) return err; - value = KELVIN_TO_CELSIUS((value & 0xFFFF)) * 1000; + value = DECI_KELVIN_TO_CELSIUS((value & 0xFFFF)) * 1000; return sprintf(buf, "%d\n", value); } diff --git a/drivers/platform/x86/intel_menlow.c b/drivers/platform/x86/intel_menlow.c index e8b46d2c468c..0a919d81662c 100644 --- a/drivers/platform/x86/intel_menlow.c +++ b/drivers/platform/x86/intel_menlow.c @@ -315,7 +315,7 @@ static ssize_t aux0_show(struct device *dev, result = sensor_get_auxtrip(attr->handle, 0, &value); - return result ? result : sprintf(buf, "%lu", KELVIN_TO_CELSIUS(value)); + return result ? result : sprintf(buf, "%lu", DECI_KELVIN_TO_CELSIUS(value)); } static ssize_t aux1_show(struct device *dev, @@ -327,7 +327,7 @@ static ssize_t aux1_show(struct device *dev, result = sensor_get_auxtrip(attr->handle, 1, &value); - return result ? result : sprintf(buf, "%lu", KELVIN_TO_CELSIUS(value)); + return result ? result : sprintf(buf, "%lu", DECI_KELVIN_TO_CELSIUS(value)); } static ssize_t aux0_store(struct device *dev, @@ -345,7 +345,7 @@ static ssize_t aux0_store(struct device *dev, if (value < 0) return -EINVAL; - result = sensor_set_auxtrip(attr->handle, 0, CELSIUS_TO_KELVIN(value)); + result = sensor_set_auxtrip(attr->handle, 0, CELSIUS_TO_DECI_KELVIN(value)); return result ? result : count; } @@ -364,7 +364,7 @@ static ssize_t aux1_store(struct device *dev, if (value < 0) return -EINVAL; - result = sensor_set_auxtrip(attr->handle, 1, CELSIUS_TO_KELVIN(value)); + result = sensor_set_auxtrip(attr->handle, 1, CELSIUS_TO_DECI_KELVIN(value)); return result ? result : count; } diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index 062630ab7424..2f4641a0e88b 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -92,6 +92,15 @@ config PWM_BCM2835 To compile this driver as a module, choose M here: the module will be called pwm-bcm2835. +config PWM_BERLIN + tristate "Marvell Berlin PWM support" + depends on ARCH_BERLIN + help + PWM framework driver for Marvell Berlin SoCs. + + To compile this driver as a module, choose M here: the module + will be called pwm-berlin. + config PWM_BFIN tristate "Blackfin PWM support" depends on BFIN_GPTIMERS @@ -101,6 +110,16 @@ config PWM_BFIN To compile this driver as a module, choose M here: the module will be called pwm-bfin. +config PWM_BRCMSTB + tristate "Broadcom STB PWM support" + depends on ARCH_BRCMSTB || BMIPS_GENERIC + help + Generic PWM framework driver for the Broadcom Set-top-Box + SoCs (BCM7xxx). + + To compile this driver as a module, choose M Here: the module + will be called pwm-brcmstb.c. + config PWM_CLPS711X tristate "CLPS711X PWM support" depends on ARCH_CLPS711X || COMPILE_TEST @@ -230,6 +249,17 @@ config PWM_LPSS_PLATFORM To compile this driver as a module, choose M here: the module will be called pwm-lpss-platform. +config PWM_MTK_DISP + tristate "MediaTek display PWM driver" + depends on ARCH_MEDIATEK || COMPILE_TEST + depends on HAS_IOMEM + help + Generic PWM framework driver for MediaTek disp-pwm device. + The PWM is used to control the backlight brightness for display. + + To compile this driver as a module, choose M here: the module + will be called pwm-mtk-disp. + config PWM_MXS tristate "Freescale MXS PWM support" depends on ARCH_MXS && OF @@ -242,7 +272,7 @@ config PWM_MXS config PWM_PCA9685 tristate "NXP PCA9685 PWM driver" - depends on OF && I2C + depends on I2C select REGMAP_I2C help Generic PWM framework driver for NXP PCA9685 LED controller. @@ -268,6 +298,17 @@ config PWM_PXA To compile this driver as a module, choose M here: the module will be called pwm-pxa. +config PWM_RCAR + tristate "Renesas R-Car PWM support" + depends on ARCH_RCAR_GEN1 || ARCH_RCAR_GEN2 || COMPILE_TEST + depends on HAS_IOMEM + help + This driver exposes the PWM Timer controller found in Renesas + R-Car chips through the PWM API. + + To compile this driver as a module, choose M here: the module + will be called pwm-rcar. + config PWM_RENESAS_TPU tristate "Renesas TPU PWM support" depends on ARCH_SHMOBILE || COMPILE_TEST @@ -338,7 +379,7 @@ config PWM_TEGRA config PWM_TIECAP tristate "ECAP PWM support" - depends on SOC_AM33XX || ARCH_DAVINCI_DA8XX + depends on ARCH_OMAP2PLUS || ARCH_DAVINCI_DA8XX help PWM driver support for the ECAP APWM controller found on AM33XX TI SOC @@ -348,7 +389,7 @@ config PWM_TIECAP config PWM_TIEHRPWM tristate "EHRPWM PWM support" - depends on SOC_AM33XX || ARCH_DAVINCI_DA8XX + depends on ARCH_OMAP2PLUS || ARCH_DAVINCI_DA8XX help PWM driver support for the EHRPWM controller found on AM33XX TI SOC @@ -358,7 +399,7 @@ config PWM_TIEHRPWM config PWM_TIPWMSS bool - default y if SOC_AM33XX && (PWM_TIECAP || PWM_TIEHRPWM) + default y if (ARCH_OMAP2PLUS) && (PWM_TIECAP || PWM_TIEHRPWM) help PWM Subsystem driver support for AM33xx SOC. diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile index a0e00c09ead3..69b8275f3c08 100644 --- a/drivers/pwm/Makefile +++ b/drivers/pwm/Makefile @@ -6,7 +6,9 @@ obj-$(CONFIG_PWM_ATMEL_HLCDC_PWM) += pwm-atmel-hlcdc.o obj-$(CONFIG_PWM_ATMEL_TCB) += pwm-atmel-tcb.o obj-$(CONFIG_PWM_BCM_KONA) += pwm-bcm-kona.o obj-$(CONFIG_PWM_BCM2835) += pwm-bcm2835.o +obj-$(CONFIG_PWM_BERLIN) += pwm-berlin.o obj-$(CONFIG_PWM_BFIN) += pwm-bfin.o +obj-$(CONFIG_PWM_BRCMSTB) += pwm-brcmstb.o obj-$(CONFIG_PWM_CLPS711X) += pwm-clps711x.o obj-$(CONFIG_PWM_CRC) += pwm-crc.o obj-$(CONFIG_PWM_EP93XX) += pwm-ep93xx.o @@ -20,10 +22,12 @@ obj-$(CONFIG_PWM_LPC32XX) += pwm-lpc32xx.o obj-$(CONFIG_PWM_LPSS) += pwm-lpss.o obj-$(CONFIG_PWM_LPSS_PCI) += pwm-lpss-pci.o obj-$(CONFIG_PWM_LPSS_PLATFORM) += pwm-lpss-platform.o +obj-$(CONFIG_PWM_MTK_DISP) += pwm-mtk-disp.o obj-$(CONFIG_PWM_MXS) += pwm-mxs.o obj-$(CONFIG_PWM_PCA9685) += pwm-pca9685.o obj-$(CONFIG_PWM_PUV3) += pwm-puv3.o obj-$(CONFIG_PWM_PXA) += pwm-pxa.o +obj-$(CONFIG_PWM_RCAR) += pwm-rcar.o obj-$(CONFIG_PWM_RENESAS_TPU) += pwm-renesas-tpu.o obj-$(CONFIG_PWM_ROCKCHIP) += pwm-rockchip.o obj-$(CONFIG_PWM_SAMSUNG) += pwm-samsung.o diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 3f9df3ea3350..d24ca5f281b4 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -269,6 +269,7 @@ int pwmchip_add_with_polarity(struct pwm_chip *chip, pwm->pwm = chip->base + i; pwm->hwpwm = i; pwm->polarity = polarity; + mutex_init(&pwm->lock); radix_tree_insert(&pwm_tree, pwm->pwm, pwm); } @@ -473,16 +474,22 @@ int pwm_set_polarity(struct pwm_device *pwm, enum pwm_polarity polarity) if (!pwm->chip->ops->set_polarity) return -ENOSYS; - if (pwm_is_enabled(pwm)) - return -EBUSY; + mutex_lock(&pwm->lock); + + if (pwm_is_enabled(pwm)) { + err = -EBUSY; + goto unlock; + } err = pwm->chip->ops->set_polarity(pwm->chip, pwm, polarity); if (err) - return err; + goto unlock; pwm->polarity = polarity; - return 0; +unlock: + mutex_unlock(&pwm->lock); + return err; } EXPORT_SYMBOL_GPL(pwm_set_polarity); @@ -494,10 +501,22 @@ EXPORT_SYMBOL_GPL(pwm_set_polarity); */ int pwm_enable(struct pwm_device *pwm) { - if (pwm && !test_and_set_bit(PWMF_ENABLED, &pwm->flags)) - return pwm->chip->ops->enable(pwm->chip, pwm); + int err = 0; - return pwm ? 0 : -EINVAL; + if (!pwm) + return -EINVAL; + + mutex_lock(&pwm->lock); + + if (!test_and_set_bit(PWMF_ENABLED, &pwm->flags)) { + err = pwm->chip->ops->enable(pwm->chip, pwm); + if (err) + clear_bit(PWMF_ENABLED, &pwm->flags); + } + + mutex_unlock(&pwm->lock); + + return err; } EXPORT_SYMBOL_GPL(pwm_enable); @@ -719,8 +738,10 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id) } } - if (!chosen) + if (!chosen) { + pwm = ERR_PTR(-ENODEV); goto out; + } chip = pwmchip_find_by_name(chosen->provider); if (!chip) diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c index 5df1db40fc07..f994c7eaf41c 100644 --- a/drivers/pwm/pwm-atmel-hlcdc.c +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -227,6 +227,9 @@ static const struct of_device_id atmel_hlcdc_dt_ids[] = { .data = &atmel_hlcdc_pwm_at91sam9x5_errata, }, { + .compatible = "atmel,sama5d2-hlcdc", + }, + { .compatible = "atmel,sama5d3-hlcdc", .data = &atmel_hlcdc_pwm_sama5d3_errata, }, @@ -236,6 +239,7 @@ static const struct of_device_id atmel_hlcdc_dt_ids[] = { }, { /* sentinel */ }, }; +MODULE_DEVICE_TABLE(of, atmel_hlcdc_dt_ids); static int atmel_hlcdc_pwm_probe(struct platform_device *pdev) { diff --git a/drivers/pwm/pwm-berlin.c b/drivers/pwm/pwm-berlin.c new file mode 100644 index 000000000000..65108129d505 --- /dev/null +++ b/drivers/pwm/pwm-berlin.c @@ -0,0 +1,219 @@ +/* + * Marvell Berlin PWM driver + * + * Copyright (C) 2015 Marvell Technology Group Ltd. + * + * Author: Antoine Tenart <antoine.tenart@free-electrons.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/clk.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/pwm.h> + +#define BERLIN_PWM_EN 0x0 +#define BERLIN_PWM_ENABLE BIT(0) +#define BERLIN_PWM_CONTROL 0x4 +#define BERLIN_PWM_PRESCALE_MASK 0x7 +#define BERLIN_PWM_PRESCALE_MAX 4096 +#define BERLIN_PWM_INVERT_POLARITY BIT(3) +#define BERLIN_PWM_DUTY 0x8 +#define BERLIN_PWM_TCNT 0xc +#define BERLIN_PWM_MAX_TCNT 65535 + +struct berlin_pwm_chip { + struct pwm_chip chip; + struct clk *clk; + void __iomem *base; +}; + +static inline struct berlin_pwm_chip *to_berlin_pwm_chip(struct pwm_chip *chip) +{ + return container_of(chip, struct berlin_pwm_chip, chip); +} + +static const u32 prescaler_table[] = { + 1, 4, 8, 16, 64, 256, 1024, 4096 +}; + +static inline u32 berlin_pwm_readl(struct berlin_pwm_chip *chip, + unsigned int channel, unsigned long offset) +{ + return readl_relaxed(chip->base + channel * 0x10 + offset); +} + +static inline void berlin_pwm_writel(struct berlin_pwm_chip *chip, + unsigned int channel, u32 value, + unsigned long offset) +{ + writel_relaxed(value, chip->base + channel * 0x10 + offset); +} + +static int berlin_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm_dev, + int duty_ns, int period_ns) +{ + struct berlin_pwm_chip *pwm = to_berlin_pwm_chip(chip); + unsigned int prescale; + u32 value, duty, period; + u64 cycles, tmp; + + cycles = clk_get_rate(pwm->clk); + cycles *= period_ns; + do_div(cycles, NSEC_PER_SEC); + + for (prescale = 0; prescale < ARRAY_SIZE(prescaler_table); prescale++) { + tmp = cycles; + do_div(tmp, prescaler_table[prescale]); + + if (tmp <= BERLIN_PWM_MAX_TCNT) + break; + } + + if (tmp > BERLIN_PWM_MAX_TCNT) + return -ERANGE; + + period = tmp; + cycles = tmp * duty_ns; + do_div(cycles, period_ns); + duty = cycles; + + value = berlin_pwm_readl(pwm, pwm_dev->hwpwm, BERLIN_PWM_CONTROL); + value &= ~BERLIN_PWM_PRESCALE_MASK; + value |= prescale; + berlin_pwm_writel(pwm, pwm_dev->hwpwm, value, BERLIN_PWM_CONTROL); + + berlin_pwm_writel(pwm, pwm_dev->hwpwm, duty, BERLIN_PWM_DUTY); + berlin_pwm_writel(pwm, pwm_dev->hwpwm, period, BERLIN_PWM_TCNT); + + return 0; +} + +static int berlin_pwm_set_polarity(struct pwm_chip *chip, + struct pwm_device *pwm_dev, + enum pwm_polarity polarity) +{ + struct berlin_pwm_chip *pwm = to_berlin_pwm_chip(chip); + u32 value; + + value = berlin_pwm_readl(pwm, pwm_dev->hwpwm, BERLIN_PWM_CONTROL); + + if (polarity == PWM_POLARITY_NORMAL) + value &= ~BERLIN_PWM_INVERT_POLARITY; + else + value |= BERLIN_PWM_INVERT_POLARITY; + + berlin_pwm_writel(pwm, pwm_dev->hwpwm, value, BERLIN_PWM_CONTROL); + + return 0; +} + +static int berlin_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm_dev) +{ + struct berlin_pwm_chip *pwm = to_berlin_pwm_chip(chip); + u32 value; + + value = berlin_pwm_readl(pwm, pwm_dev->hwpwm, BERLIN_PWM_EN); + value |= BERLIN_PWM_ENABLE; + berlin_pwm_writel(pwm, pwm_dev->hwpwm, value, BERLIN_PWM_EN); + + return 0; +} + +static void berlin_pwm_disable(struct pwm_chip *chip, + struct pwm_device *pwm_dev) +{ + struct berlin_pwm_chip *pwm = to_berlin_pwm_chip(chip); + u32 value; + + value = berlin_pwm_readl(pwm, pwm_dev->hwpwm, BERLIN_PWM_EN); + value &= ~BERLIN_PWM_ENABLE; + berlin_pwm_writel(pwm, pwm_dev->hwpwm, value, BERLIN_PWM_EN); +} + +static const struct pwm_ops berlin_pwm_ops = { + .config = berlin_pwm_config, + .set_polarity = berlin_pwm_set_polarity, + .enable = berlin_pwm_enable, + .disable = berlin_pwm_disable, + .owner = THIS_MODULE, +}; + +static const struct of_device_id berlin_pwm_match[] = { + { .compatible = "marvell,berlin-pwm" }, + { }, +}; +MODULE_DEVICE_TABLE(of, berlin_pwm_match); + +static int berlin_pwm_probe(struct platform_device *pdev) +{ + struct berlin_pwm_chip *pwm; + struct resource *res; + int ret; + + pwm = devm_kzalloc(&pdev->dev, sizeof(*pwm), GFP_KERNEL); + if (!pwm) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + pwm->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(pwm->base)) + return PTR_ERR(pwm->base); + + pwm->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(pwm->clk)) + return PTR_ERR(pwm->clk); + + ret = clk_prepare_enable(pwm->clk); + if (ret) + return ret; + + pwm->chip.dev = &pdev->dev; + pwm->chip.ops = &berlin_pwm_ops; + pwm->chip.base = -1; + pwm->chip.npwm = 4; + pwm->chip.can_sleep = true; + pwm->chip.of_xlate = of_pwm_xlate_with_flags; + pwm->chip.of_pwm_n_cells = 3; + + ret = pwmchip_add(&pwm->chip); + if (ret < 0) { + dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret); + clk_disable_unprepare(pwm->clk); + return ret; + } + + platform_set_drvdata(pdev, pwm); + + return 0; +} + +static int berlin_pwm_remove(struct platform_device *pdev) +{ + struct berlin_pwm_chip *pwm = platform_get_drvdata(pdev); + int ret; + + ret = pwmchip_remove(&pwm->chip); + clk_disable_unprepare(pwm->clk); + + return ret; +} + +static struct platform_driver berlin_pwm_driver = { + .probe = berlin_pwm_probe, + .remove = berlin_pwm_remove, + .driver = { + .name = "berlin-pwm", + .of_match_table = berlin_pwm_match, + }, +}; +module_platform_driver(berlin_pwm_driver); + +MODULE_AUTHOR("Antoine Tenart <antoine.tenart@free-electrons.com>"); +MODULE_DESCRIPTION("Marvell Berlin PWM driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pwm/pwm-brcmstb.c b/drivers/pwm/pwm-brcmstb.c new file mode 100644 index 000000000000..423ce087cd9c --- /dev/null +++ b/drivers/pwm/pwm-brcmstb.c @@ -0,0 +1,343 @@ +/* + * Broadcom BCM7038 PWM driver + * Author: Florian Fainelli + * + * Copyright (C) 2015 Broadcom Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/clk.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pwm.h> +#include <linux/spinlock.h> + +#define PWM_CTRL 0x00 +#define CTRL_START BIT(0) +#define CTRL_OEB BIT(1) +#define CTRL_FORCE_HIGH BIT(2) +#define CTRL_OPENDRAIN BIT(3) +#define CTRL_CHAN_OFFS 4 + +#define PWM_CTRL2 0x04 +#define CTRL2_OUT_SELECT BIT(0) + +#define PWM_CH_SIZE 0x8 + +#define PWM_CWORD_MSB(ch) (0x08 + ((ch) * PWM_CH_SIZE)) +#define PWM_CWORD_LSB(ch) (0x0c + ((ch) * PWM_CH_SIZE)) + +/* Number of bits for the CWORD value */ +#define CWORD_BIT_SIZE 16 + +/* + * Maximum control word value allowed when variable-frequency PWM is used as a + * clock for the constant-frequency PMW. + */ +#define CONST_VAR_F_MAX 32768 +#define CONST_VAR_F_MIN 1 + +#define PWM_ON(ch) (0x18 + ((ch) * PWM_CH_SIZE)) +#define PWM_ON_MIN 1 +#define PWM_PERIOD(ch) (0x1c + ((ch) * PWM_CH_SIZE)) +#define PWM_PERIOD_MIN 0 + +#define PWM_ON_PERIOD_MAX 0xff + +struct brcmstb_pwm { + void __iomem *base; + spinlock_t lock; + struct clk *clk; + struct pwm_chip chip; +}; + +static inline u32 brcmstb_pwm_readl(struct brcmstb_pwm *p, + unsigned int offset) +{ + if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) + return __raw_readl(p->base + offset); + else + return readl_relaxed(p->base + offset); +} + +static inline void brcmstb_pwm_writel(struct brcmstb_pwm *p, u32 value, + unsigned int offset) +{ + if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) + __raw_writel(value, p->base + offset); + else + writel_relaxed(value, p->base + offset); +} + +static inline struct brcmstb_pwm *to_brcmstb_pwm(struct pwm_chip *chip) +{ + return container_of(chip, struct brcmstb_pwm, chip); +} + +/* + * Fv is derived from the variable frequency output. The variable frequency + * output is configured using this formula: + * + * W = cword, if cword < 2 ^ 15 else 16-bit 2's complement of cword + * + * Fv = W x 2 ^ -16 x 27Mhz (reference clock) + * + * The period is: (period + 1) / Fv and "on" time is on / (period + 1) + * + * The PWM core framework specifies that the "duty_ns" parameter is in fact the + * "on" time, so this translates directly into our HW programming here. + */ +static int brcmstb_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, + int duty_ns, int period_ns) +{ + struct brcmstb_pwm *p = to_brcmstb_pwm(chip); + unsigned long pc, dc, cword = CONST_VAR_F_MAX; + unsigned int channel = pwm->hwpwm; + u32 value; + + /* + * If asking for a duty_ns equal to period_ns, we need to substract + * the period value by 1 to make it shorter than the "on" time and + * produce a flat 100% duty cycle signal, and max out the "on" time + */ + if (duty_ns == period_ns) { + dc = PWM_ON_PERIOD_MAX; + pc = PWM_ON_PERIOD_MAX - 1; + goto done; + } + + while (1) { + u64 rate, tmp; + + /* + * Calculate the base rate from base frequency and current + * cword + */ + rate = (u64)clk_get_rate(p->clk) * (u64)cword; + do_div(rate, 1 << CWORD_BIT_SIZE); + + tmp = period_ns * rate; + do_div(tmp, NSEC_PER_SEC); + pc = tmp; + + tmp = (duty_ns + 1) * rate; + do_div(tmp, NSEC_PER_SEC); + dc = tmp; + + /* + * We can be called with separate duty and period updates, + * so do not reject dc == 0 right away + */ + if (pc == PWM_PERIOD_MIN || (dc < PWM_ON_MIN && duty_ns)) + return -EINVAL; + + /* We converged on a calculation */ + if (pc <= PWM_ON_PERIOD_MAX && dc <= PWM_ON_PERIOD_MAX) + break; + + /* + * The cword needs to be a power of 2 for the variable + * frequency generator to output a 50% duty cycle variable + * frequency which is used as input clock to the fixed + * frequency generator. + */ + cword >>= 1; + + /* + * Desired periods are too large, we do not have a divider + * for them + */ + if (cword < CONST_VAR_F_MIN) + return -EINVAL; + } + +done: + /* + * Configure the defined "cword" value to have the variable frequency + * generator output a base frequency for the constant frequency + * generator to derive from. + */ + spin_lock(&p->lock); + brcmstb_pwm_writel(p, cword >> 8, PWM_CWORD_MSB(channel)); + brcmstb_pwm_writel(p, cword & 0xff, PWM_CWORD_LSB(channel)); + + /* Select constant frequency signal output */ + value = brcmstb_pwm_readl(p, PWM_CTRL2); + value |= CTRL2_OUT_SELECT << (channel * CTRL_CHAN_OFFS); + brcmstb_pwm_writel(p, value, PWM_CTRL2); + + /* Configure on and period value */ + brcmstb_pwm_writel(p, pc, PWM_PERIOD(channel)); + brcmstb_pwm_writel(p, dc, PWM_ON(channel)); + spin_unlock(&p->lock); + + return 0; +} + +static inline void brcmstb_pwm_enable_set(struct brcmstb_pwm *p, + unsigned int channel, bool enable) +{ + unsigned int shift = channel * CTRL_CHAN_OFFS; + u32 value; + + spin_lock(&p->lock); + value = brcmstb_pwm_readl(p, PWM_CTRL); + + if (enable) { + value &= ~(CTRL_OEB << shift); + value |= (CTRL_START | CTRL_OPENDRAIN) << shift; + } else { + value &= ~((CTRL_START | CTRL_OPENDRAIN) << shift); + value |= CTRL_OEB << shift; + } + + brcmstb_pwm_writel(p, value, PWM_CTRL); + spin_unlock(&p->lock); +} + +static int brcmstb_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct brcmstb_pwm *p = to_brcmstb_pwm(chip); + + brcmstb_pwm_enable_set(p, pwm->hwpwm, true); + + return 0; +} + +static void brcmstb_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct brcmstb_pwm *p = to_brcmstb_pwm(chip); + + brcmstb_pwm_enable_set(p, pwm->hwpwm, false); +} + +static const struct pwm_ops brcmstb_pwm_ops = { + .config = brcmstb_pwm_config, + .enable = brcmstb_pwm_enable, + .disable = brcmstb_pwm_disable, + .owner = THIS_MODULE, +}; + +static const struct of_device_id brcmstb_pwm_of_match[] = { + { .compatible = "brcm,bcm7038-pwm", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, brcmstb_pwm_of_match); + +static int brcmstb_pwm_probe(struct platform_device *pdev) +{ + struct brcmstb_pwm *p; + struct resource *res; + int ret; + + p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + spin_lock_init(&p->lock); + + p->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(p->clk)) { + dev_err(&pdev->dev, "failed to obtain clock\n"); + return PTR_ERR(p->clk); + } + + ret = clk_prepare_enable(p->clk); + if (ret < 0) { + dev_err(&pdev->dev, "failed to enable clock: %d\n", ret); + return ret; + } + + platform_set_drvdata(pdev, p); + + p->chip.dev = &pdev->dev; + p->chip.ops = &brcmstb_pwm_ops; + p->chip.base = -1; + p->chip.npwm = 2; + p->chip.can_sleep = true; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + p->base = devm_ioremap_resource(&pdev->dev, res); + if (!p->base) { + ret = -ENOMEM; + goto out_clk; + } + + ret = pwmchip_add(&p->chip); + if (ret) { + dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret); + goto out_clk; + } + + return 0; + +out_clk: + clk_disable_unprepare(p->clk); + return ret; +} + +static int brcmstb_pwm_remove(struct platform_device *pdev) +{ + struct brcmstb_pwm *p = platform_get_drvdata(pdev); + int ret; + + ret = pwmchip_remove(&p->chip); + clk_disable_unprepare(p->clk); + + return ret; +} + +#ifdef CONFIG_PM_SLEEP +static int brcmstb_pwm_suspend(struct device *dev) +{ + struct brcmstb_pwm *p = dev_get_drvdata(dev); + + clk_disable(p->clk); + + return 0; +} + +static int brcmstb_pwm_resume(struct device *dev) +{ + struct brcmstb_pwm *p = dev_get_drvdata(dev); + + clk_enable(p->clk); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(brcmstb_pwm_pm_ops, brcmstb_pwm_suspend, + brcmstb_pwm_resume); + +static struct platform_driver brcmstb_pwm_driver = { + .probe = brcmstb_pwm_probe, + .remove = brcmstb_pwm_remove, + .driver = { + .name = "pwm-brcmstb", + .of_match_table = brcmstb_pwm_of_match, + .pm = &brcmstb_pwm_pm_ops, + }, +}; +module_platform_driver(brcmstb_pwm_driver); + +MODULE_AUTHOR("Florian Fainelli <f.fainelli@gmail.com>"); +MODULE_DESCRIPTION("Broadcom STB PWM driver"); +MODULE_ALIAS("platform:pwm-brcmstb"); +MODULE_LICENSE("GPL"); diff --git a/drivers/pwm/pwm-lpss-pci.c b/drivers/pwm/pwm-lpss-pci.c index 45042c1b2046..7160e8ab38a4 100644 --- a/drivers/pwm/pwm-lpss-pci.c +++ b/drivers/pwm/pwm-lpss-pci.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/pci.h> +#include <linux/pm_runtime.h> #include "pwm-lpss.h" @@ -33,6 +34,10 @@ static int pwm_lpss_probe_pci(struct pci_dev *pdev, return PTR_ERR(lpwm); pci_set_drvdata(pdev, lpwm); + + pm_runtime_put(&pdev->dev); + pm_runtime_allow(&pdev->dev); + return 0; } @@ -40,16 +45,41 @@ static void pwm_lpss_remove_pci(struct pci_dev *pdev) { struct pwm_lpss_chip *lpwm = pci_get_drvdata(pdev); + pm_runtime_forbid(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + pwm_lpss_remove(lpwm); } +#ifdef CONFIG_PM +static int pwm_lpss_runtime_suspend_pci(struct device *dev) +{ + /* + * The PCI core will handle transition to D3 automatically. We only + * need to provide runtime PM hooks for that to happen. + */ + return 0; +} + +static int pwm_lpss_runtime_resume_pci(struct device *dev) +{ + return 0; +} +#endif + +static const struct dev_pm_ops pwm_lpss_pci_pm = { + SET_RUNTIME_PM_OPS(pwm_lpss_runtime_suspend_pci, + pwm_lpss_runtime_resume_pci, NULL) +}; + static const struct pci_device_id pwm_lpss_pci_ids[] = { - { PCI_VDEVICE(INTEL, 0x0ac8), (unsigned long)&pwm_lpss_bsw_info}, + { PCI_VDEVICE(INTEL, 0x0ac8), (unsigned long)&pwm_lpss_bxt_info}, { PCI_VDEVICE(INTEL, 0x0f08), (unsigned long)&pwm_lpss_byt_info}, { PCI_VDEVICE(INTEL, 0x0f09), (unsigned long)&pwm_lpss_byt_info}, - { PCI_VDEVICE(INTEL, 0x1ac8), (unsigned long)&pwm_lpss_bsw_info}, + { PCI_VDEVICE(INTEL, 0x1ac8), (unsigned long)&pwm_lpss_bxt_info}, { PCI_VDEVICE(INTEL, 0x2288), (unsigned long)&pwm_lpss_bsw_info}, { PCI_VDEVICE(INTEL, 0x2289), (unsigned long)&pwm_lpss_bsw_info}, + { PCI_VDEVICE(INTEL, 0x5ac8), (unsigned long)&pwm_lpss_bxt_info}, { }, }; MODULE_DEVICE_TABLE(pci, pwm_lpss_pci_ids); @@ -59,6 +89,9 @@ static struct pci_driver pwm_lpss_driver_pci = { .id_table = pwm_lpss_pci_ids, .probe = pwm_lpss_probe_pci, .remove = pwm_lpss_remove_pci, + .driver = { + .pm = &pwm_lpss_pci_pm, + }, }; module_pci_driver(pwm_lpss_driver_pci); diff --git a/drivers/pwm/pwm-lpss-platform.c b/drivers/pwm/pwm-lpss-platform.c index 18a9c880a76d..54433fc6d1a4 100644 --- a/drivers/pwm/pwm-lpss-platform.c +++ b/drivers/pwm/pwm-lpss-platform.c @@ -14,6 +14,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include "pwm-lpss.h" @@ -36,6 +37,10 @@ static int pwm_lpss_probe_platform(struct platform_device *pdev) return PTR_ERR(lpwm); platform_set_drvdata(pdev, lpwm); + + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + return 0; } @@ -43,12 +48,14 @@ static int pwm_lpss_remove_platform(struct platform_device *pdev) { struct pwm_lpss_chip *lpwm = platform_get_drvdata(pdev); + pm_runtime_disable(&pdev->dev); return pwm_lpss_remove(lpwm); } static const struct acpi_device_id pwm_lpss_acpi_match[] = { { "80860F09", (unsigned long)&pwm_lpss_byt_info }, { "80862288", (unsigned long)&pwm_lpss_bsw_info }, + { "80865AC8", (unsigned long)&pwm_lpss_bxt_info }, { }, }; MODULE_DEVICE_TABLE(acpi, pwm_lpss_acpi_match); diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index e9798253a16f..25044104003b 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -16,6 +16,7 @@ #include <linux/io.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/pm_runtime.h> #include "pwm-lpss.h" @@ -29,6 +30,9 @@ #define PWM_LIMIT (0x8000 + PWM_DIVISION_CORRECTION) #define NSECS_PER_SEC 1000000000UL +/* Size of each PWM register space if multiple */ +#define PWM_SIZE 0x400 + struct pwm_lpss_chip { struct pwm_chip chip; void __iomem *regs; @@ -37,21 +41,44 @@ struct pwm_lpss_chip { /* BayTrail */ const struct pwm_lpss_boardinfo pwm_lpss_byt_info = { - .clk_rate = 25000000 + .clk_rate = 25000000, + .npwm = 1, }; EXPORT_SYMBOL_GPL(pwm_lpss_byt_info); /* Braswell */ const struct pwm_lpss_boardinfo pwm_lpss_bsw_info = { - .clk_rate = 19200000 + .clk_rate = 19200000, + .npwm = 1, }; EXPORT_SYMBOL_GPL(pwm_lpss_bsw_info); +/* Broxton */ +const struct pwm_lpss_boardinfo pwm_lpss_bxt_info = { + .clk_rate = 19200000, + .npwm = 4, +}; +EXPORT_SYMBOL_GPL(pwm_lpss_bxt_info); + static inline struct pwm_lpss_chip *to_lpwm(struct pwm_chip *chip) { return container_of(chip, struct pwm_lpss_chip, chip); } +static inline u32 pwm_lpss_read(const struct pwm_device *pwm) +{ + struct pwm_lpss_chip *lpwm = to_lpwm(pwm->chip); + + return readl(lpwm->regs + pwm->hwpwm * PWM_SIZE + PWM); +} + +static inline void pwm_lpss_write(const struct pwm_device *pwm, u32 value) +{ + struct pwm_lpss_chip *lpwm = to_lpwm(pwm->chip); + + writel(value, lpwm->regs + pwm->hwpwm * PWM_SIZE + PWM); +} + static int pwm_lpss_config(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns) { @@ -79,38 +106,36 @@ static int pwm_lpss_config(struct pwm_chip *chip, struct pwm_device *pwm, duty_ns = 1; on_time_div = 255 - (255 * duty_ns / period_ns); - ctrl = readl(lpwm->regs + PWM); + pm_runtime_get_sync(chip->dev); + + ctrl = pwm_lpss_read(pwm); ctrl &= ~(PWM_BASE_UNIT_MASK | PWM_ON_TIME_DIV_MASK); ctrl |= (u16) base_unit << PWM_BASE_UNIT_SHIFT; ctrl |= on_time_div; /* request PWM to update on next cycle */ ctrl |= PWM_SW_UPDATE; - writel(ctrl, lpwm->regs + PWM); + pwm_lpss_write(pwm, ctrl); + + pm_runtime_put(chip->dev); return 0; } static int pwm_lpss_enable(struct pwm_chip *chip, struct pwm_device *pwm) { - struct pwm_lpss_chip *lpwm = to_lpwm(chip); - u32 ctrl; - - ctrl = readl(lpwm->regs + PWM); - writel(ctrl | PWM_ENABLE, lpwm->regs + PWM); - + pm_runtime_get_sync(chip->dev); + pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_ENABLE); return 0; } static void pwm_lpss_disable(struct pwm_chip *chip, struct pwm_device *pwm) { - struct pwm_lpss_chip *lpwm = to_lpwm(chip); - u32 ctrl; - - ctrl = readl(lpwm->regs + PWM); - writel(ctrl & ~PWM_ENABLE, lpwm->regs + PWM); + pwm_lpss_write(pwm, pwm_lpss_read(pwm) & ~PWM_ENABLE); + pm_runtime_put(chip->dev); } static const struct pwm_ops pwm_lpss_ops = { + .free = pwm_lpss_disable, .config = pwm_lpss_config, .enable = pwm_lpss_enable, .disable = pwm_lpss_disable, @@ -135,7 +160,7 @@ struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r, lpwm->chip.dev = dev; lpwm->chip.ops = &pwm_lpss_ops; lpwm->chip.base = -1; - lpwm->chip.npwm = 1; + lpwm->chip.npwm = info->npwm; ret = pwmchip_add(&lpwm->chip); if (ret) { @@ -149,11 +174,6 @@ EXPORT_SYMBOL_GPL(pwm_lpss_probe); int pwm_lpss_remove(struct pwm_lpss_chip *lpwm) { - u32 ctrl; - - ctrl = readl(lpwm->regs + PWM); - writel(ctrl & ~PWM_ENABLE, lpwm->regs + PWM); - return pwmchip_remove(&lpwm->chip); } EXPORT_SYMBOL_GPL(pwm_lpss_remove); diff --git a/drivers/pwm/pwm-lpss.h b/drivers/pwm/pwm-lpss.h index aa041bb1b67d..e8cf337ae1d1 100644 --- a/drivers/pwm/pwm-lpss.h +++ b/drivers/pwm/pwm-lpss.h @@ -20,10 +20,12 @@ struct pwm_lpss_chip; struct pwm_lpss_boardinfo { unsigned long clk_rate; + unsigned int npwm; }; extern const struct pwm_lpss_boardinfo pwm_lpss_byt_info; extern const struct pwm_lpss_boardinfo pwm_lpss_bsw_info; +extern const struct pwm_lpss_boardinfo pwm_lpss_bxt_info; struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r, const struct pwm_lpss_boardinfo *info); diff --git a/drivers/pwm/pwm-mtk-disp.c b/drivers/pwm/pwm-mtk-disp.c new file mode 100644 index 000000000000..0ad3385298c0 --- /dev/null +++ b/drivers/pwm/pwm-mtk-disp.c @@ -0,0 +1,243 @@ +/* + * MediaTek display pulse-width-modulation controller driver. + * Copyright (c) 2015 MediaTek Inc. + * Author: YH Huang <yh.huang@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pwm.h> +#include <linux/slab.h> + +#define DISP_PWM_EN 0x00 +#define PWM_ENABLE_MASK BIT(0) + +#define DISP_PWM_COMMIT 0x08 +#define PWM_COMMIT_MASK BIT(0) + +#define DISP_PWM_CON_0 0x10 +#define PWM_CLKDIV_SHIFT 16 +#define PWM_CLKDIV_MAX 0x3ff +#define PWM_CLKDIV_MASK (PWM_CLKDIV_MAX << PWM_CLKDIV_SHIFT) + +#define DISP_PWM_CON_1 0x14 +#define PWM_PERIOD_BIT_WIDTH 12 +#define PWM_PERIOD_MASK ((1 << PWM_PERIOD_BIT_WIDTH) - 1) + +#define PWM_HIGH_WIDTH_SHIFT 16 +#define PWM_HIGH_WIDTH_MASK (0x1fff << PWM_HIGH_WIDTH_SHIFT) + +struct mtk_disp_pwm { + struct pwm_chip chip; + struct clk *clk_main; + struct clk *clk_mm; + void __iomem *base; +}; + +static inline struct mtk_disp_pwm *to_mtk_disp_pwm(struct pwm_chip *chip) +{ + return container_of(chip, struct mtk_disp_pwm, chip); +} + +static void mtk_disp_pwm_update_bits(struct mtk_disp_pwm *mdp, u32 offset, + u32 mask, u32 data) +{ + void __iomem *address = mdp->base + offset; + u32 value; + + value = readl(address); + value &= ~mask; + value |= data; + writel(value, address); +} + +static int mtk_disp_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, + int duty_ns, int period_ns) +{ + struct mtk_disp_pwm *mdp = to_mtk_disp_pwm(chip); + u32 clk_div, period, high_width, value; + u64 div, rate; + int err; + + /* + * Find period, high_width and clk_div to suit duty_ns and period_ns. + * Calculate proper div value to keep period value in the bound. + * + * period_ns = 10^9 * (clk_div + 1) * (period + 1) / PWM_CLK_RATE + * duty_ns = 10^9 * (clk_div + 1) * high_width / PWM_CLK_RATE + * + * period = (PWM_CLK_RATE * period_ns) / (10^9 * (clk_div + 1)) - 1 + * high_width = (PWM_CLK_RATE * duty_ns) / (10^9 * (clk_div + 1)) + */ + rate = clk_get_rate(mdp->clk_main); + clk_div = div_u64(rate * period_ns, NSEC_PER_SEC) >> + PWM_PERIOD_BIT_WIDTH; + if (clk_div > PWM_CLKDIV_MAX) + return -EINVAL; + + div = NSEC_PER_SEC * (clk_div + 1); + period = div64_u64(rate * period_ns, div); + if (period > 0) + period--; + + high_width = div64_u64(rate * duty_ns, div); + value = period | (high_width << PWM_HIGH_WIDTH_SHIFT); + + err = clk_enable(mdp->clk_main); + if (err < 0) + return err; + + err = clk_enable(mdp->clk_mm); + if (err < 0) { + clk_disable(mdp->clk_main); + return err; + } + + mtk_disp_pwm_update_bits(mdp, DISP_PWM_CON_0, PWM_CLKDIV_MASK, + clk_div << PWM_CLKDIV_SHIFT); + mtk_disp_pwm_update_bits(mdp, DISP_PWM_CON_1, + PWM_PERIOD_MASK | PWM_HIGH_WIDTH_MASK, value); + mtk_disp_pwm_update_bits(mdp, DISP_PWM_COMMIT, PWM_COMMIT_MASK, 1); + mtk_disp_pwm_update_bits(mdp, DISP_PWM_COMMIT, PWM_COMMIT_MASK, 0); + + clk_disable(mdp->clk_mm); + clk_disable(mdp->clk_main); + + return 0; +} + +static int mtk_disp_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct mtk_disp_pwm *mdp = to_mtk_disp_pwm(chip); + int err; + + err = clk_enable(mdp->clk_main); + if (err < 0) + return err; + + err = clk_enable(mdp->clk_mm); + if (err < 0) { + clk_disable(mdp->clk_main); + return err; + } + + mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, PWM_ENABLE_MASK, 1); + + return 0; +} + +static void mtk_disp_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct mtk_disp_pwm *mdp = to_mtk_disp_pwm(chip); + + mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, PWM_ENABLE_MASK, 0); + + clk_disable(mdp->clk_mm); + clk_disable(mdp->clk_main); +} + +static const struct pwm_ops mtk_disp_pwm_ops = { + .config = mtk_disp_pwm_config, + .enable = mtk_disp_pwm_enable, + .disable = mtk_disp_pwm_disable, + .owner = THIS_MODULE, +}; + +static int mtk_disp_pwm_probe(struct platform_device *pdev) +{ + struct mtk_disp_pwm *mdp; + struct resource *r; + int ret; + + mdp = devm_kzalloc(&pdev->dev, sizeof(*mdp), GFP_KERNEL); + if (!mdp) + return -ENOMEM; + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + mdp->base = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(mdp->base)) + return PTR_ERR(mdp->base); + + mdp->clk_main = devm_clk_get(&pdev->dev, "main"); + if (IS_ERR(mdp->clk_main)) + return PTR_ERR(mdp->clk_main); + + mdp->clk_mm = devm_clk_get(&pdev->dev, "mm"); + if (IS_ERR(mdp->clk_mm)) + return PTR_ERR(mdp->clk_mm); + + ret = clk_prepare(mdp->clk_main); + if (ret < 0) + return ret; + + ret = clk_prepare(mdp->clk_mm); + if (ret < 0) + goto disable_clk_main; + + mdp->chip.dev = &pdev->dev; + mdp->chip.ops = &mtk_disp_pwm_ops; + mdp->chip.base = -1; + mdp->chip.npwm = 1; + + ret = pwmchip_add(&mdp->chip); + if (ret < 0) { + dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret); + goto disable_clk_mm; + } + + platform_set_drvdata(pdev, mdp); + + return 0; + +disable_clk_mm: + clk_unprepare(mdp->clk_mm); +disable_clk_main: + clk_unprepare(mdp->clk_main); + return ret; +} + +static int mtk_disp_pwm_remove(struct platform_device *pdev) +{ + struct mtk_disp_pwm *mdp = platform_get_drvdata(pdev); + int ret; + + ret = pwmchip_remove(&mdp->chip); + clk_unprepare(mdp->clk_mm); + clk_unprepare(mdp->clk_main); + + return ret; +} + +static const struct of_device_id mtk_disp_pwm_of_match[] = { + { .compatible = "mediatek,mt8173-disp-pwm" }, + { .compatible = "mediatek,mt6595-disp-pwm" }, + { } +}; +MODULE_DEVICE_TABLE(of, mtk_disp_pwm_of_match); + +static struct platform_driver mtk_disp_pwm_driver = { + .driver = { + .name = "mediatek-disp-pwm", + .of_match_table = mtk_disp_pwm_of_match, + }, + .probe = mtk_disp_pwm_probe, + .remove = mtk_disp_pwm_remove, +}; +module_platform_driver(mtk_disp_pwm_driver); + +MODULE_AUTHOR("YH Huang <yh.huang@mediatek.com>"); +MODULE_DESCRIPTION("MediaTek SoC display PWM driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index 70448a6079b0..117fccf7934a 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -19,9 +19,11 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/acpi.h> #include <linux/i2c.h> #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/property.h> #include <linux/pwm.h> #include <linux/regmap.h> #include <linux/slab.h> @@ -297,7 +299,6 @@ static const struct regmap_config pca9685_regmap_i2c_config = { static int pca9685_pwm_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct device_node *np = client->dev.of_node; struct pca9685 *pca; int ret; int mode2; @@ -320,12 +321,12 @@ static int pca9685_pwm_probe(struct i2c_client *client, regmap_read(pca->regmap, PCA9685_MODE2, &mode2); - if (of_property_read_bool(np, "invert")) + if (device_property_read_bool(&client->dev, "invert")) mode2 |= MODE2_INVRT; else mode2 &= ~MODE2_INVRT; - if (of_property_read_bool(np, "open-drain")) + if (device_property_read_bool(&client->dev, "open-drain")) mode2 &= ~MODE2_OUTDRV; else mode2 |= MODE2_OUTDRV; @@ -363,16 +364,27 @@ static const struct i2c_device_id pca9685_id[] = { }; MODULE_DEVICE_TABLE(i2c, pca9685_id); +#ifdef CONFIG_ACPI +static const struct acpi_device_id pca9685_acpi_ids[] = { + { "INT3492", 0 }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(acpi, pca9685_acpi_ids); +#endif + +#ifdef CONFIG_OF static const struct of_device_id pca9685_dt_ids[] = { { .compatible = "nxp,pca9685-pwm", }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, pca9685_dt_ids); +#endif static struct i2c_driver pca9685_i2c_driver = { .driver = { .name = "pca9685-pwm", - .of_match_table = pca9685_dt_ids, + .acpi_match_table = ACPI_PTR(pca9685_acpi_ids), + .of_match_table = of_match_ptr(pca9685_dt_ids), }, .probe = pca9685_pwm_probe, .remove = pca9685_pwm_remove, diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c new file mode 100644 index 000000000000..6e99a63ffa29 --- /dev/null +++ b/drivers/pwm/pwm-rcar.c @@ -0,0 +1,274 @@ +/* + * R-Car PWM Timer driver + * + * Copyright (C) 2015 Renesas Electronics Corporation + * + * This is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + */ + +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/pwm.h> +#include <linux/slab.h> + +#define RCAR_PWM_MAX_DIVISION 24 +#define RCAR_PWM_MAX_CYCLE 1023 + +#define RCAR_PWMCR 0x00 +#define RCAR_PWMCR_CC0_MASK 0x000f0000 +#define RCAR_PWMCR_CC0_SHIFT 16 +#define RCAR_PWMCR_CCMD BIT(15) +#define RCAR_PWMCR_SYNC BIT(11) +#define RCAR_PWMCR_SS0 BIT(4) +#define RCAR_PWMCR_EN0 BIT(0) + +#define RCAR_PWMCNT 0x04 +#define RCAR_PWMCNT_CYC0_MASK 0x03ff0000 +#define RCAR_PWMCNT_CYC0_SHIFT 16 +#define RCAR_PWMCNT_PH0_MASK 0x000003ff +#define RCAR_PWMCNT_PH0_SHIFT 0 + +struct rcar_pwm_chip { + struct pwm_chip chip; + void __iomem *base; + struct clk *clk; +}; + +static inline struct rcar_pwm_chip *to_rcar_pwm_chip(struct pwm_chip *chip) +{ + return container_of(chip, struct rcar_pwm_chip, chip); +} + +static void rcar_pwm_write(struct rcar_pwm_chip *rp, u32 data, + unsigned int offset) +{ + writel(data, rp->base + offset); +} + +static u32 rcar_pwm_read(struct rcar_pwm_chip *rp, unsigned int offset) +{ + return readl(rp->base + offset); +} + +static void rcar_pwm_update(struct rcar_pwm_chip *rp, u32 mask, u32 data, + unsigned int offset) +{ + u32 value; + + value = rcar_pwm_read(rp, offset); + value &= ~mask; + value |= data & mask; + rcar_pwm_write(rp, value, offset); +} + +static int rcar_pwm_get_clock_division(struct rcar_pwm_chip *rp, int period_ns) +{ + unsigned long clk_rate = clk_get_rate(rp->clk); + unsigned long long max; /* max cycle / nanoseconds */ + unsigned int div; + + if (clk_rate == 0) + return -EINVAL; + + for (div = 0; div <= RCAR_PWM_MAX_DIVISION; div++) { + max = (unsigned long long)NSEC_PER_SEC * RCAR_PWM_MAX_CYCLE * + (1 << div); + do_div(max, clk_rate); + if (period_ns < max) + break; + } + + return (div <= RCAR_PWM_MAX_DIVISION) ? div : -ERANGE; +} + +static void rcar_pwm_set_clock_control(struct rcar_pwm_chip *rp, + unsigned int div) +{ + u32 value; + + value = rcar_pwm_read(rp, RCAR_PWMCR); + value &= ~(RCAR_PWMCR_CCMD | RCAR_PWMCR_CC0_MASK); + + if (div & 1) + value |= RCAR_PWMCR_CCMD; + + div >>= 1; + + value |= div << RCAR_PWMCR_CC0_SHIFT; + rcar_pwm_write(rp, value, RCAR_PWMCR); +} + +static int rcar_pwm_set_counter(struct rcar_pwm_chip *rp, int div, int duty_ns, + int period_ns) +{ + unsigned long long one_cycle, tmp; /* 0.01 nanoseconds */ + unsigned long clk_rate = clk_get_rate(rp->clk); + u32 cyc, ph; + + one_cycle = (unsigned long long)NSEC_PER_SEC * 100ULL * (1 << div); + do_div(one_cycle, clk_rate); + + tmp = period_ns * 100ULL; + do_div(tmp, one_cycle); + cyc = (tmp << RCAR_PWMCNT_CYC0_SHIFT) & RCAR_PWMCNT_CYC0_MASK; + + tmp = duty_ns * 100ULL; + do_div(tmp, one_cycle); + ph = tmp & RCAR_PWMCNT_PH0_MASK; + + /* Avoid prohibited setting */ + if (cyc == 0 || ph == 0) + return -EINVAL; + + rcar_pwm_write(rp, cyc | ph, RCAR_PWMCNT); + + return 0; +} + +static int rcar_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct rcar_pwm_chip *rp = to_rcar_pwm_chip(chip); + + return clk_prepare_enable(rp->clk); +} + +static void rcar_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct rcar_pwm_chip *rp = to_rcar_pwm_chip(chip); + + clk_disable_unprepare(rp->clk); +} + +static int rcar_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, + int duty_ns, int period_ns) +{ + struct rcar_pwm_chip *rp = to_rcar_pwm_chip(chip); + int div, ret; + + div = rcar_pwm_get_clock_division(rp, period_ns); + if (div < 0) + return div; + + /* Let the core driver set pwm->period if disabled and duty_ns == 0 */ + if (!test_bit(PWMF_ENABLED, &pwm->flags) && !duty_ns) + return 0; + + rcar_pwm_update(rp, RCAR_PWMCR_SYNC, RCAR_PWMCR_SYNC, RCAR_PWMCR); + + ret = rcar_pwm_set_counter(rp, div, duty_ns, period_ns); + if (!ret) + rcar_pwm_set_clock_control(rp, div); + + /* The SYNC should be set to 0 even if rcar_pwm_set_counter failed */ + rcar_pwm_update(rp, RCAR_PWMCR_SYNC, 0, RCAR_PWMCR); + + return ret; +} + +static int rcar_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct rcar_pwm_chip *rp = to_rcar_pwm_chip(chip); + u32 value; + + /* Don't enable the PWM device if CYC0 or PH0 is 0 */ + value = rcar_pwm_read(rp, RCAR_PWMCNT); + if ((value & RCAR_PWMCNT_CYC0_MASK) == 0 || + (value & RCAR_PWMCNT_PH0_MASK) == 0) + return -EINVAL; + + rcar_pwm_update(rp, RCAR_PWMCR_EN0, RCAR_PWMCR_EN0, RCAR_PWMCR); + + return 0; +} + +static void rcar_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct rcar_pwm_chip *rp = to_rcar_pwm_chip(chip); + + rcar_pwm_update(rp, RCAR_PWMCR_EN0, 0, RCAR_PWMCR); +} + +static const struct pwm_ops rcar_pwm_ops = { + .request = rcar_pwm_request, + .free = rcar_pwm_free, + .config = rcar_pwm_config, + .enable = rcar_pwm_enable, + .disable = rcar_pwm_disable, + .owner = THIS_MODULE, +}; + +static int rcar_pwm_probe(struct platform_device *pdev) +{ + struct rcar_pwm_chip *rcar_pwm; + struct resource *res; + int ret; + + rcar_pwm = devm_kzalloc(&pdev->dev, sizeof(*rcar_pwm), GFP_KERNEL); + if (rcar_pwm == NULL) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + rcar_pwm->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(rcar_pwm->base)) + return PTR_ERR(rcar_pwm->base); + + rcar_pwm->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(rcar_pwm->clk)) { + dev_err(&pdev->dev, "cannot get clock\n"); + return PTR_ERR(rcar_pwm->clk); + } + + platform_set_drvdata(pdev, rcar_pwm); + + rcar_pwm->chip.dev = &pdev->dev; + rcar_pwm->chip.ops = &rcar_pwm_ops; + rcar_pwm->chip.base = -1; + rcar_pwm->chip.npwm = 1; + + ret = pwmchip_add(&rcar_pwm->chip); + if (ret < 0) { + dev_err(&pdev->dev, "failed to register PWM chip: %d\n", ret); + return ret; + } + + pm_runtime_enable(&pdev->dev); + + return 0; +} + +static int rcar_pwm_remove(struct platform_device *pdev) +{ + struct rcar_pwm_chip *rcar_pwm = platform_get_drvdata(pdev); + + pm_runtime_disable(&pdev->dev); + + return pwmchip_remove(&rcar_pwm->chip); +} + +static const struct of_device_id rcar_pwm_of_table[] = { + { .compatible = "renesas,pwm-rcar", }, + { }, +}; +MODULE_DEVICE_TABLE(of, rcar_pwm_of_table); + +static struct platform_driver rcar_pwm_driver = { + .probe = rcar_pwm_probe, + .remove = rcar_pwm_remove, + .driver = { + .name = "pwm-rcar", + .of_match_table = of_match_ptr(rcar_pwm_of_table), + } +}; +module_platform_driver(rcar_pwm_driver); + +MODULE_AUTHOR("Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>"); +MODULE_DESCRIPTION("Renesas PWM Timer Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:pwm-rcar"); diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c index cd9dde563018..67af9f62361f 100644 --- a/drivers/pwm/pwm-sun4i.c +++ b/drivers/pwm/pwm-sun4i.c @@ -68,6 +68,7 @@ static const u32 prescaler_table[] = { struct sun4i_pwm_data { bool has_prescaler_bypass; bool has_rdy; + unsigned int npwm; }; struct sun4i_pwm_chip { @@ -114,7 +115,7 @@ static int sun4i_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, * is not an integer so round it half up instead of * truncating to get less surprising values. */ - div = clk_rate * period_ns + NSEC_PER_SEC/2; + div = clk_rate * period_ns + NSEC_PER_SEC / 2; do_div(div, NSEC_PER_SEC); if (div - 1 > PWM_PRD_MASK) prescaler = 0; @@ -262,11 +263,25 @@ static const struct pwm_ops sun4i_pwm_ops = { static const struct sun4i_pwm_data sun4i_pwm_data_a10 = { .has_prescaler_bypass = false, .has_rdy = false, + .npwm = 2, +}; + +static const struct sun4i_pwm_data sun4i_pwm_data_a10s = { + .has_prescaler_bypass = true, + .has_rdy = true, + .npwm = 2, +}; + +static const struct sun4i_pwm_data sun4i_pwm_data_a13 = { + .has_prescaler_bypass = true, + .has_rdy = true, + .npwm = 1, }; static const struct sun4i_pwm_data sun4i_pwm_data_a20 = { .has_prescaler_bypass = true, .has_rdy = true, + .npwm = 2, }; static const struct of_device_id sun4i_pwm_dt_ids[] = { @@ -274,6 +289,12 @@ static const struct of_device_id sun4i_pwm_dt_ids[] = { .compatible = "allwinner,sun4i-a10-pwm", .data = &sun4i_pwm_data_a10, }, { + .compatible = "allwinner,sun5i-a10s-pwm", + .data = &sun4i_pwm_data_a10s, + }, { + .compatible = "allwinner,sun5i-a13-pwm", + .data = &sun4i_pwm_data_a13, + }, { .compatible = "allwinner,sun7i-a20-pwm", .data = &sun4i_pwm_data_a20, }, { @@ -305,14 +326,14 @@ static int sun4i_pwm_probe(struct platform_device *pdev) if (IS_ERR(pwm->clk)) return PTR_ERR(pwm->clk); + pwm->data = match->data; pwm->chip.dev = &pdev->dev; pwm->chip.ops = &sun4i_pwm_ops; pwm->chip.base = -1; - pwm->chip.npwm = 2; + pwm->chip.npwm = pwm->data->npwm; pwm->chip.can_sleep = true; pwm->chip.of_xlate = of_pwm_xlate_with_flags; pwm->chip.of_pwm_n_cells = 3; - pwm->data = match->data; spin_lock_init(&pwm->ctrl_lock); diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c index c472772f00a7..9c90886f4123 100644 --- a/drivers/pwm/sysfs.c +++ b/drivers/pwm/sysfs.c @@ -40,18 +40,18 @@ static struct pwm_device *child_to_pwm_device(struct device *child) return export->pwm; } -static ssize_t pwm_period_show(struct device *child, - struct device_attribute *attr, - char *buf) +static ssize_t period_show(struct device *child, + struct device_attribute *attr, + char *buf) { const struct pwm_device *pwm = child_to_pwm_device(child); return sprintf(buf, "%u\n", pwm_get_period(pwm)); } -static ssize_t pwm_period_store(struct device *child, - struct device_attribute *attr, - const char *buf, size_t size) +static ssize_t period_store(struct device *child, + struct device_attribute *attr, + const char *buf, size_t size) { struct pwm_device *pwm = child_to_pwm_device(child); unsigned int val; @@ -66,18 +66,18 @@ static ssize_t pwm_period_store(struct device *child, return ret ? : size; } -static ssize_t pwm_duty_cycle_show(struct device *child, - struct device_attribute *attr, - char *buf) +static ssize_t duty_cycle_show(struct device *child, + struct device_attribute *attr, + char *buf) { const struct pwm_device *pwm = child_to_pwm_device(child); return sprintf(buf, "%u\n", pwm_get_duty_cycle(pwm)); } -static ssize_t pwm_duty_cycle_store(struct device *child, - struct device_attribute *attr, - const char *buf, size_t size) +static ssize_t duty_cycle_store(struct device *child, + struct device_attribute *attr, + const char *buf, size_t size) { struct pwm_device *pwm = child_to_pwm_device(child); unsigned int val; @@ -92,19 +92,18 @@ static ssize_t pwm_duty_cycle_store(struct device *child, return ret ? : size; } -static ssize_t pwm_enable_show(struct device *child, - struct device_attribute *attr, - char *buf) +static ssize_t enable_show(struct device *child, + struct device_attribute *attr, + char *buf) { const struct pwm_device *pwm = child_to_pwm_device(child); - int enabled = pwm_is_enabled(pwm); - return sprintf(buf, "%d\n", enabled); + return sprintf(buf, "%d\n", pwm_is_enabled(pwm)); } -static ssize_t pwm_enable_store(struct device *child, - struct device_attribute *attr, - const char *buf, size_t size) +static ssize_t enable_store(struct device *child, + struct device_attribute *attr, + const char *buf, size_t size) { struct pwm_device *pwm = child_to_pwm_device(child); int val, ret; @@ -128,9 +127,9 @@ static ssize_t pwm_enable_store(struct device *child, return ret ? : size; } -static ssize_t pwm_polarity_show(struct device *child, - struct device_attribute *attr, - char *buf) +static ssize_t polarity_show(struct device *child, + struct device_attribute *attr, + char *buf) { const struct pwm_device *pwm = child_to_pwm_device(child); const char *polarity = "unknown"; @@ -148,9 +147,9 @@ static ssize_t pwm_polarity_show(struct device *child, return sprintf(buf, "%s\n", polarity); } -static ssize_t pwm_polarity_store(struct device *child, - struct device_attribute *attr, - const char *buf, size_t size) +static ssize_t polarity_store(struct device *child, + struct device_attribute *attr, + const char *buf, size_t size) { struct pwm_device *pwm = child_to_pwm_device(child); enum pwm_polarity polarity; @@ -168,10 +167,10 @@ static ssize_t pwm_polarity_store(struct device *child, return ret ? : size; } -static DEVICE_ATTR(period, 0644, pwm_period_show, pwm_period_store); -static DEVICE_ATTR(duty_cycle, 0644, pwm_duty_cycle_show, pwm_duty_cycle_store); -static DEVICE_ATTR(enable, 0644, pwm_enable_show, pwm_enable_store); -static DEVICE_ATTR(polarity, 0644, pwm_polarity_show, pwm_polarity_store); +static DEVICE_ATTR_RW(period); +static DEVICE_ATTR_RW(duty_cycle); +static DEVICE_ATTR_RW(enable); +static DEVICE_ATTR_RW(polarity); static struct attribute *pwm_attrs[] = { &dev_attr_period.attr, @@ -245,9 +244,9 @@ static int pwm_unexport_child(struct device *parent, struct pwm_device *pwm) return 0; } -static ssize_t pwm_export_store(struct device *parent, - struct device_attribute *attr, - const char *buf, size_t len) +static ssize_t export_store(struct device *parent, + struct device_attribute *attr, + const char *buf, size_t len) { struct pwm_chip *chip = dev_get_drvdata(parent); struct pwm_device *pwm; @@ -271,11 +270,11 @@ static ssize_t pwm_export_store(struct device *parent, return ret ? : len; } -static DEVICE_ATTR(export, 0200, NULL, pwm_export_store); +static DEVICE_ATTR_WO(export); -static ssize_t pwm_unexport_store(struct device *parent, - struct device_attribute *attr, - const char *buf, size_t len) +static ssize_t unexport_store(struct device *parent, + struct device_attribute *attr, + const char *buf, size_t len) { struct pwm_chip *chip = dev_get_drvdata(parent); unsigned int hwpwm; @@ -292,7 +291,7 @@ static ssize_t pwm_unexport_store(struct device *parent, return ret ? : len; } -static DEVICE_ATTR(unexport, 0200, NULL, pwm_unexport_store); +static DEVICE_ATTR_WO(unexport); static ssize_t npwm_show(struct device *parent, struct device_attribute *attr, char *buf) diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 5aabc4bc0d75..c463c89b90ef 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -147,6 +147,20 @@ config CLOCK_THERMAL device that is configured to use this cooling mechanism will be controlled to reduce clock frequency whenever temperature is high. +config DEVFREQ_THERMAL + bool "Generic device cooling support" + depends on PM_DEVFREQ + depends on PM_OPP + help + This implements the generic devfreq cooling mechanism through + frequency reduction for devices using devfreq. + + This will throttle the device by limiting the maximum allowed DVFS + frequency corresponding to the cooling level. + + In order to use the power extensions of the cooling device, + devfreq should use the simple_ondemand governor. + If you want this support, you should say Y here. config THERMAL_EMULATION @@ -275,6 +289,7 @@ config X86_PKG_TEMP_THERMAL tristate "X86 package temperature thermal driver" depends on X86_THERMAL_VECTOR select THERMAL_GOV_USER_SPACE + select THERMAL_WRITABLE_TRIPS default m help Enable this to register CPU digital sensor for package temperature as @@ -296,6 +311,7 @@ config INTEL_SOC_DTS_THERMAL tristate "Intel SoCs DTS thermal driver" depends on X86 select INTEL_SOC_DTS_IOSF_CORE + select THERMAL_WRITABLE_TRIPS help Enable this to register Intel SoCs (e.g. Bay Trail) platform digital temperature sensor (DTS). These SoCs have two additional DTSs in @@ -322,6 +338,7 @@ config INT340X_THERMAL select ACPI_THERMAL_REL select ACPI_FAN select INTEL_SOC_DTS_IOSF_CORE + select THERMAL_WRITABLE_TRIPS help Newer laptops and tablets that use ACPI may have thermal sensors and other devices with thermal control capabilities outside the core diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 26f160809959..cfae6a654793 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -22,6 +22,9 @@ thermal_sys-$(CONFIG_CPU_THERMAL) += cpu_cooling.o # clock cooling thermal_sys-$(CONFIG_CLOCK_THERMAL) += clock_cooling.o +# devfreq cooling +thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o + # platform thermal drivers obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM) += qcom-spmi-temp-alarm.o obj-$(CONFIG_SPEAR_THERMAL) += spear_thermal.o diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c index 26b8d326546a..ae75328945f7 100644 --- a/drivers/thermal/armada_thermal.c +++ b/drivers/thermal/armada_thermal.c @@ -224,9 +224,9 @@ static const struct armada_thermal_data armada380_data = { .is_valid_shift = 10, .temp_shift = 0, .temp_mask = 0x3ff, - .coef_b = 2931108200UL, - .coef_m = 5000000UL, - .coef_div = 10502, + .coef_b = 1172499100UL, + .coef_m = 2000096UL, + .coef_div = 4201, .inverted = true, }; diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 42c6f71bdcc1..e3fbc5a5d88f 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -591,8 +591,7 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, if (trace_thermal_power_cpu_get_power_enabled()) { u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus); - load_cpu = devm_kcalloc(&cdev->device, ncpus, sizeof(*load_cpu), - GFP_KERNEL); + load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL); } for_each_cpu(cpu, &cpufreq_device->allowed_cpus) { @@ -615,8 +614,7 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, dynamic_power = get_dynamic_power(cpufreq_device, freq); ret = get_static_power(cpufreq_device, tz, freq, &static_power); if (ret) { - if (load_cpu) - devm_kfree(&cdev->device, load_cpu); + kfree(load_cpu); return ret; } @@ -625,7 +623,7 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, &cpufreq_device->allowed_cpus, freq, load_cpu, i, dynamic_power, static_power); - devm_kfree(&cdev->device, load_cpu); + kfree(load_cpu); } *power = static_power + dynamic_power; diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c new file mode 100644 index 000000000000..01f0015f80dc --- /dev/null +++ b/drivers/thermal/devfreq_cooling.c @@ -0,0 +1,573 @@ +/* + * devfreq_cooling: Thermal cooling device implementation for devices using + * devfreq + * + * Copyright (C) 2014-2015 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * TODO: + * - If OPPs are added or removed after devfreq cooling has + * registered, the devfreq cooling won't react to it. + */ + +#include <linux/devfreq.h> +#include <linux/devfreq_cooling.h> +#include <linux/export.h> +#include <linux/slab.h> +#include <linux/pm_opp.h> +#include <linux/thermal.h> + +#include <trace/events/thermal.h> + +static DEFINE_MUTEX(devfreq_lock); +static DEFINE_IDR(devfreq_idr); + +/** + * struct devfreq_cooling_device - Devfreq cooling device + * @id: unique integer value corresponding to each + * devfreq_cooling_device registered. + * @cdev: Pointer to associated thermal cooling device. + * @devfreq: Pointer to associated devfreq device. + * @cooling_state: Current cooling state. + * @power_table: Pointer to table with maximum power draw for each + * cooling state. State is the index into the table, and + * the power is in mW. + * @freq_table: Pointer to a table with the frequencies sorted in descending + * order. You can index the table by cooling device state + * @freq_table_size: Size of the @freq_table and @power_table + * @power_ops: Pointer to devfreq_cooling_power, used to generate the + * @power_table. + */ +struct devfreq_cooling_device { + int id; + struct thermal_cooling_device *cdev; + struct devfreq *devfreq; + unsigned long cooling_state; + u32 *power_table; + u32 *freq_table; + size_t freq_table_size; + struct devfreq_cooling_power *power_ops; +}; + +/** + * get_idr - function to get a unique id. + * @idr: struct idr * handle used to create a id. + * @id: int * value generated by this function. + * + * This function will populate @id with an unique + * id, using the idr API. + * + * Return: 0 on success, an error code on failure. + */ +static int get_idr(struct idr *idr, int *id) +{ + int ret; + + mutex_lock(&devfreq_lock); + ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL); + mutex_unlock(&devfreq_lock); + if (unlikely(ret < 0)) + return ret; + *id = ret; + + return 0; +} + +/** + * release_idr - function to free the unique id. + * @idr: struct idr * handle used for creating the id. + * @id: int value representing the unique id. + */ +static void release_idr(struct idr *idr, int id) +{ + mutex_lock(&devfreq_lock); + idr_remove(idr, id); + mutex_unlock(&devfreq_lock); +} + +/** + * partition_enable_opps() - disable all opps above a given state + * @dfc: Pointer to devfreq we are operating on + * @cdev_state: cooling device state we're setting + * + * Go through the OPPs of the device, enabling all OPPs until + * @cdev_state and disabling those frequencies above it. + */ +static int partition_enable_opps(struct devfreq_cooling_device *dfc, + unsigned long cdev_state) +{ + int i; + struct device *dev = dfc->devfreq->dev.parent; + + for (i = 0; i < dfc->freq_table_size; i++) { + struct dev_pm_opp *opp; + int ret = 0; + unsigned int freq = dfc->freq_table[i]; + bool want_enable = i >= cdev_state ? true : false; + + rcu_read_lock(); + opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable); + rcu_read_unlock(); + + if (PTR_ERR(opp) == -ERANGE) + continue; + else if (IS_ERR(opp)) + return PTR_ERR(opp); + + if (want_enable) + ret = dev_pm_opp_enable(dev, freq); + else + ret = dev_pm_opp_disable(dev, freq); + + if (ret) + return ret; + } + + return 0; +} + +static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + struct devfreq_cooling_device *dfc = cdev->devdata; + + *state = dfc->freq_table_size - 1; + + return 0; +} + +static int devfreq_cooling_get_cur_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + struct devfreq_cooling_device *dfc = cdev->devdata; + + *state = dfc->cooling_state; + + return 0; +} + +static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, + unsigned long state) +{ + struct devfreq_cooling_device *dfc = cdev->devdata; + struct devfreq *df = dfc->devfreq; + struct device *dev = df->dev.parent; + int ret; + + if (state == dfc->cooling_state) + return 0; + + dev_dbg(dev, "Setting cooling state %lu\n", state); + + if (state >= dfc->freq_table_size) + return -EINVAL; + + ret = partition_enable_opps(dfc, state); + if (ret) + return ret; + + dfc->cooling_state = state; + + return 0; +} + +/** + * freq_get_state() - get the cooling state corresponding to a frequency + * @dfc: Pointer to devfreq cooling device + * @freq: frequency in Hz + * + * Return: the cooling state associated with the @freq, or + * THERMAL_CSTATE_INVALID if it wasn't found. + */ +static unsigned long +freq_get_state(struct devfreq_cooling_device *dfc, unsigned long freq) +{ + int i; + + for (i = 0; i < dfc->freq_table_size; i++) { + if (dfc->freq_table[i] == freq) + return i; + } + + return THERMAL_CSTATE_INVALID; +} + +/** + * get_static_power() - calculate the static power + * @dfc: Pointer to devfreq cooling device + * @freq: Frequency in Hz + * + * Calculate the static power in milliwatts using the supplied + * get_static_power(). The current voltage is calculated using the + * OPP library. If no get_static_power() was supplied, assume the + * static power is negligible. + */ +static unsigned long +get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq) +{ + struct devfreq *df = dfc->devfreq; + struct device *dev = df->dev.parent; + unsigned long voltage; + struct dev_pm_opp *opp; + + if (!dfc->power_ops->get_static_power) + return 0; + + rcu_read_lock(); + + opp = dev_pm_opp_find_freq_exact(dev, freq, true); + if (IS_ERR(opp) && (PTR_ERR(opp) == -ERANGE)) + opp = dev_pm_opp_find_freq_exact(dev, freq, false); + + voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */ + + rcu_read_unlock(); + + if (voltage == 0) { + dev_warn_ratelimited(dev, + "Failed to get voltage for frequency %lu: %ld\n", + freq, IS_ERR(opp) ? PTR_ERR(opp) : 0); + return 0; + } + + return dfc->power_ops->get_static_power(voltage); +} + +/** + * get_dynamic_power - calculate the dynamic power + * @dfc: Pointer to devfreq cooling device + * @freq: Frequency in Hz + * @voltage: Voltage in millivolts + * + * Calculate the dynamic power in milliwatts consumed by the device at + * frequency @freq and voltage @voltage. If the get_dynamic_power() + * was supplied as part of the devfreq_cooling_power struct, then that + * function is used. Otherwise, a simple power model (Pdyn = Coeff * + * Voltage^2 * Frequency) is used. + */ +static unsigned long +get_dynamic_power(struct devfreq_cooling_device *dfc, unsigned long freq, + unsigned long voltage) +{ + u64 power; + u32 freq_mhz; + struct devfreq_cooling_power *dfc_power = dfc->power_ops; + + if (dfc_power->get_dynamic_power) + return dfc_power->get_dynamic_power(freq, voltage); + + freq_mhz = freq / 1000000; + power = (u64)dfc_power->dyn_power_coeff * freq_mhz * voltage * voltage; + do_div(power, 1000000000); + + return power; +} + +static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, + u32 *power) +{ + struct devfreq_cooling_device *dfc = cdev->devdata; + struct devfreq *df = dfc->devfreq; + struct devfreq_dev_status *status = &df->last_status; + unsigned long state; + unsigned long freq = status->current_frequency; + u32 dyn_power, static_power; + + /* Get dynamic power for state */ + state = freq_get_state(dfc, freq); + if (state == THERMAL_CSTATE_INVALID) + return -EAGAIN; + + dyn_power = dfc->power_table[state]; + + /* Scale dynamic power for utilization */ + dyn_power = (dyn_power * status->busy_time) / status->total_time; + + /* Get static power */ + static_power = get_static_power(dfc, freq); + + trace_thermal_power_devfreq_get_power(cdev, status, freq, dyn_power, + static_power); + + *power = dyn_power + static_power; + + return 0; +} + +static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, + unsigned long state, + u32 *power) +{ + struct devfreq_cooling_device *dfc = cdev->devdata; + unsigned long freq; + u32 static_power; + + if (state < 0 || state >= dfc->freq_table_size) + return -EINVAL; + + freq = dfc->freq_table[state]; + static_power = get_static_power(dfc, freq); + + *power = dfc->power_table[state] + static_power; + return 0; +} + +static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, + u32 power, unsigned long *state) +{ + struct devfreq_cooling_device *dfc = cdev->devdata; + struct devfreq *df = dfc->devfreq; + struct devfreq_dev_status *status = &df->last_status; + unsigned long freq = status->current_frequency; + unsigned long busy_time; + s32 dyn_power; + u32 static_power; + int i; + + static_power = get_static_power(dfc, freq); + + dyn_power = power - static_power; + dyn_power = dyn_power > 0 ? dyn_power : 0; + + /* Scale dynamic power for utilization */ + busy_time = status->busy_time ?: 1; + dyn_power = (dyn_power * status->total_time) / busy_time; + + /* + * Find the first cooling state that is within the power + * budget for dynamic power. + */ + for (i = 0; i < dfc->freq_table_size - 1; i++) + if (dyn_power >= dfc->power_table[i]) + break; + + *state = i; + trace_thermal_power_devfreq_limit(cdev, freq, *state, power); + return 0; +} + +static struct thermal_cooling_device_ops devfreq_cooling_ops = { + .get_max_state = devfreq_cooling_get_max_state, + .get_cur_state = devfreq_cooling_get_cur_state, + .set_cur_state = devfreq_cooling_set_cur_state, +}; + +/** + * devfreq_cooling_gen_tables() - Generate power and freq tables. + * @dfc: Pointer to devfreq cooling device. + * + * Generate power and frequency tables: the power table hold the + * device's maximum power usage at each cooling state (OPP). The + * static and dynamic power using the appropriate voltage and + * frequency for the state, is acquired from the struct + * devfreq_cooling_power, and summed to make the maximum power draw. + * + * The frequency table holds the frequencies in descending order. + * That way its indexed by cooling device state. + * + * The tables are malloced, and pointers put in dfc. They must be + * freed when unregistering the devfreq cooling device. + * + * Return: 0 on success, negative error code on failure. + */ +static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc) +{ + struct devfreq *df = dfc->devfreq; + struct device *dev = df->dev.parent; + int ret, num_opps; + unsigned long freq; + u32 *power_table = NULL; + u32 *freq_table; + int i; + + num_opps = dev_pm_opp_get_opp_count(dev); + + if (dfc->power_ops) { + power_table = kcalloc(num_opps, sizeof(*power_table), + GFP_KERNEL); + if (!power_table) + return -ENOMEM; + } + + freq_table = kcalloc(num_opps, sizeof(*freq_table), + GFP_KERNEL); + if (!freq_table) { + ret = -ENOMEM; + goto free_power_table; + } + + for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) { + unsigned long power_dyn, voltage; + struct dev_pm_opp *opp; + + rcu_read_lock(); + + opp = dev_pm_opp_find_freq_floor(dev, &freq); + if (IS_ERR(opp)) { + rcu_read_unlock(); + ret = PTR_ERR(opp); + goto free_tables; + } + + voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */ + + rcu_read_unlock(); + + if (dfc->power_ops) { + power_dyn = get_dynamic_power(dfc, freq, voltage); + + dev_dbg(dev, "Dynamic power table: %lu MHz @ %lu mV: %lu = %lu mW\n", + freq / 1000000, voltage, power_dyn, power_dyn); + + power_table[i] = power_dyn; + } + + freq_table[i] = freq; + } + + if (dfc->power_ops) + dfc->power_table = power_table; + + dfc->freq_table = freq_table; + dfc->freq_table_size = num_opps; + + return 0; + +free_tables: + kfree(freq_table); +free_power_table: + kfree(power_table); + + return ret; +} + +/** + * of_devfreq_cooling_register_power() - Register devfreq cooling device, + * with OF and power information. + * @np: Pointer to OF device_node. + * @df: Pointer to devfreq device. + * @dfc_power: Pointer to devfreq_cooling_power. + * + * Register a devfreq cooling device. The available OPPs must be + * registered on the device. + * + * If @dfc_power is provided, the cooling device is registered with the + * power extensions. For the power extensions to work correctly, + * devfreq should use the simple_ondemand governor, other governors + * are not currently supported. + */ +struct thermal_cooling_device * +of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, + struct devfreq_cooling_power *dfc_power) +{ + struct thermal_cooling_device *cdev; + struct devfreq_cooling_device *dfc; + char dev_name[THERMAL_NAME_LENGTH]; + int err; + + dfc = kzalloc(sizeof(*dfc), GFP_KERNEL); + if (!dfc) + return ERR_PTR(-ENOMEM); + + dfc->devfreq = df; + + if (dfc_power) { + dfc->power_ops = dfc_power; + + devfreq_cooling_ops.get_requested_power = + devfreq_cooling_get_requested_power; + devfreq_cooling_ops.state2power = devfreq_cooling_state2power; + devfreq_cooling_ops.power2state = devfreq_cooling_power2state; + } + + err = devfreq_cooling_gen_tables(dfc); + if (err) + goto free_dfc; + + err = get_idr(&devfreq_idr, &dfc->id); + if (err) + goto free_tables; + + snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id); + + cdev = thermal_of_cooling_device_register(np, dev_name, dfc, + &devfreq_cooling_ops); + if (IS_ERR(cdev)) { + err = PTR_ERR(cdev); + dev_err(df->dev.parent, + "Failed to register devfreq cooling device (%d)\n", + err); + goto release_idr; + } + + dfc->cdev = cdev; + + return cdev; + +release_idr: + release_idr(&devfreq_idr, dfc->id); +free_tables: + kfree(dfc->power_table); + kfree(dfc->freq_table); +free_dfc: + kfree(dfc); + + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(of_devfreq_cooling_register_power); + +/** + * of_devfreq_cooling_register() - Register devfreq cooling device, + * with OF information. + * @np: Pointer to OF device_node. + * @df: Pointer to devfreq device. + */ +struct thermal_cooling_device * +of_devfreq_cooling_register(struct device_node *np, struct devfreq *df) +{ + return of_devfreq_cooling_register_power(np, df, NULL); +} +EXPORT_SYMBOL_GPL(of_devfreq_cooling_register); + +/** + * devfreq_cooling_register() - Register devfreq cooling device. + * @df: Pointer to devfreq device. + */ +struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df) +{ + return of_devfreq_cooling_register(NULL, df); +} +EXPORT_SYMBOL_GPL(devfreq_cooling_register); + +/** + * devfreq_cooling_unregister() - Unregister devfreq cooling device. + * @dfc: Pointer to devfreq cooling device to unregister. + */ +void devfreq_cooling_unregister(struct thermal_cooling_device *cdev) +{ + struct devfreq_cooling_device *dfc; + + if (!cdev) + return; + + dfc = cdev->devdata; + + thermal_cooling_device_unregister(dfc->cdev); + release_idr(&devfreq_idr, dfc->id); + kfree(dfc->power_table); + kfree(dfc->freq_table); + + kfree(dfc); +} +EXPORT_SYMBOL_GPL(devfreq_cooling_unregister); diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c index 4bec1d3c3d27..c8fe3cac2e0e 100644 --- a/drivers/thermal/imx_thermal.c +++ b/drivers/thermal/imx_thermal.c @@ -288,7 +288,7 @@ static int imx_set_trip_temp(struct thermal_zone_device *tz, int trip, if (trip == IMX_TRIP_CRITICAL) return -EPERM; - if (temp > IMX_TEMP_PASSIVE) + if (temp < 0 || temp > IMX_TEMP_PASSIVE) return -EINVAL; data->temp_passive = temp; @@ -487,14 +487,6 @@ static int imx_thermal_probe(struct platform_device *pdev) if (data->irq < 0) return data->irq; - ret = devm_request_threaded_irq(&pdev->dev, data->irq, - imx_thermal_alarm_irq, imx_thermal_alarm_irq_thread, - 0, "imx_thermal", data); - if (ret < 0) { - dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret); - return ret; - } - platform_set_drvdata(pdev, data); ret = imx_get_sensor_data(pdev); @@ -571,6 +563,17 @@ static int imx_thermal_probe(struct platform_device *pdev) regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN); regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP); + ret = devm_request_threaded_irq(&pdev->dev, data->irq, + imx_thermal_alarm_irq, imx_thermal_alarm_irq_thread, + 0, "imx_thermal", data); + if (ret < 0) { + dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret); + clk_disable_unprepare(data->thermal_clk); + thermal_zone_device_unregister(data->tz); + cpufreq_cooling_unregister(data->cdev); + return ret; + } + data->irq_enabled = true; data->mode = THERMAL_DEVICE_ENABLED; diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c index c89ffb26a354..9787e8aa509f 100644 --- a/drivers/thermal/rockchip_thermal.c +++ b/drivers/thermal/rockchip_thermal.c @@ -22,6 +22,7 @@ #include <linux/platform_device.h> #include <linux/reset.h> #include <linux/thermal.h> +#include <linux/pinctrl/consumer.h> /** * If the temperature over a period of time High, @@ -106,16 +107,14 @@ struct rockchip_thermal_data { #define TSADCV2_AUTO_PERIOD_HT 0x6c #define TSADCV2_AUTO_EN BIT(0) -#define TSADCV2_AUTO_DISABLE ~BIT(0) #define TSADCV2_AUTO_SRC_EN(chn) BIT(4 + (chn)) #define TSADCV2_AUTO_TSHUT_POLARITY_HIGH BIT(8) -#define TSADCV2_AUTO_TSHUT_POLARITY_LOW ~BIT(8) #define TSADCV2_INT_SRC_EN(chn) BIT(chn) #define TSADCV2_SHUT_2GPIO_SRC_EN(chn) BIT(4 + (chn)) #define TSADCV2_SHUT_2CRU_SRC_EN(chn) BIT(8 + (chn)) -#define TSADCV2_INT_PD_CLEAR ~BIT(8) +#define TSADCV2_INT_PD_CLEAR_MASK ~BIT(8) #define TSADCV2_DATA_MASK 0xfff #define TSADCV2_HIGHT_INT_DEBOUNCE_COUNT 4 @@ -124,7 +123,7 @@ struct rockchip_thermal_data { #define TSADCV2_AUTO_PERIOD_HT_TIME 50 /* msec */ struct tsadc_table { - unsigned long code; + u32 code; long temp; }; @@ -164,7 +163,6 @@ static const struct tsadc_table v2_code_table[] = { {3452, 115000}, {3437, 120000}, {3421, 125000}, - {0, 125000}, }; static u32 rk_tsadcv2_temp_to_code(long temp) @@ -191,19 +189,21 @@ static u32 rk_tsadcv2_temp_to_code(long temp) return 0; } -static int rk_tsadcv2_code_to_temp(u32 code) +static int rk_tsadcv2_code_to_temp(u32 code, int *temp) { - unsigned int low = 0; + unsigned int low = 1; unsigned int high = ARRAY_SIZE(v2_code_table) - 1; unsigned int mid = (low + high) / 2; unsigned int num; unsigned long denom; - /* Invalid code, return -EAGAIN */ - if (code > TSADCV2_DATA_MASK) - return -EAGAIN; + BUILD_BUG_ON(ARRAY_SIZE(v2_code_table) < 2); - while (low <= high && mid) { + code &= TSADCV2_DATA_MASK; + if (code < v2_code_table[high].code) + return -EAGAIN; /* Incorrect reading */ + + while (low <= high) { if (code >= v2_code_table[mid].code && code < v2_code_table[mid - 1].code) break; @@ -223,7 +223,9 @@ static int rk_tsadcv2_code_to_temp(u32 code) num = v2_code_table[mid].temp - v2_code_table[mid - 1].temp; num *= v2_code_table[mid - 1].code - code; denom = v2_code_table[mid - 1].code - v2_code_table[mid].code; - return v2_code_table[mid - 1].temp + (num / denom); + *temp = v2_code_table[mid - 1].temp + (num / denom); + + return 0; } /** @@ -241,10 +243,10 @@ static void rk_tsadcv2_initialize(void __iomem *regs, enum tshut_polarity tshut_polarity) { if (tshut_polarity == TSHUT_HIGH_ACTIVE) - writel_relaxed(0 | (TSADCV2_AUTO_TSHUT_POLARITY_HIGH), + writel_relaxed(0U | TSADCV2_AUTO_TSHUT_POLARITY_HIGH, regs + TSADCV2_AUTO_CON); else - writel_relaxed(0 | (TSADCV2_AUTO_TSHUT_POLARITY_LOW), + writel_relaxed(0U & ~TSADCV2_AUTO_TSHUT_POLARITY_HIGH, regs + TSADCV2_AUTO_CON); writel_relaxed(TSADCV2_AUTO_PERIOD_TIME, regs + TSADCV2_AUTO_PERIOD); @@ -261,7 +263,7 @@ static void rk_tsadcv2_irq_ack(void __iomem *regs) u32 val; val = readl_relaxed(regs + TSADCV2_INT_PD); - writel_relaxed(val & TSADCV2_INT_PD_CLEAR, regs + TSADCV2_INT_PD); + writel_relaxed(val & TSADCV2_INT_PD_CLEAR_MASK, regs + TSADCV2_INT_PD); } static void rk_tsadcv2_control(void __iomem *regs, bool enable) @@ -281,14 +283,9 @@ static int rk_tsadcv2_get_temp(int chn, void __iomem *regs, int *temp) { u32 val; - /* the A/D value of the channel last conversion need some time */ val = readl_relaxed(regs + TSADCV2_DATA(chn)); - if (val == 0) - return -EAGAIN; - *temp = rk_tsadcv2_code_to_temp(val); - - return 0; + return rk_tsadcv2_code_to_temp(val, temp); } static void rk_tsadcv2_tshut_temp(int chn, void __iomem *regs, long temp) @@ -642,6 +639,8 @@ static int __maybe_unused rockchip_thermal_suspend(struct device *dev) clk_disable(thermal->pclk); clk_disable(thermal->clk); + pinctrl_pm_select_sleep_state(dev); + return 0; } @@ -678,6 +677,8 @@ static int __maybe_unused rockchip_thermal_resume(struct device *dev) for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) rockchip_thermal_toggle_sensor(&thermal->sensors[i], true); + pinctrl_pm_select_default_state(dev); + return 0; } diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index ca920b0ecf8f..fa61eff88496 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -548,7 +548,7 @@ static int exynos5433_tmu_initialize(struct platform_device *pdev) default: pdata->cal_type = TYPE_ONE_POINT_TRIMMING; break; - }; + } dev_info(&pdev->dev, "Calibration type is %d-point calibration\n", cal_type ? 2 : 1); @@ -608,7 +608,7 @@ static int exynos5440_tmu_initialize(struct platform_device *pdev) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); unsigned int trim_info = 0, con, rising_threshold; - int ret = 0, threshold_code; + int threshold_code; int crit_temp = 0; /* @@ -651,7 +651,8 @@ static int exynos5440_tmu_initialize(struct platform_device *pdev) /* Clear the PMIN in the common TMU register */ if (!data->id) writel(0, data->base_second + EXYNOS5440_TMU_PMIN); - return ret; + + return 0; } static int exynos7_tmu_initialize(struct platform_device *pdev) @@ -1168,27 +1169,10 @@ static int exynos_map_dt_data(struct platform_device *pdev) struct exynos_tmu_data *data = platform_get_drvdata(pdev); struct exynos_tmu_platform_data *pdata; struct resource res; - int ret; if (!data || !pdev->dev.of_node) return -ENODEV; - /* - * Try enabling the regulator if found - * TODO: Add regulator as an SOC feature, so that regulator enable - * is a compulsory call. - */ - data->regulator = devm_regulator_get(&pdev->dev, "vtmu"); - if (!IS_ERR(data->regulator)) { - ret = regulator_enable(data->regulator); - if (ret) { - dev_err(&pdev->dev, "failed to enable vtmu\n"); - return ret; - } - } else { - dev_info(&pdev->dev, "Regulator node (vtmu) not found\n"); - } - data->id = of_alias_get_id(pdev->dev.of_node, "tmuctrl"); if (data->id < 0) data->id = 0; @@ -1306,12 +1290,22 @@ static int exynos_tmu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, data); mutex_init(&data->lock); - data->tzd = thermal_zone_of_sensor_register(&pdev->dev, 0, data, - &exynos_sensor_ops); - if (IS_ERR(data->tzd)) { - pr_err("thermal: tz: %p ERROR\n", data->tzd); - return PTR_ERR(data->tzd); + /* + * Try enabling the regulator if found + * TODO: Add regulator as an SOC feature, so that regulator enable + * is a compulsory call. + */ + data->regulator = devm_regulator_get(&pdev->dev, "vtmu"); + if (!IS_ERR(data->regulator)) { + ret = regulator_enable(data->regulator); + if (ret) { + dev_err(&pdev->dev, "failed to enable vtmu\n"); + return ret; + } + } else { + dev_info(&pdev->dev, "Regulator node (vtmu) not found\n"); } + ret = exynos_map_dt_data(pdev); if (ret) goto err_sensor; @@ -1363,23 +1357,38 @@ static int exynos_tmu_probe(struct platform_device *pdev) break; default: break; - }; + } + + /* + * data->tzd must be registered before calling exynos_tmu_initialize(), + * requesting irq and calling exynos_tmu_control(). + */ + data->tzd = thermal_zone_of_sensor_register(&pdev->dev, 0, data, + &exynos_sensor_ops); + if (IS_ERR(data->tzd)) { + ret = PTR_ERR(data->tzd); + dev_err(&pdev->dev, "Failed to register sensor: %d\n", ret); + goto err_sclk; + } ret = exynos_tmu_initialize(pdev); if (ret) { dev_err(&pdev->dev, "Failed to initialize TMU\n"); - goto err_sclk; + goto err_thermal; } ret = devm_request_irq(&pdev->dev, data->irq, exynos_tmu_irq, IRQF_TRIGGER_RISING | IRQF_SHARED, dev_name(&pdev->dev), data); if (ret) { dev_err(&pdev->dev, "Failed to request irq: %d\n", data->irq); - goto err_sclk; + goto err_thermal; } exynos_tmu_control(pdev, true); return 0; + +err_thermal: + thermal_zone_of_sensor_unregister(&pdev->dev, data->tzd); err_sclk: clk_disable_unprepare(data->sclk); err_clk: @@ -1388,9 +1397,8 @@ err_clk_sec: if (!IS_ERR(data->clk_sec)) clk_unprepare(data->clk_sec); err_sensor: - if (!IS_ERR_OR_NULL(data->regulator)) + if (!IS_ERR(data->regulator)) regulator_disable(data->regulator); - thermal_zone_of_sensor_unregister(&pdev->dev, data->tzd); return ret; } diff --git a/drivers/thermal/ti-soc-thermal/Kconfig b/drivers/thermal/ti-soc-thermal/Kconfig index cb6686ff09ae..ea8283f08aa6 100644 --- a/drivers/thermal/ti-soc-thermal/Kconfig +++ b/drivers/thermal/ti-soc-thermal/Kconfig @@ -19,6 +19,21 @@ config TI_THERMAL This includes trip points definitions, extrapolation rules and CPU cooling device bindings. +config OMAP3_THERMAL + bool "Texas Instruments OMAP3 thermal support" + depends on TI_SOC_THERMAL + depends on ARCH_OMAP3 || COMPILE_TEST + help + If you say yes here you get thermal support for the Texas Instruments + OMAP3 SoC family. The current chips supported are: + - OMAP3430 + + OMAP3 chips normally don't need thermal management, and sensors in + this generation are not accurate, nor they are very close to + the important hotspots. + + Say 'N' here. + config OMAP4_THERMAL bool "Texas Instruments OMAP4 thermal support" depends on TI_SOC_THERMAL diff --git a/drivers/thermal/ti-soc-thermal/Makefile b/drivers/thermal/ti-soc-thermal/Makefile index 1226b2484e55..0f89bdf03790 100644 --- a/drivers/thermal/ti-soc-thermal/Makefile +++ b/drivers/thermal/ti-soc-thermal/Makefile @@ -2,5 +2,6 @@ obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal.o ti-soc-thermal-y := ti-bandgap.o ti-soc-thermal-$(CONFIG_TI_THERMAL) += ti-thermal-common.o ti-soc-thermal-$(CONFIG_DRA752_THERMAL) += dra752-thermal-data.o +ti-soc-thermal-$(CONFIG_OMAP3_THERMAL) += omap3-thermal-data.o ti-soc-thermal-$(CONFIG_OMAP4_THERMAL) += omap4-thermal-data.o ti-soc-thermal-$(CONFIG_OMAP5_THERMAL) += omap5-thermal-data.o diff --git a/drivers/thermal/ti-soc-thermal/omap3-thermal-data.c b/drivers/thermal/ti-soc-thermal/omap3-thermal-data.c new file mode 100644 index 000000000000..3ee34340edab --- /dev/null +++ b/drivers/thermal/ti-soc-thermal/omap3-thermal-data.c @@ -0,0 +1,176 @@ +/* + * OMAP3 thermal driver. + * + * Copyright (C) 2011-2012 Texas Instruments Inc. + * Copyright (C) 2014 Pavel Machek <pavel@ucw.cz> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Note + * http://www.ti.com/lit/er/sprz278f/sprz278f.pdf "Advisory + * 3.1.1.186 MMC OCP Clock Not Gated When Thermal Sensor Is Used" + * + * Also TI says: + * Just be careful when you try to make thermal policy like decisions + * based on this sensor. Placement of the sensor w.r.t the actual logic + * generating heat has to be a factor as well. If you are just looking + * for an approximation temperature (thermometerish kind), you might be + * ok with this. I am not sure we'd find any TI data around this.. just a + * heads up. + */ + +#include "ti-thermal.h" +#include "ti-bandgap.h" + +/* + * OMAP34XX has one instance of thermal sensor for MPU + * need to describe the individual bit fields + */ +static struct temp_sensor_registers +omap34xx_mpu_temp_sensor_registers = { + .temp_sensor_ctrl = 0, + .bgap_soc_mask = BIT(8), + .bgap_eocz_mask = BIT(7), + .bgap_dtemp_mask = 0x7f, + + .bgap_mode_ctrl = 0, + .mode_ctrl_mask = BIT(9), +}; + +/* Thresholds and limits for OMAP34XX MPU temperature sensor */ +static struct temp_sensor_data omap34xx_mpu_temp_sensor_data = { + .min_freq = 32768, + .max_freq = 32768, + .max_temp = 125000, + .min_temp = -40000, + .hyst_val = 5000, +}; + +/* + * Temperature values in milli degree celsius + */ +static const int +omap34xx_adc_to_temp[128] = { + -40000, -40000, -40000, -40000, -40000, -39000, -38000, -36000, + -34000, -32000, -31000, -29000, -28000, -26000, -25000, -24000, + -22000, -21000, -19000, -18000, -17000, -15000, -14000, -12000, + -11000, -9000, -8000, -7000, -5000, -4000, -2000, -1000, 0000, + 1000, 3000, 4000, 5000, 7000, 8000, 10000, 11000, 13000, 14000, + 15000, 17000, 18000, 20000, 21000, 22000, 24000, 25000, 27000, + 28000, 30000, 31000, 32000, 34000, 35000, 37000, 38000, 39000, + 41000, 42000, 44000, 45000, 47000, 48000, 49000, 51000, 52000, + 53000, 55000, 56000, 58000, 59000, 60000, 62000, 63000, 65000, + 66000, 67000, 69000, 70000, 72000, 73000, 74000, 76000, 77000, + 79000, 80000, 81000, 83000, 84000, 85000, 87000, 88000, 89000, + 91000, 92000, 94000, 95000, 96000, 98000, 99000, 100000, + 102000, 103000, 105000, 106000, 107000, 109000, 110000, 111000, + 113000, 114000, 116000, 117000, 118000, 120000, 121000, 122000, + 124000, 124000, 125000, 125000, 125000, 125000, 125000 +}; + +/* OMAP34XX data */ +const struct ti_bandgap_data omap34xx_data = { + .features = TI_BANDGAP_FEATURE_CLK_CTRL | TI_BANDGAP_FEATURE_UNRELIABLE, + .fclock_name = "ts_fck", + .div_ck_name = "ts_fck", + .conv_table = omap34xx_adc_to_temp, + .adc_start_val = 0, + .adc_end_val = 127, + .expose_sensor = ti_thermal_expose_sensor, + .remove_sensor = ti_thermal_remove_sensor, + + .sensors = { + { + .registers = &omap34xx_mpu_temp_sensor_registers, + .ts_data = &omap34xx_mpu_temp_sensor_data, + .domain = "cpu", + .slope = 0, + .constant = 20000, + .slope_pcb = 0, + .constant_pcb = 20000, + .register_cooling = NULL, + .unregister_cooling = NULL, + }, + }, + .sensor_count = 1, +}; + +/* + * OMAP36XX has one instance of thermal sensor for MPU + * need to describe the individual bit fields + */ +static struct temp_sensor_registers +omap36xx_mpu_temp_sensor_registers = { + .temp_sensor_ctrl = 0, + .bgap_soc_mask = BIT(9), + .bgap_eocz_mask = BIT(8), + .bgap_dtemp_mask = 0xFF, + + .bgap_mode_ctrl = 0, + .mode_ctrl_mask = BIT(10), +}; + +/* Thresholds and limits for OMAP36XX MPU temperature sensor */ +static struct temp_sensor_data omap36xx_mpu_temp_sensor_data = { + .min_freq = 32768, + .max_freq = 32768, + .max_temp = 125000, + .min_temp = -40000, + .hyst_val = 5000, +}; + +/* + * Temperature values in milli degree celsius + */ +static const int +omap36xx_adc_to_temp[128] = { + -40000, -40000, -40000, -40000, -40000, -40000, -40000, -40000, + -40000, -40000, -40000, -40000, -40000, -38000, -35000, -34000, + -32000, -30000, -28000, -26000, -24000, -22000, -20000, -18500, + -17000, -15000, -13500, -12000, -10000, -8000, -6500, -5000, -3500, + -1500, 0, 2000, 3500, 5000, 6500, 8500, 10000, 12000, 13500, + 15000, 17000, 19000, 21000, 23000, 25000, 27000, 28500, 30000, + 32000, 33500, 35000, 37000, 38500, 40000, 42000, 43500, 45000, + 47000, 48500, 50000, 52000, 53500, 55000, 57000, 58500, 60000, + 62000, 64000, 66000, 68000, 70000, 71500, 73500, 75000, 77000, + 78500, 80000, 82000, 83500, 85000, 87000, 88500, 90000, 92000, + 93500, 95000, 97000, 98500, 100000, 102000, 103500, 105000, 107000, + 109000, 111000, 113000, 115000, 117000, 118500, 120000, 122000, + 123500, 125000, 125000, 125000, 125000, 125000, 125000, 125000, + 125000, 125000, 125000, 125000, 125000, 125000, 125000, 125000, + 125000, 125000, 125000, 125000, 125000, 125000, 125000 +}; + +/* OMAP36XX data */ +const struct ti_bandgap_data omap36xx_data = { + .features = TI_BANDGAP_FEATURE_CLK_CTRL | TI_BANDGAP_FEATURE_UNRELIABLE, + .fclock_name = "ts_fck", + .div_ck_name = "ts_fck", + .conv_table = omap36xx_adc_to_temp, + .adc_start_val = 0, + .adc_end_val = 127, + .expose_sensor = ti_thermal_expose_sensor, + .remove_sensor = ti_thermal_remove_sensor, + + .sensors = { + { + .registers = &omap36xx_mpu_temp_sensor_registers, + .ts_data = &omap36xx_mpu_temp_sensor_data, + .domain = "cpu", + .slope = 0, + .constant = 20000, + .slope_pcb = 0, + .constant_pcb = 20000, + .register_cooling = NULL, + .unregister_cooling = NULL, + }, + }, + .sensor_count = 1, +}; diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c index 10c47c048f7a..1e34a1efc554 100644 --- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c @@ -1274,6 +1274,10 @@ int ti_bandgap_probe(struct platform_device *pdev) } bgp->dev = &pdev->dev; + if (TI_BANDGAP_HAS(bgp, UNRELIABLE)) + dev_warn(&pdev->dev, + "This OMAP thermal sensor is unreliable. You've been warned\n"); + if (TI_BANDGAP_HAS(bgp, TSHUT)) { ret = ti_bandgap_tshut_init(bgp, pdev); if (ret) { @@ -1579,6 +1583,16 @@ static SIMPLE_DEV_PM_OPS(ti_bandgap_dev_pm_ops, ti_bandgap_suspend, #endif static const struct of_device_id of_ti_bandgap_match[] = { +#ifdef CONFIG_OMAP3_THERMAL + { + .compatible = "ti,omap34xx-bandgap", + .data = (void *)&omap34xx_data, + }, + { + .compatible = "ti,omap36xx-bandgap", + .data = (void *)&omap36xx_data, + }, +#endif #ifdef CONFIG_OMAP4_THERMAL { .compatible = "ti,omap4430-bandgap", diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.h b/drivers/thermal/ti-soc-thermal/ti-bandgap.h index 0c52f7afba00..fe0adb898764 100644 --- a/drivers/thermal/ti-soc-thermal/ti-bandgap.h +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.h @@ -322,6 +322,8 @@ struct ti_temp_sensor { * has Errata 814 * TI_BANDGAP_FEATURE_ERRATA_813 - used to workaorund when the bandgap device * has Errata 813 + * TI_BANDGAP_FEATURE_UNRELIABLE - used when the sensor readings are too + * inaccurate. * TI_BANDGAP_HAS(b, f) - macro to check if a bandgap device is capable of a * specific feature (above) or not. Return non-zero, if yes. */ @@ -337,6 +339,7 @@ struct ti_temp_sensor { #define TI_BANDGAP_FEATURE_HISTORY_BUFFER BIT(9) #define TI_BANDGAP_FEATURE_ERRATA_814 BIT(10) #define TI_BANDGAP_FEATURE_ERRATA_813 BIT(11) +#define TI_BANDGAP_FEATURE_UNRELIABLE BIT(12) #define TI_BANDGAP_HAS(b, f) \ ((b)->conf->features & TI_BANDGAP_FEATURE_ ## f) @@ -390,6 +393,14 @@ int ti_bandgap_set_sensor_data(struct ti_bandgap *bgp, int id, void *data); void *ti_bandgap_get_sensor_data(struct ti_bandgap *bgp, int id); int ti_bandgap_get_trend(struct ti_bandgap *bgp, int id, int *trend); +#ifdef CONFIG_OMAP3_THERMAL +extern const struct ti_bandgap_data omap34xx_data; +extern const struct ti_bandgap_data omap36xx_data; +#else +#define omap34xx_data NULL +#define omap36xx_data NULL +#endif + #ifdef CONFIG_OMAP4_THERMAL extern const struct ti_bandgap_data omap4430_data; extern const struct ti_bandgap_data omap4460_data; diff --git a/fs/9p/cache.h b/fs/9p/cache.h index 2f9675491095..247e47e54bcc 100644 --- a/fs/9p/cache.h +++ b/fs/9p/cache.h @@ -21,6 +21,7 @@ */ #ifndef _9P_CACHE_H +#define _9P_CACHE_H #ifdef CONFIG_9P_FSCACHE #include <linux/fscache.h> #include <linux/spinlock.h> diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 5f399ea1d20a..3a93755e880f 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -488,7 +488,7 @@ static inline int arch_elf_pt_proc(struct elfhdr *ehdr, } /** - * arch_check_elf() - check a PT_LOPROC..PT_HIPROC ELF program header + * arch_check_elf() - check an ELF executable * @ehdr: The main ELF header * @has_interp: True if the ELF has an interpreter, else false. * @state: Architecture-specific state preserved throughout the process @@ -760,16 +760,16 @@ static int load_elf_binary(struct linux_binprm *bprm) */ would_dump(bprm, interpreter); - retval = kernel_read(interpreter, 0, bprm->buf, - BINPRM_BUF_SIZE); - if (retval != BINPRM_BUF_SIZE) { + /* Get the exec headers */ + retval = kernel_read(interpreter, 0, + (void *)&loc->interp_elf_ex, + sizeof(loc->interp_elf_ex)); + if (retval != sizeof(loc->interp_elf_ex)) { if (retval >= 0) retval = -EIO; goto out_free_dentry; } - /* Get the exec headers */ - loc->interp_elf_ex = *((struct elfhdr *)bprm->buf); break; } elf_ppnt++; diff --git a/fs/buffer.c b/fs/buffer.c index 51aff0296ce2..4f4cd959da7c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2420,9 +2420,9 @@ EXPORT_SYMBOL(block_commit_write); * unlock the page. * * Direct callers of this function should protect against filesystem freezing - * using sb_start_write() - sb_end_write() functions. + * using sb_start_pagefault() - sb_end_pagefault() functions. */ -int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { struct page *page = vmf->page; @@ -2459,26 +2459,6 @@ out_unlock: unlock_page(page); return ret; } -EXPORT_SYMBOL(__block_page_mkwrite); - -int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - get_block_t get_block) -{ - int ret; - struct super_block *sb = file_inode(vma->vm_file)->i_sb; - - sb_start_pagefault(sb); - - /* - * Update file times before taking page lock. We may end up failing the - * fault so this update may be superfluous but who really cares... - */ - file_update_time(vma->vm_file); - - ret = __block_page_mkwrite(vma, vmf, get_block); - sb_end_pagefault(sb); - return block_page_mkwrite_return(ret); -} EXPORT_SYMBOL(block_page_mkwrite); /* diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index fc1056f5c96a..c4b893453e0e 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -655,6 +655,8 @@ lookup_again: aops = d_backing_inode(object->dentry)->i_mapping->a_ops; if (!aops->bmap) goto check_error; + if (object->dentry->d_sb->s_blocksize > PAGE_SIZE) + goto check_error; object->backer = object->dentry; } else { diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 3cbb0e834694..7a6b02f72787 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -414,9 +414,6 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, ASSERT(inode->i_mapping->a_ops->readpages); /* calculate the shift required to use bmap */ - if (inode->i_sb->s_blocksize > PAGE_SIZE) - goto enobufs; - shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; op->op.flags &= FSCACHE_OP_KEEP_FLAGS; @@ -711,9 +708,6 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, ASSERT(inode->i_mapping->a_ops->readpages); /* calculate the shift required to use bmap */ - if (inode->i_sb->s_blocksize > PAGE_SIZE) - goto all_enobufs; - shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; pagevec_init(&pagevec, 0); @@ -905,6 +899,15 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) cache = container_of(object->fscache.cache, struct cachefiles_cache, cache); + pos = (loff_t)page->index << PAGE_SHIFT; + + /* We mustn't write more data than we have, so we have to beware of a + * partial page at EOF. + */ + eof = object->fscache.store_limit_l; + if (pos >= eof) + goto error; + /* write the page to the backing filesystem and let it store it in its * own time */ path.mnt = cache->mnt; @@ -912,40 +915,38 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) file = dentry_open(&path, O_RDWR | O_LARGEFILE, cache->cache_cred); if (IS_ERR(file)) { ret = PTR_ERR(file); - } else { - pos = (loff_t) page->index << PAGE_SHIFT; - - /* we mustn't write more data than we have, so we have - * to beware of a partial page at EOF */ - eof = object->fscache.store_limit_l; - len = PAGE_SIZE; - if (eof & ~PAGE_MASK) { - ASSERTCMP(pos, <, eof); - if (eof - pos < PAGE_SIZE) { - _debug("cut short %llx to %llx", - pos, eof); - len = eof - pos; - ASSERTCMP(pos + len, ==, eof); - } - } - - data = kmap(page); - ret = __kernel_write(file, data, len, &pos); - kunmap(page); - if (ret != len) - ret = -EIO; - fput(file); + goto error_2; } - if (ret < 0) { - if (ret == -EIO) - cachefiles_io_error_obj( - object, "Write page to backing file failed"); - ret = -ENOBUFS; + len = PAGE_SIZE; + if (eof & ~PAGE_MASK) { + if (eof - pos < PAGE_SIZE) { + _debug("cut short %llx to %llx", + pos, eof); + len = eof - pos; + ASSERTCMP(pos + len, ==, eof); + } } - _leave(" = %d", ret); - return ret; + data = kmap(page); + ret = __kernel_write(file, data, len, &pos); + kunmap(page); + fput(file); + if (ret != len) + goto error_eio; + + _leave(" = 0"); + return 0; + +error_eio: + ret = -EIO; +error_2: + if (ret == -EIO) + cachefiles_io_error_obj(object, + "Write page to backing file failed"); +error: + _leave(" = -ENOBUFS [%d]", ret); + return -ENOBUFS; } /* @@ -29,6 +29,11 @@ #include <linux/uio.h> #include <linux/vmstat.h> +/* + * dax_clear_blocks() is called from within transaction context from XFS, + * and hence this means the stack from this point must follow GFP_NOFS + * semantics for all operations. + */ int dax_clear_blocks(struct inode *inode, sector_t block, long size) { struct block_device *bdev = inode->i_sb->s_bdev; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 5d8f35f1382a..b7fcc0de0b2f 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -271,8 +271,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) dput(dentry); dentry = ERR_PTR(-EEXIST); } - if (IS_ERR(dentry)) + + if (IS_ERR(dentry)) { mutex_unlock(&d_inode(parent)->i_mutex); + simple_release_fs(&debugfs_mount, &debugfs_mount_count); + } + return dentry; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7d1aad1d9313..ea433a7f4bca 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5283,7 +5283,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) !ext4_should_journal_data(inode) && !ext4_nonda_switch(inode->i_sb)) { do { - ret = __block_page_mkwrite(vma, vmf, + ret = block_page_mkwrite(vma, vmf, ext4_da_get_block_prep); } while (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)); @@ -5330,7 +5330,7 @@ retry_alloc: ret = VM_FAULT_SIGBUS; goto out; } - ret = __block_page_mkwrite(vma, vmf, get_block); + ret = block_page_mkwrite(vma, vmf, get_block); if (!ret && ext4_should_journal_data(inode)) { if (ext4_walk_page_buffers(handle, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c index 6d941f56faf4..9b28649df3a1 100644 --- a/fs/fscache/netfs.c +++ b/fs/fscache/netfs.c @@ -22,6 +22,7 @@ static LIST_HEAD(fscache_netfs_list); int __fscache_register_netfs(struct fscache_netfs *netfs) { struct fscache_netfs *ptr; + struct fscache_cookie *cookie; int ret; _enter("{%s}", netfs->name); @@ -29,29 +30,25 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) INIT_LIST_HEAD(&netfs->link); /* allocate a cookie for the primary index */ - netfs->primary_index = - kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL); + cookie = kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL); - if (!netfs->primary_index) { + if (!cookie) { _leave(" = -ENOMEM"); return -ENOMEM; } /* initialise the primary index cookie */ - atomic_set(&netfs->primary_index->usage, 1); - atomic_set(&netfs->primary_index->n_children, 0); - atomic_set(&netfs->primary_index->n_active, 1); + atomic_set(&cookie->usage, 1); + atomic_set(&cookie->n_children, 0); + atomic_set(&cookie->n_active, 1); - netfs->primary_index->def = &fscache_fsdef_netfs_def; - netfs->primary_index->parent = &fscache_fsdef_index; - netfs->primary_index->netfs_data = netfs; - netfs->primary_index->flags = 1 << FSCACHE_COOKIE_ENABLED; + cookie->def = &fscache_fsdef_netfs_def; + cookie->parent = &fscache_fsdef_index; + cookie->netfs_data = netfs; + cookie->flags = 1 << FSCACHE_COOKIE_ENABLED; - atomic_inc(&netfs->primary_index->parent->usage); - atomic_inc(&netfs->primary_index->parent->n_children); - - spin_lock_init(&netfs->primary_index->lock); - INIT_HLIST_HEAD(&netfs->primary_index->backing_objects); + spin_lock_init(&cookie->lock); + INIT_HLIST_HEAD(&cookie->backing_objects); /* check the netfs type is not already present */ down_write(&fscache_addremove_sem); @@ -62,6 +59,10 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) goto already_registered; } + atomic_inc(&cookie->parent->usage); + atomic_inc(&cookie->parent->n_children); + + netfs->primary_index = cookie; list_add(&netfs->link, &fscache_netfs_list); ret = 0; @@ -70,11 +71,8 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) already_registered: up_write(&fscache_addremove_sem); - if (ret < 0) { - netfs->primary_index->parent = NULL; - __fscache_cookie_put(netfs->primary_index); - netfs->primary_index = NULL; - } + if (ret < 0) + kmem_cache_free(fscache_cookie_jar, cookie); _leave(" = %d", ret); return ret; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 79483b3d8c6f..6b35fc4860a0 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -816,7 +816,7 @@ static void fscache_write_op(struct fscache_operation *_op) goto superseded; page = results[0]; _debug("gang %d [%lx]", n, page->index); - if (page->index > op->store_limit) { + if (page->index >= op->store_limit) { fscache_stat(&fscache_n_store_pages_over_limit); goto superseded; } diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 969d589c848d..d716c9993a26 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -116,7 +116,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, atomic_inc(&nsm->sm_count); else { host = NULL; - nsm = nsm_get_handle(ni->sap, ni->salen, + nsm = nsm_get_handle(ni->net, ni->sap, ni->salen, ni->hostname, ni->hostname_len); if (unlikely(nsm == NULL)) { dprintk("lockd: %s failed; no nsm handle\n", @@ -161,6 +161,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, host->h_nsmhandle = nsm; host->h_addrbuf = nsm->sm_addrbuf; host->net = ni->net; + strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename)); out: return host; @@ -534,17 +535,18 @@ static struct nlm_host *next_host_state(struct hlist_head *cache, /** * nlm_host_rebooted - Release all resources held by rebooted host + * @net: network namespace * @info: pointer to decoded results of NLM_SM_NOTIFY call * * We were notified that the specified host has rebooted. Release * all resources held by that peer. */ -void nlm_host_rebooted(const struct nlm_reboot *info) +void nlm_host_rebooted(const struct net *net, const struct nlm_reboot *info) { struct nsm_handle *nsm; struct nlm_host *host; - nsm = nsm_reboot_lookup(info); + nsm = nsm_reboot_lookup(net, info); if (unlikely(nsm == NULL)) return; diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 47a32b6d9b90..19166d4a8d31 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -42,7 +42,7 @@ struct nsm_args { u32 proc; char *mon_name; - char *nodename; + const char *nodename; }; struct nsm_res { @@ -51,7 +51,6 @@ struct nsm_res { }; static const struct rpc_program nsm_program; -static LIST_HEAD(nsm_handles); static DEFINE_SPINLOCK(nsm_lock); /* @@ -87,69 +86,18 @@ static struct rpc_clnt *nsm_create(struct net *net, const char *nodename) return rpc_create(&args); } -static struct rpc_clnt *nsm_client_set(struct lockd_net *ln, - struct rpc_clnt *clnt) -{ - spin_lock(&ln->nsm_clnt_lock); - if (ln->nsm_users == 0) { - if (clnt == NULL) - goto out; - ln->nsm_clnt = clnt; - } - clnt = ln->nsm_clnt; - ln->nsm_users++; -out: - spin_unlock(&ln->nsm_clnt_lock); - return clnt; -} - -static struct rpc_clnt *nsm_client_get(struct net *net, const char *nodename) -{ - struct rpc_clnt *clnt, *new; - struct lockd_net *ln = net_generic(net, lockd_net_id); - - clnt = nsm_client_set(ln, NULL); - if (clnt != NULL) - goto out; - - clnt = new = nsm_create(net, nodename); - if (IS_ERR(clnt)) - goto out; - - clnt = nsm_client_set(ln, new); - if (clnt != new) - rpc_shutdown_client(new); -out: - return clnt; -} - -static void nsm_client_put(struct net *net) -{ - struct lockd_net *ln = net_generic(net, lockd_net_id); - struct rpc_clnt *clnt = NULL; - - spin_lock(&ln->nsm_clnt_lock); - ln->nsm_users--; - if (ln->nsm_users == 0) { - clnt = ln->nsm_clnt; - ln->nsm_clnt = NULL; - } - spin_unlock(&ln->nsm_clnt_lock); - if (clnt != NULL) - rpc_shutdown_client(clnt); -} - static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, - struct rpc_clnt *clnt) + const struct nlm_host *host) { int status; + struct rpc_clnt *clnt; struct nsm_args args = { .priv = &nsm->sm_priv, .prog = NLM_PROGRAM, .vers = 3, .proc = NLMPROC_NSM_NOTIFY, .mon_name = nsm->sm_mon_name, - .nodename = clnt->cl_nodename, + .nodename = host->nodename, }; struct rpc_message msg = { .rpc_argp = &args, @@ -158,6 +106,13 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, memset(res, 0, sizeof(*res)); + clnt = nsm_create(host->net, host->nodename); + if (IS_ERR(clnt)) { + dprintk("lockd: failed to create NSM upcall transport, " + "status=%ld, net=%p\n", PTR_ERR(clnt), host->net); + return PTR_ERR(clnt); + } + msg.rpc_proc = &clnt->cl_procinfo[proc]; status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN); if (status == -ECONNREFUSED) { @@ -171,6 +126,8 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, status); else status = 0; + + rpc_shutdown_client(clnt); return status; } @@ -190,32 +147,19 @@ int nsm_monitor(const struct nlm_host *host) struct nsm_handle *nsm = host->h_nsmhandle; struct nsm_res res; int status; - struct rpc_clnt *clnt; - const char *nodename = NULL; dprintk("lockd: nsm_monitor(%s)\n", nsm->sm_name); if (nsm->sm_monitored) return 0; - if (host->h_rpcclnt) - nodename = host->h_rpcclnt->cl_nodename; - /* * Choose whether to record the caller_name or IP address of * this peer in the local rpc.statd's database. */ nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; - clnt = nsm_client_get(host->net, nodename); - if (IS_ERR(clnt)) { - status = PTR_ERR(clnt); - dprintk("lockd: failed to create NSM upcall transport, " - "status=%d, net=%p\n", status, host->net); - return status; - } - - status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, clnt); + status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host); if (unlikely(res.status != 0)) status = -EIO; if (unlikely(status < 0)) { @@ -247,11 +191,9 @@ void nsm_unmonitor(const struct nlm_host *host) if (atomic_read(&nsm->sm_count) == 1 && nsm->sm_monitored && !nsm->sm_sticky) { - struct lockd_net *ln = net_generic(host->net, lockd_net_id); - dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name); - status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, ln->nsm_clnt); + status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host); if (res.status != 0) status = -EIO; if (status < 0) @@ -259,38 +201,38 @@ void nsm_unmonitor(const struct nlm_host *host) nsm->sm_name); else nsm->sm_monitored = 0; - - nsm_client_put(host->net); } } -static struct nsm_handle *nsm_lookup_hostname(const char *hostname, - const size_t len) +static struct nsm_handle *nsm_lookup_hostname(const struct list_head *nsm_handles, + const char *hostname, const size_t len) { struct nsm_handle *nsm; - list_for_each_entry(nsm, &nsm_handles, sm_link) + list_for_each_entry(nsm, nsm_handles, sm_link) if (strlen(nsm->sm_name) == len && memcmp(nsm->sm_name, hostname, len) == 0) return nsm; return NULL; } -static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap) +static struct nsm_handle *nsm_lookup_addr(const struct list_head *nsm_handles, + const struct sockaddr *sap) { struct nsm_handle *nsm; - list_for_each_entry(nsm, &nsm_handles, sm_link) + list_for_each_entry(nsm, nsm_handles, sm_link) if (rpc_cmp_addr(nsm_addr(nsm), sap)) return nsm; return NULL; } -static struct nsm_handle *nsm_lookup_priv(const struct nsm_private *priv) +static struct nsm_handle *nsm_lookup_priv(const struct list_head *nsm_handles, + const struct nsm_private *priv) { struct nsm_handle *nsm; - list_for_each_entry(nsm, &nsm_handles, sm_link) + list_for_each_entry(nsm, nsm_handles, sm_link) if (memcmp(nsm->sm_priv.data, priv->data, sizeof(priv->data)) == 0) return nsm; @@ -353,6 +295,7 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap, /** * nsm_get_handle - Find or create a cached nsm_handle + * @net: network namespace * @sap: pointer to socket address of handle to find * @salen: length of socket address * @hostname: pointer to C string containing hostname to find @@ -365,11 +308,13 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap, * @hostname cannot be found in the handle cache. Returns NULL if * an error occurs. */ -struct nsm_handle *nsm_get_handle(const struct sockaddr *sap, +struct nsm_handle *nsm_get_handle(const struct net *net, + const struct sockaddr *sap, const size_t salen, const char *hostname, const size_t hostname_len) { struct nsm_handle *cached, *new = NULL; + struct lockd_net *ln = net_generic(net, lockd_net_id); if (hostname && memchr(hostname, '/', hostname_len) != NULL) { if (printk_ratelimit()) { @@ -384,9 +329,10 @@ retry: spin_lock(&nsm_lock); if (nsm_use_hostnames && hostname != NULL) - cached = nsm_lookup_hostname(hostname, hostname_len); + cached = nsm_lookup_hostname(&ln->nsm_handles, + hostname, hostname_len); else - cached = nsm_lookup_addr(sap); + cached = nsm_lookup_addr(&ln->nsm_handles, sap); if (cached != NULL) { atomic_inc(&cached->sm_count); @@ -400,7 +346,7 @@ retry: } if (new != NULL) { - list_add(&new->sm_link, &nsm_handles); + list_add(&new->sm_link, &ln->nsm_handles); spin_unlock(&nsm_lock); dprintk("lockd: created nsm_handle for %s (%s)\n", new->sm_name, new->sm_addrbuf); @@ -417,19 +363,22 @@ retry: /** * nsm_reboot_lookup - match NLMPROC_SM_NOTIFY arguments to an nsm_handle + * @net: network namespace * @info: pointer to NLMPROC_SM_NOTIFY arguments * * Returns a matching nsm_handle if found in the nsm cache. The returned * nsm_handle's reference count is bumped. Otherwise returns NULL if some * error occurred. */ -struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info) +struct nsm_handle *nsm_reboot_lookup(const struct net *net, + const struct nlm_reboot *info) { struct nsm_handle *cached; + struct lockd_net *ln = net_generic(net, lockd_net_id); spin_lock(&nsm_lock); - cached = nsm_lookup_priv(&info->priv); + cached = nsm_lookup_priv(&ln->nsm_handles, &info->priv); if (unlikely(cached == NULL)) { spin_unlock(&nsm_lock); dprintk("lockd: never saw rebooted peer '%.*s' before\n", diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index 097bfa3adb1c..5426189406c1 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h @@ -12,9 +12,7 @@ struct lockd_net { struct delayed_work grace_period_end; struct lock_manager lockd_manager; - spinlock_t nsm_clnt_lock; - unsigned int nsm_users; - struct rpc_clnt *nsm_clnt; + struct list_head nsm_handles; }; extern int lockd_net_id; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index d678bcc3cbcb..5f31ebd96c06 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -592,7 +592,7 @@ static int lockd_init_net(struct net *net) INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); INIT_LIST_HEAD(&ln->lockd_manager.list); ln->lockd_manager.block_opens = false; - spin_lock_init(&ln->nsm_clnt_lock); + INIT_LIST_HEAD(&ln->nsm_handles); return 0; } diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index b147d1ae71fd..09c576f26c7b 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -421,7 +421,7 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, return rpc_system_err; } - nlm_host_rebooted(argp); + nlm_host_rebooted(SVC_NET(rqstp), argp); return rpc_success; } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 21171f0c6477..fb26b9f522e7 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -464,7 +464,7 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, return rpc_system_err; } - nlm_host_rebooted(argp); + nlm_host_rebooted(SVC_NET(rqstp), argp); return rpc_success; } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index f6e7cbabac5a..00575d776d91 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -262,11 +262,11 @@ void fill_post_wcc(struct svc_fh *fhp) err = fh_getattr(fhp, &fhp->fh_post_attr); fhp->fh_post_change = d_inode(fhp->fh_dentry)->i_version; if (err) { - fhp->fh_post_saved = 0; + fhp->fh_post_saved = false; /* Grab the ctime anyway - set_change_info might use it */ fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime; } else - fhp->fh_post_saved = 1; + fhp->fh_post_saved = true; } /* diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index ebf90e487c75..9ffef06b30d5 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -201,6 +201,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, INIT_LIST_HEAD(&ls->ls_perfile); spin_lock_init(&ls->ls_lock); INIT_LIST_HEAD(&ls->ls_layouts); + mutex_init(&ls->ls_mutex); ls->ls_layout_type = layout_type; nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops, NFSPROC4_CLNT_CB_LAYOUT); @@ -262,19 +263,23 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp, status = nfserr_jukebox; if (!ls) goto out; + mutex_lock(&ls->ls_mutex); } else { ls = container_of(stid, struct nfs4_layout_stateid, ls_stid); status = nfserr_bad_stateid; + mutex_lock(&ls->ls_mutex); if (stateid->si_generation > stid->sc_stateid.si_generation) - goto out_put_stid; + goto out_unlock_stid; if (layout_type != ls->ls_layout_type) - goto out_put_stid; + goto out_unlock_stid; } *lsp = ls; return 0; +out_unlock_stid: + mutex_unlock(&ls->ls_mutex); out_put_stid: nfs4_put_stid(stid); out: @@ -296,8 +301,6 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls) trace_layout_recall(&ls->ls_stid.sc_stateid); atomic_inc(&ls->ls_stid.sc_count); - update_stateid(&ls->ls_stid.sc_stateid); - memcpy(&ls->ls_recall_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t)); nfsd4_run_cb(&ls->ls_recall); out_unlock: @@ -406,8 +409,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls) list_add_tail(&new->lo_perstate, &ls->ls_layouts); new = NULL; done: - update_stateid(&ls->ls_stid.sc_stateid); - memcpy(&lgp->lg_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&lgp->lg_sid, &ls->ls_stid); spin_unlock(&ls->ls_lock); out: spin_unlock(&fp->fi_lock); @@ -481,11 +483,8 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp, } } if (!list_empty(&ls->ls_layouts)) { - if (found) { - update_stateid(&ls->ls_stid.sc_stateid); - memcpy(&lrp->lr_sid, &ls->ls_stid.sc_stateid, - sizeof(stateid_t)); - } + if (found) + nfs4_inc_and_copy_stateid(&lrp->lr_sid, &ls->ls_stid); lrp->lrs_present = 1; } else { trace_layoutstate_unhash(&ls->ls_stid.sc_stateid); @@ -494,6 +493,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp, } spin_unlock(&ls->ls_lock); + mutex_unlock(&ls->ls_mutex); nfs4_put_stid(&ls->ls_stid); nfsd4_free_layouts(&reaplist); return nfs_ok; @@ -608,6 +608,16 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls) } } +static void +nfsd4_cb_layout_prepare(struct nfsd4_callback *cb) +{ + struct nfs4_layout_stateid *ls = + container_of(cb, struct nfs4_layout_stateid, ls_recall); + + mutex_lock(&ls->ls_mutex); + nfs4_inc_and_copy_stateid(&ls->ls_recall_sid, &ls->ls_stid); +} + static int nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) { @@ -649,12 +659,14 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb) trace_layout_recall_release(&ls->ls_stid.sc_stateid); + mutex_unlock(&ls->ls_mutex); nfsd4_return_all_layouts(ls, &reaplist); nfsd4_free_layouts(&reaplist); nfs4_put_stid(&ls->ls_stid); } static struct nfsd4_callback_ops nfsd4_cb_layout_ops = { + .prepare = nfsd4_cb_layout_prepare, .done = nfsd4_cb_layout_done, .release = nfsd4_cb_layout_release, }; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 4ce6b97b31ad..a9f096c7e99f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1309,6 +1309,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp, nfserr = nfsd4_insert_layout(lgp, ls); out_put_stid: + mutex_unlock(&ls->ls_mutex); nfs4_put_stid(&ls->ls_stid); out: return nfserr; @@ -1362,6 +1363,9 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp, goto out; } + /* LAYOUTCOMMIT does not require any serialization */ + mutex_unlock(&ls->ls_mutex); + if (new_size > i_size_read(inode)) { lcp->lc_size_chg = 1; lcp->lc_newsize = new_size; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0f1d5691b795..6b800b5b8fed 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -575,6 +575,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; /* Will be incremented before return to client: */ atomic_set(&stid->sc_count, 1); + spin_lock_init(&stid->sc_lock); /* * It shouldn't be a problem to reuse an opaque stateid value. @@ -745,6 +746,18 @@ nfs4_put_stid(struct nfs4_stid *s) put_nfs4_file(fp); } +void +nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid) +{ + stateid_t *src = &stid->sc_stateid; + + spin_lock(&stid->sc_lock); + if (unlikely(++src->si_generation == 0)) + src->si_generation = 1; + memcpy(dst, src, sizeof(*dst)); + spin_unlock(&stid->sc_lock); +} + static void nfs4_put_deleg_lease(struct nfs4_file *fp) { struct file *filp = NULL; @@ -765,16 +778,68 @@ void nfs4_unhash_stid(struct nfs4_stid *s) s->sc_type = 0; } -static void +/** + * nfs4_get_existing_delegation - Discover if this delegation already exists + * @clp: a pointer to the nfs4_client we're granting a delegation to + * @fp: a pointer to the nfs4_file we're granting a delegation on + * + * Return: + * On success: NULL if an existing delegation was not found. + * + * On error: -EAGAIN if one was previously granted to this nfs4_client + * for this nfs4_file. + * + */ + +static int +nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) +{ + struct nfs4_delegation *searchdp = NULL; + struct nfs4_client *searchclp = NULL; + + lockdep_assert_held(&state_lock); + lockdep_assert_held(&fp->fi_lock); + + list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) { + searchclp = searchdp->dl_stid.sc_client; + if (clp == searchclp) { + return -EAGAIN; + } + } + return 0; +} + +/** + * hash_delegation_locked - Add a delegation to the appropriate lists + * @dp: a pointer to the nfs4_delegation we are adding. + * @fp: a pointer to the nfs4_file we're granting a delegation on + * + * Return: + * On success: NULL if the delegation was successfully hashed. + * + * On error: -EAGAIN if one was previously granted to this + * nfs4_client for this nfs4_file. Delegation is not hashed. + * + */ + +static int hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { + int status; + struct nfs4_client *clp = dp->dl_stid.sc_client; + lockdep_assert_held(&state_lock); lockdep_assert_held(&fp->fi_lock); + status = nfs4_get_existing_delegation(clp, fp); + if (status) + return status; + ++fp->fi_delegees; atomic_inc(&dp->dl_stid.sc_count); dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); + list_add(&dp->dl_perclnt, &clp->cl_delegations); + return 0; } static bool @@ -2256,15 +2321,20 @@ nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) clid->flags = new->cl_exchange_flags; } +static bool client_has_openowners(struct nfs4_client *clp) +{ + struct nfs4_openowner *oo; + + list_for_each_entry(oo, &clp->cl_openowners, oo_perclient) { + if (!list_empty(&oo->oo_owner.so_stateids)) + return true; + } + return false; +} + static bool client_has_state(struct nfs4_client *clp) { - /* - * Note clp->cl_openowners check isn't quite right: there's no - * need to count owners without stateid's. - * - * Also note we should probably be using this in 4.0 case too. - */ - return !list_empty(&clp->cl_openowners) + return client_has_openowners(clp) #ifdef CONFIG_NFSD_PNFS || !list_empty(&clp->cl_lo_states) #endif @@ -3049,7 +3119,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* Cases below refer to rfc 3530 section 14.2.33: */ spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&clname, nn); - if (conf) { + if (conf && client_has_state(conf)) { /* case 0: */ status = nfserr_clid_inuse; if (clp_used_exchangeid(conf)) @@ -3136,6 +3206,11 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, } else { /* case 3: normal case; new or rebooted client */ old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { + status = nfserr_clid_inuse; + if (client_has_state(old) + && !same_creds(&unconf->cl_cred, + &old->cl_cred)) + goto out; status = mark_client_expired_locked(old); if (status) { old = NULL; @@ -3317,6 +3392,27 @@ static const struct nfs4_stateowner_operations openowner_ops = { .so_free = nfs4_free_openowner, }; +static struct nfs4_ol_stateid * +nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) +{ + struct nfs4_ol_stateid *local, *ret = NULL; + struct nfs4_openowner *oo = open->op_openowner; + + lockdep_assert_held(&fp->fi_lock); + + list_for_each_entry(local, &fp->fi_stateids, st_perfile) { + /* ignore lock owners */ + if (local->st_stateowner->so_is_open_owner == 0) + continue; + if (local->st_stateowner == &oo->oo_owner) { + ret = local; + atomic_inc(&ret->st_stid.sc_count); + break; + } + } + return ret; +} + static struct nfs4_openowner * alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, struct nfsd4_compound_state *cstate) @@ -3348,9 +3444,20 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, return ret; } -static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { +static struct nfs4_ol_stateid * +init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, + struct nfsd4_open *open) +{ + struct nfs4_openowner *oo = open->op_openowner; + struct nfs4_ol_stateid *retstp = NULL; + spin_lock(&oo->oo_owner.so_client->cl_lock); + spin_lock(&fp->fi_lock); + + retstp = nfsd4_find_existing_open(fp, open); + if (retstp) + goto out_unlock; atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_locks); @@ -3360,12 +3467,14 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; - spin_lock(&oo->oo_owner.so_client->cl_lock); + init_rwsem(&stp->st_rwsem); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); - spin_lock(&fp->fi_lock); list_add(&stp->st_perfile, &fp->fi_stateids); + +out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&oo->oo_owner.so_client->cl_lock); + return retstp; } /* @@ -3776,27 +3885,6 @@ out: return nfs_ok; } -static struct nfs4_ol_stateid * -nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) -{ - struct nfs4_ol_stateid *local, *ret = NULL; - struct nfs4_openowner *oo = open->op_openowner; - - spin_lock(&fp->fi_lock); - list_for_each_entry(local, &fp->fi_stateids, st_perfile) { - /* ignore lock owners */ - if (local->st_stateowner->so_is_open_owner == 0) - continue; - if (local->st_stateowner == &oo->oo_owner) { - ret = local; - atomic_inc(&ret->st_stid.sc_count); - break; - } - } - spin_unlock(&fp->fi_lock); - return ret; -} - static inline int nfs4_access_to_access(u32 nfs4_access) { int flags = 0; @@ -3945,6 +4033,18 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) return fl; } +/** + * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer + * @dp: a pointer to the nfs4_delegation we're adding. + * + * Return: + * On success: Return code will be 0 on success. + * + * On error: -EAGAIN if there was an existing delegation. + * nonzero if there is an error in other cases. + * + */ + static int nfs4_setlease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_stid.sc_file; @@ -3976,16 +4076,19 @@ static int nfs4_setlease(struct nfs4_delegation *dp) goto out_unlock; /* Race breaker */ if (fp->fi_deleg_file) { - status = 0; - ++fp->fi_delegees; - hash_delegation_locked(dp, fp); + status = hash_delegation_locked(dp, fp); goto out_unlock; } fp->fi_deleg_file = filp; - fp->fi_delegees = 1; - hash_delegation_locked(dp, fp); + fp->fi_delegees = 0; + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); + if (status) { + /* Should never happen, this is a new fi_deleg_file */ + WARN_ON_ONCE(1); + goto out_fput; + } return 0; out_unlock: spin_unlock(&fp->fi_lock); @@ -4005,6 +4108,15 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, if (fp->fi_had_conflict) return ERR_PTR(-EAGAIN); + spin_lock(&state_lock); + spin_lock(&fp->fi_lock); + status = nfs4_get_existing_delegation(clp, fp); + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); + + if (status) + return ERR_PTR(status); + dp = alloc_init_deleg(clp, fh, odstate); if (!dp) return ERR_PTR(-ENOMEM); @@ -4023,9 +4135,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, status = -EAGAIN; goto out_unlock; } - ++fp->fi_delegees; - hash_delegation_locked(dp, fp); - status = 0; + status = hash_delegation_locked(dp, fp); out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); @@ -4160,6 +4270,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; struct nfs4_file *fp = NULL; struct nfs4_ol_stateid *stp = NULL; + struct nfs4_ol_stateid *swapstp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; @@ -4173,7 +4284,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; + spin_lock(&fp->fi_lock); stp = nfsd4_find_existing_open(fp, open); + spin_unlock(&fp->fi_lock); } else { open->op_file = NULL; status = nfserr_bad_stateid; @@ -4187,15 +4300,32 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ + down_read(&stp->st_rwsem); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); - if (status) + if (status) { + up_read(&stp->st_rwsem); goto out; + } } else { stp = open->op_stp; open->op_stp = NULL; - init_open_stateid(stp, fp, open); + swapstp = init_open_stateid(stp, fp, open); + if (swapstp) { + nfs4_put_stid(&stp->st_stid); + stp = swapstp; + down_read(&stp->st_rwsem); + status = nfs4_upgrade_open(rqstp, fp, current_fh, + stp, open); + if (status) { + up_read(&stp->st_rwsem); + goto out; + } + goto upgrade_out; + } + down_read(&stp->st_rwsem); status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { + up_read(&stp->st_rwsem); release_open_stateid(stp); goto out; } @@ -4205,8 +4335,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (stp->st_clnt_odstate == open->op_odstate) open->op_odstate = NULL; } - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); +upgrade_out: + nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid); + up_read(&stp->st_rwsem); if (nfsd4_has_session(&resp->cstate)) { if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { @@ -4819,10 +4950,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ * revoked delegations are kept only for free_stateid. */ return nfserr_bad_stateid; + down_write(&stp->st_rwsem); status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); - if (status) - return status; - return nfs4_check_fh(current_fh, &stp->st_stid); + if (status == nfs_ok) + status = nfs4_check_fh(current_fh, &stp->st_stid); + if (status != nfs_ok) + up_write(&stp->st_rwsem); + return status; } /* @@ -4869,6 +5003,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs return status; oo = openowner(stp->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; } @@ -4899,11 +5034,13 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; - if (oo->oo_flags & NFS4_OO_CONFIRMED) + if (oo->oo_flags & NFS4_OO_CONFIRMED) { + up_write(&stp->st_rwsem); goto put_stateid; + } oo->oo_flags |= NFS4_OO_CONFIRMED; - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid); + up_write(&stp->st_rwsem); dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); @@ -4975,13 +5112,11 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, goto put_stateid; } nfs4_stateid_downgrade(stp, od->od_share_access); - reset_union_bmap_deny(od->od_share_deny, stp); - - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid); status = nfs_ok; put_stateid: + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); @@ -5033,8 +5168,8 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_bump_seqid(cstate, status); if (status) goto out; - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); + up_write(&stp->st_rwsem); nfsd4_close_open_stateid(stp); @@ -5260,6 +5395,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; + init_rwsem(&stp->st_rwsem); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); spin_lock(&fp->fi_lock); @@ -5428,6 +5564,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &open_stp, nn); if (status) goto out; + up_write(&open_stp->st_rwsem); open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, @@ -5435,6 +5572,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new); + if (status == nfs_ok) + down_write(&lock_stp->st_rwsem); } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, @@ -5512,9 +5651,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, err = vfs_lock_file(filp, F_SETLK, file_lock, conflock); switch (-err) { case 0: /* success! */ - update_stateid(&lock_stp->st_stid.sc_stateid); - memcpy(&lock->lk_resp_stateid, &lock_stp->st_stid.sc_stateid, - sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid); status = 0; break; case (EAGAIN): /* conflock holds conflicting lock */ @@ -5540,6 +5677,8 @@ out: seqid_mutating_err(ntohl(status))) lock_sop->lo_owner.so_seqid++; + up_write(&lock_stp->st_rwsem); + /* * If this is a new, never-before-used stateid, and we are * returning an error, then just go ahead and release it. @@ -5704,11 +5843,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n"); goto out_nfserr; } - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid); fput: fput(filp); put_stateid: + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 46ec934f5dee..54cde9a5864e 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -63,7 +63,6 @@ static unsigned int longest_chain; static unsigned int longest_chain_cachesize; static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); -static void cache_cleaner_func(struct work_struct *unused); static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc); static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, @@ -76,13 +75,6 @@ static struct shrinker nfsd_reply_cache_shrinker = { }; /* - * locking for the reply cache: - * A cache entry is "single use" if c_state == RC_INPROG - * Otherwise, it when accessing _prev or _next, the lock must be held. - */ -static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); - -/* * Put a cap on the size of the DRC based on the amount of available * low memory in the machine. * @@ -203,7 +195,6 @@ void nfsd_reply_cache_shutdown(void) unsigned int i; unregister_shrinker(&nfsd_reply_cache_shrinker); - cancel_delayed_work_sync(&cache_cleaner); for (i = 0; i < drc_hashsize; i++) { struct list_head *head = &drc_hashtbl[i].lru_head; @@ -217,10 +208,8 @@ void nfsd_reply_cache_shutdown(void) drc_hashtbl = NULL; drc_hashsize = 0; - if (drc_slab) { - kmem_cache_destroy(drc_slab); - drc_slab = NULL; - } + kmem_cache_destroy(drc_slab); + drc_slab = NULL; } /* @@ -232,7 +221,6 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) { rp->c_timestamp = jiffies; list_move_tail(&rp->c_lru, &b->lru_head); - schedule_delayed_work(&cache_cleaner, RC_EXPIRE); } static long @@ -266,7 +254,6 @@ prune_cache_entries(void) { unsigned int i; long freed = 0; - bool cancel = true; for (i = 0; i < drc_hashsize; i++) { struct nfsd_drc_bucket *b = &drc_hashtbl[i]; @@ -275,26 +262,11 @@ prune_cache_entries(void) continue; spin_lock(&b->cache_lock); freed += prune_bucket(b); - if (!list_empty(&b->lru_head)) - cancel = false; spin_unlock(&b->cache_lock); } - - /* - * Conditionally rearm the job to run in RC_EXPIRE since we just - * ran the pruner. - */ - if (!cancel) - mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); return freed; } -static void -cache_cleaner_func(struct work_struct *unused) -{ - prune_cache_entries(); -} - static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) { diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 350041a40fe5..c1681ce894c5 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -631,10 +631,7 @@ fh_put(struct svc_fh *fhp) fh_unlock(fhp); fhp->fh_dentry = NULL; dput(dentry); -#ifdef CONFIG_NFSD_V3 - fhp->fh_pre_saved = 0; - fhp->fh_post_saved = 0; -#endif + fh_clear_wcc(fhp); } fh_drop_write(fhp); if (exp) { diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 1e90dad4926b..2087bae17582 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -26,16 +26,16 @@ static inline ino_t u32_to_ino_t(__u32 uino) */ typedef struct svc_fh { struct knfsd_fh fh_handle; /* FH data */ + int fh_maxsize; /* max size for fh_handle */ struct dentry * fh_dentry; /* validated dentry */ struct svc_export * fh_export; /* export pointer */ - int fh_maxsize; /* max size for fh_handle */ - unsigned char fh_locked; /* inode locked by us */ - unsigned char fh_want_write; /* remount protection taken */ + bool fh_locked; /* inode locked by us */ + bool fh_want_write; /* remount protection taken */ #ifdef CONFIG_NFSD_V3 - unsigned char fh_post_saved; /* post-op attrs saved */ - unsigned char fh_pre_saved; /* pre-op attrs saved */ + bool fh_post_saved; /* post-op attrs saved */ + bool fh_pre_saved; /* pre-op attrs saved */ /* Pre-op attributes saved during fh_lock */ __u64 fh_pre_size; /* size before operation */ @@ -213,8 +213,8 @@ static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) static inline void fh_clear_wcc(struct svc_fh *fhp) { - fhp->fh_post_saved = 0; - fhp->fh_pre_saved = 0; + fhp->fh_post_saved = false; + fhp->fh_pre_saved = false; } /* @@ -231,7 +231,7 @@ fill_pre_wcc(struct svc_fh *fhp) fhp->fh_pre_ctime = inode->i_ctime; fhp->fh_pre_size = inode->i_size; fhp->fh_pre_change = inode->i_version; - fhp->fh_pre_saved = 1; + fhp->fh_pre_saved = true; } } @@ -267,7 +267,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) inode = d_inode(dentry); mutex_lock_nested(&inode->i_mutex, subclass); fill_pre_wcc(fhp); - fhp->fh_locked = 1; + fhp->fh_locked = true; } static inline void @@ -285,7 +285,7 @@ fh_unlock(struct svc_fh *fhp) if (fhp->fh_locked) { fill_post_wcc(fhp); mutex_unlock(&d_inode(fhp->fh_dentry)->i_mutex); - fhp->fh_locked = 0; + fhp->fh_locked = false; } } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 583ffc13cae2..77fdf4de91ba 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -84,7 +84,7 @@ struct nfsd4_callback_ops { * fields that are of general use to any stateid. */ struct nfs4_stid { - atomic_t sc_count; + atomic_t sc_count; #define NFS4_OPEN_STID 1 #define NFS4_LOCK_STID 2 #define NFS4_DELEG_STID 4 @@ -94,11 +94,12 @@ struct nfs4_stid { #define NFS4_REVOKED_DELEG_STID 16 #define NFS4_CLOSED_DELEG_STID 32 #define NFS4_LAYOUT_STID 64 - unsigned char sc_type; - stateid_t sc_stateid; - struct nfs4_client *sc_client; - struct nfs4_file *sc_file; - void (*sc_free)(struct nfs4_stid *); + unsigned char sc_type; + stateid_t sc_stateid; + spinlock_t sc_lock; + struct nfs4_client *sc_client; + struct nfs4_file *sc_file; + void (*sc_free)(struct nfs4_stid *); }; /* @@ -364,15 +365,6 @@ struct nfs4_client_reclaim { char cr_recdir[HEXDIR_LEN]; /* recover dir */ }; -static inline void -update_stateid(stateid_t *stateid) -{ - stateid->si_generation++; - /* Wraparound recommendation from 3530bis-13 9.1.3.2: */ - if (stateid->si_generation == 0) - stateid->si_generation = 1; -} - /* A reasonable value for REPLAY_ISIZE was estimated as follows: * The OPEN response, typically the largest, requires * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) + @@ -534,15 +526,16 @@ struct nfs4_file { * Better suggestions welcome. */ struct nfs4_ol_stateid { - struct nfs4_stid st_stid; /* must be first field */ - struct list_head st_perfile; - struct list_head st_perstateowner; - struct list_head st_locks; - struct nfs4_stateowner * st_stateowner; - struct nfs4_clnt_odstate * st_clnt_odstate; - unsigned char st_access_bmap; - unsigned char st_deny_bmap; - struct nfs4_ol_stateid * st_openstp; + struct nfs4_stid st_stid; + struct list_head st_perfile; + struct list_head st_perstateowner; + struct list_head st_locks; + struct nfs4_stateowner *st_stateowner; + struct nfs4_clnt_odstate *st_clnt_odstate; + unsigned char st_access_bmap; + unsigned char st_deny_bmap; + struct nfs4_ol_stateid *st_openstp; + struct rw_semaphore st_rwsem; }; static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) @@ -561,6 +554,7 @@ struct nfs4_layout_stateid { struct nfsd4_callback ls_recall; stateid_t ls_recall_sid; bool ls_recalled; + struct mutex ls_mutex; }; static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s) @@ -593,6 +587,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab); void nfs4_unhash_stid(struct nfs4_stid *s); void nfs4_put_stid(struct nfs4_stid *s); +void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); extern void nfs4_release_reclaim(struct nfsd_net *); extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, diff --git a/fs/nfsd/trace.c b/fs/nfsd/trace.c index 82f89070594c..90967466a1e5 100644 --- a/fs/nfsd/trace.c +++ b/fs/nfsd/trace.c @@ -1,5 +1,3 @@ -#include "state.h" - #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index c668520c344b..0befe762762b 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -9,6 +9,8 @@ #include <linux/tracepoint.h> +#include "state.h" + DECLARE_EVENT_CLASS(nfsd_stateid_class, TP_PROTO(stateid_t *stp), TP_ARGS(stp), diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 45c04979e7b3..994d66fbb446 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1631,7 +1631,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, /* cannot use fh_lock as we need deadlock protective ordering * so do it by hand */ trap = lock_rename(tdentry, fdentry); - ffhp->fh_locked = tfhp->fh_locked = 1; + ffhp->fh_locked = tfhp->fh_locked = true; fill_pre_wcc(ffhp); fill_pre_wcc(tfhp); @@ -1681,7 +1681,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, fill_post_wcc(ffhp); fill_post_wcc(tfhp); unlock_rename(tdentry, fdentry); - ffhp->fh_locked = tfhp->fh_locked = 0; + ffhp->fh_locked = tfhp->fh_locked = false; fh_drop_write(ffhp); out: diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index fee2451ae248..fcfc48cbe136 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -112,14 +112,14 @@ static inline int fh_want_write(struct svc_fh *fh) int ret = mnt_want_write(fh->fh_export->ex_path.mnt); if (!ret) - fh->fh_want_write = 1; + fh->fh_want_write = true; return ret; } static inline void fh_drop_write(struct svc_fh *fh) { if (fh->fh_want_write) { - fh->fh_want_write = 0; + fh->fh_want_write = false; mnt_drop_write(fh->fh_export->ex_path.mnt); } } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 9f991007a578..ce7362c88b48 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -632,7 +632,7 @@ static inline void set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) { BUG_ON(!fhp->fh_pre_saved); - cinfo->atomic = fhp->fh_post_saved; + cinfo->atomic = (u32)fhp->fh_post_saved; cinfo->change_supported = IS_I_VERSION(d_inode(fhp->fh_dentry)); cinfo->before_change = fhp->fh_pre_change; diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 54575e3cc1a2..088ba001c6ef 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -109,7 +109,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out; file_update_time(vma->vm_file); - ret = __block_page_mkwrite(vma, vmf, nilfs_get_block); + ret = block_page_mkwrite(vma, vmf, nilfs_get_block); if (ret) { nilfs_transaction_abort(inode->i_sb); goto out; diff --git a/fs/pipe.c b/fs/pipe.c index 8865f7963700..42cf8ddf0e55 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -366,18 +366,17 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) int offset = buf->offset + buf->len; if (ops->can_merge && offset + chars <= PAGE_SIZE) { - int error = ops->confirm(pipe, buf); - if (error) + ret = ops->confirm(pipe, buf); + if (ret) goto out; ret = copy_page_from_iter(buf->page, offset, chars, from); if (unlikely(ret < chars)) { - error = -EFAULT; + ret = -EFAULT; goto out; } do_wakeup = 1; - buf->len += chars; - ret = chars; + buf->len += ret; if (!iov_iter_count(from)) goto out; } @@ -693,17 +692,20 @@ int create_pipe_files(struct file **res, int flags) d_instantiate(path.dentry, inode); - err = -ENFILE; f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops); - if (IS_ERR(f)) + if (IS_ERR(f)) { + err = PTR_ERR(f); goto err_dentry; + } f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); f->private_data = inode->i_pipe; res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops); - if (IS_ERR(res[0])) + if (IS_ERR(res[0])) { + err = PTR_ERR(res[0]); goto err_file; + } path_get(&path); res[0]->private_data = inode->i_pipe; diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index a096841bd06c..f64639176670 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -84,6 +84,7 @@ xfs-y += xfs_aops.o \ xfs_message.o \ xfs_mount.o \ xfs_mru_cache.o \ + xfs_stats.o \ xfs_super.o \ xfs_symlink.o \ xfs_sysfs.o \ @@ -118,7 +119,6 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o -xfs-$(CONFIG_PROC_FS) += xfs_stats.o xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o xfs-$(CONFIG_NFSD_PNFS) += xfs_pnfs.o diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index a7a3a63bb360..686ba6fb20dd 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -55,8 +55,9 @@ kmem_alloc(size_t size, xfs_km_flags_t flags) return ptr; if (!(++retries % 100)) xfs_err(NULL, - "possible memory allocation deadlock in %s (mode:0x%x)", - __func__, lflags); + "%s(%u) possible memory allocation deadlock size %u in %s (mode:0x%x)", + current->comm, current->pid, + (unsigned int)size, __func__, lflags); congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); } @@ -120,8 +121,9 @@ kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags) return ptr; if (!(++retries % 100)) xfs_err(NULL, - "possible memory allocation deadlock in %s (mode:0x%x)", - __func__, lflags); + "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)", + current->comm, current->pid, + __func__, lflags); congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); } diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index ffad7f20342f..3479294c1d58 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -482,7 +482,9 @@ xfs_agfl_verify( be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) return false; } - return true; + + return xfs_log_check_lsn(mp, + be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)); } static void @@ -651,8 +653,8 @@ xfs_alloc_ag_vextent( -((long)(args->len))); } - XFS_STATS_INC(xs_allocx); - XFS_STATS_ADD(xs_allocb, args->len); + XFS_STATS_INC(args->mp, xs_allocx); + XFS_STATS_ADD(args->mp, xs_allocb, args->len); return error; } @@ -1808,8 +1810,8 @@ xfs_free_ag_extent( if (!isfl) xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); - XFS_STATS_INC(xs_freex); - XFS_STATS_ADD(xs_freeb, len); + XFS_STATS_INC(mp, xs_freex); + XFS_STATS_ADD(mp, xs_freeb, len); trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); @@ -2259,9 +2261,13 @@ xfs_agf_verify( { struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); - if (xfs_sb_version_hascrc(&mp->m_sb) && - !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) + if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) return false; + if (!xfs_log_check_lsn(mp, + be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) + return false; + } if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && @@ -2503,7 +2509,7 @@ xfs_alloc_vextent( * Try near allocation first, then anywhere-in-ag after * the first a.g. fails. */ - if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) && + if ((args->userdata & XFS_ALLOC_INITIAL_USER_DATA) && (mp->m_flags & XFS_MOUNT_32BITINODES)) { args->fsbno = XFS_AGB_TO_FSB(mp, ((mp->m_agfrotor / rotorstep) % @@ -2634,6 +2640,14 @@ xfs_alloc_vextent( XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len); #endif + + /* Zero the extent if we were asked to do so */ + if (args->userdata & XFS_ALLOC_USERDATA_ZERO) { + error = xfs_zero_extent(args->ip, args->fsbno, args->len); + if (error) + goto error0; + } + } xfs_perag_put(args->pag); return 0; diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index ca1c8168373a..0ecde4d5cac8 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -101,6 +101,7 @@ typedef struct xfs_alloc_arg { struct xfs_mount *mp; /* file system mount point */ struct xfs_buf *agbp; /* buffer for a.g. freelist header */ struct xfs_perag *pag; /* per-ag struct for this agno */ + struct xfs_inode *ip; /* for userdata zeroing method */ xfs_fsblock_t fsbno; /* file system block number */ xfs_agnumber_t agno; /* allocation group number */ xfs_agblock_t agbno; /* allocation group-relative block # */ @@ -120,15 +121,16 @@ typedef struct xfs_alloc_arg { char wasdel; /* set if allocation was prev delayed */ char wasfromfl; /* set if allocation is from freelist */ char isfl; /* set if is freelist blocks - !acctg */ - char userdata; /* set if this is user data */ + char userdata; /* mask defining userdata treatment */ xfs_fsblock_t firstblock; /* io first block allocated */ } xfs_alloc_arg_t; /* * Defines for userdata */ -#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/ -#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */ +#define XFS_ALLOC_USERDATA (1 << 0)/* allocation is for user data*/ +#define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */ +#define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */ xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp, struct xfs_perag *pag, xfs_extlen_t need); diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index ff065578969f..f949818fa1c7 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -125,7 +125,7 @@ xfs_attr_get( uint lock_mode; int error; - XFS_STATS_INC(xs_attr_get); + XFS_STATS_INC(ip->i_mount, xs_attr_get); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; @@ -209,7 +209,7 @@ xfs_attr_set( int rsvd = (flags & ATTR_ROOT) != 0; int error, err2, committed, local; - XFS_STATS_INC(xs_attr_set); + XFS_STATS_INC(mp, xs_attr_set); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; @@ -412,7 +412,7 @@ xfs_attr_remove( xfs_fsblock_t firstblock; int error; - XFS_STATS_INC(xs_attr_remove); + XFS_STATS_INC(mp, xs_attr_remove); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 33df52d97ec7..aa187f7ba2dd 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -41,6 +41,7 @@ #include "xfs_buf_item.h" #include "xfs_cksum.h" #include "xfs_dir2.h" +#include "xfs_log.h" /* @@ -266,6 +267,8 @@ xfs_attr3_leaf_verify( return false; if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) + return false; } else { if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) return false; diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index f38f9bd81557..5ab95ffa4ae9 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -107,7 +107,7 @@ xfs_attr3_rmt_verify( if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) return false; if (be32_to_cpu(rmt->rm_offset) + - be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX) + be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) return false; if (rmt->rm_owner == 0) return false; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 8e2010d53b07..119c2422aac7 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -948,14 +948,16 @@ xfs_bmap_local_to_extents( bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); /* - * Initialise the block and copy the data + * Initialize the block, copy the data and log the remote buffer. * - * Note: init_fn must set the buffer log item type correctly! + * The callout is responsible for logging because the remote format + * might differ from the local format and thus we don't know how much to + * log here. Note that init_fn must also set the buffer log item type + * correctly. */ init_fn(tp, bp, ip, ifp); - /* account for the change in fork size and log everything */ - xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); + /* account for the change in fork size */ xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); xfs_bmap_local_to_extents_empty(ip, whichfork); flags |= XFS_ILOG_CORE; @@ -1435,7 +1437,7 @@ xfs_bmap_search_extents( xfs_ifork_t *ifp; /* inode fork pointer */ xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - XFS_STATS_INC(xs_look_exlist); + XFS_STATS_INC(ip->i_mount, xs_look_exlist); ifp = XFS_IFORK_PTR(ip, fork); ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); @@ -1732,7 +1734,7 @@ xfs_bmap_add_extent_delay_real( ASSERT(!bma->cur || (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); - XFS_STATS_INC(xs_add_exlist); + XFS_STATS_INC(mp, xs_add_exlist); #define LEFT r[0] #define RIGHT r[1] @@ -2286,7 +2288,7 @@ xfs_bmap_add_extent_unwritten_real( ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); ASSERT(!isnullstartblock(new->br_startblock)); - XFS_STATS_INC(xs_add_exlist); + XFS_STATS_INC(mp, xs_add_exlist); #define LEFT r[0] #define RIGHT r[1] @@ -2946,7 +2948,7 @@ xfs_bmap_add_extent_hole_real( ASSERT(!bma->cur || !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); - XFS_STATS_INC(xs_add_exlist); + XFS_STATS_INC(mp, xs_add_exlist); state = 0; if (whichfork == XFS_ATTR_FORK) @@ -3800,8 +3802,13 @@ xfs_bmap_btalloc( args.wasdel = ap->wasdel; args.isfl = 0; args.userdata = ap->userdata; - if ((error = xfs_alloc_vextent(&args))) + if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) + args.ip = ap->ip; + + error = xfs_alloc_vextent(&args); + if (error) return error; + if (tryagain && args.fsbno == NULLFSBLOCK) { /* * Exact allocation failed. Now try with alignment @@ -4036,7 +4043,7 @@ xfs_bmapi_read( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - XFS_STATS_INC(xs_blk_mapr); + XFS_STATS_INC(mp, xs_blk_mapr); ifp = XFS_IFORK_PTR(ip, whichfork); @@ -4221,7 +4228,7 @@ xfs_bmapi_delay( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - XFS_STATS_INC(xs_blk_mapw); + XFS_STATS_INC(mp, xs_blk_mapw); if (!(ifp->if_flags & XFS_IFEXTENTS)) { error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); @@ -4300,11 +4307,14 @@ xfs_bmapi_allocate( /* * Indicate if this is the first user data in the file, or just any - * user data. + * user data. And if it is userdata, indicate whether it needs to + * be initialised to zero during allocation. */ if (!(bma->flags & XFS_BMAPI_METADATA)) { bma->userdata = (bma->offset == 0) ? XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; + if (bma->flags & XFS_BMAPI_ZERO) + bma->userdata |= XFS_ALLOC_USERDATA_ZERO; } bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1; @@ -4419,6 +4429,17 @@ xfs_bmapi_convert_unwritten( mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; + /* + * Before insertion into the bmbt, zero the range being converted + * if required. + */ + if (flags & XFS_BMAPI_ZERO) { + error = xfs_zero_extent(bma->ip, mval->br_startblock, + mval->br_blockcount); + if (error) + return error; + } + error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, &bma->cur, mval, bma->firstblock, bma->flist, &tmp_logflags); @@ -4512,6 +4533,18 @@ xfs_bmapi_write( ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + /* zeroing is for currently only for data extents, not metadata */ + ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != + (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)); + /* + * we can allocate unwritten extents or pre-zero allocated blocks, + * but it makes no sense to do both at once. This would result in + * zeroing the unwritten extent twice, but it still being an + * unwritten extent.... + */ + ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) != + (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)); + if (unlikely(XFS_TEST_ERROR( (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), @@ -4525,7 +4558,7 @@ xfs_bmapi_write( ifp = XFS_IFORK_PTR(ip, whichfork); - XFS_STATS_INC(xs_blk_mapw); + XFS_STATS_INC(mp, xs_blk_mapw); if (*firstblock == NULLFSBLOCK) { if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) @@ -4718,12 +4751,12 @@ xfs_bmap_del_extent( xfs_filblks_t temp2; /* for indirect length calculations */ int state = 0; - XFS_STATS_INC(xs_del_exlist); + mp = ip->i_mount; + XFS_STATS_INC(mp, xs_del_exlist); if (whichfork == XFS_ATTR_FORK) state |= BMAP_ATTRFORK; - mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); @@ -5070,7 +5103,7 @@ xfs_bunmapi( *done = 1; return 0; } - XFS_STATS_INC(xs_blk_unmap); + XFS_STATS_INC(mp, xs_blk_unmap); isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); start = bno; bno = start + len - 1; diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 6aaa0c1c7200..a160f8a5a3fc 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -52,9 +52,9 @@ struct xfs_bmalloca { xfs_extlen_t minleft; /* amount must be left after alloc */ bool eof; /* set if allocating past last extent */ bool wasdel; /* replacing a delayed allocation */ - bool userdata;/* set if is user data */ bool aeof; /* allocated space at eof */ bool conv; /* overwriting unwritten extents */ + char userdata;/* userdata mask */ int flags; }; @@ -109,6 +109,14 @@ typedef struct xfs_bmap_free */ #define XFS_BMAPI_CONVERT 0x040 +/* + * allocate zeroed extents - this requires all newly allocated user data extents + * to be initialised to zero. It will be ignored if XFS_BMAPI_METADATA is set. + * Use in conjunction with XFS_BMAPI_CONVERT to convert unwritten extents found + * during the allocation range to zeroed written extents. + */ +#define XFS_BMAPI_ZERO 0x080 + #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ @@ -116,7 +124,8 @@ typedef struct xfs_bmap_free { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ { XFS_BMAPI_CONTIG, "CONTIG" }, \ - { XFS_BMAPI_CONVERT, "CONVERT" } + { XFS_BMAPI_CONVERT, "CONVERT" }, \ + { XFS_BMAPI_ZERO, "ZERO" } static inline int xfs_bmapi_aflag(int w) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index f7d7ee7a2607..af1bbee5586e 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -32,6 +32,7 @@ #include "xfs_trace.h" #include "xfs_cksum.h" #include "xfs_alloc.h" +#include "xfs_log.h" /* * Cursor allocation zone. @@ -222,7 +223,7 @@ xfs_btree_check_ptr( * long-form btree header. * * Prior to calculting the CRC, pull the LSN out of the buffer log item and put - * it into the buffer so recovery knows what the last modifcation was that made + * it into the buffer so recovery knows what the last modification was that made * it to disk. */ void @@ -243,8 +244,14 @@ bool xfs_btree_lblock_verify_crc( struct xfs_buf *bp) { - if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) + return false; return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); + } return true; } @@ -254,7 +261,7 @@ xfs_btree_lblock_verify_crc( * short-form btree header. * * Prior to calculting the CRC, pull the LSN out of the buffer log item and put - * it into the buffer so recovery knows what the last modifcation was that made + * it into the buffer so recovery knows what the last modification was that made * it to disk. */ void @@ -275,8 +282,14 @@ bool xfs_btree_sblock_verify_crc( struct xfs_buf *bp) { - if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) + return false; return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); + } return true; } diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 8f18bab73ea5..992dec0638f3 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -84,31 +84,38 @@ union xfs_btree_rec { /* * Generic stats interface */ -#define __XFS_BTREE_STATS_INC(type, stat) \ - XFS_STATS_INC(xs_ ## type ## _2_ ## stat) -#define XFS_BTREE_STATS_INC(cur, stat) \ +#define __XFS_BTREE_STATS_INC(mp, type, stat) \ + XFS_STATS_INC(mp, xs_ ## type ## _2_ ## stat) +#define XFS_BTREE_STATS_INC(cur, stat) \ do { \ + struct xfs_mount *__mp = cur->bc_mp; \ switch (cur->bc_btnum) { \ - case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(abtb, stat); break; \ - case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ - case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ - case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ - case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \ + case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(__mp, abtb, stat); break; \ + case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(__mp, abtc, stat); break; \ + case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \ + case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \ + case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) -#define __XFS_BTREE_STATS_ADD(type, stat, val) \ - XFS_STATS_ADD(xs_ ## type ## _2_ ## stat, val) +#define __XFS_BTREE_STATS_ADD(mp, type, stat, val) \ + XFS_STATS_ADD(mp, xs_ ## type ## _2_ ## stat, val) #define XFS_BTREE_STATS_ADD(cur, stat, val) \ do { \ + struct xfs_mount *__mp = cur->bc_mp; \ switch (cur->bc_btnum) { \ - case XFS_BTNUM_BNO: __XFS_BTREE_STATS_ADD(abtb, stat, val); break; \ - case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ - case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ - case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ - case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \ - case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ + case XFS_BTNUM_BNO: \ + __XFS_BTREE_STATS_ADD(__mp, abtb, stat, val); break; \ + case XFS_BTNUM_CNT: \ + __XFS_BTREE_STATS_ADD(__mp, abtc, stat, val); break; \ + case XFS_BTNUM_BMAP: \ + __XFS_BTREE_STATS_ADD(__mp, bmbt, stat, val); break; \ + case XFS_BTNUM_INO: \ + __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \ + case XFS_BTNUM_FINO: \ + __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \ + case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index be43248a5822..e89a0f8f827c 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -39,6 +39,7 @@ #include "xfs_trace.h" #include "xfs_cksum.h" #include "xfs_buf_item.h" +#include "xfs_log.h" /* * xfs_da_btree.c @@ -150,6 +151,8 @@ xfs_da3_node_verify( return false; if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) + return false; } else { if (ichdr.magic != XFS_DA_NODE_MAGIC) return false; @@ -322,6 +325,7 @@ xfs_da3_node_create( if (xfs_sb_version_hascrc(&mp->m_sb)) { struct xfs_da3_node_hdr *hdr3 = bp->b_addr; + memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr)); ichdr.magic = XFS_DA3_NODE_MAGIC; hdr3->info.blkno = cpu_to_be64(bp->b_bn); hdr3->info.owner = cpu_to_be64(args->dp->i_ino); diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 9de401d297e5..2fb53a5c0a74 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -271,7 +271,7 @@ xfs_dir_createname( rval = xfs_dir_ino_validate(tp->t_mountp, inum); if (rval) return rval; - XFS_STATS_INC(xs_dir_create); + XFS_STATS_INC(dp->i_mount, xs_dir_create); } args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); @@ -365,7 +365,7 @@ xfs_dir_lookup( int lock_mode; ASSERT(S_ISDIR(dp->i_d.di_mode)); - XFS_STATS_INC(xs_dir_lookup); + XFS_STATS_INC(dp->i_mount, xs_dir_lookup); /* * We need to use KM_NOFS here so that lockdep will not throw false @@ -444,7 +444,7 @@ xfs_dir_removename( int v; /* type-checking value */ ASSERT(S_ISDIR(dp->i_d.di_mode)); - XFS_STATS_INC(xs_dir_remove); + XFS_STATS_INC(dp->i_mount, xs_dir_remove); args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); if (!args) diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 4778d1dd511a..9c10e2b8cfcb 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -34,6 +34,7 @@ #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_cksum.h" +#include "xfs_log.h" /* * Local function prototypes. @@ -71,6 +72,8 @@ xfs_dir3_block_verify( return false; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) + return false; } else { if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) return false; diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 824131e71bc5..af71a84f343c 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -31,6 +31,7 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" +#include "xfs_log.h" /* * Check the consistency of the data block. @@ -224,6 +225,8 @@ xfs_dir3_data_verify( return false; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) + return false; } else { if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC)) return false; diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index f300240ebb8d..3923e1f94697 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -33,6 +33,7 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" +#include "xfs_log.h" /* * Local function declarations. @@ -164,6 +165,8 @@ xfs_dir3_leaf_verify( return false; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn))) + return false; } else { if (leaf->hdr.info.magic != cpu_to_be16(magic)) return false; diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index cc28e924545b..70b0cb2fd556 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -33,6 +33,7 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" +#include "xfs_log.h" /* * Function declarations. @@ -97,6 +98,8 @@ xfs_dir3_free_verify( return false; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) + return false; } else { if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)) return false; diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 9590a069e556..8774498ce0ff 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -60,6 +60,14 @@ struct xfs_ifork; #define XFS_SB_VERSION_MOREBITSBIT 0x8000 /* + * The size of a single extended attribute on disk is limited by + * the size of index values within the attribute entries themselves. + * These are be16 fields, so we can only support attribute data + * sizes up to 2^16 bytes in length. + */ +#define XFS_XATTR_SIZE_MAX (1 << 16) + +/* * Supported feature bit list is just all bits in the versionnum field because * we've used them all up and understand them all. Except, of course, for the * shared superblock bit, which nobody knows what it does and so is unsupported. @@ -1483,13 +1491,17 @@ struct xfs_acl { */ #define XFS_ACL_MAX_ENTRIES(mp) \ (xfs_sb_version_hascrc(&mp->m_sb) \ - ? (XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \ + ? (XFS_XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \ sizeof(struct xfs_acl_entry) \ : 25) -#define XFS_ACL_MAX_SIZE(mp) \ +#define XFS_ACL_SIZE(cnt) \ (sizeof(struct xfs_acl) + \ - sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp))) + sizeof(struct xfs_acl_entry) * cnt) + +#define XFS_ACL_MAX_SIZE(mp) \ + XFS_ACL_SIZE(XFS_ACL_MAX_ENTRIES((mp))) + /* On-disk XFS extended attribute names */ #define SGI_ACL_FILE "SGI_ACL_FILE" diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 89689c6a43e2..b2b73a998d42 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -490,6 +490,16 @@ typedef struct xfs_swapext #define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ /* + * ioctl limits + */ +#ifdef XATTR_LIST_MAX +# define XFS_XATTR_LIST_MAX XATTR_LIST_MAX +#else +# define XFS_XATTR_LIST_MAX 65536 +#endif + + +/* * ioctl commands that are used by Linux filesystems */ #define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 54deb2d12ac6..70c1db99f6a7 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -38,6 +38,7 @@ #include "xfs_icreate_item.h" #include "xfs_icache.h" #include "xfs_trace.h" +#include "xfs_log.h" /* @@ -2500,9 +2501,14 @@ xfs_agi_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); - if (xfs_sb_version_hascrc(&mp->m_sb) && - !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) + if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) + return false; + if (!xfs_log_check_lsn(mp, + be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn))) return false; + } + /* * Validate the magic number of the agi block. */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 47425140f343..a0b071d881a0 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -35,6 +35,7 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" +#include "xfs_log.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. @@ -163,6 +164,15 @@ xfs_mount_validate_sb( "Filesystem can not be safely mounted by this kernel."); return -EINVAL; } + } else if (xfs_sb_version_hascrc(sbp)) { + /* + * We can't read verify the sb LSN because the read verifier is + * called before the log is allocated and processed. We know the + * log is set up before write verifier (!check_version) calls, + * so just check it here. + */ + if (!xfs_log_check_lsn(mp, sbp->sb_lsn)) + return -EFSCORRUPTED; } if (xfs_sb_version_has_pquotino(sbp)) { diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index 8f8af05b3f13..cb6fd20a4d3d 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -31,6 +31,7 @@ #include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_buf_item.h" +#include "xfs_log.h" /* @@ -60,6 +61,7 @@ xfs_symlink_hdr_set( if (!xfs_sb_version_hascrc(&mp->m_sb)) return 0; + memset(dsl, 0, sizeof(struct xfs_dsymlink_hdr)); dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC); dsl->sl_offset = cpu_to_be32(offset); dsl->sl_bytes = cpu_to_be32(size); @@ -116,6 +118,8 @@ xfs_symlink_verify( return false; if (dsl->sl_owner == 0) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn))) + return false; return true; } @@ -183,6 +187,7 @@ xfs_symlink_local_to_remote( if (!xfs_sb_version_hascrc(&mp->m_sb)) { bp->b_ops = NULL; memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); + xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); return; } @@ -198,4 +203,6 @@ xfs_symlink_local_to_remote( buf = bp->b_addr; buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp); memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes); + xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) + + ifp->if_bytes - 1); } diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 4b641676f258..6bb470fbb8e8 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -37,16 +37,19 @@ STATIC struct posix_acl * xfs_acl_from_disk( - struct xfs_acl *aclp, - int max_entries) + const struct xfs_acl *aclp, + int len, + int max_entries) { struct posix_acl_entry *acl_e; struct posix_acl *acl; - struct xfs_acl_entry *ace; + const struct xfs_acl_entry *ace; unsigned int count, i; + if (len < sizeof(*aclp)) + return ERR_PTR(-EFSCORRUPTED); count = be32_to_cpu(aclp->acl_cnt); - if (count > max_entries) + if (count > max_entries || XFS_ACL_SIZE(count) != len) return ERR_PTR(-EFSCORRUPTED); acl = posix_acl_alloc(count, GFP_KERNEL); @@ -160,10 +163,11 @@ xfs_get_acl(struct inode *inode, int type) */ if (error == -ENOATTR) goto out_update_cache; + acl = ERR_PTR(error); goto out; } - acl = xfs_acl_from_disk(xfs_acl, XFS_ACL_MAX_ENTRIES(ip->i_mount)); + acl = xfs_acl_from_disk(xfs_acl, len, XFS_ACL_MAX_ENTRIES(ip->i_mount)); if (IS_ERR(acl)) goto out; diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 3841b07f27bf..52f8255d6bdf 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -20,7 +20,6 @@ struct inode; struct posix_acl; -struct xfs_inode; #ifdef CONFIG_XFS_POSIX_ACL extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); @@ -36,4 +35,7 @@ static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type) # define posix_acl_access_exists(inode) 0 # define posix_acl_default_exists(inode) 0 #endif /* CONFIG_XFS_POSIX_ACL */ + +extern void xfs_forget_acl(struct inode *inode, const char *name, int xflags); + #endif /* __XFS_ACL_H__ */ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 50ab2879b9da..29e7e5dd5178 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -172,6 +172,12 @@ xfs_setfilesize_ioend( current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); + /* we abort the update if there was an IO error */ + if (ioend->io_error) { + xfs_trans_cancel(tp); + return ioend->io_error; + } + return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); } @@ -212,14 +218,17 @@ xfs_end_io( ioend->io_error = -EIO; goto done; } - if (ioend->io_error) - goto done; /* * For unwritten extents we need to issue transactions to convert a * range to normal written extens after the data I/O has finished. + * Detecting and handling completion IO errors is done individually + * for each case as different cleanup operations need to be performed + * on error. */ if (ioend->io_type == XFS_IO_UNWRITTEN) { + if (ioend->io_error) + goto done; error = xfs_iomap_write_unwritten(ip, ioend->io_offset, ioend->io_size); } else if (ioend->io_append_trans) { @@ -1250,13 +1259,28 @@ xfs_vm_releasepage( * the DIO. There is only going to be one reference to the ioend and its life * cycle is constrained by the DIO completion code. hence we don't need * reference counting here. + * + * Note that for DIO, an IO to the highest supported file block offset (i.e. + * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64 + * bit variable. Hence if we see this overflow, we have to assume that the IO is + * extending the file size. We won't know for sure until IO completion is run + * and the actual max write offset is communicated to the IO completion + * routine. + * + * For DAX page faults, we are preparing to never see unwritten extents here, + * nor should we ever extend the inode size. Hence we will soon have nothing to + * do here for this case, ensuring we don't have to provide an IO completion + * callback to free an ioend that we don't actually need for a fault into the + * page at offset (2^63 - 1FSB) bytes. */ + static void xfs_map_direct( struct inode *inode, struct buffer_head *bh_result, struct xfs_bmbt_irec *imap, - xfs_off_t offset) + xfs_off_t offset, + bool dax_fault) { struct xfs_ioend *ioend; xfs_off_t size = bh_result->b_size; @@ -1269,6 +1293,13 @@ xfs_map_direct( trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap); + if (dax_fault) { + ASSERT(type == XFS_IO_OVERWRITE); + trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type, + imap); + return; + } + if (bh_result->b_private) { ioend = bh_result->b_private; ASSERT(ioend->io_size > 0); @@ -1283,7 +1314,8 @@ xfs_map_direct( ioend->io_size, ioend->io_type, imap); } else if (type == XFS_IO_UNWRITTEN || - offset + size > i_size_read(inode)) { + offset + size > i_size_read(inode) || + offset + size < 0) { ioend = xfs_alloc_ioend(inode, type); ioend->io_offset = offset; ioend->io_size = size; @@ -1345,7 +1377,8 @@ __xfs_get_blocks( sector_t iblock, struct buffer_head *bh_result, int create, - bool direct) + bool direct, + bool dax_fault) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; @@ -1393,18 +1426,20 @@ __xfs_get_blocks( if (error) goto out_unlock; + /* for DAX, we convert unwritten extents directly */ if (create && (!nimaps || (imap.br_startblock == HOLESTARTBLOCK || - imap.br_startblock == DELAYSTARTBLOCK))) { + imap.br_startblock == DELAYSTARTBLOCK) || + (IS_DAX(inode) && ISUNWRITTEN(&imap)))) { if (direct || xfs_get_extsz_hint(ip)) { /* - * Drop the ilock in preparation for starting the block - * allocation transaction. It will be retaken - * exclusively inside xfs_iomap_write_direct for the - * actual allocation. + * xfs_iomap_write_direct() expects the shared lock. It + * is unlocked on return. */ - xfs_iunlock(ip, lockmode); + if (lockmode == XFS_ILOCK_EXCL) + xfs_ilock_demote(ip, lockmode); + error = xfs_iomap_write_direct(ip, offset, size, &imap, nimaps); if (error) @@ -1441,6 +1476,12 @@ __xfs_get_blocks( goto out_unlock; } + if (IS_DAX(inode) && create) { + ASSERT(!ISUNWRITTEN(&imap)); + /* zeroing is not needed at a higher layer */ + new = 0; + } + /* trim mapping down to size requested */ if (direct || size > (1 << inode->i_blkbits)) xfs_map_trim_size(inode, iblock, bh_result, @@ -1458,7 +1499,8 @@ __xfs_get_blocks( set_buffer_unwritten(bh_result); /* direct IO needs special help */ if (create && direct) - xfs_map_direct(inode, bh_result, &imap, offset); + xfs_map_direct(inode, bh_result, &imap, offset, + dax_fault); } /* @@ -1505,7 +1547,7 @@ xfs_get_blocks( struct buffer_head *bh_result, int create) { - return __xfs_get_blocks(inode, iblock, bh_result, create, false); + return __xfs_get_blocks(inode, iblock, bh_result, create, false, false); } int @@ -1515,7 +1557,17 @@ xfs_get_blocks_direct( struct buffer_head *bh_result, int create) { - return __xfs_get_blocks(inode, iblock, bh_result, create, true); + return __xfs_get_blocks(inode, iblock, bh_result, create, true, false); +} + +int +xfs_get_blocks_dax_fault( + struct inode *inode, + sector_t iblock, + struct buffer_head *bh_result, + int create) +{ + return __xfs_get_blocks(inode, iblock, bh_result, create, true, true); } static void @@ -1614,45 +1666,6 @@ xfs_end_io_direct_write( __xfs_end_io_direct_write(inode, ioend, offset, size); } -/* - * For DAX we need a mapping buffer callback for unwritten extent conversion - * when page faults allocate blocks and then zero them. Note that in this - * case the mapping indicated by the ioend may extend beyond EOF. We most - * definitely do not want to extend EOF here, so we trim back the ioend size to - * EOF. - */ -#ifdef CONFIG_FS_DAX -void -xfs_end_io_dax_write( - struct buffer_head *bh, - int uptodate) -{ - struct xfs_ioend *ioend = bh->b_private; - struct inode *inode = ioend->io_inode; - ssize_t size = ioend->io_size; - - ASSERT(IS_DAX(ioend->io_inode)); - - /* if there was an error zeroing, then don't convert it */ - if (!uptodate) - ioend->io_error = -EIO; - - /* - * Trim update to EOF, so we don't extend EOF during unwritten extent - * conversion of partial EOF blocks. - */ - spin_lock(&XFS_I(inode)->i_flags_lock); - if (ioend->io_offset + size > i_size_read(inode)) - size = i_size_read(inode) - ioend->io_offset; - spin_unlock(&XFS_I(inode)->i_flags_lock); - - __xfs_end_io_direct_write(inode, ioend, ioend->io_offset, size); - -} -#else -void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate) { } -#endif - static inline ssize_t xfs_vm_do_dio( struct inode *inode, diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index 86afd1ac7895..f6ffc9ae5ceb 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -58,7 +58,8 @@ int xfs_get_blocks(struct inode *inode, sector_t offset, struct buffer_head *map_bh, int create); int xfs_get_blocks_direct(struct inode *inode, sector_t offset, struct buffer_head *map_bh, int create); -void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate); +int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset, + struct buffer_head *map_bh, int create); extern void xfs_count_page_state(struct page *, int *, int *); diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 65fb37a18e92..0ef7c2ed3f8a 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -511,7 +511,7 @@ xfs_attr_list_int( xfs_inode_t *dp = context->dp; uint lock_mode; - XFS_STATS_INC(xs_attr_list); + XFS_STATS_INC(dp->i_mount, xs_attr_list); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 3bf4ad0d19e4..dbae6490a79a 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -57,6 +57,35 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) } /* + * Routine to zero an extent on disk allocated to the specific inode. + * + * The VFS functions take a linearised filesystem block offset, so we have to + * convert the sparse xfs fsb to the right format first. + * VFS types are real funky, too. + */ +int +xfs_zero_extent( + struct xfs_inode *ip, + xfs_fsblock_t start_fsb, + xfs_off_t count_fsb) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb); + sector_t block = XFS_BB_TO_FSBT(mp, sector); + ssize_t size = XFS_FSB_TO_B(mp, count_fsb); + + if (IS_DAX(VFS_I(ip))) + return dax_clear_blocks(VFS_I(ip), block, size); + + /* + * let the block layer decide on the fastest method of + * implementing the zeroing. + */ + return sb_issue_zeroout(mp->m_super, block, count_fsb, GFP_NOFS); + +} + +/* * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi * caller. Frees all the extents that need freeing, which must be done * last due to locking considerations. We never free any extents in @@ -229,6 +258,13 @@ xfs_bmap_rtalloc( xfs_trans_mod_dquot_byino(ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_RTBCOUNT, (long) ralen); + + /* Zero the extent if we were asked to do so */ + if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) { + error = xfs_zero_extent(ap->ip, ap->blkno, ap->length); + if (error) + return error; + } } else { ap->length = 0; } @@ -1027,7 +1063,7 @@ xfs_alloc_file_space( xfs_bmap_init(&free_list, &firstfsb); error = xfs_bmapi_write(tp, ip, startoffset_fsb, allocatesize_fsb, alloc_type, &firstfsb, - 0, imapp, &nimaps, &free_list); + resblks, imapp, &nimaps, &free_list); if (error) { goto error0; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 8ecffb35935b..3243cdf97f33 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -201,7 +201,7 @@ _xfs_buf_alloc( atomic_set(&bp->b_pin_count, 0); init_waitqueue_head(&bp->b_waiters); - XFS_STATS_INC(xb_create); + XFS_STATS_INC(target->bt_mount, xb_create); trace_xfs_buf_init(bp, _RET_IP_); return bp; @@ -354,15 +354,16 @@ retry: */ if (!(++retries % 100)) xfs_err(NULL, - "possible memory allocation deadlock in %s (mode:0x%x)", + "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)", + current->comm, current->pid, __func__, gfp_mask); - XFS_STATS_INC(xb_page_retries); + XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries); congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; } - XFS_STATS_INC(xb_page_found); + XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found); nbytes = min_t(size_t, size, PAGE_SIZE - offset); size -= nbytes; @@ -516,7 +517,7 @@ _xfs_buf_find( new_bp->b_pag = pag; spin_unlock(&pag->pag_buf_lock); } else { - XFS_STATS_INC(xb_miss_locked); + XFS_STATS_INC(btp->bt_mount, xb_miss_locked); spin_unlock(&pag->pag_buf_lock); xfs_perag_put(pag); } @@ -529,11 +530,11 @@ found: if (!xfs_buf_trylock(bp)) { if (flags & XBF_TRYLOCK) { xfs_buf_rele(bp); - XFS_STATS_INC(xb_busy_locked); + XFS_STATS_INC(btp->bt_mount, xb_busy_locked); return NULL; } xfs_buf_lock(bp); - XFS_STATS_INC(xb_get_locked_waited); + XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited); } /* @@ -549,7 +550,7 @@ found: } trace_xfs_buf_find(bp, flags, _RET_IP_); - XFS_STATS_INC(xb_get_locked); + XFS_STATS_INC(btp->bt_mount, xb_get_locked); return bp; } @@ -603,7 +604,7 @@ found: } } - XFS_STATS_INC(xb_get); + XFS_STATS_INC(target->bt_mount, xb_get); trace_xfs_buf_get(bp, flags, _RET_IP_); return bp; } @@ -643,7 +644,7 @@ xfs_buf_read_map( trace_xfs_buf_read(bp, flags, _RET_IP_); if (!XFS_BUF_ISDONE(bp)) { - XFS_STATS_INC(xb_get_read); + XFS_STATS_INC(target->bt_mount, xb_get_read); bp->b_ops = ops; _xfs_buf_read(bp, flags); } else if (flags & XBF_ASYNC) { diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index a989a9c7edb7..642d55d10075 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -666,7 +666,7 @@ xfs_readdir( return -EIO; ASSERT(S_ISDIR(dp->i_d.di_mode)); - XFS_STATS_INC(xs_dir_getdents); + XFS_STATS_INC(dp->i_mount, xs_dir_getdents); args.dp = dp; args.geo = dp->i_mount->m_dir_geo; diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 30cb3afb67f0..7ac6c5c586cb 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -75,9 +75,9 @@ xfs_qm_dqdestroy( ASSERT(list_empty(&dqp->q_lru)); mutex_destroy(&dqp->q_qlock); - kmem_zone_free(xfs_qm_dqzone, dqp); - XFS_STATS_DEC(xs_qm_dquot); + XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot); + kmem_zone_free(xfs_qm_dqzone, dqp); } /* @@ -605,7 +605,7 @@ xfs_qm_dqread( break; } - XFS_STATS_INC(xs_qm_dquot); + XFS_STATS_INC(mp, xs_qm_dquot); trace_xfs_dqread(dqp); @@ -747,12 +747,12 @@ restart: mutex_unlock(&qi->qi_tree_lock); trace_xfs_dqget_hit(dqp); - XFS_STATS_INC(xs_qm_dqcachehits); + XFS_STATS_INC(mp, xs_qm_dqcachehits); *O_dqpp = dqp; return 0; } mutex_unlock(&qi->qi_tree_lock); - XFS_STATS_INC(xs_qm_dqcachemisses); + XFS_STATS_INC(mp, xs_qm_dqcachemisses); /* * Dquot cache miss. We don't want to keep the inode lock across @@ -806,7 +806,7 @@ restart: mutex_unlock(&qi->qi_tree_lock); trace_xfs_dqget_dup(dqp); xfs_qm_dqdestroy(dqp); - XFS_STATS_INC(xs_qm_dquot_dups); + XFS_STATS_INC(mp, xs_qm_dquot_dups); goto restart; } @@ -846,7 +846,7 @@ xfs_qm_dqput( trace_xfs_dqput_free(dqp); if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) - XFS_STATS_INC(xs_qm_dquot_unused); + XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused); } xfs_dqunlock(dqp); } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index e78feb400e22..f5392ab2def1 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -242,19 +242,30 @@ xfs_file_fsync( } /* - * All metadata updates are logged, which means that we just have - * to flush the log up to the latest LSN that touched the inode. + * All metadata updates are logged, which means that we just have to + * flush the log up to the latest LSN that touched the inode. If we have + * concurrent fsync/fdatasync() calls, we need them to all block on the + * log force before we clear the ili_fsync_fields field. This ensures + * that we don't get a racing sync operation that does not wait for the + * metadata to hit the journal before returning. If we race with + * clearing the ili_fsync_fields, then all that will happen is the log + * force will do nothing as the lsn will already be on disk. We can't + * race with setting ili_fsync_fields because that is done under + * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared + * until after the ili_fsync_fields is cleared. */ xfs_ilock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) { if (!datasync || - (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP)) + (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) lsn = ip->i_itemp->ili_last_lsn; } - xfs_iunlock(ip, XFS_ILOCK_SHARED); - if (lsn) + if (lsn) { error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); + ip->i_itemp->ili_fsync_fields = 0; + } + xfs_iunlock(ip, XFS_ILOCK_SHARED); /* * If we only have a single device, and the log force about was @@ -287,7 +298,7 @@ xfs_file_read_iter( xfs_fsize_t n; loff_t pos = iocb->ki_pos; - XFS_STATS_INC(xs_read_calls); + XFS_STATS_INC(mp, xs_read_calls); if (unlikely(iocb->ki_flags & IOCB_DIRECT)) ioflags |= XFS_IO_ISDIRECT; @@ -365,7 +376,7 @@ xfs_file_read_iter( ret = generic_file_read_iter(iocb, to); if (ret > 0) - XFS_STATS_ADD(xs_read_bytes, ret); + XFS_STATS_ADD(mp, xs_read_bytes, ret); xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); return ret; @@ -383,7 +394,7 @@ xfs_file_splice_read( int ioflags = 0; ssize_t ret; - XFS_STATS_INC(xs_read_calls); + XFS_STATS_INC(ip->i_mount, xs_read_calls); if (infilp->f_mode & FMODE_NOCMTIME) ioflags |= XFS_IO_INVIS; @@ -401,7 +412,7 @@ xfs_file_splice_read( else ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); if (ret > 0) - XFS_STATS_ADD(xs_read_bytes, ret); + XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret); xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); return ret; @@ -482,6 +493,8 @@ xfs_zero_eof( ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); ASSERT(offset > isize); + trace_xfs_zero_eof(ip, isize, offset - isize); + /* * First handle zeroing the block on which isize resides. * @@ -574,6 +587,7 @@ xfs_file_aio_write_checks( struct xfs_inode *ip = XFS_I(inode); ssize_t error = 0; size_t count = iov_iter_count(from); + bool drained_dio = false; restart: error = generic_write_checks(iocb, from); @@ -611,12 +625,13 @@ restart: bool zero = false; spin_unlock(&ip->i_flags_lock); - if (*iolock == XFS_IOLOCK_SHARED) { - xfs_rw_iunlock(ip, *iolock); - *iolock = XFS_IOLOCK_EXCL; - xfs_rw_ilock(ip, *iolock); - iov_iter_reexpand(from, count); - + if (!drained_dio) { + if (*iolock == XFS_IOLOCK_SHARED) { + xfs_rw_iunlock(ip, *iolock); + *iolock = XFS_IOLOCK_EXCL; + xfs_rw_ilock(ip, *iolock); + iov_iter_reexpand(from, count); + } /* * We now have an IO submission barrier in place, but * AIO can do EOF updates during IO completion and hence @@ -626,6 +641,7 @@ restart: * no-op. */ inode_dio_wait(inode); + drained_dio = true; goto restart; } error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); @@ -867,7 +883,7 @@ xfs_file_write_iter( ssize_t ret; size_t ocount = iov_iter_count(from); - XFS_STATS_INC(xs_write_calls); + XFS_STATS_INC(ip->i_mount, xs_write_calls); if (ocount == 0) return 0; @@ -883,7 +899,7 @@ xfs_file_write_iter( if (ret > 0) { ssize_t err; - XFS_STATS_ADD(xs_write_bytes, ret); + XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret); /* Handle various SYNC-type writes */ err = generic_write_sync(file, iocb->ki_pos - ret, ret); @@ -1477,7 +1493,7 @@ xfs_file_llseek( * * mmap_sem (MM) * sb_start_pagefault(vfs, freeze) - * i_mmap_lock (XFS - truncate serialisation) + * i_mmaplock (XFS - truncate serialisation) * page_lock (MM) * i_lock (XFS - extent map serialisation) */ @@ -1503,10 +1519,9 @@ xfs_filemap_page_mkwrite( xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) { - ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct, - xfs_end_io_dax_write); + ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault, NULL); } else { - ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks); + ret = block_page_mkwrite(vma, vmf, xfs_get_blocks); ret = block_page_mkwrite_return(ret); } @@ -1538,7 +1553,7 @@ xfs_filemap_fault( * changes to xfs_get_blocks_direct() to map unwritten extent * ioend for conversion on read-only mappings. */ - ret = __dax_fault(vma, vmf, xfs_get_blocks_direct, NULL); + ret = __dax_fault(vma, vmf, xfs_get_blocks_dax_fault, NULL); } else ret = filemap_fault(vma, vmf); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); @@ -1546,6 +1561,13 @@ xfs_filemap_fault( return ret; } +/* + * Similar to xfs_filemap_fault(), the DAX fault path can call into here on + * both read and write faults. Hence we need to handle both cases. There is no + * ->pmd_mkwrite callout for huge pages, so we have a single function here to + * handle both cases here. @flags carries the information on the type of fault + * occuring. + */ STATIC int xfs_filemap_pmd_fault( struct vm_area_struct *vma, @@ -1562,15 +1584,54 @@ xfs_filemap_pmd_fault( trace_xfs_filemap_pmd_fault(ip); - sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + if (flags & FAULT_FLAG_WRITE) { + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + } + xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_direct, - xfs_end_io_dax_write); + ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault, + NULL); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - sb_end_pagefault(inode->i_sb); + if (flags & FAULT_FLAG_WRITE) + sb_end_pagefault(inode->i_sb); + + return ret; +} + +/* + * pfn_mkwrite was originally inteneded to ensure we capture time stamp + * updates on write faults. In reality, it's need to serialise against + * truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite() + * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault + * barrier in place. + */ +static int +xfs_filemap_pfn_mkwrite( + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + + struct inode *inode = file_inode(vma->vm_file); + struct xfs_inode *ip = XFS_I(inode); + int ret = VM_FAULT_NOPAGE; + loff_t size; + + trace_xfs_filemap_pfn_mkwrite(ip); + + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + + /* check if the faulting page hasn't raced with truncate */ + xfs_ilock(ip, XFS_MMAPLOCK_SHARED); + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (vmf->pgoff >= size) + ret = VM_FAULT_SIGBUS; + xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); + sb_end_pagefault(inode->i_sb); return ret; + } static const struct vm_operations_struct xfs_file_vm_ops = { @@ -1578,6 +1639,7 @@ static const struct vm_operations_struct xfs_file_vm_ops = { .pmd_fault = xfs_filemap_pmd_fault, .map_pages = filemap_map_pages, .page_mkwrite = xfs_filemap_page_mkwrite, + .pfn_mkwrite = xfs_filemap_pfn_mkwrite, }; STATIC int diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 0a326bd64d4e..d7a490f24ead 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -63,7 +63,7 @@ xfs_inode_alloc( return NULL; } - XFS_STATS_INC(vn_active); + XFS_STATS_INC(mp, vn_active); ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!xfs_isiflocked(ip)); @@ -129,7 +129,7 @@ xfs_inode_free( /* asserts to verify all state is correct here */ ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!xfs_isiflocked(ip)); - XFS_STATS_DEC(vn_active); + XFS_STATS_DEC(ip->i_mount, vn_active); call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); } @@ -159,7 +159,7 @@ xfs_iget_cache_hit( spin_lock(&ip->i_flags_lock); if (ip->i_ino != ino) { trace_xfs_iget_skip(ip); - XFS_STATS_INC(xs_ig_frecycle); + XFS_STATS_INC(mp, xs_ig_frecycle); error = -EAGAIN; goto out_error; } @@ -177,7 +177,7 @@ xfs_iget_cache_hit( */ if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { trace_xfs_iget_skip(ip); - XFS_STATS_INC(xs_ig_frecycle); + XFS_STATS_INC(mp, xs_ig_frecycle); error = -EAGAIN; goto out_error; } @@ -259,7 +259,7 @@ xfs_iget_cache_hit( xfs_ilock(ip, lock_flags); xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE); - XFS_STATS_INC(xs_ig_found); + XFS_STATS_INC(mp, xs_ig_found); return 0; @@ -342,7 +342,7 @@ xfs_iget_cache_miss( error = radix_tree_insert(&pag->pag_ici_root, agino, ip); if (unlikely(error)) { WARN_ON(error != -EEXIST); - XFS_STATS_INC(xs_ig_dup); + XFS_STATS_INC(mp, xs_ig_dup); error = -EAGAIN; goto out_preload_end; } @@ -412,7 +412,7 @@ xfs_iget( if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) return -EINVAL; - XFS_STATS_INC(xs_ig_attempts); + XFS_STATS_INC(mp, xs_ig_attempts); /* get the perag structure and ensure that it's inode capable */ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); @@ -429,7 +429,7 @@ again: goto out_error_or_again; } else { rcu_read_unlock(); - XFS_STATS_INC(xs_ig_missed); + XFS_STATS_INC(mp, xs_ig_missed); error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, flags, lock_flags); @@ -965,7 +965,7 @@ reclaim: xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); - XFS_STATS_INC(xs_ig_reclaims); + XFS_STATS_INC(ip->i_mount, xs_ig_reclaims); /* * Remove the inode from the per-AG radix tree. * diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index dc40a6d5ae0d..8ee393996b7d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2365,6 +2365,7 @@ retry: iip->ili_last_fields = iip->ili_fields; iip->ili_fields = 0; + iip->ili_fsync_fields = 0; iip->ili_logged = 1; xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, &iip->ili_item.li_lsn); @@ -3271,8 +3272,8 @@ xfs_iflush_cluster( } if (clcount) { - XFS_STATS_INC(xs_icluster_flushcnt); - XFS_STATS_ADD(xs_icluster_flushinode, clcount); + XFS_STATS_INC(mp, xs_icluster_flushcnt); + XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount); } out_free: @@ -3345,7 +3346,7 @@ xfs_iflush( struct xfs_dinode *dip; int error; - XFS_STATS_INC(xs_iflush_count); + XFS_STATS_INC(mp, xs_iflush_count); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isiflocked(ip)); @@ -3560,6 +3561,7 @@ xfs_iflush_int( */ iip->ili_last_fields = iip->ili_fields; iip->ili_fields = 0; + iip->ili_fsync_fields = 0; iip->ili_logged = 1; xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 62bd80f4edd9..d14b12b8cfef 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -719,6 +719,7 @@ xfs_iflush_abort( * attempted. */ iip->ili_fields = 0; + iip->ili_fsync_fields = 0; } /* * Release the inode's flush lock since we're done with it. diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 488d81254e28..4c7722e325b3 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -34,6 +34,7 @@ typedef struct xfs_inode_log_item { unsigned short ili_logged; /* flushed logged data */ unsigned int ili_last_fields; /* fields when flushed */ unsigned int ili_fields; /* fields to be logged */ + unsigned int ili_fsync_fields; /* logged since last fsync */ } xfs_inode_log_item_t; static inline int xfs_inode_clean(xfs_inode_t *ip) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index ea7d85af5310..d42738deec6d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -40,6 +40,7 @@ #include "xfs_symlink.h" #include "xfs_trans.h" #include "xfs_pnfs.h" +#include "xfs_acl.h" #include <linux/capability.h> #include <linux/dcache.h> @@ -411,7 +412,7 @@ xfs_attrlist_by_handle( if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) return -EFAULT; if (al_hreq.buflen < sizeof(struct attrlist) || - al_hreq.buflen > XATTR_LIST_MAX) + al_hreq.buflen > XFS_XATTR_LIST_MAX) return -EINVAL; /* @@ -455,7 +456,7 @@ xfs_attrmulti_attr_get( unsigned char *kbuf; int error = -EFAULT; - if (*len > XATTR_SIZE_MAX) + if (*len > XFS_XATTR_SIZE_MAX) return -EINVAL; kbuf = kmem_zalloc_large(*len, KM_SLEEP); if (!kbuf) @@ -482,17 +483,22 @@ xfs_attrmulti_attr_set( __uint32_t flags) { unsigned char *kbuf; + int error; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; - if (len > XATTR_SIZE_MAX) + if (len > XFS_XATTR_SIZE_MAX) return -EINVAL; kbuf = memdup_user(ubuf, len); if (IS_ERR(kbuf)) return PTR_ERR(kbuf); - return xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); + error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); + if (!error) + xfs_forget_acl(inode, name, flags); + kfree(kbuf); + return error; } int @@ -501,9 +507,14 @@ xfs_attrmulti_attr_remove( unsigned char *name, __uint32_t flags) { + int error; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; - return xfs_attr_remove(XFS_I(inode), name, flags); + error = xfs_attr_remove(XFS_I(inode), name, flags); + if (!error) + xfs_forget_acl(inode, name, flags); + return error; } STATIC int @@ -1028,7 +1039,7 @@ xfs_ioctl_setattr_xflags( xfs_diflags_to_linux(ip); xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - XFS_STATS_INC(xs_ig_attrchg); + XFS_STATS_INC(mp, xs_ig_attrchg); return 0; } diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index b88bdc85dd3d..1a05d8ae327d 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -356,7 +356,7 @@ xfs_compat_attrlist_by_handle( sizeof(compat_xfs_fsop_attrlist_handlereq_t))) return -EFAULT; if (al_hreq.buflen < sizeof(struct attrlist) || - al_hreq.buflen > XATTR_LIST_MAX) + al_hreq.buflen > XFS_XATTR_LIST_MAX) return -EINVAL; /* diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 1f86033171c8..f4f5b43cf647 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -131,20 +131,30 @@ xfs_iomap_write_direct( uint qblocks, resblks, resrtextents; int committed; int error; - - error = xfs_qm_dqattach(ip, 0); - if (error) - return error; + int lockmode; + int bmapi_flags = XFS_BMAPI_PREALLOC; rt = XFS_IS_REALTIME_INODE(ip); extsz = xfs_get_extsz_hint(ip); + lockmode = XFS_ILOCK_SHARED; /* locked by caller */ + + ASSERT(xfs_isilocked(ip, lockmode)); offset_fsb = XFS_B_TO_FSBT(mp, offset); last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); if ((offset + count) > XFS_ISIZE(ip)) { + /* + * Assert that the in-core extent list is present since this can + * call xfs_iread_extents() and we only have the ilock shared. + * This should be safe because the lock was held around a bmapi + * call in the caller and we only need it to access the in-core + * list. + */ + ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags & + XFS_IFEXTENTS); error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); if (error) - return error; + goto out_unlock; } else { if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) last_fsb = MIN(last_fsb, (xfs_fileoff_t) @@ -174,9 +184,35 @@ xfs_iomap_write_direct( } /* + * Drop the shared lock acquired by the caller, attach the dquot if + * necessary and move on to transaction setup. + */ + xfs_iunlock(ip, lockmode); + error = xfs_qm_dqattach(ip, 0); + if (error) + return error; + + /* * Allocate and setup the transaction */ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); + + /* + * For DAX, we do not allocate unwritten extents, but instead we zero + * the block before we commit the transaction. Ideally we'd like to do + * this outside the transaction context, but if we commit and then crash + * we may not have zeroed the blocks and this will be exposed on + * recovery of the allocation. Hence we must zero before commit. + * Further, if we are mapping unwritten extents here, we need to zero + * and convert them to written so that we don't need an unwritten extent + * callback for DAX. This also means that we need to be able to dip into + * the reserve block pool if there is no space left but we need to do + * unwritten extent conversion. + */ + if (IS_DAX(VFS_I(ip))) { + bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO; + tp->t_flags |= XFS_TRANS_RESERVE; + } error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, resrtextents); /* @@ -187,7 +223,8 @@ xfs_iomap_write_direct( return error; } - xfs_ilock(ip, XFS_ILOCK_EXCL); + lockmode = XFS_ILOCK_EXCL; + xfs_ilock(ip, lockmode); error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); if (error) @@ -202,8 +239,8 @@ xfs_iomap_write_direct( xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, - XFS_BMAPI_PREALLOC, &firstfsb, 0, - imap, &nimaps, &free_list); + bmapi_flags, &firstfsb, resblks, imap, + &nimaps, &free_list); if (error) goto out_bmap_cancel; @@ -213,6 +250,7 @@ xfs_iomap_write_direct( error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) goto out_bmap_cancel; + error = xfs_trans_commit(tp); if (error) goto out_unlock; @@ -229,7 +267,7 @@ xfs_iomap_write_direct( error = xfs_alert_fsblock_zero(ip, imap); out_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, lockmode); return error; out_bmap_cancel: @@ -670,7 +708,7 @@ xfs_iomap_write_allocate( count_fsb = imap->br_blockcount; map_start_fsb = imap->br_startoff; - XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); + XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); while (count_fsb != 0) { /* @@ -750,9 +788,9 @@ xfs_iomap_write_allocate( * pointer that the caller gave to us. */ error = xfs_bmapi_write(tp, ip, map_start_fsb, - count_fsb, 0, - &first_block, 1, - imap, &nimaps, &free_list); + count_fsb, 0, &first_block, + nres, imap, &nimaps, + &free_list); if (error) goto trans_cancel; @@ -777,7 +815,7 @@ xfs_iomap_write_allocate( if ((offset_fsb >= imap->br_startoff) && (offset_fsb < (imap->br_startoff + imap->br_blockcount))) { - XFS_STATS_INC(xs_xstrat_quick); + XFS_STATS_INC(mp, xs_xstrat_quick); return 0; } @@ -866,8 +904,8 @@ xfs_iomap_write_unwritten( xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, - XFS_BMAPI_CONVERT, &firstfsb, - 1, &imap, &nimaps, &free_list); + XFS_BMAPI_CONVERT, &firstfsb, resblks, + &imap, &nimaps, &free_list); if (error) goto error_on_bmapi_transaction; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 8294132e6a3c..245268a0cdf0 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -695,7 +695,7 @@ xfs_setattr_nonsize( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - XFS_STATS_INC(xs_ig_attrchg); + XFS_STATS_INC(mp, xs_ig_attrchg); if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); @@ -922,7 +922,7 @@ xfs_setattr_size( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - XFS_STATS_INC(xs_ig_attrchg); + XFS_STATS_INC(mp, xs_ig_attrchg); if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 85f883dd6207..ec0e239a0fa9 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -171,6 +171,13 @@ struct xfs_kobj { struct completion complete; }; +struct xstats { + struct xfsstats __percpu *xs_stats; + struct xfs_kobj xs_kobj; +}; + +extern struct xstats xfsstats; + /* Kernel uid/gid conversion. These are used to convert to/from the on disk * uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally. * The conversion here is type only, the value will remain the same since we diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index aaadee0969c9..f52c72a1a06f 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -268,7 +268,7 @@ xlog_grant_head_wait( __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock(&head->lock); - XFS_STATS_INC(xs_sleep_logspace); + XFS_STATS_INC(log->l_mp, xs_sleep_logspace); trace_xfs_log_grant_sleep(log, tic); schedule(); @@ -379,7 +379,7 @@ xfs_log_regrant( if (XLOG_FORCED_SHUTDOWN(log)) return -EIO; - XFS_STATS_INC(xs_try_logspace); + XFS_STATS_INC(mp, xs_try_logspace); /* * This is a new transaction on the ticket, so we need to change the @@ -448,7 +448,7 @@ xfs_log_reserve( if (XLOG_FORCED_SHUTDOWN(log)) return -EIO; - XFS_STATS_INC(xs_try_logspace); + XFS_STATS_INC(mp, xs_try_logspace); ASSERT(*ticp == NULL); tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, @@ -1768,7 +1768,7 @@ xlog_sync( int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); int size; - XFS_STATS_INC(xs_log_writes); + XFS_STATS_INC(log->l_mp, xs_log_writes); ASSERT(atomic_read(&iclog->ic_refcnt) == 0); /* Add for LR header */ @@ -1805,7 +1805,7 @@ xlog_sync( bp = iclog->ic_bp; XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); - XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); + XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count)); /* Do we need to split this write into 2 parts? */ if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { @@ -2422,11 +2422,20 @@ xlog_write( &partial_copy_len); xlog_verify_dest_ptr(log, ptr); - /* copy region */ + /* + * Copy region. + * + * Unmount records just log an opheader, so can have + * empty payloads with no data region to copy. Hence we + * only copy the payload if the vector says it has data + * to copy. + */ ASSERT(copy_len >= 0); - memcpy(ptr, reg->i_addr + copy_off, copy_len); - xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len); - + if (copy_len > 0) { + memcpy(ptr, reg->i_addr + copy_off, copy_len); + xlog_write_adv_cnt(&ptr, &len, &log_offset, + copy_len); + } copy_len += start_rec_copy + sizeof(xlog_op_header_t); record_cnt++; data_cnt += contwr ? copy_len : 0; @@ -2913,7 +2922,7 @@ restart: iclog = log->l_iclog; if (iclog->ic_state != XLOG_STATE_ACTIVE) { - XFS_STATS_INC(xs_log_noiclogs); + XFS_STATS_INC(log->l_mp, xs_log_noiclogs); /* Wait for log writes to have flushed */ xlog_wait(&log->l_flush_wait, &log->l_icloglock); @@ -3165,11 +3174,19 @@ xlog_state_switch_iclogs( } if (log->l_curr_block >= log->l_logBBsize) { + /* + * Rewind the current block before the cycle is bumped to make + * sure that the combined LSN never transiently moves forward + * when the log wraps to the next cycle. This is to support the + * unlocked sample of these fields from xlog_valid_lsn(). Most + * other cases should acquire l_icloglock. + */ + log->l_curr_block -= log->l_logBBsize; + ASSERT(log->l_curr_block >= 0); + smp_wmb(); log->l_curr_cycle++; if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM) log->l_curr_cycle++; - log->l_curr_block -= log->l_logBBsize; - ASSERT(log->l_curr_block >= 0); } ASSERT(iclog == log->l_iclog); log->l_iclog = iclog->ic_next; @@ -3212,7 +3229,7 @@ _xfs_log_force( struct xlog_in_core *iclog; xfs_lsn_t lsn; - XFS_STATS_INC(xs_log_force); + XFS_STATS_INC(mp, xs_log_force); xlog_cil_force(log); @@ -3297,7 +3314,7 @@ maybe_sleep: spin_unlock(&log->l_icloglock); return -EIO; } - XFS_STATS_INC(xs_log_force_sleep); + XFS_STATS_INC(mp, xs_log_force_sleep); xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); /* * No need to grab the log lock here since we're @@ -3362,7 +3379,7 @@ _xfs_log_force_lsn( ASSERT(lsn != 0); - XFS_STATS_INC(xs_log_force); + XFS_STATS_INC(mp, xs_log_force); lsn = xlog_cil_force_lsn(log, lsn); if (lsn == NULLCOMMITLSN) @@ -3411,7 +3428,7 @@ try_again: (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) { ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); - XFS_STATS_INC(xs_log_force_sleep); + XFS_STATS_INC(mp, xs_log_force_sleep); xlog_wait(&iclog->ic_prev->ic_write_wait, &log->l_icloglock); @@ -3441,7 +3458,7 @@ try_again: spin_unlock(&log->l_icloglock); return -EIO; } - XFS_STATS_INC(xs_log_force_sleep); + XFS_STATS_INC(mp, xs_log_force_sleep); xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); /* * No need to grab the log lock here since we're @@ -4023,3 +4040,45 @@ xlog_iclogs_empty( return 1; } +/* + * Verify that an LSN stamped into a piece of metadata is valid. This is + * intended for use in read verifiers on v5 superblocks. + */ +bool +xfs_log_check_lsn( + struct xfs_mount *mp, + xfs_lsn_t lsn) +{ + struct xlog *log = mp->m_log; + bool valid; + + /* + * norecovery mode skips mount-time log processing and unconditionally + * resets the in-core LSN. We can't validate in this mode, but + * modifications are not allowed anyways so just return true. + */ + if (mp->m_flags & XFS_MOUNT_NORECOVERY) + return true; + + /* + * Some metadata LSNs are initialized to NULL (e.g., the agfl). This is + * handled by recovery and thus safe to ignore here. + */ + if (lsn == NULLCOMMITLSN) + return true; + + valid = xlog_valid_lsn(mp->m_log, lsn); + + /* warn the user about what's gone wrong before verifier failure */ + if (!valid) { + spin_lock(&log->l_icloglock); + xfs_warn(mp, +"Corruption warning: Metadata has LSN (%d:%d) ahead of current LSN (%d:%d). " +"Please unmount and run xfs_repair (>= v4.3) to resolve.", + CYCLE_LSN(lsn), BLOCK_LSN(lsn), + log->l_curr_cycle, log->l_curr_block); + spin_unlock(&log->l_icloglock); + } + + return valid; +} diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 09d91d3166cd..aa533a7d50f2 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -181,5 +181,6 @@ bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); void xfs_log_work_queue(struct xfs_mount *mp); void xfs_log_worker(struct work_struct *work); void xfs_log_quiesce(struct xfs_mount *mp); +bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t); #endif /* __XFS_LOG_H__ */ diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 950f3f94720c..8daba7491b13 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -560,4 +560,55 @@ static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock) remove_wait_queue(wq, &wait); } +/* + * The LSN is valid so long as it is behind the current LSN. If it isn't, this + * means that the next log record that includes this metadata could have a + * smaller LSN. In turn, this means that the modification in the log would not + * replay. + */ +static inline bool +xlog_valid_lsn( + struct xlog *log, + xfs_lsn_t lsn) +{ + int cur_cycle; + int cur_block; + bool valid = true; + + /* + * First, sample the current lsn without locking to avoid added + * contention from metadata I/O. The current cycle and block are updated + * (in xlog_state_switch_iclogs()) and read here in a particular order + * to avoid false negatives (e.g., thinking the metadata LSN is valid + * when it is not). + * + * The current block is always rewound before the cycle is bumped in + * xlog_state_switch_iclogs() to ensure the current LSN is never seen in + * a transiently forward state. Instead, we can see the LSN in a + * transiently behind state if we happen to race with a cycle wrap. + */ + cur_cycle = ACCESS_ONCE(log->l_curr_cycle); + smp_rmb(); + cur_block = ACCESS_ONCE(log->l_curr_block); + + if ((CYCLE_LSN(lsn) > cur_cycle) || + (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) { + /* + * If the metadata LSN appears invalid, it's possible the check + * above raced with a wrap to the next log cycle. Grab the lock + * to check for sure. + */ + spin_lock(&log->l_icloglock); + cur_cycle = log->l_curr_cycle; + cur_block = log->l_curr_block; + spin_unlock(&log->l_icloglock); + + if ((CYCLE_LSN(lsn) > cur_cycle) || + (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) + valid = false; + } + + return valid; +} + #endif /* __XFS_LOG_PRIV_H__ */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 512a0945d52a..c5ecaacdd218 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3431,7 +3431,7 @@ xlog_recover_add_to_cont_trans( * previous record. Copy the rest of the header. */ if (list_empty(&trans->r_itemq)) { - ASSERT(len < sizeof(struct xfs_trans_header)); + ASSERT(len <= sizeof(struct xfs_trans_header)); if (len > sizeof(struct xfs_trans_header)) { xfs_warn(log->l_mp, "%s: bad header length", __func__); return -EIO; @@ -4609,9 +4609,19 @@ xlog_recover( int error; /* find the tail of the log */ - if ((error = xlog_find_tail(log, &head_blk, &tail_blk))) + error = xlog_find_tail(log, &head_blk, &tail_blk); + if (error) return error; + /* + * The superblock was read before the log was available and thus the LSN + * could not be verified. Check the superblock LSN against the current + * LSN now that it's known. + */ + if (xfs_sb_version_hascrc(&log->l_mp->m_sb) && + !xfs_log_check_lsn(log->l_mp, log->l_mp->m_sb.sb_lsn)) + return -EINVAL; + if (tail_blk != head_blk) { /* There used to be a comment here: * diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index d8b67547ab34..11792d888e4e 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -17,6 +17,7 @@ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_error.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" @@ -43,6 +44,7 @@ void func(const struct xfs_mount *mp, const char *fmt, ...) \ { \ struct va_format vaf; \ va_list args; \ + int level; \ \ va_start(args, fmt); \ \ @@ -51,6 +53,11 @@ void func(const struct xfs_mount *mp, const char *fmt, ...) \ \ __xfs_printk(kern_level, mp, &vaf); \ va_end(args); \ + \ + if (!kstrtoint(kern_level, 0, &level) && \ + level <= LOGLEVEL_ERR && \ + xfs_error_level >= XFS_ERRLEVEL_HIGH) \ + xfs_stack_trace(); \ } \ define_xfs_printk_level(xfs_emerg, KERN_EMERG); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index bf92e0c037c7..bb753b359bee 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -47,6 +47,16 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex); static int xfs_uuid_table_size; static uuid_t *xfs_uuid_table; +void +xfs_uuid_table_free(void) +{ + if (xfs_uuid_table_size == 0) + return; + kmem_free(xfs_uuid_table); + xfs_uuid_table = NULL; + xfs_uuid_table_size = 0; +} + /* * See if the UUID is unique among mounted XFS filesystems. * Mount fails if UUID is nil or a FS with the same UUID is already mounted. @@ -693,10 +703,15 @@ xfs_mountfs( if (error) goto out; - error = xfs_uuid_mount(mp); + error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype, + &mp->m_kobj, "stats"); if (error) goto out_remove_sysfs; + error = xfs_uuid_mount(mp); + if (error) + goto out_del_stats; + /* * Set the minimum read and write sizes */ @@ -971,6 +986,8 @@ xfs_mountfs( xfs_da_unmount(mp); out_remove_uuid: xfs_uuid_unmount(mp); + out_del_stats: + xfs_sysfs_del(&mp->m_stats.xs_kobj); out_remove_sysfs: xfs_sysfs_del(&mp->m_kobj); out: @@ -1047,6 +1064,7 @@ xfs_unmountfs( xfs_warn(mp, "Unable to update superblock counters. " "Freespace may not be correct on next mount."); + xfs_log_unmount(mp); xfs_da_unmount(mp); xfs_uuid_unmount(mp); @@ -1056,6 +1074,7 @@ xfs_unmountfs( #endif xfs_free_perag(mp); + xfs_sysfs_del(&mp->m_stats.xs_kobj); xfs_sysfs_del(&mp->m_kobj); } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 7999e91cd49a..b57098481c10 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -127,6 +127,7 @@ typedef struct xfs_mount { int64_t m_low_space[XFS_LOWSP_MAX]; /* low free space thresholds */ struct xfs_kobj m_kobj; + struct xstats m_stats; /* per-fs stats */ struct workqueue_struct *m_buf_workqueue; struct workqueue_struct *m_data_workqueue; @@ -312,6 +313,7 @@ typedef struct xfs_perag { int pagb_count; /* pagb slots in use */ } xfs_perag_t; +extern void xfs_uuid_table_free(void); extern int xfs_log_sbcount(xfs_mount_t *); extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); extern int xfs_mountfs(xfs_mount_t *mp); @@ -336,4 +338,7 @@ extern int xfs_dev_is_read_only(struct xfs_mount *, char *); extern void xfs_set_low_space_thresholds(struct xfs_mount *); +int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb, + xfs_off_t count_fsb); + #endif /* __XFS_MOUNT_H__ */ diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index ab4a6066f7ca..dc6221942b85 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -181,6 +181,11 @@ xfs_fs_map_blocks( ASSERT(imap.br_startblock != DELAYSTARTBLOCK); if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) { + /* + * xfs_iomap_write_direct() expects to take ownership of + * the shared ilock. + */ + xfs_ilock(ip, XFS_ILOCK_SHARED); error = xfs_iomap_write_direct(ip, offset, length, &imap, nimaps); if (error) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 587174fd4f2c..532ab79d38fe 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -184,7 +184,7 @@ xfs_qm_dqpurge( */ ASSERT(!list_empty(&dqp->q_lru)); list_lru_del(&qi->qi_lru, &dqp->q_lru); - XFS_STATS_DEC(xs_qm_dquot_unused); + XFS_STATS_DEC(mp, xs_qm_dquot_unused); xfs_qm_dqdestroy(dqp); return 0; @@ -448,11 +448,11 @@ xfs_qm_dquot_isolate( */ if (dqp->q_nrefs) { xfs_dqunlock(dqp); - XFS_STATS_INC(xs_qm_dqwants); + XFS_STATS_INC(dqp->q_mount, xs_qm_dqwants); trace_xfs_dqreclaim_want(dqp); list_lru_isolate(lru, &dqp->q_lru); - XFS_STATS_DEC(xs_qm_dquot_unused); + XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused); return LRU_REMOVED; } @@ -496,19 +496,19 @@ xfs_qm_dquot_isolate( ASSERT(dqp->q_nrefs == 0); list_lru_isolate_move(lru, &dqp->q_lru, &isol->dispose); - XFS_STATS_DEC(xs_qm_dquot_unused); + XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused); trace_xfs_dqreclaim_done(dqp); - XFS_STATS_INC(xs_qm_dqreclaims); + XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaims); return LRU_REMOVED; out_miss_busy: trace_xfs_dqreclaim_busy(dqp); - XFS_STATS_INC(xs_qm_dqreclaim_misses); + XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); return LRU_SKIP; out_unlock_dirty: trace_xfs_dqreclaim_busy(dqp); - XFS_STATS_INC(xs_qm_dqreclaim_misses); + XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); xfs_dqunlock(dqp); spin_lock(lru_lock); return LRU_RETRY; diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index f2240383d4bb..8686df6c7609 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -18,20 +18,21 @@ #include "xfs.h" #include <linux/proc_fs.h> -DEFINE_PER_CPU(struct xfsstats, xfsstats); +struct xstats xfsstats; -static int counter_val(int idx) +static int counter_val(struct xfsstats __percpu *stats, int idx) { int val = 0, cpu; for_each_possible_cpu(cpu) - val += *(((__u32 *)&per_cpu(xfsstats, cpu) + idx)); + val += *(((__u32 *)per_cpu_ptr(stats, cpu) + idx)); return val; } -static int xfs_stat_proc_show(struct seq_file *m, void *v) +int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) { int i, j; + int len = 0; __uint64_t xs_xstrat_bytes = 0; __uint64_t xs_write_bytes = 0; __uint64_t xs_read_bytes = 0; @@ -65,54 +66,59 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v) }; /* Loop over all stats groups */ + for (i = j = 0; i < ARRAY_SIZE(xstats); i++) { - seq_printf(m, "%s", xstats[i].desc); + len += snprintf(buf + len, PATH_MAX - len, "%s", + xstats[i].desc); /* inner loop does each group */ for (; j < xstats[i].endpoint; j++) - seq_printf(m, " %u", counter_val(j)); - seq_putc(m, '\n'); + len += snprintf(buf + len, PATH_MAX - len, " %u", + counter_val(stats, j)); + len += snprintf(buf + len, PATH_MAX - len, "\n"); } /* extra precision counters */ for_each_possible_cpu(i) { - xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes; - xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes; - xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; + xs_xstrat_bytes += per_cpu_ptr(stats, i)->xs_xstrat_bytes; + xs_write_bytes += per_cpu_ptr(stats, i)->xs_write_bytes; + xs_read_bytes += per_cpu_ptr(stats, i)->xs_read_bytes; } - seq_printf(m, "xpc %Lu %Lu %Lu\n", + len += snprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n", xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); - seq_printf(m, "debug %u\n", + len += snprintf(buf + len, PATH_MAX-len, "debug %u\n", #if defined(DEBUG) 1); #else 0); #endif - return 0; + + return len; } -static int xfs_stat_proc_open(struct inode *inode, struct file *file) +void xfs_stats_clearall(struct xfsstats __percpu *stats) { - return single_open(file, xfs_stat_proc_show, NULL); + int c; + __uint32_t vn_active; + + xfs_notice(NULL, "Clearing xfsstats"); + for_each_possible_cpu(c) { + preempt_disable(); + /* save vn_active, it's a universal truth! */ + vn_active = per_cpu_ptr(stats, c)->vn_active; + memset(per_cpu_ptr(stats, c), 0, sizeof(*stats)); + per_cpu_ptr(stats, c)->vn_active = vn_active; + preempt_enable(); + } } -static const struct file_operations xfs_stat_proc_fops = { - .owner = THIS_MODULE, - .open = xfs_stat_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* legacy quota interfaces */ #ifdef CONFIG_XFS_QUOTA static int xqm_proc_show(struct seq_file *m, void *v) { /* maximum; incore; ratio free to inuse; freelist */ seq_printf(m, "%d\t%d\t%d\t%u\n", - 0, - counter_val(XFSSTAT_END_XQMSTAT), - 0, - counter_val(XFSSTAT_END_XQMSTAT + 1)); + 0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT), + 0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT + 1)); return 0; } @@ -136,7 +142,7 @@ static int xqmstat_proc_show(struct seq_file *m, void *v) seq_printf(m, "qm"); for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++) - seq_printf(m, " %u", counter_val(j)); + seq_printf(m, " %u", counter_val(xfsstats.xs_stats, j)); seq_putc(m, '\n'); return 0; } @@ -155,44 +161,35 @@ static const struct file_operations xqmstat_proc_fops = { }; #endif /* CONFIG_XFS_QUOTA */ +#ifdef CONFIG_PROC_FS int xfs_init_procfs(void) { if (!proc_mkdir("fs/xfs", NULL)) + return -ENOMEM; + + if (!proc_symlink("fs/xfs/stat", NULL, + "/sys/fs/xfs/stats/stats")) goto out; - if (!proc_create("fs/xfs/stat", 0, NULL, - &xfs_stat_proc_fops)) - goto out_remove_xfs_dir; #ifdef CONFIG_XFS_QUOTA if (!proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops)) - goto out_remove_stat_file; + goto out; if (!proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops)) - goto out_remove_xqmstat_file; + goto out; #endif return 0; -#ifdef CONFIG_XFS_QUOTA - out_remove_xqmstat_file: - remove_proc_entry("fs/xfs/xqmstat", NULL); - out_remove_stat_file: - remove_proc_entry("fs/xfs/stat", NULL); -#endif - out_remove_xfs_dir: - remove_proc_entry("fs/xfs", NULL); - out: +out: + remove_proc_subtree("fs/xfs", NULL); return -ENOMEM; } void xfs_cleanup_procfs(void) { -#ifdef CONFIG_XFS_QUOTA - remove_proc_entry("fs/xfs/xqm", NULL); - remove_proc_entry("fs/xfs/xqmstat", NULL); -#endif - remove_proc_entry("fs/xfs/stat", NULL); - remove_proc_entry("fs/xfs", NULL); + remove_proc_subtree("fs/xfs", NULL); } +#endif /* CONFIG_PROC_FS */ diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index c8f238b8299a..483b0eff1988 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h @@ -19,8 +19,6 @@ #define __XFS_STATS_H__ -#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) - #include <linux/percpu.h> /* @@ -215,15 +213,29 @@ struct xfsstats { __uint64_t xs_read_bytes; }; -DECLARE_PER_CPU(struct xfsstats, xfsstats); +int xfs_stats_format(struct xfsstats __percpu *stats, char *buf); +void xfs_stats_clearall(struct xfsstats __percpu *stats); +extern struct xstats xfsstats; -/* - * We don't disable preempt, not too worried about poking the - * wrong CPU's stat for now (also aggregated before reporting). - */ -#define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++) -#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--) -#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc)) +#define XFS_STATS_INC(mp, v) \ +do { \ + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v++; \ + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v++; \ +} while (0) + +#define XFS_STATS_DEC(mp, v) \ +do { \ + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v--; \ + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v--; \ +} while (0) + +#define XFS_STATS_ADD(mp, v, inc) \ +do { \ + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v += (inc); \ + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v += (inc); \ +} while (0) + +#if defined(CONFIG_PROC_FS) extern int xfs_init_procfs(void); extern void xfs_cleanup_procfs(void); @@ -231,10 +243,6 @@ extern void xfs_cleanup_procfs(void); #else /* !CONFIG_PROC_FS */ -# define XFS_STATS_INC(count) -# define XFS_STATS_DEC(count) -# define XFS_STATS_ADD(count, inc) - static inline int xfs_init_procfs(void) { return 0; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 904f637cfa5f..36bd8825bfb0 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -838,17 +838,18 @@ xfs_init_mount_workqueues( goto out_destroy_unwritten; mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", - WQ_FREEZABLE, 0, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); if (!mp->m_reclaim_workqueue) goto out_destroy_cil; mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", - WQ_FREEZABLE|WQ_HIGHPRI, 0, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0, + mp->m_fsname); if (!mp->m_log_workqueue) goto out_destroy_reclaim; mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", - WQ_FREEZABLE, 0, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); if (!mp->m_eofblocks_workqueue) goto out_destroy_log; @@ -922,7 +923,7 @@ xfs_fs_destroy_inode( trace_xfs_destroy_inode(ip); - XFS_STATS_INC(vn_reclaim); + XFS_STATS_INC(ip->i_mount, vn_reclaim); ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); @@ -983,8 +984,8 @@ xfs_fs_evict_inode( truncate_inode_pages_final(&inode->i_data); clear_inode(inode); - XFS_STATS_INC(vn_rele); - XFS_STATS_INC(vn_remove); + XFS_STATS_INC(ip->i_mount, vn_rele); + XFS_STATS_INC(ip->i_mount, vn_remove); xfs_inactive(ip); } @@ -1474,9 +1475,16 @@ xfs_fs_fill_super( if (error) goto out_destroy_workqueues; + /* Allocate stats memory before we do operations that might use it */ + mp->m_stats.xs_stats = alloc_percpu(struct xfsstats); + if (!mp->m_stats.xs_stats) { + error = -ENOMEM; + goto out_destroy_counters; + } + error = xfs_readsb(mp, flags); if (error) - goto out_destroy_counters; + goto out_free_stats; error = xfs_finish_flags(mp); if (error) @@ -1545,9 +1553,11 @@ xfs_fs_fill_super( xfs_filestream_unmount(mp); out_free_sb: xfs_freesb(mp); + out_free_stats: + free_percpu(mp->m_stats.xs_stats); out_destroy_counters: xfs_destroy_percpu_counters(mp); -out_destroy_workqueues: + out_destroy_workqueues: xfs_destroy_mount_workqueues(mp); out_close_devices: xfs_close_devices(mp); @@ -1574,6 +1584,7 @@ xfs_fs_put_super( xfs_unmountfs(mp); xfs_freesb(mp); + free_percpu(mp->m_stats.xs_stats); xfs_destroy_percpu_counters(mp); xfs_destroy_mount_workqueues(mp); xfs_close_devices(mp); @@ -1838,19 +1849,32 @@ init_xfs_fs(void) xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj); if (!xfs_kset) { error = -ENOMEM; - goto out_sysctl_unregister;; + goto out_sysctl_unregister; } + xfsstats.xs_kobj.kobject.kset = xfs_kset; + + xfsstats.xs_stats = alloc_percpu(struct xfsstats); + if (!xfsstats.xs_stats) { + error = -ENOMEM; + goto out_kset_unregister; + } + + error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL, + "stats"); + if (error) + goto out_free_stats; + #ifdef DEBUG xfs_dbg_kobj.kobject.kset = xfs_kset; error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug"); if (error) - goto out_kset_unregister; + goto out_remove_stats_kobj; #endif error = xfs_qm_init(); if (error) - goto out_remove_kobj; + goto out_remove_dbg_kobj; error = register_filesystem(&xfs_fs_type); if (error) @@ -1859,11 +1883,15 @@ init_xfs_fs(void) out_qm_exit: xfs_qm_exit(); - out_remove_kobj: + out_remove_dbg_kobj: #ifdef DEBUG xfs_sysfs_del(&xfs_dbg_kobj); - out_kset_unregister: + out_remove_stats_kobj: #endif + xfs_sysfs_del(&xfsstats.xs_kobj); + out_free_stats: + free_percpu(xfsstats.xs_stats); + out_kset_unregister: kset_unregister(xfs_kset); out_sysctl_unregister: xfs_sysctl_unregister(); @@ -1889,6 +1917,8 @@ exit_xfs_fs(void) #ifdef DEBUG xfs_sysfs_del(&xfs_dbg_kobj); #endif + xfs_sysfs_del(&xfsstats.xs_kobj); + free_percpu(xfsstats.xs_stats); kset_unregister(xfs_kset); xfs_sysctl_unregister(); xfs_cleanup_procfs(); @@ -1896,6 +1926,7 @@ exit_xfs_fs(void) xfs_mru_cache_uninit(); xfs_destroy_workqueues(); xfs_destroy_zones(); + xfs_uuid_table_free(); } module_init(init_xfs_fs); diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index a0c8067cea6f..aed74d3f8da9 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -19,6 +19,7 @@ #include <linux/sysctl.h> #include <linux/proc_fs.h> #include "xfs_error.h" +#include "xfs_stats.h" static struct ctl_table_header *xfs_table_header; @@ -31,22 +32,12 @@ xfs_stats_clear_proc_handler( size_t *lenp, loff_t *ppos) { - int c, ret, *valp = ctl->data; - __uint32_t vn_active; + int ret, *valp = ctl->data; ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); if (!ret && write && *valp) { - xfs_notice(NULL, "Clearing xfsstats"); - for_each_possible_cpu(c) { - preempt_disable(); - /* save vn_active, it's a universal truth! */ - vn_active = per_cpu(xfsstats, c).vn_active; - memset(&per_cpu(xfsstats, c), 0, - sizeof(struct xfsstats)); - per_cpu(xfsstats, c).vn_active = vn_active; - preempt_enable(); - } + xfs_stats_clearall(xfsstats.xs_stats); xfs_stats_clear = 0; } diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index aa03670851d8..ee70f5dec9dc 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -21,11 +21,13 @@ #include "xfs_log_format.h" #include "xfs_log.h" #include "xfs_log_priv.h" +#include "xfs_stats.h" struct xfs_sysfs_attr { struct attribute attr; - ssize_t (*show)(char *buf, void *data); - ssize_t (*store)(const char *buf, size_t count, void *data); + ssize_t (*show)(struct kobject *kobject, char *buf); + ssize_t (*store)(struct kobject *kobject, const char *buf, + size_t count); }; static inline struct xfs_sysfs_attr * @@ -38,6 +40,8 @@ to_attr(struct attribute *attr) static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name) #define XFS_SYSFS_ATTR_RO(name) \ static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name) +#define XFS_SYSFS_ATTR_WO(name) \ + static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_WO(name) #define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr @@ -51,14 +55,42 @@ struct kobj_type xfs_mp_ktype = { .release = xfs_sysfs_release, }; +STATIC ssize_t +xfs_sysfs_object_show( + struct kobject *kobject, + struct attribute *attr, + char *buf) +{ + struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + + return xfs_attr->show ? xfs_attr->show(kobject, buf) : 0; +} + +STATIC ssize_t +xfs_sysfs_object_store( + struct kobject *kobject, + struct attribute *attr, + const char *buf, + size_t count) +{ + struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + + return xfs_attr->store ? xfs_attr->store(kobject, buf, count) : 0; +} + +static const struct sysfs_ops xfs_sysfs_ops = { + .show = xfs_sysfs_object_show, + .store = xfs_sysfs_object_store, +}; + #ifdef DEBUG /* debug */ STATIC ssize_t log_recovery_delay_store( + struct kobject *kobject, const char *buf, - size_t count, - void *data) + size_t count) { int ret; int val; @@ -77,8 +109,8 @@ log_recovery_delay_store( STATIC ssize_t log_recovery_delay_show( - char *buf, - void *data) + struct kobject *kobject, + char *buf) { return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay); } @@ -89,52 +121,87 @@ static struct attribute *xfs_dbg_attrs[] = { NULL, }; +struct kobj_type xfs_dbg_ktype = { + .release = xfs_sysfs_release, + .sysfs_ops = &xfs_sysfs_ops, + .default_attrs = xfs_dbg_attrs, +}; + +#endif /* DEBUG */ + +/* stats */ + +static inline struct xstats * +to_xstats(struct kobject *kobject) +{ + struct xfs_kobj *kobj = to_kobj(kobject); + + return container_of(kobj, struct xstats, xs_kobj); +} + STATIC ssize_t -xfs_dbg_show( - struct kobject *kobject, - struct attribute *attr, - char *buf) +stats_show( + struct kobject *kobject, + char *buf) { - struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + struct xstats *stats = to_xstats(kobject); - return xfs_attr->show ? xfs_attr->show(buf, NULL) : 0; + return xfs_stats_format(stats->xs_stats, buf); } +XFS_SYSFS_ATTR_RO(stats); STATIC ssize_t -xfs_dbg_store( - struct kobject *kobject, - struct attribute *attr, - const char *buf, - size_t count) +stats_clear_store( + struct kobject *kobject, + const char *buf, + size_t count) { - struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + int ret; + int val; + struct xstats *stats = to_xstats(kobject); + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; - return xfs_attr->store ? xfs_attr->store(buf, count, NULL) : 0; + if (val != 1) + return -EINVAL; + + xfs_stats_clearall(stats->xs_stats); + return count; } +XFS_SYSFS_ATTR_WO(stats_clear); -static struct sysfs_ops xfs_dbg_ops = { - .show = xfs_dbg_show, - .store = xfs_dbg_store, +static struct attribute *xfs_stats_attrs[] = { + ATTR_LIST(stats), + ATTR_LIST(stats_clear), + NULL, }; -struct kobj_type xfs_dbg_ktype = { +struct kobj_type xfs_stats_ktype = { .release = xfs_sysfs_release, - .sysfs_ops = &xfs_dbg_ops, - .default_attrs = xfs_dbg_attrs, + .sysfs_ops = &xfs_sysfs_ops, + .default_attrs = xfs_stats_attrs, }; -#endif /* DEBUG */ - /* xlog */ +static inline struct xlog * +to_xlog(struct kobject *kobject) +{ + struct xfs_kobj *kobj = to_kobj(kobject); + + return container_of(kobj, struct xlog, l_kobj); +} + STATIC ssize_t log_head_lsn_show( - char *buf, - void *data) + struct kobject *kobject, + char *buf) { - struct xlog *log = data; int cycle; int block; + struct xlog *log = to_xlog(kobject); spin_lock(&log->l_icloglock); cycle = log->l_curr_cycle; @@ -147,12 +214,12 @@ XFS_SYSFS_ATTR_RO(log_head_lsn); STATIC ssize_t log_tail_lsn_show( - char *buf, - void *data) + struct kobject *kobject, + char *buf) { - struct xlog *log = data; int cycle; int block; + struct xlog *log = to_xlog(kobject); xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block); return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block); @@ -161,12 +228,13 @@ XFS_SYSFS_ATTR_RO(log_tail_lsn); STATIC ssize_t reserve_grant_head_show( - char *buf, - void *data) + struct kobject *kobject, + char *buf) + { - struct xlog *log = data; int cycle; int bytes; + struct xlog *log = to_xlog(kobject); xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes); return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); @@ -175,12 +243,12 @@ XFS_SYSFS_ATTR_RO(reserve_grant_head); STATIC ssize_t write_grant_head_show( - char *buf, - void *data) + struct kobject *kobject, + char *buf) { - struct xlog *log = data; int cycle; int bytes; + struct xlog *log = to_xlog(kobject); xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes); return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); @@ -195,45 +263,8 @@ static struct attribute *xfs_log_attrs[] = { NULL, }; -static inline struct xlog * -to_xlog(struct kobject *kobject) -{ - struct xfs_kobj *kobj = to_kobj(kobject); - return container_of(kobj, struct xlog, l_kobj); -} - -STATIC ssize_t -xfs_log_show( - struct kobject *kobject, - struct attribute *attr, - char *buf) -{ - struct xlog *log = to_xlog(kobject); - struct xfs_sysfs_attr *xfs_attr = to_attr(attr); - - return xfs_attr->show ? xfs_attr->show(buf, log) : 0; -} - -STATIC ssize_t -xfs_log_store( - struct kobject *kobject, - struct attribute *attr, - const char *buf, - size_t count) -{ - struct xlog *log = to_xlog(kobject); - struct xfs_sysfs_attr *xfs_attr = to_attr(attr); - - return xfs_attr->store ? xfs_attr->store(buf, count, log) : 0; -} - -static struct sysfs_ops xfs_log_ops = { - .show = xfs_log_show, - .store = xfs_log_store, -}; - struct kobj_type xfs_log_ktype = { .release = xfs_sysfs_release, - .sysfs_ops = &xfs_log_ops, + .sysfs_ops = &xfs_sysfs_ops, .default_attrs = xfs_log_attrs, }; diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h index 240eee35f342..be692e59938d 100644 --- a/fs/xfs/xfs_sysfs.h +++ b/fs/xfs/xfs_sysfs.h @@ -22,6 +22,7 @@ extern struct kobj_type xfs_mp_ktype; /* xfs_mount */ extern struct kobj_type xfs_dbg_ktype; /* debug */ extern struct kobj_type xfs_log_ktype; /* xlog */ +extern struct kobj_type xfs_stats_ktype; /* stats */ static inline struct xfs_kobj * to_kobj(struct kobject *kobject) diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 5ed36b1e04c1..877079eb0f8f 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -689,6 +689,7 @@ DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); DEFINE_INODE_EVENT(xfs_filemap_fault); DEFINE_INODE_EVENT(xfs_filemap_pmd_fault); DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite); +DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite); DECLARE_EVENT_CLASS(xfs_iref_class, TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), @@ -1312,6 +1313,7 @@ DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); +DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof); DECLARE_EVENT_CLASS(xfs_itrunc_class, TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index a0ab1dae9c31..748b16aff45a 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -930,9 +930,9 @@ __xfs_trans_commit( */ if (sync) { error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL); - XFS_STATS_INC(xs_trans_sync); + XFS_STATS_INC(mp, xs_trans_sync); } else { - XFS_STATS_INC(xs_trans_async); + XFS_STATS_INC(mp, xs_trans_async); } return error; @@ -955,7 +955,7 @@ out_unreserve: xfs_trans_free_items(tp, NULLCOMMITLSN, !!error); xfs_trans_free(tp); - XFS_STATS_INC(xs_trans_empty); + XFS_STATS_INC(mp, xs_trans_empty); return error; } diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 1098cf490189..aa67339b9537 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -349,7 +349,7 @@ xfsaild_push( xfs_ail_min_lsn(ailp))) { ailp->xa_log_flush = 0; - XFS_STATS_INC(xs_push_ail_flush); + XFS_STATS_INC(mp, xs_push_ail_flush); xfs_log_force(mp, XFS_LOG_SYNC); } @@ -371,7 +371,7 @@ xfsaild_push( goto out_done; } - XFS_STATS_INC(xs_push_ail); + XFS_STATS_INC(mp, xs_push_ail); lsn = lip->li_lsn; while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { @@ -385,7 +385,7 @@ xfsaild_push( lock_result = lip->li_ops->iop_push(lip, &ailp->xa_buf_list); switch (lock_result) { case XFS_ITEM_SUCCESS: - XFS_STATS_INC(xs_push_ail_success); + XFS_STATS_INC(mp, xs_push_ail_success); trace_xfs_ail_push(lip); ailp->xa_last_pushed_lsn = lsn; @@ -403,7 +403,7 @@ xfsaild_push( * re-try the flushing relatively soon if most of the * AIL is beeing flushed. */ - XFS_STATS_INC(xs_push_ail_flushing); + XFS_STATS_INC(mp, xs_push_ail_flushing); trace_xfs_ail_flushing(lip); flushing++; @@ -411,14 +411,14 @@ xfsaild_push( break; case XFS_ITEM_PINNED: - XFS_STATS_INC(xs_push_ail_pinned); + XFS_STATS_INC(mp, xs_push_ail_pinned); trace_xfs_ail_pinned(lip); stuck++; ailp->xa_log_flush++; break; case XFS_ITEM_LOCKED: - XFS_STATS_INC(xs_push_ail_locked); + XFS_STATS_INC(mp, xs_push_ail_locked); trace_xfs_ail_locked(lip); stuck++; @@ -497,6 +497,7 @@ xfsaild( long tout = 0; /* milliseconds */ current->flags |= PF_MEMALLOC; + set_freezable(); while (!kthread_should_stop()) { if (tout && tout <= 20) diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 17280cd71934..b97f1df910ab 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -108,6 +108,15 @@ xfs_trans_log_inode( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); /* + * Record the specific change for fdatasync optimisation. This + * allows fdatasync to skip log forces for inodes that are only + * timestamp dirty. We do this before the change count so that + * the core being logged in this case does not impact on fdatasync + * behaviour. + */ + ip->i_itemp->ili_fsync_fields |= flags; + + /* * First time we log the inode in a transaction, bump the inode change * counter if it is configured for this to occur. We don't use * inode_inc_version() because there is no need for extra locking around diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index c036815183cb..8294f86441bf 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -53,11 +53,34 @@ xfs_xattr_get(struct dentry *dentry, const char *name, return asize; } +void +xfs_forget_acl( + struct inode *inode, + const char *name, + int xflags) +{ + /* + * Invalidate any cached ACLs if the user has bypassed the ACL + * interface. We don't validate the content whatsoever so it is caller + * responsibility to provide data in valid format and ensure i_mode is + * consistent. + */ + if (xflags & ATTR_ROOT) { +#ifdef CONFIG_XFS_POSIX_ACL + if (!strcmp(name, SGI_ACL_FILE)) + forget_cached_acl(inode, ACL_TYPE_ACCESS); + else if (!strcmp(name, SGI_ACL_DEFAULT)) + forget_cached_acl(inode, ACL_TYPE_DEFAULT); +#endif + } +} + static int xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, size_t size, int flags, int xflags) { - struct xfs_inode *ip = XFS_I(d_inode(dentry)); + struct xfs_inode *ip = XFS_I(d_inode(dentry)); + int error; if (strcmp(name, "") == 0) return -EINVAL; @@ -70,8 +93,12 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, if (!value) return xfs_attr_remove(ip, (unsigned char *)name, xflags); - return xfs_attr_set(ip, (unsigned char *)name, + error = xfs_attr_set(ip, (unsigned char *)name, (void *)value, size, xflags); + if (!error) + xfs_forget_acl(d_inode(dentry), name, xflags); + + return error; } static const struct xattr_handler xfs_xattr_user_handler = { diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index d11eff8a4efe..ad0a5ff3d4cd 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -390,39 +390,6 @@ struct acpi_data_node { struct completion kobj_done; }; -static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) -{ - bool ret = false; - - if (!adev) - return ret; - - /** - * Currently, we only support _CCA=1 (i.e. coherent_dma=1) - * This should be equivalent to specifyig dma-coherent for - * a device in OF. - * - * For the case when _CCA=0 (i.e. coherent_dma=0 && cca_seen=1), - * There are two cases: - * case 1. Do not support and disable DMA. - * case 2. Support but rely on arch-specific cache maintenance for - * non-coherence DMA operations. - * Currently, we implement case 1 above. - * - * For the case when _CCA is missing (i.e. cca_seen=0) and - * platform specifies ACPI_CCA_REQUIRED, we do not support DMA, - * and fallback to arch-specific default handling. - * - * See acpi_init_coherency() for more info. - */ - if (adev->flags.coherent_dma) { - ret = true; - if (coherent) - *coherent = adev->flags.coherent_dma; - } - return ret; -} - static inline bool is_acpi_node(struct fwnode_handle *fwnode) { return fwnode && (fwnode->type == FWNODE_ACPI @@ -595,6 +562,9 @@ struct acpi_pci_root { /* helper */ +bool acpi_dma_supported(struct acpi_device *adev); +enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); + struct acpi_device *acpi_find_child_device(struct acpi_device *parent, u64 address, bool check_children); int acpi_is_root_bridge(acpi_handle); diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index 72d8803832ff..1bfa602958f2 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -163,9 +163,10 @@ static inline __must_check long __copy_to_user(void __user *to, #define put_user(x, ptr) \ ({ \ + void *__p = (ptr); \ might_fault(); \ - access_ok(VERIFY_WRITE, ptr, sizeof(*ptr)) ? \ - __put_user(x, ptr) : \ + access_ok(VERIFY_WRITE, __p, sizeof(*ptr)) ? \ + __put_user((x), ((__typeof__(*(ptr)) *)__p)) : \ -EFAULT; \ }) @@ -225,9 +226,10 @@ extern int __put_user_bad(void) __attribute__((noreturn)); #define get_user(x, ptr) \ ({ \ + const void *__p = (ptr); \ might_fault(); \ - access_ok(VERIFY_READ, ptr, sizeof(*ptr)) ? \ - __get_user(x, ptr) : \ + access_ok(VERIFY_READ, __p, sizeof(*ptr)) ? \ + __get_user((x), (__typeof__(*(ptr)) *)__p) : \ -EFAULT; \ }) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ebfac2fe0c81..054833939995 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -601,11 +601,16 @@ static inline int acpi_device_modalias(struct device *dev, return -ENODEV; } -static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) +static inline bool acpi_dma_supported(struct acpi_device *adev) { return false; } +static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) +{ + return DEV_DMA_NOT_SUPPORTED; +} + #define ACPI_PTR(_ptr) (NULL) #endif /* !CONFIG_ACPI */ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index e6797ded700e..89d9aa9e79bf 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -227,8 +227,6 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); -int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - get_block_t get_block); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); /* Convert errno to return value from ->page_mkwrite() call */ diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 008fc67d0d96..68b575afe5f5 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -10,6 +10,10 @@ #ifdef CONFIG_CONTEXT_TRACKING extern void context_tracking_cpu_set(int cpu); +/* Called with interrupts disabled. */ +extern void __context_tracking_enter(enum ctx_state state); +extern void __context_tracking_exit(enum ctx_state state); + extern void context_tracking_enter(enum ctx_state state); extern void context_tracking_exit(enum ctx_state state); extern void context_tracking_user_enter(void); @@ -18,13 +22,13 @@ extern void context_tracking_user_exit(void); static inline void user_enter(void) { if (context_tracking_is_enabled()) - context_tracking_user_enter(); + context_tracking_enter(CONTEXT_USER); } static inline void user_exit(void) { if (context_tracking_is_enabled()) - context_tracking_user_exit(); + context_tracking_exit(CONTEXT_USER); } static inline enum ctx_state exception_enter(void) @@ -88,13 +92,13 @@ static inline void guest_enter(void) current->flags |= PF_VCPU; if (context_tracking_is_enabled()) - context_tracking_enter(CONTEXT_GUEST); + __context_tracking_enter(CONTEXT_GUEST); } static inline void guest_exit(void) { if (context_tracking_is_enabled()) - context_tracking_exit(CONTEXT_GUEST); + __context_tracking_exit(CONTEXT_GUEST); if (vtime_accounting_enabled()) vtime_guest_exit(current); diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h new file mode 100644 index 000000000000..7adf6cc4b305 --- /dev/null +++ b/include/linux/devfreq_cooling.h @@ -0,0 +1,81 @@ +/* + * devfreq_cooling: Thermal cooling device implementation for devices using + * devfreq + * + * Copyright (C) 2014-2015 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __DEVFREQ_COOLING_H__ +#define __DEVFREQ_COOLING_H__ + +#include <linux/devfreq.h> +#include <linux/thermal.h> + +#ifdef CONFIG_DEVFREQ_THERMAL + +/** + * struct devfreq_cooling_power - Devfreq cooling power ops + * @get_static_power: Take voltage, in mV, and return the static power + * in mW. If NULL, the static power is assumed + * to be 0. + * @get_dynamic_power: Take voltage, in mV, and frequency, in HZ, and + * return the dynamic power draw in mW. If NULL, + * a simple power model is used. + * @dyn_power_coeff: Coefficient for the simple dynamic power model in + * mW/(MHz mV mV). + * If get_dynamic_power() is NULL, then the + * dynamic power is calculated as + * @dyn_power_coeff * frequency * voltage^2 + */ +struct devfreq_cooling_power { + unsigned long (*get_static_power)(unsigned long voltage); + unsigned long (*get_dynamic_power)(unsigned long freq, + unsigned long voltage); + unsigned long dyn_power_coeff; +}; + +struct thermal_cooling_device * +of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, + struct devfreq_cooling_power *dfc_power); +struct thermal_cooling_device * +of_devfreq_cooling_register(struct device_node *np, struct devfreq *df); +struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df); +void devfreq_cooling_unregister(struct thermal_cooling_device *dfc); + +#else /* !CONFIG_DEVFREQ_THERMAL */ + +struct thermal_cooling_device * +of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, + struct devfreq_cooling_power *dfc_power) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct thermal_cooling_device * +of_devfreq_cooling_register(struct device_node *np, struct devfreq *df) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct thermal_cooling_device * +devfreq_cooling_register(struct devfreq *df) +{ + return ERR_PTR(-EINVAL); +} + +static inline void +devfreq_cooling_unregister(struct thermal_cooling_device *dfc) +{ +} + +#endif /* CONFIG_DEVFREQ_THERMAL */ +#endif /* __DEVFREQ_COOLING_H__ */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 6230eb2a9cca..3aa514254161 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1665,8 +1665,6 @@ struct inode_operations { umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); int (*set_acl)(struct inode *, struct posix_acl *, int); - - /* WARNING: probably going away soon, do not use! */ } ____cacheline_aligned; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 242a6d2b53ff..5706a2108f0a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1183,4 +1183,5 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ + #endif diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index ff82a32871b5..c15373894a42 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -68,6 +68,7 @@ struct nlm_host { struct nsm_handle *h_nsmhandle; /* NSM status handle */ char *h_addrbuf; /* address eyecatcher */ struct net *net; /* host net */ + char nodename[UNX_MAXNODENAME + 1]; }; /* @@ -235,7 +236,8 @@ void nlm_rebind_host(struct nlm_host *); struct nlm_host * nlm_get_host(struct nlm_host *); void nlm_shutdown_hosts(void); void nlm_shutdown_hosts_net(struct net *net); -void nlm_host_rebooted(const struct nlm_reboot *); +void nlm_host_rebooted(const struct net *net, + const struct nlm_reboot *); /* * Host monitoring @@ -243,11 +245,13 @@ void nlm_host_rebooted(const struct nlm_reboot *); int nsm_monitor(const struct nlm_host *host); void nsm_unmonitor(const struct nlm_host *host); -struct nsm_handle *nsm_get_handle(const struct sockaddr *sap, +struct nsm_handle *nsm_get_handle(const struct net *net, + const struct sockaddr *sap, const size_t salen, const char *hostname, const size_t hostname_len); -struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info); +struct nsm_handle *nsm_reboot_lookup(const struct net *net, + const struct nlm_reboot *info); void nsm_release(struct nsm_handle *nsm); /* diff --git a/include/linux/math64.h b/include/linux/math64.h index c45c089bfdac..6e8b5b270ffe 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -142,6 +142,13 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) } #endif /* mul_u64_u32_shr */ +#ifndef mul_u64_u64_shr +static inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift) +{ + return (u64)(((unsigned __int128)a * mul) >> shift); +} +#endif /* mul_u64_u64_shr */ + #else #ifndef mul_u64_u32_shr @@ -161,6 +168,79 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) } #endif /* mul_u64_u32_shr */ +#ifndef mul_u64_u64_shr +static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift) +{ + union { + u64 ll; + struct { +#ifdef __BIG_ENDIAN + u32 high, low; +#else + u32 low, high; +#endif + } l; + } rl, rm, rn, rh, a0, b0; + u64 c; + + a0.ll = a; + b0.ll = b; + + rl.ll = (u64)a0.l.low * b0.l.low; + rm.ll = (u64)a0.l.low * b0.l.high; + rn.ll = (u64)a0.l.high * b0.l.low; + rh.ll = (u64)a0.l.high * b0.l.high; + + /* + * Each of these lines computes a 64-bit intermediate result into "c", + * starting at bits 32-95. The low 32-bits go into the result of the + * multiplication, the high 32-bits are carried into the next step. + */ + rl.l.high = c = (u64)rl.l.high + rm.l.low + rn.l.low; + rh.l.low = c = (c >> 32) + rm.l.high + rn.l.high + rh.l.low; + rh.l.high = (c >> 32) + rh.l.high; + + /* + * The 128-bit result of the multiplication is in rl.ll and rh.ll, + * shift it right and throw away the high part of the result. + */ + if (shift == 0) + return rl.ll; + if (shift < 64) + return (rl.ll >> shift) | (rh.ll << (64 - shift)); + return rh.ll >> (shift & 63); +} +#endif /* mul_u64_u64_shr */ + #endif +#ifndef mul_u64_u32_div +static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor) +{ + union { + u64 ll; + struct { +#ifdef __BIG_ENDIAN + u32 high, low; +#else + u32 low, high; +#endif + } l; + } u, rl, rh; + + u.ll = a; + rl.ll = (u64)u.l.low * mul; + rh.ll = (u64)u.l.high * mul + rl.l.high; + + /* Bits 32-63 of the result will be in rh.l.low. */ + rl.l.high = do_div(rh.ll, divisor); + + /* Bits 0-31 of the result will be in rl.l.low. */ + do_div(rl.ll, divisor); + + rl.l.high = rh.l.low; + return rl.ll; +} +#endif /* mul_u64_u32_div */ + #endif /* _LINUX_MATH64_H */ diff --git a/include/linux/of.h b/include/linux/of.h index 2194b8ca41f9..dd10626a615f 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -126,6 +126,8 @@ extern raw_spinlock_t devtree_lock; #define OF_POPULATED 3 /* device already created for the node */ #define OF_POPULATED_BUS 4 /* of_platform_populate recursed to children of this node */ +#define OF_BAD_ADDR ((u64)-1) + #ifdef CONFIG_OF void of_core_init(void); @@ -229,8 +231,6 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size) #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) -#define OF_BAD_ADDR ((u64)-1) - static inline const char *of_node_full_name(const struct device_node *np) { return np ? np->full_name : "<no-node>"; diff --git a/include/linux/of_address.h b/include/linux/of_address.h index d88e81be6368..507daad0bc8d 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -57,6 +57,13 @@ extern int of_dma_get_range(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *size); extern bool of_dma_is_coherent(struct device_node *np); #else /* CONFIG_OF_ADDRESS */ + +static inline u64 of_translate_address(struct device_node *np, + const __be32 *addr) +{ + return OF_BAD_ADDR; +} + static inline struct device_node *of_find_matching_node_by_address( struct device_node *from, const struct of_device_id *matches, diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index 38c0533a3359..2c51ee78b1c0 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -16,7 +16,6 @@ int of_pci_get_devfn(struct device_node *np); int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin); int of_pci_parse_bus_range(struct device_node *node, struct resource *res); int of_get_pci_domain_nr(struct device_node *node); -void of_pci_dma_configure(struct pci_dev *pci_dev); void of_pci_check_probe_only(void); #else static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq) @@ -53,8 +52,6 @@ of_get_pci_domain_nr(struct device_node *node) return -1; } -static inline void of_pci_dma_configure(struct pci_dev *pci_dev) { } - static inline void of_pci_check_probe_only(void) { } #endif diff --git a/include/linux/property.h b/include/linux/property.h index 463de52fe891..0a3705a7c9f2 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -27,6 +27,12 @@ enum dev_prop_type { DEV_PROP_MAX, }; +enum dev_dma_attr { + DEV_DMA_NOT_SUPPORTED, + DEV_DMA_NON_COHERENT, + DEV_DMA_COHERENT, +}; + bool device_property_present(struct device *dev, const char *propname); int device_property_read_u8_array(struct device *dev, const char *propname, u8 *val, size_t nval); @@ -168,7 +174,9 @@ struct property_set { void device_add_property_set(struct device *dev, struct property_set *pset); -bool device_dma_is_coherent(struct device *dev); +bool device_dma_supported(struct device *dev); + +enum dev_dma_attr device_get_dma_attr(struct device *dev); int device_get_phy_mode(struct device *dev); diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d681f6875aef..cfc3ed46cad2 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -2,6 +2,7 @@ #define __LINUX_PWM_H #include <linux/err.h> +#include <linux/mutex.h> #include <linux/of.h> struct pwm_device; @@ -87,6 +88,7 @@ enum { * @pwm: global index of the PWM device * @chip: PWM chip providing this PWM device * @chip_data: chip-private data associated with the PWM device + * @lock: used to serialize accesses to the PWM device where necessary * @period: period of the PWM signal (in nanoseconds) * @duty_cycle: duty cycle of the PWM signal (in nanoseconds) * @polarity: polarity of the PWM signal @@ -98,6 +100,7 @@ struct pwm_device { unsigned int pwm; struct pwm_chip *chip; void *chip_data; + struct mutex lock; unsigned int period; unsigned int duty_cycle; diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 03d3b4c92d9f..ed03c9f7f908 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -48,8 +48,10 @@ struct cache_head { struct hlist_node cache_list; time_t expiry_time; /* After time time, don't use the data */ - time_t last_refresh; /* If CACHE_PENDING, this is when upcall - * was sent, else this is when update was received + time_t last_refresh; /* If CACHE_PENDING, this is when upcall was + * sent, else this is when update was + * received, though it is alway set to + * be *after* ->flush_time. */ struct kref ref; unsigned long flags; @@ -105,8 +107,12 @@ struct cache_detail { /* fields below this comment are for internal use * and should not be touched by cache owners */ - time_t flush_time; /* flush all cache items with last_refresh - * earlier than this */ + time_t flush_time; /* flush all cache items with + * last_refresh at or earlier + * than this. last_refresh + * is never set at or earlier + * than this. + */ struct list_head others; time_t nextcheck; int entries; @@ -203,7 +209,7 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) { return (h->expiry_time < seconds_since_boot()) || - (detail->flush_time > h->last_refresh); + (detail->flush_time >= h->last_refresh); } extern int cache_check(struct cache_detail *detail, diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 157d366e761b..4014a59828fc 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -44,9 +44,11 @@ #define THERMAL_WEIGHT_DEFAULT 0 /* Unit conversion macros */ -#define KELVIN_TO_CELSIUS(t) (long)(((long)t-2732 >= 0) ? \ - ((long)t-2732+5)/10 : ((long)t-2732-5)/10) -#define CELSIUS_TO_KELVIN(t) ((t)*10+2732) +#define DECI_KELVIN_TO_CELSIUS(t) ({ \ + long _t = (t); \ + ((_t-2732 >= 0) ? (_t-2732+5)/10 : (_t-2732-5)/10); \ +}) +#define CELSIUS_TO_DECI_KELVIN(t) ((t)*10+2732) #define DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, off) (((t) - (off)) * 100) #define DECI_KELVIN_TO_MILLICELSIUS(t) DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, 2732) #define MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, off) (((t) / 100) + (off)) diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h index 8b1f80682b80..5738bb3e2343 100644 --- a/include/trace/events/thermal.h +++ b/include/trace/events/thermal.h @@ -4,6 +4,7 @@ #if !defined(_TRACE_THERMAL_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_THERMAL_H +#include <linux/devfreq.h> #include <linux/thermal.h> #include <linux/tracepoint.h> @@ -135,6 +136,58 @@ TRACE_EVENT(thermal_power_cpu_limit, __entry->power) ); +TRACE_EVENT(thermal_power_devfreq_get_power, + TP_PROTO(struct thermal_cooling_device *cdev, + struct devfreq_dev_status *status, unsigned long freq, + u32 dynamic_power, u32 static_power), + + TP_ARGS(cdev, status, freq, dynamic_power, static_power), + + TP_STRUCT__entry( + __string(type, cdev->type ) + __field(unsigned long, freq ) + __field(u32, load ) + __field(u32, dynamic_power ) + __field(u32, static_power ) + ), + + TP_fast_assign( + __assign_str(type, cdev->type); + __entry->freq = freq; + __entry->load = (100 * status->busy_time) / status->total_time; + __entry->dynamic_power = dynamic_power; + __entry->static_power = static_power; + ), + + TP_printk("type=%s freq=%lu load=%u dynamic_power=%u static_power=%u", + __get_str(type), __entry->freq, + __entry->load, __entry->dynamic_power, __entry->static_power) +); + +TRACE_EVENT(thermal_power_devfreq_limit, + TP_PROTO(struct thermal_cooling_device *cdev, unsigned long freq, + unsigned long cdev_state, u32 power), + + TP_ARGS(cdev, freq, cdev_state, power), + + TP_STRUCT__entry( + __string(type, cdev->type) + __field(unsigned int, freq ) + __field(unsigned long, cdev_state) + __field(u32, power ) + ), + + TP_fast_assign( + __assign_str(type, cdev->type); + __entry->freq = freq; + __entry->cdev_state = cdev_state; + __entry->power = power; + ), + + TP_printk("type=%s freq=%u cdev_state=%lu power=%u", + __get_str(type), __entry->freq, __entry->cdev_state, + __entry->power) +); #endif /* _TRACE_THERMAL_H */ /* This part must be outside protection */ diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 0a495ab35bc7..d8560ee3bab7 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -58,36 +58,13 @@ static void context_tracking_recursion_exit(void) * instructions to execute won't use any RCU read side critical section * because this function sets RCU in extended quiescent state. */ -void context_tracking_enter(enum ctx_state state) +void __context_tracking_enter(enum ctx_state state) { - unsigned long flags; - - /* - * Repeat the user_enter() check here because some archs may be calling - * this from asm and if no CPU needs context tracking, they shouldn't - * go further. Repeat the check here until they support the inline static - * key check. - */ - if (!context_tracking_is_enabled()) - return; - - /* - * Some contexts may involve an exception occuring in an irq, - * leading to that nesting: - * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() - * This would mess up the dyntick_nesting count though. And rcu_irq_*() - * helpers are enough to protect RCU uses inside the exception. So - * just return immediately if we detect we are in an IRQ. - */ - if (in_interrupt()) - return; - /* Kernel threads aren't supposed to go to userspace */ WARN_ON_ONCE(!current->mm); - local_irq_save(flags); if (!context_tracking_recursion_enter()) - goto out_irq_restore; + return; if ( __this_cpu_read(context_tracking.state) != state) { if (__this_cpu_read(context_tracking.active)) { @@ -120,7 +97,27 @@ void context_tracking_enter(enum ctx_state state) __this_cpu_write(context_tracking.state, state); } context_tracking_recursion_exit(); -out_irq_restore: +} +NOKPROBE_SYMBOL(__context_tracking_enter); +EXPORT_SYMBOL_GPL(__context_tracking_enter); + +void context_tracking_enter(enum ctx_state state) +{ + unsigned long flags; + + /* + * Some contexts may involve an exception occuring in an irq, + * leading to that nesting: + * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() + * This would mess up the dyntick_nesting count though. And rcu_irq_*() + * helpers are enough to protect RCU uses inside the exception. So + * just return immediately if we detect we are in an IRQ. + */ + if (in_interrupt()) + return; + + local_irq_save(flags); + __context_tracking_enter(state); local_irq_restore(flags); } NOKPROBE_SYMBOL(context_tracking_enter); @@ -128,7 +125,7 @@ EXPORT_SYMBOL_GPL(context_tracking_enter); void context_tracking_user_enter(void) { - context_tracking_enter(CONTEXT_USER); + user_enter(); } NOKPROBE_SYMBOL(context_tracking_user_enter); @@ -144,19 +141,10 @@ NOKPROBE_SYMBOL(context_tracking_user_enter); * This call supports re-entrancy. This way it can be called from any exception * handler without needing to know if we came from userspace or not. */ -void context_tracking_exit(enum ctx_state state) +void __context_tracking_exit(enum ctx_state state) { - unsigned long flags; - - if (!context_tracking_is_enabled()) - return; - - if (in_interrupt()) - return; - - local_irq_save(flags); if (!context_tracking_recursion_enter()) - goto out_irq_restore; + return; if (__this_cpu_read(context_tracking.state) == state) { if (__this_cpu_read(context_tracking.active)) { @@ -173,7 +161,19 @@ void context_tracking_exit(enum ctx_state state) __this_cpu_write(context_tracking.state, CONTEXT_KERNEL); } context_tracking_recursion_exit(); -out_irq_restore: +} +NOKPROBE_SYMBOL(__context_tracking_exit); +EXPORT_SYMBOL_GPL(__context_tracking_exit); + +void context_tracking_exit(enum ctx_state state) +{ + unsigned long flags; + + if (in_interrupt()) + return; + + local_irq_save(flags); + __context_tracking_exit(state); local_irq_restore(flags); } NOKPROBE_SYMBOL(context_tracking_exit); @@ -181,7 +181,7 @@ EXPORT_SYMBOL_GPL(context_tracking_exit); void context_tracking_user_exit(void) { - context_tracking_exit(CONTEXT_USER); + user_exit(); } NOKPROBE_SYMBOL(context_tracking_user_exit); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index dace13d7638e..799e65b944b9 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1411,17 +1411,16 @@ gss_key_timeout(struct rpc_cred *rc) { struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); struct gss_cl_ctx *ctx; - unsigned long now = jiffies; - unsigned long expire; + unsigned long timeout = jiffies + (gss_key_expire_timeo * HZ); + int ret = 0; rcu_read_lock(); ctx = rcu_dereference(gss_cred->gc_ctx); - if (ctx) - expire = ctx->gc_expiry - (gss_key_expire_timeo * HZ); + if (!ctx || time_after(timeout, ctx->gc_expiry)) + ret = -EACCES; rcu_read_unlock(); - if (!ctx || time_after(now, expire)) - return -EACCES; - return 0; + + return ret; } static int diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 4a2340a54401..5e4f815c2b34 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -41,13 +41,16 @@ static bool cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); -static void cache_init(struct cache_head *h) +static void cache_init(struct cache_head *h, struct cache_detail *detail) { time_t now = seconds_since_boot(); INIT_HLIST_NODE(&h->cache_list); h->flags = 0; kref_init(&h->ref); h->expiry_time = now + CACHE_NEW_EXPIRY; + if (now <= detail->flush_time) + /* ensure it isn't already expired */ + now = detail->flush_time + 1; h->last_refresh = now; } @@ -81,7 +84,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, * we might get lose if we need to * cache_put it soon. */ - cache_init(new); + cache_init(new, detail); detail->init(new, key); write_lock(&detail->hash_lock); @@ -116,10 +119,15 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); -static void cache_fresh_locked(struct cache_head *head, time_t expiry) +static void cache_fresh_locked(struct cache_head *head, time_t expiry, + struct cache_detail *detail) { + time_t now = seconds_since_boot(); + if (now <= detail->flush_time) + /* ensure it isn't immediately treated as expired */ + now = detail->flush_time + 1; head->expiry_time = expiry; - head->last_refresh = seconds_since_boot(); + head->last_refresh = now; smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */ set_bit(CACHE_VALID, &head->flags); } @@ -149,7 +157,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, set_bit(CACHE_NEGATIVE, &old->flags); else detail->update(old, new); - cache_fresh_locked(old, new->expiry_time); + cache_fresh_locked(old, new->expiry_time, detail); write_unlock(&detail->hash_lock); cache_fresh_unlocked(old, detail); return old; @@ -162,7 +170,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, cache_put(old, detail); return NULL; } - cache_init(tmp); + cache_init(tmp, detail); detail->init(tmp, old); write_lock(&detail->hash_lock); @@ -173,8 +181,8 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]); detail->entries++; cache_get(tmp); - cache_fresh_locked(tmp, new->expiry_time); - cache_fresh_locked(old, 0); + cache_fresh_locked(tmp, new->expiry_time, detail); + cache_fresh_locked(old, 0, detail); write_unlock(&detail->hash_lock); cache_fresh_unlocked(tmp, detail); cache_fresh_unlocked(old, detail); @@ -219,7 +227,8 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h rv = cache_is_valid(h); if (rv == -EAGAIN) { set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); + cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY, + detail); rv = -ENOENT; } write_unlock(&detail->hash_lock); @@ -487,10 +496,13 @@ EXPORT_SYMBOL_GPL(cache_flush); void cache_purge(struct cache_detail *detail) { - detail->flush_time = LONG_MAX; + time_t now = seconds_since_boot(); + if (detail->flush_time >= now) + now = detail->flush_time + 1; + /* 'now' is the maximum value any 'last_refresh' can have */ + detail->flush_time = now; detail->nextcheck = seconds_since_boot(); cache_flush(); - detail->flush_time = 1; } EXPORT_SYMBOL_GPL(cache_purge); @@ -1436,6 +1448,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf, { char tbuf[20]; char *bp, *ep; + time_t then, now; if (*ppos || count > sizeof(tbuf)-1) return -EINVAL; @@ -1447,8 +1460,22 @@ static ssize_t write_flush(struct file *file, const char __user *buf, return -EINVAL; bp = tbuf; - cd->flush_time = get_expiry(&bp); - cd->nextcheck = seconds_since_boot(); + then = get_expiry(&bp); + now = seconds_since_boot(); + cd->nextcheck = now; + /* Can only set flush_time to 1 second beyond "now", or + * possibly 1 second beyond flushtime. This is because + * flush_time never goes backwards so it mustn't get too far + * ahead of time. + */ + if (then >= now) { + /* Want to flush everything, so behave like cache_purge() */ + if (cd->flush_time >= now) + now = cd->flush_time + 1; + then = now; + } + + cd->flush_time = then; cache_flush(); *ppos += count; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 0c8120229a03..1413cdcc131c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -181,7 +181,7 @@ int svc_send_common(struct socket *sock, struct xdr_buf *xdr, struct page **ppage = xdr->pages; size_t base = xdr->page_base; unsigned int pglen = xdr->page_len; - unsigned int flags = MSG_MORE; + unsigned int flags = MSG_MORE | MSG_SENDPAGE_NOTLAST; int slen; int len = 0; @@ -399,6 +399,31 @@ static int svc_sock_secure_port(struct svc_rqst *rqstp) return svc_port_is_privileged(svc_addr(rqstp)); } +static bool sunrpc_waitqueue_active(wait_queue_head_t *wq) +{ + if (!wq) + return false; + /* + * There should normally be a memory * barrier here--see + * wq_has_sleeper(). + * + * It appears that isn't currently necessary, though, basically + * because callers all appear to have sufficient memory barriers + * between the time the relevant change is made and the + * time they call these callbacks. + * + * The nfsd code itself doesn't actually explicitly wait on + * these waitqueues, but it may wait on them for example in + * sendpage() or sendmsg() calls. (And those may be the only + * places, since it it uses nonblocking reads.) + * + * Maybe we should add the memory barriers anyway, but these are + * hot paths so we'd need to be convinced there's no sigificant + * penalty. + */ + return waitqueue_active(wq); +} + /* * INET callback when data has been received on the socket. */ @@ -414,7 +439,7 @@ static void svc_udp_data_ready(struct sock *sk) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (wq && waitqueue_active(wq)) + if (sunrpc_waitqueue_active(wq)) wake_up_interruptible(wq); } @@ -432,7 +457,7 @@ static void svc_write_space(struct sock *sk) svc_xprt_enqueue(&svsk->sk_xprt); } - if (wq && waitqueue_active(wq)) { + if (sunrpc_waitqueue_active(wq)) { dprintk("RPC svc_write_space: someone sleeping on %p\n", svsk); wake_up_interruptible(wq); @@ -787,7 +812,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk) } wq = sk_sleep(sk); - if (wq && waitqueue_active(wq)) + if (sunrpc_waitqueue_active(wq)) wake_up_interruptible_all(wq); } @@ -808,7 +833,7 @@ static void svc_tcp_state_change(struct sock *sk) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (wq && waitqueue_active(wq)) + if (sunrpc_waitqueue_active(wq)) wake_up_interruptible_all(wq); } @@ -823,7 +848,7 @@ static void svc_tcp_data_ready(struct sock *sk) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (wq && waitqueue_active(wq)) + if (sunrpc_waitqueue_active(wq)) wake_up_interruptible(wq); } @@ -1367,7 +1392,6 @@ EXPORT_SYMBOL_GPL(svc_sock_update_bufs); /* * Initialize socket for RPC use and create svc_sock struct - * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. */ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, struct socket *sock, @@ -1594,7 +1618,7 @@ static void svc_sock_detach(struct svc_xprt *xprt) sk->sk_write_space = svsk->sk_owspace; wq = sk_sleep(sk); - if (wq && waitqueue_active(wq)) + if (sunrpc_waitqueue_active(wq)) wake_up_interruptible(wq); } diff --git a/tools/power/cpupower/debug/i386/dump_psb.c b/tools/power/cpupower/debug/i386/dump_psb.c index 8d6a47514253..2c768cf70128 100644 --- a/tools/power/cpupower/debug/i386/dump_psb.c +++ b/tools/power/cpupower/debug/i386/dump_psb.c @@ -134,7 +134,7 @@ next_one: } static struct option info_opts[] = { - {.name = "numpst", .has_arg=no_argument, .flag=NULL, .val='n'}, + {"numpst", no_argument, NULL, 'n'}, }; void print_help(void) diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1 index 3e6799d7a79f..580c4e3ea92a 100644 --- a/tools/power/cpupower/man/cpupower-idle-set.1 +++ b/tools/power/cpupower/man/cpupower-idle-set.1 @@ -20,7 +20,9 @@ Disable a specific processor sleep state. Enable a specific processor sleep state. .TP \fB\-D\fR \fB\-\-disable-by-latency\fR <LATENCY> -Disable all idle states with a equal or higher latency than <LATENCY> +Disable all idle states with a equal or higher latency than <LATENCY>. + +Enable all idle states with a latency lower than <LATENCY>. .TP \fB\-E\fR \fB\-\-enable-all\fR Enable all idle states if not enabled already. diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index b4b90a97662c..0e6764330241 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -536,21 +536,21 @@ static int get_latency(unsigned int cpu, unsigned int human) } static struct option info_opts[] = { - { .name = "debug", .has_arg = no_argument, .flag = NULL, .val = 'e'}, - { .name = "boost", .has_arg = no_argument, .flag = NULL, .val = 'b'}, - { .name = "freq", .has_arg = no_argument, .flag = NULL, .val = 'f'}, - { .name = "hwfreq", .has_arg = no_argument, .flag = NULL, .val = 'w'}, - { .name = "hwlimits", .has_arg = no_argument, .flag = NULL, .val = 'l'}, - { .name = "driver", .has_arg = no_argument, .flag = NULL, .val = 'd'}, - { .name = "policy", .has_arg = no_argument, .flag = NULL, .val = 'p'}, - { .name = "governors", .has_arg = no_argument, .flag = NULL, .val = 'g'}, - { .name = "related-cpus", .has_arg = no_argument, .flag = NULL, .val = 'r'}, - { .name = "affected-cpus",.has_arg = no_argument, .flag = NULL, .val = 'a'}, - { .name = "stats", .has_arg = no_argument, .flag = NULL, .val = 's'}, - { .name = "latency", .has_arg = no_argument, .flag = NULL, .val = 'y'}, - { .name = "proc", .has_arg = no_argument, .flag = NULL, .val = 'o'}, - { .name = "human", .has_arg = no_argument, .flag = NULL, .val = 'm'}, - { .name = "no-rounding", .has_arg = no_argument, .flag = NULL, .val = 'n'}, + {"debug", no_argument, NULL, 'e'}, + {"boost", no_argument, NULL, 'b'}, + {"freq", no_argument, NULL, 'f'}, + {"hwfreq", no_argument, NULL, 'w'}, + {"hwlimits", no_argument, NULL, 'l'}, + {"driver", no_argument, NULL, 'd'}, + {"policy", no_argument, NULL, 'p'}, + {"governors", no_argument, NULL, 'g'}, + {"related-cpus", no_argument, NULL, 'r'}, + {"affected-cpus", no_argument, NULL, 'a'}, + {"stats", no_argument, NULL, 's'}, + {"latency", no_argument, NULL, 'y'}, + {"proc", no_argument, NULL, 'o'}, + {"human", no_argument, NULL, 'm'}, + {"no-rounding", no_argument, NULL, 'n'}, { }, }; diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c index 4e213576381e..0fbd1a22c0a9 100644 --- a/tools/power/cpupower/utils/cpufreq-set.c +++ b/tools/power/cpupower/utils/cpufreq-set.c @@ -22,11 +22,11 @@ #define NORM_FREQ_LEN 32 static struct option set_opts[] = { - { .name = "min", .has_arg = required_argument, .flag = NULL, .val = 'd'}, - { .name = "max", .has_arg = required_argument, .flag = NULL, .val = 'u'}, - { .name = "governor", .has_arg = required_argument, .flag = NULL, .val = 'g'}, - { .name = "freq", .has_arg = required_argument, .flag = NULL, .val = 'f'}, - { .name = "related", .has_arg = no_argument, .flag = NULL, .val='r'}, + {"min", required_argument, NULL, 'd'}, + {"max", required_argument, NULL, 'u'}, + {"governor", required_argument, NULL, 'g'}, + {"freq", required_argument, NULL, 'f'}, + {"related", no_argument, NULL, 'r'}, { }, }; diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c index 75e66de7e7a7..750c1d82c3f7 100644 --- a/tools/power/cpupower/utils/cpuidle-info.c +++ b/tools/power/cpupower/utils/cpuidle-info.c @@ -126,8 +126,8 @@ static void proc_cpuidle_cpu_output(unsigned int cpu) } static struct option info_opts[] = { - { .name = "silent", .has_arg = no_argument, .flag = NULL, .val = 's'}, - { .name = "proc", .has_arg = no_argument, .flag = NULL, .val = 'o'}, + {"silent", no_argument, NULL, 's'}, + {"proc", no_argument, NULL, 'o'}, { }, }; diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c index d45d8d775c02..d6b6ae44b8c2 100644 --- a/tools/power/cpupower/utils/cpuidle-set.c +++ b/tools/power/cpupower/utils/cpuidle-set.c @@ -13,15 +13,11 @@ #include "helpers/sysfs.h" static struct option info_opts[] = { - { .name = "disable", - .has_arg = required_argument, .flag = NULL, .val = 'd'}, - { .name = "enable", - .has_arg = required_argument, .flag = NULL, .val = 'e'}, - { .name = "disable-by-latency", - .has_arg = required_argument, .flag = NULL, .val = 'D'}, - { .name = "enable-all", - .has_arg = no_argument, .flag = NULL, .val = 'E'}, - { }, + {"disable", required_argument, NULL, 'd'}, + {"enable", required_argument, NULL, 'e'}, + {"disable-by-latency", required_argument, NULL, 'D'}, + {"enable-all", no_argument, NULL, 'E'}, + { }, }; @@ -148,14 +144,21 @@ int cmd_idle_set(int argc, char **argv) (cpu, idlestate); state_latency = sysfs_get_idlestate_latency (cpu, idlestate); - printf("CPU: %u - idlestate %u - state_latency: %llu - latency: %llu\n", - cpu, idlestate, state_latency, latency); - if (disabled == 1 || latency > state_latency) + if (disabled == 1) { + if (latency > state_latency){ + ret = sysfs_idlestate_disable + (cpu, idlestate, 0); + if (ret == 0) + printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu); + } continue; - ret = sysfs_idlestate_disable - (cpu, idlestate, 1); - if (ret == 0) + } + if (latency <= state_latency){ + ret = sysfs_idlestate_disable + (cpu, idlestate, 1); + if (ret == 0) printf(_("Idlestate %u disabled on CPU %u\n"), idlestate, cpu); + } } break; case 'E': diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c index 136d979e9586..10299f2e9d2a 100644 --- a/tools/power/cpupower/utils/cpupower-info.c +++ b/tools/power/cpupower/utils/cpupower-info.c @@ -17,8 +17,8 @@ #include "helpers/sysfs.h" static struct option set_opts[] = { - { .name = "perf-bias", .has_arg = optional_argument, .flag = NULL, .val = 'b'}, - { }, + {"perf-bias", optional_argument, NULL, 'b'}, + { }, }; static void print_wrong_arg_exit(void) diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index 573c75f8e3f5..3e6f374f8dd7 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -18,7 +18,7 @@ #include "helpers/bitmask.h" static struct option set_opts[] = { - { .name = "perf-bias", .has_arg = required_argument, .flag = NULL, .val = 'b'}, + {"perf-bias", required_argument, NULL, 'b'}, { }, }; diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c index cea398c176e7..9cbb7fd75171 100644 --- a/tools/power/cpupower/utils/helpers/topology.c +++ b/tools/power/cpupower/utils/helpers/topology.c @@ -73,18 +73,22 @@ int get_cpu_topology(struct cpupower_topology *cpu_top) for (cpu = 0; cpu < cpus; cpu++) { cpu_top->core_info[cpu].cpu = cpu; cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu); - if (!cpu_top->core_info[cpu].is_online) - continue; if(sysfs_topology_read_file( cpu, "physical_package_id", - &(cpu_top->core_info[cpu].pkg)) < 0) - return -1; + &(cpu_top->core_info[cpu].pkg)) < 0) { + cpu_top->core_info[cpu].pkg = -1; + cpu_top->core_info[cpu].core = -1; + continue; + } if(sysfs_topology_read_file( cpu, "core_id", - &(cpu_top->core_info[cpu].core)) < 0) - return -1; + &(cpu_top->core_info[cpu].core)) < 0) { + cpu_top->core_info[cpu].pkg = -1; + cpu_top->core_info[cpu].core = -1; + continue; + } } qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info), @@ -95,12 +99,15 @@ int get_cpu_topology(struct cpupower_topology *cpu_top) done by pkg value. */ last_pkg = cpu_top->core_info[0].pkg; for(cpu = 1; cpu < cpus; cpu++) { - if(cpu_top->core_info[cpu].pkg != last_pkg) { + if (cpu_top->core_info[cpu].pkg != last_pkg && + cpu_top->core_info[cpu].pkg != -1) { + last_pkg = cpu_top->core_info[cpu].pkg; cpu_top->pkgs++; } } - cpu_top->pkgs++; + if (!cpu_top->core_info[0].pkg == -1) + cpu_top->pkgs++; /* Intel's cores count is not consecutively numbered, there may * be a core_id of 3, but none of 2. Assume there always is 0 diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index c4bae9203a69..05f953f0f0a0 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -143,6 +143,9 @@ void print_results(int topology_depth, int cpu) /* Be careful CPUs may got resorted for pkg value do not just use cpu */ if (!bitmask_isbitset(cpus_chosen, cpu_top.core_info[cpu].cpu)) return; + if (!cpu_top.core_info[cpu].is_online && + cpu_top.core_info[cpu].pkg == -1) + return; if (topology_depth > 2) printf("%4d|", cpu_top.core_info[cpu].pkg); @@ -191,7 +194,8 @@ void print_results(int topology_depth, int cpu) * It's up to the monitor plug-in to check .is_online, this one * is just for additional info. */ - if (!cpu_top.core_info[cpu].is_online) { + if (!cpu_top.core_info[cpu].is_online && + cpu_top.core_info[cpu].pkg != -1) { printf(_(" *is offline\n")); return; } else @@ -388,6 +392,9 @@ int cmd_monitor(int argc, char **argv) return EXIT_FAILURE; } + if (!cpu_top.core_info[0].is_online) + printf("WARNING: at least one cpu is offline\n"); + /* Default is: monitor all CPUs */ if (bitmask_isallclear(cpus_chosen)) bitmask_setall(cpus_chosen); diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index bde0ef1a63df..d8e4b20b6d54 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -75,6 +75,7 @@ unsigned int aperf_mperf_multiplier = 1; int do_smi; double bclk; double base_hz; +unsigned int has_base_hz; double tsc_tweak = 1.0; unsigned int show_pkg; unsigned int show_core; @@ -96,6 +97,7 @@ unsigned int do_ring_perf_limit_reasons; unsigned int crystal_hz; unsigned long long tsc_hz; int base_cpu; +double discover_bclk(unsigned int family, unsigned int model); #define RAPL_PKG (1 << 0) /* 0x610 MSR_PKG_POWER_LIMIT */ @@ -511,9 +513,13 @@ int format_counters(struct thread_data *t, struct core_data *c, } /* Bzy_MHz */ - if (has_aperf) - outp += sprintf(outp, "%8.0f", - 1.0 * t->tsc * tsc_tweak / units * t->aperf / t->mperf / interval_float); + if (has_aperf) { + if (has_base_hz) + outp += sprintf(outp, "%8.0f", base_hz / units * t->aperf / t->mperf); + else + outp += sprintf(outp, "%8.0f", + 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); + } /* TSC_MHz */ outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); @@ -1158,12 +1164,6 @@ int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, static void calculate_tsc_tweak() { - unsigned long long msr; - unsigned int base_ratio; - - get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); - base_ratio = (msr >> 8) & 0xFF; - base_hz = base_ratio * bclk * 1000000; tsc_tweak = base_hz / tsc_hz; } @@ -1440,7 +1440,7 @@ dump_config_tdp(void) get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); fprintf(stderr, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); - fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xEF); + fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F); fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); fprintf(stderr, ")\n"); } @@ -1821,6 +1821,7 @@ void check_permissions() int probe_nhm_msrs(unsigned int family, unsigned int model) { unsigned long long msr; + unsigned int base_ratio; int *pkg_cstate_limits; if (!genuine_intel) @@ -1829,6 +1830,8 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) if (family != 6) return 0; + bclk = discover_bclk(family, model); + switch (model) { case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ @@ -1871,9 +1874,13 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) return 0; } get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); - pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; + get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); + base_ratio = (msr >> 8) & 0xFF; + + base_hz = base_ratio * bclk * 1000000; + has_base_hz = 1; return 1; } int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) @@ -2780,7 +2787,6 @@ void process_cpuid() do_skl_residency = has_skl_msrs(family, model); do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); - bclk = discover_bclk(family, model); rapl_probe(family, model); perf_limit_reasons_probe(family, model); diff --git a/tools/thermal/tmon/Makefile b/tools/thermal/tmon/Makefile index 2e83dd3655a2..3a961e998281 100644 --- a/tools/thermal/tmon/Makefile +++ b/tools/thermal/tmon/Makefile @@ -22,6 +22,9 @@ TMON_LIBS += $(shell pkg-config --libs $(STATIC) panelw ncursesw 2> /dev/null || pkg-config --libs $(STATIC) panel ncurses 2> /dev/null || \ echo -lpanel -lncurses) +CFLAGS += $(shell pkg-config --cflags $(STATIC) panelw ncursesw 2> /dev/null || \ + pkg-config --cflags $(STATIC) panel ncurses 2> /dev/null) + OBJS = tmon.o tui.o sysfs.o pid.o OBJS += |