diff options
127 files changed, 6726 insertions, 1225 deletions
diff --git a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt deleted file mode 100644 index 58fc8a6cebc7..000000000000 --- a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt +++ /dev/null @@ -1,212 +0,0 @@ -* Rockchip rk3399 DMC (Dynamic Memory Controller) device - -Required properties: -- compatible: Must be "rockchip,rk3399-dmc". -- devfreq-events: Node to get DDR loading, Refer to - Documentation/devicetree/bindings/devfreq/event/ - rockchip-dfi.txt -- clocks: Phandles for clock specified in "clock-names" property -- clock-names : The name of clock used by the DFI, must be - "pclk_ddr_mon"; -- operating-points-v2: Refer to Documentation/devicetree/bindings/opp/opp-v2.yaml - for details. -- center-supply: DMC supply node. -- status: Marks the node enabled/disabled. -- rockchip,pmu: Phandle to the syscon managing the "PMU general register - files". - -Optional properties: -- interrupts: The CPU interrupt number. The interrupt specifier - format depends on the interrupt controller. - It should be a DCF interrupt. When DDR DVFS finishes - a DCF interrupt is triggered. -- rockchip,pmu: Phandle to the syscon managing the "PMU general register - files". - -Following properties relate to DDR timing: - -- rockchip,dram_speed_bin : Value reference include/dt-bindings/clock/rk3399-ddr.h, - it selects the DDR3 cl-trp-trcd type. It must be - set according to "Speed Bin" in DDR3 datasheet, - DO NOT use a smaller "Speed Bin" than specified - for the DDR3 being used. - -- rockchip,pd_idle : Configure the PD_IDLE value. Defines the - power-down idle period in which memories are - placed into power-down mode if bus is idle - for PD_IDLE DFI clock cycles. - -- rockchip,sr_idle : Configure the SR_IDLE value. Defines the - self-refresh idle period in which memories are - placed into self-refresh mode if bus is idle - for SR_IDLE * 1024 DFI clock cycles (DFI - clocks freq is half of DRAM clock), default - value is "0". - -- rockchip,sr_mc_gate_idle : Defines the memory self-refresh and controller - clock gating idle period. Memories are placed - into self-refresh mode and memory controller - clock arg gating started if bus is idle for - sr_mc_gate_idle*1024 DFI clock cycles. - -- rockchip,srpd_lite_idle : Defines the self-refresh power down idle - period in which memories are placed into - self-refresh power down mode if bus is idle - for srpd_lite_idle * 1024 DFI clock cycles. - This parameter is for LPDDR4 only. - -- rockchip,standby_idle : Defines the standby idle period in which - memories are placed into self-refresh mode. - The controller, pi, PHY and DRAM clock will - be gated if bus is idle for standby_idle * DFI - clock cycles. - -- rockchip,dram_dll_dis_freq : Defines the DDR3 DLL bypass frequency in MHz. - When DDR frequency is less than DRAM_DLL_DISB_FREQ, - DDR3 DLL will be bypassed. Note: if DLL was bypassed, - the odt will also stop working. - -- rockchip,phy_dll_dis_freq : Defines the PHY dll bypass frequency in - MHz (Mega Hz). When DDR frequency is less than - DRAM_DLL_DISB_FREQ, PHY DLL will be bypassed. - Note: PHY DLL and PHY ODT are independent. - -- rockchip,ddr3_odt_dis_freq : When the DRAM type is DDR3, this parameter defines - the ODT disable frequency in MHz (Mega Hz). - when the DDR frequency is less then ddr3_odt_dis_freq, - the ODT on the DRAM side and controller side are - both disabled. - -- rockchip,ddr3_drv : When the DRAM type is DDR3, this parameter defines - the DRAM side driver strength in ohms. Default - value is 40. - -- rockchip,ddr3_odt : When the DRAM type is DDR3, this parameter defines - the DRAM side ODT strength in ohms. Default value - is 120. - -- rockchip,phy_ddr3_ca_drv : When the DRAM type is DDR3, this parameter defines - the phy side CA line (incluing command line, - address line and clock line) driver strength. - Default value is 40. - -- rockchip,phy_ddr3_dq_drv : When the DRAM type is DDR3, this parameter defines - the PHY side DQ line (including DQS/DQ/DM line) - driver strength. Default value is 40. - -- rockchip,phy_ddr3_odt : When the DRAM type is DDR3, this parameter defines - the PHY side ODT strength. Default value is 240. - -- rockchip,lpddr3_odt_dis_freq : When the DRAM type is LPDDR3, this parameter defines - then ODT disable frequency in MHz (Mega Hz). - When DDR frequency is less then ddr3_odt_dis_freq, - the ODT on the DRAM side and controller side are - both disabled. - -- rockchip,lpddr3_drv : When the DRAM type is LPDDR3, this parameter defines - the DRAM side driver strength in ohms. Default - value is 34. - -- rockchip,lpddr3_odt : When the DRAM type is LPDDR3, this parameter defines - the DRAM side ODT strength in ohms. Default value - is 240. - -- rockchip,phy_lpddr3_ca_drv : When the DRAM type is LPDDR3, this parameter defines - the PHY side CA line (including command line, - address line and clock line) driver strength. - Default value is 40. - -- rockchip,phy_lpddr3_dq_drv : When the DRAM type is LPDDR3, this parameter defines - the PHY side DQ line (including DQS/DQ/DM line) - driver strength. Default value is 40. - -- rockchip,phy_lpddr3_odt : When dram type is LPDDR3, this parameter define - the phy side odt strength, default value is 240. - -- rockchip,lpddr4_odt_dis_freq : When the DRAM type is LPDDR4, this parameter - defines the ODT disable frequency in - MHz (Mega Hz). When the DDR frequency is less then - ddr3_odt_dis_freq, the ODT on the DRAM side and - controller side are both disabled. - -- rockchip,lpddr4_drv : When the DRAM type is LPDDR4, this parameter defines - the DRAM side driver strength in ohms. Default - value is 60. - -- rockchip,lpddr4_dq_odt : When the DRAM type is LPDDR4, this parameter defines - the DRAM side ODT on DQS/DQ line strength in ohms. - Default value is 40. - -- rockchip,lpddr4_ca_odt : When the DRAM type is LPDDR4, this parameter defines - the DRAM side ODT on CA line strength in ohms. - Default value is 40. - -- rockchip,phy_lpddr4_ca_drv : When the DRAM type is LPDDR4, this parameter defines - the PHY side CA line (including command address - line) driver strength. Default value is 40. - -- rockchip,phy_lpddr4_ck_cs_drv : When the DRAM type is LPDDR4, this parameter defines - the PHY side clock line and CS line driver - strength. Default value is 80. - -- rockchip,phy_lpddr4_dq_drv : When the DRAM type is LPDDR4, this parameter defines - the PHY side DQ line (including DQS/DQ/DM line) - driver strength. Default value is 80. - -- rockchip,phy_lpddr4_odt : When the DRAM type is LPDDR4, this parameter defines - the PHY side ODT strength. Default value is 60. - -Example: - dmc_opp_table: dmc_opp_table { - compatible = "operating-points-v2"; - - opp00 { - opp-hz = /bits/ 64 <300000000>; - opp-microvolt = <900000>; - }; - opp01 { - opp-hz = /bits/ 64 <666000000>; - opp-microvolt = <900000>; - }; - }; - - dmc: dmc { - compatible = "rockchip,rk3399-dmc"; - devfreq-events = <&dfi>; - interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&cru SCLK_DDRC>; - clock-names = "dmc_clk"; - operating-points-v2 = <&dmc_opp_table>; - center-supply = <&ppvar_centerlogic>; - upthreshold = <15>; - downdifferential = <10>; - rockchip,ddr3_speed_bin = <21>; - rockchip,pd_idle = <0x40>; - rockchip,sr_idle = <0x2>; - rockchip,sr_mc_gate_idle = <0x3>; - rockchip,srpd_lite_idle = <0x4>; - rockchip,standby_idle = <0x2000>; - rockchip,dram_dll_dis_freq = <300>; - rockchip,phy_dll_dis_freq = <125>; - rockchip,auto_pd_dis_freq = <666>; - rockchip,ddr3_odt_dis_freq = <333>; - rockchip,ddr3_drv = <40>; - rockchip,ddr3_odt = <120>; - rockchip,phy_ddr3_ca_drv = <40>; - rockchip,phy_ddr3_dq_drv = <40>; - rockchip,phy_ddr3_odt = <240>; - rockchip,lpddr3_odt_dis_freq = <333>; - rockchip,lpddr3_drv = <34>; - rockchip,lpddr3_odt = <240>; - rockchip,phy_lpddr3_ca_drv = <40>; - rockchip,phy_lpddr3_dq_drv = <40>; - rockchip,phy_lpddr3_odt = <240>; - rockchip,lpddr4_odt_dis_freq = <333>; - rockchip,lpddr4_drv = <60>; - rockchip,lpddr4_dq_odt = <40>; - rockchip,lpddr4_ca_odt = <40>; - rockchip,phy_lpddr4_ca_drv = <40>; - rockchip,phy_lpddr4_ck_cs_drv = <80>; - rockchip,phy_lpddr4_dq_drv = <80>; - rockchip,phy_lpddr4_odt = <60>; - }; diff --git a/Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml b/Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml new file mode 100644 index 000000000000..fb4920397d08 --- /dev/null +++ b/Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml @@ -0,0 +1,384 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# %YAML 1.2 +--- +$id: http://devicetree.org/schemas/memory-controllers/rockchip,rk3399-dmc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Rockchip rk3399 DMC (Dynamic Memory Controller) device + +maintainers: + - Brian Norris <briannorris@chromium.org> + +properties: + compatible: + enum: + - rockchip,rk3399-dmc + + devfreq-events: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Node to get DDR loading. Refer to + Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt. + + clocks: + maxItems: 1 + + clock-names: + items: + - const: dmc_clk + + operating-points-v2: true + + center-supply: + description: + DMC regulator supply. + + rockchip,pmu: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Phandle to the syscon managing the "PMU general register files". + + interrupts: + maxItems: 1 + description: + The CPU interrupt number. It should be a DCF interrupt. When DDR DVFS + finishes, a DCF interrupt is triggered. + + rockchip,ddr3_speed_bin: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + For values, reference include/dt-bindings/clock/rk3399-ddr.h. Selects the + DDR3 cl-trp-trcd type. It must be set according to "Speed Bin" in DDR3 + datasheet; DO NOT use a smaller "Speed Bin" than specified for the DDR3 + being used. + + rockchip,pd_idle: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Configure the PD_IDLE value. Defines the power-down idle period in which + memories are placed into power-down mode if bus is idle for PD_IDLE DFI + clock cycles. + See also rockchip,pd-idle-ns. + + rockchip,sr_idle: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Configure the SR_IDLE value. Defines the self-refresh idle period in + which memories are placed into self-refresh mode if bus is idle for + SR_IDLE * 1024 DFI clock cycles (DFI clocks freq is half of DRAM clock). + See also rockchip,sr-idle-ns. + default: 0 + + rockchip,sr_mc_gate_idle: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Defines the memory self-refresh and controller clock gating idle period. + Memories are placed into self-refresh mode and memory controller clock + arg gating started if bus is idle for sr_mc_gate_idle*1024 DFI clock + cycles. + See also rockchip,sr-mc-gate-idle-ns. + + rockchip,srpd_lite_idle: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Defines the self-refresh power down idle period in which memories are + placed into self-refresh power down mode if bus is idle for + srpd_lite_idle * 1024 DFI clock cycles. This parameter is for LPDDR4 + only. + See also rockchip,srpd-lite-idle-ns. + + rockchip,standby_idle: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Defines the standby idle period in which memories are placed into + self-refresh mode. The controller, pi, PHY and DRAM clock will be gated + if bus is idle for standby_idle * DFI clock cycles. + See also rockchip,standby-idle-ns. + + rockchip,dram_dll_dis_freq: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: | + Defines the DDR3 DLL bypass frequency in MHz. When DDR frequency is less + than DRAM_DLL_DISB_FREQ, DDR3 DLL will be bypassed. + Note: if DLL was bypassed, the odt will also stop working. + + rockchip,phy_dll_dis_freq: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: | + Defines the PHY dll bypass frequency in MHz (Mega Hz). When DDR frequency + is less than DRAM_DLL_DISB_FREQ, PHY DLL will be bypassed. + Note: PHY DLL and PHY ODT are independent. + + rockchip,auto_pd_dis_freq: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Defines the auto PD disable frequency in MHz. + + rockchip,ddr3_odt_dis_freq: + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 1000000 # In case anyone thought this was MHz. + description: + When the DRAM type is DDR3, this parameter defines the ODT disable + frequency in Hz. When the DDR frequency is less then ddr3_odt_dis_freq, + the ODT on the DRAM side and controller side are both disabled. + + rockchip,ddr3_drv: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When the DRAM type is DDR3, this parameter defines the DRAM side drive + strength in ohms. + default: 40 + + rockchip,ddr3_odt: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When the DRAM type is DDR3, this parameter defines the DRAM side ODT + strength in ohms. + default: 120 + + rockchip,phy_ddr3_ca_drv: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When the DRAM type is DDR3, this parameter defines the phy side CA line + (incluing command line, address line and clock line) drive strength. + default: 40 + + rockchip,phy_ddr3_dq_drv: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When the DRAM type is DDR3, this parameter defines the PHY side DQ line + (including DQS/DQ/DM line) drive strength. + default: 40 + + rockchip,phy_ddr3_odt: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When the DRAM type is DDR3, this parameter defines the PHY side ODT + strength. + default: 240 + + rockchip,lpddr3_odt_dis_freq: + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 1000000 # In case anyone thought this was MHz. + description: + When the DRAM type is LPDDR3, this parameter defines then ODT disable + frequency in Hz. When DDR frequency is less then ddr3_odt_dis_freq, the + ODT on the DRAM side and controller side are both disabled. + + rockchip,lpddr3_drv: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When the DRAM type is LPDDR3, this parameter defines the DRAM side drive + strength in ohms. + default: 34 + + rockchip,lpddr3_odt: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When the DRAM type is LPDDR3, this parameter defines the DRAM side ODT + strength in ohms. + default: 240 + + rockchip,phy_lpddr3_ca_drv: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When the DRAM type is LPDDR3, this parameter defines the PHY side CA line + (including command line, address line and clock line) drive strength. + default: 40 + + rockchip,phy_lpddr3_dq_drv: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When the DRAM type is LPDDR3, this parameter defines the PHY side DQ line + (including DQS/DQ/DM line) drive strength. + default: 40 + + rockchip,phy_lpddr3_odt: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When dram type is LPDDR3, this parameter define the phy side odt + strength, default value is 240. + + rockchip,lpddr4_odt_dis_freq: + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 1000000 # In case anyone thought this was MHz. + description: + When the DRAM type is LPDDR4, this parameter defines the ODT disable + frequency in Hz. When the DDR frequency is less then ddr3_odt_dis_freq, + the ODT on the DRAM side and controller side are both disabled. + + rockchip,lpddr4_drv: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When the DRAM type is LPDDR4, this parameter defines the DRAM side drive + strength in ohms. + default: 60 + + rockchip,lpddr4_dq_odt: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When the DRAM type is LPDDR4, this parameter defines the DRAM side ODT on + DQS/DQ line strength in ohms. + default: 40 + + rockchip,lpddr4_ca_odt: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When the DRAM type is LPDDR4, this parameter defines the DRAM side ODT on + CA line strength in ohms. + default: 40 + + rockchip,phy_lpddr4_ca_drv: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When the DRAM type is LPDDR4, this parameter defines the PHY side CA line + (including command address line) drive strength. + default: 40 + + rockchip,phy_lpddr4_ck_cs_drv: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When the DRAM type is LPDDR4, this parameter defines the PHY side clock + line and CS line drive strength. + default: 80 + + rockchip,phy_lpddr4_dq_drv: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When the DRAM type is LPDDR4, this parameter defines the PHY side DQ line + (including DQS/DQ/DM line) drive strength. + default: 80 + + rockchip,phy_lpddr4_odt: + deprecated: true + $ref: /schemas/types.yaml#/definitions/uint32 + description: + When the DRAM type is LPDDR4, this parameter defines the PHY side ODT + strength. + default: 60 + + rockchip,pd-idle-ns: + description: + Configure the PD_IDLE value in nanoseconds. Defines the power-down idle + period in which memories are placed into power-down mode if bus is idle + for PD_IDLE nanoseconds. + + rockchip,sr-idle-ns: + description: + Configure the SR_IDLE value in nanoseconds. Defines the self-refresh idle + period in which memories are placed into self-refresh mode if bus is idle + for SR_IDLE nanoseconds. + default: 0 + + rockchip,sr-mc-gate-idle-ns: + description: + Defines the memory self-refresh and controller clock gating idle period in nanoseconds. + Memories are placed into self-refresh mode and memory controller clock + arg gating started if bus is idle for sr_mc_gate_idle nanoseconds. + + rockchip,srpd-lite-idle-ns: + description: + Defines the self-refresh power down idle period in which memories are + placed into self-refresh power down mode if bus is idle for + srpd_lite_idle nanoseonds. This parameter is for LPDDR4 only. + + rockchip,standby-idle-ns: + description: + Defines the standby idle period in which memories are placed into + self-refresh mode. The controller, pi, PHY and DRAM clock will be gated + if bus is idle for standby_idle nanoseconds. + + rockchip,pd-idle-dis-freq-hz: + description: + Defines the power-down idle disable frequency in Hz. When the DDR + frequency is greater than pd-idle-dis-freq, power-down idle is disabled. + See also rockchip,pd-idle-ns. + + rockchip,sr-idle-dis-freq-hz: + description: + Defines the self-refresh idle disable frequency in Hz. When the DDR + frequency is greater than sr-idle-dis-freq, self-refresh idle is + disabled. See also rockchip,sr-idle-ns. + + rockchip,sr-mc-gate-idle-dis-freq-hz: + description: + Defines the self-refresh and memory-controller clock gating disable + frequency in Hz. When the DDR frequency is greater than + sr-mc-gate-idle-dis-freq, the clock will not be gated when idle. See also + rockchip,sr-mc-gate-idle-ns. + + rockchip,srpd-lite-idle-dis-freq-hz: + description: + Defines the self-refresh power down idle disable frequency in Hz. When + the DDR frequency is greater than srpd-lite-idle-dis-freq, memory will + not be placed into self-refresh power down mode when idle. See also + rockchip,srpd-lite-idle-ns. + + rockchip,standby-idle-dis-freq-hz: + description: + Defines the standby idle disable frequency in Hz. When the DDR frequency + is greater than standby-idle-dis-freq, standby idle is disabled. See also + rockchip,standby-idle-ns. + +required: + - compatible + - devfreq-events + - clocks + - clock-names + - operating-points-v2 + - center-supply + +additionalProperties: false + +examples: + - | + #include <dt-bindings/clock/rk3399-cru.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + memory-controller { + compatible = "rockchip,rk3399-dmc"; + devfreq-events = <&dfi>; + rockchip,pmu = <&pmu>; + interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&cru SCLK_DDRC>; + clock-names = "dmc_clk"; + operating-points-v2 = <&dmc_opp_table>; + center-supply = <&ppvar_centerlogic>; + rockchip,pd-idle-ns = <160>; + rockchip,sr-idle-ns = <10240>; + rockchip,sr-mc-gate-idle-ns = <40960>; + rockchip,srpd-lite-idle-ns = <61440>; + rockchip,standby-idle-ns = <81920>; + rockchip,ddr3_odt_dis_freq = <333000000>; + rockchip,lpddr3_odt_dis_freq = <333000000>; + rockchip,lpddr4_odt_dis_freq = <333000000>; + rockchip,pd-idle-dis-freq-hz = <1000000000>; + rockchip,sr-idle-dis-freq-hz = <1000000000>; + rockchip,sr-mc-gate-idle-dis-freq-hz = <1000000000>; + rockchip,srpd-lite-idle-dis-freq-hz = <0>; + rockchip,standby-idle-dis-freq-hz = <928000000>; + }; diff --git a/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml index a9b7388ca9ac..e1587ddf7de3 100644 --- a/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml +++ b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml @@ -18,6 +18,7 @@ description: properties: compatible: enum: + - qcom,sc8180x-lmh - qcom,sdm845-lmh - qcom,sm8150-lmh diff --git a/Documentation/devicetree/bindings/thermal/qcom-spmi-adc-tm5.yaml b/Documentation/devicetree/bindings/thermal/qcom-spmi-adc-tm5.yaml index 3ea8c0c1f45f..feb390d50696 100644 --- a/Documentation/devicetree/bindings/thermal/qcom-spmi-adc-tm5.yaml +++ b/Documentation/devicetree/bindings/thermal/qcom-spmi-adc-tm5.yaml @@ -10,7 +10,9 @@ maintainers: properties: compatible: - const: qcom,spmi-adc-tm5 + enum: + - qcom,spmi-adc-tm5 + - qcom,spmi-adc-tm5-gen2 reg: maxItems: 1 @@ -33,6 +35,7 @@ properties: qcom,avg-samples: $ref: /schemas/types.yaml#/definitions/uint32 description: Number of samples to be used for measurement. + Not applicable for Gen2 ADC_TM peripheral. enum: - 1 - 2 @@ -45,6 +48,7 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32 description: This parameter is used to decrease ADC sampling rate. Quicker measurements can be made by reducing decimation ratio. + Not applicable for Gen2 ADC_TM peripheral. enum: - 250 - 420 @@ -93,6 +97,29 @@ patternProperties: - const: 1 - enum: [ 1, 3, 4, 6, 20, 8, 10 ] + qcom,avg-samples: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Number of samples to be used for measurement. + This property in child node is applicable only for Gen2 ADC_TM peripheral. + enum: + - 1 + - 2 + - 4 + - 8 + - 16 + default: 1 + + qcom,decimation: + $ref: /schemas/types.yaml#/definitions/uint32 + description: This parameter is used to decrease ADC sampling rate. + Quicker measurements can be made by reducing decimation ratio. + This property in child node is applicable only for Gen2 ADC_TM peripheral. + enum: + - 85 + - 340 + - 1360 + default: 1360 + required: - reg - io-channels @@ -100,6 +127,31 @@ patternProperties: additionalProperties: false +allOf: + - if: + properties: + compatible: + contains: + const: qcom,spmi-adc-tm5 + + then: + patternProperties: + "^([-a-z0-9]*)@[0-7]$": + properties: + qcom,decimation: false + qcom,avg-samples: false + + - if: + properties: + compatible: + contains: + const: qcom,spmi-adc-tm5-gen2 + + then: + properties: + qcom,avg-samples: false + qcom,decimation: false + required: - compatible - reg @@ -124,7 +176,7 @@ examples: #size-cells = <0>; #io-channel-cells = <1>; - /* Other propreties are omitted */ + /* Other properties are omitted */ conn-therm@4f { reg = <ADC5_AMUX_THM3_100K_PU>; qcom,ratiometric; @@ -148,4 +200,58 @@ examples: }; }; }; + + - | + #include <dt-bindings/iio/qcom,spmi-adc7-pmk8350.h> + #include <dt-bindings/iio/qcom,spmi-adc7-pm8350.h> + #include <dt-bindings/interrupt-controller/irq.h> + spmi_bus { + #address-cells = <1>; + #size-cells = <0>; + pmk8350_vadc: adc@3100 { + reg = <0x3100>; + compatible = "qcom,spmi-adc7"; + #address-cells = <1>; + #size-cells = <0>; + #io-channel-cells = <1>; + + /* Other properties are omitted */ + xo-therm@44 { + reg = <PMK8350_ADC7_AMUX_THM1_100K_PU>; + qcom,ratiometric; + qcom,hw-settle-time = <200>; + }; + + conn-therm@47 { + reg = <PM8350_ADC7_AMUX_THM4_100K_PU>; + qcom,ratiometric; + qcom,hw-settle-time = <200>; + }; + }; + + pmk8350_adc_tm: adc-tm@3400 { + compatible = "qcom,spmi-adc-tm5-gen2"; + reg = <0x3400>; + interrupts = <0x0 0x34 0x0 IRQ_TYPE_EDGE_RISING>; + #thermal-sensor-cells = <1>; + #address-cells = <1>; + #size-cells = <0>; + + pmk8350-xo-therm@0 { + reg = <0>; + io-channels = <&pmk8350_vadc PMK8350_ADC7_AMUX_THM1_100K_PU>; + qcom,decimation = <340>; + qcom,ratiometric; + qcom,hw-settle-time-us = <200>; + }; + + conn-therm@1 { + reg = <1>; + io-channels = <&pmk8350_vadc PM8350_ADC7_AMUX_THM4_100K_PU>; + qcom,avg-samples = <2>; + qcom,ratiometric; + qcom,hw-settle-time-us = <200>; + }; + }; + }; ... diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml index b6406bcc683f..a24baf9b9f64 100644 --- a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml +++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml @@ -19,10 +19,11 @@ description: | properties: compatible: oneOf: - - description: msm9860 TSENS based + - description: msm8960 TSENS based items: - enum: - qcom,ipq8064-tsens + - qcom,msm8960-tsens - description: v0.1 of TSENS items: @@ -49,6 +50,7 @@ properties: - qcom,sc7180-tsens - qcom,sc7280-tsens - qcom,sc8180x-tsens + - qcom,sc8280xp-tsens - qcom,sdm630-tsens - qcom,sdm845-tsens - qcom,sm8150-tsens @@ -116,6 +118,7 @@ allOf: - qcom,ipq8064-tsens - qcom,mdm9607-tsens - qcom,msm8916-tsens + - qcom,msm8960-tsens - qcom,msm8974-tsens - qcom,msm8976-tsens - qcom,qcs404-tsens diff --git a/Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml b/Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml index ccab9511a042..1d8373397848 100644 --- a/Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml @@ -17,7 +17,9 @@ properties: compatible: items: - enum: + - renesas,r9a07g043-tsu # RZ/G2UL - renesas,r9a07g044-tsu # RZ/G2{L,LC} + - renesas,r9a07g054-tsu # RZ/V2L - const: renesas,rzg2l-tsu reg: diff --git a/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml b/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml new file mode 100644 index 000000000000..c74f124ebfc0 --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/thermal/ti,j72xx-thermal.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Texas Instruments J72XX VTM (DTS) binding + +maintainers: + - Keerthy <j-keerthy@ti.com> + +properties: + compatible: + enum: + - ti,j721e-vtm + - ti,j7200-vtm + + reg: + items: + - description: VTM cfg1 register space + - description: VTM cfg2 register space + - description: VTM efuse register space + + power-domains: + maxItems: 1 + + "#thermal-sensor-cells": + const: 1 + +required: + - compatible + - reg + - power-domains + - "#thermal-sensor-cells" + +additionalProperties: false + +examples: + - | + #include <dt-bindings/soc/ti,sci_pm_domain.h> + wkup_vtm0: thermal-sensor@42040000 { + compatible = "ti,j721e-vtm"; + reg = <0x42040000 0x350>, + <0x42050000 0x350>, + <0x43000300 0x10>; + power-domains = <&k3_pds 154 TI_SCI_PD_EXCLUSIVE>; + #thermal-sensor-cells = <1>; + }; + + mpu_thermal: mpu-thermal { + polling-delay-passive = <250>; /* milliseconds */ + polling-delay = <500>; /* milliseconds */ + thermal-sensors = <&wkup_vtm0 0>; + + trips { + mpu_crit: mpu-crit { + temperature = <125000>; /* milliCelsius */ + hysteresis = <2000>; /* milliCelsius */ + type = "critical"; + }; + }; + }; +... diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst index 49549aab41b4..feb257b7f350 100644 --- a/Documentation/power/energy-model.rst +++ b/Documentation/power/energy-model.rst @@ -123,6 +123,26 @@ allows a platform to register EM power values which are reflecting total power (static + dynamic). These power values might be coming directly from experiments and measurements. +Registration of 'artificial' EM +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There is an option to provide a custom callback for drivers missing detailed +knowledge about power value for each performance state. The callback +.get_cost() is optional and provides the 'cost' values used by the EAS. +This is useful for platforms that only provide information on relative +efficiency between CPU types, where one could use the information to +create an abstract power model. But even an abstract power model can +sometimes be hard to fit in, given the input power value size restrictions. +The .get_cost() allows to provide the 'cost' values which reflect the +efficiency of the CPUs. This would allow to provide EAS information which +has different relation than what would be forced by the EM internal +formulas calculating 'cost' values. To register an EM for such platform, the +driver must set the flag 'milliwatts' to 0, provide .get_power() callback +and provide .get_cost() callback. The EM framework would handle such platform +properly during registration. A flag EM_PERF_DOMAIN_ARTIFICIAL is set for such +platform. Special care should be taken by other frameworks which are using EM +to test and treat this flag properly. + Registration of 'simple' EM ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -181,8 +201,8 @@ EM framework:: -> drivers/cpufreq/foo_cpufreq.c - 01 static int est_power(unsigned long *mW, unsigned long *KHz, - 02 struct device *dev) + 01 static int est_power(struct device *dev, unsigned long *mW, + 02 unsigned long *KHz) 03 { 04 long freq, power; 05 diff --git a/MAINTAINERS b/MAINTAINERS index e7b0bfcf0f43..3ad3328b8a46 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19589,6 +19589,7 @@ F: drivers/thermal/ F: include/linux/cpu_cooling.h F: include/linux/thermal.h F: include/uapi/linux/thermal.h +F: tools/lib/thermal/ F: tools/thermal/ THERMAL DRIVER FOR AMLOGIC SOCS diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 3b46041f2b97..62ed361a4376 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -512,6 +512,7 @@ struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu) { return &cpu_madt_gicc[cpu]; } +EXPORT_SYMBOL_GPL(acpi_cpu_get_madt_gicc); /* * acpi_map_gic_cpu_interface - parse processor MADT entry diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 14154b8b1720..403e83b4adc8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -319,6 +319,7 @@ /* Run Time Average Power Limiting (RAPL) Interface */ +#define MSR_VR_CURRENT_CONFIG 0x00000601 #define MSR_RAPL_POWER_UNIT 0x00000606 #define MSR_PKG_POWER_LIMIT 0x00000610 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 058e7b924189..907cc98b1938 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1862,7 +1862,7 @@ int __acpi_release_global_lock(unsigned int *lock) void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) { - e820__range_add(addr, size, E820_TYPE_ACPI); + e820__range_add(addr, size, E820_TYPE_NVS); e820__update_table_print(); } diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index fe0000eb7cae..b67d2ee77cd1 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -279,6 +279,20 @@ bool osc_pc_lpi_support_confirmed; EXPORT_SYMBOL_GPL(osc_pc_lpi_support_confirmed); /* + * ACPI 6.2 Section 6.2.11.2 'Platform-Wide OSPM Capabilities': + * Starting with ACPI Specification 6.2, all _CPC registers can be in + * PCC, System Memory, System IO, or Functional Fixed Hardware address + * spaces. OSPM support for this more flexible register space scheme is + * indicated by the “Flexible Address Space for CPPC Registers” _OSC bit. + * + * Otherwise (cf ACPI 6.1, s8.4.7.1.1.X), _CPC registers must be in: + * - PCC or Functional Fixed Hardware address space if defined + * - SystemMemory address space (NULL register) if not defined + */ +bool osc_cpc_flexible_adr_space_confirmed; +EXPORT_SYMBOL_GPL(osc_cpc_flexible_adr_space_confirmed); + +/* * ACPI 6.4 Operating System Capabilities for USB. */ bool osc_sb_native_usb4_support_confirmed; @@ -315,12 +329,15 @@ static void acpi_bus_osc_negotiate_platform_control(void) #endif #ifdef CONFIG_X86 capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_GENERIC_INITIATOR_SUPPORT; - if (boot_cpu_has(X86_FEATURE_HWP)) { - capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_SUPPORT; - capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPCV2_SUPPORT; - } #endif +#ifdef CONFIG_ACPI_CPPC_LIB + capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_SUPPORT; + capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPCV2_SUPPORT; +#endif + + capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_FLEXIBLE_ADR_SPACE; + if (IS_ENABLED(CONFIG_SCHED_MC_PRIO)) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_DIVERSE_HIGH_SUPPORT; @@ -341,10 +358,9 @@ static void acpi_bus_osc_negotiate_platform_control(void) return; } -#ifdef CONFIG_X86 - if (boot_cpu_has(X86_FEATURE_HWP)) - osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] & - (OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT)); +#ifdef CONFIG_ACPI_CPPC_LIB + osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] & + (OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT)); #endif /* @@ -366,6 +382,8 @@ static void acpi_bus_osc_negotiate_platform_control(void) capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT; osc_sb_native_usb4_support_confirmed = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT; + osc_cpc_flexible_adr_space_confirmed = + capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_CPC_FLEXIBLE_ADR_SPACE; } kfree(context.ret.pointer); diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index e6c1df0b3cc4..903528f7e187 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -100,6 +100,16 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr); (cpc)->cpc_entry.reg.space_id == \ ACPI_ADR_SPACE_PLATFORM_COMM) +/* Check if a CPC register is in SystemMemory */ +#define CPC_IN_SYSTEM_MEMORY(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \ + (cpc)->cpc_entry.reg.space_id == \ + ACPI_ADR_SPACE_SYSTEM_MEMORY) + +/* Check if a CPC register is in SystemIo */ +#define CPC_IN_SYSTEM_IO(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \ + (cpc)->cpc_entry.reg.space_id == \ + ACPI_ADR_SPACE_SYSTEM_IO) + /* Evaluates to True if reg is a NULL register descriptor */ #define IS_NULL_REG(reg) ((reg)->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY && \ (reg)->address == 0 && \ @@ -424,6 +434,24 @@ bool acpi_cpc_valid(void) } EXPORT_SYMBOL_GPL(acpi_cpc_valid); +bool cppc_allow_fast_switch(void) +{ + struct cpc_register_resource *desired_reg; + struct cpc_desc *cpc_ptr; + int cpu; + + for_each_possible_cpu(cpu) { + cpc_ptr = per_cpu(cpc_desc_ptr, cpu); + desired_reg = &cpc_ptr->cpc_regs[DESIRED_PERF]; + if (!CPC_IN_SYSTEM_MEMORY(desired_reg) && + !CPC_IN_SYSTEM_IO(desired_reg)) + return false; + } + + return true; +} +EXPORT_SYMBOL_GPL(cppc_allow_fast_switch); + /** * acpi_get_psd_map - Map the CPUs in the freq domain of a given cpu * @cpu: Find all CPUs that share a domain with cpu. @@ -736,6 +764,11 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) if (gas_t->address) { void __iomem *addr; + if (!osc_cpc_flexible_adr_space_confirmed) { + pr_debug("Flexible address space capability not supported\n"); + goto out_free; + } + addr = ioremap(gas_t->address, gas_t->bit_width/8); if (!addr) goto out_free; @@ -758,6 +791,10 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) gas_t->address); goto out_free; } + if (!osc_cpc_flexible_adr_space_confirmed) { + pr_debug("Flexible address space capability not supported\n"); + goto out_free; + } } else { if (gas_t->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE || !cpc_ffh_supported()) { /* Support only PCC, SystemMemory, SystemIO, and FFH type regs. */ @@ -1447,6 +1484,9 @@ EXPORT_SYMBOL_GPL(cppc_set_perf); * transition latency for performance change requests. The closest we have * is the timing information from the PCCT tables which provides the info * on the number and frequency of PCC commands the platform can handle. + * + * If desired_reg is in the SystemMemory or SystemIo ACPI address space, + * then assume there is no latency. */ unsigned int cppc_get_transition_latency(int cpu_num) { @@ -1472,7 +1512,9 @@ unsigned int cppc_get_transition_latency(int cpu_num) return CPUFREQ_ETERNAL; desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF]; - if (!CPC_IN_PCC(desired_reg)) + if (CPC_IN_SYSTEM_MEMORY(desired_reg) || CPC_IN_SYSTEM_IO(desired_reg)) + return 0; + else if (!CPC_IN_PCC(desired_reg)) return CPUFREQ_ETERNAL; if (pcc_ss_id < 0) diff --git a/drivers/acpi/dptf/dptf_pch_fivr.c b/drivers/acpi/dptf/dptf_pch_fivr.c index c0da24c9f8c3..4919e7abe93f 100644 --- a/drivers/acpi/dptf/dptf_pch_fivr.c +++ b/drivers/acpi/dptf/dptf_pch_fivr.c @@ -151,6 +151,7 @@ static int pch_fivr_remove(struct platform_device *pdev) static const struct acpi_device_id pch_fivr_device_ids[] = { {"INTC1045", 0}, {"INTC1049", 0}, + {"INTC1064", 0}, {"INTC10A3", 0}, {"", 0}, }; diff --git a/drivers/acpi/dptf/dptf_power.c b/drivers/acpi/dptf/dptf_power.c index 407b89d8a2ce..86561eda939f 100644 --- a/drivers/acpi/dptf/dptf_power.c +++ b/drivers/acpi/dptf/dptf_power.c @@ -232,6 +232,8 @@ static const struct acpi_device_id int3407_device_ids[] = { {"INTC1050", 0}, {"INTC1060", 0}, {"INTC1061", 0}, + {"INTC1065", 0}, + {"INTC1066", 0}, {"INTC10A4", 0}, {"INTC10A5", 0}, {"", 0}, diff --git a/drivers/acpi/dptf/int340x_thermal.c b/drivers/acpi/dptf/int340x_thermal.c index 42a556346548..b7113fa92fa6 100644 --- a/drivers/acpi/dptf/int340x_thermal.c +++ b/drivers/acpi/dptf/int340x_thermal.c @@ -27,6 +27,7 @@ static const struct acpi_device_id int340x_thermal_device_ids[] = { {"INT3532"}, {"INTC1040"}, {"INTC1041"}, + {"INTC1042"}, {"INTC1043"}, {"INTC1044"}, {"INTC1045"}, @@ -37,6 +38,11 @@ static const struct acpi_device_id int340x_thermal_device_ids[] = { {"INTC1050"}, {"INTC1060"}, {"INTC1061"}, + {"INTC1062"}, + {"INTC1063"}, + {"INTC1064"}, + {"INTC1065"}, + {"INTC1066"}, {"INTC10A0"}, {"INTC10A1"}, {"INTC10A2"}, diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h index 44728529a5b6..e7b4b4e4a55e 100644 --- a/drivers/acpi/fan.h +++ b/drivers/acpi/fan.h @@ -14,6 +14,7 @@ {"INT3404", }, /* Fan */ \ {"INTC1044", }, /* Fan for Tiger Lake generation */ \ {"INTC1048", }, /* Fan for Alder Lake generation */ \ + {"INTC1063", }, /* Fan for Meteor Lake generation */ \ {"INTC10A2", }, /* Fan for Raptor Lake generation */ \ {"PNP0C0B", } /* Generic ACPI fan */ diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index ef104809f27b..8d769114a048 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -79,17 +79,17 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev) static int find_child_checks(struct acpi_device *adev, bool check_children) { - bool sta_present = true; unsigned long long sta; acpi_status status; + if (check_children && list_empty(&adev->children)) + return -ENODEV; + status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta); if (status == AE_NOT_FOUND) - sta_present = false; - else if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED)) - return -ENODEV; + return FIND_CHILD_MIN_SCORE; - if (check_children && list_empty(&adev->children)) + if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED)) return -ENODEV; /* @@ -99,8 +99,10 @@ static int find_child_checks(struct acpi_device *adev, bool check_children) * matched going forward. [This means a second spec violation in a row, * so whatever we do here is best effort anyway.] */ - return sta_present && !adev->pnp.type.platform_id ? - FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE; + if (adev->pnp.type.platform_id) + return FIND_CHILD_MIN_SCORE; + + return FIND_CHILD_MAX_SCORE; } struct acpi_device *acpi_find_child_device(struct acpi_device *parent, diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 7a70c4bfc23c..3269a888fb7a 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -36,7 +36,6 @@ #include <linux/io-64-nonatomic-lo-hi.h> #include "acpica/accommon.h" -#include "acpica/acnamesp.h" #include "internal.h" /* Definitions for ACPI_DEBUG_PRINT() */ @@ -1496,91 +1495,6 @@ int acpi_check_region(resource_size_t start, resource_size_t n, } EXPORT_SYMBOL(acpi_check_region); -static acpi_status acpi_deactivate_mem_region(acpi_handle handle, u32 level, - void *_res, void **return_value) -{ - struct acpi_mem_space_context **mem_ctx; - union acpi_operand_object *handler_obj; - union acpi_operand_object *region_obj2; - union acpi_operand_object *region_obj; - struct resource *res = _res; - acpi_status status; - - region_obj = acpi_ns_get_attached_object(handle); - if (!region_obj) - return AE_OK; - - handler_obj = region_obj->region.handler; - if (!handler_obj) - return AE_OK; - - if (region_obj->region.space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) - return AE_OK; - - if (!(region_obj->region.flags & AOPOBJ_SETUP_COMPLETE)) - return AE_OK; - - region_obj2 = acpi_ns_get_secondary_object(region_obj); - if (!region_obj2) - return AE_OK; - - mem_ctx = (void *)®ion_obj2->extra.region_context; - - if (!(mem_ctx[0]->address >= res->start && - mem_ctx[0]->address < res->end)) - return AE_OK; - - status = handler_obj->address_space.setup(region_obj, - ACPI_REGION_DEACTIVATE, - NULL, (void **)mem_ctx); - if (ACPI_SUCCESS(status)) - region_obj->region.flags &= ~(AOPOBJ_SETUP_COMPLETE); - - return status; -} - -/** - * acpi_release_memory - Release any mappings done to a memory region - * @handle: Handle to namespace node - * @res: Memory resource - * @level: A level that terminates the search - * - * Walks through @handle and unmaps all SystemMemory Operation Regions that - * overlap with @res and that have already been activated (mapped). - * - * This is a helper that allows drivers to place special requirements on memory - * region that may overlap with operation regions, primarily allowing them to - * safely map the region as non-cached memory. - * - * The unmapped Operation Regions will be automatically remapped next time they - * are called, so the drivers do not need to do anything else. - */ -acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, - u32 level) -{ - acpi_status status; - - if (!(res->flags & IORESOURCE_MEM)) - return AE_TYPE; - - status = acpi_walk_namespace(ACPI_TYPE_REGION, handle, level, - acpi_deactivate_mem_region, NULL, - res, NULL); - if (ACPI_FAILURE(status)) - return status; - - /* - * Wait for all of the mappings queued up for removal by - * acpi_deactivate_mem_region() to actually go away. - */ - synchronize_rcu(); - rcu_barrier(); - flush_scheduled_work(); - - return AE_OK; -} -EXPORT_SYMBOL_GPL(acpi_release_memory); - /* * Let drivers know whether the resource checks are effective */ diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index e9c84d0ac55b..6a5572a1a80c 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -38,11 +38,11 @@ #define ACPI_IDLE_STATE_START (IS_ENABLED(CONFIG_ARCH_HAS_CPU_RELAX) ? 1 : 0) static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER; -module_param(max_cstate, uint, 0000); -static unsigned int nocst __read_mostly; -module_param(nocst, uint, 0000); -static int bm_check_disable __read_mostly; -module_param(bm_check_disable, uint, 0000); +module_param(max_cstate, uint, 0400); +static bool nocst __read_mostly; +module_param(nocst, bool, 0400); +static bool bm_check_disable __read_mostly; +module_param(bm_check_disable, bool, 0400); static unsigned int latency_factor __read_mostly = 2; module_param(latency_factor, uint, 0644); diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index bbddb267c2e6..72115917e0bd 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -172,10 +172,10 @@ EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_name); * @dev: Device to detach. * @power_off: Used to indicate whether we should power off the device. * - * This functions will reverse the actions from dev_pm_domain_attach() and - * dev_pm_domain_attach_by_id(), thus it detaches @dev from its PM domain. - * Typically it should be invoked during the remove phase, either from - * subsystem level code or from drivers. + * This functions will reverse the actions from dev_pm_domain_attach(), + * dev_pm_domain_attach_by_id() and dev_pm_domain_attach_by_name(), thus it + * detaches @dev from its PM domain. Typically it should be invoked during the + * remove phase, either from subsystem level code or from drivers. * * Callers must ensure proper synchronization of this function with power * management callbacks. diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 1ee878d126fd..739e52cd4aba 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -131,7 +131,7 @@ static const struct genpd_lock_ops genpd_spin_ops = { #define genpd_is_cpu_domain(genpd) (genpd->flags & GENPD_FLAG_CPU_DOMAIN) #define genpd_is_rpm_always_on(genpd) (genpd->flags & GENPD_FLAG_RPM_ALWAYS_ON) -static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, +static inline bool irq_safe_dev_in_sleep_domain(struct device *dev, const struct generic_pm_domain *genpd) { bool ret; @@ -139,11 +139,14 @@ static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, ret = pm_runtime_is_irq_safe(dev) && !genpd_is_irq_safe(genpd); /* - * Warn once if an IRQ safe device is attached to a no sleep domain, as - * to indicate a suboptimal configuration for PM. For an always on - * domain this isn't case, thus don't warn. + * Warn once if an IRQ safe device is attached to a domain, which + * callbacks are allowed to sleep. This indicates a suboptimal + * configuration for PM, but it doesn't matter for an always on domain. */ - if (ret && !genpd_is_always_on(genpd)) + if (genpd_is_always_on(genpd) || genpd_is_rpm_always_on(genpd)) + return ret; + + if (ret) dev_warn_once(dev, "PM domain %s will not be powered off\n", genpd->name); @@ -225,24 +228,23 @@ static void genpd_debug_remove(struct generic_pm_domain *genpd) static void genpd_update_accounting(struct generic_pm_domain *genpd) { - ktime_t delta, now; + u64 delta, now; - now = ktime_get(); - delta = ktime_sub(now, genpd->accounting_time); + now = ktime_get_mono_fast_ns(); + if (now <= genpd->accounting_time) + return; + + delta = now - genpd->accounting_time; /* * If genpd->status is active, it means we are just * out of off and so update the idle time and vice * versa. */ - if (genpd->status == GENPD_STATE_ON) { - int state_idx = genpd->state_idx; - - genpd->states[state_idx].idle_time = - ktime_add(genpd->states[state_idx].idle_time, delta); - } else { - genpd->on_time = ktime_add(genpd->on_time, delta); - } + if (genpd->status == GENPD_STATE_ON) + genpd->states[genpd->state_idx].idle_time += delta; + else + genpd->on_time += delta; genpd->accounting_time = now; } @@ -476,15 +478,16 @@ EXPORT_SYMBOL_GPL(dev_pm_genpd_set_performance_state); */ void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next) { - struct generic_pm_domain_data *gpd_data; struct generic_pm_domain *genpd; + struct gpd_timing_data *td; genpd = dev_to_genpd_safe(dev); if (!genpd) return; - gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); - gpd_data->next_wakeup = next; + td = to_gpd_data(dev->power.subsys_data->domain_data)->td; + if (td) + td->next_wakeup = next; } EXPORT_SYMBOL_GPL(dev_pm_genpd_set_next_wakeup); @@ -506,6 +509,7 @@ static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed) if (!genpd->power_on) goto out; + timed = timed && genpd->gd && !genpd->states[state_idx].fwnode; if (!timed) { ret = genpd->power_on(genpd); if (ret) @@ -524,7 +528,7 @@ static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed) goto out; genpd->states[state_idx].power_on_latency_ns = elapsed_ns; - genpd->max_off_time_changed = true; + genpd->gd->max_off_time_changed = true; pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n", genpd->name, "on", elapsed_ns); @@ -555,6 +559,7 @@ static int _genpd_power_off(struct generic_pm_domain *genpd, bool timed) if (!genpd->power_off) goto out; + timed = timed && genpd->gd && !genpd->states[state_idx].fwnode; if (!timed) { ret = genpd->power_off(genpd); if (ret) @@ -573,7 +578,7 @@ static int _genpd_power_off(struct generic_pm_domain *genpd, bool timed) goto out; genpd->states[state_idx].power_off_latency_ns = elapsed_ns; - genpd->max_off_time_changed = true; + genpd->gd->max_off_time_changed = true; pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n", genpd->name, "off", elapsed_ns); @@ -649,18 +654,12 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, } list_for_each_entry(pdd, &genpd->dev_list, list_node) { - enum pm_qos_flags_status stat; - - stat = dev_pm_qos_flags(pdd->dev, PM_QOS_FLAG_NO_POWER_OFF); - if (stat > PM_QOS_FLAGS_NONE) - return -EBUSY; - /* * Do not allow PM domain to be powered off, when an IRQ safe * device is part of a non-IRQ safe domain. */ if (!pm_runtime_suspended(pdd->dev) || - irq_safe_dev_in_no_sleep_domain(pdd->dev, genpd)) + irq_safe_dev_in_sleep_domain(pdd->dev, genpd)) not_suspended++; } @@ -775,25 +774,27 @@ static int genpd_dev_pm_qos_notifier(struct notifier_block *nb, dev = gpd_data->base.dev; for (;;) { - struct generic_pm_domain *genpd; + struct generic_pm_domain *genpd = ERR_PTR(-ENODATA); struct pm_domain_data *pdd; + struct gpd_timing_data *td; spin_lock_irq(&dev->power.lock); pdd = dev->power.subsys_data ? dev->power.subsys_data->domain_data : NULL; if (pdd) { - to_gpd_data(pdd)->td.constraint_changed = true; - genpd = dev_to_genpd(dev); - } else { - genpd = ERR_PTR(-ENODATA); + td = to_gpd_data(pdd)->td; + if (td) { + td->constraint_changed = true; + genpd = dev_to_genpd(dev); + } } spin_unlock_irq(&dev->power.lock); if (!IS_ERR(genpd)) { genpd_lock(genpd); - genpd->max_off_time_changed = true; + genpd->gd->max_off_time_changed = true; genpd_unlock(genpd); } @@ -879,9 +880,9 @@ static int genpd_runtime_suspend(struct device *dev) struct generic_pm_domain *genpd; bool (*suspend_ok)(struct device *__dev); struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev); - struct gpd_timing_data *td = &gpd_data->td; + struct gpd_timing_data *td = gpd_data->td; bool runtime_pm = pm_runtime_enabled(dev); - ktime_t time_start; + ktime_t time_start = 0; s64 elapsed_ns; int ret; @@ -902,8 +903,7 @@ static int genpd_runtime_suspend(struct device *dev) return -EBUSY; /* Measure suspend latency. */ - time_start = 0; - if (runtime_pm) + if (td && runtime_pm) time_start = ktime_get(); ret = __genpd_runtime_suspend(dev); @@ -917,13 +917,13 @@ static int genpd_runtime_suspend(struct device *dev) } /* Update suspend latency value if the measured time exceeds it. */ - if (runtime_pm) { + if (td && runtime_pm) { elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); if (elapsed_ns > td->suspend_latency_ns) { td->suspend_latency_ns = elapsed_ns; dev_dbg(dev, "suspend latency exceeded, %lld ns\n", elapsed_ns); - genpd->max_off_time_changed = true; + genpd->gd->max_off_time_changed = true; td->constraint_changed = true; } } @@ -932,7 +932,7 @@ static int genpd_runtime_suspend(struct device *dev) * If power.irq_safe is set, this routine may be run with * IRQs disabled, so suspend only if the PM domain also is irq_safe. */ - if (irq_safe_dev_in_no_sleep_domain(dev, genpd)) + if (irq_safe_dev_in_sleep_domain(dev, genpd)) return 0; genpd_lock(genpd); @@ -955,12 +955,11 @@ static int genpd_runtime_resume(struct device *dev) { struct generic_pm_domain *genpd; struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev); - struct gpd_timing_data *td = &gpd_data->td; - bool runtime_pm = pm_runtime_enabled(dev); - ktime_t time_start; + struct gpd_timing_data *td = gpd_data->td; + bool timed = td && pm_runtime_enabled(dev); + ktime_t time_start = 0; s64 elapsed_ns; int ret; - bool timed = true; dev_dbg(dev, "%s()\n", __func__); @@ -972,10 +971,8 @@ static int genpd_runtime_resume(struct device *dev) * As we don't power off a non IRQ safe domain, which holds * an IRQ safe device, we don't need to restore power to it. */ - if (irq_safe_dev_in_no_sleep_domain(dev, genpd)) { - timed = false; + if (irq_safe_dev_in_sleep_domain(dev, genpd)) goto out; - } genpd_lock(genpd); ret = genpd_power_on(genpd, 0); @@ -988,8 +985,7 @@ static int genpd_runtime_resume(struct device *dev) out: /* Measure resume latency. */ - time_start = 0; - if (timed && runtime_pm) + if (timed) time_start = ktime_get(); ret = genpd_start_dev(genpd, dev); @@ -1001,13 +997,13 @@ static int genpd_runtime_resume(struct device *dev) goto err_stop; /* Update resume latency value if the measured time exceeds it. */ - if (timed && runtime_pm) { + if (timed) { elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); if (elapsed_ns > td->resume_latency_ns) { td->resume_latency_ns = elapsed_ns; dev_dbg(dev, "resume latency exceeded, %lld ns\n", elapsed_ns); - genpd->max_off_time_changed = true; + genpd->gd->max_off_time_changed = true; td->constraint_changed = true; } } @@ -1500,9 +1496,11 @@ EXPORT_SYMBOL_GPL(dev_pm_genpd_resume); #endif /* CONFIG_PM_SLEEP */ -static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev) +static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev, + bool has_governor) { struct generic_pm_domain_data *gpd_data; + struct gpd_timing_data *td; int ret; ret = dev_pm_get_subsys_data(dev); @@ -1516,26 +1514,38 @@ static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev) } gpd_data->base.dev = dev; - gpd_data->td.constraint_changed = true; - gpd_data->td.effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS; gpd_data->nb.notifier_call = genpd_dev_pm_qos_notifier; - gpd_data->next_wakeup = KTIME_MAX; - spin_lock_irq(&dev->power.lock); + /* Allocate data used by a governor. */ + if (has_governor) { + td = kzalloc(sizeof(*td), GFP_KERNEL); + if (!td) { + ret = -ENOMEM; + goto err_free; + } - if (dev->power.subsys_data->domain_data) { - ret = -EINVAL; - goto err_free; + td->constraint_changed = true; + td->effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS; + td->next_wakeup = KTIME_MAX; + gpd_data->td = td; } - dev->power.subsys_data->domain_data = &gpd_data->base; + spin_lock_irq(&dev->power.lock); + + if (dev->power.subsys_data->domain_data) + ret = -EINVAL; + else + dev->power.subsys_data->domain_data = &gpd_data->base; spin_unlock_irq(&dev->power.lock); + if (ret) + goto err_free; + return gpd_data; err_free: - spin_unlock_irq(&dev->power.lock); + kfree(gpd_data->td); kfree(gpd_data); err_put: dev_pm_put_subsys_data(dev); @@ -1551,6 +1561,7 @@ static void genpd_free_dev_data(struct device *dev, spin_unlock_irq(&dev->power.lock); + kfree(gpd_data->td); kfree(gpd_data); dev_pm_put_subsys_data(dev); } @@ -1607,6 +1618,7 @@ static int genpd_get_cpu(struct generic_pm_domain *genpd, struct device *dev) static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, struct device *base_dev) { + struct genpd_governor_data *gd = genpd->gd; struct generic_pm_domain_data *gpd_data; int ret; @@ -1615,7 +1627,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)) return -EINVAL; - gpd_data = genpd_alloc_dev_data(dev); + gpd_data = genpd_alloc_dev_data(dev, gd); if (IS_ERR(gpd_data)) return PTR_ERR(gpd_data); @@ -1631,7 +1643,8 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, dev_pm_domain_set(dev, &genpd->domain); genpd->device_count++; - genpd->max_off_time_changed = true; + if (gd) + gd->max_off_time_changed = true; list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); @@ -1685,7 +1698,8 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, } genpd->device_count--; - genpd->max_off_time_changed = true; + if (genpd->gd) + genpd->gd->max_off_time_changed = true; genpd_clear_cpumask(genpd, gpd_data->cpu); dev_pm_domain_set(dev, NULL); @@ -1958,6 +1972,53 @@ static int genpd_set_default_power_state(struct generic_pm_domain *genpd) return 0; } +static int genpd_alloc_data(struct generic_pm_domain *genpd) +{ + struct genpd_governor_data *gd = NULL; + int ret; + + if (genpd_is_cpu_domain(genpd) && + !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL)) + return -ENOMEM; + + if (genpd->gov) { + gd = kzalloc(sizeof(*gd), GFP_KERNEL); + if (!gd) { + ret = -ENOMEM; + goto free; + } + + gd->max_off_time_ns = -1; + gd->max_off_time_changed = true; + gd->next_wakeup = KTIME_MAX; + } + + /* Use only one "off" state if there were no states declared */ + if (genpd->state_count == 0) { + ret = genpd_set_default_power_state(genpd); + if (ret) + goto free; + } + + genpd->gd = gd; + return 0; + +free: + if (genpd_is_cpu_domain(genpd)) + free_cpumask_var(genpd->cpus); + kfree(gd); + return ret; +} + +static void genpd_free_data(struct generic_pm_domain *genpd) +{ + if (genpd_is_cpu_domain(genpd)) + free_cpumask_var(genpd->cpus); + if (genpd->free_states) + genpd->free_states(genpd->states, genpd->state_count); + kfree(genpd->gd); +} + static void genpd_lock_init(struct generic_pm_domain *genpd) { if (genpd->flags & GENPD_FLAG_IRQ_SAFE) { @@ -1995,11 +2056,9 @@ int pm_genpd_init(struct generic_pm_domain *genpd, atomic_set(&genpd->sd_count, 0); genpd->status = is_off ? GENPD_STATE_OFF : GENPD_STATE_ON; genpd->device_count = 0; - genpd->max_off_time_ns = -1; - genpd->max_off_time_changed = true; genpd->provider = NULL; genpd->has_provider = false; - genpd->accounting_time = ktime_get(); + genpd->accounting_time = ktime_get_mono_fast_ns(); genpd->domain.ops.runtime_suspend = genpd_runtime_suspend; genpd->domain.ops.runtime_resume = genpd_runtime_resume; genpd->domain.ops.prepare = genpd_prepare; @@ -2017,26 +2076,22 @@ int pm_genpd_init(struct generic_pm_domain *genpd, genpd->dev_ops.start = pm_clk_resume; } + /* The always-on governor works better with the corresponding flag. */ + if (gov == &pm_domain_always_on_gov) + genpd->flags |= GENPD_FLAG_RPM_ALWAYS_ON; + /* Always-on domains must be powered on at initialization. */ if ((genpd_is_always_on(genpd) || genpd_is_rpm_always_on(genpd)) && !genpd_status_on(genpd)) return -EINVAL; - if (genpd_is_cpu_domain(genpd) && - !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL)) - return -ENOMEM; - - /* Use only one "off" state if there were no states declared */ - if (genpd->state_count == 0) { - ret = genpd_set_default_power_state(genpd); - if (ret) { - if (genpd_is_cpu_domain(genpd)) - free_cpumask_var(genpd->cpus); - return ret; - } - } else if (!gov && genpd->state_count > 1) { + /* Multiple states but no governor doesn't make sense. */ + if (!gov && genpd->state_count > 1) pr_warn("%s: no governor for states\n", genpd->name); - } + + ret = genpd_alloc_data(genpd); + if (ret) + return ret; device_initialize(&genpd->dev); dev_set_name(&genpd->dev, "%s", genpd->name); @@ -2081,10 +2136,7 @@ static int genpd_remove(struct generic_pm_domain *genpd) genpd_unlock(genpd); genpd_debug_remove(genpd); cancel_work_sync(&genpd->power_off_work); - if (genpd_is_cpu_domain(genpd)) - free_cpumask_var(genpd->cpus); - if (genpd->free_states) - genpd->free_states(genpd->states, genpd->state_count); + genpd_free_data(genpd); pr_debug("%s: removed %s\n", __func__, genpd->name); @@ -3163,6 +3215,7 @@ static int sub_domains_show(struct seq_file *s, void *data) static int idle_states_show(struct seq_file *s, void *data) { struct generic_pm_domain *genpd = s->private; + u64 now, delta, idle_time = 0; unsigned int i; int ret = 0; @@ -3173,17 +3226,19 @@ static int idle_states_show(struct seq_file *s, void *data) seq_puts(s, "State Time Spent(ms) Usage Rejected\n"); for (i = 0; i < genpd->state_count; i++) { - ktime_t delta = 0; - s64 msecs; + idle_time += genpd->states[i].idle_time; - if ((genpd->status == GENPD_STATE_OFF) && - (genpd->state_idx == i)) - delta = ktime_sub(ktime_get(), genpd->accounting_time); + if (genpd->status == GENPD_STATE_OFF && genpd->state_idx == i) { + now = ktime_get_mono_fast_ns(); + if (now > genpd->accounting_time) { + delta = now - genpd->accounting_time; + idle_time += delta; + } + } - msecs = ktime_to_ms( - ktime_add(genpd->states[i].idle_time, delta)); - seq_printf(s, "S%-13i %-14lld %-14llu %llu\n", i, msecs, - genpd->states[i].usage, genpd->states[i].rejected); + do_div(idle_time, NSEC_PER_MSEC); + seq_printf(s, "S%-13i %-14llu %-14llu %llu\n", i, idle_time, + genpd->states[i].usage, genpd->states[i].rejected); } genpd_unlock(genpd); @@ -3193,18 +3248,22 @@ static int idle_states_show(struct seq_file *s, void *data) static int active_time_show(struct seq_file *s, void *data) { struct generic_pm_domain *genpd = s->private; - ktime_t delta = 0; + u64 now, on_time, delta = 0; int ret = 0; ret = genpd_lock_interruptible(genpd); if (ret) return -ERESTARTSYS; - if (genpd->status == GENPD_STATE_ON) - delta = ktime_sub(ktime_get(), genpd->accounting_time); + if (genpd->status == GENPD_STATE_ON) { + now = ktime_get_mono_fast_ns(); + if (now > genpd->accounting_time) + delta = now - genpd->accounting_time; + } - seq_printf(s, "%lld ms\n", ktime_to_ms( - ktime_add(genpd->on_time, delta))); + on_time = genpd->on_time + delta; + do_div(on_time, NSEC_PER_MSEC); + seq_printf(s, "%llu ms\n", on_time); genpd_unlock(genpd); return ret; @@ -3213,7 +3272,7 @@ static int active_time_show(struct seq_file *s, void *data) static int total_idle_time_show(struct seq_file *s, void *data) { struct generic_pm_domain *genpd = s->private; - ktime_t delta = 0, total = 0; + u64 now, delta, total = 0; unsigned int i; int ret = 0; @@ -3222,16 +3281,19 @@ static int total_idle_time_show(struct seq_file *s, void *data) return -ERESTARTSYS; for (i = 0; i < genpd->state_count; i++) { + total += genpd->states[i].idle_time; - if ((genpd->status == GENPD_STATE_OFF) && - (genpd->state_idx == i)) - delta = ktime_sub(ktime_get(), genpd->accounting_time); - - total = ktime_add(total, genpd->states[i].idle_time); + if (genpd->status == GENPD_STATE_OFF && genpd->state_idx == i) { + now = ktime_get_mono_fast_ns(); + if (now > genpd->accounting_time) { + delta = now - genpd->accounting_time; + total += delta; + } + } } - total = ktime_add(total, delta); - seq_printf(s, "%lld ms\n", ktime_to_ms(total)); + do_div(total, NSEC_PER_MSEC); + seq_printf(s, "%llu ms\n", total); genpd_unlock(genpd); return ret; diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index cd08c5885190..282a3a135827 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -18,6 +18,8 @@ static int dev_update_qos_constraint(struct device *dev, void *data) s64 constraint_ns; if (dev->power.subsys_data && dev->power.subsys_data->domain_data) { + struct gpd_timing_data *td = dev_gpd_data(dev)->td; + /* * Only take suspend-time QoS constraints of devices into * account, because constraints updated after the device has @@ -25,7 +27,8 @@ static int dev_update_qos_constraint(struct device *dev, void *data) * anyway. In order for them to take effect, the device has to * be resumed and suspended again. */ - constraint_ns = dev_gpd_data(dev)->td.effective_constraint_ns; + constraint_ns = td ? td->effective_constraint_ns : + PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS; } else { /* * The child is not in a domain and there's no info on its @@ -49,7 +52,7 @@ static int dev_update_qos_constraint(struct device *dev, void *data) */ static bool default_suspend_ok(struct device *dev) { - struct gpd_timing_data *td = &dev_gpd_data(dev)->td; + struct gpd_timing_data *td = dev_gpd_data(dev)->td; unsigned long flags; s64 constraint_ns; @@ -136,26 +139,28 @@ static void update_domain_next_wakeup(struct generic_pm_domain *genpd, ktime_t n * is able to enter its optimal idle state. */ list_for_each_entry(pdd, &genpd->dev_list, list_node) { - next_wakeup = to_gpd_data(pdd)->next_wakeup; + next_wakeup = to_gpd_data(pdd)->td->next_wakeup; if (next_wakeup != KTIME_MAX && !ktime_before(next_wakeup, now)) if (ktime_before(next_wakeup, domain_wakeup)) domain_wakeup = next_wakeup; } list_for_each_entry(link, &genpd->parent_links, parent_node) { - next_wakeup = link->child->next_wakeup; + struct genpd_governor_data *cgd = link->child->gd; + + next_wakeup = cgd ? cgd->next_wakeup : KTIME_MAX; if (next_wakeup != KTIME_MAX && !ktime_before(next_wakeup, now)) if (ktime_before(next_wakeup, domain_wakeup)) domain_wakeup = next_wakeup; } - genpd->next_wakeup = domain_wakeup; + genpd->gd->next_wakeup = domain_wakeup; } static bool next_wakeup_allows_state(struct generic_pm_domain *genpd, unsigned int state, ktime_t now) { - ktime_t domain_wakeup = genpd->next_wakeup; + ktime_t domain_wakeup = genpd->gd->next_wakeup; s64 idle_time_ns, min_sleep_ns; min_sleep_ns = genpd->states[state].power_off_latency_ns + @@ -185,8 +190,9 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd, * All subdomains have been powered off already at this point. */ list_for_each_entry(link, &genpd->parent_links, parent_node) { - struct generic_pm_domain *sd = link->child; - s64 sd_max_off_ns = sd->max_off_time_ns; + struct genpd_governor_data *cgd = link->child->gd; + + s64 sd_max_off_ns = cgd ? cgd->max_off_time_ns : -1; if (sd_max_off_ns < 0) continue; @@ -215,7 +221,7 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd, * domain to turn off and on (that's how much time it will * have to wait worst case). */ - td = &to_gpd_data(pdd)->td; + td = to_gpd_data(pdd)->td; constraint_ns = td->effective_constraint_ns; /* * Zero means "no suspend at all" and this runs only when all @@ -244,7 +250,7 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd, * time and the time needed to turn the domain on is the maximum * theoretical time this domain can spend in the "off" state. */ - genpd->max_off_time_ns = min_off_time_ns - + genpd->gd->max_off_time_ns = min_off_time_ns - genpd->states[state].power_on_latency_ns; return true; } @@ -259,6 +265,7 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd, static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now) { struct generic_pm_domain *genpd = pd_to_genpd(pd); + struct genpd_governor_data *gd = genpd->gd; int state_idx = genpd->state_count - 1; struct gpd_link *link; @@ -269,11 +276,11 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now) * cannot be met. */ update_domain_next_wakeup(genpd, now); - if ((genpd->flags & GENPD_FLAG_MIN_RESIDENCY) && (genpd->next_wakeup != KTIME_MAX)) { + if ((genpd->flags & GENPD_FLAG_MIN_RESIDENCY) && (gd->next_wakeup != KTIME_MAX)) { /* Let's find out the deepest domain idle state, the devices prefer */ while (state_idx >= 0) { if (next_wakeup_allows_state(genpd, state_idx, now)) { - genpd->max_off_time_changed = true; + gd->max_off_time_changed = true; break; } state_idx--; @@ -281,14 +288,14 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now) if (state_idx < 0) { state_idx = 0; - genpd->cached_power_down_ok = false; + gd->cached_power_down_ok = false; goto done; } } - if (!genpd->max_off_time_changed) { - genpd->state_idx = genpd->cached_power_down_state_idx; - return genpd->cached_power_down_ok; + if (!gd->max_off_time_changed) { + genpd->state_idx = gd->cached_power_down_state_idx; + return gd->cached_power_down_ok; } /* @@ -297,12 +304,16 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now) * going to be called for any parent until this instance * returns. */ - list_for_each_entry(link, &genpd->child_links, child_node) - link->parent->max_off_time_changed = true; + list_for_each_entry(link, &genpd->child_links, child_node) { + struct genpd_governor_data *pgd = link->parent->gd; + + if (pgd) + pgd->max_off_time_changed = true; + } - genpd->max_off_time_ns = -1; - genpd->max_off_time_changed = false; - genpd->cached_power_down_ok = true; + gd->max_off_time_ns = -1; + gd->max_off_time_changed = false; + gd->cached_power_down_ok = true; /* * Find a state to power down to, starting from the state @@ -310,7 +321,7 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now) */ while (!__default_power_down_ok(pd, state_idx)) { if (state_idx == 0) { - genpd->cached_power_down_ok = false; + gd->cached_power_down_ok = false; break; } state_idx--; @@ -318,8 +329,8 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now) done: genpd->state_idx = state_idx; - genpd->cached_power_down_state_idx = genpd->state_idx; - return genpd->cached_power_down_ok; + gd->cached_power_down_state_idx = genpd->state_idx; + return gd->cached_power_down_ok; } static bool default_power_down_ok(struct dev_pm_domain *pd) @@ -327,11 +338,6 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) return _default_power_down_ok(pd, ktime_get()); } -static bool always_on_power_down_ok(struct dev_pm_domain *domain) -{ - return false; -} - #ifdef CONFIG_CPU_IDLE static bool cpu_power_down_ok(struct dev_pm_domain *pd) { @@ -401,6 +407,5 @@ struct dev_power_governor simple_qos_governor = { * pm_genpd_gov_always_on - A governor implementing an always-on policy */ struct dev_power_governor pm_domain_always_on_gov = { - .power_down_ok = always_on_power_down_ok, .suspend_ok = default_suspend_ok, }; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index d4059e6ffeae..676dc72d912d 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -263,7 +263,7 @@ static int rpm_check_suspend_allowed(struct device *dev) retval = -EINVAL; else if (dev->power.disable_depth > 0) retval = -EACCES; - else if (atomic_read(&dev->power.usage_count) > 0) + else if (atomic_read(&dev->power.usage_count)) retval = -EAGAIN; else if (!dev->power.ignore_children && atomic_read(&dev->power.child_count)) @@ -1039,13 +1039,33 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay) } EXPORT_SYMBOL_GPL(pm_schedule_suspend); +static int rpm_drop_usage_count(struct device *dev) +{ + int ret; + + ret = atomic_sub_return(1, &dev->power.usage_count); + if (ret >= 0) + return ret; + + /* + * Because rpm_resume() does not check the usage counter, it will resume + * the device even if the usage counter is 0 or negative, so it is + * sufficient to increment the usage counter here to reverse the change + * made above. + */ + atomic_inc(&dev->power.usage_count); + dev_warn(dev, "Runtime PM usage count underflow!\n"); + return -EINVAL; +} + /** * __pm_runtime_idle - Entry point for runtime idle operations. * @dev: Device to send idle notification for. * @rpmflags: Flag bits. * * If the RPM_GET_PUT flag is set, decrement the device's usage count and - * return immediately if it is larger than zero. Then carry out an idle + * return immediately if it is larger than zero (if it becomes negative, log a + * warning, increment it, and return an error). Then carry out an idle * notification, either synchronous or asynchronous. * * This routine may be called in atomic context if the RPM_ASYNC flag is set, @@ -1057,7 +1077,10 @@ int __pm_runtime_idle(struct device *dev, int rpmflags) int retval; if (rpmflags & RPM_GET_PUT) { - if (!atomic_dec_and_test(&dev->power.usage_count)) { + retval = rpm_drop_usage_count(dev); + if (retval < 0) { + return retval; + } else if (retval > 0) { trace_rpm_usage_rcuidle(dev, rpmflags); return 0; } @@ -1079,7 +1102,8 @@ EXPORT_SYMBOL_GPL(__pm_runtime_idle); * @rpmflags: Flag bits. * * If the RPM_GET_PUT flag is set, decrement the device's usage count and - * return immediately if it is larger than zero. Then carry out a suspend, + * return immediately if it is larger than zero (if it becomes negative, log a + * warning, increment it, and return an error). Then carry out a suspend, * either synchronous or asynchronous. * * This routine may be called in atomic context if the RPM_ASYNC flag is set, @@ -1091,7 +1115,10 @@ int __pm_runtime_suspend(struct device *dev, int rpmflags) int retval; if (rpmflags & RPM_GET_PUT) { - if (!atomic_dec_and_test(&dev->power.usage_count)) { + retval = rpm_drop_usage_count(dev); + if (retval < 0) { + return retval; + } else if (retval > 0) { trace_rpm_usage_rcuidle(dev, rpmflags); return 0; } @@ -1210,12 +1237,13 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status) { struct device *parent = dev->parent; bool notify_parent = false; + unsigned long flags; int error = 0; if (status != RPM_ACTIVE && status != RPM_SUSPENDED) return -EINVAL; - spin_lock_irq(&dev->power.lock); + spin_lock_irqsave(&dev->power.lock, flags); /* * Prevent PM-runtime from being enabled for the device or return an @@ -1226,7 +1254,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status) else error = -EAGAIN; - spin_unlock_irq(&dev->power.lock); + spin_unlock_irqrestore(&dev->power.lock, flags); if (error) return error; @@ -1247,7 +1275,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status) device_links_read_unlock(idx); } - spin_lock_irq(&dev->power.lock); + spin_lock_irqsave(&dev->power.lock, flags); if (dev->power.runtime_status == status || !parent) goto out_set; @@ -1288,7 +1316,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status) dev->power.runtime_error = 0; out: - spin_unlock_irq(&dev->power.lock); + spin_unlock_irqrestore(&dev->power.lock, flags); if (notify_parent) pm_request_idle(parent); @@ -1527,14 +1555,17 @@ EXPORT_SYMBOL_GPL(pm_runtime_forbid); */ void pm_runtime_allow(struct device *dev) { + int ret; + spin_lock_irq(&dev->power.lock); if (dev->power.runtime_auto) goto out; dev->power.runtime_auto = true; - if (atomic_dec_and_test(&dev->power.usage_count)) + ret = rpm_drop_usage_count(dev); + if (ret == 0) rpm_idle(dev, RPM_AUTO | RPM_ASYNC); - else + else if (ret > 0) trace_rpm_usage_rcuidle(dev, RPM_AUTO | RPM_ASYNC); out: diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index a57d469676ca..11a4ffe91367 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -930,6 +930,7 @@ bool pm_wakeup_pending(void) return ret || atomic_read(&pm_abort_suspend) > 0; } +EXPORT_SYMBOL_GPL(pm_wakeup_pending); void pm_system_wakeup(void) { diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 82d370ae6a4a..d092c9bb4ba3 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -389,6 +389,27 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, return ret; } +static unsigned int cppc_cpufreq_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct cppc_cpudata *cpu_data = policy->driver_data; + unsigned int cpu = policy->cpu; + u32 desired_perf; + int ret; + + desired_perf = cppc_cpufreq_khz_to_perf(cpu_data, target_freq); + cpu_data->perf_ctrls.desired_perf = desired_perf; + ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); + + if (ret) { + pr_debug("Failed to set target on CPU:%d. ret:%d\n", + cpu, ret); + return 0; + } + + return target_freq; +} + static int cppc_verify_policy(struct cpufreq_policy_data *policy) { cpufreq_verify_within_cpu_limits(policy); @@ -420,12 +441,197 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) return cppc_get_transition_latency(cpu) / NSEC_PER_USEC; } +static DEFINE_PER_CPU(unsigned int, efficiency_class); +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy); + +/* Create an artificial performance state every CPPC_EM_CAP_STEP capacity unit. */ +#define CPPC_EM_CAP_STEP (20) +/* Increase the cost value by CPPC_EM_COST_STEP every performance state. */ +#define CPPC_EM_COST_STEP (1) +/* Add a cost gap correspnding to the energy of 4 CPUs. */ +#define CPPC_EM_COST_GAP (4 * SCHED_CAPACITY_SCALE * CPPC_EM_COST_STEP \ + / CPPC_EM_CAP_STEP) + +static unsigned int get_perf_level_count(struct cpufreq_policy *policy) +{ + struct cppc_perf_caps *perf_caps; + unsigned int min_cap, max_cap; + struct cppc_cpudata *cpu_data; + int cpu = policy->cpu; + + cpu_data = policy->driver_data; + perf_caps = &cpu_data->perf_caps; + max_cap = arch_scale_cpu_capacity(cpu); + min_cap = div_u64(max_cap * perf_caps->lowest_perf, perf_caps->highest_perf); + if ((min_cap == 0) || (max_cap < min_cap)) + return 0; + return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP; +} + +/* + * The cost is defined as: + * cost = power * max_frequency / frequency + */ +static inline unsigned long compute_cost(int cpu, int step) +{ + return CPPC_EM_COST_GAP * per_cpu(efficiency_class, cpu) + + step * CPPC_EM_COST_STEP; +} + +static int cppc_get_cpu_power(struct device *cpu_dev, + unsigned long *power, unsigned long *KHz) +{ + unsigned long perf_step, perf_prev, perf, perf_check; + unsigned int min_step, max_step, step, step_check; + unsigned long prev_freq = *KHz; + unsigned int min_cap, max_cap; + struct cpufreq_policy *policy; + + struct cppc_perf_caps *perf_caps; + struct cppc_cpudata *cpu_data; + + policy = cpufreq_cpu_get_raw(cpu_dev->id); + cpu_data = policy->driver_data; + perf_caps = &cpu_data->perf_caps; + max_cap = arch_scale_cpu_capacity(cpu_dev->id); + min_cap = div_u64(max_cap * perf_caps->lowest_perf, + perf_caps->highest_perf); + + perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap; + min_step = min_cap / CPPC_EM_CAP_STEP; + max_step = max_cap / CPPC_EM_CAP_STEP; + + perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); + step = perf_prev / perf_step; + + if (step > max_step) + return -EINVAL; + + if (min_step == max_step) { + step = max_step; + perf = perf_caps->highest_perf; + } else if (step < min_step) { + step = min_step; + perf = perf_caps->lowest_perf; + } else { + step++; + if (step == max_step) + perf = perf_caps->highest_perf; + else + perf = step * perf_step; + } + + *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf); + perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); + step_check = perf_check / perf_step; + + /* + * To avoid bad integer approximation, check that new frequency value + * increased and that the new frequency will be converted to the + * desired step value. + */ + while ((*KHz == prev_freq) || (step_check != step)) { + perf++; + *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf); + perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); + step_check = perf_check / perf_step; + } + + /* + * With an artificial EM, only the cost value is used. Still the power + * is populated such as 0 < power < EM_MAX_POWER. This allows to add + * more sense to the artificial performance states. + */ + *power = compute_cost(cpu_dev->id, step); + + return 0; +} + +static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz, + unsigned long *cost) +{ + unsigned long perf_step, perf_prev; + struct cppc_perf_caps *perf_caps; + struct cpufreq_policy *policy; + struct cppc_cpudata *cpu_data; + unsigned int max_cap; + int step; + + policy = cpufreq_cpu_get_raw(cpu_dev->id); + cpu_data = policy->driver_data; + perf_caps = &cpu_data->perf_caps; + max_cap = arch_scale_cpu_capacity(cpu_dev->id); + + perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, KHz); + perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap; + step = perf_prev / perf_step; + + *cost = compute_cost(cpu_dev->id, step); + + return 0; +} + +static int populate_efficiency_class(void) +{ + struct acpi_madt_generic_interrupt *gicc; + DECLARE_BITMAP(used_classes, 256) = {}; + int class, cpu, index; + + for_each_possible_cpu(cpu) { + gicc = acpi_cpu_get_madt_gicc(cpu); + class = gicc->efficiency_class; + bitmap_set(used_classes, class, 1); + } + + if (bitmap_weight(used_classes, 256) <= 1) { + pr_debug("Efficiency classes are all equal (=%d). " + "No EM registered", class); + return -EINVAL; + } + + /* + * Squeeze efficiency class values on [0:#efficiency_class-1]. + * Values are per spec in [0:255]. + */ + index = 0; + for_each_set_bit(class, used_classes, 256) { + for_each_possible_cpu(cpu) { + gicc = acpi_cpu_get_madt_gicc(cpu); + if (gicc->efficiency_class == class) + per_cpu(efficiency_class, cpu) = index; + } + index++; + } + cppc_cpufreq_driver.register_em = cppc_cpufreq_register_em; + + return 0; +} + +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy) +{ + struct cppc_cpudata *cpu_data; + struct em_data_callback em_cb = + EM_ADV_DATA_CB(cppc_get_cpu_power, cppc_get_cpu_cost); + + cpu_data = policy->driver_data; + em_dev_register_perf_domain(get_cpu_device(policy->cpu), + get_perf_level_count(policy), &em_cb, + cpu_data->shared_cpu_map, 0); +} + #else static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) { return cppc_get_transition_latency(cpu) / NSEC_PER_USEC; } +static int populate_efficiency_class(void) +{ + return 0; +} +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy) +{ +} #endif @@ -536,6 +742,9 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) goto out; } + policy->fast_switch_possible = cppc_allow_fast_switch(); + policy->dvfs_possible_from_any_cpu = true; + /* * If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost * is supported. @@ -681,6 +890,7 @@ static struct cpufreq_driver cppc_cpufreq_driver = { .verify = cppc_verify_policy, .target = cppc_cpufreq_set_target, .get = cppc_cpufreq_get_rate, + .fast_switch = cppc_cpufreq_fast_switch, .init = cppc_cpufreq_cpu_init, .exit = cppc_cpufreq_cpu_exit, .set_boost = cppc_cpufreq_set_boost, @@ -742,6 +952,7 @@ static int __init cppc_cpufreq_init(void) cppc_check_hisi_workaround(); cppc_freq_invariance_init(); + populate_efficiency_class(); ret = cpufreq_register_driver(&cppc_cpufreq_driver); if (ret) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 80f535cc8a75..2cad42774164 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -28,6 +28,7 @@ #include <linux/suspend.h> #include <linux/syscore_ops.h> #include <linux/tick.h> +#include <linux/units.h> #include <trace/events/power.h> static LIST_HEAD(cpufreq_policy_list); @@ -947,13 +948,14 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) { struct cpufreq_policy *policy = to_policy(kobj); struct freq_attr *fattr = to_attr(attr); - ssize_t ret; + ssize_t ret = -EBUSY; if (!fattr->show) return -EIO; down_read(&policy->rwsem); - ret = fattr->show(policy, buf); + if (likely(!policy_is_inactive(policy))) + ret = fattr->show(policy, buf); up_read(&policy->rwsem); return ret; @@ -964,7 +966,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, { struct cpufreq_policy *policy = to_policy(kobj); struct freq_attr *fattr = to_attr(attr); - ssize_t ret = -EINVAL; + ssize_t ret = -EBUSY; if (!fattr->store) return -EIO; @@ -978,7 +980,8 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, if (cpu_online(policy->cpu)) { down_write(&policy->rwsem); - ret = fattr->store(policy, buf, count); + if (likely(!policy_is_inactive(policy))) + ret = fattr->store(policy, buf, count); up_write(&policy->rwsem); } @@ -1019,11 +1022,12 @@ static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu, dev_err(dev, "cpufreq symlink creation failed\n"); } -static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, +static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu, struct device *dev) { dev_dbg(dev, "%s: Removing symlink\n", __func__); sysfs_remove_link(&dev->kobj, "cpufreq"); + cpumask_clear_cpu(cpu, policy->real_cpus); } static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) @@ -1337,12 +1341,12 @@ static int cpufreq_online(unsigned int cpu) down_write(&policy->rwsem); policy->cpu = cpu; policy->governor = NULL; - up_write(&policy->rwsem); } else { new_policy = true; policy = cpufreq_policy_alloc(cpu); if (!policy) return -ENOMEM; + down_write(&policy->rwsem); } if (!new_policy && cpufreq_driver->online) { @@ -1382,7 +1386,6 @@ static int cpufreq_online(unsigned int cpu) cpumask_copy(policy->related_cpus, policy->cpus); } - down_write(&policy->rwsem); /* * affected cpus must always be the one, which are online. We aren't * managing offline cpus here. @@ -1531,9 +1534,9 @@ static int cpufreq_online(unsigned int cpu) out_destroy_policy: for_each_cpu(j, policy->real_cpus) - remove_cpu_dev_symlink(policy, get_cpu_device(j)); + remove_cpu_dev_symlink(policy, j, get_cpu_device(j)); - up_write(&policy->rwsem); + cpumask_clear(policy->cpus); out_offline_policy: if (cpufreq_driver->offline) @@ -1544,6 +1547,8 @@ out_exit_policy: cpufreq_driver->exit(policy); out_free_policy: + up_write(&policy->rwsem); + cpufreq_policy_free(policy); return ret; } @@ -1575,47 +1580,36 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) return 0; } -static int cpufreq_offline(unsigned int cpu) +static void __cpufreq_offline(unsigned int cpu, struct cpufreq_policy *policy) { - struct cpufreq_policy *policy; int ret; - pr_debug("%s: unregistering CPU %u\n", __func__, cpu); - - policy = cpufreq_cpu_get_raw(cpu); - if (!policy) { - pr_debug("%s: No cpu_data found\n", __func__); - return 0; - } - - down_write(&policy->rwsem); if (has_target()) cpufreq_stop_governor(policy); cpumask_clear_cpu(cpu, policy->cpus); - if (policy_is_inactive(policy)) { - if (has_target()) - strncpy(policy->last_governor, policy->governor->name, - CPUFREQ_NAME_LEN); - else - policy->last_policy = policy->policy; - } else if (cpu == policy->cpu) { - /* Nominate new CPU */ - policy->cpu = cpumask_any(policy->cpus); - } - - /* Start governor again for active policy */ if (!policy_is_inactive(policy)) { + /* Nominate a new CPU if necessary. */ + if (cpu == policy->cpu) + policy->cpu = cpumask_any(policy->cpus); + + /* Start the governor again for the active policy. */ if (has_target()) { ret = cpufreq_start_governor(policy); if (ret) pr_err("%s: Failed to start governor\n", __func__); } - goto unlock; + return; } + if (has_target()) + strncpy(policy->last_governor, policy->governor->name, + CPUFREQ_NAME_LEN); + else + policy->last_policy = policy->policy; + if (cpufreq_thermal_control_enabled(cpufreq_driver)) { cpufreq_cooling_unregister(policy->cdev); policy->cdev = NULL; @@ -1634,8 +1628,24 @@ static int cpufreq_offline(unsigned int cpu) cpufreq_driver->exit(policy); policy->freq_table = NULL; } +} + +static int cpufreq_offline(unsigned int cpu) +{ + struct cpufreq_policy *policy; + + pr_debug("%s: unregistering CPU %u\n", __func__, cpu); + + policy = cpufreq_cpu_get_raw(cpu); + if (!policy) { + pr_debug("%s: No cpu_data found\n", __func__); + return 0; + } + + down_write(&policy->rwsem); + + __cpufreq_offline(cpu, policy); -unlock: up_write(&policy->rwsem); return 0; } @@ -1653,19 +1663,25 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) if (!policy) return; - if (cpu_online(cpu)) - cpufreq_offline(cpu); + down_write(&policy->rwsem); - cpumask_clear_cpu(cpu, policy->real_cpus); - remove_cpu_dev_symlink(policy, dev); + if (cpu_online(cpu)) + __cpufreq_offline(cpu, policy); - if (cpumask_empty(policy->real_cpus)) { - /* We did light-weight exit earlier, do full tear down now */ - if (cpufreq_driver->offline) - cpufreq_driver->exit(policy); + remove_cpu_dev_symlink(policy, cpu, dev); - cpufreq_policy_free(policy); + if (!cpumask_empty(policy->real_cpus)) { + up_write(&policy->rwsem); + return; } + + /* We did light-weight exit earlier, do full tear down now */ + if (cpufreq_driver->offline) + cpufreq_driver->exit(policy); + + up_write(&policy->rwsem); + + cpufreq_policy_free(policy); } /** @@ -1707,6 +1723,16 @@ static unsigned int cpufreq_verify_current_freq(struct cpufreq_policy *policy, b return new_freq; if (policy->cur != new_freq) { + /* + * For some platforms, the frequency returned by hardware may be + * slightly different from what is provided in the frequency + * table, for example hardware may return 499 MHz instead of 500 + * MHz. In such cases it is better to avoid getting into + * unnecessary frequency updates. + */ + if (abs(policy->cur - new_freq) < HZ_PER_MHZ) + return policy->cur; + cpufreq_out_of_sync(policy, new_freq); if (update) schedule_work(&policy->update); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 0d42cf8b88d8..85da677c43d6 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -388,6 +388,15 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs, gov->free(policy_dbs); } +static void cpufreq_dbs_data_release(struct kobject *kobj) +{ + struct dbs_data *dbs_data = to_dbs_data(to_gov_attr_set(kobj)); + struct dbs_governor *gov = dbs_data->gov; + + gov->exit(dbs_data); + kfree(dbs_data); +} + int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); @@ -425,6 +434,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) goto free_policy_dbs_info; } + dbs_data->gov = gov; gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list); ret = gov->init(dbs_data); @@ -447,6 +457,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) policy->governor_data = policy_dbs; gov->kobj_type.sysfs_ops = &governor_sysfs_ops; + gov->kobj_type.release = cpufreq_dbs_data_release; ret = kobject_init_and_add(&dbs_data->attr_set.kobj, &gov->kobj_type, get_governor_parent_kobj(policy), "%s", gov->gov.name); @@ -488,13 +499,8 @@ void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy) policy->governor_data = NULL; - if (!count) { - if (!have_governor_per_policy()) - gov->gdbs_data = NULL; - - gov->exit(dbs_data); - kfree(dbs_data); - } + if (!count && !have_governor_per_policy()) + gov->gdbs_data = NULL; free_policy_dbs_info(policy_dbs, gov); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index a5a0bc3cc23e..168c23fd7fca 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -37,6 +37,7 @@ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; /* Governor demand based switching data (per-policy or global). */ struct dbs_data { struct gov_attr_set attr_set; + struct dbs_governor *gov; void *tuners; unsigned int ignore_nice_load; unsigned int sampling_rate; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 846bb3a78788..57cdb3679885 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1322,6 +1322,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, mutex_unlock(&intel_pstate_limits_lock); intel_pstate_update_policies(); + arch_set_max_freq_ratio(global.no_turbo); mutex_unlock(&intel_pstate_driver_lock); @@ -2424,6 +2425,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { X86_MATCH(BROADWELL_X, core_funcs), X86_MATCH(SKYLAKE_X, core_funcs), X86_MATCH(ICELAKE_X, core_funcs), + X86_MATCH(SAPPHIRERAPIDS_X, core_funcs), {} }; diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index 0a94c56ddad2..813cccbfe934 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -51,8 +51,8 @@ static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = { }; static int __maybe_unused -mtk_cpufreq_get_cpu_power(unsigned long *mW, - unsigned long *KHz, struct device *cpu_dev) +mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *mW, + unsigned long *KHz) { struct mtk_cpufreq_data *data; struct cpufreq_policy *policy; diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c index 815645170c4d..039a66bbe1be 100644 --- a/drivers/cpufreq/pasemi-cpufreq.c +++ b/drivers/cpufreq/pasemi-cpufreq.c @@ -18,7 +18,6 @@ #include <asm/hw_irq.h> #include <asm/io.h> -#include <asm/prom.h> #include <asm/time.h> #include <asm/smp.h> diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 4f20c6a9108d..20f64a8b0a35 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -24,7 +24,7 @@ #include <linux/device.h> #include <linux/hardirq.h> #include <linux/of_device.h> -#include <asm/prom.h> + #include <asm/machdep.h> #include <asm/irq.h> #include <asm/pmac_feature.h> diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c index d7542a106e6b..ba9c31d98bd6 100644 --- a/drivers/cpufreq/pmac64-cpufreq.c +++ b/drivers/cpufreq/pmac64-cpufreq.c @@ -22,7 +22,7 @@ #include <linux/completion.h> #include <linux/mutex.h> #include <linux/of_device.h> -#include <asm/prom.h> + #include <asm/machdep.h> #include <asm/irq.h> #include <asm/sections.h> diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c index c58abb4cca3a..e3313ce63b38 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq.c @@ -12,7 +12,6 @@ #include <linux/of_platform.h> #include <asm/machdep.h> -#include <asm/prom.h> #include <asm/cell-regs.h> #include "ppc_cbe_cpufreq.h" diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c index 037fe23bc6ed..4fba3637b115 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c @@ -13,9 +13,9 @@ #include <linux/init.h> #include <linux/of_platform.h> #include <linux/pm_qos.h> +#include <linux/slab.h> #include <asm/processor.h> -#include <asm/prom.h> #include <asm/pmi.h> #include <asm/cell-regs.h> diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 919fa6e3f462..6d2a4cf46db7 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -96,8 +96,8 @@ scmi_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) } static int __maybe_unused -scmi_get_cpu_power(unsigned long *power, unsigned long *KHz, - struct device *cpu_dev) +scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power, + unsigned long *KHz) { unsigned long Hz; int ret, domain; diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c index 755bbdfc5b82..3db4fca1172b 100644 --- a/drivers/cpuidle/cpuidle-psci-domain.c +++ b/drivers/cpuidle/cpuidle-psci-domain.c @@ -52,7 +52,7 @@ static int psci_pd_init(struct device_node *np, bool use_osi) struct generic_pm_domain *pd; struct psci_pd_provider *pd_provider; struct dev_power_governor *pd_gov; - int ret = -ENOMEM, state_count = 0; + int ret = -ENOMEM; pd = dt_idle_pd_alloc(np, psci_dt_parse_state_node); if (!pd) @@ -71,7 +71,7 @@ static int psci_pd_init(struct device_node *np, bool use_osi) pd->flags |= GENPD_FLAG_ALWAYS_ON; /* Use governor for CPU PM domains if it has some states to manage. */ - pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL; + pd_gov = pd->states ? &pm_domain_cpu_gov : NULL; ret = pm_genpd_init(pd, pd_gov, false); if (ret) diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c index b51b5df08450..540105ca0781 100644 --- a/drivers/cpuidle/cpuidle-psci.c +++ b/drivers/cpuidle/cpuidle-psci.c @@ -23,6 +23,7 @@ #include <linux/pm_runtime.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/syscore_ops.h> #include <asm/cpuidle.h> @@ -131,6 +132,49 @@ static int psci_idle_cpuhp_down(unsigned int cpu) return 0; } +static void psci_idle_syscore_switch(bool suspend) +{ + bool cleared = false; + struct device *dev; + int cpu; + + for_each_possible_cpu(cpu) { + dev = per_cpu_ptr(&psci_cpuidle_data, cpu)->dev; + + if (dev && suspend) { + dev_pm_genpd_suspend(dev); + } else if (dev) { + dev_pm_genpd_resume(dev); + + /* Account for userspace having offlined a CPU. */ + if (pm_runtime_status_suspended(dev)) + pm_runtime_set_active(dev); + + /* Clear domain state to re-start fresh. */ + if (!cleared) { + psci_set_domain_state(0); + cleared = true; + } + } + } +} + +static int psci_idle_syscore_suspend(void) +{ + psci_idle_syscore_switch(true); + return 0; +} + +static void psci_idle_syscore_resume(void) +{ + psci_idle_syscore_switch(false); +} + +static struct syscore_ops psci_idle_syscore_ops = { + .suspend = psci_idle_syscore_suspend, + .resume = psci_idle_syscore_resume, +}; + static void psci_idle_init_cpuhp(void) { int err; @@ -138,6 +182,8 @@ static void psci_idle_init_cpuhp(void) if (!psci_cpuidle_use_cpuhp) return; + register_syscore_ops(&psci_idle_syscore_ops); + err = cpuhp_setup_state_nocalls(CPUHP_AP_CPU_PM_STARTING, "cpuidle/psci:online", psci_idle_cpuhp_up, diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c index 5c852e671992..1151e5e2ba82 100644 --- a/drivers/cpuidle/cpuidle-riscv-sbi.c +++ b/drivers/cpuidle/cpuidle-riscv-sbi.c @@ -414,7 +414,7 @@ static int sbi_pd_init(struct device_node *np) struct generic_pm_domain *pd; struct sbi_pd_provider *pd_provider; struct dev_power_governor *pd_gov; - int ret = -ENOMEM, state_count = 0; + int ret = -ENOMEM; pd = dt_idle_pd_alloc(np, sbi_dt_parse_state_node); if (!pd) @@ -433,7 +433,7 @@ static int sbi_pd_init(struct device_node *np) pd->flags |= GENPD_FLAG_ALWAYS_ON; /* Use governor for CPU PM domains if it has some states to manage. */ - pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL; + pd_gov = pd->states ? &pm_domain_cpu_gov : NULL; ret = pm_genpd_init(pd, pd_gov, false); if (ret) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index a525a609dfc6..01474daf4548 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -112,16 +112,16 @@ static unsigned long find_available_max_freq(struct devfreq *devfreq) } /** - * get_freq_range() - Get the current freq range + * devfreq_get_freq_range() - Get the current freq range * @devfreq: the devfreq instance * @min_freq: the min frequency * @max_freq: the max frequency * * This takes into consideration all constraints. */ -static void get_freq_range(struct devfreq *devfreq, - unsigned long *min_freq, - unsigned long *max_freq) +void devfreq_get_freq_range(struct devfreq *devfreq, + unsigned long *min_freq, + unsigned long *max_freq) { unsigned long *freq_table = devfreq->profile->freq_table; s32 qos_min_freq, qos_max_freq; @@ -158,6 +158,7 @@ static void get_freq_range(struct devfreq *devfreq, if (*min_freq > *max_freq) *min_freq = *max_freq; } +EXPORT_SYMBOL(devfreq_get_freq_range); /** * devfreq_get_freq_level() - Lookup freq_table for the frequency @@ -418,7 +419,7 @@ int devfreq_update_target(struct devfreq *devfreq, unsigned long freq) err = devfreq->governor->get_target_freq(devfreq, &freq); if (err) return err; - get_freq_range(devfreq, &min_freq, &max_freq); + devfreq_get_freq_range(devfreq, &min_freq, &max_freq); if (freq < min_freq) { freq = min_freq; @@ -785,6 +786,7 @@ struct devfreq *devfreq_add_device(struct device *dev, { struct devfreq *devfreq; struct devfreq_governor *governor; + unsigned long min_freq, max_freq; int err = 0; if (!dev || !profile || !governor_name) { @@ -849,6 +851,8 @@ struct devfreq *devfreq_add_device(struct device *dev, goto err_dev; } + devfreq_get_freq_range(devfreq, &min_freq, &max_freq); + devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev); devfreq->opp_table = dev_pm_opp_get_opp_table(dev); if (IS_ERR(devfreq->opp_table)) @@ -1587,7 +1591,7 @@ static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr, unsigned long min_freq, max_freq; mutex_lock(&df->lock); - get_freq_range(df, &min_freq, &max_freq); + devfreq_get_freq_range(df, &min_freq, &max_freq); mutex_unlock(&df->lock); return sprintf(buf, "%lu\n", min_freq); @@ -1641,7 +1645,7 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr, unsigned long min_freq, max_freq; mutex_lock(&df->lock); - get_freq_range(df, &min_freq, &max_freq); + devfreq_get_freq_range(df, &min_freq, &max_freq); mutex_unlock(&df->lock); return sprintf(buf, "%lu\n", max_freq); @@ -1955,7 +1959,7 @@ static int devfreq_summary_show(struct seq_file *s, void *data) mutex_lock(&devfreq->lock); cur_freq = devfreq->previous_freq; - get_freq_range(devfreq, &min_freq, &max_freq); + devfreq_get_freq_range(devfreq, &min_freq, &max_freq); timer = devfreq->profile->timer; if (IS_SUPPORTED_ATTR(devfreq->governor->attrs, POLLING_INTERVAL)) diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h index 002a7d67e39d..0adfebc0467a 100644 --- a/drivers/devfreq/governor.h +++ b/drivers/devfreq/governor.h @@ -48,6 +48,31 @@ #define DEVFREQ_GOV_ATTR_TIMER BIT(1) /** + * struct devfreq_cpu_data - Hold the per-cpu data + * @node: list node + * @dev: reference to cpu device. + * @first_cpu: the cpumask of the first cpu of a policy. + * @opp_table: reference to cpu opp table. + * @cur_freq: the current frequency of the cpu. + * @min_freq: the min frequency of the cpu. + * @max_freq: the max frequency of the cpu. + * + * This structure stores the required cpu_data of a cpu. + * This is auto-populated by the governor. + */ +struct devfreq_cpu_data { + struct list_head node; + + struct device *dev; + unsigned int first_cpu; + + struct opp_table *opp_table; + unsigned int cur_freq; + unsigned int min_freq; + unsigned int max_freq; +}; + +/** * struct devfreq_governor - Devfreq policy governor * @node: list node - contains registered devfreq governors * @name: Governor's name @@ -89,6 +114,8 @@ int devm_devfreq_add_governor(struct device *dev, int devfreq_update_status(struct devfreq *devfreq, unsigned long freq); int devfreq_update_target(struct devfreq *devfreq, unsigned long freq); +void devfreq_get_freq_range(struct devfreq *devfreq, unsigned long *min_freq, + unsigned long *max_freq); static inline int devfreq_update_stats(struct devfreq *df) { diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index fc09324a03e0..72c67979ebe1 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-only + // SPDX-License-Identifier: GPL-2.0-only /* * linux/drivers/devfreq/governor_passive.c * @@ -8,76 +8,129 @@ */ #include <linux/module.h> +#include <linux/cpu.h> +#include <linux/cpufreq.h> +#include <linux/cpumask.h> +#include <linux/slab.h> #include <linux/device.h> #include <linux/devfreq.h> #include "governor.h" -static int devfreq_passive_get_target_freq(struct devfreq *devfreq, - unsigned long *freq) +#define HZ_PER_KHZ 1000 + +static struct devfreq_cpu_data * +get_parent_cpu_data(struct devfreq_passive_data *p_data, + struct cpufreq_policy *policy) { - struct devfreq_passive_data *p_data - = (struct devfreq_passive_data *)devfreq->data; - struct devfreq *parent_devfreq = (struct devfreq *)p_data->parent; - unsigned long child_freq = ULONG_MAX; - struct dev_pm_opp *opp, *p_opp; - int i, count; + struct devfreq_cpu_data *parent_cpu_data; - /* - * If the devfreq device with passive governor has the specific method - * to determine the next frequency, should use the get_target_freq() - * of struct devfreq_passive_data. - */ - if (p_data->get_target_freq) - return p_data->get_target_freq(devfreq, freq); + if (!p_data || !policy) + return NULL; - /* - * If the parent and passive devfreq device uses the OPP table, - * get the next frequency by using the OPP table. - */ + list_for_each_entry(parent_cpu_data, &p_data->cpu_data_list, node) + if (parent_cpu_data->first_cpu == cpumask_first(policy->related_cpus)) + return parent_cpu_data; - /* - * - parent devfreq device uses the governors except for passive. - * - passive devfreq device uses the passive governor. - * - * Each devfreq has the OPP table. After deciding the new frequency - * from the governor of parent devfreq device, the passive governor - * need to get the index of new frequency on OPP table of parent - * device. And then the index is used for getting the suitable - * new frequency for passive devfreq device. - */ - if (!devfreq->profile || !devfreq->profile->freq_table - || devfreq->profile->max_state <= 0) - return -EINVAL; + return NULL; +} - /* - * The passive governor have to get the correct frequency from OPP - * list of parent device. Because in this case, *freq is temporary - * value which is decided by ondemand governor. - */ - if (devfreq->opp_table && parent_devfreq->opp_table) { - p_opp = devfreq_recommended_opp(parent_devfreq->dev.parent, - freq, 0); - if (IS_ERR(p_opp)) - return PTR_ERR(p_opp); +static unsigned long get_target_freq_by_required_opp(struct device *p_dev, + struct opp_table *p_opp_table, + struct opp_table *opp_table, + unsigned long *freq) +{ + struct dev_pm_opp *opp = NULL, *p_opp = NULL; + unsigned long target_freq; - opp = dev_pm_opp_xlate_required_opp(parent_devfreq->opp_table, - devfreq->opp_table, p_opp); - dev_pm_opp_put(p_opp); + if (!p_dev || !p_opp_table || !opp_table || !freq) + return 0; - if (IS_ERR(opp)) - goto no_required_opp; + p_opp = devfreq_recommended_opp(p_dev, freq, 0); + if (IS_ERR(p_opp)) + return 0; - *freq = dev_pm_opp_get_freq(opp); - dev_pm_opp_put(opp); + opp = dev_pm_opp_xlate_required_opp(p_opp_table, opp_table, p_opp); + dev_pm_opp_put(p_opp); + if (IS_ERR(opp)) return 0; + + target_freq = dev_pm_opp_get_freq(opp); + dev_pm_opp_put(opp); + + return target_freq; +} + +static int get_target_freq_with_cpufreq(struct devfreq *devfreq, + unsigned long *target_freq) +{ + struct devfreq_passive_data *p_data = + (struct devfreq_passive_data *)devfreq->data; + struct devfreq_cpu_data *parent_cpu_data; + struct cpufreq_policy *policy; + unsigned long cpu, cpu_cur, cpu_min, cpu_max, cpu_percent; + unsigned long dev_min, dev_max; + unsigned long freq = 0; + int ret = 0; + + for_each_online_cpu(cpu) { + policy = cpufreq_cpu_get(cpu); + if (!policy) { + ret = -EINVAL; + continue; + } + + parent_cpu_data = get_parent_cpu_data(p_data, policy); + if (!parent_cpu_data) { + cpufreq_cpu_put(policy); + continue; + } + + /* Get target freq via required opps */ + cpu_cur = parent_cpu_data->cur_freq * HZ_PER_KHZ; + freq = get_target_freq_by_required_opp(parent_cpu_data->dev, + parent_cpu_data->opp_table, + devfreq->opp_table, &cpu_cur); + if (freq) { + *target_freq = max(freq, *target_freq); + cpufreq_cpu_put(policy); + continue; + } + + /* Use interpolation if required opps is not available */ + devfreq_get_freq_range(devfreq, &dev_min, &dev_max); + + cpu_min = parent_cpu_data->min_freq; + cpu_max = parent_cpu_data->max_freq; + cpu_cur = parent_cpu_data->cur_freq; + + cpu_percent = ((cpu_cur - cpu_min) * 100) / (cpu_max - cpu_min); + freq = dev_min + mult_frac(dev_max - dev_min, cpu_percent, 100); + + *target_freq = max(freq, *target_freq); + cpufreq_cpu_put(policy); } -no_required_opp: - /* - * Get the OPP table's index of decided frequency by governor - * of parent device. - */ + return ret; +} + +static int get_target_freq_with_devfreq(struct devfreq *devfreq, + unsigned long *freq) +{ + struct devfreq_passive_data *p_data + = (struct devfreq_passive_data *)devfreq->data; + struct devfreq *parent_devfreq = (struct devfreq *)p_data->parent; + unsigned long child_freq = ULONG_MAX; + int i, count; + + /* Get target freq via required opps */ + child_freq = get_target_freq_by_required_opp(parent_devfreq->dev.parent, + parent_devfreq->opp_table, + devfreq->opp_table, freq); + if (child_freq) + goto out; + + /* Use interpolation if required opps is not available */ for (i = 0; i < parent_devfreq->profile->max_state; i++) if (parent_devfreq->profile->freq_table[i] == *freq) break; @@ -85,7 +138,6 @@ no_required_opp: if (i == parent_devfreq->profile->max_state) return -EINVAL; - /* Get the suitable frequency by using index of parent device. */ if (i < devfreq->profile->max_state) { child_freq = devfreq->profile->freq_table[i]; } else { @@ -93,12 +145,202 @@ no_required_opp: child_freq = devfreq->profile->freq_table[count - 1]; } - /* Return the suitable frequency for passive device. */ +out: *freq = child_freq; return 0; } +static int devfreq_passive_get_target_freq(struct devfreq *devfreq, + unsigned long *freq) +{ + struct devfreq_passive_data *p_data = + (struct devfreq_passive_data *)devfreq->data; + int ret; + + if (!p_data) + return -EINVAL; + + /* + * If the devfreq device with passive governor has the specific method + * to determine the next frequency, should use the get_target_freq() + * of struct devfreq_passive_data. + */ + if (p_data->get_target_freq) + return p_data->get_target_freq(devfreq, freq); + + switch (p_data->parent_type) { + case DEVFREQ_PARENT_DEV: + ret = get_target_freq_with_devfreq(devfreq, freq); + break; + case CPUFREQ_PARENT_DEV: + ret = get_target_freq_with_cpufreq(devfreq, freq); + break; + default: + ret = -EINVAL; + dev_err(&devfreq->dev, "Invalid parent type\n"); + break; + } + + return ret; +} + +static int cpufreq_passive_notifier_call(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct devfreq_passive_data *p_data = + container_of(nb, struct devfreq_passive_data, nb); + struct devfreq *devfreq = (struct devfreq *)p_data->this; + struct devfreq_cpu_data *parent_cpu_data; + struct cpufreq_freqs *freqs = ptr; + unsigned int cur_freq; + int ret; + + if (event != CPUFREQ_POSTCHANGE || !freqs) + return 0; + + parent_cpu_data = get_parent_cpu_data(p_data, freqs->policy); + if (!parent_cpu_data || parent_cpu_data->cur_freq == freqs->new) + return 0; + + cur_freq = parent_cpu_data->cur_freq; + parent_cpu_data->cur_freq = freqs->new; + + mutex_lock(&devfreq->lock); + ret = devfreq_update_target(devfreq, freqs->new); + mutex_unlock(&devfreq->lock); + if (ret) { + parent_cpu_data->cur_freq = cur_freq; + dev_err(&devfreq->dev, "failed to update the frequency.\n"); + return ret; + } + + return 0; +} + +static int cpufreq_passive_unregister_notifier(struct devfreq *devfreq) +{ + struct devfreq_passive_data *p_data + = (struct devfreq_passive_data *)devfreq->data; + struct devfreq_cpu_data *parent_cpu_data; + int cpu, ret = 0; + + if (p_data->nb.notifier_call) { + ret = cpufreq_unregister_notifier(&p_data->nb, + CPUFREQ_TRANSITION_NOTIFIER); + if (ret < 0) + return ret; + } + + for_each_possible_cpu(cpu) { + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + if (!policy) { + ret = -EINVAL; + continue; + } + + parent_cpu_data = get_parent_cpu_data(p_data, policy); + if (!parent_cpu_data) { + cpufreq_cpu_put(policy); + continue; + } + + list_del(&parent_cpu_data->node); + if (parent_cpu_data->opp_table) + dev_pm_opp_put_opp_table(parent_cpu_data->opp_table); + kfree(parent_cpu_data); + cpufreq_cpu_put(policy); + } + + return ret; +} + +static int cpufreq_passive_register_notifier(struct devfreq *devfreq) +{ + struct devfreq_passive_data *p_data + = (struct devfreq_passive_data *)devfreq->data; + struct device *dev = devfreq->dev.parent; + struct opp_table *opp_table = NULL; + struct devfreq_cpu_data *parent_cpu_data; + struct cpufreq_policy *policy; + struct device *cpu_dev; + unsigned int cpu; + int ret; + + p_data->cpu_data_list + = (struct list_head)LIST_HEAD_INIT(p_data->cpu_data_list); + + p_data->nb.notifier_call = cpufreq_passive_notifier_call; + ret = cpufreq_register_notifier(&p_data->nb, CPUFREQ_TRANSITION_NOTIFIER); + if (ret) { + dev_err(dev, "failed to register cpufreq notifier\n"); + p_data->nb.notifier_call = NULL; + goto err; + } + + for_each_possible_cpu(cpu) { + policy = cpufreq_cpu_get(cpu); + if (!policy) { + ret = -EPROBE_DEFER; + goto err; + } + + parent_cpu_data = get_parent_cpu_data(p_data, policy); + if (parent_cpu_data) { + cpufreq_cpu_put(policy); + continue; + } + + parent_cpu_data = kzalloc(sizeof(*parent_cpu_data), + GFP_KERNEL); + if (!parent_cpu_data) { + ret = -ENOMEM; + goto err_put_policy; + } + + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) { + dev_err(dev, "failed to get cpu device\n"); + ret = -ENODEV; + goto err_free_cpu_data; + } + + opp_table = dev_pm_opp_get_opp_table(cpu_dev); + if (IS_ERR(opp_table)) { + dev_err(dev, "failed to get opp_table of cpu%d\n", cpu); + ret = PTR_ERR(opp_table); + goto err_free_cpu_data; + } + + parent_cpu_data->dev = cpu_dev; + parent_cpu_data->opp_table = opp_table; + parent_cpu_data->first_cpu = cpumask_first(policy->related_cpus); + parent_cpu_data->cur_freq = policy->cur; + parent_cpu_data->min_freq = policy->cpuinfo.min_freq; + parent_cpu_data->max_freq = policy->cpuinfo.max_freq; + + list_add_tail(&parent_cpu_data->node, &p_data->cpu_data_list); + cpufreq_cpu_put(policy); + } + + mutex_lock(&devfreq->lock); + ret = devfreq_update_target(devfreq, 0L); + mutex_unlock(&devfreq->lock); + if (ret) + dev_err(dev, "failed to update the frequency\n"); + + return ret; + +err_free_cpu_data: + kfree(parent_cpu_data); +err_put_policy: + cpufreq_cpu_put(policy); +err: + WARN_ON(cpufreq_passive_unregister_notifier(devfreq)); + + return ret; +} + static int devfreq_passive_notifier_call(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -131,30 +373,55 @@ static int devfreq_passive_notifier_call(struct notifier_block *nb, return NOTIFY_DONE; } -static int devfreq_passive_event_handler(struct devfreq *devfreq, - unsigned int event, void *data) +static int devfreq_passive_unregister_notifier(struct devfreq *devfreq) +{ + struct devfreq_passive_data *p_data + = (struct devfreq_passive_data *)devfreq->data; + struct devfreq *parent = (struct devfreq *)p_data->parent; + struct notifier_block *nb = &p_data->nb; + + return devfreq_unregister_notifier(parent, nb, DEVFREQ_TRANSITION_NOTIFIER); +} + +static int devfreq_passive_register_notifier(struct devfreq *devfreq) { struct devfreq_passive_data *p_data = (struct devfreq_passive_data *)devfreq->data; struct devfreq *parent = (struct devfreq *)p_data->parent; struct notifier_block *nb = &p_data->nb; - int ret = 0; if (!parent) return -EPROBE_DEFER; + nb->notifier_call = devfreq_passive_notifier_call; + return devfreq_register_notifier(parent, nb, DEVFREQ_TRANSITION_NOTIFIER); +} + +static int devfreq_passive_event_handler(struct devfreq *devfreq, + unsigned int event, void *data) +{ + struct devfreq_passive_data *p_data + = (struct devfreq_passive_data *)devfreq->data; + int ret = 0; + + if (!p_data) + return -EINVAL; + + if (!p_data->this) + p_data->this = devfreq; + switch (event) { case DEVFREQ_GOV_START: - if (!p_data->this) - p_data->this = devfreq; - - nb->notifier_call = devfreq_passive_notifier_call; - ret = devfreq_register_notifier(parent, nb, - DEVFREQ_TRANSITION_NOTIFIER); + if (p_data->parent_type == DEVFREQ_PARENT_DEV) + ret = devfreq_passive_register_notifier(devfreq); + else if (p_data->parent_type == CPUFREQ_PARENT_DEV) + ret = cpufreq_passive_register_notifier(devfreq); break; case DEVFREQ_GOV_STOP: - WARN_ON(devfreq_unregister_notifier(parent, nb, - DEVFREQ_TRANSITION_NOTIFIER)); + if (p_data->parent_type == DEVFREQ_PARENT_DEV) + WARN_ON(devfreq_passive_unregister_notifier(devfreq)); + else if (p_data->parent_type == CPUFREQ_PARENT_DEV) + WARN_ON(cpufreq_passive_unregister_notifier(devfreq)); break; default: break; diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index 293857ebfd75..daff40702615 100644 --- a/drivers/devfreq/rk3399_dmc.c +++ b/drivers/devfreq/rk3399_dmc.c @@ -5,6 +5,7 @@ */ #include <linux/arm-smccc.h> +#include <linux/bitfield.h> #include <linux/clk.h> #include <linux/delay.h> #include <linux/devfreq.h> @@ -20,55 +21,49 @@ #include <linux/rwsem.h> #include <linux/suspend.h> +#include <soc/rockchip/pm_domains.h> #include <soc/rockchip/rk3399_grf.h> #include <soc/rockchip/rockchip_sip.h> -struct dram_timing { - unsigned int ddr3_speed_bin; - unsigned int pd_idle; - unsigned int sr_idle; - unsigned int sr_mc_gate_idle; - unsigned int srpd_lite_idle; - unsigned int standby_idle; - unsigned int auto_pd_dis_freq; - unsigned int dram_dll_dis_freq; - unsigned int phy_dll_dis_freq; - unsigned int ddr3_odt_dis_freq; - unsigned int ddr3_drv; - unsigned int ddr3_odt; - unsigned int phy_ddr3_ca_drv; - unsigned int phy_ddr3_dq_drv; - unsigned int phy_ddr3_odt; - unsigned int lpddr3_odt_dis_freq; - unsigned int lpddr3_drv; - unsigned int lpddr3_odt; - unsigned int phy_lpddr3_ca_drv; - unsigned int phy_lpddr3_dq_drv; - unsigned int phy_lpddr3_odt; - unsigned int lpddr4_odt_dis_freq; - unsigned int lpddr4_drv; - unsigned int lpddr4_dq_odt; - unsigned int lpddr4_ca_odt; - unsigned int phy_lpddr4_ca_drv; - unsigned int phy_lpddr4_ck_cs_drv; - unsigned int phy_lpddr4_dq_drv; - unsigned int phy_lpddr4_odt; -}; +#define NS_TO_CYCLE(NS, MHz) (((NS) * (MHz)) / NSEC_PER_USEC) + +#define RK3399_SET_ODT_PD_0_SR_IDLE GENMASK(7, 0) +#define RK3399_SET_ODT_PD_0_SR_MC_GATE_IDLE GENMASK(15, 8) +#define RK3399_SET_ODT_PD_0_STANDBY_IDLE GENMASK(31, 16) + +#define RK3399_SET_ODT_PD_1_PD_IDLE GENMASK(11, 0) +#define RK3399_SET_ODT_PD_1_SRPD_LITE_IDLE GENMASK(27, 16) + +#define RK3399_SET_ODT_PD_2_ODT_ENABLE BIT(0) struct rk3399_dmcfreq { struct device *dev; struct devfreq *devfreq; + struct devfreq_dev_profile profile; struct devfreq_simple_ondemand_data ondemand_data; struct clk *dmc_clk; struct devfreq_event_dev *edev; struct mutex lock; - struct dram_timing timing; struct regulator *vdd_center; struct regmap *regmap_pmu; unsigned long rate, target_rate; unsigned long volt, target_volt; unsigned int odt_dis_freq; - int odt_pd_arg0, odt_pd_arg1; + + unsigned int pd_idle_ns; + unsigned int sr_idle_ns; + unsigned int sr_mc_gate_idle_ns; + unsigned int srpd_lite_idle_ns; + unsigned int standby_idle_ns; + unsigned int ddr3_odt_dis_freq; + unsigned int lpddr3_odt_dis_freq; + unsigned int lpddr4_odt_dis_freq; + + unsigned int pd_idle_dis_freq; + unsigned int sr_idle_dis_freq; + unsigned int sr_mc_gate_idle_dis_freq; + unsigned int srpd_lite_idle_dis_freq; + unsigned int standby_idle_dis_freq; }; static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, @@ -78,10 +73,14 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, struct dev_pm_opp *opp; unsigned long old_clk_rate = dmcfreq->rate; unsigned long target_volt, target_rate; + unsigned int ddrcon_mhz; struct arm_smccc_res res; - bool odt_enable = false; int err; + u32 odt_pd_arg0 = 0; + u32 odt_pd_arg1 = 0; + u32 odt_pd_arg2 = 0; + opp = devfreq_recommended_opp(dev, freq, flags); if (IS_ERR(opp)) return PTR_ERR(opp); @@ -95,19 +94,71 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, mutex_lock(&dmcfreq->lock); + /* + * Ensure power-domain transitions don't interfere with ARM Trusted + * Firmware power-domain idling. + */ + err = rockchip_pmu_block(); + if (err) { + dev_err(dev, "Failed to block PMU: %d\n", err); + goto out_unlock; + } + + /* + * Some idle parameters may be based on the DDR controller clock, which + * is half of the DDR frequency. + * pd_idle and standby_idle are based on the controller clock cycle. + * sr_idle_cycle, sr_mc_gate_idle_cycle, and srpd_lite_idle_cycle + * are based on the 1024 controller clock cycle + */ + ddrcon_mhz = target_rate / USEC_PER_SEC / 2; + + u32p_replace_bits(&odt_pd_arg1, + NS_TO_CYCLE(dmcfreq->pd_idle_ns, ddrcon_mhz), + RK3399_SET_ODT_PD_1_PD_IDLE); + u32p_replace_bits(&odt_pd_arg0, + NS_TO_CYCLE(dmcfreq->standby_idle_ns, ddrcon_mhz), + RK3399_SET_ODT_PD_0_STANDBY_IDLE); + u32p_replace_bits(&odt_pd_arg0, + DIV_ROUND_UP(NS_TO_CYCLE(dmcfreq->sr_idle_ns, + ddrcon_mhz), 1024), + RK3399_SET_ODT_PD_0_SR_IDLE); + u32p_replace_bits(&odt_pd_arg0, + DIV_ROUND_UP(NS_TO_CYCLE(dmcfreq->sr_mc_gate_idle_ns, + ddrcon_mhz), 1024), + RK3399_SET_ODT_PD_0_SR_MC_GATE_IDLE); + u32p_replace_bits(&odt_pd_arg1, + DIV_ROUND_UP(NS_TO_CYCLE(dmcfreq->srpd_lite_idle_ns, + ddrcon_mhz), 1024), + RK3399_SET_ODT_PD_1_SRPD_LITE_IDLE); + if (dmcfreq->regmap_pmu) { + if (target_rate >= dmcfreq->sr_idle_dis_freq) + odt_pd_arg0 &= ~RK3399_SET_ODT_PD_0_SR_IDLE; + + if (target_rate >= dmcfreq->sr_mc_gate_idle_dis_freq) + odt_pd_arg0 &= ~RK3399_SET_ODT_PD_0_SR_MC_GATE_IDLE; + + if (target_rate >= dmcfreq->standby_idle_dis_freq) + odt_pd_arg0 &= ~RK3399_SET_ODT_PD_0_STANDBY_IDLE; + + if (target_rate >= dmcfreq->pd_idle_dis_freq) + odt_pd_arg1 &= ~RK3399_SET_ODT_PD_1_PD_IDLE; + + if (target_rate >= dmcfreq->srpd_lite_idle_dis_freq) + odt_pd_arg1 &= ~RK3399_SET_ODT_PD_1_SRPD_LITE_IDLE; + if (target_rate >= dmcfreq->odt_dis_freq) - odt_enable = true; + odt_pd_arg2 |= RK3399_SET_ODT_PD_2_ODT_ENABLE; /* * This makes a SMC call to the TF-A to set the DDR PD * (power-down) timings and to enable or disable the * ODT (on-die termination) resistors. */ - arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, dmcfreq->odt_pd_arg0, - dmcfreq->odt_pd_arg1, - ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD, - odt_enable, 0, 0, 0, &res); + arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, odt_pd_arg0, odt_pd_arg1, + ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD, odt_pd_arg2, + 0, 0, 0, &res); } /* @@ -158,6 +209,8 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, dmcfreq->volt = target_volt; out: + rockchip_pmu_unblock(); +out_unlock: mutex_unlock(&dmcfreq->lock); return err; } @@ -189,13 +242,6 @@ static int rk3399_dmcfreq_get_cur_freq(struct device *dev, unsigned long *freq) return 0; } -static struct devfreq_dev_profile rk3399_devfreq_dmc_profile = { - .polling_ms = 200, - .target = rk3399_dmcfreq_target, - .get_dev_status = rk3399_dmcfreq_get_dev_status, - .get_cur_freq = rk3399_dmcfreq_get_cur_freq, -}; - static __maybe_unused int rk3399_dmcfreq_suspend(struct device *dev) { struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev); @@ -238,69 +284,48 @@ static __maybe_unused int rk3399_dmcfreq_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(rk3399_dmcfreq_pm, rk3399_dmcfreq_suspend, rk3399_dmcfreq_resume); -static int of_get_ddr_timings(struct dram_timing *timing, - struct device_node *np) +static int rk3399_dmcfreq_of_props(struct rk3399_dmcfreq *data, + struct device_node *np) { int ret = 0; - ret = of_property_read_u32(np, "rockchip,ddr3_speed_bin", - &timing->ddr3_speed_bin); - ret |= of_property_read_u32(np, "rockchip,pd_idle", - &timing->pd_idle); - ret |= of_property_read_u32(np, "rockchip,sr_idle", - &timing->sr_idle); - ret |= of_property_read_u32(np, "rockchip,sr_mc_gate_idle", - &timing->sr_mc_gate_idle); - ret |= of_property_read_u32(np, "rockchip,srpd_lite_idle", - &timing->srpd_lite_idle); - ret |= of_property_read_u32(np, "rockchip,standby_idle", - &timing->standby_idle); - ret |= of_property_read_u32(np, "rockchip,auto_pd_dis_freq", - &timing->auto_pd_dis_freq); - ret |= of_property_read_u32(np, "rockchip,dram_dll_dis_freq", - &timing->dram_dll_dis_freq); - ret |= of_property_read_u32(np, "rockchip,phy_dll_dis_freq", - &timing->phy_dll_dis_freq); + /* + * These are all optional, and serve as minimum bounds. Give them large + * (i.e., never "disabled") values if the DT doesn't specify one. + */ + data->pd_idle_dis_freq = + data->sr_idle_dis_freq = + data->sr_mc_gate_idle_dis_freq = + data->srpd_lite_idle_dis_freq = + data->standby_idle_dis_freq = UINT_MAX; + + ret |= of_property_read_u32(np, "rockchip,pd-idle-ns", + &data->pd_idle_ns); + ret |= of_property_read_u32(np, "rockchip,sr-idle-ns", + &data->sr_idle_ns); + ret |= of_property_read_u32(np, "rockchip,sr-mc-gate-idle-ns", + &data->sr_mc_gate_idle_ns); + ret |= of_property_read_u32(np, "rockchip,srpd-lite-idle-ns", + &data->srpd_lite_idle_ns); + ret |= of_property_read_u32(np, "rockchip,standby-idle-ns", + &data->standby_idle_ns); ret |= of_property_read_u32(np, "rockchip,ddr3_odt_dis_freq", - &timing->ddr3_odt_dis_freq); - ret |= of_property_read_u32(np, "rockchip,ddr3_drv", - &timing->ddr3_drv); - ret |= of_property_read_u32(np, "rockchip,ddr3_odt", - &timing->ddr3_odt); - ret |= of_property_read_u32(np, "rockchip,phy_ddr3_ca_drv", - &timing->phy_ddr3_ca_drv); - ret |= of_property_read_u32(np, "rockchip,phy_ddr3_dq_drv", - &timing->phy_ddr3_dq_drv); - ret |= of_property_read_u32(np, "rockchip,phy_ddr3_odt", - &timing->phy_ddr3_odt); + &data->ddr3_odt_dis_freq); ret |= of_property_read_u32(np, "rockchip,lpddr3_odt_dis_freq", - &timing->lpddr3_odt_dis_freq); - ret |= of_property_read_u32(np, "rockchip,lpddr3_drv", - &timing->lpddr3_drv); - ret |= of_property_read_u32(np, "rockchip,lpddr3_odt", - &timing->lpddr3_odt); - ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_ca_drv", - &timing->phy_lpddr3_ca_drv); - ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_dq_drv", - &timing->phy_lpddr3_dq_drv); - ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_odt", - &timing->phy_lpddr3_odt); + &data->lpddr3_odt_dis_freq); ret |= of_property_read_u32(np, "rockchip,lpddr4_odt_dis_freq", - &timing->lpddr4_odt_dis_freq); - ret |= of_property_read_u32(np, "rockchip,lpddr4_drv", - &timing->lpddr4_drv); - ret |= of_property_read_u32(np, "rockchip,lpddr4_dq_odt", - &timing->lpddr4_dq_odt); - ret |= of_property_read_u32(np, "rockchip,lpddr4_ca_odt", - &timing->lpddr4_ca_odt); - ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_ca_drv", - &timing->phy_lpddr4_ca_drv); - ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_ck_cs_drv", - &timing->phy_lpddr4_ck_cs_drv); - ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_dq_drv", - &timing->phy_lpddr4_dq_drv); - ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_odt", - &timing->phy_lpddr4_odt); + &data->lpddr4_odt_dis_freq); + + ret |= of_property_read_u32(np, "rockchip,pd-idle-dis-freq-hz", + &data->pd_idle_dis_freq); + ret |= of_property_read_u32(np, "rockchip,sr-idle-dis-freq-hz", + &data->sr_idle_dis_freq); + ret |= of_property_read_u32(np, "rockchip,sr-mc-gate-idle-dis-freq-hz", + &data->sr_mc_gate_idle_dis_freq); + ret |= of_property_read_u32(np, "rockchip,srpd-lite-idle-dis-freq-hz", + &data->srpd_lite_idle_dis_freq); + ret |= of_property_read_u32(np, "rockchip,standby-idle-dis-freq-hz", + &data->standby_idle_dis_freq); return ret; } @@ -311,8 +336,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *np = pdev->dev.of_node, *node; struct rk3399_dmcfreq *data; - int ret, index, size; - uint32_t *timing; + int ret; struct dev_pm_opp *opp; u32 ddr_type; u32 val; @@ -343,26 +367,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) return ret; } - /* - * Get dram timing and pass it to arm trust firmware, - * the dram driver in arm trust firmware will get these - * timing and to do dram initial. - */ - if (!of_get_ddr_timings(&data->timing, np)) { - timing = &data->timing.ddr3_speed_bin; - size = sizeof(struct dram_timing) / 4; - for (index = 0; index < size; index++) { - arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, *timing++, index, - ROCKCHIP_SIP_CONFIG_DRAM_SET_PARAM, - 0, 0, 0, 0, &res); - if (res.a0) { - dev_err(dev, "Failed to set dram param: %ld\n", - res.a0); - ret = -EINVAL; - goto err_edev; - } - } - } + rk3399_dmcfreq_of_props(data, np); node = of_parse_phandle(np, "rockchip,pmu", 0); if (!node) @@ -381,13 +386,13 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) switch (ddr_type) { case RK3399_PMUGRF_DDRTYPE_DDR3: - data->odt_dis_freq = data->timing.ddr3_odt_dis_freq; + data->odt_dis_freq = data->ddr3_odt_dis_freq; break; case RK3399_PMUGRF_DDRTYPE_LPDDR3: - data->odt_dis_freq = data->timing.lpddr3_odt_dis_freq; + data->odt_dis_freq = data->lpddr3_odt_dis_freq; break; case RK3399_PMUGRF_DDRTYPE_LPDDR4: - data->odt_dis_freq = data->timing.lpddr4_odt_dis_freq; + data->odt_dis_freq = data->lpddr4_odt_dis_freq; break; default: ret = -EINVAL; @@ -400,62 +405,45 @@ no_pmu: 0, 0, 0, 0, &res); /* - * In TF-A there is a platform SIP call to set the PD (power-down) - * timings and to enable or disable the ODT (on-die termination). - * This call needs three arguments as follows: - * - * arg0: - * bit[0-7] : sr_idle - * bit[8-15] : sr_mc_gate_idle - * bit[16-31] : standby idle - * arg1: - * bit[0-11] : pd_idle - * bit[16-27] : srpd_lite_idle - * arg2: - * bit[0] : odt enable - */ - data->odt_pd_arg0 = (data->timing.sr_idle & 0xff) | - ((data->timing.sr_mc_gate_idle & 0xff) << 8) | - ((data->timing.standby_idle & 0xffff) << 16); - data->odt_pd_arg1 = (data->timing.pd_idle & 0xfff) | - ((data->timing.srpd_lite_idle & 0xfff) << 16); - - /* * We add a devfreq driver to our parent since it has a device tree node * with operating points. */ - if (dev_pm_opp_of_add_table(dev)) { + if (devm_pm_opp_of_add_table(dev)) { dev_err(dev, "Invalid operating-points in device tree.\n"); ret = -EINVAL; goto err_edev; } - of_property_read_u32(np, "upthreshold", - &data->ondemand_data.upthreshold); - of_property_read_u32(np, "downdifferential", - &data->ondemand_data.downdifferential); + data->ondemand_data.upthreshold = 25; + data->ondemand_data.downdifferential = 15; data->rate = clk_get_rate(data->dmc_clk); opp = devfreq_recommended_opp(dev, &data->rate, 0); if (IS_ERR(opp)) { ret = PTR_ERR(opp); - goto err_free_opp; + goto err_edev; } data->rate = dev_pm_opp_get_freq(opp); data->volt = dev_pm_opp_get_voltage(opp); dev_pm_opp_put(opp); - rk3399_devfreq_dmc_profile.initial_freq = data->rate; + data->profile = (struct devfreq_dev_profile) { + .polling_ms = 200, + .target = rk3399_dmcfreq_target, + .get_dev_status = rk3399_dmcfreq_get_dev_status, + .get_cur_freq = rk3399_dmcfreq_get_cur_freq, + .initial_freq = data->rate, + }; data->devfreq = devm_devfreq_add_device(dev, - &rk3399_devfreq_dmc_profile, + &data->profile, DEVFREQ_GOV_SIMPLE_ONDEMAND, &data->ondemand_data); if (IS_ERR(data->devfreq)) { ret = PTR_ERR(data->devfreq); - goto err_free_opp; + goto err_edev; } devm_devfreq_register_opp_notifier(dev, data->devfreq); @@ -465,8 +453,6 @@ no_pmu: return 0; -err_free_opp: - dev_pm_opp_of_remove_table(&pdev->dev); err_edev: devfreq_event_disable_edev(data->edev); @@ -477,11 +463,7 @@ static int rk3399_dmcfreq_remove(struct platform_device *pdev) { struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(&pdev->dev); - /* - * Before remove the opp table we need to unregister the opp notifier. - */ - devm_devfreq_unregister_opp_notifier(dmcfreq->dev, dmcfreq->devfreq); - dev_pm_opp_of_remove_table(dmcfreq->dev); + devfreq_event_disable_edev(dmcfreq->edev); return 0; } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 47551ab73ca8..b9bb94bd0f67 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -765,6 +765,106 @@ static struct cpuidle_state icx_cstates[] __initdata = { }; /* + * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa. + * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL. + * But in this case there is effectively no C1, because C1 requests are + * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1 + * and C1E requests end up with C1, so there is effectively no C1E. + * + * By default we enable C1E and disable C1 by marking it with + * 'CPUIDLE_FLAG_UNUSABLE'. + */ +static struct cpuidle_state adl_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 2, + .target_residency = 4, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 220, + .target_residency = 600, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C8", + .desc = "MWAIT 0x40", + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 280, + .target_residency = 800, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C10", + .desc = "MWAIT 0x60", + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 680, + .target_residency = 2000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + +static struct cpuidle_state adl_l_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 2, + .target_residency = 4, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 170, + .target_residency = 500, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C8", + .desc = "MWAIT 0x40", + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 200, + .target_residency = 600, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C10", + .desc = "MWAIT 0x60", + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 230, + .target_residency = 700, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + +/* * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1 @@ -1147,6 +1247,14 @@ static const struct idle_cpu idle_cpu_icx __initconst = { .use_acpi = true, }; +static const struct idle_cpu idle_cpu_adl __initconst = { + .state_table = adl_cstates, +}; + +static const struct idle_cpu idle_cpu_adl_l __initconst = { + .state_table = adl_l_cstates, +}; + static const struct idle_cpu idle_cpu_spr __initconst = { .state_table = spr_cstates, .disable_promotion_to_c1e = true, @@ -1215,6 +1323,8 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), @@ -1574,6 +1684,25 @@ static void __init skx_idle_state_table_update(void) } /** + * adl_idle_state_table_update - Adjust AlderLake idle states table. + */ +static void __init adl_idle_state_table_update(void) +{ + /* Check if user prefers C1 over C1E. */ + if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) { + cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE; + cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE; + + /* Disable C1E by clearing the "C1E promotion" bit. */ + c1e_promotion = C1E_PROMOTION_DISABLE; + return; + } + + /* Make sure C1E is enabled by default */ + c1e_promotion = C1E_PROMOTION_ENABLE; +} + +/** * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table. */ static void __init spr_idle_state_table_update(void) @@ -1642,6 +1771,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) case INTEL_FAM6_SAPPHIRERAPIDS_X: spr_idle_state_table_update(); break; + case INTEL_FAM6_ALDERLAKE: + case INTEL_FAM6_ALDERLAKE_L: + adl_idle_state_table_update(); + break; } for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { diff --git a/drivers/iio/adc/qcom-vadc-common.c b/drivers/iio/adc/qcom-vadc-common.c index 6c6aec848f98..d5209f32adb3 100644 --- a/drivers/iio/adc/qcom-vadc-common.c +++ b/drivers/iio/adc/qcom-vadc-common.c @@ -677,6 +677,17 @@ u16 qcom_adc_tm5_temp_volt_scale(unsigned int prescale_ratio, } EXPORT_SYMBOL(qcom_adc_tm5_temp_volt_scale); +u16 qcom_adc_tm5_gen2_temp_res_scale(int temp) +{ + int64_t resistance; + + resistance = qcom_vadc_map_temp_voltage(adcmap7_100k, + ARRAY_SIZE(adcmap7_100k), temp); + + return div64_s64(resistance * RATIO_MAX_ADC7, resistance + R_PU_100K); +} +EXPORT_SYMBOL(qcom_adc_tm5_gen2_temp_res_scale); + int qcom_adc5_hw_scale(enum vadc_scale_fn_type scaletype, unsigned int prescale_ratio, const struct adc5_data *data, diff --git a/drivers/iio/chemical/scd30.h b/drivers/iio/chemical/scd30.h index f60127bfe0f4..1ac9f3f79271 100644 --- a/drivers/iio/chemical/scd30.h +++ b/drivers/iio/chemical/scd30.h @@ -68,10 +68,7 @@ struct scd30_state { scd30_command_t command; }; -int scd30_suspend(struct device *dev); -int scd30_resume(struct device *dev); - -static __maybe_unused SIMPLE_DEV_PM_OPS(scd30_pm_ops, scd30_suspend, scd30_resume); +extern const struct dev_pm_ops scd30_pm_ops; int scd30_probe(struct device *dev, int irq, const char *name, void *priv, scd30_command_t command); diff --git a/drivers/iio/chemical/scd30_core.c b/drivers/iio/chemical/scd30_core.c index 9fe6bbe9ee04..682fca39d14d 100644 --- a/drivers/iio/chemical/scd30_core.c +++ b/drivers/iio/chemical/scd30_core.c @@ -517,7 +517,7 @@ static const struct iio_chan_spec scd30_channels[] = { IIO_CHAN_SOFT_TIMESTAMP(3), }; -int __maybe_unused scd30_suspend(struct device *dev) +static int scd30_suspend(struct device *dev) { struct iio_dev *indio_dev = dev_get_drvdata(dev); struct scd30_state *state = iio_priv(indio_dev); @@ -529,9 +529,8 @@ int __maybe_unused scd30_suspend(struct device *dev) return regulator_disable(state->vdd); } -EXPORT_SYMBOL(scd30_suspend); -int __maybe_unused scd30_resume(struct device *dev) +static int scd30_resume(struct device *dev) { struct iio_dev *indio_dev = dev_get_drvdata(dev); struct scd30_state *state = iio_priv(indio_dev); @@ -543,7 +542,8 @@ int __maybe_unused scd30_resume(struct device *dev) return scd30_command_write(state, CMD_START_MEAS, state->pressure_comp); } -EXPORT_SYMBOL(scd30_resume); + +EXPORT_NS_SIMPLE_DEV_PM_OPS(scd30_pm_ops, scd30_suspend, scd30_resume, IIO_SCD30); static void scd30_stop_meas(void *data) { @@ -759,7 +759,7 @@ int scd30_probe(struct device *dev, int irq, const char *name, void *priv, return devm_iio_device_register(dev, indio_dev); } -EXPORT_SYMBOL(scd30_probe); +EXPORT_SYMBOL_NS(scd30_probe, IIO_SCD30); MODULE_AUTHOR("Tomasz Duszynski <tomasz.duszynski@octakon.com>"); MODULE_DESCRIPTION("Sensirion SCD30 carbon dioxide sensor core driver"); diff --git a/drivers/iio/chemical/scd30_i2c.c b/drivers/iio/chemical/scd30_i2c.c index 875892a070ee..bae479a4721f 100644 --- a/drivers/iio/chemical/scd30_i2c.c +++ b/drivers/iio/chemical/scd30_i2c.c @@ -128,7 +128,7 @@ static struct i2c_driver scd30_i2c_driver = { .driver = { .name = KBUILD_MODNAME, .of_match_table = scd30_i2c_of_match, - .pm = &scd30_pm_ops, + .pm = pm_sleep_ptr(&scd30_pm_ops), }, .probe_new = scd30_i2c_probe, }; @@ -137,3 +137,4 @@ module_i2c_driver(scd30_i2c_driver); MODULE_AUTHOR("Tomasz Duszynski <tomasz.duszynski@octakon.com>"); MODULE_DESCRIPTION("Sensirion SCD30 carbon dioxide sensor i2c driver"); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(IIO_SCD30); diff --git a/drivers/iio/chemical/scd30_serial.c b/drivers/iio/chemical/scd30_serial.c index 568b34486c44..3c519103d30b 100644 --- a/drivers/iio/chemical/scd30_serial.c +++ b/drivers/iio/chemical/scd30_serial.c @@ -252,7 +252,7 @@ static struct serdev_device_driver scd30_serdev_driver = { .driver = { .name = KBUILD_MODNAME, .of_match_table = scd30_serdev_of_match, - .pm = &scd30_pm_ops, + .pm = pm_sleep_ptr(&scd30_pm_ops), }, .probe = scd30_serdev_probe, }; @@ -261,3 +261,4 @@ module_serdev_device_driver(scd30_serdev_driver); MODULE_AUTHOR("Tomasz Duszynski <tomasz.duszynski@octakon.com>"); MODULE_DESCRIPTION("Sensirion SCD30 carbon dioxide sensor serial driver"); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(IIO_SCD30); diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 440ab5a03df9..485ea980bde7 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -1448,7 +1448,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node); * Returns 0 on success or a proper -EINVAL value in case of error. */ static int __maybe_unused -_get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev) +_get_dt_power(struct device *dev, unsigned long *mW, unsigned long *kHz) { struct dev_pm_opp *opp; unsigned long opp_freq, opp_power; @@ -1482,8 +1482,8 @@ _get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev) * Returns -EINVAL if the power calculation failed because of missing * parameters, 0 otherwise. */ -static int __maybe_unused _get_power(unsigned long *mW, unsigned long *kHz, - struct device *dev) +static int __maybe_unused _get_power(struct device *dev, unsigned long *mW, + unsigned long *kHz) { struct dev_pm_opp *opp; struct device_node *np; diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index bca2f912d349..f5eced0842b3 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -211,7 +211,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent) return 0; pd = em_cpu_get(cpu); - if (!pd) + if (!pd || em_is_artificial(pd)) return -EINVAL; dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL); diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 07611a00b78f..a9c99d9e8b42 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1010,7 +1010,7 @@ static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value, * where time_unit is default to 1 sec. Never 0. */ if (!to_raw) - return (value) ? value *= rp->time_unit : rp->time_unit; + return (value) ? value * rp->time_unit : rp->time_unit; value = div64_u64(value, rp->time_unit); @@ -1107,6 +1107,8 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server), X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core), diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c index 1be45f36ab6c..9d23984d8931 100644 --- a/drivers/powercap/intel_rapl_msr.c +++ b/drivers/powercap/intel_rapl_msr.c @@ -140,6 +140,7 @@ static const struct x86_cpu_id pl4_support_ids[] = { { X86_VENDOR_INTEL, 6, INTEL_FAM6_TIGERLAKE_L, X86_FEATURE_ANY }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE, X86_FEATURE_ANY }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_L, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_RAPTORLAKE, X86_FEATURE_ANY }, {} }; diff --git a/drivers/soc/rockchip/pm_domains.c b/drivers/soc/rockchip/pm_domains.c index 0868b7d406fb..b1cf7d29dafd 100644 --- a/drivers/soc/rockchip/pm_domains.c +++ b/drivers/soc/rockchip/pm_domains.c @@ -8,6 +8,7 @@ #include <linux/io.h> #include <linux/iopoll.h> #include <linux/err.h> +#include <linux/mutex.h> #include <linux/pm_clock.h> #include <linux/pm_domain.h> #include <linux/of_address.h> @@ -16,6 +17,7 @@ #include <linux/clk.h> #include <linux/regmap.h> #include <linux/mfd/syscon.h> +#include <soc/rockchip/pm_domains.h> #include <dt-bindings/power/px30-power.h> #include <dt-bindings/power/rk3036-power.h> #include <dt-bindings/power/rk3066-power.h> @@ -139,6 +141,109 @@ struct rockchip_pmu { #define DOMAIN_RK3568(name, pwr, req, wakeup) \ DOMAIN_M(name, pwr, pwr, req, req, req, wakeup) +/* + * Dynamic Memory Controller may need to coordinate with us -- see + * rockchip_pmu_block(). + * + * dmc_pmu_mutex protects registration-time races, so DMC driver doesn't try to + * block() while we're initializing the PMU. + */ +static DEFINE_MUTEX(dmc_pmu_mutex); +static struct rockchip_pmu *dmc_pmu; + +/* + * Block PMU transitions and make sure they don't interfere with ARM Trusted + * Firmware operations. There are two conflicts, noted in the comments below. + * + * Caller must unblock PMU transitions via rockchip_pmu_unblock(). + */ +int rockchip_pmu_block(void) +{ + struct rockchip_pmu *pmu; + struct generic_pm_domain *genpd; + struct rockchip_pm_domain *pd; + int i, ret; + + mutex_lock(&dmc_pmu_mutex); + + /* No PMU (yet)? Then we just block rockchip_pmu_probe(). */ + if (!dmc_pmu) + return 0; + pmu = dmc_pmu; + + /* + * mutex blocks all idle transitions: we can't touch the + * PMU_BUS_IDLE_REQ (our ".idle_offset") register while ARM Trusted + * Firmware might be using it. + */ + mutex_lock(&pmu->mutex); + + /* + * Power domain clocks: Per Rockchip, we *must* keep certain clocks + * enabled for the duration of power-domain transitions. Most + * transitions are handled by this driver, but some cases (in + * particular, DRAM DVFS / memory-controller idle) must be handled by + * firmware. Firmware can handle most clock management via a special + * "ungate" register (PMU_CRU_GATEDIS_CON0), but unfortunately, this + * doesn't handle PLLs. We can assist this transition by doing the + * clock management on behalf of firmware. + */ + for (i = 0; i < pmu->genpd_data.num_domains; i++) { + genpd = pmu->genpd_data.domains[i]; + if (genpd) { + pd = to_rockchip_pd(genpd); + ret = clk_bulk_enable(pd->num_clks, pd->clks); + if (ret < 0) { + dev_err(pmu->dev, + "failed to enable clks for domain '%s': %d\n", + genpd->name, ret); + goto err; + } + } + } + + return 0; + +err: + for (i = i - 1; i >= 0; i--) { + genpd = pmu->genpd_data.domains[i]; + if (genpd) { + pd = to_rockchip_pd(genpd); + clk_bulk_disable(pd->num_clks, pd->clks); + } + } + mutex_unlock(&pmu->mutex); + mutex_unlock(&dmc_pmu_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(rockchip_pmu_block); + +/* Unblock PMU transitions. */ +void rockchip_pmu_unblock(void) +{ + struct rockchip_pmu *pmu; + struct generic_pm_domain *genpd; + struct rockchip_pm_domain *pd; + int i; + + if (dmc_pmu) { + pmu = dmc_pmu; + for (i = 0; i < pmu->genpd_data.num_domains; i++) { + genpd = pmu->genpd_data.domains[i]; + if (genpd) { + pd = to_rockchip_pd(genpd); + clk_bulk_disable(pd->num_clks, pd->clks); + } + } + + mutex_unlock(&pmu->mutex); + } + + mutex_unlock(&dmc_pmu_mutex); +} +EXPORT_SYMBOL_GPL(rockchip_pmu_unblock); + static bool rockchip_pmu_domain_is_idle(struct rockchip_pm_domain *pd) { struct rockchip_pmu *pmu = pd->pmu; @@ -690,6 +795,12 @@ static int rockchip_pm_domain_probe(struct platform_device *pdev) error = -ENODEV; + /* + * Prevent any rockchip_pmu_block() from racing with the remainder of + * setup (clocks, register initialization). + */ + mutex_lock(&dmc_pmu_mutex); + for_each_available_child_of_node(np, node) { error = rockchip_pm_add_one_domain(pmu, node); if (error) { @@ -719,10 +830,17 @@ static int rockchip_pm_domain_probe(struct platform_device *pdev) goto err_out; } + /* We only expect one PMU. */ + if (!WARN_ON_ONCE(dmc_pmu)) + dmc_pmu = pmu; + + mutex_unlock(&dmc_pmu_mutex); + return 0; err_out: rockchip_pm_domain_cleanup(pmu); + mutex_unlock(&dmc_pmu_mutex); return error; } diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index f0c36a1530d5..def8e1a0399c 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -28,7 +28,7 @@ thermal_sys-$(CONFIG_CPU_IDLE_THERMAL) += cpuidle_cooling.o # devfreq cooling thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o -obj-$(CONFIG_K3_THERMAL) += k3_bandgap.o +obj-$(CONFIG_K3_THERMAL) += k3_bandgap.o k3_j72xx_bandgap.o # platform thermal drivers obj-y += broadcom/ obj-$(CONFIG_THERMAL_MMIO) += thermal_mmio.o diff --git a/drivers/thermal/broadcom/bcm2711_thermal.c b/drivers/thermal/broadcom/bcm2711_thermal.c index 1ec57d9ecf53..e9bef5c3414b 100644 --- a/drivers/thermal/broadcom/bcm2711_thermal.c +++ b/drivers/thermal/broadcom/bcm2711_thermal.c @@ -38,7 +38,6 @@ static int bcm2711_get_temp(void *data, int *temp) int offset = thermal_zone_get_offset(priv->thermal); u32 val; int ret; - long t; ret = regmap_read(priv->regmap, AVS_RO_TEMP_STATUS, &val); if (ret) @@ -50,9 +49,7 @@ static int bcm2711_get_temp(void *data, int *temp) val &= AVS_RO_TEMP_STATUS_DATA_MSK; /* Convert a HW code to a temperature reading (millidegree celsius) */ - t = slope * val + offset; - - *temp = t < 0 ? 0 : t; + *temp = slope * val + offset; return 0; } diff --git a/drivers/thermal/broadcom/sr-thermal.c b/drivers/thermal/broadcom/sr-thermal.c index 475ce2900771..85ab9edd580c 100644 --- a/drivers/thermal/broadcom/sr-thermal.c +++ b/drivers/thermal/broadcom/sr-thermal.c @@ -60,6 +60,9 @@ static int sr_thermal_probe(struct platform_device *pdev) return -ENOMEM; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENOENT; + sr_thermal->regs = (void __iomem *)devm_memremap(&pdev->dev, res->start, resource_size(res), MEMREMAP_WB); diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c index 0bfb8eebd126..b8151d95a806 100644 --- a/drivers/thermal/cpufreq_cooling.c +++ b/drivers/thermal/cpufreq_cooling.c @@ -328,7 +328,7 @@ static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev, struct cpufreq_policy *policy; unsigned int nr_levels; - if (!em) + if (!em || em_is_artificial(em)) return false; policy = cpufreq_cdev->policy; diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index 4310cb342a9f..8c76f9655e57 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -358,28 +358,37 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, struct thermal_cooling_device *cdev; struct device *dev = df->dev.parent; struct devfreq_cooling_device *dfc; + struct em_perf_domain *em; + struct thermal_cooling_device_ops *ops; char *name; int err, num_opps; - dfc = kzalloc(sizeof(*dfc), GFP_KERNEL); - if (!dfc) + ops = kmemdup(&devfreq_cooling_ops, sizeof(*ops), GFP_KERNEL); + if (!ops) return ERR_PTR(-ENOMEM); + dfc = kzalloc(sizeof(*dfc), GFP_KERNEL); + if (!dfc) { + err = -ENOMEM; + goto free_ops; + } + dfc->devfreq = df; - dfc->em_pd = em_pd_get(dev); - if (dfc->em_pd) { - devfreq_cooling_ops.get_requested_power = + em = em_pd_get(dev); + if (em && !em_is_artificial(em)) { + dfc->em_pd = em; + ops->get_requested_power = devfreq_cooling_get_requested_power; - devfreq_cooling_ops.state2power = devfreq_cooling_state2power; - devfreq_cooling_ops.power2state = devfreq_cooling_power2state; + ops->state2power = devfreq_cooling_state2power; + ops->power2state = devfreq_cooling_power2state; dfc->power_ops = dfc_power; num_opps = em_pd_nr_perf_states(dfc->em_pd); } else { /* Backward compatibility for drivers which do not use IPA */ - dev_dbg(dev, "missing EM for cooling device\n"); + dev_dbg(dev, "missing proper EM for cooling device\n"); num_opps = dev_pm_opp_get_opp_count(dev); @@ -407,8 +416,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, if (!name) goto remove_qos_req; - cdev = thermal_of_cooling_device_register(np, name, dfc, - &devfreq_cooling_ops); + cdev = thermal_of_cooling_device_register(np, name, dfc, ops); kfree(name); if (IS_ERR(cdev)) { @@ -429,6 +437,8 @@ free_table: kfree(dfc->freq_table); free_dfc: kfree(dfc); +free_ops: + kfree(ops); return ERR_PTR(err); } @@ -510,11 +520,13 @@ EXPORT_SYMBOL_GPL(devfreq_cooling_em_register); void devfreq_cooling_unregister(struct thermal_cooling_device *cdev) { struct devfreq_cooling_device *dfc; + const struct thermal_cooling_device_ops *ops; struct device *dev; if (IS_ERR_OR_NULL(cdev)) return; + ops = cdev->ops; dfc = cdev->devdata; dev = dfc->devfreq->dev.parent; @@ -525,5 +537,6 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev) kfree(dfc->freq_table); kfree(dfc); + kfree(ops); } EXPORT_SYMBOL_GPL(devfreq_cooling_unregister); diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 9a21ac0ceb11..b29ab09040d5 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -629,7 +629,6 @@ static int hisi_thermal_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP static int hisi_thermal_suspend(struct device *dev) { struct hisi_thermal_data *data = dev_get_drvdata(dev); @@ -651,15 +650,14 @@ static int hisi_thermal_resume(struct device *dev) return ret; } -#endif -static SIMPLE_DEV_PM_OPS(hisi_thermal_pm_ops, +static DEFINE_SIMPLE_DEV_PM_OPS(hisi_thermal_pm_ops, hisi_thermal_suspend, hisi_thermal_resume); static struct platform_driver hisi_thermal_driver = { .driver = { .name = "hisi_thermal", - .pm = &hisi_thermal_pm_ops, + .pm = pm_sleep_ptr(&hisi_thermal_pm_ops), .of_match_table = of_hisi_thermal_match, }, .probe = hisi_thermal_probe, diff --git a/drivers/thermal/imx_sc_thermal.c b/drivers/thermal/imx_sc_thermal.c index 8d76dbfde6a9..331a241eb0ef 100644 --- a/drivers/thermal/imx_sc_thermal.c +++ b/drivers/thermal/imx_sc_thermal.c @@ -94,8 +94,8 @@ static int imx_sc_thermal_probe(struct platform_device *pdev) sensor = devm_kzalloc(&pdev->dev, sizeof(*sensor), GFP_KERNEL); if (!sensor) { of_node_put(child); - of_node_put(sensor_np); - return -ENOMEM; + ret = -ENOMEM; + goto put_node; } ret = thermal_zone_of_get_sensor_id(child, @@ -124,7 +124,9 @@ static int imx_sc_thermal_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "failed to add hwmon sysfs attributes\n"); } +put_node: of_node_put(sensor_np); + of_node_put(np); return ret; } diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 79931ddc582a..80d4e0676083 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -169,28 +169,25 @@ static int int3400_thermal_run_osc(acpi_handle handle, char *uuid_str, int *enab acpi_status status; int result = 0; struct acpi_osc_context context = { - .uuid_str = NULL, + .uuid_str = uuid_str, .rev = 1, .cap.length = 8, + .cap.pointer = buf, }; - context.uuid_str = uuid_str; - buf[OSC_QUERY_DWORD] = 0; buf[OSC_SUPPORT_DWORD] = *enable; - context.cap.pointer = buf; - status = acpi_run_osc(handle, &context); if (ACPI_SUCCESS(status)) { ret = *((u32 *)(context.ret.pointer + 4)); if (ret != *enable) result = -EPERM; + + kfree(context.ret.pointer); } else result = -EPERM; - kfree(context.ret.pointer); - return result; } @@ -524,21 +521,18 @@ static void int3400_setup_gddv(struct int3400_thermal_priv *priv) obj = buffer.pointer; if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count != 1 - || obj->package.elements[0].type != ACPI_TYPE_BUFFER) { - kfree(buffer.pointer); - return; - } + || obj->package.elements[0].type != ACPI_TYPE_BUFFER) + goto out_free; priv->data_vault = kmemdup(obj->package.elements[0].buffer.pointer, obj->package.elements[0].buffer.length, GFP_KERNEL); - if (!priv->data_vault) { - kfree(buffer.pointer); - return; - } + if (!priv->data_vault) + goto out_free; bin_attr_data_vault.private = priv->data_vault; bin_attr_data_vault.size = obj->package.elements[0].buffer.length; +out_free: kfree(buffer.pointer); } @@ -669,6 +663,7 @@ static const struct acpi_device_id int3400_thermal_match[] = { {"INT3400", 0}, {"INTC1040", 0}, {"INTC1041", 0}, + {"INTC1042", 0}, {"INTC10A0", 0}, {} }; diff --git a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c index 07e25321dfe3..71d084c4c456 100644 --- a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c @@ -285,6 +285,7 @@ static const struct acpi_device_id int3403_device_ids[] = { {"INT3403", 0}, {"INTC1043", 0}, {"INTC1046", 0}, + {"INTC1062", 0}, {"INTC10A1", 0}, {"", 0}, }; diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index 730fd121df6e..a0640f762dc5 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -243,8 +243,6 @@ static void hfi_update_work_fn(struct work_struct *work) hfi_instance = container_of(to_delayed_work(work), struct hfi_instance, update_work); - if (!hfi_instance) - return; update_capabilities(hfi_instance); } diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index 527c91f5960b..c1fa2b29b153 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -70,8 +70,8 @@ static unsigned int delay_timeout = 100; module_param(delay_timeout, int, 0644); MODULE_PARM_DESC(delay_timeout, "amount of time delay for each iteration."); -/* Number of iterations for cooling delay, 10 counts by default for now */ -static unsigned int delay_cnt = 10; +/* Number of iterations for cooling delay, 600 counts by default for now */ +static unsigned int delay_cnt = 600; module_param(delay_cnt, int, 0644); MODULE_PARM_DESC(delay_cnt, "total number of iterations for time delay."); @@ -193,10 +193,11 @@ static int pch_wpt_get_temp(struct pch_thermal_device *ptd, int *temp) return 0; } +/* Cool the PCH when it's overheat in .suspend_noirq phase */ static int pch_wpt_suspend(struct pch_thermal_device *ptd) { u8 tsel; - u8 pch_delay_cnt = 1; + int pch_delay_cnt = 0; u16 pch_thr_temp, pch_cur_temp; /* Shutdown the thermal sensor if it is not enabled by BIOS */ @@ -232,26 +233,38 @@ static int pch_wpt_suspend(struct pch_thermal_device *ptd) * temperature stays above threshold, notify the warning message * which helps to indentify the reason why S0ix entry was rejected. */ - while (pch_delay_cnt <= delay_cnt) { - if (pch_cur_temp <= pch_thr_temp) + while (pch_delay_cnt < delay_cnt) { + if (pch_cur_temp < pch_thr_temp) break; - dev_warn(&ptd->pdev->dev, + if (pm_wakeup_pending()) { + dev_warn(&ptd->pdev->dev, "Wakeup event detected, abort cooling\n"); + return 0; + } + + pch_delay_cnt++; + dev_dbg(&ptd->pdev->dev, "CPU-PCH current temp [%dC] higher than the threshold temp [%dC], sleep %d times for %d ms duration\n", pch_cur_temp, pch_thr_temp, pch_delay_cnt, delay_timeout); msleep(delay_timeout); /* Read the PCH current temperature for next cycle. */ pch_cur_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); - pch_delay_cnt++; } - if (pch_cur_temp > pch_thr_temp) + if (pch_cur_temp >= pch_thr_temp) dev_warn(&ptd->pdev->dev, - "CPU-PCH is hot [%dC] even after delay, continue to suspend. S0ix might fail\n", - pch_cur_temp); - else - dev_info(&ptd->pdev->dev, - "CPU-PCH is cool [%dC], continue to suspend\n", pch_cur_temp); + "CPU-PCH is hot [%dC] after %d ms delay. S0ix might fail\n", + pch_cur_temp, pch_delay_cnt * delay_timeout); + else { + if (pch_delay_cnt) + dev_info(&ptd->pdev->dev, + "CPU-PCH is cool [%dC] after %d ms delay\n", + pch_cur_temp, pch_delay_cnt * delay_timeout); + else + dev_info(&ptd->pdev->dev, + "CPU-PCH is cool [%dC]\n", + pch_cur_temp); + } return 0; } @@ -455,7 +468,7 @@ static void intel_pch_thermal_remove(struct pci_dev *pdev) pci_disable_device(pdev); } -static int intel_pch_thermal_suspend(struct device *device) +static int intel_pch_thermal_suspend_noirq(struct device *device) { struct pch_thermal_device *ptd = dev_get_drvdata(device); @@ -495,7 +508,7 @@ static const struct pci_device_id intel_pch_thermal_id[] = { MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id); static const struct dev_pm_ops intel_pch_pm_ops = { - .suspend = intel_pch_thermal_suspend, + .suspend_noirq = intel_pch_thermal_suspend_noirq, .resume = intel_pch_thermal_resume, }; diff --git a/drivers/thermal/k3_bandgap.c b/drivers/thermal/k3_bandgap.c index 35f41e8a0b75..5d0b3ffc6f46 100644 --- a/drivers/thermal/k3_bandgap.c +++ b/drivers/thermal/k3_bandgap.c @@ -16,6 +16,8 @@ #include <linux/thermal.h> #include <linux/types.h> +#include "thermal_hwmon.h" + #define K3_VTM_DEVINFO_PWR0_OFFSET 0x4 #define K3_VTM_DEVINFO_PWR0_TEMPSENS_CT_MASK 0xf0 #define K3_VTM_TMPSENS0_CTRL_OFFSET 0x80 @@ -219,6 +221,9 @@ static int k3_bandgap_probe(struct platform_device *pdev) ret = PTR_ERR(data[id].tzd); goto err_alloc; } + + if (devm_thermal_add_hwmon_sysfs(data[id].tzd)) + dev_warn(dev, "Failed to add hwmon sysfs attributes\n"); } platform_set_drvdata(pdev, bgp); diff --git a/drivers/thermal/k3_j72xx_bandgap.c b/drivers/thermal/k3_j72xx_bandgap.c new file mode 100644 index 000000000000..64e323158952 --- /dev/null +++ b/drivers/thermal/k3_j72xx_bandgap.c @@ -0,0 +1,566 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * TI Bandgap temperature sensor driver for J72XX SoC Family + * + * Copyright (C) 2021 Texas Instruments Incorporated - http://www.ti.com/ + */ + +#include <linux/math.h> +#include <linux/math64.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/pm_runtime.h> +#include <linux/err.h> +#include <linux/types.h> +#include <linux/of_platform.h> +#include <linux/io.h> +#include <linux/thermal.h> +#include <linux/of.h> +#include <linux/delay.h> +#include <linux/slab.h> + +#define K3_VTM_DEVINFO_PWR0_OFFSET 0x4 +#define K3_VTM_DEVINFO_PWR0_TEMPSENS_CT_MASK 0xf0 +#define K3_VTM_TMPSENS0_CTRL_OFFSET 0x300 +#define K3_VTM_MISC_CTRL_OFFSET 0xc +#define K3_VTM_TMPSENS_STAT_OFFSET 0x8 +#define K3_VTM_ANYMAXT_OUTRG_ALERT_EN 0x1 +#define K3_VTM_MISC_CTRL2_OFFSET 0x10 +#define K3_VTM_TS_STAT_DTEMP_MASK 0x3ff +#define K3_VTM_MAX_NUM_TS 8 +#define K3_VTM_TMPSENS_CTRL_SOC BIT(5) +#define K3_VTM_TMPSENS_CTRL_CLRZ BIT(6) +#define K3_VTM_TMPSENS_CTRL_CLKON_REQ BIT(7) +#define K3_VTM_TMPSENS_CTRL_MAXT_OUTRG_EN BIT(11) + +#define K3_VTM_CORRECTION_TEMP_CNT 3 + +#define MINUS40CREF 5 +#define PLUS30CREF 253 +#define PLUS125CREF 730 +#define PLUS150CREF 940 + +#define TABLE_SIZE 1024 +#define MAX_TEMP 123000 +#define COOL_DOWN_TEMP 105000 + +#define FACTORS_REDUCTION 13 +static int *derived_table; + +static int compute_value(int index, const s64 *factors, int nr_factors, + int reduction) +{ + s64 value = 0; + int i; + + for (i = 0; i < nr_factors; i++) + value += factors[i] * int_pow(index, i); + + return (int)div64_s64(value, int_pow(10, reduction)); +} + +static void init_table(int factors_size, int *table, const s64 *factors) +{ + int i; + + for (i = 0; i < TABLE_SIZE; i++) + table[i] = compute_value(i, factors, factors_size, + FACTORS_REDUCTION); +} + +/** + * struct err_values - structure containing error/reference values + * @refs: reference error values for -40C, 30C, 125C & 150C + * @errs: Actual error values for -40C, 30C, 125C & 150C read from the efuse + */ +struct err_values { + int refs[4]; + int errs[4]; +}; + +static void create_table_segments(struct err_values *err_vals, int seg, + int *ref_table) +{ + int m = 0, c, num, den, i, err, idx1, idx2, err1, err2, ref1, ref2; + + if (seg == 0) + idx1 = 0; + else + idx1 = err_vals->refs[seg]; + + idx2 = err_vals->refs[seg + 1]; + err1 = err_vals->errs[seg]; + err2 = err_vals->errs[seg + 1]; + ref1 = err_vals->refs[seg]; + ref2 = err_vals->refs[seg + 1]; + + /* + * Calculate the slope with adc values read from the register + * as the y-axis param and err in adc value as x-axis param + */ + num = ref2 - ref1; + den = err2 - err1; + if (den) + m = num / den; + c = ref2 - m * err2; + + /* + * Take care of divide by zero error if error values are same + * Or when the slope is 0 + */ + if (den != 0 && m != 0) { + for (i = idx1; i <= idx2; i++) { + err = (i - c) / m; + if (((i + err) < 0) || ((i + err) >= TABLE_SIZE)) + continue; + derived_table[i] = ref_table[i + err]; + } + } else { /* Constant error take care of divide by zero */ + for (i = idx1; i <= idx2; i++) { + if (((i + err1) < 0) || ((i + err1) >= TABLE_SIZE)) + continue; + derived_table[i] = ref_table[i + err1]; + } + } +} + +static int prep_lookup_table(struct err_values *err_vals, int *ref_table) +{ + int inc, i, seg; + + /* + * Fill up the lookup table under 3 segments + * region -40C to +30C + * region +30C to +125C + * region +125C to +150C + */ + for (seg = 0; seg < 3; seg++) + create_table_segments(err_vals, seg, ref_table); + + /* Get to the first valid temperature */ + i = 0; + while (!derived_table[i]) + i++; + + /* + * Get to the last zero index and back fill the temperature for + * sake of continuity + */ + if (i) { + /* 300 milli celsius steps */ + while (i--) + derived_table[i] = derived_table[i + 1] - 300; + /* case 0 */ + derived_table[i] = derived_table[i + 1] - 300; + } + + /* + * Fill the last trailing 0s which are unfilled with increments of + * 100 milli celsius till 1023 code + */ + i = TABLE_SIZE - 1; + while (!derived_table[i]) + i--; + + i++; + inc = 1; + while (i < TABLE_SIZE) { + derived_table[i] = derived_table[i - 1] + inc * 100; + i++; + } + + return 0; +} + +struct k3_thermal_data; + +struct k3_j72xx_bandgap { + struct device *dev; + void __iomem *base; + void __iomem *cfg2_base; + void __iomem *fuse_base; + struct k3_thermal_data *ts_data[K3_VTM_MAX_NUM_TS]; +}; + +/* common data structures */ +struct k3_thermal_data { + struct k3_j72xx_bandgap *bgp; + u32 ctrl_offset; + u32 stat_offset; +}; + +static int two_cmp(int tmp, int mask) +{ + tmp = ~(tmp); + tmp &= mask; + tmp += 1; + + /* Return negative value */ + return (0 - tmp); +} + +static unsigned int vtm_get_best_value(unsigned int s0, unsigned int s1, + unsigned int s2) +{ + int d01 = abs(s0 - s1); + int d02 = abs(s0 - s2); + int d12 = abs(s1 - s2); + + if (d01 <= d02 && d01 <= d12) + return (s0 + s1) / 2; + + if (d02 <= d01 && d02 <= d12) + return (s0 + s2) / 2; + + return (s1 + s2) / 2; +} + +static inline int k3_bgp_read_temp(struct k3_thermal_data *devdata, + int *temp) +{ + struct k3_j72xx_bandgap *bgp; + unsigned int dtemp, s0, s1, s2; + + bgp = devdata->bgp; + /* + * Errata is applicable for am654 pg 1.0 silicon/J7ES. There + * is a variation of the order for certain degree centigrade on AM654. + * Work around that by getting the average of two closest + * readings out of three readings everytime we want to + * report temperatures. + * + * Errata workaround. + */ + s0 = readl(bgp->base + devdata->stat_offset) & + K3_VTM_TS_STAT_DTEMP_MASK; + s1 = readl(bgp->base + devdata->stat_offset) & + K3_VTM_TS_STAT_DTEMP_MASK; + s2 = readl(bgp->base + devdata->stat_offset) & + K3_VTM_TS_STAT_DTEMP_MASK; + dtemp = vtm_get_best_value(s0, s1, s2); + + if (dtemp < 0 || dtemp >= TABLE_SIZE) + return -EINVAL; + + *temp = derived_table[dtemp]; + + return 0; +} + +/* Get temperature callback function for thermal zone */ +static int k3_thermal_get_temp(void *devdata, int *temp) +{ + struct k3_thermal_data *data = devdata; + int ret = 0; + + ret = k3_bgp_read_temp(data, temp); + if (ret) + return ret; + + return ret; +} + +static const struct thermal_zone_of_device_ops k3_of_thermal_ops = { + .get_temp = k3_thermal_get_temp, +}; + +static int k3_j72xx_bandgap_temp_to_adc_code(int temp) +{ + int low = 0, high = TABLE_SIZE - 1, mid; + + if (temp > 160000 || temp < -50000) + return -EINVAL; + + /* Binary search to find the adc code */ + while (low < (high - 1)) { + mid = (low + high) / 2; + if (temp <= derived_table[mid]) + high = mid; + else + low = mid; + } + + return mid; +} + +static void get_efuse_values(int id, struct k3_thermal_data *data, int *err, + struct k3_j72xx_bandgap *bgp) +{ + int i, tmp, pow; + int ct_offsets[5][K3_VTM_CORRECTION_TEMP_CNT] = { + { 0x0, 0x8, 0x4 }, + { 0x0, 0x8, 0x4 }, + { 0x0, -1, 0x4 }, + { 0x0, 0xC, -1 }, + { 0x0, 0xc, 0x8 } + }; + int ct_bm[5][K3_VTM_CORRECTION_TEMP_CNT] = { + { 0x3f, 0x1fe000, 0x1ff }, + { 0xfc0, 0x1fe000, 0x3fe00 }, + { 0x3f000, 0x7f800000, 0x7fc0000 }, + { 0xfc0000, 0x1fe0, 0x1f800000 }, + { 0x3f000000, 0x1fe000, 0x1ff0 } + }; + + for (i = 0; i < 3; i++) { + /* Extract the offset value using bit-mask */ + if (ct_offsets[id][i] == -1 && i == 1) { + /* 25C offset Case of Sensor 2 split between 2 regs */ + tmp = (readl(bgp->fuse_base + 0x8) & 0xE0000000) >> (29); + tmp |= ((readl(bgp->fuse_base + 0xC) & 0x1F) << 3); + pow = tmp & 0x80; + } else if (ct_offsets[id][i] == -1 && i == 2) { + /* 125C Case of Sensor 3 split between 2 regs */ + tmp = (readl(bgp->fuse_base + 0x4) & 0xF8000000) >> (27); + tmp |= ((readl(bgp->fuse_base + 0x8) & 0xF) << 5); + pow = tmp & 0x100; + } else { + tmp = readl(bgp->fuse_base + ct_offsets[id][i]); + tmp &= ct_bm[id][i]; + tmp = tmp >> __ffs(ct_bm[id][i]); + + /* Obtain the sign bit pow*/ + pow = ct_bm[id][i] >> __ffs(ct_bm[id][i]); + pow += 1; + pow /= 2; + } + + /* Check for negative value */ + if (tmp & pow) { + /* 2's complement value */ + tmp = two_cmp(tmp, ct_bm[id][i] >> __ffs(ct_bm[id][i])); + } + err[i] = tmp; + } + + /* Err value for 150C is set to 0 */ + err[i] = 0; +} + +static void print_look_up_table(struct device *dev, int *ref_table) +{ + int i; + + dev_dbg(dev, "The contents of derived array\n"); + dev_dbg(dev, "Code Temperature\n"); + for (i = 0; i < TABLE_SIZE; i++) + dev_dbg(dev, "%d %d %d\n", i, derived_table[i], ref_table[i]); +} + +struct k3_j72xx_bandgap_data { + unsigned int has_errata_i2128; +}; + +static int k3_j72xx_bandgap_probe(struct platform_device *pdev) +{ + int ret = 0, cnt, val, id; + int high_max, low_temp; + struct resource *res; + struct device *dev = &pdev->dev; + struct k3_j72xx_bandgap *bgp; + struct k3_thermal_data *data; + int workaround_needed = 0; + const struct k3_j72xx_bandgap_data *driver_data; + struct thermal_zone_device *ti_thermal; + int *ref_table; + struct err_values err_vals; + + const s64 golden_factors[] = { + -490019999999999936, + 3251200000000000, + -1705800000000, + 603730000, + -92627, + }; + + const s64 pvt_wa_factors[] = { + -415230000000000000, + 3126600000000000, + -1157800000000, + }; + + bgp = devm_kzalloc(&pdev->dev, sizeof(*bgp), GFP_KERNEL); + if (!bgp) + return -ENOMEM; + + bgp->dev = dev; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + bgp->base = devm_ioremap_resource(dev, res); + if (IS_ERR(bgp->base)) + return PTR_ERR(bgp->base); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + bgp->cfg2_base = devm_ioremap_resource(dev, res); + if (IS_ERR(bgp->cfg2_base)) + return PTR_ERR(bgp->cfg2_base); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 2); + bgp->fuse_base = devm_ioremap_resource(dev, res); + if (IS_ERR(bgp->fuse_base)) + return PTR_ERR(bgp->fuse_base); + + driver_data = of_device_get_match_data(dev); + if (driver_data) + workaround_needed = driver_data->has_errata_i2128; + + pm_runtime_enable(dev); + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); + return ret; + } + + /* Get the sensor count in the VTM */ + val = readl(bgp->base + K3_VTM_DEVINFO_PWR0_OFFSET); + cnt = val & K3_VTM_DEVINFO_PWR0_TEMPSENS_CT_MASK; + cnt >>= __ffs(K3_VTM_DEVINFO_PWR0_TEMPSENS_CT_MASK); + + data = devm_kcalloc(bgp->dev, cnt, sizeof(*data), GFP_KERNEL); + if (!data) { + ret = -ENOMEM; + goto err_alloc; + } + + ref_table = kzalloc(sizeof(*ref_table) * TABLE_SIZE, GFP_KERNEL); + if (!ref_table) { + ret = -ENOMEM; + goto err_alloc; + } + + derived_table = devm_kzalloc(bgp->dev, sizeof(*derived_table) * TABLE_SIZE, + GFP_KERNEL); + if (!derived_table) { + ret = -ENOMEM; + goto err_alloc; + } + + /* Workaround not needed if bit30/bit31 is set even for J721e */ + if (workaround_needed && (readl(bgp->fuse_base + 0x0) & 0xc0000000) == 0xc0000000) + workaround_needed = false; + + dev_dbg(bgp->dev, "Work around %sneeded\n", + workaround_needed ? "not " : ""); + + if (!workaround_needed) + init_table(5, ref_table, golden_factors); + else + init_table(3, ref_table, pvt_wa_factors); + + /* Register the thermal sensors */ + for (id = 0; id < cnt; id++) { + data[id].bgp = bgp; + data[id].ctrl_offset = K3_VTM_TMPSENS0_CTRL_OFFSET + id * 0x20; + data[id].stat_offset = data[id].ctrl_offset + + K3_VTM_TMPSENS_STAT_OFFSET; + + if (workaround_needed) { + /* ref adc values for -40C, 30C & 125C respectively */ + err_vals.refs[0] = MINUS40CREF; + err_vals.refs[1] = PLUS30CREF; + err_vals.refs[2] = PLUS125CREF; + err_vals.refs[3] = PLUS150CREF; + get_efuse_values(id, &data[id], err_vals.errs, bgp); + } + + if (id == 0 && workaround_needed) + prep_lookup_table(&err_vals, ref_table); + else if (id == 0 && !workaround_needed) + memcpy(derived_table, ref_table, TABLE_SIZE * 4); + + val = readl(data[id].bgp->cfg2_base + data[id].ctrl_offset); + val |= (K3_VTM_TMPSENS_CTRL_MAXT_OUTRG_EN | + K3_VTM_TMPSENS_CTRL_SOC | + K3_VTM_TMPSENS_CTRL_CLRZ | BIT(4)); + writel(val, data[id].bgp->cfg2_base + data[id].ctrl_offset); + + bgp->ts_data[id] = &data[id]; + ti_thermal = + devm_thermal_zone_of_sensor_register(bgp->dev, id, + &data[id], + &k3_of_thermal_ops); + if (IS_ERR(ti_thermal)) { + dev_err(bgp->dev, "thermal zone device is NULL\n"); + ret = PTR_ERR(ti_thermal); + goto err_alloc; + } + } + + /* + * Program TSHUT thresholds + * Step 1: set the thresholds to ~123C and 105C WKUP_VTM_MISC_CTRL2 + * Step 2: WKUP_VTM_TMPSENS_CTRL_j set the MAXT_OUTRG_EN bit + * This is already taken care as per of init + * Step 3: WKUP_VTM_MISC_CTRL set the ANYMAXT_OUTRG_ALERT_EN bit + */ + high_max = k3_j72xx_bandgap_temp_to_adc_code(MAX_TEMP); + low_temp = k3_j72xx_bandgap_temp_to_adc_code(COOL_DOWN_TEMP); + + writel((low_temp << 16) | high_max, data[0].bgp->cfg2_base + + K3_VTM_MISC_CTRL2_OFFSET); + mdelay(100); + writel(K3_VTM_ANYMAXT_OUTRG_ALERT_EN, data[0].bgp->cfg2_base + + K3_VTM_MISC_CTRL_OFFSET); + + platform_set_drvdata(pdev, bgp); + + print_look_up_table(dev, ref_table); + /* + * Now that the derived_table has the appropriate look up values + * Free up the ref_table + */ + kfree(ref_table); + + return 0; + +err_alloc: + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + return ret; +} + +static int k3_j72xx_bandgap_remove(struct platform_device *pdev) +{ + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + return 0; +} + +const struct k3_j72xx_bandgap_data k3_j72xx_bandgap_j721e_data = { + .has_errata_i2128 = 1, +}; + +const struct k3_j72xx_bandgap_data k3_j72xx_bandgap_j7200_data = { + .has_errata_i2128 = 0, +}; + +static const struct of_device_id of_k3_j72xx_bandgap_match[] = { + { + .compatible = "ti,j721e-vtm", + .data = &k3_j72xx_bandgap_j721e_data, + }, + { + .compatible = "ti,j7200-vtm", + .data = &k3_j72xx_bandgap_j7200_data, + }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, of_k3_j72xx_bandgap_match); + +static struct platform_driver k3_j72xx_bandgap_sensor_driver = { + .probe = k3_j72xx_bandgap_probe, + .remove = k3_j72xx_bandgap_remove, + .driver = { + .name = "k3-j72xx-soc-thermal", + .of_match_table = of_k3_j72xx_bandgap_match, + }, +}; + +module_platform_driver(k3_j72xx_bandgap_sensor_driver); + +MODULE_DESCRIPTION("K3 bandgap temperature sensor driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("J Keerthy <j-keerthy@ti.com>"); diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c index c7f91cbdccc7..d3d9b9fa49e8 100644 --- a/drivers/thermal/qcom/lmh.c +++ b/drivers/thermal/qcom/lmh.c @@ -220,6 +220,7 @@ static int lmh_probe(struct platform_device *pdev) } static const struct of_device_id lmh_table[] = { + { .compatible = "qcom,sc8180x-lmh", }, { .compatible = "qcom,sdm845-lmh", .data = (void *)LMH_ENABLE_ALGOS}, { .compatible = "qcom,sm8150-lmh", }, {} diff --git a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c index 824671cf494a..d9c9c975f931 100644 --- a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c +++ b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c @@ -4,7 +4,10 @@ * * Based on original driver: * Copyright (c) 2012-2020, The Linux Foundation. All rights reserved. + * + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ + #include <linux/bitfield.h> #include <linux/iio/adc/qcom-vadc-common.h> #include <linux/iio/consumer.h> @@ -15,6 +18,7 @@ #include <linux/platform_device.h> #include <linux/regmap.h> #include <linux/thermal.h> +#include <asm-generic/unaligned.h> /* * Thermal monitoring block consists of 8 (ADC_TM5_NUM_CHANNELS) channels. Each @@ -71,6 +75,60 @@ #define ADC_TM5_M_HIGH_THR_INT_EN BIT(1) #define ADC_TM5_M_LOW_THR_INT_EN BIT(0) +#define ADC_TM_GEN2_STATUS1 0x08 +#define ADC_TM_GEN2_STATUS_LOW_SET 0x09 +#define ADC_TM_GEN2_STATUS_LOW_CLR 0x0a +#define ADC_TM_GEN2_STATUS_HIGH_SET 0x0b +#define ADC_TM_GEN2_STATUS_HIGH_CLR 0x0c + +#define ADC_TM_GEN2_CFG_HS_SET 0x0d +#define ADC_TM_GEN2_CFG_HS_FLAG BIT(0) +#define ADC_TM_GEN2_CFG_HS_CLR 0x0e + +#define ADC_TM_GEN2_SID 0x40 + +#define ADC_TM_GEN2_CH_CTL 0x41 +#define ADC_TM_GEN2_TM_CH_SEL GENMASK(7, 5) +#define ADC_TM_GEN2_MEAS_INT_SEL GENMASK(3, 2) + +#define ADC_TM_GEN2_ADC_DIG_PARAM 0x42 +#define ADC_TM_GEN2_CTL_CAL_SEL GENMASK(5, 4) +#define ADC_TM_GEN2_CTL_DEC_RATIO_MASK GENMASK(3, 2) + +#define ADC_TM_GEN2_FAST_AVG_CTL 0x43 +#define ADC_TM_GEN2_FAST_AVG_EN BIT(7) + +#define ADC_TM_GEN2_ADC_CH_SEL_CTL 0x44 + +#define ADC_TM_GEN2_DELAY_CTL 0x45 +#define ADC_TM_GEN2_HW_SETTLE_DELAY GENMASK(3, 0) + +#define ADC_TM_GEN2_EN_CTL1 0x46 +#define ADC_TM_GEN2_EN BIT(7) + +#define ADC_TM_GEN2_CONV_REQ 0x47 +#define ADC_TM_GEN2_CONV_REQ_EN BIT(7) + +#define ADC_TM_GEN2_LOW_THR0 0x49 +#define ADC_TM_GEN2_LOW_THR1 0x4a +#define ADC_TM_GEN2_HIGH_THR0 0x4b +#define ADC_TM_GEN2_HIGH_THR1 0x4c +#define ADC_TM_GEN2_LOWER_MASK(n) ((n) & GENMASK(7, 0)) +#define ADC_TM_GEN2_UPPER_MASK(n) (((n) & GENMASK(15, 8)) >> 8) + +#define ADC_TM_GEN2_MEAS_IRQ_EN 0x4d +#define ADC_TM_GEN2_MEAS_EN BIT(7) +#define ADC_TM5_GEN2_HIGH_THR_INT_EN BIT(1) +#define ADC_TM5_GEN2_LOW_THR_INT_EN BIT(0) + +#define ADC_TM_GEN2_MEAS_INT_LSB 0x50 +#define ADC_TM_GEN2_MEAS_INT_MSB 0x51 +#define ADC_TM_GEN2_MEAS_INT_MODE 0x52 + +#define ADC_TM_GEN2_Mn_DATA0(n) ((n * 2) + 0xa0) +#define ADC_TM_GEN2_Mn_DATA1(n) ((n * 2) + 0xa1) +#define ADC_TM_GEN2_DATA_SHIFT 8 + enum adc5_timer_select { ADC5_TIMER_SEL_1 = 0, ADC5_TIMER_SEL_2, @@ -78,11 +136,11 @@ enum adc5_timer_select { ADC5_TIMER_SEL_NONE, }; -struct adc_tm5_data { - const u32 full_scale_code_volt; - unsigned int *decimation; - unsigned int *hw_settle; - bool is_hc; +enum adc5_gen { + ADC_TM5, + ADC_TM_HC, + ADC_TM5_GEN2, + ADC_TM5_MAX }; enum adc_tm5_cal_method { @@ -91,7 +149,28 @@ enum adc_tm5_cal_method { ADC_TM5_ABSOLUTE_CAL }; +enum adc_tm_gen2_time_select { + MEAS_INT_50MS = 0, + MEAS_INT_100MS, + MEAS_INT_1S, + MEAS_INT_SET, + MEAS_INT_NONE, +}; + struct adc_tm5_chip; +struct adc_tm5_channel; + +struct adc_tm5_data { + const u32 full_scale_code_volt; + unsigned int *decimation; + unsigned int *hw_settle; + int (*disable_channel)(struct adc_tm5_channel *channel); + int (*configure)(struct adc_tm5_channel *channel, int low, int high); + irqreturn_t (*isr)(int irq, void *data); + int (*init)(struct adc_tm5_chip *chip); + char *irq_name; + int gen; +}; /** * struct adc_tm5_channel - ADC Thermal Monitoring channel data. @@ -101,6 +180,12 @@ struct adc_tm5_chip; * @prescale: channel scaling performed on the input signal. * @hw_settle_time: the time between AMUX being configured and the * start of conversion. + * @decimation: sampling rate supported for the channel. + * @avg_samples: ability to provide single result from the ADC + * that is an average of multiple measurements. + * @high_thr_en: channel upper voltage threshold enable state. + * @low_thr_en: channel lower voltage threshold enable state. + * @meas_en: recurring measurement enable state * @iio: IIO channel instance used by this channel. * @chip: ADC TM chip instance. * @tzd: thermal zone device used by this channel. @@ -111,6 +196,11 @@ struct adc_tm5_channel { enum adc_tm5_cal_method cal_method; unsigned int prescale; unsigned int hw_settle_time; + unsigned int decimation; /* For Gen2 ADC_TM */ + unsigned int avg_samples; /* For Gen2 ADC_TM */ + bool high_thr_en; /* For Gen2 ADC_TM */ + bool low_thr_en; /* For Gen2 ADC_TM */ + bool meas_en; /* For Gen2 ADC_TM */ struct iio_channel *iio; struct adc_tm5_chip *chip; struct thermal_zone_device *tzd; @@ -124,9 +214,15 @@ struct adc_tm5_channel { * @channels: array of ADC TM channel data. * @nchannels: amount of channels defined/allocated * @decimation: sampling rate supported for the channel. + * Applies to all channels, used only on Gen1 ADC_TM. * @avg_samples: ability to provide single result from the ADC - * that is an average of multiple measurements. + * that is an average of multiple measurements. Applies to all + * channels, used only on Gen1 ADC_TM. * @base: base address of TM registers. + * @adc_mutex_lock: ADC_TM mutex lock, used only on Gen2 ADC_TM. + * It is used to ensure only one ADC channel configuration + * is done at a time using the shared set of configuration + * registers. */ struct adc_tm5_chip { struct regmap *regmap; @@ -137,22 +233,7 @@ struct adc_tm5_chip { unsigned int decimation; unsigned int avg_samples; u16 base; -}; - -static const struct adc_tm5_data adc_tm5_data_pmic = { - .full_scale_code_volt = 0x70e4, - .decimation = (unsigned int []) { 250, 420, 840 }, - .hw_settle = (unsigned int []) { 15, 100, 200, 300, 400, 500, 600, 700, - 1000, 2000, 4000, 8000, 16000, 32000, - 64000, 128000 }, -}; - -static const struct adc_tm5_data adc_tm_hc_data_pmic = { - .full_scale_code_volt = 0x70e4, - .decimation = (unsigned int []) { 256, 512, 1024 }, - .hw_settle = (unsigned int []) { 0, 100, 200, 300, 400, 500, 600, 700, - 1000, 2000, 4000, 6000, 8000, 10000 }, - .is_hc = true, + struct mutex adc_mutex_lock; }; static int adc_tm5_read(struct adc_tm5_chip *adc_tm, u16 offset, u8 *data, int len) @@ -219,6 +300,61 @@ static irqreturn_t adc_tm5_isr(int irq, void *data) return IRQ_HANDLED; } +static irqreturn_t adc_tm5_gen2_isr(int irq, void *data) +{ + struct adc_tm5_chip *chip = data; + u8 status_low, status_high; + int ret, i; + + ret = adc_tm5_read(chip, ADC_TM_GEN2_STATUS_LOW_CLR, &status_low, sizeof(status_low)); + if (ret) { + dev_err(chip->dev, "read status_low failed: %d\n", ret); + return IRQ_HANDLED; + } + + ret = adc_tm5_read(chip, ADC_TM_GEN2_STATUS_HIGH_CLR, &status_high, sizeof(status_high)); + if (ret) { + dev_err(chip->dev, "read status_high failed: %d\n", ret); + return IRQ_HANDLED; + } + + ret = adc_tm5_write(chip, ADC_TM_GEN2_STATUS_LOW_CLR, &status_low, sizeof(status_low)); + if (ret < 0) { + dev_err(chip->dev, "clear status low failed with %d\n", ret); + return IRQ_HANDLED; + } + + ret = adc_tm5_write(chip, ADC_TM_GEN2_STATUS_HIGH_CLR, &status_high, sizeof(status_high)); + if (ret < 0) { + dev_err(chip->dev, "clear status high failed with %d\n", ret); + return IRQ_HANDLED; + } + + for (i = 0; i < chip->nchannels; i++) { + bool upper_set = false, lower_set = false; + unsigned int ch = chip->channels[i].channel; + + /* No TZD, we warned at the boot time */ + if (!chip->channels[i].tzd) + continue; + + if (!chip->channels[i].meas_en) + continue; + + lower_set = (status_low & BIT(ch)) && + (chip->channels[i].low_thr_en); + + upper_set = (status_high & BIT(ch)) && + (chip->channels[i].high_thr_en); + + if (upper_set || lower_set) + thermal_zone_device_update(chip->channels[i].tzd, + THERMAL_EVENT_UNSPECIFIED); + } + + return IRQ_HANDLED; +} + static int adc_tm5_get_temp(void *data, int *temp) { struct adc_tm5_channel *channel = data; @@ -249,6 +385,104 @@ static int adc_tm5_disable_channel(struct adc_tm5_channel *channel) 0); } +#define ADC_TM_GEN2_POLL_DELAY_MIN_US 100 +#define ADC_TM_GEN2_POLL_DELAY_MAX_US 110 +#define ADC_TM_GEN2_POLL_RETRY_COUNT 3 + +static int32_t adc_tm5_gen2_conv_req(struct adc_tm5_chip *chip) +{ + int ret; + u8 data; + unsigned int count; + + data = ADC_TM_GEN2_EN; + ret = adc_tm5_write(chip, ADC_TM_GEN2_EN_CTL1, &data, 1); + if (ret < 0) { + dev_err(chip->dev, "adc-tm enable failed with %d\n", ret); + return ret; + } + + data = ADC_TM_GEN2_CFG_HS_FLAG; + ret = adc_tm5_write(chip, ADC_TM_GEN2_CFG_HS_SET, &data, 1); + if (ret < 0) { + dev_err(chip->dev, "adc-tm handshake failed with %d\n", ret); + return ret; + } + + data = ADC_TM_GEN2_CONV_REQ_EN; + ret = adc_tm5_write(chip, ADC_TM_GEN2_CONV_REQ, &data, 1); + if (ret < 0) { + dev_err(chip->dev, "adc-tm request conversion failed with %d\n", ret); + return ret; + } + + /* + * SW sets a handshake bit and waits for PBS to clear it + * before the next conversion request can be queued. + */ + + for (count = 0; count < ADC_TM_GEN2_POLL_RETRY_COUNT; count++) { + ret = adc_tm5_read(chip, ADC_TM_GEN2_CFG_HS_SET, &data, sizeof(data)); + if (ret < 0) { + dev_err(chip->dev, "adc-tm read failed with %d\n", ret); + return ret; + } + + if (!(data & ADC_TM_GEN2_CFG_HS_FLAG)) + return ret; + usleep_range(ADC_TM_GEN2_POLL_DELAY_MIN_US, + ADC_TM_GEN2_POLL_DELAY_MAX_US); + } + + dev_err(chip->dev, "adc-tm conversion request handshake timed out\n"); + + return -ETIMEDOUT; +} + +static int adc_tm5_gen2_disable_channel(struct adc_tm5_channel *channel) +{ + struct adc_tm5_chip *chip = channel->chip; + int ret; + u8 val; + + mutex_lock(&chip->adc_mutex_lock); + + channel->meas_en = false; + channel->high_thr_en = false; + channel->low_thr_en = false; + + ret = adc_tm5_read(chip, ADC_TM_GEN2_CH_CTL, &val, sizeof(val)); + if (ret < 0) { + dev_err(chip->dev, "adc-tm block read failed with %d\n", ret); + goto disable_fail; + } + + val &= ~ADC_TM_GEN2_TM_CH_SEL; + val |= FIELD_PREP(ADC_TM_GEN2_TM_CH_SEL, channel->channel); + + ret = adc_tm5_write(chip, ADC_TM_GEN2_CH_CTL, &val, 1); + if (ret < 0) { + dev_err(chip->dev, "adc-tm channel disable failed with %d\n", ret); + goto disable_fail; + } + + val = 0; + ret = adc_tm5_write(chip, ADC_TM_GEN2_MEAS_IRQ_EN, &val, 1); + if (ret < 0) { + dev_err(chip->dev, "adc-tm interrupt disable failed with %d\n", ret); + goto disable_fail; + } + + + ret = adc_tm5_gen2_conv_req(channel->chip); + if (ret < 0) + dev_err(chip->dev, "adc-tm channel configure failed with %d\n", ret); + +disable_fail: + mutex_unlock(&chip->adc_mutex_lock); + return ret; +} + static int adc_tm5_enable(struct adc_tm5_chip *chip) { int ret; @@ -291,8 +525,7 @@ static int adc_tm5_configure(struct adc_tm5_channel *channel, int low, int high) u16 adc_code = qcom_adc_tm5_temp_volt_scale(channel->prescale, chip->data->full_scale_code_volt, high); - buf[1] = adc_code & 0xff; - buf[2] = adc_code >> 8; + put_unaligned_le16(adc_code, &buf[1]); buf[7] |= ADC_TM5_M_LOW_THR_INT_EN; } else { buf[7] &= ~ADC_TM5_M_LOW_THR_INT_EN; @@ -303,8 +536,7 @@ static int adc_tm5_configure(struct adc_tm5_channel *channel, int low, int high) u16 adc_code = qcom_adc_tm5_temp_volt_scale(channel->prescale, chip->data->full_scale_code_volt, low); - buf[3] = adc_code & 0xff; - buf[4] = adc_code >> 8; + put_unaligned_le16(adc_code, &buf[3]); buf[7] |= ADC_TM5_M_HIGH_THR_INT_EN; } else { buf[7] &= ~ADC_TM5_M_HIGH_THR_INT_EN; @@ -329,6 +561,82 @@ static int adc_tm5_configure(struct adc_tm5_channel *channel, int low, int high) return adc_tm5_enable(chip); } +static int adc_tm5_gen2_configure(struct adc_tm5_channel *channel, int low, int high) +{ + struct adc_tm5_chip *chip = channel->chip; + int ret; + u8 buf[14]; + u16 adc_code; + + mutex_lock(&chip->adc_mutex_lock); + + channel->meas_en = true; + + ret = adc_tm5_read(chip, ADC_TM_GEN2_SID, buf, sizeof(buf)); + if (ret < 0) { + dev_err(chip->dev, "adc-tm block read failed with %d\n", ret); + goto config_fail; + } + + /* Set SID from virtual channel number */ + buf[0] = channel->adc_channel >> 8; + + /* Set TM channel number used and measurement interval */ + buf[1] &= ~ADC_TM_GEN2_TM_CH_SEL; + buf[1] |= FIELD_PREP(ADC_TM_GEN2_TM_CH_SEL, channel->channel); + buf[1] &= ~ADC_TM_GEN2_MEAS_INT_SEL; + buf[1] |= FIELD_PREP(ADC_TM_GEN2_MEAS_INT_SEL, MEAS_INT_1S); + + buf[2] &= ~ADC_TM_GEN2_CTL_DEC_RATIO_MASK; + buf[2] |= FIELD_PREP(ADC_TM_GEN2_CTL_DEC_RATIO_MASK, channel->decimation); + buf[2] &= ~ADC_TM_GEN2_CTL_CAL_SEL; + buf[2] |= FIELD_PREP(ADC_TM_GEN2_CTL_CAL_SEL, channel->cal_method); + + buf[3] = channel->avg_samples | ADC_TM_GEN2_FAST_AVG_EN; + + buf[4] = channel->adc_channel & 0xff; + + buf[5] = channel->hw_settle_time & ADC_TM_GEN2_HW_SETTLE_DELAY; + + /* High temperature corresponds to low voltage threshold */ + if (high != INT_MAX) { + channel->low_thr_en = true; + adc_code = qcom_adc_tm5_gen2_temp_res_scale(high); + put_unaligned_le16(adc_code, &buf[9]); + } else { + channel->low_thr_en = false; + } + + /* Low temperature corresponds to high voltage threshold */ + if (low != -INT_MAX) { + channel->high_thr_en = true; + adc_code = qcom_adc_tm5_gen2_temp_res_scale(low); + put_unaligned_le16(adc_code, &buf[11]); + } else { + channel->high_thr_en = false; + } + + buf[13] = ADC_TM_GEN2_MEAS_EN; + if (channel->high_thr_en) + buf[13] |= ADC_TM5_GEN2_HIGH_THR_INT_EN; + if (channel->low_thr_en) + buf[13] |= ADC_TM5_GEN2_LOW_THR_INT_EN; + + ret = adc_tm5_write(chip, ADC_TM_GEN2_SID, buf, sizeof(buf)); + if (ret) { + dev_err(chip->dev, "channel %d params write failed: %d\n", channel->channel, ret); + goto config_fail; + } + + ret = adc_tm5_gen2_conv_req(channel->chip); + if (ret < 0) + dev_err(chip->dev, "adc-tm channel configure failed with %d\n", ret); + +config_fail: + mutex_unlock(&chip->adc_mutex_lock); + return ret; +} + static int adc_tm5_set_trips(void *data, int low, int high) { struct adc_tm5_channel *channel = data; @@ -343,14 +651,14 @@ static int adc_tm5_set_trips(void *data, int low, int high) channel->channel, low, high); if (high == INT_MAX && low <= -INT_MAX) - ret = adc_tm5_disable_channel(channel); + ret = chip->data->disable_channel(channel); else - ret = adc_tm5_configure(channel, low, high); + ret = chip->data->configure(channel, low, high); return ret; } -static struct thermal_zone_of_device_ops adc_tm5_ops = { +static struct thermal_zone_of_device_ops adc_tm5_thermal_ops = { .get_temp = adc_tm5_get_temp, .set_trips = adc_tm5_set_trips, }; @@ -366,7 +674,7 @@ static int adc_tm5_register_tzd(struct adc_tm5_chip *adc_tm) tzd = devm_thermal_zone_of_sensor_register(adc_tm->dev, adc_tm->channels[i].channel, &adc_tm->channels[i], - &adc_tm5_ops); + &adc_tm5_thermal_ops); if (IS_ERR(tzd)) { if (PTR_ERR(tzd) == -ENODEV) { dev_warn(adc_tm->dev, "thermal sensor on channel %d is not used\n", @@ -442,12 +750,37 @@ static int adc_tm5_init(struct adc_tm5_chip *chip) return ret; } +static int adc_tm5_gen2_init(struct adc_tm5_chip *chip) +{ + u8 channels_available; + int ret; + unsigned int i; + + ret = adc_tm5_read(chip, ADC_TM5_NUM_BTM, + &channels_available, sizeof(channels_available)); + if (ret) { + dev_err(chip->dev, "read failed for BTM channels\n"); + return ret; + } + + for (i = 0; i < chip->nchannels; i++) { + if (chip->channels[i].channel >= channels_available) { + dev_err(chip->dev, "Invalid channel %d\n", chip->channels[i].channel); + return -EINVAL; + } + } + + mutex_init(&chip->adc_mutex_lock); + + return ret; +} + static int adc_tm5_get_dt_channel_data(struct adc_tm5_chip *adc_tm, struct adc_tm5_channel *channel, struct device_node *node) { const char *name = node->name; - u32 chan, value, varr[2]; + u32 chan, value, adc_channel, varr[2]; int ret; struct device *dev = adc_tm->dev; struct of_phandle_args args; @@ -477,7 +810,16 @@ static int adc_tm5_get_dt_channel_data(struct adc_tm5_chip *adc_tm, } of_node_put(args.np); - if (args.args_count != 1 || args.args[0] >= ADC5_MAX_CHANNEL) { + if (args.args_count != 1) { + dev_err(dev, "%s: invalid args count for ADC channel %d\n", name, chan); + return -EINVAL; + } + + adc_channel = args.args[0]; + if (adc_tm->data->gen == ADC_TM5_GEN2) + adc_channel &= 0xff; + + if (adc_channel >= ADC5_MAX_CHANNEL) { dev_err(dev, "%s: invalid ADC channel number %d\n", name, chan); return -EINVAL; } @@ -523,9 +865,76 @@ static int adc_tm5_get_dt_channel_data(struct adc_tm5_chip *adc_tm, else channel->cal_method = ADC_TM5_ABSOLUTE_CAL; + if (adc_tm->data->gen == ADC_TM5_GEN2) { + ret = of_property_read_u32(node, "qcom,decimation", &value); + if (!ret) { + ret = qcom_adc5_decimation_from_dt(value, adc_tm->data->decimation); + if (ret < 0) { + dev_err(dev, "invalid decimation %d\n", value); + return ret; + } + channel->decimation = ret; + } else { + channel->decimation = ADC5_DECIMATION_DEFAULT; + } + + ret = of_property_read_u32(node, "qcom,avg-samples", &value); + if (!ret) { + ret = qcom_adc5_avg_samples_from_dt(value); + if (ret < 0) { + dev_err(dev, "invalid avg-samples %d\n", value); + return ret; + } + channel->avg_samples = ret; + } else { + channel->avg_samples = VADC_DEF_AVG_SAMPLES; + } + } + return 0; } +static const struct adc_tm5_data adc_tm5_data_pmic = { + .full_scale_code_volt = 0x70e4, + .decimation = (unsigned int []) { 250, 420, 840 }, + .hw_settle = (unsigned int []) { 15, 100, 200, 300, 400, 500, 600, 700, + 1000, 2000, 4000, 8000, 16000, 32000, + 64000, 128000 }, + .disable_channel = adc_tm5_disable_channel, + .configure = adc_tm5_configure, + .isr = adc_tm5_isr, + .init = adc_tm5_init, + .irq_name = "pm-adc-tm5", + .gen = ADC_TM5, +}; + +static const struct adc_tm5_data adc_tm_hc_data_pmic = { + .full_scale_code_volt = 0x70e4, + .decimation = (unsigned int []) { 256, 512, 1024 }, + .hw_settle = (unsigned int []) { 0, 100, 200, 300, 400, 500, 600, 700, + 1000, 2000, 4000, 6000, 8000, 10000 }, + .disable_channel = adc_tm5_disable_channel, + .configure = adc_tm5_configure, + .isr = adc_tm5_isr, + .init = adc_tm_hc_init, + .irq_name = "pm-adc-tm5", + .gen = ADC_TM_HC, +}; + +static const struct adc_tm5_data adc_tm5_gen2_data_pmic = { + .full_scale_code_volt = 0x70e4, + .decimation = (unsigned int []) { 85, 340, 1360 }, + .hw_settle = (unsigned int []) { 15, 100, 200, 300, 400, 500, 600, 700, + 1000, 2000, 4000, 8000, 16000, 32000, + 64000, 128000 }, + .disable_channel = adc_tm5_gen2_disable_channel, + .configure = adc_tm5_gen2_configure, + .isr = adc_tm5_gen2_isr, + .init = adc_tm5_gen2_init, + .irq_name = "pm-adc-tm5-gen2", + .gen = ADC_TM5_GEN2, +}; + static int adc_tm5_get_dt_data(struct adc_tm5_chip *adc_tm, struct device_node *node) { struct adc_tm5_channel *channels; @@ -623,10 +1032,7 @@ static int adc_tm5_probe(struct platform_device *pdev) return ret; } - if (adc_tm->data->is_hc) - ret = adc_tm_hc_init(adc_tm); - else - ret = adc_tm5_init(adc_tm); + ret = adc_tm->data->init(adc_tm); if (ret) { dev_err(dev, "adc-tm init failed\n"); return ret; @@ -638,8 +1044,8 @@ static int adc_tm5_probe(struct platform_device *pdev) return ret; } - return devm_request_threaded_irq(dev, irq, NULL, adc_tm5_isr, - IRQF_ONESHOT, "pm-adc-tm5", adc_tm); + return devm_request_threaded_irq(dev, irq, NULL, adc_tm->data->isr, + IRQF_ONESHOT, adc_tm->data->irq_name, adc_tm); } static const struct of_device_id adc_tm5_match_table[] = { @@ -651,6 +1057,10 @@ static const struct of_device_id adc_tm5_match_table[] = { .compatible = "qcom,spmi-adc-tm-hc", .data = &adc_tm_hc_data_pmic, }, + { + .compatible = "qcom,spmi-adc-tm5-gen2", + .data = &adc_tm5_gen2_data_pmic, + }, { } }; MODULE_DEVICE_TABLE(of, adc_tm5_match_table); diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c index 154d3cb19c88..7963ee33bf75 100644 --- a/drivers/thermal/qcom/tsens.c +++ b/drivers/thermal/qcom/tsens.c @@ -980,6 +980,9 @@ static const struct of_device_id tsens_table[] = { .compatible = "qcom,msm8939-tsens", .data = &data_8939, }, { + .compatible = "qcom,msm8960-tsens", + .data = &data_8960, + }, { .compatible = "qcom,msm8974-tsens", .data = &data_8974, }, { diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index b49f04daaf47..1d729ed4d685 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -445,7 +445,7 @@ static int rcar_thermal_probe(struct platform_device *pdev) struct rcar_thermal_common *common; struct rcar_thermal_priv *priv; struct device *dev = &pdev->dev; - struct resource *res, *irq; + struct resource *res; const struct rcar_thermal_chip *chip = of_device_get_match_data(dev); int mres = 0; int i; @@ -467,9 +467,16 @@ static int rcar_thermal_probe(struct platform_device *pdev) pm_runtime_get_sync(dev); for (i = 0; i < chip->nirqs; i++) { - irq = platform_get_resource(pdev, IORESOURCE_IRQ, i); - if (!irq) - continue; + int irq; + + ret = platform_get_irq_optional(pdev, i); + if (ret < 0 && ret != -ENXIO) + goto error_unregister; + if (ret > 0) + irq = ret; + else + break; + if (!common->base) { /* * platform has IRQ support. @@ -487,7 +494,7 @@ static int rcar_thermal_probe(struct platform_device *pdev) idle = 0; /* polling delay is not needed */ } - ret = devm_request_irq(dev, irq->start, rcar_thermal_irq, + ret = devm_request_irq(dev, irq, rcar_thermal_irq, IRQF_SHARED, dev_name(dev), common); if (ret) { dev_err(dev, "irq request failed\n "); diff --git a/drivers/thermal/rzg2l_thermal.c b/drivers/thermal/rzg2l_thermal.c index 7a9cdc1f37ca..be07e04c6926 100644 --- a/drivers/thermal/rzg2l_thermal.c +++ b/drivers/thermal/rzg2l_thermal.c @@ -32,6 +32,8 @@ #define TSU_SS 0x10 #define OTPTSUTRIM_REG(n) (0x18 + ((n) * 0x4)) +#define OTPTSUTRIM_EN_MASK BIT(31) +#define OTPTSUTRIM_MASK GENMASK(11, 0) /* Sensor Mode Register(TSU_SM) */ #define TSU_SM_EN_TS BIT(0) @@ -183,11 +185,15 @@ static int rzg2l_thermal_probe(struct platform_device *pdev) pm_runtime_get_sync(dev); priv->calib0 = rzg2l_thermal_read(priv, OTPTSUTRIM_REG(0)); - if (!priv->calib0) + if (priv->calib0 & OTPTSUTRIM_EN_MASK) + priv->calib0 &= OTPTSUTRIM_MASK; + else priv->calib0 = SW_CALIB0_VAL; priv->calib1 = rzg2l_thermal_read(priv, OTPTSUTRIM_REG(1)); - if (!priv->calib1) + if (priv->calib1 & OTPTSUTRIM_EN_MASK) + priv->calib1 &= OTPTSUTRIM_MASK; + else priv->calib1 = SW_CALIB1_VAL; platform_set_drvdata(pdev, priv); diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 82654dc8382b..cdc0552e8c42 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -947,6 +947,7 @@ __thermal_cooling_device_register(struct device_node *np, return cdev; out_kfree_type: + thermal_cooling_device_destroy_sysfs(cdev); kfree(cdev->type); put_device(&cdev->device); cdev = NULL; diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index 9233f7e74454..b65d435cb92f 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -35,7 +35,7 @@ struct __thermal_cooling_bind_param { }; /** - * struct __thermal_bind_param - a match between trip and cooling device + * struct __thermal_bind_params - a match between trip and cooling device * @tcbp: a pointer to an array of cooling devices * @count: number of elements in array * @trip_id: the trip point index @@ -203,6 +203,14 @@ static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip, return data->ops->get_trend(data->sensor_data, trip, trend); } +static int of_thermal_change_mode(struct thermal_zone_device *tz, + enum thermal_device_mode mode) +{ + struct __thermal_zone *data = tz->devdata; + + return data->ops->change_mode(data->sensor_data, mode); +} + static int of_thermal_bind(struct thermal_zone_device *thermal, struct thermal_cooling_device *cdev) { @@ -408,6 +416,9 @@ thermal_zone_of_add_sensor(struct device_node *zone, if (ops->set_emul_temp) tzd->ops->set_emul_temp = of_thermal_set_emul_temp; + if (ops->change_mode) + tzd->ops->change_mode = of_thermal_change_mode; + mutex_unlock(&tzd->lock); return tzd; @@ -569,6 +580,7 @@ void thermal_zone_of_sensor_unregister(struct device *dev, tzd->ops->get_temp = NULL; tzd->ops->get_trend = NULL; tzd->ops->set_emul_temp = NULL; + tzd->ops->change_mode = NULL; tz->ops = NULL; tz->sensor_data = NULL; diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 6771f05e32c2..8873c1644a29 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -19,7 +19,7 @@ struct ucsi_acpi { struct device *dev; struct ucsi *ucsi; - void __iomem *base; + void *base; struct completion complete; unsigned long flags; guid_t guid; @@ -51,7 +51,7 @@ static int ucsi_acpi_read(struct ucsi *ucsi, unsigned int offset, if (ret) return ret; - memcpy(val, (const void __force *)(ua->base + offset), val_len); + memcpy(val, ua->base + offset, val_len); return 0; } @@ -61,7 +61,7 @@ static int ucsi_acpi_async_write(struct ucsi *ucsi, unsigned int offset, { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); - memcpy((void __force *)(ua->base + offset), val, val_len); + memcpy(ua->base + offset, val, val_len); return ucsi_acpi_dsm(ua, UCSI_DSM_FUNC_WRITE); } @@ -132,20 +132,9 @@ static int ucsi_acpi_probe(struct platform_device *pdev) return -ENODEV; } - /* This will make sure we can use ioremap() */ - status = acpi_release_memory(ACPI_HANDLE(&pdev->dev), res, 1); - if (ACPI_FAILURE(status)) - return -ENOMEM; - - /* - * NOTE: The memory region for the data structures is used also in an - * operation region, which means ACPI has already reserved it. Therefore - * it can not be requested here, and we can not use - * devm_ioremap_resource(). - */ - ua->base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); - if (!ua->base) - return -ENOMEM; + ua->base = devm_memremap(&pdev->dev, res->start, resource_size(res), MEMREMAP_WB); + if (IS_ERR(ua->base)) + return PTR_ERR(ua->base); ret = guid_parse(UCSI_DSM_UUID, &ua->guid); if (ret) diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 92b7ea8d8f5e..c6108581d97d 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -141,6 +141,7 @@ extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_set_enable(int cpu, bool enable); extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); extern bool acpi_cpc_valid(void); +extern bool cppc_allow_fast_switch(void); extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data); extern unsigned int cppc_get_transition_latency(int cpu); extern bool cpc_ffh_supported(void); @@ -175,6 +176,10 @@ static inline bool acpi_cpc_valid(void) { return false; } +static inline bool cppc_allow_fast_switch(void) +{ + return false; +} static inline unsigned int cppc_get_transition_latency(int cpu) { return CPUFREQ_ETERNAL; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d7136d13aa44..4781c2a07f71 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -520,9 +520,6 @@ int acpi_check_resource_conflict(const struct resource *res); int acpi_check_region(resource_size_t start, resource_size_t n, const char *name); -acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, - u32 level); - int acpi_resources_are_enforced(void); #ifdef CONFIG_HIBERNATION @@ -574,6 +571,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); #define OSC_SB_OSLPI_SUPPORT 0x00000100 #define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT 0x00001000 #define OSC_SB_GENERIC_INITIATOR_SUPPORT 0x00002000 +#define OSC_SB_CPC_FLEXIBLE_ADR_SPACE 0x00004000 #define OSC_SB_NATIVE_USB4_SUPPORT 0x00040000 #define OSC_SB_PRM_SUPPORT 0x00200000 @@ -581,6 +579,7 @@ extern bool osc_sb_apei_support_acked; extern bool osc_pc_lpi_support_confirmed; extern bool osc_sb_native_usb4_support_confirmed; extern bool osc_sb_cppc_not_supported; +extern bool osc_cpc_flexible_adr_space_confirmed; /* USB4 Capabilities */ #define OSC_USB_USB3_TUNNELING 0x00000001 diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 142474b4af96..dc10bee75a72 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -38,6 +38,7 @@ enum devfreq_timer { struct devfreq; struct devfreq_governor; +struct devfreq_cpu_data; struct thermal_cooling_device; /** @@ -288,6 +289,11 @@ struct devfreq_simple_ondemand_data { #endif #if IS_ENABLED(CONFIG_DEVFREQ_GOV_PASSIVE) +enum devfreq_parent_dev_type { + DEVFREQ_PARENT_DEV, + CPUFREQ_PARENT_DEV, +}; + /** * struct devfreq_passive_data - ``void *data`` fed to struct devfreq * and devfreq_add_device @@ -299,8 +305,11 @@ struct devfreq_simple_ondemand_data { * using governors except for passive governor. * If the devfreq device has the specific method to decide * the next frequency, should use this callback. - * @this: the devfreq instance of own device. - * @nb: the notifier block for DEVFREQ_TRANSITION_NOTIFIER list + * @parent_type: the parent type of the device. + * @this: the devfreq instance of own device. + * @nb: the notifier block for DEVFREQ_TRANSITION_NOTIFIER or + * CPUFREQ_TRANSITION_NOTIFIER list. + * @cpu_data_list: the list of cpu frequency data for all cpufreq_policy. * * The devfreq_passive_data have to set the devfreq instance of parent * device with governors except for the passive governor. But, don't need to @@ -314,9 +323,13 @@ struct devfreq_passive_data { /* Optional callback to decide the next frequency of passvice device */ int (*get_target_freq)(struct devfreq *this, unsigned long *freq); + /* Should set the type of parent device */ + enum devfreq_parent_dev_type parent_type; + /* For passive governor's internal use. Don't need to set them */ struct devfreq *this; struct notifier_block nb; + struct list_head cpu_data_list; }; #endif diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 9f3c400bc52d..8419bffb4398 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -67,11 +67,16 @@ struct em_perf_domain { * * EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating * energy consumption. + * + * EM_PERF_DOMAIN_ARTIFICIAL: The power values are artificial and might be + * created by platform missing real power information */ #define EM_PERF_DOMAIN_MILLIWATTS BIT(0) #define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1) +#define EM_PERF_DOMAIN_ARTIFICIAL BIT(2) #define em_span_cpus(em) (to_cpumask((em)->cpus)) +#define em_is_artificial(em) ((em)->flags & EM_PERF_DOMAIN_ARTIFICIAL) #ifdef CONFIG_ENERGY_MODEL #define EM_MAX_POWER 0xFFFF @@ -96,11 +101,11 @@ struct em_data_callback { /** * active_power() - Provide power at the next performance state of * a device + * @dev : Device for which we do this operation (can be a CPU) * @power : Active power at the performance state * (modified) * @freq : Frequency at the performance state in kHz * (modified) - * @dev : Device for which we do this operation (can be a CPU) * * active_power() must find the lowest performance state of 'dev' above * 'freq' and update 'power' and 'freq' to the matching active power @@ -112,11 +117,32 @@ struct em_data_callback { * * Return 0 on success. */ - int (*active_power)(unsigned long *power, unsigned long *freq, - struct device *dev); + int (*active_power)(struct device *dev, unsigned long *power, + unsigned long *freq); + + /** + * get_cost() - Provide the cost at the given performance state of + * a device + * @dev : Device for which we do this operation (can be a CPU) + * @freq : Frequency at the performance state in kHz + * @cost : The cost value for the performance state + * (modified) + * + * In case of CPUs, the cost is the one of a single CPU in the domain. + * It is expected to fit in the [0, EM_MAX_POWER] range due to internal + * usage in EAS calculation. + * + * Return 0 on success, or appropriate error value in case of failure. + */ + int (*get_cost)(struct device *dev, unsigned long freq, + unsigned long *cost); }; -#define EM_DATA_CB(_active_power_cb) { .active_power = &_active_power_cb } #define EM_SET_ACTIVE_POWER_CB(em_cb, cb) ((em_cb).active_power = cb) +#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) \ + { .active_power = _active_power_cb, \ + .get_cost = _cost_cb } +#define EM_DATA_CB(_active_power_cb) \ + EM_ADV_DATA_CB(_active_power_cb, NULL) struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); @@ -264,6 +290,7 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) #else struct em_data_callback {}; +#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) { } #define EM_DATA_CB(_active_power_cb) { } #define EM_SET_ACTIVE_POWER_CB(em_cb, cb) do { } while (0) diff --git a/include/linux/iio/adc/qcom-vadc-common.h b/include/linux/iio/adc/qcom-vadc-common.h index ce78d4804994..aa21b032e861 100644 --- a/include/linux/iio/adc/qcom-vadc-common.h +++ b/include/linux/iio/adc/qcom-vadc-common.h @@ -152,6 +152,8 @@ int qcom_adc5_hw_scale(enum vadc_scale_fn_type scaletype, u16 qcom_adc_tm5_temp_volt_scale(unsigned int prescale_ratio, u32 full_scale_code_volt, int temp); +u16 qcom_adc_tm5_gen2_temp_res_scale(int temp); + int qcom_adc5_prescaling_from_dt(u32 num, u32 den); int qcom_adc5_hw_settle_time_from_dt(u32 value, const unsigned int *hw_settle); diff --git a/include/linux/pm.h b/include/linux/pm.h index e65b3ab28377..ffe941958501 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -368,13 +368,13 @@ const struct dev_pm_ops name = { \ #ifdef CONFIG_PM #define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ - runtime_resume_fn, idle_fn, sec) \ + runtime_resume_fn, idle_fn, sec, ns) \ _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ runtime_resume_fn, idle_fn); \ - _EXPORT_SYMBOL(name, sec) + __EXPORT_SYMBOL(name, sec, ns) #else #define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ - runtime_resume_fn, idle_fn, sec) \ + runtime_resume_fn, idle_fn, sec, ns) \ static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \ resume_fn, runtime_suspend_fn, \ runtime_resume_fn, idle_fn) @@ -391,9 +391,13 @@ static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \ _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL) #define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "") + _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "", "") #define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl") + _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl", "") +#define EXPORT_NS_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ + _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "", #ns) +#define EXPORT_NS_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ + _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl", #ns) /* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */ #define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 67017c9390c8..ebc351698090 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -91,6 +91,14 @@ struct gpd_dev_ops { int (*stop)(struct device *dev); }; +struct genpd_governor_data { + s64 max_off_time_ns; + bool max_off_time_changed; + ktime_t next_wakeup; + bool cached_power_down_ok; + bool cached_power_down_state_idx; +}; + struct genpd_power_state { s64 power_off_latency_ns; s64 power_on_latency_ns; @@ -98,7 +106,7 @@ struct genpd_power_state { u64 usage; u64 rejected; struct fwnode_handle *fwnode; - ktime_t idle_time; + u64 idle_time; void *data; }; @@ -114,6 +122,7 @@ struct generic_pm_domain { struct list_head child_links; /* Links with PM domain as a child */ struct list_head dev_list; /* List of devices */ struct dev_power_governor *gov; + struct genpd_governor_data *gd; /* Data used by a genpd governor. */ struct work_struct power_off_work; struct fwnode_handle *provider; /* Identity of the domain provider */ bool has_provider; @@ -134,11 +143,6 @@ struct generic_pm_domain { int (*set_performance_state)(struct generic_pm_domain *genpd, unsigned int state); struct gpd_dev_ops dev_ops; - s64 max_off_time_ns; /* Maximum allowed "suspended" time. */ - ktime_t next_wakeup; /* Maintained by the domain governor */ - bool max_off_time_changed; - bool cached_power_down_ok; - bool cached_power_down_state_idx; int (*attach_dev)(struct generic_pm_domain *domain, struct device *dev); void (*detach_dev)(struct generic_pm_domain *domain, @@ -149,8 +153,8 @@ struct generic_pm_domain { unsigned int state_count); unsigned int state_count; /* number of states */ unsigned int state_idx; /* state that genpd will go to when off */ - ktime_t on_time; - ktime_t accounting_time; + u64 on_time; + u64 accounting_time; const struct genpd_lock_ops *lock_ops; union { struct mutex mlock; @@ -182,6 +186,7 @@ struct gpd_timing_data { s64 suspend_latency_ns; s64 resume_latency_ns; s64 effective_constraint_ns; + ktime_t next_wakeup; bool constraint_changed; bool cached_suspend_ok; }; @@ -193,14 +198,13 @@ struct pm_domain_data { struct generic_pm_domain_data { struct pm_domain_data base; - struct gpd_timing_data td; + struct gpd_timing_data *td; struct notifier_block nb; struct notifier_block *power_nb; int cpu; unsigned int performance_state; unsigned int default_pstate; unsigned int rpm_pstate; - ktime_t next_wakeup; void *data; }; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 2bff6a10095d..9e4d056967c6 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -41,10 +41,16 @@ #define EXPORT_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \ - suspend_fn, resume_fn, idle_fn, "") + suspend_fn, resume_fn, idle_fn, "", "") #define EXPORT_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \ - suspend_fn, resume_fn, idle_fn, "_gpl") + suspend_fn, resume_fn, idle_fn, "_gpl", "") +#define EXPORT_NS_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn, ns) \ + _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \ + suspend_fn, resume_fn, idle_fn, "", #ns) +#define EXPORT_NS_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn, ns) \ + _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \ + suspend_fn, resume_fn, idle_fn, "_gpl", #ns) #ifdef CONFIG_PM extern struct workqueue_struct *pm_wq; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 300273ff40cc..70f2921e2e70 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -542,22 +542,56 @@ static inline void unlock_system_sleep(void) {} #ifdef CONFIG_PM_SLEEP_DEBUG extern bool pm_print_times_enabled; extern bool pm_debug_messages_on; -extern __printf(2, 3) void __pm_pr_dbg(bool defer, const char *fmt, ...); +static inline int pm_dyn_debug_messages_on(void) +{ +#ifdef CONFIG_DYNAMIC_DEBUG + return 1; +#else + return 0; +#endif +} +#ifndef pr_fmt +#define pr_fmt(fmt) "PM: " fmt +#endif +#define __pm_pr_dbg(fmt, ...) \ + do { \ + if (pm_debug_messages_on) \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ + else if (pm_dyn_debug_messages_on()) \ + pr_debug(fmt, ##__VA_ARGS__); \ + } while (0) +#define __pm_deferred_pr_dbg(fmt, ...) \ + do { \ + if (pm_debug_messages_on) \ + printk_deferred(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ + } while (0) #else #define pm_print_times_enabled (false) #define pm_debug_messages_on (false) #include <linux/printk.h> -#define __pm_pr_dbg(defer, fmt, ...) \ - no_printk(KERN_DEBUG fmt, ##__VA_ARGS__) +#define __pm_pr_dbg(fmt, ...) \ + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#define __pm_deferred_pr_dbg(fmt, ...) \ + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif +/** + * pm_pr_dbg - print pm sleep debug messages + * + * If pm_debug_messages_on is enabled, print message. + * If pm_debug_messages_on is disabled and CONFIG_DYNAMIC_DEBUG is enabled, + * print message only from instances explicitly enabled on dynamic debug's + * control. + * If pm_debug_messages_on is disabled and CONFIG_DYNAMIC_DEBUG is disabled, + * don't print message. + */ #define pm_pr_dbg(fmt, ...) \ - __pm_pr_dbg(false, fmt, ##__VA_ARGS__) + __pm_pr_dbg(fmt, ##__VA_ARGS__) #define pm_deferred_pr_dbg(fmt, ...) \ - __pm_pr_dbg(true, fmt, ##__VA_ARGS__) + __pm_deferred_pr_dbg(fmt, ##__VA_ARGS__) #ifdef CONFIG_PM_AUTOSLEEP diff --git a/include/linux/thermal.h b/include/linux/thermal.h index c314893970b3..365733b428d8 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -299,6 +299,8 @@ struct thermal_zone_params { * temperature. * @set_trip_temp: a pointer to a function that sets the trip temperature on * hardware. + * @change_mode: a pointer to a function that notifies the thermal zone + * mode change. */ struct thermal_zone_of_device_ops { int (*get_temp)(void *, int *); @@ -306,6 +308,7 @@ struct thermal_zone_of_device_ops { int (*set_trips)(void *, int, int); int (*set_emul_temp)(void *, int); int (*set_trip_temp)(void *, int, int); + int (*change_mode) (void *, enum thermal_device_mode); }; /* Function declarations */ diff --git a/include/soc/rockchip/pm_domains.h b/include/soc/rockchip/pm_domains.h new file mode 100644 index 000000000000..7dbd941fc937 --- /dev/null +++ b/include/soc/rockchip/pm_domains.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2022, The Chromium OS Authors. All rights reserved. + */ + +#ifndef __SOC_ROCKCHIP_PM_DOMAINS_H__ +#define __SOC_ROCKCHIP_PM_DOMAINS_H__ + +#ifdef CONFIG_ROCKCHIP_PM_DOMAINS + +int rockchip_pmu_block(void); +void rockchip_pmu_unblock(void); + +#else /* CONFIG_ROCKCHIP_PM_DOMAINS */ + +static inline int rockchip_pmu_block(void) +{ + return 0; +} + +static inline void rockchip_pmu_unblock(void) { } + +#endif /* CONFIG_ROCKCHIP_PM_DOMAINS */ + +#endif /* __SOC_ROCKCHIP_PM_DOMAINS_H__ */ diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 5899260a8bef..874ad834dc8d 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -1,6 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 -ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG +ifeq ($(CONFIG_DYNAMIC_DEBUG), y) +CFLAGS_swap.o := -DDEBUG +CFLAGS_snapshot.o := -DDEBUG +CFLAGS_energy_model.o := -DDEBUG +endif KASAN_SANITIZE_snapshot.o := n diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 0153b0ca7b23..6c373f2960e7 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -54,28 +54,15 @@ static int em_debug_cpus_show(struct seq_file *s, void *unused) } DEFINE_SHOW_ATTRIBUTE(em_debug_cpus); -static int em_debug_units_show(struct seq_file *s, void *unused) +static int em_debug_flags_show(struct seq_file *s, void *unused) { struct em_perf_domain *pd = s->private; - char *units = (pd->flags & EM_PERF_DOMAIN_MILLIWATTS) ? - "milliWatts" : "bogoWatts"; - seq_printf(s, "%s\n", units); + seq_printf(s, "%#lx\n", pd->flags); return 0; } -DEFINE_SHOW_ATTRIBUTE(em_debug_units); - -static int em_debug_skip_inefficiencies_show(struct seq_file *s, void *unused) -{ - struct em_perf_domain *pd = s->private; - int enabled = (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES) ? 1 : 0; - - seq_printf(s, "%d\n", enabled); - - return 0; -} -DEFINE_SHOW_ATTRIBUTE(em_debug_skip_inefficiencies); +DEFINE_SHOW_ATTRIBUTE(em_debug_flags); static void em_debug_create_pd(struct device *dev) { @@ -89,9 +76,8 @@ static void em_debug_create_pd(struct device *dev) debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus, &em_debug_cpus_fops); - debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops); - debugfs_create_file("skip-inefficiencies", 0444, d, dev->em_pd, - &em_debug_skip_inefficiencies_fops); + debugfs_create_file("flags", 0444, d, dev->em_pd, + &em_debug_flags_fops); /* Create a sub-directory for each performance state */ for (i = 0; i < dev->em_pd->nr_perf_states; i++) @@ -121,7 +107,8 @@ static void em_debug_remove_pd(struct device *dev) {} #endif static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, - int nr_states, struct em_data_callback *cb) + int nr_states, struct em_data_callback *cb, + unsigned long flags) { unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX; struct em_perf_state *table; @@ -139,7 +126,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, * lowest performance state of 'dev' above 'freq' and updates * 'power' and 'freq' accordingly. */ - ret = cb->active_power(&power, &freq, dev); + ret = cb->active_power(dev, &power, &freq); if (ret) { dev_err(dev, "EM: invalid perf. state: %d\n", ret); @@ -173,10 +160,22 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, /* Compute the cost of each performance state. */ fmax = (u64) table[nr_states - 1].frequency; for (i = nr_states - 1; i >= 0; i--) { - unsigned long power_res = em_scale_power(table[i].power); + unsigned long power_res, cost; + + if (flags & EM_PERF_DOMAIN_ARTIFICIAL) { + ret = cb->get_cost(dev, table[i].frequency, &cost); + if (ret || !cost || cost > EM_MAX_POWER) { + dev_err(dev, "EM: invalid cost %lu %d\n", + cost, ret); + goto free_ps_table; + } + } else { + power_res = em_scale_power(table[i].power); + cost = div64_u64(fmax * power_res, table[i].frequency); + } + + table[i].cost = cost; - table[i].cost = div64_u64(fmax * power_res, - table[i].frequency); if (table[i].cost >= prev_cost) { table[i].flags = EM_PERF_STATE_INEFFICIENT; dev_dbg(dev, "EM: OPP:%lu is inefficient\n", @@ -197,7 +196,8 @@ free_ps_table: } static int em_create_pd(struct device *dev, int nr_states, - struct em_data_callback *cb, cpumask_t *cpus) + struct em_data_callback *cb, cpumask_t *cpus, + unsigned long flags) { struct em_perf_domain *pd; struct device *cpu_dev; @@ -215,7 +215,7 @@ static int em_create_pd(struct device *dev, int nr_states, return -ENOMEM; } - ret = em_create_perf_table(dev, pd, nr_states, cb); + ret = em_create_perf_table(dev, pd, nr_states, cb, flags); if (ret) { kfree(pd); return ret; @@ -259,6 +259,8 @@ static void em_cpufreq_update_efficiencies(struct device *dev) found++; } + cpufreq_cpu_put(policy); + if (!found) return; @@ -332,6 +334,7 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, bool milliwatts) { unsigned long cap, prev_cap = 0; + unsigned long flags = 0; int cpu, ret; if (!dev || !nr_states || !cb) @@ -378,12 +381,16 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, } } - ret = em_create_pd(dev, nr_states, cb, cpus); + if (milliwatts) + flags |= EM_PERF_DOMAIN_MILLIWATTS; + else if (cb->get_cost) + flags |= EM_PERF_DOMAIN_ARTIFICIAL; + + ret = em_create_pd(dev, nr_states, cb, cpus, flags); if (ret) goto unlock; - if (milliwatts) - dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS; + dev->em_pd->flags |= flags; em_cpufreq_update_efficiencies(dev); diff --git a/kernel/power/main.c b/kernel/power/main.c index 7e646079fbeb..5242bf2ee469 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -545,35 +545,6 @@ static int __init pm_debug_messages_setup(char *str) } __setup("pm_debug_messages", pm_debug_messages_setup); -/** - * __pm_pr_dbg - Print a suspend debug message to the kernel log. - * @defer: Whether or not to use printk_deferred() to print the message. - * @fmt: Message format. - * - * The message will be emitted if enabled through the pm_debug_messages - * sysfs attribute. - */ -void __pm_pr_dbg(bool defer, const char *fmt, ...) -{ - struct va_format vaf; - va_list args; - - if (!pm_debug_messages_on) - return; - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - if (defer) - printk_deferred(KERN_DEBUG "PM: %pV", &vaf); - else - printk(KERN_DEBUG "PM: %pV", &vaf); - - va_end(args); -} - #else /* !CONFIG_PM_SLEEP_DEBUG */ static inline void pm_print_times_init(void) {} #endif /* CONFIG_PM_SLEEP_DEBUG */ diff --git a/kernel/power/process.c b/kernel/power/process.c index 11b570fcf049..3068601e585a 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -6,9 +6,6 @@ * Originally from swsusp. */ - -#undef DEBUG - #include <linux/interrupt.h> #include <linux/oom.h> #include <linux/suspend.h> diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 330d49937692..2a406753af90 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -326,7 +326,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) return ret; } -/** +/* * Data types related to memory bitmaps. * * Memory bitmap is a structure consisting of many linked lists of @@ -427,6 +427,10 @@ struct memory_bitmap { /** * alloc_rtree_node - Allocate a new node and add it to the radix tree. + * @gfp_mask: GFP mask for the allocation. + * @safe_needed: Get pages not used before hibernation (restore only) + * @ca: Pointer to a linked list of pages ("a chain") to allocate from + * @list: Radix Tree node to add. * * This function is used to allocate inner nodes as well as the * leave nodes of the radix tree. It also adds the node to the @@ -902,7 +906,7 @@ static bool rtree_next_node(struct memory_bitmap *bm) } /** - * memory_bm_rtree_next_pfn - Find the next set bit in a memory bitmap. + * memory_bm_next_pfn - Find the next set bit in a memory bitmap. * @bm: Memory bitmap. * * Starting from the last returned position this function searches for the next @@ -1937,7 +1941,7 @@ static inline int get_highmem_buffer(int safe_needed) } /** - * alloc_highmem_image_pages - Allocate some highmem pages for the image. + * alloc_highmem_pages - Allocate some highmem pages for the image. * * Try to allocate as many pages as needed, but if the number of free highmem * pages is less than that, allocate them all. @@ -2224,7 +2228,7 @@ static int check_header(struct swsusp_info *info) } /** - * load header - Check the image header and copy the data from it. + * load_header - Check the image header and copy the data from it. */ static int load_header(struct swsusp_info *info) { diff --git a/tools/Makefile b/tools/Makefile index 724134f0e56c..c074e42fd92f 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -32,6 +32,9 @@ help: @echo ' bootconfig - boot config tool' @echo ' spi - spi tools' @echo ' tmon - thermal monitoring and tuning tool' + @echo ' thermometer - temperature capture tool' + @echo ' thermal-engine - thermal monitoring tool' + @echo ' thermal - thermal library' @echo ' tracing - misc tracing tools' @echo ' turbostat - Intel CPU idle stats and freq reporting tool' @echo ' usb - USB testing tools' @@ -89,12 +92,21 @@ perf: FORCE selftests: FORCE $(call descend,testing/$@) +thermal: FORCE + $(call descend,lib/$@) + turbostat x86_energy_perf_policy intel-speed-select: FORCE $(call descend,power/x86/$@) tmon: FORCE $(call descend,thermal/$@) +thermometer: FORCE + $(call descend,thermal/$@) + +thermal-engine: FORCE thermal + $(call descend,thermal/$@) + freefall: FORCE $(call descend,laptop/$@) @@ -105,7 +117,7 @@ all: acpi cgroup counter cpupower gpio hv firewire \ perf selftests bootconfig spi turbostat usb \ virtio vm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat wmi \ - pci debugging tracing + pci debugging tracing thermal thermometer thermal-engine acpi_install: $(call descend,power/$(@:_install=),install) @@ -119,12 +131,21 @@ cgroup_install counter_install firewire_install gpio_install hv_install iio_inst selftests_install: $(call descend,testing/$(@:_install=),install) +thermal_install: + $(call descend,lib/$(@:_install=),install) + turbostat_install x86_energy_perf_policy_install intel-speed-select_install: $(call descend,power/x86/$(@:_install=),install) tmon_install: $(call descend,thermal/$(@:_install=),install) +thermometer_install: + $(call descend,thermal/$(@:_install=),install) + +thermal-engine_install: + $(call descend,thermal/$(@:_install=),install) + freefall_install: $(call descend,laptop/$(@:_install=),install) @@ -137,7 +158,7 @@ install: acpi_install cgroup_install counter_install cpupower_install gpio_insta virtio_install vm_install bpf_install x86_energy_perf_policy_install \ tmon_install freefall_install objtool_install kvm_stat_install \ wmi_install pci_install debugging_install intel-speed-select_install \ - tracing_install + tracing_install thermometer_install thermal-engine_install acpi_clean: $(call descend,power/acpi,clean) @@ -164,9 +185,18 @@ perf_clean: selftests_clean: $(call descend,testing/$(@:_clean=),clean) +thermal_clean: + $(call descend,lib/thermal,clean) + turbostat_clean x86_energy_perf_policy_clean intel-speed-select_clean: $(call descend,power/x86/$(@:_clean=),clean) +thermometer_clean: + $(call descend,thermal/thermometer,clean) + +thermal-engine_clean: + $(call descend,thermal/thermal-engine,clean) + tmon_clean: $(call descend,thermal/tmon,clean) @@ -181,6 +211,6 @@ clean: acpi_clean cgroup_clean counter_clean cpupower_clean hv_clean firewire_cl vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean \ gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \ - intel-speed-select_clean tracing_clean + intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean .PHONY: FORCE diff --git a/tools/lib/thermal/.gitignore b/tools/lib/thermal/.gitignore new file mode 100644 index 000000000000..5d2aeda80fea --- /dev/null +++ b/tools/lib/thermal/.gitignore @@ -0,0 +1,2 @@ +libthermal.so* +libthermal.pc diff --git a/tools/lib/thermal/Build b/tools/lib/thermal/Build new file mode 100644 index 000000000000..4a892d9e24f9 --- /dev/null +++ b/tools/lib/thermal/Build @@ -0,0 +1,5 @@ +libthermal-y += commands.o +libthermal-y += events.o +libthermal-y += thermal_nl.o +libthermal-y += sampling.o +libthermal-y += thermal.o diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile new file mode 100644 index 000000000000..2d0d255fd0e1 --- /dev/null +++ b/tools/lib/thermal/Makefile @@ -0,0 +1,165 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +# Most of this file is copied from tools/lib/perf/Makefile + +LIBTHERMAL_VERSION = 0 +LIBTHERMAL_PATCHLEVEL = 0 +LIBTHERMAL_EXTRAVERSION = 1 + +MAKEFLAGS += --no-print-directory + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +# $(info Determined 'srctree' to be $(srctree)) +endif + +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +include $(srctree)/tools/scripts/Makefile.include +include $(srctree)/tools/scripts/Makefile.arch + +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) +libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) + +ifeq ("$(origin V)", "command line") + VERBOSE = $(V) +endif +ifndef VERBOSE + VERBOSE = 0 +endif + +ifeq ($(VERBOSE),1) + Q = +else + Q = @ +endif + +# Set compile option CFLAGS +ifdef EXTRA_CFLAGS + CFLAGS := $(EXTRA_CFLAGS) +else + CFLAGS := -g -Wall +endif + +INCLUDES = \ +-I/usr/include/libnl3 \ +-I$(srctree)/tools/lib/thermal/include \ +-I$(srctree)/tools/lib/ \ +-I$(srctree)/tools/include \ +-I$(srctree)/tools/arch/$(SRCARCH)/include/ \ +-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \ +-I$(srctree)/tools/include/uapi + +# Append required CFLAGS +override CFLAGS += $(EXTRA_WARNINGS) +override CFLAGS += -Werror -Wall +override CFLAGS += -fPIC +override CFLAGS += $(INCLUDES) +override CFLAGS += -fvisibility=hidden +override CFGLAS += -Wl,-L. +override CFGLAS += -Wl,-lthermal + +all: + +export srctree OUTPUT CC LD CFLAGS V +export DESTDIR DESTDIR_SQ + +include $(srctree)/tools/build/Makefile.include + +VERSION_SCRIPT := libthermal.map + +PATCHLEVEL = $(LIBTHERMAL_PATCHLEVEL) +EXTRAVERSION = $(LIBTHERMAL_EXTRAVERSION) +VERSION = $(LIBTHERMAL_VERSION).$(LIBTHERMAL_PATCHLEVEL).$(LIBTHERMAL_EXTRAVERSION) + +LIBTHERMAL_SO := $(OUTPUT)libthermal.so.$(VERSION) +LIBTHERMAL_A := $(OUTPUT)libthermal.a +LIBTHERMAL_IN := $(OUTPUT)libthermal-in.o +LIBTHERMAL_PC := $(OUTPUT)libthermal.pc +LIBTHERMAL_ALL := $(LIBTHERMAL_A) $(OUTPUT)libthermal.so* + +THERMAL_UAPI := include/uapi/linux/thermal.h + +$(THERMAL_UAPI): FORCE + ln -sf $(srctree)/$@ $(srctree)/tools/$@ + +$(LIBTHERMAL_IN): FORCE + $(Q)$(MAKE) $(build)=libthermal + +$(LIBTHERMAL_A): $(LIBTHERMAL_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBTHERMAL_IN) + +$(LIBTHERMAL_SO): $(LIBTHERMAL_IN) + $(QUIET_LINK)$(CC) --shared -Wl,-soname,libthermal.so \ + -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@ + @ln -sf $(@F) $(OUTPUT)libthermal.so + @ln -sf $(@F) $(OUTPUT)libthermal.so.$(LIBTHERMAL_VERSION) + + +libs: $(THERMAL_UAPI) $(LIBTHERMAL_A) $(LIBTHERMAL_SO) $(LIBTHERMAL_PC) + +all: fixdep + $(Q)$(MAKE) libs + +clean: + $(call QUIET_CLEAN, libthermal) $(RM) $(LIBTHERMAL_A) \ + *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_VERSION) .*.d .*.cmd LIBTHERMAL-CFLAGS $(LIBTHERMAL_PC) + +$(LIBTHERMAL_PC): + $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ + -e "s|@LIBDIR@|$(libdir_SQ)|" \ + -e "s|@VERSION@|$(VERSION)|" \ + < libthermal.pc.template > $@ + +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' +endef + +install_lib: libs + $(call QUIET_INSTALL, $(LIBTHERMAL_ALL)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBTHERMAL_ALL) $(DESTDIR)$(libdir_SQ) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,include/thermal.h,$(prefix)/include/thermal,644); \ + +install_pkgconfig: $(LIBTHERMAL_PC) + $(call QUIET_INSTALL, $(LIBTHERMAL_PC)) \ + $(call do_install,$(LIBTHERMAL_PC),$(libdir_SQ)/pkgconfig,644) + +install_doc: + $(Q)$(MAKE) -C Documentation install-man install-html install-examples + +install: install_lib install_headers install_pkgconfig + +FORCE: + +.PHONY: all install clean FORCE diff --git a/tools/lib/thermal/commands.c b/tools/lib/thermal/commands.c new file mode 100644 index 000000000000..73d4d4e8d6ec --- /dev/null +++ b/tools/lib/thermal/commands.c @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#define _GNU_SOURCE +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <thermal.h> +#include "thermal_nl.h" + +static struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] = { + /* Thermal zone */ + [THERMAL_GENL_ATTR_TZ] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_TZ_ID] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TEMP] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TRIP] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_TZ_TRIP_ID] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TRIP_TEMP] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TRIP_TYPE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TRIP_HYST] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_MODE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_CDEV_WEIGHT] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_NAME] = { .type = NLA_STRING }, + + /* Governor(s) */ + [THERMAL_GENL_ATTR_TZ_GOV] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_TZ_GOV_NAME] = { .type = NLA_STRING }, + + /* Cooling devices */ + [THERMAL_GENL_ATTR_CDEV] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_CDEV_ID] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_CDEV_CUR_STATE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_CDEV_MAX_STATE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_CDEV_NAME] = { .type = NLA_STRING }, +}; + +static int parse_tz_get(struct genl_info *info, struct thermal_zone **tz) +{ + struct nlattr *attr; + struct thermal_zone *__tz = NULL; + size_t size = 0; + int rem; + + nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_TZ], rem) { + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_ID) { + + size++; + + __tz = realloc(__tz, sizeof(*__tz) * (size + 2)); + if (!__tz) + return THERMAL_ERROR; + + __tz[size - 1].id = nla_get_u32(attr); + } + + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_NAME) + nla_strlcpy(__tz[size - 1].name, attr, + THERMAL_NAME_LENGTH); + } + + if (__tz) + __tz[size].id = -1; + + *tz = __tz; + + return THERMAL_SUCCESS; +} + +static int parse_cdev_get(struct genl_info *info, struct thermal_cdev **cdev) +{ + struct nlattr *attr; + struct thermal_cdev *__cdev = NULL; + size_t size = 0; + int rem; + + nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_CDEV], rem) { + + if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_ID) { + + size++; + + __cdev = realloc(__cdev, sizeof(*__cdev) * (size + 2)); + if (!__cdev) + return THERMAL_ERROR; + + __cdev[size - 1].id = nla_get_u32(attr); + } + + if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_NAME) { + nla_strlcpy(__cdev[size - 1].name, attr, + THERMAL_NAME_LENGTH); + } + + if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_CUR_STATE) + __cdev[size - 1].cur_state = nla_get_u32(attr); + + if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_MAX_STATE) + __cdev[size - 1].max_state = nla_get_u32(attr); + } + + if (__cdev) + __cdev[size].id = -1; + + *cdev = __cdev; + + return THERMAL_SUCCESS; +} + +static int parse_tz_get_trip(struct genl_info *info, struct thermal_zone *tz) +{ + struct nlattr *attr; + struct thermal_trip *__tt = NULL; + size_t size = 0; + int rem; + + nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_TZ_TRIP], rem) { + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_ID) { + + size++; + + __tt = realloc(__tt, sizeof(*__tt) * (size + 2)); + if (!__tt) + return THERMAL_ERROR; + + __tt[size - 1].id = nla_get_u32(attr); + } + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_TYPE) + __tt[size - 1].type = nla_get_u32(attr); + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_TEMP) + __tt[size - 1].temp = nla_get_u32(attr); + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_HYST) + __tt[size - 1].hyst = nla_get_u32(attr); + } + + if (__tt) + __tt[size].id = -1; + + tz->trip = __tt; + + return THERMAL_SUCCESS; +} + +static int parse_tz_get_temp(struct genl_info *info, struct thermal_zone *tz) +{ + int id = -1; + + if (info->attrs[THERMAL_GENL_ATTR_TZ_ID]) + id = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_ID]); + + if (tz->id != id) + return THERMAL_ERROR; + + if (info->attrs[THERMAL_GENL_ATTR_TZ_TEMP]) + tz->temp = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_TEMP]); + + return THERMAL_SUCCESS; +} + +static int parse_tz_get_gov(struct genl_info *info, struct thermal_zone *tz) +{ + int id = -1; + + if (info->attrs[THERMAL_GENL_ATTR_TZ_ID]) + id = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_ID]); + + if (tz->id != id) + return THERMAL_ERROR; + + if (info->attrs[THERMAL_GENL_ATTR_TZ_GOV_NAME]) { + nla_strlcpy(tz->governor, + info->attrs[THERMAL_GENL_ATTR_TZ_GOV_NAME], + THERMAL_NAME_LENGTH); + } + + return THERMAL_SUCCESS; +} + +static int handle_netlink(struct nl_cache_ops *unused, + struct genl_cmd *cmd, + struct genl_info *info, void *arg) +{ + int ret; + + switch (cmd->c_id) { + + case THERMAL_GENL_CMD_TZ_GET_ID: + ret = parse_tz_get(info, arg); + break; + + case THERMAL_GENL_CMD_CDEV_GET: + ret = parse_cdev_get(info, arg); + break; + + case THERMAL_GENL_CMD_TZ_GET_TEMP: + ret = parse_tz_get_temp(info, arg); + break; + + case THERMAL_GENL_CMD_TZ_GET_TRIP: + ret = parse_tz_get_trip(info, arg); + break; + + case THERMAL_GENL_CMD_TZ_GET_GOV: + ret = parse_tz_get_gov(info, arg); + break; + + default: + return THERMAL_ERROR; + } + + return ret; +} + +static struct genl_cmd thermal_cmds[] = { + { + .c_id = THERMAL_GENL_CMD_TZ_GET_ID, + .c_name = (char *)"List thermal zones", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_TZ_GET_GOV, + .c_name = (char *)"Get governor", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_TZ_GET_TEMP, + .c_name = (char *)"Get thermal zone temperature", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_TZ_GET_TRIP, + .c_name = (char *)"Get thermal zone trip points", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_CDEV_GET, + .c_name = (char *)"Get cooling devices", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, +}; + +static struct genl_ops thermal_cmd_ops = { + .o_name = (char *)"thermal", + .o_cmds = thermal_cmds, + .o_ncmds = ARRAY_SIZE(thermal_cmds), +}; + +static thermal_error_t thermal_genl_auto(struct thermal_handler *th, int id, int cmd, + int flags, void *arg) +{ + struct nl_msg *msg; + void *hdr; + + msg = nlmsg_alloc(); + if (!msg) + return THERMAL_ERROR; + + hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, thermal_cmd_ops.o_id, + 0, flags, cmd, THERMAL_GENL_VERSION); + if (!hdr) + return THERMAL_ERROR; + + if (id >= 0 && nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_ID, id)) + return THERMAL_ERROR; + + if (nl_send_msg(th->sk_cmd, th->cb_cmd, msg, genl_handle_msg, arg)) + return THERMAL_ERROR; + + nlmsg_free(msg); + + return THERMAL_SUCCESS; +} + +thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th, struct thermal_zone **tz) +{ + return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_TZ_GET_ID, + NLM_F_DUMP | NLM_F_ACK, tz); +} + +thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th, struct thermal_cdev **tc) +{ + return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_CDEV_GET, + NLM_F_DUMP | NLM_F_ACK, tc); +} + +thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th, struct thermal_zone *tz) +{ + return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TRIP, + 0, tz); +} + +thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th, struct thermal_zone *tz) +{ + return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_GOV, 0, tz); +} + +thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, struct thermal_zone *tz) +{ + return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TEMP, 0, tz); +} + +thermal_error_t thermal_cmd_exit(struct thermal_handler *th) +{ + if (genl_unregister_family(&thermal_cmd_ops)) + return THERMAL_ERROR; + + nl_thermal_disconnect(th->sk_cmd, th->cb_cmd); + + return THERMAL_SUCCESS; +} + +thermal_error_t thermal_cmd_init(struct thermal_handler *th) +{ + int ret; + int family; + + if (nl_thermal_connect(&th->sk_cmd, &th->cb_cmd)) + return THERMAL_ERROR; + + ret = genl_register_family(&thermal_cmd_ops); + if (ret) + return THERMAL_ERROR; + + ret = genl_ops_resolve(th->sk_cmd, &thermal_cmd_ops); + if (ret) + return THERMAL_ERROR; + + family = genl_ctrl_resolve(th->sk_cmd, "nlctrl"); + if (family != GENL_ID_CTRL) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} diff --git a/tools/lib/thermal/events.c b/tools/lib/thermal/events.c new file mode 100644 index 000000000000..a7a55d1a0c4c --- /dev/null +++ b/tools/lib/thermal/events.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <linux/netlink.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + + +#include <thermal.h> +#include "thermal_nl.h" + +/* + * Optimization: fill this array to tell which event we do want to pay + * attention to. That happens at init time with the ops + * structure. Each ops will enable the event and the general handler + * will be able to discard the event if there is not ops associated + * with it. + */ +static int enabled_ops[__THERMAL_GENL_EVENT_MAX]; + +static int handle_thermal_event(struct nl_msg *n, void *arg) +{ + struct nlmsghdr *nlh = nlmsg_hdr(n); + struct genlmsghdr *genlhdr = genlmsg_hdr(nlh); + struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1]; + struct thermal_handler_param *thp = arg; + struct thermal_events_ops *ops = &thp->th->ops->events; + + genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); + + arg = thp->arg; + + /* + * This is an event we don't care of, bail out. + */ + if (!enabled_ops[genlhdr->cmd]) + return THERMAL_SUCCESS; + + switch (genlhdr->cmd) { + + case THERMAL_GENL_EVENT_TZ_CREATE: + return ops->tz_create(nla_get_string(attrs[THERMAL_GENL_ATTR_TZ_NAME]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_DELETE: + return ops->tz_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_ENABLE: + return ops->tz_enable(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_DISABLE: + return ops->tz_disable(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_CHANGE: + return ops->trip_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TYPE]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_HYST]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_ADD: + return ops->trip_add(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TYPE]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_HYST]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_DELETE: + return ops->trip_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_UP: + return ops->trip_high(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_DOWN: + return ops->trip_low(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg); + + case THERMAL_GENL_EVENT_CDEV_ADD: + return ops->cdev_add(nla_get_string(attrs[THERMAL_GENL_ATTR_CDEV_NAME]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_MAX_STATE]), arg); + + case THERMAL_GENL_EVENT_CDEV_DELETE: + return ops->cdev_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]), arg); + + case THERMAL_GENL_EVENT_CDEV_STATE_UPDATE: + return ops->cdev_update(nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_CUR_STATE]), arg); + + case THERMAL_GENL_EVENT_TZ_GOV_CHANGE: + return ops->gov_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_string(attrs[THERMAL_GENL_ATTR_GOV_NAME]), arg); + default: + return -1; + } +} + +static void thermal_events_ops_init(struct thermal_events_ops *ops) +{ + enabled_ops[THERMAL_GENL_EVENT_TZ_CREATE] = !!ops->tz_create; + enabled_ops[THERMAL_GENL_EVENT_TZ_DELETE] = !!ops->tz_delete; + enabled_ops[THERMAL_GENL_EVENT_TZ_DISABLE] = !!ops->tz_disable; + enabled_ops[THERMAL_GENL_EVENT_TZ_ENABLE] = !!ops->tz_enable; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_UP] = !!ops->trip_high; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DOWN] = !!ops->trip_low; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_CHANGE] = !!ops->trip_change; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_ADD] = !!ops->trip_add; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DELETE] = !!ops->trip_delete; + enabled_ops[THERMAL_GENL_EVENT_CDEV_ADD] = !!ops->cdev_add; + enabled_ops[THERMAL_GENL_EVENT_CDEV_DELETE] = !!ops->cdev_delete; + enabled_ops[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = !!ops->cdev_update; + enabled_ops[THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = !!ops->gov_change; +} + +thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg) +{ + struct thermal_handler_param thp = { .th = th, .arg = arg }; + + if (!th) + return THERMAL_ERROR; + + if (nl_cb_set(th->cb_event, NL_CB_VALID, NL_CB_CUSTOM, + handle_thermal_event, &thp)) + return THERMAL_ERROR; + + return nl_recvmsgs(th->sk_event, th->cb_event); +} + +int thermal_events_fd(struct thermal_handler *th) +{ + if (!th) + return -1; + + return nl_socket_get_fd(th->sk_event); +} + +thermal_error_t thermal_events_exit(struct thermal_handler *th) +{ + if (nl_unsubscribe_thermal(th->sk_event, th->cb_event, + THERMAL_GENL_EVENT_GROUP_NAME)) + return THERMAL_ERROR; + + nl_thermal_disconnect(th->sk_event, th->cb_event); + + return THERMAL_SUCCESS; +} + +thermal_error_t thermal_events_init(struct thermal_handler *th) +{ + thermal_events_ops_init(&th->ops->events); + + if (nl_thermal_connect(&th->sk_event, &th->cb_event)) + return THERMAL_ERROR; + + if (nl_subscribe_thermal(th->sk_event, th->cb_event, + THERMAL_GENL_EVENT_GROUP_NAME)) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} diff --git a/tools/lib/thermal/include/thermal.h b/tools/lib/thermal/include/thermal.h new file mode 100644 index 000000000000..1abc560602cf --- /dev/null +++ b/tools/lib/thermal/include/thermal.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */ +#ifndef __LIBTHERMAL_H +#define __LIBTHERMAL_H + +#include <linux/thermal.h> + +#ifndef LIBTHERMAL_API +#define LIBTHERMAL_API __attribute__((visibility("default"))) +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +struct thermal_sampling_ops { + int (*tz_temp)(int tz_id, int temp, void *arg); +}; + +struct thermal_events_ops { + int (*tz_create)(const char *name, int tz_id, void *arg); + int (*tz_delete)(int tz_id, void *arg); + int (*tz_enable)(int tz_id, void *arg); + int (*tz_disable)(int tz_id, void *arg); + int (*trip_high)(int tz_id, int trip_id, int temp, void *arg); + int (*trip_low)(int tz_id, int trip_id, int temp, void *arg); + int (*trip_add)(int tz_id, int trip_id, int type, int temp, int hyst, void *arg); + int (*trip_change)(int tz_id, int trip_id, int type, int temp, int hyst, void *arg); + int (*trip_delete)(int tz_id, int trip_id, void *arg); + int (*cdev_add)(const char *name, int cdev_id, int max_state, void *arg); + int (*cdev_delete)(int cdev_id, void *arg); + int (*cdev_update)(int cdev_id, int cur_state, void *arg); + int (*gov_change)(int tz_id, const char *gov_name, void *arg); +}; + +struct thermal_ops { + struct thermal_sampling_ops sampling; + struct thermal_events_ops events; +}; + +struct thermal_trip { + int id; + int type; + int temp; + int hyst; +}; + +struct thermal_zone { + int id; + int temp; + char name[THERMAL_NAME_LENGTH]; + char governor[THERMAL_NAME_LENGTH]; + struct thermal_trip *trip; +}; + +struct thermal_cdev { + int id; + char name[THERMAL_NAME_LENGTH]; + int max_state; + int min_state; + int cur_state; +}; + +typedef enum { + THERMAL_ERROR = -1, + THERMAL_SUCCESS = 0, +} thermal_error_t; + +struct thermal_handler; + +typedef int (*cb_tz_t)(struct thermal_zone *, void *); + +typedef int (*cb_tt_t)(struct thermal_trip *, void *); + +typedef int (*cb_tc_t)(struct thermal_cdev *, void *); + +LIBTHERMAL_API int for_each_thermal_zone(struct thermal_zone *tz, cb_tz_t cb, void *arg); + +LIBTHERMAL_API int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg); + +LIBTHERMAL_API int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg); + +LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz, + const char *name); + +LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_id(struct thermal_zone *tz, int id); + +LIBTHERMAL_API struct thermal_zone *thermal_zone_discover(struct thermal_handler *th); + +LIBTHERMAL_API struct thermal_handler *thermal_init(struct thermal_ops *ops); + +LIBTHERMAL_API void thermal_exit(struct thermal_handler *th); + +/* + * Netlink thermal events + */ +LIBTHERMAL_API thermal_error_t thermal_events_exit(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_events_init(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg); + +LIBTHERMAL_API int thermal_events_fd(struct thermal_handler *th); + +/* + * Netlink thermal commands + */ +LIBTHERMAL_API thermal_error_t thermal_cmd_exit(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_cmd_init(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th, + struct thermal_zone **tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th, + struct thermal_cdev **tc); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th, + struct thermal_zone *tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th, + struct thermal_zone *tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, + struct thermal_zone *tz); + +/* + * Netlink thermal samples + */ +LIBTHERMAL_API thermal_error_t thermal_sampling_exit(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_sampling_init(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_sampling_handle(struct thermal_handler *th, void *arg); + +LIBTHERMAL_API int thermal_sampling_fd(struct thermal_handler *th); + +#endif /* __LIBTHERMAL_H */ + +#ifdef __cplusplus +} +#endif diff --git a/tools/lib/thermal/libthermal.map b/tools/lib/thermal/libthermal.map new file mode 100644 index 000000000000..d5e77738c7a4 --- /dev/null +++ b/tools/lib/thermal/libthermal.map @@ -0,0 +1,25 @@ +LIBTHERMAL_0.0.1 { + global: + thermal_init; + for_each_thermal_zone; + for_each_thermal_trip; + for_each_thermal_cdev; + thermal_zone_find_by_name; + thermal_zone_find_by_id; + thermal_zone_discover; + thermal_init; + thermal_events_init; + thermal_events_handle; + thermal_events_fd; + thermal_cmd_init; + thermal_cmd_get_tz; + thermal_cmd_get_cdev; + thermal_cmd_get_trip; + thermal_cmd_get_governor; + thermal_cmd_get_temp; + thermal_sampling_init; + thermal_sampling_handle; + thermal_sampling_fd; +local: + *; +}; diff --git a/tools/lib/thermal/libthermal.pc.template b/tools/lib/thermal/libthermal.pc.template new file mode 100644 index 000000000000..6f3769731b59 --- /dev/null +++ b/tools/lib/thermal/libthermal.pc.template @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +prefix=@PREFIX@ +libdir=@LIBDIR@ +includedir=${prefix}/include + +Name: libthermal +Description: thermal library +Requires: libnl-3.0 libnl-genl-3.0 +Version: @VERSION@ +Libs: -L${libdir} -lnl-genl-3 -lnl-3 +Cflags: -I${includedir} -I{include}/libnl3 diff --git a/tools/lib/thermal/sampling.c b/tools/lib/thermal/sampling.c new file mode 100644 index 000000000000..ee818f4e9654 --- /dev/null +++ b/tools/lib/thermal/sampling.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <thermal.h> +#include "thermal_nl.h" + +static int handle_thermal_sample(struct nl_msg *n, void *arg) +{ + struct nlmsghdr *nlh = nlmsg_hdr(n); + struct genlmsghdr *genlhdr = genlmsg_hdr(nlh); + struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1]; + struct thermal_handler_param *thp = arg; + struct thermal_handler *th = thp->th; + + genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); + + switch (genlhdr->cmd) { + + case THERMAL_GENL_SAMPLING_TEMP: + return th->ops->sampling.tz_temp( + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg); + default: + return THERMAL_ERROR; + } +} + +thermal_error_t thermal_sampling_handle(struct thermal_handler *th, void *arg) +{ + struct thermal_handler_param thp = { .th = th, .arg = arg }; + + if (!th) + return THERMAL_ERROR; + + if (nl_cb_set(th->cb_sampling, NL_CB_VALID, NL_CB_CUSTOM, + handle_thermal_sample, &thp)) + return THERMAL_ERROR; + + return nl_recvmsgs(th->sk_sampling, th->cb_sampling); +} + +int thermal_sampling_fd(struct thermal_handler *th) +{ + if (!th) + return -1; + + return nl_socket_get_fd(th->sk_sampling); +} + +thermal_error_t thermal_sampling_exit(struct thermal_handler *th) +{ + if (nl_unsubscribe_thermal(th->sk_sampling, th->cb_sampling, + THERMAL_GENL_EVENT_GROUP_NAME)) + return THERMAL_ERROR; + + nl_thermal_disconnect(th->sk_sampling, th->cb_sampling); + + return THERMAL_SUCCESS; +} + +thermal_error_t thermal_sampling_init(struct thermal_handler *th) +{ + if (nl_thermal_connect(&th->sk_sampling, &th->cb_sampling)) + return THERMAL_ERROR; + + if (nl_subscribe_thermal(th->sk_sampling, th->cb_sampling, + THERMAL_GENL_SAMPLING_GROUP_NAME)) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} diff --git a/tools/lib/thermal/thermal.c b/tools/lib/thermal/thermal.c new file mode 100644 index 000000000000..72a76dc205bc --- /dev/null +++ b/tools/lib/thermal/thermal.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <stdio.h> +#include <thermal.h> + +#include "thermal_nl.h" + +int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg) +{ + int i, ret = 0; + + if (!cdev) + return 0; + + for (i = 0; cdev[i].id != -1; i++) + ret |= cb(&cdev[i], arg); + + return ret; +} + +int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg) +{ + int i, ret = 0; + + if (!tt) + return 0; + + for (i = 0; tt[i].id != -1; i++) + ret |= cb(&tt[i], arg); + + return ret; +} + +int for_each_thermal_zone(struct thermal_zone *tz, cb_tz_t cb, void *arg) +{ + int i, ret = 0; + + if (!tz) + return 0; + + for (i = 0; tz[i].id != -1; i++) + ret |= cb(&tz[i], arg); + + return ret; +} + +struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz, + const char *name) +{ + int i; + + if (!tz || !name) + return NULL; + + for (i = 0; tz[i].id != -1; i++) { + if (!strcmp(tz[i].name, name)) + return &tz[i]; + } + + return NULL; +} + +struct thermal_zone *thermal_zone_find_by_id(struct thermal_zone *tz, int id) +{ + int i; + + if (!tz || id < 0) + return NULL; + + for (i = 0; tz[i].id != -1; i++) { + if (tz[i].id == id) + return &tz[i]; + } + + return NULL; +} + +static int __thermal_zone_discover(struct thermal_zone *tz, void *th) +{ + if (thermal_cmd_get_trip(th, tz) < 0) + return -1; + + if (thermal_cmd_get_governor(th, tz)) + return -1; + + return 0; +} + +struct thermal_zone *thermal_zone_discover(struct thermal_handler *th) +{ + struct thermal_zone *tz; + + if (thermal_cmd_get_tz(th, &tz) < 0) + return NULL; + + if (for_each_thermal_zone(tz, __thermal_zone_discover, th)) + return NULL; + + return tz; +} + +void thermal_exit(struct thermal_handler *th) +{ + thermal_cmd_exit(th); + thermal_events_exit(th); + thermal_sampling_exit(th); + + free(th); +} + +struct thermal_handler *thermal_init(struct thermal_ops *ops) +{ + struct thermal_handler *th; + + th = malloc(sizeof(*th)); + if (!th) + return NULL; + th->ops = ops; + + if (thermal_events_init(th)) + goto out_free; + + if (thermal_sampling_init(th)) + goto out_free; + + if (thermal_cmd_init(th)) + goto out_free; + + return th; + +out_free: + free(th); + + return NULL; +} diff --git a/tools/lib/thermal/thermal_nl.c b/tools/lib/thermal/thermal_nl.c new file mode 100644 index 000000000000..b05cf9569858 --- /dev/null +++ b/tools/lib/thermal/thermal_nl.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <thermal.h> +#include "thermal_nl.h" + +struct handler_args { + const char *group; + int id; +}; + +static __thread int err; +static __thread int done; + +static int nl_seq_check_handler(struct nl_msg *msg, void *arg) +{ + return NL_OK; +} + +static int nl_error_handler(struct sockaddr_nl *nla, struct nlmsgerr *nl_err, + void *arg) +{ + int *ret = arg; + + if (ret) + *ret = nl_err->error; + + return NL_STOP; +} + +static int nl_finish_handler(struct nl_msg *msg, void *arg) +{ + int *ret = arg; + + if (ret) + *ret = 1; + + return NL_OK; +} + +static int nl_ack_handler(struct nl_msg *msg, void *arg) +{ + int *ret = arg; + + if (ret) + *ret = 1; + + return NL_OK; +} + +int nl_send_msg(struct nl_sock *sock, struct nl_cb *cb, struct nl_msg *msg, + int (*rx_handler)(struct nl_msg *, void *), void *data) +{ + if (!rx_handler) + return THERMAL_ERROR; + + err = nl_send_auto_complete(sock, msg); + if (err < 0) + return err; + + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, rx_handler, data); + + err = done = 0; + + while (err == 0 && done == 0) + nl_recvmsgs(sock, cb); + + return err; +} + +static int nl_family_handler(struct nl_msg *msg, void *arg) +{ + struct handler_args *grp = arg; + struct nlattr *tb[CTRL_ATTR_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *mcgrp; + int rem_mcgrp; + + nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!tb[CTRL_ATTR_MCAST_GROUPS]) + return THERMAL_ERROR; + + nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) { + + struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1]; + + nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX, + nla_data(mcgrp), nla_len(mcgrp), NULL); + + if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] || + !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]) + continue; + + if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]), + grp->group, + nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]))) + continue; + + grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]); + + break; + } + + return THERMAL_SUCCESS; +} + +static int nl_get_multicast_id(struct nl_sock *sock, struct nl_cb *cb, + const char *family, const char *group) +{ + struct nl_msg *msg; + int ret = 0, ctrlid; + struct handler_args grp = { + .group = group, + .id = -ENOENT, + }; + + msg = nlmsg_alloc(); + if (!msg) + return THERMAL_ERROR; + + ctrlid = genl_ctrl_resolve(sock, "nlctrl"); + + genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); + + nla_put_string(msg, CTRL_ATTR_FAMILY_NAME, family); + + ret = nl_send_msg(sock, cb, msg, nl_family_handler, &grp); + if (ret) + goto nla_put_failure; + + ret = grp.id; + +nla_put_failure: + nlmsg_free(msg); + return ret; +} + +int nl_thermal_connect(struct nl_sock **nl_sock, struct nl_cb **nl_cb) +{ + struct nl_cb *cb; + struct nl_sock *sock; + + cb = nl_cb_alloc(NL_CB_DEFAULT); + if (!cb) + return THERMAL_ERROR; + + sock = nl_socket_alloc(); + if (!sock) + goto out_cb_free; + + if (genl_connect(sock)) + goto out_socket_free; + + if (nl_cb_err(cb, NL_CB_CUSTOM, nl_error_handler, &err) || + nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, nl_finish_handler, &done) || + nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, nl_ack_handler, &done) || + nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check_handler, &done)) + return THERMAL_ERROR; + + *nl_sock = sock; + *nl_cb = cb; + + return THERMAL_SUCCESS; + +out_socket_free: + nl_socket_free(sock); +out_cb_free: + nl_cb_put(cb); + return THERMAL_ERROR; +} + +void nl_thermal_disconnect(struct nl_sock *nl_sock, struct nl_cb *nl_cb) +{ + nl_close(nl_sock); + nl_socket_free(nl_sock); + nl_cb_put(nl_cb); +} + +int nl_unsubscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb, + const char *group) +{ + int mcid; + + mcid = nl_get_multicast_id(nl_sock, nl_cb, THERMAL_GENL_FAMILY_NAME, + group); + if (mcid < 0) + return THERMAL_ERROR; + + if (nl_socket_drop_membership(nl_sock, mcid)) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} + +int nl_subscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb, + const char *group) +{ + int mcid; + + mcid = nl_get_multicast_id(nl_sock, nl_cb, THERMAL_GENL_FAMILY_NAME, + group); + if (mcid < 0) + return THERMAL_ERROR; + + if (nl_socket_add_membership(nl_sock, mcid)) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} diff --git a/tools/lib/thermal/thermal_nl.h b/tools/lib/thermal/thermal_nl.h new file mode 100644 index 000000000000..ddf635642f07 --- /dev/null +++ b/tools/lib/thermal/thermal_nl.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */ +#ifndef __THERMAL_H +#define __THERMAL_H + +#include <netlink/netlink.h> +#include <netlink/genl/genl.h> +#include <netlink/genl/mngt.h> +#include <netlink/genl/ctrl.h> + +struct thermal_handler { + int done; + int error; + struct thermal_ops *ops; + struct nl_msg *msg; + struct nl_sock *sk_event; + struct nl_sock *sk_sampling; + struct nl_sock *sk_cmd; + struct nl_cb *cb_cmd; + struct nl_cb *cb_event; + struct nl_cb *cb_sampling; +}; + +struct thermal_handler_param { + struct thermal_handler *th; + void *arg; +}; + +/* + * Low level netlink + */ +extern int nl_subscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb, + const char *group); + +extern int nl_unsubscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb, + const char *group); + +extern int nl_thermal_connect(struct nl_sock **nl_sock, struct nl_cb **nl_cb); + +extern void nl_thermal_disconnect(struct nl_sock *nl_sock, struct nl_cb *nl_cb); + +extern int nl_send_msg(struct nl_sock *sock, struct nl_cb *nl_cb, struct nl_msg *msg, + int (*rx_handler)(struct nl_msg *, void *), + void *data); + +#endif /* __THERMAL_H */ diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index f3e3c94ab9bd..92e139b9c792 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line") endif turbostat : turbostat.c -override CFLAGS += -O2 -Wall -I../../../include +override CFLAGS += -O2 -Wall -Wextra -I../../../include override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' override CFLAGS += -D_FILE_OFFSET_BITS=64 diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 9b17097bc3d7..1e7d3de55a94 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -292,7 +292,7 @@ starts a new interval. must be run as root. Alternatively, non-root users can be enabled to run turbostat this way: -# setcap cap_sys_rawio=ep ./turbostat +# setcap cap_sys_admin,cap_sys_rawio,cap_sys_nice=+ep ./turbostat # chmod +r /dev/cpu/*/msr diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index bc5ae0872fed..ede31a4287a0 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3,7 +3,7 @@ * turbostat -- show CPU frequency and C-state residency * on modern Intel and AMD processors. * - * Copyright (c) 2021 Intel Corporation. + * Copyright (c) 2022 Intel Corporation. * Len Brown <len.brown@intel.com> */ @@ -37,6 +37,171 @@ #include <asm/unistd.h> #include <stdbool.h> +#define UNUSED(x) (void)(x) + +/* + * This list matches the column headers, except + * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time + * 2. Core and CPU are moved to the end, we can't have strings that contain them + * matching on them for --show and --hide. + */ + +/* + * buffer size used by sscanf() for added column names + * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters + */ +#define NAME_BYTES 20 +#define PATH_BYTES 128 + +enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; +enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC }; +enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT }; + +struct msr_counter { + unsigned int msr_num; + char name[NAME_BYTES]; + char path[PATH_BYTES]; + unsigned int width; + enum counter_type type; + enum counter_format format; + struct msr_counter *next; + unsigned int flags; +#define FLAGS_HIDE (1 << 0) +#define FLAGS_SHOW (1 << 1) +#define SYSFS_PERCPU (1 << 1) +}; + +struct msr_counter bic[] = { + { 0x0, "usec", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Time_Of_Day_Seconds", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Package", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Node", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Avg_MHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Busy%", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Bzy_MHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "TSC_MHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "IRQ", "", 0, 0, 0, NULL, 0 }, + { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL, 0 }, + { 0x0, "sysfs", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%c1", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%c3", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%c6", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%c7", "", 0, 0, 0, NULL, 0 }, + { 0x0, "ThreadC", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CoreTmp", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CoreCnt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "PkgTmp", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFX%rc6", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFXMHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc2", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc3", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc6", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc7", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc8", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc9", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pk%pc10", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%LPI", "", 0, 0, 0, NULL, 0 }, + { 0x0, "SYS%LPI", "", 0, 0, 0, NULL, 0 }, + { 0x0, "PkgWatt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CorWatt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFXWatt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "PkgCnt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "RAMWatt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "PKG_%", "", 0, 0, 0, NULL, 0 }, + { 0x0, "RAM_%", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg_J", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Cor_J", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFX_J", "", 0, 0, 0, NULL, 0 }, + { 0x0, "RAM_J", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Mod%c6", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Totl%C0", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Any%C0", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFX%C0", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPUGFX%", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Core", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU", "", 0, 0, 0, NULL, 0 }, + { 0x0, "APIC", "", 0, 0, 0, NULL, 0 }, + { 0x0, "X2APIC", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Die", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFXAMHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "IPC", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 }, +}; + +#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) +#define BIC_USEC (1ULL << 0) +#define BIC_TOD (1ULL << 1) +#define BIC_Package (1ULL << 2) +#define BIC_Node (1ULL << 3) +#define BIC_Avg_MHz (1ULL << 4) +#define BIC_Busy (1ULL << 5) +#define BIC_Bzy_MHz (1ULL << 6) +#define BIC_TSC_MHz (1ULL << 7) +#define BIC_IRQ (1ULL << 8) +#define BIC_SMI (1ULL << 9) +#define BIC_sysfs (1ULL << 10) +#define BIC_CPU_c1 (1ULL << 11) +#define BIC_CPU_c3 (1ULL << 12) +#define BIC_CPU_c6 (1ULL << 13) +#define BIC_CPU_c7 (1ULL << 14) +#define BIC_ThreadC (1ULL << 15) +#define BIC_CoreTmp (1ULL << 16) +#define BIC_CoreCnt (1ULL << 17) +#define BIC_PkgTmp (1ULL << 18) +#define BIC_GFX_rc6 (1ULL << 19) +#define BIC_GFXMHz (1ULL << 20) +#define BIC_Pkgpc2 (1ULL << 21) +#define BIC_Pkgpc3 (1ULL << 22) +#define BIC_Pkgpc6 (1ULL << 23) +#define BIC_Pkgpc7 (1ULL << 24) +#define BIC_Pkgpc8 (1ULL << 25) +#define BIC_Pkgpc9 (1ULL << 26) +#define BIC_Pkgpc10 (1ULL << 27) +#define BIC_CPU_LPI (1ULL << 28) +#define BIC_SYS_LPI (1ULL << 29) +#define BIC_PkgWatt (1ULL << 30) +#define BIC_CorWatt (1ULL << 31) +#define BIC_GFXWatt (1ULL << 32) +#define BIC_PkgCnt (1ULL << 33) +#define BIC_RAMWatt (1ULL << 34) +#define BIC_PKG__ (1ULL << 35) +#define BIC_RAM__ (1ULL << 36) +#define BIC_Pkg_J (1ULL << 37) +#define BIC_Cor_J (1ULL << 38) +#define BIC_GFX_J (1ULL << 39) +#define BIC_RAM_J (1ULL << 40) +#define BIC_Mod_c6 (1ULL << 41) +#define BIC_Totl_c0 (1ULL << 42) +#define BIC_Any_c0 (1ULL << 43) +#define BIC_GFX_c0 (1ULL << 44) +#define BIC_CPUGFX (1ULL << 45) +#define BIC_Core (1ULL << 46) +#define BIC_CPU (1ULL << 47) +#define BIC_APIC (1ULL << 48) +#define BIC_X2APIC (1ULL << 49) +#define BIC_Die (1ULL << 50) +#define BIC_GFXACTMHz (1ULL << 51) +#define BIC_IPC (1ULL << 52) +#define BIC_CORE_THROT_CNT (1ULL << 53) + +#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) +#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) +#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz ) +#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX) +#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) + +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) + +unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); +unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; + +#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) +#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) +#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) +#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) +#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) +#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) + char *proc_stat = "/proc/stat"; FILE *outf; int *fd_percpu; @@ -48,6 +213,7 @@ struct timespec interval_ts = { 5, 0 }; unsigned int model_orig; unsigned int num_iterations; +unsigned int header_iterations; unsigned int debug; unsigned int quiet; unsigned int shown; @@ -159,13 +325,6 @@ int ignore_stdin; #define MAX(a, b) ((a) > (b) ? (a) : (b)) -/* - * buffer size used by sscanf() for added column names - * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters - */ -#define NAME_BYTES 20 -#define PATH_BYTES 128 - int backwards_count; char *progname; @@ -205,6 +364,7 @@ struct core_data { unsigned int core_temp_c; unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */ unsigned int core_id; + unsigned long long core_throt_cnt; unsigned long long counter[MAX_ADDED_COUNTERS]; } *core_even, *core_odd; @@ -255,24 +415,6 @@ struct pkg_data { #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) -enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; -enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC }; -enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT }; - -struct msr_counter { - unsigned int msr_num; - char name[NAME_BYTES]; - char path[PATH_BYTES]; - unsigned int width; - enum counter_type type; - enum counter_format format; - struct msr_counter *next; - unsigned int flags; -#define FLAGS_HIDE (1 << 0) -#define FLAGS_SHOW (1 << 1) -#define SYSFS_PERCPU (1 << 1) -}; - /* * The accumulated sum of MSR is defined as a monotonic * increasing MSR, it will be accumulated periodically, @@ -522,8 +664,10 @@ static int perf_instr_count_open(int cpu_num) /* counter for cpu_num, including user + kernel and all processes */ fd = perf_event_open(&pea, -1, cpu_num, -1, 0); - if (fd == -1) - err(-1, "cpu%d: perf instruction counter\n", cpu_num); + if (fd == -1) { + warn("cpu%d: perf instruction counter", cpu_num); + BIC_NOT_PRESENT(BIC_IPC); + } return fd; } @@ -550,143 +694,10 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) return 0; } -/* - * This list matches the column headers, except - * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time - * 2. Core and CPU are moved to the end, we can't have strings that contain them - * matching on them for --show and --hide. - */ -struct msr_counter bic[] = { - { 0x0, "usec" }, - { 0x0, "Time_Of_Day_Seconds" }, - { 0x0, "Package" }, - { 0x0, "Node" }, - { 0x0, "Avg_MHz" }, - { 0x0, "Busy%" }, - { 0x0, "Bzy_MHz" }, - { 0x0, "TSC_MHz" }, - { 0x0, "IRQ" }, - { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL }, - { 0x0, "sysfs" }, - { 0x0, "CPU%c1" }, - { 0x0, "CPU%c3" }, - { 0x0, "CPU%c6" }, - { 0x0, "CPU%c7" }, - { 0x0, "ThreadC" }, - { 0x0, "CoreTmp" }, - { 0x0, "CoreCnt" }, - { 0x0, "PkgTmp" }, - { 0x0, "GFX%rc6" }, - { 0x0, "GFXMHz" }, - { 0x0, "Pkg%pc2" }, - { 0x0, "Pkg%pc3" }, - { 0x0, "Pkg%pc6" }, - { 0x0, "Pkg%pc7" }, - { 0x0, "Pkg%pc8" }, - { 0x0, "Pkg%pc9" }, - { 0x0, "Pk%pc10" }, - { 0x0, "CPU%LPI" }, - { 0x0, "SYS%LPI" }, - { 0x0, "PkgWatt" }, - { 0x0, "CorWatt" }, - { 0x0, "GFXWatt" }, - { 0x0, "PkgCnt" }, - { 0x0, "RAMWatt" }, - { 0x0, "PKG_%" }, - { 0x0, "RAM_%" }, - { 0x0, "Pkg_J" }, - { 0x0, "Cor_J" }, - { 0x0, "GFX_J" }, - { 0x0, "RAM_J" }, - { 0x0, "Mod%c6" }, - { 0x0, "Totl%C0" }, - { 0x0, "Any%C0" }, - { 0x0, "GFX%C0" }, - { 0x0, "CPUGFX%" }, - { 0x0, "Core" }, - { 0x0, "CPU" }, - { 0x0, "APIC" }, - { 0x0, "X2APIC" }, - { 0x0, "Die" }, - { 0x0, "GFXAMHz" }, - { 0x0, "IPC" }, -}; - -#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) -#define BIC_USEC (1ULL << 0) -#define BIC_TOD (1ULL << 1) -#define BIC_Package (1ULL << 2) -#define BIC_Node (1ULL << 3) -#define BIC_Avg_MHz (1ULL << 4) -#define BIC_Busy (1ULL << 5) -#define BIC_Bzy_MHz (1ULL << 6) -#define BIC_TSC_MHz (1ULL << 7) -#define BIC_IRQ (1ULL << 8) -#define BIC_SMI (1ULL << 9) -#define BIC_sysfs (1ULL << 10) -#define BIC_CPU_c1 (1ULL << 11) -#define BIC_CPU_c3 (1ULL << 12) -#define BIC_CPU_c6 (1ULL << 13) -#define BIC_CPU_c7 (1ULL << 14) -#define BIC_ThreadC (1ULL << 15) -#define BIC_CoreTmp (1ULL << 16) -#define BIC_CoreCnt (1ULL << 17) -#define BIC_PkgTmp (1ULL << 18) -#define BIC_GFX_rc6 (1ULL << 19) -#define BIC_GFXMHz (1ULL << 20) -#define BIC_Pkgpc2 (1ULL << 21) -#define BIC_Pkgpc3 (1ULL << 22) -#define BIC_Pkgpc6 (1ULL << 23) -#define BIC_Pkgpc7 (1ULL << 24) -#define BIC_Pkgpc8 (1ULL << 25) -#define BIC_Pkgpc9 (1ULL << 26) -#define BIC_Pkgpc10 (1ULL << 27) -#define BIC_CPU_LPI (1ULL << 28) -#define BIC_SYS_LPI (1ULL << 29) -#define BIC_PkgWatt (1ULL << 30) -#define BIC_CorWatt (1ULL << 31) -#define BIC_GFXWatt (1ULL << 32) -#define BIC_PkgCnt (1ULL << 33) -#define BIC_RAMWatt (1ULL << 34) -#define BIC_PKG__ (1ULL << 35) -#define BIC_RAM__ (1ULL << 36) -#define BIC_Pkg_J (1ULL << 37) -#define BIC_Cor_J (1ULL << 38) -#define BIC_GFX_J (1ULL << 39) -#define BIC_RAM_J (1ULL << 40) -#define BIC_Mod_c6 (1ULL << 41) -#define BIC_Totl_c0 (1ULL << 42) -#define BIC_Any_c0 (1ULL << 43) -#define BIC_GFX_c0 (1ULL << 44) -#define BIC_CPUGFX (1ULL << 45) -#define BIC_Core (1ULL << 46) -#define BIC_CPU (1ULL << 47) -#define BIC_APIC (1ULL << 48) -#define BIC_X2APIC (1ULL << 49) -#define BIC_Die (1ULL << 50) -#define BIC_GFXACTMHz (1ULL << 51) -#define BIC_IPC (1ULL << 52) - -#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) -#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) -#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz ) -#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX) -#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) - -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) - -unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); -unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; - -#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) -#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) -#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) -#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) -#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) -#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) - #define MAX_DEFERRED 16 +char *deferred_add_names[MAX_DEFERRED]; char *deferred_skip_names[MAX_DEFERRED]; +int deferred_add_index; int deferred_skip_index; /* @@ -720,6 +731,8 @@ void help(void) " -l, --list list column headers only\n" " -n, --num_iterations num\n" " number of the measurement iterations\n" + " -N, --header_iterations num\n" + " print header every num iterations\n" " -o, --out file\n" " create or truncate \"file\" for all output\n" " -q, --quiet skip decoding system configuration header\n" @@ -741,7 +754,7 @@ void help(void) */ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) { - int i; + unsigned int i; unsigned long long retval = 0; while (name_list) { @@ -752,40 +765,51 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) if (comma) *comma = '\0'; - if (!strcmp(name_list, "all")) - return ~0; - if (!strcmp(name_list, "topology")) - return BIC_TOPOLOGY; - if (!strcmp(name_list, "power")) - return BIC_THERMAL_PWR; - if (!strcmp(name_list, "idle")) - return BIC_IDLE; - if (!strcmp(name_list, "frequency")) - return BIC_FREQUENCY; - if (!strcmp(name_list, "other")) - return BIC_OTHER; - if (!strcmp(name_list, "all")) - return 0; - for (i = 0; i < MAX_BIC; ++i) { if (!strcmp(name_list, bic[i].name)) { retval |= (1ULL << i); break; } + if (!strcmp(name_list, "all")) { + retval |= ~0; + break; + } else if (!strcmp(name_list, "topology")) { + retval |= BIC_TOPOLOGY; + break; + } else if (!strcmp(name_list, "power")) { + retval |= BIC_THERMAL_PWR; + break; + } else if (!strcmp(name_list, "idle")) { + retval |= BIC_IDLE; + break; + } else if (!strcmp(name_list, "frequency")) { + retval |= BIC_FREQUENCY; + break; + } else if (!strcmp(name_list, "other")) { + retval |= BIC_OTHER; + break; + } + } if (i == MAX_BIC) { if (mode == SHOW_LIST) { - fprintf(stderr, "Invalid counter name: %s\n", name_list); - exit(-1); - } - deferred_skip_names[deferred_skip_index++] = name_list; - if (debug) - fprintf(stderr, "deferred \"%s\"\n", name_list); - if (deferred_skip_index >= MAX_DEFERRED) { - fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", - MAX_DEFERRED, name_list); - help(); - exit(1); + deferred_add_names[deferred_add_index++] = name_list; + if (deferred_add_index >= MAX_DEFERRED) { + fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", + MAX_DEFERRED, name_list); + help(); + exit(1); + } + } else { + deferred_skip_names[deferred_skip_index++] = name_list; + if (debug) + fprintf(stderr, "deferred \"%s\"\n", name_list); + if (deferred_skip_index >= MAX_DEFERRED) { + fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", + MAX_DEFERRED, name_list); + help(); + exit(1); + } } } @@ -872,6 +896,9 @@ void print_header(char *delim) if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); + if (DO_BIC(BIC_CORE_THROT_CNT)) + outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : "")); + if (do_rapl && !rapl_joules) { if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); @@ -1011,6 +1038,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p outp += sprintf(outp, "c6: %016llX\n", c->c6); outp += sprintf(outp, "c7: %016llX\n", c->c7); outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); + outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt); outp += sprintf(outp, "Joules: %0X\n", c->core_energy); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { @@ -1225,6 +1253,10 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c); + /* Core throttle count */ + if (DO_BIC(BIC_CORE_THROT_CNT)) + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt); + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) @@ -1311,6 +1343,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data if (DO_BIC(BIC_PkgWatt)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float); + if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); @@ -1386,14 +1419,14 @@ void flush_output_stderr(void) void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { - static int printed; + static int count; - if (!printed || !summary_only) + if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only) print_header("\t"); format_counters(&average.threads, &average.cores, &average.packages); - printed = 1; + count++; if (summary_only) return; @@ -1467,6 +1500,7 @@ void delta_core(struct core_data *new, struct core_data *old) old->c6 = new->c6 - old->c6; old->c7 = new->c7 - old->c7; old->core_temp_c = new->core_temp_c; + old->core_throt_cnt = new->core_throt_cnt; old->mc6_us = new->mc6_us - old->mc6_us; DELTA_WRAP32(new->core_energy, old->core_energy); @@ -1626,6 +1660,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data c->mc6_us = 0; c->core_temp_c = 0; c->core_energy = 0; + c->core_throt_cnt = 0; p->pkg_wtd_core_c0 = 0; p->pkg_any_core_c0 = 0; @@ -1710,6 +1745,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.cores.mc6_us += c->mc6_us; average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); + average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt); average.cores.core_energy += c->core_energy; @@ -1987,6 +2023,26 @@ void get_apic_id(struct thread_data *t) fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); } +int get_core_throt_cnt(int cpu, unsigned long long *cnt) +{ + char path[128 + PATH_BYTES]; + unsigned long long tmp; + FILE *fp; + int ret; + + sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu); + fp = fopen(path, "r"); + if (!fp) + return -1; + ret = fscanf(fp, "%lld", &tmp); + if (ret != 1) + return -1; + fclose(fp); + *cnt = tmp; + + return 0; +} + /* * get_counters(...) * migrate to cpu @@ -2129,6 +2185,9 @@ retry: c->core_temp_c = tj_max - ((msr >> 16) & 0x7F); } + if (DO_BIC(BIC_CORE_THROT_CNT)) + get_core_throt_cnt(cpu, &c->core_throt_cnt); + if (do_rapl & RAPL_AMD_F17H) { if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr)) return -14; @@ -2428,6 +2487,9 @@ int has_turbo_ratio_group_limits(int family, int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_ATOM_GOLDMONT: case INTEL_FAM6_SKYLAKE_X: @@ -2435,8 +2497,9 @@ int has_turbo_ratio_group_limits(int family, int model) case INTEL_FAM6_ATOM_GOLDMONT_D: case INTEL_FAM6_ATOM_TREMONT_D: return 1; + default: + return 0; } - return 0; } static void dump_turbo_ratio_limits(int family, int model) @@ -3027,6 +3090,8 @@ void set_max_cpu_num(void) */ int count_cpus(int cpu) { + UNUSED(cpu); + topo.num_cpus++; return 0; } @@ -3361,6 +3426,9 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg int i, ret; int cpu = t->cpu_id; + UNUSED(c); + UNUSED(p); + for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { unsigned long long msr_cur, msr_last; off_t offset; @@ -3387,6 +3455,8 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg static void msr_record_handler(union sigval v) { + UNUSED(v); + for_all_cpus(update_msr_sum, EVEN_COUNTERS); } @@ -3439,6 +3509,9 @@ release_msr: /* * set_my_sched_priority(pri) * return previous + * + * if non-root, do this: + * # /sbin/setcap cap_sys_rawio,cap_sys_nice=+ep /usr/bin/turbostat */ int set_my_sched_priority(int priority) { @@ -3457,7 +3530,7 @@ int set_my_sched_priority(int priority) errno = 0; retval = getpriority(PRIO_PROCESS, 0); if (retval != priority) - err(-1, "getpriority(%d) != setpriority(%d)", retval, priority); + err(retval, "getpriority(%d) != setpriority(%d)", retval, priority); return original_priority; } @@ -3466,7 +3539,7 @@ void turbostat_loop() { int retval; int restarted = 0; - int done_iters = 0; + unsigned int done_iters = 0; setup_signal_handler(); @@ -3678,6 +3751,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) break; case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ no_MSR_MISC_PWR_MGMT = 1; + /* FALLTHRU */ case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */ pkg_cstate_limits = slv_pkg_cstate_limits; break; @@ -3721,6 +3795,9 @@ int has_slv_msrs(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_ATOM_SILVERMONT: case INTEL_FAM6_ATOM_SILVERMONT_MID: @@ -3736,6 +3813,9 @@ int is_dnv(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_ATOM_GOLDMONT_D: return 1; @@ -3749,6 +3829,9 @@ int is_bdx(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_BROADWELL_X: return 1; @@ -3762,6 +3845,9 @@ int is_skx(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_SKYLAKE_X: return 1; @@ -3775,6 +3861,9 @@ int is_icx(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_ICELAKE_X: return 1; @@ -3787,6 +3876,9 @@ int is_ehl(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_ATOM_TREMONT: return 1; @@ -3799,6 +3891,9 @@ int is_jvl(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_ATOM_TREMONT_D: return 1; @@ -3811,6 +3906,9 @@ int has_turbo_ratio_limit(unsigned int family, unsigned int model) if (has_slv_msrs(family, model)) return 0; + if (family != 6) + return 0; + switch (model) { /* Nehalem compatible, but do not include turbo-ratio limit support */ case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ @@ -4125,6 +4223,9 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) char *epb_string; int cpu, epb; + UNUSED(c); + UNUSED(p); + if (!has_epb) return 0; @@ -4171,6 +4272,9 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) unsigned long long msr; int cpu; + UNUSED(c); + UNUSED(p); + if (!has_hwp) return 0; @@ -4254,6 +4358,9 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data unsigned long long msr; int cpu; + UNUSED(c); + UNUSED(p); + cpu = t->cpu_id; /* per-package */ @@ -4359,6 +4466,8 @@ double get_tdp_intel(unsigned int model) double get_tdp_amd(unsigned int family) { + UNUSED(family); + /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ return 280.0; } @@ -4376,6 +4485,7 @@ static double rapl_dram_energy_units_probe(int model, double rapl_energy_units) case INTEL_FAM6_BROADWELL_X: /* BDX */ case INTEL_FAM6_SKYLAKE_X: /* SKX */ case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ + case INTEL_FAM6_ICELAKE_X: /* ICX */ return (rapl_dram_energy_units = 15.3 / 1000000); default: return (rapl_energy_units); @@ -4559,6 +4669,8 @@ void rapl_probe_amd(unsigned int family, unsigned int model) unsigned int has_rapl = 0; double tdp; + UNUSED(model); + if (max_extended_level >= 0x80000007) { __cpuid(0x80000007, eax, ebx, ecx, edx); /* RAPL (Fam 17h+) */ @@ -4617,6 +4729,7 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model) case INTEL_FAM6_HASWELL_L: /* HSW */ case INTEL_FAM6_HASWELL_G: /* HSW */ do_gfx_perf_limit_reasons = 1; + /* FALLTHRU */ case INTEL_FAM6_HASWELL_X: /* HSX */ do_core_perf_limit_reasons = 1; do_ring_perf_limit_reasons = 1; @@ -4643,6 +4756,9 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p unsigned int dts, dts2; int cpu; + UNUSED(c); + UNUSED(p); + if (!(do_dts || do_ptm)) return 0; @@ -4698,7 +4814,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p void print_power_limit_msr(int cpu, unsigned long long msr, char *label) { - fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", + fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n", cpu, label, ((msr >> 15) & 1) ? "EN" : "DIS", ((msr >> 0) & 0x7FFF) * rapl_power_units, @@ -4714,6 +4830,9 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) const char *msr_name; int cpu; + UNUSED(c); + UNUSED(p); + if (!do_rapl) return 0; @@ -4762,12 +4881,19 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu, msr, (msr >> 63) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "PKG Limit #1"); - fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", + fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n", cpu, ((msr >> 47) & 1) ? "EN" : "DIS", ((msr >> 32) & 0x7FFF) * rapl_power_units, (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, ((msr >> 48) & 1) ? "EN" : "DIS"); + + if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr)) + return -9; + + fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr); + fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n", + cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN"); } if (do_rapl & RAPL_DRAM_POWER_INFO) { @@ -4830,6 +4956,9 @@ int has_snb_msrs(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_SANDYBRIDGE: case INTEL_FAM6_SANDYBRIDGE_X: @@ -4873,6 +5002,9 @@ int has_c8910_msrs(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_HASWELL_L: /* HSW */ case INTEL_FAM6_BROADWELL: /* BDW */ @@ -4899,6 +5031,9 @@ int has_skl_msrs(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_SKYLAKE_L: /* SKL */ case INTEL_FAM6_CANNONLAKE_L: /* CNL */ @@ -4911,6 +5046,10 @@ int is_slm(unsigned int family, unsigned int model) { if (!genuine_intel) return 0; + + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */ @@ -4923,6 +5062,10 @@ int is_knl(unsigned int family, unsigned int model) { if (!genuine_intel) return 0; + + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ return 1; @@ -4935,6 +5078,9 @@ int is_cnl(unsigned int family, unsigned int model) if (!genuine_intel) return 0; + if (family != 6) + return 0; + switch (model) { case INTEL_FAM6_CANNONLAKE_L: /* CNL */ return 1; @@ -4989,6 +5135,9 @@ int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) { unsigned int eax, ebx, ecx, edx; + UNUSED(c); + UNUSED(p); + if (!genuine_intel) return 0; @@ -5025,6 +5174,9 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk unsigned int tcc_default, tcc_offset; int cpu; + UNUSED(c); + UNUSED(p); + /* tj_max is used only for dts or ptm */ if (!(do_dts || do_ptm)) return 0; @@ -5572,6 +5724,11 @@ void process_cpuid() else BIC_NOT_PRESENT(BIC_CPU_LPI); + if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK)) + BIC_PRESENT(BIC_CORE_THROT_CNT); + else + BIC_NOT_PRESENT(BIC_CORE_THROT_CNT); + if (!access(sys_lpi_file_sysfs, R_OK)) { sys_lpi_file = sys_lpi_file_sysfs; BIC_PRESENT(BIC_SYS_LPI); @@ -5601,11 +5758,6 @@ int dir_filter(const struct dirent *dirp) return 0; } -int open_dev_cpu_msr(int dummy1) -{ - return 0; -} - void topology_probe() { int i; @@ -5896,6 +6048,9 @@ void turbostat_init() if (!quiet && do_irtl_snb) print_irtl(); + + if (DO_BIC(BIC_IPC)) + (void)get_instr_count_fd(base_cpu); } int fork_it(char **argv) @@ -5973,7 +6128,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 21.05.04" " - Len Brown <lenb@kernel.org>\n"); + fprintf(outf, "turbostat version 2022.04.16 - Len Brown <lenb@kernel.org>\n"); } int add_counter(unsigned int msr_num, char *path, char *name, @@ -6138,6 +6293,16 @@ next: } } +int is_deferred_add(char *name) +{ + int i; + + for (i = 0; i < deferred_add_index; ++i) + if (!strcmp(name, deferred_add_names[i])) + return 1; + return 0; +} + int is_deferred_skip(char *name) { int i; @@ -6156,9 +6321,6 @@ void probe_sysfs(void) int state; char *sp; - if (!DO_BIC(BIC_sysfs)) - return; - for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -6181,6 +6343,9 @@ void probe_sysfs(void) sprintf(path, "cpuidle/state%d/time", state); + if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + continue; + if (is_deferred_skip(name_buf)) continue; @@ -6206,6 +6371,9 @@ void probe_sysfs(void) sprintf(path, "cpuidle/state%d/usage", state); + if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + continue; + if (is_deferred_skip(name_buf)) continue; @@ -6313,6 +6481,7 @@ void cmdline(int argc, char **argv) { "interval", required_argument, 0, 'i' }, { "IPC", no_argument, 0, 'I' }, { "num_iterations", required_argument, 0, 'n' }, + { "header_iterations", required_argument, 0, 'N' }, { "help", no_argument, 0, 'h' }, { "hide", required_argument, 0, 'H' }, // meh, -h taken by --help { "Joules", no_argument, 0, 'J' }, @@ -6394,6 +6563,14 @@ void cmdline(int argc, char **argv) exit(2); } break; + case 'N': + header_iterations = strtod(optarg, NULL); + + if (header_iterations <= 0) { + fprintf(outf, "iterations %d should be positive number\n", header_iterations); + exit(2); + } + break; case 's': /* * --show: show only those specified @@ -6432,6 +6609,8 @@ int main(int argc, char **argv) turbostat_init(); + msr_sum_record(); + /* dump counters and exit */ if (dump_only) return get_and_dump_counters(); @@ -6443,7 +6622,6 @@ int main(int argc, char **argv) return 0; } - msr_sum_record(); /* * if any params left, it must be a command to fork */ diff --git a/tools/thermal/lib/Build b/tools/thermal/lib/Build new file mode 100644 index 000000000000..06f22760a272 --- /dev/null +++ b/tools/thermal/lib/Build @@ -0,0 +1,3 @@ +libthermal_tools-y += mainloop.o +libthermal_tools-y += log.o +libthermal_tools-y += uptimeofday.o diff --git a/tools/thermal/lib/Makefile b/tools/thermal/lib/Makefile new file mode 100644 index 000000000000..82db451935c5 --- /dev/null +++ b/tools/thermal/lib/Makefile @@ -0,0 +1,158 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +# Most of this file is copied from tools/lib/perf/Makefile + +LIBTHERMAL_TOOLS_VERSION = 0 +LIBTHERMAL_TOOLS_PATCHLEVEL = 0 +LIBTHERMAL_TOOLS_EXTRAVERSION = 1 + +MAKEFLAGS += --no-print-directory + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +# $(info Determined 'srctree' to be $(srctree)) +endif + +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +include $(srctree)/tools/scripts/Makefile.include +include $(srctree)/tools/scripts/Makefile.arch + +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) +libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) + +ifeq ("$(origin V)", "command line") + VERBOSE = $(V) +endif +ifndef VERBOSE + VERBOSE = 0 +endif + +ifeq ($(VERBOSE),1) + Q = +else + Q = @ +endif + +# Set compile option CFLAGS +ifdef EXTRA_CFLAGS + CFLAGS := $(EXTRA_CFLAGS) +else + CFLAGS := -g -Wall +endif + +INCLUDES = \ +-I/usr/include/libnl3 \ +-I$(srctree)/tools/lib/thermal/include \ +-I$(srctree)/tools/lib/ \ +-I$(srctree)/tools/include \ +-I$(srctree)/tools/arch/$(SRCARCH)/include/ \ +-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \ +-I$(srctree)/tools/include/uapi + +# Append required CFLAGS +override CFLAGS += $(EXTRA_WARNINGS) +override CFLAGS += -Werror -Wall +override CFLAGS += -fPIC +override CFLAGS += $(INCLUDES) +override CFGLAS += -Wl,-L. +override CFGLAS += -Wl,-lthermal + +all: + +export srctree OUTPUT CC LD CFLAGS V +export DESTDIR DESTDIR_SQ + +include $(srctree)/tools/build/Makefile.include + +PATCHLEVEL = $(LIBTHERMAL_TOOLS_PATCHLEVEL) +EXTRAVERSION = $(LIBTHERMAL_TOOLS_EXTRAVERSION) +VERSION = $(LIBTHERMAL_TOOLS_VERSION).$(LIBTHERMAL_TOOLS_PATCHLEVEL).$(LIBTHERMAL_TOOLS_EXTRAVERSION) + +LIBTHERMAL_TOOLS_SO := $(OUTPUT)libthermal_tools.so.$(VERSION) +LIBTHERMAL_TOOLS_A := $(OUTPUT)libthermal_tools.a +LIBTHERMAL_TOOLS_IN := $(OUTPUT)libthermal_tools-in.o +LIBTHERMAL_TOOLS_PC := $(OUTPUT)libthermal_tools.pc + +LIBTHERMAL_TOOLS_ALL := $(LIBTHERMAL_TOOLS_A) $(OUTPUT)libthermal_tools.so* + +$(LIBTHERMAL_TOOLS_IN): FORCE + $(Q)$(MAKE) $(build)=libthermal_tools + +$(LIBTHERMAL_TOOLS_A): $(LIBTHERMAL_TOOLS_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBTHERMAL_TOOLS_IN) + +$(LIBTHERMAL_TOOLS_SO): $(LIBTHERMAL_TOOLS_IN) + $(QUIET_LINK)$(CC) --shared -Wl,-soname,libthermal_tools.so $^ -o $@ + @ln -sf $(@F) $(OUTPUT)libthermal_tools.so + @ln -sf $(@F) $(OUTPUT)libthermal_tools.so.$(LIBTHERMAL_TOOLS_VERSION) + + +libs: $(LIBTHERMAL_TOOLS_A) $(LIBTHERMAL_TOOLS_SO) $(LIBTHERMAL_TOOLS_PC) + +all: fixdep + $(Q)$(MAKE) libs + +clean: + $(call QUIET_CLEAN, libthermal_tools) $(RM) $(LIBTHERMAL_TOOLS_A) \ + *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_TOOLS_VERSION) .*.d .*.cmd LIBTHERMAL_TOOLS-CFLAGS $(LIBTHERMAL_TOOLS_PC) + +$(LIBTHERMAL_TOOLS_PC): + $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ + -e "s|@LIBDIR@|$(libdir_SQ)|" \ + -e "s|@VERSION@|$(VERSION)|" \ + < libthermal_tools.pc.template > $@ + +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' +endef + +install_lib: libs + $(call QUIET_INSTALL, $(LIBTHERMAL_TOOLS_ALL)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBTHERMAL_TOOLS_ALL) $(DESTDIR)$(libdir_SQ) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,include/thermal.h,$(prefix)/include/thermal,644); \ + +install_pkgconfig: $(LIBTHERMAL_TOOLS_PC) + $(call QUIET_INSTALL, $(LIBTHERMAL_TOOLS_PC)) \ + $(call do_install,$(LIBTHERMAL_TOOLS_PC),$(libdir_SQ)/pkgconfig,644) + +install_doc: + $(Q)$(MAKE) -C Documentation install-man install-html install-examples + +#install: install_lib install_headers install_pkgconfig install_doc +install: install_lib install_headers install_pkgconfig + +FORCE: + +.PHONY: all install clean FORCE diff --git a/tools/thermal/lib/libthermal_tools.pc.template b/tools/thermal/lib/libthermal_tools.pc.template new file mode 100644 index 000000000000..6f3769731b59 --- /dev/null +++ b/tools/thermal/lib/libthermal_tools.pc.template @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +prefix=@PREFIX@ +libdir=@LIBDIR@ +includedir=${prefix}/include + +Name: libthermal +Description: thermal library +Requires: libnl-3.0 libnl-genl-3.0 +Version: @VERSION@ +Libs: -L${libdir} -lnl-genl-3 -lnl-3 +Cflags: -I${includedir} -I{include}/libnl3 diff --git a/tools/thermal/lib/log.c b/tools/thermal/lib/log.c new file mode 100644 index 000000000000..597d6e7f7858 --- /dev/null +++ b/tools/thermal/lib/log.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <syslog.h> +#include "log.h" + +static const char *__ident = "unknown"; +static int __options; + +static const char * const loglvl[] = { + [LOG_DEBUG] = "DEBUG", + [LOG_INFO] = "INFO", + [LOG_NOTICE] = "NOTICE", + [LOG_WARNING] = "WARN", + [LOG_ERR] = "ERROR", + [LOG_CRIT] = "CRITICAL", + [LOG_ALERT] = "ALERT", + [LOG_EMERG] = "EMERG", +}; + +int log_str2level(const char *lvl) +{ + int i; + + for (i = 0; i < sizeof(loglvl) / sizeof(loglvl[LOG_DEBUG]); i++) + if (!strcmp(lvl, loglvl[i])) + return i; + + return LOG_DEBUG; +} + +extern void logit(int level, const char *format, ...) +{ + va_list args; + + va_start(args, format); + + if (__options & TO_SYSLOG) + vsyslog(level, format, args); + + if (__options & TO_STDERR) + vfprintf(stderr, format, args); + + if (__options & TO_STDOUT) + vfprintf(stdout, format, args); + + va_end(args); +} + +int log_init(int level, const char *ident, int options) +{ + if (!options) + return -1; + + if (level > LOG_DEBUG) + return -1; + + if (!ident) + return -1; + + __ident = ident; + __options = options; + + if (options & TO_SYSLOG) { + openlog(__ident, options | LOG_NDELAY, LOG_USER); + setlogmask(LOG_UPTO(level)); + } + + return 0; +} + +void log_exit(void) +{ + closelog(); +} diff --git a/tools/thermal/lib/log.h b/tools/thermal/lib/log.h new file mode 100644 index 000000000000..be8ab5144938 --- /dev/null +++ b/tools/thermal/lib/log.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */ +#ifndef __THERMAL_TOOLS_LOG_H +#define __THERMAL_TOOLS_LOG_H + +#include <syslog.h> + +#ifndef __maybe_unused +#define __maybe_unused __attribute__((__unused__)) +#endif + +#define TO_SYSLOG 0x1 +#define TO_STDOUT 0x2 +#define TO_STDERR 0x4 + +extern void logit(int level, const char *format, ...); + +#define DEBUG(fmt, ...) logit(LOG_DEBUG, "%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__) +#define INFO(fmt, ...) logit(LOG_INFO, fmt, ##__VA_ARGS__) +#define NOTICE(fmt, ...) logit(LOG_NOTICE, fmt, ##__VA_ARGS__) +#define WARN(fmt, ...) logit(LOG_WARNING, fmt, ##__VA_ARGS__) +#define ERROR(fmt, ...) logit(LOG_ERR, fmt, ##__VA_ARGS__) +#define CRITICAL(fmt, ...) logit(LOG_CRIT, fmt, ##__VA_ARGS__) +#define ALERT(fmt, ...) logit(LOG_ALERT, fmt, ##__VA_ARGS__) +#define EMERG(fmt, ...) logit(LOG_EMERG, fmt, ##__VA_ARGS__) + +int log_init(int level, const char *ident, int options); +int log_str2level(const char *lvl); +void log_exit(void); + +#endif diff --git a/tools/thermal/lib/mainloop.c b/tools/thermal/lib/mainloop.c new file mode 100644 index 000000000000..94cbbcbd1c14 --- /dev/null +++ b/tools/thermal/lib/mainloop.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <stdlib.h> +#include <errno.h> +#include <unistd.h> +#include <signal.h> +#include <sys/epoll.h> +#include "mainloop.h" +#include "log.h" + +static int epfd = -1; +static unsigned short nrhandler; +static sig_atomic_t exit_mainloop; + +struct mainloop_data { + mainloop_callback_t cb; + void *data; + int fd; +}; + +static struct mainloop_data **mds; + +#define MAX_EVENTS 10 + +int mainloop(unsigned int timeout) +{ + int i, nfds; + struct epoll_event events[MAX_EVENTS]; + struct mainloop_data *md; + + if (epfd < 0) + return -1; + + for (;;) { + + nfds = epoll_wait(epfd, events, MAX_EVENTS, timeout); + + if (exit_mainloop || !nfds) + return 0; + + if (nfds < 0) { + if (errno == EINTR) + continue; + return -1; + } + + for (i = 0; i < nfds; i++) { + md = events[i].data.ptr; + + if (md->cb(md->fd, md->data) > 0) + return 0; + } + } +} + +int mainloop_add(int fd, mainloop_callback_t cb, void *data) +{ + struct epoll_event ev = { + .events = EPOLLIN, + }; + + struct mainloop_data *md; + + if (fd >= nrhandler) { + mds = realloc(mds, sizeof(*mds) * (fd + 1)); + if (!mds) + return -1; + nrhandler = fd + 1; + } + + md = malloc(sizeof(*md)); + if (!md) + return -1; + + md->data = data; + md->cb = cb; + md->fd = fd; + + mds[fd] = md; + ev.data.ptr = md; + + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0) { + free(md); + return -1; + } + + return 0; +} + +int mainloop_del(int fd) +{ + if (fd >= nrhandler) + return -1; + + if (epoll_ctl(epfd, EPOLL_CTL_DEL, fd, NULL) < 0) + return -1; + + free(mds[fd]); + + return 0; +} + +int mainloop_init(void) +{ + epfd = epoll_create(2); + if (epfd < 0) + return -1; + + return 0; +} + +void mainloop_exit(void) +{ + exit_mainloop = 1; +} + +void mainloop_fini(void) +{ + close(epfd); +} diff --git a/tools/thermal/lib/mainloop.h b/tools/thermal/lib/mainloop.h new file mode 100644 index 000000000000..89b61e89d905 --- /dev/null +++ b/tools/thermal/lib/mainloop.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */ +#ifndef __THERMAL_TOOLS_MAINLOOP_H +#define __THERMAL_TOOLS_MAINLOOP_H + +typedef int (*mainloop_callback_t)(int fd, void *data); + +extern int mainloop(unsigned int timeout); +extern int mainloop_add(int fd, mainloop_callback_t cb, void *data); +extern int mainloop_del(int fd); +extern void mainloop_exit(void); +extern int mainloop_init(void); +extern void mainloop_fini(void); + +#endif diff --git a/tools/thermal/lib/thermal-tools.h b/tools/thermal/lib/thermal-tools.h new file mode 100644 index 000000000000..f43939a468a3 --- /dev/null +++ b/tools/thermal/lib/thermal-tools.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */ +#ifndef __THERMAL_TOOLS +#define __THERMAL_TOOLS + +#include "log.h" +#include "mainloop.h" +#include "uptimeofday.h" + +#endif diff --git a/tools/thermal/lib/uptimeofday.c b/tools/thermal/lib/uptimeofday.c new file mode 100644 index 000000000000..dacb02956a68 --- /dev/null +++ b/tools/thermal/lib/uptimeofday.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <stdio.h> +#include <sys/time.h> +#include <linux/sysinfo.h> +#include "thermal-tools.h" + +static unsigned long __offset; +static struct timeval __tv; + +int uptimeofday_init(void) +{ + struct sysinfo info; + + if (sysinfo(&info)) + return -1; + + gettimeofday(&__tv, NULL); + + __offset = __tv.tv_sec - info.uptime; + + return 0; +} + +unsigned long getuptimeofday_ms(void) +{ + gettimeofday(&__tv, NULL); + + return ((__tv.tv_sec - __offset) * 1000) + (__tv.tv_usec / 1000); +} + +struct timespec msec_to_timespec(int msec) +{ + struct timespec tv = { + .tv_sec = (msec / 1000), + .tv_nsec = (msec % 1000) * 1000000, + }; + + return tv; +} diff --git a/tools/thermal/lib/uptimeofday.h b/tools/thermal/lib/uptimeofday.h new file mode 100644 index 000000000000..c0da5de41325 --- /dev/null +++ b/tools/thermal/lib/uptimeofday.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */ +#ifndef __THERMAL_TOOLS_UPTIMEOFDAY_H +#define __THERMAL_TOOLS_UPTIMEOFDAY_H +#include <sys/sysinfo.h> +#include <sys/time.h> + +int uptimeofday_init(void); +unsigned long getuptimeofday_ms(void); +struct timespec msec_to_timespec(int msec); + +#endif diff --git a/tools/thermal/thermal-engine/Build b/tools/thermal/thermal-engine/Build new file mode 100644 index 000000000000..20c3c478b88d --- /dev/null +++ b/tools/thermal/thermal-engine/Build @@ -0,0 +1 @@ +thermal-engine-y += thermal-engine.o diff --git a/tools/thermal/thermal-engine/Makefile b/tools/thermal/thermal-engine/Makefile new file mode 100644 index 000000000000..6bd05ff89485 --- /dev/null +++ b/tools/thermal/thermal-engine/Makefile @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for thermal tools + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +# $(info Determined 'srctree' to be $(srctree)) +endif + +CFLAGS = -Wall -Wextra +CFLAGS += -I$(srctree)/tools/thermal/lib +CFLAGS += -I$(srctree)/tools/lib/thermal/include + +LDFLAGS = -L$(srctree)/tools/thermal/lib +LDFLAGS += -L$(srctree)/tools/lib/thermal +LDFLAGS += -lthermal_tools +LDFLAGS += -lthermal +LDFLAGS += -lconfig +LDFLAGS += -lnl-genl-3 -lnl-3 + +VERSION = 0.0.1 + +all: thermal-engine +%: %.c + $(CC) $(CFLAGS) -D VERSION=\"$(VERSION)\" -o $@ $^ $(LDFLAGS) +clean: + $(RM) thermal-engine diff --git a/tools/thermal/thermal-engine/thermal-engine.c b/tools/thermal/thermal-engine/thermal-engine.c new file mode 100644 index 000000000000..9b1476a2680f --- /dev/null +++ b/tools/thermal/thermal-engine/thermal-engine.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Thermal monitoring tool based on the thermal netlink events. + * + * Copyright (C) 2022 Linaro Ltd. + * + * Author: Daniel Lezcano <daniel.lezcano@kernel.org> + */ +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <libgen.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> + +#include <syslog.h> + +#include <sys/epoll.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include <thermal.h> +#include "thermal-tools.h" + +struct options { + int loglevel; + int logopt; + int interactive; + int daemonize; +}; + +struct thermal_data { + struct thermal_zone *tz; + struct thermal_handler *th; +}; + +static int show_trip(struct thermal_trip *tt, __maybe_unused void *arg) +{ + INFO("trip id=%d, type=%d, temp=%d, hyst=%d\n", + tt->id, tt->type, tt->temp, tt->hyst); + + return 0; +} + +static int show_temp(struct thermal_zone *tz, __maybe_unused void *arg) +{ + thermal_cmd_get_temp(arg, tz); + + INFO("temperature: %d\n", tz->temp); + + return 0; +} + +static int show_governor(struct thermal_zone *tz, __maybe_unused void *arg) +{ + thermal_cmd_get_governor(arg, tz); + + INFO("governor: '%s'\n", tz->governor); + + return 0; +} + +static int show_tz(struct thermal_zone *tz, __maybe_unused void *arg) +{ + INFO("thermal zone '%s', id=%d\n", tz->name, tz->id); + + for_each_thermal_trip(tz->trip, show_trip, NULL); + + show_temp(tz, arg); + + show_governor(tz, arg); + + return 0; +} + +static int tz_create(const char *name, int tz_id, __maybe_unused void *arg) +{ + INFO("Thermal zone '%s'/%d created\n", name, tz_id); + + return 0; +} + +static int tz_delete(int tz_id, __maybe_unused void *arg) +{ + INFO("Thermal zone %d deleted\n", tz_id); + + return 0; +} + +static int tz_disable(int tz_id, void *arg) +{ + struct thermal_data *td = arg; + struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id); + + INFO("Thermal zone %d ('%s') disabled\n", tz_id, tz->name); + + return 0; +} + +static int tz_enable(int tz_id, void *arg) +{ + struct thermal_data *td = arg; + struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id); + + INFO("Thermal zone %d ('%s') enabled\n", tz_id, tz->name); + + return 0; +} + +static int trip_high(int tz_id, int trip_id, int temp, void *arg) +{ + struct thermal_data *td = arg; + struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id); + + INFO("Thermal zone %d ('%s'): trip point %d crossed way up with %d °C\n", + tz_id, tz->name, trip_id, temp); + + return 0; +} + +static int trip_low(int tz_id, int trip_id, int temp, void *arg) +{ + struct thermal_data *td = arg; + struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id); + + INFO("Thermal zone %d ('%s'): trip point %d crossed way down with %d °C\n", + tz_id, tz->name, trip_id, temp); + + return 0; +} + +static int trip_add(int tz_id, int trip_id, int type, int temp, int hyst, __maybe_unused void *arg) +{ + INFO("Trip point added %d: id=%d, type=%d, temp=%d, hyst=%d\n", + tz_id, trip_id, type, temp, hyst); + + return 0; +} + +static int trip_delete(int tz_id, int trip_id, __maybe_unused void *arg) +{ + INFO("Trip point deleted %d: id=%d\n", tz_id, trip_id); + + return 0; +} + +static int trip_change(int tz_id, int trip_id, int type, int temp, + int hyst, __maybe_unused void *arg) +{ + struct thermal_data *td = arg; + struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id); + + INFO("Trip point changed %d: id=%d, type=%d, temp=%d, hyst=%d\n", + tz_id, trip_id, type, temp, hyst); + + tz->trip[trip_id].type = type; + tz->trip[trip_id].temp = temp; + tz->trip[trip_id].hyst = hyst; + + return 0; +} + +static int cdev_add(const char *name, int cdev_id, int max_state, __maybe_unused void *arg) +{ + INFO("Cooling device '%s'/%d (max state=%d) added\n", name, cdev_id, max_state); + + return 0; +} + +static int cdev_delete(int cdev_id, __maybe_unused void *arg) +{ + INFO("Cooling device %d deleted", cdev_id); + + return 0; +} + +static int cdev_update(int cdev_id, int cur_state, __maybe_unused void *arg) +{ + INFO("cdev:%d state:%d\n", cdev_id, cur_state); + + return 0; +} + +static int gov_change(int tz_id, const char *name, __maybe_unused void *arg) +{ + struct thermal_data *td = arg; + struct thermal_zone *tz = thermal_zone_find_by_id(td->tz, tz_id); + + INFO("%s: governor changed %s -> %s\n", tz->name, tz->governor, name); + + strcpy(tz->governor, name); + + return 0; +} + +static struct thermal_ops ops = { + .events.tz_create = tz_create, + .events.tz_delete = tz_delete, + .events.tz_disable = tz_disable, + .events.tz_enable = tz_enable, + .events.trip_high = trip_high, + .events.trip_low = trip_low, + .events.trip_add = trip_add, + .events.trip_delete = trip_delete, + .events.trip_change = trip_change, + .events.cdev_add = cdev_add, + .events.cdev_delete = cdev_delete, + .events.cdev_update = cdev_update, + .events.gov_change = gov_change +}; + +static int thermal_event(__maybe_unused int fd, __maybe_unused void *arg) +{ + struct thermal_data *td = arg; + + return thermal_events_handle(td->th, td); +} + +static void usage(const char *cmd) +{ + printf("%s : A thermal monitoring engine based on notifications\n", cmd); + printf("Usage: %s [options]\n", cmd); + printf("\t-h, --help\t\tthis help\n"); + printf("\t-d, --daemonize\n"); + printf("\t-l <level>, --loglevel <level>\tlog level: "); + printf("DEBUG, INFO, NOTICE, WARN, ERROR\n"); + printf("\t-s, --syslog\t\toutput to syslog\n"); + printf("\n"); + exit(0); +} + +static int options_init(int argc, char *argv[], struct options *options) +{ + int opt; + + struct option long_options[] = { + { "help", no_argument, NULL, 'h' }, + { "daemonize", no_argument, NULL, 'd' }, + { "syslog", no_argument, NULL, 's' }, + { "loglevel", required_argument, NULL, 'l' }, + { 0, 0, 0, 0 } + }; + + while (1) { + + int optindex = 0; + + opt = getopt_long(argc, argv, "l:dhs", long_options, &optindex); + if (opt == -1) + break; + + switch (opt) { + case 'l': + options->loglevel = log_str2level(optarg); + break; + case 'd': + options->daemonize = 1; + break; + case 's': + options->logopt = TO_SYSLOG; + break; + case 'h': + usage(basename(argv[0])); + break; + default: /* '?' */ + return -1; + } + } + + return 0; +} + +enum { + THERMAL_ENGINE_SUCCESS = 0, + THERMAL_ENGINE_OPTION_ERROR, + THERMAL_ENGINE_DAEMON_ERROR, + THERMAL_ENGINE_LOG_ERROR, + THERMAL_ENGINE_THERMAL_ERROR, + THERMAL_ENGINE_MAINLOOP_ERROR, +}; + +int main(int argc, char *argv[]) +{ + struct thermal_data td; + struct options options = { + .loglevel = LOG_INFO, + .logopt = TO_STDOUT, + }; + + if (options_init(argc, argv, &options)) { + ERROR("Usage: %s --help\n", argv[0]); + return THERMAL_ENGINE_OPTION_ERROR; + } + + if (options.daemonize && daemon(0, 0)) { + ERROR("Failed to daemonize: %p\n"); + return THERMAL_ENGINE_DAEMON_ERROR; + } + + if (log_init(options.loglevel, basename(argv[0]), options.logopt)) { + ERROR("Failed to initialize logging facility\n"); + return THERMAL_ENGINE_LOG_ERROR; + } + + td.th = thermal_init(&ops); + if (!td.th) { + ERROR("Failed to initialize the thermal library\n"); + return THERMAL_ENGINE_THERMAL_ERROR; + } + + td.tz = thermal_zone_discover(td.th); + if (!td.tz) { + ERROR("No thermal zone available\n"); + return THERMAL_ENGINE_THERMAL_ERROR; + } + + for_each_thermal_zone(td.tz, show_tz, td.th); + + if (mainloop_init()) { + ERROR("Failed to initialize the mainloop\n"); + return THERMAL_ENGINE_MAINLOOP_ERROR; + } + + if (mainloop_add(thermal_events_fd(td.th), thermal_event, &td)) { + ERROR("Failed to setup the mainloop\n"); + return THERMAL_ENGINE_MAINLOOP_ERROR; + } + + INFO("Waiting for thermal events ...\n"); + + if (mainloop(-1)) { + ERROR("Mainloop failed\n"); + return THERMAL_ENGINE_MAINLOOP_ERROR; + } + + return THERMAL_ENGINE_SUCCESS; +} diff --git a/tools/thermal/thermometer/Build b/tools/thermal/thermometer/Build new file mode 100644 index 000000000000..1b96c159c3c8 --- /dev/null +++ b/tools/thermal/thermometer/Build @@ -0,0 +1 @@ +thermometer-y += thermometer.o diff --git a/tools/thermal/thermometer/Makefile b/tools/thermal/thermometer/Makefile new file mode 100644 index 000000000000..d8f8bc82fe3b --- /dev/null +++ b/tools/thermal/thermometer/Makefile @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for cgroup tools + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +# $(info Determined 'srctree' to be $(srctree)) +endif + +CFLAGS = -Wall -Wextra +CFLAGS += -I$(srctree)/tools/thermal/lib + +LDFLAGS = -L$(srctree)/tools/thermal/lib +LDFLAGS += -lthermal_tools +LDFLAGS += -lconfig + +VERSION = 0.0.1 +TARGET=thermometer + +all: $(TARGET) +%: %.c + $(CC) $(CFLAGS) -D VERSION=\"$(VERSION)\" -o $@ $^ $(LDFLAGS) + +clean: + $(RM) $(TARGET) diff --git a/tools/thermal/thermometer/thermometer.8 b/tools/thermal/thermometer/thermometer.8 new file mode 100644 index 000000000000..d090fbca4cba --- /dev/null +++ b/tools/thermal/thermometer/thermometer.8 @@ -0,0 +1,92 @@ +.TH THERMOMETER 8 +# SPDX-License-Identifier: GPL-2.0 +.SH NAME +\fBthermometer\fP - A thermal profiling tool + +.SH SYNOPSIS +.ft B +.B thermometer +.RB [ options ] +.RB [ command ] +.br +.SH DESCRIPTION +\fBthermometer \fP captures the thermal zones temperature at a +specified sampling period. It is optimized to reduce as much as +possible the overhead while doing the temperature acquisition in order +to prevent disrupting the running application we may want to profile. + +This low overhead also allows a high rate sampling for the temperature +which could be necessary to spot overshots and undershots. + +If no configuration file is specified, then all the thermal zones will +be monitored at 4Hz, so every 250ms. A configuration file specifies +the thermal zone names and the desired sampling period. A thermal zone +name can be a regular expression to specify a group of thermal zone. + +The sampling of the different thermal zones will be written into +separate files with the thermal zone name. It is possible to specify a +postfix to identify them for example for a specific scenario. The +output directory can be specified in addition. + +Without any parameters, \fBthermometer \fP captures all the thermal +zone temperatures every 250ms and write to the current directory the +captured files postfixed with the current date. + +If a running \fBduration\fP is specified or a \fBcommand\fP, the +capture ends at the end of the duration if the command did not +finished before. The \fBduration\fP can be specified alone as well as +the \fBcommand\fP. If none is specified, the capture will continue +indefinitively until interrupted by \fBSIGINT\fP or \fBSIGQUIT\fP. +.PP + +.SS Options +.PP +The \fB-h, --help\fP option shows a short usage help +.PP +The \fB-o <dir>, --output <dir>\fP option defines the output directory to put the +sampling files +.PP +The \fB-c <config>, --config <config>\fP option specifies the configuration file to use +.PP +The \fB-d <seconds>, --duration <seconds>\fP option specifies the duration of the capture +.PP +The \fB-l <loglevel>, --loglevel <loglevel>\fP option sets the loglevel [DEBUG,INFO,NOTICE,WARN,ERROR] +.PP +The \fB-p <string>, --postfix <string>\fP option appends \fBstring\fP at the end of the capture filenames +.PP +The \fB-s, --syslog\fP option sets the output to syslog, default is \fBstdout\fP +.PP +The \fB-w, --overwrite\fP overwrites the output files if they exist +.PP + +.PP + +.SS "Exit status:" +.TP +0 +if OK, +.TP +1 +Error with the options specified as parameters +.TP +2 +Error when configuring the logging facility +.TP +3 +Error when configuring the time +.TP +4 +Error in the initialization routine +.TP +5 +Error during the runtime + +.SH Capture file format + +Every file contains two columns. The first one is the uptime timestamp +in order to find a point in time since the system started up if there +is any thermal event. The second one is the temperature in milli +degree. The first line contains the label of each column. + +.SH AUTHOR +Daniel Lezcano <daniel.lezcano@kernel.org> diff --git a/tools/thermal/thermometer/thermometer.c b/tools/thermal/thermometer/thermometer.c new file mode 100644 index 000000000000..1a87a0a77f9f --- /dev/null +++ b/tools/thermal/thermometer/thermometer.c @@ -0,0 +1,572 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#define _GNU_SOURCE +#include <dirent.h> +#include <fcntl.h> +#include <getopt.h> +#include <regex.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/signalfd.h> +#include <sys/timerfd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <unistd.h> +#include <linux/thermal.h> + +#include <libconfig.h> +#include "thermal-tools.h" + +#define CLASS_THERMAL "/sys/class/thermal" + +enum { + THERMOMETER_SUCCESS = 0, + THERMOMETER_OPTION_ERROR, + THERMOMETER_LOG_ERROR, + THERMOMETER_CONFIG_ERROR, + THERMOMETER_TIME_ERROR, + THERMOMETER_INIT_ERROR, + THERMOMETER_RUNTIME_ERROR +}; + +struct options { + int loglvl; + int logopt; + int overwrite; + int duration; + const char *config; + char postfix[PATH_MAX]; + char output[PATH_MAX]; +}; + +struct tz_regex { + regex_t regex; + int polling; +}; + +struct configuration { + struct tz_regex *tz_regex; + int nr_tz_regex; + +}; + +struct tz { + FILE *file_out; + int fd_temp; + int fd_timer; + int polling; + const char *name; +}; + +struct thermometer { + struct tz *tz; + int nr_tz; +}; + +static struct tz_regex *configuration_tz_match(const char *expr, + struct configuration *config) +{ + int i; + + for (i = 0; i < config->nr_tz_regex; i++) { + + if (!regexec(&config->tz_regex[i].regex, expr, 0, NULL, 0)) + return &config->tz_regex[i]; + } + + return NULL; +} + +static int configuration_default_init(struct configuration *config) +{ + config->tz_regex = realloc(config->tz_regex, sizeof(*config->tz_regex) * + (config->nr_tz_regex + 1)); + + if (regcomp(&config->tz_regex[config->nr_tz_regex].regex, ".*", + REG_NOSUB | REG_EXTENDED)) { + ERROR("Invalid regular expression\n"); + return -1; + } + + config->tz_regex[config->nr_tz_regex].polling = 250; + config->nr_tz_regex = 1; + + return 0; +} + +static int configuration_init(const char *path, struct configuration *config) +{ + config_t cfg; + + config_setting_t *tz; + int i, length; + + if (path && access(path, F_OK)) { + ERROR("'%s' is not accessible\n", path); + return -1; + } + + if (!path && !config->nr_tz_regex) { + INFO("No thermal zones configured, using wildcard for all of them\n"); + return configuration_default_init(config); + } + + config_init(&cfg); + + if (!config_read_file(&cfg, path)) { + ERROR("Failed to parse %s:%d - %s\n", config_error_file(&cfg), + config_error_line(&cfg), config_error_text(&cfg)); + + return -1; + } + + tz = config_lookup(&cfg, "thermal-zones"); + if (!tz) { + ERROR("No thermal zone configured to be monitored\n"); + return -1; + } + + length = config_setting_length(tz); + + INFO("Found %d thermal zone(s) regular expression\n", length); + + for (i = 0; i < length; i++) { + + config_setting_t *node; + const char *name; + int polling; + + node = config_setting_get_elem(tz, i); + if (!node) { + ERROR("Missing node name '%d'\n", i); + return -1; + } + + if (!config_setting_lookup_string(node, "name", &name)) { + ERROR("Thermal zone name not found\n"); + return -1; + } + + if (!config_setting_lookup_int(node, "polling", &polling)) { + ERROR("Polling value not found"); + return -1; + } + + config->tz_regex = realloc(config->tz_regex, sizeof(*config->tz_regex) * + (config->nr_tz_regex + 1)); + + if (regcomp(&config->tz_regex[config->nr_tz_regex].regex, name, + REG_NOSUB | REG_EXTENDED)) { + ERROR("Invalid regular expression '%s'\n", name); + continue; + } + + config->tz_regex[config->nr_tz_regex].polling = polling; + config->nr_tz_regex++; + + INFO("Thermal zone regular expression '%s' with polling %d\n", + name, polling); + } + + return 0; +} + +static void usage(const char *cmd) +{ + printf("%s Version: %s\n", cmd, VERSION); + printf("Usage: %s [options]\n", cmd); + printf("\t-h, --help\t\tthis help\n"); + printf("\t-o, --output <dir>\toutput directory for temperature capture\n"); + printf("\t-c, --config <file>\tconfiguration file\n"); + printf("\t-d, --duration <seconds>\tcapture duration\n"); + printf("\t-l, --loglevel <level>\tlog level: "); + printf("DEBUG, INFO, NOTICE, WARN, ERROR\n"); + printf("\t-p, --postfix <string>\tpostfix to be happened at the end of the files\n"); + printf("\t-s, --syslog\t\toutput to syslog\n"); + printf("\t-w, --overwrite\t\toverwrite the temperature capture files if they exist\n"); + printf("\n"); + exit(0); +} + +static int options_init(int argc, char *argv[], struct options *options) +{ + int opt; + time_t now = time(NULL); + + struct option long_options[] = { + { "help", no_argument, NULL, 'h' }, + { "config", required_argument, NULL, 'c' }, + { "duration", required_argument, NULL, 'd' }, + { "loglevel", required_argument, NULL, 'l' }, + { "postfix", required_argument, NULL, 'p' }, + { "output", required_argument, NULL, 'o' }, + { "syslog", required_argument, NULL, 's' }, + { "overwrite", no_argument, NULL, 'w' }, + { 0, 0, 0, 0 } + }; + + strftime(options->postfix, sizeof(options->postfix), + "-%Y-%m-%d_%H:%M:%S", gmtime(&now)); + + while (1) { + + int optindex = 0; + + opt = getopt_long(argc, argv, "ho:c:d:l:p:sw", long_options, &optindex); + if (opt == -1) + break; + + switch (opt) { + case 'c': + options->config = optarg; + break; + case 'd': + options->duration = atoi(optarg) * 1000; + break; + case 'l': + options->loglvl = log_str2level(optarg); + break; + case 'h': + usage(basename(argv[0])); + break; + case 'p': + strcpy(options->postfix, optarg); + break; + case 'o': + strcpy(options->output, optarg); + break; + case 's': + options->logopt = TO_SYSLOG; + break; + case 'w': + options->overwrite = 1; + break; + default: /* '?' */ + ERROR("Usage: %s --help\n", argv[0]); + return -1; + } + } + + return 0; +} + +static int thermometer_add_tz(const char *path, const char *name, int polling, + struct thermometer *thermometer) +{ + int fd; + char tz_path[PATH_MAX]; + + sprintf(tz_path, CLASS_THERMAL"/%s/temp", path); + + fd = open(tz_path, O_RDONLY); + if (fd < 0) { + ERROR("Failed to open '%s': %m\n", tz_path); + return -1; + } + + thermometer->tz = realloc(thermometer->tz, + sizeof(*thermometer->tz) * (thermometer->nr_tz + 1)); + if (!thermometer->tz) { + ERROR("Failed to allocate thermometer->tz\n"); + return -1; + } + + thermometer->tz[thermometer->nr_tz].fd_temp = fd; + thermometer->tz[thermometer->nr_tz].name = strdup(name); + thermometer->tz[thermometer->nr_tz].polling = polling; + thermometer->nr_tz++; + + INFO("Added thermal zone '%s->%s (polling:%d)'\n", path, name, polling); + + return 0; +} + +static int thermometer_init(struct configuration *config, + struct thermometer *thermometer) +{ + DIR *dir; + struct dirent *dirent; + struct tz_regex *tz_regex; + const char *tz_dirname = "thermal_zone"; + + if (mainloop_init()) { + ERROR("Failed to start mainloop\n"); + return -1; + } + + dir = opendir(CLASS_THERMAL); + if (!dir) { + ERROR("failed to open '%s'\n", CLASS_THERMAL); + return -1; + } + + while ((dirent = readdir(dir))) { + char tz_type[THERMAL_NAME_LENGTH]; + char tz_path[PATH_MAX]; + FILE *tz_file; + + if (strncmp(dirent->d_name, tz_dirname, strlen(tz_dirname))) + continue; + + sprintf(tz_path, CLASS_THERMAL"/%s/type", dirent->d_name); + + tz_file = fopen(tz_path, "r"); + if (!tz_file) { + ERROR("Failed to open '%s': %m", tz_path); + continue; + } + + fscanf(tz_file, "%s", tz_type); + + fclose(tz_file); + + tz_regex = configuration_tz_match(tz_type, config); + if (!tz_regex) + continue; + + if (thermometer_add_tz(dirent->d_name, tz_type, + tz_regex->polling, thermometer)) + continue; + } + + closedir(dir); + + return 0; +} + +static int timer_temperature_callback(int fd, void *arg) +{ + struct tz *tz = arg; + char buf[16] = { 0 }; + + pread(tz->fd_temp, buf, sizeof(buf), 0); + + fprintf(tz->file_out, "%ld %s", getuptimeofday_ms(), buf); + + read(fd, buf, sizeof(buf)); + + return 0; +} + +static int thermometer_start(struct thermometer *thermometer, + struct options *options) +{ + struct itimerspec timer_it = { 0 }; + char *path; + FILE *f; + int i; + + INFO("Capturing %d thermal zone(s) temperature...\n", thermometer->nr_tz); + + if (access(options->output, F_OK) && mkdir(options->output, 0700)) { + ERROR("Failed to create directory '%s'\n", options->output); + return -1; + } + + for (i = 0; i < thermometer->nr_tz; i++) { + + asprintf(&path, "%s/%s%s", options->output, + thermometer->tz[i].name, options->postfix); + + if (!options->overwrite && !access(path, F_OK)) { + ERROR("'%s' already exists\n", path); + return -1; + } + + f = fopen(path, "w"); + if (!f) { + ERROR("Failed to create '%s':%m\n", path); + return -1; + } + + fprintf(f, "timestamp(ms) %s(°mC)\n", thermometer->tz[i].name); + + thermometer->tz[i].file_out = f; + + DEBUG("Created '%s' file for thermal zone '%s'\n", path, thermometer->tz[i].name); + + /* + * Create polling timer + */ + thermometer->tz[i].fd_timer = timerfd_create(CLOCK_MONOTONIC, 0); + if (thermometer->tz[i].fd_timer < 0) { + ERROR("Failed to create timer for '%s': %m\n", + thermometer->tz[i].name); + return -1; + } + + DEBUG("Watching '%s' every %d ms\n", + thermometer->tz[i].name, thermometer->tz[i].polling); + + timer_it.it_interval = timer_it.it_value = + msec_to_timespec(thermometer->tz[i].polling); + + if (timerfd_settime(thermometer->tz[i].fd_timer, 0, + &timer_it, NULL) < 0) + return -1; + + if (mainloop_add(thermometer->tz[i].fd_timer, + timer_temperature_callback, + &thermometer->tz[i])) + return -1; + } + + return 0; +} + +static int thermometer_execute(int argc, char *argv[], char *const envp[], pid_t *pid) +{ + if (!argc) + return 0; + + *pid = fork(); + if (*pid < 0) { + ERROR("Failed to fork process: %m"); + return -1; + } + + if (!(*pid)) { + execvpe(argv[0], argv, envp); + exit(1); + } + + return 0; +} + +static int kill_process(__maybe_unused int fd, void *arg) +{ + pid_t pid = *(pid_t *)arg; + + if (kill(pid, SIGTERM)) + ERROR("Failed to send SIGTERM signal to '%d': %p\n", pid); + else if (waitpid(pid, NULL, 0)) + ERROR("Failed to wait pid '%d': %p\n", pid); + + mainloop_exit(); + + return 0; +} + +static int exit_mainloop(__maybe_unused int fd, __maybe_unused void *arg) +{ + mainloop_exit(); + return 0; +} + +static int thermometer_wait(struct options *options, pid_t pid) +{ + int fd; + sigset_t mask; + + /* + * If there is a duration specified, we will exit the mainloop + * and gracefully close all the files which will flush the + * file system cache + */ + if (options->duration) { + struct itimerspec timer_it = { 0 }; + + timer_it.it_value = msec_to_timespec(options->duration); + + fd = timerfd_create(CLOCK_MONOTONIC, 0); + if (fd < 0) { + ERROR("Failed to create duration timer: %m\n"); + return -1; + } + + if (timerfd_settime(fd, 0, &timer_it, NULL)) { + ERROR("Failed to set timer time: %m\n"); + return -1; + } + + if (mainloop_add(fd, pid < 0 ? exit_mainloop : kill_process, &pid)) { + ERROR("Failed to set timer exit mainloop callback\n"); + return -1; + } + } + + /* + * We want to catch any keyboard interrupt, as well as child + * signals if any in order to exit properly + */ + sigemptyset(&mask); + sigaddset(&mask, SIGINT); + sigaddset(&mask, SIGQUIT); + sigaddset(&mask, SIGCHLD); + + if (sigprocmask(SIG_BLOCK, &mask, NULL)) { + ERROR("Failed to set sigprocmask: %m\n"); + return -1; + } + + fd = signalfd(-1, &mask, 0); + if (fd < 0) { + ERROR("Failed to set the signalfd: %m\n"); + return -1; + } + + if (mainloop_add(fd, exit_mainloop, NULL)) { + ERROR("Failed to set timer exit mainloop callback\n"); + return -1; + } + + return mainloop(-1); +} + +static int thermometer_stop(struct thermometer *thermometer) +{ + int i; + + INFO("Closing/flushing output files\n"); + + for (i = 0; i < thermometer->nr_tz; i++) + fclose(thermometer->tz[i].file_out); + + return 0; +} + +int main(int argc, char *argv[], char *const envp[]) +{ + struct options options = { + .loglvl = LOG_DEBUG, + .logopt = TO_STDOUT, + .output = ".", + }; + struct configuration config = { 0 }; + struct thermometer thermometer = { 0 }; + + pid_t pid = -1; + + if (options_init(argc, argv, &options)) + return THERMOMETER_OPTION_ERROR; + + if (log_init(options.loglvl, argv[0], options.logopt)) + return THERMOMETER_LOG_ERROR; + + if (configuration_init(options.config, &config)) + return THERMOMETER_CONFIG_ERROR; + + if (uptimeofday_init()) + return THERMOMETER_TIME_ERROR; + + if (thermometer_init(&config, &thermometer)) + return THERMOMETER_INIT_ERROR; + + if (thermometer_start(&thermometer, &options)) + return THERMOMETER_RUNTIME_ERROR; + + if (thermometer_execute(argc - optind, &argv[optind], envp, &pid)) + return THERMOMETER_RUNTIME_ERROR; + + if (thermometer_wait(&options, pid)) + return THERMOMETER_RUNTIME_ERROR; + + if (thermometer_stop(&thermometer)) + return THERMOMETER_RUNTIME_ERROR; + + return THERMOMETER_SUCCESS; +} diff --git a/tools/thermal/thermometer/thermometer.conf b/tools/thermal/thermometer/thermometer.conf new file mode 100644 index 000000000000..02c6dab3b1b3 --- /dev/null +++ b/tools/thermal/thermometer/thermometer.conf @@ -0,0 +1,5 @@ + +thermal-zones = ( + { name = "cpu[0-9]-thermal"; + polling = 100; } + ) |