diff options
| author | Dave Airlie <airlied@redhat.com> | 2026-06-03 23:41:21 +0300 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2026-06-03 23:41:22 +0300 |
| commit | 44c460d2cc8b87c08360fe60f861660c8045ef90 (patch) | |
| tree | abf861669de56e434fde015841d1f355c01c137d | |
| parent | 9bb8af2770b7b24619e0f95422322dd55384f7ca (diff) | |
| parent | 9a967125427e03c7ebc24d7ad26e9307e8403d4e (diff) | |
| download | linux-44c460d2cc8b87c08360fe60f861660c8045ef90.tar.xz | |
Merge tag 'drm-msm-next-2026-05-30' of https://gitlab.freedesktop.org/drm/msm into drm-next
Changes for v7.2
Core:
- Fixed documentation for msm_gem_shrinker functions
- IFPC related enablement/fixes for gen8
- PERFCNTR_CONFIG ioctl support
GPU:
- Reworked handling of UBWC configuration
- a810 support
MDSS:
- Added Milos platform support
- Reworked handling of UBWC configuration
DisplayPort:
- Reworked HPD handling, preparing for the MST support
DPU:
- Added Milos platform support
- Reworked handling of UBWC configuration
DSI:
- Added Milos platform support
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Rob Clark <rob.clark@oss.qualcomm.com>
Link: https://patch.msgid.link/CACSVV00DXZcvFH2-C3fouve5DGs0DGa-vvsJPuaRmUZZVNKOfg@mail.gmail.com
68 files changed, 7511 insertions, 3064 deletions
diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml index a24fcb914418..dbc0613e427e 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml @@ -16,6 +16,7 @@ properties: - enum: - qcom,apq8064-dsi-ctrl - qcom,kaanapali-dsi-ctrl + - qcom,milos-dsi-ctrl - qcom,msm8226-dsi-ctrl - qcom,msm8916-dsi-ctrl - qcom,msm8953-dsi-ctrl @@ -339,6 +340,7 @@ allOf: compatible: contains: enum: + - qcom,milos-dsi-ctrl - qcom,msm8998-dsi-ctrl - qcom,sa8775p-dsi-ctrl - qcom,sar2130p-dsi-ctrl diff --git a/Documentation/devicetree/bindings/display/msm/gmu.yaml b/Documentation/devicetree/bindings/display/msm/gmu.yaml index 93e5e6e19754..8578c2f8122e 100644 --- a/Documentation/devicetree/bindings/display/msm/gmu.yaml +++ b/Documentation/devicetree/bindings/display/msm/gmu.yaml @@ -304,6 +304,36 @@ allOf: properties: compatible: contains: + const: qcom,adreno-gmu-810.0 + then: + properties: + reg: + items: + - description: Core GMU registers + reg-names: + items: + - const: gmu + clocks: + items: + - description: GPU AHB clock + - description: GMU clock + - description: GPU CX clock + - description: GPU AXI clock + - description: GPU MEMNOC clock + - description: GMU HUB clock + clock-names: + items: + - const: ahb + - const: gmu + - const: cxo + - const: axi + - const: memnoc + - const: hub + + - if: + properties: + compatible: + contains: const: qcom,adreno-gmu-840.1 then: properties: diff --git a/Documentation/devicetree/bindings/display/msm/gpu.yaml b/Documentation/devicetree/bindings/display/msm/gpu.yaml index 04b2328903ca..a40899e5ea58 100644 --- a/Documentation/devicetree/bindings/display/msm/gpu.yaml +++ b/Documentation/devicetree/bindings/display/msm/gpu.yaml @@ -416,6 +416,23 @@ allOf: compatible: contains: enum: + - qcom,adreno-44010000 + - qcom,adreno-44070001 + then: + 
properties: + reg: + minItems: 2 + maxItems: 2 + + reg-names: + minItems: 2 + maxItems: 2 + + - if: + properties: + compatible: + contains: + enum: - qcom,adreno-615.0 - qcom,adreno-618.0 - qcom,adreno-619.0 @@ -434,6 +451,8 @@ allOf: - qcom,adreno-43050a01 - qcom,adreno-43050c01 - qcom,adreno-43051401 + - qcom,adreno-44010000 + - qcom,adreno-44070001 then: # Starting with A6xx, the clocks are usually defined in the GMU node properties: diff --git a/Documentation/devicetree/bindings/display/msm/qcom,milos-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,milos-mdss.yaml new file mode 100644 index 000000000000..7010ffa0ae35 --- /dev/null +++ b/Documentation/devicetree/bindings/display/msm/qcom,milos-mdss.yaml @@ -0,0 +1,286 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/msm/qcom,milos-mdss.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Milos Display MDSS + +maintainers: + - Luca Weiss <luca.weiss@fairphone.com> + +description: + Milos MSM Mobile Display Subsystem(MDSS), which encapsulates sub-blocks like + DPU display controller, DSI and DP interfaces etc. 
+ +$ref: /schemas/display/msm/mdss-common.yaml# + +properties: + compatible: + const: qcom,milos-mdss + + clocks: + items: + - description: Display AHB + - description: Display hf AXI + - description: Display core + + iommus: + maxItems: 1 + + interconnects: + items: + - description: Interconnect path from mdp0 port to the data bus + - description: Interconnect path from CPU to the reg bus + + interconnect-names: + items: + - const: mdp0-mem + - const: cpu-cfg + +patternProperties: + "^display-controller@[0-9a-f]+$": + type: object + additionalProperties: true + properties: + compatible: + const: qcom,milos-dpu + + "^displayport-controller@[0-9a-f]+$": + type: object + additionalProperties: true + properties: + compatible: + const: qcom,milos-dp + + "^dsi@[0-9a-f]+$": + type: object + additionalProperties: true + properties: + compatible: + contains: + const: qcom,milos-dsi-ctrl + + "^phy@[0-9a-f]+$": + type: object + additionalProperties: true + properties: + compatible: + const: qcom,milos-dsi-phy-4nm + +required: + - compatible + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/clock/qcom,dsi-phy-28nm.h> + #include <dt-bindings/clock/qcom,milos-dispcc.h> + #include <dt-bindings/clock/qcom,milos-gcc.h> + #include <dt-bindings/clock/qcom,rpmh.h> + #include <dt-bindings/interconnect/qcom,icc.h> + #include <dt-bindings/interconnect/qcom,milos-rpmh.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/power/qcom,rpmhpd.h> + + display-subsystem@ae00000 { + compatible = "qcom,milos-mdss"; + reg = <0x0ae00000 0x1000>; + reg-names = "mdss"; + + interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH 0>; + + clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&gcc GCC_DISP_HF_AXI_CLK>, + <&dispcc DISP_CC_MDSS_MDP_CLK>; + + resets = <&dispcc DISP_CC_MDSS_CORE_BCR>; + + interconnects = <&mmss_noc MASTER_MDP QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ACTIVE_ONLY + &cnoc_main 
SLAVE_DISPLAY_CFG QCOM_ICC_TAG_ACTIVE_ONLY>; + interconnect-names = "mdp0-mem", + "cpu-cfg"; + + power-domains = <&dispcc DISP_CC_MDSS_CORE_GDSC>; + + iommus = <&apps_smmu 0x1c00 0x2>; + + interrupt-controller; + #interrupt-cells = <1>; + + #address-cells = <1>; + #size-cells = <1>; + ranges; + + display-controller@ae01000 { + compatible = "qcom,milos-dpu"; + reg = <0x0ae01000 0x8f000>, + <0x0aeb0000 0x3000>; + reg-names = "mdp", + "vbif"; + + interrupts-extended = <&mdss 0>; + + clocks = <&gcc GCC_DISP_HF_AXI_CLK>, + <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&dispcc DISP_CC_MDSS_MDP_LUT_CLK>, + <&dispcc DISP_CC_MDSS_MDP_CLK>, + <&dispcc DISP_CC_MDSS_VSYNC_CLK>; + clock-names = "nrt_bus", + "iface", + "lut", + "core", + "vsync"; + + assigned-clocks = <&dispcc DISP_CC_MDSS_VSYNC_CLK>; + assigned-clock-rates = <19200000>; + + operating-points-v2 = <&mdp_opp_table>; + + power-domains = <&rpmhpd RPMHPD_CX>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + dpu_intf1_out: endpoint { + remote-endpoint = <&mdss_dsi0_in>; + }; + }; + }; + + mdp_opp_table: opp-table { + compatible = "operating-points-v2"; + + opp-200000000 { + opp-hz = /bits/ 64 <200000000>; + required-opps = <&rpmhpd_opp_low_svs>; + }; + + opp-342000000 { + opp-hz = /bits/ 64 <342000000>; + required-opps = <&rpmhpd_opp_svs>; + }; + + opp-402000000 { + opp-hz = /bits/ 64 <402000000>; + required-opps = <&rpmhpd_opp_svs_l1>; + }; + + opp-535000000 { + opp-hz = /bits/ 64 <535000000>; + required-opps = <&rpmhpd_opp_nom>; + }; + + opp-600000000 { + opp-hz = /bits/ 64 <600000000>; + required-opps = <&rpmhpd_opp_nom_l1>; + }; + + opp-630000000 { + opp-hz = /bits/ 64 <630000000>; + required-opps = <&rpmhpd_opp_turbo>; + }; + }; + }; + + dsi@ae94000 { + compatible = "qcom,milos-dsi-ctrl", "qcom,mdss-dsi-ctrl"; + reg = <0x0ae94000 0x1000>; + reg-names = "dsi_ctrl"; + + interrupts-extended = <&mdss 4>; + + clocks = <&dispcc DISP_CC_MDSS_BYTE0_CLK>, + <&dispcc DISP_CC_MDSS_BYTE0_INTF_CLK>, 
+ <&dispcc DISP_CC_MDSS_PCLK0_CLK>, + <&dispcc DISP_CC_MDSS_ESC0_CLK>, + <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&gcc GCC_DISP_HF_AXI_CLK>; + clock-names = "byte", + "byte_intf", + "pixel", + "core", + "iface", + "bus"; + + assigned-clocks = <&dispcc DISP_CC_MDSS_BYTE0_CLK_SRC>, + <&dispcc DISP_CC_MDSS_PCLK0_CLK_SRC>; + assigned-clock-parents = <&mdss_dsi0_phy DSI_BYTE_PLL_CLK>, + <&mdss_dsi0_phy DSI_PIXEL_PLL_CLK>; + + operating-points-v2 = <&mdss_dsi_opp_table>; + + power-domains = <&rpmhpd RPMHPD_CX>; + + phys = <&mdss_dsi0_phy>; + phy-names = "dsi"; + + #address-cells = <1>; + #size-cells = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + mdss_dsi0_in: endpoint { + remote-endpoint = <&dpu_intf1_out>; + }; + }; + + port@1 { + reg = <1>; + + mdss_dsi0_out: endpoint { + }; + }; + }; + + mdss_dsi_opp_table: opp-table { + compatible = "operating-points-v2"; + + opp-187500000 { + opp-hz = /bits/ 64 <187500000>; + required-opps = <&rpmhpd_opp_low_svs>; + }; + + opp-300000000 { + opp-hz = /bits/ 64 <300000000>; + required-opps = <&rpmhpd_opp_svs>; + }; + + opp-358000000 { + opp-hz = /bits/ 64 <358000000>; + required-opps = <&rpmhpd_opp_svs_l1>; + }; + }; + }; + + mdss_dsi0_phy: phy@ae95000 { + compatible = "qcom,milos-dsi-phy-4nm"; + reg = <0x0ae95000 0x200>, + <0x0ae95200 0x300>, + <0x0ae95500 0x400>; + reg-names = "dsi_phy", + "dsi_phy_lane", + "dsi_pll"; + + clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&rpmhcc RPMH_CXO_CLK>; + clock-names = "iface", + "ref"; + + #clock-cells = <1>; + #phy-cells = <0>; + }; + }; +... 
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml index 134321b50897..aa0cf0ec5b93 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml @@ -18,6 +18,7 @@ properties: - qcom,eliza-dpu - qcom,glymur-dpu - qcom,kaanapali-dpu + - qcom,milos-dpu - qcom,sa8775p-dpu - qcom,sm8650-dpu - qcom,sm8750-dpu diff --git a/Documentation/devicetree/bindings/phy/qcom,dsi-phy-7nm.yaml b/Documentation/devicetree/bindings/phy/qcom,dsi-phy-7nm.yaml index 966c70d746aa..f397ba3fa84a 100644 --- a/Documentation/devicetree/bindings/phy/qcom,dsi-phy-7nm.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,dsi-phy-7nm.yaml @@ -20,6 +20,7 @@ properties: - qcom,dsi-phy-7nm - qcom,dsi-phy-7nm-8150 - qcom,kaanapali-dsi-phy-3nm + - qcom,milos-dsi-phy-4nm - qcom,sa8775p-dsi-phy-5nm - qcom,sar2130p-dsi-phy-5nm - qcom,sc7280-dsi-phy-7nm diff --git a/drivers/gpu/drm/ci/gitlab-ci.yml b/drivers/gpu/drm/ci/gitlab-ci.yml index 56088c5393cd..c1087731d1a2 100644 --- a/drivers/gpu/drm/ci/gitlab-ci.yml +++ b/drivers/gpu/drm/ci/gitlab-ci.yml @@ -386,6 +386,10 @@ linkcheck-docs: rules: - when: never +mr-label-maker-test: + rules: + - when: never + test-docs: rules: - when: never diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index ba45e99be05b..d0c3a4c6703b 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -122,7 +122,7 @@ msm-y += \ msm_gpu_devfreq.o \ msm_io_utils.o \ msm_iommu.o \ - msm_perf.o \ + msm_perfcntr.o \ msm_rd.o \ msm_ringbuffer.o \ msm_submitqueue.o \ @@ -177,6 +177,11 @@ quiet_cmd_headergen = GENHDR $@ cmd_headergen = mkdir -p $(obj)/generated && $(PYTHON3) $(src)/registers/gen_header.py \ $(headergen-opts) --rnn $(src)/registers --xml $< c-defines > $@ +# TODO how to do this for a2xx/a5xx which have different .xml arg? 
+quiet_cmd_headergen_json = GENHDRJSN $@ + cmd_headergen_json = mkdir -p $(obj)/generated && $(PYTHON3) $(src)/registers/gen_header.py \ + $(headergen-opts) --rnn $(src)/registers --xml $(filter %.xml,$^) perfcntrs --json $< > $@ + $(obj)/generated/%.xml.h: $(src)/registers/adreno/%.xml \ $(src)/registers/adreno/adreno_common.xml \ $(src)/registers/adreno/adreno_pm4.xml \ @@ -193,6 +198,24 @@ $(obj)/generated/%.xml.h: $(src)/registers/display/%.xml \ FORCE $(call if_changed,headergen) +ADRENO_PERFCNTRS = + +define adreno_perfcntrs +ADRENO_PERFCNTRS += generated/$(1)_perfcntrs.json.c +$$(obj)/generated/$(1)_perfcntrs.json.c: $$(src)/registers/adreno/$(1)_perfcntrs.json \ + $$(src)/registers/adreno/$(2).xml \ + FORCE + $$(call if_changed,headergen_json) +endef + +$(eval $(call adreno_perfcntrs,a2xx,a2xx)) +$(eval $(call adreno_perfcntrs,a5xx,a5xx)) +$(eval $(call adreno_perfcntrs,a6xx,a6xx)) +$(eval $(call adreno_perfcntrs,a7xx,a6xx)) +$(eval $(call adreno_perfcntrs,a8xx,a6xx)) + +adreno-y += $(ADRENO_PERFCNTRS:.c=.o) + ADRENO_HEADERS = \ generated/a2xx.xml.h \ generated/a3xx.xml.h \ @@ -224,7 +247,7 @@ DISPLAY_HEADERS = \ generated/mdss.xml.h \ generated/sfpb.xml.h -$(addprefix $(obj)/,$(adreno-y)): $(addprefix $(obj)/,$(ADRENO_HEADERS)) +$(addprefix $(obj)/,$(msm-y)): $(addprefix $(obj)/,$(ADRENO_HEADERS)) $(addprefix $(obj)/,$(msm-display-y)): $(addprefix $(obj)/,$(DISPLAY_HEADERS)) -targets += $(ADRENO_HEADERS) $(DISPLAY_HEADERS) +targets += $(ADRENO_HEADERS) $(DISPLAY_HEADERS) $(ADRENO_PERFCNTRS) diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c index d5a5fa9e2cf8..df4cded9143f 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c @@ -489,10 +489,6 @@ static u32 a2xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) return ring->memptrs->rptr; } -static const struct msm_gpu_perfcntr perfcntrs[] = { -/* TODO */ -}; - static struct msm_gpu *a2xx_gpu_init(struct drm_device 
*dev) { struct a2xx_gpu *a2xx_gpu = NULL; @@ -518,9 +514,6 @@ static struct msm_gpu *a2xx_gpu_init(struct drm_device *dev) adreno_gpu = &a2xx_gpu->base; gpu = &adreno_gpu->base; - gpu->perfcntrs = perfcntrs; - gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs); - ret = adreno_gpu_init(dev, pdev, adreno_gpu, config->info->funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 018183e0ac3f..c17e9777beae 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -266,12 +266,6 @@ static int a3xx_hw_init(struct msm_gpu *gpu) /* Turn on performance counters: */ gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01); - /* Enable the perfcntrs that we use.. */ - for (i = 0; i < gpu->num_perfcntrs; i++) { - const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i]; - gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val); - } - gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK); ret = adreno_hw_init(gpu); @@ -508,13 +502,6 @@ static u32 a3xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) return ring->memptrs->rptr; } -static const struct msm_gpu_perfcntr perfcntrs[] = { - { REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO, - SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" }, - { REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO, - SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" }, -}; - static struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) { struct a3xx_gpu *a3xx_gpu = NULL; @@ -542,9 +529,6 @@ static struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) adreno_gpu = &a3xx_gpu->base; gpu = &adreno_gpu->base; - gpu->perfcntrs = perfcntrs; - gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs); - adreno_gpu->registers = a3xx_registers; ret = adreno_gpu_init(dev, pdev, adreno_gpu, config->info->funcs, 1); diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index e6ab731f8e9a..6392126f48f2 100644 --- 
a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -652,9 +652,6 @@ static struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) adreno_gpu = &a4xx_gpu->base; gpu = &adreno_gpu->base; - gpu->perfcntrs = NULL; - gpu->num_perfcntrs = 0; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, config->info->funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 79acae11154a..2c0bbac43c52 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -752,17 +752,13 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02); /* Disable L2 bypass in the UCHE */ - gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base)); - gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base)); - gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base)); - gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base)); + gpu_write64(gpu, REG_A5XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); + gpu_write64(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); /* Set the GMEM VA range (0 to gpu->gmem) */ - gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000); - gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000); - gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO, + gpu_write64(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN, 0x00100000); + gpu_write64(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX, 0x00100000 + adreno_gpu->info->gmem - 1); - gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000); if (adreno_is_a505(adreno_gpu) || adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu) || adreno_is_a510(adreno_gpu)) { @@ -1217,9 +1213,7 @@ static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status) static void a5xx_uche_err_irq(struct msm_gpu *gpu) { - uint64_t addr = (uint64_t) gpu_read(gpu, 
REG_A5XX_UCHE_TRAP_LOG_HI); - - addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO); + uint64_t addr = gpu_read64(gpu, REG_A5XX_UCHE_TRAP_LOG); dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n", addr); @@ -1725,7 +1719,6 @@ static struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) struct msm_drm_private *priv = dev->dev_private; struct platform_device *pdev = priv->gpu_pdev; struct adreno_platform_config *config = pdev->dev.platform_data; - const struct qcom_ubwc_cfg_data *common_cfg; struct a5xx_gpu *a5xx_gpu = NULL; struct adreno_gpu *adreno_gpu; struct msm_gpu *gpu; @@ -1769,13 +1762,9 @@ static struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) a5xx_preempt_init(gpu); /* Inherit the common config and make some necessary fixups */ - common_cfg = qcom_ubwc_config_get_data(); - if (IS_ERR(common_cfg)) - return ERR_CAST(common_cfg); - - /* Copy the data into the internal struct to drop the const qualifier (temporarily) */ - adreno_gpu->_ubwc_config = *common_cfg; - adreno_gpu->ubwc_config = &adreno_gpu->_ubwc_config; + adreno_gpu->ubwc_config = qcom_ubwc_config_get_data(); + if (IS_ERR(adreno_gpu->ubwc_config)) + return ERR_CAST(adreno_gpu->ubwc_config); adreno_gpu->uche_trap_base = 0x0001ffffffff0000ull; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c index 550ff3a9b82e..3e6f409d13a2 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c @@ -1799,6 +1799,261 @@ static const struct adreno_reglist_pipe x285_dyn_pwrup_reglist_regs[] = { }; DECLARE_ADRENO_REGLIST_PIPE_LIST(x285_dyn_pwrup_reglist); +static const struct adreno_reglist_pipe a810_nonctxt_regs[] = { + { REG_A8XX_CP_SMMU_STREAM_ID_LPAC, 0x00000101, BIT(PIPE_NONE) }, + { REG_A8XX_GRAS_DBG_ECO_CNTL, 0x00f80800, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A6XX_PC_AUTO_VERTEX_STRIDE, 0x00000001, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_PC_VIS_STREAM_CNTL, 0x10010000, BIT(PIPE_BV) | 
BIT(PIPE_BR) }, + { REG_A8XX_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1, 0x00000002, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_PC_CHICKEN_BITS_1, 0x00000003, BIT(PIPE_BR) }, + { REG_A8XX_PC_CHICKEN_BITS_1, 0x00000023, BIT(PIPE_BV) }, /* Avoid partial waves at VFD */ + { REG_A8XX_PC_CHICKEN_BITS_2, 0x00000200, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_PC_CHICKEN_BITS_3, 0x00500000, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_PC_CHICKEN_BITS_4, 0x00500050, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A7XX_RB_CCU_CNTL, 0x00000068, BIT(PIPE_BR) }, + /* Partially enable perf clear, Disable DINT to c/z be data forwarding */ + { REG_A7XX_RB_CCU_DBG_ECO_CNTL, 0x00002200, BIT(PIPE_BR) }, + { REG_A8XX_RB_RESOLVE_PREFETCH_CNTL, 0x00000007, BIT(PIPE_BR) }, + { REG_A8XX_RB_CMP_DBG_ECO_CNTL, 0x00004000, BIT(PIPE_BR) }, + { REG_A8XX_RBBM_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, + { REG_A8XX_RBBM_SLICE_NC_MODE_CNTL, 0x00000001, BIT(PIPE_NONE) }, + { REG_A8XX_RBBM_WAIT_IDLE_CLOCKS_CNTL, 0x00000030, BIT(PIPE_NONE) }, + { REG_A8XX_RBBM_WAIT_IDLE_CLOCKS_CNTL2, 0x00000030, BIT(PIPE_NONE) }, + { REG_A8XX_UCHE_GBIF_GX_CONFIG, 0x010240e0, BIT(PIPE_NONE) }, + { REG_A8XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x22122212, BIT(PIPE_NONE) }, + { REG_A8XX_RBBM_CGC_P2S_CNTL, 0x00000040, BIT(PIPE_NONE) }, + /* + * BIT(22): Disable PS out of order retire + * BIT(23): Enable half wave mode and MM instruction src&dst is half precision + */ + { REG_A7XX_SP_CHICKEN_BITS_2, BIT(22) | BIT(23), BIT(PIPE_NONE) }, + { REG_A7XX_SP_CHICKEN_BITS_3, 0x00300000, BIT(PIPE_NONE) }, + { REG_A6XX_SP_PERFCTR_SHADER_MASK, 0x0000003f, BIT(PIPE_NONE) }, + { REG_A7XX_SP_HLSQ_TIMEOUT_THRESHOLD_DP, 0x00000080, BIT(PIPE_NONE) }, + { REG_A6XX_TPL1_DBG_ECO_CNTL, 0x10100000, BIT(PIPE_NONE) }, + { REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x04000724, BIT(PIPE_NONE) }, + { REG_A6XX_UCHE_MODE_CNTL, 0x00020000, BIT(PIPE_NONE) }, + { REG_A8XX_UCHE_CCHE_MODE_CNTL, 0x00001000, BIT(PIPE_NONE) }, + { REG_A8XX_UCHE_CCHE_CACHE_WAYS, 0x00000800, BIT(PIPE_NONE) }, + /* 
Disable write slow pointer in data phase queue */ + { REG_A8XX_UCHE_HW_DBG_CNTL, BIT(8), BIT(PIPE_NONE) }, + { REG_A8XX_UCHE_VARB_IDLE_TIMEOUT, 0x00000020, BIT(PIPE_NONE) }, + { REG_A7XX_VFD_DBG_ECO_CNTL, 0x00008000, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_VFD_CB_BV_THRESHOLD, 0x00500050, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_VFD_CB_BR_THRESHOLD, 0x00600060, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_VFD_CB_BUSY_REQ_CNT, 0x00200020, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_VFD_CB_LP_REQ_CNT, 0x00100020, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_VPC_FLATSHADE_MODE_CNTL, 0x00000001, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_RB_GC_GMEM_PROTECT, 0x00900000, BIT(PIPE_BR) }, + { }, +}; + +static const u32 a810_protect_regs[] = { + A6XX_PROTECT_RDONLY(0x00000, 0x03a3), + A6XX_PROTECT_RDONLY(0x003b4, 0x008b), + A6XX_PROTECT_NORDWR(0x00440, 0x001f), + A6XX_PROTECT_RDONLY(0x00580, 0x005f), + A6XX_PROTECT_NORDWR(0x005e0, 0x011f), + A6XX_PROTECT_RDONLY(0x0074a, 0x0005), + A6XX_PROTECT_RDONLY(0x00759, 0x0026), + A6XX_PROTECT_RDONLY(0x00789, 0x0000), + A6XX_PROTECT_RDONLY(0x0078c, 0x0013), + A6XX_PROTECT_NORDWR(0x00800, 0x0029), + A6XX_PROTECT_NORDWR(0x00837, 0x00af), + A6XX_PROTECT_RDONLY(0x008e7, 0x00c9), + A6XX_PROTECT_NORDWR(0x008ec, 0x00c3), + A6XX_PROTECT_NORDWR(0x009b1, 0x0250), + A6XX_PROTECT_RDONLY(0x00ce0, 0x0001), + A6XX_PROTECT_RDONLY(0x00df0, 0x0000), + A6XX_PROTECT_NORDWR(0x00df1, 0x0000), + A6XX_PROTECT_NORDWR(0x00e01, 0x0000), + A6XX_PROTECT_NORDWR(0x00e03, 0x1fff), + A6XX_PROTECT_NORDWR(0x03c00, 0x00c5), + A6XX_PROTECT_RDONLY(0x03cc6, 0x1fff), + A6XX_PROTECT_NORDWR(0x08600, 0x01ff), + A6XX_PROTECT_NORDWR(0x08e00, 0x00ff), + A6XX_PROTECT_RDONLY(0x08f00, 0x0000), + A6XX_PROTECT_NORDWR(0x08f01, 0x01be), + A6XX_PROTECT_NORDWR(0x09600, 0x01ff), + A6XX_PROTECT_RDONLY(0x0981a, 0x02e5), + A6XX_PROTECT_NORDWR(0x09e00, 0x01ff), + A6XX_PROTECT_NORDWR(0x0a600, 0x01ff), + A6XX_PROTECT_NORDWR(0x0ae00, 0x0006), + A6XX_PROTECT_NORDWR(0x0ae08, 0x0006), + 
A6XX_PROTECT_NORDWR(0x0ae10, 0x036f), + A6XX_PROTECT_NORDWR(0x0b600, 0x1fff), + A6XX_PROTECT_NORDWR(0x0dc00, 0x1fff), + A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff), + A6XX_PROTECT_NORDWR(0x18400, 0x003f), + A6XX_PROTECT_RDONLY(0x18440, 0x013f), + A6XX_PROTECT_NORDWR(0x18580, 0x1fff), + A6XX_PROTECT_NORDWR(0x1b400, 0x1fff), + A6XX_PROTECT_NORDWR(0x1f400, 0x0477), + A6XX_PROTECT_RDONLY(0x1f878, 0x0787), + A6XX_PROTECT_NORDWR(0x1f930, 0x0329), + A6XX_PROTECT_NORDWR(0x20000, 0x1fff), + A6XX_PROTECT_NORDWR(0x27800, 0x007f), + A6XX_PROTECT_RDONLY(0x27880, 0x0381), + A6XX_PROTECT_NORDWR(0x27882, 0x0001), + A6XX_PROTECT_NORDWR(0x27c02, 0x0000), +}; +DECLARE_ADRENO_PROTECT(a810_protect, 64); + +static const uint32_t a810_pwrup_reglist_regs[] = { + REG_A6XX_UCHE_MODE_CNTL, + REG_A8XX_UCHE_VARB_IDLE_TIMEOUT, + REG_A8XX_UCHE_GBIF_GX_CONFIG, + REG_A8XX_UCHE_CACHE_WAYS, + REG_A8XX_UCHE_CCHE_MODE_CNTL, + REG_A8XX_UCHE_CCHE_CACHE_WAYS, + REG_A8XX_UCHE_CCHE_GC_GMEM_RANGE_MIN, + REG_A8XX_UCHE_CCHE_GC_GMEM_RANGE_MIN + 1, + REG_A8XX_UCHE_CCHE_TRAP_BASE, + REG_A8XX_UCHE_CCHE_TRAP_BASE + 1, + REG_A8XX_UCHE_CCHE_WRITE_THRU_BASE, + REG_A8XX_UCHE_CCHE_WRITE_THRU_BASE + 1, + REG_A8XX_UCHE_HW_DBG_CNTL, + REG_A8XX_UCHE_WRITE_THRU_BASE, + REG_A8XX_UCHE_WRITE_THRU_BASE + 1, + REG_A8XX_UCHE_TRAP_BASE, + REG_A8XX_UCHE_TRAP_BASE + 1, + REG_A8XX_UCHE_CLIENT_PF, + REG_A8XX_RB_CMP_NC_MODE_CNTL, + REG_A7XX_SP_HLSQ_TIMEOUT_THRESHOLD_DP, + REG_A8XX_SP_HLSQ_GC_GMEM_RANGE_MIN, + REG_A8XX_SP_HLSQ_GC_GMEM_RANGE_MIN + 1, + REG_A7XX_SP_READ_SEL, + REG_A6XX_TPL1_NC_MODE_CNTL, + REG_A6XX_TPL1_DBG_ECO_CNTL, + REG_A6XX_TPL1_DBG_ECO_CNTL1, + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(5), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(6), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(7), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(8), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(9), + 
REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(10), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(11), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(12), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(13), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(14), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(15), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(16), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(17), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(18), + REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(19), +}; +DECLARE_ADRENO_REGLIST_LIST(a810_pwrup_reglist); + +static const u32 a810_ifpc_reglist_regs[] = { + REG_A8XX_RBBM_NC_MODE_CNTL, + REG_A8XX_RBBM_PERFCTR_CNTL, + REG_A8XX_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, + REG_A8XX_RBBM_SLICE_NC_MODE_CNTL, + REG_A6XX_SP_NC_MODE_CNTL, + REG_A7XX_SP_CHICKEN_BITS_2, + REG_A7XX_SP_CHICKEN_BITS_3, + REG_A6XX_SP_PERFCTR_SHADER_MASK, + REG_A8XX_CP_PROTECT_GLOBAL(0), + REG_A8XX_CP_PROTECT_GLOBAL(1), + REG_A8XX_CP_PROTECT_GLOBAL(2), + REG_A8XX_CP_PROTECT_GLOBAL(3), + REG_A8XX_CP_PROTECT_GLOBAL(4), + REG_A8XX_CP_PROTECT_GLOBAL(5), + REG_A8XX_CP_PROTECT_GLOBAL(6), + REG_A8XX_CP_PROTECT_GLOBAL(7), + REG_A8XX_CP_PROTECT_GLOBAL(8), + REG_A8XX_CP_PROTECT_GLOBAL(9), + REG_A8XX_CP_PROTECT_GLOBAL(10), + REG_A8XX_CP_PROTECT_GLOBAL(11), + REG_A8XX_CP_PROTECT_GLOBAL(12), + REG_A8XX_CP_PROTECT_GLOBAL(13), + REG_A8XX_CP_PROTECT_GLOBAL(14), + REG_A8XX_CP_PROTECT_GLOBAL(15), + REG_A8XX_CP_PROTECT_GLOBAL(16), + REG_A8XX_CP_PROTECT_GLOBAL(17), + REG_A8XX_CP_PROTECT_GLOBAL(18), + REG_A8XX_CP_PROTECT_GLOBAL(19), + REG_A8XX_CP_PROTECT_GLOBAL(20), + REG_A8XX_CP_PROTECT_GLOBAL(21), + REG_A8XX_CP_PROTECT_GLOBAL(22), + REG_A8XX_CP_PROTECT_GLOBAL(23), + REG_A8XX_CP_PROTECT_GLOBAL(24), + REG_A8XX_CP_PROTECT_GLOBAL(25), + REG_A8XX_CP_PROTECT_GLOBAL(26), + REG_A8XX_CP_PROTECT_GLOBAL(27), + REG_A8XX_CP_PROTECT_GLOBAL(28), + REG_A8XX_CP_PROTECT_GLOBAL(29), + REG_A8XX_CP_PROTECT_GLOBAL(30), + REG_A8XX_CP_PROTECT_GLOBAL(31), + REG_A8XX_CP_PROTECT_GLOBAL(32), + REG_A8XX_CP_PROTECT_GLOBAL(33), + REG_A8XX_CP_PROTECT_GLOBAL(34), + 
REG_A8XX_CP_PROTECT_GLOBAL(35), + REG_A8XX_CP_PROTECT_GLOBAL(36), + REG_A8XX_CP_PROTECT_GLOBAL(37), + REG_A8XX_CP_PROTECT_GLOBAL(38), + REG_A8XX_CP_PROTECT_GLOBAL(39), + REG_A8XX_CP_PROTECT_GLOBAL(40), + REG_A8XX_CP_PROTECT_GLOBAL(41), + REG_A8XX_CP_PROTECT_GLOBAL(42), + REG_A8XX_CP_PROTECT_GLOBAL(43), + REG_A8XX_CP_PROTECT_GLOBAL(44), + REG_A8XX_CP_PROTECT_GLOBAL(45), + REG_A8XX_CP_PROTECT_GLOBAL(46), + REG_A8XX_CP_PROTECT_GLOBAL(47), + REG_A8XX_CP_PROTECT_GLOBAL(48), + REG_A8XX_CP_PROTECT_GLOBAL(49), + REG_A8XX_CP_PROTECT_GLOBAL(50), + REG_A8XX_CP_PROTECT_GLOBAL(51), + REG_A8XX_CP_PROTECT_GLOBAL(52), + REG_A8XX_CP_PROTECT_GLOBAL(53), + REG_A8XX_CP_PROTECT_GLOBAL(54), + REG_A8XX_CP_PROTECT_GLOBAL(55), + REG_A8XX_CP_PROTECT_GLOBAL(56), + REG_A8XX_CP_PROTECT_GLOBAL(57), + REG_A8XX_CP_PROTECT_GLOBAL(58), + REG_A8XX_CP_PROTECT_GLOBAL(59), + REG_A8XX_CP_PROTECT_GLOBAL(60), + REG_A8XX_CP_PROTECT_GLOBAL(61), + REG_A8XX_CP_PROTECT_GLOBAL(62), + REG_A8XX_CP_PROTECT_GLOBAL(63), +}; +DECLARE_ADRENO_REGLIST_LIST(a810_ifpc_reglist); + +static const struct adreno_reglist_pipe a810_dyn_pwrup_reglist_regs[] = { + { REG_A8XX_CP_PROTECT_CNTL_PIPE, 0, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { REG_A8XX_CP_PROTECT_PIPE(15), 0, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { REG_A8XX_GRAS_TSEFE_DBG_ECO_CNTL, 0, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_GRAS_NC_MODE_CNTL, 0, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_GRAS_DBG_ECO_CNTL, 0, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A7XX_RB_CCU_CNTL, 0, BIT(PIPE_BR) }, + { REG_A7XX_RB_CCU_DBG_ECO_CNTL, 0, BIT(PIPE_BR) }, + { REG_A8XX_RB_CCU_NC_MODE_CNTL, 0, BIT(PIPE_BR) }, + { REG_A8XX_RB_CMP_NC_MODE_CNTL, 0, BIT(PIPE_BR) }, + { REG_A8XX_RB_RESOLVE_PREFETCH_CNTL, 0, BIT(PIPE_BR) }, + { REG_A8XX_RB_CMP_DBG_ECO_CNTL, 0, BIT(PIPE_BR) }, + { REG_A8XX_RB_GC_GMEM_PROTECT, 0, BIT(PIPE_BR) }, + { REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 0, BIT(PIPE_BR) }, + { REG_A8XX_VPC_FLATSHADE_MODE_CNTL, 0, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { 
REG_A8XX_PC_CHICKEN_BITS_1, 0, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_PC_CHICKEN_BITS_2, 0, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_PC_CHICKEN_BITS_3, 0, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_PC_CHICKEN_BITS_4, 0, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A6XX_PC_AUTO_VERTEX_STRIDE, 0, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { REG_A8XX_PC_VIS_STREAM_CNTL, 0, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { REG_A8XX_PC_CONTEXT_SWITCH_STABILIZE_CNTL_1, 0, BIT(PIPE_BR) | BIT(PIPE_BV) }, + { REG_A8XX_VFD_CB_BV_THRESHOLD, 0, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_VFD_CB_BR_THRESHOLD, 0, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_VFD_CB_BUSY_REQ_CNT, 0, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A8XX_VFD_CB_LP_REQ_CNT, 0, BIT(PIPE_BV) | BIT(PIPE_BR) }, + { REG_A7XX_VFD_DBG_ECO_CNTL, 0, BIT(PIPE_BR) | BIT(PIPE_BV) }, +}; +DECLARE_ADRENO_REGLIST_PIPE_LIST(a810_dyn_pwrup_reglist); + static const struct adreno_reglist_pipe a840_nonctxt_regs[] = { { REG_A8XX_CP_SMMU_STREAM_ID_LPAC, 0x00000101, BIT(PIPE_NONE) }, { REG_A8XX_GRAS_DBG_ECO_CNTL, 0x00000800, BIT(PIPE_BV) | BIT(PIPE_BR) }, @@ -2193,6 +2448,48 @@ static const struct adreno_info a8xx_gpus[] = { { 252, 2 }, { 221, 3 }, ), + }, { + .chip_ids = ADRENO_CHIP_IDS(0x44010000), + .family = ADRENO_8XX_GEN1, + .fw = { + [ADRENO_FW_SQE] = "gen80300_sqe.fw", + [ADRENO_FW_GMU] = "gen80300_gmu.bin", + }, + .gmem = SZ_512K + SZ_64K, + .inactive_period = DRM_MSM_INACTIVE_PERIOD, + .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | + ADRENO_QUIRK_HAS_HW_APRIV | + ADRENO_QUIRK_PREEMPTION | + ADRENO_QUIRK_IFPC, + .funcs = &a8xx_gpu_funcs, + .zapfw = "gen80300_zap.mbn", + .a6xx = &(const struct a6xx_info) { + .protect = &a810_protect, + .nonctxt_reglist = a810_nonctxt_regs, + .pwrup_reglist = &a810_pwrup_reglist, + .dyn_pwrup_reglist = &a810_dyn_pwrup_reglist, + .ifpc_reglist = &a810_ifpc_reglist, + .gbif_cx = a840_gbif, + .max_slices = 1, + .gmu_chipid = 0x8030000, + .bcms = (const struct a6xx_bcm[]) { + { .name = "SH0", .buswidth = 16 }, + { .name 
= "MC0", .buswidth = 4 }, + { + .name = "ACV", + .fixed = true, + .perfmode = BIT(2), + .perfmode_bw = 10687500, + }, + { /* sentinel */ }, + }, + }, + .preempt_record_size = 4558 * SZ_1K, + .speedbins = ADRENO_SPEEDBINS( + { 0, 0 }, + { 242, 1 }, + { 221, 2 }, + ), } }; @@ -2205,4 +2502,5 @@ static inline __always_unused void __build_asserts(void) BUILD_BUG_ON(a660_protect.count > a660_protect.count_max); BUILD_BUG_ON(a690_protect.count > a690_protect.count_max); BUILD_BUG_ON(a730_protect.count > a730_protect.count_max); + BUILD_BUG_ON(a810_protect.count > a810_protect.count_max); } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 1b44b9e21ad8..2e5d7b53a0c3 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -947,7 +947,7 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state) /* Turn on TCM (Tightly Coupled Memory) retention */ if (adreno_is_a7xx(adreno_gpu)) - a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_TCM_RET_CNTL, 1); + a6xx_cx_misc_write(a6xx_gpu, REG_A7XX_CX_MISC_TCM_RET_CNTL, 1); else if (!adreno_is_a8xx(adreno_gpu)) gmu_write(gmu, REG_A6XX_GMU_GENERAL_7, 1); @@ -1215,7 +1215,7 @@ static int a6xx_gmu_secure_init(struct a6xx_gpu *a6xx_gpu) if (!qcom_scm_is_available()) { dev_warn_once(gpu->dev->dev, "SCM is not available, poking fuse register\n"); - a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, + a6xx_cx_misc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); @@ -1236,7 +1236,7 @@ static int a6xx_gmu_secure_init(struct a6xx_gpu *a6xx_gpu) * firmware, find out whether that's the case. The scm call * above sets the fuse register. 
*/ - fuse_val = a6xx_llc_read(a6xx_gpu, + fuse_val = a6xx_cx_misc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); adreno_gpu->has_ray_tracing = !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING); @@ -1250,6 +1250,56 @@ done: return 0; } +static int a6xx_gmu_gxpd_get(struct a6xx_gmu *gmu) +{ + struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + + if (IS_ERR_OR_NULL(gmu->gxpd)) + return 0; + + /* + * On A8xx HW, GX GDSC is moved to a new clk controller block under GX + * power domain. The clock driver for this new block keeps the GX rail + * voted when gxpd is voted. So, use the gxpd only during gpu recovery. + */ + if (adreno_gpu->info->family >= ADRENO_8XX_GEN1) + return 0; + + /* + * On A6x/A7x, "enable" the GX power domain which won't actually do + * anything but it will make sure that the refcounting is correct in + * case we need to bring down the GX after a GMU failure + */ + return pm_runtime_get_sync(gmu->gxpd); +} + +static int a6xx_gmu_gxpd_put(struct a6xx_gmu *gmu) +{ + struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + + if (IS_ERR_OR_NULL(gmu->gxpd)) + return 0; + + if (adreno_gpu->info->family < ADRENO_8XX_GEN1) + return pm_runtime_put_sync(gmu->gxpd); + + /* + * On A8x, GX GDSC collapse should be triggered only when it is stuck ON + */ + if (adreno_gpu->funcs->gx_is_on(adreno_gpu)) { + pm_runtime_get_sync(gmu->gxpd); + /* + * Hint to gfxclkctl driver to do a hw collapse during the next + * RPM PUT. 
This is a special behavior in the gfxclkctl driver + */ + dev_pm_genpd_synced_poweroff(gmu->gxpd); + pm_runtime_put_sync(gmu->gxpd); + } + + return 0; +} int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) { @@ -1266,13 +1316,7 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) /* Turn on the resources */ pm_runtime_get_sync(gmu->dev); - /* - * "enable" the GX power domain which won't actually do anything but it - * will make sure that the refcounting is correct in case we need to - * bring down the GX after a GMU failure - */ - if (!IS_ERR_OR_NULL(gmu->gxpd)) - pm_runtime_get_sync(gmu->gxpd); + a6xx_gmu_gxpd_get(gmu); /* Use a known rate to bring up the GMU */ clk_set_rate(gmu->core_clk, 200000000); @@ -1299,7 +1343,7 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) /* Check to see if we are doing a cold or warm boot */ if (adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) { - status = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_TCM_RET_CNTL) == 1 ? + status = a6xx_cx_misc_read(a6xx_gpu, REG_A7XX_CX_MISC_TCM_RET_CNTL) == 1 ? GMU_WARM_BOOT : GMU_COLD_BOOT; } else if (gmu->legacy) { status = gmu_read(gmu, REG_A6XX_GMU_GENERAL_7) == 1 ? @@ -1339,7 +1383,8 @@ disable_irq: disable_clk: clk_bulk_disable_unprepare(gmu->nr_clocks, gmu->clocks); rpm_put: - pm_runtime_put(gmu->gxpd); + a6xx_gmu_gxpd_put(gmu); + pm_runtime_put(gmu->dev); return ret; @@ -1455,8 +1500,7 @@ int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu) * domain. 
Usually the GMU does this but only if the shutdown sequence * was successful */ - if (!IS_ERR_OR_NULL(gmu->gxpd)) - pm_runtime_put_sync(gmu->gxpd); + a6xx_gmu_gxpd_put(gmu); clk_bulk_disable_unprepare(gmu->nr_clocks, gmu->clocks); @@ -2034,12 +2078,12 @@ static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, struct platform_device *pdev, return irq; } -void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu) +void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu, bool force_on) { + bool sysprof = msm_gpu_sysprof_no_ifpc(gpu) || force_on; struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct a6xx_gmu *gmu = &a6xx_gpu->gmu; - unsigned int sysprof_active; /* Nothing to do if GPU is suspended. We will handle this during GMU resume */ if (!pm_runtime_get_if_active(&gpu->pdev->dev)) @@ -2047,15 +2091,13 @@ void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu) mutex_lock(&gmu->lock); - sysprof_active = refcount_read(&gpu->sysprof_active); - /* * 'Perfcounter select' register values are lost during IFPC collapse. To avoid that, * use the currently unused perfcounter oob vote to block IFPC when sysprof is active */ - if ((sysprof_active > 1) && !test_and_set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status)) + if (sysprof && !test_and_set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status)) a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET); - else if ((sysprof_active == 1) && test_and_clear_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status)) + else if (!sysprof && test_and_clear_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status)) a6xx_gmu_clear_oob(gmu, GMU_OOB_PERFCOUNTER_SET); mutex_unlock(&gmu->lock); @@ -2357,7 +2399,12 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) goto err_mmio; } } else if (adreno_is_a8xx(adreno_gpu)) { - gmu->rscc = gmu->mmio + 0x19000; + /* + * On a8xx , RSCC lives at GPU base + 0x50000, which falls + * inside the GPU's kgsl_3d0_reg_memory range rather than the + * GMU's. 
+ */ + gmu->rscc = gpu->mmio + 0x50000; } else { gmu->rscc = gmu->mmio + 0x23000; } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 7a3e3c2f5cf3..8b3bb2fd433b 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -5,6 +5,7 @@ #include "msm_gem.h" #include "msm_mmu.h" #include "msm_gpu_trace.h" +#include "msm_perfcntr.h" #include "a6xx_gpu.h" #include "a6xx_gmu.xml.h" @@ -188,6 +189,30 @@ void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) spin_unlock_irqrestore(&ring->preempt_lock, flags); } +void +a6xx_flush_yield(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + /* If preemption is enabled */ + if (gpu->nr_rings > 1) { + /* Yield the floor on command completion */ + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); + + /* + * If dword[2:1] are non zero, they specify an address for + * the CP to write the value of dword[3] to on preemption + * complete. Write 0 to skip the write + */ + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + /* Data value - not used if the address above is 0 */ + OUT_RING(ring, 0x01); + /* generate interrupt on preemption completion */ + OUT_RING(ring, 0x00); + } + + a6xx_flush(gpu, ring); +} + static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter, u64 iova) { @@ -202,7 +227,7 @@ static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter, static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, struct msm_ringbuffer *ring, struct msm_gem_submit *submit) { - bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1; + bool sysprof = msm_gpu_sysprof_no_perfcntr_zap(&a6xx_gpu->base.base); struct msm_context *ctx = submit->queue->ctx; struct drm_gpuvm *vm = msm_context_vm(submit->dev, ctx); struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; @@ -596,28 +621,9 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, 0x100); /* IFPC 
enable */ - /* If preemption is enabled */ - if (gpu->nr_rings > 1) { - /* Yield the floor on command completion */ - OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); - - /* - * If dword[2:1] are non zero, they specify an address for - * the CP to write the value of dword[3] to on preemption - * complete. Write 0 to skip the write - */ - OUT_RING(ring, 0x00); - OUT_RING(ring, 0x00); - /* Data value - not used if the address above is 0 */ - OUT_RING(ring, 0x01); - /* generate interrupt on preemption completion */ - OUT_RING(ring, 0x00); - } - - trace_msm_gpu_submit_flush(submit, adreno_gpu->funcs->get_timestamp(gpu)); - a6xx_flush(gpu, ring); + a6xx_flush_yield(gpu, ring); /* Check to see if we need to start preemption */ if (adreno_is_a8xx(adreno_gpu)) @@ -733,82 +739,6 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu) gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]); } -static int a6xx_calc_ubwc_config(struct adreno_gpu *gpu) -{ - const struct qcom_ubwc_cfg_data *common_cfg; - struct qcom_ubwc_cfg_data *cfg = &gpu->_ubwc_config; - - /* Inherit the common config and make some necessary fixups */ - common_cfg = qcom_ubwc_config_get_data(); - if (IS_ERR(common_cfg)) - return PTR_ERR(common_cfg); - - /* Copy the data into the internal struct to drop the const qualifier (temporarily) */ - *cfg = *common_cfg; - - /* Use common config as is for A8x */ - if (!adreno_is_a8xx(gpu)) { - cfg->ubwc_swizzle = 0x6; - cfg->highest_bank_bit = 15; - } - - if (adreno_is_a610(gpu)) { - cfg->highest_bank_bit = 13; - cfg->ubwc_swizzle = 0x7; - } - - if (adreno_is_a612(gpu)) - cfg->highest_bank_bit = 14; - - if (adreno_is_a618(gpu)) - cfg->highest_bank_bit = 14; - - if (adreno_is_a619(gpu)) - /* TODO: Should be 14 but causes corruption at e.g. 
1920x1200 on DP */ - cfg->highest_bank_bit = 13; - - if (adreno_is_a619_holi(gpu)) - cfg->highest_bank_bit = 13; - - if (adreno_is_a621(gpu)) - cfg->highest_bank_bit = 13; - - if (adreno_is_a623(gpu)) - cfg->highest_bank_bit = 16; - - if (adreno_is_a650(gpu) || - adreno_is_a660(gpu) || - adreno_is_a690(gpu) || - adreno_is_a730(gpu) || - adreno_is_a740_family(gpu)) { - /* TODO: get ddr type from bootloader and use 15 for LPDDR4 */ - cfg->highest_bank_bit = 16; - } - - if (adreno_is_a663(gpu)) { - cfg->highest_bank_bit = 13; - cfg->ubwc_swizzle = 0x4; - } - - if (adreno_is_7c3(gpu)) - cfg->highest_bank_bit = 14; - - if (adreno_is_a702(gpu)) - cfg->highest_bank_bit = 14; - - if (cfg->highest_bank_bit != common_cfg->highest_bank_bit) - DRM_WARN_ONCE("Inconclusive highest_bank_bit value: %u (GPU) vs %u (UBWC_CFG)\n", - cfg->highest_bank_bit, common_cfg->highest_bank_bit); - - if (cfg->ubwc_swizzle != common_cfg->ubwc_swizzle) - DRM_WARN_ONCE("Inconclusive ubwc_swizzle value: %u (GPU) vs %u (UBWC_CFG)\n", - cfg->ubwc_swizzle, common_cfg->ubwc_swizzle); - - gpu->ubwc_config = &gpu->_ubwc_config; - - return 0; -} - static void a6xx_set_ubwc_config(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -821,10 +751,10 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) BUG_ON(cfg->highest_bank_bit < 13); u32 hbb = cfg->highest_bank_bit - 13; bool rgb565_predicator = cfg->ubwc_enc_version >= UBWC_4_0; - u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2); + u32 level2_swizzling_dis = !(qcom_ubwc_swizzle(cfg) & UBWC_SWIZZLE_ENABLE_LVL2); bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg); - bool amsbc = cfg->ubwc_enc_version >= UBWC_3_0; - bool min_acc_len_64b = false; + bool amsbc = qcom_ubwc_enable_amsbc(cfg); + bool min_acc_len_64b; u8 uavflagprd_inv = 0; u32 hbb_hi = hbb >> 2; u32 hbb_lo = hbb & 3; @@ -832,8 +762,7 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) if (adreno_is_a650_family(adreno_gpu) || 
adreno_is_a7xx(adreno_gpu)) uavflagprd_inv = 2; - if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu)) - min_acc_len_64b = true; + min_acc_len_64b = qcom_ubwc_min_acc_length_64b(cfg); gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, level2_swizzling_dis << 12 | @@ -868,7 +797,7 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) min_acc_len_64b << 23 | hbb_lo << 21); gpu_write(gpu, REG_A6XX_RBBM_NC_MODE_CNTL, - cfg->macrotile_mode); + qcom_ubwc_macrotile_mode(cfg)); } static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) @@ -940,6 +869,7 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) A7XX_CP_APERTURE_CNTL_HOST_PIPE(PIPE_NONE)); } lock->dynamic_list_len = dyn_pwrup_reglist_count; + a6xx_gpu->dynamic_sel_reglist_offset = dyn_pwrup_reglist_count; } static int a7xx_preempt_start(struct msm_gpu *gpu) @@ -957,15 +887,7 @@ static int a7xx_preempt_start(struct msm_gpu *gpu) a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL); - /* Yield the floor on command completion */ - OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); - OUT_RING(ring, 0x00); - OUT_RING(ring, 0x00); - OUT_RING(ring, 0x00); - /* Generate interrupt on preemption completion */ - OUT_RING(ring, 0x00); - - a6xx_flush(gpu, ring); + a6xx_flush_yield(gpu, ring); return a6xx_idle(gpu, ring) ? 
0 : -EINVAL; } @@ -1185,7 +1107,7 @@ static int a6xx_ucode_load(struct msm_gpu *gpu) msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow"); } - a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PAGE_SIZE, + a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PWRUP_REGLIST_SIZE, MSM_BO_WC | MSM_BO_MAP_PRIV, gpu->vm, &a6xx_gpu->pwrup_reglist_bo, &a6xx_gpu->pwrup_reglist_iova); @@ -1607,7 +1529,7 @@ out: a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER); } - if (!ret && (refcount_read(&gpu->sysprof_active) > 1)) { + if (!ret && msm_gpu_sysprof_no_ifpc(gpu)) { ret = a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET); if (!ret) set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status); @@ -2039,7 +1961,7 @@ static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) struct msm_gpu *gpu = &adreno_gpu->base; u32 cntl1_regval = 0; - if (IS_ERR(a6xx_gpu->llc_mmio)) + if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice)) return; if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { @@ -2078,14 +2000,14 @@ static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) * pagetables */ if (!a6xx_gpu->have_mmu500) { - a6xx_llc_write(a6xx_gpu, + a6xx_cx_misc_write(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval); /* * Program cacheability overrides to not allocate cache * lines on a write miss */ - a6xx_llc_rmw(a6xx_gpu, + a6xx_cx_misc_rmw(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); return; } @@ -2098,7 +2020,7 @@ static void a7xx_llc_activate(struct a6xx_gpu *a6xx_gpu) struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; struct msm_gpu *gpu = &adreno_gpu->base; - if (IS_ERR(a6xx_gpu->llc_mmio)) + if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice)) return; if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { @@ -2135,31 +2057,12 @@ static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu) static void a6xx_llc_slices_init(struct platform_device *pdev, struct a6xx_gpu *a6xx_gpu, bool is_a7xx) { - 
struct device_node *phandle; - /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */ if (adreno_has_gmu_wrapper(&a6xx_gpu->base)) return; - /* - * There is a different programming path for A6xx targets with an - * mmu500 attached, so detect if that is the case - */ - phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0); - a6xx_gpu->have_mmu500 = (phandle && - of_device_is_compatible(phandle, "arm,mmu-500")); - of_node_put(phandle); - - if (is_a7xx || !a6xx_gpu->have_mmu500) - a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem"); - else - a6xx_gpu->llc_mmio = NULL; - a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU); a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); - - if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice)) - a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); } #define GBIF_CLIENT_HALT_MASK BIT(0) @@ -2537,6 +2440,142 @@ static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) return progress; } +static void +perfcntr_select(struct msm_ringbuffer *ring, enum adreno_pipe pipe, + uint32_t regidx, uint32_t *countables, uint32_t nr, + uint32_t **reglist) +{ + OUT_PKT4(ring, regidx, nr); + for (unsigned i = 0; i < nr; i++) + OUT_RING(ring, countables[i]); + + if (!*reglist) + return; + + for (unsigned i = 0; i < nr; i++) { + /* + * Bitfield is in same position on a7xx, but only 2 bits.. 
+ * which is sufficient for NONE/BR/BV: + */ + *(*reglist)++ = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe); + *(*reglist)++ = regidx + i; + *(*reglist)++ = countables[i]; + } +} + +static void +a6xx_perfcntr_configure(struct msm_gpu *gpu, struct msm_ringbuffer *ring, + const struct msm_perfcntr_stream *stream) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + enum adreno_pipe pipe = PIPE_NONE; + uint32_t *reglist = NULL; + uint32_t *reglist_sel_start; + + if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) { + WARN_ON(!a6xx_gpu->pwrup_reglist_emitted); + + struct cpu_gpu_lock *lock = a6xx_gpu->pwrup_reglist_ptr; + int off = (2 * lock->ifpc_list_len) + + (2 * lock->preemption_list_len) + + (3 * a6xx_gpu->dynamic_sel_reglist_offset); + + reglist = (uint32_t *)&lock->regs[0]; + reglist += off; + reglist_sel_start = reglist; + + /* Clear any previously configured SEL reg entries: */ + lock->dynamic_list_len = a6xx_gpu->dynamic_sel_reglist_offset; + + /* + * Ensure CP sees the dynamic_list_len update before we + * start modifying the SEL entries: + */ + dma_wmb(); + } + + for (unsigned i = 0; i < stream->nr_groups; i++) { + unsigned group_idx = msm_perfcntr_group_idx(stream, i); + unsigned base = msm_perfcntr_counter_base(stream, group_idx); + + const struct msm_perfcntr_group *group = + &gpu->perfcntr_groups[group_idx]; + + struct msm_perfcntr_group_state *group_state = + gpu->perfcntrs->groups[group_idx]; + + if (group->pipe != pipe) { + pipe = group->pipe; + + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + + if (pipe == PIPE_BR) { + OUT_RING(ring, CP_SET_THREAD_BR); + } else if (pipe == PIPE_BV) { + OUT_RING(ring, CP_SET_THREAD_BV); + } else { + OUT_RING(ring, CP_SET_THREAD_BOTH); + } + } + + const struct msm_perfcntr_counter *counter = &group->counters[base]; + unsigned nr = group_state->allocated_counters; + perfcntr_select(ring, pipe, counter->select_reg, + group_state->countables, nr, &reglist); + + for
(unsigned s = 0; s < ARRAY_SIZE(counter->slice_select_regs); s++) { + if (!counter->slice_select_regs[s]) + break; + + perfcntr_select(ring, pipe, counter->slice_select_regs[s], + group_state->countables, nr, &reglist); + } + } + + if (pipe != PIPE_NONE) { + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + OUT_RING(ring, CP_SET_THREAD_BOTH); + } + + OUT_PKT7(ring, CP_MEM_WRITE, 3); + OUT_RING(ring, lower_32_bits(rbmemptr(ring, perfcntr_fence))); + OUT_RING(ring, upper_32_bits(rbmemptr(ring, perfcntr_fence))); + OUT_RING(ring, stream->sel_fence); + + /* + * Update the pwrup reglist size before flushing. Kgsl does a shared- + * memory spinlock dance with SQE to avoid racing with IFPC exit. But + * we can skip that since the ringbuffer programming will be executed + * by SQE after dynamic reglist size is updated. So even if we lose + * the race, the register programming in the rb will overwrite/correct + * the SEL regs restored by SQE on IFPC exit, before sampling begins. + */ + if (reglist) { + struct cpu_gpu_lock *lock = a6xx_gpu->pwrup_reglist_ptr; + unsigned nr_regs = (reglist - reglist_sel_start) / 3; + + /* + * Ensure CP sees updates to the pwrup_reglist before it + * sees the new (increased) length: + */ + dma_wmb(); + + /* Update dynamic reglist len to include new SEL reg programming: */ + lock->dynamic_list_len = a6xx_gpu->dynamic_sel_reglist_offset + nr_regs; + + WARN_ON_ONCE(reglist > (uint32_t *)((uint8_t *)lock + PWRUP_REGLIST_SIZE)); + } + + a6xx_flush_yield(gpu, ring); + + /* Check to see if we need to start preemption */ + if (adreno_is_a8xx(to_adreno_gpu(gpu))) + a8xx_preempt_trigger(gpu); + else + a6xx_preempt_trigger(gpu); +} + static u32 fuse_to_supp_hw(const struct adreno_info *info, u32 fuse) { if (!info->speedbins) @@ -2560,7 +2599,7 @@ static int a6xx_read_speedbin(struct device *dev, struct a6xx_gpu *a6xx_gpu, return ret; if (info->quirks & ADRENO_QUIRK_SOFTFUSE) { - *speedbin = a6xx_llc_read(a6xx_gpu, REG_A8XX_CX_MISC_SW_FUSE_FREQ_LIMIT_STATUS); +
*speedbin = a6xx_cx_misc_read(a6xx_gpu, REG_A8XX_CX_MISC_SW_FUSE_FREQ_LIMIT_STATUS); *speedbin = A8XX_CX_MISC_SW_FUSE_FREQ_LIMIT_STATUS_FINALFREQLIMIT(*speedbin); return 0; } @@ -2621,6 +2660,7 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) struct platform_device *pdev = priv->gpu_pdev; struct adreno_platform_config *config = pdev->dev.platform_data; const struct adreno_info *info = config->info; + struct device_node *phandle; struct a6xx_gpu *a6xx_gpu; struct adreno_gpu *adreno_gpu; struct msm_gpu *gpu; @@ -2636,6 +2676,20 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) adreno_gpu = &a6xx_gpu->base; gpu = &adreno_gpu->base; + if ((ADRENO_6XX_GEN1 <= config->info->family) && + (config->info->family <= ADRENO_6XX_GEN4)) { + gpu->perfcntr_groups = a6xx_perfcntr_groups; + gpu->num_perfcntr_groups = a6xx_num_perfcntr_groups; + } else if ((ADRENO_7XX_GEN1 <= config->info->family) && + (config->info->family <= ADRENO_7XX_GEN3)) { + gpu->perfcntr_groups = a7xx_perfcntr_groups; + gpu->num_perfcntr_groups = a7xx_num_perfcntr_groups; + } else if ((ADRENO_8XX_GEN1 <= config->info->family) && + (config->info->family <= ADRENO_8XX_GEN2)) { + gpu->perfcntr_groups = a8xx_perfcntr_groups; + gpu->num_perfcntr_groups = a8xx_num_perfcntr_groups; + } + mutex_init(&a6xx_gpu->gmu.lock); spin_lock_init(&a6xx_gpu->aperture_lock); @@ -2657,6 +2711,20 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx); + /* + * There is a different programming path for A6xx targets with an + * mmu500 attached, so detect if that is the case + */ + phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0); + a6xx_gpu->have_mmu500 = (phandle && + of_device_is_compatible(phandle, "arm,mmu-500")); + of_node_put(phandle); + + if (is_a7xx || !a6xx_gpu->have_mmu500) + a6xx_gpu->cx_misc_mmio = msm_ioremap(pdev, "cx_mem"); + else + a6xx_gpu->cx_misc_mmio = NULL; + ret = a6xx_set_supported_hw(&pdev->dev, a6xx_gpu, info); if 
(ret) { a6xx_llc_slices_destroy(a6xx_gpu); @@ -2700,10 +2768,10 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) msm_mmu_set_fault_handler(to_msm_vm(gpu->vm)->mmu, gpu, adreno_gpu->funcs->mmu_fault_handler); - ret = a6xx_calc_ubwc_config(adreno_gpu); - if (ret) { + adreno_gpu->ubwc_config = qcom_ubwc_config_get_data(); + if (IS_ERR(adreno_gpu->ubwc_config)) { a6xx_destroy(&(a6xx_gpu->base.base)); - return ERR_PTR(ret); + return ERR_CAST(adreno_gpu->ubwc_config); } /* Set up the preemption specific bits and pieces for each ringbuffer */ @@ -2740,6 +2808,7 @@ const struct adreno_gpu_funcs a6xx_gpu_funcs = { .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, .sysprof_setup = a6xx_gmu_sysprof_setup, + .perfcntr_configure = a6xx_perfcntr_configure, }, .init = a6xx_gpu_init, .get_timestamp = a6xx_gmu_get_timestamp, @@ -2773,6 +2842,7 @@ const struct adreno_gpu_funcs a6xx_gmuwrapper_funcs = { .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, + .perfcntr_configure = a6xx_perfcntr_configure, }, .init = a6xx_gpu_init, .get_timestamp = a6xx_get_timestamp, @@ -2809,6 +2879,7 @@ const struct adreno_gpu_funcs a7xx_gpu_funcs = { .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, .sysprof_setup = a6xx_gmu_sysprof_setup, + .perfcntr_configure = a6xx_perfcntr_configure, }, .init = a6xx_gpu_init, .get_timestamp = a6xx_gmu_get_timestamp, @@ -2838,6 +2909,9 @@ const struct adreno_gpu_funcs a8xx_gpu_funcs = { .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a8xx_progress, + .sysprof_setup = a6xx_gmu_sysprof_setup, + .perfcntr_configure = a6xx_perfcntr_configure, + .perfcntr_flush = a8xx_perfcntr_flush, }, .init = a6xx_gpu_init, .get_timestamp = a8xx_gmu_get_timestamp, diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index eb431e5e00b1..b50c57f427b4 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h 
@@ -21,17 +21,19 @@ struct cpu_gpu_lock { uint32_t cpu_req; uint32_t turn; union { + /* a6xx: */ struct { uint16_t list_length; uint16_t list_offset; }; + /* a7xx+: */ struct { uint8_t ifpc_list_len; uint8_t preemption_list_len; uint16_t dynamic_list_len; }; }; - uint64_t regs[62]; + uint64_t regs[]; }; /** @@ -96,13 +98,21 @@ struct a6xx_gpu { uint32_t *shadow; struct drm_gem_object *pwrup_reglist_bo; +#define PWRUP_REGLIST_SIZE (2 * PAGE_SIZE) void *pwrup_reglist_ptr; uint64_t pwrup_reglist_iova; bool pwrup_reglist_emitted; + /* + * Offset of start of SEL regs appended to pwrup_reglist. This + * is equal to lock->dynamic_list_len if no SEL regs are appended + * to the end of the dynamic reglist. + */ + uint16_t dynamic_sel_reglist_offset; + bool has_whereami; - void __iomem *llc_mmio; + void __iomem *cx_misc_mmio; void *llc_slice; void *htw_llc_slice; bool have_mmu500; @@ -240,19 +250,19 @@ static inline bool a6xx_has_gbif(struct adreno_gpu *gpu) return true; } -static inline void a6xx_llc_rmw(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 mask, u32 or) +static inline void a6xx_cx_misc_rmw(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 mask, u32 or) { - return msm_rmw(a6xx_gpu->llc_mmio + (reg << 2), mask, or); + return msm_rmw(a6xx_gpu->cx_misc_mmio + (reg << 2), mask, or); } -static inline u32 a6xx_llc_read(struct a6xx_gpu *a6xx_gpu, u32 reg) +static inline u32 a6xx_cx_misc_read(struct a6xx_gpu *a6xx_gpu, u32 reg) { - return readl(a6xx_gpu->llc_mmio + (reg << 2)); + return readl(a6xx_gpu->cx_misc_mmio + (reg << 2)); } -static inline void a6xx_llc_write(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 value) +static inline void a6xx_cx_misc_write(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 value) { - writel(value, a6xx_gpu->llc_mmio + (reg << 2)); + writel(value, a6xx_gpu->cx_misc_mmio + (reg << 2)); } #define shadowptr(_a6xx_gpu, _ring) ((_a6xx_gpu)->shadow_iova + \ @@ -271,7 +281,7 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state); int a6xx_gmu_init(struct 
a6xx_gpu *a6xx_gpu, struct device_node *node); int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node); void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu); -void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu); +void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu, bool force_on); void a6xx_preempt_init(struct msm_gpu *gpu); void a6xx_preempt_hw_init(struct msm_gpu *gpu); @@ -317,6 +327,7 @@ void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_ void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert); int a6xx_fenced_write(struct a6xx_gpu *gpu, u32 offset, u64 value, u32 mask, bool is_64b); void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring); +void a6xx_flush_yield(struct msm_gpu *gpu, struct msm_ringbuffer *ring); int a6xx_zap_shader_init(struct msm_gpu *gpu); void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off); @@ -333,5 +344,6 @@ void a8xx_preempt_hw_init(struct msm_gpu *gpu); void a8xx_preempt_trigger(struct msm_gpu *gpu); void a8xx_preempt_irq(struct msm_gpu *gpu); bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring); +void a8xx_perfcntr_flush(struct msm_gpu *gpu); void a8xx_recover(struct msm_gpu *gpu); #endif /* __A6XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c index df4cbf42e9a4..1e599d4ddea1 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c @@ -261,7 +261,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu) mod_timer(&a6xx_gpu->preempt_timer, jiffies + msecs_to_jiffies(10000)); /* Enable or disable postamble as needed */ - sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1; + sysprof = msm_gpu_sysprof_no_perfcntr_zap(gpu); if (!sysprof && !a6xx_gpu->postamble_enabled) preempt_prepare_postamble(a6xx_gpu); diff --git a/drivers/gpu/drm/msm/adreno/a8xx_gpu.c b/drivers/gpu/drm/msm/adreno/a8xx_gpu.c index ccfccc45133f..9e44fd1ae634 
100644 --- a/drivers/gpu/drm/msm/adreno/a8xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a8xx_gpu.c @@ -104,7 +104,7 @@ void a8xx_gpu_get_slice_info(struct msm_gpu *gpu) return; } - slice_mask &= a6xx_llc_read(a6xx_gpu, + slice_mask &= a6xx_cx_misc_read(a6xx_gpu, REG_A8XX_CX_MISC_SLICE_ENABLE_FINAL); a6xx_gpu->slice_mask = slice_mask; @@ -265,8 +265,8 @@ static void a8xx_set_cp_protect(struct msm_gpu *gpu) * Last span feature is only supported on PIPE specific register. * So update those here */ - a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg); - a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg); + a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_PIPE(15), final_cfg); + a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_PIPE(15), final_cfg); a8xx_aperture_clear(gpu); } @@ -275,41 +275,37 @@ static void a8xx_set_ubwc_config(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config; - u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2); - u32 level3_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL3); + u32 level2_swizzling_dis = !(qcom_ubwc_swizzle(cfg) & UBWC_SWIZZLE_ENABLE_LVL2); + u32 level3_swizzling_dis = !(qcom_ubwc_swizzle(cfg) & UBWC_SWIZZLE_ENABLE_LVL3); bool rgba8888_lossless = false, fp16compoptdis = false; bool yuvnotcomptofc = false, min_acc_len_64b = false; - bool rgb565_predicator = false, amsbc = false; + bool rgb565_predicator = false; + bool amsbc = qcom_ubwc_enable_amsbc(cfg); bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg); u32 ubwc_version = cfg->ubwc_enc_version; - u32 hbb, hbb_hi, hbb_lo, mode = 1; + u32 hbb, hbb_hi, hbb_lo, mode; u8 uavflagprd_inv = 2; - switch (ubwc_version) { - case UBWC_6_0: + if (ubwc_version > UBWC_6_0) + dev_err(&gpu->pdev->dev, "Unknown UBWC version: 0x%x\n", ubwc_version); + + if (ubwc_version == UBWC_6_0) yuvnotcomptofc = true; - mode 
= 5; - break; - case UBWC_5_0: - amsbc = true; - rgb565_predicator = true; - mode = 4; - break; - case UBWC_4_0: - amsbc = true; - rgb565_predicator = true; - fp16compoptdis = true; + + if (ubwc_version < UBWC_5_0 && + ubwc_version >= UBWC_4_0) rgba8888_lossless = true; - mode = 2; - break; - case UBWC_3_0: - amsbc = true; - mode = 1; - break; - default: - dev_err(&gpu->pdev->dev, "Unknown UBWC version: 0x%x\n", ubwc_version); - break; - } + + if (ubwc_version < UBWC_4_3) + fp16compoptdis = true; + + if (cfg->ubwc_enc_version >= UBWC_4_0) + rgb565_predicator = true; + + if (ubwc_version < UBWC_3_0) + dev_err(&gpu->pdev->dev, "Unsupported UBWC version: 0x%x\n", ubwc_version); + + mode = qcom_ubwc_version_tag(cfg); /* * We subtract 13 from the highest bank bit (13 is the minimum value @@ -468,6 +464,7 @@ static void a8xx_patch_pwrup_reglist(struct msm_gpu *gpu) } lock->dynamic_list_len = dyn_pwrup_reglist_count; + a6xx_gpu->dynamic_sel_reglist_offset = dyn_pwrup_reglist_count; done: a8xx_aperture_clear(gpu); @@ -488,15 +485,7 @@ static int a8xx_preempt_start(struct msm_gpu *gpu) a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL); - /* Yield the floor on command completion */ - OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); - OUT_RING(ring, 0x00); - OUT_RING(ring, 0x00); - OUT_RING(ring, 0x00); - /* Generate interrupt on preemption completion */ - OUT_RING(ring, 0x00); - - a6xx_flush(gpu, ring); + a6xx_flush_yield(gpu, ring); return a8xx_idle(gpu, ring) ? 
0 : -EINVAL; } @@ -849,7 +838,7 @@ out: */ a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); - if (!ret && (refcount_read(&gpu->sysprof_active) > 1)) { + if (!ret && msm_gpu_sysprof_no_perfcntr_zap(gpu)) { ret = a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET); if (!ret) set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status); @@ -886,17 +875,22 @@ void a8xx_recover(struct msm_gpu *gpu) adreno_dump_info(gpu); - if (hang_debug) - a8xx_dump(gpu); - /* * To handle recovery specific sequences during the rpm suspend we are * about to trigger */ a6xx_gpu->hung = true; - /* Halt SQE first */ - gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 3); + if (adreno_gpu->funcs->gx_is_on(adreno_gpu)) { + /* + * Sometimes crashstate capture is skipped, so SQE should be + * halted here again + */ + gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 3); + + if (hang_debug) + a8xx_dump(gpu); + } pm_runtime_dont_use_autosuspend(&gpu->pdev->dev); @@ -1353,3 +1347,23 @@ bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { return true; } + +void a8xx_perfcntr_flush(struct msm_gpu *gpu) +{ + u32 val; + + /* + * Flush delta counters (both perf counters and pipe stats) present in + * RBBM_S and RBBM_US to perf RAM logic to get the latest data. 
+ */ + gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_FLUSH_HOST_CMD, BIT(0)); + gpu_write(gpu, REG_A8XX_RBBM_SLICE_PERFCTR_FLUSH_HOST_CMD, BIT(0)); + + /* Ensure all writes are posted before polling status register */ + wmb(); + + if (gpu_poll_timeout(gpu, REG_A8XX_RBBM_PERFCTR_FLUSH_HOST_STATUS, val, + val & BIT(0), 100, 100 * 1000)) { + dev_err(&gpu->pdev->dev, "Perfcounter flush timed out: status=0x%08x\n", val); + } +} diff --git a/drivers/gpu/drm/msm/adreno/a8xx_preempt.c b/drivers/gpu/drm/msm/adreno/a8xx_preempt.c index 3d8c33ba722e..6cb53a071801 100644 --- a/drivers/gpu/drm/msm/adreno/a8xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a8xx_preempt.c @@ -242,7 +242,7 @@ void a8xx_preempt_trigger(struct msm_gpu *gpu) mod_timer(&a6xx_gpu->preempt_timer, jiffies + msecs_to_jiffies(10000)); /* Enable or disable postamble as needed */ - sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1; + sysprof = msm_gpu_sysprof_no_perfcntr_zap(gpu); if (!sysprof && !a6xx_gpu->postamble_enabled) preempt_prepare_postamble(a6xx_gpu); diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index fc38331ce640..7f20320ef66a 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -307,8 +307,10 @@ MODULE_DEVICE_TABLE(of, dt_match); static int adreno_runtime_resume(struct device *dev) { struct msm_gpu *gpu = dev_to_gpu(dev); - - return gpu->funcs->pm_resume(gpu); + int ret = gpu->funcs->pm_resume(gpu); + if (!ret) + ret = msm_perfcntr_resume(gpu); + return ret; } static int adreno_runtime_suspend(struct device *dev) @@ -322,6 +324,8 @@ static int adreno_runtime_suspend(struct device *dev) */ WARN_ON_ONCE(gpu->active_submits); + msm_perfcntr_suspend(gpu); + return gpu->funcs->pm_suspend(gpu); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 03f96a1154e1..c62c45bb0ddb 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -434,12 +434,12 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx, case MSM_PARAM_UBWC_SWIZZLE: if (!adreno_gpu->ubwc_config) return UERR(ENOENT, drm, "no UBWC on this platform"); - *value = adreno_gpu->ubwc_config->ubwc_swizzle; + *value = qcom_ubwc_swizzle(adreno_gpu->ubwc_config); return 0; case MSM_PARAM_MACROTILE_MODE: if (!adreno_gpu->ubwc_config) return UERR(ENOENT, drm, "no UBWC on this platform"); - *value = adreno_gpu->ubwc_config->macrotile_mode; + *value = qcom_ubwc_macrotile_mode(adreno_gpu->ubwc_config); return 0; case MSM_PARAM_UCHE_TRAP_BASE: *value = adreno_gpu->uche_trap_base; @@ -500,7 +500,7 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_context *ctx, return 0; } case MSM_PARAM_SYSPROF: - if (!capable(CAP_SYS_ADMIN)) + if (!perfmon_capable()) return UERR(EPERM, drm, "invalid permissions"); return msm_context_set_sysprof(ctx, gpu, value); case MSM_PARAM_EN_VM_BIND: @@ -708,11 +708,10 @@ void adreno_recover(struct msm_gpu *gpu) struct drm_device *dev = gpu->dev; int ret; - // XXX pm-runtime?? we *need* the device to be off after this - // so maybe continuing to call ->pm_suspend/resume() is better? - + msm_perfcntr_suspend(gpu); gpu->funcs->pm_suspend(gpu); gpu->funcs->pm_resume(gpu); + msm_perfcntr_resume(gpu); ret = msm_gpu_hw_init(gpu); if (ret) { diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index ec643b84646b..1f201322cb6e 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -236,12 +236,7 @@ struct adreno_gpu { /* firmware: */ const struct firmware *fw[ADRENO_FW_MAX]; - /* - * The migration to the central UBWC config db is still in flight - keep - * a copy containing some local fixups until that's done. - */ const struct qcom_ubwc_cfg_data *ubwc_config; - struct qcom_ubwc_cfg_data _ubwc_config; /* * Register offsets are different between some GPUs. 
@@ -592,6 +587,11 @@ static inline int adreno_is_a8xx(struct adreno_gpu *gpu) return gpu->info->family >= ADRENO_8XX_GEN1; } +static inline int adreno_is_a810(struct adreno_gpu *gpu) +{ + return gpu->info->chip_ids[0] == 0x44010000; +} + static inline int adreno_is_x285(struct adreno_gpu *gpu) { return gpu->info->chip_ids[0] == 0x44070001; diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_2_milos.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_2_milos.h new file mode 100644 index 000000000000..1aa8aea4e352 --- /dev/null +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_2_milos.h @@ -0,0 +1,279 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022. Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2015-2018, 2020 The Linux Foundation. All rights reserved. + * Copyright (c) 2026, Luca Weiss <luca.weiss@fairphone.com> + */ + +#ifndef _DPU_10_2_MILOS_H +#define _DPU_10_2_MILOS_H + +static const struct dpu_caps milos_dpu_caps = { + .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, + .max_mixer_blendstages = 0x7, + .has_src_split = true, + .has_dim_layer = true, + .has_idle_pc = true, + .has_3d_merge = true, + .max_linewidth = 8192, + .pixel_ram_size = DEFAULT_PIXEL_RAM_SIZE, +}; + +static const struct dpu_mdp_cfg milos_mdp = { + .name = "top_0", + .base = 0, .len = 0x494, + .clk_ctrls = { + [DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 }, + }, +}; + +static const struct dpu_ctl_cfg milos_ctl[] = { + { + .name = "ctl_0", .id = CTL_0, + .base = 0x15000, .len = 0x1000, + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 9), + }, { + .name = "ctl_1", .id = CTL_1, + .base = 0x16000, .len = 0x1000, + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 10), + }, { + .name = "ctl_2", .id = CTL_2, + .base = 0x17000, .len = 0x1000, + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 11), + }, { + .name = "ctl_3", .id = CTL_3, + .base = 0x18000, .len = 0x1000, + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 12), + }, +}; + 
+static const struct dpu_sspp_cfg milos_sspp[] = { + { + .name = "sspp_0", .id = SSPP_VIG0, + .base = 0x4000, .len = 0x344, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_3, + .xin_id = 0, + .type = SSPP_TYPE_VIG, + }, { + .name = "sspp_8", .id = SSPP_DMA0, + .base = 0x24000, .len = 0x344, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 1, + .type = SSPP_TYPE_DMA, + }, { + .name = "sspp_9", .id = SSPP_DMA1, + .base = 0x26000, .len = 0x344, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 5, + .type = SSPP_TYPE_DMA, + }, { + .name = "sspp_10", .id = SSPP_DMA2, + .base = 0x28000, .len = 0x344, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 9, + .type = SSPP_TYPE_DMA, + }, +}; + +static const struct dpu_lm_cfg milos_lm[] = { + { + .name = "lm_0", .id = LM_0, + .base = 0x44000, .len = 0x400, + .features = MIXER_MSM8998_MASK, + .sblk = &sdm845_lm_sblk, + .pingpong = PINGPONG_0, + .dspp = DSPP_0, + }, { + .name = "lm_2", .id = LM_2, + .base = 0x46000, .len = 0x400, + .features = MIXER_MSM8998_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_3, + .pingpong = PINGPONG_2, + }, { + .name = "lm_3", .id = LM_3, + .base = 0x47000, .len = 0x400, + .features = MIXER_MSM8998_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_2, + .pingpong = PINGPONG_3, + }, +}; + +static const struct dpu_dspp_cfg milos_dspp[] = { + { + .name = "dspp_0", .id = DSPP_0, + .base = 0x54000, .len = 0x1800, + .sblk = &sdm845_dspp_sblk, + }, +}; + +static const struct dpu_pingpong_cfg milos_pp[] = { + { + .name = "pingpong_0", .id = PINGPONG_0, + .base = 0x69000, .len = 0, + .sblk = &sc7280_pp_sblk, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8), + }, { + .name = "pingpong_2", .id = PINGPONG_2, + .base = 0x6b000, .len = 0, + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_1, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 10), + }, { + .name = "pingpong_3", .id = PINGPONG_3, + .base = 0x6c000, .len = 0, + 
.sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_1, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 11), + }, { + .name = "pingpong_cwb_0", .id = PINGPONG_CWB_0, + .base = 0x66000, .len = 0, + .sblk = &sc7280_pp_sblk, + }, +}; + +static const struct dpu_merge_3d_cfg milos_merge_3d[] = { + { + .name = "merge_3d_1", .id = MERGE_3D_1, + .base = 0x4f000, .len = 0x8, + }, +}; + +/* + * NOTE: Each display compression engine (DCE) contains dual hard + * slice DSC encoders so both share same base address but with + * its own different sub block address. + */ +static const struct dpu_dsc_cfg milos_dsc[] = { + { + .name = "dce_0_0", .id = DSC_0, + .base = 0x80000, .len = 0x6, + .features = BIT(DPU_DSC_NATIVE_42x_EN), + .sblk = &milos_dsc_sblk_0, + }, { + .name = "dce_0_1", .id = DSC_1, + .base = 0x80000, .len = 0x6, + .features = BIT(DPU_DSC_NATIVE_42x_EN), + .sblk = &milos_dsc_sblk_1, + }, +}; + +static const struct dpu_wb_cfg milos_wb[] = { + { + .name = "wb_2", .id = WB_2, + .base = 0x65000, .len = 0x2c8, + .features = WB_SDM845_MASK, + .format_list = wb2_formats_rgb_yuv, + .num_formats = ARRAY_SIZE(wb2_formats_rgb_yuv), + .xin_id = 6, + .maxlinewidth = 4096, + .intr_wb_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 4), + }, +}; + +static const struct dpu_cwb_cfg milos_cwb[] = { + { + .name = "cwb_0", .id = CWB_0, + .base = 0x66200, .len = 0x8, + }, +}; + +static const struct dpu_intf_cfg milos_intf[] = { + { + .name = "intf_0", .id = INTF_0, + .base = 0x34000, .len = 0x300, + .type = INTF_DP, + .controller_id = MSM_DP_CONTROLLER_0, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), + }, { + .name = "intf_1", .id = INTF_1, + .base = 0x35000, .len = 0x300, + .type = INTF_DSI, + .controller_id = MSM_DSI_CONTROLLER_0, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), + .intr_tear_rd_ptr = 
DPU_IRQ_IDX(MDP_INTF1_TEAR_INTR, 2), + }, { + .name = "intf_3", .id = INTF_3, + .base = 0x37000, .len = 0x300, + .type = INTF_DP, + .controller_id = MSM_DP_CONTROLLER_0, /* pair with intf_0 for DP MST */ + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31), + }, +}; + +static const struct dpu_perf_cfg milos_perf_data = { + .max_bw_low = 7100000, + .max_bw_high = 9800000, + .min_core_ib = 2500000, + .min_llcc_ib = 0, + .min_dram_ib = 1600000, + .min_prefill_lines = 40, + /* FIXME: lut tables */ + .danger_lut_tbl = {0x3ffff, 0x3ffff, 0x0}, + .safe_lut_tbl = {0xff00, 0xfff0, 0x0fff}, + .qos_lut_tbl = { + {.nentry = ARRAY_SIZE(sc7180_qos_linear), + .entries = sc7180_qos_linear + }, + {.nentry = ARRAY_SIZE(sc7180_qos_macrotile), + .entries = sc7180_qos_macrotile + }, + {.nentry = ARRAY_SIZE(sc7180_qos_nrt), + .entries = sc7180_qos_nrt + }, + /* TODO: macrotile-qseed is different from macrotile */ + }, + .cdp_cfg = { + {.rd_enable = 1, .wr_enable = 1}, + {.rd_enable = 1, .wr_enable = 0} + }, + .clk_inefficiency_factor = 105, + .bw_inefficiency_factor = 120, +}; + +static const struct dpu_mdss_version milos_mdss_ver = { + .core_major_ver = 10, + .core_minor_ver = 2, +}; + +const struct dpu_mdss_cfg dpu_milos_cfg = { + .mdss_ver = &milos_mdss_ver, + .caps = &milos_dpu_caps, + .mdp = &milos_mdp, + .cdm = &dpu_cdm_5_x, + .ctl_count = ARRAY_SIZE(milos_ctl), + .ctl = milos_ctl, + .sspp_count = ARRAY_SIZE(milos_sspp), + .sspp = milos_sspp, + .mixer_count = ARRAY_SIZE(milos_lm), + .mixer = milos_lm, + .dspp_count = ARRAY_SIZE(milos_dspp), + .dspp = milos_dspp, + .pingpong_count = ARRAY_SIZE(milos_pp), + .pingpong = milos_pp, + .dsc_count = ARRAY_SIZE(milos_dsc), + .dsc = milos_dsc, + .merge_3d_count = ARRAY_SIZE(milos_merge_3d), + .merge_3d = milos_merge_3d, + .wb_count = ARRAY_SIZE(milos_wb), + .wb = milos_wb, + .cwb_count = ARRAY_SIZE(milos_cwb), + .cwb = milos_cwb, + .intf_count = 
ARRAY_SIZE(milos_intf), + .intf = milos_intf, + .vbif = &milos_vbif, + .perf = &milos_perf_data, +}; + +#endif diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index bb4fd5fa4b22..2e10add84fd7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -454,6 +454,16 @@ static const struct dpu_dsc_sub_blks dsc_sblk_1 = { .ctl = {.name = "ctl", .base = 0xF80, .len = 0x10}, }; +static const struct dpu_dsc_sub_blks milos_dsc_sblk_0 = { + .enc = {.name = "enc", .base = 0x100, .len = 0x100}, + .ctl = {.name = "ctl", .base = 0xF00, .len = 0x80}, +}; + +static const struct dpu_dsc_sub_blks milos_dsc_sblk_1 = { + .enc = {.name = "enc", .base = 0x200, .len = 0x100}, + .ctl = {.name = "ctl", .base = 0xF80, .len = 0x80}, +}; + static const struct dpu_dsc_sub_blks sm8750_dsc_sblk_0 = { .enc = {.name = "enc", .base = 0x100, .len = 0x100}, .ctl = {.name = "ctl", .base = 0xF00, .len = 0x24}, @@ -513,6 +523,23 @@ static const struct dpu_vbif_dynamic_ot_cfg msm8998_ot_rdwr_cfg[] = { }, }; +static const struct dpu_vbif_cfg milos_vbif = { + .len = 0x1074, + .features = BIT(DPU_VBIF_QOS_REMAP), + .xin_halt_timeout = 0x4000, + .qos_rp_remap_size = 0x40, + .qos_rt_tbl = { + .npriority_lvl = ARRAY_SIZE(sdm845_rt_pri_lvl), + .priority_lvl = sdm845_rt_pri_lvl, + }, + .qos_nrt_tbl = { + .npriority_lvl = ARRAY_SIZE(sdm845_nrt_pri_lvl), + .priority_lvl = sdm845_nrt_pri_lvl, + }, + .memtype_count = 16, + .memtype = {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}, +}; + static const struct dpu_vbif_cfg msm8996_vbif = { .len = 0x1040, .default_ot_rd_limit = 32, @@ -754,6 +781,8 @@ static const struct dpu_qos_lut_entry sc7180_qos_nrt[] = { #include "catalog/dpu_9_2_x1e80100.h" #include "catalog/dpu_10_0_sm8650.h" +#include "catalog/dpu_10_2_milos.h" + #include "catalog/dpu_12_0_sm8750.h" #include "catalog/dpu_12_2_glymur.h" #include "catalog/dpu_12_4_eliza.h" diff --git 
a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index ba04ac24d5a9..f45faf87333e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -766,6 +766,7 @@ struct dpu_mdss_cfg { extern const struct dpu_mdss_cfg dpu_eliza_cfg; extern const struct dpu_mdss_cfg dpu_glymur_cfg; extern const struct dpu_mdss_cfg dpu_kaanapali_cfg; +extern const struct dpu_mdss_cfg dpu_milos_cfg; extern const struct dpu_mdss_cfg dpu_msm8917_cfg; extern const struct dpu_mdss_cfg dpu_msm8937_cfg; extern const struct dpu_mdss_cfg dpu_msm8953_cfg; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c index d3da70009234..b5e50fc1916f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c @@ -310,34 +310,35 @@ void dpu_hw_setup_format_impl(struct dpu_sw_pipe *pipe, const struct msm_format if (fmt->fetch_mode != MDP_FETCH_LINEAR) { u32 hbb = ctx->ubwc->highest_bank_bit - 13; - u32 ctrl_val; + u32 ctrl_val = 0; if (MSM_FORMAT_IS_UBWC(fmt)) opmode |= MDSS_MDP_OP_BWC_EN; src_format |= (fmt->fetch_mode & 3) << 30; /*FRAME_FORMAT */ - if (ctx->ubwc->ubwc_enc_version == UBWC_1_0) { - fast_clear = fmt->alpha_enable ? BIT(31) : 0; - ctrl_val = fast_clear | (ctx->ubwc->ubwc_swizzle & 0x1) | - BIT(8) | (hbb << 4); - } else if (ctx->ubwc->ubwc_enc_version == UBWC_2_0) { - fast_clear = fmt->alpha_enable ? BIT(31) : 0; - ctrl_val = fast_clear | ctx->ubwc->ubwc_swizzle | (hbb << 4); - } else if (ctx->ubwc->ubwc_enc_version == UBWC_3_0) { - ctrl_val = BIT(30) | (ctx->ubwc->ubwc_swizzle) | (hbb << 4); - } else if (ctx->ubwc->ubwc_enc_version == UBWC_4_0) { - ctrl_val = MSM_FORMAT_IS_YUV(fmt) ? 
0 : BIT(30); - } else if (ctx->ubwc->ubwc_enc_version <= UBWC_6_0) { - if (MSM_FORMAT_IS_YUV(fmt)) - ctrl_val = 0; - else if (MSM_FORMAT_IS_DX(fmt)) /* or FP16, but it's unsupported */ + if (ctx->ubwc->ubwc_enc_version > UBWC_6_0) { + DRM_WARN_ONCE("Unsupported UBWC version %x\n", ctx->ubwc->ubwc_enc_version); + } else if (ctx->ubwc->ubwc_enc_version >= UBWC_5_0) { + if (!MSM_FORMAT_IS_YUV(fmt)) { ctrl_val = BIT(30); - else - ctrl_val = BIT(30) | BIT(31); + if (!MSM_FORMAT_IS_DX(fmt)) /* and not FP16, but it's unsupported */ + ctrl_val |= BIT(31); + } /* SDE also sets bits for lossy formats, but we don't support them yet */ + } else if (ctx->ubwc->ubwc_enc_version >= UBWC_4_0) { + ctrl_val = MSM_FORMAT_IS_YUV(fmt) ? 0 : BIT(30); + } else if (ctx->ubwc->ubwc_enc_version >= UBWC_3_0) { + ctrl_val = BIT(30) | qcom_ubwc_swizzle(ctx->ubwc) | (hbb << 4); + } else if (ctx->ubwc->ubwc_enc_version >= UBWC_2_0) { + fast_clear = fmt->alpha_enable ? BIT(31) : 0; + ctrl_val = fast_clear | qcom_ubwc_swizzle(ctx->ubwc) | (hbb << 4); + } else if (ctx->ubwc->ubwc_enc_version >= UBWC_1_0) { + fast_clear = fmt->alpha_enable ? 
BIT(31) : 0; + ctrl_val = fast_clear | + (qcom_ubwc_swizzle(ctx->ubwc) & UBWC_SWIZZLE_ENABLE_LVL1) | + BIT(8) | (hbb << 4); } else { DRM_WARN_ONCE("Unsupported UBWC version %x\n", ctx->ubwc->ubwc_enc_version); - ctrl_val = 0; } DPU_REG_WRITE(c, ubwc_ctrl_off, ctrl_val); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 657d956a6ca4..da3556eb6ecc 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -1483,6 +1483,7 @@ static const struct of_device_id dpu_dt_match[] = { { .compatible = "qcom,eliza-dpu", .data = &dpu_eliza_cfg, }, { .compatible = "qcom,glymur-dpu", .data = &dpu_glymur_cfg, }, { .compatible = "qcom,kaanapali-dpu", .data = &dpu_kaanapali_cfg, }, + { .compatible = "qcom,milos-dpu", .data = &dpu_milos_cfg, }, { .compatible = "qcom,msm8917-mdp5", .data = &dpu_msm8917_cfg, }, { .compatible = "qcom,msm8937-mdp5", .data = &dpu_msm8937_cfg, }, { .compatible = "qcom,msm8953-mdp5", .data = &dpu_msm8953_cfg, }, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 5642f3bc3568..7b92082d35a6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -1781,8 +1781,7 @@ static bool dpu_plane_format_mod_supported(struct drm_plane *plane, uint32_t format, uint64_t modifier) { struct dpu_kms *dpu_kms = _dpu_plane_get_kms(plane); - bool has_no_ubwc = (dpu_kms->mdss->ubwc_enc_version == 0) && - (dpu_kms->mdss->ubwc_dec_version == 0); + bool has_no_ubwc = (dpu_kms->mdss->ubwc_enc_version == 0); if (modifier == DRM_FORMAT_MOD_LINEAR) return true; diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index cba8a71a2561..86ef8c89ad44 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -2583,22 +2583,6 @@ void msm_dp_ctrl_off_link_stream(struct msm_dp_ctrl *msm_dp_ctrl) phy_init(phy); } -void msm_dp_ctrl_off_link(struct 
msm_dp_ctrl *msm_dp_ctrl) -{ - struct msm_dp_ctrl_private *ctrl; - struct phy *phy; - - ctrl = container_of(msm_dp_ctrl, struct msm_dp_ctrl_private, msm_dp_ctrl); - phy = ctrl->phy; - - msm_dp_ctrl_mainlink_disable(ctrl); - - dev_pm_opp_set_rate(ctrl->dev, 0); - msm_dp_ctrl_link_clk_disable(&ctrl->msm_dp_ctrl); - - phy_power_off(phy); -} - void msm_dp_ctrl_off(struct msm_dp_ctrl *msm_dp_ctrl) { struct msm_dp_ctrl_private *ctrl; diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.h b/drivers/gpu/drm/msm/dp/dp_ctrl.h index 124b9b21bb7f..f68bee62713f 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.h +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.h @@ -19,7 +19,6 @@ struct phy; int msm_dp_ctrl_on_link(struct msm_dp_ctrl *msm_dp_ctrl); int msm_dp_ctrl_on_stream(struct msm_dp_ctrl *msm_dp_ctrl, bool force_link_train); void msm_dp_ctrl_off_link_stream(struct msm_dp_ctrl *msm_dp_ctrl); -void msm_dp_ctrl_off_link(struct msm_dp_ctrl *msm_dp_ctrl); void msm_dp_ctrl_off(struct msm_dp_ctrl *msm_dp_ctrl); void msm_dp_ctrl_push_idle(struct msm_dp_ctrl *msm_dp_ctrl); irqreturn_t msm_dp_ctrl_isr(struct msm_dp_ctrl *msm_dp_ctrl); diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 1b3cbf4016ef..dc6f33809ca5 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -38,40 +38,9 @@ enum { ISR_DISCONNECTED, ISR_CONNECT_PENDING, ISR_CONNECTED, - ISR_HPD_REPLUG_COUNT, + ISR_HPD_IO_GLITCH_COUNT, ISR_IRQ_HPD_PULSE_COUNT, - ISR_HPD_LO_GLITH_COUNT, -}; - -/* event thread connection state */ -enum { - ST_DISCONNECTED, - ST_MAINLINK_READY, - ST_CONNECTED, - ST_DISCONNECT_PENDING, - ST_DISPLAY_OFF, -}; - -enum { - EV_NO_EVENT, - /* hpd events */ - EV_HPD_PLUG_INT, - EV_IRQ_HPD_INT, - EV_HPD_UNPLUG_INT, - EV_USER_NOTIFICATION, -}; - -#define EVENT_TIMEOUT (HZ/10) /* 100ms */ -#define DP_EVENT_Q_MAX 8 - -#define DP_TIMEOUT_NONE 0 - -#define WAIT_FOR_RESUME_TIMEOUT_JIFFIES (HZ / 2) - -struct msm_dp_event { - u32 event_id; - u32 data; - u32 
delay; + ISR_HPD_REPLUG_COUNT, }; struct msm_dp_display_private { @@ -84,6 +53,9 @@ struct msm_dp_display_private { bool phy_initialized; bool audio_supported; + struct mutex plugged_lock; + bool plugged; + struct drm_device *drm_dev; struct drm_dp_aux *aux; @@ -97,15 +69,9 @@ struct msm_dp_display_private { /* wait for audio signaling */ struct completion audio_comp; - /* event related only access by event thread */ - struct mutex event_mutex; - wait_queue_head_t event_q; - u32 hpd_state; - u32 event_pndx; - u32 event_gndx; - struct task_struct *ev_tsk; - struct msm_dp_event event_list[DP_EVENT_Q_MAX]; - spinlock_t event_lock; + /* HPD IRQ handling */ + spinlock_t irq_thread_lock; + u32 hpd_isr_status; bool wide_bus_supported; @@ -219,60 +185,6 @@ static struct msm_dp_display_private *dev_get_dp_display_private(struct device * return container_of(dp, struct msm_dp_display_private, msm_dp_display); } -static int msm_dp_add_event(struct msm_dp_display_private *msm_dp_priv, u32 event, - u32 data, u32 delay) -{ - unsigned long flag; - struct msm_dp_event *todo; - int pndx; - - spin_lock_irqsave(&msm_dp_priv->event_lock, flag); - pndx = msm_dp_priv->event_pndx + 1; - pndx %= DP_EVENT_Q_MAX; - if (pndx == msm_dp_priv->event_gndx) { - pr_err("event_q is full: pndx=%d gndx=%d\n", - msm_dp_priv->event_pndx, msm_dp_priv->event_gndx); - spin_unlock_irqrestore(&msm_dp_priv->event_lock, flag); - return -EPERM; - } - todo = &msm_dp_priv->event_list[msm_dp_priv->event_pndx++]; - msm_dp_priv->event_pndx %= DP_EVENT_Q_MAX; - todo->event_id = event; - todo->data = data; - todo->delay = delay; - wake_up(&msm_dp_priv->event_q); - spin_unlock_irqrestore(&msm_dp_priv->event_lock, flag); - - return 0; -} - -static int msm_dp_del_event(struct msm_dp_display_private *msm_dp_priv, u32 event) -{ - unsigned long flag; - struct msm_dp_event *todo; - u32 gndx; - - spin_lock_irqsave(&msm_dp_priv->event_lock, flag); - if (msm_dp_priv->event_pndx == msm_dp_priv->event_gndx) { - 
spin_unlock_irqrestore(&msm_dp_priv->event_lock, flag); - return -ENOENT; - } - - gndx = msm_dp_priv->event_gndx; - while (msm_dp_priv->event_pndx != gndx) { - todo = &msm_dp_priv->event_list[gndx]; - if (todo->event_id == event) { - todo->event_id = EV_NO_EVENT; /* deleted */ - todo->delay = 0; - } - gndx++; - gndx %= DP_EVENT_Q_MAX; - } - spin_unlock_irqrestore(&msm_dp_priv->event_lock, flag); - - return 0; -} - void msm_dp_display_signal_audio_start(struct msm_dp *msm_dp_display) { struct msm_dp_display_private *dp; @@ -291,8 +203,6 @@ void msm_dp_display_signal_audio_complete(struct msm_dp *msm_dp_display) complete_all(&dp->audio_comp); } -static int msm_dp_hpd_event_thread_start(struct msm_dp_display_private *msm_dp_priv); - static int msm_dp_display_bind(struct device *dev, struct device *master, void *data) { @@ -312,12 +222,6 @@ static int msm_dp_display_bind(struct device *dev, struct device *master, goto end; } - rc = msm_dp_hpd_event_thread_start(dp); - if (rc) { - DRM_ERROR("Event thread create failed\n"); - goto end; - } - return 0; end: return rc; @@ -329,8 +233,6 @@ static void msm_dp_display_unbind(struct device *dev, struct device *master, struct msm_dp_display_private *dp = dev_get_dp_display_private(dev); struct msm_drm_private *priv = dev_get_drvdata(master); - kthread_stop(dp->ev_tsk); - of_dp_aux_depopulate_bus(dp->aux); msm_dp_aux_unregister(dp->aux); @@ -344,45 +246,6 @@ static const struct component_ops msm_dp_display_comp_ops = { .unbind = msm_dp_display_unbind, }; -static void msm_dp_display_send_hpd_event(struct msm_dp *msm_dp_display) -{ - struct msm_dp_display_private *dp; - struct drm_connector *connector; - - dp = container_of(msm_dp_display, struct msm_dp_display_private, msm_dp_display); - - connector = dp->msm_dp_display.connector; - drm_helper_hpd_irq_event(connector->dev); -} - -static int msm_dp_display_send_hpd_notification(struct msm_dp_display_private *dp, - bool hpd) -{ - if ((hpd && dp->msm_dp_display.link_ready) || - 
(!hpd && !dp->msm_dp_display.link_ready)) { - drm_dbg_dp(dp->drm_dev, "HPD already %s\n", str_on_off(hpd)); - return 0; - } - - /* reset video pattern flag on disconnect */ - if (!hpd) { - dp->panel->video_test = false; - if (!dp->msm_dp_display.is_edp) - drm_dp_set_subconnector_property(dp->msm_dp_display.connector, - connector_status_disconnected, - dp->panel->dpcd, - dp->panel->downstream_ports); - } - - dp->msm_dp_display.link_ready = hpd; - - drm_dbg_dp(dp->drm_dev, "type=%d hpd=%d\n", - dp->msm_dp_display.connector_type, hpd); - msm_dp_display_send_hpd_event(&dp->msm_dp_display); - - return 0; -} - static int msm_dp_display_lttpr_init(struct msm_dp_display_private *dp, u8 *dpcd) { int rc, lttpr_count; @@ -437,19 +300,21 @@ static int msm_dp_display_process_hpd_high(struct msm_dp_display_private *dp) msm_dp_link_psm_config(dp->link, &dp->panel->link_info, false); msm_dp_link_reset_phy_params_vx_px(dp->link); - rc = msm_dp_ctrl_on_link(dp->ctrl); - if (rc) { - DRM_ERROR("failed to complete DP link training\n"); - goto end; - } - - msm_dp_add_event(dp, EV_USER_NOTIFICATION, true, 0); end: return rc; } -static void msm_dp_display_host_phy_init(struct msm_dp_display_private *dp) +/** + * msm_dp_display_host_phy_init() - start up DP PHY + * @dp: main display data structure + * + * Prepare DP PHY for the AUX transactions to succeed. + * + * Returns: true if this call has initialized the PHY and false if the PHY has + * already been set up beforehand. 
+ */ +static bool msm_dp_display_host_phy_init(struct msm_dp_display_private *dp) { drm_dbg_dp(dp->drm_dev, "type=%d core_init=%d phy_init=%d\n", dp->msm_dp_display.connector_type, dp->core_initialized, @@ -458,7 +323,10 @@ static void msm_dp_display_host_phy_init(struct msm_dp_display_private *dp) if (!dp->phy_initialized) { msm_dp_ctrl_phy_init(dp->ctrl); dp->phy_initialized = true; + return true; } + + return false; } static void msm_dp_display_host_phy_exit(struct msm_dp_display_private *dp) @@ -499,24 +367,6 @@ static void msm_dp_display_host_deinit(struct msm_dp_display_private *dp) dp->core_initialized = false; } -static int msm_dp_display_usbpd_configure_cb(struct device *dev) -{ - struct msm_dp_display_private *dp = dev_get_dp_display_private(dev); - - msm_dp_display_host_phy_init(dp); - - return msm_dp_display_process_hpd_high(dp); -} - -static int msm_dp_display_notify_disconnect(struct device *dev) -{ - struct msm_dp_display_private *dp = dev_get_dp_display_private(dev); - - msm_dp_add_event(dp, EV_USER_NOTIFICATION, false, 0); - - return 0; -} - static void msm_dp_display_handle_video_request(struct msm_dp_display_private *dp) { if (dp->link->sink_request & DP_TEST_LINK_VIDEO_PATTERN) { @@ -525,41 +375,11 @@ static void msm_dp_display_handle_video_request(struct msm_dp_display_private *d } } -static int msm_dp_display_handle_port_status_changed(struct msm_dp_display_private *dp) -{ - int rc = 0; - - if (drm_dp_is_branch(dp->panel->dpcd) && dp->link->sink_count == 0) { - drm_dbg_dp(dp->drm_dev, "sink count is zero, nothing to do\n"); - if (dp->hpd_state != ST_DISCONNECTED) { - dp->hpd_state = ST_DISCONNECT_PENDING; - msm_dp_add_event(dp, EV_USER_NOTIFICATION, false, 0); - } - } else { - if (dp->hpd_state == ST_DISCONNECTED) { - dp->hpd_state = ST_MAINLINK_READY; - rc = msm_dp_display_process_hpd_high(dp); - if (rc) - dp->hpd_state = ST_DISCONNECTED; - } - } - - return rc; -} - static int msm_dp_display_handle_irq_hpd(struct msm_dp_display_private *dp) { 
u32 sink_request = dp->link->sink_request; drm_dbg_dp(dp->drm_dev, "%d\n", sink_request); - if (dp->hpd_state == ST_DISCONNECTED) { - if (sink_request & DP_LINK_STATUS_UPDATED) { - drm_dbg_dp(dp->drm_dev, "Disconnected sink_request: %d\n", - sink_request); - DRM_ERROR("Disconnected, no DP_LINK_STATUS_UPDATED\n"); - return -EINVAL; - } - } msm_dp_ctrl_handle_sink_request(dp->ctrl); @@ -569,79 +389,36 @@ static int msm_dp_display_handle_irq_hpd(struct msm_dp_display_private *dp) return 0; } -static int msm_dp_display_usbpd_attention_cb(struct device *dev) -{ - int rc = 0; - u32 sink_request; - struct msm_dp_display_private *dp = dev_get_dp_display_private(dev); - - /* check for any test request issued by sink */ - rc = msm_dp_link_process_request(dp->link); - if (!rc) { - sink_request = dp->link->sink_request; - drm_dbg_dp(dp->drm_dev, "hpd_state=%d sink_request=%d\n", - dp->hpd_state, sink_request); - if (sink_request & DS_PORT_STATUS_CHANGED) - rc = msm_dp_display_handle_port_status_changed(dp); - else - rc = msm_dp_display_handle_irq_hpd(dp); - } - - return rc; -} - -static int msm_dp_hpd_plug_handle(struct msm_dp_display_private *dp, u32 data) +static int msm_dp_hpd_plug_handle(struct msm_dp_display_private *dp) { - u32 state; int ret; struct platform_device *pdev = dp->msm_dp_display.pdev; - msm_dp_aux_enable_xfers(dp->aux, true); - - mutex_lock(&dp->event_mutex); - - state = dp->hpd_state; - drm_dbg_dp(dp->drm_dev, "Before, type=%d hpd_state=%d\n", - dp->msm_dp_display.connector_type, state); - - if (state == ST_DISPLAY_OFF) { - mutex_unlock(&dp->event_mutex); - return 0; - } - - if (state == ST_MAINLINK_READY || state == ST_CONNECTED) { - mutex_unlock(&dp->event_mutex); - return 0; - } + drm_dbg_dp(dp->drm_dev, "Before, type=%d sink_count=%d\n", + dp->msm_dp_display.connector_type, + dp->link->sink_count); - if (state == ST_DISCONNECT_PENDING) { - /* wait until ST_DISCONNECTED */ - msm_dp_add_event(dp, EV_HPD_PLUG_INT, 0, 1); /* delay = 1 */ - 
mutex_unlock(&dp->event_mutex); - return 0; - } + guard(mutex)(&dp->plugged_lock); ret = pm_runtime_resume_and_get(&pdev->dev); if (ret) { DRM_ERROR("failed to pm_runtime_resume\n"); - mutex_unlock(&dp->event_mutex); return ret; } - ret = msm_dp_display_usbpd_configure_cb(&pdev->dev); - if (ret) { /* link train failed */ - dp->hpd_state = ST_DISCONNECTED; - pm_runtime_put_sync(&pdev->dev); - } else { - dp->hpd_state = ST_MAINLINK_READY; - } + msm_dp_aux_enable_xfers(dp->aux, true); + + msm_dp_display_host_phy_init(dp); - drm_dbg_dp(dp->drm_dev, "After, type=%d hpd_state=%d\n", - dp->msm_dp_display.connector_type, state); - mutex_unlock(&dp->event_mutex); + ret = msm_dp_display_process_hpd_high(dp); - /* uevent will complete connection part */ - return 0; + drm_dbg_dp(dp->drm_dev, "After, type=%d sink_count=%d\n", + dp->msm_dp_display.connector_type, + dp->link->sink_count); + + dp->plugged = true; + + return ret; }; static void msm_dp_display_handle_plugged_change(struct msm_dp *msm_dp_display, @@ -658,99 +435,81 @@ static void msm_dp_display_handle_plugged_change(struct msm_dp *msm_dp_display, plugged); } -static int msm_dp_hpd_unplug_handle(struct msm_dp_display_private *dp, u32 data) +static int msm_dp_hpd_unplug_handle(struct msm_dp_display_private *dp) { - u32 state; struct platform_device *pdev = dp->msm_dp_display.pdev; - msm_dp_aux_enable_xfers(dp->aux, false); + dp->panel->video_test = false; - mutex_lock(&dp->event_mutex); + msm_dp_aux_enable_xfers(dp->aux, false); - state = dp->hpd_state; + drm_dbg_dp(dp->drm_dev, "Before, type=%d sink_count=%d\n", + dp->msm_dp_display.connector_type, + dp->link->sink_count); - drm_dbg_dp(dp->drm_dev, "Before, type=%d hpd_state=%d\n", - dp->msm_dp_display.connector_type, state); + guard(mutex)(&dp->plugged_lock); + if (!dp->plugged) + return 0; - /* unplugged, no more irq_hpd handle */ - msm_dp_del_event(dp, EV_IRQ_HPD_INT); + /* Don't forget modes for eDP */ + if (!dp->msm_dp_display.is_edp) + 
msm_dp_panel_unplugged(dp->panel, dp->msm_dp_display.connector); - if (state == ST_DISCONNECTED) { - /* triggered by irq_hdp with sink_count = 0 */ - if (dp->link->sink_count == 0) { - msm_dp_display_host_phy_exit(dp); - } - msm_dp_display_notify_disconnect(&dp->msm_dp_display.pdev->dev); - mutex_unlock(&dp->event_mutex); - return 0; - } else if (state == ST_DISCONNECT_PENDING) { - mutex_unlock(&dp->event_mutex); - return 0; - } else if (state == ST_MAINLINK_READY) { - msm_dp_ctrl_off_link(dp->ctrl); + /* triggered by irq_hdp with sink_count = 0 */ + if (dp->link->sink_count == 0) msm_dp_display_host_phy_exit(dp); - dp->hpd_state = ST_DISCONNECTED; - msm_dp_display_notify_disconnect(&dp->msm_dp_display.pdev->dev); - pm_runtime_put_sync(&pdev->dev); - mutex_unlock(&dp->event_mutex); - return 0; - } /* * We don't need separate work for disconnect as * connect/attention interrupts are disabled */ - msm_dp_display_notify_disconnect(&dp->msm_dp_display.pdev->dev); - - if (state == ST_DISPLAY_OFF) { - dp->hpd_state = ST_DISCONNECTED; - } else { - dp->hpd_state = ST_DISCONNECT_PENDING; - } + if (!dp->msm_dp_display.is_edp) + drm_dp_set_subconnector_property(dp->msm_dp_display.connector, + connector_status_disconnected, + dp->panel->dpcd, + dp->panel->downstream_ports); /* signal the disconnect event early to ensure proper teardown */ msm_dp_display_handle_plugged_change(&dp->msm_dp_display, false); - drm_dbg_dp(dp->drm_dev, "After, type=%d hpd_state=%d\n", - dp->msm_dp_display.connector_type, state); + drm_dbg_dp(dp->drm_dev, "After, type=%d, sink_count=%d\n", + dp->msm_dp_display.connector_type, + dp->link->sink_count); + + if (dp->plugged) { + pm_runtime_put_sync(&pdev->dev); + dp->plugged = false; + } - /* uevent will complete disconnection part */ - pm_runtime_put_sync(&pdev->dev); - mutex_unlock(&dp->event_mutex); return 0; } -static int msm_dp_irq_hpd_handle(struct msm_dp_display_private *dp, u32 data) +static int msm_dp_irq_hpd_handle(struct msm_dp_display_private 
*dp) { - u32 state; - - mutex_lock(&dp->event_mutex); + u32 sink_request; + int rc = 0; /* irq_hpd can happen at either connected or disconnected state */ - state = dp->hpd_state; - drm_dbg_dp(dp->drm_dev, "Before, type=%d hpd_state=%d\n", - dp->msm_dp_display.connector_type, state); + drm_dbg_dp(dp->drm_dev, "Before, type=%d, sink_count=%d\n", + dp->msm_dp_display.connector_type, + dp->link->sink_count); - if (state == ST_DISPLAY_OFF) { - mutex_unlock(&dp->event_mutex); - return 0; - } - - if (state == ST_MAINLINK_READY || state == ST_DISCONNECT_PENDING) { - /* wait until ST_CONNECTED */ - msm_dp_add_event(dp, EV_IRQ_HPD_INT, 0, 1); /* delay = 1 */ - mutex_unlock(&dp->event_mutex); - return 0; + /* check for any test request issued by sink */ + rc = msm_dp_link_process_request(dp->link); + if (!rc) { + sink_request = dp->link->sink_request; + drm_dbg_dp(dp->drm_dev, "sink_request=%d\n", sink_request); + if (sink_request & DS_PORT_STATUS_CHANGED) + rc = msm_dp_display_process_hpd_high(dp); + else + rc = msm_dp_display_handle_irq_hpd(dp); } - msm_dp_display_usbpd_attention_cb(&dp->msm_dp_display.pdev->dev); + drm_dbg_dp(dp->drm_dev, "After, type=%d, sink_count=%d\n", + dp->msm_dp_display.connector_type, + dp->link->sink_count); - drm_dbg_dp(dp->drm_dev, "After, type=%d hpd_state=%d\n", - dp->msm_dp_display.connector_type, state); - - mutex_unlock(&dp->event_mutex); - - return 0; + return rc; } static void msm_dp_display_deinit_sub_modules(struct msm_dp_display_private *dp) @@ -1026,12 +785,8 @@ void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp) * power_on status before dumping DP registers to avoid crash due * to unclocked access */ - mutex_lock(&msm_dp_display->event_mutex); - - if (!dp->power_on) { - mutex_unlock(&msm_dp_display->event_mutex); + if (!dp->power_on) return; - } msm_disp_snapshot_add_block(disp_state, msm_dp_display->ahb_len, msm_dp_display->ahb_base, "dp_ahb"); @@ -1041,8 +796,6 @@ void msm_dp_snapshot(struct msm_disp_state 
*disp_state, struct msm_dp *dp) msm_dp_display->link_base, "dp_link"); msm_disp_snapshot_add_block(disp_state, msm_dp_display->p0_len, msm_dp_display->p0_base, "dp_p0"); - - mutex_unlock(&msm_dp_display->event_mutex); } void msm_dp_display_set_psr(struct msm_dp *msm_dp_display, bool enter) @@ -1058,133 +811,110 @@ void msm_dp_display_set_psr(struct msm_dp *msm_dp_display, bool enter) msm_dp_ctrl_set_psr(dp->ctrl, enter); } -static int hpd_event_thread(void *data) +/** + * msm_dp_bridge_detect - callback to determine if connector is connected + * + * @bridge: Pointer to drm bridge structure + * @connector: Pointer to drm connector structure + * + * Returns: where there is a display connected to the DPTX (returning + * disconnected for branch devices without DP Sinks being connected). + */ +enum drm_connector_status msm_dp_bridge_detect(struct drm_bridge *bridge, + struct drm_connector *connector) { - struct msm_dp_display_private *msm_dp_priv; - unsigned long flag; - struct msm_dp_event *todo; - int timeout_mode = 0; + struct msm_dp_bridge *msm_dp_bridge = to_dp_bridge(bridge); + struct msm_dp *dp = msm_dp_bridge->msm_dp_display; + int status = connector_status_disconnected; + struct msm_dp_display_private *priv; + u8 dpcd[DP_RECEIVER_CAP_SIZE]; + struct drm_dp_desc desc; + bool phy_deinit; + int ret; - msm_dp_priv = (struct msm_dp_display_private *)data; + dp = to_dp_bridge(bridge)->msm_dp_display; - while (1) { - if (timeout_mode) { - wait_event_timeout(msm_dp_priv->event_q, - (msm_dp_priv->event_pndx == msm_dp_priv->event_gndx) || - kthread_should_stop(), EVENT_TIMEOUT); - } else { - wait_event_interruptible(msm_dp_priv->event_q, - (msm_dp_priv->event_pndx != msm_dp_priv->event_gndx) || - kthread_should_stop()); - } + priv = container_of(dp, struct msm_dp_display_private, msm_dp_display); - if (kthread_should_stop()) - break; + guard(mutex)(&priv->plugged_lock); + ret = pm_runtime_resume_and_get(&dp->pdev->dev); + if (ret) { + DRM_ERROR("failed to 
pm_runtime_resume\n"); + return status; + } - spin_lock_irqsave(&msm_dp_priv->event_lock, flag); - todo = &msm_dp_priv->event_list[msm_dp_priv->event_gndx]; - if (todo->delay) { - struct msm_dp_event *todo_next; + phy_deinit = msm_dp_display_host_phy_init(priv); - msm_dp_priv->event_gndx++; - msm_dp_priv->event_gndx %= DP_EVENT_Q_MAX; + msm_dp_aux_enable_xfers(priv->aux, true); - /* re enter delay event into q */ - todo_next = &msm_dp_priv->event_list[msm_dp_priv->event_pndx++]; - msm_dp_priv->event_pndx %= DP_EVENT_Q_MAX; - todo_next->event_id = todo->event_id; - todo_next->data = todo->data; - todo_next->delay = todo->delay - 1; + ret = msm_dp_aux_is_link_connected(priv->aux); + DRM_DEBUG_DP("aux link status: %x\n", ret); + if (!priv->plugged && !ret) { + DRM_DEBUG_DP("aux not connected\n"); + priv->plugged = false; + goto end; + } - /* clean up older event */ - todo->event_id = EV_NO_EVENT; - todo->delay = 0; + ret = drm_dp_read_dpcd_caps(priv->aux, dpcd); + if (ret) { + DRM_DEBUG_DP("failed to read caps\n"); + priv->plugged = false; + goto end; + } - /* switch to timeout mode */ - timeout_mode = 1; - spin_unlock_irqrestore(&msm_dp_priv->event_lock, flag); - continue; - } + ret = drm_dp_read_desc(priv->aux, &desc, drm_dp_is_branch(dpcd)); + if (ret) { + DRM_DEBUG_DP("failed to read desc\n"); + priv->plugged = false; + goto end; + } - /* timeout with no events in q */ - if (msm_dp_priv->event_pndx == msm_dp_priv->event_gndx) { - spin_unlock_irqrestore(&msm_dp_priv->event_lock, flag); - continue; - } + status = connector_status_connected; + priv->plugged = true; - msm_dp_priv->event_gndx++; - msm_dp_priv->event_gndx %= DP_EVENT_Q_MAX; - timeout_mode = 0; - spin_unlock_irqrestore(&msm_dp_priv->event_lock, flag); - - switch (todo->event_id) { - case EV_HPD_PLUG_INT: - msm_dp_hpd_plug_handle(msm_dp_priv, todo->data); - break; - case EV_HPD_UNPLUG_INT: - msm_dp_hpd_unplug_handle(msm_dp_priv, todo->data); - break; - case EV_IRQ_HPD_INT: - 
msm_dp_irq_hpd_handle(msm_dp_priv, todo->data); - break; - case EV_USER_NOTIFICATION: - msm_dp_display_send_hpd_notification(msm_dp_priv, - todo->data); - break; - default: - break; - } - } + if (drm_dp_read_sink_count_cap(connector, dpcd, &desc)) { + int sink_count = drm_dp_read_sink_count(priv->aux); - return 0; -} + drm_dbg_dp(dp->drm_dev, "sink_count = %d\n", sink_count); -static int msm_dp_hpd_event_thread_start(struct msm_dp_display_private *msm_dp_priv) -{ - /* set event q to empty */ - msm_dp_priv->event_gndx = 0; - msm_dp_priv->event_pndx = 0; + if (sink_count <= 0) + status = connector_status_disconnected; + } + +end: + /* + * If we detected the DPRX, leave the controller on so that it doesn't + * lose the state. + */ + if (!priv->plugged) { + if (phy_deinit) { + msm_dp_aux_enable_xfers(priv->aux, false); + msm_dp_display_host_phy_exit(priv); + } - msm_dp_priv->ev_tsk = kthread_run(hpd_event_thread, msm_dp_priv, "dp_hpd_handler"); - if (IS_ERR(msm_dp_priv->ev_tsk)) - return PTR_ERR(msm_dp_priv->ev_tsk); + pm_runtime_put_sync(&dp->pdev->dev); + } - return 0; + return status; } static irqreturn_t msm_dp_display_irq_handler(int irq, void *dev_id) { struct msm_dp_display_private *dp = dev_id; - irqreturn_t ret = IRQ_NONE; u32 hpd_isr_status; - - if (!dp) { - DRM_ERROR("invalid data\n"); - return IRQ_NONE; - } + unsigned long flags; + irqreturn_t ret = IRQ_HANDLED; hpd_isr_status = msm_dp_aux_get_hpd_intr_status(dp->aux); if (hpd_isr_status & 0x0F) { drm_dbg_dp(dp->drm_dev, "type=%d isr=0x%x\n", dp->msm_dp_display.connector_type, hpd_isr_status); - /* hpd related interrupts */ - if (hpd_isr_status & DP_DP_HPD_PLUG_INT_MASK) - msm_dp_add_event(dp, EV_HPD_PLUG_INT, 0, 0); - - if (hpd_isr_status & DP_DP_IRQ_HPD_INT_MASK) { - msm_dp_add_event(dp, EV_IRQ_HPD_INT, 0, 0); - } - - if (hpd_isr_status & DP_DP_HPD_REPLUG_INT_MASK) { - msm_dp_add_event(dp, EV_HPD_UNPLUG_INT, 0, 0); - msm_dp_add_event(dp, EV_HPD_PLUG_INT, 0, 3); - } - - if (hpd_isr_status & 
DP_DP_HPD_UNPLUG_INT_MASK) - msm_dp_add_event(dp, EV_HPD_UNPLUG_INT, 0, 0); - ret = IRQ_HANDLED; + spin_lock_irqsave(&dp->irq_thread_lock, flags); + dp->hpd_isr_status |= hpd_isr_status; + ret = IRQ_WAKE_THREAD; + spin_unlock_irqrestore(&dp->irq_thread_lock, flags); } /* DP controller isr */ @@ -1193,6 +923,36 @@ static irqreturn_t msm_dp_display_irq_handler(int irq, void *dev_id) return ret; } +static irqreturn_t msm_dp_display_irq_thread(int irq, void *dev_id) +{ + struct msm_dp_display_private *dp = dev_id; + irqreturn_t ret = IRQ_NONE; + unsigned long flags; + u32 hpd_isr_status; + + spin_lock_irqsave(&dp->irq_thread_lock, flags); + hpd_isr_status = dp->hpd_isr_status; + dp->hpd_isr_status = 0; + spin_unlock_irqrestore(&dp->irq_thread_lock, flags); + + if (hpd_isr_status & DP_DP_HPD_UNPLUG_INT_MASK) + drm_bridge_hpd_notify(dp->msm_dp_display.bridge, + connector_status_disconnected); + + if (hpd_isr_status & DP_DP_HPD_PLUG_INT_MASK) + drm_bridge_hpd_notify(dp->msm_dp_display.bridge, + connector_status_connected); + + /* Send HPD as connected and distinguish it in the notifier */ + if (hpd_isr_status & DP_DP_IRQ_HPD_INT_MASK) + drm_bridge_hpd_notify(dp->msm_dp_display.bridge, + connector_status_connected); + + ret = IRQ_HANDLED; + + return ret; +} + static int msm_dp_display_request_irq(struct msm_dp_display_private *dp) { int rc = 0; @@ -1204,9 +964,13 @@ static int msm_dp_display_request_irq(struct msm_dp_display_private *dp) return dp->irq; } - rc = devm_request_irq(&pdev->dev, dp->irq, msm_dp_display_irq_handler, - IRQF_TRIGGER_HIGH|IRQF_NO_AUTOEN, - "dp_display_isr", dp); + spin_lock_init(&dp->irq_thread_lock); + irq_set_status_flags(dp->irq, IRQ_NOAUTOEN); + rc = devm_request_threaded_irq(&pdev->dev, dp->irq, + msm_dp_display_irq_handler, + msm_dp_display_irq_thread, + IRQ_TYPE_LEVEL_HIGH, + "dp_display_isr", dp); if (rc < 0) { DRM_ERROR("failed to request IRQ%u: %d\n", @@ -1386,6 +1150,9 @@ static int msm_dp_display_probe(struct platform_device *pdev) 
dp->wide_bus_supported = desc->wide_bus_supported; dp->msm_dp_display.is_edp = (dp->msm_dp_display.connector_type == DRM_MODE_CONNECTOR_eDP); + dp->hpd_isr_status = 0; + + mutex_init(&dp->plugged_lock); rc = msm_dp_display_get_io(dp); if (rc) @@ -1397,11 +1164,6 @@ static int msm_dp_display_probe(struct platform_device *pdev) return -EPROBE_DEFER; } - /* setup event q */ - mutex_init(&dp->event_mutex); - init_waitqueue_head(&dp->event_q); - spin_lock_init(&dp->event_lock); - /* Store DP audio handle inside DP display */ dp->msm_dp_display.msm_dp_audio = dp->audio; @@ -1597,7 +1359,6 @@ void msm_dp_bridge_atomic_enable(struct drm_bridge *drm_bridge, struct msm_dp *dp = msm_dp_bridge->msm_dp_display; int rc = 0; struct msm_dp_display_private *msm_dp_display; - u32 hpd_state; bool force_link_train = false; msm_dp_display = container_of(dp, struct msm_dp_display_private, msm_dp_display); @@ -1607,35 +1368,34 @@ void msm_dp_bridge_atomic_enable(struct drm_bridge *drm_bridge, } if (dp->is_edp) - msm_dp_hpd_plug_handle(msm_dp_display, 0); + msm_dp_hpd_plug_handle(msm_dp_display); - mutex_lock(&msm_dp_display->event_mutex); if (pm_runtime_resume_and_get(&dp->pdev->dev)) { DRM_ERROR("failed to pm_runtime_resume\n"); - mutex_unlock(&msm_dp_display->event_mutex); return; } - hpd_state = msm_dp_display->hpd_state; - if (hpd_state != ST_DISPLAY_OFF && hpd_state != ST_MAINLINK_READY) { - mutex_unlock(&msm_dp_display->event_mutex); + if (msm_dp_display->link->sink_count == 0) return; - } rc = msm_dp_display_set_mode(dp, &msm_dp_display->msm_dp_mode); if (rc) { DRM_ERROR("Failed to perform a mode set, rc=%d\n", rc); - mutex_unlock(&msm_dp_display->event_mutex); return; } - hpd_state = msm_dp_display->hpd_state; - - if (hpd_state == ST_DISPLAY_OFF) { + if (!dp->power_on) { msm_dp_display_host_phy_init(msm_dp_display); force_link_train = true; } + rc = msm_dp_ctrl_on_link(msm_dp_display->ctrl); + if (rc) { + DRM_ERROR("Failed link training (rc=%d)\n", rc); + // TODO: schedule 
drm_connector_set_link_status_property() + return; + } + msm_dp_display_enable(msm_dp_display, force_link_train); rc = msm_dp_display_post_enable(dp); @@ -1644,11 +1404,7 @@ void msm_dp_bridge_atomic_enable(struct drm_bridge *drm_bridge, msm_dp_display_disable(msm_dp_display); } - /* completed connection */ - msm_dp_display->hpd_state = ST_CONNECTED; - drm_dbg_dp(dp->drm_dev, "type=%d Done\n", dp->connector_type); - mutex_unlock(&msm_dp_display->event_mutex); } void msm_dp_bridge_atomic_disable(struct drm_bridge *drm_bridge, @@ -1668,35 +1424,18 @@ void msm_dp_bridge_atomic_post_disable(struct drm_bridge *drm_bridge, { struct msm_dp_bridge *msm_dp_bridge = to_dp_bridge(drm_bridge); struct msm_dp *dp = msm_dp_bridge->msm_dp_display; - u32 hpd_state; struct msm_dp_display_private *msm_dp_display; msm_dp_display = container_of(dp, struct msm_dp_display_private, msm_dp_display); if (dp->is_edp) - msm_dp_hpd_unplug_handle(msm_dp_display, 0); - - mutex_lock(&msm_dp_display->event_mutex); - - hpd_state = msm_dp_display->hpd_state; - if (hpd_state != ST_DISCONNECT_PENDING && hpd_state != ST_CONNECTED) - drm_dbg_dp(dp->drm_dev, "type=%d wrong hpd_state=%d\n", - dp->connector_type, hpd_state); + msm_dp_hpd_unplug_handle(msm_dp_display); msm_dp_display_disable(msm_dp_display); - hpd_state = msm_dp_display->hpd_state; - if (hpd_state == ST_DISCONNECT_PENDING) { - /* completed disconnection */ - msm_dp_display->hpd_state = ST_DISCONNECTED; - } else { - msm_dp_display->hpd_state = ST_DISPLAY_OFF; - } - drm_dbg_dp(dp->drm_dev, "type=%d Done\n", dp->connector_type); pm_runtime_put_sync(&dp->pdev->dev); - mutex_unlock(&msm_dp_display->event_mutex); } void msm_dp_bridge_mode_set(struct drm_bridge *drm_bridge, @@ -1752,18 +1491,13 @@ void msm_dp_bridge_hpd_enable(struct drm_bridge *bridge) * step-4: DP PHY is initialized at plugin handler before link training * */ - mutex_lock(&dp->event_mutex); if (pm_runtime_resume_and_get(&msm_dp_display->pdev->dev)) { DRM_ERROR("failed to resume 
power\n"); - mutex_unlock(&dp->event_mutex); return; } msm_dp_aux_hpd_enable(dp->aux); msm_dp_aux_hpd_intr_enable(dp->aux); - - msm_dp_display->internal_hpd = true; - mutex_unlock(&dp->event_mutex); } void msm_dp_bridge_hpd_disable(struct drm_bridge *bridge) @@ -1772,15 +1506,10 @@ void msm_dp_bridge_hpd_disable(struct drm_bridge *bridge) struct msm_dp *msm_dp_display = msm_dp_bridge->msm_dp_display; struct msm_dp_display_private *dp = container_of(msm_dp_display, struct msm_dp_display_private, msm_dp_display); - mutex_lock(&dp->event_mutex); - msm_dp_aux_hpd_intr_disable(dp->aux); msm_dp_aux_hpd_disable(dp->aux); - msm_dp_display->internal_hpd = false; - pm_runtime_put_sync(&msm_dp_display->pdev->dev); - mutex_unlock(&dp->event_mutex); } void msm_dp_bridge_hpd_notify(struct drm_bridge *bridge, @@ -1790,13 +1519,30 @@ void msm_dp_bridge_hpd_notify(struct drm_bridge *bridge, struct msm_dp_bridge *msm_dp_bridge = to_dp_bridge(bridge); struct msm_dp *msm_dp_display = msm_dp_bridge->msm_dp_display; struct msm_dp_display_private *dp = container_of(msm_dp_display, struct msm_dp_display_private, msm_dp_display); + u32 hpd_link_status = 0; - /* Without next_bridge interrupts are handled by the DP core directly */ - if (msm_dp_display->internal_hpd) + if (pm_runtime_resume_and_get(&msm_dp_display->pdev->dev)) { + DRM_ERROR("failed to pm_runtime_resume\n"); return; + } - if (!msm_dp_display->link_ready && status == connector_status_connected) - msm_dp_add_event(dp, EV_HPD_PLUG_INT, 0, 0); - else if (msm_dp_display->link_ready && status == connector_status_disconnected) - msm_dp_add_event(dp, EV_HPD_UNPLUG_INT, 0, 0); + hpd_link_status = msm_dp_aux_is_link_connected(dp->aux); + + drm_dbg_dp(dp->drm_dev, "type=%d link hpd_link_status=0x%x, status=%d\n", + msm_dp_display->connector_type, hpd_link_status, status); + + if (status == connector_status_connected) { + if (hpd_link_status == ISR_HPD_REPLUG_COUNT) { + msm_dp_hpd_unplug_handle(dp); + msm_dp_hpd_plug_handle(dp); + } else 
if (hpd_link_status == ISR_IRQ_HPD_PULSE_COUNT) { + msm_dp_irq_hpd_handle(dp); + } else { + msm_dp_hpd_plug_handle(dp); + } + } else { + msm_dp_hpd_unplug_handle(dp); + } + + pm_runtime_put_sync(&msm_dp_display->pdev->dev); } diff --git a/drivers/gpu/drm/msm/dp/dp_display.h b/drivers/gpu/drm/msm/dp/dp_display.h index cc6e2cab36e9..0b65e16c790d 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.h +++ b/drivers/gpu/drm/msm/dp/dp_display.h @@ -16,12 +16,11 @@ struct msm_dp { struct platform_device *pdev; struct drm_connector *connector; struct drm_bridge *next_bridge; - bool link_ready; + struct drm_bridge *bridge; bool audio_enabled; bool power_on; unsigned int connector_type; bool is_edp; - bool internal_hpd; struct msm_dp_audio *msm_dp_audio; bool psr_supported; diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c index 349175457566..b659d22f5f28 100644 --- a/drivers/gpu/drm/msm/dp/dp_drm.c +++ b/drivers/gpu/drm/msm/dp/dp_drm.c @@ -16,56 +16,6 @@ #include "dp_drm.h" /** - * msm_dp_bridge_detect - callback to determine if connector is connected - * @bridge: Pointer to drm bridge structure - * @connector: Pointer to drm connector structure - * Returns: Bridge's 'is connected' status - */ -static enum drm_connector_status -msm_dp_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) -{ - struct msm_dp *dp; - - dp = to_dp_bridge(bridge)->msm_dp_display; - - drm_dbg_dp(dp->drm_dev, "link_ready = %s\n", - str_true_false(dp->link_ready)); - - return (dp->link_ready) ? 
connector_status_connected : - connector_status_disconnected; -} - -static int msm_dp_bridge_atomic_check(struct drm_bridge *bridge, - struct drm_bridge_state *bridge_state, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state) -{ - struct msm_dp *dp; - - dp = to_dp_bridge(bridge)->msm_dp_display; - - drm_dbg_dp(dp->drm_dev, "link_ready = %s\n", - str_true_false(dp->link_ready)); - - /* - * There is no protection in the DRM framework to check if the display - * pipeline has been already disabled before trying to disable it again. - * Hence if the sink is unplugged, the pipeline gets disabled, but the - * crtc->active is still true. Any attempt to set the mode or manually - * disable this encoder will result in the crash. - * - * TODO: add support for telling the DRM subsystem that the pipeline is - * disabled by the hardware and thus all access to it should be forbidden. - * After that this piece of code can be removed. - */ - if (bridge->ops & DRM_BRIDGE_OP_HPD) - return (dp->link_ready) ? 
0 : -ENOTCONN; - - return 0; -} - - -/** * msm_dp_bridge_get_modes - callback to add drm modes via drm_mode_probed_add() * @bridge: Poiner to drm bridge * @connector: Pointer to drm connector structure @@ -82,12 +32,10 @@ static int msm_dp_bridge_get_modes(struct drm_bridge *bridge, struct drm_connect dp = to_dp_bridge(bridge)->msm_dp_display; /* pluggable case assumes EDID is read when HPD */ - if (dp->link_ready) { - rc = msm_dp_display_get_modes(dp); - if (rc <= 0) { - DRM_ERROR("failed to get DP sink modes, rc=%d\n", rc); - return rc; - } + rc = msm_dp_display_get_modes(dp); + if (rc <= 0) { + DRM_ERROR("failed to get DP sink modes, rc=%d\n", rc); + return rc; } else { drm_dbg_dp(connector->dev, "No sink connected\n"); } @@ -112,7 +60,6 @@ static const struct drm_bridge_funcs msm_dp_bridge_ops = { .mode_valid = msm_dp_bridge_mode_valid, .get_modes = msm_dp_bridge_get_modes, .detect = msm_dp_bridge_detect, - .atomic_check = msm_dp_bridge_atomic_check, .hpd_enable = msm_dp_bridge_hpd_enable, .hpd_disable = msm_dp_bridge_hpd_disable, .hpd_notify = msm_dp_bridge_hpd_notify, @@ -360,6 +307,8 @@ int msm_dp_bridge_init(struct msm_dp *msm_dp_display, struct drm_device *dev, } } + msm_dp_display->bridge = bridge; + return 0; } diff --git a/drivers/gpu/drm/msm/dp/dp_drm.h b/drivers/gpu/drm/msm/dp/dp_drm.h index ec76448e71ae..041aa026ae2e 100644 --- a/drivers/gpu/drm/msm/dp/dp_drm.h +++ b/drivers/gpu/drm/msm/dp/dp_drm.h @@ -25,6 +25,8 @@ int msm_dp_bridge_init(struct msm_dp *msm_dp_display, struct drm_device *dev, struct drm_encoder *encoder, bool yuv_supported); +enum drm_connector_status msm_dp_bridge_detect(struct drm_bridge *bridge, + struct drm_connector *connector); void msm_dp_bridge_atomic_enable(struct drm_bridge *drm_bridge, struct drm_atomic_commit *state); void msm_dp_bridge_atomic_disable(struct drm_bridge *drm_bridge, diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index 891211b23202..6bb021820d7c 100644 --- 
a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -293,6 +293,14 @@ end: return rc; } +void msm_dp_panel_unplugged(struct msm_dp_panel *msm_dp_panel, + struct drm_connector *connector) +{ + drm_edid_connector_update(connector, NULL); + drm_edid_free(msm_dp_panel->drm_edid); + msm_dp_panel->drm_edid = NULL; +} + u32 msm_dp_panel_get_mode_bpp(struct msm_dp_panel *msm_dp_panel, u32 mode_edid_bpp, u32 mode_pclk_khz) { diff --git a/drivers/gpu/drm/msm/dp/dp_panel.h b/drivers/gpu/drm/msm/dp/dp_panel.h index 177c1328fd99..9173e90a5053 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.h +++ b/drivers/gpu/drm/msm/dp/dp_panel.h @@ -49,6 +49,8 @@ int msm_dp_panel_deinit(struct msm_dp_panel *msm_dp_panel); int msm_dp_panel_timing_cfg(struct msm_dp_panel *msm_dp_panel, bool wide_bus_en); int msm_dp_panel_read_sink_caps(struct msm_dp_panel *msm_dp_panel, struct drm_connector *connector); +void msm_dp_panel_unplugged(struct msm_dp_panel *msm_dp_panel, + struct drm_connector *connector); u32 msm_dp_panel_get_mode_bpp(struct msm_dp_panel *msm_dp_panel, u32 mode_max_bpp, u32 mode_pclk_khz); int msm_dp_panel_get_modes(struct msm_dp_panel *msm_dp_panel, diff --git a/drivers/gpu/drm/msm/dp/dp_reg.h b/drivers/gpu/drm/msm/dp/dp_reg.h index 7c44d4e2cf13..3689642b7fc0 100644 --- a/drivers/gpu/drm/msm/dp/dp_reg.h +++ b/drivers/gpu/drm/msm/dp/dp_reg.h @@ -68,8 +68,8 @@ #define DP_DP_IRQ_HPD_INT_ACK (0x00000002) #define DP_DP_HPD_REPLUG_INT_ACK (0x00000004) #define DP_DP_HPD_UNPLUG_INT_ACK (0x00000008) -#define DP_DP_HPD_STATE_STATUS_BITS_MASK (0x0000000F) -#define DP_DP_HPD_STATE_STATUS_BITS_SHIFT (0x1C) +#define DP_DP_HPD_STATE_STATUS_BITS_MASK (0x00000007) +#define DP_DP_HPD_STATE_STATUS_BITS_SHIFT (0x1D) #define REG_DP_DP_HPD_INT_MASK (0x0000000C) #define DP_DP_HPD_PLUG_INT_MASK (0x00000001) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index c59375aaae19..1fb3899b88bf 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c 
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -571,6 +571,8 @@ static const struct of_device_id dsi_phy_dt_match[] = { .data = &dsi_phy_5nm_8350_cfgs }, { .compatible = "qcom,sm8450-dsi-phy-5nm", .data = &dsi_phy_5nm_8450_cfgs }, + { .compatible = "qcom,milos-dsi-phy-4nm", + .data = &dsi_phy_4nm_milos_cfgs }, { .compatible = "qcom,sm8550-dsi-phy-4nm", .data = &dsi_phy_4nm_8550_cfgs }, { .compatible = "qcom,sm8650-dsi-phy-4nm", diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index c01784ca38ed..21a59d66e8dc 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -61,6 +61,7 @@ extern const struct msm_dsi_phy_cfg dsi_phy_5nm_8350_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_5nm_8450_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_5nm_8775p_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_5nm_sar2130p_cfgs; +extern const struct msm_dsi_phy_cfg dsi_phy_4nm_milos_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_4nm_8550_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_4nm_8650_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_3nm_8750_cfgs; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 8f4b03713f25..984a66085dfb 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -1436,6 +1436,29 @@ const struct msm_dsi_phy_cfg dsi_phy_5nm_sar2130p_cfgs = { .quirks = DSI_PHY_7NM_QUIRK_V5_2, }; +const struct msm_dsi_phy_cfg dsi_phy_4nm_milos_cfgs = { + .has_phy_lane = true, + .regulator_data = dsi_phy_7nm_98000uA_regulators, + .num_regulators = ARRAY_SIZE(dsi_phy_7nm_98000uA_regulators), + .ops = { + .enable = dsi_7nm_phy_enable, + .disable = dsi_7nm_phy_disable, + .pll_init = dsi_pll_7nm_init, + .save_pll_state = dsi_7nm_pll_save_state, + .restore_pll_state = dsi_7nm_pll_restore_state, + .set_continuous_clock = dsi_7nm_set_continuous_clock, + }, + .min_pll_rate = 600000000UL, 
+#ifdef CONFIG_64BIT + .max_pll_rate = 5000000000UL, +#else + .max_pll_rate = ULONG_MAX, +#endif + .io_start = { 0xae95000 }, + .num_dsi_phy = 1, + .quirks = DSI_PHY_7NM_QUIRK_V5_2, +}; + const struct msm_dsi_phy_cfg dsi_phy_4nm_8550_cfgs = { .has_phy_lane = true, .regulator_data = dsi_phy_7nm_98400uA_regulators, diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index 1059a9b29d6a..f12701e286ec 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -344,12 +344,6 @@ static int late_init_minor(struct drm_minor *minor) return ret; } - ret = msm_perf_debugfs_init(minor); - if (ret) { - DRM_DEV_ERROR(dev->dev, "could not install perf debugfs\n"); - return ret; - } - return 0; } diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index cc2bcd14b1c2..32d5ebea2596 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -87,7 +87,6 @@ static int msm_drm_uninit(struct device *dev, const struct component_ops *gpu_op msm_gem_shrinker_cleanup(ddev); - msm_perf_debugfs_cleanup(priv); msm_rd_debugfs_cleanup(priv); if (priv->kms) @@ -801,6 +800,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = { DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE, msm_ioctl_submitqueue_close, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(MSM_VM_BIND, msm_ioctl_vm_bind, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(MSM_PERFCNTR_CONFIG, msm_ioctl_perfcntr_config, DRM_RENDER_ALLOW), }; static void msm_show_fdinfo(struct drm_printer *p, struct drm_file *file) diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index c3fb3205f683..3787db8770ad 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -45,7 +45,6 @@ struct msm_gpu; struct msm_mmu; struct msm_mdss; struct msm_rd_state; -struct msm_perf_state; struct msm_gem_submit; struct msm_fence_context; struct 
msm_disp_state; @@ -89,7 +88,6 @@ struct msm_drm_private { struct msm_rd_state *rd; /* debugfs to dump all submits */ struct msm_rd_state *hangrd; /* debugfs to dump hanging submits */ - struct msm_perf_state *perf; /** * total_mem: Total/global amount of memory backing GEM objects. @@ -230,6 +228,14 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, int msm_ioctl_vm_bind(struct drm_device *dev, void *data, struct drm_file *file); +int msm_perfcntr_resume(struct msm_gpu *gpu); +void msm_perfcntr_suspend(struct msm_gpu *gpu); +int msm_ioctl_perfcntr_config(struct drm_device *dev, void *data, + struct drm_file *file); + +struct msm_perfcntr_state * msm_perfcntr_init(struct msm_gpu *gpu); +void msm_perfcntr_cleanup(struct msm_gpu *gpu); + #ifdef CONFIG_DEBUG_FS unsigned long msm_gem_shrinker_shrink(struct drm_device *dev, unsigned long nr_to_scan); #endif @@ -435,8 +441,6 @@ void msm_rd_debugfs_cleanup(struct msm_drm_private *priv); __printf(3, 4) void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit, const char *fmt, ...); -int msm_perf_debugfs_init(struct drm_minor *minor); -void msm_perf_debugfs_cleanup(struct msm_drm_private *priv); #else static inline int msm_debugfs_late_init(struct drm_device *dev) { return 0; } __printf(3, 4) @@ -444,7 +448,6 @@ static inline void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit, const char *fmt, ...) 
{} static inline void msm_rd_debugfs_cleanup(struct msm_drm_private *priv) {} -static inline void msm_perf_debugfs_cleanup(struct msm_drm_private *priv) {} #endif struct clk *msm_clk_get(struct platform_device *pdev, const char *name); diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 3f3925b11eea..18ed00e5f143 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -682,104 +682,6 @@ static void hangcheck_handler(struct timer_list *t) } /* - * Performance Counters: - */ - -/* called under perf_lock */ -static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs) -{ - uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)]; - int i, n = min(ncntrs, gpu->num_perfcntrs); - - /* read current values: */ - for (i = 0; i < gpu->num_perfcntrs; i++) - current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg); - - /* update cntrs: */ - for (i = 0; i < n; i++) - cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i]; - - /* save current values: */ - for (i = 0; i < gpu->num_perfcntrs; i++) - gpu->last_cntrs[i] = current_cntrs[i]; - - return n; -} - -static void update_sw_cntrs(struct msm_gpu *gpu) -{ - ktime_t time; - uint32_t elapsed; - unsigned long flags; - - spin_lock_irqsave(&gpu->perf_lock, flags); - if (!gpu->perfcntr_active) - goto out; - - time = ktime_get(); - elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time)); - - gpu->totaltime += elapsed; - if (gpu->last_sample.active) - gpu->activetime += elapsed; - - gpu->last_sample.active = msm_gpu_active(gpu); - gpu->last_sample.time = time; - -out: - spin_unlock_irqrestore(&gpu->perf_lock, flags); -} - -void msm_gpu_perfcntr_start(struct msm_gpu *gpu) -{ - unsigned long flags; - - pm_runtime_get_sync(&gpu->pdev->dev); - - spin_lock_irqsave(&gpu->perf_lock, flags); - /* we could dynamically enable/disable perfcntr registers too.. 
*/ - gpu->last_sample.active = msm_gpu_active(gpu); - gpu->last_sample.time = ktime_get(); - gpu->activetime = gpu->totaltime = 0; - gpu->perfcntr_active = true; - update_hw_cntrs(gpu, 0, NULL); - spin_unlock_irqrestore(&gpu->perf_lock, flags); -} - -void msm_gpu_perfcntr_stop(struct msm_gpu *gpu) -{ - gpu->perfcntr_active = false; - pm_runtime_put_sync(&gpu->pdev->dev); -} - -/* returns -errno or # of cntrs sampled */ -int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime, - uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&gpu->perf_lock, flags); - - if (!gpu->perfcntr_active) { - ret = -EINVAL; - goto out; - } - - *activetime = gpu->activetime; - *totaltime = gpu->totaltime; - - gpu->activetime = gpu->totaltime = 0; - - ret = update_hw_cntrs(gpu, ncntrs, cntrs); - -out: - spin_unlock_irqrestore(&gpu->perf_lock, flags); - - return ret; -} - -/* * Cmdstream submission/retirement: */ @@ -881,7 +783,6 @@ void msm_gpu_retire(struct msm_gpu *gpu) msm_update_fence(gpu->rb[i]->fctx, gpu->rb[i]->memptrs->fence); kthread_queue_work(gpu->worker, &gpu->retire_work); - update_sw_cntrs(gpu); } /* add bo's to gpu's ring, and kick gpu: */ @@ -898,8 +799,6 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) submit->seqno = submit->hw_fence->seqno; - update_sw_cntrs(gpu); - /* * ring->submits holds a ref to the submit, to deal with the case * that a submit completes before msm_ioctl_gem_submit() returns. 
@@ -991,9 +890,6 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, void *memptrs; uint64_t memptrs_iova; - if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs))) - gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs); - gpu->dev = drm; gpu->funcs = funcs; gpu->name = name; @@ -1025,9 +921,6 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0); - spin_lock_init(&gpu->perf_lock); - - /* Map registers: */ gpu->mmio = msm_ioremap(pdev, config->ioname); if (IS_ERR(gpu->mmio)) { @@ -1117,6 +1010,17 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, refcount_set(&gpu->sysprof_active, 1); + mutex_init(&gpu->perfcntr_lock); + + if (gpu->num_perfcntr_groups > 0) { + gpu->perfcntrs = msm_perfcntr_init(gpu); + if (IS_ERR(gpu->perfcntrs)) { + ret = PTR_ERR(gpu->perfcntrs); + gpu->perfcntrs = NULL; + goto fail; + } + } + return 0; fail: @@ -1155,6 +1059,7 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) } msm_devfreq_cleanup(gpu); + msm_perfcntr_cleanup(gpu); platform_set_drvdata(gpu->pdev, NULL); } diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 666cf499b7ec..6c83b8cbbb90 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -22,9 +22,10 @@ struct msm_gem_submit; struct msm_gem_vm_log_entry; -struct msm_gpu_perfcntr; struct msm_gpu_state; struct msm_context; +struct msm_perfcntr_group; +struct msm_perfcntr_stream; struct msm_gpu_config { const char *ioname; @@ -92,7 +93,14 @@ struct msm_gpu_funcs { * for cmdstream that is buffered in this FIFO upstream of the CP fw. 
*/ bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); - void (*sysprof_setup)(struct msm_gpu *gpu); + void (*sysprof_setup)(struct msm_gpu *gpu, bool force_on); + + /* Configure perfcntr SELect regs: */ + void (*perfcntr_configure)(struct msm_gpu *gpu, struct msm_ringbuffer *ring, + const struct msm_perfcntr_stream *stream); + + /* Flush perfcntrs before reading (optional): */ + void (*perfcntr_flush)(struct msm_gpu *gpu); }; /* Additional state for iommu faults: */ @@ -168,18 +176,6 @@ struct msm_gpu { struct adreno_smmu_priv adreno_smmu; - /* performance counters (hw & sw): */ - spinlock_t perf_lock; - bool perfcntr_active; - struct { - bool active; - ktime_t time; - } last_sample; - uint32_t totaltime, activetime; /* sw counters */ - uint32_t last_cntrs[5]; /* hw counters */ - const struct msm_gpu_perfcntr *perfcntrs; - uint32_t num_perfcntrs; - struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS]; int nr_rings; @@ -275,6 +271,14 @@ struct msm_gpu { bool allow_relocs; struct thermal_cooling_device *cooling; + + const struct msm_perfcntr_group *perfcntr_groups; + unsigned num_perfcntr_groups; + + struct msm_perfcntr_state *perfcntrs; + + /** @perfcntr_lock: protects perfcntr related state */ + struct mutex perfcntr_lock; }; static inline struct msm_gpu *dev_to_gpu(struct device *dev) @@ -320,19 +324,63 @@ static inline bool msm_gpu_active(struct msm_gpu *gpu) return false; } -/* Perf-Counters: - * The select_reg and select_val are just there for the benefit of the child - * class that actually enables the perf counter.. but msm_gpu base class - * will handle sampling/displaying the counters. +/** + * struct msm_perfcntr_group_state - Tracking for the currently allocated counter state */ +struct msm_perfcntr_group_state { + /** + * @allocated_counters: + * + * allocated counters for global counter collection. 
The + * corresponding counters are allocated from highest to + * lowest, to minimize chance of conflict with old userspace + * allocating from lowest to highest. + */ + unsigned allocated_counters; -struct msm_gpu_perfcntr { - uint32_t select_reg; - uint32_t sample_reg; - uint32_t select_val; - const char *name; + /** + * @countables: + * + * The corresponding SELect reg values for the allocated counters + */ + uint32_t countables[]; +}; + +/** + * struct msm_perfcntr_state - overall global perfcntr state + */ +struct msm_perfcntr_state { + /** @stream: current global counter stream if active */ + struct msm_perfcntr_stream *stream; + + /** @sel_seqno: counter for sel_fence */ + uint32_t sel_seqno; + + /** + * @groups: Global perfcntr stream group state. + * + * Conceptually this is part of msm_perfcntr_stream state, but is + * statically pre-allocated when the gpu is initialized to simplify + * error path cleanup in PERFCNTR_CONFIG ioctl. (__free(kfree) + * doesn't really help with variable length arrays of allocated + * pointers.) + */ + struct msm_perfcntr_group_state *groups[]; }; +static inline bool +msm_gpu_sysprof_no_perfcntr_zap(struct msm_gpu *gpu) +{ + return (refcount_read(&gpu->sysprof_active) > 1) || + (gpu->perfcntrs && READ_ONCE(gpu->perfcntrs->stream)); +} + +static inline bool +msm_gpu_sysprof_no_ifpc(struct msm_gpu *gpu) +{ + return refcount_read(&gpu->sysprof_active) > 1; +} + /* * The number of priority levels provided by drm gpu scheduler. The * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some @@ -438,6 +486,11 @@ struct msm_context { * this context. 
*/ atomic64_t ctx_mem; + + /** + * @perfcntrs: Per-context reserved perfcntrs state + */ + struct msm_perfcntr_context_state *perfctx; }; struct drm_gpuvm *msm_context_vm(struct drm_device *dev, struct msm_context *ctx); @@ -689,11 +742,6 @@ void msm_devfreq_idle(struct msm_gpu *gpu); int msm_gpu_hw_init(struct msm_gpu *gpu); -void msm_gpu_perfcntr_start(struct msm_gpu *gpu); -void msm_gpu_perfcntr_stop(struct msm_gpu *gpu); -int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime, - uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs); - void msm_gpu_retire(struct msm_gpu *gpu); void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit); diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 90c3fa0681a0..9087c4b290db 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -166,90 +166,72 @@ static int _msm_mdss_irq_domain_add(struct msm_mdss *msm_mdss) return 0; } -static void msm_mdss_setup_ubwc_dec_20(struct msm_mdss *msm_mdss) +static void msm_mdss_4x_setup_ubwc(struct msm_mdss *msm_mdss) { const struct qcom_ubwc_cfg_data *data = msm_mdss->mdss_data; - u32 value = MDSS_UBWC_STATIC_UBWC_SWIZZLE(data->ubwc_swizzle) | + u32 value = MDSS_UBWC_STATIC_UBWC_SWIZZLE(qcom_ubwc_swizzle(data) & + UBWC_SWIZZLE_ENABLE_LVL1) | MDSS_UBWC_STATIC_HIGHEST_BANK_BIT(data->highest_bank_bit - 13); - if (data->ubwc_bank_spread) - value |= MDSS_UBWC_STATIC_UBWC_BANK_SPREAD; - - if (data->ubwc_enc_version == UBWC_1_0) - value |= MDSS_UBWC_STATIC_UBWC_MIN_ACC_LEN(1); + value |= MDSS_UBWC_STATIC_UBWC_MIN_ACC_LEN(qcom_ubwc_min_acc_length_64b(data)); writel_relaxed(value, msm_mdss->mmio + REG_MDSS_UBWC_STATIC); } -static void msm_mdss_setup_ubwc_dec_30(struct msm_mdss *msm_mdss) +static void msm_mdss_5x_setup_ubwc(struct msm_mdss *msm_mdss) { const struct qcom_ubwc_cfg_data *data = msm_mdss->mdss_data; - u32 value = MDSS_UBWC_STATIC_UBWC_SWIZZLE(data->ubwc_swizzle & 0x1) | + u32 value = 
MDSS_UBWC_STATIC_UBWC_SWIZZLE(qcom_ubwc_swizzle(data) & + UBWC_SWIZZLE_ENABLE_LVL1) | MDSS_UBWC_STATIC_HIGHEST_BANK_BIT(data->highest_bank_bit - 13); - if (data->macrotile_mode) + if (qcom_ubwc_macrotile_mode(data)) value |= MDSS_UBWC_STATIC_MACROTILE_MODE; - if (data->ubwc_enc_version == UBWC_3_0) + if (qcom_ubwc_enable_amsbc(data)) value |= MDSS_UBWC_STATIC_UBWC_AMSBC; - if (data->ubwc_enc_version == UBWC_1_0) - value |= MDSS_UBWC_STATIC_UBWC_MIN_ACC_LEN(1); + value |= MDSS_UBWC_STATIC_UBWC_MIN_ACC_LEN(qcom_ubwc_min_acc_length_64b(data)); writel_relaxed(value, msm_mdss->mmio + REG_MDSS_UBWC_STATIC); } -static void msm_mdss_setup_ubwc_dec_40(struct msm_mdss *msm_mdss) +static void msm_mdss_6x_setup_ubwc(struct msm_mdss *msm_mdss) { const struct qcom_ubwc_cfg_data *data = msm_mdss->mdss_data; - u32 value = MDSS_UBWC_STATIC_UBWC_SWIZZLE(data->ubwc_swizzle) | + u32 value = MDSS_UBWC_STATIC_UBWC_SWIZZLE(qcom_ubwc_swizzle(data)) | MDSS_UBWC_STATIC_HIGHEST_BANK_BIT(data->highest_bank_bit - 13); + u32 prediction_mode; - if (data->ubwc_bank_spread) + if (qcom_ubwc_bank_spread(data)) value |= MDSS_UBWC_STATIC_UBWC_BANK_SPREAD; - if (data->macrotile_mode) + if (qcom_ubwc_macrotile_mode(data)) value |= MDSS_UBWC_STATIC_MACROTILE_MODE; - writel_relaxed(value, msm_mdss->mmio + REG_MDSS_UBWC_STATIC); - - if (data->ubwc_enc_version == UBWC_3_0) { - writel_relaxed(1, msm_mdss->mmio + REG_MDSS_UBWC_CTRL_2); - writel_relaxed(0, msm_mdss->mmio + REG_MDSS_UBWC_PREDICTION_MODE); - } else { - if (data->ubwc_dec_version == UBWC_4_3) - writel_relaxed(3, msm_mdss->mmio + REG_MDSS_UBWC_CTRL_2); - else - writel_relaxed(2, msm_mdss->mmio + REG_MDSS_UBWC_CTRL_2); - writel_relaxed(1, msm_mdss->mmio + REG_MDSS_UBWC_PREDICTION_MODE); - } -} - -static void msm_mdss_setup_ubwc_dec_50(struct msm_mdss *msm_mdss) -{ - const struct qcom_ubwc_cfg_data *data = msm_mdss->mdss_data; - u32 value = MDSS_UBWC_STATIC_UBWC_SWIZZLE(data->ubwc_swizzle) | - MDSS_UBWC_STATIC_HIGHEST_BANK_BIT(data->highest_bank_bit 
- 13); - - if (data->ubwc_bank_spread) - value |= MDSS_UBWC_STATIC_UBWC_BANK_SPREAD; - - if (data->macrotile_mode) - value |= MDSS_UBWC_STATIC_MACROTILE_MODE; + value |= MDSS_UBWC_STATIC_UBWC_MIN_ACC_LEN(qcom_ubwc_min_acc_length_64b(data)); writel_relaxed(value, msm_mdss->mmio + REG_MDSS_UBWC_STATIC); - if (data->ubwc_dec_version == UBWC_6_0) - writel_relaxed(5, msm_mdss->mmio + REG_MDSS_UBWC_CTRL_2); + if (data->ubwc_enc_version < UBWC_4_0) + prediction_mode = 0; else - writel_relaxed(4, msm_mdss->mmio + REG_MDSS_UBWC_CTRL_2); + prediction_mode = 1; - writel_relaxed(1, msm_mdss->mmio + REG_MDSS_UBWC_PREDICTION_MODE); + writel_relaxed(qcom_ubwc_version_tag(data), + msm_mdss->mmio + REG_MDSS_UBWC_CTRL_2); + writel_relaxed(prediction_mode, msm_mdss->mmio + REG_MDSS_UBWC_PREDICTION_MODE); } +#define MDSS_HW_VER(major, minor, step) \ + ((((major) & 0xf) << 28) | \ + (((minor) & 0xfff) << 16) | \ + ((step) & 0xffff)) + static int msm_mdss_enable(struct msm_mdss *msm_mdss) { int ret, i; + u32 hw_rev; /* * Several components have AXI clocks that can only be turned on if @@ -283,43 +265,15 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss) if (msm_mdss->is_mdp5 || !msm_mdss->mdss_data) return 0; - /* - * ubwc config is part of the "mdss" region which is not accessible - * from the rest of the driver. hardcode known configurations here - * - * Decoder version can be read from the UBWC_DEC_HW_VERSION reg, - * UBWC_n and the rest of params comes from hw data. 
- */ - switch (msm_mdss->mdss_data->ubwc_dec_version) { - case 0: /* no UBWC */ - case UBWC_1_0: - /* do nothing */ - break; - case UBWC_2_0: - msm_mdss_setup_ubwc_dec_20(msm_mdss); - break; - case UBWC_3_0: - msm_mdss_setup_ubwc_dec_30(msm_mdss); - break; - case UBWC_4_0: - case UBWC_4_3: - msm_mdss_setup_ubwc_dec_40(msm_mdss); - break; - case UBWC_5_0: - msm_mdss_setup_ubwc_dec_50(msm_mdss); - break; - case UBWC_6_0: - msm_mdss_setup_ubwc_dec_50(msm_mdss); - break; - default: - dev_err(msm_mdss->dev, "Unsupported UBWC decoder version %x\n", - msm_mdss->mdss_data->ubwc_dec_version); - dev_err(msm_mdss->dev, "HW_REV: 0x%x\n", - readl_relaxed(msm_mdss->mmio + REG_MDSS_HW_VERSION)); - dev_err(msm_mdss->dev, "UBWC_DEC_HW_VERSION: 0x%x\n", - readl_relaxed(msm_mdss->mmio + REG_MDSS_UBWC_DEC_HW_VERSION)); - break; - } + hw_rev = readl_relaxed(msm_mdss->mmio + REG_MDSS_HW_VERSION); + + if (hw_rev >= MDSS_HW_VER(6, 0, 0)) + msm_mdss_6x_setup_ubwc(msm_mdss); + else if (hw_rev >= MDSS_HW_VER(5, 0, 0)) + msm_mdss_5x_setup_ubwc(msm_mdss); + else if (hw_rev >= MDSS_HW_VER(4, 0, 0)) + msm_mdss_4x_setup_ubwc(msm_mdss); + /* else UBWC 1.0 or none, no params to program */ return ret; } @@ -550,6 +504,10 @@ static void mdss_remove(struct platform_device *pdev) msm_mdss_destroy(mdss); } +static const struct msm_mdss_data data_14k = { + .reg_bus_bw = 14000, +}; + static const struct msm_mdss_data data_57k = { .reg_bus_bw = 57000, }; @@ -571,6 +529,7 @@ static const struct of_device_id mdss_dt_match[] = { { .compatible = "qcom,eliza-mdss", .data = &data_57k }, { .compatible = "qcom,glymur-mdss", .data = &data_57k }, { .compatible = "qcom,kaanapali-mdss", .data = &data_57k }, + { .compatible = "qcom,milos-mdss", .data = &data_14k }, { .compatible = "qcom,msm8998-mdss", .data = &data_76k8 }, { .compatible = "qcom,qcm2290-mdss", .data = &data_76k8 }, { .compatible = "qcom,qcs8300-mdss", .data = &data_74k }, diff --git a/drivers/gpu/drm/msm/msm_perf.c b/drivers/gpu/drm/msm/msm_perf.c 
deleted file mode 100644 index 7768bde6745f..000000000000 --- a/drivers/gpu/drm/msm/msm_perf.c +++ /dev/null @@ -1,235 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2013 Red Hat - * Author: Rob Clark <robdclark@gmail.com> - */ - -/* For profiling, userspace can: - * - * tail -f /sys/kernel/debug/dri/<minor>/gpu - * - * This will enable performance counters/profiling to track the busy time - * and any gpu specific performance counters that are supported. - */ - -#ifdef CONFIG_DEBUG_FS - -#include <linux/debugfs.h> -#include <linux/uaccess.h> - -#include <drm/drm_file.h> - -#include "msm_drv.h" -#include "msm_gpu.h" - -struct msm_perf_state { - struct drm_device *dev; - - bool open; - int cnt; - struct mutex read_lock; - - char buf[256]; - int buftot, bufpos; - - unsigned long next_jiffies; -}; - -#define SAMPLE_TIME (HZ/4) - -/* wait for next sample time: */ -static int wait_sample(struct msm_perf_state *perf) -{ - unsigned long start_jiffies = jiffies; - - if (time_after(perf->next_jiffies, start_jiffies)) { - unsigned long remaining_jiffies = - perf->next_jiffies - start_jiffies; - int ret = schedule_timeout_interruptible(remaining_jiffies); - if (ret > 0) { - /* interrupted */ - return -ERESTARTSYS; - } - } - perf->next_jiffies += SAMPLE_TIME; - return 0; -} - -static int refill_buf(struct msm_perf_state *perf) -{ - struct msm_drm_private *priv = perf->dev->dev_private; - struct msm_gpu *gpu = priv->gpu; - char *ptr = perf->buf; - int rem = sizeof(perf->buf); - int i, n; - - if ((perf->cnt++ % 32) == 0) { - /* Header line: */ - n = scnprintf(ptr, rem, "%%BUSY"); - ptr += n; - rem -= n; - - for (i = 0; i < gpu->num_perfcntrs; i++) { - const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i]; - n = scnprintf(ptr, rem, "\t%s", perfcntr->name); - ptr += n; - rem -= n; - } - } else { - /* Sample line: */ - uint32_t activetime = 0, totaltime = 0; - uint32_t cntrs[5]; - uint32_t val; - int ret; - - /* sleep until next sample time: */ - ret 
= wait_sample(perf); - if (ret) - return ret; - - ret = msm_gpu_perfcntr_sample(gpu, &activetime, &totaltime, - ARRAY_SIZE(cntrs), cntrs); - if (ret < 0) - return ret; - - val = totaltime ? 1000 * activetime / totaltime : 0; - n = scnprintf(ptr, rem, "%3d.%d%%", val / 10, val % 10); - ptr += n; - rem -= n; - - for (i = 0; i < ret; i++) { - /* cycle counters (I think).. convert to MHz.. */ - val = cntrs[i] / 10000; - n = scnprintf(ptr, rem, "\t%5d.%02d", - val / 100, val % 100); - ptr += n; - rem -= n; - } - } - - n = scnprintf(ptr, rem, "\n"); - ptr += n; - rem -= n; - - perf->bufpos = 0; - perf->buftot = ptr - perf->buf; - - return 0; -} - -static ssize_t perf_read(struct file *file, char __user *buf, - size_t sz, loff_t *ppos) -{ - struct msm_perf_state *perf = file->private_data; - int n = 0, ret = 0; - - mutex_lock(&perf->read_lock); - - if (perf->bufpos >= perf->buftot) { - ret = refill_buf(perf); - if (ret) - goto out; - } - - n = min((int)sz, perf->buftot - perf->bufpos); - if (copy_to_user(buf, &perf->buf[perf->bufpos], n)) { - ret = -EFAULT; - goto out; - } - - perf->bufpos += n; - *ppos += n; - -out: - mutex_unlock(&perf->read_lock); - if (ret) - return ret; - return n; -} - -static int perf_open(struct inode *inode, struct file *file) -{ - struct msm_perf_state *perf = inode->i_private; - struct drm_device *dev = perf->dev; - struct msm_drm_private *priv = dev->dev_private; - struct msm_gpu *gpu = priv->gpu; - int ret = 0; - - if (!gpu) - return -ENODEV; - - mutex_lock(&gpu->lock); - - if (perf->open) { - ret = -EBUSY; - goto out; - } - - file->private_data = perf; - perf->open = true; - perf->cnt = 0; - perf->buftot = 0; - perf->bufpos = 0; - msm_gpu_perfcntr_start(gpu); - perf->next_jiffies = jiffies + SAMPLE_TIME; - -out: - mutex_unlock(&gpu->lock); - return ret; -} - -static int perf_release(struct inode *inode, struct file *file) -{ - struct msm_perf_state *perf = inode->i_private; - struct msm_drm_private *priv = perf->dev->dev_private; - 
msm_gpu_perfcntr_stop(priv->gpu); - perf->open = false; - return 0; -} - - -static const struct file_operations perf_debugfs_fops = { - .owner = THIS_MODULE, - .open = perf_open, - .read = perf_read, - .release = perf_release, -}; - -int msm_perf_debugfs_init(struct drm_minor *minor) -{ - struct msm_drm_private *priv = minor->dev->dev_private; - struct msm_perf_state *perf; - - /* only create on first minor: */ - if (priv->perf) - return 0; - - perf = kzalloc_obj(*perf); - if (!perf) - return -ENOMEM; - - perf->dev = minor->dev; - - mutex_init(&perf->read_lock); - priv->perf = perf; - - debugfs_create_file("perf", S_IFREG | S_IRUGO, minor->debugfs_root, - perf, &perf_debugfs_fops); - return 0; -} - -void msm_perf_debugfs_cleanup(struct msm_drm_private *priv) -{ - struct msm_perf_state *perf = priv->perf; - - if (!perf) - return; - - priv->perf = NULL; - - mutex_destroy(&perf->read_lock); - - kfree(perf); -} - -#endif diff --git a/drivers/gpu/drm/msm/msm_perfcntr.c b/drivers/gpu/drm/msm/msm_perfcntr.c new file mode 100644 index 000000000000..ce65b1160955 --- /dev/null +++ b/drivers/gpu/drm/msm/msm_perfcntr.c @@ -0,0 +1,670 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
+ */ + +#include "drm/drm_file.h" +#include "drm/msm_drm.h" + +#include "linux/anon_inodes.h" +#include "linux/gfp_types.h" +#include "linux/poll.h" +#include "linux/slab.h" + +#include "msm_drv.h" +#include "msm_gpu.h" +#include "msm_perfcntr.h" + +#include "adreno/adreno_gpu.h" + +/* space used: */ +#define fifo_count(stream) \ + (CIRC_CNT((stream)->fifo.head, (stream)->fifo.tail, (stream)->fifo_size)) +#define fifo_count_to_end(stream) \ + (CIRC_CNT_TO_END(smp_load_acquire(&(stream)->fifo.head), (stream)->fifo.tail, (stream)->fifo_size)) +/* space available: */ +#define fifo_space(stream) \ + (CIRC_SPACE((stream)->fifo.head, (stream)->fifo.tail, (stream)->fifo_size)) + +static int +msm_perfcntr_resume_locked(struct msm_perfcntr_stream *stream) +{ + if (!stream) + return 0; + + /* Reprogram SEL regs on highest priority rb: */ + struct msm_ringbuffer *ring = stream->gpu->rb[0]; + + queue_work(ring->sched.submit_wq, &stream->sel_work); + + hrtimer_start(&stream->sample_timer, + ns_to_ktime(stream->sample_period_ns), + HRTIMER_MODE_REL_PINNED); + + return 0; +} + +int +msm_perfcntr_resume(struct msm_gpu *gpu) +{ + if (!gpu->perfcntrs) + return 0; + guard(mutex)(&gpu->perfcntr_lock); + return msm_perfcntr_resume_locked(gpu->perfcntrs->stream); +} + +static void +msm_perfcntr_suspend_locked(struct msm_perfcntr_stream *stream) +{ + if (!stream) + return; + + hrtimer_cancel(&stream->sample_timer); + kthread_cancel_work_sync(&stream->sample_work); + + /* + * We can't use cancel_work_sync() here, since sel_work acquires + * gpu->lock which (a) in suspend path can already be held, or + * (b) in release path would invert the order of gpu->lock and + * gpu->perfcntr_lock. Either would cause deadlock. 
+ */ + cancel_work(&stream->sel_work); + + stream->sel_fence = ++stream->gpu->perfcntrs->sel_seqno; + stream->seqno = 0; +} + +void +msm_perfcntr_suspend(struct msm_gpu *gpu) +{ + if (!gpu->perfcntrs) + return; + guard(mutex)(&gpu->perfcntr_lock); + msm_perfcntr_suspend_locked(gpu->perfcntrs->stream); +} + +static int +msm_perfcntrs_stream_release(struct inode *inode, struct file *file) +{ + struct msm_perfcntr_stream *stream = file->private_data; + struct msm_gpu *gpu = stream->gpu; + + scoped_guard (mutex, &gpu->perfcntr_lock) { + struct msm_perfcntr_state *perfcntrs = gpu->perfcntrs; + + msm_perfcntr_suspend_locked(stream); + perfcntrs->stream = NULL; + + /* release previously allocated counters: */ + for (unsigned i = 0; i < gpu->num_perfcntr_groups; i++) + perfcntrs->groups[i]->allocated_counters = 0; + } + + /* + * In the suspend path we use async cancel_work(), to avoid blocking + * on sel_work, which acquires gpu->lock (which could deadlock since + * other paths acquire gpu->lock before perfcntr_lock) or already + * hold gpu->lock. + * + * But since we are freeing the stream, after dropping perfcntr_lock + * we need to block until sel_work is done: + */ + cancel_work_sync(&stream->sel_work); + + kfree(stream->group_idx); + kfree(stream->fifo.buf); + kfree(stream); + + return 0; +} + +static __poll_t +msm_perfcntrs_stream_poll(struct file *file, poll_table *wait) +{ + struct msm_perfcntr_stream *stream = file->private_data; + __poll_t events = 0; + + poll_wait(file, &stream->poll_wq, wait); + + /* Are there samples to read? 
*/ + if (fifo_count(stream) > 0) + events |= EPOLLIN; + + return events; +} + +static ssize_t +msm_perfcntrs_stream_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct msm_perfcntr_stream *stream = file->private_data; + int ret; + + if (!(file->f_flags & O_NONBLOCK)) { + ret = wait_event_interruptible(stream->poll_wq, + fifo_count(stream) > 0); + if (ret) + return ret; + } + + guard(mutex)(&stream->read_lock); + + struct circ_buf *fifo = &stream->fifo; + const char *fptr = &fifo->buf[fifo->tail]; + + count = min_t(size_t, count, fifo_count_to_end(stream)); + if (!count) + return -EAGAIN; + if (copy_to_user(buf, fptr, count)) + return -EFAULT; + + smp_store_release(&fifo->tail, (fifo->tail + count) & (stream->fifo_size - 1)); + *ppos += count; + + return count; +} + +static const struct file_operations stream_fops = { + .owner = THIS_MODULE, + .release = msm_perfcntrs_stream_release, + .poll = msm_perfcntrs_stream_poll, + .read = msm_perfcntrs_stream_read, +}; + +static void +sel_worker(struct work_struct *w) +{ + struct msm_perfcntr_stream *stream = + container_of(w, typeof(*stream), sel_work); + struct msm_gpu *gpu = stream->gpu; + /* Reprogram SEL regs on highest priority rb: */ + struct msm_ringbuffer *ring = stream->gpu->rb[0]; + + /* + * If in the process of resuming, wait for that. Otherwise sel_worker + * which is enqueued in the resume path can be scheduled before the + * resume completes. + */ + pm_runtime_barrier(&gpu->pdev->dev); + + /* + * sel_work could end up scheduled before suspend, but running + * after. See msm_perfcntr_suspend_locked() + * + * So if we end up running sel_work after the GPU is already + * suspended, just bail. It will be scheduled again after + * the GPU is resumed. 
+ */ + if (!pm_runtime_get_if_active(&gpu->pdev->dev)) + return; + + scoped_guard (mutex, &gpu->lock) { + guard(mutex)(&gpu->perfcntr_lock); + + if (stream == gpu->perfcntrs->stream) { + msm_gpu_hw_init(gpu); + gpu->funcs->perfcntr_configure(gpu, ring, stream); + } + } + + pm_runtime_put_autosuspend(&gpu->pdev->dev); +} + +static void +sample_write(struct msm_perfcntr_stream *stream, int *head, const void *buf, size_t sz) +{ + /* + * FIFO size is power-of-two, and guaranteed to have enough space to + * fit what we are writing. So we should not hit the wrap-around + * point writing things that are power-of-two sized + */ + WARN_ON(CIRC_SPACE_TO_END(*head, stream->fifo.tail, stream->fifo_size) < sz); + + memcpy(&stream->fifo.buf[*head], buf, sz); + + /* Advance head, wrapping around if necessary: */ + *head = (*head + sz) & (stream->fifo_size - 1); +} + +static void +sample_write_u32(struct msm_perfcntr_stream *stream, int *head, uint32_t val) +{ + sample_write(stream, head, &val, sizeof(val)); +} + +static void +sample_write_u64(struct msm_perfcntr_stream *stream, int *head, uint64_t val) +{ + sample_write(stream, head, &val, sizeof(val)); +} + +static void +sample_worker(struct kthread_work *work) +{ + struct msm_perfcntr_stream *stream = + container_of(work, typeof(*stream), sample_work); + struct msm_gpu *gpu = stream->gpu; + struct msm_rbmemptrs *memptrs = gpu->rb[0]->memptrs; + + if (memptrs->perfcntr_fence != stream->sel_fence) + return; + + /* + * Ensure we have enough space to capture a sample period's + * worth of data: + */ + if (stream->period_size > fifo_space(stream)) { + stream->seqno = 0; + return; + } + + /* Inhibit IFPC while accessing registers: */ + if (gpu->funcs->sysprof_setup) + gpu->funcs->sysprof_setup(gpu, true); + + if (gpu->funcs->perfcntr_flush) + gpu->funcs->perfcntr_flush(gpu); + + /* Keep local copy of head to avoid updating fifo until the end: */ + int head = stream->fifo.head; + + /* + * We expect the GPU to be powered at this point, 
as the timer + * and kthread work are canceled/flushed in the suspend path: + */ + sample_write_u64(stream, &head, + to_adreno_gpu(gpu)->funcs->get_timestamp(gpu)); + sample_write_u32(stream, &head, stream->seqno++); + sample_write_u32(stream, &head, 0); + + for (unsigned i = 0; i < stream->nr_groups; i++) { + unsigned group_idx = msm_perfcntr_group_idx(stream, i); + unsigned base = msm_perfcntr_counter_base(stream, group_idx); + + const struct msm_perfcntr_group *group = + &gpu->perfcntr_groups[group_idx]; + + struct msm_perfcntr_group_state *group_state = + gpu->perfcntrs->groups[group_idx]; + + unsigned nr = group_state->allocated_counters; + for (unsigned j = 0; j < nr; j++) { + const struct msm_perfcntr_counter *counter = + &group->counters[j + base]; + uint64_t val = gpu_read64(gpu, counter->counter_reg_lo); + sample_write_u64(stream, &head, val); + } + } + + /* Re-enable IFPC: */ + if (gpu->funcs->sysprof_setup) + gpu->funcs->sysprof_setup(gpu, false); + + smp_store_release(&stream->fifo.head, head); + wake_up_all(&stream->poll_wq); +} + +static enum hrtimer_restart +sample_timer(struct hrtimer *hrtimer) +{ + struct msm_perfcntr_stream *stream = + container_of(hrtimer, typeof(*stream), sample_timer); + + kthread_queue_work(stream->gpu->worker, &stream->sample_work); + + hrtimer_forward_now(hrtimer, ns_to_ktime(stream->sample_period_ns)); + + return HRTIMER_RESTART; +} + +static int +get_group_idx(struct msm_gpu *gpu, const char *name, size_t len) +{ + for (unsigned i = 0; i < gpu->num_perfcntr_groups; i++) { + const struct msm_perfcntr_group *group = + &gpu->perfcntr_groups[i]; + if (!strncmp(group->name, name, len)) + return i; + } + + return -1; +} + +static int +get_available_counters(struct msm_gpu *gpu, int group_idx, uint32_t flags) +{ + struct msm_perfcntr_state *perfcntrs = gpu->perfcntrs; + + /* + * For local counter reservation, anything that is not used by + * global perfcntr stream is available: + */ + if (!(flags & MSM_PERFCNTR_STREAM)) { + 
return gpu->perfcntr_groups[group_idx].num_counters - + perfcntrs->groups[group_idx]->allocated_counters; + } + + /* + * For global counter collection, anything that is not reserved by + * one or more contexts is available: + */ + guard(mutex)(&gpu->dev->filelist_mutex); + + unsigned reserved_counters = 0; + struct drm_file *file; + + list_for_each_entry (file, &gpu->dev->filelist, lhead) { + struct msm_context *ctx = file->driver_priv; + + if (!ctx || !ctx->perfctx) + continue; + + unsigned n = ctx->perfctx->reserved_counters[group_idx]; + reserved_counters = max(reserved_counters, n); + } + + return gpu->perfcntr_groups[group_idx].num_counters - reserved_counters; +} + +int +msm_ioctl_perfcntr_config(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct msm_drm_private *priv = dev->dev_private; + const struct drm_msm_perfcntr_config *args = data; + struct msm_context *ctx = file->driver_priv; + struct msm_gpu *gpu = priv->gpu; + int stream_fd = 0; + + if (!gpu || !gpu->num_perfcntr_groups) + return -ENXIO; + + struct msm_perfcntr_state *perfcntrs = gpu->perfcntrs; + + /* + * Validate args that don't require locks/power first: + */ + + if (args->flags & ~MSM_PERFCNTR_FLAGS) + return UERR(EINVAL, dev, "invalid flags"); + + if (args->nr_groups && !args->group_stride) + return UERR(EINVAL, dev, "invalid group_stride"); + + if (args->nr_groups > gpu->num_perfcntr_groups) + return UERR(EINVAL, dev, "too many groups"); + + if (args->nr_groups && !args->groups) + return UERR(EINVAL, dev, "no groups"); + + if (args->flags & MSM_PERFCNTR_STREAM) { + if (!perfmon_capable()) + return UERR(EPERM, dev, "invalid permissions"); + if (!args->nr_groups) + return UERR(EINVAL, dev, "invalid nr_groups"); + if (!args->period) + return UERR(EINVAL, dev, "invalid sampling period"); + if (args->bufsz_shift > const_ilog2(SZ_128M)) + return UERR(EINVAL, dev, "buffer size too big (>128M)"); + } else { + if (args->period) + return UERR(EINVAL, dev, "sampling period not 
allowed"); + if (args->bufsz_shift) + return UERR(EINVAL, dev, "sample buf size not allowed"); + } + + /* + * To avoid iterating over the groups multiple times, allocate and setup + * both a ctx and global stream object. Only one of the two will be + * kept in the end. + */ + + struct msm_perfcntr_context_state *perfctx __free(kfree) = kzalloc( + struct_size(perfctx, reserved_counters, gpu->num_perfcntr_groups), + GFP_KERNEL); + if (!perfctx) + return -ENOMEM; + + struct msm_perfcntr_stream *stream __free(kfree) = kzalloc_obj(*stream); + if (!stream) + return -ENOMEM; + + uint8_t *nr_counters __free(kfree) = kzalloc_objs(uint8_t, gpu->num_perfcntr_groups); + if (!nr_counters) + return -ENOMEM; + + uint32_t *group_idx __free(kfree) = kzalloc_objs(uint32_t, args->nr_groups); + if (!group_idx) + return -ENOMEM; + + stream->gpu = gpu; + stream->sample_period_ns = args->period; + stream->nr_groups = args->nr_groups; + stream->fifo_size = 1ull << args->bufsz_shift; + + mutex_init(&stream->read_lock); + + guard(mutex)(&gpu->perfcntr_lock); + + if (args->flags & MSM_PERFCNTR_STREAM) { + if (perfcntrs->stream) + return UERR(EBUSY, dev, "perfcntr stream already open"); + } + + size_t bufsz = 16; /* header size includes seqno and 64b timestamp: */ + int ret = 0; + + for (unsigned i = 0; i < args->nr_groups; i++) { + struct drm_msm_perfcntr_group g = {0}; + size_t sz = min_t(size_t, args->group_stride, sizeof(g)); + void __user *userptr = + u64_to_user_ptr(args->groups + (i * args->group_stride)); + + if (copy_from_user(&g, userptr, sz)) + return -EFAULT; + + if (g.pad) + return UERR(EINVAL, dev, "groups[%d]: invalid pad", i); + + int idx = get_group_idx(gpu, g.group_name, sizeof(g.group_name)); + + if (idx < 0) + return UERR(EINVAL, dev, "groups[%d]: unknown group", i); + + if (nr_counters[idx]) + return UERR(EINVAL, dev, "groups[%d]: duplicate group", i); + + if (g.nr_countables > gpu->perfcntr_groups[idx].num_counters) + return UERR(EINVAL, dev, "groups[%d]: too many 
counters", i); + + if (args->flags & MSM_PERFCNTR_STREAM) { + if (g.nr_countables && !g.countables) + return UERR(EINVAL, dev, "groups[%d]: no countables", i); + } else { + if (g.countables) + return UERR(EINVAL, dev, "groups[%d]: countables should be NULL", i); + } + + int avail_counters = get_available_counters(gpu, idx, args->flags); + if (g.nr_countables > avail_counters) { + /* + * Defer error return until we process all groups, in + * case there are other E2BIG groups: + */ + ret = UERR(E2BIG, dev, "groups[%d]: too few counters available", i); + + if (args->flags & MSM_PERFCNTR_UPDATE) { + /* Let userspace know how many counters are actually avail: */ + g.nr_countables = avail_counters; + if (copy_to_user(userptr, &g, sz)) + return -EFAULT; + } + } + + group_idx[i] = idx; + perfctx->reserved_counters[idx] = g.nr_countables; + + /* +1 to catch duplicate zero sized groups: */ + nr_counters[idx] = g.nr_countables + 1; + + if (args->flags & MSM_PERFCNTR_STREAM) { + size_t sz = sizeof(uint32_t) * g.nr_countables; + void __user *userptr = u64_to_user_ptr(g.countables); + + if (copy_from_user(perfcntrs->groups[idx]->countables, userptr, sz)) + return -EFAULT; + + /* Samples are 64b per countable: */ + bufsz += 2 * sz; + } + } + + if (ret) + return ret; + + if (args->flags & MSM_PERFCNTR_STREAM) { + /* + * Validate requested buffer size is large enough for at least + * a single sample period. + * + * Note the circ_buf implementation needs to be 1 byte larger + * than max it can hold (see CIRC_SPACE()). 
+ */ + if (stream->fifo_size <= bufsz) + return UERR(EINVAL, dev, "required buffer size: %zu", bufsz); + + /* There aren't enough counters to hit this limit: */ + WARN_ON(bufsz > SZ_128M); + + stream->period_size = bufsz; + + void *buf __free(kfree) = kmalloc(stream->fifo_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + FD_PREPARE(fdf, O_CLOEXEC, + anon_inode_getfile("[msm_perfcntrs]", &stream_fops, stream, 0)); + if (fdf.err) + return fdf.err; + + INIT_WORK(&stream->sel_work, sel_worker); + kthread_init_work(&stream->sample_work, sample_worker); + init_waitqueue_head(&stream->poll_wq); + hrtimer_setup(&stream->sample_timer, sample_timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); + + stream->sel_fence = ++perfcntrs->sel_seqno; + stream->group_idx = no_free_ptr(group_idx); + stream->fifo.buf = no_free_ptr(buf); + + /* commit the allocated counters, subtracting off original +1: */ + for (unsigned i = 0; i < gpu->num_perfcntr_groups; i++) + perfcntrs->groups[i]->allocated_counters = nr_counters[i] - 1; + + perfcntrs->stream = no_free_ptr(stream); + + msm_perfcntr_resume_locked(perfcntrs->stream); + + stream_fd = fd_publish(fdf); + } else { + kfree(ctx->perfctx); + ctx->perfctx = no_free_ptr(perfctx); + } + + return stream_fd; +} + +/** + * msm_perfcntr_group_idx - map idx of perfcntr group to group_idx + * @stream: The global perfcntr stream + * @n: The requested group_idx + * + * The PERFCNTR_CONFIG ioctl requested N counters/countables per perfcntr + * group, but the order of groups is not required to match the order they + * are defined in the perfcntr tables (which is not stable/UABI, only the + * group names are UABI). + * + * But the order samples are returned in the stream should match the + * order they are requested in the PERFCNTR_CONFIG ioctl. This helper + * handles the order remapping. + * + * Returns an index into gpu->perfcntr_groups[] and perfcntrs->groups[]. 
+ */ +uint32_t +msm_perfcntr_group_idx(const struct msm_perfcntr_stream *stream, uint32_t n) +{ + WARN_ON_ONCE(n >= stream->nr_groups); + return stream->group_idx[n]; +} + +/** + * msm_perfcntr_counter_base - get idx of the first counter in group + * @stream: The global perfcntr stream + * @group_idx: the index of the counter group + * + * For global counter collection, counters are allocated from the end + * (last counter) while UMD allocates them from the start (0..N-1). + * Since UMD always allocated them from the start this also minimizes + * the chance of conflict when using old UMD which predates + * PERFCNTR_CONFIG ioctl. + * + * Returns the index of first counter to use. An index into + * msm_perfcntr_group::counters[]. + */ +uint32_t +msm_perfcntr_counter_base(const struct msm_perfcntr_stream *stream, uint32_t group_idx) +{ + struct msm_gpu *gpu = stream->gpu; + struct msm_perfcntr_state *perfcntrs = gpu->perfcntrs; + unsigned num_counters = gpu->perfcntr_groups[group_idx].num_counters; + unsigned allocated_counters = perfcntrs->groups[group_idx]->allocated_counters; + + return num_counters - allocated_counters; +} + +static void +__msm_perfcntr_cleanup(struct msm_gpu *gpu, struct msm_perfcntr_state *perfcntrs) +{ + struct device *dev = &gpu->pdev->dev; + + for (unsigned i = 0; i < gpu->num_perfcntr_groups; i++) + devm_kfree(dev, perfcntrs->groups[i]); + + devm_kfree(dev, perfcntrs); +} + +void +msm_perfcntr_cleanup(struct msm_gpu *gpu) +{ + if (!gpu->perfcntrs) + return; + + __msm_perfcntr_cleanup(gpu, gpu->perfcntrs); + gpu->perfcntrs = NULL; +} + +struct msm_perfcntr_state * +msm_perfcntr_init(struct msm_gpu *gpu) +{ + struct msm_perfcntr_state *perfcntrs; + struct device *dev = &gpu->pdev->dev; + size_t sz; + + sz = struct_size(perfcntrs, groups, gpu->num_perfcntr_groups); + perfcntrs = devm_kzalloc(dev, sz, GFP_KERNEL); + if (!perfcntrs) + return ERR_PTR(-ENOMEM); + + for (unsigned i = 0; i < gpu->num_perfcntr_groups; i++) { + const struct 
msm_perfcntr_group *group = + &gpu->perfcntr_groups[i]; + + sz = struct_size(perfcntrs->groups[i], countables, group->num_counters); + perfcntrs->groups[i] = devm_kzalloc(dev, sz, GFP_KERNEL); + if (!perfcntrs->groups[i]) { + __msm_perfcntr_cleanup(gpu, perfcntrs); + return ERR_PTR(-ENOMEM); + } + } + + return perfcntrs; +} diff --git a/drivers/gpu/drm/msm/msm_perfcntr.h b/drivers/gpu/drm/msm/msm_perfcntr.h new file mode 100644 index 000000000000..0feeb81c531f --- /dev/null +++ b/drivers/gpu/drm/msm/msm_perfcntr.h @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +#ifndef __MSM_PERFCNTR_H__ +#define __MSM_PERFCNTR_H__ + +#include "linux/array_size.h" +#include "linux/circ_buf.h" +#include "linux/hrtimer.h" +#include "linux/kthread.h" +#include "linux/wait.h" +#include "linux/workqueue.h" + +#include "adreno_common.xml.h" + +/* + * This is a subset of the tables used by mesa. We don't need to + * enumerate the countables on the kernel side. 
+ */ + +/* Describes a single counter: */ +struct msm_perfcntr_counter { + /* offset of the SELect register to choose what to count: */ + unsigned select_reg; + /* additional SEL regs to enable slice counters (gen8+) */ + unsigned slice_select_regs[2]; + /* offset of the lo/hi 32b to read current counter value: */ + unsigned counter_reg_lo; + unsigned counter_reg_hi; + /* TODO some counters have enable/clear registers */ +}; + +/* Describes an entire counter group: */ +struct msm_perfcntr_group { + const char *name; + enum adreno_pipe pipe; + unsigned num_counters; + const struct msm_perfcntr_counter *counters; +}; + +/** + * struct msm_perfcntr_stream - state for a single open stream fd + */ +struct msm_perfcntr_stream { + /** @gpu: Back-link to the GPU */ + struct msm_gpu *gpu; + + /** @sample_timer: Timer to sample counters */ + struct hrtimer sample_timer; + + /** @poll_wq: Wait queue for waiting for OA data to be available */ + wait_queue_head_t poll_wq; + + /** @sample_period_ns: Sampling period */ + uint64_t sample_period_ns; + + /** @nr_groups: # of counter groups with enabled counters */ + uint32_t nr_groups; + + /** @seqno: counter for collected samples */ + uint32_t seqno; + + /** @sel_fence: Fence for SEL reg programming */ + uint32_t sel_fence; + + /** + * @sel_work: Worker for SEL reg programming + * + * Initial SEL reg programming (as opposed to restoring the SEL + * regs on runpm resume) must run on the same ordered wq as is + * used by drm_sched, to serialize it with GEM_SUBMITs written + * into the same ringbuffer. + */ + struct work_struct sel_work; + + /** + * @sample_work: Worker for collecting samples + */ + struct kthread_work sample_work; + + /** + * @read_lock: + * + * Fifo access is synchronized on the producer side by virtue + * of there being a single timer collecting samples and writing + * into the fifo. It is protected on the consumer side by + * @read_lock. 
+ */ + struct mutex read_lock; + + /** + * @group_idx: array of nr_groups + * + * Maps the order of groups in PERFCNTR_CONFIG ioctl to group idx, + * so that results in the results stream can be ordered to match + * the ioctl call that set up the stream + */ + uint32_t *group_idx; + + /** @fifo: circular buffer for samples */ + struct circ_buf fifo; + + /** @fifo_size: circular buffer size */ + size_t fifo_size; + + /** @period_size: size of data for single sampling period */ + size_t period_size; +}; + +uint32_t msm_perfcntr_group_idx(const struct msm_perfcntr_stream *stream, uint32_t n); +uint32_t msm_perfcntr_counter_base(const struct msm_perfcntr_stream *stream, uint32_t group_idx); + +/** + * struct msm_perfcntr_context_state - per-msm_context counter state + * + * A given counter can either be unused, reserved for global counter + * collection exclusively, or reserved for local per-context counter + * collection inclusively. Multiple contexts can reserve the same + * counter, since SEL reg programming and counter begin/end sampling + * happen locally (within a single GEM_SUBMIT ioctl). + */ +struct msm_perfcntr_context_state { + /** @dummy: Some compilers dislike structs with only a flex array */ + unsigned dummy; + + /** + * @reserved_counters: + * + * The number of reserved counters indexed by perfcntr group. 
+ */ + unsigned reserved_counters[]; +}; + +extern const struct msm_perfcntr_group a6xx_perfcntr_groups[]; +extern const unsigned a6xx_num_perfcntr_groups; + +extern const struct msm_perfcntr_group a7xx_perfcntr_groups[]; +extern const unsigned a7xx_num_perfcntr_groups; + +extern const struct msm_perfcntr_group a8xx_perfcntr_groups[]; +extern const unsigned a8xx_num_perfcntr_groups; + +#define GROUP(_name, _pipe, _counters, _countables) { \ + .name = _name, \ + .pipe = _pipe, \ + .num_counters = ARRAY_SIZE(_counters), \ + .counters = _counters, \ + } + +#define fd_perfcntr_counter msm_perfcntr_counter +#define fd_perfcntr_group msm_perfcntr_group + +#endif /* __MSM_PERFCNTR_H__ */ diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index d1e49f701c81..28ca8c9f7463 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -37,6 +37,8 @@ struct msm_rbmemptrs { volatile struct msm_gpu_submit_stats stats[MSM_GPU_SUBMIT_STATS_COUNT]; volatile u64 ttbr0; volatile u32 context_idr; + + volatile u32 perfcntr_fence; }; struct msm_cp_state { diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c index 2598d674a99d..1a5a77b28016 100644 --- a/drivers/gpu/drm/msm/msm_submitqueue.c +++ b/drivers/gpu/drm/msm/msm_submitqueue.c @@ -42,7 +42,7 @@ int msm_context_set_sysprof(struct msm_context *ctx, struct msm_gpu *gpu, int sy /* Some gpu families require additional setup for sysprof */ if (gpu->funcs->sysprof_setup) - gpu->funcs->sysprof_setup(gpu); + gpu->funcs->sysprof_setup(gpu, false); ctx->sysprof = sysprof; @@ -66,6 +66,7 @@ void __msm_context_destroy(struct kref *kref) drm_gpuvm_put(ctx->vm); kfree(ctx->comm); kfree(ctx->cmdline); + kfree(ctx->perfctx); kfree(ctx); } diff --git a/drivers/gpu/drm/msm/registers/adreno/a2xx_perfcntrs.json b/drivers/gpu/drm/msm/registers/adreno/a2xx_perfcntrs.json new file mode 100644 index 000000000000..8095345ffd8e --- /dev/null +++ 
b/drivers/gpu/drm/msm/registers/adreno/a2xx_perfcntrs.json @@ -0,0 +1,109 @@ +{ + "chip": "A2XX", + "groups": [ + { + "name": "CP", + "num": 1, + "select": "CP_PERFCOUNTER_SELECT", + "counter_lo": "CP_PERFCOUNTER_LO", + "counter_hi": "CP_PERFCOUNTER_HI", + "countable_type": "a2xx_cp_perfcount_sel" + }, + { + "name": "PA_SU", + "num": 4, + "select": "PA_SU_PERFCOUNTER{}_SELECT", + "counter_lo": "PA_SU_PERFCOUNTER{}_LOW", + "counter_hi": "PA_SU_PERFCOUNTER{}_HI", + "countable_type": "a2xx_su_perfcnt_select" + }, + { + "name": "PA_SC", + "num": 1, + "select": "PA_SC_PERFCOUNTER{}_SELECT", + "counter_lo": "PA_SC_PERFCOUNTER{}_LOW", + "counter_hi": "PA_SC_PERFCOUNTER{}_HI", + "countable_type": "a2xx_sc_perfcnt_select" + }, + { + "name": "VGT", + "num": 4, + "select": "VGT_PERFCOUNTER{}_SELECT", + "counter_lo": "VGT_PERFCOUNTER{}_LOW", + "counter_hi": "VGT_PERFCOUNTER{}_HI", + "countable_type": "a2xx_vgt_perfcount_select" + }, + { + "name": "TCR", + "num": 2, + "select": "TCR_PERFCOUNTER{}_SELECT", + "counter_lo": "TCR_PERFCOUNTER{}_LOW", + "counter_hi": "TCR_PERFCOUNTER{}_HI", + "countable_type": "a2xx_tcr_perfcount_select" + }, + { + "name": "TP0", + "num": 2, + "select": "TP0_PERFCOUNTER{}_SELECT", + "counter_lo": "TP0_PERFCOUNTER{}_LOW", + "counter_hi": "TP0_PERFCOUNTER{}_HI", + "countable_type": "a2xx_tp_perfcount_select" + }, + { + "name": "TCM", + "num": 2, + "select": "TCM_PERFCOUNTER{}_SELECT", + "counter_lo": "TCM_PERFCOUNTER{}_LOW", + "counter_hi": "TCM_PERFCOUNTER{}_HI", + "countable_type": "a2xx_tcm_perfcount_select" + }, + { + "name": "TCF", + "num": 12, + "select": "TCF_PERFCOUNTER{}_SELECT", + "counter_lo": "TCF_PERFCOUNTER{}_LOW", + "counter_hi": "TCF_PERFCOUNTER{}_HI", + "countable_type": "a2xx_tcf_perfcount_select" + }, + { + "name": "SQ", + "num": 4, + "select": "SQ_PERFCOUNTER{}_SELECT", + "counter_lo": "SQ_PERFCOUNTER{}_LOW", + "counter_hi": "SQ_PERFCOUNTER{}_HI", + "countable_type": "a2xx_sq_perfcnt_select" + }, + { + "name": "SX", + "num": 1, + 
"select": "SX_PERFCOUNTER{}_SELECT", + "counter_lo": "SX_PERFCOUNTER{}_LOW", + "counter_hi": "SX_PERFCOUNTER{}_HI", + "countable_type": "a2xx_sx_perfcnt_select" + }, + { + "name": "MH", + "num": 2, + "select": "MH_PERFCOUNTER{}_SELECT", + "counter_lo": "MH_PERFCOUNTER{}_LOW", + "counter_hi": "MH_PERFCOUNTER{}_HI", + "countable_type": "a2xx_mh_perfcnt_select" + }, + { + "name": "RBBM", + "num": 2, + "select": "RBBM_PERFCOUNTER{}_SELECT", + "counter_lo": "RBBM_PERFCOUNTER{}_LO", + "counter_hi": "RBBM_PERFCOUNTER{}_HI", + "countable_type": "a2xx_rbbm_perfcount1_sel" + }, + { + "name": "RB", + "num": 4, + "select": "RB_PERFCOUNTER{}_SELECT", + "counter_lo": "RB_PERFCOUNTER{}_LOW", + "counter_hi": "RB_PERFCOUNTER{}_HI", + "countable_type": "a2xx_rb_perfcnt_select" + } + ] +} diff --git a/drivers/gpu/drm/msm/registers/adreno/a3xx.xml b/drivers/gpu/drm/msm/registers/adreno/a3xx.xml index 6717abc0a897..09c9606fc3e1 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a3xx.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a3xx.xml @@ -1330,11 +1330,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> </reg32> <reg32 offset="0x22c5" name="SP_VS_CTRL_REG1"> <bitfield name="CONSTLENGTH" low="0" high="9" type="uint"/> - <!-- - not sure about full vs half const.. I can't get blob generate - something with a mediump/lowp uniform. 
- --> - <bitfield name="CONSTFOOTPRINT" low="10" high="19" type="uint"/> + <bitfield name="CONSTMAXID" low="10" high="19" type="uint"/> <bitfield name="INITIALOUTSTANDING" low="24" high="30" type="uint"/> </reg32> <reg32 offset="0x22c6" name="SP_VS_PARAM_REG"> @@ -1420,7 +1416,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> </reg32> <reg32 offset="0x22e1" name="SP_FS_CTRL_REG1"> <bitfield name="CONSTLENGTH" low="0" high="9" type="uint"/> - <bitfield name="CONSTFOOTPRINT" low="10" high="19" type="uint"/> + <bitfield name="CONSTMAXID" low="10" high="19" type="uint"/> <bitfield name="INITIALOUTSTANDING" low="20" high="23" type="uint"/> <bitfield name="HALFPRECVAROFFSET" low="24" high="30" type="uint"/> </reg32> diff --git a/drivers/gpu/drm/msm/registers/adreno/a5xx.xml b/drivers/gpu/drm/msm/registers/adreno/a5xx.xml index bd8df5945166..4af76b3750f7 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a5xx.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a5xx.xml @@ -1418,8 +1418,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <bitfield name="HEIGHT" low="9" high="16" shr="5" type="uint"/> <!-- b17 maybe BYPASS like RB_CNTL, but reg not written for bypass --> </reg32> - <reg32 offset="0x0bc3" name="VSC_SIZE_ADDRESS_LO"/> - <reg32 offset="0x0bc4" name="VSC_SIZE_ADDRESS_HI"/> + <reg64 offset="0x0bc3" name="VSC_SIZE_ADDRESS" type="waddress"/> <reg32 offset="0x0bc5" name="UNKNOWN_0BC5"/> <!-- always 00000000? --> <reg32 offset="0x0bc6" name="UNKNOWN_0BC6"/> <!-- always 00000000? --> <array offset="0x0bd0" name="VSC_PIPE_CONFIG" stride="1" length="16"> @@ -1498,12 +1497,10 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> </reg32> <reg32 offset="0x0d01" name="PC_ADDR_MODE_CNTL" type="a5xx_address_mode"/> <reg32 offset="0x0d02" name="PC_MODE_CNTL"/> <!-- always 0000001f? 
--> - <reg32 offset="0x0d04" name="PC_INDEX_BUF_LO"/> - <reg32 offset="0x0d05" name="PC_INDEX_BUF_HI"/> + <reg64 offset="0x0d04" name="PC_INDEX_BUF" type="waddress"/> <reg32 offset="0x0d06" name="PC_START_INDEX"/> <reg32 offset="0x0d07" name="PC_MAX_INDEX"/> - <reg32 offset="0x0d08" name="PC_TESSFACTOR_ADDR_LO"/> - <reg32 offset="0x0d09" name="PC_TESSFACTOR_ADDR_HI"/> + <reg64 offset="0x0d08" name="PC_TESSFACTOR_ADDR" type="waddress"/> <reg32 offset="0x0d10" name="PC_PERFCTR_PC_SEL_0" type="a5xx_pc_perfcounter_select"/> <reg32 offset="0x0d11" name="PC_PERFCTR_PC_SEL_1" type="a5xx_pc_perfcounter_select"/> <reg32 offset="0x0d12" name="PC_PERFCTR_PC_SEL_2" type="a5xx_pc_perfcounter_select"/> @@ -1555,20 +1552,14 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x0e80" name="UCHE_ADDR_MODE_CNTL" type="a5xx_address_mode"/> <reg32 offset="0x0e81" name="UCHE_MODE_CNTL"/> <reg32 offset="0x0e82" name="UCHE_SVM_CNTL"/> - <reg32 offset="0x0e87" name="UCHE_WRITE_THRU_BASE_LO"/> - <reg32 offset="0x0e88" name="UCHE_WRITE_THRU_BASE_HI"/> - <reg32 offset="0x0e89" name="UCHE_TRAP_BASE_LO"/> - <reg32 offset="0x0e8a" name="UCHE_TRAP_BASE_HI"/> - <reg32 offset="0x0e8b" name="UCHE_GMEM_RANGE_MIN_LO"/> - <reg32 offset="0x0e8c" name="UCHE_GMEM_RANGE_MIN_HI"/> - <reg32 offset="0x0e8d" name="UCHE_GMEM_RANGE_MAX_LO"/> - <reg32 offset="0x0e8e" name="UCHE_GMEM_RANGE_MAX_HI"/> + <reg64 offset="0x0e87" name="UCHE_WRITE_THRU_BASE" type="waddress"/> + <reg64 offset="0x0e89" name="UCHE_TRAP_BASE" type="waddress"/> + <reg64 offset="0x0e8b" name="UCHE_GMEM_RANGE_MIN" type="waddress"/> + <reg64 offset="0x0e8d" name="UCHE_GMEM_RANGE_MAX" type="waddress"/> <reg32 offset="0x0e8f" name="UCHE_DBG_ECO_CNTL_2"/> <reg32 offset="0x0e90" name="UCHE_DBG_ECO_CNTL"/> - <reg32 offset="0x0e91" name="UCHE_CACHE_INVALIDATE_MIN_LO"/> - <reg32 offset="0x0e92" name="UCHE_CACHE_INVALIDATE_MIN_HI"/> - <reg32 offset="0x0e93" name="UCHE_CACHE_INVALIDATE_MAX_LO"/> - <reg32 
offset="0x0e94" name="UCHE_CACHE_INVALIDATE_MAX_HI"/> + <reg64 offset="0x0e91" name="UCHE_CACHE_INVALIDATE_MIN" type="address"/> + <reg64 offset="0x0e93" name="UCHE_CACHE_INVALIDATE_MAX" type="address"/> <reg32 offset="0x0e95" name="UCHE_CACHE_INVALIDATE"/> <reg32 offset="0x0e96" name="UCHE_CACHE_WAYS"/> <reg32 offset="0x0ea0" name="UCHE_PERFCTR_UCHE_SEL_0" type="a5xx_uche_perfcounter_select"/> @@ -1583,8 +1574,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x0ea9" name="UCHE_POWERCTR_UCHE_SEL_1"/> <reg32 offset="0x0eaa" name="UCHE_POWERCTR_UCHE_SEL_2"/> <reg32 offset="0x0eab" name="UCHE_POWERCTR_UCHE_SEL_3"/> - <reg32 offset="0x0eb1" name="UCHE_TRAP_LOG_LO"/> - <reg32 offset="0x0eb2" name="UCHE_TRAP_LOG_HI"/> + <reg64 offset="0x0eb1" name="UCHE_TRAP_LOG" type="waddress"/> <reg32 offset="0x0ec0" name="SP_DBG_ECO_CNTL"/> <reg32 offset="0x0ec1" name="SP_ADDR_MODE_CNTL" type="a5xx_address_mode"/> @@ -1923,8 +1913,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> invalidates the LRZ buffer? (Or just the covered positions? --> </reg32> - <reg32 offset="0xe101" name="GRAS_LRZ_BUFFER_BASE_LO"/> - <reg32 offset="0xe102" name="GRAS_LRZ_BUFFER_BASE_HI"/> + <reg64 offset="0xe101" name="GRAS_LRZ_BUFFER_BASE" type="waddress"/> <!-- lzr pitch is depth pitch (in pixels) / 8 (aligned to 32).. --> @@ -1933,8 +1922,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> is also divided by 8 (ie. 
covers 8x8 pixels) </doc> <reg32 offset="0xe103" name="GRAS_LRZ_BUFFER_PITCH" shr="5" type="uint"/> - <reg32 offset="0xe104" name="GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO"/> - <reg32 offset="0xe105" name="GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI"/> + <reg64 offset="0xe104" name="GRAS_LRZ_FAST_CLEAR_BUFFER_BASE" type="waddress"/> <reg32 offset="0xe140" name="RB_CNTL"> <bitfield name="WIDTH" low="0" high="7" shr="5" type="uint"/> @@ -2035,8 +2023,7 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? not set --> <reg32 offset="0x3" name="PITCH" shr="6" type="uint"/> <reg32 offset="0x4" name="ARRAY_PITCH" shr="6" type="uint"/> - <reg32 offset="0x5" name="BASE_LO"/> - <reg32 offset="0x6" name="BASE_HI"/> + <reg64 offset="0x5" name="BASE" type="waddress"/> </array> <reg32 offset="0xe1a0" name="RB_BLEND_RED"> <bitfield name="UINT" low="0" high="7" type="hex"/> @@ -2089,8 +2076,7 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? not set <reg32 offset="0xe1b2" name="RB_DEPTH_BUFFER_INFO"> <bitfield name="DEPTH_FORMAT" low="0" high="2" type="a5xx_depth_format"/> </reg32> - <reg32 offset="0xe1b3" name="RB_DEPTH_BUFFER_BASE_LO"/> - <reg32 offset="0xe1b4" name="RB_DEPTH_BUFFER_BASE_HI"/> + <reg64 offset="0xe1b3" name="RB_DEPTH_BUFFER_BASE" type="waddress"/> <reg32 offset="0xe1b5" name="RB_DEPTH_BUFFER_PITCH" shr="6" type="uint"> <doc>stride of depth/stencil buffer</doc> </reg32> @@ -2119,8 +2105,7 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? 
not set <reg32 offset="0xe1c1" name="RB_STENCIL_INFO"> <bitfield name="SEPARATE_STENCIL" pos="0" type="boolean"/> </reg32> - <reg32 offset="0xe1c2" name="RB_STENCIL_BASE_LO"/> - <reg32 offset="0xe1c3" name="RB_STENCIL_BASE_HI"/> + <reg64 offset="0xe1c2" name="RB_STENCIL_BASE" type="waddress"/> <reg32 offset="0xe1c4" name="RB_STENCIL_PITCH" shr="6" type="uint"/> <reg32 offset="0xe1c5" name="RB_STENCIL_ARRAY_PITCH" shr="6" type="uint"/> <reg32 offset="0xe1c6" name="RB_STENCILREFMASK" type="adreno_rb_stencilrefmask"/> @@ -2163,8 +2148,7 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? not set also for gmem->mem preserving tiling --> </reg32> - <reg32 offset="0xe214" name="RB_BLIT_DST_LO"/> - <reg32 offset="0xe215" name="RB_BLIT_DST_HI"/> + <reg64 offset="0xe214" name="RB_BLIT_DST" type="waddress"/> <reg32 offset="0xe216" name="RB_BLIT_DST_PITCH" shr="6" type="uint"/> <!-- array-pitch is size of layer --> <reg32 offset="0xe217" name="RB_BLIT_DST_ARRAY_PITCH" shr="6" type="uint"/> @@ -2235,25 +2219,22 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? 
not set </doc> - <reg32 offset="0xe240" name="RB_DEPTH_FLAG_BUFFER_BASE_LO"/> - <reg32 offset="0xe241" name="RB_DEPTH_FLAG_BUFFER_BASE_HI"/> + <reg64 offset="0xe240" name="RB_DEPTH_FLAG_BUFFER_BASE" type="waddress"/> <reg32 offset="0xe242" name="RB_DEPTH_FLAG_BUFFER_PITCH"> </reg32> <array offset="0xe243" name="RB_MRT_FLAG_BUFFER" stride="4" length="8"> - <reg32 offset="0" name="ADDR_LO"/> + <reg64 offset="0" name="ADDR" type="waddress"/> <reg32 offset="1" name="ADDR_HI"/> <reg32 offset="2" name="PITCH" shr="6" type="uint"/> <!-- array-pitch is size of layer --> <reg32 offset="3" name="ARRAY_PITCH" shr="6" type="uint"/> </array> - <reg32 offset="0xe263" name="RB_BLIT_FLAG_DST_LO"/> - <reg32 offset="0xe264" name="RB_BLIT_FLAG_DST_HI"/> + <reg64 offset="0xe263" name="RB_BLIT_FLAG_DST" type="waddress"/> <reg32 offset="0xe265" name="RB_BLIT_FLAG_DST_PITCH" shr="6" type="uint"/> <!-- array-pitch is size of layer --> <reg32 offset="0xe266" name="RB_BLIT_FLAG_DST_ARRAY_PITCH" shr="6" type="uint"/> - <reg32 offset="0xe267" name="RB_SAMPLE_COUNT_ADDR_LO"/> - <reg32 offset="0xe268" name="RB_SAMPLE_COUNT_ADDR_HI"/> + <reg64 offset="0xe267" name="RB_SAMPLE_COUNT_ADDR" type="waddress"/> <reg32 offset="0xe280" name="VPC_CNTL_0"> <doc> @@ -2357,13 +2338,11 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? 
not set <bitfield name="B_EN" pos="23" type="boolean"/> </reg32> <array offset="0xe2a7" name="VPC_SO" stride="7" length="4"> - <reg32 offset="0" name="BUFFER_BASE_LO"/> - <reg32 offset="1" name="BUFFER_BASE_HI"/> + <reg64 offset="0" name="BUFFER_BASE" type="waddress"/> <reg32 offset="2" name="BUFFER_SIZE"/> <reg32 offset="3" name="NCOMP"/> <!-- component count --> <reg32 offset="4" name="BUFFER_OFFSET"/> - <reg32 offset="5" name="FLUSH_BASE_LO"/> - <reg32 offset="6" name="FLUSH_BASE_HI"/> + <reg64 offset="5" name="FLUSH_BASE" type="waddress"/> </array> <reg32 offset="0xe384" name="PC_PRIMITIVE_CNTL"> @@ -2423,8 +2402,7 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? not set <reg32 offset="0xe408" name="VFD_INDEX_OFFSET"/> <reg32 offset="0xe409" name="VFD_INSTANCE_START_OFFSET"/> <array offset="0xe40a" name="VFD_FETCH" stride="4" length="32"> - <reg32 offset="0x0" name="BASE_LO"/> - <reg32 offset="0x1" name="BASE_HI"/> + <reg64 offset="0x0" name="BASE" type="address"/> <reg32 offset="0x2" name="SIZE" type="uint"/> <reg32 offset="0x3" name="STRIDE" type="uint"/> </array> @@ -2475,7 +2453,7 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? not set <bitfield name="VARYING" pos="16" type="boolean"/> <bitfield name="PIXLODENABLE" pos="20" type="boolean"/> <!-- seems to be nesting level for flow control:.. --> - <bitfield name="BRANCHSTACK" low="25" high="31" type="uint"/> + <bitfield name="BRANCHSTACK" low="24" high="31" type="uint"/> </bitset> <!-- assuming things appear in same relative order as a4xx: --> <!-- duplicated exactly w/ corresponding HLSQ_ regs starting at 0xe78b.. --> @@ -2516,8 +2494,7 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? 
not set </reg32> </array> <reg32 offset="0xe5ab" name="UNKNOWN_E5AB"/> - <reg32 offset="0xe5ac" name="SP_VS_OBJ_START_LO"/> - <reg32 offset="0xe5ad" name="SP_VS_OBJ_START_HI"/> + <reg64 offset="0xe5ac" name="SP_VS_OBJ_START" type="address"/> <bitset name="a5xx_sp_xs_pvt_mem_param" inline="yes"> <bitfield name="MEMSIZEPERITEM" low="0" high="7" shr="9"> @@ -2538,8 +2515,7 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? not set <reg32 offset="0xe5b1" name="SP_VS_PVT_MEM_SIZE" type="a5xx_sp_xs_pvt_mem_size"/> <reg32 offset="0xe5c0" name="SP_FS_CTRL_REG0" type="a5xx_sp_xs_ctrl_reg0"/> <reg32 offset="0xe5c2" name="UNKNOWN_E5C2"/> - <reg32 offset="0xe5c3" name="SP_FS_OBJ_START_LO"/> - <reg32 offset="0xe5c4" name="SP_FS_OBJ_START_HI"/> + <reg64 offset="0xe5c3" name="SP_FS_OBJ_START" type="address"/> <reg32 offset="0xe5c5" name="SP_FS_PVT_MEM_PARAM" type="a5xx_sp_xs_pvt_mem_param"/> <reg64 offset="0xe5c6" name="SP_FS_PVT_MEM_ADDR" type="waddress" align="32"/> <reg32 offset="0xe5c8" name="SP_FS_PVT_MEM_SIZE" type="a5xx_sp_xs_pvt_mem_size"/> @@ -2577,8 +2553,7 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? not set <reg32 offset="0xe5db" name="UNKNOWN_E5DB"/> <reg32 offset="0xe5f0" name="SP_CS_CTRL_REG0" type="a5xx_sp_xs_ctrl_reg0"/> <reg32 offset="0xe5f2" name="UNKNOWN_E5F2"/> - <reg32 offset="0xe5f3" name="SP_CS_OBJ_START_LO"/> - <reg32 offset="0xe5f4" name="SP_CS_OBJ_START_HI"/> + <reg64 offset="0xe5f3" name="SP_CS_OBJ_START" type="address"/> <reg32 offset="0xe5f5" name="SP_CS_PVT_MEM_PARAM" type="a5xx_sp_xs_pvt_mem_param"/> <reg64 offset="0xe5f6" name="SP_CS_PVT_MEM_ADDR" type="waddress" align="32"/> <reg32 offset="0xe5f8" name="SP_CS_PVT_MEM_SIZE" type="a5xx_sp_xs_pvt_mem_size"/> @@ -2587,22 +2562,19 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? 
not set <reg32 offset="0xe600" name="SP_HS_CTRL_REG0" type="a5xx_sp_xs_ctrl_reg0"/> <reg32 offset="0xe602" name="UNKNOWN_E602"/> - <reg32 offset="0xe603" name="SP_HS_OBJ_START_LO"/> - <reg32 offset="0xe604" name="SP_HS_OBJ_START_HI"/> + <reg64 offset="0xe603" name="SP_HS_OBJ_START" type="address"/> <reg32 offset="0xe605" name="SP_HS_PVT_MEM_PARAM" type="a5xx_sp_xs_pvt_mem_param"/> <reg64 offset="0xe606" name="SP_HS_PVT_MEM_ADDR" type="waddress" align="32"/> <reg32 offset="0xe608" name="SP_HS_PVT_MEM_SIZE" type="a5xx_sp_xs_pvt_mem_size"/> <reg32 offset="0xe610" name="SP_DS_CTRL_REG0" type="a5xx_sp_xs_ctrl_reg0"/> <reg32 offset="0xe62b" name="UNKNOWN_E62B"/> - <reg32 offset="0xe62c" name="SP_DS_OBJ_START_LO"/> - <reg32 offset="0xe62d" name="SP_DS_OBJ_START_HI"/> + <reg64 offset="0xe62c" name="SP_DS_OBJ_START" type="address"/> <reg32 offset="0xe62e" name="SP_DS_PVT_MEM_PARAM" type="a5xx_sp_xs_pvt_mem_param"/> <reg64 offset="0xe62f" name="SP_DS_PVT_MEM_ADDR" type="waddress" align="32"/> <reg32 offset="0xe631" name="SP_DS_PVT_MEM_SIZE" type="a5xx_sp_xs_pvt_mem_size"/> <reg32 offset="0xe640" name="SP_GS_CTRL_REG0" type="a5xx_sp_xs_ctrl_reg0"/> <reg32 offset="0xe65b" name="UNKNOWN_E65B"/> - <reg32 offset="0xe65c" name="SP_GS_OBJ_START_LO"/> - <reg32 offset="0xe65d" name="SP_GS_OBJ_START_HI"/> + <reg64 offset="0xe65c" name="SP_GS_OBJ_START" type="address"/> <reg32 offset="0xe65e" name="SP_GS_PVT_MEM_PARAM" type="a5xx_sp_xs_pvt_mem_param"/> <reg64 offset="0xe65f" name="SP_GS_PVT_MEM_ADDR" type="waddress" align="32"/> <reg32 offset="0xe661" name="SP_GS_PVT_MEM_SIZE" type="a5xx_sp_xs_pvt_mem_size"/> @@ -2615,8 +2587,7 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? 
not set <bitfield name="MSAA_DISABLE" pos="2" type="boolean"/> </reg32> <!-- either blob is doing it wrong, or this is not per-stage anymore: --> - <reg32 offset="0xe706" name="TPL1_TP_BORDER_COLOR_BASE_ADDR_LO"/> - <reg32 offset="0xe707" name="TPL1_TP_BORDER_COLOR_BASE_ADDR_HI"/> + <reg64 offset="0xe706" name="TPL1_TP_BORDER_COLOR_BASE_ADDR" type="address"/> <!-- so these have the same info that is normally in the CP_LOAD_STATE @@ -2628,35 +2599,23 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? not set <reg32 offset="0xe702" name="TPL1_DS_TEX_COUNT" type="uint"/> <reg32 offset="0xe703" name="TPL1_GS_TEX_COUNT" type="uint"/> - <reg32 offset="0xe722" name="TPL1_VS_TEX_SAMP_LO"/> - <reg32 offset="0xe723" name="TPL1_VS_TEX_SAMP_HI"/> - <reg32 offset="0xe724" name="TPL1_HS_TEX_SAMP_LO"/> - <reg32 offset="0xe725" name="TPL1_HS_TEX_SAMP_HI"/> - <reg32 offset="0xe726" name="TPL1_DS_TEX_SAMP_LO"/> - <reg32 offset="0xe727" name="TPL1_DS_TEX_SAMP_HI"/> - <reg32 offset="0xe728" name="TPL1_GS_TEX_SAMP_LO"/> - <reg32 offset="0xe729" name="TPL1_GS_TEX_SAMP_HI"/> - - <reg32 offset="0xe72a" name="TPL1_VS_TEX_CONST_LO"/> - <reg32 offset="0xe72b" name="TPL1_VS_TEX_CONST_HI"/> - <reg32 offset="0xe72c" name="TPL1_HS_TEX_CONST_LO"/> - <reg32 offset="0xe72d" name="TPL1_HS_TEX_CONST_HI"/> - <reg32 offset="0xe72e" name="TPL1_DS_TEX_CONST_LO"/> - <reg32 offset="0xe72f" name="TPL1_DS_TEX_CONST_HI"/> - <reg32 offset="0xe730" name="TPL1_GS_TEX_CONST_LO"/> - <reg32 offset="0xe731" name="TPL1_GS_TEX_CONST_HI"/> + <reg64 offset="0xe722" name="TPL1_VS_TEX_SAMP" type="address"/> + <reg64 offset="0xe724" name="TPL1_HS_TEX_SAMP" type="address"/> + <reg64 offset="0xe726" name="TPL1_DS_TEX_SAMP" type="address"/> + <reg64 offset="0xe728" name="TPL1_GS_TEX_SAMP" type="address"/> + + <reg64 offset="0xe72a" name="TPL1_VS_TEX_CONST" type="address"/> + <reg64 offset="0xe72c" name="TPL1_HS_TEX_CONST" type="address"/> + <reg64 offset="0xe72e" name="TPL1_DS_TEX_CONST" type="address"/> + 
<reg64 offset="0xe730" name="TPL1_GS_TEX_CONST" type="address"/> <reg32 offset="0xe750" name="TPL1_FS_TEX_COUNT" type="uint"/> <reg32 offset="0xe751" name="TPL1_CS_TEX_COUNT" type="uint"/> - <reg32 offset="0xe75a" name="TPL1_FS_TEX_SAMP_LO"/> - <reg32 offset="0xe75b" name="TPL1_FS_TEX_SAMP_HI"/> - <reg32 offset="0xe75c" name="TPL1_CS_TEX_SAMP_LO"/> - <reg32 offset="0xe75d" name="TPL1_CS_TEX_SAMP_HI"/> - <reg32 offset="0xe75e" name="TPL1_FS_TEX_CONST_LO"/> - <reg32 offset="0xe75f" name="TPL1_FS_TEX_CONST_HI"/> - <reg32 offset="0xe760" name="TPL1_CS_TEX_CONST_LO"/> - <reg32 offset="0xe761" name="TPL1_CS_TEX_CONST_HI"/> + <reg64 offset="0xe75a" name="TPL1_FS_TEX_SAMP" type="address"/> + <reg64 offset="0xe75c" name="TPL1_CS_TEX_SAMP" type="address"/> + <reg64 offset="0xe75e" name="TPL1_FS_TEX_CONST" type="address"/> + <reg64 offset="0xe760" name="TPL1_CS_TEX_CONST" type="address"/> <reg32 offset="0xe764" name="TPL1_TP_FS_ROTATION_CNTL"/> @@ -2809,25 +2768,21 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? 
not set </bitset> <reg32 offset="0x2107" name="RB_2D_SRC_INFO" type="a5xx_2d_surf_info"/> - <reg32 offset="0x2108" name="RB_2D_SRC_LO"/> - <reg32 offset="0x2109" name="RB_2D_SRC_HI"/> + <reg64 offset="0x2108" name="RB_2D_SRC" type="address"/> <reg32 offset="0x210a" name="RB_2D_SRC_SIZE"> <bitfield name="PITCH" low="0" high="15" shr="6" type="uint"/> <bitfield name="ARRAY_PITCH" low="16" high="31" shr="6" type="uint"/> </reg32> <reg32 offset="0x2110" name="RB_2D_DST_INFO" type="a5xx_2d_surf_info"/> - <reg32 offset="0x2111" name="RB_2D_DST_LO"/> - <reg32 offset="0x2112" name="RB_2D_DST_HI"/> + <reg64 offset="0x2111" name="RB_2D_DST" type="address"/> <reg32 offset="0x2113" name="RB_2D_DST_SIZE"> <bitfield name="PITCH" low="0" high="15" shr="6" type="uint"/> <bitfield name="ARRAY_PITCH" low="16" high="31" shr="6" type="uint"/> </reg32> - <reg32 offset="0x2140" name="RB_2D_SRC_FLAGS_LO"/> - <reg32 offset="0x2141" name="RB_2D_SRC_FLAGS_HI"/> + <reg64 offset="0x2140" name="RB_2D_SRC_FLAGS" type="address"/> <reg32 offset="0x2142" name="RB_2D_SRC_FLAGS_PITCH" shr="6" type="uint"/> - <reg32 offset="0x2143" name="RB_2D_DST_FLAGS_LO"/> - <reg32 offset="0x2144" name="RB_2D_DST_FLAGS_HI"/> + <reg64 offset="0x2143" name="RB_2D_DST_FLAGS" type="address"/> <reg32 offset="0x2145" name="RB_2D_DST_FLAGS_PITCH" shr="6" type="uint"/> <reg32 offset="0x2180" name="GRAS_2D_BLIT_CNTL"/> <!-- same as 0x2100 --> <!-- looks same as 0x2107: --> diff --git a/drivers/gpu/drm/msm/registers/adreno/a5xx_perfcntrs.json b/drivers/gpu/drm/msm/registers/adreno/a5xx_perfcntrs.json new file mode 100644 index 000000000000..d95503543f94 --- /dev/null +++ b/drivers/gpu/drm/msm/registers/adreno/a5xx_perfcntrs.json @@ -0,0 +1,128 @@ +{ + "chip": "A5XX", + "groups": [ + { + "name": "CP", + "num": 8, + "reserved": [ 0 ], + "select": "CP_PERFCTR_CP_SEL_{}", + "counter_lo": "RBBM_PERFCTR_CP_{}_LO", + "counter_hi": "RBBM_PERFCTR_CP_{}_HI", + "countable_type": "a5xx_cp_perfcounter_select" + }, + { + "name": "CCU", + 
"num": 4, + "select": "RB_PERFCTR_CCU_SEL_{}", + "counter_lo": "RBBM_PERFCTR_CCU_{}_LO", + "counter_hi": "RBBM_PERFCTR_CCU_{}_HI", + "countable_type": "a5xx_ccu_perfcounter_select" + }, + { + "name": "TSE", + "num": 4, + "select": "GRAS_PERFCTR_TSE_SEL_{}", + "counter_lo": "RBBM_PERFCTR_TSE_{}_LO", + "counter_hi": "RBBM_PERFCTR_TSE_{}_HI", + "countable_type": "a5xx_tse_perfcounter_select" + }, + { + "name": "RAS", + "num": 4, + "select": "GRAS_PERFCTR_RAS_SEL_{}", + "counter_lo": "RBBM_PERFCTR_RAS_{}_LO", + "counter_hi": "RBBM_PERFCTR_RAS_{}_HI", + "countable_type": "a5xx_ras_perfcounter_select" + }, + { + "name": "LRZ", + "num": 4, + "select": "GRAS_PERFCTR_LRZ_SEL_{}", + "counter_lo": "RBBM_PERFCTR_LRZ_{}_LO", + "counter_hi": "RBBM_PERFCTR_LRZ_{}_HI", + "countable_type": "a5xx_lrz_perfcounter_select" + }, + { + "name": "HLSQ", + "num": 8, + "select": "HLSQ_PERFCTR_HLSQ_SEL_{}", + "counter_lo": "RBBM_PERFCTR_HLSQ_{}_LO", + "counter_hi": "RBBM_PERFCTR_HLSQ_{}_HI", + "countable_type": "a5xx_hlsq_perfcounter_select" + }, + { + "name": "PC", + "num": 8, + "select": "PC_PERFCTR_PC_SEL_{}", + "counter_lo": "RBBM_PERFCTR_PC_{}_LO", + "counter_hi": "RBBM_PERFCTR_PC_{}_HI", + "countable_type": "a5xx_pc_perfcounter_select" + }, + { + "name": "RB", + "num": 8, + "select": "RB_PERFCTR_RB_SEL_{}", + "counter_lo": "RBBM_PERFCTR_RB_{}_LO", + "counter_hi": "RBBM_PERFCTR_RB_{}_HI", + "countable_type": "a5xx_rb_perfcounter_select" + }, + { + "name": "RBBM", + "num": 4, + "reserved": [ 0 ], + "select": "RBBM_PERFCTR_RBBM_SEL_{}", + "counter_lo": "RBBM_PERFCTR_RBBM_{}_LO", + "counter_hi": "RBBM_PERFCTR_RBBM_{}_HI", + "countable_type": "a5xx_rbbm_perfcounter_select" + }, + { + "name": "SP", + "num": 12, + "reserved": [ 0 ], + "select": "SP_PERFCTR_SP_SEL_{}", + "counter_lo": "RBBM_PERFCTR_SP_{}_LO", + "counter_hi": "RBBM_PERFCTR_SP_{}_HI", + "countable_type": "a5xx_sp_perfcounter_select" + }, + { + "name": "TP", + "num": 8, + "select": "TPL1_PERFCTR_TP_SEL_{}", + "counter_lo": 
"RBBM_PERFCTR_TP_{}_LO", + "counter_hi": "RBBM_PERFCTR_TP_{}_HI", + "countable_type": "a5xx_tp_perfcounter_select" + }, + { + "name": "UCHE", + "num": 8, + "select": "UCHE_PERFCTR_UCHE_SEL_{}", + "counter_lo": "RBBM_PERFCTR_UCHE_{}_LO", + "counter_hi": "RBBM_PERFCTR_UCHE_{}_HI", + "countable_type": "a5xx_uche_perfcounter_select" + }, + { + "name": "VFD", + "num": 8, + "select": "VFD_PERFCTR_VFD_SEL_{}", + "counter_lo": "RBBM_PERFCTR_VFD_{}_LO", + "counter_hi": "RBBM_PERFCTR_VFD_{}_HI", + "countable_type": "a5xx_vfd_perfcounter_select" + }, + { + "name": "VPC", + "num": 4, + "select": "VPC_PERFCTR_VPC_SEL_{}", + "counter_lo": "RBBM_PERFCTR_VPC_{}_LO", + "counter_hi": "RBBM_PERFCTR_VPC_{}_HI", + "countable_type": "a5xx_vpc_perfcounter_select" + }, + { + "name": "VSC", + "num": 2, + "select": "VSC_PERFCTR_VSC_SEL_{}", + "counter_lo": "RBBM_PERFCTR_VSC_{}_LO", + "counter_hi": "RBBM_PERFCTR_VSC_{}_HI", + "countable_type": "a5xx_vsc_perfcounter_select" + } + ] +} diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml index 2309870f5031..3349c01646e1 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml @@ -10,19 +10,24 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <import file="adreno/a8xx_enums.xml"/> <import file="adreno/a6xx_perfcntrs.xml"/> <import file="adreno/a7xx_perfcntrs.xml"/> +<import file="adreno/a8xx_perfcntrs.xml"/> <import file="adreno/a6xx_descriptors.xml"/> <import file="adreno/a8xx_descriptors.xml"/> <!-- Each register that is actually being used by driver should have "usage" defined, currently there are following usages: +- "init" - registers written (in ib1 generally, or ambles) to get the + GPU into a known state. 
- "cmd" - the register is used outside of renderpass and blits, roughly corresponds to registers used in ib1 for Freedreno -- "rp_blit" - the register is used inside renderpass or blits - (ib2 for Freedreno) +- "draw" - used by 3d draw cmds +- "compute" - used by compute cmds +- "blit" - used by CP_BLIT cmds +- "resolve" - used by CCU_RESOLVE (resolve/unresolve/clear) events -It is expected that register with "cmd" usage may be written into only at -the start of the command buffer (ib1), while "rp_blit" usage indicates that register +It is expected that register with "cmd" and "init" usage may be written into only at +the start of the command buffer (ib1), while other usages indicates that register is either overwritten by renderpass/blit (ib2) or not used if not overwritten by a particular renderpass/blit. --> @@ -1321,7 +1326,7 @@ by a particular renderpass/blit. <reg32 offset="0x0c00" name="VSC_DBG_ECO_CNTL"/> <reg32 offset="0x0df0" name="VSC_KMD_DBG_ECO_CNTL" variants="A8XX-"/> - <reg32 offset="0x0c02" name="VSC_BIN_SIZE" usage="rp_blit" variants="A6XX-A7XX"> + <reg32 offset="0x0c02" name="VSC_BIN_SIZE" usage="cmd" variants="A6XX-A7XX"> <bitfield name="BINW" low="0" high="7" shr="5" type="uint"/> <bitfield name="BINH" low="8" high="16" shr="4" type="uint"/> </reg32> @@ -1331,13 +1336,13 @@ by a particular renderpass/blit. 
<bitfield name="BINH" low="16" high="26" shr="4" type="uint"/> </bitset> - <reg32 offset="0x0c02" name="VSC_BIN_SIZE" type="a8xx_bin_size" usage="rp_blit" variants="A8XX"/> + <reg32 offset="0x0c02" name="VSC_BIN_SIZE" type="a8xx_bin_size" usage="cmd" variants="A8XX"/> <reg64 offset="0x0c03" name="VSC_SIZE_BASE" type="waddress" usage="cmd"/> - <reg32 offset="0x0c06" name="VSC_EXPANDED_BIN_CNTL" usage="rp_blit"> + <reg32 offset="0x0c06" name="VSC_EXPANDED_BIN_CNTL" usage="cmd"> <bitfield name="NX" low="1" high="10" type="uint"/> <bitfield name="NY" low="11" high="20" type="uint"/> </reg32> - <array offset="0x0c10" name="VSC_PIPE_CONFIG" stride="1" length="32" usage="rp_blit"> + <array offset="0x0c10" name="VSC_PIPE_CONFIG" stride="1" length="32" usage="cmd"> <reg32 offset="0x0" name="REG"> <doc> Configures the mapping between VSC_PIPE buffer and @@ -1370,7 +1375,7 @@ by a particular renderpass/blit. <reg32 offset="0x0c36" name="VSC_PIPE_DATA_DRAW_STRIDE" usage="cmd"/> <reg32 offset="0x0c37" name="VSC_PIPE_DATA_DRAW_LENGTH" usage="cmd"/> - <array offset="0x0c38" name="VSC_CHANNEL_VISIBILITY" stride="1" length="32" usage="rp_blit"> + <array offset="0x0c38" name="VSC_CHANNEL_VISIBILITY" stride="1" length="32" usage="cmd"> <doc> Seems to be a bitmap of which tiles mapped to the VSC pipe contain geometry. @@ -1381,7 +1386,7 @@ by a particular renderpass/blit. <reg32 offset="0x0" name="REG"/> </array> - <array offset="0x0c58" name="VSC_PIPE_DATA_PRIM_SIZE" stride="1" length="32" variants="A6XX" usage="rp_blit"> + <array offset="0x0c58" name="VSC_PIPE_DATA_PRIM_SIZE" stride="1" length="32" variants="A6XX" usage="cmd"> <doc> Has the size of data written to corresponding VSC_PRIM_STRM buffer. @@ -1389,7 +1394,7 @@ by a particular renderpass/blit. 
<reg32 offset="0x0" name="REG"/> </array> - <array offset="0x0c78" name="VSC_PIPE_DATA_DRAW_SIZE" stride="1" length="32" variants="A6XX" usage="rp_blit"> + <array offset="0x0c78" name="VSC_PIPE_DATA_DRAW_SIZE" stride="1" length="32" variants="A6XX" usage="cmd"> <doc> Has the size of data written to corresponding VSC pipe, ie. same thing that is written out to VSC_SIZE_BASE @@ -1397,7 +1402,7 @@ by a particular renderpass/blit. <reg32 offset="0x0" name="REG"/> </array> - <reg32 offset="0x0d08" name="VSC_UNKNOWN_0D08" variants="A7XX-" usage="rp_blit"/> + <reg32 offset="0x0d08" name="VSC_UNKNOWN_0D08" variants="A7XX-" usage="cmd"/> <reg32 offset="0x0e10" name="UCHE_UNKNOWN_0E10" variants="A7XX" usage="init"/> <reg32 offset="0x0e10" name="UCHE_VARB_IDLE_TIMEOUT" variants="A8XX-"/> @@ -1428,29 +1433,22 @@ by a particular renderpass/blit. <bitfield name="PERSP_DIVISION_DISABLE" pos="9" type="boolean"/> </bitset> - <reg32 offset="0x8000" name="GRAS_CL_CNTL" type="a6xx_gras_cl_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x8200" name="GRAS_CL_CNTL" type="a6xx_gras_cl_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8000" name="GRAS_CL_CNTL" type="a6xx_gras_cl_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x8200" name="GRAS_CL_CNTL" type="a6xx_gras_cl_cntl" variants="A8XX-" usage="draw"/> <bitset name="a6xx_gras_xs_clip_cull_distance" inline="yes"> <bitfield name="CLIP_MASK" low="0" high="7"/> <bitfield name="CULL_MASK" low="8" high="15"/> </bitset> - <reg32 offset="0x8001" name="GRAS_CL_VS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A6XX-A7XX" /> - <reg32 offset="0x8002" name="GRAS_CL_DS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A6XX-A7XX" /> - <reg32 offset="0x8003" name="GRAS_CL_GS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A6XX-A7XX" /> - <reg32 offset="0x8004" name="GRAS_CL_ARRAY_SIZE" low="0" high="10" 
type="uint" usage="rp_blit" variants="A6XX-A7XX" /> - - <reg32 offset="0x8201" name="GRAS_CL_VS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A8XX" /> - <reg32 offset="0x8202" name="GRAS_CL_DS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A8XX" /> - <reg32 offset="0x8203" name="GRAS_CL_GS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A8XX" /> - <reg32 offset="0x8204" name="GRAS_CL_ARRAY_SIZE" low="0" high="10" type="uint" usage="rp_blit" variants="A8XX" /> - - <reg32 offset="0x8228" name="GRAS_UNKNOWN_8228" variants="A8XX-"/> - <reg32 offset="0x8229" name="GRAS_UNKNOWN_8229" variants="A8XX-"/> - <reg32 offset="0x822a" name="GRAS_UNKNOWN_822A" variants="A8XX-"/> - <reg32 offset="0x822b" name="GRAS_UNKNOWN_822B" variants="A8XX-"/> - <reg32 offset="0x822c" name="GRAS_UNKNOWN_822C" variants="A8XX-"/> - <reg32 offset="0x822d" name="GRAS_UNKNOWN_822D" variants="A8XX-"/> + <reg32 offset="0x8001" name="GRAS_CL_VS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="draw" variants="A6XX-A7XX" /> + <reg32 offset="0x8002" name="GRAS_CL_DS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="draw" variants="A6XX-A7XX" /> + <reg32 offset="0x8003" name="GRAS_CL_GS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="draw" variants="A6XX-A7XX" /> + <reg32 offset="0x8004" name="GRAS_CL_ARRAY_SIZE" low="0" high="10" type="uint" usage="draw" variants="A6XX-A7XX" /> + + <reg32 offset="0x8201" name="GRAS_CL_VS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="draw" variants="A8XX" /> + <reg32 offset="0x8202" name="GRAS_CL_DS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="draw" variants="A8XX" /> + <reg32 offset="0x8203" name="GRAS_CL_GS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="draw" variants="A8XX" /> + <reg32 offset="0x8204" name="GRAS_CL_ARRAY_SIZE" low="0" high="10" 
type="uint" usage="draw" variants="A8XX" /> <bitset name="a6xx_gras_cl_interp_cntl" inline="yes"> <!-- see also RB_INTERP_CNTL --> @@ -1465,16 +1463,16 @@ by a particular renderpass/blit. <bitfield name="UNK11" pos="11" type="boolean" variants="A7XX-"/> </bitset> - <reg32 offset="0x8005" name="GRAS_CL_INTERP_CNTL" type="a6xx_gras_cl_interp_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x8080" name="GRAS_CL_INTERP_CNTL" type="a6xx_gras_cl_interp_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8005" name="GRAS_CL_INTERP_CNTL" type="a6xx_gras_cl_interp_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x8080" name="GRAS_CL_INTERP_CNTL" type="a6xx_gras_cl_interp_cntl" variants="A8XX-" usage="draw"/> <bitset name="a6xx_gras_cl_guardband_clip_adj" inline="true"> <bitfield name="HORZ" low="0" high="8" type="uint"/> <bitfield name="VERT" low="10" high="18" type="uint"/> </bitset> - <reg32 offset="0x8006" name="GRAS_CL_GUARDBAND_CLIP_ADJ" type="a6xx_gras_cl_guardband_clip_adj" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x8205" name="GRAS_CL_GUARDBAND_CLIP_ADJ" type="a6xx_gras_cl_guardband_clip_adj" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8006" name="GRAS_CL_GUARDBAND_CLIP_ADJ" type="a6xx_gras_cl_guardband_clip_adj" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x8205" name="GRAS_CL_GUARDBAND_CLIP_ADJ" type="a6xx_gras_cl_guardband_clip_adj" variants="A8XX-" usage="draw"/> <!-- the scale/offset is per view, with up to 6 views --> <bitset name="a6xx_gras_bin_foveat" inline="yes"> @@ -1499,33 +1497,48 @@ by a particular renderpass/blit. 
</bitset> <reg32 offset="0x8008" name="GRAS_BIN_FOVEAT" type="a6xx_gras_bin_foveat" variants="A7XX" usage="cmd"/> - <reg32 offset="0x8206" name="GRAS_BIN_FOVEAT" type="a6xx_gras_bin_foveat" variants="A8XX-" usage="cmd"/> + <reg32 offset="0x8206" name="GRAS_BIN_FOVEAT" type="a6xx_gras_bin_foveat" variants="A8XX-" usage="cmd"> + <bitfield name="FDM_OFFSET_EN" pos="7" type="boolean"/> + </reg32> - <reg32 offset="0x8009" name="GRAS_BIN_FOVEAT_OFFSET_0" variants="A7XX-" usage="cmd"> + <reg32 offset="0x8009" name="GRAS_BIN_FOVEAT_OFFSET_0" variants="A7XX" usage="cmd"> <bitfield name="XOFFSET_0" low="0" high="9" shr="2" type="uint"/> <bitfield name="XOFFSET_1" low="10" high="19" shr="2" type="uint"/> <bitfield name="XOFFSET_2" low="20" high="29" shr="2" type="uint"/> </reg32> - <reg32 offset="0x800a" name="GRAS_BIN_FOVEAT_OFFSET_1" variants="A7XX-" usage="cmd"> + <reg32 offset="0x800a" name="GRAS_BIN_FOVEAT_OFFSET_1" variants="A7XX" usage="cmd"> <bitfield name="XOFFSET_3" low="0" high="9" shr="2" type="uint"/> <bitfield name="XOFFSET_4" low="10" high="19" shr="2" type="uint"/> <bitfield name="XOFFSET_5" low="20" high="29" shr="2" type="uint"/> </reg32> - <reg32 offset="0x800b" name="GRAS_BIN_FOVEAT_OFFSET_2" variants="A7XX-" usage="cmd"> + <reg32 offset="0x800b" name="GRAS_BIN_FOVEAT_OFFSET_2" variants="A7XX" usage="cmd"> <bitfield name="YOFFSET_0" low="0" high="9" shr="2" type="uint"/> <bitfield name="YOFFSET_1" low="10" high="19" shr="2" type="uint"/> <bitfield name="YOFFSET_2" low="20" high="29" shr="2" type="uint"/> </reg32> - <reg32 offset="0x800c" name="GRAS_BIN_FOVEAT_OFFSET_3" variants="A7XX-" usage="cmd"> + <reg32 offset="0x800c" name="GRAS_BIN_FOVEAT_OFFSET_3" variants="A7XX" usage="cmd"> <bitfield name="YOFFSET_3" low="0" high="9" shr="2" type="uint"/> <bitfield name="YOFFSET_4" low="10" high="19" shr="2" type="uint"/> <bitfield name="YOFFSET_5" low="20" high="29" shr="2" type="uint"/> </reg32> + <bitset name="a8xx_bin_foveat_xy" inline="yes"> + <bitfield 
name="XOFFSET" low="0" high="13" type="uint"/> + <bitfield name="YOFFSET" low="16" high="29" type="uint"/> + </bitset> + + <array offset="0x8220" name="GRAS_BIN_FOVEAT_XY" stride="1" length="6" variants="A8XX-" usage="cmd"> + <reg32 offset="0" name="OFFSET" type="a8xx_bin_foveat_xy"/> + </array> + + <array offset="0x8228" name="GRAS_BIN_FOVEAT_XY_FDM" stride="1" length="6" variants="A8XX-" usage="cmd"> + <reg32 offset="0" name="OFFSET" type="a8xx_bin_foveat_xy"/> + </array> + <!-- <reg32 offset="0x80f0" name="GRAS_UNKNOWN_80F0" type="a6xx_reg_xy"/> --> <!-- 0x8006-0x800f invalid --> - <array offset="0x8010" name="GRAS_CL_VIEWPORT" stride="6" length="16" variants="A6XX-A7XX" usage="rp_blit"> + <array offset="0x8010" name="GRAS_CL_VIEWPORT" stride="6" length="16" variants="A6XX-A7XX" usage="draw"> <reg32 offset="0" name="XOFFSET" type="float"/> <reg32 offset="1" name="XSCALE" type="float"/> <reg32 offset="2" name="YOFFSET" type="float"/> @@ -1534,7 +1547,7 @@ by a particular renderpass/blit. <reg32 offset="5" name="ZSCALE" type="float"/> </array> - <array offset="0x82d0" name="GRAS_CL_VIEWPORT" stride="6" length="16" variants="A8XX-" usage="rp_blit"> + <array offset="0x82d0" name="GRAS_CL_VIEWPORT" stride="6" length="16" variants="A8XX-" usage="draw"> <reg32 offset="0" name="XOFFSET" type="float"/> <reg32 offset="1" name="XSCALE" type="float"/> <reg32 offset="2" name="YOFFSET" type="float"/> @@ -1543,16 +1556,16 @@ by a particular renderpass/blit. 
<reg32 offset="5" name="ZSCALE" type="float"/> </array> - <array offset="0x8070" name="GRAS_CL_VIEWPORT_ZCLAMP" stride="2" length="16" variants="A6XX-A7XX" usage="rp_blit"> + <array offset="0x8070" name="GRAS_CL_VIEWPORT_ZCLAMP" stride="2" length="16" variants="A6XX-A7XX" usage="draw"> <reg32 offset="0" name="MIN" type="float"/> <reg32 offset="1" name="MAX" type="float"/> </array> - <array offset="0x80c0" name="GRAS_CL_VIEWPORT_ZCLAMP" stride="2" length="16" variants="A8XX-" usage="rp_blit"> + <array offset="0x80c0" name="GRAS_CL_VIEWPORT_ZCLAMP" stride="2" length="16" variants="A8XX-" usage="draw"> <reg32 offset="0" name="MIN" type="float"/> <reg32 offset="1" name="MAX" type="float"/> </array> - <bitset name="a6xx_gras_su_cntl" varset="chip"> + <bitset name="a6xx_gras_su_cntl" inline="yes"> <bitfield name="CULL_FRONT" pos="0" type="boolean"/> <bitfield name="CULL_BACK" pos="1" type="boolean"/> <bitfield name="FRONT_CW" pos="2" type="boolean"/> @@ -1572,17 +1585,18 @@ by a particular renderpass/blit. TODO: what about gen2 (a640)? 
--> <bitfield name="MULTIVIEW_ENABLE" pos="17" type="boolean"/> - <bitfield name="RENDERTARGETINDEXINCR" pos="18" type="boolean" variants="A6XX-A7XX"/> - <bitfield name="VIEWPORTINDEXINCR" pos="19" type="boolean" variants="A6XX-A7XX"/> - <bitfield name="UNK20" low="20" high="22" variants="A6XX-A7XX"/> </bitset> - <reg32 offset="0x8090" name="GRAS_SU_CNTL" type="a6xx_gras_su_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x8209" name="GRAS_SU_CNTL" type="a6xx_gras_su_cntl" variants="A8XX-" usage="rp_blit"/> - - <!-- Fields moved from GRAS_SU_CNTL on earlier gens: --> - <reg32 offset="0x820c" name="GRAS_SU_STEREO_CNTL" variants="A8XX-" usage="rp_blit"> + <reg32 offset="0x8090" name="GRAS_SU_CNTL" type="a6xx_gras_su_cntl" variants="A6XX-A7XX" usage="draw"> <bitfield name="RENDERTARGETINDEXINCR" pos="18" type="boolean"/> <bitfield name="VIEWPORTINDEXINCR" pos="19" type="boolean"/> + <bitfield name="UNK20" low="20" high="22"/> + </reg32> + <reg32 offset="0x8209" name="GRAS_SU_CNTL" type="a6xx_gras_su_cntl" variants="A8XX-" usage="draw"/> + + <!-- Fields moved from GRAS_SU_CNTL on earlier gens: --> + <reg32 offset="0x820c" name="GRAS_SU_STEREO_CNTL" variants="A8XX-" usage="draw"> + <bitfield name="RENDERTARGETINDEXINCR" pos="0" type="boolean"/> + <bitfield name="VIEWPORTINDEXINCR" pos="1" type="boolean"/> </reg32> <bitset name="a6xx_gras_su_point_minmax" inline="yes"> @@ -1590,49 +1604,49 @@ by a particular renderpass/blit. 
<bitfield name="MAX" low="16" high="31" type="ufixed" radix="4"/> </bitset> - <reg32 offset="0x8091" name="GRAS_SU_POINT_MINMAX" type="a6xx_gras_su_point_minmax" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x820a" name="GRAS_SU_POINT_MINMAX" type="a6xx_gras_su_point_minmax" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8091" name="GRAS_SU_POINT_MINMAX" type="a6xx_gras_su_point_minmax" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x820a" name="GRAS_SU_POINT_MINMAX" type="a6xx_gras_su_point_minmax" variants="A8XX-" usage="draw"/> - <reg32 offset="0x8092" name="GRAS_SU_POINT_SIZE" low="0" high="15" type="fixed" radix="4" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x820b" name="GRAS_SU_POINT_SIZE" low="0" high="15" type="fixed" radix="4" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8092" name="GRAS_SU_POINT_SIZE" low="0" high="15" type="fixed" radix="4" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x820b" name="GRAS_SU_POINT_SIZE" low="0" high="15" type="fixed" radix="4" variants="A8XX-" usage="draw"/> <bitset name="a6xx_gras_su_depth_cntl" inline="yes"> <bitfield name="Z_TEST_ENABLE" pos="0" type="boolean"/> </bitset> - <reg32 offset="0x8114" name="GRAS_SU_DEPTH_CNTL" variants="A6XX-A7XX" type="a6xx_gras_su_depth_cntl" usage="rp_blit"/> - <reg32 offset="0x8086" name="GRAS_SU_DEPTH_CNTL" variants="A8XX-" type="a6xx_gras_su_depth_cntl" usage="rp_blit"/> + <reg32 offset="0x8114" name="GRAS_SU_DEPTH_CNTL" variants="A6XX-A7XX" type="a6xx_gras_su_depth_cntl" usage="draw"/> + <reg32 offset="0x8086" name="GRAS_SU_DEPTH_CNTL" variants="A8XX-" type="a6xx_gras_su_depth_cntl" usage="draw"/> <bitset name="a6xx_gras_su_stencil_cntl" inline="yes"> <bitfield name="STENCIL_ENABLE" pos="0" type="boolean"/> </bitset> - <reg32 offset="0x8115" name="GRAS_SU_STENCIL_CNTL" type="a6xx_gras_su_stencil_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x8087" name="GRAS_SU_STENCIL_CNTL" type="a6xx_gras_su_stencil_cntl" 
variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8115" name="GRAS_SU_STENCIL_CNTL" type="a6xx_gras_su_stencil_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x8087" name="GRAS_SU_STENCIL_CNTL" type="a6xx_gras_su_stencil_cntl" variants="A8XX-" usage="draw"/> <bitset name="a6xx_gras_su_render_cntl" inline="yes"> <bitfield name="FS_DISABLE" pos="7" type="boolean"/> </bitset> - <reg32 offset="0x8116" name="GRAS_SU_RENDER_CNTL" type="a6xx_gras_su_render_cntl" variants="A7XX" usage="rp_blit"/> - <reg32 offset="0x8088" name="GRAS_SU_RENDER_CNTL" type="a6xx_gras_su_render_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8116" name="GRAS_SU_RENDER_CNTL" type="a6xx_gras_su_render_cntl" variants="A7XX" usage="draw"/> + <reg32 offset="0x8088" name="GRAS_SU_RENDER_CNTL" type="a6xx_gras_su_render_cntl" variants="A8XX-" usage="draw"/> <!-- 0x8093 invalid --> <bitset name="a6xx_depth_plane_cntl" inline="yes"> <bitfield name="Z_MODE" low="0" high="1" type="a6xx_ztest_mode"/> </bitset> - <reg32 offset="0x8094" name="GRAS_SU_DEPTH_PLANE_CNTL" type="a6xx_depth_plane_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x8089" name="GRAS_SU_DEPTH_PLANE_CNTL" type="a6xx_depth_plane_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8094" name="GRAS_SU_DEPTH_PLANE_CNTL" type="a6xx_depth_plane_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x8089" name="GRAS_SU_DEPTH_PLANE_CNTL" type="a6xx_depth_plane_cntl" variants="A8XX-" usage="draw"/> - <reg32 offset="0x8095" name="GRAS_SU_POLY_OFFSET_SCALE" type="float" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x808a" name="GRAS_SU_POLY_OFFSET_SCALE" type="float" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8095" name="GRAS_SU_POLY_OFFSET_SCALE" type="float" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x808a" name="GRAS_SU_POLY_OFFSET_SCALE" type="float" variants="A8XX-" usage="draw"/> - <reg32 offset="0x8096" name="GRAS_SU_POLY_OFFSET_OFFSET" type="float" 
variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x808b" name="GRAS_SU_POLY_OFFSET_OFFSET" type="float" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8096" name="GRAS_SU_POLY_OFFSET_OFFSET" type="float" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x808b" name="GRAS_SU_POLY_OFFSET_OFFSET" type="float" variants="A8XX-" usage="draw"/> - <reg32 offset="0x8097" name="GRAS_SU_POLY_OFFSET_OFFSET_CLAMP" type="float" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x808c" name="GRAS_SU_POLY_OFFSET_OFFSET_CLAMP" type="float" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8097" name="GRAS_SU_POLY_OFFSET_OFFSET_CLAMP" type="float" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x808c" name="GRAS_SU_POLY_OFFSET_OFFSET_CLAMP" type="float" variants="A8XX-" usage="draw"/> <bitset name="a6xx_depth_buffer_info" inline="yes"> <bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/> @@ -1640,8 +1654,8 @@ by a particular renderpass/blit. </bitset> <!-- duplicates RB_DEPTH_BUFFER_INFO: --> - <reg32 offset="0x8098" name="GRAS_SU_DEPTH_BUFFER_INFO" type="a6xx_depth_buffer_info" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x808d" name="GRAS_SU_DEPTH_BUFFER_INFO" type="a6xx_depth_buffer_info" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8098" name="GRAS_SU_DEPTH_BUFFER_INFO" type="a6xx_depth_buffer_info" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x808d" name="GRAS_SU_DEPTH_BUFFER_INFO" type="a6xx_depth_buffer_info" variants="A8XX-" usage="draw"/> <bitset name="a6xx_gras_su_conservative_ras_cntl" inline="yes"> <bitfield name="CONSERVATIVERASEN" pos="0" type="boolean"/> @@ -1667,19 +1681,19 @@ by a particular renderpass/blit. 
<bitfield name="WRITES_LAYER" pos="0" type="boolean"/> <bitfield name="WRITES_VIEW" pos="1" type="boolean"/> </bitset> - <reg32 offset="0x809b" name="GRAS_SU_VS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x809c" name="GRAS_SU_GS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x809d" name="GRAS_SU_DS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x809b" name="GRAS_SU_VS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x809c" name="GRAS_SU_GS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x809d" name="GRAS_SU_DS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="draw"/> - <reg32 offset="0x820e" name="GRAS_SU_VS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A8XX" usage="rp_blit"/> - <reg32 offset="0x820f" name="GRAS_SU_GS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A8XX" usage="rp_blit"/> - <reg32 offset="0x8210" name="GRAS_SU_DS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A8XX" usage="rp_blit"/> + <reg32 offset="0x820e" name="GRAS_SU_VS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A8XX" usage="draw"/> + <reg32 offset="0x820f" name="GRAS_SU_GS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A8XX" usage="draw"/> + <reg32 offset="0x8210" name="GRAS_SU_DS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A8XX" usage="draw"/> <bitset name="a6xx_rast_cntl" inline="yes"> <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> </bitset> - <reg32 offset="0x8211" name="GRAS_RAST_CNTL" type="a6xx_rast_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8211" name="GRAS_RAST_CNTL" type="a6xx_rast_cntl" variants="A8XX-" usage="draw"/> <enum name="a6xx_sequenced_thread_dist"> <value value="0x0" name="DIST_SCREEN_COORD"/> @@ -1738,10 +1752,10 @@ by a particular 
renderpass/blit. <bitfield name="EARLYVIZOUTEN" pos="12" type="boolean"/> </bitset> - <reg32 offset="0x80a0" name="GRAS_SC_CNTL" type="a6xx_gras_sc_cntl" variants="A6XX-A7XX" usage="rp_blit"> + <reg32 offset="0x80a0" name="GRAS_SC_CNTL" type="a6xx_gras_sc_cntl" variants="A6XX-A7XX" usage="draw"> <bitfield name="CCUSINGLECACHELINESIZE" low="0" high="2" variants="A6XX-A7XX"/> </reg32> - <reg32 offset="0x8230" name="GRAS_SC_CNTL" type="a6xx_gras_sc_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8230" name="GRAS_SC_CNTL" type="a6xx_gras_sc_cntl" variants="A8XX-" usage="draw"/> <enum name="a6xx_render_mode"> <value value="0x0" name="RENDERING_PASS"/> @@ -1778,7 +1792,7 @@ by a particular renderpass/blit. <bitfield name="FORCE_LRZ_DIS" pos="27" type="boolean"/> </bitset> - <reg32 offset="0x80a1" name="GRAS_SC_BIN_CNTL" type="a6xx_bin_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x80a1" name="GRAS_SC_BIN_CNTL" type="a6xx_bin_cntl" variants="A6XX-A7XX" usage="cmd"/> <!-- Common fields for RB_CNTL and GRAS_SC_BIN_CNTL --> <bitset name="a8xx_bin_cntl" inline="yes"> @@ -1795,7 +1809,7 @@ by a particular renderpass/blit. <bitfield name="FORCE_LRZ_WRITE_DIS" pos="31" type="boolean"/> </bitset> - <reg32 offset="0x8231" name="GRAS_SC_BIN_CNTL" type="a8xx_bin_cntl" variants="A8XX-" usage="rp_blit"> + <reg32 offset="0x8231" name="GRAS_SC_BIN_CNTL" type="a8xx_bin_cntl" variants="A8XX-" usage="cmd"> <bitfield name="CONS_VIS_IN_BINNING" pos="10" type="boolean"/> <bitfield name="FORCE_BI_DIR_LRZ_DISABLE" pos="14" type="boolean"/> <bitfield name="FORCE_LRZ_DIS" pos="15" type="boolean"/> @@ -1808,16 +1822,16 @@ by a particular renderpass/blit. 
<bitfield name="UNK3" pos="3"/> </bitset> - <reg32 offset="0x80a2" name="GRAS_SC_RAS_MSAA_CNTL" type="a6xx_gras_sc_ras_msaa_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x8232" name="GRAS_SC_RAS_MSAA_CNTL" type="a6xx_gras_sc_ras_msaa_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x80a2" name="GRAS_SC_RAS_MSAA_CNTL" type="a6xx_gras_sc_ras_msaa_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x8232" name="GRAS_SC_RAS_MSAA_CNTL" type="a6xx_gras_sc_ras_msaa_cntl" variants="A8XX-" usage="draw"/> <bitset name="a6xx_gras_sc_dest_msaa_cntl" inline="yes"> <bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/> <bitfield name="MSAA_DISABLE" pos="2" type="boolean"/> </bitset> - <reg32 offset="0x80a3" name="GRAS_SC_DEST_MSAA_CNTL" type="a6xx_gras_sc_dest_msaa_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x8233" name="GRAS_SC_DEST_MSAA_CNTL" type="a6xx_gras_sc_dest_msaa_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x80a3" name="GRAS_SC_DEST_MSAA_CNTL" type="a6xx_gras_sc_dest_msaa_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x8233" name="GRAS_SC_DEST_MSAA_CNTL" type="a6xx_gras_sc_dest_msaa_cntl" variants="A8XX-" usage="draw"/> <bitset name="a6xx_msaa_sample_pos_cntl" inline="yes"> <bitfield name="UNK0" pos="0"/> @@ -1835,15 +1849,15 @@ by a particular renderpass/blit. 
<bitfield name="SAMPLE_3_Y" low="28" high="31" radix="4" type="fixed"/> </bitset> - <reg32 offset="0x80a4" name="GRAS_SC_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x80a5" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x80a6" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x80a4" name="GRAS_SC_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x80a5" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x80a6" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" variants="A6XX-A7XX" usage="draw"/> - <reg32 offset="0x8237" name="GRAS_SC_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x8238" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x8239" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x823a" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_2" type="a6xx_programmable_msaa_pos" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x823b" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_3" type="a6xx_programmable_msaa_pos" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8237" name="GRAS_SC_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" variants="A8XX-" usage="draw"/> + <reg32 offset="0x8238" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" variants="A8XX-" usage="draw"/> + <reg32 offset="0x8239" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" variants="A8XX-" usage="draw"/> + <reg32 offset="0x823a" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_2" 
type="a6xx_programmable_msaa_pos" variants="A8XX-" usage="draw"/> + <reg32 offset="0x823b" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_3" type="a6xx_programmable_msaa_pos" variants="A8XX-" usage="draw"/> <reg32 offset="0x80a7" name="GRAS_ROTATION_CNTL" variants="A7XX" usage="cmd"/> <reg32 offset="0x8207" name="GRAS_ROTATION_CNTL" variants="A8XX-" usage="cmd"/> @@ -1860,30 +1874,53 @@ by a particular renderpass/blit. <bitfield name="Y" low="16" high="31" type="uint"/> </bitset> - <array offset="0x80b0" name="GRAS_SC_SCREEN_SCISSOR" stride="2" length="16" variants="A6XX-A7XX" usage="rp_blit"> + <array offset="0x80b0" name="GRAS_SC_SCREEN_SCISSOR" stride="2" length="16" variants="A6XX-A7XX" usage="draw"> <reg32 offset="0" name="TL" type="a6xx_scissor_xy"/> <reg32 offset="1" name="BR" type="a6xx_scissor_xy"/> </array> - <array offset="0x8240" name="GRAS_SC_SCREEN_SCISSOR" stride="2" length="16" variants="A8XX-" usage="rp_blit"> + <array offset="0x8240" name="GRAS_SC_SCREEN_SCISSOR" stride="2" length="16" variants="A8XX-" usage="draw"> <reg32 offset="0" name="TL" type="a6xx_scissor_xy"/> <reg32 offset="1" name="BR" type="a6xx_scissor_xy"/> </array> - <array offset="0x80d0" name="GRAS_SC_VIEWPORT_SCISSOR" stride="2" length="16" variants="A6XX-A7XX" usage="rp_blit"> + <array offset="0x80d0" name="GRAS_SC_VIEWPORT_SCISSOR" stride="2" length="16" variants="A6XX-A7XX" usage="draw"> <reg32 offset="0" name="TL" type="a6xx_scissor_xy"/> <reg32 offset="1" name="BR" type="a6xx_scissor_xy"/> </array> - <array offset="0x8270" name="GRAS_SC_VIEWPORT_SCISSOR" stride="2" length="16" variants="A8XX-" usage="rp_blit"> + <array offset="0x8270" name="GRAS_SC_VIEWPORT_SCISSOR" stride="2" length="16" variants="A8XX-" usage="draw"> <reg32 offset="0" name="TL" type="a6xx_scissor_xy"/> <reg32 offset="1" name="BR" type="a6xx_scissor_xy"/> </array> - <reg32 offset="0x80f0" name="GRAS_SC_WINDOW_SCISSOR_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x80f1" 
name="GRAS_SC_WINDOW_SCISSOR_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x829f" name="GRAS_SC_WINDOW_INV_SCISSOR_CNTL" variants="A8XX-"> + <bitfield name="INCLUSION" pos="0" type="boolean"/> <!-- if not set, EXCLUSION --> + <bitfield name="VP0_INV_SCISSOR_0_EN" pos="4" type="boolean"/> + <bitfield name="VP0_INV_SCISSOR_1_EN" pos="5" type="boolean"/> + <bitfield name="VP1_INV_SCISSOR_0_EN" pos="6" type="boolean"/> + <bitfield name="VP1_INV_SCISSOR_1_EN" pos="7" type="boolean"/> + <bitfield name="VP2_INV_SCISSOR_0_EN" pos="8" type="boolean"/> + <bitfield name="VP2_INV_SCISSOR_1_EN" pos="9" type="boolean"/> + <bitfield name="VP3_INV_SCISSOR_0_EN" pos="10" type="boolean"/> + <bitfield name="VP3_INV_SCISSOR_1_EN" pos="11" type="boolean"/> + <bitfield name="VP4_INV_SCISSOR_0_EN" pos="12" type="boolean"/> + <bitfield name="VP4_INV_SCISSOR_1_EN" pos="13" type="boolean"/> + <bitfield name="VP5_INV_SCISSOR_0_EN" pos="14" type="boolean"/> + <bitfield name="VP5_INV_SCISSOR_1_EN" pos="15" type="boolean"/> + </reg32> + + <array offset="0x82a0" name="GRAS_SC_WINDOW_VP" stride="4" length="6" variants="A8XX-"> + <reg32 offset="0" name="INV_SCISSOR_0_TL" type="a6xx_scissor_xy"/> + <reg32 offset="1" name="INV_SCISSOR_0_BR" type="a6xx_scissor_xy"/> + <reg32 offset="2" name="INV_SCISSOR_1_TL" type="a6xx_scissor_xy"/> + <reg32 offset="3" name="INV_SCISSOR_1_BR" type="a6xx_scissor_xy"/> + </array> - <reg32 offset="0x8235" name="GRAS_SC_WINDOW_SCISSOR_TL" type="a6xx_reg_xy" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x8236" name="GRAS_SC_WINDOW_SCISSOR_BR" type="a6xx_reg_xy" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x80f0" name="GRAS_SC_WINDOW_SCISSOR_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x80f1" name="GRAS_SC_WINDOW_SCISSOR_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="draw"/> + + <reg32 offset="0x8235" name="GRAS_SC_WINDOW_SCISSOR_TL" type="a6xx_reg_xy" variants="A8XX-" usage="draw"/> + 
<reg32 offset="0x8236" name="GRAS_SC_WINDOW_SCISSOR_BR" type="a6xx_reg_xy" variants="A8XX-" usage="draw"/> <enum name="a6xx_fsr_combiner"> <value value="0" name="FSR_COMBINER_OP_KEEP"/> @@ -1893,45 +1930,59 @@ by a particular renderpass/blit. <value value="4" name="FSR_COMBINER_OP_MUL"/> </enum> - <bitset name="a6xx_gras_vrs_config"> + <enum name="a6xx_fsr_combiner_clamp_mode"> + <value value="0" name="FSR_COMBINER_CLAMP_4x4"/> + <value value="1" name="FSR_COMBINER_CLAMP_2x2"/> + <!-- gen8 and later, reserved on earlier gens: --> + <value value="2" name="FSR_COMBINER_CLAMP_16_SAMP"/> + </enum> + + <bitset name="a6xx_gras_vrs_config" inline="yes"> <bitfield name="PIPELINE_FSR_ENABLE" pos="0" type="boolean"/> <bitfield name="FRAG_SIZE_X" low="1" high="2" type="uint"/> <bitfield name="FRAG_SIZE_Y" low="3" high="4" type="uint"/> <bitfield name="COMBINER_OP_1" low="5" high="7" type="a6xx_fsr_combiner"/> <bitfield name="COMBINER_OP_2" low="8" high="10" type="a6xx_fsr_combiner"/> + <bitfield name="COMBINER_CLAMP_MODE" low="11" high="12" type="a6xx_fsr_combiner_clamp_mode"/> <bitfield name="ATTACHMENT_FSR_ENABLE" pos="13" type="boolean"/> <bitfield name="PRIMITIVE_FSR_ENABLE" pos="20" type="boolean"/> </bitset> - <reg32 offset="0x80f4" name="GRAS_VRS_CONFIG" type="a6xx_gras_vrs_config" variants="A7XX" usage="rp_blit"/> - <reg32 offset="0x8208" name="GRAS_VRS_CONFIG" type="a6xx_gras_vrs_config" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x80f4" name="GRAS_VRS_CONFIG" type="a6xx_gras_vrs_config" variants="A7XX" usage="draw"> + <bitfield name="QUALITY_BUFFER_SHIFT_X" low="14" high="15" type="uint"/> + <bitfield name="QUALITY_BUFFER_SHIFT_Y" low="16" high="17" type="uint"/> + </reg32> + <reg32 offset="0x8208" name="GRAS_VRS_CONFIG" type="a6xx_gras_vrs_config" variants="A8XX-" usage="draw"/> <bitset name="a6xx_gras_quality_buffer_info" inline="yes"> <bitfield name="LAYERED" pos="0" type="boolean"/> <bitfield name="TILE_MODE" low="1" high="2" type="a6xx_tile_mode"/> 
</bitset> - <reg32 offset="0x80f5" name="GRAS_QUALITY_BUFFER_INFO" type="a6xx_gras_quality_buffer_info" variants="A7XX" usage="rp_blit"/> - <reg32 offset="0x808e" name="GRAS_QUALITY_BUFFER_INFO" type="a6xx_gras_quality_buffer_info" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x80f5" name="GRAS_QUALITY_BUFFER_INFO" type="a6xx_gras_quality_buffer_info" variants="A7XX" usage="draw"/> + <reg32 offset="0x808e" name="GRAS_QUALITY_BUFFER_INFO" type="a6xx_gras_quality_buffer_info" variants="A8XX-" usage="draw"> + <bitfield name="QUALITY_BUFFER_SHIFT_X" low="4" high="5" type="uint"/> + <bitfield name="QUALITY_BUFFER_SHIFT_Y" low="6" high="7" type="uint"/> + </reg32> <bitset name="a6xx_gras_quality_buffer_dimension" inline="yes"> <bitfield name="WIDTH" low="0" high="15" type="uint"/> <bitfield name="HEIGHT" low="16" high="31" type="uint"/> </bitset> - <reg32 offset="0x80f6" name="GRAS_QUALITY_BUFFER_DIMENSION" type="a6xx_gras_quality_buffer_dimension" variants="A7XX" usage="rp_blit"/> - <reg32 offset="0x808f" name="GRAS_QUALITY_BUFFER_DIMENSION" type="a6xx_gras_quality_buffer_dimension" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x80f6" name="GRAS_QUALITY_BUFFER_DIMENSION" type="a6xx_gras_quality_buffer_dimension" variants="A7XX" usage="draw"/> + <reg32 offset="0x808f" name="GRAS_QUALITY_BUFFER_DIMENSION" type="a6xx_gras_quality_buffer_dimension" variants="A8XX-" usage="draw"/> - <reg64 offset="0x80f8" name="GRAS_QUALITY_BUFFER_BASE" variants="A7XX" type="waddress" usage="rp_blit"/> - <reg64 offset="0x8090" name="GRAS_QUALITY_BUFFER_BASE" variants="A8XX-" type="waddress" usage="rp_blit"/> + <reg64 offset="0x80f8" name="GRAS_QUALITY_BUFFER_BASE" variants="A7XX" type="waddress" usage="draw"/> + <reg64 offset="0x8090" name="GRAS_QUALITY_BUFFER_BASE" variants="A8XX-" type="waddress" usage="draw"/> <bitset name="a6xx_gras_quality_buffer_pitch" inline="yes"> <bitfield name="PITCH" shr="6" low="0" high="7" type="uint"/> <bitfield name="ARRAY_PITCH" shr="6" low="10" 
high="28" type="uint"/> </bitset> - <reg32 offset="0x80fa" name="GRAS_QUALITY_BUFFER_PITCH" type="a6xx_gras_quality_buffer_pitch" variants="A7XX" usage="rp_blit"/> - <reg32 offset="0x8092" name="GRAS_QUALITY_BUFFER_PITCH" type="a6xx_gras_quality_buffer_pitch" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x80fa" name="GRAS_QUALITY_BUFFER_PITCH" type="a6xx_gras_quality_buffer_pitch" variants="A7XX" usage="draw"/> + <reg32 offset="0x8092" name="GRAS_QUALITY_BUFFER_PITCH" type="a6xx_gras_quality_buffer_pitch" variants="A8XX-" usage="draw"/> <enum name="a6xx_lrz_dir_status"> <value value="0x1" name="LRZ_DIR_LE"/> @@ -1966,24 +2017,23 @@ by a particular renderpass/blit. <bitfield name="Z_FUNC" low="11" high="13" type="adreno_compare_func" variants="A7XX-"/> </bitset> - <reg32 offset="0x8100" name="GRAS_LRZ_CNTL" type="a6xx_gras_lrz_cntl" usage="rp_blit" variants="A6XX"> + <reg32 offset="0x8100" name="GRAS_LRZ_CNTL" type="a6xx_gras_lrz_cntl" usage="draw" variants="A6XX"> <bitfield name="FC_ENABLE" pos="3" type="boolean" variants="A6XX"/> <bitfield name="DISABLE_ON_WRONG_DIR" pos="9" type="boolean" variants="A6XX"/> </reg32> - <reg32 offset="0x8100" name="GRAS_LRZ_CNTL" type="a6xx_gras_lrz_cntl" usage="rp_blit" variants="A7XX"/> - <reg32 offset="0x8212" name="GRAS_LRZ_CNTL" type="a6xx_gras_lrz_cntl" usage="rp_blit" variants="A8XX-"/> + <reg32 offset="0x8100" name="GRAS_LRZ_CNTL" type="a6xx_gras_lrz_cntl" usage="draw" variants="A7XX"/> + <reg32 offset="0x8212" name="GRAS_LRZ_CNTL" type="a6xx_gras_lrz_cntl" usage="draw" variants="A8XX-"/> - <reg32 offset="0x8007" name="GRAS_LRZ_CB_CNTL" variants="A7XX" usage="rp_blit"> + <bitset name="a7xx_gras_lrz_cb_cntl" inline="yes"> <doc> The total size of the LRZ image array (not including fast clear buffer), used as a stride for double buffering used with concurrent binning. 
</doc> - <bitfield name="DOUBLE_BUFFER_STRIDE" low="8" high="31" shr="8"/> - </reg32> - <reg32 offset="0x8101" name="GRAS_LRZ_CB_CNTL" usage="rp_blit" variants="A8XX-"> <bitfield name="DOUBLE_BUFFER_PITCH" low="8" high="31" shr="8"/> - </reg32> + </bitset> + <reg32 offset="0x8007" name="GRAS_LRZ_CB_CNTL" type="a7xx_gras_lrz_cb_cntl" variants="A7XX" usage="draw"/> + <reg32 offset="0x8101" name="GRAS_LRZ_CB_CNTL" type="a7xx_gras_lrz_cb_cntl" variants="A8XX-" usage="draw"/> <enum name="a6xx_fragcoord_sample_mode"> <value value="0" name="FRAGCOORD_CENTER"/> @@ -1995,28 +2045,28 @@ by a particular renderpass/blit. <bitfield name="FRAGCOORDSAMPLEMODE" low="1" high="2" type="a6xx_fragcoord_sample_mode"/> </bitset> - <reg32 offset="0x8101" name="GRAS_LRZ_PS_INPUT_CNTL" type="a6xx_gras_lrz_ps_input_cntl" usage="rp_blit" variants="A6XX-A7XX"/> - <reg32 offset="0x8102" name="GRAS_LRZ_PS_INPUT_CNTL" type="a6xx_gras_lrz_ps_input_cntl" usage="rp_blit" variants="A8XX-"/> + <reg32 offset="0x8101" name="GRAS_LRZ_PS_INPUT_CNTL" type="a6xx_gras_lrz_ps_input_cntl" usage="draw" variants="A6XX-A7XX"/> + <reg32 offset="0x8102" name="GRAS_LRZ_PS_INPUT_CNTL" type="a6xx_gras_lrz_ps_input_cntl" usage="draw" variants="A8XX-"/> <bitset name="a6xx_gras_lrz_mrt_buffer_info_0" inline="yes"> <bitfield name="COLOR_FORMAT" low="0" high="7" type="a6xx_format"/> </bitset> - <reg32 offset="0x8102" name="GRAS_LRZ_MRT_BUFFER_INFO_0" type="a6xx_gras_lrz_mrt_buffer_info_0" usage="rp_blit" variants="A6XX-A7XX"/> - <reg32 offset="0x8103" name="GRAS_LRZ_MRT_BUFFER_INFO_0" type="a6xx_gras_lrz_mrt_buffer_info_0" usage="rp_blit" variants="A8XX-"/> + <reg32 offset="0x8102" name="GRAS_LRZ_MRT_BUFFER_INFO_0" type="a6xx_gras_lrz_mrt_buffer_info_0" usage="draw" variants="A6XX-A7XX"/> + <reg32 offset="0x8103" name="GRAS_LRZ_MRT_BUFFER_INFO_0" type="a6xx_gras_lrz_mrt_buffer_info_0" usage="draw" variants="A8XX-"/> - <reg64 offset="0x8103" name="GRAS_LRZ_BUFFER_BASE" align="256" type="waddress" usage="rp_blit" 
variants="A6XX-A7XX"/> - <reg64 offset="0x8104" name="GRAS_LRZ_BUFFER_BASE" align="256" type="waddress" usage="rp_blit" variants="A8XX-"/> + <reg64 offset="0x8103" name="GRAS_LRZ_BUFFER_BASE" align="256" type="waddress" usage="draw" variants="A6XX-A7XX"/> + <reg64 offset="0x8104" name="GRAS_LRZ_BUFFER_BASE" align="256" type="waddress" usage="draw" variants="A8XX-"/> <bitset name="a6xx_gras_lrz_buffer_pitch" inline="yes"> - <bitfield name="PITCH" low="0" high="7" shr="5" type="uint"/> + <bitfield name="PITCH" low="0" high="7" shr="6" type="uint"/> <bitfield name="ARRAY_PITCH" low="10" high="28" shr="8" type="uint"/> </bitset> - <reg32 offset="0x8105" name="GRAS_LRZ_BUFFER_PITCH" type="a6xx_gras_lrz_buffer_pitch" usage="rp_blit" variants="A6XX-A7XX"/> - <reg32 offset="0x8108" name="GRAS_LRZ_BUFFER_PITCH" type="a6xx_gras_lrz_buffer_pitch" usage="rp_blit" variants="A8XX-"/> + <reg32 offset="0x8105" name="GRAS_LRZ_BUFFER_PITCH" type="a6xx_gras_lrz_buffer_pitch" usage="draw" variants="A6XX-A7XX"/> + <reg32 offset="0x8108" name="GRAS_LRZ_BUFFER_PITCH" type="a6xx_gras_lrz_buffer_pitch" usage="draw" variants="A8XX-"/> - <reg32 offset="0x810e" name="GRAS_LRZ_BUFFER_STRIDE" usage="rp_blit" low="0" high="16" shr="12" variants="A8XX-"/> + <reg32 offset="0x810e" name="GRAS_LRZ_BUFFER_STRIDE" usage="draw" low="0" high="16" shr="12" variants="A8XX-"/> <!-- The LRZ "fast clear" buffer is initialized to zero's by blob, and @@ -2048,8 +2098,8 @@ by a particular renderpass/blit. increases beyond 1 page. Not sure if that is an actual limit or not. 
--> - <reg64 offset="0x8106" name="GRAS_LRZ_FAST_CLEAR_BUFFER_BASE" align="64" type="waddress" usage="rp_blit"/> - <reg32 offset="0x8109" name="GRAS_LRZ_PS_SAMPLEFREQ_CNTL" usage="rp_blit"> + <reg64 offset="0x8106" name="GRAS_LRZ_FAST_CLEAR_BUFFER_BASE" align="64" type="waddress" usage="resolve"/> + <reg32 offset="0x8109" name="GRAS_LRZ_PS_SAMPLEFREQ_CNTL" usage="draw"> <bitfield name="PER_SAMP_MODE" pos="0" type="boolean"/> </reg32> <!-- @@ -2066,14 +2116,14 @@ by a particular renderpass/blit. <bitfield name="BASE_MIP_LEVEL" low="28" high="31" type="uint"/> </reg32> - <reg32 offset="0x810b" name="GRAS_LRZ_CNTL2" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0x810b" name="GRAS_LRZ_CNTL2" variants="A7XX-" usage="draw"> <bitfield name="DISABLE_ON_WRONG_DIR" pos="0" type="boolean"/> <bitfield name="FC_ENABLE" pos="1" type="boolean"/> </reg32> <!-- 0x810c-0x810f invalid --> - <reg32 offset="0x8110" name="GRAS_LRZ_BUFFER_SLICE_PITCH" low="0" high="31" shr="8" type="uint" variants="A8XX-"/> + <reg32 offset="0x8110" name="GRAS_LRZ_BUFFER_SLICE_PITCH" low="8" high="31" shr="8" type="uint" variants="A8XX-"/> <reg32 offset="0x8110" name="GRAS_MODE_CNTL" low="0" high="1" variants="A6XX-A7XX" usage="cmd"/> <reg32 offset="0x8213" name="GRAS_MODE_CNTL" low="0" high="1" variants="A8XX-" usage="cmd"/> @@ -2082,8 +2132,8 @@ by a particular renderpass/blit. 
<reg32 offset="0x8111" name="GRAS_LRZ_DEPTH_CLEAR" type="float" variants="A7XX"/> <reg32 offset="0x810d" name="GRAS_LRZ_DEPTH_CLEAR" type="float" variants="A8XX-"/> - <reg32 offset="0x8113" name="GRAS_LRZ_DEPTH_BUFFER_INFO" type="a6xx_depth_buffer_info" variants="A7XX" usage="rp_blit"/> - <reg32 offset="0x810f" name="GRAS_LRZ_DEPTH_BUFFER_INFO" type="a6xx_depth_buffer_info" variants="A8XX" usage="rp_blit"/> + <reg32 offset="0x8113" name="GRAS_LRZ_DEPTH_BUFFER_INFO" type="a6xx_depth_buffer_info" variants="A7XX" usage="draw"/> + <reg32 offset="0x810f" name="GRAS_LRZ_DEPTH_BUFFER_INFO" type="a6xx_depth_buffer_info" variants="A8XX" usage="draw"/> <doc>LUT used to convert quality buffer values to HW shading rate values. An array of 4-bit values.</doc> <array offset="0x8120" name="GRAS_LRZ_QUALITY_LOOKUP_TABLE" variants="A7XX" stride="1" length="2"/> @@ -2128,32 +2178,32 @@ by a particular renderpass/blit. <bitfield name="COPY" pos="30" type="boolean" variants="A7XX-"/> </bitset> - <reg32 offset="0x8400" name="GRAS_A2D_BLT_CNTL" type="a6xx_a2d_blt_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8400" name="GRAS_A2D_BLT_CNTL" type="a6xx_a2d_blt_cntl" variants="A6XX-A7XX" usage="blit"/> <!-- note: the low 8 bits for src coords are valid, probably fixed point it would be a bit weird though, since we subtract 1 from BR coords apparently signed, gallium driver uses negative coords and it works? 
--> - <reg32 offset="0x8401" name="GRAS_A2D_SRC_XMIN" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x8402" name="GRAS_A2D_SRC_XMAX" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x8403" name="GRAS_A2D_SRC_YMIN" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x8404" name="GRAS_A2D_SRC_YMAX" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x8405" name="GRAS_A2D_DEST_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x8406" name="GRAS_A2D_DEST_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x8407" name="GRAS_2D_UNKNOWN_8407" low="0" high="31"/> - <reg32 offset="0x8408" name="GRAS_2D_UNKNOWN_8408" low="0" high="31"/> - <reg32 offset="0x8409" name="GRAS_2D_UNKNOWN_8409" low="0" high="31"/> - <reg32 offset="0x840a" name="GRAS_A2D_SCISSOR_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x840b" name="GRAS_A2D_SCISSOR_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> - - <reg32 offset="0x8500" name="GRAS_A2D_BLT_CNTL" type="a6xx_a2d_blt_cntl" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x8501" name="GRAS_A2D_SRC_XMIN" low="8" high="24" type="int" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x8502" name="GRAS_A2D_SRC_XMAX" low="8" high="24" type="int" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x8503" name="GRAS_A2D_SRC_YMIN" low="8" high="24" type="int" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x8504" name="GRAS_A2D_SRC_YMAX" low="8" high="24" type="int" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x8505" name="GRAS_A2D_DEST_TL" type="a6xx_reg_xy" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x8506" name="GRAS_A2D_DEST_BR" type="a6xx_reg_xy" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x8507" name="GRAS_A2D_SCISSOR_TL" type="a6xx_reg_xy" variants="A8XX-" 
usage="rp_blit"/> - <reg32 offset="0x8508" name="GRAS_A2D_SCISSOR_BR" type="a6xx_reg_xy" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8401" name="GRAS_A2D_SRC_XMIN" low="8" high="24" type="int" variants="A6XX-A7XX" usage="blit"/> + <reg32 offset="0x8402" name="GRAS_A2D_SRC_XMAX" low="8" high="24" type="int" variants="A6XX-A7XX" usage="blit"/> + <reg32 offset="0x8403" name="GRAS_A2D_SRC_YMIN" low="8" high="24" type="int" variants="A6XX-A7XX" usage="blit"/> + <reg32 offset="0x8404" name="GRAS_A2D_SRC_YMAX" low="8" high="24" type="int" variants="A6XX-A7XX" usage="blit"/> + <reg32 offset="0x8405" name="GRAS_A2D_DEST_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="blit"/> + <reg32 offset="0x8406" name="GRAS_A2D_DEST_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="blit"/> + <reg32 offset="0x8407" name="GRAS_A2D_UNKNOWN_8407" low="0" high="31" variants="A6XX" usage="blit"/> + <reg32 offset="0x8408" name="GRAS_A2D_UNKNOWN_8408" low="0" high="31" variants="A6XX" usage="blit"/> + <reg32 offset="0x8409" name="GRAS_A2D_UNKNOWN_8409" low="0" high="31" variants="A6XX" usage="blit"/> + <reg32 offset="0x840a" name="GRAS_A2D_SCISSOR_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="blit"/> + <reg32 offset="0x840b" name="GRAS_A2D_SCISSOR_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="blit"/> + + <reg32 offset="0x8500" name="GRAS_A2D_BLT_CNTL" type="a6xx_a2d_blt_cntl" variants="A8XX-" usage="blit"/> + <reg32 offset="0x8501" name="GRAS_A2D_SRC_XMIN" low="8" high="24" type="int" variants="A8XX-" usage="blit"/> + <reg32 offset="0x8502" name="GRAS_A2D_SRC_XMAX" low="8" high="24" type="int" variants="A8XX-" usage="blit"/> + <reg32 offset="0x8503" name="GRAS_A2D_SRC_YMIN" low="8" high="24" type="int" variants="A8XX-" usage="blit"/> + <reg32 offset="0x8504" name="GRAS_A2D_SRC_YMAX" low="8" high="24" type="int" variants="A8XX-" usage="blit"/> + <reg32 offset="0x8505" name="GRAS_A2D_DEST_TL" type="a6xx_reg_xy" variants="A8XX-" usage="blit"/> + <reg32 offset="0x8506" 
name="GRAS_A2D_DEST_BR" type="a6xx_reg_xy" variants="A8XX-" usage="blit"/> + <reg32 offset="0x8507" name="GRAS_A2D_SCISSOR_TL" type="a6xx_reg_xy" variants="A8XX-" usage="blit"/> + <reg32 offset="0x8508" name="GRAS_A2D_SCISSOR_BR" type="a6xx_reg_xy" variants="A8XX-" usage="blit"/> <!-- always 0x880 ? (and 0 in a640/a650 traces?) --> <reg32 offset="0x8600" name="GRAS_DBG_ECO_CNTL" usage="init" variants="A6XX-A7XX"> @@ -2180,10 +2230,10 @@ by a particular renderpass/blit. --> <!-- same as GRAS_BIN_CONTROL, but without bit 27: --> - <reg32 offset="0x8800" name="RB_CNTL" variants="A6XX-A7XX" type="a6xx_bin_cntl" usage="rp_blit"/> - <reg32 offset="0x8800" name="RB_CNTL" variants="A8XX-" type="a8xx_bin_cntl" usage="rp_blit"/> + <reg32 offset="0x8800" name="RB_CNTL" variants="A6XX-A7XX" type="a6xx_bin_cntl" usage="cmd"/> + <reg32 offset="0x8800" name="RB_CNTL" variants="A8XX-" type="a8xx_bin_cntl" usage="cmd"/> - <reg32 offset="0x8801" name="RB_RENDER_CNTL" variants="A6XX" usage="rp_blit"> + <reg32 offset="0x8801" name="RB_RENDER_CNTL" variants="A6XX" usage="draw"> <bitfield name="CCUSINGLECACHELINESIZE" low="3" high="5"/> <bitfield name="EARLYVIZOUTEN" pos="6" type="boolean"/> <bitfield name="FS_DISABLE" pos="7" type="boolean"/> @@ -2197,7 +2247,7 @@ by a particular renderpass/blit. <!-- bitmask of MRTs using UBWC flag buffer: --> <bitfield name="FLAG_MRTS" low="16" high="23"/> </reg32> - <reg32 offset="0x8801" name="RB_RENDER_CNTL" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0x8801" name="RB_RENDER_CNTL" variants="A7XX-" usage="draw"> <bitfield name="EARLYVIZOUTEN" pos="6" type="boolean"/> <bitfield name="FS_DISABLE" pos="7" type="boolean"/> <bitfield name="RASTER_MODE" pos="8" type="a6xx_raster_mode"/> @@ -2206,26 +2256,26 @@ by a particular renderpass/blit. 
<bitfield name="INNERCONSERVATIVERASEN" pos="12" type="boolean"/> </reg32> - <reg32 offset="0x8802" name="RB_RAS_MSAA_CNTL" usage="rp_blit"> + <reg32 offset="0x8802" name="RB_RAS_MSAA_CNTL" usage="draw"> <bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/> <bitfield name="UNK2" pos="2"/> <bitfield name="UNK3" pos="3"/> </reg32> - <reg32 offset="0x8803" name="RB_DEST_MSAA_CNTL" usage="rp_blit"> + <reg32 offset="0x8803" name="RB_DEST_MSAA_CNTL" usage="draw"> <bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/> <bitfield name="MSAA_DISABLE" pos="2" type="boolean"/> </reg32> - <reg32 offset="0x8804" name="RB_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" usage="rp_blit"/> - <reg32 offset="0x8805" name="RB_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" usage="rp_blit"/> - <reg32 offset="0x8806" name="RB_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" usage="rp_blit"/> - <reg32 offset="0x8807" name="RB_PROGRAMMABLE_MSAA_POS_2" type="a6xx_programmable_msaa_pos" usage="rp_blit" variants="A8XX-"/> - <reg32 offset="0x8808" name="RB_PROGRAMMABLE_MSAA_POS_3" type="a6xx_programmable_msaa_pos" usage="rp_blit" variants="A8XX-"/> + <reg32 offset="0x8804" name="RB_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" usage="draw"/> + <reg32 offset="0x8805" name="RB_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" usage="draw"/> + <reg32 offset="0x8806" name="RB_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" usage="draw"/> + <reg32 offset="0x8807" name="RB_PROGRAMMABLE_MSAA_POS_2" type="a6xx_programmable_msaa_pos" usage="draw" variants="A8XX-"/> + <reg32 offset="0x8808" name="RB_PROGRAMMABLE_MSAA_POS_3" type="a6xx_programmable_msaa_pos" usage="draw" variants="A8XX-"/> <!-- note: maybe not actually called RB_RENDER_CONTROLn (since RB_RENDER_CNTL name comes from kernel and is probably right) --> - <reg32 offset="0x8809" name="RB_INTERP_CNTL" usage="rp_blit"> + <reg32 offset="0x8809" 
name="RB_INTERP_CNTL" usage="draw"> <!-- see also GRAS_CL_INTERP_CNTL --> <bitfield name="IJ_PERSP_PIXEL" pos="0" type="boolean"/> <bitfield name="IJ_PERSP_CENTROID" pos="1" type="boolean"/> @@ -2236,7 +2286,7 @@ by a particular renderpass/blit. <bitfield name="COORD_MASK" low="6" high="9" type="hex"/> <bitfield name="INTERP_EN" pos="10" type="boolean"/> </reg32> - <reg32 offset="0x880a" name="RB_PS_INPUT_CNTL" usage="rp_blit"> + <reg32 offset="0x880a" name="RB_PS_INPUT_CNTL" usage="draw"> <!-- enable bits for various FS sysvalue regs: --> <bitfield name="SAMPLEMASK" pos="0" type="boolean"/> <bitfield name="POSTDEPTHCOVERAGE" pos="1" type="boolean"/> @@ -2248,16 +2298,16 @@ by a particular renderpass/blit. <bitfield name="FOVEATION" pos="8" type="boolean"/> </reg32> - <reg32 offset="0x880b" name="RB_PS_OUTPUT_CNTL" usage="rp_blit"> + <reg32 offset="0x880b" name="RB_PS_OUTPUT_CNTL" usage="draw"> <bitfield name="DUAL_COLOR_IN_ENABLE" pos="0" type="boolean"/> <bitfield name="FRAG_WRITES_Z" pos="1" type="boolean"/> <bitfield name="FRAG_WRITES_SAMPMASK" pos="2" type="boolean"/> <bitfield name="FRAG_WRITES_STENCILREF" pos="3" type="boolean"/> </reg32> - <reg32 offset="0x880c" name="RB_PS_MRT_CNTL" usage="rp_blit"> + <reg32 offset="0x880c" name="RB_PS_MRT_CNTL" usage="draw"> <bitfield name="MRT" low="0" high="3" type="uint"/> </reg32> - <reg32 offset="0x880d" name="RB_PS_OUTPUT_MASK" usage="rp_blit"> + <reg32 offset="0x880d" name="RB_PS_OUTPUT_MASK" usage="draw"> <bitfield name="RT0" low="0" high="3"/> <bitfield name="RT1" low="4" high="7"/> <bitfield name="RT2" low="8" high="11"/> @@ -2277,7 +2327,7 @@ by a particular renderpass/blit. 
<bitfield name="DITHER_MODE_MRT6" low="12" high="13" type="adreno_rb_dither_mode"/> <bitfield name="DITHER_MODE_MRT7" low="14" high="15" type="adreno_rb_dither_mode"/> </reg32> - <reg32 offset="0x880f" name="RB_SRGB_CNTL" usage="rp_blit"> + <reg32 offset="0x880f" name="RB_SRGB_CNTL" usage="draw"> <!-- Same as SP_SRGB_CNTL --> <bitfield name="SRGB_MRT0" pos="0" type="boolean"/> <bitfield name="SRGB_MRT1" pos="1" type="boolean"/> @@ -2289,11 +2339,11 @@ by a particular renderpass/blit. <bitfield name="SRGB_MRT7" pos="7" type="boolean"/> </reg32> - <reg32 offset="0x8810" name="RB_PS_SAMPLEFREQ_CNTL" usage="rp_blit"> + <reg32 offset="0x8810" name="RB_PS_SAMPLEFREQ_CNTL" usage="draw"> <bitfield name="PER_SAMP_MODE" pos="0" type="boolean"/> </reg32> <reg32 offset="0x8811" name="RB_MODE_CNTL" low="4" high="6" usage="cmd"/> - <reg32 offset="0x8812" name="RB_BUFFER_CNTL" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0x8812" name="RB_BUFFER_CNTL" variants="A7XX-" usage="draw"> <bitfield name="Z_SYSMEM" pos="0" type="boolean"/> <bitfield name="S_SYSMEM" pos="1" type="boolean"/> <bitfield name="RT0_SYSMEM" pos="2" type="boolean"/> @@ -2316,7 +2366,7 @@ by a particular renderpass/blit. <bitfield name="RT7_FULL_IN_GMEM" pos="19" type="boolean" variants="A8XX-"/> </reg32> - <reg32 offset="0x8816" name="RB_RESOLVE_CR_CNTL" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x8816" name="RB_RESOLVE_CR_CNTL" variants="A8XX-" usage="resolve"/> <!-- 0x8813-0x8817 invalid --> <!-- always 0x0 ? --> @@ -2330,12 +2380,12 @@ by a particular renderpass/blit. 
<reg32 offset="0x881e" name="RB_UNKNOWN_881E" usage="cmd"/> <!-- Duplicates fields from SP_PS_CNTL_0 --> - <reg32 offset="0x881f" name="RB_PS_CNTL" variants="A8XX-" usage="rp_blit"> + <reg32 offset="0x881f" name="RB_PS_CNTL" variants="A8XX-" usage="draw"> <bitfield name="PIXLODENABLE" pos="0" type="boolean"/> <bitfield name="LODPIXMASK" pos="1" type="boolean"/> </reg32> - <array offset="0x8820" name="RB_MRT" stride="8" length="8" usage="rp_blit"> + <array offset="0x8820" name="RB_MRT" stride="8" length="8" usage="draw"> <reg32 offset="0x0" name="CONTROL"> <bitfield name="COLOR_BLEND_EN" pos="0" type="boolean"/> <bitfield name="ALPHA_BLEND_EN" pos="1" type="boolean"/> @@ -2383,16 +2433,16 @@ by a particular renderpass/blit. <reg32 offset="0x7" name="BASE_GMEM" low="12" high="31" shr="12"/> </array> - <reg32 offset="0x8860" name="RB_BLEND_CONSTANT_RED_FP32" type="float" usage="rp_blit"/> - <reg32 offset="0x8861" name="RB_BLEND_CONSTANT_GREEN_FP32" type="float" usage="rp_blit"/> - <reg32 offset="0x8862" name="RB_BLEND_CONSTANT_BLUE_FP32" type="float" usage="rp_blit"/> - <reg32 offset="0x8863" name="RB_BLEND_CONSTANT_ALPHA_FP32" type="float" usage="rp_blit"/> + <reg32 offset="0x8860" name="RB_BLEND_CONSTANT_RED_FP32" type="float" usage="draw"/> + <reg32 offset="0x8861" name="RB_BLEND_CONSTANT_GREEN_FP32" type="float" usage="draw"/> + <reg32 offset="0x8862" name="RB_BLEND_CONSTANT_BLUE_FP32" type="float" usage="draw"/> + <reg32 offset="0x8863" name="RB_BLEND_CONSTANT_ALPHA_FP32" type="float" usage="draw"/> <reg32 offset="0x8864" name="RB_ALPHA_TEST_CNTL" usage="cmd"> <bitfield name="ALPHA_REF" low="0" high="7" type="hex"/> <bitfield name="ALPHA_TEST" pos="8" type="boolean"/> <bitfield name="ALPHA_TEST_FUNC" low="9" high="11" type="adreno_compare_func"/> </reg32> - <reg32 offset="0x8865" name="RB_BLEND_CNTL" usage="rp_blit"> + <reg32 offset="0x8865" name="RB_BLEND_CNTL" usage="draw"> <!-- per-mrt enable bit --> <bitfield name="BLEND_READS_DEST" low="0" high="7"/> 
<bitfield name="INDEPENDENT_BLEND" pos="8" type="boolean"/> @@ -2401,12 +2451,12 @@ by a particular renderpass/blit. <bitfield name="ALPHA_TO_ONE" pos="11" type="boolean"/> <bitfield name="SAMPLE_MASK" low="16" high="31"/> </reg32> - <reg32 offset="0x8866" name="RB_LB_PARAM_LIMIT" variants="A8XX-" usage="rp_blit"> + <reg32 offset="0x8866" name="RB_LB_PARAM_LIMIT" variants="A8XX-" usage="draw"> <bitfield name="PRIMALLOCTHRESHOLD" low="0" high="2" type="uint"/> </reg32> - <reg32 offset="0x8870" name="RB_DEPTH_PLANE_CNTL" type="a6xx_depth_plane_cntl" usage="rp_blit"/> + <reg32 offset="0x8870" name="RB_DEPTH_PLANE_CNTL" type="a6xx_depth_plane_cntl" usage="draw"/> - <reg32 offset="0x8871" name="RB_DEPTH_CNTL" usage="rp_blit"> + <reg32 offset="0x8871" name="RB_DEPTH_CNTL" usage="draw"> <bitfield name="Z_TEST_ENABLE" pos="0" type="boolean"/> <bitfield name="Z_WRITE_ENABLE" pos="1" type="boolean"/> <bitfield name="ZFUNC" low="2" high="4" type="adreno_compare_func"/> @@ -2422,23 +2472,23 @@ by a particular renderpass/blit. 
</reg32> <!-- duplicates GRAS_SU_DEPTH_BUFFER_INFO: --> - <reg32 offset="0x8872" name="RB_DEPTH_BUFFER_INFO" variants="A6XX" type="a6xx_depth_buffer_info" usage="rp_blit"/> + <reg32 offset="0x8872" name="RB_DEPTH_BUFFER_INFO" variants="A6XX" type="a6xx_depth_buffer_info" usage="draw"/> <!-- first 4 bits duplicates GRAS_SU_DEPTH_BUFFER_INFO --> - <reg32 offset="0x8872" name="RB_DEPTH_BUFFER_INFO" type="a6xx_depth_buffer_info" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0x8872" name="RB_DEPTH_BUFFER_INFO" type="a6xx_depth_buffer_info" variants="A7XX-" usage="draw"> <bitfield name="PRT" low="3" high="4"/> <bitfield name="TILEMODE" low="5" high="6" type="a6xx_tile_mode"/> <bitfield name="LOSSLESSCOMPEN" pos="7" type="boolean"/> </reg32> - <reg32 offset="0x8873" name="RB_DEPTH_BUFFER_PITCH" low="0" high="13" shr="6" type="uint" usage="rp_blit"/> - <reg32 offset="0x8874" name="RB_DEPTH_BUFFER_ARRAY_PITCH" low="0" high="27" shr="6" type="uint" usage="rp_blit"/> - <reg64 offset="0x8875" name="RB_DEPTH_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/> - <reg32 offset="0x8877" name="RB_DEPTH_GMEM_BASE" low="12" high="31" shr="12" usage="rp_blit"/> + <reg32 offset="0x8873" name="RB_DEPTH_BUFFER_PITCH" low="0" high="13" shr="6" type="uint" usage="draw"/> + <reg32 offset="0x8874" name="RB_DEPTH_BUFFER_ARRAY_PITCH" low="0" high="27" shr="6" type="uint" usage="draw"/> + <reg64 offset="0x8875" name="RB_DEPTH_BUFFER_BASE" type="waddress" align="64" usage="draw"/> + <reg32 offset="0x8877" name="RB_DEPTH_GMEM_BASE" low="12" high="31" shr="12" usage="draw"/> - <reg32 offset="0x8878" name="RB_DEPTH_BOUND_MIN" type="float" usage="rp_blit"/> - <reg32 offset="0x8879" name="RB_DEPTH_BOUND_MAX" type="float" usage="rp_blit"/> + <reg32 offset="0x8878" name="RB_DEPTH_BOUND_MIN" type="float" usage="draw"/> + <reg32 offset="0x8879" name="RB_DEPTH_BOUND_MAX" type="float" usage="draw"/> <!-- 0x887a-0x887f invalid --> - <reg32 offset="0x8880" name="RB_STENCIL_CNTL" usage="rp_blit"> + 
<reg32 offset="0x8880" name="RB_STENCIL_CNTL" usage="draw"> <bitfield name="STENCIL_ENABLE" pos="0" type="boolean"/> <bitfield name="STENCIL_ENABLE_BF" pos="1" type="boolean"/> <!-- @@ -2458,39 +2508,39 @@ by a particular renderpass/blit. <bitfield name="ZFAIL_BF" low="29" high="31" type="adreno_stencil_op"/> </reg32> - <reg32 offset="0x8881" name="RB_STENCIL_BUFFER_INFO" variants="A6XX" usage="rp_blit"> + <reg32 offset="0x8881" name="RB_STENCIL_BUFFER_INFO" variants="A6XX" usage="draw"> <bitfield name="SEPARATE_STENCIL" pos="0" type="boolean"/> <bitfield name="UNK1" pos="1" type="boolean"/> </reg32> - <reg32 offset="0x8881" name="RB_STENCIL_BUFFER_INFO" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0x8881" name="RB_STENCIL_BUFFER_INFO" variants="A7XX-" usage="draw"> <bitfield name="SEPARATE_STENCIL" pos="0" type="boolean"/> <bitfield name="UNK1" pos="1" type="boolean"/> <bitfield name="TILEMODE" low="2" high="3" type="a6xx_tile_mode"/> </reg32> - <reg32 offset="0x8882" name="RB_STENCIL_BUFFER_PITCH" low="0" high="11" shr="6" type="uint" usage="rp_blit"/> - <reg32 offset="0x8883" name="RB_STENCIL_BUFFER_ARRAY_PITCH" low="0" high="23" shr="6" type="uint" usage="rp_blit"/> - <reg64 offset="0x8884" name="RB_STENCIL_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/> - <reg32 offset="0x8886" name="RB_STENCIL_GMEM_BASE" low="12" high="31" shr="12" usage="rp_blit"/> - <reg32 offset="0x8887" name="RB_STENCIL_REF_CNTL" usage="rp_blit"> + <reg32 offset="0x8882" name="RB_STENCIL_BUFFER_PITCH" low="0" high="11" shr="6" type="uint" usage="draw"/> + <reg32 offset="0x8883" name="RB_STENCIL_BUFFER_ARRAY_PITCH" low="0" high="23" shr="6" type="uint" usage="draw"/> + <reg64 offset="0x8884" name="RB_STENCIL_BUFFER_BASE" type="waddress" align="64" usage="draw"/> + <reg32 offset="0x8886" name="RB_STENCIL_GMEM_BASE" low="12" high="31" shr="12" usage="draw"/> + <reg32 offset="0x8887" name="RB_STENCIL_REF_CNTL" usage="draw"> <bitfield name="REF" low="0" high="7"/> <bitfield 
name="BFREF" low="8" high="15"/> </reg32> - <reg32 offset="0x8888" name="RB_STENCIL_MASK" usage="rp_blit"> + <reg32 offset="0x8888" name="RB_STENCIL_MASK" usage="draw"> <bitfield name="MASK" low="0" high="7"/> <bitfield name="BFMASK" low="8" high="15"/> </reg32> - <reg32 offset="0x8889" name="RB_STENCIL_WRITE_MASK" usage="rp_blit"> + <reg32 offset="0x8889" name="RB_STENCIL_WRITE_MASK" usage="draw"> <bitfield name="WRMASK" low="0" high="7"/> <bitfield name="BFWRMASK" low="8" high="15"/> </reg32> <!-- 0x888a-0x888f invalid --> - <reg32 offset="0x8890" name="RB_WINDOW_OFFSET" type="a6xx_reg_xy" usage="rp_blit"/> + <reg32 offset="0x8890" name="RB_WINDOW_OFFSET" type="a6xx_reg_xy" usage="cmd"/> <reg32 offset="0x8891" name="RB_SAMPLE_COUNTER_CNTL" usage="cmd"> <bitfield name="DISABLE" pos="0" type="boolean"/> <bitfield name="COPY" pos="1" type="boolean"/> </reg32> <!-- 0x8892-0x8897 invalid --> - <reg32 offset="0x8898" name="RB_LRZ_CNTL" usage="rp_blit"> + <reg32 offset="0x8898" name="RB_LRZ_CNTL" usage="draw"> <bitfield name="ENABLE" pos="0" type="boolean"/> </reg32> <reg32 offset="0x8899" name="RB_LRZ_CNTL2" variants="A7XX-" usage="cmd"> @@ -2498,41 +2548,41 @@ by a particular renderpass/blit. 
</reg32> <!-- 0x8899-0x88bf invalid --> <!-- clamps depth value for depth test/write --> - <reg32 offset="0x88c0" name="RB_VIEWPORT_ZCLAMP_MIN" type="float" usage="rp_blit" variants="A6XX-A7XX"/> - <reg32 offset="0x88c1" name="RB_VIEWPORT_ZCLAMP_MAX" type="float" usage="rp_blit" variants="A6XX-A7XX"/> + <reg32 offset="0x88c0" name="RB_VIEWPORT_ZCLAMP_MIN" type="float" usage="draw" variants="A6XX-A7XX"/> + <reg32 offset="0x88c1" name="RB_VIEWPORT_ZCLAMP_MAX" type="float" usage="draw" variants="A6XX-A7XX"/> <!-- todo allow type="float" on an <array/> --> - <array offset="0x88b0" name="RB_VIEWPORT_ZCLAMP_MIN" stride="1" length="16" usage="rp_blit" variants="A8XX-"> + <array offset="0x88b0" name="RB_VIEWPORT_ZCLAMP_MIN" stride="1" length="16" usage="draw" variants="A8XX-"> <reg32 offset="0" name="REG" type="float"/> </array> - <array offset="0x88c0" name="RB_VIEWPORT_ZCLAMP_MAX" stride="1" length="16" usage="rp_blit" variants="A8XX-"> + <array offset="0x88c0" name="RB_VIEWPORT_ZCLAMP_MAX" stride="1" length="16" usage="draw" variants="A8XX-"> <reg32 offset="0" name="REG" type="float"/> </array> <!-- 0x88c2-0x88cf invalid--> - <reg32 offset="0x88d0" name="RB_RESOLVE_CNTL_0" usage="rp_blit"> + <reg32 offset="0x88d0" name="RB_RESOLVE_CNTL_0" usage="resolve"> <bitfield name="UNK0" low="0" high="12"/> <bitfield name="UNK16" low="16" high="26"/> </reg32> - <reg32 offset="0x88d1" name="RB_RESOLVE_CNTL_1" type="a6xx_reg_xy" usage="rp_blit"/> - <reg32 offset="0x88d2" name="RB_RESOLVE_CNTL_2" type="a6xx_reg_xy" usage="rp_blit"/> + <reg32 offset="0x88d1" name="RB_RESOLVE_CNTL_1" type="a6xx_reg_xy" usage="resolve"/> + <reg32 offset="0x88d2" name="RB_RESOLVE_CNTL_2" type="a6xx_reg_xy" usage="resolve"/> <!-- weird to duplicate other regs from same block?? 
--> - <reg32 offset="0x88d3" name="RB_RESOLVE_CNTL_3" variants="A6XX-A7XX" usage="rp_blit"> + <reg32 offset="0x88d3" name="RB_RESOLVE_CNTL_3" variants="A6XX-A7XX" usage="resolve"> <bitfield name="BINW" low="0" high="5" shr="5" type="uint"/> <bitfield name="BINH" low="8" high="14" shr="4" type="uint"/> </reg32> - <reg32 offset="0x88d3" name="RB_RESOLVE_CNTL_3" type="a8xx_bin_size" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x88f0" name="RB_RESOLVE_CNTL_4" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x88f1" name="RB_RESOLVE_CNTL_5" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x88d3" name="RB_RESOLVE_CNTL_3" type="a8xx_bin_size" variants="A8XX-" usage="resolve"/> + <reg32 offset="0x88f0" name="RB_RESOLVE_CNTL_4" variants="A8XX-" usage="resolve"/> + <reg32 offset="0x88f1" name="RB_RESOLVE_CNTL_5" variants="A8XX-" usage="resolve"/> - <reg32 offset="0x88d4" name="RB_RESOLVE_WINDOW_OFFSET" type="a6xx_reg_xy" usage="rp_blit"/> - <reg32 offset="0x88d5" name="RB_RESOLVE_GMEM_BUFFER_INFO" usage="rp_blit"> + <reg32 offset="0x88d4" name="RB_RESOLVE_WINDOW_OFFSET" type="a6xx_reg_xy" usage="resolve"/> + <reg32 offset="0x88d5" name="RB_RESOLVE_GMEM_BUFFER_INFO" usage="resolve"> <bitfield name="SAMPLES" low="3" high="4" type="a3xx_msaa_samples"/> </reg32> - <reg32 offset="0x88d6" name="RB_RESOLVE_GMEM_BUFFER_BASE" low="12" high="31" shr="12" usage="rp_blit"/> + <reg32 offset="0x88d6" name="RB_RESOLVE_GMEM_BUFFER_BASE" low="12" high="31" shr="12" usage="resolve"/> <!-- s/DST_FORMAT/DST_INFO/ probably: --> - <reg32 offset="0x88d7" name="RB_RESOLVE_SYSTEM_BUFFER_INFO" usage="rp_blit"> + <reg32 offset="0x88d7" name="RB_RESOLVE_SYSTEM_BUFFER_INFO" usage="resolve"> <bitfield name="TILE_MODE" low="0" high="1" type="a6xx_tile_mode"/> <bitfield name="FLAGS" pos="2" type="boolean"/> <bitfield name="SAMPLES" low="3" high="4" type="a3xx_msaa_samples"/> @@ -2541,23 +2591,23 @@ by a particular renderpass/blit. 
<bitfield name="UNK15" pos="15" type="boolean"/> <bitfield name="MUTABLEEN" pos="16" type="boolean" variants="A7XX-"/> </reg32> - <reg64 offset="0x88d8" name="RB_RESOLVE_SYSTEM_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/> - <reg32 offset="0x88da" name="RB_RESOLVE_SYSTEM_BUFFER_PITCH" low="0" high="15" shr="6" type="uint" usage="rp_blit"/> + <reg64 offset="0x88d8" name="RB_RESOLVE_SYSTEM_BUFFER_BASE" type="waddress" align="64" usage="resolve"/> + <reg32 offset="0x88da" name="RB_RESOLVE_SYSTEM_BUFFER_PITCH" low="0" high="15" shr="6" type="uint" usage="resolve"/> <!-- array-pitch is size of layer --> - <reg32 offset="0x88db" name="RB_RESOLVE_SYSTEM_BUFFER_ARRAY_PITCH" low="0" high="28" shr="6" type="uint" usage="rp_blit"/> - <reg64 offset="0x88dc" name="RB_RESOLVE_SYSTEM_FLAG_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/> + <reg32 offset="0x88db" name="RB_RESOLVE_SYSTEM_BUFFER_ARRAY_PITCH" low="0" high="28" shr="6" type="uint" usage="resolve"/> + <reg64 offset="0x88dc" name="RB_RESOLVE_SYSTEM_FLAG_BUFFER_BASE" type="waddress" align="64" usage="resolve"/> <bitset name="a6xx_flag_buffer_pitch" inline="yes"> <bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/> <bitfield name="ARRAY_PITCH" low="11" high="28" shr="7" type="uint"/> </bitset> - <reg32 offset="0x88de" name="RB_RESOLVE_SYSTEM_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch" usage="rp_blit"/> + <reg32 offset="0x88de" name="RB_RESOLVE_SYSTEM_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch" usage="resolve"/> - <reg32 offset="0x88df" name="RB_RESOLVE_CLEAR_COLOR_DW0" usage="rp_blit"/> - <reg32 offset="0x88e0" name="RB_RESOLVE_CLEAR_COLOR_DW1" usage="rp_blit"/> - <reg32 offset="0x88e1" name="RB_RESOLVE_CLEAR_COLOR_DW2" usage="rp_blit"/> - <reg32 offset="0x88e2" name="RB_RESOLVE_CLEAR_COLOR_DW3" usage="rp_blit"/> + <reg32 offset="0x88df" name="RB_RESOLVE_CLEAR_COLOR_DW0" usage="resolve"/> + <reg32 offset="0x88e0" name="RB_RESOLVE_CLEAR_COLOR_DW1" usage="resolve"/> + <reg32 
offset="0x88e1" name="RB_RESOLVE_CLEAR_COLOR_DW2" usage="resolve"/> + <reg32 offset="0x88e2" name="RB_RESOLVE_CLEAR_COLOR_DW3" usage="resolve"/> <enum name="a6xx_blit_event_type"> <value value="0x0" name="BLIT_EVENT_STORE"/> @@ -2567,7 +2617,7 @@ by a particular renderpass/blit. </enum> <!-- seems somewhat similar to what we called RB_CLEAR_CNTL on a5xx: --> - <reg32 offset="0x88e3" name="RB_RESOLVE_OPERATION" usage="rp_blit"> + <reg32 offset="0x88e3" name="RB_RESOLVE_OPERATION" usage="resolve"> <bitfield name="TYPE" low="0" high="1" type="a6xx_blit_event_type"/> <bitfield name="SAMPLE_0" pos="2" type="boolean"/> <!-- takes sample 0 instead of averaging --> <bitfield name="DEPTH" pos="3" type="boolean"/> <!-- z16/z32/z24s8/x24x8 clear or resolve? --> @@ -2595,7 +2645,7 @@ by a particular renderpass/blit. <value value="0x0" name="CLEAR_MODE_SYSMEM"/> <value value="0x1" name="CLEAR_MODE_GMEM"/> </enum> - <reg32 offset="0x88e4" name="RB_CLEAR_TARGET" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0x88e4" name="RB_CLEAR_TARGET" variants="A7XX-" usage="resolve"> <bitfield name="CLEAR_MODE" pos="0" type="a7xx_blit_clear_mode"/> </reg32> @@ -2637,17 +2687,17 @@ by a particular renderpass/blit. <bitfield name="DEPTH_CACHE_SIZE" low="30" high="31" type="a6xx_ccu_cache_size"/> </reg32> - <reg32 offset="0x88e6" name="RB_RESOLVE_GMEM_BUFFER_CNTL" variants="A8XX-"> + <reg32 offset="0x88e6" name="RB_RESOLVE_GMEM_BUFFER_CNTL" variants="A8XX-" usage="resolve"> <bitfield name="FULL_IN_GMEM" pos="0" type="boolean"/> </reg32> <!-- always 0x0 ? --> - <reg32 offset="0x88f0" name="RB_UNKNOWN_88F0" low="0" high="11" variants="A6XX" usage="cmd"/> + <reg32 offset="0x88f0" name="RB_UNKNOWN_88F0" low="0" high="11" variants="A6XX" usage="resolve"/> <!-- could be for separate stencil? 
(or may not be a flag buffer at all) --> <reg64 offset="0x88f1" name="RB_UNK_FLAG_BUFFER_BASE" type="waddress" align="64" variants="A6XX"/> <reg32 offset="0x88f3" name="RB_UNK_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch" variants="A6XX"/> - <reg32 offset="0x88f4" name="RB_VRS_CONFIG" usage="rp_blit"> + <reg32 offset="0x88f4" name="RB_VRS_CONFIG" usage="draw"> <bitfield name="UNK2" pos="2" type="boolean"/> <bitfield name="PIPELINE_FSR_ENABLE" pos="4" type="boolean"/> <bitfield name="ATTACHMENT_FSR_ENABLE" pos="5" type="boolean"/> @@ -2656,16 +2706,25 @@ by a particular renderpass/blit. <reg32 offset="0x88f5" name="RB_BIN_FOVEAT" variants="A7XX-" usage="cmd"> <bitfield name="BINSCALEEN" pos="6" type="boolean"/> </reg32> + + <array offset="0x8950" name="RB_BIN_FOVEAT_XY" stride="1" length="6" variants="A8XX-" usage="cmd"> + <reg32 offset="0" name="OFFSET" type="a8xx_bin_foveat_xy"/> + </array> + + <array offset="0x8960" name="RB_BIN_FOVEAT_XY_FDM" stride="1" length="6" variants="A8XX-" usage="cmd"> + <reg32 offset="0" name="OFFSET" type="a8xx_bin_foveat_xy"/> + </array> + <!-- 0x88f6-0x88ff invalid --> - <reg64 offset="0x8900" name="RB_DEPTH_FLAG_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/> - <reg32 offset="0x8902" name="RB_DEPTH_FLAG_BUFFER_PITCH" usage="rp_blit"> + <reg64 offset="0x8900" name="RB_DEPTH_FLAG_BUFFER_BASE" type="waddress" align="64" usage="draw"/> + <reg32 offset="0x8902" name="RB_DEPTH_FLAG_BUFFER_PITCH" usage="draw"> <bitfield name="PITCH" low="0" high="6" shr="6" type="uint"/> <!-- TODO: actually part of array pitch --> <bitfield name="UNK8" low="8" high="10"/> <bitfield name="ARRAY_PITCH" low="11" high="27" shr="7" type="uint"/> </reg32> - <array offset="0x8903" name="RB_COLOR_FLAG_BUFFER" stride="3" length="8" usage="rp_blit"> + <array offset="0x8903" name="RB_COLOR_FLAG_BUFFER" stride="3" length="8" usage="draw"> <reg64 offset="0" name="ADDR" type="waddress" align="64"/> <reg32 offset="2" name="PITCH" 
type="a6xx_flag_buffer_pitch"/> </array> @@ -2683,7 +2742,8 @@ by a particular renderpass/blit. <reg32 offset="0x8813" name="RB_DEPTH_GMEM_DIMENSION" type="a8xx_gmem_dimension" variants="A8XX-"/> <reg32 offset="0x8814" name="RB_STENCIL_GMEM_DIMENSION" type="a8xx_gmem_dimension" variants="A8XX-"/> - <reg32 offset="0x8815" name="RB_RESOLVE_GMEM_DIMENSION" type="a8xx_gmem_dimension" variants="A8XX-"/> + <reg32 offset="0x8815" name="RB_RESOLVE_GMEM_DIMENSION" type="a8xx_gmem_dimension" variants="A8XX-" usage="resolve"/> + <reg32 offset="0x88af" name="RB_RESOLVE_GMEM_PARTITION" variants="A8XX-" usage="resolve"/> <array offset="0x8930" name="RB_MRT_GMEM_DIMENSION" variants="A8XX-" stride="1" length="8"> <reg32 offset="0" name="REG" type="a8xx_gmem_dimension"/> @@ -2693,13 +2753,13 @@ by a particular renderpass/blit. These show up in a6xx gen3+ but so far haven't found an example of blob writing non-zero: --> - <reg32 offset="0x8a00" name="RB_UNKNOWN_8A00" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0x8a10" name="RB_UNKNOWN_8A10" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0x8a20" name="RB_UNKNOWN_8A20" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0x8a30" name="RB_UNKNOWN_8A30" variants="A6XX" usage="rp_blit"/> + <reg32 offset="0x8a00" name="RB_UNKNOWN_8A00" variants="A6XX" usage="draw"/> + <reg32 offset="0x8a10" name="RB_UNKNOWN_8A10" variants="A6XX" usage="draw"/> + <reg32 offset="0x8a20" name="RB_UNKNOWN_8A20" variants="A6XX" usage="draw"/> + <reg32 offset="0x8a30" name="RB_UNKNOWN_8A30" variants="A6XX" usage="draw"/> - <reg32 offset="0x8c00" name="RB_A2D_BLT_CNTL" type="a6xx_a2d_blt_cntl" usage="rp_blit"/> - <reg32 offset="0x8c01" name="RB_A2D_PIXEL_CNTL" low="0" high="31" usage="rp_blit"/> + <reg32 offset="0x8c00" name="RB_A2D_BLT_CNTL" type="a6xx_a2d_blt_cntl" usage="blit"/> + <reg32 offset="0x8c01" name="RB_A2D_PIXEL_CNTL" low="0" high="31" usage="blit"/> <bitset name="a6xx_a2d_src_texture_info" inline="yes"> <bitfield name="COLOR_FORMAT" 
low="0" high="7" type="a6xx_format"/> @@ -2731,28 +2791,28 @@ by a particular renderpass/blit. </bitset> <!-- 0x8c02-0x8c16 invalid --> - <reg32 offset="0x8c17" name="RB_A2D_DEST_BUFFER_INFO" type="a6xx_a2d_dest_buffer_info" usage="rp_blit"/> - <reg64 offset="0x8c18" name="RB_A2D_DEST_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/> - <reg32 offset="0x8c1a" name="RB_A2D_DEST_BUFFER_PITCH" low="0" high="15" shr="6" type="uint" usage="rp_blit"/> + <reg32 offset="0x8c17" name="RB_A2D_DEST_BUFFER_INFO" type="a6xx_a2d_dest_buffer_info" usage="blit"/> + <reg64 offset="0x8c18" name="RB_A2D_DEST_BUFFER_BASE" type="waddress" align="64" usage="blit"/> + <reg32 offset="0x8c1a" name="RB_A2D_DEST_BUFFER_PITCH" low="0" high="15" shr="6" type="uint" usage="blit"/> <!-- this is a guess but seems likely (for NV12/IYUV): --> - <reg64 offset="0x8c1b" name="RB_A2D_DEST_BUFFER_BASE_1" type="waddress" align="64" usage="rp_blit"/> - <reg32 offset="0x8c1d" name="RB_A2D_DEST_BUFFER_PITCH_1" low="0" high="15" shr="6" type="uint" usage="rp_blit"/> - <reg64 offset="0x8c1e" name="RB_A2D_DEST_BUFFER_BASE_2" type="waddress" align="64" usage="rp_blit"/> + <reg64 offset="0x8c1b" name="RB_A2D_DEST_BUFFER_BASE_1" type="waddress" align="64" usage="blit"/> + <reg32 offset="0x8c1d" name="RB_A2D_DEST_BUFFER_PITCH_1" low="0" high="15" shr="6" type="uint" usage="blit"/> + <reg64 offset="0x8c1e" name="RB_A2D_DEST_BUFFER_BASE_2" type="waddress" align="64" usage="blit"/> - <reg64 offset="0x8c20" name="RB_A2D_DEST_FLAG_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/> - <reg32 offset="0x8c22" name="RB_A2D_DEST_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch" usage="rp_blit"/> + <reg64 offset="0x8c20" name="RB_A2D_DEST_FLAG_BUFFER_BASE" type="waddress" align="64" usage="blit"/> + <reg32 offset="0x8c22" name="RB_A2D_DEST_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch" usage="blit"/> <!-- this is a guess but seems likely (for NV12 with UBWC): --> - <reg64 offset="0x8c23" 
name="RB_A2D_DEST_FLAG_BUFFER_BASE_1" type="waddress" align="64" usage="rp_blit"/> - <reg32 offset="0x8c25" name="RB_A2D_DEST_FLAG_BUFFER_PITCH_1" low="0" high="7" shr="6" type="uint" usage="rp_blit"/> + <reg64 offset="0x8c23" name="RB_A2D_DEST_FLAG_BUFFER_BASE_1" type="waddress" align="64" usage="blit"/> + <reg32 offset="0x8c25" name="RB_A2D_DEST_FLAG_BUFFER_PITCH_1" low="0" high="7" shr="6" type="uint" usage="blit"/> <!-- TODO: 0x8c26-0x8c33 are all full 32-bit registers --> <!-- unlike a5xx, these are per channel values rather than packed --> - <reg32 offset="0x8c2c" name="RB_A2D_CLEAR_COLOR_DW0" usage="rp_blit"/> - <reg32 offset="0x8c2d" name="RB_A2D_CLEAR_COLOR_DW1" usage="rp_blit"/> - <reg32 offset="0x8c2e" name="RB_A2D_CLEAR_COLOR_DW2" usage="rp_blit"/> - <reg32 offset="0x8c2f" name="RB_A2D_CLEAR_COLOR_DW3" usage="rp_blit"/> + <reg32 offset="0x8c2c" name="RB_A2D_CLEAR_COLOR_DW0" usage="blit"/> + <reg32 offset="0x8c2d" name="RB_A2D_CLEAR_COLOR_DW1" usage="blit"/> + <reg32 offset="0x8c2e" name="RB_A2D_CLEAR_COLOR_DW2" usage="blit"/> + <reg32 offset="0x8c2f" name="RB_A2D_CLEAR_COLOR_DW3" usage="blit"/> - <reg32 offset="0x8c34" name="RB_UNKNOWN_8C34" variants="A7XX-" usage="cmd"/> + <reg32 offset="0x8c34" name="RB_A2D_UNKNOWN_8C34" variants="A7XX-" usage="blit"/> <!-- 0x8c35-0x8dff invalid --> @@ -2821,7 +2881,7 @@ by a particular renderpass/blit. <!-- 0x8e26-0x8e27 invalid --> <reg32 offset="0x8f00" name="RB_CMP_NC_MODE_CNTL" variants="A8XX-"/> - <reg32 offset="0x8f01" name="RB_RESOLVE_PREFETCH_CNTL" variants="A8XX-"/> + <reg32 offset="0x8f01" name="RB_RESOLVE_PREFETCH_CNTL" variants="A8XX-" usage="resolve"/> <reg32 offset="0x8f02" name="RB_CMP_DBG_ECO_CNTL" variants="A8XX-"/> <reg32 offset="0x8f03" name="RB_UNSLICE_STATUS" variants="A8XX-"/> @@ -2847,7 +2907,7 @@ by a particular renderpass/blit. 
<!-- 0x9000-0x90ff invalid --> - <reg32 offset="0x9100" name="VPC_GS_PARAM" variants="A6XX" usage="rp_blit"> + <reg32 offset="0x9100" name="VPC_GS_PARAM" variants="A6XX" usage="draw"> <bitfield name="LINELENGTHLOC" low="0" high="7" type="uint"/> </reg32> @@ -2860,17 +2920,17 @@ by a particular renderpass/blit. <bitfield name="CLIP_DIST_03_LOC" low="8" high="15" type="uint"/> <bitfield name="CLIP_DIST_47_LOC" low="16" high="23" type="uint"/> </bitset> - <reg32 offset="0x9101" name="VPC_VS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9102" name="VPC_GS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9103" name="VPC_DS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9101" name="VPC_VS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9102" name="VPC_GS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9103" name="VPC_DS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="draw"/> - <reg32 offset="0x9307" name="VPC_VS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A8XX" usage="rp_blit"/> - <reg32 offset="0x9308" name="VPC_GS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A8XX" usage="rp_blit"/> - <reg32 offset="0x9309" name="VPC_DS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A8XX" usage="rp_blit"/> + <reg32 offset="0x9307" name="VPC_VS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A8XX" usage="draw"/> + <reg32 offset="0x9308" name="VPC_GS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A8XX" usage="draw"/> + <reg32 offset="0x9309" name="VPC_DS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A8XX" usage="draw"/> - <reg32 offset="0x9311" name="VPC_VS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - 
<reg32 offset="0x9312" name="VPC_GS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9313" name="VPC_DS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9311" name="VPC_VS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9312" name="VPC_GS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9313" name="VPC_DS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="draw"/> <bitset name="a6xx_vpc_xs_siv_cntl" inline="yes"> <bitfield name="LAYERLOC" low="0" high="7" type="uint"/> @@ -2878,17 +2938,17 @@ by a particular renderpass/blit. <bitfield name="SHADINGRATELOC" low="16" high="23" type="uint" variants="A7XX-"/> </bitset> - <reg32 offset="0x9104" name="VPC_VS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9105" name="VPC_GS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9106" name="VPC_DS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9104" name="VPC_VS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9105" name="VPC_GS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9106" name="VPC_DS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="draw"/> - <reg32 offset="0x930a" name="VPC_VS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x930b" name="VPC_GS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x930c" name="VPC_DS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x930a" name="VPC_VS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A8XX-" usage="draw"/> + 
<reg32 offset="0x930b" name="VPC_GS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A8XX-" usage="draw"/> + <reg32 offset="0x930c" name="VPC_DS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A8XX-" usage="draw"/> - <reg32 offset="0x9314" name="VPC_VS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9315" name="VPC_GS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9316" name="VPC_DS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9314" name="VPC_VS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9315" name="VPC_GS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9316" name="VPC_DS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="draw"/> <bitset name="a6xx_vpc_rast_stream_cntl" inline="yes"> <!-- which stream to send to GRAS --> @@ -2897,19 +2957,19 @@ by a particular renderpass/blit. 
<bitfield name="DISCARD" pos="2" type="boolean"/> </bitset> - <reg32 offset="0x9980" name="VPC_RAST_STREAM_CNTL" type="a6xx_vpc_rast_stream_cntl" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0x9107" name="VPC_RAST_STREAM_CNTL" type="a6xx_vpc_rast_stream_cntl" variants="A7XX" usage="rp_blit"/> - <reg32 offset="0x930d" name="VPC_RAST_STREAM_CNTL" type="a6xx_vpc_rast_stream_cntl" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x9317" name="VPC_RAST_STREAM_CNTL_V2" type="a6xx_vpc_rast_stream_cntl" variants="A7XX" usage="rp_blit"/> + <reg32 offset="0x9980" name="VPC_RAST_STREAM_CNTL" type="a6xx_vpc_rast_stream_cntl" variants="A6XX" usage="draw"/> + <reg32 offset="0x9107" name="VPC_RAST_STREAM_CNTL" type="a6xx_vpc_rast_stream_cntl" variants="A7XX" usage="draw"/> + <reg32 offset="0x930d" name="VPC_RAST_STREAM_CNTL" type="a6xx_vpc_rast_stream_cntl" variants="A8XX-" usage="draw"/> + <reg32 offset="0x9317" name="VPC_RAST_STREAM_CNTL_V2" type="a6xx_vpc_rast_stream_cntl" variants="A7XX" usage="draw"/> - <reg32 offset="0x9107" name="VPC_UNKNOWN_9107" variants="A6XX" usage="rp_blit"> + <reg32 offset="0x9107" name="VPC_UNKNOWN_9107" variants="A6XX" usage="draw"> <!-- this mirrors VPC_RAST_STREAM_CNTL::DISCARD, although it seems it's unused --> <bitfield name="RASTER_DISCARD" pos="0" type="boolean"/> <bitfield name="UNK2" pos="2" type="boolean"/> </reg32> - <reg32 offset="0x9108" name="VPC_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x930e" name="VPC_RAST_CNTL" type="a6xx_rast_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9108" name="VPC_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x930e" name="VPC_RAST_CNTL" type="a6xx_rast_cntl" variants="A8XX-" usage="draw"/> <bitset name="a6xx_pc_cntl" inline="yes"> <bitfield name="PRIMITIVE_RESTART" pos="0" type="boolean"/> <bitfield name="PROVOKING_VTX_LAST" pos="1" type="boolean"/> @@ -2949,14 +3009,14 @@ by a particular 
renderpass/blit. <bitfield name="VIEWS" low="2" high="6" type="uint"/> </bitset> - <reg32 offset="0x9109" name="VPC_PC_CNTL" type="a6xx_pc_cntl" variants="A7XX" usage="rp_blit"/> - <reg32 offset="0x930f" name="VPC_PC_CNTL" type="a6xx_pc_cntl" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x910a" name="VPC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A7XX" usage="rp_blit"/> - <reg32 offset="0x90c0" name="VPC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x910b" name="VPC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A7XX" usage="rp_blit"/> - <reg32 offset="0x90c1" name="VPC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x910c" name="VPC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A7XX" usage="rp_blit"/> - <reg32 offset="0x931a" name="VPC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9109" name="VPC_PC_CNTL" type="a6xx_pc_cntl" variants="A7XX" usage="draw"/> + <reg32 offset="0x930f" name="VPC_PC_CNTL" type="a6xx_pc_cntl" variants="A8XX-" usage="draw"/> + <reg32 offset="0x910a" name="VPC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A7XX" usage="draw"/> + <reg32 offset="0x90c0" name="VPC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A8XX-" usage="draw"/> + <reg32 offset="0x910b" name="VPC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A7XX" usage="draw"/> + <reg32 offset="0x90c1" name="VPC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A8XX-" usage="draw"/> + <reg32 offset="0x910c" name="VPC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A7XX" usage="draw"/> + <reg32 offset="0x931a" name="VPC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A8XX-" usage="draw"/> <enum name="a6xx_varying_interp_mode"> <value value="0" name="INTERP_SMOOTH"/> @@ -2973,20 +3033,20 @@ by a particular 
renderpass/blit. </enum> <!-- 0x9109-0x91ff invalid --> - <array offset="0x9200" name="VPC_VARYING_INTERP_MODE" stride="1" length="8" variants="A6XX-A7XX" usage="rp_blit"> + <array offset="0x9200" name="VPC_VARYING_INTERP_MODE" stride="1" length="8" variants="A6XX-A7XX" usage="draw"> <doc>Packed array of a6xx_varying_interp_mode</doc> <reg32 offset="0x0" name="MODE"/> </array> - <array offset="0x9208" name="VPC_VARYING_REPLACE_MODE" stride="1" length="8" variants="A6XX-A7XX" usage="rp_blit"> + <array offset="0x9208" name="VPC_VARYING_REPLACE_MODE" stride="1" length="8" variants="A6XX-A7XX" usage="draw"> <doc>Packed array of a6xx_varying_ps_repl_mode</doc> <reg32 offset="0x0" name="MODE"/> </array> - <array offset="0x9240" name="VPC_VARYING_INTERP_MODE" stride="1" length="8" variants="A8XX-" usage="rp_blit"> + <array offset="0x9240" name="VPC_VARYING_INTERP_MODE" stride="1" length="8" variants="A8XX-" usage="draw"> <doc>Packed array of a6xx_varying_interp_mode</doc> <reg32 offset="0x0" name="MODE"/> </array> - <array offset="0x9248" name="VPC_VARYING_REPLACE_MODE" stride="1" length="8" variants="A8XX-" usage="rp_blit"> + <array offset="0x9248" name="VPC_VARYING_REPLACE_MODE" stride="1" length="8" variants="A8XX-" usage="draw"> <doc>Packed array of a6xx_varying_ps_repl_mode</doc> <reg32 offset="0x0" name="MODE"/> </array> @@ -2995,12 +3055,12 @@ by a particular renderpass/blit. 
<reg32 offset="0x9210" name="VPC_UNKNOWN_9210" low="0" high="31" variants="A6XX" usage="cmd"/> <reg32 offset="0x9211" name="VPC_UNKNOWN_9211" low="0" high="31" variants="A6XX" usage="cmd"/> - <array offset="0x9212" name="VPC_VARYING_LM_TRANSFER_CNTL" stride="1" length="4" variants="A6XX-A7XX" usage="rp_blit"> + <array offset="0x9212" name="VPC_VARYING_LM_TRANSFER_CNTL" stride="1" length="4" variants="A6XX-A7XX" usage="draw"> <!-- one bit per varying component: --> <reg32 offset="0" name="DISABLE"/> </array> - <array offset="0x9252" name="VPC_VARYING_LM_TRANSFER_CNTL" stride="1" length="4" variants="A8XX-" usage="rp_blit"> + <array offset="0x9252" name="VPC_VARYING_LM_TRANSFER_CNTL" stride="1" length="4" variants="A8XX-" usage="draw"> <!-- one bit per varying component: --> <reg32 offset="0" name="DISABLE"/> </array> @@ -3034,8 +3094,8 @@ by a particular renderpass/blit. <bitfield name="RESET" pos="16" type="boolean"/> </bitset> - <reg32 offset="0x9216" name="VPC_SO_MAPPING_WPTR" type="a6xx_vpc_so_mapping_wptr" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9180" name="VPC_SO_MAPPING_WPTR" type="a6xx_vpc_so_mapping_wptr" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9216" name="VPC_SO_MAPPING_WPTR" type="a6xx_vpc_so_mapping_wptr" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9180" name="VPC_SO_MAPPING_WPTR" type="a6xx_vpc_so_mapping_wptr" variants="A8XX-" usage="draw"/> <bitset name="a6xx_vpc_so_mapping_port" inline="yes"> <bitfield name="A_BUF" low="0" high="1" type="uint"/> @@ -3047,8 +3107,8 @@ by a particular renderpass/blit. 
</bitset> <!-- special register, write multiple times to load SO program (not readable) --> - <reg32 offset="0x9217" name="VPC_SO_MAPPING_PORT" type="a6xx_vpc_so_mapping_port" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9181" name="VPC_SO_MAPPING_PORT" type="a6xx_vpc_so_mapping_port" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9217" name="VPC_SO_MAPPING_PORT" type="a6xx_vpc_so_mapping_port" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9181" name="VPC_SO_MAPPING_PORT" type="a6xx_vpc_so_mapping_port" variants="A8XX-" usage="draw"/> <reg64 offset="0x9218" name="VPC_SO_QUERY_BASE" type="waddress" align="32" variants="A6XX-A7XX" usage="cmd"/> <reg64 offset="0x9182" name="VPC_SO_QUERY_BASE" type="waddress" align="32" variants="A8XX-" usage="cmd"/> @@ -3097,13 +3157,13 @@ by a particular renderpass/blit. </bitfield> </bitset> - <reg32 offset="0x9301" name="VPC_VS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9302" name="VPC_GS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9303" name="VPC_DS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9301" name="VPC_VS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9302" name="VPC_GS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9303" name="VPC_DS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="draw"/> - <reg32 offset="0x9300" name="VPC_VS_CNTL" type="a6xx_vpc_xs_cntl" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x9301" name="VPC_GS_CNTL" type="a6xx_vpc_xs_cntl" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x9302" name="VPC_DS_CNTL" type="a6xx_vpc_xs_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9300" name="VPC_VS_CNTL" type="a6xx_vpc_xs_cntl" variants="A8XX-" usage="draw"/> + <reg32 offset="0x9301" name="VPC_GS_CNTL" type="a6xx_vpc_xs_cntl" variants="A8XX-" 
usage="draw"/> + <reg32 offset="0x9302" name="VPC_DS_CNTL" type="a6xx_vpc_xs_cntl" variants="A8XX-" usage="draw"/> <bitset name="a6xx_vpc_ps_cntl" inline="yes"> <bitfield name="NUMNONPOSVAR" low="0" high="7" type="uint"/> @@ -3124,8 +3184,8 @@ by a particular renderpass/blit. </bitfield> </bitset> - <reg32 offset="0x9304" name="VPC_PS_CNTL" type="a6xx_vpc_ps_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9303" name="VPC_PS_CNTL" type="a6xx_vpc_ps_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9304" name="VPC_PS_CNTL" type="a6xx_vpc_ps_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9303" name="VPC_PS_CNTL" type="a6xx_vpc_ps_cntl" variants="A8XX-" usage="draw"/> <bitset name="a6xx_vpc_so_cntl" inline="yes"> <!-- @@ -3138,21 +3198,21 @@ by a particular renderpass/blit. <bitfield name="STREAM_ENABLE" low="15" high="18" type="hex"/> </bitset> - <reg32 offset="0x9305" name="VPC_SO_CNTL" type="a6xx_vpc_so_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9304" name="VPC_SO_CNTL" type="a6xx_vpc_so_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9305" name="VPC_SO_CNTL" type="a6xx_vpc_so_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9304" name="VPC_SO_CNTL" type="a6xx_vpc_so_cntl" variants="A8XX-" usage="draw"/> <bitset name="a6xx_so_override" inline="yes"> <bitfield name="DISABLE" pos="0" type="boolean"/> </bitset> - <reg32 offset="0x9306" name="VPC_SO_OVERRIDE" type="a6xx_so_override" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9305" name="VPC_SO_OVERRIDE" type="a6xx_so_override" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9306" name="VPC_SO_OVERRIDE" type="a6xx_so_override" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9305" name="VPC_SO_OVERRIDE" type="a6xx_so_override" variants="A8XX-" usage="draw"/> - <reg32 offset="0x9807" name="PC_DGEN_SO_OVERRIDE" type="a6xx_so_override" variants="A7XX" usage="rp_blit"/> - <reg32 offset="0x9b0a" 
name="PC_DGEN_SO_OVERRIDE" type="a6xx_so_override" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9807" name="PC_DGEN_SO_OVERRIDE" type="a6xx_so_override" variants="A7XX" usage="draw"/> + <reg32 offset="0x9b0a" name="PC_DGEN_SO_OVERRIDE" type="a6xx_so_override" variants="A8XX-" usage="draw"/> - <reg32 offset="0x9307" name="VPC_PS_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9306" name="VPC_PS_RAST_CNTL" type="a6xx_rast_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9307" name="VPC_PS_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9306" name="VPC_PS_RAST_CNTL" type="a6xx_rast_cntl" variants="A8XX-" usage="draw"/> <reg32 offset="0x9308" name="VPC_ATTR_BUF_GMEM_SIZE" variants="A7XX" type="uint" usage="cmd"/> <reg32 offset="0x9309" name="VPC_ATTR_BUF_GMEM_BASE" variants="A7XX" type="hex" usage="cmd"/> @@ -3172,10 +3232,10 @@ by a particular renderpass/blit. <reg32 offset="0x9b17" name="PC_POS_BUF_GMEM_SIZE" variants="A8XX-" type="uint" usage="cmd"/> <reg32 offset="0x9b18" name="PC_BV_POS_BUF_GMEM_SIZE" variants="A8XX-" type="uint" usage="cmd"/> - <reg32 offset="0x930a" name="VPC_UNKNOWN_930A" variants="A7XX"/> + <reg32 offset="0x930a" name="VPC_UNKNOWN_CNTL" variants="A7XX" usage="draw"/> + <reg32 offset="0x9313" name="VPC_UNKNOWN_CNTL" variants="A8XX-" usage="draw"/> - <reg32 offset="0x9313" name="VPC_UNKNOWN_9313" variants="A8XX-"/> - <reg32 offset="0x9e17" name="PC_UNKNOWN_9E17" variants="A8XX-"/> + <reg32 offset="0x9e17" name="PC_UNKNOWN_9E17" variants="A8XX-" usage="draw"/> <reg32 offset="0x960a" name="VPC_FLATSHADE_MODE_CNTL" variants="A7XX"/> <reg32 offset="0x9741" name="VPC_FLATSHADE_MODE_CNTL" variants="A8XX-"/> @@ -3204,47 +3264,47 @@ by a particular renderpass/blit. 
<reg32 offset="0x980b" name="PC_UNKNOWN_980B" variants="A8XX-"/> - <reg32 offset="0x9800" name="PC_HS_PARAM_0" low="0" high="5" type="uint" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b10" name="PC_HS_PARAM_0" low="0" high="5" type="uint" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9800" name="PC_HS_PARAM_0" low="0" high="5" type="uint" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9b10" name="PC_HS_PARAM_0" low="0" high="5" type="uint" variants="A8XX-" usage="draw"/> <bitset name="a6xx_pc_hs_param_1" inline="yes"> <bitfield name="SIZE" low="0" high="10" type="uint"/> <bitfield name="UNK13" pos="13"/> </bitset> - <reg32 offset="0x9801" name="PC_HS_PARAM_1" type="a6xx_pc_hs_param_1" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b11" name="PC_HS_PARAM_1" type="a6xx_pc_hs_param_1" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9801" name="PC_HS_PARAM_1" type="a6xx_pc_hs_param_1" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9b11" name="PC_HS_PARAM_1" type="a6xx_pc_hs_param_1" variants="A8XX-" usage="draw"/> <bitset name="a6xx_pc_ds_param" inline="yes"> <bitfield name="SPACING" low="0" high="1" type="a6xx_tess_spacing"/> <bitfield name="OUTPUT" low="2" high="3" type="a6xx_tess_output"/> </bitset> - <reg32 offset="0x9802" name="PC_DS_PARAM" type="a6xx_pc_ds_param" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b12" name="PC_DS_PARAM" type="a6xx_pc_ds_param" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9802" name="PC_DS_PARAM" type="a6xx_pc_ds_param" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9b12" name="PC_DS_PARAM" type="a6xx_pc_ds_param" variants="A8XX-" usage="draw"/> - <reg32 offset="0x9803" name="PC_RESTART_INDEX" low="0" high="31" type="uint" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b15" name="PC_RESTART_INDEX" low="0" high="31" type="uint" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9803" name="PC_RESTART_INDEX" low="0" high="31" type="uint" 
variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9b15" name="PC_RESTART_INDEX" low="0" high="31" type="uint" variants="A8XX-" usage="draw"/> - <reg32 offset="0x9804" name="PC_MODE_CNTL" low="0" high="7" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b00" name="PC_MODE_CNTL" low="0" high="14" variants="A8XX" usage="rp_blit"/> + <reg32 offset="0x9804" name="PC_MODE_CNTL" low="0" high="7" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9b00" name="PC_MODE_CNTL" low="0" high="14" variants="A8XX" usage="draw"/> - <reg32 offset="0x9805" name="PC_POWER_CNTL" low="0" high="2" usage="rp_blit"/> + <reg32 offset="0x9805" name="PC_POWER_CNTL" low="0" high="2" variants="A6XX" usage="draw"/> <bitset name="a6xx_pc_ps_cntl" inline="yes"> <bitfield name="PRIMITIVEIDEN" pos="0" type="boolean"/> </bitset> - <reg32 offset="0x9806" name="PC_PS_CNTL" type="a6xx_pc_ps_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b06" name="PC_PS_CNTL" type="a6xx_pc_ps_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9806" name="PC_PS_CNTL" type="a6xx_pc_ps_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9b06" name="PC_PS_CNTL" type="a6xx_pc_ps_cntl" variants="A8XX-" usage="draw"/> <bitset name="a6xx_pc_dgen_so_cntl" inline="yes"> <bitfield name="STREAM_ENABLE" low="15" high="18" type="hex"/> </bitset> <!-- New in a6xx gen3+ --> - <reg32 offset="0x9808" name="PC_DGEN_SO_CNTL" type="a6xx_pc_dgen_so_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b0b" name="PC_DGEN_SO_CNTL" type="a6xx_pc_dgen_so_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9808" name="PC_DGEN_SO_CNTL" type="a6xx_pc_dgen_so_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9b0b" name="PC_DGEN_SO_CNTL" type="a6xx_pc_dgen_so_cntl" variants="A8XX-" usage="draw"/> <bitset name="a6xx_pc_dgen_su_conservative_ras_cntl" inline="yes"> <bitfield name="CONSERVATIVERASEN" pos="0" type="boolean"/> @@ -3253,7 +3313,7 @@ by a particular 
renderpass/blit. <reg32 offset="0x980a" name="PC_DGEN_SU_CONSERVATIVE_RAS_CNTL" type="a6xx_pc_dgen_su_conservative_ras_cntl" variants="A6XX-A7XX"/> <reg32 offset="0x9b08" name="PC_DGEN_SU_CONSERVATIVE_RAS_CNTL" type="a6xx_pc_dgen_su_conservative_ras_cntl" variants="A8XX-"/> - <reg32 offset="0x9b0c" name="PC_VS_INPUT_CNTL" variants="A8XX-" usage="rp_blit"> + <reg32 offset="0x9b0c" name="PC_VS_INPUT_CNTL" variants="A8XX-" usage="draw"> <bitfield name="INSTR_CNT" low="0" high="5" type="uint"/> <bitfield name="SIDEBAND_CNT" low="6" high="8" type="uint"/> </reg32> @@ -3287,9 +3347,12 @@ by a particular renderpass/blit. <!-- 0x9843-0x997f invalid --> - <reg32 offset="0x9981" name="PC_DGEN_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0x9809" name="PC_DGEN_RAST_CNTL" type="a6xx_rast_cntl" variants="A7XX" usage="rp_blit"/> - <reg32 offset="0x9812" name="PC_DGEN_RAST_CNTL" type="a6xx_rast_cntl" variants="A8XX" usage="rp_blit"/> + <reg32 offset="0x9981" name="PC_DGEN_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX" usage="draw"/> + <reg32 offset="0x9809" name="PC_DGEN_RAST_CNTL" type="a6xx_rast_cntl" variants="A7XX" usage="draw"/> + <reg32 offset="0x9812" name="PC_DGEN_RAST_CNTL" type="a6xx_rast_cntl" variants="A8XX" usage="draw"/> + + <reg32 offset="0x9884" name="PC_HS_PATCH_SIZE" variants="A7XX" usage="cmd"/> + <reg32 offset="0x9813" name="PC_HS_PATCH_SIZE" variants="A8XX-" usage="cmd"/> <!-- Both are a750+. Probably needed to correctly overlap execution of several draws. @@ -3304,8 +3367,8 @@ by a particular renderpass/blit. 
<!-- 0x9982-0x9aff invalid --> - <reg32 offset="0x9b00" name="PC_CNTL" type="a6xx_pc_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b01" name="PC_CNTL" type="a6xx_pc_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9b00" name="PC_CNTL" type="a6xx_pc_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9b01" name="PC_CNTL" type="a6xx_pc_cntl" variants="A8XX-" usage="draw"/> <bitset name="a6xx_pc_xs_cntl" inline="yes"> <doc> @@ -3324,31 +3387,31 @@ by a particular renderpass/blit. <bitfield name="SHADINGRATE" pos="24" type="boolean" variants="A7XX-"/> </bitset> - <reg32 offset="0x9b01" name="PC_VS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b02" name="PC_GS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b03" name="PC_HS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b04" name="PC_DS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9b01" name="PC_VS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9b02" name="PC_GS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9b03" name="PC_HS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9b04" name="PC_DS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="draw"/> - <reg32 offset="0x9b02" name="PC_VS_CNTL" type="a6xx_pc_xs_cntl" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x9b03" name="PC_GS_CNTL" type="a6xx_pc_xs_cntl" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x9b04" name="PC_HS_CNTL" type="a6xx_pc_xs_cntl" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0x9b05" name="PC_DS_CNTL" type="a6xx_pc_xs_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9b02" name="PC_VS_CNTL" type="a6xx_pc_xs_cntl" variants="A8XX-" usage="draw"/> + <reg32 offset="0x9b03" name="PC_GS_CNTL" type="a6xx_pc_xs_cntl" 
variants="A8XX-" usage="draw"/> + <reg32 offset="0x9b04" name="PC_HS_CNTL" type="a6xx_pc_xs_cntl" variants="A8XX-" usage="draw"/> + <reg32 offset="0x9b05" name="PC_DS_CNTL" type="a6xx_pc_xs_cntl" variants="A8XX-" usage="draw"/> - <reg32 offset="0x9b05" name="PC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b13" name="PC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9b05" name="PC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9b13" name="PC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A8XX-" usage="draw"/> - <reg32 offset="0x9b06" name="PC_PRIMITIVE_CNTL_6" variants="A6XX" usage="rp_blit"> + <reg32 offset="0x9b06" name="PC_PRIMITIVE_CNTL_6" variants="A6XX" usage="draw"> <doc> size in vec4s of per-primitive storage for gs. TODO: not actually in VPC </doc> <bitfield name="STRIDE_IN_VPC" low="0" high="10" type="uint"/> </reg32> - <reg32 offset="0x9b07" name="PC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b09" name="PC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9b07" name="PC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9b09" name="PC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A8XX-" usage="draw"/> <!-- mask of enabled views, doesn't exist on A630 --> - <reg32 offset="0x9b08" name="PC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b0d" name="PC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0x9b08" name="PC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0x9b0d" name="PC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" 
high="15" variants="A8XX-" usage="draw"/> <!-- 0x9b09-0x9bff invalid --> <reg32 offset="0x9c00" name="PC_2D_EVENT_CMD"> <!-- special register (but note first 8 bits can be written/read) --> @@ -3426,18 +3489,18 @@ by a particular renderpass/blit. <reg32 offset="0x9e63" name="PC_CONTEXT_SWITCH_GFX_PREEMPTION_MODE" variants="A8XX-"/> <reg32 offset="0x9e64" name="PC_CONTEXT_SWITCH_STABILIZE_CNTL_1" variants="A8XX-"/> - <reg32 offset="0xa000" name="VFD_CNTL_0" usage="rp_blit"> + <reg32 offset="0xa000" name="VFD_CNTL_0" usage="draw"> <bitfield name="FETCH_CNT" low="0" high="5" type="uint"/> <bitfield name="DECODE_CNT" low="8" high="13" type="uint"/> </reg32> - <reg32 offset="0xa001" name="VFD_CNTL_1" usage="rp_blit"> + <reg32 offset="0xa001" name="VFD_CNTL_1" usage="draw"> <bitfield name="REGID4VTX" low="0" high="7" type="a3xx_regid"/> <bitfield name="REGID4INST" low="8" high="15" type="a3xx_regid"/> <bitfield name="REGID4PRIMID" low="16" high="23" type="a3xx_regid"/> <!-- only used for VS in non-multi-position-output case --> <bitfield name="REGID4VIEWID" low="24" high="31" type="a3xx_regid"/> </reg32> - <reg32 offset="0xa002" name="VFD_CNTL_2" usage="rp_blit"> + <reg32 offset="0xa002" name="VFD_CNTL_2" usage="draw"> <bitfield name="REGID_HSRELPATCHID" low="0" high="7" type="a3xx_regid"> <doc> This is the ID of the current patch within the @@ -3450,20 +3513,20 @@ by a particular renderpass/blit. 
</bitfield> <bitfield name="REGID_INVOCATIONID" low="8" high="15" type="a3xx_regid"/> </reg32> - <reg32 offset="0xa003" name="VFD_CNTL_3" usage="rp_blit"> + <reg32 offset="0xa003" name="VFD_CNTL_3" usage="draw"> <bitfield name="REGID_DSPRIMID" low="0" high="7" type="a3xx_regid"/> <bitfield name="REGID_DSRELPATCHID" low="8" high="15" type="a3xx_regid"/> <bitfield name="REGID_TESSX" low="16" high="23" type="a3xx_regid"/> <bitfield name="REGID_TESSY" low="24" high="31" type="a3xx_regid"/> </reg32> - <reg32 offset="0xa004" name="VFD_CNTL_4" usage="rp_blit"> - <bitfield name="UNK0" low="0" high="7" type="a3xx_regid"/> + <reg32 offset="0xa004" name="VFD_CNTL_4" usage="draw"> + <bitfield name="REGID_DSVIEWID" low="0" high="7" type="a3xx_regid"/> </reg32> - <reg32 offset="0xa005" name="VFD_CNTL_5" usage="rp_blit"> + <reg32 offset="0xa005" name="VFD_CNTL_5" usage="draw"> <bitfield name="REGID_GSHEADER" low="0" high="7" type="a3xx_regid"/> - <bitfield name="UNK8" low="8" high="15" type="a3xx_regid"/> + <bitfield name="REGID_GSVIEWID" low="8" high="15" type="a3xx_regid"/> </reg32> - <reg32 offset="0xa006" name="VFD_CNTL_6" usage="rp_blit"> + <reg32 offset="0xa006" name="VFD_CNTL_6" usage="draw"> <!-- True if gl_PrimitiveID is read via the FS --> @@ -3474,7 +3537,7 @@ by a particular renderpass/blit. <bitfield name="RENDER_MODE" low="0" high="2" type="a6xx_render_mode"/> </reg32> - <reg32 offset="0xa008" name="VFD_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" usage="rp_blit"/> + <reg32 offset="0xa008" name="VFD_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" usage="draw"/> <reg32 offset="0xa009" name="VFD_MODE_CNTL" usage="cmd"> <!-- add VFD_INDEX_OFFSET to REGID4VTX --> <bitfield name="VERTEX" pos="0" type="boolean"/> @@ -3482,14 +3545,14 @@ by a particular renderpass/blit. 
<bitfield name="INSTANCE" pos="1" type="boolean"/> </reg32> - <reg32 offset="0xa00e" name="VFD_INDEX_OFFSET" usage="rp_blit"/> - <reg32 offset="0xa00f" name="VFD_INSTANCE_START_OFFSET" usage="rp_blit"/> - <array offset="0xa010" name="VFD_VERTEX_BUFFER" stride="4" length="32" usage="rp_blit"> + <reg32 offset="0xa00e" name="VFD_INDEX_OFFSET" usage="draw"/> + <reg32 offset="0xa00f" name="VFD_INSTANCE_START_OFFSET" usage="draw"/> + <array offset="0xa010" name="VFD_VERTEX_BUFFER" stride="4" length="32" usage="draw"> <reg64 offset="0x0" name="BASE" type="address" align="1"/> <reg32 offset="0x2" name="SIZE" type="uint"/> <reg32 offset="0x3" name="STRIDE" low="0" high="11" type="uint"/> </array> - <array offset="0xa090" name="VFD_FETCH_INSTR" stride="2" length="32" usage="rp_blit"> + <array offset="0xa090" name="VFD_FETCH_INSTR" stride="2" length="32" usage="draw"> <reg32 offset="0x0" name="INSTR"> <!-- IDX and byte OFFSET into VFD_VERTEX_BUFFER --> <bitfield name="IDX" low="0" high="4" type="uint"/> @@ -3502,14 +3565,14 @@ by a particular renderpass/blit. </reg32> <reg32 offset="0x1" name="STEP_RATE" type="uint"/> </array> - <array offset="0xa0d0" name="VFD_DEST_CNTL" stride="1" length="32" usage="rp_blit"> + <array offset="0xa0d0" name="VFD_DEST_CNTL" stride="1" length="32" usage="draw"> <reg32 offset="0x0" name="INSTR"> <bitfield name="WRITEMASK" low="0" high="3" type="hex"/> <bitfield name="REGID" low="4" high="11" type="a3xx_regid"/> </reg32> </array> - <reg32 offset="0xa0f8" name="VFD_POWER_CNTL" low="0" high="2" usage="rp_blit"/> + <reg32 offset="0xa0f8" name="VFD_POWER_CNTL" low="0" high="2" variants="A6XX" usage="draw"/> <reg32 offset="0xa600" name="VFD_DBG_ECO_CNTL" variants="A7XX-" usage="init"/> @@ -3549,10 +3612,12 @@ by a particular renderpass/blit. --> <bitfield name="HALFREGFOOTPRINT" low="1" high="6" type="uint"/> <bitfield name="FULLREGFOOTPRINT" low="7" high="12" type="uint"/> - <!-- could it be a low bit of branchstack? 
--> - <bitfield name="UNK13" pos="13" type="boolean"/> <!-- seems to be nesting level for flow control:.. --> - <bitfield name="BRANCHSTACK" low="14" high="19" type="uint"/> + <bitfield name="BRANCHSTACK" low="13" high="19" type="uint"/> + + <!-- gen8: --> + <bitfield name="FULLREGFOOTPRINT_LSB" pos="27" type="uint" variants="A8XX-"/> + <bitfield name="HALFREGFOOTPRINT_LSB" pos="30" type="uint" variants="A8XX-"/> </bitset> <bitset name="a6xx_sp_xs_config" inline="yes"> @@ -3580,7 +3645,7 @@ by a particular renderpass/blit. <bitfield name="OUT" low="0" high="5" type="uint"/> </bitset> - <reg32 offset="0xa800" name="SP_VS_CNTL_0" type="a6xx_sp_xs_cntl_0" usage="rp_blit"> + <reg32 offset="0xa800" name="SP_VS_CNTL_0" type="a6xx_sp_xs_cntl_0" usage="draw"> <!-- This field actually controls all geometry stages. TCS, TES, and GS must have the same mergedregs setting as VS. @@ -3607,8 +3672,8 @@ by a particular renderpass/blit. bit N corresponds to brac.N --> <reg32 offset="0xa801" name="SP_VS_BOOLEAN_CF_MASK" type="hex"/> <!-- # of VS outputs including pos/psize --> - <reg32 offset="0xa802" name="SP_VS_OUTPUT_CNTL" type="a6xx_sp_xs_output_cntl" usage="rp_blit"/> - <array offset="0xa803" name="SP_VS_OUTPUT" stride="1" length="16" usage="rp_blit"> + <reg32 offset="0xa802" name="SP_VS_OUTPUT_CNTL" type="a6xx_sp_xs_output_cntl" usage="draw"/> + <array offset="0xa803" name="SP_VS_OUTPUT" stride="1" length="16" usage="draw"> <reg32 offset="0x0" name="REG"> <bitfield name="A_REGID" low="0" high="7" type="a3xx_regid"/> <bitfield name="A_COMPMASK" low="8" high="11" type="hex"/> @@ -3623,7 +3688,7 @@ by a particular renderpass/blit. an extra varying after, but with a lower OUTLOC position. If present, psize is last, preceded by position. 
--> - <array offset="0xa813" name="SP_VS_VPC_DEST" stride="1" length="8" usage="rp_blit"> + <array offset="0xa813" name="SP_VS_VPC_DEST" stride="1" length="8" usage="draw"> <reg32 offset="0x0" name="REG"> <bitfield name="OUTLOC0" low="0" high="7" type="uint"/> <bitfield name="OUTLOC1" low="8" high="15" type="uint"/> @@ -3712,20 +3777,20 @@ by a particular renderpass/blit. <!-- seen 0x400, 0xc00, 0x1000, 0x1c00, 0x1000, 0x2000, 0x3000 --> </bitset> - <reg32 offset="0xa81b" name="SP_VS_PROGRAM_COUNTER_OFFSET" type="uint" usage="rp_blit"/> - <reg64 offset="0xa81c" name="SP_VS_BASE" type="address" align="32" usage="rp_blit"/> - <reg32 offset="0xa81e" name="SP_VS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param" usage="rp_blit"/> - <reg64 offset="0xa81f" name="SP_VS_PVT_MEM_BASE" type="waddress" align="32" usage="rp_blit"/> - <reg32 offset="0xa821" name="SP_VS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size" usage="rp_blit"/> - <reg32 offset="0xa822" name="SP_VS_TSIZE" low="0" high="7" type="uint" usage="rp_blit"/> - <reg32 offset="0xa823" name="SP_VS_CONFIG" type="a6xx_sp_xs_config" usage="rp_blit"/> - <reg32 offset="0xa824" name="SP_VS_INSTR_SIZE" low="0" high="27" type="uint" usage="rp_blit"/> - <reg32 offset="0xa825" name="SP_VS_PVT_MEM_STACK_OFFSET" type="a6xx_sp_xs_pvt_mem_stack_offset" usage="rp_blit"/> + <reg32 offset="0xa81b" name="SP_VS_PROGRAM_COUNTER_OFFSET" type="uint" usage="draw"/> + <reg64 offset="0xa81c" name="SP_VS_BASE" type="address" align="32" usage="draw"/> + <reg32 offset="0xa81e" name="SP_VS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param" usage="draw"/> + <reg64 offset="0xa81f" name="SP_VS_PVT_MEM_BASE" type="waddress" align="32" usage="draw"/> + <reg32 offset="0xa821" name="SP_VS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size" usage="draw"/> + <reg32 offset="0xa822" name="SP_VS_TSIZE" low="0" high="7" type="uint" usage="draw"/> + <reg32 offset="0xa823" name="SP_VS_CONFIG" type="a6xx_sp_xs_config" usage="draw"/> + <reg32 offset="0xa824" 
name="SP_VS_INSTR_SIZE" low="0" high="27" type="uint" usage="draw"/> + <reg32 offset="0xa825" name="SP_VS_PVT_MEM_STACK_OFFSET" type="a6xx_sp_xs_pvt_mem_stack_offset" usage="draw"/> <reg32 offset="0xa826" name="SP_VS_HYSTERESIS" type="a6xx_sp_xs_hysteresis" variants="A6XX-A7XX"/> <reg32 offset="0xa826" name="SP_VS_HYSTERESIS" type="a8xx_sp_xs_hysteresis" variants="A8XX-"/> <reg32 offset="0xa82d" name="SP_VS_VGS_CNTL" variants="A7XX-" usage="cmd"/> - <reg32 offset="0xa830" name="SP_HS_CNTL_0" type="a6xx_sp_xs_cntl_0" usage="rp_blit"> + <reg32 offset="0xa830" name="SP_HS_CNTL_0" type="a6xx_sp_xs_cntl_0" usage="draw"> <!-- There is no mergedregs bit, that comes from the VS. --> <bitfield name="EARLYPREAMBLE" pos="20" type="boolean"/> </reg32> @@ -3735,32 +3800,32 @@ by a particular renderpass/blit. the maximum size of local storage should be: 64 (wavesize) * 64 (SP_HS_CNTL_1) * 4 = 16k --> - <reg32 offset="0xa831" name="SP_HS_CNTL_1" low="0" high="7" type="uint" usage="rp_blit"/> - <reg32 offset="0xa832" name="SP_HS_BOOLEAN_CF_MASK" type="hex" usage="rp_blit"/> + <reg32 offset="0xa831" name="SP_HS_CNTL_1" low="0" high="7" type="uint" usage="draw"/> + <reg32 offset="0xa832" name="SP_HS_BOOLEAN_CF_MASK" type="hex" usage="draw"/> <!-- TODO: exact same layout as 0xa81b-0xa825 --> - <reg32 offset="0xa833" name="SP_HS_PROGRAM_COUNTER_OFFSET" type="uint" usage="rp_blit"/> - <reg64 offset="0xa834" name="SP_HS_BASE" type="address" align="32" usage="rp_blit"/> - <reg32 offset="0xa836" name="SP_HS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param" usage="rp_blit"/> - <reg64 offset="0xa837" name="SP_HS_PVT_MEM_BASE" type="waddress" align="32" usage="rp_blit"/> - <reg32 offset="0xa839" name="SP_HS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size" usage="rp_blit"/> - <reg32 offset="0xa83a" name="SP_HS_TSIZE" low="0" high="7" type="uint" usage="rp_blit"/> - <reg32 offset="0xa83b" name="SP_HS_CONFIG" type="a6xx_sp_xs_config" usage="rp_blit"/> - <reg32 offset="0xa83c" name="SP_HS_INSTR_SIZE" 
low="0" high="27" type="uint" usage="rp_blit"/> - <reg32 offset="0xa83d" name="SP_HS_PVT_MEM_STACK_OFFSET" type="a6xx_sp_xs_pvt_mem_stack_offset" usage="rp_blit"/> + <reg32 offset="0xa833" name="SP_HS_PROGRAM_COUNTER_OFFSET" type="uint" usage="draw"/> + <reg64 offset="0xa834" name="SP_HS_BASE" type="address" align="32" usage="draw"/> + <reg32 offset="0xa836" name="SP_HS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param" usage="draw"/> + <reg64 offset="0xa837" name="SP_HS_PVT_MEM_BASE" type="waddress" align="32" usage="draw"/> + <reg32 offset="0xa839" name="SP_HS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size" usage="draw"/> + <reg32 offset="0xa83a" name="SP_HS_TSIZE" low="0" high="7" type="uint" usage="draw"/> + <reg32 offset="0xa83b" name="SP_HS_CONFIG" type="a6xx_sp_xs_config" usage="draw"/> + <reg32 offset="0xa83c" name="SP_HS_INSTR_SIZE" low="0" high="27" type="uint" usage="draw"/> + <reg32 offset="0xa83d" name="SP_HS_PVT_MEM_STACK_OFFSET" type="a6xx_sp_xs_pvt_mem_stack_offset" usage="draw"/> <reg32 offset="0xa83e" name="SP_HS_HYSTERESIS" type="a6xx_sp_xs_hysteresis" variants="A6XX-A7XX"/> <reg32 offset="0xa83e" name="SP_HS_HYSTERESIS" type="a8xx_sp_xs_hysteresis" variants="A8XX-"/> <reg32 offset="0xa82f" name="SP_HS_VGS_CNTL" variants="A7XX-" usage="cmd"/> - <reg32 offset="0xa840" name="SP_DS_CNTL_0" type="a6xx_sp_xs_cntl_0" usage="rp_blit"> + <reg32 offset="0xa840" name="SP_DS_CNTL_0" type="a6xx_sp_xs_cntl_0" usage="draw"> <!-- There is no mergedregs bit, that comes from the VS. 
--> <bitfield name="EARLYPREAMBLE" pos="20" type="boolean"/> </reg32> <reg32 offset="0xa841" name="SP_DS_BOOLEAN_CF_MASK" type="hex"/> <!-- TODO: exact same layout as 0xa802-0xa81a --> - <reg32 offset="0xa842" name="SP_DS_OUTPUT_CNTL" type="a6xx_sp_xs_output_cntl" usage="rp_blit"/> - <array offset="0xa843" name="SP_DS_OUTPUT" stride="1" length="16" usage="rp_blit"> + <reg32 offset="0xa842" name="SP_DS_OUTPUT_CNTL" type="a6xx_sp_xs_output_cntl" usage="draw"/> + <array offset="0xa843" name="SP_DS_OUTPUT" stride="1" length="16" usage="draw"> <reg32 offset="0x0" name="REG"> <bitfield name="A_REGID" low="0" high="7" type="a3xx_regid"/> <bitfield name="A_COMPMASK" low="8" high="11" type="hex"/> @@ -3768,7 +3833,7 @@ by a particular renderpass/blit. <bitfield name="B_COMPMASK" low="24" high="27" type="hex"/> </reg32> </array> - <array offset="0xa853" name="SP_DS_VPC_DEST" stride="1" length="8" usage="rp_blit"> + <array offset="0xa853" name="SP_DS_VPC_DEST" stride="1" length="8" usage="draw"> <reg32 offset="0x0" name="REG"> <bitfield name="OUTLOC0" low="0" high="7" type="uint"/> <bitfield name="OUTLOC1" low="8" high="15" type="uint"/> @@ -3778,24 +3843,24 @@ by a particular renderpass/blit. 
</array> <!-- TODO: exact same layout as 0xa81b-0xa825 --> - <reg32 offset="0xa85b" name="SP_DS_PROGRAM_COUNTER_OFFSET" type="uint" usage="rp_blit"/> - <reg64 offset="0xa85c" name="SP_DS_BASE" type="address" align="32" usage="rp_blit"/> - <reg32 offset="0xa85e" name="SP_DS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param" usage="rp_blit"/> - <reg64 offset="0xa85f" name="SP_DS_PVT_MEM_BASE" type="waddress" align="32" usage="rp_blit"/> - <reg32 offset="0xa861" name="SP_DS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size" usage="rp_blit"/> - <reg32 offset="0xa862" name="SP_DS_TSIZE" low="0" high="7" type="uint" usage="rp_blit"/> - <reg32 offset="0xa863" name="SP_DS_CONFIG" type="a6xx_sp_xs_config" usage="rp_blit"/> - <reg32 offset="0xa864" name="SP_DS_INSTR_SIZE" low="0" high="27" type="uint" usage="rp_blit"/> - <reg32 offset="0xa865" name="SP_DS_PVT_MEM_STACK_OFFSET" type="a6xx_sp_xs_pvt_mem_stack_offset" usage="rp_blit"/> + <reg32 offset="0xa85b" name="SP_DS_PROGRAM_COUNTER_OFFSET" type="uint" usage="draw"/> + <reg64 offset="0xa85c" name="SP_DS_BASE" type="address" align="32" usage="draw"/> + <reg32 offset="0xa85e" name="SP_DS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param" usage="draw"/> + <reg64 offset="0xa85f" name="SP_DS_PVT_MEM_BASE" type="waddress" align="32" usage="draw"/> + <reg32 offset="0xa861" name="SP_DS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size" usage="draw"/> + <reg32 offset="0xa862" name="SP_DS_TSIZE" low="0" high="7" type="uint" usage="draw"/> + <reg32 offset="0xa863" name="SP_DS_CONFIG" type="a6xx_sp_xs_config" usage="draw"/> + <reg32 offset="0xa864" name="SP_DS_INSTR_SIZE" low="0" high="27" type="uint" usage="draw"/> + <reg32 offset="0xa865" name="SP_DS_PVT_MEM_STACK_OFFSET" type="a6xx_sp_xs_pvt_mem_stack_offset" usage="draw"/> <reg32 offset="0xa866" name="SP_DS_HYSTERESIS" type="a6xx_sp_xs_hysteresis" variants="A6XX-A7XX"/> <reg32 offset="0xa866" name="SP_DS_HYSTERESIS" type="a8xx_sp_xs_hysteresis" variants="A8XX-"/> <reg32 offset="0xa868" 
name="SP_DS_VGS_CNTL" variants="A7XX-" usage="cmd"/> - <reg32 offset="0xa870" name="SP_GS_CNTL_0" type="a6xx_sp_xs_cntl_0" usage="rp_blit"> + <reg32 offset="0xa870" name="SP_GS_CNTL_0" type="a6xx_sp_xs_cntl_0" usage="draw"> <!-- There is no mergedregs bit, that comes from the VS. --> <bitfield name="EARLYPREAMBLE" pos="20" type="boolean"/> </reg32> - <reg32 offset="0xa871" name="SP_GS_CNTL_1" low="0" high="7" type="uint" usage="rp_blit"> + <reg32 offset="0xa871" name="SP_GS_CNTL_1" low="0" high="7" type="uint" usage="draw"> <doc> Normally the size of the output of the last stage in dwords. It should be programmed as follows: @@ -3809,14 +3874,14 @@ by a particular renderpass/blit. doesn't matter in practice. </doc> </reg32> - <reg32 offset="0xa872" name="SP_GS_BOOLEAN_CF_MASK" type="hex" usage="rp_blit"/> + <reg32 offset="0xa872" name="SP_GS_BOOLEAN_CF_MASK" type="hex" usage="draw"/> <!-- TODO: exact same layout as 0xa802-0xa81a --> - <reg32 offset="0xa873" name="SP_GS_OUTPUT_CNTL" type="a6xx_sp_xs_output_cntl" usage="rp_blit"> + <reg32 offset="0xa873" name="SP_GS_OUTPUT_CNTL" type="a6xx_sp_xs_output_cntl" usage="draw"> <!-- FLAGS_REGID only for GS --> <bitfield name="FLAGS_REGID" low="6" high="13" type="a3xx_regid"/> </reg32> - <array offset="0xa874" name="SP_GS_OUTPUT" stride="1" length="16" usage="rp_blit"> + <array offset="0xa874" name="SP_GS_OUTPUT" stride="1" length="16" usage="draw"> <reg32 offset="0x0" name="REG"> <bitfield name="A_REGID" low="0" high="7" type="a3xx_regid"/> <bitfield name="A_COMPMASK" low="8" high="11" type="hex"/> @@ -3825,7 +3890,7 @@ by a particular renderpass/blit. 
</reg32> </array> - <array offset="0xa884" name="SP_GS_VPC_DEST" stride="1" length="8" usage="rp_blit"> + <array offset="0xa884" name="SP_GS_VPC_DEST" stride="1" length="8" usage="draw"> <reg32 offset="0x0" name="REG"> <bitfield name="OUTLOC0" low="0" high="7" type="uint"/> <bitfield name="OUTLOC1" low="8" high="15" type="uint"/> @@ -3835,15 +3900,15 @@ by a particular renderpass/blit. </array> <!-- TODO: exact same layout as 0xa81b-0xa825 --> - <reg32 offset="0xa88c" name="SP_GS_PROGRAM_COUNTER_OFFSET" type="uint" usage="rp_blit"/> - <reg64 offset="0xa88d" name="SP_GS_BASE" type="address" align="32" usage="rp_blit"/> - <reg32 offset="0xa88f" name="SP_GS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param" usage="rp_blit"/> - <reg64 offset="0xa890" name="SP_GS_PVT_MEM_BASE" type="waddress" align="32" usage="rp_blit"/> - <reg32 offset="0xa892" name="SP_GS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size" usage="rp_blit"/> - <reg32 offset="0xa893" name="SP_GS_TSIZE" low="0" high="7" type="uint" usage="rp_blit"/> - <reg32 offset="0xa894" name="SP_GS_CONFIG" type="a6xx_sp_xs_config" usage="rp_blit"/> - <reg32 offset="0xa895" name="SP_GS_INSTR_SIZE" low="0" high="27" type="uint" usage="rp_blit"/> - <reg32 offset="0xa896" name="SP_GS_PVT_MEM_STACK_OFFSET" type="a6xx_sp_xs_pvt_mem_stack_offset" usage="rp_blit"/> + <reg32 offset="0xa88c" name="SP_GS_PROGRAM_COUNTER_OFFSET" type="uint" usage="draw"/> + <reg64 offset="0xa88d" name="SP_GS_BASE" type="address" align="32" usage="draw"/> + <reg32 offset="0xa88f" name="SP_GS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param" usage="draw"/> + <reg64 offset="0xa890" name="SP_GS_PVT_MEM_BASE" type="waddress" align="32" usage="draw"/> + <reg32 offset="0xa892" name="SP_GS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size" usage="draw"/> + <reg32 offset="0xa893" name="SP_GS_TSIZE" low="0" high="7" type="uint" usage="draw"/> + <reg32 offset="0xa894" name="SP_GS_CONFIG" type="a6xx_sp_xs_config" usage="draw"/> + <reg32 offset="0xa895" name="SP_GS_INSTR_SIZE" 
low="0" high="27" type="uint" usage="draw"/> + <reg32 offset="0xa896" name="SP_GS_PVT_MEM_STACK_OFFSET" type="a6xx_sp_xs_pvt_mem_stack_offset" usage="draw"/> <reg32 offset="0xa897" name="SP_GS_HYSTERESIS" type="a6xx_sp_xs_hysteresis" variants="A6XX-A7XX"/> <reg32 offset="0xa897" name="SP_GS_HYSTERESIS" type="a8xx_sp_xs_hysteresis" variants="A8XX-"/> <reg32 offset="0xa899" name="SP_GS_VGS_CNTL" variants="A7XX-" usage="cmd"/> @@ -3859,7 +3924,7 @@ by a particular renderpass/blit. <!-- TODO: 4 unknown bool registers 0xa8c0-0xa8c3 --> - <reg32 offset="0xa980" name="SP_PS_CNTL_0" type="a6xx_sp_xs_cntl_0" usage="rp_blit"> + <reg32 offset="0xa980" name="SP_PS_CNTL_0" type="a6xx_sp_xs_cntl_0" usage="draw"> <bitfield name="THREADSIZE" pos="20" type="a6xx_threadsize"/> <bitfield name="UNK21" pos="21" type="boolean"/> <bitfield name="VARYING" pos="22" type="boolean"/> @@ -3878,16 +3943,15 @@ by a particular renderpass/blit. and so one pixel's value is always unused. </doc> </bitfield> - <bitfield name="UNK27" pos="27" type="boolean"/> <bitfield name="EARLYPREAMBLE" pos="28" type="boolean"/> <bitfield name="MERGEDREGS" pos="31" type="boolean"/> </reg32> <reg32 offset="0xa981" name="SP_PS_BOOLEAN_CF_MASK" type="hex"/> - <reg32 offset="0xa982" name="SP_PS_PROGRAM_COUNTER_OFFSET" type="uint" usage="rp_blit"/> - <reg64 offset="0xa983" name="SP_PS_BASE" type="address" align="32" usage="rp_blit"/> - <reg32 offset="0xa985" name="SP_PS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param" usage="rp_blit"/> - <reg64 offset="0xa986" name="SP_PS_PVT_MEM_BASE" type="waddress" align="32" usage="rp_blit"/> - <reg32 offset="0xa988" name="SP_PS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size" usage="rp_blit"/> + <reg32 offset="0xa982" name="SP_PS_PROGRAM_COUNTER_OFFSET" type="uint" usage="draw"/> + <reg64 offset="0xa983" name="SP_PS_BASE" type="address" align="32" usage="draw"/> + <reg32 offset="0xa985" name="SP_PS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param" usage="draw"/> + <reg64 offset="0xa986" 
name="SP_PS_PVT_MEM_BASE" type="waddress" align="32" usage="draw"/> + <reg32 offset="0xa988" name="SP_PS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size" usage="draw"/> <bitset name="a6xx_sp_blend_cntl" inline="yes"> <!-- per-mrt enable bit --> @@ -3897,12 +3961,12 @@ by a particular renderpass/blit. <bitfield name="ALPHA_TO_COVERAGE" pos="10" type="boolean"/> </bitset> - <reg32 offset="0xa989" name="SP_BLEND_CNTL" type="a6xx_sp_blend_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0xa989" name="SP_BLEND_CNTL" type="a6xx_sp_blend_cntl" variants="A8XX-" usage="rp_blit"> + <reg32 offset="0xa989" name="SP_BLEND_CNTL" type="a6xx_sp_blend_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0xa989" name="SP_BLEND_CNTL" type="a6xx_sp_blend_cntl" variants="A8XX-" usage="draw"> <bitfield name="ALPHA_TO_ONE" pos="11" type="boolean" variants="A8XX-"/> </reg32> - <reg32 offset="0xa98a" name="SP_SRGB_CNTL" usage="rp_blit"> + <reg32 offset="0xa98a" name="SP_SRGB_CNTL" usage="draw"> <!-- Same as RB_SRGB_CNTL --> <bitfield name="SRGB_MRT0" pos="0" type="boolean"/> <bitfield name="SRGB_MRT1" pos="1" type="boolean"/> @@ -3913,7 +3977,7 @@ by a particular renderpass/blit. <bitfield name="SRGB_MRT6" pos="6" type="boolean"/> <bitfield name="SRGB_MRT7" pos="7" type="boolean"/> </reg32> - <reg32 offset="0xa98b" name="SP_PS_OUTPUT_MASK" usage="rp_blit"> + <reg32 offset="0xa98b" name="SP_PS_OUTPUT_MASK" usage="draw"> <bitfield name="RT0" low="0" high="3"/> <bitfield name="RT1" low="4" high="7"/> <bitfield name="RT2" low="8" high="11"/> @@ -3923,17 +3987,17 @@ by a particular renderpass/blit. 
<bitfield name="RT6" low="24" high="27"/> <bitfield name="RT7" low="28" high="31"/> </reg32> - <reg32 offset="0xa98c" name="SP_PS_OUTPUT_CNTL" usage="rp_blit"> + <reg32 offset="0xa98c" name="SP_PS_OUTPUT_CNTL" usage="draw"> <bitfield name="DUAL_COLOR_IN_ENABLE" pos="0" type="boolean"/> <bitfield name="DEPTH_REGID" low="8" high="15" type="a3xx_regid"/> <bitfield name="SAMPMASK_REGID" low="16" high="23" type="a3xx_regid"/> <bitfield name="STENCILREF_REGID" low="24" high="31" type="a3xx_regid"/> </reg32> - <reg32 offset="0xa98d" name="SP_PS_MRT_CNTL" usage="rp_blit"> + <reg32 offset="0xa98d" name="SP_PS_MRT_CNTL" usage="draw"> <bitfield name="MRT" low="0" high="3" type="uint"/> </reg32> - <array offset="0xa98e" name="SP_PS_OUTPUT" stride="1" length="8" usage="rp_blit"> + <array offset="0xa98e" name="SP_PS_OUTPUT" stride="1" length="8" usage="draw"> <doc>per MRT</doc> <reg32 offset="0x0" name="REG"> <bitfield name="REGID" low="0" high="7" type="a3xx_regid"/> @@ -3941,7 +4005,7 @@ by a particular renderpass/blit. </reg32> </array> - <array offset="0xa996" name="SP_PS_MRT" stride="1" length="8" usage="rp_blit"> + <array offset="0xa996" name="SP_PS_MRT" stride="1" length="8" usage="draw"> <reg32 offset="0" name="REG"> <bitfield name="COLOR_FORMAT" low="0" high="7" type="a6xx_format"/> <bitfield name="COLOR_SINT" pos="8" type="boolean"/> @@ -3950,7 +4014,7 @@ by a particular renderpass/blit. </reg32> </array> - <reg32 offset="0xa99e" name="SP_PS_INITIAL_TEX_LOAD_CNTL" usage="rp_blit"> + <reg32 offset="0xa99e" name="SP_PS_INITIAL_TEX_LOAD_CNTL" usage="draw"> <bitfield name="COUNT" low="0" high="2" type="uint"/> <bitfield name="IJ_WRITE_DISABLE" pos="3" type="boolean"/> <doc> @@ -3967,7 +4031,7 @@ by a particular renderpass/blit. 
<!-- Blob never uses it --> <bitfield name="CONSTSLOTID4COORD" low="16" high="24" type="uint" variants="A7XX-"/> </reg32> - <array offset="0xa99f" name="SP_PS_INITIAL_TEX_LOAD" stride="1" length="4" variants="A6XX" usage="rp_blit"> + <array offset="0xa99f" name="SP_PS_INITIAL_TEX_LOAD" stride="1" length="4" variants="A6XX" usage="draw"> <reg32 offset="0" name="CMD" variants="A6XX"> <bitfield name="SRC" low="0" high="6" type="uint"/> <bitfield name="SAMP_ID" low="7" high="10" type="uint"/> @@ -3981,7 +4045,7 @@ by a particular renderpass/blit. <bitfield name="CMD" low="29" high="31" type="a6xx_tex_prefetch_cmd"/> </reg32> </array> - <array offset="0xa99f" name="SP_PS_INITIAL_TEX_LOAD" stride="1" length="4" variants="A7XX-" usage="rp_blit"> + <array offset="0xa99f" name="SP_PS_INITIAL_TEX_LOAD" stride="1" length="4" variants="A7XX-" usage="draw"> <reg32 offset="0" name="CMD" variants="A7XX-"> <bitfield name="SRC" low="0" high="6" type="uint"/> <bitfield name="SAMP_ID" low="7" high="9" type="uint"/> @@ -3993,21 +4057,21 @@ by a particular renderpass/blit. <bitfield name="CMD" low="26" high="29" type="a6xx_tex_prefetch_cmd"/> </reg32> </array> - <array offset="0xa9a3" name="SP_PS_INITIAL_TEX_INDEX" stride="1" length="4" usage="rp_blit"> + <array offset="0xa9a3" name="SP_PS_INITIAL_TEX_INDEX" stride="1" length="4" usage="draw"> <reg32 offset="0" name="CMD"> <bitfield name="SAMP_ID" low="0" high="15" type="uint"/> <bitfield name="TEX_ID" low="16" high="31" type="uint"/> </reg32> </array> - <reg32 offset="0xa9a7" name="SP_PS_TSIZE" low="0" high="7" type="uint" usage="rp_blit"/> + <reg32 offset="0xa9a7" name="SP_PS_TSIZE" low="0" high="7" type="uint" usage="draw"/> <reg32 offset="0xa9a8" name="SP_UNKNOWN_A9A8" low="0" high="16" usage="cmd"/> <!-- always 0x0 ? 
--> - <reg32 offset="0xa9a9" name="SP_PS_PVT_MEM_STACK_OFFSET" type="a6xx_sp_xs_pvt_mem_stack_offset" usage="rp_blit"/> + <reg32 offset="0xa9a9" name="SP_PS_PVT_MEM_STACK_OFFSET" type="a6xx_sp_xs_pvt_mem_stack_offset" usage="draw"/> <reg32 offset="0xa9ab" name="SP_PS_HYSTERESIS" type="a6xx_sp_xs_hysteresis" variants="A6XX-A7XX"/> <reg32 offset="0xa9ab" name="SP_PS_HYSTERESIS" type="a8xx_sp_xs_hysteresis" variants="A8XX-"/> <!-- TODO: unknown bool register at 0xa9aa, likely same as 0xa8c0-0xa8c3 but for FS --> - <reg32 offset="0xa9b0" name="SP_CS_CNTL_0" type="a6xx_sp_xs_cntl_0" usage="cmd"> + <reg32 offset="0xa9b0" name="SP_CS_CNTL_0" type="a6xx_sp_xs_cntl_0" usage="compute"> <bitfield name="THREADSIZE" pos="20" type="a6xx_threadsize"/> <!-- seems to make SP use less concurrent threads when possible? --> <bitfield name="UNK21" pos="21" type="boolean"/> @@ -4025,7 +4089,7 @@ by a particular renderpass/blit. </enum> <!-- set for compute shaders --> - <reg32 offset="0xa9b1" name="SP_CS_CNTL_1" usage="cmd"> + <reg32 offset="0xa9b1" name="SP_CS_CNTL_1" usage="compute"> <bitfield name="SHARED_SIZE" low="0" high="4" type="uint"> <doc> If 0 - all 32k of shared storage is enabled, otherwise @@ -4045,29 +4109,29 @@ by a particular renderpass/blit. 
</bitfield> <bitfield name="ALT_LM_ENCODE" pos="26" type="boolean"/> </reg32> - <reg32 offset="0xa9b2" name="SP_CS_BOOLEAN_CF_MASK" type="hex" usage="cmd"/> - <reg32 offset="0xa9b3" name="SP_CS_PROGRAM_COUNTER_OFFSET" type="uint" usage="cmd"/> - <reg64 offset="0xa9b4" name="SP_CS_BASE" type="address" align="32" usage="cmd"/> - <reg32 offset="0xa9b6" name="SP_CS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param" usage="cmd"/> - <reg64 offset="0xa9b7" name="SP_CS_PVT_MEM_BASE" type="waddress" align="32" usage="cmd"/> - <reg32 offset="0xa9b9" name="SP_CS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size" usage="cmd"/> - <reg32 offset="0xa9ba" name="SP_CS_TSIZE" low="0" high="7" type="uint" usage="cmd"/> - <reg32 offset="0xa9bb" name="SP_CS_CONFIG" type="a6xx_sp_xs_config" usage="cmd"/> - <reg32 offset="0xa9bc" name="SP_CS_INSTR_SIZE" low="0" high="27" type="uint" usage="cmd"/> - <reg32 offset="0xa9bd" name="SP_CS_PVT_MEM_STACK_OFFSET" type="a6xx_sp_xs_pvt_mem_stack_offset" usage="cmd"/> - <reg32 offset="0xa9be" name="SP_CS_HYSTERESIS" type="a6xx_sp_xs_hysteresis" variants="A6XX-A7XX"/> - <reg32 offset="0xa9be" name="SP_CS_HYSTERESIS" type="a8xx_sp_xs_hysteresis" variants="A8XX-"/> + <reg32 offset="0xa9b2" name="SP_CS_BOOLEAN_CF_MASK" type="hex" usage="compute"/> + <reg32 offset="0xa9b3" name="SP_CS_PROGRAM_COUNTER_OFFSET" type="uint" usage="compute"/> + <reg64 offset="0xa9b4" name="SP_CS_BASE" type="address" align="32" usage="compute"/> + <reg32 offset="0xa9b6" name="SP_CS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param" usage="compute"/> + <reg64 offset="0xa9b7" name="SP_CS_PVT_MEM_BASE" type="waddress" align="32" usage="compute"/> + <reg32 offset="0xa9b9" name="SP_CS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size" usage="compute"/> + <reg32 offset="0xa9ba" name="SP_CS_TSIZE" low="0" high="7" type="uint" usage="compute"/> + <reg32 offset="0xa9bb" name="SP_CS_CONFIG" type="a6xx_sp_xs_config" usage="compute"/> + <reg32 offset="0xa9bc" name="SP_CS_INSTR_SIZE" low="0" high="27" 
type="uint" usage="compute"/> + <reg32 offset="0xa9bd" name="SP_CS_PVT_MEM_STACK_OFFSET" type="a6xx_sp_xs_pvt_mem_stack_offset" usage="compute"/> + <reg32 offset="0xa9be" name="SP_CS_HYSTERESIS" type="a6xx_sp_xs_hysteresis" variants="A6XX-A7XX" usage="compute"/> + <reg32 offset="0xa9be" name="SP_CS_HYSTERESIS" type="a8xx_sp_xs_hysteresis" variants="A8XX-" usage="compute"/> <reg32 offset="0xa9c5" name="SP_CS_VGS_CNTL" variants="A7XX-" usage="cmd"/> <!-- new in a6xx gen4, matches SP_CS_CONST_CONFIG_0 --> - <reg32 offset="0xa9c2" name="SP_CS_WIE_CNTL_0" usage="cmd"> + <reg32 offset="0xa9c2" name="SP_CS_WIE_CNTL_0" usage="compute"> <bitfield name="WGIDCONSTID" low="0" high="7" type="a3xx_regid"/> <bitfield name="WGSIZECONSTID" low="8" high="15" type="a3xx_regid"/> <bitfield name="WGOFFSETCONSTID" low="16" high="23" type="a3xx_regid"/> <bitfield name="LOCALIDREGID" low="24" high="31" type="a3xx_regid"/> </reg32> <!-- new in a6xx gen4, matches SP_CS_WGE_CNTL --> - <reg32 offset="0xa9c3" name="SP_CS_WIE_CNTL_1" variants="A6XX" usage="cmd"> + <reg32 offset="0xa9c3" name="SP_CS_WIE_CNTL_1" variants="A6XX" usage="compute"> <!-- gl_LocalInvocationIndex --> <bitfield name="LINEARLOCALIDREGID" low="0" high="7" type="a3xx_regid"/> <!-- a650 has 6 "SP cores" (but 3 "SP"). this makes it use only @@ -4090,7 +4154,7 @@ by a particular renderpass/blit. <value value="0x1" name="WORKITEMRASTORDER_TILED"/> </enum> - <reg32 offset="0xa9c3" name="SP_CS_WIE_CNTL_1" variants="A7XX-" usage="cmd"> + <reg32 offset="0xa9c3" name="SP_CS_WIE_CNTL_1" variants="A7XX-" usage="compute"> <!-- gl_LocalInvocationIndex --> <bitfield name="LINEARLOCALIDREGID" low="0" high="7" type="a3xx_regid"/> <!-- Must match SP_CS_CTRL --> @@ -4104,10 +4168,10 @@ by a particular renderpass/blit. 
<!-- TODO: two 64kb aligned addresses at a9d0/a9d2 --> - <reg64 offset="0xa9e0" name="SP_PS_SAMPLER_BASE" type="address" align="16" usage="rp_blit"/> - <reg64 offset="0xa9e2" name="SP_CS_SAMPLER_BASE" type="address" align="16" usage="cmd"/> - <reg64 offset="0xa9e4" name="SP_PS_TEXMEMOBJ_BASE" type="address" align="64" usage="rp_blit"/> - <reg64 offset="0xa9e6" name="SP_CS_TEXMEMOBJ_BASE" type="address" align="64" usage="cmd"/> + <reg64 offset="0xa9e0" name="SP_PS_SAMPLER_BASE" type="address" align="16" usage="draw"/> + <reg64 offset="0xa9e2" name="SP_CS_SAMPLER_BASE" type="address" align="16" usage="compute"/> + <reg64 offset="0xa9e4" name="SP_PS_TEXMEMOBJ_BASE" type="address" align="64" usage="draw"/> + <reg64 offset="0xa9e6" name="SP_CS_TEXMEMOBJ_BASE" type="address" align="64" usage="compute"/> <enum name="a6xx_bindless_descriptor_size"> <doc> @@ -4118,13 +4182,13 @@ by a particular renderpass/blit. <value value="3" name="BINDLESS_DESCRIPTOR_64B"/> </enum> - <array offset="0xa9e8" name="SP_CS_BINDLESS_BASE" stride="2" length="5" variants="A6XX" usage="cmd"> + <array offset="0xa9e8" name="SP_CS_BINDLESS_BASE" stride="2" length="5" variants="A6XX" usage="compute"> <reg64 offset="0" name="DESCRIPTOR" variants="A6XX"> <bitfield name="DESC_SIZE" low="0" high="1" type="a6xx_bindless_descriptor_size"/> <bitfield name="ADDR" low="2" high="63" shr="2" type="address"/> </reg64> </array> - <array offset="0xa9e8" name="SP_CS_BINDLESS_BASE" stride="2" length="8" variants="A7XX-" usage="cmd"> + <array offset="0xa9e8" name="SP_CS_BINDLESS_BASE" stride="2" length="8" variants="A7XX-" usage="compute"> <reg64 offset="0" name="DESCRIPTOR" variants="A7XX-"> <bitfield name="DESC_SIZE" low="0" high="1" type="a6xx_bindless_descriptor_size"/> <bitfield name="ADDR" low="2" high="63" shr="2" type="address"/> @@ -4134,9 +4198,9 @@ by a particular renderpass/blit. 
<!-- UAV state for compute shader: --> - <reg64 offset="0xa9f2" name="SP_CS_UAV_BASE" type="address" align="16" variants="A6XX"/> - <reg64 offset="0xa9f8" name="SP_CS_UAV_BASE" type="address" align="16" variants="A7XX-"/> - <reg32 offset="0xaa00" name="SP_CS_USIZE" low="0" high="6" type="uint"/> + <reg64 offset="0xa9f2" name="SP_CS_UAV_BASE" type="address" align="16" variants="A6XX" usage="compute"/> + <reg64 offset="0xa9f8" name="SP_CS_UAV_BASE" type="address" align="16" variants="A7XX-" usage="compute"/> + <reg32 offset="0xaa00" name="SP_CS_USIZE" low="0" high="6" type="uint" usage="compute"/> <!-- Correlated with avgs/uvgs usage in FS --> <reg32 offset="0xaa01" name="SP_PS_VGS_CNTL" type="uint" variants="A7XX-" usage="cmd"/> @@ -4192,7 +4256,7 @@ by a particular renderpass/blit. <value value="0x2" name="ISAMMODE_GL"/> </enum> - <reg32 offset="0xab00" name="SP_MODE_CNTL" usage="rp_blit"> + <reg32 offset="0xab00" name="SP_MODE_CNTL" usage="draw"> <!-- When set, half register loads from the constant file will load a 32-bit value (so hc0.y loads the same value as c0.y) @@ -4203,26 +4267,26 @@ by a particular renderpass/blit. 
--> <bitfield name="CONSTANT_DEMOTION_ENABLE" pos="0" type="boolean"/> <bitfield name="ISAMMODE" low="1" high="2" type="a6xx_isam_mode"/> - <bitfield name="SHARED_CONSTS_ENABLE" pos="3" type="boolean"/> <!-- see SP_SHARED_CONSTANT --> + <bitfield name="SHARED_CONSTS_ENABLE" pos="3" type="boolean"/> <!-- see HLSQ_SHARED_CONSTS --> </reg32> <reg32 offset="0xab01" name="SP_UNKNOWN_AB01" variants="A7XX-" usage="cmd"/> <reg32 offset="0xab02" name="SP_HLSQ_MODE_CNTL" variants="A7XX-" usage="cmd"> - <bitfield name="SHARED_CONSTS_ENABLE" pos="0" type="boolean"/> <!-- see SP_SHARED_CONSTANT --> + <bitfield name="SHARED_CONSTS_ENABLE" pos="0" type="boolean"/> <!-- see HLSQ_SHARED_CONSTS --> </reg32> - <reg32 offset="0xab04" name="SP_PS_CONFIG" type="a6xx_sp_xs_config" usage="rp_blit"/> - <reg32 offset="0xab05" name="SP_PS_INSTR_SIZE" low="0" high="27" type="uint" usage="rp_blit"/> + <reg32 offset="0xab04" name="SP_PS_CONFIG" type="a6xx_sp_xs_config" usage="draw"/> + <reg32 offset="0xab05" name="SP_PS_INSTR_SIZE" low="0" high="27" type="uint" usage="draw"/> - <reg32 offset="0xab06" name="SP_BIN_SIZE" type="a8xx_bin_size" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0xab06" name="SP_BIN_SIZE" type="a8xx_bin_size" variants="A8XX-" usage="cmd"/> - <array offset="0xab10" name="SP_GFX_BINDLESS_BASE" stride="2" length="5" variants="A6XX" usage="rp_blit"> + <array offset="0xab10" name="SP_GFX_BINDLESS_BASE" stride="2" length="5" variants="A6XX" usage="draw"> <reg64 offset="0" name="DESCRIPTOR" variants="A6XX"> <bitfield name="DESC_SIZE" low="0" high="1" type="a6xx_bindless_descriptor_size"/> <bitfield name="ADDR" low="2" high="63" shr="2" type="address"/> </reg64> </array> - <array offset="0xab0a" name="SP_GFX_BINDLESS_BASE" stride="2" length="8" variants="A7XX-" usage="rp_blit"> + <array offset="0xab0a" name="SP_GFX_BINDLESS_BASE" stride="2" length="8" variants="A7XX-" usage="draw"> <reg64 offset="0" name="DESCRIPTOR" variants="A7XX-"> <bitfield name="DESC_SIZE" low="0" 
high="1" type="a6xx_bindless_descriptor_size"/> <bitfield name="ADDR" low="2" high="63" shr="2" type="address"/> @@ -4259,8 +4323,8 @@ by a particular renderpass/blit. <bitfield name="MASK" low="12" high="15"/> </bitset> - <reg32 offset="0xacc0" name="SP_A2D_OUTPUT_INFO" type="a6xx_sp_a2d_output_info" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0xa9bf" name="SP_A2D_OUTPUT_INFO" type="a6xx_sp_a2d_output_info" variants="A7XX-" usage="rp_blit"/> + <reg32 offset="0xacc0" name="SP_A2D_OUTPUT_INFO" type="a6xx_sp_a2d_output_info" variants="A6XX" usage="blit"/> + <reg32 offset="0xa9bf" name="SP_A2D_OUTPUT_INFO" type="a6xx_sp_a2d_output_info" variants="A7XX-" usage="blit"/> <reg32 offset="0xae00" name="SP_DBG_ECO_CNTL" usage="init"/> <reg32 offset="0xae01" name="SP_ADDR_MODE_CNTL" pos="0" type="a5xx_address_mode" variants="A6XX"/> @@ -4268,7 +4332,14 @@ by a particular renderpass/blit. <reg32 offset="0xae02" name="SP_NC_MODE_CNTL"> <!-- TODO: valid bits 0x3c3f, see kernel --> </reg32> - <reg32 offset="0xae03" name="SP_CHICKEN_BITS" usage="init"/> + <reg32 offset="0xae03" name="SP_CHICKEN_BITS" usage="init"> + <doc> + When this bit is enabled, new waves may be unlocked once + all invocations have signaled they don't need local + memory anymore using (eolm)nop. + </doc> + <bitfield name="EOLM_ENABLE" pos="1" type="boolean"/> + </reg32> <reg32 offset="0xae04" name="SP_NC_MODE_CNTL_2" usage="init"> <bitfield name="F16_NO_INF" pos="3" type="boolean"/> </reg32> @@ -4333,7 +4404,7 @@ by a particular renderpass/blit. <reg64 offset="0xae3c" name="SP_SW_DEBUG_ADDR" variants="A7XX-"/> <reg64 offset="0xae3e" name="SP_ISDB_DEBUG_ADDR" variants="A7XX-"/> - <array offset="0xaec0" name="SP_PERFCTR_HLSQ_SEL_2_0" stride="1" length="6" variants="A7XX-"/> + <array offset="0xaec0" name="SP_PERFCTR_HLSQ_SEL_2" stride="1" length="6" variants="A7XX-"/> <!-- The downstream kernel calls the debug cluster of registers @@ -4347,39 +4418,39 @@ by a particular renderpass/blit. 
<reg32 offset="0xb190" name="SP_UNKNOWN_B190"/> <reg32 offset="0xb191" name="SP_UNKNOWN_B191"/> - <reg32 offset="0xb2d6" name="TPL1_A2D_BIN_SIZE" type="a8xx_bin_size" variants="A8XX-" usage="rp_blit"/> - <reg32 offset="0xb2d7" name="TPL1_A2D_FILTER_CNTL" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0xb2d6" name="TPL1_A2D_BIN_SIZE" type="a8xx_bin_size" variants="A8XX-" usage="cmd"/> + <reg32 offset="0xb2d7" name="TPL1_A2D_FILTER_CNTL" variants="A8XX-" usage="blit"/> - <reg32 offset="0xb300" name="TPL1_RAS_MSAA_CNTL" usage="rp_blit"> + <reg32 offset="0xb300" name="TPL1_RAS_MSAA_CNTL" usage="draw"> <bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/> <bitfield name="UNK2" low="2" high="3"/> </reg32> - <reg32 offset="0xb301" name="TPL1_DEST_MSAA_CNTL" usage="rp_blit"> + <reg32 offset="0xb301" name="TPL1_DEST_MSAA_CNTL" usage="draw"> <bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/> <bitfield name="MSAA_DISABLE" pos="2" type="boolean"/> </reg32> <!-- looks to work in the same way as a5xx: --> <reg64 offset="0xb302" name="TPL1_GFX_BORDER_COLOR_BASE" type="address" align="128" usage="cmd"/> - <reg32 offset="0xb304" name="TPL1_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0xb305" name="TPL1_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" usage="rp_blit" variants="A6XX-A7XX" /> - <reg32 offset="0xb306" name="TPL1_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" usage="rp_blit" variants="A6XX-A7XX" /> - <reg32 offset="0xb307" name="TPL1_WINDOW_OFFSET" type="a6xx_reg_xy" usage="rp_blit"/> + <reg32 offset="0xb304" name="TPL1_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" variants="A6XX-A7XX" usage="draw"/> + <reg32 offset="0xb305" name="TPL1_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" usage="draw" variants="A6XX-A7XX" /> + <reg32 offset="0xb306" name="TPL1_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" usage="draw" 
variants="A6XX-A7XX" /> + <reg32 offset="0xb307" name="TPL1_WINDOW_OFFSET" type="a6xx_reg_xy" usage="cmd"/> - <reg32 offset="0xb304" name="TPL1_BIN_SIZE" type="a8xx_bin_size" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0xb304" name="TPL1_BIN_SIZE" type="a8xx_bin_size" variants="A8XX-" usage="cmd"/> <enum name="a6xx_coord_round"> <value value="0" name="COORD_TRUNCATE"/> <value value="1" name="COORD_ROUND_NEAREST_EVEN"/> + <value value="2" name="ROUND_FLOAT_TO_INT"/> <!-- only ARRAYCOORDROUNDMODE --> </enum> <enum name="a6xx_nearest_mode"> <value value="0" name="ROUND_CLAMP_TRUNCATE"/> <value value="1" name="CLAMP_ROUND_TRUNCATE"/> - <value value="2" name="ROUND_FLOAT_TO_INT"/> <!-- only ARRAYCOORDROUNDMODE --> </enum> - <reg32 offset="0xb309" name="TPL1_MODE_CNTL" usage="cmd"> + <bitset name="a6xx_tpl1_mode_cntl" inline="yes"> <bitfield name="ISAMMODE" low="0" high="1" type="a6xx_isam_mode"/> <bitfield name="TEXCOORDROUNDMODE" pos="2" type="a6xx_coord_round"/> <bitfield name="ARRAYCOORDROUNDMODE" low="3" high="4" type="a6xx_coord_round"/> @@ -4387,6 +4458,11 @@ by a particular renderpass/blit. <bitfield name="SAMPLEREPLICATE" pos="6" type="boolean"/> <bitfield name="DESTDATATYPEOVERRIDE" pos="7" type="boolean"/> <bitfield name="PACK_SAMP_REDUCED_PRECISION" pos="8" type="boolean"/> + </bitset> + + <reg32 offset="0xb309" name="TPL1_MODE_CNTL" usage="cmd" type="a6xx_tpl1_mode_cntl" variants="A6XX-A7XX"/> + <reg32 offset="0xb309" name="TPL1_MODE_CNTL" usage="cmd" type="a6xx_tpl1_mode_cntl" variants="A8XX-"> + <bitfield name="CLAMP_DISABLE" pos="12" type="boolean"/> </reg32> <reg32 offset="0xb310" name="SP_UNKNOWN_B310" variants="A7XX-" usage="cmd"/> @@ -4395,24 +4471,26 @@ by a particular renderpass/blit. badly named or the functionality moved in a6xx. 
But downstream kernel calls this "a6xx_sp_ps_tp_2d_cluster" --> - <reg32 offset="0xb4c0" name="TPL1_A2D_SRC_TEXTURE_INFO" type="a6xx_a2d_src_texture_info" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0xb4c1" name="TPL1_A2D_SRC_TEXTURE_SIZE" variants="A6XX" usage="rp_blit"> + <reg32 offset="0xb4c0" name="TPL1_A2D_SRC_TEXTURE_INFO" type="a6xx_a2d_src_texture_info" variants="A6XX" usage="blit"/> + <reg32 offset="0xb4c1" name="TPL1_A2D_SRC_TEXTURE_SIZE" variants="A6XX" usage="blit"> <bitfield name="WIDTH" low="0" high="14" type="uint"/> <bitfield name="HEIGHT" low="15" high="29" type="uint"/> </reg32> - <reg64 offset="0xb4c2" name="TPL1_A2D_SRC_TEXTURE_BASE" type="address" align="16" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0xb4c4" name="TPL1_A2D_SRC_TEXTURE_PITCH" variants="A6XX" usage="rp_blit"> + <reg64 offset="0xb4c2" name="TPL1_A2D_SRC_TEXTURE_BASE" type="address" align="16" variants="A6XX" usage="blit"/> + <reg32 offset="0xb4c4" name="TPL1_A2D_SRC_TEXTURE_PITCH" variants="A6XX" usage="blit"> <bitfield name="UNK0" low="0" high="8"/> <bitfield name="PITCH" low="9" high="23" shr="6" type="uint"/> </reg32> - <reg32 offset="0xb2c0" name="TPL1_A2D_SRC_TEXTURE_INFO" type="a6xx_a2d_src_texture_info" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xb2c1" name="TPL1_A2D_SRC_TEXTURE_SIZE" variants="A7XX-"> + <reg32 offset="0xb2c0" name="TPL1_A2D_SRC_TEXTURE_INFO" type="a6xx_a2d_src_texture_info" variants="A7XX-" usage="blit"/> + <reg32 offset="0xb2c1" name="TPL1_A2D_SRC_TEXTURE_SIZE" variants="A7XX-" usage="blit"> <bitfield name="WIDTH" low="0" high="14" type="uint"/> <bitfield name="HEIGHT" low="15" high="29" type="uint"/> </reg32> - <reg64 offset="0xb2c2" name="TPL1_A2D_SRC_TEXTURE_BASE" type="address" align="16" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xb2c4" name="TPL1_A2D_SRC_TEXTURE_PITCH" variants="A7XX-"> + <reg64 offset="0xb2c2" name="TPL1_A2D_SRC_TEXTURE_BASE" type="address" align="16" variants="A7XX" usage="blit"/> + <!-- gen8 can 
use an unaligned base for IMG_BUFFER: --> + <reg64 offset="0xb2c2" name="TPL1_A2D_SRC_TEXTURE_BASE" type="address" variants="A8XX-" usage="blit"/> + <reg32 offset="0xb2c4" name="TPL1_A2D_SRC_TEXTURE_PITCH" variants="A7XX-" usage="blit"> <!-- Bits from 3..9 must be zero unless 'TPL1_A2D_BLT_CNTL::TYPE' is A6XX_TEX_IMG_BUFFER, which allows for lower alignment. @@ -4421,41 +4499,45 @@ by a particular renderpass/blit. </reg32> <!-- planes for NV12, etc. (TODO: not tested) --> - <reg64 offset="0xb4c5" name="TPL1_A2D_SRC_TEXTURE_BASE_1" type="address" align="16" variants="A6XX"/> - <reg32 offset="0xb4c7" name="TPL1_A2D_SRC_TEXTURE_PITCH_1" low="0" high="11" shr="6" type="uint" variants="A6XX"/> - <reg64 offset="0xb4c8" name="TPL1_A2D_SRC_TEXTURE_BASE_2" type="address" align="16" variants="A6XX"/> - - <reg64 offset="0xb2c5" name="TPL1_A2D_SRC_TEXTURE_BASE_1" type="address" align="16" variants="A7XX-"/> - <reg32 offset="0xb2c7" name="TPL1_A2D_SRC_TEXTURE_PITCH_1" low="0" high="11" shr="6" type="uint" variants="A7XX-"/> - <reg64 offset="0xb2c8" name="TPL1_A2D_SRC_TEXTURE_BASE_2" type="address" align="16" variants="A7XX-"/> - - <reg64 offset="0xb4ca" name="TPL1_A2D_SRC_TEXTURE_FLAG_BASE" type="address" align="16" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0xb4cc" name="TPL1_A2D_SRC_TEXTURE_FLAG_PITCH" low="0" high="7" shr="6" type="uint" variants="A6XX" usage="rp_blit"/> - - <reg64 offset="0xb2ca" name="TPL1_A2D_SRC_TEXTURE_FLAG_BASE" type="address" align="16" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xb2cc" name="TPL1_A2D_SRC_TEXTURE_FLAG_PITCH" low="0" high="7" shr="6" type="uint" variants="A7XX-" usage="rp_blit"/> - - <reg32 offset="0xb4cd" name="SP_PS_UNKNOWN_B4CD" low="6" high="31" variants="A6XX"/> - <reg32 offset="0xb4ce" name="SP_PS_UNKNOWN_B4CE" low="0" high="31" variants="A6XX"/> - <reg32 offset="0xb4cf" name="SP_PS_UNKNOWN_B4CF" low="0" high="30" variants="A6XX"/> - <reg32 offset="0xb4d0" name="SP_PS_UNKNOWN_B4D0" low="0" high="29" 
variants="A6XX"/> - <reg32 offset="0xb4d1" name="SP_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A6XX" usage="rp_blit"/> - - <reg32 offset="0xb2cd" name="SP_PS_UNKNOWN_B4CD" low="6" high="31" variants="A7XX"/> - <reg32 offset="0xb2ce" name="SP_PS_UNKNOWN_B4CE" low="0" high="31" variants="A7XX"/> - <reg32 offset="0xb2cf" name="SP_PS_UNKNOWN_B4CF" low="0" high="30" variants="A7XX"/> - <reg32 offset="0xb2d0" name="SP_PS_UNKNOWN_B4D0" low="0" high="29" variants="A7XX"/> - <reg32 offset="0xb2d1" name="TPL1_A2D_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX-"/> - <reg32 offset="0xb2d2" name="TPL1_A2D_BLT_CNTL" variants="A7XX-" usage="rp_blit"> + <reg64 offset="0xb4c5" name="TPL1_A2D_SRC_TEXTURE_BASE_1" type="address" align="16" variants="A6XX" usage="blit"/> + <reg32 offset="0xb4c7" name="TPL1_A2D_SRC_TEXTURE_PITCH_1" low="0" high="11" shr="6" type="uint" variants="A6XX" usage="blit"/> + <reg64 offset="0xb4c8" name="TPL1_A2D_SRC_TEXTURE_BASE_2" type="address" align="16" variants="A6XX" usage="blit"/> + + <reg64 offset="0xb2c5" name="TPL1_A2D_SRC_TEXTURE_BASE_1" type="address" align="16" variants="A7XX-" usage="blit"/> + <reg32 offset="0xb2c7" name="TPL1_A2D_SRC_TEXTURE_PITCH_1" low="0" high="11" shr="6" type="uint" variants="A7XX-" usage="blit"/> + <reg64 offset="0xb2c8" name="TPL1_A2D_SRC_TEXTURE_BASE_2" type="address" align="16" variants="A7XX-" usage="blit"/> + + <reg64 offset="0xb4ca" name="TPL1_A2D_SRC_TEXTURE_FLAG_BASE" type="address" align="16" variants="A6XX" usage="blit"/> + <reg32 offset="0xb4cc" name="TPL1_A2D_SRC_TEXTURE_FLAG_PITCH" low="0" high="7" shr="6" type="uint" variants="A6XX" usage="blit"/> + + <reg64 offset="0xb2ca" name="TPL1_A2D_SRC_TEXTURE_FLAG_BASE" type="address" align="16" variants="A7XX-" usage="blit"/> + <reg32 offset="0xb2cc" name="TPL1_A2D_SRC_TEXTURE_FLAG_PITCH" low="0" high="7" shr="6" type="uint" variants="A7XX-" usage="blit"/> + + <reg32 offset="0xb4cd" name="SP_PS_UNKNOWN_B4CD" low="6" high="31" variants="A6XX" usage="blit"/> + 
<reg32 offset="0xb4ce" name="SP_PS_UNKNOWN_B4CE" low="0" high="31" variants="A6XX" usage="blit"/> + <reg32 offset="0xb4cf" name="SP_PS_UNKNOWN_B4CF" low="0" high="30" variants="A6XX" usage="blit"/> + <reg32 offset="0xb4d0" name="SP_PS_UNKNOWN_B4D0" low="0" high="29" variants="A6XX" usage="blit"/> + <reg32 offset="0xb4d1" name="SP_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A6XX" usage="cmd"/> + + <reg32 offset="0xb2cd" name="SP_PS_UNKNOWN_B4CD" low="6" high="31" variants="A7XX" usage="blit"/> + <reg32 offset="0xb2ce" name="SP_PS_UNKNOWN_B4CE" low="0" high="31" variants="A7XX" usage="blit"/> + <reg32 offset="0xb2cf" name="SP_PS_UNKNOWN_B4CF" low="0" high="30" variants="A7XX" usage="blit"/> + <reg32 offset="0xb2d0" name="SP_PS_UNKNOWN_B4D0" low="0" high="29" variants="A7XX" usage="blit"/> + <reg32 offset="0xb2d1" name="TPL1_A2D_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX-" usage="cmd"/> + <bitset name="a7xx_tpl1_a2d_blt_cntl" inline="yes"> <bitfield name="RAW_COPY" pos="0" type="boolean"/> - <bitfield name="START_OFFSET_TEXELS" low="16" high="21"/> <bitfield name="TYPE" low="29" high="31" type="a6xx_tex_type"/> + </bitset> + <reg32 offset="0xb2d2" name="TPL1_A2D_BLT_CNTL" type="a7xx_tpl1_a2d_blt_cntl" variants="A7XX" usage="blit"> + <bitfield name="START_OFFSET_TEXELS" low="16" high="21"/> </reg32> - <reg32 offset="0xab21" name="SP_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX" usage="rp_blit"/> - <reg32 offset="0xab07" name="SP_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A8XX-" usage="rp_blit"/> + <reg32 offset="0xb2d2" name="TPL1_A2D_BLT_CNTL" type="a7xx_tpl1_a2d_blt_cntl" variants="A8XX-" usage="blit"/> + <reg32 offset="0xab21" name="SP_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX" usage="cmd"/> + <reg32 offset="0xab07" name="SP_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A8XX-" usage="cmd"/> - <!-- always 0x100000 or 0x1000000? 
--> - <reg32 offset="0xb600" name="TPL1_DBG_ECO_CNTL" low="0" high="25" usage="init"/> + <reg32 offset="0xb600" name="TPL1_DBG_ECO_CNTL" usage="init"> + <bitfield name="LINEAR_MIPMAP_FALLBACK_IN_BLOCKS" pos="25" type="boolean" variants="A6XX-A7XX"/> + </reg32> <reg32 offset="0xb601" name="TPL1_ADDR_MODE_CNTL" type="a5xx_address_mode" variants="A6XX"/> <reg32 offset="0xb602" name="TPL1_DBG_ECO_CNTL1" usage="init"> <!-- Affects UBWC in some way, if BLIT_OP_SCALE is done with this bit set @@ -4496,17 +4578,17 @@ by a particular renderpass/blit. <bitfield name="READ_IMM_SHARED_CONSTS" pos="9" type="boolean" variants="A7XX-"/> </bitset> - <reg32 offset="0xb800" name="SP_VS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0xb801" name="SP_HS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0xb802" name="SP_DS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0xb803" name="SP_GS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A6XX" usage="rp_blit"/> + <reg32 offset="0xb800" name="SP_VS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A6XX" usage="draw"/> + <reg32 offset="0xb801" name="SP_HS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A6XX" usage="draw"/> + <reg32 offset="0xb802" name="SP_DS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A6XX" usage="draw"/> + <reg32 offset="0xb803" name="SP_GS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A6XX" usage="draw"/> - <reg32 offset="0xa827" name="SP_VS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xa83f" name="SP_HS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xa867" name="SP_DS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xa898" name="SP_GS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A7XX-" usage="rp_blit"/> + <reg32 
offset="0xa827" name="SP_VS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A7XX-" usage="draw"/> + <reg32 offset="0xa83f" name="SP_HS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A7XX-" usage="draw"/> + <reg32 offset="0xa867" name="SP_DS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A7XX-" usage="draw"/> + <reg32 offset="0xa898" name="SP_GS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A7XX-" usage="draw"/> - <reg32 offset="0xa9aa" name="SP_RENDER_CNTL" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0xa9aa" name="SP_RENDER_CNTL" variants="A7XX-" usage="draw"> <bitfield name="FS_DISABLE" pos="0" type="boolean"/> </reg32> @@ -4521,16 +4603,18 @@ by a particular renderpass/blit. <bitfield name="DITHER_MODE_MRT7" low="14" high="15" type="adreno_rb_dither_mode"/> </reg32> - <reg32 offset="0xa9ad" name="SP_VRS_CONFIG" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0xa9ad" name="SP_VRS_CONFIG" variants="A7XX-" usage="draw"> <bitfield name="PIPELINE_FSR_ENABLE" pos="0" type="boolean"/> <bitfield name="ATTACHMENT_FSR_ENABLE" pos="1" type="boolean"/> <bitfield name="PRIMITIVE_FSR_ENABLE" pos="3" type="boolean"/> </reg32> - <reg32 offset="0xa9ae" name="SP_PS_CNTL_1" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0xa9ae" name="SP_PS_CNTL_1" variants="A7XX-" usage="draw"> <bitfield name="SYSVAL_REGS_COUNT" low="0" high="7" type="uint"/> <bitfield name="DEFER_WAVE_ALLOC_DIS" pos="8" type="boolean"/> <bitfield name="EVICT_BUF_MODE" low="9" high="10"/> + <bitfield name="WAVE_PAIR_MODE" low="11" high="12"/> + <bitfield name="NUM_TOTAL_VAR" low="13" high="20"/> </reg32> <reg32 offset="0xb820" name="HLSQ_LOAD_STATE_GEOM_CMD"/> @@ -4562,71 +4646,71 @@ by a particular renderpass/blit. 
<bitfield name="FOVEATIONQUALITYREGID" low="8" high="15" type="a3xx_regid"/> </bitset> - <reg32 offset="0xb980" type="a6xx_sp_ps_wave_cntl" name="SP_PS_WAVE_CNTL" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0xb981" name="HLSQ_UNKNOWN_B981" pos="0" type="boolean" variants="A6XX"/> <!-- never used by blob --> - <reg32 offset="0xb982" name="SP_LB_PARAM_LIMIT" low="0" high="2" variants="A6XX" usage="rp_blit"> + <reg32 offset="0xb980" type="a6xx_sp_ps_wave_cntl" name="SP_PS_WAVE_CNTL" variants="A6XX" usage="draw"/> + <reg32 offset="0xb981" name="HLSQ_UNKNOWN_B981" pos="0" type="boolean" variants="A6XX" usage="draw"/> <!-- never used by blob --> + <reg32 offset="0xb982" name="SP_LB_PARAM_LIMIT" low="0" high="2" variants="A6XX" usage="draw"> <!-- Sets the maximum number of primitives allowed in one FS wave minus one, similarly to the A3xx field, except that it's not necessary to set it to anything but the maximum, since the hardware will simply emit smaller waves when it runs out of space. 
--> <bitfield name="PRIMALLOCTHRESHOLD" low="0" high="2" type="uint"/> </reg32> - <reg32 offset="0xb983" name="SP_REG_PROG_ID_0" variants="A6XX" usage="rp_blit"> + <reg32 offset="0xb983" name="SP_REG_PROG_ID_0" variants="A6XX" usage="draw"> <bitfield name="FACEREGID" low="0" high="7" type="a3xx_regid"/> <!-- SAMPLEID is loaded into a half-precision register: --> <bitfield name="SAMPLEID" low="8" high="15" type="a3xx_regid"/> <bitfield name="SAMPLEMASK" low="16" high="23" type="a3xx_regid"/> <bitfield name="CENTERRHW" low="24" high="31" type="a3xx_regid"/> </reg32> - <reg32 offset="0xb984" type="a6xx_sp_reg_prog_id_1" name="SP_REG_PROG_ID_1" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0xb985" type="a6xx_sp_reg_prog_id_2" name="SP_REG_PROG_ID_2" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0xb986" type="a6xx_sp_reg_prog_id_3" name="SP_REG_PROG_ID_3" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0xb987" name="SP_CS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A6XX" usage="cmd"/> - <reg32 offset="0xa9c6" type="a6xx_sp_ps_wave_cntl" name="SP_PS_WAVE_CNTL" variants="A7XX" usage="rp_blit"/> - <reg32 offset="0xa9c6" name="SP_PS_WAVE_CNTL" variants="A8XX-" usage="rp_blit"> + <reg32 offset="0xb984" type="a6xx_sp_reg_prog_id_1" name="SP_REG_PROG_ID_1" variants="A6XX" usage="draw"/> + <reg32 offset="0xb985" type="a6xx_sp_reg_prog_id_2" name="SP_REG_PROG_ID_2" variants="A6XX" usage="draw"/> + <reg32 offset="0xb986" type="a6xx_sp_reg_prog_id_3" name="SP_REG_PROG_ID_3" variants="A6XX" usage="draw"/> + <reg32 offset="0xb987" name="SP_CS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A6XX" usage="compute"/> + <reg32 offset="0xa9c6" type="a6xx_sp_ps_wave_cntl" name="SP_PS_WAVE_CNTL" variants="A7XX" usage="draw"/> + <reg32 offset="0xa9c6" name="SP_PS_WAVE_CNTL" variants="A8XX-" usage="draw"> <bitfield name="VARYINGS" pos="1" type="boolean"/> </reg32> - <reg32 offset="0xa9c7" name="SP_LB_PARAM_LIMIT" low="0" high="2" variants="A7XX-" usage="rp_blit"> + 
<reg32 offset="0xa9c7" name="SP_LB_PARAM_LIMIT" low="0" high="2" variants="A7XX-" usage="draw"> <bitfield name="PRIMALLOCTHRESHOLD" low="0" high="2" type="uint"/> </reg32> - <reg32 offset="0xa9c8" name="SP_REG_PROG_ID_0" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0xa9c8" name="SP_REG_PROG_ID_0" variants="A7XX-" usage="draw"> <bitfield name="FACEREGID" low="0" high="7" type="a3xx_regid"/> <!-- SAMPLEID is loaded into a half-precision register: --> <bitfield name="SAMPLEID" low="8" high="15" type="a3xx_regid"/> <bitfield name="SAMPLEMASK" low="16" high="23" type="a3xx_regid"/> <bitfield name="CENTERRHW" low="24" high="31" type="a3xx_regid"/> </reg32> - <reg32 offset="0xa9c9" type="a6xx_sp_reg_prog_id_1" name="SP_REG_PROG_ID_1" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xa9ca" type="a6xx_sp_reg_prog_id_2" name="SP_REG_PROG_ID_2" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xa9cb" type="a6xx_sp_reg_prog_id_3" name="SP_REG_PROG_ID_3" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xa9cd" name="SP_CS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A7XX-" usage="cmd"/> + <reg32 offset="0xa9c9" type="a6xx_sp_reg_prog_id_1" name="SP_REG_PROG_ID_1" variants="A7XX-" usage="draw"/> + <reg32 offset="0xa9ca" type="a6xx_sp_reg_prog_id_2" name="SP_REG_PROG_ID_2" variants="A7XX-" usage="draw"/> + <reg32 offset="0xa9cb" type="a6xx_sp_reg_prog_id_3" name="SP_REG_PROG_ID_3" variants="A7XX-" usage="draw"/> + <reg32 offset="0xa9cd" name="SP_CS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A7XX-" usage="compute"/> <!-- TODO: what does KERNELDIM do exactly (blob sets it differently from turnip) --> - <reg32 offset="0xb990" name="SP_CS_NDRANGE_0" variants="A6XX" usage="rp_blit"> + <reg32 offset="0xb990" name="SP_CS_NDRANGE_0" variants="A6XX" usage="compute"> <bitfield name="KERNELDIM" low="0" high="1" type="uint"/> <!-- localsize is value minus one: --> <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/> <bitfield name="LOCALSIZEY" low="12" 
high="21" type="uint"/> <bitfield name="LOCALSIZEZ" low="22" high="31" type="uint"/> </reg32> - <reg32 offset="0xb991" name="SP_CS_NDRANGE_1" variants="A6XX" usage="rp_blit"> + <reg32 offset="0xb991" name="SP_CS_NDRANGE_1" variants="A6XX" usage="compute"> <bitfield name="GLOBALSIZE_X" low="0" high="31" type="uint"/> </reg32> - <reg32 offset="0xb992" name="SP_CS_NDRANGE_2" variants="A6XX" usage="rp_blit"> + <reg32 offset="0xb992" name="SP_CS_NDRANGE_2" variants="A6XX" usage="compute"> <bitfield name="GLOBALOFF_X" low="0" high="31" type="uint"/> </reg32> - <reg32 offset="0xb993" name="SP_CS_NDRANGE_3" variants="A6XX" usage="rp_blit"> + <reg32 offset="0xb993" name="SP_CS_NDRANGE_3" variants="A6XX" usage="compute"> <bitfield name="GLOBALSIZE_Y" low="0" high="31" type="uint"/> </reg32> - <reg32 offset="0xb994" name="SP_CS_NDRANGE_4" variants="A6XX" usage="rp_blit"> + <reg32 offset="0xb994" name="SP_CS_NDRANGE_4" variants="A6XX" usage="compute"> <bitfield name="GLOBALOFF_Y" low="0" high="31" type="uint"/> </reg32> - <reg32 offset="0xb995" name="SP_CS_NDRANGE_5" variants="A6XX" usage="rp_blit"> + <reg32 offset="0xb995" name="SP_CS_NDRANGE_5" variants="A6XX" usage="compute"> <bitfield name="GLOBALSIZE_Z" low="0" high="31" type="uint"/> </reg32> - <reg32 offset="0xb996" name="SP_CS_NDRANGE_6" variants="A6XX" usage="rp_blit"> + <reg32 offset="0xb996" name="SP_CS_NDRANGE_6" variants="A6XX" usage="compute"> <bitfield name="GLOBALOFF_Z" low="0" high="31" type="uint"/> </reg32> - <reg32 offset="0xb997" name="SP_CS_CONST_CONFIG_0" variants="A6XX" usage="rp_blit"> + <reg32 offset="0xb997" name="SP_CS_CONST_CONFIG_0" variants="A6XX" usage="compute"> <!-- these are all vec3. first 3 need to be high regs WGSIZECONSTID is the local size (from SP_CS_NDRANGE_0) WGOFFSETCONSTID is WGIDCONSTID*WGSIZECONSTID @@ -4636,7 +4720,7 @@ by a particular renderpass/blit. 
<bitfield name="WGOFFSETCONSTID" low="16" high="23" type="a3xx_regid"/> <bitfield name="LOCALIDREGID" low="24" high="31" type="a3xx_regid"/> </reg32> - <reg32 offset="0xb998" name="SP_CS_WGE_CNTL" variants="A6XX" usage="rp_blit"> + <reg32 offset="0xb998" name="SP_CS_WGE_CNTL" variants="A6XX" usage="compute"> <!-- gl_LocalInvocationIndex --> <bitfield name="LINEARLOCALIDREGID" low="0" high="7" type="a3xx_regid"/> <!-- a650 has 6 "SP cores" (but 3 "SP"). this makes it use only @@ -4648,40 +4732,40 @@ by a particular renderpass/blit. <bitfield name="THREADSIZE_SCALAR" pos="10" type="boolean"/> </reg32> <!--note: vulkan blob doesn't use these --> - <reg32 offset="0xb999" name="SP_CS_KERNEL_GROUP_X" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0xb99a" name="SP_CS_KERNEL_GROUP_Y" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0xb99b" name="SP_CS_KERNEL_GROUP_Z" variants="A6XX" usage="rp_blit"/> + <reg32 offset="0xb999" name="SP_CS_KERNEL_GROUP_X" variants="A6XX" usage="compute"/> + <reg32 offset="0xb99a" name="SP_CS_KERNEL_GROUP_Y" variants="A6XX" usage="compute"/> + <reg32 offset="0xb99b" name="SP_CS_KERNEL_GROUP_Z" variants="A6XX" usage="compute"/> <!-- TODO: what does KERNELDIM do exactly (blob sets it differently from turnip) --> - <reg32 offset="0xa9d4" name="SP_CS_NDRANGE_0" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0xa9d4" name="SP_CS_NDRANGE_0" variants="A7XX-" usage="compute"> <bitfield name="KERNELDIM" low="0" high="1" type="uint"/> <!-- localsize is value minus one: --> <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/> <bitfield name="LOCALSIZEY" low="12" high="21" type="uint"/> <bitfield name="LOCALSIZEZ" low="22" high="31" type="uint"/> </reg32> - <reg32 offset="0xa9d5" name="SP_CS_NDRANGE_1" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0xa9d5" name="SP_CS_NDRANGE_1" variants="A7XX-" usage="compute"> <bitfield name="GLOBALSIZE_X" low="0" high="31" type="uint"/> </reg32> - <reg32 offset="0xa9d6" name="SP_CS_NDRANGE_2" 
variants="A7XX-" usage="rp_blit"> + <reg32 offset="0xa9d6" name="SP_CS_NDRANGE_2" variants="A7XX-" usage="compute"> <bitfield name="GLOBALOFF_X" low="0" high="31" type="uint"/> </reg32> - <reg32 offset="0xa9d7" name="SP_CS_NDRANGE_3" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0xa9d7" name="SP_CS_NDRANGE_3" variants="A7XX-" usage="compute"> <bitfield name="GLOBALSIZE_Y" low="0" high="31" type="uint"/> </reg32> - <reg32 offset="0xa9d8" name="SP_CS_NDRANGE_4" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0xa9d8" name="SP_CS_NDRANGE_4" variants="A7XX-" usage="compute"> <bitfield name="GLOBALOFF_Y" low="0" high="31" type="uint"/> </reg32> - <reg32 offset="0xa9d9" name="SP_CS_NDRANGE_5" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0xa9d9" name="SP_CS_NDRANGE_5" variants="A7XX-" usage="compute"> <bitfield name="GLOBALSIZE_Z" low="0" high="31" type="uint"/> </reg32> - <reg32 offset="0xa9da" name="SP_CS_NDRANGE_6" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0xa9da" name="SP_CS_NDRANGE_6" variants="A7XX-" usage="compute"> <bitfield name="GLOBALOFF_Z" low="0" high="31" type="uint"/> </reg32> <!--note: vulkan blob doesn't use these --> - <reg32 offset="0xa9dc" name="SP_CS_KERNEL_GROUP_X" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xa9dd" name="SP_CS_KERNEL_GROUP_Y" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xa9de" name="SP_CS_KERNEL_GROUP_Z" variants="A7XX-" usage="rp_blit"/> + <reg32 offset="0xa9dc" name="SP_CS_KERNEL_GROUP_X" variants="A7XX-" usage="compute"/> + <reg32 offset="0xa9dd" name="SP_CS_KERNEL_GROUP_Y" variants="A7XX-" usage="compute"/> + <reg32 offset="0xa9de" name="SP_CS_KERNEL_GROUP_Z" variants="A7XX-" usage="compute"/> <enum name="a7xx_cs_yalign"> <value name="CS_YALIGN_1" value="8"/> @@ -4690,7 +4774,7 @@ by a particular renderpass/blit. 
<value name="CS_YALIGN_8" value="1"/> </enum> - <reg32 offset="0xa9db" name="SP_CS_WGE_CNTL" variants="A7XX-" usage="rp_blit"> + <reg32 offset="0xa9db" name="SP_CS_WGE_CNTL" variants="A7XX-" usage="compute"> <!-- gl_LocalInvocationIndex --> <bitfield name="LINEARLOCALIDREGID" low="0" high="7" type="a3xx_regid"/> <!-- Must match SP_CS_CTRL --> @@ -4711,7 +4795,7 @@ by a particular renderpass/blit. <bitfield name="WGTILEHEIGHT" low="26" high="31"/> </reg32> - <reg32 offset="0xa9df" name="SP_CS_NDRANGE_7" variants="A7XX-" usage="cmd"> + <reg32 offset="0xa9df" name="SP_CS_NDRANGE_7" variants="A7XX-" usage="compute"> <!-- The size of the last workgroup. localsize is value minus one: --> <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/> <bitfield name="LOCALSIZEY" low="12" high="21" type="uint"/> @@ -4723,7 +4807,7 @@ by a particular renderpass/blit. <reg32 offset="0xb9a3" name="HLSQ_LOAD_STATE_FRAG_DATA"/> <!-- mirror of SP_CS_BINDLESS_BASE --> - <array offset="0xb9c0" name="HLSQ_CS_BINDLESS_BASE" stride="2" length="5" variants="A6XX" usage="rp_blit"> + <array offset="0xb9c0" name="HLSQ_CS_BINDLESS_BASE" stride="2" length="5" variants="A6XX" usage="compute"> <reg64 offset="0" name="DESCRIPTOR"> <bitfield name="DESC_SIZE" low="0" high="1" type="a6xx_bindless_descriptor_size"/> <bitfield name="ADDR" low="2" high="63" shr="2" type="address"/> @@ -4731,7 +4815,7 @@ by a particular renderpass/blit. </array> <!-- new in a6xx gen4, mirror of SP_CS_CNTL_1? --> - <reg32 offset="0xb9d0" name="HLSQ_CS_CTRL_REG1" variants="A6XX" usage="cmd"> + <reg32 offset="0xb9d0" name="HLSQ_CS_CTRL_REG1" variants="A6XX" usage="compute"> <bitfield name="SHARED_SIZE" low="0" high="4" type="uint"/> <bitfield name="CONSTANTRAMMODE" low="5" high="6" type="a6xx_const_ram_mode"/> </reg32> @@ -4750,7 +4834,7 @@ by a particular renderpass/blit. 
<bitfield name="EVENT" low="0" high="6" type="vgt_event_type"/> </reg32> - <reg32 offset="0xbb08" name="SP_UPDATE_CNTL" variants="A6XX" usage="cmd"> + <reg32 offset="0xbb08" name="SP_UPDATE_CNTL" variants="A6XX" usage="cmd,compute"> <doc> This register clears pending loads queued up by CP_LOAD_STATE6. Each bit resets a particular kind(s) of @@ -4790,7 +4874,7 @@ by a particular renderpass/blit. <bitfield name="EVENT" low="0" high="6" type="vgt_event_type"/> </reg32> - <reg32 offset="0xab1f" name="SP_UPDATE_CNTL" variants="A7XX" usage="cmd"> + <reg32 offset="0xab1f" name="SP_UPDATE_CNTL" variants="A7XX" usage="cmd,compute"> <doc> This register clears pending loads queued up by CP_LOAD_STATE6. Each bit resets a particular kind(s) of @@ -4813,7 +4897,7 @@ by a particular renderpass/blit. <bitfield name="GFX_BINDLESS" low="17" high="24" type="hex"/> </reg32> - <reg32 offset="0xab1f" name="SP_UPDATE_CNTL" variants="A8XX" usage="cmd"> + <reg32 offset="0xab1f" name="SP_UPDATE_CNTL" variants="A8XX" usage="cmd,compute"> <doc> This register clears pending loads queued up by CP_LOAD_STATE6. Each bit resets a particular kind(s) of @@ -4829,16 +4913,16 @@ by a particular renderpass/blit. 
<bitfield name="CS_STATE" pos="5" type="boolean"/> </reg32> - <reg32 offset="0xa9c0" name="SP_CS_BINDLESS_INVALIDATE"/> - <reg32 offset="0xab08" name="SP_GFX_BINDLESS_INVALIDATE"/> + <reg32 offset="0xa9c0" name="SP_CS_BINDLESS_INVALIDATE" usage="compute"/> + <reg32 offset="0xab08" name="SP_GFX_BINDLESS_INVALIDATE" usage="draw"/> - <reg32 offset="0xbb10" name="SP_PS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A6XX" usage="rp_blit"/> - <reg32 offset="0xab03" name="SP_PS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A7XX-" usage="rp_blit"/> + <reg32 offset="0xbb10" name="SP_PS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A6XX" usage="draw"/> + <reg32 offset="0xab03" name="SP_PS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A7XX-" usage="draw"/> - <array offset="0xab40" name="SP_SHARED_CONSTANT_GFX" stride="1" length="64" variants="A7XX"/> - <array offset="0xab30" name="SP_SHARED_CONSTANT_GFX" stride="1" length="128" variants="A8XX-"/> + <array offset="0xab40" name="SP_SHARED_CONSTANT_GFX" stride="1" length="64" variants="A7XX" usage="draw"/> + <array offset="0xab30" name="SP_SHARED_CONSTANT_GFX" stride="1" length="128" variants="A8XX-" usage="draw"/> - <reg32 offset="0xbb11" name="HLSQ_SHARED_CONSTS" variants="A6XX" usage="cmd"> + <reg32 offset="0xbb11" name="HLSQ_SHARED_CONSTS" variants="A6XX" usage="cmd,compute"> <doc> Shared constants are intended to be used for Vulkan push constants. 
When enabled, 8 vec4's are reserved in the FS diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml index 56cfaff614a4..08bc37f29a6f 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml @@ -45,19 +45,21 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="3" name="3"/> </domain> -<domain name="A6XX_TEX_CONST" width="32" varset="chip"> +<domain name="A6XX_TEX_MEMOBJ" width="32" varset="desctype"> <doc>Texture constant dwords</doc> <reg32 offset="0" name="0"> <bitfield name="TILE_MODE" low="0" high="1" type="a6xx_tile_mode"/> <bitfield name="SRGB" pos="2" type="boolean"/> - <bitfield name="SWIZ_X" low="4" high="6" type="a6xx_tex_swiz"/> - <bitfield name="SWIZ_Y" low="7" high="9" type="a6xx_tex_swiz"/> - <bitfield name="SWIZ_Z" low="10" high="12" type="a6xx_tex_swiz"/> - <bitfield name="SWIZ_W" low="13" high="15" type="a6xx_tex_swiz"/> - <bitfield name="MIPLVLS" low="16" high="19" type="uint"/> + <bitfield name="SWIZ_X" low="4" high="6" type="a6xx_tex_swiz" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_BUFFER"/> + <bitfield name="SWIZ_Y" low="7" high="9" type="a6xx_tex_swiz" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_BUFFER"/> + <bitfield name="SWIZ_Z" low="10" high="12" type="a6xx_tex_swiz" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_BUFFER"/> + <bitfield name="SWIZ_W" low="13" high="15" type="a6xx_tex_swiz" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_BUFFER"/> + <bitfield name="MIPLVLS" low="16" high="19" type="uint" variants="DESC_SINGLE_PLANE DESC_BUFFER DESC_WEIGHT"/> + <!-- overlaps with MIPLVLS --> - <bitfield name="CHROMA_MIDPOINT_X" pos="16" type="boolean"/> - <bitfield name="CHROMA_MIDPOINT_Y" pos="18" type="boolean"/> + <bitfield name="CHROMA_MIDPOINT_X" pos="16" type="boolean" variants="DESC_MULTI_PLANE"/> + <bitfield 
name="CHROMA_MIDPOINT_Y" pos="18" type="boolean" variants="DESC_MULTI_PLANE"/> + <bitfield name="SAMPLES" low="20" high="21" type="a3xx_msaa_samples"/> <bitfield name="FMT" low="22" high="29" type="a6xx_format"/> <!-- @@ -68,9 +70,10 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <bitfield name="SWAP" low="30" high="31" type="a3xx_color_swap"/> </reg32> <reg32 offset="1" name="1"> - <bitfield name="WIDTH" low="0" high="14" type="uint"/> - <bitfield name="HEIGHT" low="15" high="29" type="uint"/> - <bitfield name="MUTABLEEN" pos="31" type="boolean" variants="A7XX-"/> + <bitfield name="WIDTH" low="0" high="14" type="uint" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_WEIGHT"/> + <bitfield name="HEIGHT" low="15" high="29" type="uint" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_WEIGHT"/> + <bitfield name="NUM_ELEMENTS" low="0" high="29" type="uint" variants="DESC_BUFFER"/> + <bitfield name="MUTABLEEN" pos="31" type="boolean"/> <!-- a7xx+ --> </reg32> <reg32 offset="2" name="2"> <!-- @@ -78,13 +81,13 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> PITCH/PITCHALIGN when TYPE is A6XX_TEX_BUFFER. 
--> <doc> probably for D3D structured UAVs, normally set to 1 </doc> - <bitfield name="STRUCTSIZETEXELS" low="4" high="15" type="uint"/> - <bitfield name="STARTOFFSETTEXELS" low="16" high="21" type="uint"/> + <bitfield name="STRUCTSIZETEXELS" low="4" high="15" type="uint" variants="DESC_BUFFER"/> + <bitfield name="STARTOFFSETTEXELS" low="16" high="21" type="uint" variants="DESC_BUFFER"/> <!-- minimum pitch (for mipmap levels): log2(pitchalign / 64) --> - <bitfield name="PITCHALIGN" low="0" high="3" type="uint"/> + <bitfield name="PITCHALIGN" low="0" high="3" type="uint" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_WEIGHT"/> <doc>Pitch in bytes (so actually stride)</doc> - <bitfield name="PITCH" low="7" high="28" type="uint"/> + <bitfield name="PITCH" low="7" high="28" type="uint" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_WEIGHT"/> <bitfield name="TYPE" low="29" high="31" type="a6xx_tex_type"/> </reg32> <reg32 offset="3" name="3"> @@ -94,15 +97,15 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> layer size at the point that it stops being reduced moving to higher (smaller) mipmap levels --> - <bitfield name="ARRAY_PITCH" low="0" high="22" shr="12" type="uint"/> - <bitfield name="MIN_LAYERSZ" low="23" high="26" shr="12"/> + <bitfield name="ARRAY_PITCH" low="0" high="22" shr="12" type="uint" variants="DESC_SINGLE_PLANE DESC_WEIGHT"/> + <bitfield name="MIN_LAYERSZ" low="23" high="26" shr="12" variants="DESC_SINGLE_PLANE"/> <!-- by default levels with w < 16 are linear TILE_ALL makes all levels have tiling seems required when using UBWC, since all levels have UBWC (can possibly be disabled?) 
--> - <bitfield name="TILE_ALL" pos="27" type="boolean"/> - <bitfield name="FLAG" pos="28" type="boolean"/> + <bitfield name="TILE_ALL" pos="27" type="boolean" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_WEIGHT"/> + <bitfield name="FLAG" pos="28" type="boolean" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_WEIGHT"/> </reg32> <!-- for 2-3 plane format, BASE is flag buffer address (if enabled) the address of the non-flag base buffer is determined automatically, @@ -113,30 +116,42 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> </reg32> <reg32 offset="5" name="5"> <bitfield name="BASE_HI" low="0" high="16"/> - <bitfield name="DEPTH" low="17" high="29" type="uint"/> + <bitfield name="DEPTH" low="17" high="29" type="uint" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_WEIGHT"/> </reg32> <reg32 offset="6" name="6"> <!-- overlaps with PLANE_PITCH --> - <bitfield name="MIN_LOD_CLAMP" low="0" high="11" type="ufixed" radix="8"/> + <bitfield name="MIN_LOD_CLAMP" low="0" high="11" type="ufixed" radix="8" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE"/> <!-- pitch for plane 2 / plane 3 --> - <bitfield name="PLANE_PITCH" low="8" high="31" type="uint"/> + <bitfield name="PLANE_PITCH" low="8" high="31" type="uint" variants="DESC_MULTI_PLANE"/> + + <!-- QCOM_image_filtering sample weights descriptor fields, overlapping the others. 
--> + <bitfield name="LOG2_PHASES" low="0" high="2" type="uint" variants="DESC_WEIGHT"/> + <bitfield name="DILATION" low="8" high="11" type="uint" variants="DESC_WEIGHT"/> </reg32> <!-- 7/8 is plane 2 address for planar formats --> <reg32 offset="7" name="7"> - <bitfield name="FLAG_LO" low="5" high="31" shr="5"/> + <bitfield name="FLAG_LO" low="5" high="31" shr="5" variants="DESC_SINGLE_PLANE"/> + <bitfield name="BASE_U_LO" low="5" high="31" shr="5" variants="DESC_MULTI_PLANE"/> </reg32> <reg32 offset="8" name="8"> - <bitfield name="FLAG_HI" low="0" high="16"/> + <bitfield name="FLAG_HI" low="0" high="16" variants="DESC_SINGLE_PLANE"/> + <bitfield name="BASE_U_HI" low="0" high="16" variants="DESC_MULTI_PLANE"/> + <bitfield name="FILTER_SIZE_X" low="17" high="23" variants="DESC_WEIGHT"/> + <bitfield name="FILTER_SIZE_Y" low="24" high="30" variants="DESC_WEIGHT"/> </reg32> <!-- 9/10 is plane 3 address for planar formats --> <reg32 offset="9" name="9"> - <bitfield name="FLAG_BUFFER_ARRAY_PITCH" low="0" high="16" shr="4" type="uint"/> + <bitfield name="FLAG_BUFFER_ARRAY_PITCH" low="0" high="16" shr="4" type="uint" variants="DESC_SINGLE_PLANE"/> + <bitfield name="BASE_V_LO" low="5" high="31" shr="5" variants="DESC_MULTI_PLANE"/> </reg32> <reg32 offset="10" name="10"> - <bitfield name="FLAG_BUFFER_PITCH" low="0" high="6" shr="6" type="uint"/> + <bitfield name="FLAG_BUFFER_PITCH" low="0" high="6" shr="6" type="uint" variants="DESC_SINGLE_PLANE"/> <!-- log2 size of the first level, required for mipmapping --> - <bitfield name="FLAG_BUFFER_LOGW" low="8" high="11" type="uint"/> - <bitfield name="FLAG_BUFFER_LOGH" low="12" high="15" type="uint"/> + <bitfield name="FLAG_BUFFER_LOGW" low="8" high="11" type="uint" variants="DESC_SINGLE_PLANE"/> + <bitfield name="FLAG_BUFFER_LOGH" low="12" high="15" type="uint" variants="DESC_SINGLE_PLANE"/> + <bitfield name="BASE_V_HI" low="0" high="16" variants="DESC_MULTI_PLANE"/> + <bitfield name="FILTER_OFFSET_X" low="17" high="22" 
variants="DESC_WEIGHT"/> + <bitfield name="FILTER_OFFSET_Y" low="23" high="28" variants="DESC_WEIGHT"/> </reg32> <reg32 offset="11" name="11"/> <reg32 offset="12" name="12"/> diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml index 81538831dc19..b44946f36fae 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml @@ -328,6 +328,9 @@ to upconvert to 32b float internally? <value value="0x2" name="R2D_SNORM8"/> <value value="0x1" name="R2D_UNORM8_SRGB"/> <value value="0x0" name="R2D_UNORM8"/> + + <!-- Not a hw enum, used internally in driver --> + <value value="0xff" name="R2D_NONE"/> </enum> <enum name="a6xx_tex_type"> diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_perfcntrs.json b/drivers/gpu/drm/msm/registers/adreno/a6xx_perfcntrs.json new file mode 100644 index 000000000000..ec303e0b9f28 --- /dev/null +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_perfcntrs.json @@ -0,0 +1,112 @@ +{ + "chip": "A6XX", + "groups": [ + { + "name": "CP", + "num": 14, + "reserved": [ 0 ], + "select": "CP_PERFCTR_CP_SEL", + "counter": "RBBM_PERFCTR_CP", + "countable_type": "a6xx_cp_perfcounter_select" + }, + { + "name": "CCU", + "num": 5, + "select": "RB_PERFCTR_CCU_SEL", + "counter": "RBBM_PERFCTR_CCU", + "countable_type": "a6xx_ccu_perfcounter_select" + }, + { + "name": "TSE", + "num": 4, + "select": "GRAS_PERFCTR_TSE_SEL", + "counter": "RBBM_PERFCTR_TSE", + "countable_type": "a6xx_tse_perfcounter_select" + }, + { + "name": "RAS", + "num": 4, + "select": "GRAS_PERFCTR_RAS_SEL", + "counter": "RBBM_PERFCTR_RAS", + "countable_type": "a6xx_ras_perfcounter_select" + }, + { + "name": "LRZ", + "num": 4, + "select": "GRAS_PERFCTR_LRZ_SEL", + "counter": "RBBM_PERFCTR_LRZ", + "countable_type": "a6xx_lrz_perfcounter_select" + }, + { + "name": "CMP", + "num": 4, + "select": "RB_PERFCTR_CMP_SEL", + "counter": "RBBM_PERFCTR_CMP", + "countable_type": 
"a6xx_cmp_perfcounter_select" + }, + { + "name": "HLSQ", + "num": 6, + "select": "HLSQ_PERFCTR_HLSQ_SEL", + "counter": "RBBM_PERFCTR_HLSQ", + "countable_type": "a6xx_hlsq_perfcounter_select" + }, + { + "name": "PC", + "num": 8, + "select": "PC_PERFCTR_PC_SEL", + "counter": "RBBM_PERFCTR_PC", + "countable_type": "a6xx_pc_perfcounter_select" + }, + { + "name": "RB", + "num": 8, + "select": "RB_PERFCTR_RB_SEL", + "counter": "RBBM_PERFCTR_RB", + "countable_type": "a6xx_rb_perfcounter_select" + }, + { + "name": "SP", + "num": 24, + "reserved": [ 0 ], + "select": "SP_PERFCTR_SP_SEL", + "counter": "RBBM_PERFCTR_SP", + "countable_type": "a6xx_sp_perfcounter_select" + }, + { + "name": "TP", + "num": 12, + "select": "TPL1_PERFCTR_TP_SEL", + "counter": "RBBM_PERFCTR_TP", + "countable_type": "a6xx_tp_perfcounter_select" + }, + { + "name": "UCHE", + "num": 12, + "select": "UCHE_PERFCTR_UCHE_SEL", + "counter": "RBBM_PERFCTR_UCHE", + "countable_type": "a6xx_uche_perfcounter_select" + }, + { + "name": "VFD", + "num": 8, + "select": "VFD_PERFCTR_VFD_SEL", + "counter": "RBBM_PERFCTR_VFD", + "countable_type": "a6xx_vfd_perfcounter_select" + }, + { + "name": "VPC", + "num": 6, + "select": "VPC_PERFCTR_VPC_SEL", + "counter": "RBBM_PERFCTR_VPC", + "countable_type": "a6xx_vpc_perfcounter_select" + }, + { + "name": "VSC", + "num": 2, + "select": "VSC_PERFCTR_VSC_SEL", + "counter": "RBBM_PERFCTR_VSC", + "countable_type": "a6xx_vsc_perfcounter_select" + } + ] +} diff --git a/drivers/gpu/drm/msm/registers/adreno/a7xx_perfcntrs.json b/drivers/gpu/drm/msm/registers/adreno/a7xx_perfcntrs.json new file mode 100644 index 000000000000..e60aab1862ec --- /dev/null +++ b/drivers/gpu/drm/msm/registers/adreno/a7xx_perfcntrs.json @@ -0,0 +1,228 @@ +{ + "chip": "A7XX", + "groups": [ + { + "name": "CP", + "num": 14, + "reserved": [ 0 ], + "select": "CP_PERFCTR_CP_SEL", + "counter": "RBBM_PERFCTR_CP", + "countable_type": "a7xx_cp_perfcounter_select" + }, + { + "name": "RBBM", + "num": 4, + "select": 
"RBBM_PERFCTR_RBBM_SEL", + "counter": "RBBM_PERFCTR_RBBM", + "countable_type": "a7xx_rbbm_perfcounter_select" + }, + { + "name": "PC", + "pipe": "BR", + "num": 8, + "select": "PC_PERFCTR_PC_SEL", + "counter": "RBBM_PERFCTR_PC", + "countable_type": "a7xx_pc_perfcounter_select" + }, + { + "name": "VFD", + "pipe": "BR", + "num": 8, + "select": "VFD_PERFCTR_VFD_SEL", + "counter": "RBBM_PERFCTR_VFD", + "countable_type": "a7xx_vfd_perfcounter_select" + }, + { + "name": "HLSQ", + "pipe": "BR", + "num": 6, + "select": "SP_PERFCTR_HLSQ_SEL", + "counter": "RBBM_PERFCTR_HLSQ", + "countable_type": "a7xx_hlsq_perfcounter_select" + }, + { + "name": "VPC", + "pipe": "BR", + "num": 6, + "select": "VPC_PERFCTR_VPC_SEL", + "counter": "RBBM_PERFCTR_VPC", + "countable_type": "a7xx_vpc_perfcounter_select" + }, + { + "name": "TSE", + "pipe": "BR", + "num": 4, + "select": "GRAS_PERFCTR_TSE_SEL", + "counter": "RBBM_PERFCTR_TSE", + "countable_type": "a7xx_tse_perfcounter_select" + }, + { + "name": "RAS", + "pipe": "BR", + "num": 4, + "select": "GRAS_PERFCTR_RAS_SEL", + "counter": "RBBM_PERFCTR_RAS", + "countable_type": "a7xx_ras_perfcounter_select" + }, + { + "name": "UCHE", + "num": 12, + "select": "UCHE_PERFCTR_UCHE_SEL", + "counter": "RBBM_PERFCTR_UCHE", + "countable_type": "a7xx_uche_perfcounter_select" + }, + { + "name": "TP", + "pipe": "BR", + "num": 12, + "select": "TPL1_PERFCTR_TP_SEL", + "counter": "RBBM_PERFCTR_TP", + "countable_type": "a7xx_tp_perfcounter_select" + }, + { + "name": "SP", + "pipe": "BR", + "num": 24, + "select": "SP_PERFCTR_SP_SEL", + "counter": "RBBM_PERFCTR_SP", + "countable_type": "a7xx_sp_perfcounter_select" + }, + { + "name": "RB", + "num": 8, + "select": "RB_PERFCTR_RB_SEL", + "counter": "RBBM_PERFCTR_RB", + "countable_type": "a7xx_rb_perfcounter_select" + }, + { + "name": "VSC", + "num": 2, + "select": "VSC_PERFCTR_VSC_SEL", + "counter": "RBBM_PERFCTR_VSC", + "countable_type": "a7xx_vsc_perfcounter_select" + }, + { + "name": "CCU", + "num": 5, + "select": 
"RB_PERFCTR_CCU_SEL", + "counter": "RBBM_PERFCTR_CCU", + "countable_type": "a7xx_ccu_perfcounter_select" + }, + { + "name": "LRZ", + "pipe": "BR", + "num": 4, + "select": "GRAS_PERFCTR_LRZ_SEL", + "counter": "RBBM_PERFCTR_LRZ", + "countable_type": "a7xx_lrz_perfcounter_select" + }, + { + "name": "CMP", + "num": 4, + "select": "RB_PERFCTR_CMP_SEL", + "counter": "RBBM_PERFCTR_CMP", + "countable_type": "a7xx_cmp_perfcounter_select" + }, + { + "name": "UFC", + "pipe": "BR", + "num": 4, + "select": "RB_PERFCTR_UFC_SEL", + "counter": "RBBM_PERFCTR_UFC", + "countable_type": "a7xx_ufc_perfcounter_select" + }, + { + "name": "BV_CP", + "num": 7, + "select": "CP_BV_PERFCTR_CP_SEL", + "counter": "RBBM_PERFCTR2_CP", + "countable_type": "a7xx_cp_perfcounter_select" + }, + { + "name": "BV_PC", + "pipe": "BV", + "num": 8, + "select_offset": 8, + "select": "PC_PERFCTR_PC_SEL", + "counter": "RBBM_PERFCTR_BV_PC", + "countable_type": "a7xx_pc_perfcounter_select" + }, + { + "name": "BV_VFD", + "pipe": "BV", + "num": 8, + "select_offset": 8, + "select": "VFD_PERFCTR_VFD_SEL", + "counter": "RBBM_PERFCTR_BV_VFD", + "countable_type": "a7xx_vfd_perfcounter_select" + }, + { + "name": "BV_VPC", + "pipe": "BV", + "num": 6, + "select_offset": 6, + "select": "VPC_PERFCTR_VPC_SEL", + "counter": "RBBM_PERFCTR_BV_VPC", + "countable_type": "a7xx_vpc_perfcounter_select" + }, + { + "name": "BV_TP", + "pipe": "BV", + "num": 6, + "select_offset": 12, + "select": "TPL1_PERFCTR_TP_SEL", + "counter": "RBBM_PERFCTR2_TP", + "countable_type": "a7xx_tp_perfcounter_select" + }, + { + "name": "BV_SP", + "pipe": "BV", + "num": 12, + "select_offset": 24, + "select": "SP_PERFCTR_SP_SEL", + "counter": "RBBM_PERFCTR2_SP", + "countable_type": "a7xx_sp_perfcounter_select" + }, + { + "name": "BV_UFC", + "pipe": "BV", + "num": 2, + "select_offset": 4, + "select": "RB_PERFCTR_UFC_SEL", + "counter": "RBBM_PERFCTR2_UFC", + "countable_type": "a7xx_ufc_perfcounter_select" + }, + { + "name": "BV_TSE", + "pipe": "BV", + "num": 
4, + "select": "GRAS_PERFCTR_TSE_SEL", + "counter": "RBBM_PERFCTR_BV_TSE", + "countable_type": "a7xx_tse_perfcounter_select" + }, + { + "name": "BV_RAS", + "pipe": "BV", + "num": 4, + "select": "GRAS_PERFCTR_RAS_SEL", + "counter": "RBBM_PERFCTR_BV_RAS", + "countable_type": "a7xx_ras_perfcounter_select" + }, + { + "name": "BV_LRZ", + "pipe": "BV", + "num": 4, + "select": "GRAS_PERFCTR_LRZ_SEL", + "counter": "RBBM_PERFCTR_BV_LRZ", + "countable_type": "a7xx_lrz_perfcounter_select" + }, + { + "name": "BV_HLSQ", + "pipe": "BV", + "num": 6, + "select": "SP_PERFCTR_HLSQ_SEL", + "counter": "RBBM_PERFCTR2_HLSQ", + "countable_type": "a7xx_hlsq_perfcounter_select" + } + ] +} diff --git a/drivers/gpu/drm/msm/registers/adreno/a8xx_descriptors.xml b/drivers/gpu/drm/msm/registers/adreno/a8xx_descriptors.xml index edcbdb3b6921..d119d021446c 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a8xx_descriptors.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a8xx_descriptors.xml @@ -39,76 +39,92 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="3" name="3"/> </domain> -<domain name="A8XX_TEX_MEMOBJ" width="32" varset="chip"> +<domain name="A8XX_TEX_MEMOBJ" width="32" varset="desctype"> <doc>Texture memobj dwords</doc> <reg32 offset="0" name="0"> - <bitfield name="BASE_LO" low="6" high="31" shr="6"/> + <bitfield name="BASE_LO" low="6" high="31" shr="6" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_WEIGHT"/> + <doc>for type TEX_BUFFER</doc> + <bitfield name="INSTANCE_DESC_BASE_LO" low="0" high="31" variants="DESC_BUFFER"/> </reg32> <reg32 offset="1" name="1"> - <bitfield name="BASE_HI" low="0" high="16"/> + <bitfield name="BASE_HI" low="0" high="16" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_WEIGHT"/> + <bitfield name="INSTANCE_DESC_BASE_HI" low="0" high="16" variants="DESC_BUFFER"/> <bitfield name="TYPE" low="17" high="19" type="a6xx_tex_type"/> - <bitfield name="DEPTH" low="20" high="31" type="uint"/> + <bitfield name="DEPTH" 
low="20" high="31" type="uint" variants="DESC_SINGLE_PLANE DESC_WEIGHT"/> + <doc> for type TEX_BUFFER, probably for D3D structured UAVs, normally set to 1 </doc> + <bitfield name="STRUCTSIZETEXELS" low="20" high="31" type="uint" variants="DESC_BUFFER"/> </reg32> <reg32 offset="2" name="2"> - <bitfield name="WIDTH" low="0" high="14" type="uint"/> - <bitfield name="HEIGHT" low="15" high="29" type="uint"/> - <bitfield name="SAMPLES" low="30" high="31" type="a3xx_msaa_samples"/> + <bitfield name="WIDTH" low="0" high="14" type="uint" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_WEIGHT"/> + <bitfield name="HEIGHT" low="15" high="29" type="uint" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_WEIGHT"/> + <bitfield name="SAMPLES" low="30" high="31" type="a3xx_msaa_samples" variants="DESC_SINGLE_PLANE DESC_WEIGHT"/> + <bitfield name="NUM_ELEMENTS" low="0" high="31" type="uint" variants="DESC_BUFFER"/> </reg32> <reg32 offset="3" name="3"> <bitfield name="FMT" low="0" high="7" type="a6xx_format"/> <bitfield name="SWAP" low="8" high="9" type="a3xx_color_swap"/> - <bitfield name="SWIZ_X" low="10" high="12" type="a8xx_tex_swiz"/> - <bitfield name="SWIZ_Y" low="13" high="15" type="a8xx_tex_swiz"/> - <bitfield name="SWIZ_Z" low="16" high="18" type="a8xx_tex_swiz"/> - <bitfield name="SWIZ_W" low="19" high="21" type="a8xx_tex_swiz"/> + <bitfield name="SWIZ_X" low="10" high="12" type="a8xx_tex_swiz" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_BUFFER"/> + <bitfield name="SWIZ_Y" low="13" high="15" type="a8xx_tex_swiz" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_BUFFER"/> + <bitfield name="SWIZ_Z" low="16" high="18" type="a8xx_tex_swiz" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_BUFFER"/> + <bitfield name="SWIZ_W" low="19" high="21" type="a8xx_tex_swiz" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_BUFFER"/> </reg32> <reg32 offset="4" name="4"> - <bitfield name="TILE_MODE" low="0" high="1" type="a6xx_tile_mode"/> - <bitfield name="FLAG" pos="2" 
type="boolean"/> - <bitfield name="PRT_EN" pos="3" type="boolean"/> - <bitfield name="TILE_ALL" pos="4" type="boolean"/> + <bitfield name="TILE_MODE" low="0" high="1" type="a6xx_tile_mode" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_WEIGHT"/> + <bitfield name="FLAG" pos="2" type="boolean" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_WEIGHT"/> + <bitfield name="PRT_EN" pos="3" type="boolean" variants="DESC_SINGLE_PLANE DESC_WEIGHT"/> + <bitfield name="TILE_ALL" pos="4" type="boolean" variants="DESC_SINGLE_PLANE"/> <bitfield name="SRGB" pos="5" type="boolean"/> - <bitfield name="FLAG_LO" low="6" high="31" shr="6"/> + <bitfield name="FLAG_LO" low="6" high="31" shr="6" variants="DESC_SINGLE_PLANE"/> <!-- For multiplanar: --> - <bitfield name="BASE_U_LO" low="6" high="31" shr="6"/> + <bitfield name="BASE_U_LO" low="6" high="31" shr="6" variants="DESC_MULTI_PLANE"/> </reg32> <reg32 offset="5" name="5"> - <bitfield name="FLAG_HI" low="0" high="16"/> + <bitfield name="FLAG_HI" low="0" high="16" variants="DESC_SINGLE_PLANE"/> <!-- For multiplanar: --> - <bitfield name="BASE_U_HI" low="0" high="16"/> - <bitfield name="FLAG_BUFFER_PITCH" low="17" high="24" shr="6" type="uint"/> - <bitfield name="ALL_SAMPLES_CENTER" pos="29" type="boolean"/> - <bitfield name="MUTABLEEN" pos="31" type="boolean"/> + <bitfield name="BASE_U_HI" low="0" high="16" variants="DESC_MULTI_PLANE"/> + <bitfield name="FLAG_BUFFER_PITCH" low="17" high="24" shr="6" type="uint" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE"/> + <bitfield name="ALL_SAMPLES_CENTER" pos="29" type="boolean" variants="DESC_SINGLE_PLANE"/> + <bitfield name="MUTABLEEN" pos="31" type="boolean" variants="DESC_SINGLE_PLANE DESC_WEIGHT"/> + + <!-- QCOM_image_filtering sample weights descriptor fields, overlapping the others. 
--> + <bitfield name="FILTER_SIZE_X" low="0" high="6" type="uint" variants="DESC_WEIGHT"/> + <bitfield name="FILTER_SIZE_Y" low="7" high="13" type="uint" variants="DESC_WEIGHT"/> + <bitfield name="FILTER_OFFSET_X" low="19" high="24" type="uint" variants="DESC_WEIGHT"/> + <bitfield name="FILTER_OFFSET_Y" low="25" high="30" type="uint" variants="DESC_WEIGHT"/> </reg32> <reg32 offset="6" name="6"> - <bitfield name="TEX_LINE_OFFSET" low="0" high="23" type="uint"/> <!-- PITCH --> - <bitfield name="MIN_LINE_OFFSET" low="24" high="27" type="uint"/> <!-- PITCHALIGN --> - <bitfield name="MIPLVLS" low="28" high="31" type="uint"/> + <bitfield name="TEX_LINE_OFFSET" low="0" high="23" type="uint" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_WEIGHT"/> <!-- PITCH --> + <bitfield name="MIN_LINE_OFFSET" low="24" high="27" type="uint" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_WEIGHT"/> <!-- PITCHALIGN --> + <bitfield name="MIPLVLS" low="28" high="31" type="uint" variants="DESC_SINGLE_PLANE DESC_WEIGHT"/> </reg32> <reg32 offset="7" name="7"> - <bitfield name="ARRAY_SLICE_OFFSET" low="0" high="22" shr="12" type="uint"/> <!-- ARRAY_PITCH --> - <bitfield name="ASO_UNIT" pos="23"/> <!-- 4KB or 32B ? --> - <bitfield name="MIN_ARRAY_SLIZE_OFFSET" low="24" high="27" shr="12"/> <!-- MIN_LAYERSZ --> - <bitfield name="GMEM_TILING_FALLBACK_EN" pos="28" type="boolean"/> - <bitfield name="CORNER_BASED_EN" pos="30" type="boolean"/> - <bitfield name="GMEM_FULL_SURF" pos="31" type="boolean"/> + <bitfield name="ARRAY_SLICE_OFFSET" low="0" high="22" shr="12" type="uint" variants="DESC_SINGLE_PLANE DESC_WEIGHT"/> <!-- ARRAY_PITCH --> + <bitfield name="ASO_UNIT" pos="23" variants="DESC_SINGLE_PLANE DESC_WEIGHT"/> <!-- 4KB or 32B ? 
--> + <bitfield name="MIN_ARRAY_SLIZE_OFFSET" low="24" high="27" shr="12" variants="DESC_SINGLE_PLANE"/> <!-- MIN_LAYERSZ --> + <bitfield name="GMEM_TILING_FALLBACK_EN" pos="28" type="boolean" variants="DESC_SINGLE_PLANE"/> + <bitfield name="CORNER_BASED_EN" pos="30" type="boolean" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE"/> + <bitfield name="GMEM_FULL_SURF" pos="31" type="boolean" variants="DESC_SINGLE_PLANE DESC_MULTI_PLANE DESC_WEIGHT"/> <!-- For multiplanar. This overlaps other single-planar fields: --> - <bitfield name="UV_OFFSET_H" low="24" high="25" type="ufixed" radix="2"/> <!-- CHROMA_MIDPOINT_X --> - <bitfield name="UV_OFFSET_V" low="26" high="27" type="ufixed" radix="2"/> <!-- CHROMA_MIDPOINT_Y --> + <bitfield name="UV_OFFSET_H" low="24" high="25" type="ufixed" radix="2" variants="DESC_MULTI_PLANE"/> <!-- CHROMA_MIDPOINT_X --> + <bitfield name="UV_OFFSET_V" low="26" high="27" type="ufixed" radix="2" variants="DESC_MULTI_PLANE"/> <!-- CHROMA_MIDPOINT_Y --> + + <!-- QCOM_image_filtering sample weights descriptor fields, overlapping the others. --> + <bitfield name="DILATION" low="24" high="27" type="uint" variants="DESC_WEIGHT"/> + <bitfield name="LOG2_PHASES" low="28" high="30" type="uint" variants="DESC_WEIGHT"/> </reg32> <reg32 offset="8" name="8"> - <bitfield name="FLAG_ARRAY_PITCH" low="0" high="14" shr="12" type="uint"/> <!-- FLAG_BUFFER_ARRAY_PITCH --> + <bitfield name="FLAG_ARRAY_PITCH" low="0" high="14" shr="12" type="uint" variants="DESC_SINGLE_PLANE"/> <!-- FLAG_BUFFER_ARRAY_PITCH --> <!-- log2 size of the first level, required for mipmapping --> - <bitfield name="FLAG_BUFFER_LOGW" low="24" high="27" type="uint"/> - <bitfield name="FLAG_BUFFER_LOGH" low="28" high="31" type="uint"/> + <bitfield name="FLAG_BUFFER_LOGW" low="24" high="27" type="uint" variants="DESC_SINGLE_PLANE"/> + <bitfield name="FLAG_BUFFER_LOGH" low="28" high="31" type="uint" variants="DESC_SINGLE_PLANE"/> <!-- For multiplanar. 
This overlaps other single-planar fields: --> - <bitfield name="BASE_V_LO" low="6" high="31" shr="6"/> + <bitfield name="BASE_V_LO" low="6" high="31" shr="6" variants="DESC_MULTI_PLANE"/> </reg32> <reg32 offset="9" name="9"> - <bitfield name="MIN_LOD_CLAMP" low="19" high="30" type="ufixed" radix="8"/> + <bitfield name="MIN_LOD_CLAMP" low="19" high="30" type="ufixed" radix="8" variants="DESC_SINGLE_PLANE"/> <!-- For multiplanar, this overlaps other fields: --> - <bitfield name="BASE_V_HI" low="0" high="16"/> - <bitfield name="UV_PITCH" low="17" high="26"/> <!-- PLANE_PITCH --> + <bitfield name="BASE_V_HI" low="0" high="16" variants="DESC_MULTI_PLANE"/> + <bitfield name="UV_PITCH" shr="6" low="17" high="26" variants="DESC_MULTI_PLANE"/> <!-- PLANE_PITCH --> </reg32> <reg32 offset="10" name="10"/> <reg32 offset="11" name="11"/> diff --git a/drivers/gpu/drm/msm/registers/adreno/a8xx_perfcntrs.json b/drivers/gpu/drm/msm/registers/adreno/a8xx_perfcntrs.json new file mode 100644 index 000000000000..b20d0db088e3 --- /dev/null +++ b/drivers/gpu/drm/msm/registers/adreno/a8xx_perfcntrs.json @@ -0,0 +1,241 @@ +{ + "chip": "A8XX", + "groups": [ + { + "name": "CP", + "num": 14, + "reserved": [ 0 ], + "select": "CP_PERFCTR_CP_SEL", + "counter": "RBBM_PERFCTR_CP", + "countable_type": "a8xx_cp_perfcounter_select" + }, + { + "name": "RBBM", + "num": 4, + "select": "RBBM_PERFCTR_RBBM_SEL", + "slice_select": [ "RBBM_SLICE_PERFCTR_RBBM_SEL" ], + "counter": "RBBM_PERFCTR_RBBM", + "countable_type": "a8xx_rbbm_perfcounter_select" + }, + { + "name": "PC", + "pipe": "BR", + "num": 8, + "select": "PC_PERFCTR_PC_SEL", + "slice_select": [ "PC_SLICE_PERFCTR_PC_SEL" ], + "counter": "RBBM_PERFCTR_PC", + "countable_type": "a8xx_pc_perfcounter_select" + }, + { + "name": "VFD", + "pipe": "BR", + "num": 8, + "select": "VFD_PERFCTR_VFD_SEL", + "counter": "RBBM_PERFCTR_VFD", + "countable_type": "a8xx_vfd_perfcounter_select" + }, + { + "name": "HLSQ", + "pipe": "BR", + "num": 6, + "select": 
"SP_PERFCTR_HLSQ_SEL", + "slice_select": [ "SP_PERFCTR_HLSQ_SEL_2" ], + "counter": "RBBM_PERFCTR_HLSQ", + "countable_type": "a8xx_hlsq_perfcounter_select" + }, + { + "name": "VPC", + "pipe": "BR", + "num": 6, + "select": "VPC_PERFCTR_VPC_SEL", + "slice_select": [ "VPC_PERFCTR_VPC_SEL_1", "VPC_PERFCTR_VPC_SEL_2" ], + "counter": "RBBM_PERFCTR_VPC", + "countable_type": "a8xx_vpc_perfcounter_select" + }, + { + "name": "TSE", + "pipe": "BR", + "num": 4, + "select": "GRAS_PERFCTR_TSE_SEL", + "slice_select": [ "GRAS_PERFCTR_TSEFE_SEL" ], + "counter": "RBBM_PERFCTR_TSE", + "countable_type": "a8xx_tse_perfcounter_select" + }, + { + "name": "RAS", + "pipe": "BR", + "num": 4, + "select": "GRAS_PERFCTR_RAS_SEL", + "counter": "RBBM_PERFCTR_RAS", + "countable_type": "a8xx_ras_perfcounter_select" + }, + { + "name": "UCHE", + "num": 24, + "select": "UCHE_PERFCTR_UCHE_SEL", + "counter": "RBBM_PERFCTR_UCHE", + "countable_type": "a8xx_uche_perfcounter_select" + }, + { + "name": "TP", + "pipe": "BR", + "num": 12, + "select": "TPL1_PERFCTR_TP_SEL", + "counter": "RBBM_PERFCTR_TP", + "countable_type": "a8xx_tp_perfcounter_select" + }, + { + "name": "SP", + "pipe": "BR", + "num": 24, + "select": "SP_PERFCTR_SP_SEL", + "counter": "RBBM_PERFCTR_SP", + "countable_type": "a8xx_sp_perfcounter_select" + }, + { + "name": "RB", + "pipe": "BR", + "num": 8, + "select": "RB_PERFCTR_RB_SEL", + "counter": "RBBM_PERFCTR_RB", + "countable_type": "a8xx_rb_perfcounter_select" + }, + { + "name": "VSC", + "num": 2, + "select": "VSC_PERFCTR_VSC_SEL", + "counter": "RBBM_PERFCTR_VSC", + "countable_type": "a8xx_vsc_perfcounter_select" + }, + { + "name": "CCU", + "pipe": "BR", + "num": 5, + "select": "RB_PERFCTR_CCU_SEL", + "counter": "RBBM_PERFCTR_CCU", + "countable_type": "a8xx_ccu_perfcounter_select" + }, + { + "name": "LRZ", + "pipe": "BR", + "num": 4, + "select": "GRAS_PERFCTR_LRZ_SEL", + "counter": "RBBM_PERFCTR_LRZ", + "countable_type": "a8xx_lrz_perfcounter_select" + }, + { + "name": "CMP", + "pipe": 
"BR", + "num": 4, + "select": "RB_PERFCTR_CMP_SEL", + "counter": "RBBM_PERFCTR_CMP", + "countable_type": "a8xx_cmp_perfcounter_select" + }, + { + "name": "UFC", + "pipe": "BR", + "num": 4, + "select": "RB_PERFCTR_UFC_SEL", + "counter": "RBBM_PERFCTR_UFC", + "countable_type": "a8xx_ufc_perfcounter_select" + }, + { + "name": "BV_CP", + "num": 7, + "select_offset": 14, + "select": "CP_PERFCTR_CP_SEL", + "counter": "RBBM_PERFCTR2_CP", + "countable_type": "a8xx_cp_perfcounter_select" + }, + { + "name": "BV_PC", + "pipe": "BV", + "num": 8, + "select_offset": 8, + "select": "PC_PERFCTR_PC_SEL", + "slice_select": [ "PC_SLICE_PERFCTR_PC_SEL" ], + "counter": "RBBM_PERFCTR_BV_PC", + "countable_type": "a8xx_pc_perfcounter_select" + }, + { + "name": "BV_VFD", + "pipe": "BV", + "num": 8, + "select_offset": 8, + "select": "VFD_PERFCTR_VFD_SEL", + "counter": "RBBM_PERFCTR_BV_VFD", + "countable_type": "a8xx_vfd_perfcounter_select" + }, + { + "name": "BV_VPC", + "pipe": "BV", + "num": 6, + "select_offset": 6, + "select": "VPC_PERFCTR_VPC_SEL", + "slice_select": [ "VPC_PERFCTR_VPC_SEL_1", "VPC_PERFCTR_VPC_SEL_2" ], + "counter": "RBBM_PERFCTR_BV_VPC", + "countable_type": "a8xx_vpc_perfcounter_select" + }, + { + "name": "BV_TP", + "pipe": "BV", + "num": 8, + "select_offset": 12, + "select": "TPL1_PERFCTR_TP_SEL", + "counter": "RBBM_PERFCTR2_TP", + "countable_type": "a8xx_tp_perfcounter_select" + }, + { + "name": "BV_SP", + "pipe": "BV", + "num": 12, + "select_offset": 24, + "select": "SP_PERFCTR_SP_SEL", + "counter": "RBBM_PERFCTR2_SP", + "countable_type": "a8xx_sp_perfcounter_select" + }, + { + "name": "BV_UFC", + "pipe": "BV", + "num": 2, + "select_offset": 4, + "select": "RB_PERFCTR_UFC_SEL", + "counter": "RBBM_PERFCTR2_UFC", + "countable_type": "a8xx_ufc_perfcounter_select" + }, + { + "name": "BV_TSE", + "pipe": "BV", + "num": 4, + "select": "GRAS_PERFCTR_TSE_SEL", + "slice_select": [ "GRAS_PERFCTR_TSEFE_SEL" ], + "counter": "RBBM_PERFCTR_BV_TSE", + "countable_type": 
"a8xx_tse_perfcounter_select" + }, + { + "name": "BV_RAS", + "pipe": "BV", + "num": 4, + "select": "GRAS_PERFCTR_RAS_SEL", + "counter": "RBBM_PERFCTR_BV_RAS", + "countable_type": "a8xx_ras_perfcounter_select" + }, + { + "name": "BV_LRZ", + "pipe": "BV", + "num": 4, + "select": "GRAS_PERFCTR_LRZ_SEL", + "counter": "RBBM_PERFCTR_BV_LRZ", + "countable_type": "a8xx_lrz_perfcounter_select" + }, + { + "name": "BV_HLSQ", + "pipe": "BV", + "num": 6, + "select": "SP_PERFCTR_HLSQ_SEL", + "slice_select": [ "SP_PERFCTR_HLSQ_SEL_2" ], + "counter": "RBBM_PERFCTR2_HLSQ", + "countable_type": "a8xx_hlsq_perfcounter_select" + } + ] +} diff --git a/drivers/gpu/drm/msm/registers/adreno/a8xx_perfcntrs.xml b/drivers/gpu/drm/msm/registers/adreno/a8xx_perfcntrs.xml new file mode 100644 index 000000000000..a5bb44f76956 --- /dev/null +++ b/drivers/gpu/drm/msm/registers/adreno/a8xx_perfcntrs.xml @@ -0,0 +1,1929 @@ +<?xml version="1.0" encoding="UTF-8"?> +<database xmlns="http://nouveau.freedesktop.org/" +xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" +xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> +<import file="freedreno_copyright.xml"/> +<import file="adreno/adreno_common.xml"/> +<import file="adreno/adreno_pm4.xml"/> + +<enum name="a8xx_cp_perfcounter_select"> + <value value="0" name="A8XX_PERF_CP_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_CP_ALWAYS_COUNT"/> + <value value="2" name="A8XX_PERF_CP_BUSY_GFX_CORE_IDLE"/> + <value value="3" name="A8XX_PERF_CP_BUSY_CYCLES"/> + <value value="4" name="A8XX_PERF_CP_NUM_PREEMPTIONS"/> + <value value="5" name="A8XX_PERF_CP_PREEMPTION_REACTION_DELAY"/> + <value value="6" name="A8XX_PERF_CP_PREEMPTION_SWITCH_OUT_TIME"/> + <value value="7" name="A8XX_PERF_CP_PREEMPTION_SWITCH_IN_TIME"/> + <value value="8" name="A8XX_PERF_CP_DEAD_DRAWS_IN_BIN_RENDER"/> + <value value="9" name="A8XX_PERF_CP_PREDICATED_DRAWS_KILLED"/> + <value value="10" name="A8XX_PERF_CP_MODE_SWITCH"/> + <value value="11" 
name="A8XX_PERF_CP_ZPASS_DONE"/> + <value value="12" name="A8XX_PERF_CP_CONTEXT_DONE"/> + <value value="13" name="A8XX_PERF_CP_CACHE_FLUSH"/> + <value value="14" name="A8XX_PERF_CP_LONG_PREEMPTIONS"/> + <value value="15" name="A8XX_PERF_CP_SQE_I_CACHE_STARVE"/> + <value value="16" name="A8XX_PERF_CP_SQE_IDLE"/> + <value value="17" name="A8XX_PERF_CP_SQE_PM4_STARVE_RB"/> + <value value="18" name="A8XX_PERF_CP_SQE_PM4_STARVE_IB1"/> + <value value="19" name="A8XX_PERF_CP_SQE_PM4_STARVE_IB2"/> + <value value="20" name="A8XX_PERF_CP_SQE_PM4_STARVE_IB3"/> + <value value="21" name="A8XX_PERF_CP_SQE_PM4_STARVE_FSDT"/> + <value value="22" name="A8XX_PERF_CP_SQE_PM4_STARVE_SDS"/> + <value value="23" name="A8XX_PERF_CP_SQE_MRB_STARVE"/> + <value value="24" name="A8XX_PERF_CP_SQE_RRB_STARVE"/> + <value value="25" name="A8XX_PERF_CP_SQE_VSD_STARVE"/> + <value value="26" name="A8XX_PERF_CP_VSD_DECODE_STARVE"/> + <value value="27" name="A8XX_PERF_CP_SQE_PIPE_OUT_STALL"/> + <value value="28" name="A8XX_PERF_CP_SQE_SYNC_STALL"/> + <value value="29" name="A8XX_PERF_CP_SQE_PM4_WFI_STALL"/> + <value value="30" name="A8XX_PERF_CP_SQE_SYS_WFI_STALL"/> + <value value="31" name="A8XX_PERF_CP_WAIT_ON_OTHER_PIPE"/> + <value value="32" name="A8XX_PERF_CP_OUTPUT_BLOCKED"/> + <value value="33" name="A8XX_PERF_CP_SQE_T4_EXEC"/> + <value value="34" name="A8XX_PERF_CP_SQE_LOAD_STATE_EXEC"/> + <value value="35" name="A8XX_PERF_CP_SQE_SAVE_SDS_STATE"/> + <value value="36" name="A8XX_PERF_CP_SQE_DRAW_EXEC"/> + <value value="37" name="A8XX_PERF_CP_SQE_CTXT_REG_BUNCH_EXEC"/> + <value value="38" name="A8XX_PERF_CP_SQE_EXEC_PROFILED"/> + <value value="39" name="A8XX_PERF_CP_MEMORY_POOL_EMPTY"/> + <value value="40" name="A8XX_PERF_CP_MEMORY_POOL_SYNC_STALL"/> + <value value="41" name="A8XX_PERF_CP_MEMORY_POOL_ABOVE_THRESH"/> + <value value="42" name="A8XX_PERF_CP_MEMORY_POOL_BELOW_THRESH"/> + <value value="43" name="A8XX_PERF_CP_AHB_WR_STALL_PRE_DRAWS"/> + <value value="44" 
name="A8XX_PERF_CP_AHB_STALL_SQE_GMU"/> + <value value="45" name="A8XX_PERF_CP_AHB_STALL_SQE_WR_OTHER"/> + <value value="46" name="A8XX_PERF_CP_AHB_STALL_SQE_RD_OTHER"/> + <value value="47" name="A8XX_PERF_CP_CLUSTER_FE_U_EMPTY"/> + <value value="48" name="A8XX_PERF_CP_CLUSTER_FE_S_EMPTY"/> + <value value="49" name="A8XX_PERF_CP_CLUSTER_SP_VS_EMPTY"/> + <value value="50" name="A8XX_PERF_CP_CLUSTER_VPC_US_EMPTY"/> + <value value="51" name="A8XX_PERF_CP_CLUSTER_VPC_VS_EMPTY"/> + <value value="52" name="A8XX_PERF_CP_CLUSTER_GRAS_EMPTY"/> + <value value="53" name="A8XX_PERF_CP_CLUSTER_SP_PS_EMPTY"/> + <value value="54" name="A8XX_PERF_CP_CLUSTER_VPC_PS_EMPTY"/> + <value value="55" name="A8XX_PERF_CP_CLUSTER_PS_EMPTY"/> + <value value="56" name="A8XX_PERF_CP_PM4_DATA"/> + <value value="57" name="A8XX_PERF_CP_PM4_HEADERS"/> + <value value="58" name="A8XX_PERF_CP_VBIF_READ_BEATS"/> + <value value="59" name="A8XX_PERF_CP_VBIF_WRITE_BEATS"/> + <value value="60" name="A8XX_PERF_CP_SQE_INSTR_COUNTER"/> + <value value="61" name="A8XX_PERF_CP_CLUSTER_FE_US_FULL"/> + <value value="62" name="A8XX_PERF_CP_CLUSTER_FE_S_FULL"/> + <value value="63" name="A8XX_PERF_CP_CLUSTER_SP_VS_FULL"/> + <value value="64" name="A8XX_PERF_CP_CLUSTER_VPC_US_FULL"/> + <value value="65" name="A8XX_PERF_CP_CLUSTER_VPC_VS_FULL"/> + <value value="66" name="A8XX_PERF_CP_CLUSTER_GRAS_FULL"/> + <value value="67" name="A8XX_PERF_CP_CLUSTER_SP_PS_FULL"/> + <value value="68" name="A8XX_PERF_CP_CLUSTER_VPC_PS_FULL"/> + <value value="69" name="A8XX_PERF_CP_CLUSTER_PS_FULL"/> + <value value="70" name="A8XX_PERF_CP_ICACHE_MISSES"/> + <value value="71" name="A8XX_PERF_CP_ICACHE_HITS"/> + <value value="72" name="A8XX_PERF_CP_ICACHE_STALL"/> + <value value="73" name="A8XX_PERF_CP_DCACHE_MISSES"/> + <value value="74" name="A8XX_PERF_CP_DCACHE_HITS"/> + <value value="75" name="A8XX_PERF_CP_DCACHE_STALLS"/> + <value value="76" name="A8XX_PERF_CP_AQE_SQE_STALL"/> + <value value="77" name="A8XX_PERF_CP_SQE_AQE_STARVE"/> + 
<value value="78" name="A8XX_PERF_CP_ISR_CYCLES"/> + <value value="79" name="A8XX_PERF_CP_SQE_MD8_STALL_CYCLES"/> + <value value="80" name="A8XX_PERF_CP_SQE_MESH_EXEC_CYCLES"/> + <value value="81" name="A8XX_PERF_CP_AQE_NUM_AS_CHUNKS"/> + <value value="82" name="A8XX_PERF_CP_AQE_NUM_MS_CHUNKS"/> + <value value="83" name="A8XX_PERF_CP_S_SKEW_BUFFER_FULL"/> + <value value="84" name="A8XX_PERF_CP_S_SKEW_BUFFER_ABOVE_THRESH"/> + <value value="85" name="A8XX_PERF_CP_ECACHE_HITS"/> + <value value="86" name="A8XX_PERF_CP_ECACHE_KILLS"/> + <value value="87" name="A8XX_PERF_CP_ECACHE_REQS"/> + <value value="88" name="A8XX_PERF_CP_ECACHE_EVEN_REQS"/> + <value value="89" name="A8XX_PERF_CP_ECACHE_STALLS_RAP"/> + <value value="90" name="A8XX_PERF_CP_ECACHE_BUSY"/> +</enum> + +<enum name="a8xx_rbbm_perfcounter_select"> + <value value="0" name="A8XX_PERF_RBBM_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_RBBM_US_ALWAYS_COUNT"/> + <value value="2" name="A8XX_PERF_RBBM_US_ALWAYS_ON"/> + <value value="3" name="A8XX_PERF_RBBM_US_STATUS_MASKED"/> + <value value="4" name="A8XX_PERF_RBBM_US_PC_BUSY"/> + <value value="5" name="A8XX_PERF_RBBM_US_COM_BUSY"/> + <value value="6" name="A8XX_PERF_RBBM_US_DCOM_BUSY"/> + <value value="7" name="A8XX_PERF_RBBM_US_VBIF_BUSY"/> + <value value="8" name="A8XX_PERF_RBBM_US_VSC_BUSY"/> + <value value="9" name="A8XX_PERF_RBBM_US_UCHE_BUSY"/> + <value value="10" name="A8XX_PERF_RBBM_US_HLSQ_BUSY"/> + <value value="11" name="A8XX_PERF_RBBM_S_HLSQ_BUSY"/> + <value value="12" name="A8XX_PERF_RBBM_S_PC_BUSY"/> + <value value="13" name="A8XX_PERF_RBBM_S_TESS_BUSY"/> + <value value="14" name="A8XX_PERF_RBBM_S_TSEFE_BUSY"/> + <value value="15" name="A8XX_PERF_RBBM_S_TSEBE_BUSY"/> + <value value="16" name="A8XX_PERF_RBBM_S_RAS_BUSY"/> +</enum> + +<enum name="a8xx_pc_perfcounter_select"> + <value value="0" name="A8XX_PERF_PC_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_PC_US_BUSY_CYCLES"/> + <value value="2" name="A8XX_PERF_PC_US_WORKING_CYCLES"/> + 
<value value="3" name="A8XX_PERF_PC_US_UCHE_OUTSTANDING_TRANS"/> + <value value="4" name="A8XX_PERF_PC_US_PASS1_TF_STALL_CYCLES"/> + <value value="5" name="A8XX_PERF_PC_US_STARVE_CYCLES_FOR_INDEX"/> + <value value="6" name="A8XX_PERF_PC_US_STARVE_CYCLES_FOR_TF"/> + <value value="7" name="A8XX_PERF_PC_US_STARVE_CYCLES_FOR_VIZ_STREAM"/> + <value value="8" name="A8XX_PERF_PC_US_STARVE_CYCLES_DI"/> + <value value="9" name="A8XX_PERF_PC_US_VIS_STREAMS_LOADED"/> + <value value="10" name="A8XX_PERF_PC_US_INSTANCES"/> + <value value="11" name="A8XX_PERF_PC_US_DEAD_PRIM"/> + <value value="12" name="A8XX_PERF_PC_US_SLICE_LIVE_PRIM"/> + <value value="13" name="A8XX_PERF_PC_US_3D_DRAWCALLS"/> + <value value="14" name="A8XX_PERF_PC_US_2D_DRAWCALLS"/> + <value value="15" name="A8XX_PERF_PC_US_NON_DRAWCALL_GLOBAL_EVENTS"/> + <value value="16" name="A8XX_PERF_PC_US_MESH_DRAWS"/> + <value value="17" name="A8XX_PERF_PC_US_MESH_DEAD_DRAWS"/> + <value value="18" name="A8XX_PERF_PC_US_MESH_MVIS_EN_DRAWS"/> + <value value="19" name="A8XX_PERF_PC_US_MESH_DEAD_PRIM"/> + <value value="20" name="A8XX_PERF_PC_US_MESH_LIVE_PRIM"/> + <value value="21" name="A8XX_PERF_PC_US_MESH_PA_EN_PRIM"/> + <value value="22" name="A8XX_PERF_PC_US_STARVE_CYCLES_FOR_MVIS_STREAM"/> + <value value="23" name="A8XX_PERF_PC_US_STARVE_CYCLES_PREDRAW"/> + <value value="24" name="A8XX_PERF_PC_US_STALL_CYCLES_COMPUTE_GFX"/> + <value value="25" name="A8XX_PERF_PC_US_STALL_CYCLES_GFX_COMPUTE"/> + <value value="26" name="A8XX_PERF_PC_US_PREDRAW_STALLS"/> + <value value="27" name="A8XX_PERF_PC_US_DP0_INPUT_STALLS"/> + <value value="28" name="A8XX_PERF_PC_US_DP1_INPUT_STALLS"/> + <value value="29" name="A8XX_PERF_PC_US_BR_STALLS_BV_WORKLOAD"/> + <value value="30" name="A8XX_PERF_PC_US_BV_STALLS_BR_WORKLOAD"/> + <value value="31" name="A8XX_PERF_PC_US_PASSPAIR_STALL"/> + <value value="32" name="A8XX_PERF_PC_US_STALL_CYCLES_UCHE0"/> + <value value="33" name="A8XX_PERF_PC_US_STALL_CYCLES_UCHE1"/> + <value value="34" 
name="A8XX_PERF_PC_US_UCHE_0_TRANS"/> + <value value="35" name="A8XX_PERF_PC_US_UCHE_1_TRANS"/> + <value value="36" name="A8XX_PERF_PC_US_BV_STALLED_BY_ATTR"/> + <value value="37" name="A8XX_PERF_PC_US_BV_STARVED_BY_RARB"/> + <value value="38" name="A8XX_PERF_PC_US_VPC_PRIM_COUNT_STALLS_BR"/> + <value value="39" name="A8XX_PERF_PC_US_VPC_PRIM_COUNT_STALLS_BV"/> + <value value="40" name="A8XX_PERF_PC_US_BV_STALLED_BY_UCHE_FEEDBACK"/> + <value value="41" name="A8XX_PERF_PC_US_VSD_RARB_DVIZ_FULL"/> + <value value="42" name="A8XX_PERF_PC_US_VSD_RARB_PVIZ_FULL"/> + <value value="43" name="A8XX_PERF_PC_US_VSD_RARB_TVIZ_FULL"/> + <value value="44" name="A8XX_PERF_PC_US_DP0_RARB_FULL"/> + <value value="45" name="A8XX_PERF_PC_US_DP1_RARB_FULL"/> + <value value="46" name="A8XX_PERF_PC_US_DP0_LIVE_PRIM"/> + <value value="47" name="A8XX_PERF_PC_US_DP1_LIVE_PRIM"/> + <value value="48" name="A8XX_PERF_PC_US_BV2BR_SWITCH"/> + <value value="49" name="A8XX_PERF_PC_US_BR2BV_SWITCH"/> + <value value="50" name="A8XX_PERF_PC_US_STALL_CYCLES_PC_S"/> + <value value="51" name="A8XX_PERF_PC_RESERVED_51"/> + <value value="52" name="A8XX_PERF_PC_RESERVED_52"/> + <value value="53" name="A8XX_PERF_PC_RESERVED_53"/> + <value value="54" name="A8XX_PERF_PC_RESERVED_54"/> + <value value="55" name="A8XX_PERF_PC_RESERVED_55"/> + <value value="56" name="A8XX_PERF_PC_RESERVED_56"/> + <value value="57" name="A8XX_PERF_PC_RESERVED_57"/> + <value value="58" name="A8XX_PERF_PC_RESERVED_58"/> + <value value="59" name="A8XX_PERF_PC_RESERVED_59"/> + <value value="60" name="A8XX_PERF_PC_S_BUSY_CYCLES"/> + <value value="61" name="A8XX_PERF_PC_S_WORKING_CYCLES"/> + <value value="62" name="A8XX_PERF_PC_S_STALL_CYCLES_VFD"/> + <value value="63" name="A8XX_PERF_PC_S_STALL_CYCLES_VPC_FE"/> + <value value="64" name="A8XX_PERF_PC_S_STALL_CYCLES_TESS"/> + <value value="65" name="A8XX_PERF_PC_S_STALL_CYCLES_VFD_ONLY"/> + <value value="66" name="A8XX_PERF_PC_S_STALL_CYCLES_VPC_ONLY"/> + <value value="67" 
name="A8XX_PERF_PC_S_VPC_PRIMITIVES"/> + <value value="68" name="A8XX_PERF_PC_S_VERTEX_HITS"/> + <value value="69" name="A8XX_PERF_PC_S_IA_VERTICES"/> + <value value="70" name="A8XX_PERF_PC_S_IA_PRIMITIVES"/> + <value value="71" name="A8XX_PERF_PC_S_HS_INVOCATIONS"/> + <value value="72" name="A8XX_PERF_PC_S_DS_INVOCATIONS"/> + <value value="73" name="A8XX_PERF_PC_S_VS_INVOCATIONS"/> + <value value="74" name="A8XX_PERF_PC_S_GS_INVOCATIONS"/> + <value value="75" name="A8XX_PERF_PC_S_DS_PRIMITIVES"/> + <value value="76" name="A8XX_PERF_PC_S_TESS_BUSY_CYCLES"/> + <value value="77" name="A8XX_PERF_PC_S_TESS_WORKING_CYCLES"/> + <value value="78" name="A8XX_PERF_PC_S_TESS_STALL_CYCLES_PC"/> + <value value="79" name="A8XX_PERF_PC_S_TESS_STARVE_CYCLES_PC"/> + <value value="80" name="A8XX_PERF_PC_S_TESS_SETUP_ACTIVE"/> + <value value="81" name="A8XX_PERF_PC_S_TESS_PID_ACTIVE"/> + <value value="82" name="A8XX_PERF_PC_S_TESS_PRIM_GEN_ACTIVE"/> + <value value="83" name="A8XX_PERF_PC_S_TESS_FACTOR_TRANS"/> + <value value="84" name="A8XX_PERF_PC_S_TESS_PC_UV_TRANS"/> + <value value="85" name="A8XX_PERF_PC_S_TESS_PC_UV_PATCHES"/> + <value value="86" name="A8XX_PERF_PC_S_MESH_VS_WAVES"/> +</enum> + +<enum name="a8xx_vfd_perfcounter_select"> + <value value="0" name="A8XX_PERF_VFD_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_VFD_BUSY_CYCLES"/> + <value value="2" name="A8XX_PERF_VFD_STALL_CYCLES_UCHE"/> + <value value="3" name="A8XX_PERF_VFD_STALL_CYCLES_VPC_ALLOC"/> + <value value="4" name="A8XX_PERF_VFD_STALL_CYCLES_SP_INFO"/> + <value value="5" name="A8XX_PERF_VFD_STALL_CYCLES_SP_ATTR"/> + <value value="6" name="A8XX_PERF_VFD_STARVE_CYCLES_UCHE"/> + <value value="7" name="A8XX_PERF_VFD_RBUFFER_FULL"/> + <value value="8" name="A8XX_PERF_VFD_ATTR_INFO_FIFO_FULL"/> + <value value="9" name="A8XX_PERF_VFD_DECODED_ATTRIBUTE_BYTES"/> + <value value="10" name="A8XX_PERF_VFD_NUM_ATTRIBUTES"/> + <value value="11" name="A8XX_PERF_VFD_UPPER_SHADER_FIBERS"/> + <value value="12" 
name="A8XX_PERF_VFD_LOWER_SHADER_FIBERS"/> + <value value="13" name="A8XX_PERF_VFD_MODE_0_FIBERS"/> + <value value="14" name="A8XX_PERF_VFD_MODE_1_FIBERS"/> + <value value="15" name="A8XX_PERF_VFD_MODE_2_FIBERS"/> + <value value="16" name="A8XX_PERF_VFD_MODE_3_FIBERS"/> + <value value="17" name="A8XX_PERF_VFD_MODE_4_FIBERS"/> + <value value="18" name="A8XX_PERF_VFD_TOTAL_VERTICES"/> + <value value="19" name="A8XX_PERF_VFDP_STALL_CYCLES_VFD"/> + <value value="20" name="A8XX_PERF_VFDP_STALL_CYCLES_VFD_INDEX"/> + <value value="21" name="A8XX_PERF_VFDP_STALL_CYCLES_VFD_PROG"/> + <value value="22" name="A8XX_PERF_VFDP_STARVE_CYCLES_PC"/> + <value value="23" name="A8XX_PERF_VFDP_VS_STAGE_WAVES"/> + <value value="24" name="A8XX_PERF_VFD_STALL_CYCLES_PRG_END_FE"/> + <value value="25" name="A8XX_PERF_VFD_STALL_CYCLES_CBSYNC"/> + <value value="26" name="A8XX_PERF_VFD_BOTTLENECK_CYCLES"/> + <value value="27" name="A8XX_PERF_VFD_WORKING_CYCLES"/> +</enum> + +<enum name="a8xx_hlsq_perfcounter_select"> + <value value="0" name="A8XX_PERF_HLSQ_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_HLSQ_BUSY_CYCLES"/> + <value value="2" name="A8XX_PERF_HLSQ_STALL_CYCLES_SP_STATE"/> + <value value="3" name="A8XX_PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE"/> + <value value="4" name="A8XX_PERF_HLSQ_UCHE_LATENCY_CYCLES"/> + <value value="5" name="A8XX_PERF_HLSQ_UCHE_LATENCY_COUNT"/> + <value value="6" name="A8XX_PERF_HLSQ_STALL_CYCLES_UCHE"/> + <value value="7" name="A8XX_PERF_HLSQ_RESERVED_7"/> + <value value="8" name="A8XX_PERF_HLSQ_RESERVED_8"/> + <value value="9" name="A8XX_PERF_HLSQ_RESERVED_9"/> + <value value="10" name="A8XX_PERF_HLSQ_COMPUTE_DRAWCALLS"/> + <value value="11" name="A8XX_PERF_HLSQ_FS_DATA_WAIT_PROGRAMMING"/> + <value value="12" name="A8XX_PERF_HLSQ_DUAL_FS_PROG_ACTIVE"/> + <value value="13" name="A8XX_PERF_HLSQ_DUAL_VS_PROG_ACTIVE"/> + <value value="14" name="A8XX_PERF_HLSQ_FS_BATCH_COUNT_ZERO"/> + <value value="15" name="A8XX_PERF_HLSQ_VS_BATCH_COUNT_ZERO"/> + <value 
value="16" name="A8XX_PERF_HLSQ_WAVE_PENDING_NO_QUAD"/> + <value value="17" name="A8XX_PERF_HLSQ_WAVE_PENDING_NO_PRIM_BASE"/> + <value value="18" name="A8XX_PERF_HLSQ_STALL_CYCLES_VPC_BE"/> + <value value="19" name="A8XX_PERF_HLSQ_RESERVED_19"/> + <value value="20" name="A8XX_PERF_HLSQ_RESERVED_20"/> + <value value="21" name="A8XX_PERF_HLSQ_VSBR_STALL_CYCLES"/> + <value value="22" name="A8XX_PERF_HLSQ_FS_STALL_CYCLES"/> + <value value="23" name="A8XX_PERF_HLSQ_LPAC_STALL_CYCLES"/> + <value value="24" name="A8XX_PERF_HLSQ_BV_STALL_CYCLES"/> + <value value="25" name="A8XX_PERF_HLSQ_VSBR_DEREF_CYCLES"/> + <value value="26" name="A8XX_PERF_HLSQ_FS_DEREF_CYCLES"/> + <value value="27" name="A8XX_PERF_HLSQ_LPAC_DEREF_CYCLES"/> + <value value="28" name="A8XX_PERF_HLSQ_BV_DEREF_CYCLES"/> + <value value="29" name="A8XX_PERF_HLSQ_VSBR_S2W_CYCLES"/> + <value value="30" name="A8XX_PERF_HLSQ_FS_S2W_CYCLES"/> + <value value="31" name="A8XX_PERF_HLSQ_LPAC_S2W_CYCLES"/> + <value value="32" name="A8XX_PERF_HLSQ_BV_S2W_CYCLES"/> + <value value="33" name="A8XX_PERF_HLSQ_VSBR_WAIT_FS_S2W"/> + <value value="34" name="A8XX_PERF_HLSQ_FS_WAIT_VS_S2W"/> + <value value="35" name="A8XX_PERF_HLSQ_LPAC_WAIT_VS_S2W"/> + <value value="36" name="A8XX_PERF_HLSQ_BV_WAIT_FS_S2W"/> + <value value="37" name="A8XX_PERF_HLSQ_RESERVED_37"/> + <value value="38" name="A8XX_PERF_HLSQ_FS_WAIT_SAME_VS_S2W"/> + <value value="39" name="A8XX_PERF_HLSQ_FS_STARVING_SP"/> + <value value="40" name="A8XX_PERF_HLSQ_VS_DATA_WAIT_PROGRAMMING"/> + <value value="41" name="A8XX_PERF_HLSQ_BV_DATA_WAIT_PROGRAMMING"/> + <value value="42" name="A8XX_PERF_HLSQ_STPROC_WAVE_CONTEXTS_VS"/> + <value value="43" name="A8XX_PERF_HLSQ_STPROC_WAVE_CONTEXT_CYCLES_VS"/> + <value value="44" name="A8XX_PERF_HLSQ_STPROC_WAVE_CONTEXTS_FS"/> + <value value="45" name="A8XX_PERF_HLSQ_STPROC_WAVE_CONTEXT_CYCLES_FS"/> + <value value="46" name="A8XX_PERF_HLSQ_STPROC_WAVE_CONTEXTS_BV"/> + <value value="47" 
name="A8XX_PERF_HLSQ_STPROC_WAVE_CONTEXT_CYCLES_BV"/> + <value value="48" name="A8XX_PERF_HLSQ_STPROC_WAVE_CONTEXTS_LPAC"/> + <value value="49" name="A8XX_PERF_HLSQ_STPROC_WAVE_CONTEXT_CYCLES_LPAC"/> + <value value="50" name="A8XX_PERF_HLSQ_SPTROC_STCHE_WARMUP_INC_VS"/> + <value value="51" name="A8XX_PERF_HLSQ_SPTROC_STCHE_WARMUP_INC_FS"/> + <value value="52" name="A8XX_PERF_HLSQ_SPTROC_STCHE_WARMUP_INC_BV"/> + <value value="53" name="A8XX_PERF_HLSQ_SPTROC_STCHE_WARMUP_INC_LPAC"/> + <value value="54" name="A8XX_PERF_HLSQ_SPTROC_STCHE_MISS_INC_VS"/> + <value value="55" name="A8XX_PERF_HLSQ_SPTROC_STCHE_MISS_INC_FS"/> + <value value="56" name="A8XX_PERF_HLSQ_SPTROC_STCHE_MISS_INC_BV"/> + <value value="57" name="A8XX_PERF_HLSQ_SPTROC_STCHE_MISS_INC_LPAC"/> + <value value="58" name="A8XX_PERF_HLSQ_VSBR_S2W_CYCLES_SP"/> + <value value="59" name="A8XX_PERF_HLSQ_FS_S2W_CYCLES_SP"/> + <value value="60" name="A8XX_PERF_HLSQ_LPAC_S2W_CYCLES_SP"/> + <value value="61" name="A8XX_PERF_HLSQ_BV_S2W_CYCLES_SP"/> + <value value="62" name="A8XX_PERF_HLSQ_L2STC_REQ_HLSQ"/> + <value value="63" name="A8XX_PERF_HLSQ_L2STC_REQ_HLSQ_HIT"/> + <value value="64" name="A8XX_PERF_HLSQ_L2STC_REQ_SP"/> + <value value="65" name="A8XX_PERF_HLSQ_L2STC_REQ_SP_HIT"/> + <value value="66" name="A8XX_PERF_HLSQ_L2STC_REQ_INS_HLSQ"/> + <value value="67" name="A8XX_PERF_HLSQ_L2STC_REQ_INS_HLSQ_HIT"/> + <value value="68" name="A8XX_PERF_HLSQ_L2STC_REQ_INS_SP"/> + <value value="69" name="A8XX_PERF_HLSQ_L2STC_REQ_INS_SP_HIT"/> + <value value="70" name="A8XX_PERF_HLSQ_L2STC_REQ_UCHE"/> + <value value="71" name="A8XX_PERF_HLSQ_L2STC_LATENCY_CYCLES"/> + <value value="72" name="A8XX_PERF_HLSQ_L2STC_LATENCY_COUNT"/> + <value value="73" name="A8XX_PERF_HLSQ_L2STC_STALL_SP_MISS_REQ"/> + <value value="74" name="A8XX_PERF_HLSQ_L2STC_BANK0_REPLACEMENT"/> + <value value="75" name="A8XX_PERF_HLSQ_L2STC_BANK1_REPLACEMENT"/> + <value value="76" name="A8XX_PERF_HLSQ_L2STC_BANK2_REPLACEMENT"/> + <value value="77" 
name="A8XX_PERF_HLSQ_L2STC_BANK3_REPLACEMENT"/> + <value value="78" name="A8XX_PERF_HLSQ_S2W_STALL_BY_MISS_RETURN"/> + <value value="79" name="A8XX_PERF_HLSQ_MISS_RETURN_STALL_BY_S2W"/> + <value value="80" name="A8XX_PERF_HLSQ_STPROC_L0_STALL_INS_RD"/> + <value value="81" name="A8XX_PERF_HLSQ_STPROC_L0_INS_MISS"/> + <value value="82" name="A8XX_PERF_HLSQ_STPROC_L0_INS_HIT"/> + <value value="83" name="A8XX_PERF_HLSQ_STPROC_L0_INS_LATENCY_COUNT"/> + <value value="84" name="A8XX_PERF_HLSQ_STPROC_L0_INS_LATENCY_CYCLE"/> + <value value="85" name="A8XX_PERF_HLSQ_STPROC_DPS_RUN_COUNT"/> + <value value="86" name="A8XX_PERF_HLSQ_STPROC_DPS_RUN_CYCLE"/> + <value value="87" name="A8XX_PERF_HLSQ_VSDP_BR_QUERY_REQ"/> + <value value="88" name="A8XX_PERF_HLSQ_VSDP_BV_QUERY_REQ"/> + <value value="89" name="A8XX_PERF_HLSQ_VSDP_BR_QUERY_REQ_WHEN_BV_PENDING"/> + <value value="90" name="A8XX_PERF_HLSQ_VSDP_BR_QUERY_BUSY"/> + <value value="91" name="A8XX_PERF_HLSQ_VSDP_BV_QUERY_BUSY"/> + <value value="92" name="A8XX_PERF_HLSQ_VSDP_BR_QUERY_FAIL"/> + <value value="93" name="A8XX_PERF_HLSQ_VSDP_BV_QUERY_FAIL"/> + <value value="94" name="A8XX_PERF_HLSQ_VS_CTXT_BUF_FULL_BLOCK_CPI"/> + <value value="95" name="A8XX_PERF_HLSQ_FS_CTXT_BUF_FULL_BLOCK_CPI"/> + <value value="96" name="A8XX_PERF_HLSQ_BV_CTXT_BUF_FULL_BLOCK_CPI"/> + <value value="97" name="A8XX_PERF_HLSQ_VS_CONST_BUF_FULL_BLOCK_CPI"/> + <value value="98" name="A8XX_PERF_HLSQ_FS_CONST_BUF_FULL_BLOCK_CPI"/> + <value value="99" name="A8XX_PERF_HLSQ_BV_CONST_BUF_FULL_BLOCK_CPI"/> + <value value="100" name="A8XX_PERF_HLSQ_VS_INS_BUF_FULL_BLOCK_CPI"/> + <value value="101" name="A8XX_PERF_HLSQ_FS_INS_BUF_FULL_BLOCK_CPI"/> + <value value="102" name="A8XX_PERF_HLSQ_BV_INS_BUF_FULL_BLOCK_CPI"/> + <value value="103" name="A8XX_PERF_HLSQ_VS_DES_BUF_FULL_BLOCK_CPI"/> + <value value="104" name="A8XX_PERF_HLSQ_FS_DES_BUF_FULL_BLOCK_CPI"/> + <value value="105" name="A8XX_PERF_HLSQ_BV_DES_BUF_FULL_BLOCK_CPI"/> + <value value="106" 
name="A8XX_PERF_HLSQ_PRIMITIVE_COUNT"/> + <value value="107" name="A8XX_PERF_HLSQ_LPAC2BV_SWITCH_CNT"/> + <value value="108" name="A8XX_PERF_HLSQ_BV2LPAC_SWITCH_CNT"/> + <value value="109" name="A8XX_PERF_HLSQ_LPAC2BV_SWITCH_CYC"/> + <value value="110" name="A8XX_PERF_HLSQ_BV2LPAC_SWITCH_CYC"/> + <value value="111" name="A8XX_PERF_HLSQ_VSDP_BV2BR_SWITCH_CYC"/> + <value value="112" name="A8XX_PERF_HLSQ_VS_SP_HCTX_ACTIVE_CNT"/> + <value value="113" name="A8XX_PERF_HLSQ_FS_SP_HCTX_ACTIVE_CNT"/> + <value value="114" name="A8XX_PERF_HLSQ_BV_SP_HCTX_ACTIVE_CNT"/> + <value value="115" name="A8XX_PERF_HLSQ_VS_SP_HCTX_ACTIVE_CYC"/> + <value value="116" name="A8XX_PERF_HLSQ_FS_SP_HCTX_ACTIVE_CYC"/> + <value value="117" name="A8XX_PERF_HLSQ_BV_SP_HCTX_ACTIVE_CYC"/> + <value value="118" name="A8XX_PERF_HLSQ_VS_SP_CCTX_ACTIVE_CNT"/> + <value value="119" name="A8XX_PERF_HLSQ_FS_SP_CCTX_ACTIVE_CNT"/> + <value value="120" name="A8XX_PERF_HLSQ_BV_SP_CCTX_ACTIVE_CNT"/> + <value value="121" name="A8XX_PERF_HLSQ_VS_SP_SCTX_ACTIVE_CNT"/> + <value value="122" name="A8XX_PERF_HLSQ_FS_SP_SCTX_ACTIVE_CNT"/> + <value value="123" name="A8XX_PERF_HLSQ_BV_SP_SCTX_ACTIVE_CNT"/> + <value value="124" name="A8XX_PERF_HLSQ_VS_SP_CTX_ALIVE_CYCLE"/> + <value value="125" name="A8XX_PERF_HLSQ_FS_SP_CTX_ALIVE_CYCLE"/> + <value value="126" name="A8XX_PERF_HLSQ_BV_SP_CTX_ALIVE_CYCLE"/> + <value value="127" name="A8XX_PERF_HLSQ_STPROC_WAVE_NUM_FS"/> + <value value="128" name="A8XX_PERF_HLSQ_STPROC_WAVE_NUM_VS"/> + <value value="129" name="A8XX_PERF_HLSQ_STPROC_WAVE_NUM_BV"/> + <value value="130" name="A8XX_PERF_HLSQ_STPROC_WAVE_NUM_LPAC"/> + <value value="131" name="A8XX_PERF_HLSQ_VS_SP_CCTX_NUM"/> + <value value="132" name="A8XX_PERF_HLSQ_FS_SP_CCTX_NUM"/> + <value value="133" name="A8XX_PERF_HLSQ_BV_SP_CCTX_NUM"/> + <value value="134" name="A8XX_PERF_HLSQ_VS_SP_SCTX_NUM"/> + <value value="135" name="A8XX_PERF_HLSQ_FS_SP_SCTX_NUM"/> + <value value="136" name="A8XX_PERF_HLSQ_BV_SP_SCTX_NUM"/> +</enum> + 
+<enum name="a8xx_vpc_perfcounter_select"> + <value value="0" name="A8XX_PERF_VPC_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_VPC_FE_BUSY_CYCLES"/> + <value value="2" name="A8XX_PERF_VPC_FE_WORKING_CYCLES"/> + <value value="3" name="A8XX_PERF_VPC_FE_STALL_CYCLES_VFD_WACK"/> + <value value="4" name="A8XX_PERF_VPC_FE_STARVE_CYCLES_SP"/> + <value value="5" name="A8XX_PERF_VPC_FE_PC_PRIMITIVES"/> + <value value="6" name="A8XX_PERF_VPC_FE_SP_COMPONENTS"/> + <value value="7" name="A8XX_PERF_VPC_FE_STALL_CYCLES_VPCRAM_POS"/> + <value value="8" name="A8XX_PERF_VPC_FE_VS_BUSY_CYCLES"/> + <value value="9" name="A8XX_PERF_VPC_FE_VS_WORKING_CYCLES"/> + <value value="10" name="A8XX_PERF_VPC_FE_NUM_VPCRAM_READ_POS"/> + <value value="11" name="A8XX_PERF_VPC_FE_WIT_FULL_CYCLES"/> + <value value="12" name="A8XX_PERF_VPC_FE_VPCRAM_FULL_CYCLES"/> + <value value="13" name="A8XX_PERF_VPC_FE_NUM_VPCRAM_WRITE"/> + <value value="14" name="A8XX_PERF_VPC_FE_STALL_CYCLES_TSE_FE"/> + <value value="15" name="A8XX_PERF_VPC_FE_STALL_CYCLES_VPC_US"/> + <value value="16" name="A8XX_PERF_VPC_FE_TSE_FE_PRIMITIVES"/> + <value value="17" name="A8XX_PERF_VPC_FE_GS_PRIMITIVES"/> + <value value="18" name="A8XX_PERF_VPC_FE_TSE_FE_TRANSACTIONS"/> + <value value="19" name="A8XX_PERF_VPC_FE_STALL_CYCLES_CCU"/> + <value value="20" name="A8XX_PERF_VPC_FE_NUM_WM_HIT"/> + <value value="21" name="A8XX_PERF_VPC_FE_STALL_DQ_WACK"/> + <value value="22" name="A8XX_PERF_VPC_FE_STALL_CYCLES_PRG_END_FE"/> + <value value="23" name="A8XX_PERF_VPC_FE_STALL_CYCLES_PRG_END_VPCVS"/> + <value value="24" name="A8XX_PERF_VPC_FE_POSRAM_FULL_CYCLES"/> + <value value="25" name="A8XX_PERF_VPC_FE_GMEM_NOP_FULL_CYCLES"/> + <value value="26" name="A8XX_PERF_VPC_FE_GMEM_POS_FULL_CYCLES"/> + <value value="27" name="A8XX_PERF_VPC_FE_BOTTLENECK"/> + <value value="28" name="A8XX_PERF_VPC_US_BUSY_CYCLES"/> + <value value="29" name="A8XX_PERF_VPC_US_WORKING_CYCLES"/> + <value value="30" name="A8XX_PERF_VPC_US_STARVE_CYCLES_TSE_FE"/> + 
<value value="31" name="A8XX_PERF_VPC_US_PTUS_FULL"/> + <value value="32" name="A8XX_PERF_VPC_US_COMP_INVIS_PRIM_COUNT"/> + <value value="33" name="A8XX_PERF_VPC_US_STALL_CYCLES_VSC"/> + <value value="34" name="A8XX_PERF_VPC_US_STALL_CYCLES_VPC_BE"/> + <value value="35" name="A8XX_PERF_VPC_US_STALL_CYCLES_UCHE"/> + <value value="36" name="A8XX_PERF_VPC_US_STREAMOUT_TRANSACTION"/> + <value value="37" name="A8XX_PERF_VPC_US_NUM_GMEM_READ_SO"/> + <value value="38" name="A8XX_PERF_VPC_US_STARVE_CYCLES_UCHE_RD"/> + <value value="39" name="A8XX_PERF_VPC_US_STALL_CYCLES_PRG_END_VPCUS"/> + <value value="40" name="A8XX_PERF_VPC_US_STARVE_CYCLES_REORDER"/> + <value value="41" name="A8XX_PERF_VPC_US_BOTTLENECK"/> + <value value="42" name="A8XX_PERF_VPC_RESERVED_42"/> + <value value="43" name="A8XX_PERF_VPC_RESERVED_43"/> + <value value="44" name="A8XX_PERF_VPC_RESERVED_44"/> + <value value="45" name="A8XX_PERF_VPC_BE_BUSY_CYCLES"/> + <value value="46" name="A8XX_PERF_VPC_BE_WORKING_CYCLES"/> + <value value="47" name="A8XX_PERF_VPC_BE_STALL_CYCLES_TSE_BE"/> + <value value="48" name="A8XX_PERF_VPC_BE_TSE_BE_PRIMITIVES"/> + <value value="49" name="A8XX_PERF_VPC_BE_TSE_BE_TRANSACTIONS"/> + <value value="50" name="A8XX_PERF_VPC_BE_STARVE_CYCLES_LRZ"/> + <value value="51" name="A8XX_PERF_VPC_BE_LRZ_ASSIGN_PRIMITIVES"/> + <value value="52" name="A8XX_PERF_VPC_BE_RB_VISIBLE_PRIMITIVES"/> + <value value="53" name="A8XX_PERF_VPC_BE_STARVE_CYCLES_RB"/> + <value value="54" name="A8XX_PERF_VPC_BE_STALL_CYCLES_HLSQ_PRIM_ALLOC"/> + <value value="55" name="A8XX_PERF_VPC_BE_STALL_CYCLES_SP_LM"/> + <value value="56" name="A8XX_PERF_VPC_BE_NUM_PA_REQ"/> + <value value="57" name="A8XX_PERF_VPC_BE_NUM_LM_REQ_HIT"/> + <value value="58" name="A8XX_PERF_VPC_BE_NUM_ATTR_REQ_LM"/> + <value value="59" name="A8XX_PERF_VPC_BE_LM_TRANSACTION"/> + <value value="60" name="A8XX_PERF_VPC_BE_PS_BUSY_CYCLES"/> + <value value="61" name="A8XX_PERF_VPC_BE_PS_WORKING_CYCLES"/> + <value value="62" 
name="A8XX_PERF_VPC_BE_STALL_CYCLES_CCHE"/> + <value value="63" name="A8XX_PERF_VPC_BE_STARVE_CYCLES_CCHE"/> + <value value="64" name="A8XX_PERF_VPC_BE_LM_FULL_WAIT_FOR_INTP_END"/> + <value value="65" name="A8XX_PERF_VPC_BE_CCHE_REQBUF_FULL"/> + <value value="66" name="A8XX_PERF_VPC_BE_CCHE_NUM_POS_REQ"/> + <value value="67" name="A8XX_PERF_VPC_BE_STALL_CYCLES_LM_ACK"/> + <value value="68" name="A8XX_PERF_VPC_BE_STALL_CYCLES_PRG_END_VPCPS"/> + <value value="69" name="A8XX_PERF_VPC_BE_POS_OVERFETCH_ATTR"/> + <value value="70" name="A8XX_PERF_VPC_BE_BOTTLENECK"/> +</enum> + +<enum name="a8xx_tse_perfcounter_select"> + <value value="0" name="A8XX_PERF_TSE_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_TSE_BE_BUSY_CYCLES"/> + <value value="2" name="A8XX_PERF_TSE_BE_CLIPPING_CYCLES"/> + <value value="3" name="A8XX_PERF_TSE_BE_STALL_CYCLES_RAS"/> + <value value="4" name="A8XX_PERF_TSE_BE_STALL_CYCLES_LRZ_BARYPLANE"/> + <value value="5" name="A8XX_PERF_TSE_BE_STALL_CYCLES_LRZ_ZPLANE"/> + <value value="6" name="A8XX_PERF_TSE_BE_STARVE_CYCLES_PC"/> + <value value="7" name="A8XX_PERF_TSE_BE_INPUT_PRIM"/> + <value value="8" name="A8XX_PERF_TSE_BE_INPUT_NULL_PRIM"/> + <value value="9" name="A8XX_PERF_TSE_BE_TRIVAL_REJ_PRIM"/> + <value value="10" name="A8XX_PERF_TSE_BE_CLIPPED_PRIM"/> + <value value="11" name="A8XX_PERF_TSE_BE_ZERO_AREA_PRIM"/> + <value value="12" name="A8XX_PERF_TSE_BE_FACENESS_CULLED_PRIM"/> + <value value="13" name="A8XX_PERF_TSE_BE_ZERO_PIXEL_PRIM"/> + <value value="14" name="A8XX_PERF_TSE_BE_OUTPUT_NULL_PRIM"/> + <value value="15" name="A8XX_PERF_TSE_BE_OUTPUT_VISIBLE_PRIM"/> + <value value="16" name="A8XX_PERF_TSE_BE_CINVOCATION"/> + <value value="17" name="A8XX_PERF_TSE_BE_CPRIMITIVES"/> + <value value="18" name="A8XX_PERF_TSE_BE_2D_INPUT_PRIM"/> + <value value="19" name="A8XX_PERF_TSE_BE_2D_ALIVE_CYCLES"/> + <value value="20" name="A8XX_PERF_TSE_BE_CLIP_PLANES"/> + <value value="21" name="A8XX_PERF_TSE_BE_EMPTY_BBOX_KILLED_PRIM"/> + <value value="22" 
name="A8XX_PERF_TSE_BE_ST1_VP_PARAMS_CACHE_MISS"/> + <value value="23" name="A8XX_PERF_TSE_BE_ST2_VPORT_VP_PARAMS_CACHE_MISS"/> + <value value="24" name="A8XX_PERF_TSE_BE_ST2_SCISSOR_VP_PARAMS_CACHE_MISS"/> + <value value="25" name="A8XX_PERF_TSE_BE_ILLEGAL_BOUNDING_BOX_PRIM"/> + <value value="26" name="A8XX_PERF_TSE_BE_VP_OUT_IS_NAN"/> + <value value="27" name="A8XX_PERF_TSE_BE_EXCLUDED_PRIM"/> + <value value="28" name="A8XX_PERF_TSE_BE_EARLY_CULL_CLIPPED_PRIM"/> + <value value="29" name="A8XX_PERF_TSE_BE_BR_STALLS_DUETO_BV_CLIP"/> + <value value="30" name="A8XX_PERF_TSE_BE_BR_STALLS_DUETO_BV_POLY"/> + <value value="31" name="A8XX_PERF_TSE_BE_BV_STALLS_DUETO_BR_CLIP"/> + <value value="32" name="A8XX_PERF_TSE_BE_BV_STALLS_DUETO_BR_POLY"/> + <value value="33" name="A8XX_PERF_TSE_BE_BV_STALLS_DUETO_BR"/> + <value value="34" name="A8XX_PERF_TSE_FE_BUSY_CYCLES"/> + <value value="35" name="A8XX_PERF_TSE_FE_STALL_CYCLES_VPC_US"/> + <value value="36" name="A8XX_PERF_TSE_FE_STARVE_CYCLES_PC"/> + <value value="37" name="A8XX_PERF_TSE_FE_INPUT_PRIM"/> + <value value="38" name="A8XX_PERF_TSE_FE_INPUT_NULL_PRIM"/> + <value value="39" name="A8XX_PERF_TSE_FE_TRIVAL_REJ_PRIM"/> + <value value="40" name="A8XX_PERF_TSE_FE_ZERO_AREA_PRIM"/> + <value value="41" name="A8XX_PERF_TSE_FE_FACENESS_CULLED_PRIM"/> + <value value="42" name="A8XX_PERF_TSE_FE_ZERO_PIXEL_PRIM"/> + <value value="43" name="A8XX_PERF_TSE_FE_OUTPUT_NULL_PRIM"/> + <value value="44" name="A8XX_PERF_TSE_FE_OUTPUT_VISIBLE_PRIM"/> + <value value="45" name="A8XX_PERF_TSE_FE_CINVOCATION"/> + <value value="46" name="A8XX_PERF_TSE_FE_CPRIMITIVES"/> + <value value="47" name="A8XX_PERF_TSE_FE_CLIP_PLANES"/> + <value value="48" name="A8XX_PERF_TSE_FE_EMPTY_BBOX_KILLED_PRIM"/> + <value value="49" name="A8XX_PERF_TSE_FE_ST1_VP_PARAMS_CACHE_MISS"/> + <value value="50" name="A8XX_PERF_TSE_FE_ST2_VPORT_VP_PARAMS_CACHE_MISS"/> + <value value="51" name="A8XX_PERF_TSE_FE_ST2_SCISSOR_VP_PARAMS_CACHE_MISS"/> + <value value="52" 
name="A8XX_PERF_TSE_FE_ILLEGAL_BOUNDING_BOX_PRIM"/> + <value value="53" name="A8XX_PERF_TSE_FE_VP_OUT_IS_NAN"/> + <value value="54" name="A8XX_PERF_TSE_FE_EXCLUDED_PRIM"/> + <value value="55" name="A8XX_PERF_TSE_FE_EARLY_CULL_CLIPPED_PRIM"/> + <value value="56" name="A8XX_PERF_TSE_FE_BR_STALLS_DUETO_BV_CLIP"/> + <value value="57" name="A8XX_PERF_TSE_FE_BR_STALLS_DUETO_BV_POLY"/> + <value value="58" name="A8XX_PERF_TSE_FE_BV_STALLS_DUETO_BR_CLIP"/> + <value value="59" name="A8XX_PERF_TSE_FE_BV_STALLS_DUETO_BR_POLY"/> + <value value="60" name="A8XX_PERF_TSE_FE_BV_STALLS_DUETO_BR"/> + <value value="61" name="A8XX_PERF_TSE_BE_STALL_CYCLES_LRZ_PRIM"/> + <value value="62" name="A8XX_PERF_TSE_FE_EARLY_BFCULL_GBCLIPPED_PRIM"/> + <value value="63" name="A8XX_PERF_TSE_REG_PROGRAMMING_WORKING_CYCLE_L1"/> + <value value="64" name="A8XX_PERF_TSE_REG_PROGRAMMING_ONLY_CYCLE_L0"/> + <value value="65" name="A8XX_PERF_TSE_COMMON_RAM_WORKING_CYCLE_L2"/> + <value value="66" name="A8XX_PERF_TSE_COMMON_RAM_RD_WORKING_CYCLE_L3"/> + <value value="67" name="A8XX_PERF_TSE_COMMON_RAM_WR_WORKING_CYCLE_L3"/> + <value value="68" name="A8XX_PERF_TSE_CLIP_CODE_WORKING_CYCLE_L1"/> + <value value="69" name="A8XX_PERF_TSE_CLIP_WORKING_CYCLE_L1"/> + <value value="70" name="A8XX_PERF_TSE_CLIP_CTRL_WORKING_CYCLE_L2"/> + <value value="71" name="A8XX_PERF_TSE_CLIP_VP_FMUL_WORKING_CYCLE_L2"/> + <value value="72" name="A8XX_PERF_TSE_CLIP_VP_FMUL_0_WORKING_CYCLE_L3"/> + <value value="73" name="A8XX_PERF_TSE_CLIP_VP_FMUL_1_WORKING_CYCLE_L3"/> + <value value="74" name="A8XX_PERF_TSE_CLIP_VP_FMUL_2_WORKING_CYCLE_L3"/> + <value value="75" name="A8XX_PERF_TSE_CLIP_VP_FMUL_3_WORKING_CYCLE_L3"/> + <value value="76" name="A8XX_PERF_TSE_CLIP_VP_FMUL_4_WORKING_CYCLE_L3"/> + <value value="77" name="A8XX_PERF_TSE_CLIP_VP_FMUL_5_WORKING_CYCLE_L3"/> + <value value="78" name="A8XX_PERF_TSE_CLIP_VP_FADD_WORKING_CYCLE_L2"/> + <value value="79" name="A8XX_PERF_TSE_CLIP_VP_FADD_0_WORKING_CYCLE_L3"/> + <value value="80" 
name="A8XX_PERF_TSE_CLIP_VP_FADD_1_WORKING_CYCLE_L3"/> + <value value="81" name="A8XX_PERF_TSE_CLIP_VP_FADD_2_WORKING_CYCLE_L3"/> + <value value="82" name="A8XX_PERF_TSE_CLIP_VP_RCP_WORKING_CYCLE_L2"/> + <value value="83" name="A8XX_PERF_TSE_VP_TRANSFORM_WORKING_CYCLE_L1"/> + <value value="84" name="A8XX_PERF_TSE_VP0_TRANSFORM_WORKING_CYCLE_L2"/> + <value value="85" name="A8XX_PERF_TSE_VP1_TRANSFORM_WORKING_CYCLE_L2"/> + <value value="86" name="A8XX_PERF_TSE_VP2_TRANSFORM_WORKING_CYCLE_L2"/> + <value value="87" name="A8XX_PERF_TSE_VERTEX_GEN_WORKING_CYCLE_L1"/> + <value value="88" name="A8XX_PERF_TSE_DET_WORKING_CYCLE_L1"/> + <value value="89" name="A8XX_PERF_TSE_BBOX_WORKING_CYCLE_L1"/> + <value value="90" name="A8XX_PERF_TSE_CULLING_WORKING_CYCLE_L1"/> + <value value="91" name="A8XX_PERF_TSE_SETUP_WORKING_CYCLE_L1"/> + <value value="92" name="A8XX_PERF_TSE_SETUP_CTRL_WORKING_CYCLE_L2"/> + <value value="93" name="A8XX_PERF_TSE_SETUP_FDP3_WORKING_CYCLE_L2"/> + <value value="94" name="A8XX_PERF_TSE_SETUP_FDP3_0_WORKING_CYCLE_L3"/> + <value value="95" name="A8XX_PERF_TSE_SETUP_FDP3_1_WORKING_CYCLE_L3"/> + <value value="96" name="A8XX_PERF_TSE_SETUP_FDP3_2_WORKING_CYCLE_L3"/> + <value value="97" name="A8XX_PERF_TSE_SETUP_FDP3_3_WORKING_CYCLE_L3"/> + <value value="98" name="A8XX_PERF_TSE_SETUP_FDP3_4_WORKING_CYCLE_L3"/> + <value value="99" name="A8XX_PERF_TSE_SETUP_FDP3_5_WORKING_CYCLE_L3"/> + <value value="100" name="A8XX_PERF_TSE_SETUP_FMUL_WORKING_CYCLE_L2"/> + <value value="101" name="A8XX_PERF_TSE_SETUP_FMUL_0_WORKING_CYCLE_L3"/> + <value value="102" name="A8XX_PERF_TSE_SETUP_FMUL_1_WORKING_CYCLE_L3"/> + <value value="103" name="A8XX_PERF_TSE_SETUP_FMUL_2_WORKING_CYCLE_L3"/> + <value value="104" name="A8XX_PERF_TSE_SETUP_FMUL_3_WORKING_CYCLE_L3"/> + <value value="105" name="A8XX_PERF_TSE_SETUP_FMUL_4_WORKING_CYCLE_L3"/> + <value value="106" name="A8XX_PERF_TSE_SETUP_FMUL_5_WORKING_CYCLE_L3"/> + <value value="107" name="A8XX_PERF_TSE_EDGE_SETUP_WORKING_CYCLE_L1"/> 
+ <value value="108" name="A8XX_PERF_TSE_EDGE_SETUP_IMUL_WORKING_CYCLE_L2"/> + <value value="109" name="A8XX_PERF_TSE_RAS_INTF_WORKING_CYCLE_L1"/> + <value value="110" name="A8XX_PERF_TSE_LRZ_Z_INTF_WORKING_CYCLE_L1"/> + <value value="111" name="A8XX_PERF_TSE_LRZ_BARY_INTF_WORKING_CYCLE_L1"/> + <value value="112" name="A8XX_PERF_TSE_NON_CLIP_WORKING_CYCLES"/> + <value value="113" name="A8XX_PERF_TSE_NULL_WORKING_CYCLES"/> + <value value="114" name="A8XX_PERF_TSE_OUT_PRIM_WORKING_CYCLES"/> + <value value="115" name="A8XX_PERF_TSE_INPUT_VERTEX_WORKING_CYCLES"/> + <value value="116" name="A8XX_PERF_TSE_MVC_STALL_VPC"/> + <value value="117" name="A8XX_PERF_TSE_VPC_STARVE_TSE"/> + <value value="118" name="A8XX_PERF_TSE_WORKING_CYCLE_L0"/> +</enum> + +<enum name="a8xx_ras_perfcounter_select"> + <value value="0" name="A8XX_PERF_RAS_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_RAS_BUSY_CYCLES"/> + <value value="2" name="A8XX_PERF_RAS_SUPERTILE_ACTIVE_CYCLES"/> + <value value="3" name="A8XX_PERF_RAS_STALL_CYCLES_LRZ"/> + <value value="4" name="A8XX_PERF_RAS_STARVE_CYCLES_TSE"/> + <value value="5" name="A8XX_PERF_RAS_SUPER_TILES"/> + <value value="6" name="A8XX_PERF_RAS_8X4_TILES"/> + <value value="7" name="A8XX_PERF_RAS_MASKGEN_ACTIVE"/> + <value value="8" name="A8XX_PERF_RAS_FULLY_COVERED_SUPER_TILES"/> + <value value="9" name="A8XX_PERF_RAS_FULLY_COVERED_8X4_TILES"/> + <value value="10" name="A8XX_PERF_RAS_PRIM_KILLED_INVISILBE"/> + <value value="11" name="A8XX_PERF_RAS_SUPERTILE_GEN_ACTIVE_CYCLES"/> + <value value="12" name="A8XX_PERF_RAS_LRZ_INTF_WORKING_CYCLES"/> + <value value="13" name="A8XX_PERF_RAS_BLOCKS"/> + <value value="14" name="A8XX_PERF_RAS_FALSE_PARTIAL_STILE"/> + <value value="15" name="A8XX_PERF_RAS_SLICE_BLOCK_NONEMTPY"/> + <value value="16" name="A8XX_PERF_RAS_SLICE_BLOCK_EMPTY"/> + <value value="17" name="A8XX_PERF_RAS_SAMPLE_MASK_GEN_LANE_0_WORKING_CC_L2"/> + <value value="18" name="A8XX_PERF_RAS_SAMPLE_MASK_GEN_LANE_1_WORKING_CC_L2"/> + <value 
value="19" name="A8XX_PERF_RAS_SAMPLE_MASK_GEN_LANE_2_WORKING_CC_L2"/> + <value value="20" name="A8XX_PERF_RAS_SAMPLE_MASK_GEN_LANE_3_WORKING_CC_L2"/> + <value value="21" name="A8XX_PERF_RAS_SAMPLE_MASK_GEN_LANE_4_WORKING_CC_L2"/> + <value value="22" name="A8XX_PERF_RAS_SAMPLE_MASK_GEN_LANE_5_WORKING_CC_L2"/> + <value value="23" name="A8XX_PERF_RAS_SAMPLE_MASK_GEN_LANE_6_WORKING_CC_L2"/> + <value value="24" name="A8XX_PERF_RAS_SAMPLE_MASK_GEN_LANE_7_WORKING_CC_L2"/> + <value value="25" name="A8XX_PERF_RAS_SAMPLE_MASK_GEN_LANE_8_WORKING_CC_L2"/> + <value value="26" name="A8XX_PERF_RAS_SAMPLE_MASK_GEN_LANE_9_WORKING_CC_L2"/> + <value value="27" name="A8XX_PERF_RAS_SAMPLE_MASK_GEN_LANE_10_WORKING_CC_L2"/> + <value value="28" name="A8XX_PERF_RAS_SAMPLE_MASK_GEN_LANE_11_WORKING_CC_L2"/> + <value value="29" name="A8XX_PERF_RAS_SAMPLE_MASK_GEN_LANE_12_WORKING_CC_L2"/> + <value value="30" name="A8XX_PERF_RAS_SAMPLE_MASK_GEN_LANE_13_WORKING_CC_L2"/> + <value value="31" name="A8XX_PERF_RAS_SAMPLE_MASK_GEN_LANE_14_WORKING_CC_L2"/> + <value value="32" name="A8XX_PERF_RAS_SAMPLE_MASK_GEN_LANE_15_WORKING_CC_L2"/> + <value value="33" name="A8XX_PERF_RAS_EMPTY_SBLOCK_SCAN_CYCLE_PERFCOUNTER"/> + <value value="34" name="A8XX_PERF_RAS_STEGN_STALL_BY_LATENCY_FIFO"/> + <value value="35" name="A8XX_PERF_RAS_STALL_CYCLES_LRZ_PREFETCH"/> + <value value="36" name="A8XX_PERF_RAS_STGEN_BOTTLENECK_CYCLES"/> + <value value="37" name="A8XX_PERF_RAS_MTGEN_BOTTLENECK_CYCLES"/> + <value value="38" name="A8XX_PERF_RAS_TGEN_BOTTLENECK_CYCLES"/> +</enum> + +<enum name="a8xx_uche_perfcounter_select"> + <value value="0" name="A8XX_PERF_UCHE_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_UCHE_BUSY_CYCLES"/> + <value value="2" name="A8XX_PERF_UCHE_STALL_CYCLES_ARBITER"/> + <value value="3" name="A8XX_PERF_UCHE_VBIF_STALL_WRITE_DATA"/> + <value value="4" name="A8XX_PERF_UCHE_STARVED_CYCLES_VBIF_DECMP"/> + <value value="5" name="A8XX_PERF_UCHE_STALL_CYCLES_DECMP"/> + <value value="6" 
name="A8XX_PERF_UCHE_ARBITER_STALL_CYCLES_VBIF"/> + <value value="7" name="A8XX_PERF_UCHE_VBIF_LATENCY_CYCLES"/> + <value value="8" name="A8XX_PERF_UCHE_VBIF_LATENCY_SAMPLES"/> + <value value="9" name="A8XX_PERF_UCHE_DCMP_LATENCY_SAMPLES"/> + <value value="10" name="A8XX_PERF_UCHE_DCMP_LATENCY_CYCLES"/> + <value value="11" name="A8XX_PERF_UCHE_READ_REQUESTS_SP"/> + <value value="12" name="A8XX_PERF_UCHE_READ_REQUESTS_TP"/> + <value value="13" name="A8XX_PERF_UCHE_READ_REQUESTS_TP_UBWC"/> + <value value="14" name="A8XX_PERF_UCHE_READ_REQUESTS_TP_GBIF"/> + <value value="15" name="A8XX_PERF_UCHE_READ_REQUESTS_TP_GMEM"/> + <value value="16" name="A8XX_PERF_UCHE_READ_REQUESTS_VFD"/> + <value value="17" name="A8XX_PERF_UCHE_READ_REQUESTS_VPC"/> + <value value="18" name="A8XX_PERF_UCHE_READ_REQUESTS_HLSQ"/> + <value value="19" name="A8XX_PERF_UCHE_READ_REQUESTS_LRZ"/> + <value value="20" name="A8XX_PERF_UCHE_READ_REQUESTS_PC"/> + <value value="21" name="A8XX_PERF_UCHE_WRITE_REQUESTS_SP"/> + <value value="22" name="A8XX_PERF_UCHE_WRITE_REQUESTS_LRZ"/> + <value value="23" name="A8XX_PERF_UCHE_WRITE_REQUESTS_VPC"/> + <value value="24" name="A8XX_PERF_UCHE_WRITE_REQUESTS_VSC"/> + <value value="25" name="A8XX_PERF_UCHE_VBIF_READ_BEATS_SP"/> + <value value="26" name="A8XX_PERF_UCHE_VBIF_READ_BEATS_TP"/> + <value value="27" name="A8XX_PERF_UCHE_VBIF_READ_BEATS_VFD"/> + <value value="28" name="A8XX_PERF_UCHE_VBIF_READ_BEATS_VPC"/> + <value value="29" name="A8XX_PERF_UCHE_VBIF_READ_BEATS_HLSQ"/> + <value value="30" name="A8XX_PERF_UCHE_VBIF_READ_BEATS_LRZ"/> + <value value="31" name="A8XX_PERF_UCHE_VBIF_READ_BEATS_PC"/> + <value value="32" name="A8XX_PERF_UCHE_VBIF_READ_BEATS_CH0"/> + <value value="33" name="A8XX_PERF_UCHE_VBIF_READ_BEATS_CH1"/> + <value value="34" name="A8XX_PERF_UCHE_VBIF_WRITE_BEATS_CH0"/> + <value value="35" name="A8XX_PERF_UCHE_VBIF_WRITE_BEATS_CH1"/> + <value value="36" name="A8XX_PERF_UCHE_GMEM_READ_BEATS"/> + <value value="37" 
name="A8XX_PERF_UCHE_GMEM_WRITE_BEATS"/> + <value value="38" name="A8XX_PERF_UCHE_UBWC_READ_BEATS"/> + <value value="39" name="A8XX_PERF_UCHE_UBWC_WRITE_BEATS"/> + <value value="40" name="A8XX_PERF_UCHE_EVICTS"/> + <value value="41" name="A8XX_PERF_UCHE_BANK_REQ0"/> + <value value="42" name="A8XX_PERF_UCHE_BANK_REQ1"/> + <value value="43" name="A8XX_PERF_UCHE_BANK_REQ2"/> + <value value="44" name="A8XX_PERF_UCHE_BANK_REQ3"/> + <value value="45" name="A8XX_PERF_UCHE_BANK_REQ4"/> + <value value="46" name="A8XX_PERF_UCHE_BANK_REQ5"/> + <value value="47" name="A8XX_PERF_UCHE_BANK_REQ6"/> + <value value="48" name="A8XX_PERF_UCHE_BANK_REQ7"/> + <value value="49" name="A8XX_PERF_UCHE_TPH_REF_FULL"/> + <value value="50" name="A8XX_PERF_UCHE_TPH_VICTIM_FULL"/> + <value value="51" name="A8XX_PERF_UCHE_TPH_EXT_FULL"/> + <value value="52" name="A8XX_PERF_UCHE_RAM_READ_REQ"/> + <value value="53" name="A8XX_PERF_UCHE_RAM_WRITE_REQ"/> + <value value="54" name="A8XX_PERF_UCHE_LONG_LINE_ALL_EVICTS"/> + <value value="55" name="A8XX_PERF_UCHE_LONG_LINE_PARTIAL_EVICTS"/> + <value value="56" name="A8XX_PERF_UCHE_TPH_CONFLICT_CL_CCHE"/> + <value value="57" name="A8XX_PERF_UCHE_TPH_CONFLICT_CL_OTHER"/> + <value value="58" name="A8XX_PERF_UCHE_DBANK_CONFLICT_CL_CCHE"/> + <value value="59" name="A8XX_PERF_UCHE_DBANK_CONFLICT_CL_OTHER_CLIENTS"/> + <value value="60" name="A8XX_PERF_UCHE_CCHE_TPH_QUEUE_FULL"/> + <value value="61" name="A8XX_PERF_UCHE_CCHE_DPH_IO_QUEUE_FULL"/> + <value value="62" name="A8XX_PERF_UCHE_CCHE_DPH_CMDPOOL_FULL"/> + <value value="63" name="A8XX_PERF_UCHE_EVICTS_SP"/> + <value value="64" name="A8XX_PERF_UCHE_EVICTS_LRZ"/> + <value value="65" name="A8XX_PERF_UCHE_READ_REQUESTS_VPCUS"/> + <value value="66" name="A8XX_PERF_UCHE_READ_REQUESTS_VFD_BYPASS_BV"/> + <value value="67" name="A8XX_PERF_UCHE_READ_REQUESTS_VFD_BYPASS_BR"/> + <value value="68" name="A8XX_PERF_BYPC_FULL"/> + <value value="69" name="A8XX_PERF_BYPC_FULL_CCHE_STALL"/> + <value value="70" 
name="A8XX_PERF_BYPC_VHUB_STALL"/> + <value value="71" name="A8XX_PERF_BYPD_FULL"/> + <value value="72" name="A8XX_PERF_BYPD_FULL_GBIF_STALL"/> + <value value="73" name="A8XX_PERF_VHUB_PTABLE_FULL"/> + <value value="74" name="A8XX_PERF_DHUB_PTABLE_FULL"/> + <value value="75" name="A8XX_PERF_UCHE_RESERVED_75"/> + <value value="76" name="A8XX_PERF_UCHE_RESERVED_76"/> + <value value="77" name="A8XX_PERF_UCHE_RESERVED_77"/> + <value value="78" name="A8XX_PERF_UCHE_RESERVED_78"/> + <value value="79" name="A8XX_PERF_UCHE_RESERVED_79"/> + <value value="80" name="A8XX_PERF_UCHE_RESERVED_80"/> + <value value="81" name="A8XX_PERF_UCHE_RESERVED_81"/> + <value value="82" name="A8XX_PERF_UCHE_RESERVED_82"/> + <value value="83" name="A8XX_PERF_UCHE_RESERVED_83"/> + <value value="84" name="A8XX_PERF_UCHE_RESERVED_84"/> + <value value="85" name="A8XX_PERF_UCHE_RESERVED_85"/> + <value value="86" name="A8XX_PERF_UCHE_RESERVED_86"/> + <value value="87" name="A8XX_PERF_UCHE_RESERVED_87"/> + <value value="88" name="A8XX_PERF_UCHE_RESERVED_88"/> + <value value="89" name="A8XX_PERF_UCHE_RESERVED_89"/> + <value value="90" name="A8XX_PERF_UCHE_RESERVED_90"/> + <value value="91" name="A8XX_PERF_UCHE_RESERVED_91"/> + <value value="92" name="A8XX_PERF_UCHE_RESERVED_92"/> + <value value="93" name="A8XX_PERF_UCHE_RESERVED_93"/> + <value value="94" name="A8XX_PERF_UCHE_RESERVED_94"/> + <value value="95" name="A8XX_PERF_UCHE_RESERVED_95"/> + <value value="96" name="A8XX_PERF_UCHE_RESERVED_96"/> + <value value="97" name="A8XX_PERF_UCHE_RESERVED_97"/> + <value value="98" name="A8XX_PERF_UCHE_RESERVED_98"/> + <value value="99" name="A8XX_PERF_UCHE_RESERVED_99"/> + <value value="100" name="A8XX_PERF_UCHE_RESERVED_100"/> + <value value="101" name="A8XX_PERF_UCHE_RESERVED_101"/> + <value value="102" name="A8XX_PERF_UCHE_RESERVED_102"/> + <value value="103" name="A8XX_PERF_UCHE_RESERVED_103"/> + <value value="104" name="A8XX_PERF_UCHE_RESERVED_104"/> + <value value="105" 
name="A8XX_PERF_UCHE_RESERVED_105"/> + <value value="106" name="A8XX_PERF_UCHE_RESERVED_106"/> + <value value="107" name="A8XX_PERF_UCHE_RESERVED_107"/> + <value value="108" name="A8XX_PERF_UCHE_RESERVED_108"/> + <value value="109" name="A8XX_PERF_UCHE_RESERVED_109"/> + <value value="110" name="A8XX_PERF_UCHE_RESERVED_110"/> + <value value="111" name="A8XX_PERF_UCHE_RESERVED_111"/> + <value value="112" name="A8XX_PERF_UCHE_RESERVED_112"/> + <value value="113" name="A8XX_PERF_UCHE_RESERVED_113"/> + <value value="114" name="A8XX_PERF_UCHE_RESERVED_114"/> + <value value="115" name="A8XX_PERF_UCHE_RESERVED_115"/> + <value value="116" name="A8XX_PERF_UCHE_RESERVED_116"/> + <value value="117" name="A8XX_PERF_UCHE_RESERVED_117"/> + <value value="118" name="A8XX_PERF_UCHE_RESERVED_118"/> + <value value="119" name="A8XX_PERF_UCHE_RESERVED_119"/> + <value value="120" name="A8XX_PERF_UCHE_RESERVED_120"/> + <value value="121" name="A8XX_PERF_UCHE_RESERVED_121"/> + <value value="122" name="A8XX_PERF_UCHE_RESERVED_122"/> + <value value="123" name="A8XX_PERF_UCHE_RESERVED_123"/> + <value value="124" name="A8XX_PERF_UCHE_RESERVED_124"/> + <value value="125" name="A8XX_PERF_UCHE_RESERVED_125"/> + <value value="126" name="A8XX_PERF_UCHE_RESERVED_126"/> + <value value="127" name="A8XX_PERF_UCHE_RESERVED_127"/> + <value value="128" name="A8XX_PERF_CCHE_BUSY_CYCLES"/> + <value value="129" name="A8XX_PERF_CCHE_STALL_CYCLES_UCHE"/> + <value value="130" name="A8XX_PERF_CCHE_UCHE_STALL_WRITE_DATA"/> + <value value="131" name="A8XX_PERF_CCHE_UCHE_LATENCY_CYCLES"/> + <value value="132" name="A8XX_PERF_CCHE_UCHE_LATENCY_SAMPLES"/> + <value value="133" name="A8XX_PERF_CCHE_READ_REQUESTS_SP_TOTAL"/> + <value value="134" name="A8XX_PERF_CCHE_READ_REQUESTS_SP_UBWC"/> + <value value="135" name="A8XX_PERF_CCHE_READ_REQUESTS_SP_GBIF"/> + <value value="136" name="A8XX_PERF_CCHE_READ_REQUESTS_SP_GMEM"/> + <value value="137" name="A8XX_PERF_CCHE_READ_REQUESTS_TP_TOTAL"/> + <value value="138" 
name="A8XX_PERF_CCHE_READ_REQUESTS_TP_UBWC"/> + <value value="139" name="A8XX_PERF_CCHE_READ_REQUESTS_TP_GBIF"/> + <value value="140" name="A8XX_PERF_CCHE_READ_REQUESTS_TP_GMEM"/> + <value value="141" name="A8XX_PERF_CCHE_READ_REQUESTS_VFD_TOTAL"/> + <value value="142" name="A8XX_PERF_CCHE_READ_REQUEST_VFD_GMEM"/> + <value value="143" name="A8XX_PERF_CCHE_READ_REQUEST_VFD_GBIF"/> + <value value="144" name="A8XX_PERF_CCHE_READ_REQUESTS_LRZ"/> + <value value="145" name="A8XX_PERF_CCHE_READ_REQUESTS_VPC"/> + <value value="146" name="A8XX_PERF_CCHE_WRITE_REQUESTS_SP"/> + <value value="147" name="A8XX_PERF_CCHE_WRITE_REQUESTS_LRZ"/> + <value value="148" name="A8XX_PERF_CCHE_READ_REQUESTS_GMEM"/> + <value value="149" name="A8XX_PERF_CCHE_WRITE_REQUESTS_GMEM"/> + <value value="150" name="A8XX_PERF_CCHE_UCHE_READ_BEATS_TP"/> + <value value="151" name="A8XX_PERF_CCHE_UCHE_READ_BEATS_VFD"/> + <value value="152" name="A8XX_PERF_CCHE_UCHE_READ_BEATS_SP"/> + <value value="153" name="A8XX_PERF_CCHE_UCHE_READ_BEATS_VPC"/> + <value value="154" name="A8XX_PERF_CCHE_UCHE_READ_BEATS_LRZ"/> + <value value="155" name="A8XX_PERF_CCHE_UCHE_READ_BEATS_CH0"/> + <value value="156" name="A8XX_PERF_CCHE_UCHE_READ_BEATS_CH1"/> + <value value="157" name="A8XX_PERF_CCHE_GMEM_READ_BEATS_VPC"/> + <value value="158" name="A8XX_PERF_CCHE_GMEM_READ_BEATS_TP"/> + <value value="159" name="A8XX_PERF_CCHE_GMEM_READ_BEATS_SP"/> + <value value="160" name="A8XX_PERF_CCHE_GMEM_READ_BEATS_VFD"/> + <value value="161" name="A8XX_PERF_CCHE_BANK_REQ0"/> + <value value="162" name="A8XX_PERF_CCHE_BANK_REQ1"/> + <value value="163" name="A8XX_PERF_CCHE_BANK_REQ2"/> + <value value="164" name="A8XX_PERF_CCHE_BANK_REQ3"/> + <value value="165" name="A8XX_PERF_CCHE_BANK_REQ4"/> + <value value="166" name="A8XX_PERF_CCHE_BANK_REQ5"/> + <value value="167" name="A8XX_PERF_CCHE_BANK_REQ6"/> + <value value="168" name="A8XX_PERF_CCHE_BANK_REQ7"/> + <value value="169" name="A8XX_PERF_CCHE_BANK_REQ8"/> + <value value="170" 
name="A8XX_PERF_CCHE_BANK_REQ9"/> + <value value="171" name="A8XX_PERF_CCHE_BANK_REQ10"/> + <value value="172" name="A8XX_PERF_CCHE_BANK_REQ11"/> + <value value="173" name="A8XX_PERF_CCHE_BANK_REQ12"/> + <value value="174" name="A8XX_PERF_CCHE_BANK_REQ13"/> + <value value="175" name="A8XX_PERF_CCHE_BANK_REQ14"/> + <value value="176" name="A8XX_PERF_CCHE_BANK_REQ15"/> + <value value="177" name="A8XX_PERF_CCHE_GBANK_REQ0"/> + <value value="178" name="A8XX_PERF_CCHE_GBANK_REQ1"/> + <value value="179" name="A8XX_PERF_CCHE_GBANK_REQ2"/> + <value value="180" name="A8XX_PERF_CCHE_GBANK_REQ3"/> + <value value="181" name="A8XX_PERF_CCHE_TPH_REF_FULL"/> + <value value="182" name="A8XX_PERF_CCHE_TPH_VICTIM_FULL"/> + <value value="183" name="A8XX_PERF_CCHE_TPH_EXT_FULL"/> + <value value="184" name="A8XX_PERF_CCHE_RAM_READ_REQ"/> + <value value="185" name="A8XX_PERF_CCHE_RAM_WRITE_REQ"/> + <value value="186" name="A8XX_PERF_CCHE_TPH_CONFLICT_CL"/> + <value value="187" name="A8XX_PERF_CCHE_DBANK_CONFLICT"/> + <value value="188" name="A8XX_PERF_CCHE_TPH_QUEUE_FULL"/> + <value value="189" name="A8XX_PERF_CCHE_DPH_QUEUE_FULL"/> + <value value="190" name="A8XX_PERF_CCHE_OPH_QUEUE_FULL"/> + <value value="191" name="A8XX_PERF_CCHE_WACK_QUEUE_FULL"/> + <value value="192" name="A8XX_PERF_CCHE_GMEM0_LOCAL_RD_REQUEST"/> + <value value="193" name="A8XX_PERF_CCHE_GMEM0_LOCAL_WR_REQUEST"/> + <value value="194" name="A8XX_PERF_CCHE_GMEM1_LOCAL_RD_REQUEST"/> + <value value="195" name="A8XX_PERF_CCHE_GMEM1_LOCAL_WR_REQUEST"/> + <value value="196" name="A8XX_PERF_CCHE_GMEM0_REMOTE_RD_REQUEST"/> + <value value="197" name="A8XX_PERF_CCHE_GMEM0_REMOTE_WR_REQUEST"/> + <value value="198" name="A8XX_PERF_CCHE_GMEM1_REMOTE_RD_REQUEST"/> + <value value="199" name="A8XX_PERF_CCHE_GMEM1_REMOTE_WR_REQUEST"/> + <value value="200" name="A8XX_PERF_CCHE_STALL_CYCLES_TP"/> + <value value="201" name="A8XX_PERF_CCHE_ATOMIC_REQUESTS_SP_TOTAL"/> + <value value="202" 
name="A8XX_PERF_CCHE_ATOMIC_PACKING_BEAT_SP_TOTAL"/> +</enum> + +<enum name="a8xx_tp_perfcounter_select"> + <value value="0" name="A8XX_PERF_TP_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_TP_BUSY_CYCLES"/> + <value value="2" name="A8XX_PERF_TP_STALL_CYCLES_UCHE"/> + <value value="3" name="A8XX_PERF_TP_LATENCY_CYCLES"/> + <value value="4" name="A8XX_PERF_TP_LATENCY_TRANS"/> + <value value="5" name="A8XX_PERF_TP_FLAG_FIFO_DELAY_SAMPLES"/> + <value value="6" name="A8XX_PERF_TP_FLAG_FIFO_DELAY_CYCLES"/> + <value value="7" name="A8XX_PERF_TP_L1_CACHELINE_REQUESTS"/> + <value value="8" name="A8XX_PERF_TP_L1_CACHELINE_MISSES"/> + <value value="9" name="A8XX_PERF_TP_SP_TP_TRANS"/> + <value value="10" name="A8XX_PERF_TP_TP_SP_TRANS"/> + <value value="11" name="A8XX_PERF_TP_OUTPUT_PIXELS"/> + <value value="12" name="A8XX_PERF_TP_FILTER_WORKLOAD_16BIT"/> + <value value="13" name="A8XX_PERF_TP_FILTER_WORKLOAD_32BIT"/> + <value value="14" name="A8XX_PERF_TP_QUADS_RECEIVED"/> + <value value="15" name="A8XX_PERF_TP_QUADS_OFFSET"/> + <value value="16" name="A8XX_PERF_TP_QUADS_SHADOW"/> + <value value="17" name="A8XX_PERF_TP_QUADS_ARRAY"/> + <value value="18" name="A8XX_PERF_TP_QUADS_GRADIENT"/> + <value value="19" name="A8XX_PERF_TP_QUADS_1D"/> + <value value="20" name="A8XX_PERF_TP_QUADS_2D"/> + <value value="21" name="A8XX_PERF_TP_QUADS_BUFFER"/> + <value value="22" name="A8XX_PERF_TP_QUADS_3D"/> + <value value="23" name="A8XX_PERF_TP_QUADS_CUBE"/> + <value value="24" name="A8XX_PERF_TP_DIVERGENT_QUADS_RECEIVED"/> + <value value="25" name="A8XX_PERF_TP_PRT_NON_RESIDENT_EVENTS"/> + <value value="26" name="A8XX_PERF_TP_OUTPUT_PIXELS_POINT"/> + <value value="27" name="A8XX_PERF_TP_OUTPUT_PIXELS_BILINEAR"/> + <value value="28" name="A8XX_PERF_TP_OUTPUT_PIXELS_MIP"/> + <value value="29" name="A8XX_PERF_TP_OUTPUT_PIXELS_ANISO"/> + <value value="30" name="A8XX_PERF_TP_OUTPUT_PIXELS_ZERO_LOD"/> + <value value="31" name="A8XX_PERF_TP_FLAG_CACHE_REQUESTS"/> + <value value="32" 
name="A8XX_PERF_TP_FLAG_CACHE_MISSES"/> + <value value="33" name="A8XX_PERF_TP_L1_5_L2_REQUESTS"/> + <value value="34" name="A8XX_PERF_TP_2D_OUTPUT_PIXELS"/> + <value value="35" name="A8XX_PERF_TP_2D_OUTPUT_PIXELS_POINT"/> + <value value="36" name="A8XX_PERF_TP_2D_OUTPUT_PIXELS_BILINEAR"/> + <value value="37" name="A8XX_PERF_TP_2D_FILTER_WORKLOAD_16BIT"/> + <value value="38" name="A8XX_PERF_TP_2D_FILTER_WORKLOAD_32BIT"/> + <value value="39" name="A8XX_PERF_TP_TPA2TPC_TRANS"/> + <value value="40" name="A8XX_PERF_TP_L1_MISSES_ASTC_1TILE"/> + <value value="41" name="A8XX_PERF_TP_L1_MISSES_ASTC_2TILE"/> + <value value="42" name="A8XX_PERF_TP_L1_MISSES_ASTC_4TILE"/> + <value value="43" name="A8XX_PERF_TP_L1_5_COMPRESS_REQS"/> + <value value="44" name="A8XX_PERF_TP_L1_5_L2_COMPRESS_MISS"/> + <value value="45" name="A8XX_PERF_TP_L1_BANK_CONFLICT"/> + <value value="46" name="A8XX_PERF_TP_L1_5_MISS_LATENCY_CYCLES"/> + <value value="47" name="A8XX_PERF_TP_L1_5_MISS_LATENCY_TRANS"/> + <value value="48" name="A8XX_PERF_TP_QUADS_CONSTANT_MULTIPLIED"/> + <value value="49" name="A8XX_PERF_TP_FRONTEND_WORKING_CYCLES"/> + <value value="50" name="A8XX_PERF_TP_L1_TAG_WORKING_CYCLES"/> + <value value="51" name="A8XX_PERF_TP_L1_DATA_WRITE_WORKING_CYCLES"/> + <value value="52" name="A8XX_PERF_TP_PRE_L1_DECOM_WORKING_CYCLES"/> + <value value="53" name="A8XX_PERF_TP_BACKEND_WORKING_CYCLES"/> + <value value="54" name="A8XX_PERF_TP_L1_5_CACHE_WORKING_CYCLES"/> + <value value="55" name="A8XX_PERF_TP_STARVE_CYCLES_SP"/> + <value value="56" name="A8XX_PERF_TP_STARVE_CYCLES_UCHE"/> + <value value="57" name="A8XX_PERF_TP_STALL_CYCLES_UFC"/> + <value value="58" name="A8XX_PERF_TP_FORMAT_DECOMP_POINT"/> + <value value="59" name="A8XX_PERF_TP_FILTER_POINT_FP16"/> + <value value="60" name="A8XX_PERF_TP_FILTER_POINT_FP32"/> + <value value="61" name="A8XX_PERF_TP_LATENCY_FIFO_FULL"/> + <value value="62" name="A8XX_PERF_TP_RESERVED_62"/> + <value value="63" name="A8XX_PERF_TP_RESERVED_63"/> + <value 
value="64" name="A8XX_PERF_TP_RESERVED_64"/> + <value value="65" name="A8XX_PERF_TP_RESERVED_65"/> + <value value="66" name="A8XX_PERF_TP_RESERVED_66"/> + <value value="67" name="A8XX_PERF_TP_RESERVED_67"/> + <value value="68" name="A8XX_PERF_TP_RESERVED_68"/> + <value value="69" name="A8XX_PERF_TP_RESERVED_69"/> + <value value="70" name="A8XX_PERF_TP_RESERVED_70"/> + <value value="71" name="A8XX_PERF_TP_RESERVED_71"/> + <value value="72" name="A8XX_PERF_TP_RESERVED_72"/> + <value value="73" name="A8XX_PERF_TP_RESERVED_73"/> + <value value="74" name="A8XX_PERF_TP_RESERVED_74"/> + <value value="75" name="A8XX_PERF_TP_RESERVED_75"/> + <value value="76" name="A8XX_PERF_TP_RESERVED_76"/> + <value value="77" name="A8XX_PERF_TP_RESERVED_77"/> + <value value="78" name="A8XX_PERF_TP_RESERVED_78"/> + <value value="79" name="A8XX_PERF_TP_RESERVED_79"/> + <value value="80" name="A8XX_PERF_TP_RESERVED_80"/> + <value value="81" name="A8XX_PERF_TP_RESERVED_81"/> + <value value="82" name="A8XX_PERF_TP_RESERVED_82"/> + <value value="83" name="A8XX_PERF_TP_RESERVED_83"/> + <value value="84" name="A8XX_PERF_TP_RESERVED_84"/> + <value value="85" name="A8XX_PERF_TP_RESERVED_85"/> + <value value="86" name="A8XX_PERF_TP_RESERVED_86"/> + <value value="87" name="A8XX_PERF_TP_RESERVED_87"/> + <value value="88" name="A8XX_PERF_TP_RESERVED_88"/> + <value value="89" name="A8XX_PERF_TP_RESERVED_89"/> + <value value="90" name="A8XX_PERF_TP_RESERVED_90"/> + <value value="91" name="A8XX_PERF_TP_RESERVED_91"/> + <value value="92" name="A8XX_PERF_TP_RESERVED_92"/> + <value value="93" name="A8XX_PERF_TP_RESERVED_93"/> + <value value="94" name="A8XX_PERF_TP_RESERVED_94"/> + <value value="95" name="A8XX_PERF_TP_RESERVED_95"/> + <value value="96" name="A8XX_PERF_TP_RESERVED_96"/> + <value value="97" name="A8XX_PERF_TP_RESERVED_97"/> + <value value="98" name="A8XX_PERF_TP_RESERVED_98"/> + <value value="99" name="A8XX_PERF_TP_RESERVED_99"/> + <value value="100" name="A8XX_PERF_TP_RESERVED_100"/> + <value 
value="101" name="A8XX_PERF_TP_RESERVED_101"/> + <value value="102" name="A8XX_PERF_TP_RESERVED_102"/> + <value value="103" name="A8XX_PERF_TP_RESERVED_103"/> + <value value="104" name="A8XX_PERF_TP_RESERVED_104"/> + <value value="105" name="A8XX_PERF_TP_RESERVED_105"/> + <value value="106" name="A8XX_PERF_TP_RESERVED_106"/> + <value value="107" name="A8XX_PERF_TP_RESERVED_107"/> + <value value="108" name="A8XX_PERF_TP_RESERVED_108"/> + <value value="109" name="A8XX_PERF_TP_RESERVED_109"/> + <value value="110" name="A8XX_PERF_TP_RESERVED_110"/> + <value value="111" name="A8XX_PERF_TP_RESERVED_111"/> + <value value="112" name="A8XX_PERF_TP_RESERVED_112"/> + <value value="113" name="A8XX_PERF_TP_RESERVED_113"/> + <value value="114" name="A8XX_PERF_TP_RESERVED_114"/> + <value value="115" name="A8XX_PERF_TP_RESERVED_115"/> + <value value="116" name="A8XX_PERF_TP_RESERVED_116"/> + <value value="117" name="A8XX_PERF_TP_RESERVED_117"/> + <value value="118" name="A8XX_PERF_TP_RESERVED_118"/> + <value value="119" name="A8XX_PERF_TP_RESERVED_119"/> + <value value="120" name="A8XX_PERF_TP_RESERVED_120"/> + <value value="121" name="A8XX_PERF_TP_RESERVED_121"/> + <value value="122" name="A8XX_PERF_TP_RESERVED_122"/> + <value value="123" name="A8XX_PERF_TP_RESERVED_123"/> + <value value="124" name="A8XX_PERF_TP_RESERVED_124"/> + <value value="125" name="A8XX_PERF_TP_RESERVED_125"/> + <value value="126" name="A8XX_PERF_TP_RESERVED_126"/> + <value value="127" name="A8XX_PERF_TP_RESERVED_127"/> + <value value="128" name="A8XX_PERF_TP_FORMAT_DECOMP_BILINEAR"/> + <value value="129" name="A8XX_PERF_TP_PACKED_POINT_BOTH_VALID_FP16"/> + <value value="130" name="A8XX_PERF_TP_PACKED_POINT_SINGLE_VALID_FP16"/> + <value value="131" name="A8XX_PERF_TP_PACKED_POINT_BOTH_VALID_FP32"/> + <value value="132" name="A8XX_PERF_TP_PACKED_POINT_SINGLE_VALID_FP32"/> + <value value="133" name="A8XX_PERF_TP_TOTAL_DECOMP_L1_RESP_CH0"/> + <value value="134" name="A8XX_PERF_TP_TOTAL_DECOMP_L1_RESP_CH1"/> + 
<value value="135" name="A8XX_PERF_TP_L1POINT5_PRE_L1_DECOMP_0_WORKLOAD_R"/> + <value value="136" name="A8XX_PERF_TP_L1POINT5_PRE_L1_DECOMP_1_WORKLOAD_R"/> + <value value="137" name="A8XX_PERF_TP_L1POINT5_ASTC_DECODER_0_WORKLOAD_R"/> + <value value="138" name="A8XX_PERF_TP_L1POINT5_ASTC_DECODER_1_WORKLOAD_R"/> + <value value="139" name="A8XX_PERF_TP_PARTIAL_SURFACE_PGMEM_PIXELS"/> + <value value="140" name="A8XX_PERF_TP_PARTIAL_SURFACE_SYSMEM_PIXELS"/> + <value value="141" name="A8XX_PERF_TP_PARTIAL_SURFACE_PGMEM_MISS_REQ"/> + <value value="142" name="A8XX_PERF_TP_PARTIAL_SURFACE_SYSMEM_MISS_REQ"/> +</enum> + +<enum name="a8xx_sp_perfcounter_select"> + <value value="0" name="A8XX_PERF_SP_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_SP_BUSY_CYCLES"/> + <value value="2" name="A8XX_PERF_SP_ALU_WORKING_CYCLES"/> + <value value="3" name="A8XX_PERF_SP_STALL_CYCLES_VPC_BE"/> + <value value="4" name="A8XX_PERF_SP_STALL_CYCLES_TP"/> + <value value="5" name="A8XX_PERF_SP_STALL_CYCLES_UCHE"/> + <value value="6" name="A8XX_PERF_SP_STALL_CYCLES_RB"/> + <value value="7" name="A8XX_PERF_SP_NON_EXECUTION_CYCLES"/> + <value value="8" name="A8XX_PERF_SP_WAVE_CONTEXTS"/> + <value value="9" name="A8XX_PERF_SP_WAVE_CONTEXT_CYCLES"/> + <value value="10" name="A8XX_PERF_SP_FS_STAGE_WAVE_CYCLES"/> + <value value="11" name="A8XX_PERF_SP_FS_STAGE_WAVE_SAMPLES"/> + <value value="12" name="A8XX_PERF_SP_VS_STAGE_WAVE_CYCLES"/> + <value value="13" name="A8XX_PERF_SP_VS_STAGE_WAVE_SAMPLES"/> + <value value="14" name="A8XX_PERF_SP_FS_STAGE_DURATION_CYCLES"/> + <value value="15" name="A8XX_PERF_SP_VS_STAGE_DURATION_CYCLES"/> + <value value="16" name="A8XX_PERF_SP_WAVE_CTRL_CYCLES"/> + <value value="17" name="A8XX_PERF_SP_WAVE_LOAD_CYCLES"/> + <value value="18" name="A8XX_PERF_SP_WAVE_EMIT_CYCLES"/> + <value value="19" name="A8XX_PERF_SP_WAVE_NOP_CYCLES"/> + <value value="20" name="A8XX_PERF_SP_WAVE_WAIT_CYCLES"/> + <value value="21" name="A8XX_PERF_SP_WAVE_FETCH_CYCLES"/> + <value value="22" 
name="A8XX_PERF_SP_WAVE_IDLE_CYCLES"/> + <value value="23" name="A8XX_PERF_SP_WAVE_END_CYCLES"/> + <value value="24" name="A8XX_PERF_SP_WAVE_LONG_SYNC_CYCLES"/> + <value value="25" name="A8XX_PERF_SP_WAVE_SHORT_SYNC_CYCLES"/> + <value value="26" name="A8XX_PERF_SP_WAVE_JOIN_CYCLES"/> + <value value="27" name="A8XX_PERF_SP_LM_LOAD_INSTRUCTIONS"/> + <value value="28" name="A8XX_PERF_SP_LM_STORE_INSTRUCTIONS"/> + <value value="29" name="A8XX_PERF_SP_LM_ATOMICS"/> + <value value="30" name="A8XX_PERF_SP_GM_LOAD_INSTRUCTIONS"/> + <value value="31" name="A8XX_PERF_SP_GM_STORE_INSTRUCTIONS"/> + <value value="32" name="A8XX_PERF_SP_GM_ATOMICS"/> + <value value="33" name="A8XX_PERF_SP_VS_STAGE_TEX_INSTRUCTIONS"/> + <value value="34" name="A8XX_PERF_SP_VS_STAGE_EFU_INSTRUCTIONS"/> + <value value="35" name="A8XX_PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS"/> + <value value="36" name="A8XX_PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS"/> + <value value="37" name="A8XX_PERF_SP_FS_STAGE_TEX_INSTRUCTIONS"/> + <value value="38" name="A8XX_PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS"/> + <value value="39" name="A8XX_PERF_SP_FS_STAGE_EFU_INSTRUCTIONS"/> + <value value="40" name="A8XX_PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS"/> + <value value="41" name="A8XX_PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS"/> + <value value="42" name="A8XX_PERF_SP_FS_STAGE_BARY_INSTRUCTIONS"/> + <value value="43" name="A8XX_PERF_SP_VS_INSTRUCTIONS"/> + <value value="44" name="A8XX_PERF_SP_FS_INSTRUCTIONS"/> + <value value="45" name="A8XX_PERF_SP_ADDR_LOCK_COUNT"/> + <value value="46" name="A8XX_PERF_SP_UCHE_READ_TRANS"/> + <value value="47" name="A8XX_PERF_SP_UCHE_WRITE_TRANS"/> + <value value="48" name="A8XX_PERF_SP_EXPORT_VPC_TRANS"/> + <value value="49" name="A8XX_PERF_SP_EXPORT_RB_TRANS"/> + <value value="50" name="A8XX_PERF_SP_PIXELS_KILLED"/> + <value value="51" name="A8XX_PERF_SP_ICL1_REQUESTS"/> + <value value="52" name="A8XX_PERF_SP_ICL1_MISSES"/> + <value value="53" name="A8XX_PERF_SP_HS_INSTRUCTIONS"/> + <value value="54" 
name="A8XX_PERF_SP_DS_INSTRUCTIONS"/> + <value value="55" name="A8XX_PERF_SP_GS_INSTRUCTIONS"/> + <value value="56" name="A8XX_PERF_SP_CS_INSTRUCTIONS"/> + <value value="57" name="A8XX_PERF_SP_GPR_READ"/> + <value value="58" name="A8XX_PERF_SP_GPR_WRITE"/> + <value value="59" name="A8XX_PERF_SP_FS_STAGE_HALF_EFU_INSTRUCTIONS"/> + <value value="60" name="A8XX_PERF_SP_VS_STAGE_HALF_EFU_INSTRUCTIONS"/> + <value value="61" name="A8XX_PERF_SP_LM_BANK_CONFLICTS"/> + <value value="62" name="A8XX_PERF_SP_TEX_CONTROL_WORKING_CYCLES"/> + <value value="63" name="A8XX_PERF_SP_LOAD_CONTROL_WORKING_CYCLES"/> + <value value="64" name="A8XX_PERF_SP_FLOW_CONTROL_WORKING_CYCLES"/> + <value value="65" name="A8XX_PERF_SP_LM_WORKING_CYCLES"/> + <value value="66" name="A8XX_PERF_SP_DISPATCHER_WORKING_CYCLES"/> + <value value="67" name="A8XX_PERF_SP_SEQUENCER_WORKING_CYCLES"/> + <value value="68" name="A8XX_PERF_SP_LOW_EFFICIENCY_STARVED_BY_TP"/> + <value value="69" name="A8XX_PERF_SP_STARVE_CYCLES_HLSQ"/> + <value value="70" name="A8XX_PERF_SP_NON_EXECUTION_LS_CYCLES"/> + <value value="71" name="A8XX_PERF_SP_WORKING_EU"/> + <value value="72" name="A8XX_PERF_SP_ANY_EU_WORKING"/> + <value value="73" name="A8XX_PERF_SP_WORKING_EU_FS_STAGE"/> + <value value="74" name="A8XX_PERF_SP_ANY_EU_WORKING_FS_STAGE"/> + <value value="75" name="A8XX_PERF_SP_WORKING_EU_VS_STAGE"/> + <value value="76" name="A8XX_PERF_SP_ANY_EU_WORKING_VS_STAGE"/> + <value value="77" name="A8XX_PERF_SP_WORKING_EU_CS_STAGE"/> + <value value="78" name="A8XX_PERF_SP_ANY_EU_WORKING_CS_STAGE"/> + <value value="79" name="A8XX_PERF_SP_GPR_READ_PREFETCH"/> + <value value="80" name="A8XX_PERF_SP_GPR_READ_CONFLICT"/> + <value value="81" name="A8XX_PERF_SP_GPR_WRITE_CONFLICT"/> + <value value="82" name="A8XX_PERF_SP_GM_LOAD_LATENCY_CYCLES"/> + <value value="83" name="A8XX_PERF_SP_GM_LOAD_LATENCY_SAMPLES"/> + <value value="84" name="A8XX_PERF_SP_EXECUTABLE_WAVES"/> + <value value="85" name="A8XX_PERF_SP_ICL1_MISS_FETCH_CYCLES"/> + 
<value value="86" name="A8XX_PERF_SP_RESERVED_86"/> + <value value="87" name="A8XX_PERF_SP_BYPASS_BUSY_CYCLES"/> + <value value="88" name="A8XX_PERF_SP_ANY_EU_WORKING_LPAC"/> + <value value="89" name="A8XX_PERF_SP_WAVE_ALU_CYCLES"/> + <value value="90" name="A8XX_PERF_SP_WAVE_EFU_CYCLES"/> + <value value="91" name="A8XX_PERF_SP_WAVE_INT_CYCLES"/> + <value value="92" name="A8XX_PERF_SP_WAVE_CSP_CYCLES"/> + <value value="93" name="A8XX_PERF_SP_EWAVE_CONTEXTS"/> + <value value="94" name="A8XX_PERF_SP_EWAVE_CONTEXT_CYCLES"/> + <value value="95" name="A8XX_PERF_SP_LPAC_BUSY_CYCLES"/> + <value value="96" name="A8XX_PERF_SP_LPAC_INSTRUCTIONS"/> + <value value="97" name="A8XX_PERF_SP_FS_STAGE_1X_WAVES"/> + <value value="98" name="A8XX_PERF_SP_FS_STAGE_2X_WAVES"/> + <value value="99" name="A8XX_PERF_SP_QUADS"/> + <value value="100" name="A8XX_PERF_SP_CS_INVOCATIONS"/> + <value value="101" name="A8XX_PERF_SP_PIXELS"/> + <value value="102" name="A8XX_PERF_SP_LPAC_DRAWCALLS"/> + <value value="103" name="A8XX_PERF_SP_PI_WORKING_CYCLES"/> + <value value="104" name="A8XX_PERF_SP_WAVE_INPUT_CYCLES"/> + <value value="105" name="A8XX_PERF_SP_WAVE_OUTPUT_CYCLES"/> + <value value="106" name="A8XX_PERF_SP_WAVE_HWAVE_WAIT_CYCLES"/> + <value value="107" name="A8XX_PERF_SP_WAVE_HWAVE_SYNC"/> + <value value="108" name="A8XX_PERF_SP_OUTPUT_3D_PIXELS"/> + <value value="109" name="A8XX_PERF_SP_FULL_ALU_MAD_INSTRUCTIONS"/> + <value value="110" name="A8XX_PERF_SP_HALF_ALU_MAD_INSTRUCTIONS"/> + <value value="111" name="A8XX_PERF_SP_FULL_ALU_MUL_INSTRUCTIONS"/> + <value value="112" name="A8XX_PERF_SP_HALF_ALU_MUL_INSTRUCTIONS"/> + <value value="113" name="A8XX_PERF_SP_FULL_ALU_ADD_INSTRUCTIONS"/> + <value value="114" name="A8XX_PERF_SP_HALF_ALU_ADD_INSTRUCTIONS"/> + <value value="115" name="A8XX_PERF_SP_BARY_FP32_INSTRUCTIONS"/> + <value value="116" name="A8XX_PERF_SP_ALU_GPR_READ_CYCLES"/> + <value value="117" name="A8XX_PERF_SP_ALU_DATA_FORWARDING_CYCLES"/> + <value value="118" 
name="A8XX_PERF_SP_LM_FULL_CYCLES"/> + <value value="119" name="A8XX_PERF_SP_TEXTURE_FETCH_LATENCY_CYCLES"/> + <value value="120" name="A8XX_PERF_SP_TEXTURE_FETCH_LATENCY_SAMPLES"/> + <value value="121" name="A8XX_PERF_SP_FS_STAGE_PI_TEX_INSTRUCTION"/> + <value value="122" name="A8XX_PERF_SP_RAY_QUERY_INSTRUCTIONS"/> + <value value="123" name="A8XX_PERF_SP_RBRT_KICKOFF_FIBERS"/> + <value value="124" name="A8XX_PERF_SP_RBRT_KICKOFF_DQUADS"/> + <value value="125" name="A8XX_PERF_SP_RTU_BUSY_CYCLES"/> + <value value="126" name="A8XX_PERF_SP_RTU_L0_HITS"/> + <value value="127" name="A8XX_PERF_SP_RTU_L0_MISSES"/> + <value value="128" name="A8XX_PERF_SP_RTU_L0_HIT_ON_MISS"/> + <value value="129" name="A8XX_PERF_SP_RTU_STALL_CYCLES_WAVE_QUEUE"/> + <value value="130" name="A8XX_PERF_SP_RTU_STALL_CYCLES_L0_HIT_QUEUE"/> + <value value="131" name="A8XX_PERF_SP_RTU_STALL_CYCLES_L0_MISS_QUEUE"/> + <value value="132" name="A8XX_PERF_SP_RTU_STALL_CYCLES_L0D_IDX_QUEUE"/> + <value value="133" name="A8XX_PERF_SP_RTU_STALL_CYCLES_L0DATA"/> + <value value="134" name="A8XX_PERF_SP_RTU_STALL_CYCLES_REPLACE_CNT"/> + <value value="135" name="A8XX_PERF_SP_RTU_STALL_CYCLES_MRG_CNT"/> + <value value="136" name="A8XX_PERF_SP_RTU_STALL_CYCLES_UCHE"/> + <value value="137" name="A8XX_PERF_SP_RTU_OPERAND_FETCH_STALL_CYCLES_L0"/> + <value value="138" name="A8XX_PERF_SP_RTU_OPERAND_FETCH_STALL_CYCLES_INS_FIFO"/> + <value value="139" name="A8XX_PERF_SP_RTU_BVH_FETCH_LATENCY_CYCLES"/> + <value value="140" name="A8XX_PERF_SP_RTU_BVH_FETCH_LATENCY_SAMPLES"/> + <value value="141" name="A8XX_PERF_SP_STCHE_MISS_INC_VS"/> + <value value="142" name="A8XX_PERF_SP_STCHE_MISS_INC_FS"/> + <value value="143" name="A8XX_PERF_SP_STCHE_MISS_INC_BV"/> + <value value="144" name="A8XX_PERF_SP_STCHE_MISS_INC_LPAC"/> + <value value="145" name="A8XX_PERF_SP_VGPR_ACTIVE_CONTEXTS"/> + <value value="146" name="A8XX_PERF_SP_PGPR_ALLOC_CONTEXTS"/> + <value value="147" name="A8XX_PERF_SP_VGPR_ALLOC_CONTEXTS"/> + <value 
value="148" name="A8XX_PERF_SP_RTU_RAY_BOX_INTERSECTIONS"/> + <value value="149" name="A8XX_PERF_SP_RTU_RAY_TRIANGLE_INTERSECTIONS"/> + <value value="150" name="A8XX_PERF_SP_SCH_STALL_CYCLES_RTU"/> + <value value="151" name="A8XX_PERF_SP_EFU_WORKING_CYCLES"/> + <value value="152" name="A8XX_PERF_SP_BRANCH_TAKEN"/> + <value value="153" name="A8XX_PERF_SP_BRANCH_NOT_TAKEN"/> + <value value="154" name="A8XX_PERF_SP_BRANCH_INS_DIVERGENCY_COUNT"/> + <value value="155" name="A8XX_PERF_SP_BRANCH_INS_COUNT"/> + <value value="156" name="A8XX_PERF_SP_PREDICT_TAKEN"/> + <value value="157" name="A8XX_PERF_SP_PREDICT_NOT_TAKEN"/> + <value value="158" name="A8XX_PERF_SP_PREDICT_INS_DIVERGENCY_COUNT"/> + <value value="159" name="A8XX_PERF_SP_PREDICT_INS_COUNT"/> + <value value="160" name="A8XX_PERF_SP_CCHE_UAV_TOTAL_REQ"/> + <value value="161" name="A8XX_PERF_SP_CCHE_UAV_TOTAL_DUALQUAD"/> + <value value="162" name="A8XX_PERF_SP_CCHE_NONUAV_TOTAL_REQ"/> + <value value="163" name="A8XX_PERF_SP_CCHE_NONUAV_TOTAL_DUALQUAD"/> + <value value="164" name="A8XX_PERF_SP_LB_NONUAV_TOTAL_REQ"/> + <value value="165" name="A8XX_PERF_SP_LB_NONUAV_TOTAL_DUALQUAD"/> + <value value="166" name="A8XX_PERF_SP_LB_READ_XFER_ALU"/> + <value value="167" name="A8XX_PERF_SP_LB_ALU_READ_CONS"/> + <value value="168" name="A8XX_PERF_SP_LB_READ_ALU_BLOCK_OTHER"/> + <value value="169" name="A8XX_PERF_SP_LB_WRITE_XFER_VPC"/> + <value value="170" name="A8XX_PERF_SP_LB_WRITE_VPC_BLOCK_OTHER"/> + <value value="171" name="A8XX_PERF_SP_LB_LDST_RW_LM"/> + <value value="172" name="A8XX_PERF_SP_LB_LDST_RW_LM_BLOCKED"/> + <value value="173" name="A8XX_PERF_SP_LB_LDST_WRITE_CONS"/> + <value value="174" name="A8XX_PERF_SP_LB_LDST_WRITE_CONS_BLOCKED"/> + <value value="175" name="A8XX_PERF_SP_GPR_READ_BANK"/> + <value value="176" name="A8XX_PERF_SP_GPR_WRITE_BANK"/> + <value value="177" name="A8XX_PERF_SP_VS_WAVE_REQ_PENDING"/> + <value value="178" name="A8XX_PERF_SP_FS_WAVE_REQ_PENDING"/> + <value value="179" 
name="A8XX_PERF_SP_LPAC_WAVE_REQ_PENDING"/> + <value value="180" name="A8XX_PERF_SP_WAVE_SPLIT_CNT"/> + <value value="181" name="A8XX_PERF_SP_FS_OOO_WAVE_ACC"/> + <value value="182" name="A8XX_PERF_SP_SCH_MEM_CTRL_WORKING_CYCLES"/> + <value value="183" name="A8XX_PERF_SP_LDST_EXECUTION_CYCLES"/> + <value value="184" name="A8XX_PERF_SP_CCHE_UAV_TOTAL_ATOM_REQ"/> + <value value="185" name="A8XX_PERF_SP_CCHE_NONUAV_TOTAL_AOM_REQ"/> + <value value="186" name="A8XX_PERF_SP_TTU_INSTRUCTIONS"/> + <value value="187" name="A8XX_PERF_SP_TTU_ACTIVE_FIBERS"/> + <value value="188" name="A8XX_PERF_SP_TTU_BUSY_CYCLES"/> + <value value="189" name="A8XX_PERF_SP_TTU_GPR_RD_CYCLES"/> + <value value="190" name="A8XX_PERF_SP_TTU_GPR_RD_REQ"/> + <value value="191" name="A8XX_PERF_SP_TTU_GPR_WR_CYCLES"/> + <value value="192" name="A8XX_PERF_SP_TTU_GPR_WR_REQ"/> + <value value="193" name="A8XX_PERF_SP_GPR_CACHE_SRC0_GPR_RDCNT"/> + <value value="194" name="A8XX_PERF_SP_GPR_CACHE_SRC0_GPR_CACHE_HIT_RDCNT"/> + <value value="195" name="A8XX_PERF_SP_GPR_CACHE_SRC0_GPR_CACHE_HINT_RDCNT"/> + <value value="196" name="A8XX_PERF_SP_GPR_CACHE_SRC1_RDCNT"/> + <value value="197" name="A8XX_PERF_SP_GPR_CACHE_SRC1_GPR_CACHE_HIT_RDCNT"/> + <value value="198" name="A8XX_PERF_SP_GPR_CACHE_SRC1_GPR_CACHE_HINT_RDCNT"/> + <value value="199" name="A8XX_PERF_SP_GPR_CACHE_SRC2_RDCNT"/> + <value value="200" name="A8XX_PERF_SP_GPR_CACHE_SRC2_GPR_CACHE_HIT_RDCNT"/> + <value value="201" name="A8XX_PERF_SP_GPR_CACHE_SRC2_GPR_CACHE_HINT_RDCNT"/> + <value value="202" name="A8XX_PERF_SP_UCHE_READ_BURST"/> + <value value="203" name="A8XX_PERF_SP_UCHE_WRITE_BURST"/> + <value value="204" name="A8XX_PERF_SP_LDST_GMEM_REQ_NUM"/> +</enum> + +<enum name="a8xx_rb_perfcounter_select"> + <value value="0" name="A8XX_PERF_RB_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_RB_BUSY_CYCLES"/> + <value value="2" name="A8XX_PERF_RB_STALL_CYCLES_HLSQ"/> + <value value="3" name="A8XX_PERF_RB_STALL_CYCLES_FIFO0_FULL"/> + <value value="4" 
name="A8XX_PERF_RB_STALL_CYCLES_FIFO1_FULL"/> + <value value="5" name="A8XX_PERF_RB_STALL_CYCLES_FIFO2_FULL"/> + <value value="6" name="A8XX_PERF_RB_STARVE_CYCLES_SP"/> + <value value="7" name="A8XX_PERF_RB_STARVE_CYCLES_LRZ_TILE"/> + <value value="8" name="A8XX_PERF_RB_STARVE_CYCLES_CCU"/> + <value value="9" name="A8XX_PERF_RB_STARVE_CYCLES_Z_PLANE"/> + <value value="10" name="A8XX_PERF_RB_STARVE_CYCLES_BARY_PLANE"/> + <value value="11" name="A8XX_PERF_RB_Z_WORKLOAD"/> + <value value="12" name="A8XX_PERF_RB_HLSQ_ACTIVE"/> + <value value="13" name="A8XX_PERF_RB_Z_READ"/> + <value value="14" name="A8XX_PERF_RB_Z_WRITE"/> + <value value="15" name="A8XX_PERF_RB_C_READ"/> + <value value="16" name="A8XX_PERF_RB_C_WRITE"/> + <value value="17" name="A8XX_PERF_RB_TOTAL_PASS"/> + <value value="18" name="A8XX_PERF_RB_Z_PASS"/> + <value value="19" name="A8XX_PERF_RB_Z_FAIL"/> + <value value="20" name="A8XX_PERF_RB_S_FAIL"/> + <value value="21" name="A8XX_PERF_RB_BLENDED_FXP_COMPONENTS"/> + <value value="22" name="A8XX_PERF_RB_BLENDED_FP16_COMPONENTS"/> + <value value="23" name="A8XX_PERF_RB_PS_INVOCATIONS"/> + <value value="24" name="A8XX_PERF_RB_2D_ALIVE_CYCLES"/> + <value value="25" name="A8XX_PERF_RB_2D_STARVE_CYCLES_SP"/> + <value value="26" name="A8XX_PERF_RB_2D_VALID_PIXELS"/> + <value value="27" name="A8XX_PERF_RB_3D_PIXELS"/> + <value value="28" name="A8XX_PERF_RB_BLENDER_WORKING_CYCLES"/> + <value value="29" name="A8XX_PERF_RB_ZPROC_WORKING_CYCLES"/> + <value value="30" name="A8XX_PERF_RB_CPROC_WORKING_CYCLES"/> + <value value="31" name="A8XX_PERF_RB_SAMPLER_WORKING_CYCLES"/> + <value value="32" name="A8XX_PERF_RB_STALL_CYCLES_CCU_COLOR_READ"/> + <value value="33" name="A8XX_PERF_RB_STALL_CYCLES_CCU_COLOR_WRITE"/> + <value value="34" name="A8XX_PERF_RB_STALL_CYCLES_CCU_DEPTH_READ"/> + <value value="35" name="A8XX_PERF_RB_STALL_CYCLES_CCU_DEPTH_WRITE"/> + <value value="36" name="A8XX_PERF_RB_STALL_CYCLES_VPC_BE"/> + <value value="37" 
name="A8XX_PERF_RB_BLENDED_FP32_COMPONENTS"/> + <value value="38" name="A8XX_PERF_RB_COLOR_PIX_TILES"/> + <value value="39" name="A8XX_PERF_RB_STALL_CYCLES_CCU"/> + <value value="40" name="A8XX_PERF_RB_EARLY_Z_ARB3_GRANT"/> + <value value="41" name="A8XX_PERF_RB_LATE_Z_ARB3_GRANT"/> + <value value="42" name="A8XX_PERF_RB_EARLY_Z_SKIP_GRANT"/> + <value value="43" name="A8XX_PERF_RB_VRS_1X1_QUADS"/> + <value value="44" name="A8XX_PERF_RB_VRS_2X1_QUADS"/> + <value value="45" name="A8XX_PERF_RB_VRS_1X2_QUADS"/> + <value value="46" name="A8XX_PERF_RB_VRS_2X2_QUADS"/> + <value value="47" name="A8XX_PERF_RB_VRS_2X4_QUADS"/> + <value value="48" name="A8XX_PERF_RB_VRS_4X2_QUADS"/> + <value value="49" name="A8XX_PERF_RB_VRS_4X4_QUADS"/> + <value value="50" name="A8XX_PERF_RB_STALL_CYCLES_HLSQ_INFO"/> + <value value="51" name="A8XX_PERF_RB_HLSQ_ACTIVE_INFO"/> + <value value="52" name="A8XX_PERF_RB_STALL_CYCLES_HLSQ_ATTR"/> + <value value="53" name="A8XX_PERF_RB_HLSQ_ACTIVE_ATTR"/> + <value value="54" name="A8XX_PERF_RB_STALL_CYCLES_ZMODE_SWITCH"/> + <value value="55" name="A8XX_PERF_RB_TOTAL_PASS_PIXELS"/> +</enum> + +<enum name="a8xx_vsc_perfcounter_select"> + <value value="0" name="A8XX_PERF_VSC_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_VSC_BUSY_CYCLES"/> + <value value="2" name="A8XX_PERF_VSC_WORKING_CYCLES"/> + <value value="3" name="A8XX_PERF_VSC_STALL_CYCLES_UCHE"/> + <value value="4" name="A8XX_PERF_VSC_EOT_NUM"/> + <value value="5" name="A8XX_PERF_VSC_INPUT_TILES"/> + <value value="6" name="A8XX_PERF_VSC_TILE_COMP_TRAN"/> + <value value="7" name="A8XX_PERF_VSC_TILE_BYPASS_TRAN"/> +</enum> + +<enum name="a8xx_ccu_perfcounter_select"> + <value value="0" name="A8XX_PERF_CCU_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_CCU_BUSY_CYCLES"/> + <value value="2" name="A8XX_PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN"/> + <value value="3" name="A8XX_PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN"/> + <value value="4" name="A8XX_PERF_CCU_DEPTH_BLOCKS"/> + <value value="5" 
name="A8XX_PERF_CCU_COLOR_BLOCKS"/> + <value value="6" name="A8XX_PERF_CCU_DEPTH_BLOCK_HIT"/> + <value value="7" name="A8XX_PERF_CCU_COLOR_BLOCK_HIT"/> + <value value="8" name="A8XX_PERF_CCU_PARTIAL_BLOCK_READ"/> + <value value="9" name="A8XX_PERF_CCU_GMEM_READ"/> + <value value="10" name="A8XX_PERF_CCU_GMEM_WRITE"/> + <value value="11" name="A8XX_PERF_CCU_2D_RD_REQ"/> + <value value="12" name="A8XX_PERF_CCU_2D_WR_REQ"/> + <value value="13" name="A8XX_PERF_CCU_UBWC_COLOR_BLOCKS_CONCURRENT"/> + <value value="14" name="A8XX_PERF_CCU_UBWC_DEPTH_BLOCKS_CONCURRENT"/> + <value value="15" name="A8XX_PERF_CCU_COLOR_RESOLVE_DROPPED"/> + <value value="16" name="A8XX_PERF_CCU_DEPTH_RESOLVE_DROPPED"/> + <value value="17" name="A8XX_PERF_CCU_COLOR_RENDER_CONCURRENT"/> + <value value="18" name="A8XX_PERF_CCU_DEPTH_RENDER_CONCURRENT"/> + <value value="19" name="A8XX_PERF_CCU_COLOR_RESOLVE_AFTER_RENDER"/> + <value value="20" name="A8XX_PERF_CCU_DEPTH_RESOLVE_AFTER_RENDER"/> + <value value="21" name="A8XX_PERF_CCU_GMEM_EXTRA_DEPTH_READ"/> + <value value="22" name="A8XX_PERF_CCU_GMEM_COLOR_READ_4AA"/> + <value value="23" name="A8XX_PERF_CCU_GMEM_COLOR_READ_4AA_FULL"/> + <value value="24" name="A8XX_PERF_CCU_COLOR_EVB_STALL"/> + <value value="25" name="A8XX_PERF_CCU_RENDER_OVERLAP_CRE_C"/> + <value value="26" name="A8XX_PERF_CCU_RENDER_OVERLAP_CRE_Z"/> + <value value="27" name="A8XX_PERF_CCU_RENDER_STALL_BY_CRE_C"/> + <value value="28" name="A8XX_PERF_CCU_RENDER_STALL_BY_CRE_Z"/> + <value value="29" name="A8XX_PERF_CCU_FULL_SURFACE_RESOLVE_CYCLES"/> + <value value="30" name="A8XX_PERF_CCU_RENDER_OVERLAP_FULL_SURFACE_RESOLVE"/> + <value value="31" name="A8XX_PERF_CCU_STALL_BY_FULL_SURFACE_RESOLVE"/> + <value value="32" name="A8XX_PERF_CCU_CONCURRENT_RESOLVE_CYCLES"/> + <value value="33" name="A8XX_PERF_CCU_GMEM_BK2BK_STALL"/> + <value value="34" name="A8XX_PERF_CCU_RESERVED_34"/> + <value value="35" name="A8XX_PERF_CCU_RESERVED_35"/> + <value value="36" 
name="A8XX_PERF_CCU_RESERVED_36"/> + <value value="37" name="A8XX_PERF_CCU_RESERVED_37"/> + <value value="38" name="A8XX_PERF_CCU_RESERVED_38"/> + <value value="39" name="A8XX_PERF_CCU_RESERVED_39"/> + <value value="40" name="A8XX_PERF_CCU_RESERVED_40"/> + <value value="41" name="A8XX_PERF_CCU_RESERVED_41"/> + <value value="42" name="A8XX_PERF_CCU_RESERVED_42"/> + <value value="43" name="A8XX_PERF_CCU_RESERVED_43"/> + <value value="44" name="A8XX_PERF_CCU_RESERVED_44"/> + <value value="45" name="A8XX_PERF_CCU_RESERVED_45"/> + <value value="46" name="A8XX_PERF_CCU_RESERVED_46"/> + <value value="47" name="A8XX_PERF_CCU_RESERVED_47"/> + <value value="48" name="A8XX_PERF_CCU_RESERVED_48"/> + <value value="49" name="A8XX_PERF_CCU_RESERVED_49"/> + <value value="50" name="A8XX_PERF_CCU_RESERVED_50"/> + <value value="51" name="A8XX_PERF_CCU_RESERVED_51"/> + <value value="52" name="A8XX_PERF_CCU_RESERVED_52"/> + <value value="53" name="A8XX_PERF_CCU_RESERVED_53"/> + <value value="54" name="A8XX_PERF_CCU_RESERVED_54"/> + <value value="55" name="A8XX_PERF_CCU_RESERVED_55"/> + <value value="56" name="A8XX_PERF_CCU_RESERVED_56"/> + <value value="57" name="A8XX_PERF_CCU_RESERVED_57"/> + <value value="58" name="A8XX_PERF_CCU_RESERVED_58"/> + <value value="59" name="A8XX_PERF_CCU_RESERVED_59"/> + <value value="60" name="A8XX_PERF_CCU_RESERVED_60"/> + <value value="61" name="A8XX_PERF_CCU_RESERVED_61"/> + <value value="62" name="A8XX_PERF_CCU_RESERVED_62"/> + <value value="63" name="A8XX_PERF_CCU_RESERVED_63"/> + <value value="64" name="A8XX_PERF_UFC_L0_TP_HINT_REQUESTS"/> + <value value="65" name="A8XX_PERF_UFC_L0_TP_HINT_TAG_MISS"/> + <value value="66" name="A8XX_PERF_UFC_L0_TP_HINT_TAG_HIT_RDY"/> + <value value="67" name="A8XX_PERF_UFC_L0_TP_HINT_TAG_HIT_NRDY"/> + <value value="68" name="A8XX_PERF_UFC_L0_TP_HINT_IS_FCLEAR"/> + <value value="69" name="A8XX_PERF_UFC_L0_TP_HINT_IS_ALPHA0"/> + <value value="70" name="A8XX_PERF_UFC_L0_TP_HINT_IS_ALPHA1"/> + <value value="71" 
name="A8XX_PERF_UFC_L0_TP_HINT_IS_UNCOMP"/> + <value value="72" name="A8XX_PERF_UFC_L0_SP_REQUESTS"/> + <value value="73" name="A8XX_PERF_UFC_L0_SP_FILTER_HIT"/> + <value value="74" name="A8XX_PERF_UFC_L0_SP_FILTER_MISS"/> + <value value="75" name="A8XX_PERF_UFC_L0_SP_REQ_STALLED_CYCLES"/> + <value value="76" name="A8XX_PERF_UFC_L0_TP_REQ_STALLED_CYCLES"/> + <value value="77" name="A8XX_PERF_UFC_L0_TP_RTN_STALLED_CYCLES"/> + <value value="78" name="A8XX_PERF_CCU_RESERVED_78"/> + <value value="79" name="A8XX_PERF_CCU_RESERVED_79"/> + <value value="80" name="A8XX_PERF_CCU_RESERVED_80"/> + <value value="81" name="A8XX_PERF_CCU_RESERVED_81"/> + <value value="82" name="A8XX_PERF_CCU_RESERVED_82"/> + <value value="83" name="A8XX_PERF_CCU_RESERVED_83"/> + <value value="84" name="A8XX_PERF_CCU_RESERVED_84"/> + <value value="85" name="A8XX_PERF_CCU_RESERVED_85"/> + <value value="86" name="A8XX_PERF_CCU_RESERVED_86"/> + <value value="87" name="A8XX_PERF_CCU_RESERVED_87"/> + <value value="88" name="A8XX_PERF_CCU_RESERVED_88"/> + <value value="89" name="A8XX_PERF_CCU_RESERVED_89"/> + <value value="90" name="A8XX_PERF_CCU_RESERVED_90"/> + <value value="91" name="A8XX_PERF_CCU_RESERVED_91"/> + <value value="92" name="A8XX_PERF_CCU_RESERVED_92"/> + <value value="93" name="A8XX_PERF_CCU_RESERVED_93"/> + <value value="94" name="A8XX_PERF_CCU_RESERVED_94"/> + <value value="95" name="A8XX_PERF_CCU_RESERVED_95"/> + <value value="96" name="A8XX_PERF_CCU_RESERVED_96"/> + <value value="97" name="A8XX_PERF_CCU_RESERVED_97"/> + <value value="98" name="A8XX_PERF_CCU_RESERVED_98"/> + <value value="99" name="A8XX_PERF_CCU_RESERVED_99"/> + <value value="100" name="A8XX_PERF_CCU_RESERVED_100"/> + <value value="101" name="A8XX_PERF_CCU_RESERVED_101"/> + <value value="102" name="A8XX_PERF_CCU_RESERVED_102"/> + <value value="103" name="A8XX_PERF_CCU_RESERVED_103"/> + <value value="104" name="A8XX_PERF_CCU_RESERVED_104"/> + <value value="105" name="A8XX_PERF_CCU_RESERVED_105"/> + <value value="106" 
name="A8XX_PERF_CCU_RESERVED_106"/> + <value value="107" name="A8XX_PERF_CCU_RESERVED_107"/> + <value value="108" name="A8XX_PERF_CCU_RESERVED_108"/> + <value value="109" name="A8XX_PERF_CCU_RESERVED_109"/> + <value value="110" name="A8XX_PERF_CCU_RESERVED_110"/> + <value value="111" name="A8XX_PERF_CCU_RESERVED_111"/> + <value value="112" name="A8XX_PERF_CCU_RESERVED_112"/> + <value value="113" name="A8XX_PERF_CCU_RESERVED_113"/> + <value value="114" name="A8XX_PERF_CCU_RESERVED_114"/> + <value value="115" name="A8XX_PERF_CCU_RESERVED_115"/> + <value value="116" name="A8XX_PERF_CCU_RESERVED_116"/> + <value value="117" name="A8XX_PERF_CCU_RESERVED_117"/> + <value value="118" name="A8XX_PERF_CCU_RESERVED_118"/> + <value value="119" name="A8XX_PERF_CCU_RESERVED_119"/> + <value value="120" name="A8XX_PERF_CCU_RESERVED_120"/> + <value value="121" name="A8XX_PERF_CCU_RESERVED_121"/> + <value value="122" name="A8XX_PERF_CCU_RESERVED_122"/> + <value value="123" name="A8XX_PERF_CCU_RESERVED_123"/> + <value value="124" name="A8XX_PERF_CCU_RESERVED_124"/> + <value value="125" name="A8XX_PERF_CCU_RESERVED_125"/> + <value value="126" name="A8XX_PERF_CCU_RESERVED_126"/> + <value value="127" name="A8XX_PERF_CCU_RESERVED_127"/> + <value value="128" name="A8XX_PERF_CRE_RESOLVE_EVENTS"/> + <value value="129" name="A8XX_PERF_CRE_CONCURRENT_RESOLVE_EVENTS"/> + <value value="130" name="A8XX_PERF_CRE_DROPPED_CLEAR_EVENTS"/> + <value value="131" name="A8XX_PERF_CRE_ST_BLOCKS_CONCURRENT"/> + <value value="132" name="A8XX_PERF_CRE_LRZ_ST_BLOCKS_CONCURRENT"/> + <value value="133" name="A8XX_PERF_CRE_SP_UFC_PREFETCH_REQUESTS"/> + <value value="134" name="A8XX_PERF_CRE_RESOLVE_CDP_PREFETCH_REQUESTS"/> + <value value="135" name="A8XX_PERF_CRE_RESOLVE_UFC_PREFETCH_REQUESTS"/> + <value value="136" name="A8XX_PERF_CRE_DR_UFC_PREFTCH_REQUESTS"/> +</enum> + +<enum name="a8xx_lrz_perfcounter_select"> + <value value="0" name="A8XX_PERF_LRZ_NEVER_COUNT"/> + <value value="1" 
name="A8XX_PERF_LRZ_BUSY_CYCLES"/> + <value value="2" name="A8XX_PERF_LRZ_STARVE_CYCLES_RAS"/> + <value value="3" name="A8XX_PERF_LRZ_STALL_CYCLES_RB"/> + <value value="4" name="A8XX_PERF_LRZ_STALL_CYCLES_VSC"/> + <value value="5" name="A8XX_PERF_LRZ_STALL_CYCLES_VPC_BE"/> + <value value="6" name="A8XX_PERF_LRZ_STALL_CYCLES_FLAG_ACR"/> + <value value="7" name="A8XX_PERF_LRZ_STALL_CYCLES_UCHE"/> + <value value="8" name="A8XX_PERF_LRZ_LRZ_READ"/> + <value value="9" name="A8XX_PERF_LRZ_LRZ_WRITE"/> + <value value="10" name="A8XX_PERF_LRZ_READ_LATENCY"/> + <value value="11" name="A8XX_PERF_LRZ_MERGE_CACHE_UPDATING"/> + <value value="12" name="A8XX_PERF_LRZ_PRIM_KILLED_BY_MASKGEN"/> + <value value="13" name="A8XX_PERF_LRZ_PRIM_KILLED_BY_LRZ"/> + <value value="14" name="A8XX_PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ"/> + <value value="15" name="A8XX_PERF_LRZ_FULL_8X8_TILES"/> + <value value="16" name="A8XX_PERF_LRZ_PARTIAL_8X8_TILES"/> + <value value="17" name="A8XX_PERF_LRZ_TILE_KILLED"/> + <value value="18" name="A8XX_PERF_LRZ_TOTAL_PIXEL"/> + <value value="19" name="A8XX_PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ"/> + <value value="20" name="A8XX_PERF_LRZ_FEEDBACK_ACCEPT"/> + <value value="21" name="A8XX_PERF_LRZ_FEEDBACK_DISCARD"/> + <value value="22" name="A8XX_PERF_LRZ_FEEDBACK_STALL"/> + <value value="23" name="A8XX_PERF_LRZ_STALL_CYCLES_RB_ZPLANE"/> + <value value="24" name="A8XX_PERF_LRZ_STALL_CYCLES_RB_BPLANE"/> + <value value="25" name="A8XX_PERF_LRZ_RAS_MASK_TRANS"/> + <value value="26" name="A8XX_PERF_LRZ_STALL_CYCLES_MVC"/> + <value value="27" name="A8XX_PERF_LRZ_TILE_KILLED_BY_IMAGE_VRS"/> + <value value="28" name="A8XX_PERF_LRZ_TILE_KILLED_BY_Z"/> + <value value="29" name="A8XX_PERF_LRZ_STALL_CYCLES_HLSQ_BATCH"/> + <value value="30" name="A8XX_PERF_LRZ_NUM_FLOCK"/> + <value value="31" name="A8XX_PERF_LRZ_DRAWS_TOTAL"/> + <value value="32" name="A8XX_PERF_LRZ_DRAWS_LRZ_DIS"/> + <value value="33" name="A8XX_PERF_LRZ_DRAWS_LRZ_WRITE_DIS"/> +</enum> + +<enum 
name="a8xx_cmp_perfcounter_select"> + <value value="0" name="A8XX_PERF_CMPDECMP_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_CMPDECMP_STALL_CYCLES_ARB"/> + <value value="2" name="A8XX_PERF_CMPDECMP_VBIF_LATENCY_CYCLES"/> + <value value="3" name="A8XX_PERF_CMPDECMP_VBIF_LATENCY_SAMPLES"/> + <value value="4" name="A8XX_PERF_CMPDECMP_VBIF_READ_DATA_CCU"/> + <value value="5" name="A8XX_PERF_CMPDECMP_VBIF_WRITE_DATA_CCU"/> + <value value="6" name="A8XX_PERF_CMPDECMP_VBIF_READ_REQUEST"/> + <value value="7" name="A8XX_PERF_CMPDECMP_VBIF_WRITE_REQUEST"/> + <value value="8" name="A8XX_PERF_CMPDECMP_VBIF_READ_DATA"/> + <value value="9" name="A8XX_PERF_CMPDECMP_VBIF_WRITE_DATA"/> + <value value="10" name="A8XX_PERF_CMPDECMP_DEPTH_WRITE_FLAG1_COUNT"/> + <value value="11" name="A8XX_PERF_CMPDECMP_DEPTH_WRITE_FLAG2_COUNT"/> + <value value="12" name="A8XX_PERF_CMPDECMP_DEPTH_WRITE_FLAG3_COUNT"/> + <value value="13" name="A8XX_PERF_CMPDECMP_DEPTH_WRITE_FLAG4_COUNT"/> + <value value="14" name="A8XX_PERF_CMPDECMP_DEPTH_WRITE_FLAG5_COUNT"/> + <value value="15" name="A8XX_PERF_CMPDECMP_DEPTH_WRITE_FLAG6_COUNT"/> + <value value="16" name="A8XX_PERF_CMPDECMP_DEPTH_WRITE_FLAG8_COUNT"/> + <value value="17" name="A8XX_PERF_CMPDECMP_COLOR_WRITE_FLAG1_COUNT"/> + <value value="18" name="A8XX_PERF_CMPDECMP_COLOR_WRITE_FLAG2_COUNT"/> + <value value="19" name="A8XX_PERF_CMPDECMP_COLOR_WRITE_FLAG3_COUNT"/> + <value value="20" name="A8XX_PERF_CMPDECMP_COLOR_WRITE_FLAG4_COUNT"/> + <value value="21" name="A8XX_PERF_CMPDECMP_COLOR_WRITE_FLAG5_COUNT"/> + <value value="22" name="A8XX_PERF_CMPDECMP_COLOR_WRITE_FLAG6_COUNT"/> + <value value="23" name="A8XX_PERF_CMPDECMP_COLOR_WRITE_FLAG8_COUNT"/> + <value value="24" name="A8XX_PERF_CMPDECMP_VBIF_READ_DATA_UCHE_CH0"/> + <value value="25" name="A8XX_PERF_CMPDECMP_VBIF_READ_DATA_UCHE_CH1"/> + <value value="26" name="A8XX_PERF_CMPDECMP_VBIF_WRITE_DATA_UCHE"/> + <value value="27" name="A8XX_PERF_CMPDECMP_DEPTH_WRITE_FLAG0_COUNT"/> + <value value="28" 
name="A8XX_PERF_CMPDECMP_COLOR_WRITE_FLAG0_COUNT"/> + <value value="29" name="A8XX_PERF_CMPDECMP_COLOR_WRITE_FLAGALPHA_COUNT"/> + <value value="30" name="A8XX_PERF_CMPDECMP_CDP_FILTER_HIT"/> + <value value="31" name="A8XX_PERF_CMPDECMP_CDP_FILTER_MISS"/> + <value value="32" name="A8XX_PERF_CMPDECMP_DEPTH_READ_FLAG0_COUNT"/> + <value value="33" name="A8XX_PERF_CMPDECMP_DEPTH_READ_FLAG1_COUNT"/> + <value value="34" name="A8XX_PERF_CMPDECMP_DEPTH_READ_FLAG2_COUNT"/> + <value value="35" name="A8XX_PERF_CMPDECMP_DEPTH_READ_FLAG3_COUNT"/> + <value value="36" name="A8XX_PERF_CMPDECMP_DEPTH_READ_FLAG4_COUNT"/> + <value value="37" name="A8XX_PERF_CMPDECMP_DEPTH_READ_FLAG5_COUNT"/> + <value value="38" name="A8XX_PERF_CMPDECMP_DEPTH_READ_FLAG6_COUNT"/> + <value value="39" name="A8XX_PERF_CMPDECMP_DEPTH_READ_FLAG8_COUNT"/> + <value value="40" name="A8XX_PERF_CMPDECMP_COLOR_READ_FLAG0_COUNT"/> + <value value="41" name="A8XX_PERF_CMPDECMP_COLOR_READ_FLAG1_COUNT"/> + <value value="42" name="A8XX_PERF_CMPDECMP_COLOR_READ_FLAG2_COUNT"/> + <value value="43" name="A8XX_PERF_CMPDECMP_COLOR_READ_FLAG3_COUNT"/> + <value value="44" name="A8XX_PERF_CMPDECMP_COLOR_READ_FLAG4_COUNT"/> + <value value="45" name="A8XX_PERF_CMPDECMP_COLOR_READ_FLAG5_COUNT"/> + <value value="46" name="A8XX_PERF_CMPDECMP_COLOR_READ_FLAG6_COUNT"/> + <value value="47" name="A8XX_PERF_CMPDECMP_COLOR_READ_FLAG8_COUNT"/> + <value value="48" name="A8XX_PERF_CMPDECMP_UFC_REQ_CCU"/> + <value value="49" name="A8XX_PERF_CMPDECMP_UFC_REQ_UCHE"/> + <value value="50" name="A8XX_PERF_CMPDECMP_INPUT_STALL_CYCLES_ENC"/> + <value value="51" name="A8XX_PERF_CMPDECMP_ENC_STALL_CYCLES_UFC"/> + <value value="52" name="A8XX_PERF_CMPDECMP_ENC_STALL_CYCLES_GBIF_DATA"/> + <value value="53" name="A8XX_PERF_CMPDECMP_ENC_STALL_CYCLES_GBIF_CMD"/> + <value value="54" name="A8XX_PERF_CMPDECMP_INPUT_STALL_CYCLES_DEC"/> + <value value="55" name="A8XX_PERF_CMPDECMP_DEC_STALL_CYCLES_OUTPUT"/> + <value value="56" 
name="A8XX_PERF_CMPDECMP_REQ_SSEC_FLAG_COMP"/> + <value value="57" name="A8XX_PERF_CMPDECMP_CCU_EVENT_SKEW_STALL"/> + <value value="58" name="A8XX_PERF_CMPDECMP_CCU_BV_EVENT_SKEW_STALL"/> + <value value="59" name="A8XX_PERF_CMPDECMP_DEPTH_WRITE_FLAG7_COUNT"/> + <value value="60" name="A8XX_PERF_CMPDECMP_COLOR_WRITE_FLAG7_COUNT"/> + <value value="61" name="A8XX_PERF_CMPDECMP_DEPTH_READ_FLAG7_COUNT"/> + <value value="62" name="A8XX_PERF_CMPDECMP_COLOR_READ_FLAG7_COUNT"/> +</enum> + +<enum name="a8xx_gbif_perfcounter_select"> + <value value="0" name="A8XX_PERF_GBIF_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_GBIF_RESERVED_1"/> + <value value="2" name="A8XX_PERF_GBIF_RESERVED_2"/> + <value value="3" name="A8XX_PERF_GBIF_RESERVED_3"/> + <value value="4" name="A8XX_PERF_GBIF_RESERVED_4"/> + <value value="5" name="A8XX_PERF_GBIF_RESERVED_5"/> + <value value="6" name="A8XX_PERF_GBIF_RESERVED_6"/> + <value value="7" name="A8XX_PERF_GBIF_RESERVED_7"/> + <value value="8" name="A8XX_PERF_GBIF_RESERVED_8"/> + <value value="9" name="A8XX_PERF_GBIF_RESERVED_9"/> + <value value="10" name="A8XX_PERF_GBIF_AXI0_READ_REQUESTS_TOTAL"/> + <value value="11" name="A8XX_PERF_GBIF_AXI1_READ_REQUESTS_TOTAL"/> + <value value="12" name="A8XX_PERF_GBIF_RESERVED_12"/> + <value value="13" name="A8XX_PERF_GBIF_RESERVED_13"/> + <value value="14" name="A8XX_PERF_GBIF_RESERVED_14"/> + <value value="15" name="A8XX_PERF_GBIF_RESERVED_15"/> + <value value="16" name="A8XX_PERF_GBIF_RESERVED_16"/> + <value value="17" name="A8XX_PERF_GBIF_RESERVED_17"/> + <value value="18" name="A8XX_PERF_GBIF_RESERVED_18"/> + <value value="19" name="A8XX_PERF_GBIF_RESERVED_19"/> + <value value="20" name="A8XX_PERF_GBIF_RESERVED_20"/> + <value value="21" name="A8XX_PERF_GBIF_RESERVED_21"/> + <value value="22" name="A8XX_PERF_GBIF_AXI0_WRITE_REQUESTS_TOTAL"/> + <value value="23" name="A8XX_PERF_GBIF_AXI1_WRITE_REQUESTS_TOTAL"/> + <value value="24" name="A8XX_PERF_GBIF_RESERVED_24"/> + <value value="25" 
name="A8XX_PERF_GBIF_RESERVED_25"/> + <value value="26" name="A8XX_PERF_GBIF_RESERVED_26"/> + <value value="27" name="A8XX_PERF_GBIF_RESERVED_27"/> + <value value="28" name="A8XX_PERF_GBIF_RESERVED_28"/> + <value value="29" name="A8XX_PERF_GBIF_RESERVED_29"/> + <value value="30" name="A8XX_PERF_GBIF_RESERVED_30"/> + <value value="31" name="A8XX_PERF_GBIF_RESERVED_31"/> + <value value="32" name="A8XX_PERF_GBIF_RESERVED_32"/> + <value value="33" name="A8XX_PERF_GBIF_RESERVED_33"/> + <value value="34" name="A8XX_PERF_GBIF_AXI0_READ_DATA_BEATS_TOTAL"/> + <value value="35" name="A8XX_PERF_GBIF_AXI1_READ_DATA_BEATS_TOTAL"/> + <value value="36" name="A8XX_PERF_GBIF_RESERVED_36"/> + <value value="37" name="A8XX_PERF_GBIF_RESERVED_37"/> + <value value="38" name="A8XX_PERF_GBIF_RESERVED_38"/> + <value value="39" name="A8XX_PERF_GBIF_RESERVED_39"/> + <value value="40" name="A8XX_PERF_GBIF_RESERVED_40"/> + <value value="41" name="A8XX_PERF_GBIF_RESERVED_41"/> + <value value="42" name="A8XX_PERF_GBIF_RESERVED_42"/> + <value value="43" name="A8XX_PERF_GBIF_RESERVED_43"/> + <value value="44" name="A8XX_PERF_GBIF_RESERVED_44"/> + <value value="45" name="A8XX_PERF_GBIF_RESERVED_45"/> + <value value="46" name="A8XX_PERF_GBIF_AXI0_WRITE_DATA_BEATS_TOTAL"/> + <value value="47" name="A8XX_PERF_GBIF_AXI1_WRITE_DATA_BEATS_TOTAL"/> + <value value="48" name="A8XX_PERF_GBIF_RESERVED_48"/> + <value value="49" name="A8XX_PERF_GBIF_RESERVED_49"/> + <value value="50" name="A8XX_PERF_GBIF_RESERVED_50"/> + <value value="51" name="A8XX_PERF_GBIF_RESERVED_51"/> + <value value="52" name="A8XX_PERF_GBIF_RESERVED_52"/> + <value value="53" name="A8XX_PERF_GBIF_RESERVED_53"/> + <value value="54" name="A8XX_PERF_GBIF_RESERVED_54"/> + <value value="55" name="A8XX_PERF_GBIF_RESERVED_55"/> + <value value="56" name="A8XX_PERF_GBIF_RESERVED_56"/> + <value value="57" name="A8XX_PERF_GBIF_RESERVED_57"/> + <value value="58" name="A8XX_PERF_GBIF_RESERVED_58"/> + <value value="59" 
name="A8XX_PERF_GBIF_RESERVED_59"/> + <value value="60" name="A8XX_PERF_GBIF_RESERVED_60"/> + <value value="61" name="A8XX_PERF_GBIF_RESERVED_61"/> + <value value="62" name="A8XX_PERF_GBIF_RESERVED_62"/> + <value value="63" name="A8XX_PERF_GBIF_RESERVED_63"/> + <value value="64" name="A8XX_PERF_GBIF_RESERVED_64"/> + <value value="65" name="A8XX_PERF_GBIF_RESERVED_65"/> + <value value="66" name="A8XX_PERF_GBIF_RESERVED_66"/> + <value value="67" name="A8XX_PERF_GBIF_RESERVED_67"/> + <value value="68" name="A8XX_PERF_GBIF_CYCLES_CH0_HELD_OFF_RD_ALL"/> + <value value="69" name="A8XX_PERF_GBIF_CYCLES_CH1_HELD_OFF_RD_ALL"/> + <value value="70" name="A8XX_PERF_GBIF_CYCLES_CH0_HELD_OFF_WR_ALL"/> + <value value="71" name="A8XX_PERF_GBIF_CYCLES_CH1_HELD_OFF_WR_ALL"/> + <value value="72" name="A8XX_PERF_GBIF_AXI_CH0_REQUEST_HELD_OFF"/> + <value value="73" name="A8XX_PERF_GBIF_AXI_CH1_REQUEST_HELD_OFF"/> + <value value="74" name="A8XX_PERF_GBIF_AXI_REQUEST_HELD_OFF"/> + <value value="75" name="A8XX_PERF_GBIF_AXI_CH0_WRITE_DATA_HELD_OFF"/> + <value value="76" name="A8XX_PERF_GBIF_AXI_CH1_WRITE_DATA_HELD_OFF"/> + <value value="77" name="A8XX_PERF_GBIF_AXI_ALL_WRITE_DATA_HELD_OFF"/> + <value value="78" name="A8XX_PERF_GBIF_RESERVED_78"/> + <value value="79" name="A8XX_PERF_GBIF_RESERVED_79"/> + <value value="80" name="A8XX_PERF_GBIF_RESERVED_80"/> + <value value="81" name="A8XX_PERF_GBIF_RESERVED_81"/> + <value value="82" name="A8XX_PERF_GBIF_RESERVED_82"/> + <value value="83" name="A8XX_PERF_GBIF_RESERVED_83"/> + <value value="84" name="A8XX_PERF_GBIF_RESERVED_84"/> + <value value="85" name="A8XX_PERF_GBIF_RESERVED_85"/> + <value value="86" name="A8XX_PERF_GBIF_RESERVED_86"/> + <value value="87" name="A8XX_PERF_GBIF_RESERVED_87"/> + <value value="88" name="A8XX_PERF_GBIF_AXI2_READ_REQUESTS_TOTAL"/> + <value value="89" name="A8XX_PERF_GBIF_AXI3_READ_REQUESTS_TOTAL"/> + <value value="90" name="A8XX_PERF_GBIF_RESERVED_90"/> + <value value="91" name="A8XX_PERF_GBIF_RESERVED_91"/> + 
<value value="92" name="A8XX_PERF_GBIF_RESERVED_92"/> + <value value="93" name="A8XX_PERF_GBIF_RESERVED_93"/> + <value value="94" name="A8XX_PERF_GBIF_RESERVED_94"/> + <value value="95" name="A8XX_PERF_GBIF_RESERVED_95"/> + <value value="96" name="A8XX_PERF_GBIF_RESERVED_96"/> + <value value="97" name="A8XX_PERF_GBIF_RESERVED_97"/> + <value value="98" name="A8XX_PERF_GBIF_RESERVED_98"/> + <value value="99" name="A8XX_PERF_GBIF_RESERVED_99"/> + <value value="100" name="A8XX_PERF_GBIF_AXI2_WRITE_REQUESTS_TOTAL"/> + <value value="101" name="A8XX_PERF_GBIF_AXI3_WRITE_REQUESTS_TOTAL"/> + <value value="102" name="A8XX_PERF_GBIF_RESERVED_102"/> + <value value="103" name="A8XX_PERF_GBIF_RESERVED_103"/> + <value value="104" name="A8XX_PERF_GBIF_RESERVED_104"/> + <value value="105" name="A8XX_PERF_GBIF_RESERVED_105"/> + <value value="106" name="A8XX_PERF_GBIF_RESERVED_106"/> + <value value="107" name="A8XX_PERF_GBIF_RESERVED_107"/> + <value value="108" name="A8XX_PERF_GBIF_RESERVED_108"/> + <value value="109" name="A8XX_PERF_GBIF_RESERVED_109"/> + <value value="110" name="A8XX_PERF_GBIF_RESERVED_110"/> + <value value="111" name="A8XX_PERF_GBIF_RESERVED_111"/> + <value value="112" name="A8XX_PERF_GBIF_AXI2_READ_DATA_BEATS_TOTAL"/> + <value value="113" name="A8XX_PERF_GBIF_AXI3_READ_DATA_BEATS_TOTAL"/> + <value value="114" name="A8XX_PERF_GBIF_RESERVED_114"/> + <value value="115" name="A8XX_PERF_GBIF_RESERVED_115"/> + <value value="116" name="A8XX_PERF_GBIF_RESERVED_116"/> + <value value="117" name="A8XX_PERF_GBIF_RESERVED_117"/> + <value value="118" name="A8XX_PERF_GBIF_RESERVED_118"/> + <value value="119" name="A8XX_PERF_GBIF_RESERVED_119"/> + <value value="120" name="A8XX_PERF_GBIF_RESERVED_120"/> + <value value="121" name="A8XX_PERF_GBIF_RESERVED_121"/> + <value value="122" name="A8XX_PERF_GBIF_RESERVED_122"/> + <value value="123" name="A8XX_PERF_GBIF_RESERVED_123"/> + <value value="124" name="A8XX_PERF_GBIF_AXI2_WRITE_DATA_BEATS_TOTAL"/> + <value value="125" 
name="A8XX_PERF_GBIF_AXI3_WRITE_DATA_BEATS_TOTAL"/> + <value value="126" name="A8XX_PERF_GBIF_RESERVED_126"/> + <value value="127" name="A8XX_PERF_GBIF_RESERVED_127"/> + <value value="128" name="A8XX_PERF_GBIF_RESERVED_128"/> + <value value="129" name="A8XX_PERF_GBIF_RESERVED_129"/> + <value value="130" name="A8XX_PERF_GBIF_RESERVED_130"/> + <value value="131" name="A8XX_PERF_GBIF_RESERVED_131"/> + <value value="132" name="A8XX_PERF_GBIF_RESERVED_132"/> + <value value="133" name="A8XX_PERF_GBIF_RESERVED_133"/> + <value value="134" name="A8XX_PERF_GBIF_RESERVED_134"/> + <value value="135" name="A8XX_PERF_GBIF_RESERVED_135"/> + <value value="136" name="A8XX_PERF_GBIF_RESERVED_136"/> + <value value="137" name="A8XX_PERF_GBIF_RESERVED_137"/> + <value value="138" name="A8XX_PERF_GBIF_RESERVED_138"/> + <value value="139" name="A8XX_PERF_GBIF_RESERVED_139"/> + <value value="140" name="A8XX_PERF_GBIF_RESERVED_140"/> + <value value="141" name="A8XX_PERF_GBIF_RESERVED_141"/> + <value value="142" name="A8XX_PERF_GBIF_RESERVED_142"/> + <value value="143" name="A8XX_PERF_GBIF_RESERVED_143"/> + <value value="144" name="A8XX_PERF_GBIF_RESERVED_144"/> + <value value="145" name="A8XX_PERF_GBIF_RESERVED_145"/> + <value value="146" name="A8XX_PERF_GBIF_RESERVED_146"/> + <value value="147" name="A8XX_PERF_GBIF_RESERVED_147"/> + <value value="148" name="A8XX_PERF_GBIF_RESERVED_148"/> + <value value="149" name="A8XX_PERF_GBIF_RESERVED_149"/> + <value value="150" name="A8XX_PERF_GBIF_READ_BEAT_ALL_CHANNELS"/> + <value value="151" name="A8XX_PERF_GBIF_WRITE_BEAT_ALL_CHANNELS"/> + <value value="152" name="A8XX_PERF_GBIF_READ_AND_WRITE_BEAT_ALL_CHANNELS"/> + <value value="153" name="A8XX_PERF_GBIF_RESERVED_153"/> + <value value="154" name="A8XX_PERF_GBIF_RESERVED_154"/> + <value value="155" name="A8XX_PERF_GBIF_RESERVED_155"/> + <value value="156" name="A8XX_PERF_GBIF_CYCLES_CH2_HELD_OFF_RD_ALL"/> + <value value="157" name="A8XX_PERF_GBIF_CYCLES_CH3_HELD_OFF_RD_ALL"/> + <value value="158" 
name="A8XX_PERF_GBIF_CYCLES_CH2_HELD_OFF_WR_ALL"/> + <value value="159" name="A8XX_PERF_GBIF_CYCLES_CH3_HELD_OFF_WR_ALL"/> + <value value="160" name="A8XX_PERF_GBIF_AXI_CH2_REQUEST_HELD_OFF"/> + <value value="161" name="A8XX_PERF_GBIF_AXI_CH3_REQUEST_HELD_OFF"/> + <value value="162" name="A8XX_PERF_GBIF_AXI_CH2_WRITE_DATA_HELD_OFF"/> + <value value="163" name="A8XX_PERF_GBIF_AXI_CH3_WRITE_DATA_HELD_OFF"/> + <value value="164" name="A8XX_PERF_GBIF_AXI_CH0_READ_PENDING_SUM"/> + <value value="165" name="A8XX_PERF_GBIF_AXI_CH1_READ_PENDING_SUM"/> + <value value="166" name="A8XX_PERF_GBIF_AXI_CH2_READ_PENDING_SUM"/> + <value value="167" name="A8XX_PERF_GBIF_AXI_CH3_READ_PENDING_SUM"/> + <value value="168" name="A8XX_PERF_GBIF_AXI_CH0_WRITE_PENDING_SUM"/> + <value value="169" name="A8XX_PERF_GBIF_AXI_CH1_WRITE_PENDING_SUM"/> + <value value="170" name="A8XX_PERF_GBIF_AXI_CH2_WRITE_PENDING_SUM"/> + <value value="171" name="A8XX_PERF_GBIF_AXI_CH3_WRITE_PENDING_SUM"/> + <value value="172" name="A8XX_PERF_GBIF_SMMU_RD_REQ_SAMPLES_CH0"/> + <value value="173" name="A8XX_PERF_GBIF_SMMU_RD_REQ_CYCLES_CH0"/> + <value value="174" name="A8XX_PERF_GBIF_RBUF_RD_REQ_SAMPLES_CH0"/> + <value value="175" name="A8XX_PERF_GBIF_RBUF_RD_REQ_CYCLES_CH0"/> + <value value="176" name="A8XX_PERF_GBIF_RBUF_UTIL_SAMPLES_CH0"/> + <value value="177" name="A8XX_PERF_GBIF_RBUF_BUSY_CYCLES_CH0"/> + <value value="178" name="A8XX_PERF_GBIF_RTABLE_OT_UTIL_SAMPLES_CH0"/> + <value value="179" name="A8XX_PERF_GBIF_SMMU_RD_REQ_SAMPLES_CH1"/> + <value value="180" name="A8XX_PERF_GBIF_SMMU_RD_REQ_CYCLES_CH1"/> + <value value="181" name="A8XX_PERF_GBIF_RBUF_RD_REQ_SAMPLES_CH1"/> + <value value="182" name="A8XX_PERF_GBIF_RBUF_RD_REQ_CYCLES_CH1"/> + <value value="183" name="A8XX_PERF_GBIF_RBUF_UTIL_SAMPLES_CH1"/> + <value value="184" name="A8XX_PERF_GBIF_RBUF_BUSY_CYCLES_CH1"/> + <value value="185" name="A8XX_PERF_GBIF_RTABLE_OT_UTIL_SAMPLES_CH1"/> + <value value="186" name="A8XX_PERF_GBIF_SMMU_RD_REQ_SAMPLES_CH2"/> 
+ <value value="187" name="A8XX_PERF_GBIF_SMMU_RD_REQ_CYCLES_CH2"/> + <value value="188" name="A8XX_PERF_GBIF_RBUF_RD_REQ_SAMPLES_CH2"/> + <value value="189" name="A8XX_PERF_GBIF_RBUF_RD_REQ_CYCLES_CH2"/> + <value value="190" name="A8XX_PERF_GBIF_RBUF_UTIL_SAMPLES_CH2"/> + <value value="191" name="A8XX_PERF_GBIF_RBUF_BUSY_CYCLES_CH2"/> + <value value="192" name="A8XX_PERF_GBIF_RTABLE_OT_UTIL_SAMPLES_CH2"/> + <value value="193" name="A8XX_PERF_GBIF_SMMU_RD_REQ_SAMPLES_CH3"/> + <value value="194" name="A8XX_PERF_GBIF_SMMU_RD_REQ_CYCLES_CH3"/> + <value value="195" name="A8XX_PERF_GBIF_RBUF_RD_REQ_SAMPLES_CH3"/> + <value value="196" name="A8XX_PERF_GBIF_RBUF_RD_REQ_CYCLES_CH3"/> + <value value="197" name="A8XX_PERF_GBIF_RBUF_UTIL_SAMPLES_CH3"/> + <value value="198" name="A8XX_PERF_GBIF_RBUF_BUSY_CYCLES_CH3"/> + <value value="199" name="A8XX_PERF_GBIF_RTABLE_OT_UTIL_SAMPLES_CH3"/> +</enum> + +<enum name="a8xx_gbif_pwr_perfcounter_select"> + <value value="0" name="A8XX_M0_STALL"/> + <value value="1" name="A8XX_M1_STALL"/> + <value value="2" name="A8XX_M0_NO_CLIENT_REQ"/> + <value value="3" name="A8XX_M1_NO_CLIENT_REQ"/> +</enum> + +<enum name="a8xx_alwayson_perfcounter_select"> + <value value="0" name="A8XX_PERF_ALWAYSON"/> +</enum> + +<enum name="a8xx_gmu_xoclk_perfcounter_select"> + <value value="0" name="A8XX_PERF_GMU_XOCLK_NO_COUNT"/> + <value value="1" name="A8XX_PERF_GMU_PWC_GPU_TOP_POWER_ON_CYCLES"/> + <value value="2" name="A8XX_PERF_GMU_PWC_GPU_TOP_POWER_OFF_SWITCHING_CYCLES"/> + <value value="3" name="A8XX_PERF_GMU_PWC_GPU_TOP_POWER_OFF_CYCLES"/> + <value value="4" name="A8XX_PERF_GMU_PWC_GPU_TOP_POWER_ON_SWITCHING_CYCLES"/> + <value value="5" name="A8XX_PERF_GMU_PWC_GPU_TOP_POWER_COLLAPSED"/> + <value value="6" name="A8XX_PERF_GMU_PWC_SPTPRAC_POWER_ON_CYCLES"/> + <value value="7" name="A8XX_PERF_GMU_PWC_SPTPRAC_POWER_OFF_SWITCHING_CYCLES"/> + <value value="8" name="A8XX_PERF_GMU_PWC_SPTPRAC_POWER_OFF_CYCLES"/> + <value value="9" 
name="A8XX_PERF_GMU_PWC_SPTPRAC_POWER_ON_SWITCHING_CYCLES"/> + <value value="10" name="A8XX_PERF_GMU_PWC_SPTPRAC_POWER_COLLAPSED"/> + <value value="11" name="A8XX_PERF_GMU_PWC_GPU_TOP_IDLE_PHASE1_IN_LATENCY"/> + <value value="12" name="A8XX_PERF_GMU_PWC_GPU_TOP_IDLE_PHASE2_IN_LATENCY"/> + <value value="13" name="A8XX_PERF_GMU_PWC_GPU_TOP_IDLE_PHASE1_OUT_LATENCY"/> + <value value="14" name="A8XX_PERF_GMU_PWC_GPU_TOP_IDLE_PHASE2_OUT_LATENCY"/> + <value value="15" name="A8XX_PERF_GMU_PWC_TOTAL_THROTTLED_CYCLES"/> + <value value="16" name="A8XX_PERF_GMU_PWC_SID_N_TOTAL_THROTTLED_CYCLES_0"/> + <value value="17" name="A8XX_PERF_GMU_PWC_SID_N_TOTAL_THROTTLED_CYCLES_1"/> + <value value="18" name="A8XX_PERF_GMU_PWC_SID_N_TOTAL_THROTTLED_CYCLES_2"/> + <value value="19" name="A8XX_PERF_GMU_PWC_SID_N_TOTAL_THROTTLED_CYCLES_3"/> + <value value="20" name="A8XX_PERF_GMU_PWC_SID_N_TOTAL_THROTTLED_CYCLES_4"/> + <value value="21" name="A8XX_PERF_GMU_PWC_SID_N_TOTAL_THROTTLED_CYCLES_5"/> + <value value="22" name="A8XX_PERF_GMU_PWC_SID_N_TOTAL_THROTTLED_CYCLES_6"/> + <value value="23" name="A8XX_PERF_GMU_PWC_SID_N_TOTAL_THROTTLED_CYCLES_7"/> + <value value="24" name="A8XX_PERF_GMU_PWC_SID_N_TOTAL_THROTTLED_CYCLES_8"/> + <value value="25" name="A8XX_PERF_GMU_PWC_SID_N_TOTAL_THROTTLED_CYCLES_9"/> + <value value="26" name="A8XX_PERF_GMU_PWC_NUM_GDPM_EPOCH_TOO_SHORT_INTR"/> + <value value="27" name="A8XX_PERF_GMU_PWC_NUM_GDPM_INVALID_OPCODE_INTR"/> + <value value="28" name="A8XX_PERF_GMU_PWC_NUM_MAX_THRESHOLD_BREACH"/> + <value value="29" name="A8XX_PERF_GMU_PWC_NUM_MIN_THRESHOLD_BREACH"/> + <value value="30" name="A8XX_PERF_GMU_PWC_CYCLE_MAX_THRESHOLD_BREACH"/> + <value value="31" name="A8XX_PERF_GMU_PWC_CYCLE_MIN_THRESHOLD_BREACH"/> + <value value="32" name="A8XX_PERF_GMU_PWC_GPU_BUSY_CYCLES"/> + <value value="33" name="A8XX_PERF_GMU_PWC_CYCLE_ALM_LOW_THROTTLE_CYCLES"/> + <value value="34" name="A8XX_PERF_GMU_PWC_CYCLE_ALM_HIGH_THROTTLE_CYCLES"/> + <value value="35" 
name="A8XX_PERF_GMU_PWC_SID_N_TOTAL_THROTTLED_CYCLES_10"/> + <value value="36" name="A8XX_PERF_GMU_PWC_SID_N_TOTAL_THROTTLED_CYCLES_11"/> + <value value="37" name="A8XX_PERF_GMU_PWC_CLX_TOTAL_THROTTLE_CYCLES"/> + <value value="38" name="A8XX_PERF_GMU_PWC_BCL_TOTAL_THROTTLE_CYCLES"/> + <value value="39" name="A8XX_PERF_GMU_PWC_MSG2_CLX_TOTAL_THROTTLE_CYCLES"/> + <value value="40" name="A8XX_PERF_GMU_XOCLK_RESERVED_40"/> + <value value="41" name="A8XX_PERF_GMU_XOCLK_RESERVED_41"/> + <value value="42" name="A8XX_PERF_GMU_XOCLK_RESERVED_42"/> + <value value="43" name="A8XX_PERF_GMU_XOCLK_RESERVED_43"/> + <value value="44" name="A8XX_PERF_GMU_XOCLK_RESERVED_44"/> + <value value="45" name="A8XX_PERF_GMU_XOCLK_RESERVED_45"/> + <value value="46" name="A8XX_PERF_GMU_XOCLK_RESERVED_46"/> + <value value="47" name="A8XX_PERF_GMU_XOCLK_RESERVED_47"/> + <value value="48" name="A8XX_PERF_GMU_PWC_LPAC_BUSY_CYCLES"/> + <value value="49" name="A8XX_PERF_GMU_PWC_GPU_OR_LPAC_BUSY_CYCLES"/> + <value value="50" name="A8XX_PERF_GMU_PWC_GX_THROTTLE_VIOLATION_CNT"/> + <value value="51" name="A8XX_PERF_GMU_PWC_MX_THROTTLE_VIOLATION_CNT"/> + <value value="52" name="A8XX_PERF_GMU_PWC_PWRLIMITS_PREBUCK_THROTTLE_CYCLES"/> + <value value="53" name="A8XX_PERF_GMU_PWC_GX_IFF_THROTTLE_CYCLES"/> + <value value="54" name="A8XX_PERF_GMU_PWC_GFX_PCLX_THROTTLE_CYCLES"/> + <value value="55" name="A8XX_PERF_GMU_PWC_MX_IFF_THROTTLE_CYCLES"/> + <value value="56" name="A8XX_PERF_GMU_PWC_MX_PCLX_THROTTLE_CYCLES"/> + <value value="57" name="A8XX_PERF_GMU_XOCLK_RESERVED_57"/> + <value value="58" name="A8XX_PERF_GMU_XOCLK_RESERVED_58"/> + <value value="59" name="A8XX_PERF_GMU_XOCLK_RESERVED_59"/> + <value value="60" name="A8XX_PERF_GMU_XOCLK_RESERVED_60"/> + <value value="61" name="A8XX_PERF_GMU_XOCLK_RESERVED_61"/> + <value value="62" name="A8XX_PERF_GMU_XOCLK_RESERVED_62"/> + <value value="63" name="A8XX_PERF_GMU_XOCLK_RESERVED_63"/> + <value value="64" name="A8XX_PERF_GMU_PWC_GXDCE_HIST_BIN_0_CNT"/> + <value 
value="65" name="A8XX_PERF_GMU_PWC_GXDCE_HIST_BIN_1_CNT"/> + <value value="66" name="A8XX_PERF_GMU_PWC_GXDCE_HIST_BIN_2_CNT"/> + <value value="67" name="A8XX_PERF_GMU_PWC_GXDCE_HIST_BIN_3_CNT"/> + <value value="68" name="A8XX_PERF_GMU_PWC_GXDCE_HIST_BIN_4_CNT"/> + <value value="69" name="A8XX_PERF_GMU_PWC_GXDCE_HIST_BIN_5_CNT"/> + <value value="70" name="A8XX_PERF_GMU_PWC_GXDCE_HIST_BIN_6_CNT"/> + <value value="71" name="A8XX_PERF_GMU_PWC_GXDCE_HIST_BIN_7_CNT"/> + <value value="72" name="A8XX_PERF_GMU_PWC_GXDCE_HIST_BIN_8_CNT"/> + <value value="73" name="A8XX_PERF_GMU_PWC_GXDCE_HIST_BIN_9_CNT"/> + <value value="74" name="A8XX_PERF_GMU_PWC_GXDCE_HIST_BIN_10_CNT"/> + <value value="75" name="A8XX_PERF_GMU_PWC_GXDCE_HIST_BIN_11_CNT"/> + <value value="76" name="A8XX_PERF_GMU_PWC_GXDCE_HIST_BIN_12_CNT"/> + <value value="77" name="A8XX_PERF_GMU_PWC_GXDCE_HIST_BIN_13_CNT"/> + <value value="78" name="A8XX_PERF_GMU_PWC_GXDCE_HIST_BIN_14_CNT"/> + <value value="79" name="A8XX_PERF_GMU_PWC_GXDCE_HIST_BIN_15_CNT"/> + <value value="80" name="A8XX_PERF_GMU_PWC_GXCDYN_HIST_BIN_0_CNT"/> + <value value="81" name="A8XX_PERF_GMU_PWC_GXCDYN_HIST_BIN_1_CNT"/> + <value value="82" name="A8XX_PERF_GMU_PWC_GXCDYN_HIST_BIN_2_CNT"/> + <value value="83" name="A8XX_PERF_GMU_PWC_GXCDYN_HIST_BIN_3_CNT"/> + <value value="84" name="A8XX_PERF_GMU_PWC_GXCDYN_HIST_BIN_4_CNT"/> + <value value="85" name="A8XX_PERF_GMU_PWC_GXCDYN_HIST_BIN_5_CNT"/> + <value value="86" name="A8XX_PERF_GMU_PWC_GXCDYN_HIST_BIN_6_CNT"/> + <value value="87" name="A8XX_PERF_GMU_PWC_GXCDYN_HIST_BIN_7_CNT"/> + <value value="88" name="A8XX_PERF_GMU_PWC_GXCDYN_HIST_BIN_8_CNT"/> + <value value="89" name="A8XX_PERF_GMU_PWC_GXCDYN_HIST_BIN_9_CNT"/> + <value value="90" name="A8XX_PERF_GMU_PWC_GXCDYN_HIST_BIN_10_CNT"/> + <value value="91" name="A8XX_PERF_GMU_PWC_GXCDYN_HIST_BIN_11_CNT"/> + <value value="92" name="A8XX_PERF_GMU_PWC_GXCDYN_HIST_BIN_12_CNT"/> + <value value="93" name="A8XX_PERF_GMU_PWC_GXCDYN_HIST_BIN_13_CNT"/> + <value 
value="94" name="A8XX_PERF_GMU_PWC_GXCDYN_HIST_BIN_14_CNT"/> + <value value="95" name="A8XX_PERF_GMU_PWC_GXCDYN_HIST_BIN_15_CNT"/> +</enum> + +<enum name="a8xx_gmu_gmuclk_perfcounter_select"> + <value value="0" name="A8XX_PERF_GMUCLK_NO_COUNT"/> + <value value="1" name="A8XX_PERF_GMU_PWC_FW_INCREMENT_0"/> + <value value="2" name="A8XX_PERF_GMU_PWC_FW_INCREMENT_1"/> + <value value="3" name="A8XX_PERF_GMU_PWC_FW_INCREMENT_2"/> + <value value="4" name="A8XX_PERF_GMU_PWC_FW_INCREMENT_3"/> + <value value="5" name="A8XX_PERF_GMU_PWC_FW_INCREMENT_4"/> + <value value="6" name="A8XX_PERF_GMU_PWC_FW_INCREMENT_5"/> + <value value="7" name="A8XX_PERF_GMU_PWC_FW_INCREMENT_6"/> + <value value="8" name="A8XX_PERF_GMU_PWC_FW_INCREMENT_7"/> + <value value="9" name="A8XX_PERF_GMU_PWC_SOC_MIN_IDLE_VOTE"/> + <value value="10" name="A8XX_PERF_GMU_PWC_TSENSE_CBCAST_MSG"/> + <value value="11" name="A8XX_PERF_GMU_PWC_MXA_CBCAST_MSG"/> + <value value="12" name="A8XX_PERF_GMU_PWC_CX_CBCAST_MSG"/> + <value value="13" name="A8XX_PERF_GMU_PWC_GX_CBCAST_MSG"/> + <value value="14" name="A8XX_PERF_GMU_PWC_BCL_CBCAST_MSG"/> + <value value="15" name="A8XX_PERF_GMU_PWC_MXC_CBCAST_MSG"/> + <value value="16" name="A8XX_PERF_GMU_PWC_GENERIC_CBCAST_MSG"/> + <value value="17" name="A8XX_PERF_GMU_PWC_PMIC_CBCAST_MSG"/> + <value value="18" name="A8XX_PERF_GMU_PWC_CLX_CBCAST_MSG"/> + <value value="19" name="A8XX_PERF_GMU_PWC_PMICARB_CLX_IRQ"/> + <value value="20" name="A8XX_PERF_GMU_PWC_CLX_EXPIRE_CBCAST_MSG"/> + <value value="21" name="A8XX_PERF_GMU_PWC_PMICARB_CLX_EXPIRE_IRQ"/> + <value value="22" name="A8XX_PERF_GMU_PWC_MXC_CLX_CBCAST_MSG"/> + <value value="23" name="A8XX_PERF_GMU_PWC_BCL_TYPE_0_CBCAST_MSG"/> + <value value="24" name="A8XX_PERF_GMU_PWC_BCL_TYPE_1_CBCAST_MSG"/> + <value value="25" name="A8XX_PERF_GMU_PWC_BCL_TYPE_2_CBCAST_MSG"/> + <value value="26" name="A8XX_PERF_GMU_PWC_PMIC_MSG2_CBCAST_MSG"/> + <value value="27" name="A8XX_PERF_GMU_PWC_MSG2_CLX_START_CBCAST_MSG"/> + <value value="28" 
name="A8XX_PERF_GMU_PWC_MSG2_CLX_EXPIRE_CBCAST_MSG"/> + <value value="29" name="A8XX_PERF_GMU_PWC_GX_VRM_1_CBCAST_MSG"/> + <value value="30" name="A8XX_PERF_GMU_GMUCLK_RESERVED_30"/> + <value value="31" name="A8XX_PERF_GMU_GMUCLK_RESERVED_31"/> + <value value="32" name="A8XX_PERF_GMU_PWC_CTXT_DONE_BR"/> + <value value="33" name="A8XX_PERF_GMU_PWC_CTXT_DONE_BV"/> + <value value="34" name="A8XX_PERF_GMU_PWC_CTXT_DONE_LPAC"/> + <value value="35" name="A8XX_PERF_GMU_PWC_GFX_IFF_MSG"/> + <value value="36" name="A8XX_PERF_GMU_PWC_GFX_PCLX_MSG"/> + <value value="37" name="A8XX_PERF_GMU_PWC_MX_IFF_MSG"/> + <value value="38" name="A8XX_PERF_GMU_PWC_MX_PCLX_MSG"/> +</enum> + +<enum name="a8xx_gmu_perf_perfcounter_select"> + <value value="0" name="A8XX_PERF_GMU_NO_COUNT"/> + <value value="1" name="A8XX_PERF_GMU_CM3_BUSY_CYCLES"/> + <value value="2" name="A8XX_PERF_GMU_DATA_CACHE_READ"/> + <value value="3" name="A8XX_PERF_GMU_DATA_CACHE_READ_MISS"/> + <value value="4" name="A8XX_PERF_GMU_DATA_CACHE_WRITE"/> + <value value="5" name="A8XX_PERF_GMU_DATA_CACHE_WRITE_MISS"/> + <value value="6" name="A8XX_PERF_GMU_DATA_CACHE_EVICTIONS"/> + <value value="7" name="A8XX_PERF_GMU_L0_INSTRUCTION_CACHE_READ"/> + <value value="8" name="A8XX_PERF_GMU_L0_INSTRUCTION_CACHE_READ_MISS"/> + <value value="9" name="A8XX_PERF_GMU_L1_INSTRUCTION_CACHE_READ"/> + <value value="10" name="A8XX_PERF_GMU_L1_INSTRUCTION_CACHE_READ_MISS"/> + <value value="11" name="A8XX_PERF_GMU_SYS_AHB_WRITE"/> + <value value="12" name="A8XX_PERF_GMU_SYS_AHB_WRITE_BUFFER_MISS"/> + <value value="13" name="A8XX_PERF_GMU_SYS_AHB_READ"/> + <value value="14" name="A8XX_PERF_GMU_SYS_AHB_READ_BUFFER_MISS"/> + <value value="15" name="A8XX_PERF_GMU_DATA_CACHE_AHB_TOTAL_WAIT_CYCLES"/> + <value value="16" name="A8XX_PERF_GMU_DATA_CACHE_VBIF_TOTAL_WAIT_CYCLES"/> + <value value="17" name="A8XX_PERF_GMU_INSTRUCTION_CACHE_AHB_TOTAL_WAIT_CYCLES"/> + <value value="18" name="A8XX_PERF_GMU_INSTRUCTION_CACHE_VBIF_TOTAL_WAIT_CYCLES"/> + <value 
value="19" name="A8XX_PERF_GMU_SYS_VBIF_AHB_TOTAL_WAIT_CYCLES"/> + <value value="20" name="A8XX_PERF_GMU_SYS__AHB_TOTAL_WAIT_CYCLES"/> + <value value="21" name="A8XX_PERF_GMU_TOTAL_INTR_LATENCY_CYCLES"/> + <value value="22" name="A8XX_PERF_GMU_WORST_INTR_LATENCY_CYCLES"/> + <value value="23" name="A8XX_PERF_GMU_TOTAL_OUTSTANDING_INTERRUPTS"/> +</enum> + +<enum name="a8xx_ufc_perfcounter_select"> + <value value="0" name="A8XX_PERF_UFC_NEVER_COUNT"/> + <value value="1" name="A8XX_PERF_UFC_BUSY_CYCLES"/> + <value value="2" name="A8XX_PERF_UFC_READ_DATA_VBIF"/> + <value value="3" name="A8XX_PERF_UFC_WRITE_DATA_VBIF"/> + <value value="4" name="A8XX_PERF_UFC_READ_REQUEST_VBIF"/> + <value value="5" name="A8XX_PERF_UFC_WRITE_REQUEST_VBIF"/> + <value value="6" name="A8XX_PERF_UFC_MAIN_HIT_CRE_PREFETCH"/> + <value value="7" name="A8XX_PERF_UFC_MAIN_HIT_SP_PREFETCH"/> + <value value="8" name="A8XX_PERF_UFC_MAIN_HIT_TP_PREFETCH"/> + <value value="9" name="A8XX_PERF_UFC_MAIN_HIT_UBWC_READ"/> + <value value="10" name="A8XX_PERF_UFC_MAIN_HIT_UBWC_WRITE"/> + <value value="11" name="A8XX_PERF_UFC_MAIN_MISS_CRE_PREFETCH"/> + <value value="12" name="A8XX_PERF_UFC_MAIN_MISS_SP_PREFETCH"/> + <value value="13" name="A8XX_PERF_UFC_MAIN_MISS_TP_PREFETCH"/> + <value value="14" name="A8XX_PERF_UFC_MAIN_MISS_UBWC_READ"/> + <value value="15" name="A8XX_PERF_UFC_MAIN_MISS_UBWC_WRITE"/> + <value value="16" name="A8XX_PERF_UFC_MAIN_UBWC_RD_NRDY"/> + <value value="17" name="A8XX_PERF_UFC_MAIN_UBWC_RD_RDY"/> + <value value="18" name="A8XX_PERF_UFC_MAIN_TP_RD_NRDY"/> + <value value="19" name="A8XX_PERF_UFC_MAIN_TP_RD_RDY"/> + <value value="20" name="A8XX_PERF_UFC_STALL_CYCLES_GBIF_CMD"/> + <value value="21" name="A8XX_PERF_UFC_STALL_CYCLES_GBIF_RDATA"/> + <value value="22" name="A8XX_PERF_UFC_STALL_CYCLES_GBIF_WDATA"/> + <value value="23" name="A8XX_PERF_UFC_STALL_CYCLES_UBWC_WR_FLAG"/> + <value value="24" name="A8XX_PERF_UFC_STALL_CYCLES_UBWC_FLAG_RTN"/> + <value value="25" 
name="A8XX_PERF_UFC_STALL_CYCLES_UBWC_EVENT"/> + <value value="26" name="A8XX_PERF_UFC_UBWC_REQ_STALLED_CYCLES"/> + <value value="27" name="A8XX_PERF_UFC_UBWC_RD_STALLED_CYCLES"/> + <value value="28" name="A8XX_PERF_UFC_UBWC_WR_STALLED_CYCLES"/> + <value value="29" name="A8XX_PERF_UFC_PREFETCH_STALLED_CYCLES"/> + <value value="30" name="A8XX_PERF_UFC_EVICTION_STALLED_CYCLES"/> + <value value="31" name="A8XX_PERF_UFC_LOCK_STALLED_CYCLES"/> + <value value="32" name="A8XX_PERF_UFC_MISS_LATENCY_CYCLES"/> + <value value="33" name="A8XX_PERF_UFC_MISS_LATENCY_SAMPLES"/> + <value value="34" name="A8XX_PERF_UFC_L1_CRE_REQUESTS"/> + <value value="35" name="A8XX_PERF_UFC_L1_CRE_STALLED_CYCLES"/> + <value value="36" name="A8XX_PERF_UFC_L1_CRE_FILTER_HIT"/> + <value value="37" name="A8XX_PERF_UFC_L1_CRE_FILTER_MISS"/> + <value value="38" name="A8XX_PERF_UFC_L1_SP_REQUESTS"/> + <value value="39" name="A8XX_PERF_UFC_L1_SP_STALLED_CYCLES"/> + <value value="40" name="A8XX_PERF_UFC_L1_SP_FILTER_HIT"/> + <value value="41" name="A8XX_PERF_UFC_L1_SP_FILTER_MISS"/> + <value value="42" name="A8XX_PERF_UFC_L1_TP_HINT_REQUESTS"/> + <value value="43" name="A8XX_PERF_UFC_L1_TP_STALLED_CYCLES"/> + <value value="44" name="A8XX_PERF_UFC_L1_TP_HINT_TAG_MISS"/> + <value value="45" name="A8XX_PERF_UFC_L1_TP_HINT_TAG_HIT_RDY"/> + <value value="46" name="A8XX_PERF_UFC_L1_TP_HINT_TAG_HIT_NRDY"/> + <value value="47" name="A8XX_PERF_UFC_AUTO_EVICTIONS"/> + <value value="48" name="A8XX_PERF_UFC_PARTIAL_EVICTIONS"/> +</enum> + +</database> diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_common.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_common.xml index 79d204f1e400..195cee078357 100644 --- a/drivers/gpu/drm/msm/registers/adreno/adreno_common.xml +++ b/drivers/gpu/drm/msm/registers/adreno/adreno_common.xml @@ -14,6 +14,27 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <value name="A8XX" value="8"/> </enum> +<enum name="desctype" bare="yes"> + <value 
name="DESC_NONE" value="0"/> + <doc> + TEX_MEMOBJ descriptor types. These are used + to mark fields that only apply to certain + descriptor types, and potentially overlap + with fields in other types. + </doc> + <value name="DESC_SINGLE_PLANE" value="1"/> + <value name="DESC_MULTI_PLANE" value="2"/> + <value name="DESC_BUFFER" value="3"/> + <value name="DESC_WEIGHT" value="4"/> + <doc> + Additional descriptor types not part of + TEX_MEMOBJ. These are described by their + own toplevel domain. + </doc> + <value name="DESC_SAMPLER" value="10"/> <!-- AxXX_UBO --> + <value name="DESC_UBO" value="11"/> <!-- A6XX_UBO, same on gen8 --> +</enum> + <enum name="adreno_pa_su_sc_draw"> <value name="PC_DRAW_POINTS" value="0"/> <value name="PC_DRAW_LINES" value="1"/> @@ -409,4 +430,25 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <value value="7" name="PIPE_DDE_BV"/> </enum> +<!-- + A fake domain for giving lua scripts access to the shader_stats struct + --> +<domain name="ir3_shader_stats" width="32"> + <!-- indx 0 is bindful, indx N+1 is .baseN --> + <array offset="0" name="descriptor_stats" stride="8" length="9"> + <reg64 offset="0" name="img"/> + <reg64 offset="2" name="tex"/> + <reg64 offset="4" name="samp"/> + <reg64 offset="6" name="ubo"/> + </array> + <reg32 offset="72" name="has_img"/> + <reg32 offset="73" name="has_tex"/> + <reg32 offset="74" name="has_samp"/> + <reg32 offset="75" name="has_ubo"/> + <!-- + other following fields can be added as needed, but we + might need to take care of padding/alignment. 
+ --> +</domain> + </database> diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml index 51e9c94f5e37..f185b541aa70 100644 --- a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml +++ b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml @@ -152,6 +152,8 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <!-- TODO: deal with name conflicts with other gens --> <value name="CACHE_FLUSH7" value="0x32" variants="A7XX-"/> <value name="CACHE_INVALIDATE7" value="0x33" variants="A7XX-"/> + <value name="SUBPASS_FENCE" value="0x35" variants="A7XX-"/> + <value name="SUBPASS_SLICE_FENCE" value="0x36" variants="A8XX-"/> <value name="DEPTH_BUFFER_FLIP" value="0x3d" variants="A8XX-"/> <value name="CCH_FAST_CLEAR_CLEAN" value="0x1b" variants="A8XX-"/> </enum> @@ -1095,7 +1097,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="DIRTY" pos="16" type="boolean"/> <bitfield name="DISABLE" pos="17" type="boolean"/> <bitfield name="DISABLE_ALL_GROUPS" pos="18" type="boolean"/> - <bitfield name="LOAD_IMMED" pos="19" type="boolean"/> + <bitfield name="LOAD_IMMED" pos="19" type="boolean" variants="A5XX"/> <bitfield name="BINNING" pos="20" varset="chip" variants="A6XX-" type="boolean"/> <bitfield name="GMEM" pos="21" varset="chip" variants="A6XX-" type="boolean"/> <bitfield name="SYSMEM" pos="22" varset="chip" variants="A6XX-" type="boolean"/> @@ -1275,8 +1277,15 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <domain name="CP_REG_TO_MEM" width="32" prefix="chip"> <reg32 offset="0" name="0"> <bitfield name="REG" low="0" high="17" type="hex"/> - <!-- number of registers/dwords copied is max(CNT, 1). --> + <!-- + Number of registers/dwords copied is max(CNT, 1). + With 64B it gets rounded to the next 64B boundary. + --> <bitfield name="CNT" low="18" high="29" type="uint"/> + <!-- + Treat things as 64B, allowing for a 64B atomic read of + registers. 
+ --> <bitfield name="64B" pos="30" type="boolean"/> <bitfield name="ACCUMULATE" pos="31" type="boolean"/> </reg32> @@ -1469,6 +1478,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <value value="1" name="POLL_MEMORY"/> <value value="2" name="POLL_SCRATCH"/> <value value="3" name="POLL_ON_CHIP" varset="chip" variants="A7XX-"/> + <value value="4" name="POLL_SYSTEM_VARIABLE" varset="chip" variants="A8XX-"/> </enum> <domain name="CP_COND_WRITE5" width="32"> @@ -1476,7 +1486,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="FUNCTION" low="0" high="2" type="cp_cond_function"/> <bitfield name="SIGNED_COMPARE" pos="3" type="boolean"/> <!-- POLL_REGISTER polls a register at POLL_ADDR_LO. --> - <bitfield name="POLL" low="4" high="5" type="poll_memory_type"/> + <bitfield name="POLL" low="4" high="6" type="poll_memory_type"/> <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> </reg32> <reg64 offset="1" name="POLL_ADDR" type="address"/> @@ -2055,28 +2065,20 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <domain name="CP_COND_EXEC" width="32"> <doc> - Executes the following DWORDs of commands if the dword at ADDR0 - is not equal to 0 and the dword at ADDR1 is less than REF - (signed comparison). + Executes the following DWORDs of commands if the dword + at BOOL_ADDR is not equal to 0 and the timestamp + value ACTIVE_TIMESTAMP is ahead of the value fetched + from TIMESTAMP_ADDR. 
+ + The timestamp comparison is an unsigned compare with + wraparound, ie: + + (ACTIVE_TIMESTAMP - *TIMESTAMP_ADDR) < 0x80000000 </doc> - <reg32 offset="0" name="0"> - <bitfield name="ADDR0_LO" low="0" high="31"/> - </reg32> - <reg32 offset="1" name="1"> - <bitfield name="ADDR0_HI" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="ADDR1_LO" low="0" high="31"/> - </reg32> - <reg32 offset="3" name="3"> - <bitfield name="ADDR1_HI" low="0" high="31"/> - </reg32> - <reg32 offset="4" name="4"> - <bitfield name="REF" low="0" high="31"/> - </reg32> - <reg32 offset="5" name="5"> - <bitfield name="DWORDS" low="0" high="31" type="uint"/> - </reg32> + <reg64 offset="0" name="BOOL_ADDR" type="address"/> + <reg64 offset="2" name="TIMESTAMP_ADDR" type="address"/> + <reg32 offset="4" name="ACTIVE_TIMESTAMP"/> + <reg32 offset="5" name="DWORDS"/> </domain> <domain name="CP_SET_AMBLE" width="32"> diff --git a/drivers/gpu/drm/msm/registers/gen_header.py b/drivers/gpu/drm/msm/registers/gen_header.py index 2acad951f1e2..d3b56a9d84fb 100644 --- a/drivers/gpu/drm/msm/registers/gen_header.py +++ b/drivers/gpu/drm/msm/registers/gen_header.py @@ -11,997 +11,1210 @@ import collections import argparse import time import datetime +import json + class Error(Exception): - def __init__(self, message): - self.message = message + def __init__(self, message): + self.message = message + class Enum(object): - def __init__(self, name): - self.name = name - self.values = [] - - def has_name(self, name): - for (n, value) in self.values: - if n == name: - return True - return False - - def names(self): - return [n for (n, value) in self.values] - - def dump(self, is_deprecated): - use_hex = False - for (name, value) in self.values: - if value > 0x1000: - use_hex = True - - print("enum %s {" % self.name) - for (name, value) in self.values: - if use_hex: - print("\t%s = 0x%08x," % (name, value)) - else: - print("\t%s = %d," % (name, value)) - print("};\n") - - def 
dump_pack_struct(self, is_deprecated): - pass + def __init__(self, name): + self.name = name + self.values = [] + + def has_name(self, name): + for (n, value) in self.values: + if n == name: + return True + return False + + def names(self): + return [n for (n, value) in self.values] + + def value(self, name): + for (n, v) in self.values: + if n == name: + return v + + def dump(self, has_variants): + use_hex = False + for (name, value) in self.values: + if value > 0x1000: + use_hex = True + + print("enum %s {" % self.name) + for (name, value) in self.values: + if use_hex: + print("\t%s = 0x%08x," % (name, value)) + else: + print("\t%s = %d," % (name, value)) + print("};\n") + + def dump_pack_struct(self, has_variants): + pass + class Field(object): - def __init__(self, name, low, high, shr, type, parser): - self.name = name - self.low = low - self.high = high - self.shr = shr - self.type = type - - builtin_types = [ None, "a3xx_regid", "boolean", "uint", "hex", "int", "fixed", "ufixed", "float", "address", "waddress" ] - - maxpos = parser.current_bitsize - 1 - - if low < 0 or low > maxpos: - raise parser.error("low attribute out of range: %d" % low) - if high < 0 or high > maxpos: - raise parser.error("high attribute out of range: %d" % high) - if high < low: - raise parser.error("low is greater than high: low=%d, high=%d" % (low, high)) - if self.type == "boolean" and not low == high: - raise parser.error("booleans should be 1 bit fields") - elif self.type == "float" and not (high - low == 31 or high - low == 15): - raise parser.error("floats should be 16 or 32 bit fields") - elif self.type not in builtin_types and self.type not in parser.enums: - raise parser.error("unknown type '%s'" % self.type) - - def ctype(self, var_name): - if self.type is None: - type = "uint32_t" - val = var_name - elif self.type == "boolean": - type = "bool" - val = var_name - elif self.type == "uint" or self.type == "hex" or self.type == "a3xx_regid": - type = "uint32_t" - val = var_name 
- elif self.type == "int": - type = "int32_t" - val = var_name - elif self.type == "fixed": - type = "float" - val = "((int32_t)(%s * %d.0))" % (var_name, 1 << self.radix) - elif self.type == "ufixed": - type = "float" - val = "((uint32_t)(%s * %d.0))" % (var_name, 1 << self.radix) - elif self.type == "float" and self.high - self.low == 31: - type = "float" - val = "fui(%s)" % var_name - elif self.type == "float" and self.high - self.low == 15: - type = "float" - val = "_mesa_float_to_half(%s)" % var_name - elif self.type in [ "address", "waddress" ]: - type = "uint64_t" - val = var_name - else: - type = "enum %s" % self.type - val = var_name - - if self.shr > 0: - val = "(%s >> %d)" % (val, self.shr) - - return (type, val) + def __init__(self, name, low, high, shr, type, parser): + self.name = name + self.low = low + self.high = high + self.shr = shr + self.type = type + + builtin_types = [None, "a3xx_regid", "boolean", "uint", "hex", + "int", "fixed", "ufixed", "float", "address", "waddress"] + + maxpos = parser.current_bitsize - 1 + + if low < 0 or low > maxpos: + raise parser.error("low attribute out of range: %d" % low) + if high < 0 or high > maxpos: + raise parser.error("high attribute out of range: %d" % high) + if high < low: + raise parser.error( + "low is greater than high: low=%d, high=%d" % (low, high)) + if self.type == "boolean" and not low == high: + raise parser.error("booleans should be 1 bit fields") + elif self.type == "float" and not (high - low == 31 or high - low == 15): + raise parser.error("floats should be 16 or 32 bit fields") + elif self.type not in builtin_types and self.type not in parser.enums: + raise parser.error("unknown type '%s'" % self.type) + + def ctype(self, var_name): + if self.type is None: + type = "uint32_t" + val = var_name + elif self.type == "boolean": + type = "bool" + val = var_name + elif self.type == "uint" or self.type == "hex" or self.type == "a3xx_regid": + type = "uint32_t" + val = var_name + elif self.type == 
"int": + type = "int32_t" + val = var_name + elif self.type == "fixed": + type = "float" + val = "(uint32_t)((int32_t)(%s * %d.0))" % (var_name, 1 << self.radix) + elif self.type == "ufixed": + type = "float" + val = "((uint32_t)(%s * %d.0))" % (var_name, 1 << self.radix) + elif self.type == "float" and self.high - self.low == 31: + type = "float" + val = "fui(%s)" % var_name + elif self.type == "float" and self.high - self.low == 15: + type = "float" + val = "_mesa_float_to_half(%s)" % var_name + elif self.type in ["address", "waddress"]: + type = "uint64_t" + val = var_name + else: + type = "enum %s" % self.type + val = var_name + + if self.shr > 0: + val = "(%s >> %d)" % (val, self.shr) + + return (type, val) + def tab_to(name, value): - tab_count = (68 - (len(name) & ~7)) // 8 - if tab_count <= 0: - tab_count = 1 - print(name + ('\t' * tab_count) + value) + tab_count = (68 - (len(name) & ~7)) // 8 + if tab_count <= 0: + tab_count = 1 + print(name + ('\t' * tab_count) + value) + +def define_macro(name, value, has_variants): + if has_variants: + value = "__FD_DEPRECATED " + value + tab_to(name, value) def mask(low, high): - return ((0xffffffffffffffff >> (64 - (high + 1 - low))) << low) + return ((0xffffffffffffffff >> (64 - (high + 1 - low))) << low) + def field_name(reg, f): - if f.name: - name = f.name.lower() - else: - # We hit this path when a reg is defined with no bitset fields, ie. - # <reg32 offset="0x88db" name="RB_RESOLVE_SYSTEM_BUFFER_ARRAY_PITCH" low="0" high="28" shr="6" type="uint"/> - name = reg.name.lower() + if f.name: + name = f.name.lower() + else: + # We hit this path when a reg is defined with no bitset fields, ie. 
+ # <reg32 offset="0x88db" name="RB_RESOLVE_SYSTEM_BUFFER_ARRAY_PITCH" low="0" high="28" shr="6" type="uint"/> + name = reg.name.lower() - if (name in [ "double", "float", "int" ]) or not (name[0].isalpha()): - name = "_" + name + if (name in ["double", "float", "int"]) or not (name[0].isalpha()): + name = "_" + name - return name + return name # indices - array of (ctype, stride, __offsets_NAME) + + def indices_varlist(indices): - return ", ".join(["i%d" % i for i in range(len(indices))]) + return ", ".join(["i%d" % i for i in range(len(indices))]) + def indices_prototype(indices): - return ", ".join(["%s i%d" % (ctype, idx) - for (idx, (ctype, stride, offset)) in enumerate(indices)]) + return ", ".join(["%s i%d" % (ctype, idx) + for (idx, (ctype, stride, offset)) in enumerate(indices)]) + def indices_strides(indices): - return " + ".join(["0x%x*i%d" % (stride, idx) - if stride else - "%s(i%d)" % (offset, idx) - for (idx, (ctype, stride, offset)) in enumerate(indices)]) + return " + ".join(["0x%x*i%d" % (stride, idx) + if stride else + "%s(i%d)" % (offset, idx) + for (idx, (ctype, stride, offset)) in enumerate(indices)]) + def is_number(str): - try: - int(str) - return True - except ValueError: - return False + try: + int(str) + return True + except ValueError: + return False + def sanitize_variant(variant): - if variant and "-" in variant: - return variant[:variant.index("-")] - return variant + if variant and "-" in variant: + return variant[:variant.index("-")] + return variant + class Bitset(object): - def __init__(self, name, template): - self.name = name - self.inline = False - self.reg = None - if template: - self.fields = template.fields[:] - else: - self.fields = [] - - # Get address field if there is one in the bitset, else return None: - def get_address_field(self): - for f in self.fields: - if f.type in [ "address", "waddress" ]: - return f - return None - - def dump_regpair_builder(self, reg): - print("#ifndef NDEBUG") - known_mask = 0 - for f in 
self.fields: - known_mask |= mask(f.low, f.high) - if f.type in [ "boolean", "address", "waddress" ]: - continue - type, val = f.ctype("fields.%s" % field_name(reg, f)) - print(" assert((%-40s & 0x%08x) == 0);" % (val, 0xffffffff ^ mask(0 , f.high - f.low))) - print(" assert((%-40s & 0x%08x) == 0);" % ("fields.unknown", known_mask)) - print("#endif\n") - - print(" return (struct fd_reg_pair) {") - print(" .reg = (uint32_t)%s," % reg.reg_offset()) - print(" .value =") - cast = "(uint64_t)" if reg.bit_size == 64 else "" - for f in self.fields: - if f.type in [ "address", "waddress" ]: - continue - else: - type, val = f.ctype("fields.%s" % field_name(reg, f)) - print(" (%s%-40s << %2d) |" % (cast, val, f.low)) - value_name = "dword" - if reg.bit_size == 64: - value_name = "qword" - print(" fields.unknown | fields.%s," % (value_name,)) - - address = self.get_address_field() - if address: - print(" .bo = fields.bo,") - print(" .is_address = true,") - if f.type == "waddress": - print(" .bo_write = true,") - print(" .bo_offset = fields.bo_offset,") - print(" .bo_shift = %d," % address.shr) - print(" .bo_low = %d," % address.low) - - print(" };") - - def dump_pack_struct(self, is_deprecated, reg=None): - if not reg: - return - - prefix = reg.full_name - - print("struct %s {" % prefix) - for f in self.fields: - if f.type in [ "address", "waddress" ]: - tab_to(" __bo_type", "bo;") - tab_to(" uint32_t", "bo_offset;") - continue - name = field_name(reg, f) - - type, val = f.ctype("var") - - tab_to(" %s" % type, "%s;" % name) - if reg.bit_size == 64: - tab_to(" uint64_t", "unknown;") - tab_to(" uint64_t", "qword;") - else: - tab_to(" uint32_t", "unknown;") - tab_to(" uint32_t", "dword;") - print("};\n") - - depcrstr = "" - if is_deprecated: - depcrstr = " FD_DEPRECATED" - if reg.array: - print("static inline%s struct fd_reg_pair\npack_%s(uint32_t __i, struct %s fields)\n{" % - (depcrstr, prefix, prefix)) - else: - print("static inline%s struct fd_reg_pair\npack_%s(struct %s 
fields)\n{" % - (depcrstr, prefix, prefix)) - - self.dump_regpair_builder(reg) - - print("\n}\n") - - if self.get_address_field(): - skip = ", { .reg = 0 }" - else: - skip = "" - - if reg.array: - print("#define %s(__i, ...) pack_%s(__i, __struct_cast(%s) { __VA_ARGS__ })%s\n" % - (prefix, prefix, prefix, skip)) - else: - print("#define %s(...) pack_%s(__struct_cast(%s) { __VA_ARGS__ })%s\n" % - (prefix, prefix, prefix, skip)) - - - def dump(self, is_deprecated, prefix=None, reg=None): - if prefix is None: - prefix = self.name - reg64 = reg and self.reg and self.reg.bit_size == 64 - if reg64: - print("static inline uint32_t %s_LO(uint32_t val)\n{" % prefix) - print("\treturn val;\n}") - print("static inline uint32_t %s_HI(uint32_t val)\n{" % prefix) - print("\treturn val;\n}") - for f in self.fields: - if f.name: - name = prefix + "_" + f.name - else: - name = prefix - - if not f.name and f.low == 0 and f.shr == 0 and f.type not in ["float", "fixed", "ufixed"]: - pass - elif f.type == "boolean" or (f.type is None and f.low == f.high): - tab_to("#define %s" % name, "0x%08x" % (1 << f.low)) - else: - typespec = "ull" if reg64 else "u" - tab_to("#define %s__MASK" % name, "0x%08x%s" % (mask(f.low, f.high), typespec)) - tab_to("#define %s__SHIFT" % name, "%d" % f.low) - type, val = f.ctype("val") - ret_type = "uint64_t" if reg64 else "uint32_t" - cast = "(uint64_t)" if reg64 else "" - - print("static inline %s %s(%s val)\n{" % (ret_type, name, type)) - if f.shr > 0: - print("\tassert(!(val & 0x%x));" % mask(0, f.shr - 1)) - print("\treturn (%s(%s) << %s__SHIFT) & %s__MASK;\n}" % (cast, val, name, name)) - print() + def __init__(self, name, template): + self.name = name + self.inline = False + self.reg = None + if template: + self.fields = template.fields[:] + else: + self.fields = [] + + # Get address field if there is one in the bitset, else return None: + def get_address_field(self): + for f in self.fields: + if f.type in ["address", "waddress"]: + return f + return 
None + + def dump_regpair_builder(self, reg): + print("#ifndef NDEBUG") + known_mask = 0 + for f in self.fields: + known_mask |= mask(f.low, f.high) + if f.type in ["boolean", "address", "waddress"]: + continue + type, val = f.ctype("fields.%s" % field_name(reg, f)) + print(" assert((%-40s & 0x%08x) == 0);" % + (val, 0xffffffff ^ mask(0, f.high - f.low))) + print(" assert((%-40s & 0x%08x) == 0);" % + ("fields.unknown", known_mask)) + print("#endif\n") + + print(" return (struct fd_reg_pair) {") + print(" .reg = (uint32_t)%s," % reg.reg_offset()) + print(" .value =") + cast = "(uint64_t)" if reg.bit_size == 64 else "" + for f in self.fields: + if f.type in ["address", "waddress"]: + continue + else: + type, val = f.ctype("fields.%s" % field_name(reg, f)) + print(" (%s%-40s << %2d) |" % (cast, val, f.low)) + value_name = "dword" + if reg.bit_size == 64: + value_name = "qword" + print(" fields.unknown | fields.%s," % (value_name,)) + + address = self.get_address_field() + if address: + print("#ifndef TU_CS_H") + print(" .bo = fields.bo,") + print(" .is_address = true,") + print(" .bo_offset = fields.bo_offset,") + print(" .bo_shift = %d," % address.shr) + print(" .bo_low = %d," % address.low) + print("#else") + print(" .is_address = true,") + print("#endif") + + print(" };") + + def dump_pack_struct(self, has_variants, reg=None): + if not reg: + return + + prefix = reg.full_name + + constexpr_mark = " CONSTEXPR" + + print("struct %s {" % prefix) + for f in self.fields: + if f.type in ["address", "waddress"]: + print("#ifndef TU_CS_H") + tab_to(" __bo_type", "bo;") + tab_to(" uint32_t", "bo_offset;") + print("#endif\n") + continue + name = field_name(reg, f) + + type, val = f.ctype("var") + + tab_to(" %s" % type, "%s;" % name) + + if f.type == "float": + # Requires using `fui()` or `_mesa_float_to_half()` + constexpr_mark = "" + if reg.bit_size == 64: + tab_to(" uint64_t", "qword;") + tab_to(" uint64_t", "unknown;") + else: + tab_to(" uint32_t", "dword;") + tab_to(" 
uint32_t", "unknown;") + print("};\n") + + if not has_variants: + print("static%s inline struct fd_reg_pair" % constexpr_mark) + if reg.array: + print("pack_%s(uint32_t __i, struct %s fields)\n{" % (prefix, prefix)) + else: + print("pack_%s(struct %s fields)\n{" % (prefix, prefix)) + + self.dump_regpair_builder(reg) + + print("\n}\n") + + if self.get_address_field(): + skip = ", { .reg = 0 }" + else: + skip = "" + + if reg.array: + print("#define %s(__i, ...) pack_%s(__i, __struct_cast(%s) { __VA_ARGS__ })%s\n" % + (prefix, prefix, prefix, skip)) + else: + print("#define %s(...) pack_%s(__struct_cast(%s) { __VA_ARGS__ })%s\n" % + (prefix, prefix, prefix, skip)) + + def dump(self, has_variants, prefix=None, reg=None): + if prefix is None: + prefix = self.name + suffix = "" + if self.reg and self.reg.bit_size == 64: + print( + "static CONSTEXPR inline uint32_t %s_LO(uint32_t val)\n{" % prefix) + print("\treturn val;\n}") + print( + "static CONSTEXPR inline uint32_t %s_HI(uint32_t val)\n{" % prefix) + print("\treturn val;\n}") + suffix = "ull" + + for f in self.fields: + if f.name: + name = prefix + "_" + f.name + else: + name = prefix + + if not f.name and f.low == 0 and f.shr == 0 and f.type not in ["float", "fixed", "ufixed"]: + pass + elif f.type == "boolean" or (f.type is None and f.low == f.high): + tab_to("#define %s" % name, "0x%08x%s" % ((1 << f.low), suffix)) + else: + tab_to("#define %s__MASK" % + name, "0x%08x%s" % (mask(f.low, f.high), suffix)) + tab_to("#define %s__SHIFT" % name, "%d" % f.low) + type, val = f.ctype("val") + ret_type = "uint64_t" if reg and reg.bit_size == 64 else "uint32_t" + cast = "(uint64_t)" if reg and reg.bit_size == 64 else "" + + constexpr_mark = "" if type == "float" else " CONSTEXPR" + print("static%s inline %s %s(%s val)\n{" % ( + constexpr_mark, ret_type, name, type)) + if f.shr > 0: + print("\tassert(!(val & 0x%x));" % mask(0, f.shr - 1)) + print("\treturn (%s(%s) << %s__SHIFT) & %s__MASK;\n}" % + (cast, val, name, name)) + 
print() + class Array(object): - def __init__(self, attrs, domain, variant, parent, index_type): - if "name" in attrs: - self.local_name = attrs["name"] - else: - self.local_name = "" - self.domain = domain - self.variant = variant - self.parent = parent - self.children = [] - if self.parent: - self.name = self.parent.name + "_" + self.local_name - else: - self.name = self.local_name - if "offsets" in attrs: - self.offsets = map(lambda i: "0x%08x" % int(i, 0), attrs["offsets"].split(",")) - self.fixed_offsets = True - elif "doffsets" in attrs: - self.offsets = map(lambda s: "(%s)" % s , attrs["doffsets"].split(",")) - self.fixed_offsets = True - else: - self.offset = int(attrs["offset"], 0) - self.stride = int(attrs["stride"], 0) - self.fixed_offsets = False - if "index" in attrs: - self.index_type = index_type - else: - self.index_type = None - self.length = int(attrs["length"], 0) - if "usage" in attrs: - self.usages = attrs["usage"].split(',') - else: - self.usages = None - - def index_ctype(self): - if not self.index_type: - return "uint32_t" - else: - return "enum %s" % self.index_type.name - - # Generate array of (ctype, stride, __offsets_NAME) - def indices(self): - if self.parent: - indices = self.parent.indices() - else: - indices = [] - if self.length != 1: - if self.fixed_offsets: - indices.append((self.index_ctype(), None, "__offset_%s" % self.local_name)) - else: - indices.append((self.index_ctype(), self.stride, None)) - return indices - - def total_offset(self): - offset = 0 - if not self.fixed_offsets: - offset += self.offset - if self.parent: - offset += self.parent.total_offset() - return offset - - def dump(self, is_deprecated): - depcrstr = "" - if is_deprecated: - depcrstr = " FD_DEPRECATED" - proto = indices_varlist(self.indices()) - strides = indices_strides(self.indices()) - array_offset = self.total_offset() - if self.fixed_offsets: - print("static inline%s uint32_t __offset_%s(%s idx)" % (depcrstr, self.local_name, self.index_ctype())) - 
print("{\n\tswitch (idx) {") - if self.index_type: - for val, offset in zip(self.index_type.names(), self.offsets): - print("\t\tcase %s: return %s;" % (val, offset)) - else: - for idx, offset in enumerate(self.offsets): - print("\t\tcase %d: return %s;" % (idx, offset)) - print("\t\tdefault: return INVALID_IDX(idx);") - print("\t}\n}") - if proto == '': - tab_to("#define REG_%s_%s" % (self.domain, self.name), "0x%08x\n" % array_offset) - else: - tab_to("#define REG_%s_%s(%s)" % (self.domain, self.name, proto), "(0x%08x + %s )\n" % (array_offset, strides)) - - def dump_pack_struct(self, is_deprecated): - pass - - def dump_regpair_builder(self): - pass + def __init__(self, attrs, domain, variant, parent, index_type): + if "name" in attrs: + self.local_name = attrs["name"] + else: + self.local_name = "" + self.domain = domain + self.variant = variant + self.parent = parent + self.children = [] + if self.parent: + self.name = self.parent.name + "_" + self.local_name + else: + self.name = self.local_name + if "offsets" in attrs: + self.offsets = map(lambda i: "0x%08x" % + int(i, 0), attrs["offsets"].split(",")) + self.fixed_offsets = True + elif "doffsets" in attrs: + self.offsets = map(lambda s: "(%s)" % + s, attrs["doffsets"].split(",")) + self.fixed_offsets = True + else: + self.offset = int(attrs["offset"], 0) + self.stride = int(attrs["stride"], 0) + self.fixed_offsets = False + if "index" in attrs: + self.index_type = index_type + else: + self.index_type = None + self.length = int(attrs["length"], 0) + if "usage" in attrs: + self.usages = attrs["usage"].split(',') + else: + self.usages = None + + def index_ctype(self): + if not self.index_type: + return "uint32_t" + else: + return "enum %s" % self.index_type.name + + # Generate array of (ctype, stride, __offsets_NAME) + def indices(self): + if self.parent: + indices = self.parent.indices() + else: + indices = [] + if self.length != 1: + if self.fixed_offsets: + indices.append((self.index_ctype(), None, + 
"__offset_%s" % self.local_name)) + else: + indices.append((self.index_ctype(), self.stride, None)) + return indices + + def total_offset(self): + offset = 0 + if not self.fixed_offsets: + offset += self.offset + if self.parent: + offset += self.parent.total_offset() + return offset + + def dump(self, has_variants): + proto = indices_varlist(self.indices()) + strides = indices_strides(self.indices()) + array_offset = self.total_offset() + if self.fixed_offsets and not has_variants: + print("static CONSTEXPR inline uint32_t __offset_%s(%s idx)" % + (self.local_name, self.index_ctype())) + print("{\n\tswitch (idx) {") + if self.index_type: + for val, offset in zip(self.index_type.names(), self.offsets): + print("\t\tcase %s: return %s;" % (val, offset)) + else: + for idx, offset in enumerate(self.offsets): + print("\t\tcase %d: return %s;" % (idx, offset)) + print("\t\tdefault: return INVALID_IDX(idx);") + print("\t}\n}") + if proto == '': + define_macro("#define REG_%s_%s" % + (self.domain, self.name), "0x%08x\n" % array_offset, + has_variants) + else: + define_macro("#define REG_%s_%s(%s)" % (self.domain, self.name, + proto), "(0x%08x + %s )\n" % (array_offset, strides), + has_variants) + + def dump_pack_struct(self, has_variants): + pass + + def dump_regpair_builder(self): + pass + class Reg(object): - def __init__(self, attrs, domain, array, bit_size): - self.name = attrs["name"] - self.domain = domain - self.array = array - self.offset = int(attrs["offset"], 0) - self.type = None - self.bit_size = bit_size - if array: - self.name = array.name + "_" + self.name - array.children.append(self) - self.full_name = self.domain + "_" + self.name - if "stride" in attrs: - self.stride = int(attrs["stride"], 0) - self.length = int(attrs["length"], 0) - else: - self.stride = None - self.length = None - - # Generate array of (ctype, stride, __offsets_NAME) - def indices(self): - if self.array: - indices = self.array.indices() - else: - indices = [] - if self.stride: - 
indices.append(("uint32_t", self.stride, None)) - return indices - - def total_offset(self): - if self.array: - return self.array.total_offset() + self.offset - else: - return self.offset - - def reg_offset(self): - if self.array: - offset = self.array.offset + self.offset - return "(0x%08x + 0x%x*__i)" % (offset, self.array.stride) - return "0x%08x" % self.offset - - def dump(self, is_deprecated): - depcrstr = "" - if is_deprecated: - depcrstr = " FD_DEPRECATED " - proto = indices_prototype(self.indices()) - strides = indices_strides(self.indices()) - offset = self.total_offset() - if proto == '': - tab_to("#define REG_%s" % self.full_name, "0x%08x" % offset) - else: - print("static inline%s uint32_t REG_%s(%s) { return 0x%08x + %s; }" % (depcrstr, self.full_name, proto, offset, strides)) - - if self.bitset.inline: - self.bitset.dump(is_deprecated, self.full_name, self) - print("") - - def dump_pack_struct(self, is_deprecated): - if self.bitset.inline: - self.bitset.dump_pack_struct(is_deprecated, self) - - def dump_regpair_builder(self): - self.bitset.dump_regpair_builder(self) - - def dump_py(self): - print("\tREG_%s = 0x%08x" % (self.full_name, self.offset)) + def __init__(self, attrs, domain, array, bit_size): + self.name = attrs["name"] + self.domain = domain + self.array = array + self.offset = int(attrs["offset"], 0) + self.type = None + self.bit_size = bit_size + if array: + self.name = array.name + "_" + self.name + array.children.append(self) + self.full_name = self.domain + "_" + self.name + if "stride" in attrs: + self.stride = int(attrs["stride"], 0) + self.length = int(attrs["length"], 0) + else: + self.stride = None + self.length = None + + # Generate array of (ctype, stride, __offsets_NAME) + def indices(self): + if self.array: + indices = self.array.indices() + else: + indices = [] + if self.stride: + indices.append(("uint32_t", self.stride, None)) + return indices + + def total_offset(self): + if self.array: + return self.array.total_offset() + 
self.offset + else: + return self.offset + + def reg_offset(self): + if self.array: + offset = self.array.offset + self.offset + return "(0x%08x + 0x%x*__i)" % (offset, self.array.stride) + return "0x%08x" % self.offset + + def dump(self, has_variants): + proto = indices_prototype(self.indices()) + strides = indices_strides(self.indices()) + offset = self.total_offset() + if proto == '': + define_macro("#define REG_%s" % self.full_name, "0x%08x" % offset, has_variants) + elif not has_variants: + depcrstr = "" + if has_variants: + depcrstr = " __FD_DEPRECATED " + print("static CONSTEXPR inline%s uint32_t REG_%s(%s) { return 0x%08x + %s; }" % ( + depcrstr, self.full_name, proto, offset, strides)) + + if self.bitset.inline: + self.bitset.dump(has_variants, self.full_name, self) + print("") + + def dump_pack_struct(self, has_variants): + if self.bitset.inline: + self.bitset.dump_pack_struct(has_variants, self) + + def dump_regpair_builder(self): + self.bitset.dump_regpair_builder(self) + + def dump_py(self): + offset = self.offset + if self.array: + offset += self.array.offset + print("\tREG_%s = 0x%08x" % (self.full_name, offset)) class Parser(object): - def __init__(self): - self.current_array = None - self.current_domain = None - self.current_prefix = None - self.current_prefix_type = None - self.current_stripe = None - self.current_bitset = None - self.current_bitsize = 32 - # The varset attribute on the domain specifies the enum which - # specifies all possible hw variants: - self.current_varset = None - # Regs that have multiple variants.. 
we only generated the C++ - # template based struct-packers for these - self.variant_regs = {} - # Information in which contexts regs are used, to be used in - # debug options - self.usage_regs = collections.defaultdict(list) - self.bitsets = {} - self.enums = {} - self.variants = set() - self.file = [] - self.xml_files = [] - - def error(self, message): - parser, filename = self.stack[-1] - return Error("%s:%d:%d: %s" % (filename, parser.CurrentLineNumber, parser.CurrentColumnNumber, message)) - - def prefix(self, variant=None): - if self.current_prefix_type == "variant" and variant: - return sanitize_variant(variant) - elif self.current_stripe: - return self.current_stripe + "_" + self.current_domain - elif self.current_prefix: - return self.current_prefix + "_" + self.current_domain - else: - return self.current_domain - - def parse_field(self, name, attrs): - try: - if "pos" in attrs: - high = low = int(attrs["pos"], 0) - elif "high" in attrs and "low" in attrs: - high = int(attrs["high"], 0) - low = int(attrs["low"], 0) - else: - low = 0 - high = self.current_bitsize - 1 - - if "type" in attrs: - type = attrs["type"] - else: - type = None - - if "shr" in attrs: - shr = int(attrs["shr"], 0) - else: - shr = 0 - - b = Field(name, low, high, shr, type, self) - - if type == "fixed" or type == "ufixed": - b.radix = int(attrs["radix"], 0) - - self.current_bitset.fields.append(b) - except ValueError as e: - raise self.error(e) - - def parse_varset(self, attrs): - # Inherit the varset from the enclosing domain if not overriden: - varset = self.current_varset - if "varset" in attrs: - varset = self.enums[attrs["varset"]] - return varset - - def parse_variants(self, attrs): - if "variants" not in attrs: - return None - - variant = attrs["variants"].split(",")[0] - varset = self.parse_varset(attrs) - - if "-" in variant: - # if we have a range, validate that both the start and end - # of the range are valid enums: - start = variant[:variant.index("-")] - end = 
variant[variant.index("-") + 1:] - assert varset.has_name(start) - if end != "": - assert varset.has_name(end) - else: - assert varset.has_name(variant) - - return variant - - def add_all_variants(self, reg, attrs, parent_variant): - # TODO this should really handle *all* variants, including dealing - # with open ended ranges (ie. "A2XX,A4XX-") (we have the varset - # enum now to make that possible) - variant = self.parse_variants(attrs) - if not variant: - variant = parent_variant - - if reg.name not in self.variant_regs: - self.variant_regs[reg.name] = {} - else: - # All variants must be same size: - v = next(iter(self.variant_regs[reg.name])) - assert self.variant_regs[reg.name][v].bit_size == reg.bit_size - - self.variant_regs[reg.name][variant] = reg - - def add_all_usages(self, reg, usages): - if not usages: - return - - for usage in usages: - self.usage_regs[usage].append(reg) - - self.variants.add(reg.domain) - - def do_validate(self, schemafile): - if not self.validate: - return - - try: - from lxml import etree - - parser, filename = self.stack[-1] - dirname = os.path.dirname(filename) - - # we expect this to look like <namespace url> schema.xsd.. I think - # technically it is supposed to be just a URL, but that doesn't - # quite match up to what we do.. Just skip over everything up to - # and including the first whitespace character: - schemafile = schemafile[schemafile.rindex(" ")+1:] - - # this is a bit cheezy, but the xml file to validate could be - # in a child director, ie. we don't really know where the schema - # file is, the way the rnn C code does. 
So if it doesn't exist - # just look one level up - if not os.path.exists(dirname + "/" + schemafile): - schemafile = "../" + schemafile - - if not os.path.exists(dirname + "/" + schemafile): - raise self.error("Cannot find schema for: " + filename) - - xmlschema_doc = etree.parse(dirname + "/" + schemafile) - xmlschema = etree.XMLSchema(xmlschema_doc) - - xml_doc = etree.parse(filename) - if not xmlschema.validate(xml_doc): - error_str = str(xmlschema.error_log.filter_from_errors()[0]) - raise self.error("Schema validation failed for: " + filename + "\n" + error_str) - except ImportError as e: - print("lxml not found, skipping validation", file=sys.stderr) - - def do_parse(self, filename): - filepath = os.path.abspath(filename) - if filepath in self.xml_files: - return - self.xml_files.append(filepath) - file = open(filename, "rb") - parser = xml.parsers.expat.ParserCreate() - self.stack.append((parser, filename)) - parser.StartElementHandler = self.start_element - parser.EndElementHandler = self.end_element - parser.CharacterDataHandler = self.character_data - parser.buffer_text = True - parser.ParseFile(file) - self.stack.pop() - file.close() - - def parse(self, rnn_path, filename, validate): - self.path = rnn_path - self.stack = [] - self.validate = validate - self.do_parse(filename) - - def parse_reg(self, attrs, bit_size): - self.current_bitsize = bit_size - if "type" in attrs and attrs["type"] in self.bitsets: - bitset = self.bitsets[attrs["type"]] - if bitset.inline: - self.current_bitset = Bitset(attrs["name"], bitset) - self.current_bitset.inline = True - else: - self.current_bitset = bitset - else: - self.current_bitset = Bitset(attrs["name"], None) - self.current_bitset.inline = True - if "type" in attrs: - self.parse_field(None, attrs) - - variant = self.parse_variants(attrs) - if not variant and self.current_array: - variant = self.current_array.variant - - self.current_reg = Reg(attrs, self.prefix(variant), self.current_array, bit_size) - 
self.current_reg.bitset = self.current_bitset - self.current_bitset.reg = self.current_reg - - if len(self.stack) == 1: - self.file.append(self.current_reg) - - if variant is not None: - self.add_all_variants(self.current_reg, attrs, variant) - - usages = None - if "usage" in attrs: - usages = attrs["usage"].split(',') - elif self.current_array: - usages = self.current_array.usages - - self.add_all_usages(self.current_reg, usages) - - def start_element(self, name, attrs): - self.cdata = "" - if name == "import": - filename = attrs["file"] - self.do_parse(os.path.join(self.path, filename)) - elif name == "domain": - self.current_domain = attrs["name"] - if "prefix" in attrs: - self.current_prefix = sanitize_variant(self.parse_variants(attrs)) - self.current_prefix_type = attrs["prefix"] - else: - self.current_prefix = None - self.current_prefix_type = None - if "varset" in attrs: - self.current_varset = self.enums[attrs["varset"]] - elif name == "stripe": - self.current_stripe = sanitize_variant(self.parse_variants(attrs)) - elif name == "enum": - self.current_enum_value = 0 - self.current_enum = Enum(attrs["name"]) - self.enums[attrs["name"]] = self.current_enum - if len(self.stack) == 1: - self.file.append(self.current_enum) - elif name == "value": - if "value" in attrs: - value = int(attrs["value"], 0) - else: - value = self.current_enum_value - self.current_enum.values.append((attrs["name"], value)) - elif name == "reg32": - self.parse_reg(attrs, 32) - elif name == "reg64": - self.parse_reg(attrs, 64) - elif name == "array": - self.current_bitsize = 32 - variant = self.parse_variants(attrs) - index_type = self.enums[attrs["index"]] if "index" in attrs else None - self.current_array = Array(attrs, self.prefix(variant), variant, self.current_array, index_type) - if len(self.stack) == 1: - self.file.append(self.current_array) - elif name == "bitset": - self.current_bitset = Bitset(attrs["name"], None) - if "inline" in attrs and attrs["inline"] == "yes": - 
self.current_bitset.inline = True - self.bitsets[self.current_bitset.name] = self.current_bitset - if len(self.stack) == 1 and not self.current_bitset.inline: - self.file.append(self.current_bitset) - elif name == "bitfield" and self.current_bitset: - self.parse_field(attrs["name"], attrs) - elif name == "database": - self.do_validate(attrs["xsi:schemaLocation"]) - - def end_element(self, name): - if name == "domain": - self.current_domain = None - self.current_prefix = None - self.current_prefix_type = None - elif name == "stripe": - self.current_stripe = None - elif name == "bitset": - self.current_bitset = None - elif name == "reg32": - self.current_reg = None - elif name == "array": - # if the array has no Reg children, push an implicit reg32: - if len(self.current_array.children) == 0: - attrs = { - "name": "REG", - "offset": "0", - } - self.parse_reg(attrs, 32) - self.current_array = self.current_array.parent - elif name == "enum": - self.current_enum = None - - def character_data(self, data): - self.cdata += data - - def dump_reg_usages(self): - d = collections.defaultdict(list) - for usage, regs in self.usage_regs.items(): - for reg in regs: - variants = self.variant_regs.get(reg.name) - if variants: - for variant, vreg in variants.items(): - if reg == vreg: - d[(usage, sanitize_variant(variant))].append(reg) - else: - for variant in self.variants: - d[(usage, sanitize_variant(variant))].append(reg) - - print("#ifdef __cplusplus") - - for usage, regs in self.usage_regs.items(): - print("template<chip CHIP> constexpr inline uint16_t %s_REGS[] = {};" % (usage.upper())) - - for (usage, variant), regs in d.items(): - offsets = [] - - for reg in regs: - if reg.array: - for i in range(reg.array.length): - offsets.append(reg.array.offset + reg.offset + i * reg.array.stride) - if reg.bit_size == 64: - offsets.append(offsets[-1] + 1) - else: - offsets.append(reg.offset) - if reg.bit_size == 64: - offsets.append(offsets[-1] + 1) - - offsets.sort() - - 
print("template<> constexpr inline uint16_t %s_REGS<%s>[] = {" % (usage.upper(), variant)) - for offset in offsets: - print("\t%s," % hex(offset)) - print("};") - - print("#endif") - - def has_variants(self, reg): - return reg.name in self.variant_regs and not is_number(reg.name) and not is_number(reg.name[1:]) - - def dump(self): - enums = [] - bitsets = [] - regs = [] - for e in self.file: - if isinstance(e, Enum): - enums.append(e) - elif isinstance(e, Bitset): - bitsets.append(e) - else: - regs.append(e) - - for e in enums + bitsets + regs: - e.dump(self.has_variants(e)) - - self.dump_reg_usages() - - - def dump_regs_py(self): - regs = [] - for e in self.file: - if isinstance(e, Reg): - regs.append(e) - - for e in regs: - e.dump_py() - - - def dump_reg_variants(self, regname, variants): - if is_number(regname) or is_number(regname[1:]): - return - print("#ifdef __cplusplus") - print("struct __%s {" % regname) - # TODO be more clever.. we should probably figure out which - # fields have the same type in all variants (in which they - # appear) and stuff everything else in a variant specific - # sub-structure. 
- seen_fields = [] - bit_size = 32 - array = False - address = None - for variant in variants.keys(): - print(" /* %s fields: */" % variant) - reg = variants[variant] - bit_size = reg.bit_size - array = reg.array - for f in reg.bitset.fields: - fld_name = field_name(reg, f) - if fld_name in seen_fields: - continue - seen_fields.append(fld_name) - name = fld_name.lower() - if f.type in [ "address", "waddress" ]: - if address: - continue - address = f - tab_to(" __bo_type", "bo;") - tab_to(" uint32_t", "bo_offset;") - continue - type, val = f.ctype("var") - tab_to(" %s" %type, "%s;" %name) - print(" /* fallback fields: */") - if bit_size == 64: - tab_to(" uint64_t", "unknown;") - tab_to(" uint64_t", "qword;") - else: - tab_to(" uint32_t", "unknown;") - tab_to(" uint32_t", "dword;") - print("};") - # TODO don't hardcode the varset enum name - varenum = "chip" - print("template <%s %s>" % (varenum, varenum.upper())) - print("static inline struct fd_reg_pair") - xtra = "" - xtravar = "" - if array: - xtra = "int __i, " - xtravar = "__i, " - print("__%s(%sstruct __%s fields) {" % (regname, xtra, regname)) - for variant in variants.keys(): - if "-" in variant: - start = variant[:variant.index("-")] - end = variant[variant.index("-") + 1:] - if end != "": - print(" if ((%s >= %s) && (%s <= %s)) {" % (varenum.upper(), start, varenum.upper(), end)) - else: - print(" if (%s >= %s) {" % (varenum.upper(), start)) - else: - print(" if (%s == %s) {" % (varenum.upper(), variant)) - reg = variants[variant] - reg.dump_regpair_builder() - print(" } else") - print(" assert(!\"invalid variant\");") - print(" return (struct fd_reg_pair){};") - print("}") - - if bit_size == 64: - skip = ", { .reg = 0 }" - else: - skip = "" - - print("#define %s(VARIANT, %s...) 
__%s<VARIANT>(%s{__VA_ARGS__})%s" % (regname, xtravar, regname, xtravar, skip)) - print("#endif /* __cplusplus */") - - def dump_structs(self): - for e in self.file: - e.dump_pack_struct(self.has_variants(e)) - - for regname in self.variant_regs: - self.dump_reg_variants(regname, self.variant_regs[regname]) + def __init__(self): + self.current_array = None + self.current_domain = None + self.current_prefix = None + self.current_prefix_type = None + self.current_stripe = None + self.current_bitset = None + self.current_bitsize = 32 + # The varset attribute on the domain specifies the enum which + # specifies all possible hw variants: + self.current_varset = None + # Regs that have multiple variants.. we only generated the C++ + # template based struct-packers for these + self.variant_regs = {} + # Information in which contexts regs are used, to be used in + # debug options + self.usage_regs = collections.defaultdict(list) + self.bitsets = {} + self.enums = {} + self.variants = set() + self.file = [] + self.xml_files = [] + + def error(self, message): + parser, filename = self.stack[-1] + return Error("%s:%d:%d: %s" % (filename, parser.CurrentLineNumber, parser.CurrentColumnNumber, message)) + + def prefix(self, variant=None): + if self.current_prefix_type == "variant" and variant: + return sanitize_variant(variant) + elif self.current_stripe: + return self.current_stripe + "_" + self.current_domain + elif self.current_prefix: + return self.current_prefix + "_" + self.current_domain + else: + return self.current_domain + + def parse_field(self, name, attrs): + try: + if "pos" in attrs: + high = low = int(attrs["pos"], 0) + elif "high" in attrs and "low" in attrs: + high = int(attrs["high"], 0) + low = int(attrs["low"], 0) + else: + low = 0 + high = self.current_bitsize - 1 + + if "type" in attrs: + type = attrs["type"] + else: + type = None + + if "shr" in attrs: + shr = int(attrs["shr"], 0) + else: + shr = 0 + + b = Field(name, low, high, shr, type, self) + + if 
type == "fixed" or type == "ufixed": + b.radix = int(attrs["radix"], 0) + + self.current_bitset.fields.append(b) + except ValueError as e: + raise self.error(e) + + def parse_varset(self, attrs): + # Inherit the varset from the enclosing domain if not overriden: + varset = self.current_varset + if "varset" in attrs: + varset = self.enums[attrs["varset"]] + return varset + + def parse_variants(self, attrs): + if "variants" not in attrs: + return None + + variant = attrs["variants"].split(",")[0] + varset = self.parse_varset(attrs) + + if "-" in variant: + # if we have a range, validate that both the start and end + # of the range are valid enums: + start = variant[:variant.index("-")] + end = variant[variant.index("-") + 1:] + assert varset.has_name(start) + if end != "": + assert varset.has_name(end) + else: + assert varset.has_name(variant) + + return variant + + def add_all_variants(self, reg, attrs, parent_variant): + # TODO this should really handle *all* variants, including dealing + # with open ended ranges (ie. "A2XX,A4XX-") (we have the varset + # enum now to make that possible) + variant = self.parse_variants(attrs) + if not variant: + variant = parent_variant + + if reg.name not in self.variant_regs: + self.variant_regs[reg.name] = {} + else: + # All variants must be same size: + v = next(iter(self.variant_regs[reg.name])) + assert self.variant_regs[reg.name][v].bit_size == reg.bit_size + + self.variant_regs[reg.name][variant] = reg + + def add_all_usages(self, reg, usages): + if not usages: + return + + for usage in usages: + self.usage_regs[usage].append(reg) + + self.variants.add(reg.domain) + + def do_validate(self, schemafile): + if not self.validate: + return + + try: + from lxml import etree + + parser, filename = self.stack[-1] + dirname = os.path.dirname(filename) + + # we expect this to look like <namespace url> schema.xsd.. I think + # technically it is supposed to be just a URL, but that doesn't + # quite match up to what we do.. 
Just skip over everything up to + # and including the first whitespace character: + schemafile = schemafile[schemafile.rindex(" ")+1:] + + # this is a bit cheezy, but the xml file to validate could be + # in a child director, ie. we don't really know where the schema + # file is, the way the rnn C code does. So if it doesn't exist + # just look one level up + if not os.path.exists(dirname + "/" + schemafile): + schemafile = "../" + schemafile + + if not os.path.exists(dirname + "/" + schemafile): + raise self.error("Cannot find schema for: " + filename) + + xmlschema_doc = etree.parse(dirname + "/" + schemafile) + xmlschema = etree.XMLSchema(xmlschema_doc) + + xml_doc = etree.parse(filename) + if not xmlschema.validate(xml_doc): + error_str = str(xmlschema.error_log.filter_from_errors()[0]) + raise self.error( + "Schema validation failed for: " + filename + "\n" + error_str) + except ImportError as e: + print("lxml not found, skipping validation", file=sys.stderr) + + def do_parse(self, filename): + filepath = os.path.abspath(filename) + if filepath in self.xml_files: + return + self.xml_files.append(filepath) + file = open(filename, "rb") + parser = xml.parsers.expat.ParserCreate() + self.stack.append((parser, filename)) + parser.StartElementHandler = self.start_element + parser.EndElementHandler = self.end_element + parser.CharacterDataHandler = self.character_data + parser.buffer_text = True + parser.ParseFile(file) + self.stack.pop() + file.close() + + def parse(self, rnn_path, filename, validate): + self.path = rnn_path + self.stack = [] + self.validate = validate + self.do_parse(filename) + + def parse_reg(self, attrs, bit_size): + self.current_bitsize = bit_size + if "type" in attrs and attrs["type"] in self.bitsets: + bitset = self.bitsets[attrs["type"]] + if bitset.inline: + self.current_bitset = Bitset(attrs["name"], bitset) + self.current_bitset.inline = True + else: + self.current_bitset = bitset + else: + self.current_bitset = Bitset(attrs["name"], 
None) + self.current_bitset.inline = True + if "type" in attrs: + self.parse_field(None, attrs) + + variant = self.parse_variants(attrs) + if not variant and self.current_array: + variant = self.current_array.variant + + self.current_reg = Reg(attrs, self.prefix( + variant), self.current_array, bit_size) + self.current_reg.bitset = self.current_bitset + self.current_bitset.reg = self.current_reg + + if len(self.stack) == 1: + self.file.append(self.current_reg) + + if variant is not None: + self.add_all_variants(self.current_reg, attrs, variant) + + usages = None + if "usage" in attrs: + usages = attrs["usage"].split(',') + elif self.current_array: + usages = self.current_array.usages + + self.add_all_usages(self.current_reg, usages) + + def start_element(self, name, attrs): + self.cdata = "" + if name == "import": + filename = attrs["file"] + self.do_parse(os.path.join(self.path, filename)) + elif name == "domain": + self.current_domain = attrs["name"] + if "prefix" in attrs: + self.current_prefix = sanitize_variant( + self.parse_variants(attrs)) + self.current_prefix_type = attrs["prefix"] + else: + self.current_prefix = None + self.current_prefix_type = None + if "varset" in attrs: + self.current_varset = self.enums[attrs["varset"]] + elif name == "stripe": + self.current_stripe = sanitize_variant(self.parse_variants(attrs)) + elif name == "enum": + self.current_enum_value = 0 + self.current_enum = Enum(attrs["name"]) + self.enums[attrs["name"]] = self.current_enum + if len(self.stack) == 1: + self.file.append(self.current_enum) + elif name == "value": + if "value" in attrs: + value = int(attrs["value"], 0) + else: + value = self.current_enum_value + self.current_enum.values.append((attrs["name"], value)) + elif name == "reg32": + self.parse_reg(attrs, 32) + elif name == "reg64": + self.parse_reg(attrs, 64) + elif name == "array": + self.current_bitsize = 32 + variant = self.parse_variants(attrs) + index_type = self.enums[attrs["index"] + ] if "index" in attrs 
else None + self.current_array = Array(attrs, self.prefix( + variant), variant, self.current_array, index_type) + if len(self.stack) == 1: + self.file.append(self.current_array) + elif name == "bitset": + self.current_bitset = Bitset(attrs["name"], None) + if "inline" in attrs and attrs["inline"] == "yes": + self.current_bitset.inline = True + self.bitsets[self.current_bitset.name] = self.current_bitset + if len(self.stack) == 1 and not self.current_bitset.inline: + self.file.append(self.current_bitset) + elif name == "bitfield" and self.current_bitset: + self.parse_field(attrs["name"], attrs) + elif name == "database": + self.do_validate(attrs["xsi:schemaLocation"]) + + def end_element(self, name): + if name == "domain": + self.current_domain = None + self.current_prefix = None + self.current_prefix_type = None + elif name == "stripe": + self.current_stripe = None + elif name == "bitset": + self.current_bitset = None + elif name == "reg32": + self.current_reg = None + elif name == "array": + # if the array has no Reg children, push an implicit reg32: + if len(self.current_array.children) == 0: + attrs = { + "name": "REG", + "offset": "0", + } + self.parse_reg(attrs, 32) + self.current_array = self.current_array.parent + elif name == "enum": + self.current_enum = None + + def character_data(self, data): + self.cdata += data + + def dump_reg_usages(self): + d = collections.defaultdict(list) + for usage, regs in self.usage_regs.items(): + for reg in regs: + variants = self.variant_regs.get(reg.name) + if variants: + for variant, vreg in variants.items(): + if reg == vreg: + d[(usage, sanitize_variant(variant))].append(reg) + else: + for variant in self.variants: + d[(usage, sanitize_variant(variant))].append(reg) + + print("#ifdef __cplusplus") + + for usage, regs in self.usage_regs.items(): + print("template<chip CHIP> constexpr inline uint16_t %s_REGS[] = {};" % ( + usage.upper())) + + for (usage, variant), regs in d.items(): + offsets = [] + + for reg in regs: + 
if reg.array: + for i in range(reg.array.length): + offsets.append(reg.array.offset + + reg.offset + i * reg.array.stride) + if reg.bit_size == 64: + offsets.append(offsets[-1] + 1) + else: + offsets.append(reg.offset) + if reg.bit_size == 64: + offsets.append(offsets[-1] + 1) + + offsets.sort() + + print("template<> constexpr inline uint16_t %s_REGS<%s>[] = {" % ( + usage.upper(), variant)) + for offset in offsets: + print("\t%s," % hex(offset)) + print("};") + + print("#endif") + + def has_variants(self, reg): + return reg.name in self.variant_regs and not is_number(reg.name) and not is_number(reg.name[1:]) + + def dump(self): + enums = [] + bitsets = [] + regs = [] + for e in self.file: + if isinstance(e, Enum): + enums.append(e) + elif isinstance(e, Bitset): + bitsets.append(e) + else: + regs.append(e) + + for e in enums + bitsets + regs: + e.dump(self.has_variants(e)) + + self.dump_reg_usages() + + def dump_regs_py(self): + regs = [] + for e in self.file: + if isinstance(e, Reg): + regs.append(e) + + for e in regs: + e.dump_py() + + def dump_reg_variants(self, regname, variants): + if is_number(regname) or is_number(regname[1:]): + return + print("#ifdef __cplusplus") + print("struct __%s {" % regname) + # TODO be more clever.. we should probably figure out which + # fields have the same type in all variants (in which they + # appear) and stuff everything else in a variant specific + # sub-structure. 
+ seen_fields = [] + bit_size = 32 + array = False + address = None + constexpr_mark = " CONSTEXPR" + for variant in variants.keys(): + print(" /* %s fields: */" % variant) + reg = variants[variant] + bit_size = reg.bit_size + array = reg.array + for f in reg.bitset.fields: + fld_name = field_name(reg, f) + if fld_name in seen_fields: + continue + seen_fields.append(fld_name) + name = fld_name.lower() + if f.type in ["address", "waddress"]: + if address: + continue + address = f + print("#ifndef TU_CS_H") + tab_to(" __bo_type", "bo;") + tab_to(" uint32_t", "bo_offset;") + print("#endif") + continue + type, val = f.ctype("var") + tab_to(" %s" % type, "%s;" % name) + if f.type == "float": + constexpr_mark = "" + print(" /* fallback fields: */") + if bit_size == 64: + tab_to(" uint64_t", "unknown;") + tab_to(" uint64_t", "qword;") + else: + tab_to(" uint32_t", "unknown;") + tab_to(" uint32_t", "dword;") + print("};") + # TODO don't hardcode the varset enum name + varenum = "chip" + print("template <%s %s>" % (varenum, varenum.upper())) + print("static%s inline struct fd_reg_pair" % (constexpr_mark)) + xtra = "" + xtravar = "" + if array: + xtra = "int __i, " + xtravar = "__i, " + print("__%s(%sstruct __%s fields) {" % (regname, xtra, regname)) + for variant in variants.keys(): + if "-" in variant: + start = variant[:variant.index("-")] + end = variant[variant.index("-") + 1:] + if end != "": + print(" if ((%s >= %s) && (%s <= %s)) {" % ( + varenum.upper(), start, varenum.upper(), end)) + else: + print(" if (%s >= %s) {" % (varenum.upper(), start)) + else: + print(" if (%s == %s) {" % (varenum.upper(), variant)) + reg = variants[variant] + reg.dump_regpair_builder() + print(" } else") + print(" assert(!\"invalid variant\");") + print(" return (struct fd_reg_pair){};") + print("}") + + if bit_size == 64: + skip = ", { .reg = 0 }" + else: + skip = "" + + print("#define %s(VARIANT, %s...) 
__%s<VARIANT>(%s{__VA_ARGS__})%s" % ( + regname, xtravar, regname, xtravar, skip)) + print("#endif /* __cplusplus */") + + def dump_structs(self): + for e in self.file: + e.dump_pack_struct(self.has_variants(e)) + + for regname in self.variant_regs: + self.dump_reg_variants(regname, self.variant_regs[regname]) def dump_c(args, guard, func): - p = Parser() - - try: - p.parse(args.rnn, args.xml, args.validate) - except Error as e: - print(e, file=sys.stderr) - exit(1) - - print("#ifndef %s\n#define %s\n" % (guard, guard)) - - print("/* Autogenerated file, DO NOT EDIT manually! */") - - print() - print("#ifdef __KERNEL__") - print("#include <linux/bug.h>") - print("#define assert(x) BUG_ON(!(x))") - print("#else") - print("#include <assert.h>") - print("#endif") - print() - - print("#ifdef __cplusplus") - print("#define __struct_cast(X)") - print("#else") - print("#define __struct_cast(X) (struct X)") - print("#endif") - print() - - print("#ifndef FD_NO_DEPRECATED_PACK") - print("#define FD_DEPRECATED __attribute__((deprecated))") - print("#else") - print("#define FD_DEPRECATED") - print("#endif") - print() - - func(p) - - print() - print("#undef FD_DEPRECATED") - print() - - print("#endif /* %s */" % guard) + p = Parser() + + try: + p.parse(args.rnn, args.xml, args.validate) + except Error as e: + print(e, file=sys.stderr) + exit(1) + + print("#ifndef %s\n#define %s\n" % (guard, guard)) + + print("/* Autogenerated file, DO NOT EDIT manually! */") + + print() + print("#ifdef __KERNEL__") + print("#include <linux/bug.h>") + print("#define assert(x) BUG_ON(!(x))") + print("#else") + print("#include <assert.h>") + print("#endif") + print() + + print("#ifdef __cplusplus") + print("#define __struct_cast(X)") + print("#define CONSTEXPR constexpr") + print("#else") + print("#define __struct_cast(X) (struct X)") + print("#define CONSTEXPR") + print("#endif") + print() + + # TODO figure out what to do about fd_reg_stomp_allowed() + # vs gcc.. 
for now only enable the warnings with clang: + print("#if defined(__clang__) && !defined(FD_NO_DEPRECATED_PACK) && !defined(__KERNEL__)") + print("#define __FD_DEPRECATED _Pragma (\"GCC warning \\\"Deprecated reg builder\\\"\")") + print("#else") + print("#define __FD_DEPRECATED") + print("#endif") + print() + + func(p) + + print("#endif /* %s */" % guard) def dump_c_defines(args): - guard = str.replace(os.path.basename(args.xml), '.', '_').upper() - dump_c(args, guard, lambda p: p.dump()) + guard = str.replace(os.path.basename(args.xml), '.', '_').upper() + dump_c(args, guard, lambda p: p.dump()) def dump_c_pack_structs(args): - guard = str.replace(os.path.basename(args.xml), '.', '_').upper() + '_STRUCTS' - dump_c(args, guard, lambda p: p.dump_structs()) - + guard = str.replace(os.path.basename(args.xml), + '.', '_').upper() + '_STRUCTS' + dump_c(args, guard, lambda p: p.dump_structs()) + + +def dump_perfcntrs(args): + p = Parser() + + try: + p.parse(args.rnn, args.xml, args.validate) + except Error as e: + print(e, file=sys.stderr) + exit(1) + + perfcntrs = json.load(open(args.json, "r", encoding="utf-8")) + + chip_type = p.enums['chip'] + chip = perfcntrs['chip'] + if not chip_type.has_name(chip): + raise Error("Invalid chip: " + chip) + + groups = perfcntrs['groups'] + + guard = "__" + chip + "_PERFCNTRS_" + print("#ifndef %s\n#define %s\n" % (guard, guard)) + print("/* Autogenerated file, DO NOT EDIT manually! 
*/") + print() + print("#ifdef __KERNEL__") + print("#include \"msm_perfcntr.h\"") + print("#endif") + print() + + def has_variant(variant): + if variant is None: + return True + if "-" in variant: + start = chip_type.value(variant[:variant.index("-")]) + end = chip_type.value(variant[variant.index("-") + 1:]) + chipn = chip_type.value(chip) + + return (start is None or chipn >= start) and (end is None or chipn <= end) + return chip == variant + + # Split out arrays and regs for later access: + arrays = {} + regs = {} + for e in p.file: + if isinstance(e, Array) and has_variant(e.variant): + arrays[e.local_name] = e + if isinstance(e, Reg): + regs[e.name] = e + + # For variant regs, overwrite 'regs' entries with correct variant: + for regname in p.variant_regs: + for (variant, reg) in p.variant_regs[regname].items(): + if has_variant(variant): + regs[regname] = reg + break + + for group in groups: + name = group['name'] + name_low = name.lower() + num = group['num'] + countable_type_name = group['countable_type'] + + if not countable_type_name in p.enums: + raise Error("Invalid type: " + countable_type_name) + + countable_type = p.enums[countable_type_name] + + print("#ifndef __KERNEL__") + print("static const struct fd_perfcntr_countable " + name_low + "_countables[] = {") + for (name, value) in countable_type.values: + # if the countable is prefixed with the chip, strip that: + # (note: avoid py3.9 dependency for kernel) + if name.startswith(chip + "_"): + name = name[len(chip)+1:] + print(" { \"" + name + "\", " + str(value) + " },") + print("};") + print("#endif") + + print("static const struct fd_perfcntr_counter " + name_low + "_counters[] = {") + for i in range(0, num): + if "reserved" in group and i in group["reserved"]: + continue + def get_reg(name): + # if reg has {} pattern, expand that first: + name = name.format(i) + + if name in arrays: + arr = arrays[name] + return arr.offset + (i * arr.stride) + + if not name in regs: + raise Error("Invalid reg: " 
+ name) + + reg = regs[name] + return reg.offset + + def get_counter(): + # if the counter is <reg64> just a single "counter" value + # should be specified in the json, but for legacy separate + # hi/lo <reg32> pairs "counter_lo" and "counter_hi" should + # be specified + if "counter" in group: + counter = get_reg(group["counter"]) + return [counter, counter+1] + counter_lo = get_reg(group["counter_lo"]) + counter_hi = get_reg(group["counter_hi"]) + return [counter_lo, counter_hi] + + (counter_lo, counter_hi) = get_counter() + select = get_reg(group['select']) + + select_offset = 0 + if "select_offset" in group: + select_offset = int(group["select_offset"]) + select = select + select_offset + + slice_select_str = "" + if "slice_select" in group: + slice_select = group["slice_select"] + for reg in slice_select: + val = get_reg(reg) + select_offset + slice_select_str += "0x%04x, " % val + + # TODO add support for things that need enable/clear regs + + print(" { 0x%04x, {%s}, 0x%04x, 0x%04x }," % (select, slice_select_str, counter_lo, counter_hi)) + print("};") + + print() + + print("const struct fd_perfcntr_group " + chip.lower() + "_perfcntr_groups[] = {") + for group in groups: + name = group['name'] + name_low = name.lower() + pipe = 'NONE' + if 'pipe' in group: + pipe = group['pipe'] + + print(" GROUP(\"%s\", PIPE_%s, %s_counters, %s_countables)," % (name, pipe, name_low, name_low)) + + print("};") + print("const unsigned " + chip.lower() + "_num_perfcntr_groups = ARRAY_SIZE(" + chip.lower() + "_perfcntr_groups);") + + print() + print("#endif /* %s */" % guard) def dump_py_defines(args): - p = Parser() + p = Parser() - try: - p.parse(args.rnn, args.xml, args.validate) - except Error as e: - print(e, file=sys.stderr) - exit(1) + try: + p.parse(args.rnn, args.xml, args.validate) + except Error as e: + print(e, file=sys.stderr) + exit(1) - file_name = os.path.splitext(os.path.basename(args.xml))[0] + file_name = os.path.splitext(os.path.basename(args.xml))[0] - 
print("from enum import IntEnum") - print("class %sRegs(IntEnum):" % file_name.upper()) + print("from enum import IntEnum") + print("class %sRegs(IntEnum):" % file_name.upper()) - os.path.basename(args.xml) + os.path.basename(args.xml) - p.dump_regs_py() + p.dump_regs_py() def main(): - parser = argparse.ArgumentParser() - parser.add_argument('--rnn', type=str, required=True) - parser.add_argument('--xml', type=str, required=True) - parser.add_argument('--validate', default=False, action='store_true') - parser.add_argument('--no-validate', dest='validate', action='store_false') + parser = argparse.ArgumentParser() + parser.add_argument('--rnn', type=str, required=True) + parser.add_argument('--xml', type=str, required=True) + parser.add_argument('--validate', default=False, action='store_true') + parser.add_argument('--no-validate', dest='validate', action='store_false') + + subparsers = parser.add_subparsers() + subparsers.required = True - subparsers = parser.add_subparsers() - subparsers.required = True + parser_c_defines = subparsers.add_parser('c-defines') + parser_c_defines.set_defaults(func=dump_c_defines) - parser_c_defines = subparsers.add_parser('c-defines') - parser_c_defines.set_defaults(func=dump_c_defines) + parser_c_pack_structs = subparsers.add_parser('c-pack-structs') + parser_c_pack_structs.set_defaults(func=dump_c_pack_structs) - parser_c_pack_structs = subparsers.add_parser('c-pack-structs') - parser_c_pack_structs.set_defaults(func=dump_c_pack_structs) + parser_perfcntrs = subparsers.add_parser('perfcntrs') + parser_perfcntrs.add_argument('--json', type=str, required=True) + parser_perfcntrs.set_defaults(func=dump_perfcntrs) - parser_py_defines = subparsers.add_parser('py-defines') - parser_py_defines.set_defaults(func=dump_py_defines) + parser_py_defines = subparsers.add_parser('py-defines') + parser_py_defines.set_defaults(func=dump_py_defines) - args = parser.parse_args() - args.func(args) + args = parser.parse_args() + args.func(args) if 
__name__ == '__main__': - main() + main() diff --git a/include/linux/soc/qcom/ubwc.h b/include/linux/soc/qcom/ubwc.h index f5d0e2341261..83d2c2a7116c 100644 --- a/include/linux/soc/qcom/ubwc.h +++ b/include/linux/soc/qcom/ubwc.h @@ -50,6 +50,7 @@ struct qcom_ubwc_cfg_data { #define UBWC_1_0 0x10000000 #define UBWC_2_0 0x20000000 #define UBWC_3_0 0x30000000 +#define UBWC_3_1 0x30010000 /* UBWC 3.0 + Macrotile mode */ #define UBWC_4_0 0x40000000 #define UBWC_4_3 0x40030000 #define UBWC_5_0 0x50000000 @@ -99,4 +100,25 @@ static inline u32 qcom_ubwc_swizzle(const struct qcom_ubwc_cfg_data *cfg) return cfg->ubwc_swizzle; } +static inline u32 qcom_ubwc_version_tag(const struct qcom_ubwc_cfg_data *cfg) +{ + if (cfg->ubwc_enc_version >= UBWC_6_0) + return 5; + if (cfg->ubwc_enc_version >= UBWC_5_0) + return 4; + if (cfg->ubwc_enc_version >= UBWC_4_3) + return 3; + if (cfg->ubwc_enc_version >= UBWC_4_0) + return 2; + if (cfg->ubwc_enc_version >= UBWC_3_0) + return 1; + + return 0; +} + +static inline bool qcom_ubwc_enable_amsbc(const struct qcom_ubwc_cfg_data *cfg) +{ + return cfg->ubwc_enc_version >= UBWC_3_0; +} + #endif /* __QCOM_UBWC_H__ */ diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index b99098792371..7f2e594be4eb 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -491,6 +491,52 @@ struct drm_msm_submitqueue_query { __u32 pad; }; +#define MSM_PERFCNTR_STREAM 0x00000001 +#define MSM_PERFCNTR_UPDATE 0x00000002 +#define MSM_PERFCNTR_FLAGS ( \ + MSM_PERFCNTR_STREAM | \ + MSM_PERFCNTR_UPDATE | \ + 0) + +struct drm_msm_perfcntr_group { + char group_name[16]; + __u32 nr_countables; + __u32 pad; /* mbz */ + __u64 countables; /* pointer to an array of nr_countables u32 */ +}; + +/* + * Note, for MSM_PERFCNTR_STREAM, the ioctl returns an fd to read recorded + * counters. 
This only works because the ioctl is DRM_IOW(), if we returned + * an out param in the ioctl struct the copy_to_user() (in drm_ioctl()) + * could fault, causing us to leak the fd. + * + * If the ioctl returns with error E2BIG, that means more counters/countables + * are requested than are currently available. If MSM_PERFCNTR_UPDATE flag + * is set, drm_msm_perfcntr_group::nr_countables will be updated to return + * the actual # of counters available. + * + * The data read from the fd has the following format for each sampling period: + * + * uint64_t timestamp; // CP_ALWAYS_ON_COUNTER captured at sample time + * uint32_t seqno; // increments by 1 each period, reset to 0 on discontinuity + * uint32_t mbz; // pad out counters to 64b + * struct { + * uint64_t counter[nr_countables]; + * } groups[nr_groups]; + * + * The ordering of groups and counters matches the order in PERFCNTR_CONFIG + * ioctl. + */ +struct drm_msm_perfcntr_config { + __u32 flags; /* bitmask of MSM_PERFCNTR_x */ + __u32 nr_groups; /* # of entries in groups array */ + __u64 groups; /* pointer to array of drm_msm_perfcntr_group */ + __u64 period; /* sampling period in ns */ + __u32 bufsz_shift; /* sample buffer size in bytes is 1<<bufsz_shift */ + __u32 group_stride; /* sizeof(struct drm_msm_perfcntr_group) */ +}; + #define DRM_MSM_GET_PARAM 0x00 #define DRM_MSM_SET_PARAM 0x01 #define DRM_MSM_GEM_NEW 0x02 @@ -507,6 +553,7 @@ struct drm_msm_submitqueue_query { #define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B #define DRM_MSM_SUBMITQUEUE_QUERY 0x0C #define DRM_MSM_VM_BIND 0x0D +#define DRM_MSM_PERFCNTR_CONFIG 0x0E #define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param) #define DRM_IOCTL_MSM_SET_PARAM DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SET_PARAM, struct drm_msm_param) @@ -521,6 +568,7 @@ struct drm_msm_submitqueue_query { #define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32) #define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY DRM_IOW 
(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_QUERY, struct drm_msm_submitqueue_query) #define DRM_IOCTL_MSM_VM_BIND DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_VM_BIND, struct drm_msm_vm_bind) +#define DRM_IOCTL_MSM_PERFCNTR_CONFIG DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_PERFCNTR_CONFIG, struct drm_msm_perfcntr_config) #if defined(__cplusplus) } |
