diff options
246 files changed, 7790 insertions, 2458 deletions
diff --git a/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml b/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml index 1acad99f3965..ad279f0993fa 100644 --- a/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml +++ b/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml @@ -24,6 +24,7 @@ properties: - samsung,exynos5410-mipi-dsi - samsung,exynos5422-mipi-dsi - samsung,exynos5433-mipi-dsi + - samsung,exynos7870-mipi-dsi - fsl,imx8mm-mipi-dsim - fsl,imx8mp-mipi-dsim - items: @@ -148,6 +149,32 @@ allOf: properties: compatible: contains: + const: samsung,exynos7870-mipi-dsi + + then: + properties: + clocks: + minItems: 4 + maxItems: 4 + + clock-names: + items: + - const: bus + - const: pll + - const: byte + - const: esc + + ports: + required: + - port@0 + + required: + - ports + + - if: + properties: + compatible: + contains: const: samsung,exynos5433-mipi-dsi then: diff --git a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml index 9923b065323b..aeb4e4f36044 100644 --- a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml +++ b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml @@ -29,15 +29,30 @@ properties: - qcom,sdm845-dp - qcom,sm8350-dp - qcom,sm8650-dp + - qcom,x1e80100-dp + + - items: + - enum: + - qcom,sm6350-dp + - const: qcom,sc7180-dp + + # deprecated entry for compatibility with old DT - items: - enum: - - qcom,sar2130p-dp - qcom,sm6350-dp + - const: qcom,sm8350-dp + deprecated: true + + - items: + - enum: + - qcom,sar2130p-dp + - qcom,sm7150-dp - qcom,sm8150-dp - qcom,sm8250-dp - qcom,sm8450-dp - qcom,sm8550-dp - const: qcom,sm8350-dp + - items: - enum: - qcom,sm8750-dp @@ -51,35 +66,37 @@ properties: - description: link register block - description: p0 register block - description: p1 register block + - description: p2 register block + - description: p3 register block + - description: mst2link register block + - description: mst3link register block interrupts: maxItems: 1 clocks: + minItems: 5 items: - description: AHB clock to enable register access - description: Display Port AUX clock - description: Display Port Link clock - description: Link interface clock between DP and PHY - - description: Display Port Pixel clock + - description: Display Port stream 0 Pixel clock + - description: Display Port stream 1 Pixel clock + - description: Display Port stream 2 Pixel clock + - description: Display Port stream 3 Pixel clock clock-names: + minItems: 5 items: - const: core_iface - const: core_aux - const: ctrl_link - const: ctrl_link_iface - const: stream_pixel - - assigned-clocks: - items: - - description: link clock source - - description: pixel clock source - - assigned-clock-parents: - items: - - description: phy 0 parent - - description: phy 1 parent + - const: stream_1_pixel + - const: stream_2_pixel + - const: stream_3_pixel phys: maxItems: 1 @@ -161,7 +178,6 @@ required: allOf: # AUX BUS does not exist on DP controllers # Audio output also is present only on DP output - # p1 regions is present on DP, but not on eDP - if: properties: compatible: @@ -174,12 +190,110 @@ allOf: properties: "#sound-dai-cells": false else: + if: + properties: + compatible: + contains: + enum: + - qcom,sa8775p-dp + - qcom,x1e80100-dp + then: + oneOf: + - required: + - aux-bus + - required: + - "#sound-dai-cells" + else: + properties: + aux-bus: false + required: + - "#sound-dai-cells" + + - if: + properties: + compatible: + contains: + enum: + # these platforms support SST only + - qcom,sc7180-dp + - qcom,sc7280-dp + - qcom,sc7280-edp + - qcom,sc8180x-edp + - qcom,sc8280xp-edp + then: properties: - aux-bus: false reg: minItems: 5 - required: - - "#sound-dai-cells" + maxItems: 5 + clocks: + minItems: 5 + maxItems: 5 + clocks-names: + minItems: 5 + maxItems: 5 + + - if: + properties: + compatible: + contains: + enum: + # these platforms support 2 streams MST on some interfaces, + # others are SST only + - qcom,sc8280xp-dp + - qcom,x1e80100-dp + then: + properties: + reg: + minItems: 5 + maxItems: 5 + clocks: + minItems: 5 + maxItems: 6 + clocks-names: + minItems: 5 + maxItems: 6 + + - if: + properties: + compatible: + contains: + # 2 streams MST + enum: + - qcom,sc8180x-dp + - qcom,sdm845-dp + - qcom,sm8350-dp + - qcom,sm8650-dp + then: + properties: + reg: + minItems: 5 + maxItems: 5 + clocks: + minItems: 6 + maxItems: 6 + clocks-names: + minItems: 6 + maxItems: 6 + + - if: + properties: + compatible: + contains: + enum: + # these platforms support 4 stream MST on first DP, + # 2 streams MST on the second one. + - qcom,sa8775p-dp + then: + properties: + reg: + minItems: 9 + maxItems: 9 + clocks: + minItems: 6 + maxItems: 8 + clocks-names: + minItems: 6 + maxItems: 8 additionalProperties: false diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml index d4bb65c660af..4400d4cce072 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml @@ -27,6 +27,7 @@ properties: - qcom,sar2130p-dsi-ctrl - qcom,sc7180-dsi-ctrl - qcom,sc7280-dsi-ctrl + - qcom,sc8180x-dsi-ctrl - qcom,sdm660-dsi-ctrl - qcom,sdm670-dsi-ctrl - qcom,sdm845-dsi-ctrl @@ -332,6 +333,7 @@ allOf: - qcom,sar2130p-dsi-ctrl - qcom,sc7180-dsi-ctrl - qcom,sc7280-dsi-ctrl + - qcom,sc8180x-dsi-ctrl - qcom,sdm845-dsi-ctrl - qcom,sm6115-dsi-ctrl - qcom,sm6125-dsi-ctrl diff --git a/Documentation/devicetree/bindings/display/msm/gmu.yaml b/Documentation/devicetree/bindings/display/msm/gmu.yaml index 4392aa7a4ffe..afc187935744 100644 --- a/Documentation/devicetree/bindings/display/msm/gmu.yaml +++ b/Documentation/devicetree/bindings/display/msm/gmu.yaml @@ -124,6 +124,40 @@ allOf: contains: enum: - qcom,adreno-gmu-623.0 + then: + properties: + reg: + items: + - description: Core GMU registers + - description: Resource controller registers + - description: GMU PDC registers + reg-names: + items: + - const: gmu + - const: rscc + - const: gmu_pdc + clocks: + items: + - description: GMU clock + - description: GPU CX clock + - description: GPU AXI clock + - description: GPU MEMNOC clock + - description: GPU AHB clock + - description: GPU HUB CX clock + clock-names: + items: + - const: gmu + - const: cxo + - const: axi + - const: memnoc + - const: ahb + - const: hub + + - if: + properties: + compatible: + contains: + enum: - qcom,adreno-gmu-635.0 - qcom,adreno-gmu-660.1 - qcom,adreno-gmu-663.0 diff --git a/Documentation/devicetree/bindings/display/msm/gpu.yaml b/Documentation/devicetree/bindings/display/msm/gpu.yaml index 6ddc72fd85b0..3696b083e353 100644 --- a/Documentation/devicetree/bindings/display/msm/gpu.yaml +++ b/Documentation/devicetree/bindings/display/msm/gpu.yaml @@ -146,39 +146,209 @@ allOf: properties: compatible: contains: - pattern: '^qcom,adreno-[3-5][0-9][0-9]\.[0-9]+$' + oneOf: + - pattern: '^qcom,adreno-305\.[0-9]+$' + - pattern: '^qcom,adreno-330\.[0-9]+$' + then: + properties: + clocks: + minItems: 3 + maxItems: 3 + clock-names: + items: + - const: core + description: GPU Core clock + - const: iface + description: GPU Interface clock + - const: mem_iface + description: GPU Memory Interface clock + - if: + properties: + compatible: + contains: + pattern: '^qcom,adreno-306\.[0-9]+$' then: properties: clocks: - minItems: 2 - maxItems: 7 + minItems: 5 + maxItems: 6 + clock-names: + oneOf: + - items: + - const: core + description: GPU Core clock + - const: iface + description: GPU Interface clock + - const: mem_iface + description: GPU Memory Interface clock + - const: alt_mem_iface + description: GPU Alternative Memory Interface clock + - const: gfx3d + description: GPU 3D engine clock + - items: + - const: core + description: GPU Core clock + - const: iface + description: GPU Interface clock + - const: mem + description: GPU Memory clock + - const: mem_iface + description: GPU Memory Interface clock + - const: alt_mem_iface + description: GPU Alternative Memory Interface clock + - const: gfx3d + description: GPU 3D engine clock + - if: + properties: + compatible: + contains: + pattern: '^qcom,adreno-320\.[0-9]+$' + then: + properties: + clocks: + minItems: 4 + maxItems: 4 clock-names: items: - anyOf: - - const: core - description: GPU Core clock - - const: iface - description: GPU Interface clock - - const: mem - description: GPU Memory clock - - const: mem_iface - description: GPU Memory Interface clock - - const: alt_mem_iface - description: GPU Alternative Memory Interface clock - - const: gfx3d - description: GPU 3D engine clock - - const: rbbmtimer - description: GPU RBBM Timer for Adreno 5xx series - - const: rbcpr - description: GPU RB Core Power Reduction clock - minItems: 2 + - const: core + description: GPU Core clock + - const: iface + description: GPU Interface clock + - const: mem + description: GPU Memory clock + - const: mem_iface + description: GPU Memory Interface clock + + - if: + properties: + compatible: + contains: + pattern: '^qcom,adreno-405\.[0-9]+$' + then: + properties: + clocks: + minItems: 7 maxItems: 7 + clock-names: + items: + - const: core + description: GPU Core clock + - const: iface + description: GPU Interface clock + - const: mem + description: GPU Memory clock + - const: mem_iface + description: GPU Memory Interface clock + - const: alt_mem_iface + description: GPU Alternative Memory Interface clock + - const: gfx3d + description: GPU 3D engine clock + - const: rbbmtimer + description: GPU RBBM Timer for Adreno 5xx series - required: - - clocks - - clock-names + - if: + properties: + compatible: + contains: + pattern: '^qcom,adreno-50[56]\.[0-9]+$' + then: + properties: + clocks: + minItems: 6 + maxItems: 6 + clock-names: + items: + - const: core + description: GPU Core clock + - const: iface + description: GPU Interface clock + - const: mem_iface + description: GPU Memory Interface clock + - const: alt_mem_iface + description: GPU Alternative Memory Interface clock + - const: rbbmtimer + description: GPU RBBM Timer for Adreno 5xx series + - const: alwayson + description: GPU AON clock + + - if: + properties: + compatible: + contains: + oneOf: + - pattern: '^qcom,adreno-508\.[0-9]+$' + - pattern: '^qcom,adreno-509\.[0-9]+$' + - pattern: '^qcom,adreno-512\.[0-9]+$' + - pattern: '^qcom,adreno-540\.[0-9]+$' + then: + properties: + clocks: + minItems: 6 + maxItems: 6 + clock-names: + items: + - const: iface + description: GPU Interface clock + - const: rbbmtimer + description: GPU RBBM Timer for Adreno 5xx series + - const: mem + description: GPU Memory clock + - const: mem_iface + description: GPU Memory Interface clock + - const: rbcpr + description: GPU RB Core Power Reduction clock + - const: core + description: GPU Core clock + + - if: + properties: + compatible: + contains: + pattern: '^qcom,adreno-510\.[0-9]+$' + then: + properties: + clocks: + minItems: 6 + maxItems: 6 + clock-names: + items: + - const: core + description: GPU Core clock + - const: iface + description: GPU Interface clock + - const: mem + description: GPU Memory clock + - const: mem_iface + description: GPU Memory Interface clock + - const: rbbmtimer + description: GPU RBBM Timer for Adreno 5xx series + - const: alwayson + description: GPU AON clock + + - if: + properties: + compatible: + contains: + pattern: '^qcom,adreno-530\.[0-9]+$' + then: + properties: + clocks: + minItems: 5 + maxItems: 5 + clock-names: + items: + - const: core + description: GPU Core clock + - const: iface + description: GPU Interface clock + - const: rbbmtimer + description: GPU RBBM Timer for Adreno 5xx series + - const: mem + description: GPU Memory clock + - const: mem_iface + description: GPU Memory Interface clock - if: properties: @@ -187,6 +357,7 @@ allOf: enum: - qcom,adreno-610.0 - qcom,adreno-619.1 + - qcom,adreno-07000200 then: properties: clocks: @@ -222,7 +393,9 @@ allOf: properties: compatible: contains: - pattern: '^qcom,adreno-[67][0-9][0-9]\.[0-9]+$' + oneOf: + - pattern: '^qcom,adreno-[67][0-9][0-9]\.[0-9]+$' + - pattern: '^qcom,adreno-[0-9a-f]{8}$' then: # Starting with A6xx, the clocks are usually defined in the GMU node properties: diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sa8775p-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sa8775p-mdss.yaml index 1053b3bc4908..e2730a2f25cf 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sa8775p-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sa8775p-mdss.yaml @@ -375,7 +375,11 @@ examples: <0xaf54200 0x0c0>, <0xaf55000 0x770>, <0xaf56000 0x09c>, - <0xaf57000 0x09c>; + <0xaf57000 0x09c>, + <0xaf58000 0x09c>, + <0xaf59000 0x09c>, + <0xaf5a000 0x23c>, + <0xaf5b000 0x23c>; interrupt-parent = <&mdss0>; interrupts = <12>; @@ -384,16 +388,28 @@ examples: <&dispcc_dptx0_aux_clk>, <&dispcc_dptx0_link_clk>, <&dispcc_dptx0_link_intf_clk>, - <&dispcc_dptx0_pixel0_clk>; + <&dispcc_dptx0_pixel0_clk>, + <&dispcc_dptx0_pixel1_clk>, + <&dispcc_dptx0_pixel2_clk>, + <&dispcc_dptx0_pixel3_clk>; clock-names = "core_iface", "core_aux", "ctrl_link", "ctrl_link_iface", - "stream_pixel"; + "stream_pixel", + "stream_1_pixel", + "stream_2_pixel", + "stream_3_pixel"; assigned-clocks = <&dispcc_mdss_dptx0_link_clk_src>, - <&dispcc_mdss_dptx0_pixel0_clk_src>; - assigned-clock-parents = <&mdss0_dp0_phy 0>, <&mdss0_dp0_phy 1>; + <&dispcc_mdss_dptx0_pixel0_clk_src>, + <&dispcc_mdss_dptx0_pixel1_clk_src>, + <&dispcc_mdss_dptx0_pixel2_clk_src>, + <&dispcc_mdss_dptx0_pixel3_clk_src>; + assigned-clock-parents = <&mdss0_dp0_phy 0>, + <&mdss0_dp0_phy 1>, + <&mdss0_dp0_phy 1>, + <&mdss0_dp0_phy 1>; phys = <&mdss0_dp0_phy>; phy-names = "dp"; diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sar2130p-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sar2130p-mdss.yaml index 870144b53cec..44c1bb9e4109 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sar2130p-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sar2130p-mdss.yaml @@ -207,16 +207,20 @@ examples: <&dispcc_disp_cc_mdss_dptx0_aux_clk>, <&dispcc_disp_cc_mdss_dptx0_link_clk>, <&dispcc_disp_cc_mdss_dptx0_link_intf_clk>, - <&dispcc_disp_cc_mdss_dptx0_pixel0_clk>; + <&dispcc_disp_cc_mdss_dptx0_pixel0_clk>, + <&dispcc_disp_cc_mdss_dptx0_pixel1_clk>; clock-names = "core_iface", "core_aux", "ctrl_link", "ctrl_link_iface", - "stream_pixel"; + "stream_pixel", + "stream_1_pixel"; assigned-clocks = <&dispcc_disp_cc_mdss_dptx0_link_clk_src>, - <&dispcc_disp_cc_mdss_dptx0_pixel0_clk_src>; + <&dispcc_disp_cc_mdss_dptx0_pixel0_clk_src>, + <&dispcc_disp_cc_mdss_dptx0_pixel1_clk_src>; assigned-clock-parents = <&usb_dp_qmpphy_QMP_USB43DP_DP_LINK_CLK>, + <&usb_dp_qmpphy_QMP_USB43DP_DP_VCO_DIV_CLK>, <&usb_dp_qmpphy_QMP_USB43DP_DP_VCO_DIV_CLK>; phys = <&usb_dp_qmpphy QMP_USB43DP_DP_PHY>; diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml index 2947f27e0585..b643d3adf669 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml @@ -281,7 +281,8 @@ examples: reg = <0xaea0000 0x200>, <0xaea0200 0x200>, <0xaea0400 0xc00>, - <0xaea1000 0x400>; + <0xaea1000 0x400>, + <0xaea1400 0x400>; interrupt-parent = <&mdss>; interrupts = <14>; diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sc8180x-dpu.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sc8180x-dpu.yaml new file mode 100644 index 000000000000..a411126708b8 --- /dev/null +++ b/Documentation/devicetree/bindings/display/msm/qcom,sc8180x-dpu.yaml @@ -0,0 +1,103 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/msm/qcom,sc8180x-dpu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm SC8180X Display DPU + +maintainers: + - Dmitry Baryshkov <dmitry.baryshkov@linaro.org> + +$ref: /schemas/display/msm/dpu-common.yaml# + +properties: + compatible: + const: qcom,sc8180x-dpu + + reg: + items: + - description: Address offset and size for mdp register set + - description: Address offset and size for vbif register set + + reg-names: + items: + - const: mdp + - const: vbif + + clocks: + items: + - description: Display AHB clock + - description: Display HF AXI clock + - description: Display core clock + - description: Display vsync clock + - description: Display rotator clock + - description: Display LUT clock + + clock-names: + items: + - const: iface + - const: bus + - const: core + - const: vsync + - const: rot + - const: lut + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/clock/qcom,dispcc-sm8250.h> + #include <dt-bindings/clock/qcom,gcc-sc8180x.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/interconnect/qcom,sc8180x.h> + #include <dt-bindings/power/qcom-rpmpd.h> + + display-controller@ae01000 { + compatible = "qcom,sc8180x-dpu"; + reg = <0x0ae01000 0x8f000>, + <0x0aeb0000 0x2008>; + reg-names = "mdp", "vbif"; + + clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&gcc GCC_DISP_HF_AXI_CLK>, + <&dispcc DISP_CC_MDSS_MDP_CLK>, + <&dispcc DISP_CC_MDSS_VSYNC_CLK>, + <&dispcc DISP_CC_MDSS_ROT_CLK>, + <&dispcc DISP_CC_MDSS_MDP_LUT_CLK>; + clock-names = "iface", + "bus", + "core", + "vsync", + "rot", + "lut"; + + assigned-clocks = <&dispcc DISP_CC_MDSS_VSYNC_CLK>; + assigned-clock-rates = <19200000>; + + operating-points-v2 = <&mdp_opp_table>; + power-domains = <&rpmhpd SC8180X_MMCX>; + + interrupt-parent = <&mdss>; + interrupts = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + endpoint { + remote-endpoint = <&dsi0_in>; + }; + }; + + port@1 { + reg = <1>; + endpoint { + remote-endpoint = <&dsi1_in>; + }; + }; + }; + }; +... diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sc8180x-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sc8180x-mdss.yaml new file mode 100644 index 000000000000..00e82bdbbcc7 --- /dev/null +++ b/Documentation/devicetree/bindings/display/msm/qcom,sc8180x-mdss.yaml @@ -0,0 +1,359 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/msm/qcom,sc8180x-mdss.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm SC8180X Display MDSS + +maintainers: + - Dmitry Baryshkov <dmitry.baryshkov@linaro.org> + +description: + Device tree bindings for MSM Mobile Display Subsystem(MDSS) that encapsulates + sub-blocks like DPU display controller, DSI and DP interfaces etc. Device tree + bindings of MDSS are mentioned for SC8180X target. + +$ref: /schemas/display/msm/mdss-common.yaml# + +properties: + compatible: + items: + - const: qcom,sc8180x-mdss + + clocks: + items: + - description: Display AHB clock from gcc + - description: Display hf axi clock + - description: Display sf axi clock + - description: Display core clock + + clock-names: + items: + - const: iface + - const: bus + - const: nrt_bus + - const: core + + iommus: + maxItems: 1 + + interconnects: + maxItems: 3 + + interconnect-names: + maxItems: 3 + +patternProperties: + "^display-controller@[0-9a-f]+$": + type: object + additionalProperties: true + + properties: + compatible: + const: qcom,sc8180x-dpu + + "^displayport-controller@[0-9a-f]+$": + type: object + additionalProperties: true + + properties: + compatible: + enum: + - qcom,sc8180x-dp + - qcom,sc8180x-edp + + "^dsi@[0-9a-f]+$": + type: object + additionalProperties: true + + properties: + compatible: + contains: + const: qcom,sc8180x-dsi-ctrl + + "^phy@[0-9a-f]+$": + type: object + additionalProperties: true + + properties: + compatible: + const: qcom,dsi-phy-7nm + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/clock/qcom,dispcc-sm8250.h> + #include <dt-bindings/clock/qcom,gcc-sc8180x.h> + #include <dt-bindings/clock/qcom,rpmh.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/interconnect/qcom,sc8180x.h> + #include <dt-bindings/power/qcom-rpmpd.h> + + display-subsystem@ae00000 { + compatible = "qcom,sc8180x-mdss"; + reg = <0x0ae00000 0x1000>; + reg-names = "mdss"; + + interconnects = <&mmss_noc MASTER_MDP_PORT0 &mc_virt SLAVE_EBI_CH0>, + <&mmss_noc MASTER_MDP_PORT1 &mc_virt SLAVE_EBI_CH0>, + <&gem_noc MASTER_AMPSS_M0 &config_noc SLAVE_DISPLAY_CFG>; + interconnect-names = "mdp0-mem", + "mdp1-mem", + "cpu-cfg"; + + power-domains = <&dispcc MDSS_GDSC>; + + clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&gcc GCC_DISP_HF_AXI_CLK>, + <&gcc GCC_DISP_SF_AXI_CLK>, + <&dispcc DISP_CC_MDSS_MDP_CLK>; + clock-names = "iface", "bus", "nrt_bus", "core"; + + interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>; + interrupt-controller; + #interrupt-cells = <1>; + + iommus = <&apps_smmu 0x800 0x420>; + + #address-cells = <1>; + #size-cells = <1>; + ranges; + + display-controller@ae01000 { + compatible = "qcom,sc8180x-dpu"; + reg = <0x0ae01000 0x8f000>, + <0x0aeb0000 0x2008>; + reg-names = "mdp", "vbif"; + + clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&gcc GCC_DISP_HF_AXI_CLK>, + <&dispcc DISP_CC_MDSS_MDP_CLK>, + <&dispcc DISP_CC_MDSS_VSYNC_CLK>, + <&dispcc DISP_CC_MDSS_ROT_CLK>, + <&dispcc DISP_CC_MDSS_MDP_LUT_CLK>; + clock-names = "iface", + "bus", + "core", + "vsync", + "rot", + "lut"; + + assigned-clocks = <&dispcc DISP_CC_MDSS_VSYNC_CLK>; + assigned-clock-rates = <19200000>; + + operating-points-v2 = <&mdp_opp_table>; + power-domains = <&rpmhpd SC8180X_MMCX>; + + interrupt-parent = <&mdss>; + interrupts = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + dpu_intf1_out: endpoint { + remote-endpoint = <&dsi0_in>; + }; + }; + + port@1 { + reg = <1>; + dpu_intf2_out: endpoint { + remote-endpoint = <&dsi1_in>; + }; + }; + }; + + mdp_opp_table: opp-table { + compatible = "operating-points-v2"; + + opp-171428571 { + opp-hz = /bits/ 64 <171428571>; + required-opps = <&rpmhpd_opp_low_svs>; + }; + + opp-300000000 { + opp-hz = /bits/ 64 <300000000>; + required-opps = <&rpmhpd_opp_svs>; + }; + + opp-345000000 { + opp-hz = /bits/ 64 <345000000>; + required-opps = <&rpmhpd_opp_svs_l1>; + }; + + opp-460000000 { + opp-hz = /bits/ 64 <460000000>; + required-opps = <&rpmhpd_opp_nom>; + }; + }; + }; + + dsi@ae94000 { + compatible = "qcom,sc8180x-dsi-ctrl", + "qcom,mdss-dsi-ctrl"; + reg = <0x0ae94000 0x400>; + reg-names = "dsi_ctrl"; + + interrupt-parent = <&mdss>; + interrupts = <4>; + + clocks = <&dispcc DISP_CC_MDSS_BYTE0_CLK>, + <&dispcc DISP_CC_MDSS_BYTE0_INTF_CLK>, + <&dispcc DISP_CC_MDSS_PCLK0_CLK>, + <&dispcc DISP_CC_MDSS_ESC0_CLK>, + <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&gcc GCC_DISP_HF_AXI_CLK>; + clock-names = "byte", + "byte_intf", + "pixel", + "core", + "iface", + "bus"; + + assigned-clocks = <&dispcc DISP_CC_MDSS_BYTE0_CLK_SRC>, + <&dispcc DISP_CC_MDSS_PCLK0_CLK_SRC>; + assigned-clock-parents = <&dsi0_phy 0>, <&dsi0_phy 1>; + + operating-points-v2 = <&dsi_opp_table>; + power-domains = <&rpmhpd SC8180X_MMCX>; + + phys = <&dsi0_phy>; + phy-names = "dsi"; + + #address-cells = <1>; + #size-cells = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + dsi0_in: endpoint { + remote-endpoint = <&dpu_intf1_out>; + }; + }; + + port@1 { + reg = <1>; + dsi0_out: endpoint { + }; + }; + }; + + dsi_opp_table: opp-table { + compatible = "operating-points-v2"; + + opp-187500000 { + opp-hz = /bits/ 64 <187500000>; + required-opps = <&rpmhpd_opp_low_svs>; + }; + + opp-300000000 { + opp-hz = /bits/ 64 <300000000>; + required-opps = <&rpmhpd_opp_svs>; + }; + + opp-358000000 { + opp-hz = /bits/ 64 <358000000>; + required-opps = <&rpmhpd_opp_svs_l1>; + }; + }; + }; + + dsi0_phy: phy@ae94400 { + compatible = "qcom,dsi-phy-7nm"; + reg = <0x0ae94400 0x200>, + <0x0ae94600 0x280>, + <0x0ae94900 0x260>; + reg-names = "dsi_phy", + "dsi_phy_lane", + "dsi_pll"; + + #clock-cells = <1>; + #phy-cells = <0>; + + clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&rpmhcc RPMH_CXO_CLK>; + clock-names = "iface", "ref"; + vdds-supply = <&vreg_dsi_phy>; + }; + + dsi@ae96000 { + compatible = "qcom,sc8180x-dsi-ctrl", + "qcom,mdss-dsi-ctrl"; + reg = <0x0ae96000 0x400>; + reg-names = "dsi_ctrl"; + + interrupt-parent = <&mdss>; + interrupts = <5>; + + clocks = <&dispcc DISP_CC_MDSS_BYTE1_CLK>, + <&dispcc DISP_CC_MDSS_BYTE1_INTF_CLK>, + <&dispcc DISP_CC_MDSS_PCLK1_CLK>, + <&dispcc DISP_CC_MDSS_ESC1_CLK>, + <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&gcc GCC_DISP_HF_AXI_CLK>; + clock-names = "byte", + "byte_intf", + "pixel", + "core", + "iface", + "bus"; + + assigned-clocks = <&dispcc DISP_CC_MDSS_BYTE1_CLK_SRC>, + <&dispcc DISP_CC_MDSS_PCLK1_CLK_SRC>; + assigned-clock-parents = <&dsi1_phy 0>, <&dsi1_phy 1>; + + operating-points-v2 = <&dsi_opp_table>; + power-domains = <&rpmhpd SC8180X_MMCX>; + + phys = <&dsi1_phy>; + phy-names = "dsi"; + + #address-cells = <1>; + #size-cells = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + dsi1_in: endpoint { + remote-endpoint = <&dpu_intf2_out>; + }; + }; + + port@1 { + reg = <1>; + dsi1_out: endpoint { + }; + }; + }; + }; + + dsi1_phy: phy@ae96400 { + compatible = "qcom,dsi-phy-7nm"; + reg = <0x0ae96400 0x200>, + <0x0ae96600 0x280>, + <0x0ae96900 0x260>; + reg-names = "dsi_phy", + "dsi_phy_lane", + "dsi_pll"; + + #clock-cells = <1>; + #phy-cells = <0>; + + clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&rpmhcc RPMH_CXO_CLK>; + clock-names = "iface", "ref"; + vdds-supply = <&vreg_dsi_phy>; + }; + }; +... diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm7150-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm7150-mdss.yaml index 13c5d5ffabde..9b0621d88d50 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm7150-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm7150-mdss.yaml @@ -61,7 +61,8 @@ patternProperties: additionalProperties: true properties: compatible: - const: qcom,sm7150-dp + contains: + const: qcom,sm7150-dp "^dsi@[0-9a-f]+$": type: object @@ -378,7 +379,8 @@ examples: }; displayport-controller@ae90000 { - compatible = "qcom,sm7150-dp"; + compatible = "qcom,sm7150-dp", + "qcom,sm8350-dp"; reg = <0xae90000 0x200>, <0xae90200 0x200>, <0xae90400 0xc00>, @@ -392,16 +394,20 @@ examples: <&dispcc_mdss_dp_aux_clk>, <&dispcc_mdss_dp_link_clk>, <&dispcc_mdss_dp_link_intf_clk>, - <&dispcc_mdss_dp_pixel_clk>; + <&dispcc_mdss_dp_pixel_clk>, + <&dispcc_mdss_dp_pixel1_clk>; clock-names = "core_iface", "core_aux", "ctrl_link", "ctrl_link_iface", - "stream_pixel"; + "stream_pixel", + "stream_1_pixel"; assigned-clocks = <&dispcc_mdss_dp_link_clk_src>, - <&dispcc_mdss_dp_pixel_clk_src>; + <&dispcc_mdss_dp_pixel_clk_src>, + <&dispcc_mdss_dp_pixel1_clk_src>; assigned-clock-parents = <&dp_phy 0>, + <&dp_phy 1>, <&dp_phy 1>; operating-points-v2 = <&dp_opp_table>; diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml index 72c70edc1fb0..4151f475f3bc 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml @@ -401,16 +401,20 @@ examples: <&disp_cc_mdss_dptx0_aux_clk>, <&disp_cc_mdss_dptx0_link_clk>, <&disp_cc_mdss_dptx0_link_intf_clk>, - <&disp_cc_mdss_dptx0_pixel0_clk>; + <&disp_cc_mdss_dptx0_pixel0_clk>, + <&disp_cc_mdss_dptx0_pixel1_clk>; clock-names = "core_iface", "core_aux", "ctrl_link", "ctrl_link_iface", - "stream_pixel"; + "stream_pixel", + "stream_1_pixel"; assigned-clocks = <&disp_cc_mdss_dptx0_link_clk_src>, - <&disp_cc_mdss_dptx0_pixel0_clk_src>; + <&disp_cc_mdss_dptx0_pixel0_clk_src>, + <&disp_cc_mdss_dptx0_pixel1_clk_src>; assigned-clock-parents = <&usb_dp_qmpphy QMP_USB43DP_DP_LINK_CLK>, + <&usb_dp_qmpphy QMP_USB43DP_DP_VCO_DIV_CLK>, <&usb_dp_qmpphy QMP_USB43DP_DP_VCO_DIV_CLK>; operating-points-v2 = <&dp_opp_table>; diff --git a/Documentation/devicetree/bindings/display/msm/qcom,x1e80100-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,x1e80100-mdss.yaml index 3b01a0e47333..8d698a2e055a 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,x1e80100-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,x1e80100-mdss.yaml @@ -170,11 +170,11 @@ examples: displayport-controller@ae90000 { compatible = "qcom,x1e80100-dp"; - reg = <0 0xae90000 0 0x200>, - <0 0xae90200 0 0x200>, - <0 0xae90400 0 0x600>, - <0 0xae91000 0 0x400>, - <0 0xae91400 0 0x400>; + reg = <0xae90000 0x200>, + <0xae90200 0x200>, + <0xae90400 0x600>, + <0xae91000 0x400>, + <0xae91400 0x400>; interrupt-parent = <&mdss>; interrupts = <12>; @@ -183,15 +183,19 @@ examples: <&dispcc_dptx0_aux_clk>, <&dispcc_dptx0_link_clk>, <&dispcc_dptx0_link_intf_clk>, - <&dispcc_dptx0_pixel0_clk>; + <&dispcc_dptx0_pixel0_clk>, + <&dispcc_dptx0_pixel1_clk>; clock-names = "core_iface", "core_aux", "ctrl_link", "ctrl_link_iface", - "stream_pixel"; + "stream_pixel", + "stream_1_pixel"; assigned-clocks = <&dispcc_mdss_dptx0_link_clk_src>, - <&dispcc_mdss_dptx0_pixel0_clk_src>; + <&dispcc_mdss_dptx0_pixel0_clk_src>, + <&dispcc_mdss_dptx0_pixel1_clk_src>; assigned-clock-parents = <&usb_1_ss0_qmpphy QMP_USB43DP_DP_LINK_CLK>, + <&usb_1_ss0_qmpphy QMP_USB43DP_DP_VCO_DIV_CLK>, <&usb_1_ss0_qmpphy QMP_USB43DP_DP_VCO_DIV_CLK>; operating-points-v2 = <&mdss_dp0_opp_table>; diff --git a/Documentation/devicetree/bindings/display/samsung/samsung,exynos7-decon.yaml b/Documentation/devicetree/bindings/display/samsung/samsung,exynos7-decon.yaml index 53916e4c95d8..14b954718008 100644 --- a/Documentation/devicetree/bindings/display/samsung/samsung,exynos7-decon.yaml +++ b/Documentation/devicetree/bindings/display/samsung/samsung,exynos7-decon.yaml @@ -80,6 +80,21 @@ properties: - const: vsync - const: lcd_sys + iommus: + maxItems: 1 + + memory-region: + maxItems: 1 + description: + A phandle to a node describing a reserved framebuffer memory region. + For example, the splash memory region set up by the bootloader. + + port: + $ref: /schemas/graph.yaml#/properties/port + description: + Output port which is connected to either a Mobile Image Compressor + (MIC) or a DSI Master device. + power-domains: maxItems: 1 @@ -92,6 +107,7 @@ required: - clock-names - interrupts - interrupt-names + - port - reg additionalProperties: false @@ -118,4 +134,9 @@ examples: "decon0_vclk"; pinctrl-0 = <&lcd_clk &pwm1_out>; pinctrl-names = "default"; + port { + decon_to_dsi: endpoint { + remote-endpoint = <&dsi_to_decon>; + }; + }; }; diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 72932fa31b8d..eba09c3ddce4 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -358,8 +358,6 @@ Locking Guidelines #. All locking rules and interface contracts with cross-driver interfaces (dma-buf, dma_fence) need to be followed. -#. No struct_mutex anywhere in the code - #. dma_resv will be the outermost lock (when needed) and ww_acquire_ctx is to be hoisted at highest level and passed down within i915_gem_ctx in the call chain @@ -367,11 +365,6 @@ Locking Guidelines #. While holding lru/memory manager (buddy, drm_mm, whatever) locks system memory allocations are not allowed - * Enforce this by priming lockdep (with fs_reclaim). If we - allocate memory while holding these looks we get a rehash - of the shrinker vs. struct_mutex saga, and that would be - real bad. - #. Do not nest different lru/memory manager locks within each other. Take them in turn to update memory allocations, relying on the object’s dma_resv ww_mutex to serialize against other operations. diff --git a/Documentation/gpu/nova/core/todo.rst b/Documentation/gpu/nova/core/todo.rst index 894a1e9c3741..091a2fb78c6b 100644 --- a/Documentation/gpu/nova/core/todo.rst +++ b/Documentation/gpu/nova/core/todo.rst @@ -131,8 +131,6 @@ crate so it can be used by other components as well. Features desired before this happens: -* Relative register with build-time base address validation, -* Arrays of registers with build-time index validation, * Make I/O optional I/O (for field values that are not registers), * Support other sizes than `u32`, * Allow visibility control for registers and individual fields, @@ -232,23 +230,6 @@ Rust abstraction for debugfs APIs. GPU (general) ============= -Parse firmware headers ----------------------- - -Parse ELF headers from the firmware files loaded from the filesystem. - -| Reference: ELF utils -| Complexity: Beginner -| Contact: Abdiel Janulgue - -Build radix3 page table ------------------------ - -Build the radix3 page table to map the firmware. - -| Complexity: Intermediate -| Contact: Abdiel Janulgue - Initial Devinit support ----------------------- diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 92db80793bba..b5f58b4274b1 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -173,31 +173,6 @@ Contact: Simona Vetter Level: Intermediate -Get rid of dev->struct_mutex from GEM drivers ---------------------------------------------- - -``dev->struct_mutex`` is the Big DRM Lock from legacy days and infested -everything. Nowadays in modern drivers the only bit where it's mandatory is -serializing GEM buffer object destruction. Which unfortunately means drivers -have to keep track of that lock and either call ``unreference`` or -``unreference_locked`` depending upon context. - -Core GEM doesn't have a need for ``struct_mutex`` any more since kernel 4.8, -and there's a GEM object ``free`` callback for any drivers which are -entirely ``struct_mutex`` free. - -For drivers that need ``struct_mutex`` it should be replaced with a driver- -private lock. The tricky part is the BO free functions, since those can't -reliably take that lock any more. Instead state needs to be protected with -suitable subordinate locks or some cleanup work pushed to a worker thread. For -performance-critical drivers it might also be better to go with a more -fine-grained per-buffer object and per-context lockings scheme. Currently only -the ``msm`` and `i915` drivers use ``struct_mutex``. - -Contact: Simona Vetter, respective driver maintainers - -Level: Advanced - Move Buffer Object Locking to dma_resv_lock() --------------------------------------------- diff --git a/MAINTAINERS b/MAINTAINERS index 5257d52679d6..838ae3c2b6fc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2086,6 +2086,19 @@ F: Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml F: drivers/gpu/drm/panthor/ F: include/uapi/drm/panthor_drm.h +ARM MALI TYR DRM DRIVER +M: Daniel Almeida <daniel.almeida@collabora.com> +M: Alice Ryhl <aliceryhl@google.com> +L: dri-devel@lists.freedesktop.org +S: Supported +W: https://rust-for-linux.com/tyr-gpu-driver +W https://drm.pages.freedesktop.org/maintainer-tools/drm-rust.html +B: https://gitlab.freedesktop.org/panfrost/linux/-/issues +T: git https://gitlab.freedesktop.org/drm/rust/kernel.git +F: Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml +F: drivers/gpu/drm/tyr/ +F: include/uapi/drm/panthor_drm.h + ARM MALI-DP DRM DRIVER M: Liviu Dudau <liviu.dudau@arm.com> S: Supported @@ -7237,7 +7250,7 @@ F: include/linux/dma-mapping.h F: include/linux/swiotlb.h F: kernel/dma/ -DMA MAPPING HELPERS DEVICE DRIVER API [RUST] +DMA MAPPING & SCATTERLIST API [RUST] M: Danilo Krummrich <dakr@kernel.org> R: Abdiel Janulgue <abdiel.janulgue@gmail.com> R: Daniel Almeida <daniel.almeida@collabora.com> @@ -7248,7 +7261,9 @@ S: Supported W: https://rust-for-linux.com T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git F: rust/helpers/dma.c +F: rust/helpers/scatterlist.c F: rust/kernel/dma.rs +F: rust/kernel/scatterlist.rs F: samples/rust/rust_dma.rs DMA-BUF HEAPS FRAMEWORK @@ -7838,6 +7853,7 @@ M: Danilo Krummrich <dakr@kernel.org> M: Alexandre Courbot <acourbot@nvidia.com> L: nouveau@lists.freedesktop.org S: Supported +W: https://rust-for-linux.com/nova-gpu-driver Q: https://patchwork.freedesktop.org/project/nouveau/ B: https://gitlab.freedesktop.org/drm/nova/-/issues C: irc://irc.oftc.net/nouveau @@ -7849,6 +7865,7 @@ DRM DRIVER FOR NVIDIA GPUS [RUST] M: Danilo Krummrich <dakr@kernel.org> L: nouveau@lists.freedesktop.org S: Supported +W: https://rust-for-linux.com/nova-gpu-driver Q: https://patchwork.freedesktop.org/project/nouveau/ B: https://gitlab.freedesktop.org/drm/nova/-/issues C: irc://irc.oftc.net/nouveau diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index f7ea8e895c0c..fda170730468 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -396,6 +396,8 @@ source "drivers/gpu/drm/sprd/Kconfig" source "drivers/gpu/drm/imagination/Kconfig" +source "drivers/gpu/drm/tyr/Kconfig" + config DRM_HYPERV tristate "DRM Support for Hyper-V synthetic video device" depends on DRM && PCI && HYPERV diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 4dafbdc8f86a..4b2f7d794275 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -220,6 +220,7 @@ obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/ obj-$(CONFIG_DRM_LIMA) += lima/ obj-$(CONFIG_DRM_PANFROST) += panfrost/ obj-$(CONFIG_DRM_PANTHOR) += panthor/ +obj-$(CONFIG_DRM_TYR) += tyr/ obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/ obj-$(CONFIG_DRM_MCDE) += mcde/ obj-$(CONFIG_DRM_TIDSS) += tidss/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index fbe7616555c8..a2879d2b7c8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -250,16 +250,24 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc) { - if (adev->kfd.dev) - kgd2kfd_suspend(adev->kfd.dev, suspend_proc); + if (adev->kfd.dev) { + if (adev->in_s0ix) + kgd2kfd_stop_sched_all_nodes(adev->kfd.dev); + else + kgd2kfd_suspend(adev->kfd.dev, suspend_proc); + } } int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc) { int r = 0; - if (adev->kfd.dev) - r = kgd2kfd_resume(adev->kfd.dev, resume_proc); + if (adev->kfd.dev) { + if (adev->in_s0ix) + r = kgd2kfd_start_sched_all_nodes(adev->kfd.dev); + else + r = kgd2kfd_resume(adev->kfd.dev, resume_proc); + } return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 127927b16ee2..9e120c934cc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -428,7 +428,9 @@ void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd); void kgd2kfd_unlock_kfd(struct kfd_dev *kfd); int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); +int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd); int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); +int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd); bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id); bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry, bool retry_fault); @@ -518,11 +520,21 @@ static inline int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) return 0; } +static inline int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd) +{ + return 0; +} + static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) { return 0; } +static inline int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd) +{ + return 0; +} + static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) { return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bdfb80377e6a..cbb9c77c3358 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5196,7 +5196,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) adev->in_suspend = true; if (amdgpu_sriov_vf(adev)) { - if (!adev->in_s0ix && !adev->in_runpm) + if (!adev->in_runpm) amdgpu_amdkfd_suspend_process(adev); amdgpu_virt_fini_data_exchange(adev); r = amdgpu_virt_request_full_gpu(adev, false); @@ -5216,10 +5216,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) amdgpu_device_ip_suspend_phase1(adev); - if (!adev->in_s0ix) { - amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); - amdgpu_userq_suspend(adev); - } + amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); + amdgpu_userq_suspend(adev); r = amdgpu_device_evict_resources(adev); if (r) @@ -5314,15 +5312,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) goto exit; } - if (!adev->in_s0ix) { - r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); - if (r) - goto exit; + r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); + if (r) + goto exit; - r = amdgpu_userq_resume(adev); - if (r) - goto exit; - } + r = amdgpu_userq_resume(adev); + if (r) + goto exit; r = amdgpu_device_ip_late_init(adev); if (r) @@ -5335,7 +5331,7 @@ exit: amdgpu_virt_init_data_exchange(adev); amdgpu_virt_release_full_gpu(adev, true); - if (!adev->in_s0ix && !r && !adev->in_runpm) + if (!r && !adev->in_runpm) r = amdgpu_amdkfd_resume_process(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 3d9c045a8a64..c9554ac3b1d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1654,6 +1654,21 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.pfp_fw_version >= 102 && + adev->gfx.mec_fw_version >= 66 && + adev->mes.fw_version[0] >= 128) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 7e749f9b6d69..349c351e242b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1550,6 +1550,25 @@ int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) return ret; } +int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd) +{ + struct kfd_node *node; + int i, r; + + if (!kfd->init_complete) + return 0; + + for (i = 0; i < kfd->num_nodes; i++) { + node = kfd->nodes[i]; + r = node->dqm->ops.unhalt(node->dqm); + if (r) { + dev_err(kfd_device, "Error in starting scheduler\n"); + return r; + } + } + return 0; +} + int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) { struct kfd_node *node; @@ -1567,6 +1586,23 @@ int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) return node->dqm->ops.halt(node->dqm); } +int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd) +{ + struct kfd_node *node; + int i, r; + + if (!kfd->init_complete) + return 0; + + for (i = 0; i < kfd->num_nodes; i++) { + node = kfd->nodes[i]; + r = node->dqm->ops.halt(node->dqm); + if (r) + return r; + } + return 0; +} + bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) { struct kfd_node *node; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 62defeccbb5c..23358e088bdc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8817,7 +8817,16 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev, static void manage_dm_interrupts(struct amdgpu_device *adev, struct amdgpu_crtc *acrtc, struct dm_crtc_state *acrtc_state) -{ +{ /* + * We cannot be sure that the frontend index maps to the same + * backend index - some even map to more than one. + * So we have to go through the CRTC to find the right IRQ. + */ + int irq_type = amdgpu_display_crtc_idx_to_irq_type( + adev, + acrtc->crtc_id); + struct drm_device *dev = adev_to_drm(adev); + struct drm_vblank_crtc_config config = {0}; struct dc_crtc_timing *timing; int offdelay; @@ -8870,7 +8879,35 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, drm_crtc_vblank_on_config(&acrtc->base, &config); + /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_get.*/ + switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { + case IP_VERSION(3, 0, 0): + case IP_VERSION(3, 0, 2): + case IP_VERSION(3, 0, 3): + case IP_VERSION(3, 2, 0): + if (amdgpu_irq_get(adev, &adev->pageflip_irq, irq_type)) + drm_err(dev, "DM_IRQ: Cannot get pageflip irq!\n"); +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + if (amdgpu_irq_get(adev, &adev->vline0_irq, irq_type)) + drm_err(dev, "DM_IRQ: Cannot get vline0 irq!\n"); +#endif + } + } else { + /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_put.*/ + switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { + case IP_VERSION(3, 0, 0): + case IP_VERSION(3, 0, 2): + case IP_VERSION(3, 0, 3): + case IP_VERSION(3, 2, 0): +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + if (amdgpu_irq_put(adev, &adev->vline0_irq, irq_type)) + drm_err(dev, "DM_IRQ: Cannot put vline0 irq!\n"); +#endif + if (amdgpu_irq_put(adev, &adev->pageflip_irq, irq_type)) + drm_err(dev, "DM_IRQ: Cannot put pageflip irq!\n"); + } + drm_crtc_vblank_off(&acrtc->base); } } diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index c5965924e7c6..2ab902b5db21 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2237,7 +2237,7 @@ static int smu_resume(struct amdgpu_ip_block *ip_block) return ret; } - if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL && smu->od_enabled) { ret = smu_od_edit_dpm_table(smu, PP_OD_COMMIT_DPM_TABLE, NULL, 0); if (ret) return ret; diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index 19c04687b0fe..8e650a02c528 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -134,7 +134,7 @@ static int ast_astdp_read_edid_block(void *data, u8 *buf, unsigned int block, si * 3. The Delays are often longer a lot when system resume from S3/S4. */ if (j) - mdelay(j + 1); + msleep(j + 1); /* Wait for EDID offset to show up in mirror register */ vgacrd7 = ast_get_index_reg(ast, AST_IO_VGACRI, 0xd7); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 609cdb9d371e..6f3fdcb6afdb 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -2678,7 +2678,7 @@ static int anx7625_i2c_probe(struct i2c_client *client) ret = devm_request_threaded_irq(dev, platform->pdata.intp_irq, NULL, anx7625_intr_hpd_isr, IRQF_TRIGGER_FALLING | - IRQF_ONESHOT, + IRQF_ONESHOT | IRQF_NO_AUTOEN, "anx7625-intp", platform); if (ret) { DRM_DEV_ERROR(dev, "fail to request irq\n"); @@ -2747,8 +2747,10 @@ static int anx7625_i2c_probe(struct i2c_client *client) } /* Add work function */ - if (platform->pdata.intp_irq) + if (platform->pdata.intp_irq) { + enable_irq(platform->pdata.intp_irq); queue_work(platform->workqueue, &platform->work); + } if (platform->pdata.audio_en) anx7625_register_audio(dev, platform); diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c index a614d1384f71..38726ae1bf15 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c @@ -1984,8 +1984,10 @@ static void cdns_mhdp_atomic_enable(struct drm_bridge *bridge, mhdp_state = to_cdns_mhdp_bridge_state(new_state); mhdp_state->current_mode = drm_mode_duplicate(bridge->dev, mode); - if (!mhdp_state->current_mode) - return; + if (!mhdp_state->current_mode) { + ret = -EINVAL; + goto out; + } drm_mode_set_name(mhdp_state->current_mode); diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index b5dd71f6a990..eabc4c32f6ab 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -31,11 +31,10 @@ /* returns true iff both arguments logically differs */ #define NEQV(a, b) (!(a) ^ !(b)) -/* DSIM_STATUS */ +/* DSIM_STATUS or DSIM_DPHY_STATUS */ #define DSIM_STOP_STATE_DAT(x) (((x) & 0xf) << 0) #define DSIM_STOP_STATE_CLK BIT(8) #define DSIM_TX_READY_HS_CLK BIT(10) -#define DSIM_PLL_STABLE BIT(31) /* DSIM_SWRST */ #define DSIM_FUNCRST BIT(16) @@ -46,17 +45,13 @@ #define DSIM_BTA_TIMEOUT(x) ((x) << 16) /* DSIM_CLKCTRL */ -#define DSIM_ESC_PRESCALER(x) (((x) & 0xffff) << 0) -#define DSIM_ESC_PRESCALER_MASK (0xffff << 0) -#define DSIM_LANE_ESC_CLK_EN_CLK BIT(19) -#define DSIM_LANE_ESC_CLK_EN_DATA(x) (((x) & 0xf) << 20) -#define DSIM_LANE_ESC_CLK_EN_DATA_MASK (0xf << 20) -#define DSIM_BYTE_CLKEN BIT(24) -#define DSIM_BYTE_CLK_SRC(x) (((x) & 0x3) << 25) -#define DSIM_BYTE_CLK_SRC_MASK (0x3 << 25) -#define DSIM_PLL_BYPASS BIT(27) -#define DSIM_ESC_CLKEN BIT(28) -#define DSIM_TX_REQUEST_HSCLK BIT(31) +#define DSIM_ESC_PRESCALER(x) (((x) & 0xffff) << 0) +#define DSIM_ESC_PRESCALER_MASK (0xffff << 0) +#define DSIM_LANE_ESC_CLK_EN_DATA(x, offset) (((x) & 0xf) << offset) +#define DSIM_LANE_ESC_CLK_EN_DATA_MASK(offset) (0xf << offset) +#define DSIM_BYTE_CLK_SRC(x) (((x) & 0x3) << 25) +#define DSIM_BYTE_CLK_SRC_MASK (0x3 << 25) +#define DSIM_PLL_BYPASS BIT(27) /* DSIM_CONFIG */ #define DSIM_LANE_EN_CLK BIT(0) @@ -91,7 +86,6 @@ */ #define DSIM_HSE_DISABLE_MODE BIT(23) #define DSIM_AUTO_MODE BIT(24) -#define DSIM_VIDEO_MODE BIT(25) #define DSIM_BURST_MODE BIT(26) #define DSIM_SYNC_INFORM BIT(27) #define DSIM_EOT_DISABLE BIT(28) @@ -129,9 +123,9 @@ #define DSIM_MAIN_HBP_MASK ((0xffff) << 0) /* DSIM_MSYNC */ -#define DSIM_MAIN_VSA(x) ((x) << 22) +#define DSIM_MAIN_VSA(x, offset) ((x) << offset) #define DSIM_MAIN_HSA(x) ((x) << 0) -#define DSIM_MAIN_VSA_MASK ((0x3ff) << 22) +#define DSIM_MAIN_VSA_MASK(offset) ((0x3ff) << offset) #define DSIM_MAIN_HSA_MASK ((0xffff) << 0) /* DSIM_SDRESOL */ @@ -157,6 +151,11 @@ #define DSIM_INT_RX_ECC_ERR BIT(15) #define DSIM_INT_RX_CRC_ERR BIT(14) +/* DSIM_SFRCTRL */ +#define DSIM_SFR_CTRL_STAND_BY BIT(4) +#define DSIM_SFR_CTRL_SHADOW_UPDATE BIT(1) +#define DSIM_SFR_CTRL_SHADOW_EN BIT(0) + /* DSIM_FIFOCTRL */ #define DSIM_RX_DATA_FULL BIT(25) #define DSIM_RX_DATA_EMPTY BIT(24) @@ -191,9 +190,7 @@ #define DSIM_PLL_DPDNSWAP_DAT (1 << 24) #define DSIM_FREQ_BAND(x) ((x) << 24) #define DSIM_PLL_EN BIT(23) -#define DSIM_PLL_P(x, offset) ((x) << (offset)) -#define DSIM_PLL_M(x) ((x) << 4) -#define DSIM_PLL_S(x) ((x) << 1) +#define DSIM_PLL(x, offset) ((x) << (offset)) /* DSIM_PHYCTRL */ #define DSIM_PHYCTRL_ULPS_EXIT(x) (((x) & 0x1ff) << 0) @@ -222,25 +219,42 @@ #define DSI_XFER_TIMEOUT_MS 100 #define DSI_RX_FIFO_EMPTY 0x30800002 -#define OLD_SCLK_MIPI_CLK_NAME "pll_clk" - #define PS_TO_CYCLE(ps, hz) DIV64_U64_ROUND_CLOSEST(((ps) * (hz)), 1000000000000ULL) -static const char *const clk_names[5] = { - "bus_clk", - "sclk_mipi", - "phyclk_mipidphy0_bitclkdiv8", - "phyclk_mipidphy0_rxclkesc0", - "sclk_rgb_vclk_to_dsim0" -}; - enum samsung_dsim_transfer_type { EXYNOS_DSI_TX, EXYNOS_DSI_RX, }; +static struct clk_bulk_data exynos3_clk_bulk_data[] = { + { .id = "bus_clk" }, + { .id = "pll_clk" }, +}; + +static struct clk_bulk_data exynos4_clk_bulk_data[] = { + { .id = "bus_clk" }, + { .id = "sclk_mipi" }, +}; + +static struct clk_bulk_data exynos5433_clk_bulk_data[] = { + { .id = "bus_clk" }, + { .id = "sclk_mipi" }, + { .id = "phyclk_mipidphy0_bitclkdiv8" }, + { .id = "phyclk_mipidphy0_rxclkesc0" }, + { .id = "sclk_rgb_vclk_to_dsim0" }, +}; + +static struct clk_bulk_data exynos7870_clk_bulk_data[] = { + { .id = "bus" }, + { .id = "pll" }, + { .id = "byte" }, + { .id = "esc" }, +}; + enum reg_idx { - DSIM_STATUS_REG, /* Status register */ + DSIM_STATUS_REG, /* Status register (legacy) */ + DSIM_LINK_STATUS_REG, /* Link status register */ + DSIM_DPHY_STATUS_REG, /* D-PHY status register */ DSIM_SWRST_REG, /* Software reset register */ DSIM_CLKCTRL_REG, /* Clock control register */ DSIM_TIMEOUT_REG, /* Time out register */ @@ -255,6 +269,7 @@ enum reg_idx { DSIM_PKTHDR_REG, /* Packet Header FIFO register */ DSIM_PAYLOAD_REG, /* Payload FIFO register */ DSIM_RXFIFO_REG, /* Read FIFO register */ + DSIM_SFRCTRL_REG, /* SFR standby and shadow control register */ DSIM_FIFOCTRL_REG, /* FIFO status and control register */ DSIM_PLLCTRL_REG, /* PLL control register */ DSIM_PHYCTRL_REG, @@ -312,6 +327,32 @@ static const unsigned int exynos5433_reg_ofs[] = { [DSIM_PHYTIMING2_REG] = 0xBC, }; +static const unsigned int exynos7870_reg_ofs[] = { + [DSIM_LINK_STATUS_REG] = 0x04, + [DSIM_DPHY_STATUS_REG] = 0x08, + [DSIM_SWRST_REG] = 0x0C, + [DSIM_CLKCTRL_REG] = 0x10, + [DSIM_TIMEOUT_REG] = 0x14, + [DSIM_ESCMODE_REG] = 0x1C, + [DSIM_MDRESOL_REG] = 0x20, + [DSIM_MVPORCH_REG] = 0x24, + [DSIM_MHPORCH_REG] = 0x28, + [DSIM_MSYNC_REG] = 0x2C, + [DSIM_CONFIG_REG] = 0x30, + [DSIM_INTSRC_REG] = 0x34, + [DSIM_INTMSK_REG] = 0x38, + [DSIM_PKTHDR_REG] = 0x3C, + [DSIM_PAYLOAD_REG] = 0x40, + [DSIM_RXFIFO_REG] = 0x44, + [DSIM_SFRCTRL_REG] = 0x48, + [DSIM_FIFOCTRL_REG] = 0x4C, + [DSIM_PLLCTRL_REG] = 0x94, + [DSIM_PHYCTRL_REG] = 0xA4, + [DSIM_PHYTIMING_REG] = 0xB4, + [DSIM_PHYTIMING1_REG] = 0xB8, + [DSIM_PHYTIMING2_REG] = 0xBC, +}; + enum reg_value_idx { RESET_TYPE, PLL_TIMER, @@ -384,6 +425,24 @@ static const unsigned int exynos5433_reg_values[] = { [PHYTIMING_HS_TRAIL] = DSIM_PHYTIMING2_HS_TRAIL(0x0c), }; +static const unsigned int exynos7870_reg_values[] = { + [RESET_TYPE] = DSIM_SWRST, + [PLL_TIMER] = 80000, + [STOP_STATE_CNT] = 0xa, + [PHYCTRL_ULPS_EXIT] = DSIM_PHYCTRL_ULPS_EXIT(0x177), + [PHYCTRL_VREG_LP] = 0, + [PHYCTRL_SLEW_UP] = 0, + [PHYTIMING_LPX] = DSIM_PHYTIMING_LPX(0x07), + [PHYTIMING_HS_EXIT] = DSIM_PHYTIMING_HS_EXIT(0x0c), + [PHYTIMING_CLK_PREPARE] = DSIM_PHYTIMING1_CLK_PREPARE(0x08), + [PHYTIMING_CLK_ZERO] = DSIM_PHYTIMING1_CLK_ZERO(0x2b), + [PHYTIMING_CLK_POST] = DSIM_PHYTIMING1_CLK_POST(0x0d), + [PHYTIMING_CLK_TRAIL] = DSIM_PHYTIMING1_CLK_TRAIL(0x09), + [PHYTIMING_HS_PREPARE] = DSIM_PHYTIMING2_HS_PREPARE(0x09), + [PHYTIMING_HS_ZERO] = DSIM_PHYTIMING2_HS_ZERO(0x0f), + [PHYTIMING_HS_TRAIL] = DSIM_PHYTIMING2_HS_TRAIL(0x0c), +}; + static const unsigned int imx8mm_dsim_reg_values[] = { [RESET_TYPE] = DSIM_SWRST, [PLL_TIMER] = 500, @@ -405,13 +464,26 @@ static const unsigned int imx8mm_dsim_reg_values[] = { static const struct samsung_dsim_driver_data exynos3_dsi_driver_data = { .reg_ofs = exynos_reg_ofs, .plltmr_reg = 0x50, + .has_legacy_status_reg = 1, .has_freqband = 1, .has_clklane_stop = 1, - .num_clks = 2, + .clk_data = exynos3_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos3_clk_bulk_data), .max_freq = 1000, + .wait_for_hdr_fifo = 1, .wait_for_reset = 1, .num_bits_resol = 11, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -424,13 +496,26 @@ static const struct samsung_dsim_driver_data exynos3_dsi_driver_data = { static const struct samsung_dsim_driver_data exynos4_dsi_driver_data = { .reg_ofs = exynos_reg_ofs, .plltmr_reg = 0x50, + .has_legacy_status_reg = 1, .has_freqband = 1, .has_clklane_stop = 1, - .num_clks = 2, + .clk_data = exynos4_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos4_clk_bulk_data), .max_freq = 1000, + .wait_for_hdr_fifo = 1, .wait_for_reset = 1, .num_bits_resol = 11, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -443,11 +528,24 @@ static const struct samsung_dsim_driver_data exynos4_dsi_driver_data = { static const struct samsung_dsim_driver_data exynos5_dsi_driver_data = { .reg_ofs = exynos_reg_ofs, .plltmr_reg = 0x58, - .num_clks = 2, + .has_legacy_status_reg = 1, + .clk_data = exynos3_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos3_clk_bulk_data), .max_freq = 1000, + .wait_for_hdr_fifo = 1, .wait_for_reset = 1, .num_bits_resol = 11, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -459,12 +557,25 @@ static const struct samsung_dsim_driver_data exynos5_dsi_driver_data = { static const struct samsung_dsim_driver_data exynos5433_dsi_driver_data = { .reg_ofs = exynos5433_reg_ofs, .plltmr_reg = 0xa0, + .has_legacy_status_reg = 1, .has_clklane_stop = 1, - .num_clks = 5, + .clk_data = exynos5433_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos5433_clk_bulk_data), .max_freq = 1500, + .wait_for_hdr_fifo = 1, .wait_for_reset = 0, .num_bits_resol = 12, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = exynos5433_reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -476,12 +587,25 @@ static const struct samsung_dsim_driver_data exynos5433_dsi_driver_data = { static const struct samsung_dsim_driver_data exynos5422_dsi_driver_data = { .reg_ofs = exynos5433_reg_ofs, .plltmr_reg = 0xa0, + .has_legacy_status_reg = 1, .has_clklane_stop = 1, - .num_clks = 2, + .clk_data = exynos3_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos3_clk_bulk_data), .max_freq = 1500, + .wait_for_hdr_fifo = 1, .wait_for_reset = 1, .num_bits_resol = 12, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = exynos5422_reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -490,19 +614,62 @@ static const struct samsung_dsim_driver_data exynos5422_dsi_driver_data = { .min_freq = 500, }; +static const struct samsung_dsim_driver_data exynos7870_dsi_driver_data = { + .reg_ofs = exynos7870_reg_ofs, + .plltmr_reg = 0xa0, + .has_clklane_stop = 1, + .has_sfrctrl = 1, + .clk_data = exynos7870_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos7870_clk_bulk_data), + .max_freq = 1500, + .wait_for_hdr_fifo = 0, + .wait_for_reset = 1, + .num_bits_resol = 12, + .video_mode_bit = 18, + .pll_stable_bit = 24, + .esc_clken_bit = 16, + .byte_clken_bit = 17, + .tx_req_hsclk_bit = 20, + .lane_esc_clk_bit = 8, + .lane_esc_data_offset = 9, + .pll_p_offset = 13, + .pll_m_offset = 3, + .pll_s_offset = 0, + .main_vsa_offset = 16, + .reg_values = exynos7870_reg_values, + .pll_fin_min = 6, + .pll_fin_max = 12, + .m_min = 41, + .m_max = 125, + .min_freq = 500, +}; + static const struct samsung_dsim_driver_data imx8mm_dsi_driver_data = { .reg_ofs = exynos5433_reg_ofs, .plltmr_reg = 0xa0, + .has_legacy_status_reg = 1, .has_clklane_stop = 1, - .num_clks = 2, + .clk_data = exynos4_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos4_clk_bulk_data), .max_freq = 2100, + .wait_for_hdr_fifo = 1, .wait_for_reset = 0, .num_bits_resol = 12, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, /* * Unlike Exynos, PLL_P(PMS_P) offset 14 is used in i.MX8M Mini/Nano/Plus * downstream driver - drivers/gpu/drm/bridge/sec-dsim.c */ .pll_p_offset = 14, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = imx8mm_dsim_reg_values, .pll_fin_min = 2, .pll_fin_max = 30, @@ -518,6 +685,7 @@ samsung_dsim_types[DSIM_TYPE_COUNT] = { [DSIM_TYPE_EXYNOS5410] = &exynos5_dsi_driver_data, [DSIM_TYPE_EXYNOS5422] = &exynos5422_dsi_driver_data, [DSIM_TYPE_EXYNOS5433] = &exynos5433_dsi_driver_data, + [DSIM_TYPE_EXYNOS7870] = &exynos7870_dsi_driver_data, [DSIM_TYPE_IMX8MM] = &imx8mm_dsi_driver_data, [DSIM_TYPE_IMX8MP] = &imx8mm_dsi_driver_data, }; @@ -653,8 +821,9 @@ static unsigned long samsung_dsim_set_pll(struct samsung_dsim *dsi, writel(driver_data->reg_values[PLL_TIMER], dsi->reg_base + driver_data->plltmr_reg); - reg = DSIM_PLL_EN | DSIM_PLL_P(p, driver_data->pll_p_offset) | - DSIM_PLL_M(m) | DSIM_PLL_S(s); + reg = DSIM_PLL_EN | DSIM_PLL(p, driver_data->pll_p_offset) + | DSIM_PLL(m, driver_data->pll_m_offset) + | DSIM_PLL(s, driver_data->pll_s_offset); if (driver_data->has_freqband) { static const unsigned long freq_bands[] = { @@ -682,14 +851,17 @@ static unsigned long samsung_dsim_set_pll(struct samsung_dsim *dsi, samsung_dsim_write(dsi, DSIM_PLLCTRL_REG, reg); - timeout = 1000; + timeout = 3000; do { if (timeout-- == 0) { dev_err(dsi->dev, "PLL failed to stabilize\n"); return 0; } - reg = samsung_dsim_read(dsi, DSIM_STATUS_REG); - } while ((reg & DSIM_PLL_STABLE) == 0); + if (driver_data->has_legacy_status_reg) + reg = samsung_dsim_read(dsi, DSIM_STATUS_REG); + else + reg = samsung_dsim_read(dsi, DSIM_LINK_STATUS_REG); + } while ((reg & BIT(driver_data->pll_stable_bit)) == 0); dsi->hs_clock = fout; @@ -698,6 +870,7 @@ static unsigned long samsung_dsim_set_pll(struct samsung_dsim *dsi, static int samsung_dsim_enable_clock(struct samsung_dsim *dsi) { + const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; unsigned long hs_clk, byte_clk, esc_clk, pix_clk; unsigned long esc_div; u32 reg; @@ -731,15 +904,17 @@ static int samsung_dsim_enable_clock(struct samsung_dsim *dsi) hs_clk, byte_clk, esc_clk); reg = samsung_dsim_read(dsi, DSIM_CLKCTRL_REG); - reg &= ~(DSIM_ESC_PRESCALER_MASK | DSIM_LANE_ESC_CLK_EN_CLK - | DSIM_LANE_ESC_CLK_EN_DATA_MASK | DSIM_PLL_BYPASS - | DSIM_BYTE_CLK_SRC_MASK); - reg |= DSIM_ESC_CLKEN | DSIM_BYTE_CLKEN - | DSIM_ESC_PRESCALER(esc_div) - | DSIM_LANE_ESC_CLK_EN_CLK - | DSIM_LANE_ESC_CLK_EN_DATA(BIT(dsi->lanes) - 1) - | DSIM_BYTE_CLK_SRC(0) - | DSIM_TX_REQUEST_HSCLK; + reg &= ~(DSIM_ESC_PRESCALER_MASK | BIT(driver_data->lane_esc_clk_bit) + | DSIM_LANE_ESC_CLK_EN_DATA_MASK(driver_data->lane_esc_data_offset) + | DSIM_PLL_BYPASS + | DSIM_BYTE_CLK_SRC_MASK); + reg |= BIT(driver_data->esc_clken_bit) | BIT(driver_data->byte_clken_bit) + | DSIM_ESC_PRESCALER(esc_div) + | BIT(driver_data->lane_esc_clk_bit) + | DSIM_LANE_ESC_CLK_EN_DATA(BIT(dsi->lanes) - 1, + driver_data->lane_esc_data_offset) + | DSIM_BYTE_CLK_SRC(0) + | BIT(driver_data->tx_req_hsclk_bit); samsung_dsim_write(dsi, DSIM_CLKCTRL_REG, reg); return 0; @@ -843,11 +1018,14 @@ static void samsung_dsim_set_phy_ctrl(struct samsung_dsim *dsi) static void samsung_dsim_disable_clock(struct samsung_dsim *dsi) { + const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; u32 reg; reg = samsung_dsim_read(dsi, DSIM_CLKCTRL_REG); - reg &= ~(DSIM_LANE_ESC_CLK_EN_CLK | DSIM_LANE_ESC_CLK_EN_DATA_MASK - | DSIM_ESC_CLKEN | DSIM_BYTE_CLKEN); + reg &= ~(BIT(driver_data->lane_esc_clk_bit) + | DSIM_LANE_ESC_CLK_EN_DATA_MASK(driver_data->lane_esc_data_offset) + | BIT(driver_data->esc_clken_bit) + | BIT(driver_data->byte_clken_bit)); samsung_dsim_write(dsi, DSIM_CLKCTRL_REG, reg); reg = samsung_dsim_read(dsi, DSIM_PLLCTRL_REG); @@ -891,7 +1069,7 @@ static int samsung_dsim_init_link(struct samsung_dsim *dsi) * mode, otherwise it will support command mode. */ if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) { - reg |= DSIM_VIDEO_MODE; + reg |= BIT(driver_data->video_mode_bit); /* * The user manual describes that following bits are ignored in @@ -962,7 +1140,10 @@ static int samsung_dsim_init_link(struct samsung_dsim *dsi) return -EFAULT; } - reg = samsung_dsim_read(dsi, DSIM_STATUS_REG); + if (driver_data->has_legacy_status_reg) + reg = samsung_dsim_read(dsi, DSIM_STATUS_REG); + else + reg = samsung_dsim_read(dsi, DSIM_DPHY_STATUS_REG); if ((reg & DSIM_STOP_STATE_DAT(lanes_mask)) != DSIM_STOP_STATE_DAT(lanes_mask)) continue; @@ -983,6 +1164,7 @@ static void samsung_dsim_set_display_mode(struct samsung_dsim *dsi) { struct drm_display_mode *m = &dsi->mode; unsigned int num_bits_resol = dsi->driver_data->num_bits_resol; + unsigned int main_vsa_offset = dsi->driver_data->main_vsa_offset; u32 reg; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) { @@ -1009,7 +1191,7 @@ static void samsung_dsim_set_display_mode(struct samsung_dsim *dsi) reg = DSIM_MAIN_HFP(hfp) | DSIM_MAIN_HBP(hbp); samsung_dsim_write(dsi, DSIM_MHPORCH_REG, reg); - reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start) + reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start, main_vsa_offset) | DSIM_MAIN_HSA(hsa); samsung_dsim_write(dsi, DSIM_MSYNC_REG, reg); } @@ -1023,6 +1205,7 @@ static void samsung_dsim_set_display_mode(struct samsung_dsim *dsi) static void samsung_dsim_set_display_enable(struct samsung_dsim *dsi, bool enable) { + const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; u32 reg; reg = samsung_dsim_read(dsi, DSIM_MDRESOL_REG); @@ -1031,6 +1214,15 @@ static void samsung_dsim_set_display_enable(struct samsung_dsim *dsi, bool enabl else reg &= ~DSIM_MAIN_STAND_BY; samsung_dsim_write(dsi, DSIM_MDRESOL_REG, reg); + + if (driver_data->has_sfrctrl) { + reg = samsung_dsim_read(dsi, DSIM_SFRCTRL_REG); + if (enable) + reg |= DSIM_SFR_CTRL_STAND_BY; + else + reg &= ~DSIM_SFR_CTRL_STAND_BY; + samsung_dsim_write(dsi, DSIM_SFRCTRL_REG, reg); + } } static int samsung_dsim_wait_for_hdr_fifo(struct samsung_dsim *dsi) @@ -1087,6 +1279,7 @@ static void samsung_dsim_send_to_fifo(struct samsung_dsim *dsi, { struct device *dev = dsi->dev; struct mipi_dsi_packet *pkt = &xfer->packet; + const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; const u8 *payload = pkt->payload + xfer->tx_done; u16 length = pkt->payload_length - xfer->tx_done; bool first = !xfer->tx_done; @@ -1127,9 +1320,11 @@ static void samsung_dsim_send_to_fifo(struct samsung_dsim *dsi, return; reg = get_unaligned_le32(pkt->header); - if (samsung_dsim_wait_for_hdr_fifo(dsi)) { - dev_err(dev, "waiting for header FIFO timed out\n"); - return; + if (driver_data->wait_for_hdr_fifo) { + if (samsung_dsim_wait_for_hdr_fifo(dsi)) { + dev_err(dev, "waiting for header FIFO timed out\n"); + return; + } } if (NEQV(xfer->flags & MIPI_DSI_MSG_USE_LPM, @@ -1922,7 +2117,7 @@ int samsung_dsim_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct samsung_dsim *dsi; - int ret, i; + int ret; dsi = devm_drm_bridge_alloc(dev, struct samsung_dsim, bridge, &samsung_dsim_bridge_funcs); if (IS_ERR(dsi)) @@ -1946,23 +2141,11 @@ int samsung_dsim_probe(struct platform_device *pdev) if (ret) return dev_err_probe(dev, ret, "failed to get regulators\n"); - dsi->clks = devm_kcalloc(dev, dsi->driver_data->num_clks, - sizeof(*dsi->clks), GFP_KERNEL); - if (!dsi->clks) - return -ENOMEM; - - for (i = 0; i < dsi->driver_data->num_clks; i++) { - dsi->clks[i] = devm_clk_get(dev, clk_names[i]); - if (IS_ERR(dsi->clks[i])) { - if (strcmp(clk_names[i], "sclk_mipi") == 0) { - dsi->clks[i] = devm_clk_get(dev, OLD_SCLK_MIPI_CLK_NAME); - if (!IS_ERR(dsi->clks[i])) - continue; - } - - dev_info(dev, "failed to get the clock: %s\n", clk_names[i]); - return PTR_ERR(dsi->clks[i]); - } + ret = devm_clk_bulk_get(dev, dsi->driver_data->num_clks, + dsi->driver_data->clk_data); + if (ret) { + dev_err(dev, "failed to get clocks in bulk (%d)\n", ret); + return ret; } dsi->reg_base = devm_platform_ioremap_resource(pdev, 0); @@ -2035,7 +2218,7 @@ static int samsung_dsim_suspend(struct device *dev) { struct samsung_dsim *dsi = dev_get_drvdata(dev); const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; - int ret, i; + int ret; usleep_range(10000, 20000); @@ -2051,8 +2234,7 @@ static int samsung_dsim_suspend(struct device *dev) phy_power_off(dsi->phy); - for (i = driver_data->num_clks - 1; i > -1; i--) - clk_disable_unprepare(dsi->clks[i]); + clk_bulk_disable_unprepare(driver_data->num_clks, driver_data->clk_data); ret = regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies); if (ret < 0) @@ -2065,7 +2247,7 @@ static int samsung_dsim_resume(struct device *dev) { struct samsung_dsim *dsi = dev_get_drvdata(dev); const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; - int ret, i; + int ret; ret = regulator_bulk_enable(ARRAY_SIZE(dsi->supplies), dsi->supplies); if (ret < 0) { @@ -2073,11 +2255,9 @@ static int samsung_dsim_resume(struct device *dev) return ret; } - for (i = 0; i < driver_data->num_clks; i++) { - ret = clk_prepare_enable(dsi->clks[i]); - if (ret < 0) - goto err_clk; - } + ret = clk_bulk_prepare_enable(driver_data->num_clks, driver_data->clk_data); + if (ret < 0) + goto err_clk; ret = phy_power_on(dsi->phy); if (ret < 0) { @@ -2088,8 +2268,7 @@ static int samsung_dsim_resume(struct device *dev) return 0; err_clk: - while (--i > -1) - clk_disable_unprepare(dsi->clks[i]); + clk_bulk_disable_unprepare(driver_data->num_clks, driver_data->clk_data); regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies); return ret; diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 0ac723a46a91..8e3cb08241c8 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -696,7 +696,6 @@ static void drm_dev_init_release(struct drm_device *dev, void *res) mutex_destroy(&dev->master_mutex); mutex_destroy(&dev->clientlist_mutex); mutex_destroy(&dev->filelist_mutex); - mutex_destroy(&dev->struct_mutex); } static int drm_dev_init(struct drm_device *dev, @@ -737,7 +736,6 @@ static int drm_dev_init(struct drm_device *dev, INIT_LIST_HEAD(&dev->vblank_event_list); spin_lock_init(&dev->event_lock); - mutex_init(&dev->struct_mutex); mutex_init(&dev->filelist_mutex); mutex_init(&dev->clientlist_mutex); mutex_init(&dev->master_mutex); diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index a52e95555549..af63f4d00315 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -2533,8 +2533,6 @@ static const struct drm_gpuvm_ops lock_ops = { * * The expected usage is:: * - * .. code-block:: c - * * vm_bind { * struct drm_exec exec; * diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index 805aa28c1723..b8d9b7251319 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -69,7 +69,6 @@ struct decon_context { void __iomem *regs; unsigned long irq_flags; bool i80_if; - bool suspended; wait_queue_head_t wait_vsync_queue; atomic_t wait_vsync_event; @@ -132,9 +131,6 @@ static void decon_shadow_protect_win(struct decon_context *ctx, static void decon_wait_for_vblank(struct decon_context *ctx) { - if (ctx->suspended) - return; - atomic_set(&ctx->wait_vsync_event, 1); /* @@ -210,9 +206,6 @@ static void decon_commit(struct exynos_drm_crtc *crtc) struct drm_display_mode *mode = &crtc->base.state->adjusted_mode; u32 val, clkdiv; - if (ctx->suspended) - return; - /* nothing to do if we haven't set the mode yet */ if (mode->htotal == 0 || mode->vtotal == 0) return; @@ -274,9 +267,6 @@ static int decon_enable_vblank(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; u32 val; - if (ctx->suspended) - return -EPERM; - if (!test_and_set_bit(0, &ctx->irq_flags)) { val = readl(ctx->regs + VIDINTCON0); @@ -299,9 +289,6 @@ static void decon_disable_vblank(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; u32 val; - if (ctx->suspended) - return; - if (test_and_clear_bit(0, &ctx->irq_flags)) { val = readl(ctx->regs + VIDINTCON0); @@ -404,9 +391,6 @@ static void decon_atomic_begin(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; int i; - if (ctx->suspended) - return; - for (i = 0; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, true); } @@ -427,9 +411,6 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc, unsigned int pitch = fb->pitches[0]; unsigned int vidw_addr0_base = ctx->data->vidw_buf_start_base; - if (ctx->suspended) - return; - /* * SHADOWCON/PRTCON register is used for enabling timing. * @@ -517,9 +498,6 @@ static void decon_disable_plane(struct exynos_drm_crtc *crtc, unsigned int win = plane->index; u32 val; - if (ctx->suspended) - return; - /* protect windows */ decon_shadow_protect_win(ctx, win, true); @@ -538,9 +516,6 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; int i; - if (ctx->suspended) - return; - for (i = 0; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, false); exynos_crtc_handle_event(crtc); @@ -568,9 +543,6 @@ static void decon_atomic_enable(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; int ret; - if (!ctx->suspended) - return; - ret = pm_runtime_resume_and_get(ctx->dev); if (ret < 0) { DRM_DEV_ERROR(ctx->dev, "failed to enable DECON device.\n"); @@ -584,8 +556,6 @@ static void decon_atomic_enable(struct exynos_drm_crtc *crtc) decon_enable_vblank(ctx->crtc); decon_commit(ctx->crtc); - - ctx->suspended = false; } static void decon_atomic_disable(struct exynos_drm_crtc *crtc) @@ -593,9 +563,6 @@ static void decon_atomic_disable(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; int i; - if (ctx->suspended) - return; - /* * We need to make sure that all windows are disabled before we * suspend that connector. Otherwise we might try to scan from @@ -605,8 +572,6 @@ static void decon_atomic_disable(struct exynos_drm_crtc *crtc) decon_disable_plane(crtc, &ctx->planes[i]); pm_runtime_put_sync(ctx->dev); - - ctx->suspended = true; } static const struct exynos_drm_crtc_ops decon_crtc_ops = { @@ -727,7 +692,6 @@ static int decon_probe(struct platform_device *pdev) return -ENOMEM; ctx->dev = dev; - ctx->suspended = true; ctx->data = of_device_get_match_data(dev); i80_if_timings = of_get_child_by_name(dev->of_node, "i80-if-timings"); diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 896a03639e2d..c4d098ab7863 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -154,6 +154,11 @@ static const struct samsung_dsim_plat_data exynos5433_dsi_pdata = { .host_ops = &exynos_dsi_exynos_host_ops, }; +static const struct samsung_dsim_plat_data exynos7870_dsi_pdata = { + .hw_type = DSIM_TYPE_EXYNOS7870, + .host_ops = &exynos_dsi_exynos_host_ops, +}; + static const struct of_device_id exynos_dsi_of_match[] = { { .compatible = "samsung,exynos3250-mipi-dsi", @@ -175,6 +180,10 @@ static const struct of_device_id exynos_dsi_of_match[] = { .compatible = "samsung,exynos5433-mipi-dsi", .data = &exynos5433_dsi_pdata, }, + { + .compatible = "samsung,exynos7870-mipi-dsi", + .data = &exynos7870_dsi_pdata, + }, { /* sentinel. */ } }; MODULE_DEVICE_TABLE(of, exynos_dsi_of_match); diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c index 1cf394369127..c0feca58511d 100644 --- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c +++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c @@ -726,8 +726,8 @@ void oaktrail_hdmi_teardown(struct drm_device *dev) if (hdmi_dev) { pdev = hdmi_dev->dev; - pci_set_drvdata(pdev, NULL); oaktrail_hdmi_i2c_exit(pdev); + pci_set_drvdata(pdev, NULL); iounmap(hdmi_dev->regs); kfree(hdmi_dev); pci_dev_put(pdev); diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 853543443072..e58c0c158b3a 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -32,6 +32,7 @@ i915-y += \ i915_scatterlist.o \ i915_switcheroo.o \ i915_sysfs.o \ + i915_timer_util.o \ i915_utils.o \ intel_clock_gating.o \ intel_cpu_info.o \ @@ -280,6 +281,7 @@ i915-y += \ display/intel_modeset_setup.o \ display/intel_modeset_verify.o \ display/intel_overlay.o \ + display/intel_panic.o \ display/intel_pch.o \ display/intel_pch_display.o \ display/intel_pch_refclk.o \ diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c index 3eb96d8abba8..407deb5dfb57 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.c +++ b/drivers/gpu/drm/i915/display/i9xx_plane.c @@ -15,7 +15,6 @@ #include "i9xx_plane.h" #include "i9xx_plane_regs.h" #include "intel_atomic.h" -#include "intel_bo.h" #include "intel_de.h" #include "intel_display_irq.h" #include "intel_display_regs.h" @@ -23,6 +22,7 @@ #include "intel_fb.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" +#include "intel_panic.h" #include "intel_plane.h" #include "intel_sprite.h" @@ -1178,7 +1178,7 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, drm_WARN_ON(display->drm, pipe != crtc->pipe); - intel_fb = intel_bo_alloc_framebuffer(); + intel_fb = intel_framebuffer_alloc(); if (!intel_fb) { drm_dbg_kms(display->drm, "failed to alloc fb\n"); return; diff --git a/drivers/gpu/drm/i915/display/intel_bo.c b/drivers/gpu/drm/i915/display/intel_bo.c index d29c1508ccb9..6ae1374d5c2b 100644 --- a/drivers/gpu/drm/i915/display/intel_bo.c +++ b/drivers/gpu/drm/i915/display/intel_bo.c @@ -59,18 +59,3 @@ void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj) { i915_debugfs_describe_obj(m, to_intel_bo(obj)); } - -struct intel_framebuffer *intel_bo_alloc_framebuffer(void) -{ - return i915_gem_object_alloc_framebuffer(); -} - -int intel_bo_panic_setup(struct drm_scanout_buffer *sb) -{ - return i915_gem_object_panic_setup(sb); -} - -void intel_bo_panic_finish(struct intel_framebuffer *fb) -{ - return i915_gem_object_panic_finish(fb); -} diff --git a/drivers/gpu/drm/i915/display/intel_bo.h b/drivers/gpu/drm/i915/display/intel_bo.h index 97087a64d23b..48d87019e48a 100644 --- a/drivers/gpu/drm/i915/display/intel_bo.h +++ b/drivers/gpu/drm/i915/display/intel_bo.h @@ -25,8 +25,5 @@ struct intel_frontbuffer *intel_bo_set_frontbuffer(struct drm_gem_object *obj, struct intel_frontbuffer *front); void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj); -struct intel_framebuffer *intel_bo_alloc_framebuffer(void); -int intel_bo_panic_setup(struct drm_scanout_buffer *sb); -void intel_bo_panic_finish(struct intel_framebuffer *fb); #endif /* __INTEL_BO__ */ diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 46017091bb0b..08083ac83a74 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -26,6 +26,7 @@ */ #include <linux/iopoll.h> +#include <linux/seq_buf.h> #include <linux/string_helpers.h> #include <drm/display/drm_dp_helper.h> @@ -5067,11 +5068,45 @@ static bool port_in_use(struct intel_display *display, enum port port) return false; } +static const char *intel_ddi_encoder_name(struct intel_display *display, + enum port port, enum phy phy, + struct seq_buf *s) +{ + if (DISPLAY_VER(display) >= 13 && port >= PORT_D_XELPD) { + seq_buf_printf(s, "DDI %c/PHY %c", + port_name(port - PORT_D_XELPD + PORT_D), + phy_name(phy)); + } else if (DISPLAY_VER(display) >= 12) { + enum tc_port tc_port = intel_port_to_tc(display, port); + + seq_buf_printf(s, "DDI %s%c/PHY %s%c", + port >= PORT_TC1 ? "TC" : "", + port >= PORT_TC1 ? port_tc_name(port) : port_name(port), + tc_port != TC_PORT_NONE ? "TC" : "", + tc_port != TC_PORT_NONE ? tc_port_name(tc_port) : phy_name(phy)); + } else if (DISPLAY_VER(display) >= 11) { + enum tc_port tc_port = intel_port_to_tc(display, port); + + seq_buf_printf(s, "DDI %c%s/PHY %s%c", + port_name(port), + port >= PORT_C ? " (TC)" : "", + tc_port != TC_PORT_NONE ? "TC" : "", + tc_port != TC_PORT_NONE ? tc_port_name(tc_port) : phy_name(phy)); + } else { + seq_buf_printf(s, "DDI %c/PHY %c", port_name(port), phy_name(phy)); + } + + drm_WARN_ON(display->drm, seq_buf_has_overflowed(s)); + + return seq_buf_str(s); +} + void intel_ddi_init(struct intel_display *display, const struct intel_bios_encoder_data *devdata) { struct intel_digital_port *dig_port; struct intel_encoder *encoder; + DECLARE_SEQ_BUF(encoder_name, 20); bool init_hdmi, init_dp; enum port port; enum phy phy; @@ -5156,37 +5191,9 @@ void intel_ddi_init(struct intel_display *display, encoder = &dig_port->base; encoder->devdata = devdata; - if (DISPLAY_VER(display) >= 13 && port >= PORT_D_XELPD) { - drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs, - DRM_MODE_ENCODER_TMDS, - "DDI %c/PHY %c", - port_name(port - PORT_D_XELPD + PORT_D), - phy_name(phy)); - } else if (DISPLAY_VER(display) >= 12) { - enum tc_port tc_port = intel_port_to_tc(display, port); - - drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs, - DRM_MODE_ENCODER_TMDS, - "DDI %s%c/PHY %s%c", - port >= PORT_TC1 ? "TC" : "", - port >= PORT_TC1 ? port_tc_name(port) : port_name(port), - tc_port != TC_PORT_NONE ? "TC" : "", - tc_port != TC_PORT_NONE ? tc_port_name(tc_port) : phy_name(phy)); - } else if (DISPLAY_VER(display) >= 11) { - enum tc_port tc_port = intel_port_to_tc(display, port); - - drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs, - DRM_MODE_ENCODER_TMDS, - "DDI %c%s/PHY %s%c", - port_name(port), - port >= PORT_C ? " (TC)" : "", - tc_port != TC_PORT_NONE ? "TC" : "", - tc_port != TC_PORT_NONE ? tc_port_name(tc_port) : phy_name(phy)); - } else { - drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs, - DRM_MODE_ENCODER_TMDS, - "DDI %c/PHY %c", port_name(port), phy_name(phy)); - } + drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs, + DRM_MODE_ENCODER_TMDS, "%s", + intel_ddi_encoder_name(display, port, phy, &encoder_name)); intel_encoder_link_check_init(encoder, intel_ddi_link_check); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c1a3a95c65f0..5dca7f96b425 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7271,6 +7271,9 @@ static void intel_atomic_dsb_finish(struct intel_atomic_state *state, intel_psr_trigger_frame_change_event(new_crtc_state->dsb_commit, state, crtc); + intel_psr_wait_for_idle_dsb(new_crtc_state->dsb_commit, + new_crtc_state); + if (new_crtc_state->use_dsb) intel_dsb_vblank_evade(state, new_crtc_state->dsb_commit); diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index 65f0efc35bb7..a002bc6ce7b0 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -1944,6 +1944,11 @@ void intel_display_device_info_print(const struct intel_display_device_info *inf drm_printf(p, "rawclk rate: %u kHz\n", runtime->rawclk_freq); } +bool intel_display_device_present(struct intel_display *display) +{ + return display && HAS_DISPLAY(display); +} + /* * Assuming the device has display hardware, should it be enabled? * diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 6e87b763fe7c..f329f1beafef 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -306,6 +306,7 @@ struct intel_display_device_info { } color; }; +bool intel_display_device_present(struct intel_display *display); bool intel_display_device_enabled(struct intel_display *display); struct intel_display *intel_display_device_probe(struct pci_dev *pdev); void intel_display_device_remove(struct intel_display *display); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index fd9d2527889b..358ab922d7a7 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -60,6 +60,7 @@ struct intel_ddi_buf_trans; struct intel_fbc; struct intel_global_objs_state; struct intel_hdcp_shim; +struct intel_panic; struct intel_tc_port; /* @@ -149,6 +150,7 @@ struct intel_framebuffer { unsigned int vtd_guard; unsigned int (*panic_tiling)(unsigned int x, unsigned int y, unsigned int width); + struct intel_panic *panic; }; enum intel_hotplug_state { diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 12084a542fc5..eb05ef4bd9f6 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -508,9 +508,6 @@ static void intel_dp_aux_vesa_disable_backlight(const struct drm_connector_state struct intel_panel *panel = &connector->panel; struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - if (panel->backlight.edp.vesa.luminance_control_support) - return; - drm_edp_backlight_disable(&intel_dp->aux, &panel->backlight.edp.vesa.info); if (!panel->backlight.edp.vesa.info.aux_enable) @@ -533,7 +530,7 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, luminance_range->max_luminance, panel->vbt.backlight.pwm_freq_hz, intel_dp->edp_dpcd, ¤t_level, ¤t_mode, - false); + panel->backlight.edp.vesa.luminance_control_support); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index b210c3250501..22a4a1575d22 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -20,6 +20,7 @@ #include "intel_fb.h" #include "intel_fb_bo.h" #include "intel_frontbuffer.h" +#include "intel_panic.h" #include "intel_plane.h" #define check_array_bounds(display, a, i) drm_WARN_ON((display)->drm, (i) >= ARRAY_SIZE(a)) @@ -2343,6 +2344,26 @@ intel_user_framebuffer_create(struct drm_device *dev, return fb; } +struct intel_framebuffer *intel_framebuffer_alloc(void) +{ + struct intel_framebuffer *intel_fb; + struct intel_panic *panic; + + intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); + if (!intel_fb) + return NULL; + + panic = intel_panic_alloc(); + if (!panic) { + kfree(intel_fb); + return NULL; + } + + intel_fb->panic = panic; + + return intel_fb; +} + struct drm_framebuffer * intel_framebuffer_create(struct drm_gem_object *obj, const struct drm_format_info *info, @@ -2351,7 +2372,7 @@ intel_framebuffer_create(struct drm_gem_object *obj, struct intel_framebuffer *intel_fb; int ret; - intel_fb = intel_bo_alloc_framebuffer(); + intel_fb = intel_framebuffer_alloc(); if (!intel_fb) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index 403b8b63721a..22514d5f2bb6 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -104,6 +104,9 @@ int intel_framebuffer_init(struct intel_framebuffer *ifb, struct drm_gem_object *obj, const struct drm_format_info *info, struct drm_mode_fb_cmd2 *mode_cmd); + +struct intel_framebuffer *intel_framebuffer_alloc(void); + struct drm_framebuffer * intel_framebuffer_create(struct drm_gem_object *obj, const struct drm_format_info *info, diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index d4c5deff9cbe..0d380c825791 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -98,11 +98,7 @@ struct intel_fbc { struct intel_display *display; const struct intel_fbc_funcs *funcs; - /* - * This is always the inner lock when overlapping with - * struct_mutex and it's the outer lock when overlapping - * with stolen_lock. - */ + /* This is always the outer lock when overlapping with stolen_lock */ struct mutex lock; unsigned int busy_bits; @@ -383,11 +379,11 @@ static void i8xx_fbc_program_cfb(struct intel_fbc *fbc) struct drm_i915_private *i915 = to_i915(display->drm); drm_WARN_ON(display->drm, - range_overflows_end_t(u64, i915_gem_stolen_area_address(i915), + range_end_overflows_t(u64, i915_gem_stolen_area_address(i915), i915_gem_stolen_node_offset(&fbc->compressed_fb), U32_MAX)); drm_WARN_ON(display->drm, - range_overflows_end_t(u64, i915_gem_stolen_area_address(i915), + range_end_overflows_t(u64, i915_gem_stolen_area_address(i915), i915_gem_stolen_node_offset(&fbc->compressed_llb), U32_MAX)); intel_de_write(display, FBC_CFB_BASE, @@ -1550,14 +1546,14 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, * having a Y offset that isn't divisible by 4 causes FIFO underrun * and screen flicker. */ - if (DISPLAY_VER(display) >= 9 && + if (IS_DISPLAY_VER(display, 9, 12) && plane_state->view.color_plane[0].y & 3) { plane_state->no_fbc_reason = "plane start Y offset misaligned"; return 0; } /* Wa_22010751166: icl, ehl, tgl, dg1, rkl */ - if (DISPLAY_VER(display) >= 11 && + if (IS_DISPLAY_VER(display, 9, 12) && (plane_state->view.color_plane[0].y + (drm_rect_height(&plane_state->uapi.src) >> 16)) & 3) { plane_state->no_fbc_reason = "plane end Y offset misaligned"; diff --git a/drivers/gpu/drm/i915/display/intel_panic.c b/drivers/gpu/drm/i915/display/intel_panic.c new file mode 100644 index 000000000000..7311ce4e8b6c --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_panic.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include <drm/drm_panic.h> + +#include "gem/i915_gem_object.h" +#include "intel_display_types.h" +#include "intel_fb.h" +#include "intel_panic.h" + +struct intel_panic *intel_panic_alloc(void) +{ + return i915_gem_object_alloc_panic(); +} + +int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb) +{ + struct intel_framebuffer *fb = sb->private; + struct drm_gem_object *obj = intel_fb_bo(&fb->base); + + return i915_gem_object_panic_setup(panic, sb, obj, fb->panic_tiling); +} + +void intel_panic_finish(struct intel_panic *panic) +{ + return i915_gem_object_panic_finish(panic); +} diff --git a/drivers/gpu/drm/i915/display/intel_panic.h b/drivers/gpu/drm/i915/display/intel_panic.h new file mode 100644 index 000000000000..afb472e924aa --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_panic.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __INTEL_PANIC_H__ +#define __INTEL_PANIC_H__ + +struct drm_scanout_buffer; +struct intel_panic; + +struct intel_panic *intel_panic_alloc(void); +int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb); +void intel_panic_finish(struct intel_panic *panic); + +#endif /* __INTEL_PANIC_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c index 81f05ee9a21a..2329f09d413d 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.c +++ b/drivers/gpu/drm/i915/display/intel_plane.c @@ -47,7 +47,6 @@ #include "gem/i915_gem_object.h" #include "i915_scheduler_types.h" #include "i9xx_plane_regs.h" -#include "intel_bo.h" #include "intel_cdclk.h" #include "intel_cursor.h" #include "intel_display_rps.h" @@ -56,6 +55,7 @@ #include "intel_fb.h" #include "intel_fb_pin.h" #include "intel_fbdev.h" +#include "intel_panic.h" #include "intel_plane.h" #include "intel_psr.h" #include "skl_scaler.h" @@ -1326,7 +1326,7 @@ static void intel_panic_flush(struct drm_plane *plane) struct drm_framebuffer *fb = plane_state->hw.fb; struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); - intel_bo_panic_finish(intel_fb); + intel_panic_finish(intel_fb->panic); if (crtc_state->enable_psr2_sel_fetch) { /* Force a full update for psr2 */ @@ -1409,7 +1409,7 @@ static int intel_get_scanout_buffer(struct drm_plane *plane, return -EOPNOTSUPP; } sb->private = intel_fb; - ret = intel_bo_panic_setup(sb); + ret = intel_panic_setup(intel_fb->panic, sb); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 22433fe2ee14..01bf304c705f 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -42,6 +42,7 @@ #include "intel_dmc.h" #include "intel_dp.h" #include "intel_dp_aux.h" +#include "intel_dsb.h" #include "intel_frontbuffer.h" #include "intel_hdmi.h" #include "intel_psr.h" @@ -494,12 +495,14 @@ static u8 intel_dp_get_su_capability(struct intel_dp *intel_dp) { u8 su_capability = 0; - if (intel_dp->psr.sink_panel_replay_su_support) - drm_dp_dpcd_readb(&intel_dp->aux, - DP_PANEL_REPLAY_CAP_CAPABILITY, - &su_capability); - else + if (intel_dp->psr.sink_panel_replay_su_support) { + if (drm_dp_dpcd_read_byte(&intel_dp->aux, + DP_PANEL_REPLAY_CAP_CAPABILITY, + &su_capability) < 0) + return 0; + } else { su_capability = intel_dp->psr_dpcd[1]; + } return su_capability; } @@ -2997,35 +3000,57 @@ void intel_psr_post_plane_update(struct intel_atomic_state *state, } } -static int _psr2_ready_for_pipe_update_locked(struct intel_dp *intel_dp) +/* + * From bspec: Panel Self Refresh (BDW+) + * Max. time for PSR to idle = Inverse of the refresh rate + 6 ms of + * exit training time + 1.5 ms of aux channel handshake. 50 ms is + * defensive enough to cover everything. + */ +#define PSR_IDLE_TIMEOUT_MS 50 + +static int +_psr2_ready_for_pipe_update_locked(const struct intel_crtc_state *new_crtc_state, + struct intel_dsb *dsb) { - struct intel_display *display = to_intel_display(intel_dp); - enum transcoder cpu_transcoder = intel_dp->psr.transcoder; + struct intel_display *display = to_intel_display(new_crtc_state); + enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder; /* * Any state lower than EDP_PSR2_STATUS_STATE_DEEP_SLEEP is enough. * As all higher states has bit 4 of PSR2 state set we can just wait for * EDP_PSR2_STATUS_STATE_DEEP_SLEEP to be cleared. */ + if (dsb) { + intel_dsb_poll(dsb, EDP_PSR2_STATUS(display, cpu_transcoder), + EDP_PSR2_STATUS_STATE_DEEP_SLEEP, 0, 200, + PSR_IDLE_TIMEOUT_MS * 1000 / 200); + return true; + } + return intel_de_wait_for_clear(display, EDP_PSR2_STATUS(display, cpu_transcoder), - EDP_PSR2_STATUS_STATE_DEEP_SLEEP, 50); + EDP_PSR2_STATUS_STATE_DEEP_SLEEP, + PSR_IDLE_TIMEOUT_MS); } -static int _psr1_ready_for_pipe_update_locked(struct intel_dp *intel_dp) +static int +_psr1_ready_for_pipe_update_locked(const struct intel_crtc_state *new_crtc_state, + struct intel_dsb *dsb) { - struct intel_display *display = to_intel_display(intel_dp); - enum transcoder cpu_transcoder = intel_dp->psr.transcoder; + struct intel_display *display = to_intel_display(new_crtc_state); + enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder; + + if (dsb) { + intel_dsb_poll(dsb, psr_status_reg(display, cpu_transcoder), + EDP_PSR_STATUS_STATE_MASK, 0, 200, + PSR_IDLE_TIMEOUT_MS * 1000 / 200); + return true; + } - /* - * From bspec: Panel Self Refresh (BDW+) - * Max. time for PSR to idle = Inverse of the refresh rate + 6 ms of - * exit training time + 1.5 ms of aux channel handshake. 50 ms is - * defensive enough to cover everything. - */ return intel_de_wait_for_clear(display, psr_status_reg(display, cpu_transcoder), - EDP_PSR_STATUS_STATE_MASK, 50); + EDP_PSR_STATUS_STATE_MASK, + PSR_IDLE_TIMEOUT_MS); } /** @@ -3054,9 +3079,11 @@ void intel_psr_wait_for_idle_locked(const struct intel_crtc_state *new_crtc_stat continue; if (intel_dp->psr.sel_update_enabled) - ret = _psr2_ready_for_pipe_update_locked(intel_dp); + ret = _psr2_ready_for_pipe_update_locked(new_crtc_state, + NULL); else - ret = _psr1_ready_for_pipe_update_locked(intel_dp); + ret = _psr1_ready_for_pipe_update_locked(new_crtc_state, + NULL); if (ret) drm_err(display->drm, @@ -3064,6 +3091,18 @@ void intel_psr_wait_for_idle_locked(const struct intel_crtc_state *new_crtc_stat } } +void intel_psr_wait_for_idle_dsb(struct intel_dsb *dsb, + const struct intel_crtc_state *new_crtc_state) +{ + if (!new_crtc_state->has_psr || new_crtc_state->has_panel_replay) + return; + + if (new_crtc_state->has_sel_update) + _psr2_ready_for_pipe_update_locked(new_crtc_state, dsb); + else + _psr1_ready_for_pipe_update_locked(new_crtc_state, dsb); +} + static bool __psr_wait_for_idle_locked(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index 9b061a22361f..077751aa599f 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -52,6 +52,8 @@ void intel_psr_get_config(struct intel_encoder *encoder, void intel_psr_irq_handler(struct intel_dp *intel_dp, u32 psr_iir); void intel_psr_short_pulse(struct intel_dp *intel_dp); void intel_psr_wait_for_idle_locked(const struct intel_crtc_state *new_crtc_state); +void intel_psr_wait_for_idle_dsb(struct intel_dsb *dsb, + const struct intel_crtc_state *new_crtc_state); bool intel_psr_enabled(struct intel_dp *intel_dp); int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, struct intel_crtc *crtc); diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 950dc79dbdd4..e13fb781e7b2 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -20,6 +20,7 @@ #include "intel_fb.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" +#include "intel_panic.h" #include "intel_plane.h" #include "intel_psr.h" #include "intel_psr_regs.h" @@ -3028,7 +3029,7 @@ skl_get_initial_plane_config(struct intel_crtc *crtc, return; } - intel_fb = intel_bo_alloc_framebuffer(); + intel_fb = intel_framebuffer_alloc(); if (!intel_fb) { drm_dbg_kms(display->drm, "failed to alloc fb\n"); return; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index f243f8a5215d..39c7c32e1e74 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -182,7 +182,7 @@ enum { * the object. Simple! ... The relocation entries are stored in user memory * and so to access them we have to copy them into a local buffer. That copy * has to avoid taking any pagefaults as they may lead back to a GEM object - * requiring the struct_mutex (i.e. recursive deadlock). So once again we split + * requiring the vm->mutex (i.e. recursive deadlock). So once again we split * the relocation into multiple passes. First we try to do everything within an * atomic context (avoid the pagefaults) which requires that we never wait. If * we detect that we may wait, or if we need to fault, then we have to fallback diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 1f38e367c60b..478011e5ecb3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -459,8 +459,8 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) atomic_inc(&i915->mm.free_count); /* - * Since we require blocking on struct_mutex to unbind the freed - * object from the GPU before releasing resources back to the + * Since we require blocking on drm_i915_gem_object->vma.lock to unbind + * the freed object from the GPU before releasing resources back to the * system, we can not do that directly from the RCU callback (which may * be a softirq context), but must instead then defer that work onto a * kthread. We use the RCU callback rather than move the freed object diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 565f8fa330db..148034ef504d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -16,9 +16,9 @@ #include "i915_gem_ww.h" #include "i915_vma_types.h" -struct drm_scanout_buffer; enum intel_region_id; -struct intel_framebuffer; +struct drm_scanout_buffer; +struct intel_panic; #define obj_to_i915(obj__) to_i915((obj__)->base.dev) @@ -693,9 +693,10 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj); int i915_gem_object_truncate(struct drm_i915_gem_object *obj); -struct intel_framebuffer *i915_gem_object_alloc_framebuffer(void); -int i915_gem_object_panic_setup(struct drm_scanout_buffer *sb); -void i915_gem_object_panic_finish(struct intel_framebuffer *fb); +struct intel_panic *i915_gem_object_alloc_panic(void); +int i915_gem_object_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb, + struct drm_gem_object *_obj, bool panic_tiling); +void i915_gem_object_panic_finish(struct intel_panic *panic); /** * i915_gem_object_pin_map - return a contiguous mapping of the entire object diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index c16a57160b26..76d2178572b6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -357,23 +357,13 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj, return vaddr ?: ERR_PTR(-ENOMEM); } -struct i915_panic_data { +struct intel_panic { struct page **pages; int page; void *vaddr; }; -struct i915_framebuffer { - struct intel_framebuffer base; - struct i915_panic_data panic; -}; - -static inline struct i915_panic_data *to_i915_panic_data(struct intel_framebuffer *fb) -{ - return &container_of_const(fb, struct i915_framebuffer, base)->panic; -} - -static void i915_panic_kunmap(struct i915_panic_data *panic) +static void i915_panic_kunmap(struct intel_panic *panic) { if (panic->vaddr) { drm_clflush_virt_range(panic->vaddr, PAGE_SIZE); @@ -420,7 +410,7 @@ static void i915_gem_object_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int new_page; unsigned int offset; struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; - struct i915_panic_data *panic = to_i915_panic_data(fb); + struct intel_panic *panic = fb->panic; if (fb->panic_tiling) offset = fb->panic_tiling(sb->width, x, y); @@ -441,14 +431,13 @@ static void i915_gem_object_panic_page_set_pixel(struct drm_scanout_buffer *sb, } } -struct intel_framebuffer *i915_gem_object_alloc_framebuffer(void) +struct intel_panic *i915_gem_object_alloc_panic(void) { - struct i915_framebuffer *i915_fb; + struct intel_panic *panic; + + panic = kzalloc(sizeof(*panic), GFP_KERNEL); - i915_fb = kzalloc(sizeof(*i915_fb), GFP_KERNEL); - if (i915_fb) - return &i915_fb->base; - return NULL; + return panic; } /* @@ -456,12 +445,11 @@ struct intel_framebuffer *i915_gem_object_alloc_framebuffer(void) * Use current vaddr if it exists, or setup a list of pages. * pfn is not supported yet. */ -int i915_gem_object_panic_setup(struct drm_scanout_buffer *sb) +int i915_gem_object_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb, + struct drm_gem_object *_obj, bool panic_tiling) { enum i915_map_type has_type; - struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; - struct i915_panic_data *panic = to_i915_panic_data(fb); - struct drm_i915_gem_object *obj = to_intel_bo(intel_fb_bo(&fb->base)); + struct drm_i915_gem_object *obj = to_intel_bo(_obj); void *ptr; ptr = page_unpack_bits(obj->mm.mapping, &has_type); @@ -471,7 +459,7 @@ int i915_gem_object_panic_setup(struct drm_scanout_buffer *sb) else iosys_map_set_vaddr(&sb->map[0], ptr); - if (fb->panic_tiling) + if (panic_tiling) sb->set_pixel = i915_gem_object_panic_map_set_pixel; return 0; } @@ -486,10 +474,8 @@ int i915_gem_object_panic_setup(struct drm_scanout_buffer *sb) return -EOPNOTSUPP; } -void i915_gem_object_panic_finish(struct intel_framebuffer *fb) +void i915_gem_object_panic_finish(struct intel_panic *panic) { - struct i915_panic_data *panic = to_i915_panic_data(fb); - i915_panic_kunmap(panic); panic->page = -1; kfree(panic->pages); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b81e67504bbe..7a3e74a6676e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -170,7 +170,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww, * Also note that although these lists do not hold a reference to * the object we can safely grab one here: The final object * unreferencing and the bound_list are both protected by the - * dev->struct_mutex and so we won't ever be able to observe an + * i915->mm.obj_lock and so we won't ever be able to observe an * object on the bound_list with a reference count equals 0. */ for (phase = phases; phase->list; phase++) { @@ -185,7 +185,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww, /* * We serialize our access to unreferenced objects through - * the use of the struct_mutex. While the objects are not + * the use of the obj_lock. While the objects are not * yet freed (due to RCU then a workqueue) we still want * to be able to shrink their pages, so they remain on * the unbound/bound list until actually freed. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 991666fd9f85..54829801d3f7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -217,10 +217,10 @@ static unsigned long to_wait_timeout(s64 timeout_ns) * * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any * non-zero timeout parameter the wait ioctl will wait for the given number of - * nanoseconds on an object becoming unbusy. Since the wait itself does so - * without holding struct_mutex the object may become re-busied before this - * function completes. A similar but shorter * race condition exists in the busy - * ioctl + * nanoseconds on an object becoming unbusy. Since the wait occurs without + * holding a global or exclusive lock the object may become re-busied before + * this function completes. A similar but shorter * race condition exists + * in the busy ioctl */ int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index e747f5ed195e..539c620364e3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -5,7 +5,7 @@ #include "i915_selftest.h" -#include "display/intel_display_core.h" +#include "display/intel_display_device.h" #include "gt/intel_context.h" #include "gt/intel_engine_regs.h" #include "gt/intel_engine_user.h" @@ -122,7 +122,7 @@ static bool fastblit_supports_x_tiling(const struct drm_i915_private *i915) if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 55)) return false; - return HAS_DISPLAY(display); + return intel_display_device_present(display); } static bool fast_blit_ok(const struct blit_buffer *buf) diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 98c7f6052069..10070ee4d74c 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -14,7 +14,6 @@ #include "i915_active_types.h" #include "i915_sw_fence.h" -#include "i915_utils.h" #include "intel_engine_types.h" #include "intel_sseu.h" #include "intel_wakeref.h" diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 03baa7fa0a27..7f389cb0bde4 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -106,14 +106,18 @@ * preemption, but just sampling the new tail pointer). * */ + #include <linux/interrupt.h> #include <linux/string_helpers.h> +#include "gen8_engine_cs.h" #include "i915_drv.h" +#include "i915_list_util.h" #include "i915_reg.h" +#include "i915_timer_util.h" #include "i915_trace.h" #include "i915_vgpu.h" -#include "gen8_engine_cs.h" +#include "i915_wait_util.h" #include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_engine_heartbeat.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index 86b5a9ba323d..c7befc5c20d0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -7,6 +7,7 @@ #include "gem/i915_gem_object.h" #include "i915_drv.h" +#include "i915_list_util.h" #include "intel_engine_pm.h" #include "intel_gt_buffer_pool.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index a60822e2b5d4..c3afa321fe30 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -4,6 +4,7 @@ */ #include "i915_drv.h" +#include "i915_wait_util.h" #include "intel_gt.h" #include "intel_gt_mcr.h" #include "intel_gt_print.h" diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 9ca42589da4d..bf38cc5fe872 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -341,7 +341,7 @@ static int vlv_rc6_init(struct intel_rc6 *rc6) return PTR_ERR(pctx); } - GEM_BUG_ON(range_overflows_end_t(u64, + GEM_BUG_ON(range_end_overflows_t(u64, i915->dsm.stolen.start, pctx->stolen->start, U32_MAX)); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 4a1675dea1c7..41b5036dc538 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -9,18 +9,17 @@ #include "display/intel_display_reset.h" #include "display/intel_overlay.h" - #include "gem/i915_gem_context.h" - #include "gt/intel_gt_regs.h" - #include "gt/uc/intel_gsc_fw.h" +#include "uc/intel_guc.h" #include "i915_drv.h" #include "i915_file_private.h" #include "i915_gpu_error.h" #include "i915_irq.h" #include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_breadcrumbs.h" #include "intel_engine_pm.h" #include "intel_engine_regs.h" @@ -32,8 +31,6 @@ #include "intel_pci_config.h" #include "intel_reset.h" -#include "uc/intel_guc.h" - #define RESET_MAX_RETRIES 3 static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h index 4f5fd393af6f..ee4eb574a219 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset_types.h +++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h @@ -20,7 +20,7 @@ struct intel_reset { * FENCE registers). * * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to - * acquire the struct_mutex to reset an engine, we need an explicit + * acquire a global lock to reset an engine, we need an explicit * flag to prevent two concurrent reset attempts in the same engine. * As the number of engines continues to grow, allocate the flags from * the most significant bits. diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 2a6d79abf25b..8314a4b0505e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -15,18 +15,19 @@ #include "i915_irq.h" #include "i915_mitigations.h" #include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_breadcrumbs.h" #include "intel_context.h" +#include "intel_engine_heartbeat.h" +#include "intel_engine_pm.h" #include "intel_engine_regs.h" #include "intel_gt.h" #include "intel_gt_irq.h" +#include "intel_gt_print.h" #include "intel_gt_regs.h" #include "intel_reset.h" #include "intel_ring.h" #include "shmem_utils.h" -#include "intel_engine_heartbeat.h" -#include "intel_engine_pm.h" -#include "intel_gt_print.h" /* Rough estimate of the typical request size, performing a flush, * set-context and then emitting the batch. diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 006042e0b229..4da94098bd3e 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -10,9 +10,11 @@ #include "display/intel_display.h" #include "display/intel_display_rps.h" #include "soc/intel_dram.h" + #include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_breadcrumbs.h" #include "intel_gt.h" #include "intel_gt_clock_utils.h" diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index 57308c4d664a..85b43f9b9d95 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -9,6 +9,7 @@ #include <linux/lockdep.h> #include "i915_active.h" +#include "i915_list_util.h" #include "i915_syncmap.h" #include "intel_timeline_types.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c index 69ed946a39e5..a5184f09d1de 100644 --- a/drivers/gpu/drm/i915/gt/selftest_tlb.c +++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c @@ -3,17 +3,17 @@ * Copyright © 2022 Intel Corporation */ -#include "i915_selftest.h" - #include "gem/i915_gem_internal.h" #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gen8_engine_cs.h" #include "i915_gem_ww.h" +#include "i915_selftest.h" +#include "i915_wait_util.h" +#include "intel_context.h" #include "intel_engine_regs.h" #include "intel_gpu_commands.h" -#include "intel_context.h" #include "intel_gt.h" #include "intel_ring.h" diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c index aab2759067d2..4a81bc396b21 100644 --- a/drivers/gpu/drm/i915/gt/sysfs_engines.c +++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c @@ -7,6 +7,7 @@ #include <linux/sysfs.h> #include "i915_drv.h" +#include "i915_timer_util.h" #include "intel_engine.h" #include "intel_engine_heartbeat.h" #include "sysfs_engines.h" diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c index d8edd7c054c8..e7444ebc373e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c @@ -10,11 +10,13 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_print.h" + +#include "i915_drv.h" +#include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_gsc_proxy.h" #include "intel_gsc_uc.h" #include "intel_gsc_uc_heci_cmd_submit.h" -#include "i915_drv.h" -#include "i915_reg.h" /* * GSC proxy: diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c index 2fde5c360cff..9bd29be7656f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c @@ -8,6 +8,8 @@ #include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_ring.h" + +#include "i915_wait_util.h" #include "intel_gsc_uc_heci_cmd_submit.h" struct gsc_heci_pkt { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index f360f020d8f1..52ec4421a211 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -8,15 +8,17 @@ #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm_irq.h" #include "gt/intel_gt_regs.h" + +#include "i915_drv.h" +#include "i915_irq.h" +#include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_guc.h" #include "intel_guc_ads.h" #include "intel_guc_capture.h" #include "intel_guc_print.h" #include "intel_guc_slpc.h" #include "intel_guc_submission.h" -#include "i915_drv.h" -#include "i915_irq.h" -#include "i915_reg.h" /** * DOC: GuC diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 380a11c92d63..3e7e5badcc2b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -5,11 +5,12 @@ #include <linux/circ_buf.h> #include <linux/ktime.h> -#include <linux/time64.h> #include <linux/string_helpers.h> +#include <linux/time64.h> #include <linux/timekeeping.h> #include "i915_drv.h" +#include "i915_wait_util.h" #include "intel_guc_ct.h" #include "intel_guc_print.h" diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 384d1400134d..b1bda1b84f0a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -13,9 +13,11 @@ #include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" #include "gt/intel_rps.h" + +#include "i915_drv.h" +#include "i915_wait_util.h" #include "intel_guc_fw.h" #include "intel_guc_print.h" -#include "i915_drv.h" static void guc_prepare_xfer(struct intel_gt *gt) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 09a64f224c49..cdff48920ee6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -6,6 +6,8 @@ #include <linux/debugfs.h> #include <linux/string_helpers.h> +#include <drm/drm_managed.h> + #include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_irq.h" @@ -511,7 +513,11 @@ static void guc_log_relay_unmap(struct intel_guc_log *log) void intel_guc_log_init_early(struct intel_guc_log *log) { - mutex_init(&log->relay.lock); + struct intel_guc *guc = log_to_guc(log); + struct drm_i915_private *i915 = guc_to_i915(guc); + + drmm_mutex_init(&i915->drm, &log->relay.lock); + drmm_mutex_init(&i915->drm, &log->guc_lock); INIT_WORK(&log->relay.flush_work, copy_debug_logs_work); log->relay.started = false; } @@ -677,7 +683,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) if (level < GUC_LOG_LEVEL_DISABLED || level > GUC_LOG_LEVEL_MAX) return -EINVAL; - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(&log->guc_lock); if (log->level == level) goto out_unlock; @@ -695,7 +701,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) log->level = level; out_unlock: - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(&log->guc_lock); return ret; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index 02127703be80..13cb93ad0710 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -42,6 +42,14 @@ enum { struct intel_guc_log { u32 level; + /* + * Protects concurrent access and modification of intel_guc_log->level. + * + * This lock replaces the legacy struct_mutex usage in + * intel_guc_log system. + */ + struct mutex guc_lock; + /* Allocation settings */ struct { s32 bytes; /* Size in bytes */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index d5ee6e5e1443..fa9af08f9708 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -3,17 +3,20 @@ * Copyright © 2021 Intel Corporation */ -#include <drm/drm_cache.h> #include <linux/string_helpers.h> +#include <drm/drm_cache.h> + +#include "gt/intel_gt.h" +#include "gt/intel_gt_regs.h" +#include "gt/intel_rps.h" + #include "i915_drv.h" #include "i915_reg.h" -#include "intel_guc_slpc.h" +#include "i915_wait_util.h" #include "intel_guc_print.h" +#include "intel_guc_slpc.h" #include "intel_mchbar_regs.h" -#include "gt/intel_gt.h" -#include "gt/intel_gt_regs.h" -#include "gt/intel_rps.h" /** * DOC: SLPC - Dynamic Frequency management diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 127316d2c8aa..68f2b8d363ac 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -25,16 +25,16 @@ #include "gt/intel_mocs.h" #include "gt/intel_ring.h" +#include "i915_drv.h" +#include "i915_irq.h" +#include "i915_reg.h" +#include "i915_trace.h" +#include "i915_wait_util.h" #include "intel_guc_ads.h" #include "intel_guc_capture.h" #include "intel_guc_print.h" #include "intel_guc_submission.h" -#include "i915_drv.h" -#include "i915_reg.h" -#include "i915_irq.h" -#include "i915_trace.h" - /** * DOC: GuC-based command submission * diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index a91e23c22ea1..d432fdd69833 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1921,7 +1921,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) if (!bb) return -ENOMEM; - bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? false : true; + bb->ppgtt = s->buf_addr_type != GTT_BUFFER; /* * The start_offset stores the batch buffer's start gma's diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 2f7208843367..0b810baad20a 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -33,14 +33,16 @@ * */ -#include "i915_drv.h" -#include "i915_reg.h" #include "gt/intel_context.h" #include "gt/intel_engine_regs.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt_regs.h" #include "gt/intel_ring.h" + #include "gvt.h" +#include "i915_drv.h" +#include "i915_reg.h" +#include "i915_wait_util.h" #include "trace.h" #define GEN9_MOCS_SIZE 64 diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 23fa098c4479..c2e38d4bcd01 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -26,11 +26,11 @@ * */ +#include <linux/debugfs.h> #include <linux/sched/mm.h> #include <linux/sort.h> #include <linux/string_helpers.h> -#include <linux/debugfs.h> #include <drm/drm_debugfs.h> #include "gem/i915_gem_context.h" @@ -54,6 +54,7 @@ #include "i915_irq.h" #include "i915_reg.h" #include "i915_scheduler.h" +#include "i915_wait_util.h" #include "intel_mchbar_regs.h" static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 70f042ce8705..a28c3710c4d5 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -51,13 +51,15 @@ #include "display/intel_bw.h" #include "display/intel_cdclk.h" #include "display/intel_crtc.h" -#include "display/intel_display_core.h" +#include "display/intel_display_device.h" #include "display/intel_display_driver.h" +#include "display/intel_display_power.h" #include "display/intel_dmc.h" #include "display/intel_dp.h" #include "display/intel_dpt.h" #include "display/intel_encoder.h" #include "display/intel_fbdev.h" +#include "display/intel_gmbus.h" #include "display/intel_hotplug.h" #include "display/intel_opregion.h" #include "display/intel_overlay.h" @@ -977,7 +979,7 @@ void i915_driver_shutdown(struct drm_i915_private *i915) intel_power_domains_disable(display); drm_client_dev_suspend(&i915->drm, false); - if (HAS_DISPLAY(display)) { + if (intel_display_device_present(display)) { drm_kms_helper_poll_disable(&i915->drm); intel_display_driver_disable_user_access(display); @@ -989,7 +991,7 @@ void i915_driver_shutdown(struct drm_i915_private *i915) intel_irq_suspend(i915); intel_hpd_cancel_work(display); - if (HAS_DISPLAY(display)) + if (intel_display_device_present(display)) intel_display_driver_suspend_access(display); intel_encoder_suspend_all(display); @@ -1060,7 +1062,7 @@ static int i915_drm_suspend(struct drm_device *dev) * properly. */ intel_power_domains_disable(display); drm_client_dev_suspend(dev, false); - if (HAS_DISPLAY(display)) { + if (intel_display_device_present(display)) { drm_kms_helper_poll_disable(dev); intel_display_driver_disable_user_access(display); } @@ -1072,7 +1074,7 @@ static int i915_drm_suspend(struct drm_device *dev) intel_irq_suspend(dev_priv); intel_hpd_cancel_work(display); - if (HAS_DISPLAY(display)) + if (intel_display_device_present(display)) intel_display_driver_suspend_access(display); intel_encoder_suspend_all(display); @@ -1219,7 +1221,7 @@ static int i915_drm_resume(struct drm_device *dev) */ intel_irq_resume(dev_priv); - if (HAS_DISPLAY(display)) + if (intel_display_device_present(display)) drm_mode_config_reset(dev); i915_gem_resume(dev_priv); @@ -1228,14 +1230,14 @@ static int i915_drm_resume(struct drm_device *dev) intel_clock_gating_init(dev_priv); - if (HAS_DISPLAY(display)) + if (intel_display_device_present(display)) intel_display_driver_resume_access(display); intel_hpd_init(display); intel_display_driver_resume(display); - if (HAS_DISPLAY(display)) { + if (intel_display_device_present(display)) { intel_display_driver_enable_user_access(display); drm_kms_helper_poll_enable(dev); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2f3965feada1..6a768aad8edd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -114,8 +114,7 @@ struct i915_gem_mm { struct intel_memory_region *stolen_region; /** Memory allocator for GTT stolen memory */ struct drm_mm stolen; - /** Protects the usage of the GTT stolen memory allocator. This is - * always the inner lock when overlapping with struct_mutex. */ + /** Protects the usage of the GTT stolen memory allocator */ struct mutex stolen_lock; /* Protects bound_list/unbound_list and #drm_i915_gem_object.mm.link */ @@ -222,6 +221,9 @@ struct drm_i915_private { bool irqs_enabled; + /* LPT/WPT IOSF sideband protection */ + struct mutex sbi_lock; + /* VLV/CHV IOSF sideband */ struct { struct mutex lock; /* protect sideband access */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8c8d43451f35..e14a0c3db999 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -847,8 +847,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) /* * Only called during RPM suspend. All users of the userfault_list * must be holding an RPM wakeref to ensure that this can not - * run concurrently with themselves (and use the struct_mutex for - * protection between themselves). + * run concurrently with themselves. */ list_for_each_entry_safe(obj, on, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a5fa40ab5de2..8d5da222a187 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -163,11 +163,6 @@ static void ivb_parity_work(struct work_struct *work) u32 misccpctl; u8 slice = 0; - /* We must turn off DOP level clock gating to access the L3 registers. - * In order to prevent a get/put style interface, acquire struct mutex - * any time we access those registers. - */ - mutex_lock(&dev_priv->drm.struct_mutex); /* If we've screwed up tracking, just let the interrupt fire again */ if (drm_WARN_ON(&dev_priv->drm, !dev_priv->l3_parity.which_slice)) @@ -225,7 +220,6 @@ out: gen5_gt_enable_irq(gt, GT_PARITY_ERROR(dev_priv)); spin_unlock_irq(gt->irq_lock); - mutex_unlock(&dev_priv->drm.struct_mutex); } static irqreturn_t valleyview_irq_handler(int irq, void *arg) diff --git a/drivers/gpu/drm/i915/i915_list_util.h b/drivers/gpu/drm/i915/i915_list_util.h new file mode 100644 index 000000000000..4e515dc8a3e0 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_list_util.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __I915_LIST_UTIL_H__ +#define __I915_LIST_UTIL_H__ + +#include <linux/list.h> +#include <asm/rwonce.h> + +static inline void __list_del_many(struct list_head *head, + struct list_head *first) +{ + first->prev = head; + WRITE_ONCE(head->next, first); +} + +static inline int list_is_last_rcu(const struct list_head *list, + const struct list_head *head) +{ + return READ_ONCE(list->next) == head; +} + +#endif /* __I915_LIST_UTIL_H__ */ diff --git a/drivers/gpu/drm/i915/i915_ptr_util.h b/drivers/gpu/drm/i915/i915_ptr_util.h new file mode 100644 index 000000000000..9f8931d7d99b --- /dev/null +++ b/drivers/gpu/drm/i915/i915_ptr_util.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __I915_PTR_UTIL_H__ +#define __I915_PTR_UTIL_H__ + +#include <linux/types.h> + +#define ptr_mask_bits(ptr, n) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (typeof(ptr))(__v & -BIT(n)); \ +}) + +#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1)) + +#define ptr_unpack_bits(ptr, bits, n) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + *(bits) = __v & (BIT(n) - 1); \ + (typeof(ptr))(__v & -BIT(n)); \ +}) + +#define ptr_pack_bits(ptr, bits, n) ({ \ + unsigned long __bits = (bits); \ + GEM_BUG_ON(__bits & -BIT(n)); \ + ((typeof(ptr))((unsigned long)(ptr) | __bits)); \ +}) + +#define ptr_dec(ptr) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (typeof(ptr))(__v - 1); \ +}) + +#define ptr_inc(ptr) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (typeof(ptr))(__v + 1); \ +}) + +#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT) +#define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT) +#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT) +#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT) + +static __always_inline ptrdiff_t ptrdiff(const void *a, const void *b) +{ + return a - b; +} + +#define u64_to_ptr(T, x) ({ \ + typecheck(u64, x); \ + (T *)(uintptr_t)(x); \ +}) + +/* + * container_of_user: Extract the superclass from a pointer to a member. + * + * Exactly like container_of() with the exception that it plays nicely + * with sparse for __user @ptr. + */ +#define container_of_user(ptr, type, member) ({ \ + void __user *__mptr = (void __user *)(ptr); \ + BUILD_BUG_ON_MSG(!__same_type(*(ptr), typeof_member(type, member)) && \ + !__same_type(*(ptr), void), \ + "pointer type mismatch in container_of()"); \ + ((type __user *)(__mptr - offsetof(type, member))); }) + +#endif /* __I915_PTR_UTIL_H__ */ diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 5f7e8138ec14..b09135301f39 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -31,19 +31,20 @@ #include <linux/llist.h> #include <linux/lockdep.h> +#include <uapi/drm/i915_drm.h> + #include "gem/i915_gem_context_types.h" #include "gt/intel_context_types.h" #include "gt/intel_engine_types.h" #include "gt/intel_timeline_types.h" #include "i915_gem.h" +#include "i915_ptr_util.h" #include "i915_scheduler.h" #include "i915_selftest.h" #include "i915_sw_fence.h" #include "i915_vma_resource.h" -#include <uapi/drm/i915_drm.h> - struct drm_file; struct drm_i915_gem_object; struct drm_printer; diff --git a/drivers/gpu/drm/i915/i915_switcheroo.c b/drivers/gpu/drm/i915/i915_switcheroo.c index 3a95a55b2e87..d5b6d8ab31a2 100644 --- a/drivers/gpu/drm/i915/i915_switcheroo.c +++ b/drivers/gpu/drm/i915/i915_switcheroo.c @@ -5,7 +5,7 @@ #include <linux/vga_switcheroo.h> -#include "display/intel_display_core.h" +#include "display/intel_display_device.h" #include "i915_driver.h" #include "i915_drv.h" @@ -22,7 +22,7 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev, dev_err(&pdev->dev, "DRM not initialized, aborting switch.\n"); return; } - if (!HAS_DISPLAY(display)) { + if (!intel_display_device_present(display)) { dev_err(&pdev->dev, "Device state not initialized, aborting switch.\n"); return; } @@ -52,7 +52,8 @@ static bool i915_switcheroo_can_switch(struct pci_dev *pdev) * locking inversion with the driver load path. And the access here is * completely racy anyway. So don't bother with locking for now. */ - return i915 && HAS_DISPLAY(display) && atomic_read(&i915->drm.open_count) == 0; + return i915 && intel_display_device_present(display) && + atomic_read(&i915->drm.open_count) == 0; } static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { diff --git a/drivers/gpu/drm/i915/i915_timer_util.c b/drivers/gpu/drm/i915/i915_timer_util.c new file mode 100644 index 000000000000..ee4cfd8b3c07 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_timer_util.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include <linux/jiffies.h> + +#include "i915_timer_util.h" + +void cancel_timer(struct timer_list *t) +{ + if (!timer_active(t)) + return; + + timer_delete(t); + WRITE_ONCE(t->expires, 0); +} + +void set_timer_ms(struct timer_list *t, unsigned long timeout) +{ + if (!timeout) { + cancel_timer(t); + return; + } + + timeout = msecs_to_jiffies(timeout); + + /* + * Paranoia to make sure the compiler computes the timeout before + * loading 'jiffies' as jiffies is volatile and may be updated in + * the background by a timer tick. All to reduce the complexity + * of the addition and reduce the risk of losing a jiffy. + */ + barrier(); + + /* Keep t->expires = 0 reserved to indicate a canceled timer. */ + mod_timer(t, jiffies + timeout ?: 1); +} diff --git a/drivers/gpu/drm/i915/i915_timer_util.h b/drivers/gpu/drm/i915/i915_timer_util.h new file mode 100644 index 000000000000..f35ad730820c --- /dev/null +++ b/drivers/gpu/drm/i915/i915_timer_util.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __I915_TIMER_UTIL_H__ +#define __I915_TIMER_UTIL_H__ + +#include <linux/timer.h> +#include <asm/rwonce.h> + +void cancel_timer(struct timer_list *t); +void set_timer_ms(struct timer_list *t, unsigned long timeout); + +static inline bool timer_active(const struct timer_list *t) +{ + return READ_ONCE(t->expires); +} + +static inline bool timer_expired(const struct timer_list *t) +{ + return timer_active(t) && !timer_pending(t); +} + +#endif /* __I915_TIMER_UTIL_H__ */ diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index b60c28fbd207..49f7ed413132 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -47,36 +47,6 @@ bool i915_error_injected(void) #endif -void cancel_timer(struct timer_list *t) -{ - if (!timer_active(t)) - return; - - timer_delete(t); - WRITE_ONCE(t->expires, 0); -} - -void set_timer_ms(struct timer_list *t, unsigned long timeout) -{ - if (!timeout) { - cancel_timer(t); - return; - } - - timeout = msecs_to_jiffies(timeout); - - /* - * Paranoia to make sure the compiler computes the timeout before - * loading 'jiffies' as jiffies is volatile and may be updated in - * the background by a timer tick. All to reduce the complexity - * of the addition and reduce the risk of losing a jiffy. - */ - barrier(); - - /* Keep t->expires = 0 reserved to indicate a canceled timer. */ - mod_timer(t, jiffies + timeout ?: 1); -} - bool i915_vtd_active(struct drm_i915_private *i915) { if (device_iommu_mapped(i915->drm.dev)) diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 9cb40c2c4b12..a0c892e4c40d 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -25,7 +25,6 @@ #ifndef __I915_UTILS_H #define __I915_UTILS_H -#include <linux/list.h> #include <linux/overflow.h> #include <linux/sched.h> #include <linux/string_helpers.h> @@ -38,7 +37,6 @@ #endif struct drm_i915_private; -struct timer_list; #define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \ __stringify(x), (long)(x)) @@ -67,88 +65,12 @@ bool i915_error_injected(void); drm_err(&(i915)->drm, fmt, ##__VA_ARGS__); \ }) -#define range_overflows(start, size, max) ({ \ - typeof(start) start__ = (start); \ - typeof(size) size__ = (size); \ - typeof(max) max__ = (max); \ - (void)(&start__ == &size__); \ - (void)(&start__ == &max__); \ - start__ >= max__ || size__ > max__ - start__; \ -}) - -#define range_overflows_t(type, start, size, max) \ - range_overflows((type)(start), (type)(size), (type)(max)) - -#define range_overflows_end(start, size, max) ({ \ - typeof(start) start__ = (start); \ - typeof(size) size__ = (size); \ - typeof(max) max__ = (max); \ - (void)(&start__ == &size__); \ - (void)(&start__ == &max__); \ - start__ > max__ || size__ > max__ - start__; \ -}) - -#define range_overflows_end_t(type, start, size, max) \ - range_overflows_end((type)(start), (type)(size), (type)(max)) - -#define ptr_mask_bits(ptr, n) ({ \ - unsigned long __v = (unsigned long)(ptr); \ - (typeof(ptr))(__v & -BIT(n)); \ -}) - -#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1)) - -#define ptr_unpack_bits(ptr, bits, n) ({ \ - unsigned long __v = (unsigned long)(ptr); \ - *(bits) = __v & (BIT(n) - 1); \ - (typeof(ptr))(__v & -BIT(n)); \ -}) - -#define ptr_pack_bits(ptr, bits, n) ({ \ - unsigned long __bits = (bits); \ - GEM_BUG_ON(__bits & -BIT(n)); \ - ((typeof(ptr))((unsigned long)(ptr) | __bits)); \ -}) - -#define ptr_dec(ptr) ({ \ - unsigned long __v = (unsigned long)(ptr); \ - (typeof(ptr))(__v - 1); \ -}) - -#define ptr_inc(ptr) ({ \ - unsigned long __v = (unsigned long)(ptr); \ - (typeof(ptr))(__v + 1); \ -}) - -#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT) -#define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT) -#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT) -#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT) - #define fetch_and_zero(ptr) ({ \ typeof(*ptr) __T = *(ptr); \ *(ptr) = (typeof(*ptr))0; \ __T; \ }) -static __always_inline ptrdiff_t ptrdiff(const void *a, const void *b) -{ - return a - b; -} - -/* - * container_of_user: Extract the superclass from a pointer to a member. - * - * Exactly like container_of() with the exception that it plays nicely - * with sparse for __user @ptr. - */ -#define container_of_user(ptr, type, member) ({ \ - void __user *__mptr = (void __user *)(ptr); \ - BUILD_BUG_ON_MSG(!__same_type(*(ptr), typeof_member(type, member)) && \ - !__same_type(*(ptr), void), \ - "pointer type mismatch in container_of()"); \ - ((type __user *)(__mptr - offsetof(type, member))); }) - /* * check_user_mbz: Check that a user value exists and is zero * @@ -167,11 +89,6 @@ static __always_inline ptrdiff_t ptrdiff(const void *a, const void *b) get_user(mbz__, (U)) ? -EFAULT : mbz__ ? -EINVAL : 0; \ }) -#define u64_to_ptr(T, x) ({ \ - typecheck(u64, x); \ - (T *)(uintptr_t)(x); \ -}) - #define __mask_next_bit(mask) ({ \ int __idx = ffs(mask) - 1; \ mask &= ~BIT(__idx); \ @@ -183,19 +100,6 @@ static inline bool is_power_of_2_u64(u64 n) return (n != 0 && ((n & (n - 1)) == 0)); } -static inline void __list_del_many(struct list_head *head, - struct list_head *first) -{ - first->prev = head; - WRITE_ONCE(head->next, first); -} - -static inline int list_is_last_rcu(const struct list_head *list, - const struct list_head *head) -{ - return READ_ONCE(list->next) == head; -} - static inline unsigned long msecs_to_jiffies_timeout(const unsigned int m) { unsigned long j = msecs_to_jiffies(m); @@ -230,112 +134,6 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) } } -/* - * __wait_for - magic wait macro - * - * Macro to help avoid open coding check/wait/timeout patterns. Note that it's - * important that we check the condition again after having timed out, since the - * timeout could be due to preemption or similar and we've never had a chance to - * check the condition before the timeout. - */ -#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \ - const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \ - long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ - int ret__; \ - might_sleep(); \ - for (;;) { \ - const bool expired__ = ktime_after(ktime_get_raw(), end__); \ - OP; \ - /* Guarantee COND check prior to timeout */ \ - barrier(); \ - if (COND) { \ - ret__ = 0; \ - break; \ - } \ - if (expired__) { \ - ret__ = -ETIMEDOUT; \ - break; \ - } \ - usleep_range(wait__, wait__ * 2); \ - if (wait__ < (Wmax)) \ - wait__ <<= 1; \ - } \ - ret__; \ -}) - -#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \ - (Wmax)) -#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) - -/* - * If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. - * On PREEMPT_RT the context isn't becoming atomic because it is used in an - * interrupt handler or because a spinlock_t is acquired. This leads to - * warnings which don't occur otherwise and therefore the check is disabled. - */ -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) && IS_ENABLED(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT) -# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) -#else -# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) -#endif - -#define _wait_for_atomic(COND, US, ATOMIC) \ -({ \ - int cpu, ret, timeout = (US) * 1000; \ - u64 base; \ - _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \ - if (!(ATOMIC)) { \ - preempt_disable(); \ - cpu = smp_processor_id(); \ - } \ - base = local_clock(); \ - for (;;) { \ - u64 now = local_clock(); \ - if (!(ATOMIC)) \ - preempt_enable(); \ - /* Guarantee COND check prior to timeout */ \ - barrier(); \ - if (COND) { \ - ret = 0; \ - break; \ - } \ - if (now - base >= timeout) { \ - ret = -ETIMEDOUT; \ - break; \ - } \ - cpu_relax(); \ - if (!(ATOMIC)) { \ - preempt_disable(); \ - if (unlikely(cpu != smp_processor_id())) { \ - timeout -= now - base; \ - cpu = smp_processor_id(); \ - base = local_clock(); \ - } \ - } \ - } \ - ret; \ -}) - -#define wait_for_us(COND, US) \ -({ \ - int ret__; \ - BUILD_BUG_ON(!__builtin_constant_p(US)); \ - if ((US) > 10) \ - ret__ = _wait_for((COND), (US), 10, 10); \ - else \ - ret__ = _wait_for_atomic((COND), (US), 0); \ - ret__; \ -}) - -#define wait_for_atomic_us(COND, US) \ -({ \ - BUILD_BUG_ON(!__builtin_constant_p(US)); \ - BUILD_BUG_ON((US) > 50000); \ - _wait_for_atomic((COND), (US), 1); \ -}) - -#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000) - #define KHz(x) (1000 * (x)) #define MHz(x) KHz(1000 * (x)) @@ -351,19 +149,6 @@ static inline void __add_taint_for_CI(unsigned int taint) add_taint(taint, LOCKDEP_STILL_OK); } -void cancel_timer(struct timer_list *t); -void set_timer_ms(struct timer_list *t, unsigned long timeout); - -static inline bool timer_active(const struct timer_list *t) -{ - return READ_ONCE(t->expires); -} - -static inline bool timer_expired(const struct timer_list *t) -{ - return timer_active(t) && !timer_pending(t); -} - static inline bool i915_run_as_guest(void) { #if IS_ENABLED(CONFIG_X86) diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 0f9eee6d18d2..8054047840aa 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -30,12 +30,12 @@ #include <drm/drm_mm.h> -#include "gt/intel_ggtt_fencing.h" #include "gem/i915_gem_object.h" - -#include "i915_gem_gtt.h" +#include "gt/intel_ggtt_fencing.h" #include "i915_active.h" +#include "i915_gem_gtt.h" +#include "i915_ptr_util.h" #include "i915_request.h" #include "i915_vma_resource.h" #include "i915_vma_types.h" diff --git a/drivers/gpu/drm/i915/i915_wait_util.h b/drivers/gpu/drm/i915/i915_wait_util.h new file mode 100644 index 000000000000..7376898e3bf8 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_wait_util.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __I915_WAIT_UTIL_H__ +#define __I915_WAIT_UTIL_H__ + +#include <linux/compiler.h> +#include <linux/delay.h> +#include <linux/ktime.h> +#include <linux/sched/clock.h> +#include <linux/smp.h> + +/* + * __wait_for - magic wait macro + * + * Macro to help avoid open coding check/wait/timeout patterns. Note that it's + * important that we check the condition again after having timed out, since the + * timeout could be due to preemption or similar and we've never had a chance to + * check the condition before the timeout. + */ +#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \ + const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \ + long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ + int ret__; \ + might_sleep(); \ + for (;;) { \ + const bool expired__ = ktime_after(ktime_get_raw(), end__); \ + OP; \ + /* Guarantee COND check prior to timeout */ \ + barrier(); \ + if (COND) { \ + ret__ = 0; \ + break; \ + } \ + if (expired__) { \ + ret__ = -ETIMEDOUT; \ + break; \ + } \ + usleep_range(wait__, wait__ * 2); \ + if (wait__ < (Wmax)) \ + wait__ <<= 1; \ + } \ + ret__; \ +}) + +#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \ + (Wmax)) +#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) + +/* + * If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. + * On PREEMPT_RT the context isn't becoming atomic because it is used in an + * interrupt handler or because a spinlock_t is acquired. This leads to + * warnings which don't occur otherwise and therefore the check is disabled. + */ +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) && IS_ENABLED(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT) +# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) +#else +# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) +#endif + +#define _wait_for_atomic(COND, US, ATOMIC) \ +({ \ + int cpu, ret, timeout = (US) * 1000; \ + u64 base; \ + _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \ + if (!(ATOMIC)) { \ + preempt_disable(); \ + cpu = smp_processor_id(); \ + } \ + base = local_clock(); \ + for (;;) { \ + u64 now = local_clock(); \ + if (!(ATOMIC)) \ + preempt_enable(); \ + /* Guarantee COND check prior to timeout */ \ + barrier(); \ + if (COND) { \ + ret = 0; \ + break; \ + } \ + if (now - base >= timeout) { \ + ret = -ETIMEDOUT; \ + break; \ + } \ + cpu_relax(); \ + if (!(ATOMIC)) { \ + preempt_disable(); \ + if (unlikely(cpu != smp_processor_id())) { \ + timeout -= now - base; \ + cpu = smp_processor_id(); \ + base = local_clock(); \ + } \ + } \ + } \ + ret; \ +}) + +#define wait_for_us(COND, US) \ +({ \ + int ret__; \ + BUILD_BUG_ON(!__builtin_constant_p(US)); \ + if ((US) > 10) \ + ret__ = _wait_for((COND), (US), 10, 10); \ + else \ + ret__ = _wait_for_atomic((COND), (US), 0); \ + ret__; \ +}) + +#define wait_for_atomic_us(COND, US) \ +({ \ + BUILD_BUG_ON(!__builtin_constant_p(US)); \ + BUILD_BUG_ON((US) > 50000); \ + _wait_for_atomic((COND), (US), 1); \ +}) + +#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000) + +#endif /* __I915_WAIT_UTIL_H__ */ diff --git a/drivers/gpu/drm/i915/intel_pcode.c b/drivers/gpu/drm/i915/intel_pcode.c index 81da75108c60..55ffedad2490 100644 --- a/drivers/gpu/drm/i915/intel_pcode.c +++ b/drivers/gpu/drm/i915/intel_pcode.c @@ -5,6 +5,7 @@ #include "i915_drv.h" #include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_pcode.h" static int gen6_check_mailbox_status(u32 mbox) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 4ccba7c8ffb3..8cb59f8d1f4c 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -21,19 +21,20 @@ * IN THE SOFTWARE. */ -#include <drm/drm_managed.h> #include <linux/pm_runtime.h> -#include "display/intel_display_core.h" +#include <drm/drm_managed.h> -#include "gt/intel_gt.h" +#include "display/intel_display_core.h" #include "gt/intel_engine_regs.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_regs.h" #include "i915_drv.h" #include "i915_iosf_mbi.h" #include "i915_reg.h" #include "i915_vgpu.h" +#include "i915_wait_util.h" #include "intel_uncore_trace.h" #define FORCEWAKE_ACK_TIMEOUT_MS 50 diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index f8da693ad3ce..27d545c4e6a5 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -2,15 +2,15 @@ /* * Copyright(c) 2020 Intel Corporation. */ + #include <linux/workqueue.h> #include "gem/i915_gem_context.h" - #include "gt/intel_context.h" #include "gt/intel_gt.h" #include "i915_drv.h" - +#include "i915_wait_util.h" #include "intel_pxp.h" #include "intel_pxp_gsccs.h" #include "intel_pxp_irq.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 2fb7a9e7efec..48cd617247d1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -22,14 +22,13 @@ * */ -#include <linux/prime_numbers.h> #include <linux/pm_qos.h> +#include <linux/prime_numbers.h> #include <linux/sort.h> #include "gem/i915_gem_internal.h" #include "gem/i915_gem_pm.h" #include "gem/selftests/mock_context.h" - #include "gt/intel_engine_heartbeat.h" #include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" @@ -40,11 +39,11 @@ #include "i915_random.h" #include "i915_selftest.h" +#include "i915_wait_util.h" #include "igt_flush_test.h" #include "igt_live_test.h" #include "igt_spinner.h" #include "lib_sw_fence.h" - #include "mock_drm.h" #include "mock_gem_device.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 889281819c5b..9c276c9d0a75 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -31,7 +31,7 @@ #include "i915_driver.h" #include "i915_drv.h" #include "i915_selftest.h" - +#include "i915_wait_util.h" #include "igt_flush_test.h" struct i915_selftest i915_selftest __read_mostly = { diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 8c3e1f20e5a1..820364171ebe 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -3,12 +3,13 @@ * * Copyright © 2018 Intel Corporation */ -#include "gt/intel_gpu_commands.h" -#include "gt/intel_gt.h" #include "gem/i915_gem_internal.h" #include "gem/selftests/igt_gem_utils.h" +#include "gt/intel_gpu_commands.h" +#include "gt/intel_gt.h" +#include "i915_wait_util.h" #include "igt_spinner.h" int igt_spinner_init(struct igt_spinner *spin, struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/soc/intel_dram.c b/drivers/gpu/drm/i915/soc/intel_dram.c index 149527827624..edffaed8f9a7 100644 --- a/drivers/gpu/drm/i915/soc/intel_dram.c +++ b/drivers/gpu/drm/i915/soc/intel_dram.c @@ -732,7 +732,7 @@ int intel_dram_detect(struct drm_i915_private *i915) struct dram_info *dram_info; int ret; - if (IS_DG2(i915) || !HAS_DISPLAY(display)) + if (IS_DG2(i915) || !intel_display_device_present(display)) return 0; dram_info = drmm_kzalloc(&i915->drm, sizeof(*dram_info), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/vlv_suspend.c b/drivers/gpu/drm/i915/vlv_suspend.c index fc9f311ea1db..221e4c0b2c58 100644 --- a/drivers/gpu/drm/i915/vlv_suspend.c +++ b/drivers/gpu/drm/i915/vlv_suspend.c @@ -8,16 +8,17 @@ #include <drm/drm_print.h> +#include "gt/intel_gt_regs.h" + #include "i915_drv.h" #include "i915_reg.h" #include "i915_trace.h" #include "i915_utils.h" +#include "i915_wait_util.h" #include "intel_clock_gating.h" #include "intel_uncore_trace.h" #include "vlv_suspend.h" -#include "gt/intel_gt_regs.h" - struct vlv_s0ix_state { /* GAM */ u32 wr_watermark; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c index 00e1afd46b81..44df6410bce1 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c @@ -913,6 +913,11 @@ static const struct adreno_info a6xx_gpus[] = { { /* sentinel */ }, }, }, + .speedbins = ADRENO_SPEEDBINS( + { 0, 0 }, + { 185, 0 }, + { 127, 1 }, + ), }, { .chip_ids = ADRENO_CHIP_IDS( 0x06030001, @@ -1024,6 +1029,11 @@ static const struct adreno_info a6xx_gpus[] = { .gmu_cgc_mode = 0x00020200, .prim_fifo_threshold = 0x00300200, }, + .speedbins = ADRENO_SPEEDBINS( + { 0, 0 }, + { 169, 0 }, + { 113, 1 }, + ), }, { .chip_ids = ADRENO_CHIP_IDS(0x06030500), .family = ADRENO_6XX_GEN4, @@ -1343,6 +1353,69 @@ static const uint32_t a7xx_pwrup_reglist_regs[] = { DECLARE_ADRENO_REGLIST_LIST(a7xx_pwrup_reglist); +/* Applicable for X185, A750 */ +static const u32 a750_ifpc_reglist_regs[] = { + REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), + REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), + REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), + REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), + REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), + REG_A6XX_TPL1_NC_MODE_CNTL, + REG_A6XX_SP_NC_MODE_CNTL, + REG_A6XX_CP_DBG_ECO_CNTL, + REG_A6XX_CP_PROTECT_CNTL, + REG_A6XX_CP_PROTECT(0), + REG_A6XX_CP_PROTECT(1), + REG_A6XX_CP_PROTECT(2), + REG_A6XX_CP_PROTECT(3), + REG_A6XX_CP_PROTECT(4), + REG_A6XX_CP_PROTECT(5), + REG_A6XX_CP_PROTECT(6), + REG_A6XX_CP_PROTECT(7), + REG_A6XX_CP_PROTECT(8), + REG_A6XX_CP_PROTECT(9), + REG_A6XX_CP_PROTECT(10), + REG_A6XX_CP_PROTECT(11), + REG_A6XX_CP_PROTECT(12), + REG_A6XX_CP_PROTECT(13), + REG_A6XX_CP_PROTECT(14), + REG_A6XX_CP_PROTECT(15), + REG_A6XX_CP_PROTECT(16), + REG_A6XX_CP_PROTECT(17), + REG_A6XX_CP_PROTECT(18), + REG_A6XX_CP_PROTECT(19), + REG_A6XX_CP_PROTECT(20), + REG_A6XX_CP_PROTECT(21), + REG_A6XX_CP_PROTECT(22), + REG_A6XX_CP_PROTECT(23), + REG_A6XX_CP_PROTECT(24), + REG_A6XX_CP_PROTECT(25), + REG_A6XX_CP_PROTECT(26), + REG_A6XX_CP_PROTECT(27), + REG_A6XX_CP_PROTECT(28), + REG_A6XX_CP_PROTECT(29), + REG_A6XX_CP_PROTECT(30), + REG_A6XX_CP_PROTECT(31), + REG_A6XX_CP_PROTECT(32), + REG_A6XX_CP_PROTECT(33), + REG_A6XX_CP_PROTECT(34), + REG_A6XX_CP_PROTECT(35), + REG_A6XX_CP_PROTECT(36), + REG_A6XX_CP_PROTECT(37), + REG_A6XX_CP_PROTECT(38), + REG_A6XX_CP_PROTECT(39), + REG_A6XX_CP_PROTECT(40), + REG_A6XX_CP_PROTECT(41), + REG_A6XX_CP_PROTECT(42), + REG_A6XX_CP_PROTECT(43), + REG_A6XX_CP_PROTECT(44), + REG_A6XX_CP_PROTECT(45), + REG_A6XX_CP_PROTECT(46), + REG_A6XX_CP_PROTECT(47), +}; + +DECLARE_ADRENO_REGLIST_LIST(a750_ifpc_reglist); + static const struct adreno_info a7xx_gpus[] = { { .chip_ids = ADRENO_CHIP_IDS(0x07000200), @@ -1432,14 +1505,27 @@ static const struct adreno_info a7xx_gpus[] = { .inactive_period = DRM_MSM_INACTIVE_PERIOD, .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | ADRENO_QUIRK_HAS_HW_APRIV | - ADRENO_QUIRK_PREEMPTION, + ADRENO_QUIRK_PREEMPTION | + ADRENO_QUIRK_IFPC, .init = a6xx_gpu_init, .a6xx = &(const struct a6xx_info) { .hwcg = a740_hwcg, .protect = &a730_protect, .pwrup_reglist = &a7xx_pwrup_reglist, + .ifpc_reglist = &a750_ifpc_reglist, .gmu_chipid = 0x7050001, .gmu_cgc_mode = 0x00020202, + .bcms = (const struct a6xx_bcm[]) { + { .name = "SH0", .buswidth = 16 }, + { .name = "MC0", .buswidth = 4 }, + { + .name = "ACV", + .fixed = true, + .perfmode = BIT(3), + .perfmode_bw = 16500000, + }, + { /* sentinel */ }, + }, }, .preempt_record_size = 4192 * SZ_1K, .speedbins = ADRENO_SPEEDBINS( @@ -1460,12 +1546,14 @@ static const struct adreno_info a7xx_gpus[] = { .inactive_period = DRM_MSM_INACTIVE_PERIOD, .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | ADRENO_QUIRK_HAS_HW_APRIV | - ADRENO_QUIRK_PREEMPTION, + ADRENO_QUIRK_PREEMPTION | + ADRENO_QUIRK_IFPC, .init = a6xx_gpu_init, .zapfw = "gen70900_zap.mbn", .a6xx = &(const struct a6xx_info) { .protect = &a730_protect, .pwrup_reglist = &a7xx_pwrup_reglist, + .ifpc_reglist = &a750_ifpc_reglist, .gmu_chipid = 0x7090100, .gmu_cgc_mode = 0x00020202, .bcms = (const struct a6xx_bcm[]) { diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 28e6705c6da6..fc62fef2fed8 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -93,14 +93,25 @@ bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu) /* Check to see if the GX rail is still powered */ bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu) { + struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; u32 val; /* This can be called from gpu state code so make sure GMU is valid */ if (!gmu->initialized) return false; + /* If GMU is absent, then GX power domain is ON as long as GPU is in active state */ + if (adreno_has_gmu_wrapper(adreno_gpu)) + return true; + val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS); + if (adreno_is_a7xx(adreno_gpu)) + return !(val & + (A7XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF | + A7XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF)); + return !(val & (A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF | A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF)); @@ -272,6 +283,8 @@ static int a6xx_gmu_start(struct a6xx_gmu *gmu) if (ret) DRM_DEV_ERROR(gmu->dev, "GMU firmware initialization timed out\n"); + set_bit(GMU_STATUS_FW_START, &gmu->status); + return ret; } @@ -403,7 +416,10 @@ int a6xx_sptprac_enable(struct a6xx_gmu *gmu) int ret; u32 val; - if (!gmu->legacy) + WARN_ON(!gmu->legacy); + + /* Nothing to do if GMU does the power management */ + if (gmu->idle_level > GMU_IDLE_STATE_ACTIVE) return 0; gmu_write(gmu, REG_A6XX_GMU_GX_SPTPRAC_POWER_CONTROL, 0x778000); @@ -518,6 +534,9 @@ static int a6xx_rpmh_start(struct a6xx_gmu *gmu) int ret; u32 val; + if (!test_and_clear_bit(GMU_STATUS_PDC_SLEEP, &gmu->status)) + return 0; + gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, BIT(1)); ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_RSCC_CONTROL_ACK, val, @@ -545,6 +564,9 @@ static void a6xx_rpmh_stop(struct a6xx_gmu *gmu) int ret; u32 val; + if (test_and_clear_bit(GMU_STATUS_FW_START, &gmu->status)) + return; + gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 1); ret = gmu_poll_timeout_rscc(gmu, REG_A6XX_GPU_RSCC_RSC_STATUS0_DRV0, @@ -553,6 +575,8 @@ static void a6xx_rpmh_stop(struct a6xx_gmu *gmu) DRM_DEV_ERROR(gmu->dev, "Unable to power off the GPU RSC\n"); gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0); + + set_bit(GMU_STATUS_PDC_SLEEP, &gmu->status); } static inline void pdc_write(void __iomem *ptr, u32 offset, u32 value) @@ -681,8 +705,6 @@ setup_pdc: /* ensure no writes happen before the uCode is fully written */ wmb(); - a6xx_rpmh_stop(gmu); - err: if (!IS_ERR_OR_NULL(pdcptr)) iounmap(pdcptr); @@ -842,19 +864,15 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state) else gmu_write(gmu, REG_A6XX_GMU_GENERAL_7, 1); - if (state == GMU_WARM_BOOT) { - ret = a6xx_rpmh_start(gmu); - if (ret) - return ret; - } else { + ret = a6xx_rpmh_start(gmu); + if (ret) + return ret; + + if (state == GMU_COLD_BOOT) { if (WARN(!adreno_gpu->fw[ADRENO_FW_GMU], "GMU firmware is not loaded\n")) return -ENOENT; - ret = a6xx_rpmh_start(gmu); - if (ret) - return ret; - ret = a6xx_gmu_fw_load(gmu); if (ret) return ret; @@ -925,10 +943,7 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state) ret = a6xx_gmu_gfx_rail_on(gmu); if (ret) return ret; - } - /* Enable SPTP_PC if the CPU is responsible for it */ - if (gmu->idle_level < GMU_IDLE_STATE_SPTP) { ret = a6xx_sptprac_enable(gmu); if (ret) return ret; @@ -980,6 +995,22 @@ static void a6xx_gmu_rpmh_off(struct a6xx_gmu *gmu) val, (val & 1), 100, 10000); gmu_poll_timeout_rscc(gmu, REG_A6XX_RSCC_TCS3_DRV0_STATUS + seqmem_off, val, (val & 1), 100, 1000); + + if (!adreno_is_a740_family(adreno_gpu)) + return; + + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS4_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 10000); + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS5_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 10000); + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS6_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 10000); + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS7_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 1000); + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS8_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 10000); + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS9_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 1000); } /* Force the GMU off in case it isn't responsive */ @@ -1023,6 +1054,8 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu) /* Reset GPU core blocks */ a6xx_gpu_sw_reset(gpu, true); + + a6xx_rpmh_stop(gmu); } static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu) @@ -1128,6 +1161,11 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) /* Set the GPU to the current freq */ a6xx_gmu_set_initial_freq(gpu, gmu); + if (refcount_read(&gpu->sysprof_active) > 1) { + ret = a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET); + if (!ret) + set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status); + } out: /* On failure, shut down the GMU to leave it in a good state */ if (ret) { @@ -1175,6 +1213,9 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu) a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); } + if (test_and_clear_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status)) + a6xx_gmu_clear_oob(gmu, GMU_OOB_PERFCOUNTER_SET); + ret = a6xx_gmu_wait_for_idle(gmu); /* If the GMU isn't responding assume it is hung */ @@ -1318,8 +1359,6 @@ static int a6xx_gmu_memory_probe(struct drm_device *drm, struct a6xx_gmu *gmu) struct msm_mmu *mmu; mmu = msm_iommu_new(gmu->dev, 0); - if (!mmu) - return -ENODEV; if (IS_ERR(mmu)) return PTR_ERR(mmu); @@ -1692,6 +1731,7 @@ static int a6xx_gmu_acd_probe(struct a6xx_gmu *gmu) u32 val; freq = gmu->gpu_freqs[i]; + /* This is unlikely to fail because we are passing back a known freq */ opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, freq, true); np = dev_pm_opp_get_of_node(opp); @@ -1790,6 +1830,35 @@ static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, struct platform_device *pdev, return irq; } +void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + unsigned int sysprof_active; + + /* Nothing to do if GPU is suspended. We will handle this during GMU resume */ + if (!pm_runtime_get_if_active(&gpu->pdev->dev)) + return; + + mutex_lock(&gmu->lock); + + sysprof_active = refcount_read(&gpu->sysprof_active); + + /* + * 'Perfcounter select' register values are lost during IFPC collapse. To avoid that, + * use the currently unused perfcounter oob vote to block IFPC when sysprof is active + */ + if ((sysprof_active > 1) && !test_and_set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status)) + a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET); + else if ((sysprof_active == 1) && test_and_clear_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status)) + a6xx_gmu_clear_oob(gmu, GMU_OOB_PERFCOUNTER_SET); + + mutex_unlock(&gmu->lock); + + pm_runtime_put(&gpu->pdev->dev); +} + void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu) { struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; @@ -1932,8 +2001,9 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) if (ret) return ret; - /* Fow now, don't do anything fancy until we get our feet under us */ - gmu->idle_level = GMU_IDLE_STATE_ACTIVE; + /* Set GMU idle level */ + gmu->idle_level = (adreno_gpu->info->quirks & ADRENO_QUIRK_IFPC) ? + GMU_IDLE_STATE_IFPC : GMU_IDLE_STATE_ACTIVE; pm_runtime_enable(gmu->dev); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h index d1ce11131ba6..06cfc294016f 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h @@ -50,6 +50,9 @@ struct a6xx_bcm { /* The GMU does not do any idle state management */ #define GMU_IDLE_STATE_ACTIVE 0 +/* Unknown power state. Not exposed by the firmware. For documentation purpose only */ +#define GMU_IDLE_STATE_RESERVED 1 + /* The GMU manages SPTP power collapse */ #define GMU_IDLE_STATE_SPTP 2 @@ -117,6 +120,14 @@ struct a6xx_gmu { struct qmp *qmp; struct a6xx_hfi_msg_bw_table *bw_table; + +/* To check if we can trigger sleep seq at PDC. Cleared in a6xx_rpmh_stop() */ +#define GMU_STATUS_FW_START 0 +/* To track if PDC sleep seq was done */ +#define GMU_STATUS_PDC_SLEEP 1 +/* To track Perfcounter OOB set status */ +#define GMU_STATUS_OOB_PERF_SET 2 + unsigned long status; }; static inline u32 gmu_read(struct a6xx_gmu *gmu, u32 offset) @@ -158,6 +169,9 @@ static inline u64 gmu_read64(struct a6xx_gmu *gmu, u32 lo, u32 hi) #define gmu_poll_timeout(gmu, addr, val, cond, interval, timeout) \ readl_poll_timeout((gmu)->mmio + ((addr) << 2), val, cond, \ interval, timeout) +#define gmu_poll_timeout_atomic(gmu, addr, val, cond, interval, timeout) \ + readl_poll_timeout_atomic((gmu)->mmio + ((addr) << 2), val, cond, \ + interval, timeout) static inline u32 gmu_read_rscc(struct a6xx_gmu *gmu, u32 offset) { diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 45dd5fd1c2bf..b8f8ae940b55 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -16,6 +16,97 @@ #define GPU_PAS_ID 13 +static u64 read_gmu_ao_counter(struct a6xx_gpu *a6xx_gpu) +{ + u64 count_hi, count_lo, temp; + + do { + count_hi = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H); + count_lo = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L); + temp = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H); + } while (unlikely(count_hi != temp)); + + return (count_hi << 32) | count_lo; +} + +static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask) +{ + /* Success if !writedropped0/1 */ + if (!(status & mask)) + return true; + + udelay(10); + + /* Try to update fenced register again */ + gpu_write(gpu, offset, value); + + /* We can't do a posted write here because the power domain could be + * in collapse state. So use the heaviest barrier instead + */ + mb(); + return false; +} + +static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask) +{ + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + u32 status; + + gpu_write(gpu, offset, value); + + /* Nothing else to be done in the case of no-GMU */ + if (adreno_has_gmu_wrapper(adreno_gpu)) + return 0; + + /* We can't do a posted write here because the power domain could be + * in collapse state. So use the heaviest barrier instead + */ + mb(); + + if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status, + fence_status_check(gpu, offset, value, status, mask), 0, 1000)) + return 0; + + /* Try again for another 1ms before failing */ + gpu_write(gpu, offset, value); + mb(); + + if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status, + fence_status_check(gpu, offset, value, status, mask), 0, 1000)) { + /* + * The 'delay' warning is here because the pause to print this + * warning will allow gpu to move to power collapse which + * defeats the purpose of continuous polling for 2 ms + */ + dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n", + offset); + return 0; + } + + dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n", + offset); + + return -ETIMEDOUT; +} + +int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b) +{ + int ret; + + ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask); + if (ret) + return ret; + + if (!is_64b) + return 0; + + ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask); + + return ret; +} + static inline bool _a6xx_check_idle(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -86,7 +177,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) /* Update HW if this is the current ring and we are not in preempt*/ if (!a6xx_in_preempt(a6xx_gpu)) { if (a6xx_gpu->cur_ring == ring) - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false); else ring->restore_wptr = true; } else { @@ -173,8 +264,8 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, * Needed for preemption */ OUT_PKT7(ring, CP_MEM_WRITE, 5); - OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr))); - OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr))); + OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_LO(lower_32_bits(memptr))); + OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_HI(upper_32_bits(memptr))); OUT_RING(ring, lower_32_bits(ttbr)); OUT_RING(ring, upper_32_bits(ttbr)); OUT_RING(ring, ctx->seqno); @@ -204,9 +295,9 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, */ OUT_PKT7(ring, CP_WAIT_REG_MEM, 6); OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ)); - OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO( + OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO( REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS)); - OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0)); + OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0)); OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1)); OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1)); OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0)); @@ -298,8 +389,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, submit->seqno); - trace_msm_gpu_submit_flush(submit, - gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER)); + trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu)); a6xx_flush(gpu, ring); } @@ -499,8 +589,7 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) } - trace_msm_gpu_submit_flush(submit, - gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER)); + trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu)); a6xx_flush(gpu, ring); @@ -739,11 +828,10 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) u32 *dest = (u32 *)&lock->regs[0]; int i; - reglist = adreno_gpu->info->a6xx->pwrup_reglist; - lock->gpu_req = lock->cpu_req = lock->turn = 0; - lock->ifpc_list_len = 0; - lock->preemption_list_len = reglist->count; + + reglist = adreno_gpu->info->a6xx->ifpc_reglist; + lock->ifpc_list_len = reglist->count; /* * For each entry in each of the lists, write the offset and the current @@ -754,6 +842,14 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) *dest++ = gpu_read(gpu, reglist->regs[i]); } + reglist = adreno_gpu->info->a6xx->pwrup_reglist; + lock->preemption_list_len = reglist->count; + + for (i = 0; i < reglist->count; i++) { + *dest++ = reglist->regs[i]; + *dest++ = gpu_read(gpu, reglist->regs[i]); + } + /* * The overall register list is composed of * 1. Static IFPC-only registers @@ -1241,14 +1337,14 @@ static int hw_init(struct msm_gpu *gpu) /* Set weights for bicubic filtering */ if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) { - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0); + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0); } @@ -1448,21 +1544,25 @@ static void a6xx_recover(struct msm_gpu *gpu) adreno_dump_info(gpu); - for (i = 0; i < 8; i++) - DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i, - gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i))); + if (a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) { + /* Sometimes crashstate capture is skipped, so SQE should be halted here again */ + gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); + + for (i = 0; i < 8; i++) + DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i, + gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i))); - if (hang_debug) - a6xx_dump(gpu); + if (hang_debug) + a6xx_dump(gpu); + + } /* * To handle recovery specific sequences during the rpm suspend we are * about to trigger */ - a6xx_gpu->hung = true; - /* Halt SQE first */ - gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); + a6xx_gpu->hung = true; pm_runtime_dont_use_autosuspend(&gpu->pdev->dev); @@ -1693,8 +1793,6 @@ static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu) static void a6xx_fault_detect_irq(struct msm_gpu *gpu) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); /* @@ -1706,13 +1804,6 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT) return; - /* - * Force the GPU to stay on until after we finish - * collecting information - */ - if (!adreno_has_gmu_wrapper(adreno_gpu)) - gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1); - DRM_DEV_ERROR(&gpu->pdev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0, @@ -1727,6 +1818,9 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) /* Turn off the hangcheck timer to keep it from bothering us */ timer_delete(&gpu->hangcheck_timer); + /* Turn off interrupts to avoid triggering recovery again */ + gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 0); + kthread_queue_work(gpu->worker, &gpu->recover_work); } @@ -1751,9 +1845,49 @@ static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) } } +static void a6xx_gpu_keepalive_vote(struct msm_gpu *gpu, bool on) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + if (adreno_has_gmu_wrapper(adreno_gpu)) + return; + + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, on); +} + +static int irq_poll_fence(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + u32 status; + + if (adreno_has_gmu_wrapper(adreno_gpu)) + return 0; + + if (gmu_poll_timeout_atomic(gmu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, status, !status, 1, 100)) { + u32 rbbm_unmasked = gmu_read(gmu, REG_A6XX_GMU_RBBM_INT_UNMASKED_STATUS); + + dev_err_ratelimited(&gpu->pdev->dev, + "irq fence poll timeout, fence_ctrl=0x%x, unmasked_status=0x%x\n", + status, rbbm_unmasked); + return -ETIMEDOUT; + } + + return 0; +} + static irqreturn_t a6xx_irq(struct msm_gpu *gpu) { struct msm_drm_private *priv = gpu->dev->dev_private; + + /* Set keepalive vote to avoid power collapse after RBBM_INT_0_STATUS is read */ + a6xx_gpu_keepalive_vote(gpu, true); + + if (irq_poll_fence(gpu)) + goto done; + u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS); gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status); @@ -1790,6 +1924,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) if (status & A6XX_RBBM_INT_0_MASK_CP_SW) a6xx_preempt_irq(gpu); +done: + a6xx_gpu_keepalive_vote(gpu, false); + return IRQ_HANDLED; } @@ -2179,16 +2316,7 @@ static int a6xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - mutex_lock(&a6xx_gpu->gmu.lock); - - /* Force the GPU power on so we can read this register */ - a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); - - *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER); - - a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); - - mutex_unlock(&a6xx_gpu->gmu.lock); + *value = read_gmu_ao_counter(a6xx_gpu); return 0; } @@ -2298,18 +2426,36 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) return a6xx_gpu->shadow[ring->id]; + /* + * This is true only on an A6XX_GEN1 with GMU, has IFPC enabled and a super old SQE firmware + * without 'whereami' support + */ + WARN_ONCE((to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC), + "Can't read CP_RB_RPTR register reliably\n"); + return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR); } static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { - struct msm_cp_state cp_state = { + struct msm_cp_state cp_state; + bool progress; + + /* + * With IFPC, KMD doesn't know whether GX power domain is collapsed + * or not. So, we can't blindly read the below registers in GX domain. + * Lets trust the hang detection in HW and lie to the caller that + * there was progress. + */ + if (to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC) + return true; + + cp_state = (struct msm_cp_state) { .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE), }; - bool progress; /* * Adjust the remaining data to account for what has already been @@ -2408,6 +2554,7 @@ static const struct adreno_gpu_funcs funcs = { .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, + .sysprof_setup = a6xx_gmu_sysprof_setup, }, .get_timestamp = a6xx_gmu_get_timestamp, }; @@ -2468,6 +2615,7 @@ static const struct adreno_gpu_funcs funcs_a7xx = { .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, + .sysprof_setup = a6xx_gmu_sysprof_setup, }, .get_timestamp = a6xx_gmu_get_timestamp, }; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 6e71f617fc3d..0b17d36c36a9 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -45,6 +45,7 @@ struct a6xx_info { const struct adreno_reglist *hwcg; const struct adreno_protect *protect; const struct adreno_reglist_list *pwrup_reglist; + const struct adreno_reglist_list *ifpc_reglist; u32 gmu_chipid; u32 gmu_cgc_mode; u32 prim_fifo_threshold; @@ -254,6 +255,7 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state); int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node); int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node); void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu); +void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu); void a6xx_preempt_init(struct msm_gpu *gpu); void a6xx_preempt_hw_init(struct msm_gpu *gpu); @@ -295,5 +297,6 @@ int a6xx_gpu_state_put(struct msm_gpu_state *state); void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off); void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert); +int a6xx_fenced_write(struct a6xx_gpu *gpu, u32 offset, u64 value, u32 mask, bool is_64b); #endif /* __A6XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index d5d1271fce61..4c7f3c642f6a 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -1586,8 +1586,7 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state), GFP_KERNEL); - bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & - A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT); + bool stalled; if (!a6xx_state) return ERR_PTR(-ENOMEM); @@ -1608,15 +1607,20 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) } /* If GX isn't on the rest of the data isn't going to be accessible */ - if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) + if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) return &a6xx_state->base; + /* Halt SQE first */ + gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); + /* Get the banks of indexed registers */ if (adreno_is_a7xx(adreno_gpu)) a7xx_get_indexed_registers(gpu, a6xx_state); else a6xx_get_indexed_registers(gpu, a6xx_state); + stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & + A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT); /* * Try to initialize the crashdumper, if we are not dumping state * with the SMMU stalled. The crashdumper needs memory access to diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c index 8e69b1e84657..550de6ad68ef 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c @@ -21,6 +21,7 @@ static const char * const a6xx_hfi_msg_id[] = { HFI_MSG_ID(HFI_H2F_MSG_PERF_TABLE), HFI_MSG_ID(HFI_H2F_MSG_TEST), HFI_MSG_ID(HFI_H2F_MSG_START), + HFI_MSG_ID(HFI_H2F_FEATURE_CTRL), HFI_MSG_ID(HFI_H2F_MSG_CORE_FW_START), HFI_MSG_ID(HFI_H2F_MSG_GX_BW_PERF_VOTE), HFI_MSG_ID(HFI_H2F_MSG_PREPARE_SLUMBER), @@ -765,23 +766,40 @@ send: NULL, 0); } +static int a6xx_hfi_feature_ctrl_msg(struct a6xx_gmu *gmu, u32 feature, u32 enable, u32 data) +{ + struct a6xx_hfi_msg_feature_ctrl msg = { + .feature = feature, + .enable = enable, + .data = data, + }; + + return a6xx_hfi_send_msg(gmu, HFI_H2F_FEATURE_CTRL, &msg, sizeof(msg), NULL, 0); +} + +#define HFI_FEATURE_IFPC 9 +#define IFPC_LONG_HYST 0x1680 + +static int a6xx_hfi_enable_ifpc(struct a6xx_gmu *gmu) +{ + if (gmu->idle_level != GMU_IDLE_STATE_IFPC) + return 0; + + return a6xx_hfi_feature_ctrl_msg(gmu, HFI_FEATURE_IFPC, 1, IFPC_LONG_HYST); +} + #define HFI_FEATURE_ACD 12 static int a6xx_hfi_enable_acd(struct a6xx_gmu *gmu) { struct a6xx_hfi_acd_table *acd_table = &gmu->acd_table; - struct a6xx_hfi_msg_feature_ctrl msg = { - .feature = HFI_FEATURE_ACD, - .enable = 1, - .data = 0, - }; int ret; if (!acd_table->enable_by_level) return 0; /* Enable ACD feature at GMU */ - ret = a6xx_hfi_send_msg(gmu, HFI_H2F_FEATURE_CTRL, &msg, sizeof(msg), NULL, 0); + ret = a6xx_hfi_feature_ctrl_msg(gmu, HFI_FEATURE_ACD, 1, 0); if (ret) { DRM_DEV_ERROR(gmu->dev, "Unable to enable ACD (%d)\n", ret); return ret; @@ -898,6 +916,10 @@ int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state) if (ret) return ret; + ret = a6xx_hfi_enable_ifpc(gmu); + if (ret) + return ret; + ret = a6xx_hfi_send_core_fw_start(gmu); if (ret) return ret; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c index 6a12a35dabff..afc5f4aa3b17 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c @@ -41,7 +41,7 @@ static inline void set_preempt_state(struct a6xx_gpu *gpu, } /* Write the most recent wptr for the given ring into the hardware */ -static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +static inline void update_wptr(struct a6xx_gpu *a6xx_gpu, struct msm_ringbuffer *ring) { unsigned long flags; uint32_t wptr; @@ -51,7 +51,7 @@ static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) if (ring->restore_wptr) { wptr = get_wptr(ring); - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false); ring->restore_wptr = false; } @@ -111,9 +111,9 @@ static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu) postamble[count++] = PKT7(CP_WAIT_REG_MEM, 6); postamble[count++] = CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ); - postamble[count++] = CP_WAIT_REG_MEM_1_POLL_ADDR_LO( + postamble[count++] = CP_WAIT_REG_MEM_POLL_ADDR_LO( REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS); - postamble[count++] = CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0); + postamble[count++] = CP_WAIT_REG_MEM_POLL_ADDR_HI(0); postamble[count++] = CP_WAIT_REG_MEM_3_REF(0x1); postamble[count++] = CP_WAIT_REG_MEM_4_MASK(0x1); postamble[count++] = CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0); @@ -136,6 +136,21 @@ static void preempt_disable_postamble(struct a6xx_gpu *a6xx_gpu) a6xx_gpu->postamble_enabled = false; } +/* + * Set preemption keepalive vote. Please note that this vote is different from the one used in + * a6xx_irq() + */ +static void a6xx_preempt_keepalive_vote(struct msm_gpu *gpu, bool on) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + if (adreno_has_gmu_wrapper(adreno_gpu)) + return; + + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_PWR_COL_PREEMPT_KEEPALIVE, on); +} + void a6xx_preempt_irq(struct msm_gpu *gpu) { uint32_t status; @@ -172,10 +187,12 @@ void a6xx_preempt_irq(struct msm_gpu *gpu) set_preempt_state(a6xx_gpu, PREEMPT_FINISH); - update_wptr(gpu, a6xx_gpu->cur_ring); + update_wptr(a6xx_gpu, a6xx_gpu->cur_ring); set_preempt_state(a6xx_gpu, PREEMPT_NONE); + a6xx_preempt_keepalive_vote(gpu, false); + trace_msm_gpu_preemption_irq(a6xx_gpu->cur_ring->id); /* @@ -268,7 +285,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu) */ if (!ring || (a6xx_gpu->cur_ring == ring)) { set_preempt_state(a6xx_gpu, PREEMPT_FINISH); - update_wptr(gpu, a6xx_gpu->cur_ring); + update_wptr(a6xx_gpu, a6xx_gpu->cur_ring); set_preempt_state(a6xx_gpu, PREEMPT_NONE); spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags); return; @@ -302,13 +319,16 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu) spin_unlock_irqrestore(&ring->preempt_lock, flags); - gpu_write64(gpu, - REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, - a6xx_gpu->preempt_smmu_iova[ring->id]); + /* Set the keepalive bit to keep the GPU ON until preemption is complete */ + a6xx_preempt_keepalive_vote(gpu, true); + + a6xx_fenced_write(a6xx_gpu, + REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, a6xx_gpu->preempt_smmu_iova[ring->id], + BIT(1), true); - gpu_write64(gpu, + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR, - a6xx_gpu->preempt_iova[ring->id]); + a6xx_gpu->preempt_iova[ring->id], BIT(1), true); a6xx_gpu->next_ring = ring; @@ -328,7 +348,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu) set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED); /* Trigger the preemption */ - gpu_write(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl); + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl, BIT(1), false); } static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu, diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 50945bfe9b49..28f744f3caf7 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -24,6 +24,10 @@ bool disable_acd; MODULE_PARM_DESC(disable_acd, "Forcefully disable GPU ACD"); module_param_unsafe(disable_acd, bool, 0400); +static bool skip_gpu; +MODULE_PARM_DESC(no_gpu, "Disable GPU driver register (0=enable GPU driver register (default), 1=skip GPU driver register"); +module_param(skip_gpu, bool, 0400); + extern const struct adreno_gpulist a2xx_gpulist; extern const struct adreno_gpulist a3xx_gpulist; extern const struct adreno_gpulist a4xx_gpulist; @@ -184,6 +188,9 @@ bool adreno_has_gpu(struct device_node *node) uint32_t chip_id; int ret; + if (skip_gpu) + return false; + ret = find_chipid(node, &chip_id); if (ret) return false; @@ -404,10 +411,16 @@ static struct platform_driver adreno_driver = { void __init adreno_register(void) { + if (skip_gpu) + return; + platform_driver_register(&adreno_driver); } void __exit adreno_unregister(void) { + if (skip_gpu) + return; + platform_driver_unregister(&adreno_driver); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index f1230465bf0d..afaa3cfefd35 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -10,7 +10,7 @@ #include <linux/interconnect.h> #include <linux/firmware/qcom/qcom_scm.h> #include <linux/kernel.h> -#include <linux/of_address.h> +#include <linux/of_reserved_mem.h> #include <linux/pm_opp.h> #include <linux/slab.h> #include <linux/soc/qcom/mdt_loader.h> @@ -33,7 +33,7 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname, struct device *dev = &gpu->pdev->dev; const struct firmware *fw; const char *signed_fwname = NULL; - struct device_node *np, *mem_np; + struct device_node *np; struct resource r; phys_addr_t mem_phys; ssize_t mem_size; @@ -51,18 +51,11 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname, return -ENODEV; } - mem_np = of_parse_phandle(np, "memory-region", 0); - of_node_put(np); - if (!mem_np) { + ret = of_reserved_mem_region_to_resource(np, 0, &r); + if (ret) { zap_available = false; - return -EINVAL; - } - - ret = of_address_to_resource(mem_np, 0, &r); - of_node_put(mem_np); - if (ret) return ret; - + } mem_phys = r.start; /* @@ -209,9 +202,7 @@ adreno_iommu_create_vm(struct msm_gpu *gpu, u64 start, size; mmu = msm_iommu_gpu_new(&pdev->dev, gpu, quirks); - if (!mmu) - return ERR_PTR(-ENODEV); - else if (IS_ERR_OR_NULL(mmu)) + if (IS_ERR(mmu)) return ERR_CAST(mmu); geometry = msm_iommu_get_geometry(mmu); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 9dc93c247196..390fa6720d9b 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -59,6 +59,7 @@ enum adreno_family { #define ADRENO_QUIRK_HAS_CACHED_COHERENT BIT(4) #define ADRENO_QUIRK_PREEMPTION BIT(5) #define ADRENO_QUIRK_4GB_VA BIT(6) +#define ADRENO_QUIRK_IFPC BIT(7) /* Helper for formating the chip_id in the way that userspace tools like * crashdec expect. diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c index 0fb5789c60d0..13cc658065c5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c @@ -32,6 +32,26 @@ enum dpu_perf_mode { }; /** + * dpu_core_perf_adjusted_mode_clk - Adjust given mode clock rate according to + * the perf clock factor. + * @crtc_clk_rate - Unadjusted mode clock rate + * @perf_cfg: performance configuration + */ +u64 dpu_core_perf_adjusted_mode_clk(u64 mode_clk_rate, + const struct dpu_perf_cfg *perf_cfg) +{ + u32 clk_factor; + + clk_factor = perf_cfg->clk_inefficiency_factor; + if (clk_factor) { + mode_clk_rate *= clk_factor; + do_div(mode_clk_rate, 100); + } + + return mode_clk_rate; +} + +/** * _dpu_core_perf_calc_bw() - to calculate BW per crtc * @perf_cfg: performance configuration * @crtc: pointer to a crtc @@ -75,28 +95,21 @@ static u64 _dpu_core_perf_calc_clk(const struct dpu_perf_cfg *perf_cfg, struct drm_plane *plane; struct dpu_plane_state *pstate; struct drm_display_mode *mode; - u64 crtc_clk; - u32 clk_factor; + u64 mode_clk; mode = &state->adjusted_mode; - crtc_clk = (u64)mode->vtotal * mode->hdisplay * drm_mode_vrefresh(mode); + mode_clk = (u64)mode->vtotal * mode->hdisplay * drm_mode_vrefresh(mode); drm_atomic_crtc_for_each_plane(plane, crtc) { pstate = to_dpu_plane_state(plane->state); if (!pstate) continue; - crtc_clk = max(pstate->plane_clk, crtc_clk); - } - - clk_factor = perf_cfg->clk_inefficiency_factor; - if (clk_factor) { - crtc_clk *= clk_factor; - do_div(crtc_clk, 100); + mode_clk = max(pstate->plane_clk, mode_clk); } - return crtc_clk; + return dpu_core_perf_adjusted_mode_clk(mode_clk, perf_cfg); } static struct dpu_kms *_dpu_crtc_get_kms(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h index d2f21d34e501..3740bc97422c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h @@ -54,6 +54,9 @@ struct dpu_core_perf { u32 fix_core_ab_vote; }; +u64 dpu_core_perf_adjusted_mode_clk(u64 clk_rate, + const struct dpu_perf_cfg *perf_cfg); + int dpu_core_perf_crtc_check(struct drm_crtc *crtc, struct drm_crtc_state *state); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index 94912b4708fb..4b970a59deaf 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -377,11 +377,10 @@ static void _dpu_crtc_setup_blend_cfg(struct dpu_crtc_mixer *mixer, static void _dpu_crtc_program_lm_output_roi(struct drm_crtc *crtc) { struct dpu_crtc_state *crtc_state; - int lm_idx, lm_horiz_position; + int lm_idx; crtc_state = to_dpu_crtc_state(crtc->state); - lm_horiz_position = 0; for (lm_idx = 0; lm_idx < crtc_state->num_mixers; lm_idx++) { const struct drm_rect *lm_roi = &crtc_state->lm_bounds[lm_idx]; struct dpu_hw_mixer *hw_lm = crtc_state->mixers[lm_idx].hw_lm; @@ -392,7 +391,7 @@ static void _dpu_crtc_program_lm_output_roi(struct drm_crtc *crtc) cfg.out_width = drm_rect_width(lm_roi); cfg.out_height = drm_rect_height(lm_roi); - cfg.right_mixer = lm_horiz_position++; + cfg.right_mixer = lm_idx & 0x1; cfg.flags = 0; hw_lm->ops.setup_mixer_out(hw_lm, &cfg); } @@ -1534,6 +1533,7 @@ static enum drm_mode_status dpu_crtc_mode_valid(struct drm_crtc *crtc, const struct drm_display_mode *mode) { struct dpu_kms *dpu_kms = _dpu_crtc_get_kms(crtc); + u64 adjusted_mode_clk; /* if there is no 3d_mux block we cannot merge LMs so we cannot * split the large layer into 2 LMs, filter out such modes @@ -1541,6 +1541,17 @@ static enum drm_mode_status dpu_crtc_mode_valid(struct drm_crtc *crtc, if (!dpu_kms->catalog->caps->has_3d_merge && mode->hdisplay > dpu_kms->catalog->caps->max_mixer_width) return MODE_BAD_HVALUE; + + adjusted_mode_clk = dpu_core_perf_adjusted_mode_clk(mode->clock, + dpu_kms->perf.perf_cfg); + + /* + * The given mode, adjusted for the perf clock factor, should not exceed + * the max core clock rate + */ + if (dpu_kms->perf.max_core_clk_rate < adjusted_mode_clk * 1000) + return MODE_CLOCK_HIGH; + /* * max crtc width is equal to the max mixer width * 2 and max height is 4K */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index 56a5b596554d..46f348972a97 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -446,7 +446,7 @@ static void _dpu_encoder_phys_wb_handle_wbdone_timeout( static int dpu_encoder_phys_wb_wait_for_commit_done( struct dpu_encoder_phys *phys_enc) { - unsigned long ret; + int ret; struct dpu_encoder_wait_info wait_info; struct dpu_encoder_phys_wb *wb_enc = to_dpu_encoder_phys_wb(phys_enc); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index e824cd64fd3f..6641455c4ec6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -338,7 +338,6 @@ static const struct dpu_sspp_sub_blks dpu_dma_sblk = _DMA_SBLK(); *************************************************************/ static const struct dpu_lm_sub_blks msm8998_lm_sblk = { - .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .maxblendstages = 7, /* excluding base layer */ .blendstage_base = { /* offsets relative to mixer base */ 0x20, 0x50, 0x80, 0xb0, 0x230, @@ -347,7 +346,6 @@ static const struct dpu_lm_sub_blks msm8998_lm_sblk = { }; static const struct dpu_lm_sub_blks sdm845_lm_sblk = { - .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .maxblendstages = 11, /* excluding base layer */ .blendstage_base = { /* offsets relative to mixer base */ 0x20, 0x38, 0x50, 0x68, 0x80, 0x98, @@ -356,7 +354,6 @@ static const struct dpu_lm_sub_blks sdm845_lm_sblk = { }; static const struct dpu_lm_sub_blks sc7180_lm_sblk = { - .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .maxblendstages = 7, /* excluding base layer */ .blendstage_base = { /* offsets relative to mixer base */ 0x20, 0x38, 0x50, 0x68, 0x80, 0x98, 0xb0 @@ -364,7 +361,6 @@ static const struct dpu_lm_sub_blks sc7180_lm_sblk = { }; static const struct dpu_lm_sub_blks sm8750_lm_sblk = { - .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .maxblendstages = 11, /* excluding base layer */ .blendstage_base = { /* offsets relative to mixer base */ /* 0x40 + n*0x30 */ @@ -374,7 +370,6 @@ static const struct dpu_lm_sub_blks sm8750_lm_sblk = { }; static const struct dpu_lm_sub_blks qcm2290_lm_sblk = { - .maxwidth = DEFAULT_DPU_LINE_WIDTH, .maxblendstages = 4, /* excluding base layer */ .blendstage_base = { /* offsets relative to mixer base */ 0x20, 0x38, 0x50, 0x68 diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index a78bb2c334e3..f0768f54e9b3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -307,7 +307,6 @@ struct dpu_sspp_sub_blks { * @blendstage_base: Blend-stage register base offset */ struct dpu_lm_sub_blks { - u32 maxwidth; u32 maxblendstages; u32 blendstage_base[MAX_BLOCKS]; }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index a306077647c3..4e5a8ecd31f7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -1110,7 +1110,7 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms) { struct drm_gpuvm *vm; - vm = msm_kms_init_vm(dpu_kms->dev); + vm = msm_kms_init_vm(dpu_kms->dev, dpu_kms->dev->dev->parent); if (IS_ERR(vm)) return PTR_ERR(vm); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 6859e8ef6b05..f54cf0faa1c7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -922,6 +922,9 @@ static int dpu_plane_is_multirect_capable(struct dpu_hw_sspp *sspp, if (MSM_FORMAT_IS_YUV(fmt)) return false; + if (!sspp) + return true; + if (!test_bit(DPU_SSPP_SMART_DMA_V1, &sspp->cap->features) && !test_bit(DPU_SSPP_SMART_DMA_V2, &sspp->cap->features)) return false; @@ -1028,6 +1031,7 @@ static int dpu_plane_try_multirect_shared(struct dpu_plane_state *pstate, prev_pipe->multirect_mode != DPU_SSPP_MULTIRECT_NONE) return false; + /* Do not validate SSPP of current plane when it is not ready */ if (!dpu_plane_is_multirect_capable(pipe->sspp, pipe_cfg, fmt) || !dpu_plane_is_multirect_capable(prev_pipe->sspp, prev_pipe_cfg, prev_fmt)) return false; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index 25382120cb1a..2c77c74fac0f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -865,6 +865,21 @@ void dpu_rm_release_all_sspp(struct dpu_global_state *global_state, ARRAY_SIZE(global_state->sspp_to_crtc_id), crtc_id); } +static char *dpu_hw_blk_type_name[] = { + [DPU_HW_BLK_TOP] = "TOP", + [DPU_HW_BLK_SSPP] = "SSPP", + [DPU_HW_BLK_LM] = "LM", + [DPU_HW_BLK_CTL] = "CTL", + [DPU_HW_BLK_PINGPONG] = "pingpong", + [DPU_HW_BLK_INTF] = "INTF", + [DPU_HW_BLK_WB] = "WB", + [DPU_HW_BLK_DSPP] = "DSPP", + [DPU_HW_BLK_MERGE_3D] = "merge_3d", + [DPU_HW_BLK_DSC] = "DSC", + [DPU_HW_BLK_CDM] = "CDM", + [DPU_HW_BLK_MAX] = "unknown", +}; + /** * dpu_rm_get_assigned_resources - Get hw resources of the given type that are * assigned to this encoder @@ -946,13 +961,13 @@ int dpu_rm_get_assigned_resources(struct dpu_rm *rm, } if (num_blks == blks_size) { - DPU_ERROR("More than %d resources assigned to crtc %d\n", - blks_size, crtc_id); + DPU_ERROR("More than %d %s assigned to crtc %d\n", + blks_size, dpu_hw_blk_type_name[type], crtc_id); break; } if (!hw_blks[i]) { - DPU_ERROR("Allocated resource %d unavailable to assign to crtc %d\n", - type, crtc_id); + DPU_ERROR("%s unavailable to assign to crtc %d\n", + dpu_hw_blk_type_name[type], crtc_id); break; } blks[num_blks++] = hw_blks[i]; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c index 8ff496082902..cd73468e369a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c @@ -80,7 +80,6 @@ static int dpu_wb_conn_atomic_check(struct drm_connector *connector, static const struct drm_connector_funcs dpu_wb_conn_funcs = { .reset = drm_atomic_helper_connector_reset, .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = drm_connector_cleanup, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; @@ -131,12 +130,9 @@ int dpu_writeback_init(struct drm_device *dev, struct drm_encoder *enc, drm_connector_helper_add(&dpu_wb_conn->base.base, &dpu_wb_conn_helper_funcs); - /* DPU initializes the encoder and sets it up completely for writeback - * cases and hence should use the new API drm_writeback_connector_init_with_encoder - * to initialize the writeback connector - */ - rc = drm_writeback_connector_init_with_encoder(dev, &dpu_wb_conn->base, enc, - &dpu_wb_conn_funcs, format_list, num_formats); + rc = drmm_writeback_connector_init(dev, &dpu_wb_conn->base, + &dpu_wb_conn_funcs, enc, + format_list, num_formats); if (!rc) dpu_wb_conn->wb_enc = enc; diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index 0952c7f18abd..809ca191e9de 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -391,11 +391,9 @@ static void read_mdp_hw_revision(struct mdp4_kms *mdp4_kms, static int mdp4_kms_init(struct drm_device *dev) { - struct platform_device *pdev = to_platform_device(dev->dev); struct msm_drm_private *priv = dev->dev_private; struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(priv->kms)); struct msm_kms *kms = NULL; - struct msm_mmu *mmu; struct drm_gpuvm *vm; int ret; u32 major, minor; @@ -458,29 +456,14 @@ static int mdp4_kms_init(struct drm_device *dev) mdp4_disable(mdp4_kms); mdelay(16); - mmu = msm_iommu_new(&pdev->dev, 0); - if (IS_ERR(mmu)) { - ret = PTR_ERR(mmu); + vm = msm_kms_init_vm(mdp4_kms->dev, NULL); + if (IS_ERR(vm)) { + ret = PTR_ERR(vm); goto fail; - } else if (!mmu) { - DRM_DEV_INFO(dev->dev, "no iommu, fallback to phys " - "contig buffers for scanout\n"); - vm = NULL; - } else { - vm = msm_gem_vm_create(dev, mmu, "mdp4", - 0x1000, 0x100000000 - 0x1000, - true); - - if (IS_ERR(vm)) { - if (!IS_ERR(mmu)) - mmu->funcs->destroy(mmu); - ret = PTR_ERR(vm); - goto fail; - } - - kms->vm = vm; } + kms->vm = vm; + ret = modeset_init(mdp4_kms); if (ret) { DRM_DEV_ERROR(dev->dev, "modeset_init failed: %d\n", ret); @@ -529,7 +512,7 @@ static int mdp4_probe(struct platform_device *pdev) mdp4_kms = devm_kzalloc(dev, sizeof(*mdp4_kms), GFP_KERNEL); if (!mdp4_kms) - return dev_err_probe(dev, -ENOMEM, "failed to allocate kms\n"); + return -ENOMEM; mdp4_kms->mmio = msm_ioremap(pdev, NULL); if (IS_ERR(mdp4_kms->mmio)) diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h index fb348583dc84..06458d4ee48c 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h @@ -202,6 +202,6 @@ static inline struct drm_encoder *mdp4_dsi_encoder_init(struct drm_device *dev) } #endif -struct clk *mpd4_get_lcdc_clock(struct drm_device *dev); +struct clk *mdp4_get_lcdc_clock(struct drm_device *dev); #endif /* __MDP4_KMS_H__ */ diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c index 06a307c1272d..1051873057f6 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c @@ -375,7 +375,7 @@ struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev) drm_encoder_helper_add(encoder, &mdp4_lcdc_encoder_helper_funcs); - mdp4_lcdc_encoder->lcdc_clk = mpd4_get_lcdc_clock(dev); + mdp4_lcdc_encoder->lcdc_clk = mdp4_get_lcdc_clock(dev); if (IS_ERR(mdp4_lcdc_encoder->lcdc_clk)) { DRM_DEV_ERROR(dev->dev, "failed to get lvds_clk\n"); return ERR_CAST(mdp4_lcdc_encoder->lcdc_clk); diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c index fa2c29470510..04c49bf3d854 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c @@ -54,7 +54,7 @@ static const struct pll_rate *find_rate(unsigned long rate) return &freqtbl[i-1]; } -static int mpd4_lvds_pll_enable(struct clk_hw *hw) +static int mdp4_lvds_pll_enable(struct clk_hw *hw) { struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw); struct mdp4_kms *mdp4_kms = get_kms(lvds_pll); @@ -80,7 +80,7 @@ static int mpd4_lvds_pll_enable(struct clk_hw *hw) return 0; } -static void mpd4_lvds_pll_disable(struct clk_hw *hw) +static void mdp4_lvds_pll_disable(struct clk_hw *hw) { struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw); struct mdp4_kms *mdp4_kms = get_kms(lvds_pll); @@ -91,21 +91,24 @@ static void mpd4_lvds_pll_disable(struct clk_hw *hw) mdp4_write(mdp4_kms, REG_MDP4_LVDS_PHY_PLL_CTRL_0, 0x0); } -static unsigned long mpd4_lvds_pll_recalc_rate(struct clk_hw *hw, +static unsigned long mdp4_lvds_pll_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw); return lvds_pll->pixclk; } -static long mpd4_lvds_pll_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) +static int mdp4_lvds_pll_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { - const struct pll_rate *pll_rate = find_rate(rate); - return pll_rate->rate; + const struct pll_rate *pll_rate = find_rate(req->rate); + + req->rate = pll_rate->rate; + + return 0; } -static int mpd4_lvds_pll_set_rate(struct clk_hw *hw, unsigned long rate, +static int mdp4_lvds_pll_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate) { struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw); @@ -114,26 +117,26 @@ static int mpd4_lvds_pll_set_rate(struct clk_hw *hw, unsigned long rate, } -static const struct clk_ops mpd4_lvds_pll_ops = { - .enable = mpd4_lvds_pll_enable, - .disable = mpd4_lvds_pll_disable, - .recalc_rate = mpd4_lvds_pll_recalc_rate, - .round_rate = mpd4_lvds_pll_round_rate, - .set_rate = mpd4_lvds_pll_set_rate, +static const struct clk_ops mdp4_lvds_pll_ops = { + .enable = mdp4_lvds_pll_enable, + .disable = mdp4_lvds_pll_disable, + .recalc_rate = mdp4_lvds_pll_recalc_rate, + .determine_rate = mdp4_lvds_pll_determine_rate, + .set_rate = mdp4_lvds_pll_set_rate, }; -static const struct clk_parent_data mpd4_lvds_pll_parents[] = { +static const struct clk_parent_data mdp4_lvds_pll_parents[] = { { .fw_name = "pxo", .name = "pxo", }, }; static struct clk_init_data pll_init = { - .name = "mpd4_lvds_pll", - .ops = &mpd4_lvds_pll_ops, - .parent_data = mpd4_lvds_pll_parents, - .num_parents = ARRAY_SIZE(mpd4_lvds_pll_parents), + .name = "mdp4_lvds_pll", + .ops = &mdp4_lvds_pll_ops, + .parent_data = mdp4_lvds_pll_parents, + .num_parents = ARRAY_SIZE(mdp4_lvds_pll_parents), }; -static struct clk_hw *mpd4_lvds_pll_init(struct drm_device *dev) +static struct clk_hw *mdp4_lvds_pll_init(struct drm_device *dev) { struct mdp4_lvds_pll *lvds_pll; int ret; @@ -156,14 +159,14 @@ static struct clk_hw *mpd4_lvds_pll_init(struct drm_device *dev) return &lvds_pll->pll_hw; } -struct clk *mpd4_get_lcdc_clock(struct drm_device *dev) +struct clk *mdp4_get_lcdc_clock(struct drm_device *dev) { struct clk_hw *hw; struct clk *clk; /* TODO: do we need different pll in other cases? */ - hw = mpd4_lvds_pll_init(dev); + hw = mdp4_lvds_pll_init(dev); if (IS_ERR(hw)) { DRM_DEV_ERROR(dev->dev, "failed to register LVDS PLL\n"); return ERR_CAST(hw); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 5b6ca8dd929e..61edf6864092 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -534,7 +534,7 @@ static int mdp5_kms_init(struct drm_device *dev) } mdelay(16); - vm = msm_kms_init_vm(mdp5_kms->dev); + vm = msm_kms_init_vm(mdp5_kms->dev, pdev->dev.parent); if (IS_ERR(vm)) { ret = PTR_ERR(vm); goto fail; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index 3cbf08231492..e391505fdaf0 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -109,6 +109,7 @@ struct msm_dsi_phy { struct msm_dsi_dphy_timing timing; const struct msm_dsi_phy_cfg *cfg; void *tuning_cfg; + void *pll_data; enum msm_dsi_phy_usecase usecase; bool regulator_ldo_mode; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c index af2e30f3f842..ec486ff02c9b 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c @@ -444,21 +444,19 @@ static unsigned long dsi_pll_10nm_vco_recalc_rate(struct clk_hw *hw, return (unsigned long)vco_rate; } -static long dsi_pll_10nm_clk_round_rate(struct clk_hw *hw, - unsigned long rate, unsigned long *parent_rate) +static int dsi_pll_10nm_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_10nm *pll_10nm = to_pll_10nm(hw); - if (rate < pll_10nm->phy->cfg->min_pll_rate) - return pll_10nm->phy->cfg->min_pll_rate; - else if (rate > pll_10nm->phy->cfg->max_pll_rate) - return pll_10nm->phy->cfg->max_pll_rate; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, + pll_10nm->phy->cfg->min_pll_rate, pll_10nm->phy->cfg->max_pll_rate); + + return 0; } static const struct clk_ops clk_ops_dsi_pll_10nm_vco = { - .round_rate = dsi_pll_10nm_clk_round_rate, + .determine_rate = dsi_pll_10nm_clk_determine_rate, .set_rate = dsi_pll_10nm_vco_set_rate, .recalc_rate = dsi_pll_10nm_vco_recalc_rate, .prepare = dsi_pll_10nm_vco_prepare, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c index 3a1c8ece6657..fdefcbd9c284 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c @@ -578,21 +578,19 @@ static void dsi_pll_14nm_vco_unprepare(struct clk_hw *hw) pll_14nm->phy->pll_on = false; } -static long dsi_pll_14nm_clk_round_rate(struct clk_hw *hw, - unsigned long rate, unsigned long *parent_rate) +static int dsi_pll_14nm_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_14nm *pll_14nm = to_pll_14nm(hw); - if (rate < pll_14nm->phy->cfg->min_pll_rate) - return pll_14nm->phy->cfg->min_pll_rate; - else if (rate > pll_14nm->phy->cfg->max_pll_rate) - return pll_14nm->phy->cfg->max_pll_rate; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, + pll_14nm->phy->cfg->min_pll_rate, pll_14nm->phy->cfg->max_pll_rate); + + return 0; } static const struct clk_ops clk_ops_dsi_pll_14nm_vco = { - .round_rate = dsi_pll_14nm_clk_round_rate, + .determine_rate = dsi_pll_14nm_clk_determine_rate, .set_rate = dsi_pll_14nm_vco_set_rate, .recalc_rate = dsi_pll_14nm_vco_recalc_rate, .prepare = dsi_pll_14nm_vco_prepare, @@ -622,18 +620,20 @@ static unsigned long dsi_pll_14nm_postdiv_recalc_rate(struct clk_hw *hw, postdiv->flags, width); } -static long dsi_pll_14nm_postdiv_round_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long *prate) +static int dsi_pll_14nm_postdiv_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_14nm_postdiv *postdiv = to_pll_14nm_postdiv(hw); struct dsi_pll_14nm *pll_14nm = postdiv->pll; - DBG("DSI%d PLL parent rate=%lu", pll_14nm->phy->id, rate); + DBG("DSI%d PLL parent rate=%lu", pll_14nm->phy->id, req->rate); - return divider_round_rate(hw, rate, prate, NULL, - postdiv->width, - postdiv->flags); + req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate, + NULL, + postdiv->width, + postdiv->flags); + + return 0; } static int dsi_pll_14nm_postdiv_set_rate(struct clk_hw *hw, unsigned long rate, @@ -680,7 +680,7 @@ static int dsi_pll_14nm_postdiv_set_rate(struct clk_hw *hw, unsigned long rate, static const struct clk_ops clk_ops_dsi_pll_14nm_postdiv = { .recalc_rate = dsi_pll_14nm_postdiv_recalc_rate, - .round_rate = dsi_pll_14nm_postdiv_round_rate, + .determine_rate = dsi_pll_14nm_postdiv_determine_rate, .set_rate = dsi_pll_14nm_postdiv_set_rate, }; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c index 90348a2af3e9..d00e415b9a99 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c @@ -533,21 +533,20 @@ static void dsi_pll_28nm_vco_unprepare(struct clk_hw *hw) pll_28nm->phy->pll_on = false; } -static long dsi_pll_28nm_clk_round_rate(struct clk_hw *hw, - unsigned long rate, unsigned long *parent_rate) +static int dsi_pll_28nm_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_28nm *pll_28nm = to_pll_28nm(hw); - if (rate < pll_28nm->phy->cfg->min_pll_rate) - return pll_28nm->phy->cfg->min_pll_rate; - else if (rate > pll_28nm->phy->cfg->max_pll_rate) - return pll_28nm->phy->cfg->max_pll_rate; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, + pll_28nm->phy->cfg->min_pll_rate, + pll_28nm->phy->cfg->max_pll_rate); + + return 0; } static const struct clk_ops clk_ops_dsi_pll_28nm_vco_hpm = { - .round_rate = dsi_pll_28nm_clk_round_rate, + .determine_rate = dsi_pll_28nm_clk_determine_rate, .set_rate = dsi_pll_28nm_clk_set_rate, .recalc_rate = dsi_pll_28nm_clk_recalc_rate, .prepare = dsi_pll_28nm_vco_prepare_hpm, @@ -556,7 +555,7 @@ static const struct clk_ops clk_ops_dsi_pll_28nm_vco_hpm = { }; static const struct clk_ops clk_ops_dsi_pll_28nm_vco_lp = { - .round_rate = dsi_pll_28nm_clk_round_rate, + .determine_rate = dsi_pll_28nm_clk_determine_rate, .set_rate = dsi_pll_28nm_clk_set_rate, .recalc_rate = dsi_pll_28nm_clk_recalc_rate, .prepare = dsi_pll_28nm_vco_prepare_lp, @@ -565,7 +564,7 @@ static const struct clk_ops clk_ops_dsi_pll_28nm_vco_lp = { }; static const struct clk_ops clk_ops_dsi_pll_28nm_vco_8226 = { - .round_rate = dsi_pll_28nm_clk_round_rate, + .determine_rate = dsi_pll_28nm_clk_determine_rate, .set_rate = dsi_pll_28nm_clk_set_rate, .recalc_rate = dsi_pll_28nm_clk_recalc_rate, .prepare = dsi_pll_28nm_vco_prepare_8226, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c index f3643320ff2f..8dcce9581dc3 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c @@ -231,21 +231,19 @@ static void dsi_pll_28nm_vco_unprepare(struct clk_hw *hw) pll_28nm->phy->pll_on = false; } -static long dsi_pll_28nm_clk_round_rate(struct clk_hw *hw, - unsigned long rate, unsigned long *parent_rate) +static int dsi_pll_28nm_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_28nm *pll_28nm = to_pll_28nm(hw); - if (rate < pll_28nm->phy->cfg->min_pll_rate) - return pll_28nm->phy->cfg->min_pll_rate; - else if (rate > pll_28nm->phy->cfg->max_pll_rate) - return pll_28nm->phy->cfg->max_pll_rate; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, + pll_28nm->phy->cfg->min_pll_rate, pll_28nm->phy->cfg->max_pll_rate); + + return 0; } static const struct clk_ops clk_ops_dsi_pll_28nm_vco = { - .round_rate = dsi_pll_28nm_clk_round_rate, + .determine_rate = dsi_pll_28nm_clk_determine_rate, .set_rate = dsi_pll_28nm_clk_set_rate, .recalc_rate = dsi_pll_28nm_clk_recalc_rate, .prepare = dsi_pll_28nm_vco_prepare, @@ -296,18 +294,20 @@ static unsigned int get_vco_mul_factor(unsigned long byte_clk_rate) return 8; } -static long clk_bytediv_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int clk_bytediv_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { unsigned long best_parent; unsigned int factor; - factor = get_vco_mul_factor(rate); + factor = get_vco_mul_factor(req->rate); + + best_parent = req->rate * factor; + req->best_parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw), best_parent); - best_parent = rate * factor; - *prate = clk_hw_round_rate(clk_hw_get_parent(hw), best_parent); + req->rate = req->best_parent_rate / factor; - return *prate / factor; + return 0; } static int clk_bytediv_set_rate(struct clk_hw *hw, unsigned long rate, @@ -328,7 +328,7 @@ static int clk_bytediv_set_rate(struct clk_hw *hw, unsigned long rate, /* Our special byte clock divider ops */ static const struct clk_ops clk_bytediv_ops = { - .round_rate = clk_bytediv_round_rate, + .determine_rate = clk_bytediv_determine_rate, .set_rate = clk_bytediv_set_rate, .recalc_rate = clk_bytediv_recalc_rate, }; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 8c98f91a5930..32f06edd21a9 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -90,6 +90,13 @@ struct dsi_pll_7nm { /* protects REG_DSI_7nm_PHY_CMN_CLK_CFG1 register */ spinlock_t pclk_mux_lock; + /* + * protects REG_DSI_7nm_PHY_CMN_CTRL_0 register and pll_enable_cnt + * member + */ + spinlock_t pll_enable_lock; + int pll_enable_cnt; + struct pll_7nm_cached_state cached_state; struct dsi_pll_7nm *slave; @@ -103,6 +110,9 @@ struct dsi_pll_7nm { */ static struct dsi_pll_7nm *pll_7nm_list[DSI_MAX]; +static void dsi_pll_enable_pll_bias(struct dsi_pll_7nm *pll); +static void dsi_pll_disable_pll_bias(struct dsi_pll_7nm *pll); + static void dsi_pll_setup_config(struct dsi_pll_config *config) { config->ssc_freq = 31500; @@ -340,6 +350,7 @@ static int dsi_pll_7nm_vco_set_rate(struct clk_hw *hw, unsigned long rate, struct dsi_pll_7nm *pll_7nm = to_pll_7nm(hw); struct dsi_pll_config config; + dsi_pll_enable_pll_bias(pll_7nm); DBG("DSI PLL%d rate=%lu, parent's=%lu", pll_7nm->phy->id, rate, parent_rate); @@ -357,6 +368,7 @@ static int dsi_pll_7nm_vco_set_rate(struct clk_hw *hw, unsigned long rate, dsi_pll_ssc_commit(pll_7nm, &config); + dsi_pll_disable_pll_bias(pll_7nm); /* flush, ensure all register writes are done*/ wmb(); @@ -385,19 +397,47 @@ static int dsi_pll_7nm_lock_status(struct dsi_pll_7nm *pll) static void dsi_pll_disable_pll_bias(struct dsi_pll_7nm *pll) { - u32 data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + unsigned long flags; + u32 data; + spin_lock_irqsave(&pll->pll_enable_lock, flags); + --pll->pll_enable_cnt; + if (pll->pll_enable_cnt < 0) { + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); + DRM_DEV_ERROR_RATELIMITED(&pll->phy->pdev->dev, + "bug: imbalance in disabling PLL bias\n"); + return; + } else if (pll->pll_enable_cnt > 0) { + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); + return; + } /* else: == 0 */ + + data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + data &= ~DSI_7nm_PHY_CMN_CTRL_0_PLL_SHUTDOWNB; writel(0, pll->phy->pll_base + REG_DSI_7nm_PHY_PLL_SYSTEM_MUXES); - writel(data & ~BIT(5), pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + writel(data, pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); ndelay(250); } static void dsi_pll_enable_pll_bias(struct dsi_pll_7nm *pll) { - u32 data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + unsigned long flags; + u32 data; + + spin_lock_irqsave(&pll->pll_enable_lock, flags); + if (pll->pll_enable_cnt++) { + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); + WARN_ON(pll->pll_enable_cnt == INT_MAX); + return; + } + + data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + data |= DSI_7nm_PHY_CMN_CTRL_0_PLL_SHUTDOWNB; + writel(data, pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); - writel(data | BIT(5), pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); writel(0xc0, pll->phy->pll_base + REG_DSI_7nm_PHY_PLL_SYSTEM_MUXES); + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); ndelay(250); } @@ -491,6 +531,10 @@ static int dsi_pll_7nm_vco_prepare(struct clk_hw *hw) if (pll_7nm->slave) dsi_pll_enable_global_clk(pll_7nm->slave); + writel(0x1, pll_7nm->phy->base + REG_DSI_7nm_PHY_CMN_RBUF_CTRL); + if (pll_7nm->slave) + writel(0x1, pll_7nm->slave->phy->base + REG_DSI_7nm_PHY_CMN_RBUF_CTRL); + error: return rc; } @@ -534,6 +578,7 @@ static unsigned long dsi_pll_7nm_vco_recalc_rate(struct clk_hw *hw, u32 dec; u64 pll_freq, tmp64; + dsi_pll_enable_pll_bias(pll_7nm); dec = readl(base + REG_DSI_7nm_PHY_PLL_DECIMAL_DIV_START_1); dec &= 0xff; @@ -558,24 +603,24 @@ static unsigned long dsi_pll_7nm_vco_recalc_rate(struct clk_hw *hw, DBG("DSI PLL%d returning vco rate = %lu, dec = %x, frac = %x", pll_7nm->phy->id, (unsigned long)vco_rate, dec, frac); + dsi_pll_disable_pll_bias(pll_7nm); + return (unsigned long)vco_rate; } -static long dsi_pll_7nm_clk_round_rate(struct clk_hw *hw, - unsigned long rate, unsigned long *parent_rate) +static int dsi_pll_7nm_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_7nm *pll_7nm = to_pll_7nm(hw); - if (rate < pll_7nm->phy->cfg->min_pll_rate) - return pll_7nm->phy->cfg->min_pll_rate; - else if (rate > pll_7nm->phy->cfg->max_pll_rate) - return pll_7nm->phy->cfg->max_pll_rate; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, + pll_7nm->phy->cfg->min_pll_rate, pll_7nm->phy->cfg->max_pll_rate); + + return 0; } static const struct clk_ops clk_ops_dsi_pll_7nm_vco = { - .round_rate = dsi_pll_7nm_clk_round_rate, + .determine_rate = dsi_pll_7nm_clk_determine_rate, .set_rate = dsi_pll_7nm_vco_set_rate, .recalc_rate = dsi_pll_7nm_vco_recalc_rate, .prepare = dsi_pll_7nm_vco_prepare, @@ -593,6 +638,7 @@ static void dsi_7nm_pll_save_state(struct msm_dsi_phy *phy) void __iomem *phy_base = pll_7nm->phy->base; u32 cmn_clk_cfg0, cmn_clk_cfg1; + dsi_pll_enable_pll_bias(pll_7nm); cached->pll_out_div = readl(pll_7nm->phy->pll_base + REG_DSI_7nm_PHY_PLL_PLL_OUTDIV_RATE); cached->pll_out_div &= 0x3; @@ -604,6 +650,7 @@ static void dsi_7nm_pll_save_state(struct msm_dsi_phy *phy) cmn_clk_cfg1 = readl(phy_base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); cached->pll_mux = FIELD_GET(DSI_7nm_PHY_CMN_CLK_CFG1_DSICLK_SEL__MASK, cmn_clk_cfg1); + dsi_pll_disable_pll_bias(pll_7nm); DBG("DSI PLL%d outdiv %x bit_clk_div %x pix_clk_div %x pll_mux %x", pll_7nm->phy->id, cached->pll_out_div, cached->bit_clk_div, cached->pix_clk_div, cached->pll_mux); @@ -826,8 +873,10 @@ static int dsi_pll_7nm_init(struct msm_dsi_phy *phy) spin_lock_init(&pll_7nm->postdiv_lock); spin_lock_init(&pll_7nm->pclk_mux_lock); + spin_lock_init(&pll_7nm->pll_enable_lock); pll_7nm->phy = phy; + phy->pll_data = pll_7nm; ret = pll_7nm_register(pll_7nm, phy->provided_clocks->hws); if (ret) { @@ -839,6 +888,12 @@ static int dsi_pll_7nm_init(struct msm_dsi_phy *phy) /* TODO: Remove this when we have proper display handover support */ msm_dsi_phy_pll_save_state(phy); + /* + * Store also proper vco_current_rate, because its value will be used in + * dsi_7nm_pll_restore_state(). + */ + if (!dsi_pll_7nm_vco_recalc_rate(&pll_7nm->clk_hw, VCO_REF_CLK_RATE)) + pll_7nm->vco_current_rate = pll_7nm->phy->cfg->min_pll_rate; return 0; } @@ -910,8 +965,10 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, u32 const delay_us = 5; u32 const timeout_us = 1000; struct msm_dsi_dphy_timing *timing = &phy->timing; + struct dsi_pll_7nm *pll = phy->pll_data; void __iomem *base = phy->base; bool less_than_1500_mhz; + unsigned long flags; u32 vreg_ctrl_0, vreg_ctrl_1, lane_ctrl0; u32 glbl_pemph_ctrl_0; u32 glbl_str_swi_cal_sel_ctrl, glbl_hstx_str_ctrl_0; @@ -1033,9 +1090,13 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, glbl_rescode_bot_ctrl = 0x3c; } + spin_lock_irqsave(&pll->pll_enable_lock, flags); + pll->pll_enable_cnt = 1; /* de-assert digital and pll power down */ - data = BIT(6) | BIT(5); + data = DSI_7nm_PHY_CMN_CTRL_0_DIGTOP_PWRDN_B | + DSI_7nm_PHY_CMN_CTRL_0_PLL_SHUTDOWNB; writel(data, base + REG_DSI_7nm_PHY_CMN_CTRL_0); + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); /* Assert PLL core reset */ writel(0x00, base + REG_DSI_7nm_PHY_CMN_PLL_CNTRL); @@ -1148,7 +1209,9 @@ static bool dsi_7nm_set_continuous_clock(struct msm_dsi_phy *phy, bool enable) static void dsi_7nm_phy_disable(struct msm_dsi_phy *phy) { + struct dsi_pll_7nm *pll = phy->pll_data; void __iomem *base = phy->base; + unsigned long flags; u32 data; DBG(""); @@ -1175,8 +1238,12 @@ static void dsi_7nm_phy_disable(struct msm_dsi_phy *phy) writel(data, base + REG_DSI_7nm_PHY_CMN_CTRL_0); writel(0, base + REG_DSI_7nm_PHY_CMN_LANE_CTRL0); + spin_lock_irqsave(&pll->pll_enable_lock, flags); + pll->pll_enable_cnt = 0; /* Turn off all PHY blocks */ writel(0x00, base + REG_DSI_7nm_PHY_CMN_CTRL_0); + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); + /* make sure phy is turned off */ wmb(); diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c index 8c8d80b59573..36e928b0fd5a 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c @@ -629,16 +629,12 @@ static int hdmi_8996_pll_prepare(struct clk_hw *hw) return 0; } -static long hdmi_8996_pll_round_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long *parent_rate) +static int hdmi_8996_pll_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { - if (rate < HDMI_PCLK_MIN_FREQ) - return HDMI_PCLK_MIN_FREQ; - else if (rate > HDMI_PCLK_MAX_FREQ) - return HDMI_PCLK_MAX_FREQ; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, HDMI_PCLK_MIN_FREQ, HDMI_PCLK_MAX_FREQ); + + return 0; } static unsigned long hdmi_8996_pll_recalc_rate(struct clk_hw *hw, @@ -684,7 +680,7 @@ static int hdmi_8996_pll_is_enabled(struct clk_hw *hw) static const struct clk_ops hdmi_8996_pll_ops = { .set_rate = hdmi_8996_pll_set_clk_rate, - .round_rate = hdmi_8996_pll_round_rate, + .determine_rate = hdmi_8996_pll_determine_rate, .recalc_rate = hdmi_8996_pll_recalc_rate, .prepare = hdmi_8996_pll_prepare, .unprepare = hdmi_8996_pll_unprepare, diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c index 33bb48ae58a2..a86ff3706369 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c @@ -646,16 +646,12 @@ static int hdmi_8998_pll_prepare(struct clk_hw *hw) return 0; } -static long hdmi_8998_pll_round_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long *parent_rate) +static int hdmi_8998_pll_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { - if (rate < HDMI_PCLK_MIN_FREQ) - return HDMI_PCLK_MIN_FREQ; - else if (rate > HDMI_PCLK_MAX_FREQ) - return HDMI_PCLK_MAX_FREQ; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, HDMI_PCLK_MIN_FREQ, HDMI_PCLK_MAX_FREQ); + + return 0; } static unsigned long hdmi_8998_pll_recalc_rate(struct clk_hw *hw, @@ -688,7 +684,7 @@ static int hdmi_8998_pll_is_enabled(struct clk_hw *hw) static const struct clk_ops hdmi_8998_pll_ops = { .set_rate = hdmi_8998_pll_set_clk_rate, - .round_rate = hdmi_8998_pll_round_rate, + .determine_rate = hdmi_8998_pll_determine_rate, .recalc_rate = hdmi_8998_pll_recalc_rate, .prepare = hdmi_8998_pll_prepare, .unprepare = hdmi_8998_pll_unprepare, diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c b/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c index 83c8781fcc3f..6ba6bbdb7e05 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c @@ -373,12 +373,14 @@ static unsigned long hdmi_pll_recalc_rate(struct clk_hw *hw, return pll->pixclk; } -static long hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) +static int hdmi_pll_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { - const struct pll_rate *pll_rate = find_rate(rate); + const struct pll_rate *pll_rate = find_rate(req->rate); + + req->rate = pll_rate->rate; - return pll_rate->rate; + return 0; } static int hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, @@ -402,7 +404,7 @@ static const struct clk_ops hdmi_pll_ops = { .enable = hdmi_pll_enable, .disable = hdmi_pll_disable, .recalc_rate = hdmi_pll_recalc_rate, - .round_rate = hdmi_pll_round_rate, + .determine_rate = hdmi_pll_determine_rate, .set_rate = hdmi_pll_set_rate, }; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 9dcc7a596a11..7e977fec4100 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -826,6 +826,7 @@ static const struct file_operations fops = { #define DRIVER_FEATURES_KMS ( \ DRIVER_GEM | \ + DRIVER_GEM_GPUVA | \ DRIVER_ATOMIC | \ DRIVER_MODESET | \ 0 ) diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 985db9febd98..6d847d593f1a 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -229,7 +229,7 @@ void msm_crtc_disable_vblank(struct drm_crtc *crtc); int msm_register_mmu(struct drm_device *dev, struct msm_mmu *mmu); void msm_unregister_mmu(struct drm_device *dev, struct msm_mmu *mmu); -struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev); +struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev, struct device *mdss_dev); bool msm_use_mmu(struct drm_device *dev); int msm_ioctl_gem_submit(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index e7631f4ef530..07d8cdd6bb2e 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -191,7 +191,7 @@ static struct page **get_pages(struct drm_gem_object *obj) if (!msm_obj->pages) { struct drm_device *dev = obj->dev; struct page **p; - int npages = obj->size >> PAGE_SHIFT; + size_t npages = obj->size >> PAGE_SHIFT; p = drm_gem_get_pages(obj); @@ -1148,7 +1148,7 @@ static int msm_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct /* convenience method to construct a GEM buffer object, and userspace handle */ int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, - uint32_t size, uint32_t flags, uint32_t *handle, + size_t size, uint32_t flags, uint32_t *handle, char *name) { struct drm_gem_object *obj; @@ -1214,9 +1214,8 @@ static const struct drm_gem_object_funcs msm_gem_object_funcs = { .vm_ops = &vm_ops, }; -static int msm_gem_new_impl(struct drm_device *dev, - uint32_t size, uint32_t flags, - struct drm_gem_object **obj) +static int msm_gem_new_impl(struct drm_device *dev, uint32_t flags, + struct drm_gem_object **obj) { struct msm_drm_private *priv = dev->dev_private; struct msm_gem_object *msm_obj; @@ -1250,7 +1249,7 @@ static int msm_gem_new_impl(struct drm_device *dev, return 0; } -struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32_t flags) +struct drm_gem_object *msm_gem_new(struct drm_device *dev, size_t size, uint32_t flags) { struct msm_drm_private *priv = dev->dev_private; struct msm_gem_object *msm_obj; @@ -1265,7 +1264,7 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32 if (size == 0) return ERR_PTR(-EINVAL); - ret = msm_gem_new_impl(dev, size, flags, &obj); + ret = msm_gem_new_impl(dev, flags, &obj); if (ret) return ERR_PTR(ret); @@ -1305,12 +1304,12 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev, struct msm_drm_private *priv = dev->dev_private; struct msm_gem_object *msm_obj; struct drm_gem_object *obj; - uint32_t size; - int ret, npages; + size_t size, npages; + int ret; size = PAGE_ALIGN(dmabuf->size); - ret = msm_gem_new_impl(dev, size, MSM_BO_WC, &obj); + ret = msm_gem_new_impl(dev, MSM_BO_WC, &obj); if (ret) return ERR_PTR(ret); @@ -1353,7 +1352,7 @@ fail: return ERR_PTR(ret); } -void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size, uint32_t flags, +void *msm_gem_kernel_new(struct drm_device *dev, size_t size, uint32_t flags, struct drm_gpuvm *vm, struct drm_gem_object **bo, uint64_t *iova) { diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 751c3b4965bc..a4cf31853c50 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -297,10 +297,10 @@ bool msm_gem_active(struct drm_gem_object *obj); int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout); int msm_gem_cpu_fini(struct drm_gem_object *obj); int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, - uint32_t size, uint32_t flags, uint32_t *handle, char *name); + size_t size, uint32_t flags, uint32_t *handle, char *name); struct drm_gem_object *msm_gem_new(struct drm_device *dev, - uint32_t size, uint32_t flags); -void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size, uint32_t flags, + size_t size, uint32_t flags); +void *msm_gem_kernel_new(struct drm_device *dev, size_t size, uint32_t flags, struct drm_gpuvm *vm, struct drm_gem_object **bo, uint64_t *iova); void msm_gem_kernel_put(struct drm_gem_object *bo, struct drm_gpuvm *vm); diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index c0a33ac839cb..036d34c674d9 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -15,7 +15,7 @@ struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); - int npages = obj->size >> PAGE_SHIFT; + size_t npages = obj->size >> PAGE_SHIFT; if (msm_obj->flags & MSM_BO_NO_SHARE) return ERR_PTR(-EINVAL); diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 6df6b7c0984d..8316af1723c2 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -1030,6 +1030,7 @@ vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args struct drm_device *dev = job->vm->drm; int ret = 0; int cnt = 0; + int i = -1; if (args->nr_ops == 1) { /* Single op case, the op is inlined: */ @@ -1063,11 +1064,12 @@ vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args spin_lock(&file->table_lock); - for (unsigned i = 0; i < args->nr_ops; i++) { + for (i = 0; i < args->nr_ops; i++) { + struct msm_vm_bind_op *op = &job->ops[i]; struct drm_gem_object *obj; - if (!job->ops[i].handle) { - job->ops[i].obj = NULL; + if (!op->handle) { + op->obj = NULL; continue; } @@ -1075,16 +1077,22 @@ vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args * normally use drm_gem_object_lookup(), but for bulk lookup * all under single table_lock just hit object_idr directly: */ - obj = idr_find(&file->object_idr, job->ops[i].handle); + obj = idr_find(&file->object_idr, op->handle); if (!obj) { - ret = UERR(EINVAL, dev, "invalid handle %u at index %u\n", job->ops[i].handle, i); + ret = UERR(EINVAL, dev, "invalid handle %u at index %u\n", op->handle, i); goto out_unlock; } drm_gem_object_get(obj); - job->ops[i].obj = obj; + op->obj = obj; cnt++; + + if ((op->range + op->obj_offset) > obj->size) { + ret = UERR(EINVAL, dev, "invalid range: %016llx + %016llx > %016zx\n", + op->range, op->obj_offset, obj->size); + goto out_unlock; + } } *nr_bos = cnt; @@ -1092,6 +1100,17 @@ vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args out_unlock: spin_unlock(&file->table_lock); + if (ret) { + for (; i >= 0; i--) { + struct msm_vm_bind_op *op = &job->ops[i]; + + if (!op->obj) + continue; + + drm_gem_object_put(op->obj); + op->obj = NULL; + } + } out: return ret; } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 26c5ce897cbb..17759abc46d7 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -304,7 +304,7 @@ static void crashstate_get_bos(struct msm_gpu_state *state, struct msm_gem_submi sizeof(struct msm_gpu_state_bo), GFP_KERNEL); for (int i = 0; state->bos && i < submit->nr_bos; i++) { - struct drm_gem_object *obj = submit->bos[i].obj;; + struct drm_gem_object *obj = submit->bos[i].obj; bool dump = rd_full || (submit->bos[i].flags & MSM_SUBMIT_BO_DUMP); msm_gem_lock(obj); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index b2a96544f92a..a597f2bee30b 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -16,6 +16,7 @@ #include "msm_drv.h" #include "msm_fence.h" +#include "msm_gpu_trace.h" #include "msm_ringbuffer.h" #include "msm_gem.h" @@ -91,6 +92,7 @@ struct msm_gpu_funcs { * for cmdstream that is buffered in this FIFO upstream of the CP fw. */ bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); + void (*sysprof_setup)(struct msm_gpu *gpu); }; /* Additional state for iommu faults: */ @@ -613,16 +615,19 @@ struct msm_gpu_state { static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data) { + trace_msm_gpu_regaccess(reg); writel(data, gpu->mmio + (reg << 2)); } static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg) { + trace_msm_gpu_regaccess(reg); return readl(gpu->mmio + (reg << 2)); } static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or) { + trace_msm_gpu_regaccess(reg); msm_rmw(gpu->mmio + (reg << 2), mask, or); } @@ -644,7 +649,9 @@ static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg) * when the lo is read, so make sure to read the lo first to trigger * that */ + trace_msm_gpu_regaccess(reg); val = (u64) readl(gpu->mmio + (reg << 2)); + trace_msm_gpu_regaccess(reg+1); val |= ((u64) readl(gpu->mmio + ((reg + 1) << 2)) << 32); return val; @@ -652,8 +659,10 @@ static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg) static inline void gpu_write64(struct msm_gpu *gpu, u32 reg, u64 val) { + trace_msm_gpu_regaccess(reg); /* Why not a writeq here? Read the screed above */ writel(lower_32_bits(val), gpu->mmio + (reg << 2)); + trace_msm_gpu_regaccess(reg+1); writel(upper_32_bits(val), gpu->mmio + ((reg + 1) << 2)); } diff --git a/drivers/gpu/drm/msm/msm_gpu_trace.h b/drivers/gpu/drm/msm/msm_gpu_trace.h index 781bbe5540bd..5417f8d389a3 100644 --- a/drivers/gpu/drm/msm/msm_gpu_trace.h +++ b/drivers/gpu/drm/msm/msm_gpu_trace.h @@ -219,6 +219,18 @@ TRACE_EVENT(msm_mmu_prealloc_cleanup, TP_printk("count=%u, remaining=%u", __entry->count, __entry->remaining) ); +TRACE_EVENT(msm_gpu_regaccess, + TP_PROTO(u32 offset), + TP_ARGS(offset), + TP_STRUCT__entry( + __field(u32, offset) + ), + TP_fast_assign( + __entry->offset = offset; + ), + TP_printk("offset=0x%x", __entry->offset) +); + #endif #undef TRACE_INCLUDE_PATH diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 76cdd5ea06a0..0e18619f96cb 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -721,7 +721,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks) int ret; if (!device_iommu_mapped(dev)) - return NULL; + return ERR_PTR(-ENODEV); domain = iommu_paging_domain_alloc(dev); if (IS_ERR(domain)) @@ -756,7 +756,7 @@ struct msm_mmu *msm_iommu_disp_new(struct device *dev, unsigned long quirks) struct msm_mmu *mmu; mmu = msm_iommu_new(dev, quirks); - if (IS_ERR_OR_NULL(mmu)) + if (IS_ERR(mmu)) return mmu; iommu = to_msm_iommu(mmu); @@ -772,11 +772,11 @@ struct msm_mmu *msm_iommu_gpu_new(struct device *dev, struct msm_gpu *gpu, unsig struct msm_mmu *mmu; mmu = msm_iommu_new(dev, quirks); - if (IS_ERR_OR_NULL(mmu)) + if (IS_ERR(mmu)) return mmu; iommu = to_msm_iommu(mmu); - if (adreno_smmu && adreno_smmu->cookie) { + if (adreno_smmu->cookie) { const struct io_pgtable_cfg *cfg = adreno_smmu->get_ttbr1_cfg(adreno_smmu->cookie); size_t tblsz = get_tblsz(cfg); diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c index 56828d218e88..6e5e94f5c9a7 100644 --- a/drivers/gpu/drm/msm/msm_kms.c +++ b/drivers/gpu/drm/msm/msm_kms.c @@ -177,12 +177,11 @@ static int msm_kms_fault_handler(void *arg, unsigned long iova, int flags, void return -ENOSYS; } -struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev) +struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev, struct device *mdss_dev) { struct drm_gpuvm *vm; struct msm_mmu *mmu; struct device *mdp_dev = dev->dev; - struct device *mdss_dev = mdp_dev->parent; struct msm_drm_private *priv = dev->dev_private; struct msm_kms *kms = priv->kms; struct device *iommu_dev; @@ -193,18 +192,17 @@ struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev) */ if (device_iommu_mapped(mdp_dev)) iommu_dev = mdp_dev; - else + else if (mdss_dev && device_iommu_mapped(mdss_dev)) iommu_dev = mdss_dev; + else { + drm_info(dev, "no IOMMU, bailing out\n"); + return ERR_PTR(-ENODEV); + } mmu = msm_iommu_disp_new(iommu_dev, 0); if (IS_ERR(mmu)) return ERR_CAST(mmu); - if (!mmu) { - drm_info(dev, "no IOMMU, fallback to phys contig buffers for scanout\n"); - return NULL; - } - vm = msm_gem_vm_create(dev, mmu, "mdp_kms", 0x1000, 0x100000000 - 0x1000, true); if (IS_ERR(vm)) { diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 39885b333910..2d0e3e784c04 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -154,8 +154,7 @@ static int _msm_mdss_irq_domain_add(struct msm_mdss *msm_mdss) dev = msm_mdss->dev; - domain = irq_domain_create_linear(of_fwnode_handle(dev->of_node), 32, - &msm_mdss_irqdomain_ops, msm_mdss); + domain = irq_domain_create_linear(dev_fwnode(dev), 32, &msm_mdss_irqdomain_ops, msm_mdss); if (!domain) { dev_err(dev, "failed to add irq_domain\n"); return -EINVAL; diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c index 8617a82cd6b3..d53dfad16bde 100644 --- a/drivers/gpu/drm/msm/msm_submitqueue.c +++ b/drivers/gpu/drm/msm/msm_submitqueue.c @@ -40,6 +40,10 @@ int msm_context_set_sysprof(struct msm_context *ctx, struct msm_gpu *gpu, int sy break; } + /* Some gpu families require additional setup for sysprof */ + if (gpu->funcs->sysprof_setup) + gpu->funcs->sysprof_setup(gpu); + ctx->sysprof = sysprof; return 0; diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml index 86fab2750ba7..9459b6038217 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml @@ -814,7 +814,7 @@ by a particular renderpass/blit. <bitfield name="Y" low="16" high="29" type="uint"/> </bitset> - <reg32 offset="0x8000" name="GRAS_CL_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_cl_cntl" inline="yes"> <bitfield name="CLIP_DISABLE" pos="0" type="boolean"/> <bitfield name="ZNEAR_CLIP_DISABLE" pos="1" type="boolean"/> <bitfield name="ZFAR_CLIP_DISABLE" pos="2" type="boolean"/> @@ -826,18 +826,20 @@ by a particular renderpass/blit. <bitfield name="VP_CLIP_CODE_IGNORE" pos="7" type="boolean"/> <bitfield name="VP_XFORM_DISABLE" pos="8" type="boolean"/> <bitfield name="PERSP_DIVISION_DISABLE" pos="9" type="boolean"/> - </reg32> + </bitset> + + <reg32 offset="0x8000" name="GRAS_CL_CNTL" type="a6xx_gras_cl_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <bitset name="a6xx_gras_xs_clip_cull_distance" inline="yes"> <bitfield name="CLIP_MASK" low="0" high="7"/> <bitfield name="CULL_MASK" low="8" high="15"/> </bitset> - <reg32 offset="0x8001" name="GRAS_CL_VS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit"/> - <reg32 offset="0x8002" name="GRAS_CL_DS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit"/> - <reg32 offset="0x8003" name="GRAS_CL_GS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit"/> - <reg32 offset="0x8004" name="GRAS_CL_ARRAY_SIZE" low="0" high="10" type="uint" usage="rp_blit"/> + <reg32 offset="0x8001" name="GRAS_CL_VS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A6XX-A7XX" /> + <reg32 offset="0x8002" name="GRAS_CL_DS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A6XX-A7XX" /> + <reg32 offset="0x8003" name="GRAS_CL_GS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A6XX-A7XX" /> + <reg32 offset="0x8004" name="GRAS_CL_ARRAY_SIZE" low="0" high="10" type="uint" usage="rp_blit" variants="A6XX-A7XX" /> - <reg32 offset="0x8005" name="GRAS_CL_INTERP_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_cl_interp_cntl" inline="yes"> <!-- see also RB_INTERP_CNTL --> <bitfield name="IJ_PERSP_PIXEL" pos="0" type="boolean"/> <bitfield name="IJ_PERSP_CENTROID" pos="1" type="boolean"/> @@ -848,26 +850,69 @@ by a particular renderpass/blit. <bitfield name="COORD_MASK" low="6" high="9" type="hex"/> <bitfield name="UNK10" pos="10" type="boolean" variants="A7XX-"/> <bitfield name="UNK11" pos="11" type="boolean" variants="A7XX-"/> - </reg32> - <reg32 offset="0x8006" name="GRAS_CL_GUARDBAND_CLIP_ADJ" usage="rp_blit"> + </bitset> + + <reg32 offset="0x8005" name="GRAS_CL_INTERP_CNTL" type="a6xx_gras_cl_interp_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_cl_guardband_clip_adj" inline="true"> <bitfield name="HORZ" low="0" high="8" type="uint"/> <bitfield name="VERT" low="10" high="18" type="uint"/> - </reg32> + </bitset> + + <reg32 offset="0x8006" name="GRAS_CL_GUARDBAND_CLIP_ADJ" type="a6xx_gras_cl_guardband_clip_adj" variants="A6XX-A7XX" usage="rp_blit"/> <!-- Something connected to depth-stencil attachment size --> <reg32 offset="0x8007" name="GRAS_UNKNOWN_8007" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0x8008" name="GRAS_UNKNOWN_8008" variants="A7XX-" usage="cmd"/> + <!-- the scale/offset is per view, with up to 6 views --> + <bitset name="a6xx_gras_bin_foveat" inline="yes"> + <bitfield name="BINSCALEEN" pos="6" type="boolean"/> + <enum name="a7xx_bin_scale"> + <value value="0" name="NOSCALE"/> + <value value="1" name="SCALE2X"/> + <value value="2" name="SCALE4X"/> + </enum> + <bitfield name="XSCALE_0" low="8" high="9" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_0" low="10" high="11" type="a7xx_bin_scale"/> + <bitfield name="XSCALE_1" low="12" high="13" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_1" low="14" high="15" type="a7xx_bin_scale"/> + <bitfield name="XSCALE_2" low="16" high="17" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_2" low="18" high="19" type="a7xx_bin_scale"/> + <bitfield name="XSCALE_3" low="20" high="21" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_3" low="22" high="23" type="a7xx_bin_scale"/> + <bitfield name="XSCALE_4" low="24" high="25" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_4" low="26" high="27" type="a7xx_bin_scale"/> + <bitfield name="XSCALE_5" low="28" high="29" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_5" low="30" high="31" type="a7xx_bin_scale"/> + </bitset> - <reg32 offset="0x8009" name="GRAS_UNKNOWN_8009" variants="A7XX-" usage="cmd"/> - <reg32 offset="0x800a" name="GRAS_UNKNOWN_800A" variants="A7XX-" usage="cmd"/> - <reg32 offset="0x800b" name="GRAS_UNKNOWN_800B" variants="A7XX-" usage="cmd"/> - <reg32 offset="0x800c" name="GRAS_UNKNOWN_800C" variants="A7XX-" usage="cmd"/> + <reg32 offset="0x8008" name="GRAS_BIN_FOVEAT" type="a6xx_gras_bin_foveat" variants="A7XX" usage="cmd"/> + + <reg32 offset="0x8009" name="GRAS_BIN_FOVEAT_OFFSET_0" variants="A7XX-" usage="cmd"> + <bitfield name="XOFFSET_0" low="0" high="9" shr="2" type="uint"/> + <bitfield name="XOFFSET_1" low="10" high="19" shr="2" type="uint"/> + <bitfield name="XOFFSET_2" low="20" high="29" shr="2" type="uint"/> + </reg32> + <reg32 offset="0x800a" name="GRAS_BIN_FOVEAT_OFFSET_1" variants="A7XX-" usage="cmd"> + <bitfield name="XOFFSET_3" low="0" high="9" shr="2" type="uint"/> + <bitfield name="XOFFSET_4" low="10" high="19" shr="2" type="uint"/> + <bitfield name="XOFFSET_5" low="20" high="29" shr="2" type="uint"/> + </reg32> + <reg32 offset="0x800b" name="GRAS_BIN_FOVEAT_OFFSET_2" variants="A7XX-" usage="cmd"> + <bitfield name="YOFFSET_0" low="0" high="9" shr="2" type="uint"/> + <bitfield name="YOFFSET_1" low="10" high="19" shr="2" type="uint"/> + <bitfield name="YOFFSET_2" low="20" high="29" shr="2" type="uint"/> + </reg32> + <reg32 offset="0x800c" name="GRAS_BIN_FOVEAT_OFFSET_3" variants="A7XX-" usage="cmd"> + <bitfield name="YOFFSET_3" low="0" high="9" shr="2" type="uint"/> + <bitfield name="YOFFSET_4" low="10" high="19" shr="2" type="uint"/> + <bitfield name="YOFFSET_5" low="20" high="29" shr="2" type="uint"/> + </reg32> <!-- <reg32 offset="0x80f0" name="GRAS_UNKNOWN_80F0" type="a6xx_reg_xy"/> --> <!-- 0x8006-0x800f invalid --> - <array offset="0x8010" name="GRAS_CL_VIEWPORT" stride="6" length="16" usage="rp_blit"> + <array offset="0x8010" name="GRAS_CL_VIEWPORT" stride="6" length="16" variants="A6XX-A7XX" usage="rp_blit"> <reg32 offset="0" name="XOFFSET" type="float"/> <reg32 offset="1" name="XSCALE" type="float"/> <reg32 offset="2" name="YOFFSET" type="float"/> @@ -875,12 +920,13 @@ by a particular renderpass/blit. <reg32 offset="4" name="ZOFFSET" type="float"/> <reg32 offset="5" name="ZSCALE" type="float"/> </array> - <array offset="0x8070" name="GRAS_CL_VIEWPORT_ZCLAMP" stride="2" length="16" usage="rp_blit"> + + <array offset="0x8070" name="GRAS_CL_VIEWPORT_ZCLAMP" stride="2" length="16" variants="A6XX-A7XX" usage="rp_blit"> <reg32 offset="0" name="MIN" type="float"/> <reg32 offset="1" name="MAX" type="float"/> </array> - <reg32 offset="0x8090" name="GRAS_SU_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_su_cntl" varset="chip"> <bitfield name="CULL_FRONT" pos="0" type="boolean"/> <bitfield name="CULL_BACK" pos="1" type="boolean"/> <bitfield name="FRONT_CW" pos="2" type="boolean"/> @@ -890,39 +936,66 @@ by a particular renderpass/blit. <bitfield name="LINE_MODE" pos="13" type="a5xx_line_mode"/> <bitfield name="UNK15" low="15" high="16"/> <!-- - On gen1 only MULTIVIEW_ENABLE exists. On gen3 we have - the ability to add the view index to either the RT array - index or the viewport index, and it seems that - MULTIVIEW_ENABLE doesn't do anything, instead we need to - set at least one of RENDERTARGETINDEXINCR or - VIEWPORTINDEXINCR to enable multiview. The blob still - sets MULTIVIEW_ENABLE regardless. - TODO: what about gen2 (a640)? + On gen1 only MULTIVIEW_ENABLE exists. On gen3 we have + the ability to add the view index to either the RT array + index or the viewport index, and it seems that + MULTIVIEW_ENABLE doesn't do anything, instead we need to + set at least one of RENDERTARGETINDEXINCR or + VIEWPORTINDEXINCR to enable multiview. The blob still + sets MULTIVIEW_ENABLE regardless. + TODO: what about gen2 (a640)? --> <bitfield name="MULTIVIEW_ENABLE" pos="17" type="boolean"/> - <bitfield name="RENDERTARGETINDEXINCR" pos="18" type="boolean"/> - <bitfield name="VIEWPORTINDEXINCR" pos="19" type="boolean"/> - <bitfield name="UNK20" low="20" high="22"/> - </reg32> - <reg32 offset="0x8091" name="GRAS_SU_POINT_MINMAX" usage="rp_blit"> + <bitfield name="RENDERTARGETINDEXINCR" pos="18" type="boolean" variants="A6XX-A7XX"/> + <bitfield name="VIEWPORTINDEXINCR" pos="19" type="boolean" variants="A6XX-A7XX"/> + <bitfield name="UNK20" low="20" high="22" variants="A6XX-A7XX"/> + </bitset> + <reg32 offset="0x8090" name="GRAS_SU_CNTL" type="a6xx_gras_su_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_su_point_minmax" inline="yes"> <bitfield name="MIN" low="0" high="15" type="ufixed" radix="4"/> <bitfield name="MAX" low="16" high="31" type="ufixed" radix="4"/> - </reg32> - <reg32 offset="0x8092" name="GRAS_SU_POINT_SIZE" low="0" high="15" type="fixed" radix="4" usage="rp_blit"/> + </bitset> + + <reg32 offset="0x8091" name="GRAS_SU_POINT_MINMAX" type="a6xx_gras_su_point_minmax" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8092" name="GRAS_SU_POINT_SIZE" low="0" high="15" type="fixed" radix="4" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_su_depth_cntl" inline="yes"> + <bitfield name="Z_TEST_ENABLE" pos="0" type="boolean"/> + </bitset> + + <reg32 offset="0x8114" name="GRAS_SU_DEPTH_CNTL" variants="A6XX-A7XX" type="a6xx_gras_su_depth_cntl" usage="rp_blit"/> + + <bitset name="a6xx_gras_su_stencil_cntl" inline="yes"> + <bitfield name="STENCIL_ENABLE" pos="0" type="boolean"/> + </bitset> + + <reg32 offset="0x8115" name="GRAS_SU_STENCIL_CNTL" type="a6xx_gras_su_stencil_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_su_render_cntl" inline="yes"> + <bitfield name="FS_DISABLE" pos="7" type="boolean"/> + </bitset> + + <reg32 offset="0x8116" name="GRAS_SU_RENDER_CNTL" type="a6xx_gras_su_render_cntl" variants="A7XX" usage="rp_blit"/> + <!-- 0x8093 invalid --> - <reg32 offset="0x8094" name="GRAS_SU_DEPTH_PLANE_CNTL" usage="rp_blit"> + <bitset name="a6xx_depth_plane_cntl" inline="yes"> <bitfield name="Z_MODE" low="0" high="1" type="a6xx_ztest_mode"/> - </reg32> - <reg32 offset="0x8095" name="GRAS_SU_POLY_OFFSET_SCALE" type="float" usage="rp_blit"/> - <reg32 offset="0x8096" name="GRAS_SU_POLY_OFFSET_OFFSET" type="float" usage="rp_blit"/> - <reg32 offset="0x8097" name="GRAS_SU_POLY_OFFSET_OFFSET_CLAMP" type="float" usage="rp_blit"/> - <!-- duplicates RB_DEPTH_BUFFER_INFO: --> - <reg32 offset="0x8098" name="GRAS_SU_DEPTH_BUFFER_INFO" usage="rp_blit"> + </bitset> + + <reg32 offset="0x8094" name="GRAS_SU_DEPTH_PLANE_CNTL" type="a6xx_depth_plane_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8095" name="GRAS_SU_POLY_OFFSET_SCALE" type="float" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8096" name="GRAS_SU_POLY_OFFSET_OFFSET" type="float" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8097" name="GRAS_SU_POLY_OFFSET_OFFSET_CLAMP" type="float" variants="A6XX-A7XX" usage="rp_blit"/> + <bitset name="a6xx_depth_buffer_info" inline="yes"> <bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/> <bitfield name="UNK3" pos="3"/> - </reg32> + </bitset> - <reg32 offset="0x8099" name="GRAS_SU_CONSERVATIVE_RAS_CNTL" usage="cmd"> + <!-- duplicates RB_DEPTH_BUFFER_INFO: --> + <reg32 offset="0x8098" name="GRAS_SU_DEPTH_BUFFER_INFO" type="a6xx_depth_buffer_info" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_su_conservative_ras_cntl" inline="yes"> <bitfield name="CONSERVATIVERASEN" pos="0" type="boolean"/> <enum name="a6xx_shift_amount"> <value value="0" name="NO_SHIFT"/> @@ -932,7 +1005,10 @@ by a particular renderpass/blit. <bitfield name="SHIFTAMOUNT" low="1" high="2" type="a6xx_shift_amount"/> <bitfield name="INNERCONSERVATIVERASEN" pos="3" type="boolean"/> <bitfield name="UNK4" low="4" high="5"/> - </reg32> + </bitset> + + <reg32 offset="0x8099" name="GRAS_SU_CONSERVATIVE_RAS_CNTL" type="a6xx_gras_su_conservative_ras_cntl" variants="A6XX-A7XX" usage="cmd"/> + <reg32 offset="0x809a" name="GRAS_SU_PATH_RENDERING_CNTL"> <bitfield name="UNK0" pos="0" type="boolean"/> <bitfield name="LINELENGTHEN" pos="1" type="boolean"/> @@ -942,10 +1018,13 @@ by a particular renderpass/blit. <bitfield name="WRITES_LAYER" pos="0" type="boolean"/> <bitfield name="WRITES_VIEW" pos="1" type="boolean"/> </bitset> - <reg32 offset="0x809b" name="GRAS_SU_VS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x809c" name="GRAS_SU_GS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x809d" name="GRAS_SU_DS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" usage="rp_blit"/> - <!-- 0x809e/0x809f invalid --> + <reg32 offset="0x809b" name="GRAS_SU_VS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x809c" name="GRAS_SU_GS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x809d" name="GRAS_SU_DS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_rast_cntl" inline="yes"> + <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> + </bitset> <enum name="a6xx_sequenced_thread_dist"> <value value="0x0" name="DIST_SCREEN_COORD"/> @@ -993,7 +1072,7 @@ by a particular renderpass/blit. <value value="0x3" name="RB_BT"/> </enum> - <reg32 offset="0x80a0" name="GRAS_SC_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_sc_cntl" inline="yes"> <bitfield name="CCUSINGLECACHELINESIZE" low="0" high="2"/> <bitfield name="SINGLE_PRIM_MODE" low="3" high="4" type="a6xx_single_prim_mode"/> <bitfield name="RASTER_MODE" pos="5" type="a6xx_raster_mode"/> @@ -1003,7 +1082,9 @@ by a particular renderpass/blit. <bitfield name="UNK9" pos="9" type="boolean"/> <bitfield name="ROTATION" low="10" high="11" type="uint"/> <bitfield name="EARLYVIZOUTEN" pos="12" type="boolean"/> - </reg32> + </bitset> + + <reg32 offset="0x80a0" name="GRAS_SC_CNTL" type="a6xx_gras_sc_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <enum name="a6xx_render_mode"> <value value="0x0" name="RENDERING_PASS"/> @@ -1024,7 +1105,7 @@ by a particular renderpass/blit. <value value="0x4" name="LRZ_FEEDBACK_LATE_Z"/> </enum> - <reg32 offset="0x80a1" name="GRAS_SC_BIN_CNTL" usage="rp_blit"> + <bitset name="a6xx_bin_cntl" inline="yes"> <bitfield name="BINW" low="0" high="5" shr="5" type="uint"/> <bitfield name="BINH" low="8" high="14" shr="4" type="uint"/> <bitfield name="RENDER_MODE" low="18" high="20" type="a6xx_render_mode"/> @@ -1037,18 +1118,25 @@ by a particular renderpass/blit. In sysmem mode GRAS_LRZ_CNTL.LRZ_WRITE is not considered. </doc> <bitfield name="LRZ_FEEDBACK_ZMODE_MASK" low="24" high="26" type="a6xx_lrz_feedback_mask"/> - <bitfield name="UNK27" pos="27"/> - </reg32> + <bitfield name="FORCE_LRZ_DIS" pos="27" type="boolean"/> + </bitset> + + <reg32 offset="0x80a1" name="GRAS_SC_BIN_CNTL" type="a6xx_bin_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x80a2" name="GRAS_SC_RAS_MSAA_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_sc_ras_msaa_cntl" inline="yes"> <bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/> <bitfield name="UNK2" pos="2"/> <bitfield name="UNK3" pos="3"/> - </reg32> - <reg32 offset="0x80a3" name="GRAS_SC_DEST_MSAA_CNTL" usage="rp_blit"> + </bitset> + + <reg32 offset="0x80a2" name="GRAS_SC_RAS_MSAA_CNTL" type="a6xx_gras_sc_ras_msaa_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_sc_dest_msaa_cntl" inline="yes"> <bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/> <bitfield name="MSAA_DISABLE" pos="2" type="boolean"/> - </reg32> + </bitset> + + <reg32 offset="0x80a3" name="GRAS_SC_DEST_MSAA_CNTL" type="a6xx_gras_sc_dest_msaa_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <bitset name="a6xx_msaa_sample_pos_cntl" inline="yes"> <bitfield name="UNK0" pos="0"/> @@ -1066,30 +1154,35 @@ by a particular renderpass/blit. <bitfield name="SAMPLE_3_Y" low="28" high="31" radix="4" type="fixed"/> </bitset> - <reg32 offset="0x80a4" name="GRAS_SC_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" usage="rp_blit"/> - <reg32 offset="0x80a5" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" usage="rp_blit"/> - <reg32 offset="0x80a6" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" usage="rp_blit"/> + <reg32 offset="0x80a4" name="GRAS_SC_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x80a5" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x80a6" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x80a7" name="GRAS_UNKNOWN_80A7" variants="A7XX-" usage="cmd"/> + <reg32 offset="0x80a7" name="GRAS_ROTATION_CNTL" variants="A7XX" usage="cmd"/> - <!-- 0x80a7-0x80ae invalid --> - <reg32 offset="0x80af" name="GRAS_UNKNOWN_80AF" pos="0" usage="cmd"/> + <bitset name="a6xx_screen_scissor_cntl" inline="yes"> + <bitfield name="SCISSOR_DISABLE" pos="0" type="boolean"/> + </bitset> + + <reg32 offset="0x80af" name="GRAS_SC_SCREEN_SCISSOR_CNTL" type="a6xx_screen_scissor_cntl" variants="A6XX-A7XX" pos="0" usage="cmd"/> <bitset name="a6xx_scissor_xy" inline="yes"> <bitfield name="X" low="0" high="15" type="uint"/> <bitfield name="Y" low="16" high="31" type="uint"/> </bitset> - <array offset="0x80b0" name="GRAS_SC_SCREEN_SCISSOR" stride="2" length="16" usage="rp_blit"> + + <array offset="0x80b0" name="GRAS_SC_SCREEN_SCISSOR" stride="2" length="16" variants="A6XX-A7XX" usage="rp_blit"> <reg32 offset="0" name="TL" type="a6xx_scissor_xy"/> <reg32 offset="1" name="BR" type="a6xx_scissor_xy"/> </array> - <array offset="0x80d0" name="GRAS_SC_VIEWPORT_SCISSOR" stride="2" length="16" usage="rp_blit"> + + <array offset="0x80d0" name="GRAS_SC_VIEWPORT_SCISSOR" stride="2" length="16" variants="A6XX-A7XX" usage="rp_blit"> <reg32 offset="0" name="TL" type="a6xx_scissor_xy"/> <reg32 offset="1" name="BR" type="a6xx_scissor_xy"/> </array> - <reg32 offset="0x80f0" name="GRAS_SC_WINDOW_SCISSOR_TL" type="a6xx_reg_xy" usage="rp_blit"/> - <reg32 offset="0x80f1" name="GRAS_SC_WINDOW_SCISSOR_BR" type="a6xx_reg_xy" usage="rp_blit"/> + <reg32 offset="0x80f0" name="GRAS_SC_WINDOW_SCISSOR_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x80f1" name="GRAS_SC_WINDOW_SCISSOR_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> <enum name="a6xx_fsr_combiner"> <value value="0" name="FSR_COMBINER_OP_KEEP"/> @@ -1099,7 +1192,7 @@ by a particular renderpass/blit. <value value="4" name="FSR_COMBINER_OP_MUL"/> </enum> - <reg32 offset="0x80f4" name="GRAS_VRS_CONFIG" variants="A7XX-" usage="rp_blit"> + <bitset name="a6xx_gras_vrs_config"> <bitfield name="PIPELINE_FSR_ENABLE" pos="0" type="boolean"/> <bitfield name="FRAG_SIZE_X" low="1" high="2" type="uint"/> <bitfield name="FRAG_SIZE_Y" low="3" high="4" type="uint"/> @@ -1107,20 +1200,32 @@ by a particular renderpass/blit. <bitfield name="COMBINER_OP_2" low="8" high="10" type="a6xx_fsr_combiner"/> <bitfield name="ATTACHMENT_FSR_ENABLE" pos="13" type="boolean"/> <bitfield name="PRIMITIVE_FSR_ENABLE" pos="20" type="boolean"/> - </reg32> - <reg32 offset="0x80f5" name="GRAS_QUALITY_BUFFER_INFO" variants="A7XX-" usage="rp_blit"> + </bitset> + + <reg32 offset="0x80f4" name="GRAS_VRS_CONFIG" type="a6xx_gras_vrs_config" variants="A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_quality_buffer_info" inline="yes"> <bitfield name="LAYERED" pos="0" type="boolean"/> <bitfield name="TILE_MODE" low="1" high="2" type="a6xx_tile_mode"/> - </reg32> - <reg32 offset="0x80f6" name="GRAS_QUALITY_BUFFER_DIMENSION" variants="A7XX-" usage="rp_blit"> + </bitset> + + <reg32 offset="0x80f5" name="GRAS_QUALITY_BUFFER_INFO" type="a6xx_gras_quality_buffer_info" variants="A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_quality_buffer_dimension" inline="yes"> <bitfield name="WIDTH" low="0" high="15" type="uint"/> <bitfield name="HEIGHT" low="16" high="31" type="uint"/> - </reg32> - <reg64 offset="0x80f8" name="GRAS_QUALITY_BUFFER_BASE" variants="A7XX-" type="waddress" usage="rp_blit"/> - <reg32 offset="0x80fa" name="GRAS_QUALITY_BUFFER_PITCH" variants="A7XX-" usage="rp_blit"> + </bitset> + + <reg32 offset="0x80f6" name="GRAS_QUALITY_BUFFER_DIMENSION" type="a6xx_gras_quality_buffer_dimension" variants="A7XX" usage="rp_blit"/> + + <reg64 offset="0x80f8" name="GRAS_QUALITY_BUFFER_BASE" variants="A7XX" type="waddress" usage="rp_blit"/> + + <bitset name="a6xx_gras_quality_buffer_pitch" inline="yes"> <bitfield name="PITCH" shr="6" low="0" high="7" type="uint"/> <bitfield name="ARRAY_PITCH" shr="6" low="10" high="28" type="uint"/> - </reg32> + </bitset> + + <reg32 offset="0x80fa" name="GRAS_QUALITY_BUFFER_PITCH" type="a6xx_gras_quality_buffer_pitch" variants="A7XX" usage="rp_blit"/> <enum name="a6xx_lrz_dir_status"> <value value="0x1" name="LRZ_DIR_LE"/> @@ -1128,7 +1233,7 @@ by a particular renderpass/blit. <value value="0x3" name="LRZ_DIR_INVALID"/> </enum> - <reg32 offset="0x8100" name="GRAS_LRZ_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_lrz_cntl" inline="yes"> <bitfield name="ENABLE" pos="0" type="boolean"/> <doc>LRZ write also disabled for blend/etc.</doc> <bitfield name="LRZ_WRITE" pos="1" type="boolean"/> @@ -1155,26 +1260,36 @@ by a particular renderpass/blit. </doc> <bitfield name="DISABLE_ON_WRONG_DIR" pos="9" type="boolean" variants="A6XX"/> <bitfield name="Z_FUNC" low="11" high="13" type="adreno_compare_func" variants="A7XX-"/> - </reg32> + </bitset> + + <reg32 offset="0x8100" name="GRAS_LRZ_CNTL" type="a6xx_gras_lrz_cntl" usage="rp_blit" variants="A6XX-A7XX"/> <enum name="a6xx_fragcoord_sample_mode"> <value value="0" name="FRAGCOORD_CENTER"/> <value value="3" name="FRAGCOORD_SAMPLE"/> </enum> - <reg32 offset="0x8101" name="GRAS_LRZ_PS_INPUT_CNTL" low="0" high="2" usage="rp_blit"> + <bitset name="a6xx_gras_lrz_ps_input_cntl" inline="yes"> <bitfield name="SAMPLEID" pos="0" type="boolean"/> <bitfield name="FRAGCOORDSAMPLEMODE" low="1" high="2" type="a6xx_fragcoord_sample_mode"/> - </reg32> + </bitset> + + <reg32 offset="0x8101" name="GRAS_LRZ_PS_INPUT_CNTL" type="a6xx_gras_lrz_ps_input_cntl" usage="rp_blit" variants="A6XX-A7XX"/> - <reg32 offset="0x8102" name="GRAS_LRZ_MRT_BUFFER_INFO_0" usage="rp_blit"> + <bitset name="a6xx_gras_lrz_mrt_buffer_info_0" inline="yes"> <bitfield name="COLOR_FORMAT" low="0" high="7" type="a6xx_format"/> - </reg32> - <reg64 offset="0x8103" name="GRAS_LRZ_BUFFER_BASE" align="256" type="waddress" usage="rp_blit"/> - <reg32 offset="0x8105" name="GRAS_LRZ_BUFFER_PITCH" usage="rp_blit"> + </bitset> + + <reg32 offset="0x8102" name="GRAS_LRZ_MRT_BUFFER_INFO_0" type="a6xx_gras_lrz_mrt_buffer_info_0" usage="rp_blit" variants="A6XX-A7XX"/> + + <reg64 offset="0x8103" name="GRAS_LRZ_BUFFER_BASE" align="256" type="waddress" usage="rp_blit" variants="A6XX-A7XX"/> + + <bitset name="a6xx_gras_lrz_buffer_pitch" inline="yes"> <bitfield name="PITCH" low="0" high="7" shr="5" type="uint"/> <bitfield name="ARRAY_PITCH" low="10" high="28" shr="8" type="uint"/> - </reg32> + </bitset> + + <reg32 offset="0x8105" name="GRAS_LRZ_BUFFER_PITCH" type="a6xx_gras_lrz_buffer_pitch" usage="rp_blit" variants="A6XX-A7XX"/> <!-- The LRZ "fast clear" buffer is initialized to zero's by blob, and @@ -1207,7 +1322,6 @@ by a particular renderpass/blit. not. --> <reg64 offset="0x8106" name="GRAS_LRZ_FAST_CLEAR_BUFFER_BASE" align="64" type="waddress" usage="rp_blit"/> - <!-- 0x8108 invalid --> <reg32 offset="0x8109" name="GRAS_LRZ_PS_SAMPLEFREQ_CNTL" usage="rp_blit"> <bitfield name="PER_SAMP_MODE" pos="0" type="boolean"/> </reg32> @@ -1232,19 +1346,20 @@ by a particular renderpass/blit. <!-- 0x810c-0x810f invalid --> - <reg32 offset="0x8110" name="GRAS_UNKNOWN_8110" low="0" high="1" usage="cmd"/> + <reg32 offset="0x8110" name="GRAS_MODE_CNTL" low="0" high="1" variants="A6XX-A7XX" usage="cmd"/> <!-- A bit tentative but it's a color and it is followed by LRZ_CLEAR --> - <reg32 offset="0x8111" name="GRAS_LRZ_DEPTH_CLEAR" type="float" variants="A7XX-"/> + <reg32 offset="0x8111" name="GRAS_LRZ_DEPTH_CLEAR" type="float" variants="A7XX"/> - <reg32 offset="0x8113" name="GRAS_LRZ_DEPTH_BUFFER_INFO" variants="A7XX-" usage="rp_blit"> + <bitset name="a6xx_gras_lrz_depth_buffer_info" inline="yes"> <bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/> <bitfield name="UNK3" pos="3"/> - </reg32> + </bitset> + + <reg32 offset="0x8113" name="GRAS_LRZ_DEPTH_BUFFER_INFO" type="a6xx_gras_lrz_depth_buffer_info" variants="A7XX" usage="rp_blit"/> - <!-- Always written together and always equal 09510840 00000a62 --> - <reg32 offset="0x8120" name="GRAS_UNKNOWN_8120" variants="A7XX-" usage="cmd"/> - <reg32 offset="0x8121" name="GRAS_UNKNOWN_8121" variants="A7XX-" usage="cmd"/> + <doc>LUT used to convert quality buffer values to HW shading rate values. An array of 4-bit values.</doc> + <array offset="0x8120" name="GRAS_LRZ_QUALITY_LOOKUP_TABLE" variants="A7XX-" stride="1" length="2"/> <!-- 0x8112-0x83ff invalid --> @@ -1269,28 +1384,29 @@ by a particular renderpass/blit. <bitfield name="D24S8" pos="19" type="boolean"/> <!-- some sort of channel mask, disabled channels are set to zero ? --> <bitfield name="MASK" low="20" high="23"/> - <bitfield name="IFMT" low="24" high="28" type="a6xx_2d_ifmt"/> + <bitfield name="IFMT" low="24" high="26" type="a6xx_2d_ifmt"/> + <bitfield name="UNK27" pos="27" type="boolean"/> + <bitfield name="UNK28" pos="28" type="boolean"/> <bitfield name="RASTER_MODE" pos="29" type="a6xx_raster_mode"/> - <bitfield name="UNK30" pos="30" type="boolean" variants="A7XX-"/> + <bitfield name="COPY" pos="30" type="boolean" variants="A7XX-"/> </bitset> - <reg32 offset="0x8400" name="GRAS_A2D_BLT_CNTL" type="a6xx_a2d_bit_cntl" usage="rp_blit"/> + <reg32 offset="0x8400" name="GRAS_A2D_BLT_CNTL" type="a6xx_a2d_bit_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <!-- note: the low 8 bits for src coords are valid, probably fixed point it would be a bit weird though, since we subtract 1 from BR coords apparently signed, gallium driver uses negative coords and it works? --> - <reg32 offset="0x8401" name="GRAS_A2D_SRC_XMIN" low="8" high="24" type="int" usage="rp_blit"/> - <reg32 offset="0x8402" name="GRAS_A2D_SRC_XMAX" low="8" high="24" type="int" usage="rp_blit"/> - <reg32 offset="0x8403" name="GRAS_A2D_SRC_YMIN" low="8" high="24" type="int" usage="rp_blit"/> - <reg32 offset="0x8404" name="GRAS_A2D_SRC_YMAX" low="8" high="24" type="int" usage="rp_blit"/> - <reg32 offset="0x8405" name="GRAS_A2D_DEST_TL" type="a6xx_reg_xy" usage="rp_blit"/> - <reg32 offset="0x8406" name="GRAS_A2D_DEST_BR" type="a6xx_reg_xy" usage="rp_blit"/> + <reg32 offset="0x8401" name="GRAS_A2D_SRC_XMIN" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8402" name="GRAS_A2D_SRC_XMAX" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8403" name="GRAS_A2D_SRC_YMIN" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8404" name="GRAS_A2D_SRC_YMAX" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8405" name="GRAS_A2D_DEST_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8406" name="GRAS_A2D_DEST_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> <reg32 offset="0x8407" name="GRAS_2D_UNKNOWN_8407" low="0" high="31"/> <reg32 offset="0x8408" name="GRAS_2D_UNKNOWN_8408" low="0" high="31"/> <reg32 offset="0x8409" name="GRAS_2D_UNKNOWN_8409" low="0" high="31"/> - <reg32 offset="0x840a" name="GRAS_A2D_SCISSOR_TL" type="a6xx_reg_xy" usage="rp_blit"/> - <reg32 offset="0x840b" name="GRAS_A2D_SCISSOR_BR" type="a6xx_reg_xy" usage="rp_blit"/> - <!-- 0x840c-0x85ff invalid --> + <reg32 offset="0x840a" name="GRAS_A2D_SCISSOR_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x840b" name="GRAS_A2D_SCISSOR_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> <!-- always 0x880 ? (and 0 in a640/a650 traces?) --> <reg32 offset="0x8600" name="GRAS_DBG_ECO_CNTL" usage="cmd"> @@ -1308,22 +1424,7 @@ by a particular renderpass/blit. --> <!-- same as GRAS_BIN_CONTROL, but without bit 27: --> - <reg32 offset="0x8800" name="RB_CNTL" variants="A6XX" usage="rp_blit"> - <bitfield name="BINW" low="0" high="5" shr="5" type="uint"/> - <bitfield name="BINH" low="8" high="14" shr="4" type="uint"/> - <bitfield name="RENDER_MODE" low="18" high="20" type="a6xx_render_mode"/> - <bitfield name="FORCE_LRZ_WRITE_DIS" pos="21" type="boolean"/> - <bitfield name="BUFFERS_LOCATION" low="22" high="23" type="a6xx_buffers_location"/> - <bitfield name="LRZ_FEEDBACK_ZMODE_MASK" low="24" high="26" type="a6xx_lrz_feedback_mask"/> - </reg32> - - <reg32 offset="0x8800" name="RB_CNTL" variants="A7XX-" usage="rp_blit"> - <bitfield name="BINW" low="0" high="5" shr="5" type="uint"/> - <bitfield name="BINH" low="8" high="14" shr="4" type="uint"/> - <bitfield name="RENDER_MODE" low="18" high="20" type="a6xx_render_mode"/> - <bitfield name="FORCE_LRZ_WRITE_DIS" pos="21" type="boolean"/> - <bitfield name="LRZ_FEEDBACK_ZMODE_MASK" low="24" high="26" type="a6xx_lrz_feedback_mask"/> - </reg32> + <reg32 offset="0x8800" name="RB_CNTL" variants="A6XX-A7XX" type="a6xx_bin_cntl" usage="rp_blit"/> <reg32 offset="0x8801" name="RB_RENDER_CNTL" variants="A6XX" usage="rp_blit"> <bitfield name="CCUSINGLECACHELINESIZE" low="3" high="5"/> @@ -1347,9 +1448,6 @@ by a particular renderpass/blit. <bitfield name="CONSERVATIVERASEN" pos="11" type="boolean"/> <bitfield name="INNERCONSERVATIVERASEN" pos="12" type="boolean"/> </reg32> - <reg32 offset="0x8116" name="GRAS_SU_RENDER_CNTL" variants="A7XX-" usage="rp_blit"> - <bitfield name="FS_DISABLE" pos="7" type="boolean"/> - </reg32> <reg32 offset="0x8802" name="RB_RAS_MSAA_CNTL" usage="rp_blit"> <bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/> @@ -1516,9 +1614,7 @@ by a particular renderpass/blit. <bitfield name="SAMPLE_MASK" low="16" high="31"/> </reg32> <!-- 0x8866-0x886f invalid --> - <reg32 offset="0x8870" name="RB_DEPTH_PLANE_CNTL" usage="rp_blit"> - <bitfield name="Z_MODE" low="0" high="1" type="a6xx_ztest_mode"/> - </reg32> + <reg32 offset="0x8870" name="RB_DEPTH_PLANE_CNTL" type="a6xx_depth_plane_cntl" usage="rp_blit"/> <reg32 offset="0x8871" name="RB_DEPTH_CNTL" usage="rp_blit"> <bitfield name="Z_TEST_ENABLE" pos="0" type="boolean"/> @@ -1532,14 +1628,9 @@ by a particular renderpass/blit. <bitfield name="Z_READ_ENABLE" pos="6" type="boolean"/> <bitfield name="Z_BOUNDS_ENABLE" pos="7" type="boolean"/> </reg32> - <reg32 offset="0x8114" name="GRAS_SU_DEPTH_CNTL" usage="rp_blit"> - <bitfield name="Z_TEST_ENABLE" pos="0" type="boolean"/> - </reg32> + <!-- duplicates GRAS_SU_DEPTH_BUFFER_INFO: --> - <reg32 offset="0x8872" name="RB_DEPTH_BUFFER_INFO" variants="A6XX" usage="rp_blit"> - <bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/> - <bitfield name="UNK3" low="3" high="4"/> - </reg32> + <reg32 offset="0x8872" name="RB_DEPTH_BUFFER_INFO" variants="A6XX" type="a6xx_depth_buffer_info" usage="rp_blit"/> <!-- first 4 bits duplicates GRAS_SU_DEPTH_BUFFER_INFO --> <reg32 offset="0x8872" name="RB_DEPTH_BUFFER_INFO" variants="A7XX-" usage="rp_blit"> <bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/> @@ -1575,9 +1666,7 @@ by a particular renderpass/blit. <bitfield name="ZPASS_BF" low="26" high="28" type="adreno_stencil_op"/> <bitfield name="ZFAIL_BF" low="29" high="31" type="adreno_stencil_op"/> </reg32> - <reg32 offset="0x8115" name="GRAS_SU_STENCIL_CNTL" usage="rp_blit"> - <bitfield name="STENCIL_ENABLE" pos="0" type="boolean"/> - </reg32> + <reg32 offset="0x8881" name="RB_STENCIL_BUFFER_INFO" variants="A6XX" usage="rp_blit"> <bitfield name="SEPARATE_STENCIL" pos="0" type="boolean"/> <bitfield name="UNK1" pos="1" type="boolean"/> @@ -1616,8 +1705,9 @@ by a particular renderpass/blit. <reg32 offset="0x8899" name="RB_UNKNOWN_8899" variants="A7XX-" usage="cmd"/> <!-- 0x8899-0x88bf invalid --> <!-- clamps depth value for depth test/write --> - <reg32 offset="0x88c0" name="RB_VIEWPORT_ZCLAMP_MIN" type="float" usage="rp_blit"/> - <reg32 offset="0x88c1" name="RB_VIEWPORT_ZCLAMP_MAX" type="float" usage="rp_blit"/> + <reg32 offset="0x88c0" name="RB_VIEWPORT_ZCLAMP_MIN" type="float" usage="rp_blit" variants="A6XX-A7XX"/> + <reg32 offset="0x88c1" name="RB_VIEWPORT_ZCLAMP_MAX" type="float" usage="rp_blit" variants="A6XX-A7XX"/> + <!-- 0x88c2-0x88cf invalid--> <reg32 offset="0x88d0" name="RB_RESOLVE_CNTL_0" usage="rp_blit"> <bitfield name="UNK0" low="0" high="12"/> @@ -1626,7 +1716,7 @@ by a particular renderpass/blit. <reg32 offset="0x88d1" name="RB_RESOLVE_CNTL_1" type="a6xx_reg_xy" usage="rp_blit"/> <reg32 offset="0x88d2" name="RB_RESOLVE_CNTL_2" type="a6xx_reg_xy" usage="rp_blit"/> <!-- weird to duplicate other regs from same block?? --> - <reg32 offset="0x88d3" name="RB_RESOLVE_CNTL_3" usage="rp_blit"> + <reg32 offset="0x88d3" name="RB_RESOLVE_CNTL_3" variants="A6XX-A7XX" usage="rp_blit"> <bitfield name="BINW" low="0" high="5" shr="5" type="uint"/> <bitfield name="BINH" low="8" high="14" shr="4" type="uint"/> </reg32> @@ -1650,10 +1740,13 @@ by a particular renderpass/blit. <!-- array-pitch is size of layer --> <reg32 offset="0x88db" name="RB_RESOLVE_SYSTEM_BUFFER_ARRAY_PITCH" low="0" high="28" shr="6" type="uint" usage="rp_blit"/> <reg64 offset="0x88dc" name="RB_RESOLVE_SYSTEM_FLAG_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/> - <reg32 offset="0x88de" name="RB_RESOLVE_SYSTEM_FLAG_BUFFER_PITCH" usage="rp_blit"> + + <bitset name="a6xx_flag_buffer_pitch" inline="yes"> <bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/> - <bitfield name="ARRAY_PITCH" low="11" high="27" shr="7" type="uint"/> - </reg32> + <bitfield name="ARRAY_PITCH" low="11" high="28" shr="7" type="uint"/> + </bitset> + + <reg32 offset="0x88de" name="RB_RESOLVE_SYSTEM_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch" usage="rp_blit"/> <reg32 offset="0x88df" name="RB_RESOLVE_CLEAR_COLOR_DW0" usage="rp_blit"/> <reg32 offset="0x88e0" name="RB_RESOLVE_CLEAR_COLOR_DW1" usage="rp_blit"/> @@ -1726,10 +1819,7 @@ by a particular renderpass/blit. <reg32 offset="0x88f0" name="RB_UNKNOWN_88F0" low="0" high="11" usage="cmd"/> <!-- could be for separate stencil? (or may not be a flag buffer at all) --> <reg64 offset="0x88f1" name="RB_UNK_FLAG_BUFFER_BASE" type="waddress" align="64"/> - <reg32 offset="0x88f3" name="RB_UNK_FLAG_BUFFER_PITCH"> - <bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/> - <bitfield name="ARRAY_PITCH" low="11" high="23" shr="7" type="uint"/> - </reg32> + <reg32 offset="0x88f3" name="RB_UNK_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch"/> <reg32 offset="0x88f4" name="RB_VRS_CONFIG" usage="rp_blit"> <bitfield name="UNK2" pos="2" type="boolean"/> @@ -1737,8 +1827,9 @@ by a particular renderpass/blit. <bitfield name="ATTACHMENT_FSR_ENABLE" pos="5" type="boolean"/> <bitfield name="PRIMITIVE_FSR_ENABLE" pos="18" type="boolean"/> </reg32> - <!-- Connected to VK_EXT_fragment_density_map? --> - <reg32 offset="0x88f5" name="RB_UNKNOWN_88F5" variants="A7XX-"/> + <reg32 offset="0x88f5" name="RB_BIN_FOVEAT" variants="A7XX-" usage="cmd"> + <bitfield name="BINSCALEEN" pos="6" type="boolean"/> + </reg32> <!-- 0x88f6-0x88ff invalid --> <reg64 offset="0x8900" name="RB_DEPTH_FLAG_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/> <reg32 offset="0x8902" name="RB_DEPTH_FLAG_BUFFER_PITCH" usage="rp_blit"> @@ -1747,12 +1838,10 @@ by a particular renderpass/blit. <bitfield name="UNK8" low="8" high="10"/> <bitfield name="ARRAY_PITCH" low="11" high="27" shr="7" type="uint"/> </reg32> + <array offset="0x8903" name="RB_COLOR_FLAG_BUFFER" stride="3" length="8" usage="rp_blit"> <reg64 offset="0" name="ADDR" type="waddress" align="64"/> - <reg32 offset="2" name="PITCH"> - <bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/> - <bitfield name="ARRAY_PITCH" low="11" high="28" shr="7" type="uint"/> - </reg32> + <reg32 offset="2" name="PITCH" type="a6xx_flag_buffer_pitch"/> </array> <!-- 0x891b-0x8926 invalid --> <doc> @@ -1815,7 +1904,7 @@ by a particular renderpass/blit. <reg64 offset="0x8c1e" name="RB_A2D_DEST_BUFFER_BASE_2" type="waddress" align="64" usage="rp_blit"/> <reg64 offset="0x8c20" name="RB_A2D_DEST_FLAG_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/> - <reg32 offset="0x8c22" name="RB_A2D_DEST_FLAG_BUFFER_PITCH" low="0" high="7" shr="6" type="uint" usage="rp_blit"/> + <reg32 offset="0x8c22" name="RB_A2D_DEST_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch" usage="rp_blit"/> <!-- this is a guess but seems likely (for NV12 with UBWC): --> <reg64 offset="0x8c23" name="RB_A2D_DEST_FLAG_BUFFER_BASE_1" type="waddress" align="64" usage="rp_blit"/> <reg32 offset="0x8c25" name="RB_A2D_DEST_FLAG_BUFFER_PITCH_1" low="0" high="7" shr="6" type="uint" usage="rp_blit"/> @@ -1921,13 +2010,13 @@ by a particular renderpass/blit. <bitfield name="CLIP_DIST_03_LOC" low="8" high="15" type="uint"/> <bitfield name="CLIP_DIST_47_LOC" low="16" high="23" type="uint"/> </bitset> - <reg32 offset="0x9101" name="VPC_VS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> - <reg32 offset="0x9102" name="VPC_GS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> - <reg32 offset="0x9103" name="VPC_DS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> + <reg32 offset="0x9101" name="VPC_VS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9102" name="VPC_GS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9103" name="VPC_DS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9311" name="VPC_VS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> - <reg32 offset="0x9312" name="VPC_GS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> - <reg32 offset="0x9313" name="VPC_DS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> + <reg32 offset="0x9311" name="VPC_VS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9312" name="VPC_GS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9313" name="VPC_DS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <bitset name="a6xx_vpc_xs_siv_cntl" inline="yes"> <bitfield name="LAYERLOC" low="0" high="7" type="uint"/> @@ -1935,23 +2024,33 @@ by a particular renderpass/blit. <bitfield name="SHADINGRATELOC" low="16" high="23" type="uint" variants="A7XX-"/> </bitset> - <reg32 offset="0x9104" name="VPC_VS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x9105" name="VPC_GS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x9106" name="VPC_DS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> + <reg32 offset="0x9104" name="VPC_VS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9105" name="VPC_GS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9106" name="VPC_DS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + - <reg32 offset="0x9314" name="VPC_VS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x9315" name="VPC_GS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x9316" name="VPC_DS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> + <reg32 offset="0x9314" name="VPC_VS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9315" name="VPC_GS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9316" name="VPC_DS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_vpc_rast_stream_cntl" inline="yes"> + <!-- which stream to send to GRAS --> + <bitfield name="STREAM" low="0" high="1" type="uint"/> + <!-- discard primitives before rasterization --> + <bitfield name="DISCARD" pos="2" type="boolean"/> + </bitset> + + <reg32 offset="0x9980" name="VPC_RAST_STREAM_CNTL" type="a6xx_vpc_rast_stream_cntl" variants="A6XX" usage="rp_blit"/> + <reg32 offset="0x9107" name="VPC_RAST_STREAM_CNTL" type="a6xx_vpc_rast_stream_cntl" variants="A7XX" usage="rp_blit"/> + <reg32 offset="0x9317" name="VPC_RAST_STREAM_CNTL_V2" type="a6xx_vpc_rast_stream_cntl" variants="A7XX" usage="rp_blit"/> <reg32 offset="0x9107" name="VPC_UNKNOWN_9107" variants="A6XX" usage="rp_blit"> <!-- this mirrors VPC_RAST_STREAM_CNTL::DISCARD, although it seems it's unused --> <bitfield name="RASTER_DISCARD" pos="0" type="boolean"/> <bitfield name="UNK2" pos="2" type="boolean"/> </reg32> - <reg32 offset="0x9108" name="VPC_RAST_CNTL" usage="rp_blit"> - <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> - </reg32> + <reg32 offset="0x9108" name="VPC_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <bitset name="a6xx_pc_cntl" inline="yes"> <bitfield name="PRIMITIVE_RESTART" pos="0" type="boolean"/> <bitfield name="PROVOKING_VTX_LAST" pos="1" type="boolean"/> @@ -1991,10 +2090,10 @@ by a particular renderpass/blit. <bitfield name="VIEWS" low="2" high="6" type="uint"/> </bitset> - <reg32 offset="0x9109" name="VPC_PC_CNTL" type="a6xx_pc_cntl" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0x910a" name="VPC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0x910b" name="VPC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0x910c" name="VPC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A7XX-" usage="rp_blit"/> + <reg32 offset="0x9109" name="VPC_PC_CNTL" type="a6xx_pc_cntl" variants="A7XX" usage="rp_blit"/> + <reg32 offset="0x910a" name="VPC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A7XX" usage="rp_blit"/> + <reg32 offset="0x910b" name="VPC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A7XX" usage="rp_blit"/> + <reg32 offset="0x910c" name="VPC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A7XX" usage="rp_blit"/> <enum name="a6xx_varying_interp_mode"> <value value="0" name="INTERP_SMOOTH"/> @@ -2011,11 +2110,11 @@ by a particular renderpass/blit. </enum> <!-- 0x9109-0x91ff invalid --> - <array offset="0x9200" name="VPC_VARYING_INTERP_MODE" stride="1" length="8" usage="rp_blit"> + <array offset="0x9200" name="VPC_VARYING_INTERP_MODE" stride="1" length="8" variants="A6XX-A7XX" usage="rp_blit"> <doc>Packed array of a6xx_varying_interp_mode</doc> <reg32 offset="0x0" name="MODE"/> </array> - <array offset="0x9208" name="VPC_VARYING_REPLACE_MODE_0" stride="1" length="8" usage="rp_blit"> + <array offset="0x9208" name="VPC_VARYING_REPLACE_MODE" stride="1" length="8" variants="A6XX-A7XX" usage="rp_blit"> <doc>Packed array of a6xx_varying_ps_repl_mode</doc> <reg32 offset="0x0" name="MODE"/> </array> @@ -2024,12 +2123,12 @@ by a particular renderpass/blit. <reg32 offset="0x9210" name="VPC_UNKNOWN_9210" low="0" high="31" variants="A6XX" usage="cmd"/> <reg32 offset="0x9211" name="VPC_UNKNOWN_9211" low="0" high="31" variants="A6XX" usage="cmd"/> - <array offset="0x9212" name="VPC_VARYING_LM_TRANSFER_CNTL_0" stride="1" length="4" usage="rp_blit"> + <array offset="0x9212" name="VPC_VARYING_LM_TRANSFER_CNTL" stride="1" length="4" variants="A6XX-A7XX" usage="rp_blit"> <!-- one bit per varying component: --> <reg32 offset="0" name="DISABLE"/> </array> - <reg32 offset="0x9216" name="VPC_SO_MAPPING_WPTR" usage="rp_blit"> + <bitset name="a6xx_vpc_so_mapping_wptr" inline="yes"> <!-- Choose which DWORD to write to. There is an array of (4 * 64) DWORD's, dumped in the devcoredump at @@ -2056,20 +2155,25 @@ by a particular renderpass/blit. <bitfield name="ADDR" low="0" high="7" type="hex"/> <!-- clear all A_EN and B_EN bits for all DWORD's --> <bitfield name="RESET" pos="16" type="boolean"/> - </reg32> - <!-- special register, write multiple times to load SO program (not readable) --> - <reg32 offset="0x9217" name="VPC_SO_MAPPING_PORT" usage="rp_blit"> + </bitset> + + <reg32 offset="0x9216" name="VPC_SO_MAPPING_WPTR" type="a6xx_vpc_so_mapping_wptr" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_vpc_so_mapping_port" inline="yes"> <bitfield name="A_BUF" low="0" high="1" type="uint"/> <bitfield name="A_OFF" low="2" high="10" shr="2" type="uint"/> <bitfield name="A_EN" pos="11" type="boolean"/> <bitfield name="B_BUF" low="12" high="13" type="uint"/> <bitfield name="B_OFF" low="14" high="22" shr="2" type="uint"/> <bitfield name="B_EN" pos="23" type="boolean"/> - </reg32> + </bitset> + + <!-- special register, write multiple times to load SO program (not readable) --> + <reg32 offset="0x9217" name="VPC_SO_MAPPING_PORT" type="a6xx_vpc_so_mapping_port" variants="A6XX-A7XX" usage="rp_blit"/> - <reg64 offset="0x9218" name="VPC_SO_QUERY_BASE" type="waddress" align="32" usage="cmd"/> + <reg64 offset="0x9218" name="VPC_SO_QUERY_BASE" type="waddress" align="32" variants="A6XX-A7XX" usage="cmd"/> - <array offset="0x921a" name="VPC_SO" stride="7" length="4" usage="cmd"> + <array offset="0x921a" name="VPC_SO" stride="7" length="4" variants="A6XX-A7XX" usage="cmd"> <reg64 offset="0" name="BUFFER_BASE" type="waddress" align="32"/> <reg32 offset="2" name="BUFFER_SIZE" low="2" high="31" shr="2"/> <reg32 offset="3" name="BUFFER_STRIDE" low="0" high="9" shr="2"/> @@ -2077,12 +2181,13 @@ by a particular renderpass/blit. <reg64 offset="5" name="FLUSH_BASE" type="waddress" align="32"/> </array> - <reg32 offset="0x9236" name="VPC_REPLACE_MODE_CNTL" usage="cmd"> + <bitset name="a6xx_vpc_replace_mode_cntl" inline="yes"> <bitfield name="INVERT" pos="0" type="boolean"/> - </reg32> - <!-- 0x9237-0x92ff invalid --> - <!-- always 0x0 ? --> - <reg32 offset="0x9300" name="VPC_UNKNOWN_9300" low="0" high="2" usage="cmd"/> + </bitset> + + <reg32 offset="0x9236" name="VPC_REPLACE_MODE_CNTL" type="a6xx_vpc_replace_mode_cntl" variants="A6XX-A7XX" usage="cmd"/> + + <reg32 offset="0x9300" name="VPC_ROTATION_CNTL" low="0" high="2" variants="A6XX-A7XX" usage="cmd"/> <bitset name="a6xx_vpc_xs_cntl" inline="yes"> <doc> @@ -2101,11 +2206,12 @@ by a particular renderpass/blit. </doc> </bitfield> </bitset> - <reg32 offset="0x9301" name="VPC_VS_CNTL" type="a6xx_vpc_xs_cntl" usage="rp_blit"/> - <reg32 offset="0x9302" name="VPC_GS_CNTL" type="a6xx_vpc_xs_cntl" usage="rp_blit"/> - <reg32 offset="0x9303" name="VPC_DS_CNTL" type="a6xx_vpc_xs_cntl" usage="rp_blit"/> - <reg32 offset="0x9304" name="VPC_PS_CNTL" usage="rp_blit"> + <reg32 offset="0x9301" name="VPC_VS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9302" name="VPC_GS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9303" name="VPC_DS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_vpc_ps_cntl" inline="yes"> <bitfield name="NUMNONPOSVAR" low="0" high="7" type="uint"/> <!-- for fixed-function (i.e. no GS) gl_PrimitiveID in FS --> <bitfield name="PRIMIDLOC" low="8" high="15" type="uint"/> @@ -2122,9 +2228,11 @@ by a particular renderpass/blit. ViewID through the VS. </doc> </bitfield> - </reg32> + </bitset> - <reg32 offset="0x9305" name="VPC_SO_CNTL" usage="rp_blit"> + <reg32 offset="0x9304" name="VPC_PS_CNTL" type="a6xx_vpc_ps_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_vpc_so_cntl" inline="yes"> <!-- It's offset by 1, and 0 means "disabled" --> @@ -2133,22 +2241,28 @@ by a particular renderpass/blit. <bitfield name="BUF2_STREAM" low="6" high="8" type="uint"/> <bitfield name="BUF3_STREAM" low="9" high="11" type="uint"/> <bitfield name="STREAM_ENABLE" low="15" high="18" type="hex"/> - </reg32> - <reg32 offset="0x9306" name="VPC_SO_OVERRIDE" usage="rp_blit"> + </bitset> + + <reg32 offset="0x9305" name="VPC_SO_CNTL" type="a6xx_vpc_so_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_so_override" inline="yes"> <bitfield name="DISABLE" pos="0" type="boolean"/> - </reg32> - <reg32 offset="0x9307" name="VPC_PS_RAST_CNTL" variants="A6XX-" usage="rp_blit"> <!-- A702 + A7xx --> - <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> - </reg32> - <reg32 offset="0x9308" name="VPC_ATTR_BUF_GMEM_SIZE" variants="A7XX-" usage="rp_blit"> - <bitfield name="SIZE_GMEM" low="0" high="31"/> - </reg32> - <reg32 offset="0x9309" name="VPC_ATTR_BUF_GMEM_BASE" variants="A7XX-" usage="rp_blit"> - <bitfield name="BASE_GMEM" low="0" high="31"/> - </reg32> - <reg32 offset="0x9b09" name="PC_ATTR_BUF_GMEM_SIZE" variants="A7XX-" usage="rp_blit"> - <bitfield name="SIZE_GMEM" low="0" high="31"/> - </reg32> + </bitset> + + <reg32 offset="0x9306" name="VPC_SO_OVERRIDE" type="a6xx_so_override" variants="A6XX-A7XX" usage="rp_blit"/> + + <reg32 offset="0x9807" name="PC_DGEN_SO_OVERRIDE" type="a6xx_so_override" variants="A7XX" usage="rp_blit"/> + + <reg32 offset="0x9307" name="VPC_PS_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <reg32 offset="0x9308" name="VPC_ATTR_BUF_GMEM_SIZE" variants="A7XX" type="uint" usage="rp_blit"/> + <reg32 offset="0x9309" name="VPC_ATTR_BUF_GMEM_BASE" variants="A7XX" type="uint" usage="rp_blit"/> + + <reg32 offset="0x9b09" name="PC_ATTR_BUF_GMEM_SIZE" variants="A7XX" type="uint" usage="rp_blit"/> + + <reg32 offset="0x930a" name="VPC_UNKNOWN_930A" variants="A7XX"/> + + <reg32 offset="0x960a" name="VPC_FLATSHADE_MODE_CNTL" variants="A7XX"/> <!-- 0x9307-0x95ff invalid --> @@ -2163,52 +2277,62 @@ by a particular renderpass/blit. <!-- TODO: regs from 0x9624-0x963a --> <!-- 0x963b-0x97ff invalid --> - <reg32 offset="0x9800" name="PC_HS_PARAM_0" low="0" high="5" type="uint" usage="rp_blit"/> + <reg32 offset="0x9800" name="PC_HS_PARAM_0" low="0" high="5" type="uint" variants="A6XX-A7XX" usage="rp_blit"/> - <!-- always 0x0 ? --> - <reg32 offset="0x9801" name="PC_HS_PARAM_1" usage="rp_blit"> + <bitset name="a6xx_pc_hs_param_1" inline="yes"> <bitfield name="SIZE" low="0" high="10" type="uint"/> <bitfield name="UNK13" pos="13"/> - </reg32> + </bitset> + + <reg32 offset="0x9801" name="PC_HS_PARAM_1" type="a6xx_pc_hs_param_1" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9802" name="PC_DS_PARAM" usage="rp_blit"> + <bitset name="a6xx_pc_ds_param" inline="yes"> <bitfield name="SPACING" low="0" high="1" type="a6xx_tess_spacing"/> <bitfield name="OUTPUT" low="2" high="3" type="a6xx_tess_output"/> - </reg32> + </bitset> + + <reg32 offset="0x9802" name="PC_DS_PARAM" type="a6xx_pc_ds_param" variants="A6XX-A7XX" usage="rp_blit"/> + + <reg32 offset="0x9803" name="PC_RESTART_INDEX" low="0" high="31" type="uint" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9803" name="PC_RESTART_INDEX" low="0" high="31" type="uint" usage="rp_blit"/> - <reg32 offset="0x9804" name="PC_MODE_CNTL" low="0" high="7" usage="rp_blit"/> + <reg32 offset="0x9804" name="PC_MODE_CNTL" low="0" high="7" variants="A6XX-A7XX" usage="rp_blit"/> <reg32 offset="0x9805" name="PC_POWER_CNTL" low="0" high="2" usage="rp_blit"/> - <reg32 offset="0x9806" name="PC_PS_CNTL" usage="rp_blit"> + <bitset name="a6xx_pc_ps_cntl" inline="yes"> <bitfield name="PRIMITIVEIDEN" pos="0" type="boolean"/> - </reg32> + </bitset> - <!-- New in a6xx gen3+ --> - <reg32 offset="0x9808" name="PC_DGEN_SO_CNTL" usage="rp_blit"> + <reg32 offset="0x9806" name="PC_PS_CNTL" type="a6xx_pc_ps_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_pc_dgen_so_cntl" inline="yes"> <bitfield name="STREAM_ENABLE" low="15" high="18" type="hex"/> - </reg32> + </bitset> + + <!-- New in a6xx gen3+ --> + <reg32 offset="0x9808" name="PC_DGEN_SO_CNTL" type="a6xx_pc_dgen_so_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x980a" name="PC_DGEN_SU_CONSERVATIVE_RAS_CNTL"> + <bitset name="a6xx_pc_dgen_su_conservative_ras_cntl" inline="yes"> <bitfield name="CONSERVATIVERASEN" pos="0" type="boolean"/> - </reg32> - <!-- 0x980b-0x983f invalid --> + </bitset> + + <reg32 offset="0x980a" name="PC_DGEN_SU_CONSERVATIVE_RAS_CNTL" type="a6xx_pc_dgen_su_conservative_ras_cntl" variants="A6XX-A7XX"/> <!-- 0x9840 - 0x9842 are not readable --> - <reg32 offset="0x9840" name="PC_DRAW_INITIATOR"> + <bitset name="a6xx_draw_initiator" inline="yes"> <bitfield name="STATE_ID" low="0" high="7"/> - </reg32> + </bitset> - <reg32 offset="0x9841" name="PC_KERNEL_INITIATOR"> - <bitfield name="STATE_ID" low="0" high="7"/> - </reg32> + <reg32 offset="0x9840" name="PC_DRAW_INITIATOR" type="a6xx_draw_initiator" variants="A6XX-A7XX"/> + <reg32 offset="0x9841" name="PC_KERNEL_INITIATOR" type="a6xx_draw_initiator" variants="A6XX-A7XX"/> - <reg32 offset="0x9842" name="PC_EVENT_INITIATOR"> + <bitset name="a6xx_event_initiator" inline="yes"> <!-- I think only the low bit is actually used? --> <bitfield name="STATE_ID" low="16" high="23"/> <bitfield name="EVENT" low="0" high="6" type="vgt_event_type"/> - </reg32> + </bitset> + + <reg32 offset="0x9842" name="PC_EVENT_INITIATOR" type="a6xx_event_initiator" variants="A6XX-A7XX"/> <!-- 0x9880 written in a lot of places by SQE, same value gets written @@ -2219,45 +2343,21 @@ by a particular renderpass/blit. <!-- 0x9843-0x997f invalid --> - <reg32 offset="0x9981" name="PC_DGEN_RAST_CNTL" variants="A6XX" usage="rp_blit"> - <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> - </reg32> - <reg32 offset="0x9809" name="PC_DGEN_RAST_CNTL" variants="A7XX-" usage="rp_blit"> - <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> - </reg32> - - <reg32 offset="0x9980" name="VPC_RAST_STREAM_CNTL" variants="A6XX" usage="rp_blit"> - <!-- which stream to send to GRAS --> - <bitfield name="STREAM" low="0" high="1" type="uint"/> - <!-- discard primitives before rasterization --> - <bitfield name="DISCARD" pos="2" type="boolean"/> - </reg32> - <!-- VPC_RAST_STREAM_CNTL --> - <reg32 offset="0x9107" name="VPC_RAST_STREAM_CNTL" variants="A7XX-" usage="rp_blit"> - <!-- which stream to send to GRAS --> - <bitfield name="STREAM" low="0" high="1" type="uint"/> - <!-- discard primitives before rasterization --> - <bitfield name="DISCARD" pos="2" type="boolean"/> - </reg32> - <reg32 offset="0x9317" name="VPC_RAST_STREAM_CNTL_V2" variants="A7XX-" usage="rp_blit"> - <!-- which stream to send to GRAS --> - <bitfield name="STREAM" low="0" high="1" type="uint"/> - <!-- discard primitives before rasterization --> - <bitfield name="DISCARD" pos="2" type="boolean"/> - </reg32> + <reg32 offset="0x9981" name="PC_DGEN_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX" usage="rp_blit"/> + <reg32 offset="0x9809" name="PC_DGEN_RAST_CNTL" type="a6xx_rast_cntl" variants="A7XX" usage="rp_blit"/> <!-- Both are a750+. Probably needed to correctly overlap execution of several draws. --> - <reg32 offset="0x9885" name="PC_HS_BUFFER_SIZE" variants="A7XX-" usage="cmd"/> + <reg32 offset="0x9885" name="PC_HS_BUFFER_SIZE" variants="A7XX" usage="cmd"/> <!-- Blob adds a bit more space {0x10, 0x20, 0x30, 0x40} bytes, but the meaning of this additional space is not known. --> - <reg32 offset="0x9886" name="PC_TF_BUFFER_SIZE" variants="A7XX-" usage="cmd"/> + <reg32 offset="0x9886" name="PC_TF_BUFFER_SIZE" variants="A7XX" usage="cmd"/> <!-- 0x9982-0x9aff invalid --> - <reg32 offset="0x9b00" name="PC_CNTL" type="a6xx_pc_cntl" usage="rp_blit"/> + <reg32 offset="0x9b00" name="PC_CNTL" type="a6xx_pc_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <bitset name="a6xx_pc_xs_cntl" inline="yes"> <doc> @@ -2270,18 +2370,18 @@ by a particular renderpass/blit. <bitfield name="LAYER" pos="9" type="boolean"/> <bitfield name="VIEW" pos="10" type="boolean"/> <!-- note: PC_VS_CNTL doesn't have the PRIMITIVE_ID bit --> + <!-- since HS can't output anything, only PRIMITIVE_ID is valid --> <bitfield name="PRIMITIVE_ID" pos="11" type="boolean"/> <bitfield name="CLIP_MASK" low="16" high="23" type="uint"/> <bitfield name="SHADINGRATE" pos="24" type="boolean" variants="A7XX-"/> </bitset> - <reg32 offset="0x9b01" name="PC_VS_CNTL" type="a6xx_pc_xs_cntl" usage="rp_blit"/> - <reg32 offset="0x9b02" name="PC_GS_CNTL" type="a6xx_pc_xs_cntl" usage="rp_blit"/> - <!-- since HS can't output anything, only PRIMITIVE_ID is valid --> - <reg32 offset="0x9b03" name="PC_HS_CNTL" type="a6xx_pc_xs_cntl" usage="rp_blit"/> - <reg32 offset="0x9b04" name="PC_DS_CNTL" type="a6xx_pc_xs_cntl" usage="rp_blit"/> + <reg32 offset="0x9b01" name="PC_VS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9b02" name="PC_GS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9b03" name="PC_HS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9b04" name="PC_DS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b05" name="PC_GS_PARAM_0" type="a6xx_gs_param_0" usage="rp_blit"/> + <reg32 offset="0x9b05" name="PC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A6XX-A7XX" usage="rp_blit"/> <reg32 offset="0x9b06" name="PC_PRIMITIVE_CNTL_6" variants="A6XX" usage="rp_blit"> <doc> @@ -2290,9 +2390,9 @@ by a particular renderpass/blit. <bitfield name="STRIDE_IN_VPC" low="0" high="10" type="uint"/> </reg32> - <reg32 offset="0x9b07" name="PC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" usage="rp_blit"/> + <reg32 offset="0x9b07" name="PC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <!-- mask of enabled views, doesn't exist on A630 --> - <reg32 offset="0x9b08" name="PC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" usage="rp_blit"/> + <reg32 offset="0x9b08" name="PC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A6XX-A7XX" usage="rp_blit"/> <!-- 0x9b09-0x9bff invalid --> <reg32 offset="0x9c00" name="PC_2D_EVENT_CMD"> <!-- special register (but note first 8 bits can be written/read) --> @@ -2303,34 +2403,39 @@ by a particular renderpass/blit. <!-- TODO: 0x9e00-0xa000 range incomplete --> <reg32 offset="0x9e00" name="PC_DBG_ECO_CNTL"/> <reg32 offset="0x9e01" name="PC_ADDR_MODE_CNTL" type="a5xx_address_mode"/> - <reg64 offset="0x9e04" name="PC_DMA_BASE"/> - <reg32 offset="0x9e06" name="PC_DMA_OFFSET" type="uint"/> - <reg32 offset="0x9e07" name="PC_DMA_SIZE" type="uint"/> + <reg64 offset="0x9e04" name="PC_DMA_BASE" type="address" variants="A6XX-A7XX"/> + <reg32 offset="0x9e06" name="PC_DMA_OFFSET" type="uint" variants="A6XX-A7XX"/> + <reg32 offset="0x9e07" name="PC_DMA_SIZE" type="uint" variants="A6XX-A7XX"/> + <reg64 offset="0x9e08" name="PC_TESS_BASE" variants="A6XX" type="waddress" align="32" usage="cmd"/> - <reg64 offset="0x9810" name="PC_TESS_BASE" variants="A7XX-" type="waddress" align="32" usage="cmd"/> + <reg64 offset="0x9810" name="PC_TESS_BASE" variants="A7XX" type="waddress" align="32" usage="cmd"/> - <reg32 offset="0x9e0b" name="PC_DRAWCALL_CNTL" type="vgt_draw_initiator_a4xx"> + <reg32 offset="0x9e0b" name="PC_DRAWCALL_CNTL" type="vgt_draw_initiator_a4xx" variants="A6XX-A7XX"> <doc> Possibly not really "initiating" the draw but the layout is similar to VGT_DRAW_INITIATOR on older gens </doc> </reg32> - <reg32 offset="0x9e0c" name="PC_DRAWCALL_INSTANCE_NUM" type="uint"/> - <reg32 offset="0x9e0d" name="PC_DRAWCALL_SIZE" type="uint"/> + <reg32 offset="0x9e0c" name="PC_DRAWCALL_INSTANCE_NUM" type="uint" variants="A6XX-A7XX"/> + <reg32 offset="0x9e0d" name="PC_DRAWCALL_SIZE" type="uint" variants="A6XX-A7XX"/> <!-- These match the contents of CP_SET_BIN_DATA (not written directly) --> - <reg32 offset="0x9e11" name="PC_VIS_STREAM_CNTL"> + <bitset name="a6xx_pc_vis_stream_cntl" inline="yes"> <bitfield name="UNK0" low="0" high="15"/> <bitfield name="VSC_SIZE" low="16" high="21" type="uint"/> <bitfield name="VSC_N" low="22" high="26" type="uint"/> - </reg32> - <reg64 offset="0x9e12" name="PC_PVIS_STREAM_BIN_BASE" type="waddress" align="32"/> - <reg64 offset="0x9e14" name="PC_DVIS_STREAM_BIN_BASE" type="waddress" align="32"/> + </bitset> + + <reg32 offset="0x9e11" name="PC_VIS_STREAM_CNTL" type="a6xx_pc_vis_stream_cntl" variants="A6XX-A7XX"/> + <reg64 offset="0x9e12" name="PC_PVIS_STREAM_BIN_BASE" type="waddress" align="32" variants="A6XX-A7XX"/> + <reg64 offset="0x9e14" name="PC_DVIS_STREAM_BIN_BASE" type="waddress" align="32" variants="A6XX-A7XX"/> - <reg32 offset="0x9e1c" name="PC_DRAWCALL_CNTL_OVERRIDE"> + <bitset name="a6xx_pc_drawcall_cntl_override" inline="yes"> <doc>Written by CP_SET_VISIBILITY_OVERRIDE handler</doc> <bitfield name="OVERRIDE" pos="0" type="boolean"/> - </reg32> + </bitset> + + <reg32 offset="0x9e1c" name="PC_DRAWCALL_CNTL_OVERRIDE" type="a6xx_pc_drawcall_cntl_override" variants="A6XX-A7XX"/> <reg32 offset="0x9e24" name="PC_UNKNOWN_9E24" variants="A7XX-" usage="cmd"/> @@ -2936,7 +3041,7 @@ by a particular renderpass/blit. <reg32 offset="0xa9b3" name="SP_CS_PROGRAM_COUNTER_OFFSET" type="uint" usage="cmd"/> <reg64 offset="0xa9b4" name="SP_CS_BASE" type="address" align="32" usage="cmd"/> <reg32 offset="0xa9b6" name="SP_CS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param" usage="cmd"/> - <reg64 offset="0xa9b7" name="SP_CS_PVT_MEM_BASE" align="32" usage="cmd"/> + <reg64 offset="0xa9b7" name="SP_CS_PVT_MEM_BASE" type="waddress" align="32" usage="cmd"/> <reg32 offset="0xa9b9" name="SP_CS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size" usage="cmd"/> <reg32 offset="0xa9ba" name="SP_CS_TSIZE" low="0" high="7" type="uint" usage="cmd"/> <reg32 offset="0xa9bb" name="SP_CS_CONFIG" type="a6xx_sp_xs_config" usage="cmd"/> @@ -3021,7 +3126,7 @@ by a particular renderpass/blit. UAV state for compute shader: --> <reg64 offset="0xa9f2" name="SP_CS_UAV_BASE" type="address" align="16" variants="A6XX"/> - <reg64 offset="0xa9f8" name="SP_CS_UAV_BASE" type="address" align="16" variants="A7XX"/> + <reg64 offset="0xa9f8" name="SP_CS_UAV_BASE" type="address" align="16" variants="A7XX-"/> <reg32 offset="0xaa00" name="SP_CS_USIZE" low="0" high="6" type="uint"/> <!-- Correlated with avgs/uvgs usage in FS --> @@ -3104,14 +3209,19 @@ by a particular renderpass/blit. instructions VS/HS/DS/GS/FS. See SP_CS_UAV_BASE_* for compute shaders. --> <reg64 offset="0xab1a" name="SP_GFX_UAV_BASE" type="address" align="16" usage="cmd"/> - <reg32 offset="0xab20" name="SP_GFX_USIZE" low="0" high="6" type="uint" usage="cmd"/> + <reg32 offset="0xab20" name="SP_GFX_USIZE" low="0" high="6" type="uint" variants="A6XX-A7XX" usage="cmd"/> - <reg32 offset="0xab22" name="SP_UNKNOWN_AB22" variants="A7XX-" usage="cmd"/> + <reg32 offset="0xab22" name="SP_UNKNOWN_AB22" variants="A7XX" usage="cmd"/> + + <enum name="a6xx_sp_a2d_output_ifmt_type"> + <value name="OUTPUT_IFMT_2D_FLOAT" value="0"/> + <value name="OUTPUT_IFMT_2D_SINT" value="1"/> + <value name="OUTPUT_IFMT_2D_UINT" value="2"/> + </enum> <bitset name="a6xx_sp_a2d_output_info" inline="yes"> - <bitfield name="NORM" pos="0" type="boolean"/> - <bitfield name="SINT" pos="1" type="boolean"/> - <bitfield name="UINT" pos="2" type="boolean"/> + <bitfield name="HALF_PRECISION" pos="0" type="boolean"/> + <bitfield name="IFMT_TYPE" low="1" high="2" type="a6xx_sp_a2d_output_ifmt_type"/> <!-- looks like HW only cares about the base type of this format, which matches the ifmt? --> <bitfield name="COLOR_FORMAT" low="3" high="10" type="a6xx_format"/> @@ -3156,7 +3266,7 @@ by a particular renderpass/blit. <reg32 offset="0xae6b" name="SP_UNKNOWN_AE6B" variants="A7XX-" usage="cmd"/> <reg32 offset="0xae6c" name="SP_HLSQ_DBG_ECO_CNTL" variants="A7XX-" usage="cmd"/> <reg32 offset="0xae6d" name="SP_READ_SEL" variants="A7XX-"> - <bitfield name="LOCATION" low="18" high="19" type="a7xx_state_location"/> + <bitfield name="LOCATION" low="18" high="20" type="a7xx_state_location"/> <bitfield name="PIPE" low="16" high="17" type="a7xx_pipe"/> <bitfield name="STATETYPE" low="8" high="15" type="a7xx_statetype_id"/> <bitfield name="USPTP" low="4" high="7"/> @@ -3192,7 +3302,7 @@ by a particular renderpass/blit. <!-- looks to work in the same way as a5xx: --> <reg64 offset="0xb302" name="TPL1_GFX_BORDER_COLOR_BASE" type="address" align="128" usage="cmd"/> - <reg32 offset="0xb304" name="TPL1_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" usage="rp_blit"/> + <reg32 offset="0xb304" name="TPL1_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <reg32 offset="0xb305" name="TPL1_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" usage="rp_blit"/> <reg32 offset="0xb306" name="TPL1_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" usage="rp_blit"/> <reg32 offset="0xb307" name="TPL1_WINDOW_OFFSET" type="a6xx_reg_xy" usage="rp_blit"/> @@ -3232,12 +3342,12 @@ by a particular renderpass/blit. </reg32> <reg32 offset="0xb2c0" name="TPL1_A2D_SRC_TEXTURE_INFO" type="a6xx_a2d_src_texture_info" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xb2c1" name="TPL1_A2D_SRC_TEXTURE_SIZE" variants="A7XX"> + <reg32 offset="0xb2c1" name="TPL1_A2D_SRC_TEXTURE_SIZE" variants="A7XX-"> <bitfield name="WIDTH" low="0" high="14" type="uint"/> <bitfield name="HEIGHT" low="15" high="29" type="uint"/> </reg32> <reg64 offset="0xb2c2" name="TPL1_A2D_SRC_TEXTURE_BASE" type="address" align="16" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xb2c4" name="TPL1_A2D_SRC_TEXTURE_PITCH" variants="A7XX"> + <reg32 offset="0xb2c4" name="TPL1_A2D_SRC_TEXTURE_PITCH" variants="A7XX-"> <!-- Bits from 3..9 must be zero unless 'TPL1_A2D_BLT_CNTL::TYPE' is A6XX_TEX_IMG_BUFFER, which allows for lower alignment. @@ -3270,13 +3380,13 @@ by a particular renderpass/blit. <reg32 offset="0xb2ce" name="SP_PS_UNKNOWN_B4CE" low="0" high="31" variants="A7XX"/> <reg32 offset="0xb2cf" name="SP_PS_UNKNOWN_B4CF" low="0" high="30" variants="A7XX"/> <reg32 offset="0xb2d0" name="SP_PS_UNKNOWN_B4D0" low="0" high="29" variants="A7XX"/> - <reg32 offset="0xb2d1" name="TPL1_A2D_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX"/> + <reg32 offset="0xb2d1" name="TPL1_A2D_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX-"/> <reg32 offset="0xb2d2" name="TPL1_A2D_BLT_CNTL" variants="A7XX-" usage="rp_blit"> <bitfield name="RAW_COPY" pos="0" type="boolean"/> <bitfield name="START_OFFSET_TEXELS" low="16" high="21"/> <bitfield name="TYPE" low="29" high="31" type="a6xx_tex_type"/> </reg32> - <reg32 offset="0xab21" name="SP_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX-" usage="rp_blit"/> + <reg32 offset="0xab21" name="SP_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX" usage="rp_blit"/> <!-- always 0x100000 or 0x1000000? --> <reg32 offset="0xb600" name="TPL1_DBG_ECO_CNTL" low="0" high="25" usage="cmd"/> @@ -3296,17 +3406,13 @@ by a particular renderpass/blit. </reg32> <reg32 offset="0xb605" name="TPL1_UNKNOWN_B605" low="0" high="7" type="uint" variants="A6XX" usage="cmd"/> <!-- always 0x0 or 0x44 ? --> - <reg32 offset="0xb608" name="TPL1_BICUBIC_WEIGHTS_TABLE_0" low="0" high="29" variants="A6XX"/> - <reg32 offset="0xb609" name="TPL1_BICUBIC_WEIGHTS_TABLE_1" low="0" high="29" variants="A6XX"/> - <reg32 offset="0xb60a" name="TPL1_BICUBIC_WEIGHTS_TABLE_2" low="0" high="29" variants="A6XX"/> - <reg32 offset="0xb60b" name="TPL1_BICUBIC_WEIGHTS_TABLE_3" low="0" high="29" variants="A6XX"/> - <reg32 offset="0xb60c" name="TPL1_BICUBIC_WEIGHTS_TABLE_4" low="0" high="29" variants="A6XX"/> + <array offset="0xb608" name="TPL1_BICUBIC_WEIGHTS_TABLE" stride="1" length="5" variants="A6XX"> + <reg32 offset="0" name="REG" low="0" high="29"/> + </array> - <reg32 offset="0xb608" name="TPL1_BICUBIC_WEIGHTS_TABLE_0" low="0" high="29" variants="A7XX" usage="cmd"/> - <reg32 offset="0xb609" name="TPL1_BICUBIC_WEIGHTS_TABLE_1" low="0" high="29" variants="A7XX" usage="cmd"/> - <reg32 offset="0xb60a" name="TPL1_BICUBIC_WEIGHTS_TABLE_2" low="0" high="29" variants="A7XX" usage="cmd"/> - <reg32 offset="0xb60b" name="TPL1_BICUBIC_WEIGHTS_TABLE_3" low="0" high="29" variants="A7XX" usage="cmd"/> - <reg32 offset="0xb60c" name="TPL1_BICUBIC_WEIGHTS_TABLE_4" low="0" high="29" variants="A7XX" usage="cmd"/> + <array offset="0xb608" name="TPL1_BICUBIC_WEIGHTS_TABLE" stride="1" length="5" variants="A7XX"> + <reg32 offset="0" name="REG" low="0" high="29" usage="cmd"/> + </array> <array offset="0xb610" name="TPL1_PERFCTR_TP_SEL" stride="1" length="12" variants="A6XX"/> <array offset="0xb610" name="TPL1_PERFCTR_TP_SEL" stride="1" length="18" variants="A7XX"/> @@ -3638,7 +3744,7 @@ by a particular renderpass/blit. <reg32 offset="0xbb10" name="SP_PS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A6XX" usage="rp_blit"/> <reg32 offset="0xab03" name="SP_PS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A7XX-" usage="rp_blit"/> - <array offset="0xab40" name="SP_SHARED_CONSTANT_GFX_0" stride="1" length="64" variants="A7XX-"/> + <array offset="0xab40" name="SP_SHARED_CONSTANT_GFX" stride="1" length="64" variants="A7XX"/> <reg32 offset="0xbb11" name="HLSQ_SHARED_CONSTS" variants="A6XX" usage="cmd"> <doc> @@ -3800,7 +3906,7 @@ by a particular renderpass/blit. <reg32 offset="0x0030" name="CFG_DBGBUS_TRACE_BUF2"/> </domain> -<domain name="A7XX_CX_DBGC" width="32"> +<domain name="A7XX_CX_DBGC" width="32" varset="chip"> <!-- Bitfields shifted, but otherwise the same: --> <reg32 offset="0x0000" name="CFG_DBGBUS_SEL_A" variants="A7XX-"> <bitfield high="7" low="0" name="PING_INDEX"/> diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml index 307d43dda8a2..56cfaff614a4 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml @@ -9,38 +9,6 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <domain name="A6XX_TEX_SAMP" width="32"> <doc>Texture sampler dwords</doc> - <enum name="a6xx_tex_filter"> <!-- same as a4xx? --> - <value name="A6XX_TEX_NEAREST" value="0"/> - <value name="A6XX_TEX_LINEAR" value="1"/> - <value name="A6XX_TEX_ANISO" value="2"/> - <value name="A6XX_TEX_CUBIC" value="3"/> <!-- a650 only --> - </enum> - <enum name="a6xx_tex_clamp"> <!-- same as a4xx? --> - <value name="A6XX_TEX_REPEAT" value="0"/> - <value name="A6XX_TEX_CLAMP_TO_EDGE" value="1"/> - <value name="A6XX_TEX_MIRROR_REPEAT" value="2"/> - <value name="A6XX_TEX_CLAMP_TO_BORDER" value="3"/> - <value name="A6XX_TEX_MIRROR_CLAMP" value="4"/> - </enum> - <enum name="a6xx_tex_aniso"> <!-- same as a4xx? --> - <value name="A6XX_TEX_ANISO_1" value="0"/> - <value name="A6XX_TEX_ANISO_2" value="1"/> - <value name="A6XX_TEX_ANISO_4" value="2"/> - <value name="A6XX_TEX_ANISO_8" value="3"/> - <value name="A6XX_TEX_ANISO_16" value="4"/> - </enum> - <enum name="a6xx_reduction_mode"> - <value name="A6XX_REDUCTION_MODE_AVERAGE" value="0"/> - <value name="A6XX_REDUCTION_MODE_MIN" value="1"/> - <value name="A6XX_REDUCTION_MODE_MAX" value="2"/> - </enum> - <enum name="a6xx_fast_border_color"> - <!-- R B G A --> - <value name="A6XX_BORDER_COLOR_0_0_0_0" value="0"/> - <value name="A6XX_BORDER_COLOR_0_0_0_1" value="1"/> - <value name="A6XX_BORDER_COLOR_1_1_1_0" value="2"/> - <value name="A6XX_BORDER_COLOR_1_1_1_1" value="3"/> - </enum> <reg32 offset="0" name="0"> <bitfield name="MIPFILTER_LINEAR_NEAR" pos="0" type="boolean"/> @@ -79,14 +47,6 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <domain name="A6XX_TEX_CONST" width="32" varset="chip"> <doc>Texture constant dwords</doc> - <enum name="a6xx_tex_swiz"> <!-- same as a4xx? --> - <value name="A6XX_TEX_X" value="0"/> - <value name="A6XX_TEX_Y" value="1"/> - <value name="A6XX_TEX_Z" value="2"/> - <value name="A6XX_TEX_W" value="3"/> - <value name="A6XX_TEX_ZERO" value="4"/> - <value name="A6XX_TEX_ONE" value="5"/> - </enum> <reg32 offset="0" name="0"> <bitfield name="TILE_MODE" low="0" high="1" type="a6xx_tile_mode"/> <bitfield name="SRGB" pos="2" type="boolean"/> diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml index 665539b098c6..4e42f055b85f 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml @@ -320,14 +320,14 @@ to upconvert to 32b float internally? 16b float: 3 --> <enum name="a6xx_2d_ifmt"> - <value value="0x10" name="R2D_UNORM8"/> <value value="0x7" name="R2D_INT32"/> <value value="0x6" name="R2D_INT16"/> <value value="0x5" name="R2D_INT8"/> <value value="0x4" name="R2D_FLOAT32"/> <value value="0x3" name="R2D_FLOAT16"/> + <value value="0x2" name="R2D_SNORM8"/> <value value="0x1" name="R2D_UNORM8_SRGB"/> - <value value="0x0" name="R2D_RAW"/> + <value value="0x0" name="R2D_UNORM8"/> </enum> <enum name="a6xx_tex_type"> @@ -380,4 +380,50 @@ to upconvert to 32b float internally? <value value="0x3" name="TESS_CCW_TRIS"/> </enum> +<enum name="a6xx_tex_filter"> <!-- same as a4xx? --> + <value name="A6XX_TEX_NEAREST" value="0"/> + <value name="A6XX_TEX_LINEAR" value="1"/> + <value name="A6XX_TEX_ANISO" value="2"/> + <value name="A6XX_TEX_CUBIC" value="3"/> <!-- a650 only --> +</enum> + +<enum name="a6xx_tex_clamp"> <!-- same as a4xx? --> + <value name="A6XX_TEX_REPEAT" value="0"/> + <value name="A6XX_TEX_CLAMP_TO_EDGE" value="1"/> + <value name="A6XX_TEX_MIRROR_REPEAT" value="2"/> + <value name="A6XX_TEX_CLAMP_TO_BORDER" value="3"/> + <value name="A6XX_TEX_MIRROR_CLAMP" value="4"/> +</enum> + +<enum name="a6xx_tex_aniso"> <!-- same as a4xx? --> + <value name="A6XX_TEX_ANISO_1" value="0"/> + <value name="A6XX_TEX_ANISO_2" value="1"/> + <value name="A6XX_TEX_ANISO_4" value="2"/> + <value name="A6XX_TEX_ANISO_8" value="3"/> + <value name="A6XX_TEX_ANISO_16" value="4"/> +</enum> + +<enum name="a6xx_reduction_mode"> + <value name="A6XX_REDUCTION_MODE_AVERAGE" value="0"/> + <value name="A6XX_REDUCTION_MODE_MIN" value="1"/> + <value name="A6XX_REDUCTION_MODE_MAX" value="2"/> +</enum> + +<enum name="a6xx_fast_border_color"> + <!-- R B G A --> + <value name="A6XX_BORDER_COLOR_0_0_0_0" value="0"/> + <value name="A6XX_BORDER_COLOR_0_0_0_1" value="1"/> + <value name="A6XX_BORDER_COLOR_1_1_1_0" value="2"/> + <value name="A6XX_BORDER_COLOR_1_1_1_1" value="3"/> +</enum> + +<enum name="a6xx_tex_swiz"> <!-- same as a4xx? --> + <value name="A6XX_TEX_X" value="0"/> + <value name="A6XX_TEX_Y" value="1"/> + <value name="A6XX_TEX_Z" value="2"/> + <value name="A6XX_TEX_W" value="3"/> + <value name="A6XX_TEX_ZERO" value="4"/> + <value name="A6XX_TEX_ONE" value="5"/> +</enum> + </database> diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml index 3d2cc339b8f1..b15a242d974d 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml @@ -99,6 +99,10 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <bitfield name="GX_HM_GDSC_POWER_OFF" pos="6" type="boolean"/> <bitfield name="GX_HM_CLK_OFF" pos="7" type="boolean"/> </reg32> + <reg32 offset="0x50d0" name="GMU_SPTPRAC_PWR_CLK_STATUS" variants="A7XX"> + <bitfield name="GX_HM_GDSC_POWER_OFF" pos="0" type="boolean"/> + <bitfield name="GX_HM_CLK_OFF" pos="1" type="boolean"/> + </reg32> <reg32 offset="0x50e4" name="GMU_GPU_NAP_CTRL"> <bitfield name="HW_NAP_ENABLE" pos="0"/> <bitfield name="SID" low="4" high="8"/> @@ -127,6 +131,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x5088" name="GMU_ALWAYS_ON_COUNTER_L"/> <reg32 offset="0x5089" name="GMU_ALWAYS_ON_COUNTER_H"/> <reg32 offset="0x50c3" name="GMU_GMU_PWR_COL_KEEPALIVE"/> + <reg32 offset="0x50c4" name="GMU_PWR_COL_PREEMPT_KEEPALIVE"/> <reg32 offset="0x5180" name="GMU_HFI_CTRL_STATUS"/> <reg32 offset="0x5181" name="GMU_HFI_VERSION_INFO"/> <reg32 offset="0x5182" name="GMU_HFI_SFR_ADDR"/> @@ -228,6 +233,12 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x03ee" name="RSCC_TCS1_DRV0_STATUS"/> <reg32 offset="0x0496" name="RSCC_TCS2_DRV0_STATUS"/> <reg32 offset="0x053e" name="RSCC_TCS3_DRV0_STATUS"/> + <reg32 offset="0x05e6" name="RSCC_TCS4_DRV0_STATUS" variants="A7XX"/> + <reg32 offset="0x068e" name="RSCC_TCS5_DRV0_STATUS" variants="A7XX"/> + <reg32 offset="0x0736" name="RSCC_TCS6_DRV0_STATUS" variants="A7XX"/> + <reg32 offset="0x07de" name="RSCC_TCS7_DRV0_STATUS" variants="A7XX"/> + <reg32 offset="0x0886" name="RSCC_TCS8_DRV0_STATUS" variants="A7XX"/> + <reg32 offset="0x092e" name="RSCC_TCS9_DRV0_STATUS" variants="A7XX"/> </domain> </database> diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml index 7abc08635495..0e10e1c6d263 100644 --- a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml +++ b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml @@ -120,12 +120,12 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <value name="LRZ_FLUSH" value="38" variants="A5XX-"/> <value name="BLIT_OP_FILL_2D" value="39" variants="A5XX-"/> <value name="BLIT_OP_COPY_2D" value="40" variants="A5XX-A6XX"/> - <value name="UNK_40" value="40" variants="A7XX"/> + <value name="LRZ_CACHE_INVALIDATE" value="40" variants="A7XX"/> <value name="LRZ_Q_CACHE_INVALIDATE" value="41" variants="A7XX"/> <value name="BLIT_OP_SCALE_2D" value="42" variants="A5XX-"/> <value name="CONTEXT_DONE_2D" value="43" variants="A5XX-"/> - <value name="UNK_2C" value="44" variants="A5XX-"/> - <value name="UNK_2D" value="45" variants="A5XX-"/> + <value name="VSC_BINNING_START" value="44" variants="A5XX-"/> + <value name="VSC_BINNING_END" value="45" variants="A5XX-"/> <!-- a6xx events --> <doc> @@ -523,7 +523,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <!-- Seems to set the mode flags which control which CP_SET_DRAW_STATE packets are executed, based on their ENABLE_MASK values - + CP_SET_MODE w/ payload of 0x1 seems to cause CP_SET_DRAW_STATE packets w/ ENABLE_MASK & 0x6 to execute immediately --> @@ -640,8 +640,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <value name="CP_BV_BR_COUNT_OPS" value="0x1b" variants="A7XX-"/> <doc> Clears, adds to local, or adds to global timestamp </doc> <value name="CP_MODIFY_TIMESTAMP" value="0x1c" variants="A7XX-"/> - <!-- similar to CP_CONTEXT_REG_BUNCH, but discards first two dwords?? --> - <value name="CP_CONTEXT_REG_BUNCH2" value="0x5d" variants="A7XX-"/> + <value name="CP_NON_CONTEXT_REG_BUNCH" value="0x5d" variants="A7XX-"/> <doc> Write to a scratch memory that is read by CP_REG_TEST with SOURCE_SCRATCH_MEM set. It's not the same scratch as scratch registers. @@ -918,12 +917,6 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </reg32> <stripe varset="chip" variants="A5XX-"> - <reg32 offset="4" name="4"> - <bitfield name="INDX_BASE_LO" low="0" high="31"/> - </reg32> - <reg32 offset="5" name="5"> - <bitfield name="INDX_BASE_HI" low="0" high="31"/> - </reg32> <reg64 offset="4" name="INDX_BASE" type="address"/> <reg32 offset="6" name="6"> <!-- max # of elements in index buffer --> @@ -1099,8 +1092,10 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="BINNING" pos="20" varset="chip" variants="A6XX-" type="boolean"/> <bitfield name="GMEM" pos="21" varset="chip" variants="A6XX-" type="boolean"/> <bitfield name="SYSMEM" pos="22" varset="chip" variants="A6XX-" type="boolean"/> - <bitfield name="GROUP_ID" low="24" high="28" type="uint"/> + <!-- high bit is 28 until a750: --> + <bitfield name="GROUP_ID" low="24" high="29" type="uint"/> </reg32> + <reg64 offset="1" name="ADDR" type="address"/> <reg32 offset="1" name="1"> <bitfield name="ADDR_LO" low="0" high="31" type="hex"/> </reg32> @@ -1166,26 +1161,11 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </reg32> <stripe varset="a7xx_abs_mask_mode" variants="NO_ABS_MASK"> <!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS --> - <reg32 offset="1" name="1"> - <bitfield name="BIN_DATA_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="BIN_DATA_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="1" name="BIN_DATA_ADDR" type="address"/> <!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)--> - <reg32 offset="3" name="3"> - <bitfield name="BIN_SIZE_ADDRESS_LO" low="0" high="31"/> - </reg32> - <reg32 offset="4" name="4"> - <bitfield name="BIN_SIZE_ADDRESS_HI" low="0" high="31"/> - </reg32> + <reg64 offset="3" name="BIN_SIZE_ADDR" type="address"/> <!-- new on a6xx, where BIN_DATA_ADDR is the DRAW_STRM: --> - <reg32 offset="5" name="5"> - <bitfield name="BIN_PRIM_STRM_LO" low="0" high="31"/> - </reg32> - <reg32 offset="6" name="6"> - <bitfield name="BIN_PRIM_STRM_HI" low="0" high="31"/> - </reg32> + <reg64 offset="5" name="BIN_PRIM_STRM" type="address"/> <!-- a7xx adds a few more addresses to the end of the pkt --> @@ -1195,26 +1175,11 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <stripe varset="a7xx_abs_mask_mode" variants="ABS_MASK"> <reg32 offset="1" name="ABS_MASK"/> <!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS --> - <reg32 offset="2" name="2"> - <bitfield name="BIN_DATA_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="3" name="3"> - <bitfield name="BIN_DATA_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="2" name="BIN_DATA_ADDR" type="address"/> <!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)--> - <reg32 offset="4" name="4"> - <bitfield name="BIN_SIZE_ADDRESS_LO" low="0" high="31"/> - </reg32> - <reg32 offset="5" name="5"> - <bitfield name="BIN_SIZE_ADDRESS_HI" low="0" high="31"/> - </reg32> + <reg64 offset="4" name="BIN_SIZE_ADDR" type="address"/> <!-- new on a6xx, where BIN_DATA_ADDR is the DRAW_STRM: --> - <reg32 offset="6" name="6"> - <bitfield name="BIN_PRIM_STRM_LO" low="0" high="31"/> - </reg32> - <reg32 offset="7" name="7"> - <bitfield name="BIN_PRIM_STRM_HI" low="0" high="31"/> - </reg32> + <reg64 offset="6" name="BIN_PRIM_STRM" type="address"/> <!-- a7xx adds a few more addresses to the end of the pkt --> @@ -1300,7 +1265,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </reg32> </domain> -<domain name="CP_REG_TO_MEM" width="32"> +<domain name="CP_REG_TO_MEM" width="32" prefix="chip"> <reg32 offset="0" name="0"> <bitfield name="REG" low="0" high="17" type="hex"/> <!-- number of registers/dwords copied is max(CNT, 1). --> @@ -1308,12 +1273,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="64B" pos="30" type="boolean"/> <bitfield name="ACCUMULATE" pos="31" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="DEST" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> - <bitfield name="DEST_HI" low="0" high="31"/> - </reg32> + <stripe varset="chip" variants="A2XX-A4XX"> + <reg32 offset="1" name="DEST" type="address"/> + </stripe> + <stripe varset="chip" variants="A5XX-"> + <reg64 offset="1" name="DEST" type="address"/> + </stripe> </domain> <domain name="CP_REG_TO_MEM_OFFSET_REG" width="32"> @@ -1329,12 +1294,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="64B" pos="30" type="boolean"/> <bitfield name="ACCUMULATE" pos="31" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="DEST" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> - <bitfield name="DEST_HI" low="0" high="31"/> - </reg32> + <reg64 offset="1" name="DEST" type="waddress"/> <reg32 offset="3" name="3"> <bitfield name="OFFSET0" low="0" high="17" type="hex"/> <bitfield name="OFFSET0_SCRATCH" pos="19" type="boolean"/> @@ -1354,18 +1314,8 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="64B" pos="30" type="boolean"/> <bitfield name="ACCUMULATE" pos="31" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="DEST" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> - <bitfield name="DEST_HI" low="0" high="31"/> - </reg32> - <reg32 offset="3" name="3"> - <bitfield name="OFFSET_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="4" name="4"> - <bitfield name="OFFSET_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="1" name="DEST" type="waddress"/> + <reg64 offset="3" name="OFFSET" type="waddress"/> </domain> <domain name="CP_MEM_TO_REG" width="32"> @@ -1378,12 +1328,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <!-- does the same thing as CP_MEM_TO_MEM::UNK31 --> <bitfield name="UNK31" pos="31" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="SRC" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> - <bitfield name="SRC_HI" low="0" high="31"/> - </reg32> + <stripe varset="chip" variants="A2XX-A4XX"> + <reg32 offset="1" name="SRC" type="address"/> + </stripe> + <stripe varset="chip" variants="A5XX-"> + <reg64 offset="1" name="SRC" type="address"/> + </stripe> </domain> <domain name="CP_MEM_TO_MEM" width="32"> @@ -1403,6 +1353,10 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <!-- some other kind of wait --> <bitfield name="UNK31" pos="31" type="boolean"/> </reg32> + <reg64 offset="1" name="DST" type="waddress"/> + <reg64 offset="3" name="SRC_A" type="address"/> + <reg64 offset="5" name="SRC_B" type="address"/> + <reg64 offset="7" name="SRC_C" type="address"/> <!-- followed by sequence of addresses.. the first is the destination and the rest are N src addresses which are @@ -1461,12 +1415,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </domain> <domain name="CP_MEM_WRITE" width="32"> - <reg32 offset="0" name="0"> - <bitfield name="ADDR_LO" low="0" high="31"/> - </reg32> - <reg32 offset="1" name="1"> - <bitfield name="ADDR_HI" low="0" high="31"/> - </reg32> + <stripe varset="chip" variants="A2XX-A4XX"> + <reg32 offset="0" name="ADDR" type="address"/> + </stripe> + <stripe varset="chip" variants="A5XX-"> + <reg64 offset="0" name="ADDR" type="address"/> + </stripe> <!-- followed by the DWORDs to write --> </domain> @@ -1518,24 +1472,14 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="POLL" low="4" high="5" type="poll_memory_type"/> <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="1" name="POLL_ADDR" type="address"/> <reg32 offset="3" name="3"> <bitfield name="REF" low="0" high="31"/> </reg32> <reg32 offset="4" name="4"> <bitfield name="MASK" low="0" high="31"/> </reg32> - <reg32 offset="5" name="5"> - <bitfield name="WRITE_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="6" name="6"> - <bitfield name="WRITE_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="5" name="WRITE_ADDR" type="waddress"/> <reg32 offset="7" name="7"> <bitfield name="WRITE_DATA" low="0" high="31"/> </reg32> @@ -1550,12 +1494,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <!-- Reserved for flags, presumably? Unused in FW --> <bitfield name="RESERVED" low="0" high="31" type="hex"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="1" name="POLL_ADDR" type="address"/> <reg32 offset="3" name="3"> <bitfield name="REF" low="0" high="31"/> </reg32> @@ -1573,12 +1512,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="POLL" low="4" high="5" type="poll_memory_type"/> <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="1" name="POLL_ADDR" type="address"/> <reg32 offset="3" name="3"> <bitfield name="REF" low="0" high="31"/> </reg32> @@ -1712,12 +1646,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) TODO what is gpuaddr for, seems to be all 0's.. maybe needed for context switch? --> - <reg32 offset="1" name="1"> - <bitfield name="ADDR_0_LO" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="ADDR_0_HI" low="0" high="31"/> - </reg32> + <reg64 offset="1" name="ADDR" type="waddress"/> <reg32 offset="3" name="3"> <!-- ??? --> </reg32> @@ -1832,9 +1761,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <reg32 offset="0" name="0"> </reg32> <stripe varset="chip" variants="A4XX"> - <reg32 offset="1" name="1"> - <bitfield name="ADDR" low="0" high="31"/> - </reg32> + <reg32 offset="1" name="ADDR" type="address"/> <reg32 offset="2" name="2"> <!-- localsize is value minus one: --> <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/> @@ -1843,12 +1770,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </reg32> </stripe> <stripe varset="chip" variants="A5XX-"> - <reg32 offset="1" name="1"> - <bitfield name="ADDR_LO" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="ADDR_HI" low="0" high="31"/> - </reg32> + <reg64 offset="1" name="ADDR" type="address"/> <reg32 offset="3" name="3"> <!-- localsize is value minus one: --> <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/> @@ -2161,12 +2083,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </doc> </value> </enum> - <reg32 offset="0" name="0"> - <bitfield name="ADDR_LO" low="0" high="31"/> - </reg32> - <reg32 offset="1" name="1"> - <bitfield name="ADDR_HI" low="0" high="31"/> - </reg32> + <reg64 offset="0" name="ADDR" type="address"/> <reg32 offset="2" name="2"> <bitfield name="DWORDS" low="0" high="19" type="uint"/> <bitfield name="TYPE" low="20" high="21" type="amble_type"/> diff --git a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml index 4e5ac0f25dea..f41516dd0567 100644 --- a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml +++ b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml @@ -22,7 +22,16 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x00018" name="GLBL_CTRL"/> <reg32 offset="0x0001c" name="RBUF_CTRL"/> <reg32 offset="0x00020" name="VREG_CTRL_0"/> - <reg32 offset="0x00024" name="CTRL_0"/> + <reg32 offset="0x00024" name="CTRL_0"> + <bitfield name="CLKSL_SHUTDOWNB" pos="7" type="boolean"/> + <bitfield name="DIGTOP_PWRDN_B" pos="6" type="boolean"/> + <bitfield name="PLL_SHUTDOWNB" pos="5" type="boolean"/> + <bitfield name="DLN3_SHUTDOWNB" pos="4" type="boolean"/> + <bitfield name="DLN2_SHUTDOWNB" pos="3" type="boolean"/> + <bitfield name="CLK_SHUTDOWNB" pos="2" type="boolean"/> + <bitfield name="DLN1_SHUTDOWNB" pos="1" type="boolean"/> + <bitfield name="DLN0_SHUTDOWNB" pos="0" type="boolean"/> + </reg32> <reg32 offset="0x00028" name="CTRL_1"/> <reg32 offset="0x0002c" name="CTRL_2"/> <reg32 offset="0x00030" name="CTRL_3"/> diff --git a/drivers/gpu/drm/msm/registers/gen_header.py b/drivers/gpu/drm/msm/registers/gen_header.py index a409404627c7..1d603dadfabd 100644 --- a/drivers/gpu/drm/msm/registers/gen_header.py +++ b/drivers/gpu/drm/msm/registers/gen_header.py @@ -11,7 +11,6 @@ import collections import argparse import time import datetime -import re class Error(Exception): def __init__(self, message): @@ -31,7 +30,7 @@ class Enum(object): def names(self): return [n for (n, value) in self.values] - def dump(self): + def dump(self, is_deprecated): use_hex = False for (name, value) in self.values: if value > 0x1000: @@ -45,7 +44,7 @@ class Enum(object): print("\t%s = %d," % (name, value)) print("};\n") - def dump_pack_struct(self): + def dump_pack_struct(self, is_deprecated): pass class Field(object): @@ -70,11 +69,11 @@ class Field(object): raise parser.error("booleans should be 1 bit fields") elif self.type == "float" and not (high - low == 31 or high - low == 15): raise parser.error("floats should be 16 or 32 bit fields") - elif not self.type in builtin_types and not self.type in parser.enums: + elif self.type not in builtin_types and self.type not in parser.enums: raise parser.error("unknown type '%s'" % self.type) def ctype(self, var_name): - if self.type == None: + if self.type is None: type = "uint32_t" val = var_name elif self.type == "boolean": @@ -124,7 +123,7 @@ def field_name(reg, f): name = f.name.lower() else: # We hit this path when a reg is defined with no bitset fields, ie. - # <reg32 offset="0x88db" name="RB_BLIT_DST_ARRAY_PITCH" low="0" high="28" shr="6" type="uint"/> + # <reg32 offset="0x88db" name="RB_RESOLVE_SYSTEM_BUFFER_ARRAY_PITCH" low="0" high="28" shr="6" type="uint"/> name = reg.name.lower() if (name in [ "double", "float", "int" ]) or not (name[0].isalpha()): @@ -146,10 +145,23 @@ def indices_strides(indices): "%s(i%d)" % (offset, idx) for (idx, (ctype, stride, offset)) in enumerate(indices)]) +def is_number(str): + try: + int(str) + return True + except ValueError: + return False + +def sanitize_variant(variant): + if variant and "-" in variant: + return variant[:variant.index("-")] + return variant + class Bitset(object): def __init__(self, name, template): self.name = name self.inline = False + self.reg = None if template: self.fields = template.fields[:] else: @@ -175,11 +187,7 @@ class Bitset(object): print("#endif\n") print(" return (struct fd_reg_pair) {") - if reg.array: - print(" .reg = REG_%s(__i)," % reg.full_name) - else: - print(" .reg = REG_%s," % reg.full_name) - + print(" .reg = (uint32_t)%s," % reg.reg_offset()) print(" .value =") for f in self.fields: if f.type in [ "address", "waddress" ]: @@ -204,7 +212,7 @@ class Bitset(object): print(" };") - def dump_pack_struct(self, reg=None): + def dump_pack_struct(self, is_deprecated, reg=None): if not reg: return @@ -229,12 +237,15 @@ class Bitset(object): tab_to(" uint32_t", "dword;") print("};\n") + depcrstr = "" + if is_deprecated: + depcrstr = " FD_DEPRECATED" if reg.array: - print("static inline struct fd_reg_pair\npack_%s(uint32_t __i, struct %s fields)\n{" % - (prefix, prefix)) + print("static inline%s struct fd_reg_pair\npack_%s(uint32_t __i, struct %s fields)\n{" % + (depcrstr, prefix, prefix)) else: - print("static inline struct fd_reg_pair\npack_%s(struct %s fields)\n{" % - (prefix, prefix)) + print("static inline%s struct fd_reg_pair\npack_%s(struct %s fields)\n{" % + (depcrstr, prefix, prefix)) self.dump_regpair_builder(reg) @@ -253,18 +264,23 @@ class Bitset(object): (prefix, prefix, prefix, skip)) - def dump(self, prefix=None): - if prefix == None: + def dump(self, is_deprecated, prefix=None): + if prefix is None: prefix = self.name + if self.reg and self.reg.bit_size == 64: + print("static inline uint32_t %s_LO(uint32_t val)\n{" % prefix) + print("\treturn val;\n}") + print("static inline uint32_t %s_HI(uint32_t val)\n{" % prefix) + print("\treturn val;\n}") for f in self.fields: if f.name: name = prefix + "_" + f.name else: name = prefix - if not f.name and f.low == 0 and f.shr == 0 and not f.type in ["float", "fixed", "ufixed"]: + if not f.name and f.low == 0 and f.shr == 0 and f.type not in ["float", "fixed", "ufixed"]: pass - elif f.type == "boolean" or (f.type == None and f.low == f.high): + elif f.type == "boolean" or (f.type is None and f.low == f.high): tab_to("#define %s" % name, "0x%08x" % (1 << f.low)) else: tab_to("#define %s__MASK" % name, "0x%08x" % mask(f.low, f.high)) @@ -286,6 +302,7 @@ class Array(object): self.domain = domain self.variant = variant self.parent = parent + self.children = [] if self.parent: self.name = self.parent.name + "_" + self.local_name else: @@ -337,12 +354,15 @@ class Array(object): offset += self.parent.total_offset() return offset - def dump(self): + def dump(self, is_deprecated): + depcrstr = "" + if is_deprecated: + depcrstr = " FD_DEPRECATED" proto = indices_varlist(self.indices()) strides = indices_strides(self.indices()) array_offset = self.total_offset() if self.fixed_offsets: - print("static inline uint32_t __offset_%s(%s idx)" % (self.local_name, self.index_ctype())) + print("static inline%s uint32_t __offset_%s(%s idx)" % (depcrstr, self.local_name, self.index_ctype())) print("{\n\tswitch (idx) {") if self.index_type: for val, offset in zip(self.index_type.names(), self.offsets): @@ -357,7 +377,7 @@ class Array(object): else: tab_to("#define REG_%s_%s(%s)" % (self.domain, self.name, proto), "(0x%08x + %s )\n" % (array_offset, strides)) - def dump_pack_struct(self): + def dump_pack_struct(self, is_deprecated): pass def dump_regpair_builder(self): @@ -373,6 +393,7 @@ class Reg(object): self.bit_size = bit_size if array: self.name = array.name + "_" + self.name + array.children.append(self) self.full_name = self.domain + "_" + self.name if "stride" in attrs: self.stride = int(attrs["stride"], 0) @@ -397,25 +418,34 @@ class Reg(object): else: return self.offset - def dump(self): + def reg_offset(self): + if self.array: + offset = self.array.offset + self.offset + return "(0x%08x + 0x%x*__i)" % (offset, self.array.stride) + return "0x%08x" % self.offset + + def dump(self, is_deprecated): + depcrstr = "" + if is_deprecated: + depcrstr = " FD_DEPRECATED " proto = indices_prototype(self.indices()) strides = indices_strides(self.indices()) offset = self.total_offset() if proto == '': tab_to("#define REG_%s" % self.full_name, "0x%08x" % offset) else: - print("static inline uint32_t REG_%s(%s) { return 0x%08x + %s; }" % (self.full_name, proto, offset, strides)) + print("static inline%s uint32_t REG_%s(%s) { return 0x%08x + %s; }" % (depcrstr, self.full_name, proto, offset, strides)) if self.bitset.inline: - self.bitset.dump(self.full_name) + self.bitset.dump(is_deprecated, self.full_name) + print("") - def dump_pack_struct(self): + def dump_pack_struct(self, is_deprecated): if self.bitset.inline: - self.bitset.dump_pack_struct(self) + self.bitset.dump_pack_struct(is_deprecated, self) def dump_regpair_builder(self): - if self.bitset.inline: - self.bitset.dump_regpair_builder(self) + self.bitset.dump_regpair_builder(self) def dump_py(self): print("\tREG_%s = 0x%08x" % (self.full_name, self.offset)) @@ -444,9 +474,6 @@ class Parser(object): self.variants = set() self.file = [] self.xml_files = [] - self.copyright_year = None - self.authors = [] - self.license = None def error(self, message): parser, filename = self.stack[-1] @@ -454,7 +481,7 @@ class Parser(object): def prefix(self, variant=None): if self.current_prefix_type == "variant" and variant: - return variant + return sanitize_variant(variant) elif self.current_stripe: return self.current_stripe + "_" + self.current_domain elif self.current_prefix: @@ -500,15 +527,22 @@ class Parser(object): return varset def parse_variants(self, attrs): - if not "variants" in attrs: + if "variants" not in attrs: return None - variant = attrs["variants"].split(",")[0] - if "-" in variant: - variant = variant[:variant.index("-")] + variant = attrs["variants"].split(",")[0] varset = self.parse_varset(attrs) - assert varset.has_name(variant) + if "-" in variant: + # if we have a range, validate that both the start and end + # of the range are valid enums: + start = variant[:variant.index("-")] + end = variant[variant.index("-") + 1:] + assert varset.has_name(start) + if end != "": + assert varset.has_name(end) + else: + assert varset.has_name(variant) return variant @@ -572,9 +606,6 @@ class Parser(object): error_str = str(xmlschema.error_log.filter_from_errors()[0]) raise self.error("Schema validation failed for: " + filename + "\n" + error_str) except ImportError as e: - if self.validate: - raise e - print("lxml not found, skipping validation", file=sys.stderr) def do_parse(self, filename): @@ -620,6 +651,7 @@ class Parser(object): self.current_reg = Reg(attrs, self.prefix(variant), self.current_array, bit_size) self.current_reg.bitset = self.current_bitset + self.current_bitset.reg = self.current_reg if len(self.stack) == 1: self.file.append(self.current_reg) @@ -643,7 +675,7 @@ class Parser(object): elif name == "domain": self.current_domain = attrs["name"] if "prefix" in attrs: - self.current_prefix = self.parse_variants(attrs) + self.current_prefix = sanitize_variant(self.parse_variants(attrs)) self.current_prefix_type = attrs["prefix"] else: self.current_prefix = None @@ -651,7 +683,7 @@ class Parser(object): if "varset" in attrs: self.current_varset = self.enums[attrs["varset"]] elif name == "stripe": - self.current_stripe = self.parse_variants(attrs) + self.current_stripe = sanitize_variant(self.parse_variants(attrs)) elif name == "enum": self.current_enum_value = 0 self.current_enum = Enum(attrs["name"]) @@ -686,10 +718,6 @@ class Parser(object): self.parse_field(attrs["name"], attrs) elif name == "database": self.do_validate(attrs["xsi:schemaLocation"]) - elif name == "copyright": - self.copyright_year = attrs["year"] - elif name == "author": - self.authors.append(attrs["name"] + " <" + attrs["email"] + "> " + attrs["name"]) def end_element(self, name): if name == "domain": @@ -703,11 +731,16 @@ class Parser(object): elif name == "reg32": self.current_reg = None elif name == "array": + # if the array has no Reg children, push an implicit reg32: + if len(self.current_array.children) == 0: + attrs = { + "name": "REG", + "offset": "0", + } + self.parse_reg(attrs, 32) self.current_array = self.current_array.parent elif name == "enum": self.current_enum = None - elif name == "license": - self.license = self.cdata def character_data(self, data): self.cdata += data @@ -720,10 +753,10 @@ class Parser(object): if variants: for variant, vreg in variants.items(): if reg == vreg: - d[(usage, variant)].append(reg) + d[(usage, sanitize_variant(variant))].append(reg) else: for variant in self.variants: - d[(usage, variant)].append(reg) + d[(usage, sanitize_variant(variant))].append(reg) print("#ifdef __cplusplus") @@ -753,6 +786,9 @@ class Parser(object): print("#endif") + def has_variants(self, reg): + return reg.name in self.variant_regs and not is_number(reg.name) and not is_number(reg.name[1:]) + def dump(self): enums = [] bitsets = [] @@ -766,7 +802,7 @@ class Parser(object): regs.append(e) for e in enums + bitsets + regs: - e.dump() + e.dump(self.has_variants(e)) self.dump_reg_usages() @@ -782,8 +818,7 @@ class Parser(object): def dump_reg_variants(self, regname, variants): - # Don't bother for things that only have a single variant: - if len(variants) == 1: + if is_number(regname) or is_number(regname[1:]): return print("#ifdef __cplusplus") print("struct __%s {" % regname) @@ -834,11 +869,20 @@ class Parser(object): xtravar = "__i, " print("__%s(%sstruct __%s fields) {" % (regname, xtra, regname)) for variant in variants.keys(): - print(" if (%s == %s) {" % (varenum.upper(), variant)) + if "-" in variant: + start = variant[:variant.index("-")] + end = variant[variant.index("-") + 1:] + if end != "": + print(" if ((%s >= %s) && (%s <= %s)) {" % (varenum.upper(), start, varenum.upper(), end)) + else: + print(" if (%s >= %s) {" % (varenum.upper(), start)) + else: + print(" if (%s == %s) {" % (varenum.upper(), variant)) reg = variants[variant] reg.dump_regpair_builder() print(" } else") print(" assert(!\"invalid variant\");") + print(" return (struct fd_reg_pair){};") print("}") if bit_size == 64: @@ -851,7 +895,7 @@ class Parser(object): def dump_structs(self): for e in self.file: - e.dump_pack_struct() + e.dump_pack_struct(self.has_variants(e)) for regname in self.variant_regs: self.dump_reg_variants(regname, self.variant_regs[regname]) @@ -868,33 +912,7 @@ def dump_c(args, guard, func): print("#ifndef %s\n#define %s\n" % (guard, guard)) - print("""/* Autogenerated file, DO NOT EDIT manually! - -This file was generated by the rules-ng-ng gen_header.py tool in this git repository: -http://gitlab.freedesktop.org/mesa/mesa/ -git clone https://gitlab.freedesktop.org/mesa/mesa.git - -The rules-ng-ng source files this header was generated from are: -""") - maxlen = 0 - for filepath in p.xml_files: - new_filepath = re.sub("^.+drivers","drivers",filepath) - maxlen = max(maxlen, len(new_filepath)) - for filepath in p.xml_files: - pad = " " * (maxlen - len(new_filepath)) - filesize = str(os.path.getsize(filepath)) - filesize = " " * (7 - len(filesize)) + filesize - filetime = time.ctime(os.path.getmtime(filepath)) - print("- " + new_filepath + pad + " (" + filesize + " bytes, from <stripped>)") - if p.copyright_year: - current_year = str(datetime.date.today().year) - print() - print("Copyright (C) %s-%s by the following authors:" % (p.copyright_year, current_year)) - for author in p.authors: - print("- " + author) - if p.license: - print(p.license) - print("*/") + print("/* Autogenerated file, DO NOT EDIT manually! */") print() print("#ifdef __KERNEL__") @@ -912,9 +930,20 @@ The rules-ng-ng source files this header was generated from are: print("#endif") print() + print("#ifndef FD_NO_DEPRECATED_PACK") + print("#define FD_DEPRECATED __attribute__((deprecated))") + print("#else") + print("#define FD_DEPRECATED") + print("#endif") + print() + func(p) - print("\n#endif /* %s */" % guard) + print() + print("#undef FD_DEPRECATED") + print() + + print("#endif /* %s */" % guard) def dump_c_defines(args): @@ -931,7 +960,7 @@ def dump_py_defines(args): p = Parser() try: - p.parse(args.rnn, args.xml) + p.parse(args.rnn, args.xml, args.validate) except Error as e: print(e, file=sys.stderr) exit(1) diff --git a/drivers/gpu/drm/nova/driver.rs b/drivers/gpu/drm/nova/driver.rs index b28b2e05cc15..91b7380f83ab 100644 --- a/drivers/gpu/drm/nova/driver.rs +++ b/drivers/gpu/drm/nova/driver.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::{auxiliary, c_str, device::Core, drm, drm::gem, drm::ioctl, prelude::*, types::ARef}; +use kernel::{ + auxiliary, c_str, device::Core, drm, drm::gem, drm::ioctl, prelude::*, sync::aref::ARef, +}; use crate::file::File; use crate::gem::NovaObject; diff --git a/drivers/gpu/drm/nova/gem.rs b/drivers/gpu/drm/nova/gem.rs index 33b62d21400c..2760ba4f3450 100644 --- a/drivers/gpu/drm/nova/gem.rs +++ b/drivers/gpu/drm/nova/gem.rs @@ -4,7 +4,7 @@ use kernel::{ drm, drm::{gem, gem::BaseObject}, prelude::*, - types::ARef, + sync::aref::ARef, }; use crate::{ @@ -16,16 +16,14 @@ use crate::{ #[pin_data] pub(crate) struct NovaObject {} -impl gem::BaseDriverObject<gem::Object<NovaObject>> for NovaObject { +impl gem::DriverObject for NovaObject { + type Driver = NovaDriver; + fn new(_dev: &NovaDevice, _size: usize) -> impl PinInit<Self, Error> { try_pin_init!(NovaObject {}) } } -impl gem::DriverObject for NovaObject { - type Driver = NovaDriver; -} - impl NovaObject { /// Create a new DRM GEM object. pub(crate) fn new(dev: &NovaDevice, size: usize) -> Result<ARef<gem::Object<Self>>> { diff --git a/drivers/gpu/drm/tyr/Kconfig b/drivers/gpu/drm/tyr/Kconfig new file mode 100644 index 000000000000..4b55308fd2eb --- /dev/null +++ b/drivers/gpu/drm/tyr/Kconfig @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 or MIT + +config DRM_TYR + tristate "Tyr (Rust DRM support for ARM Mali CSF-based GPUs)" + depends on DRM=y + depends on RUST + depends on ARM || ARM64 || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE + default n + help + Rust DRM driver for ARM Mali CSF-based GPUs. + + This driver is for Mali (or Immortalis) Valhall Gxxx GPUs. + + Note that the Mali-G68 and Mali-G78, while Valhall architecture, will + be supported with the panfrost driver as they are not CSF GPUs. + + if M is selected, the module will be called tyr. This driver is work + in progress and may not be functional. diff --git a/drivers/gpu/drm/tyr/Makefile b/drivers/gpu/drm/tyr/Makefile new file mode 100644 index 000000000000..ba545f65f2c0 --- /dev/null +++ b/drivers/gpu/drm/tyr/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 or MIT + +obj-$(CONFIG_DRM_TYR) += tyr.o diff --git a/drivers/gpu/drm/tyr/driver.rs b/drivers/gpu/drm/tyr/driver.rs new file mode 100644 index 000000000000..d5625dd1e41c --- /dev/null +++ b/drivers/gpu/drm/tyr/driver.rs @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use kernel::c_str; +use kernel::clk::Clk; +use kernel::clk::OptionalClk; +use kernel::device::Bound; +use kernel::device::Core; +use kernel::device::Device; +use kernel::devres::Devres; +use kernel::drm; +use kernel::drm::ioctl; +use kernel::new_mutex; +use kernel::of; +use kernel::platform; +use kernel::prelude::*; +use kernel::regulator; +use kernel::regulator::Regulator; +use kernel::sizes::SZ_2M; +use kernel::sync::Arc; +use kernel::sync::Mutex; +use kernel::time; +use kernel::types::ARef; + +use crate::file::File; +use crate::gem::TyrObject; +use crate::gpu; +use crate::gpu::GpuInfo; +use crate::regs; + +pub(crate) type IoMem = kernel::io::mem::IoMem<SZ_2M>; + +/// Convenience type alias for the DRM device type for this driver. +pub(crate) type TyrDevice = drm::Device<TyrDriver>; + +#[pin_data(PinnedDrop)] +pub(crate) struct TyrDriver { + device: ARef<TyrDevice>, +} + +#[pin_data(PinnedDrop)] +pub(crate) struct TyrData { + pub(crate) pdev: ARef<platform::Device>, + + #[pin] + clks: Mutex<Clocks>, + + #[pin] + regulators: Mutex<Regulators>, + + /// Some information on the GPU. + /// + /// This is mainly queried by userspace, i.e.: Mesa. + pub(crate) gpu_info: GpuInfo, +} + +// Both `Clk` and `Regulator` do not implement `Send` or `Sync`, but they +// should. There are patches on the mailing list to address this, but they have +// not landed yet. +// +// For now, add this workaround so that this patch compiles with the promise +// that it will be removed in a future patch. +// +// SAFETY: This will be removed in a future patch. +unsafe impl Send for TyrData {} +// SAFETY: This will be removed in a future patch. +unsafe impl Sync for TyrData {} + +fn issue_soft_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result { + regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?; + + // TODO: We cannot poll, as there is no support in Rust currently, so we + // sleep. Change this when read_poll_timeout() is implemented in Rust. + kernel::time::delay::fsleep(time::Delta::from_millis(100)); + + if regs::GPU_IRQ_RAWSTAT.read(dev, iomem)? & regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED == 0 { + dev_err!(dev, "GPU reset failed with errno\n"); + dev_err!( + dev, + "GPU_INT_RAWSTAT is {}\n", + regs::GPU_IRQ_RAWSTAT.read(dev, iomem)? + ); + + return Err(EIO); + } + + Ok(()) +} + +kernel::of_device_table!( + OF_TABLE, + MODULE_OF_TABLE, + <TyrDriver as platform::Driver>::IdInfo, + [ + (of::DeviceId::new(c_str!("rockchip,rk3588-mali")), ()), + (of::DeviceId::new(c_str!("arm,mali-valhall-csf")), ()) + ] +); + +impl platform::Driver for TyrDriver { + type IdInfo = (); + const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE); + + fn probe( + pdev: &platform::Device<Core>, + _info: Option<&Self::IdInfo>, + ) -> Result<Pin<KBox<Self>>> { + let core_clk = Clk::get(pdev.as_ref(), Some(c_str!("core")))?; + let stacks_clk = OptionalClk::get(pdev.as_ref(), Some(c_str!("stacks")))?; + let coregroup_clk = OptionalClk::get(pdev.as_ref(), Some(c_str!("coregroup")))?; + + core_clk.prepare_enable()?; + stacks_clk.prepare_enable()?; + coregroup_clk.prepare_enable()?; + + let mali_regulator = Regulator::<regulator::Enabled>::get(pdev.as_ref(), c_str!("mali"))?; + let sram_regulator = Regulator::<regulator::Enabled>::get(pdev.as_ref(), c_str!("sram"))?; + + let request = pdev.io_request_by_index(0).ok_or(ENODEV)?; + let iomem = Arc::pin_init(request.iomap_sized::<SZ_2M>(), GFP_KERNEL)?; + + issue_soft_reset(pdev.as_ref(), &iomem)?; + gpu::l2_power_on(pdev.as_ref(), &iomem)?; + + let gpu_info = GpuInfo::new(pdev.as_ref(), &iomem)?; + gpu_info.log(pdev); + + let platform: ARef<platform::Device> = pdev.into(); + + let data = try_pin_init!(TyrData { + pdev: platform.clone(), + clks <- new_mutex!(Clocks { + core: core_clk, + stacks: stacks_clk, + coregroup: coregroup_clk, + }), + regulators <- new_mutex!(Regulators { + mali: mali_regulator, + sram: sram_regulator, + }), + gpu_info, + }); + + let tdev: ARef<TyrDevice> = drm::Device::new(pdev.as_ref(), data)?; + drm::driver::Registration::new_foreign_owned(&tdev, pdev.as_ref(), 0)?; + + let driver = KBox::pin_init(try_pin_init!(TyrDriver { device: tdev }), GFP_KERNEL)?; + + // We need this to be dev_info!() because dev_dbg!() does not work at + // all in Rust for now, and we need to see whether probe succeeded. + dev_info!(pdev.as_ref(), "Tyr initialized correctly.\n"); + Ok(driver) + } +} + +#[pinned_drop] +impl PinnedDrop for TyrDriver { + fn drop(self: Pin<&mut Self>) {} +} + +#[pinned_drop] +impl PinnedDrop for TyrData { + fn drop(self: Pin<&mut Self>) { + // TODO: the type-state pattern for Clks will fix this. + let clks = self.clks.lock(); + clks.core.disable_unprepare(); + clks.stacks.disable_unprepare(); + clks.coregroup.disable_unprepare(); + } +} + +// We need to retain the name "panthor" to achieve drop-in compatibility with +// the C driver in the userspace stack. +const INFO: drm::DriverInfo = drm::DriverInfo { + major: 1, + minor: 5, + patchlevel: 0, + name: c_str!("panthor"), + desc: c_str!("ARM Mali Tyr DRM driver"), +}; + +#[vtable] +impl drm::Driver for TyrDriver { + type Data = TyrData; + type File = File; + type Object = drm::gem::Object<TyrObject>; + + const INFO: drm::DriverInfo = INFO; + + kernel::declare_drm_ioctls! { + (PANTHOR_DEV_QUERY, drm_panthor_dev_query, ioctl::RENDER_ALLOW, File::dev_query), + } +} + +#[pin_data] +struct Clocks { + core: Clk, + stacks: OptionalClk, + coregroup: OptionalClk, +} + +#[pin_data] +struct Regulators { + mali: Regulator<regulator::Enabled>, + sram: Regulator<regulator::Enabled>, +} diff --git a/drivers/gpu/drm/tyr/file.rs b/drivers/gpu/drm/tyr/file.rs new file mode 100644 index 000000000000..0ef432947b73 --- /dev/null +++ b/drivers/gpu/drm/tyr/file.rs @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use kernel::drm; +use kernel::prelude::*; +use kernel::uaccess::UserSlice; +use kernel::uapi; + +use crate::driver::TyrDevice; +use crate::TyrDriver; + +#[pin_data] +pub(crate) struct File {} + +/// Convenience type alias for our DRM `File` type +pub(crate) type DrmFile = drm::file::File<File>; + +impl drm::file::DriverFile for File { + type Driver = TyrDriver; + + fn open(_dev: &drm::Device<Self::Driver>) -> Result<Pin<KBox<Self>>> { + KBox::try_pin_init(try_pin_init!(Self {}), GFP_KERNEL) + } +} + +impl File { + pub(crate) fn dev_query( + tdev: &TyrDevice, + devquery: &mut uapi::drm_panthor_dev_query, + _file: &DrmFile, + ) -> Result<u32> { + if devquery.pointer == 0 { + match devquery.type_ { + uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => { + devquery.size = core::mem::size_of_val(&tdev.gpu_info) as u32; + Ok(0) + } + _ => Err(EINVAL), + } + } else { + match devquery.type_ { + uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => { + let mut writer = UserSlice::new( + UserPtr::from_addr(devquery.pointer as usize), + devquery.size as usize, + ) + .writer(); + + writer.write(&tdev.gpu_info)?; + + Ok(0) + } + _ => Err(EINVAL), + } + } + } +} diff --git a/drivers/gpu/drm/tyr/gem.rs b/drivers/gpu/drm/tyr/gem.rs new file mode 100644 index 000000000000..1273bf89dbd5 --- /dev/null +++ b/drivers/gpu/drm/tyr/gem.rs @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use crate::driver::TyrDevice; +use crate::driver::TyrDriver; +use kernel::drm::gem; +use kernel::prelude::*; + +/// GEM Object inner driver data +#[pin_data] +pub(crate) struct TyrObject {} + +impl gem::DriverObject for TyrObject { + type Driver = TyrDriver; + + fn new(_dev: &TyrDevice, _size: usize) -> impl PinInit<Self, Error> { + try_pin_init!(TyrObject {}) + } +} diff --git a/drivers/gpu/drm/tyr/gpu.rs b/drivers/gpu/drm/tyr/gpu.rs new file mode 100644 index 000000000000..6c582910dd5d --- /dev/null +++ b/drivers/gpu/drm/tyr/gpu.rs @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use kernel::bits::genmask_u32; +use kernel::device::Bound; +use kernel::device::Device; +use kernel::devres::Devres; +use kernel::platform; +use kernel::prelude::*; +use kernel::time; +use kernel::transmute::AsBytes; + +use crate::driver::IoMem; +use crate::regs; + +/// Struct containing information that can be queried by userspace. This is read from +/// the GPU's registers. +/// +/// # Invariants +/// +/// - The layout of this struct identical to the C `struct drm_panthor_gpu_info`. +#[repr(C)] +pub(crate) struct GpuInfo { + pub(crate) gpu_id: u32, + pub(crate) gpu_rev: u32, + pub(crate) csf_id: u32, + pub(crate) l2_features: u32, + pub(crate) tiler_features: u32, + pub(crate) mem_features: u32, + pub(crate) mmu_features: u32, + pub(crate) thread_features: u32, + pub(crate) max_threads: u32, + pub(crate) thread_max_workgroup_size: u32, + pub(crate) thread_max_barrier_size: u32, + pub(crate) coherency_features: u32, + pub(crate) texture_features: [u32; 4], + pub(crate) as_present: u32, + pub(crate) pad0: u32, + pub(crate) shader_present: u64, + pub(crate) l2_present: u64, + pub(crate) tiler_present: u64, + pub(crate) core_features: u32, + pub(crate) pad: u32, +} + +impl GpuInfo { + pub(crate) fn new(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result<Self> { + let gpu_id = regs::GPU_ID.read(dev, iomem)?; + let csf_id = regs::GPU_CSF_ID.read(dev, iomem)?; + let gpu_rev = regs::GPU_REVID.read(dev, iomem)?; + let core_features = regs::GPU_CORE_FEATURES.read(dev, iomem)?; + let l2_features = regs::GPU_L2_FEATURES.read(dev, iomem)?; + let tiler_features = regs::GPU_TILER_FEATURES.read(dev, iomem)?; + let mem_features = regs::GPU_MEM_FEATURES.read(dev, iomem)?; + let mmu_features = regs::GPU_MMU_FEATURES.read(dev, iomem)?; + let thread_features = regs::GPU_THREAD_FEATURES.read(dev, iomem)?; + let max_threads = regs::GPU_THREAD_MAX_THREADS.read(dev, iomem)?; + let thread_max_workgroup_size = regs::GPU_THREAD_MAX_WORKGROUP_SIZE.read(dev, iomem)?; + let thread_max_barrier_size = regs::GPU_THREAD_MAX_BARRIER_SIZE.read(dev, iomem)?; + let coherency_features = regs::GPU_COHERENCY_FEATURES.read(dev, iomem)?; + + let texture_features = regs::GPU_TEXTURE_FEATURES0.read(dev, iomem)?; + + let as_present = regs::GPU_AS_PRESENT.read(dev, iomem)?; + + let shader_present = u64::from(regs::GPU_SHADER_PRESENT_LO.read(dev, iomem)?); + let shader_present = + shader_present | u64::from(regs::GPU_SHADER_PRESENT_HI.read(dev, iomem)?) << 32; + + let tiler_present = u64::from(regs::GPU_TILER_PRESENT_LO.read(dev, iomem)?); + let tiler_present = + tiler_present | u64::from(regs::GPU_TILER_PRESENT_HI.read(dev, iomem)?) << 32; + + let l2_present = u64::from(regs::GPU_L2_PRESENT_LO.read(dev, iomem)?); + let l2_present = l2_present | u64::from(regs::GPU_L2_PRESENT_HI.read(dev, iomem)?) << 32; + + Ok(Self { + gpu_id, + gpu_rev, + csf_id, + l2_features, + tiler_features, + mem_features, + mmu_features, + thread_features, + max_threads, + thread_max_workgroup_size, + thread_max_barrier_size, + coherency_features, + // TODO: Add texture_features_{1,2,3}. + texture_features: [texture_features, 0, 0, 0], + as_present, + pad0: 0, + shader_present, + l2_present, + tiler_present, + core_features, + pad: 0, + }) + } + + pub(crate) fn log(&self, pdev: &platform::Device) { + let major = (self.gpu_id >> 16) & 0xff; + let minor = (self.gpu_id >> 8) & 0xff; + let status = self.gpu_id & 0xff; + + let model_name = if let Some(model) = GPU_MODELS + .iter() + .find(|&f| f.major == major && f.minor == minor) + { + model.name + } else { + "unknown" + }; + + dev_info!( + pdev.as_ref(), + "mali-{} id 0x{:x} major 0x{:x} minor 0x{:x} status 0x{:x}", + model_name, + self.gpu_id >> 16, + major, + minor, + status + ); + + dev_info!( + pdev.as_ref(), + "Features: L2:{:#x} Tiler:{:#x} Mem:{:#x} MMU:{:#x} AS:{:#x}", + self.l2_features, + self.tiler_features, + self.mem_features, + self.mmu_features, + self.as_present + ); + + dev_info!( + pdev.as_ref(), + "shader_present=0x{:016x} l2_present=0x{:016x} tiler_present=0x{:016x}", + self.shader_present, + self.l2_present, + self.tiler_present + ); + } + + /// Returns the number of virtual address bits supported by the GPU. + #[expect(dead_code)] + pub(crate) fn va_bits(&self) -> u32 { + self.mmu_features & genmask_u32(0..=7) + } + + /// Returns the number of physical address bits supported by the GPU. + #[expect(dead_code)] + pub(crate) fn pa_bits(&self) -> u32 { + (self.mmu_features >> 8) & genmask_u32(0..=7) + } +} + +// SAFETY: `GpuInfo`'s invariant guarantees that it is the same type that is +// already exposed to userspace by the C driver. This implies that it fulfills +// the requirements for `AsBytes`. +// +// This means: +// +// - No implicit padding, +// - No kernel pointers, +// - No interior mutability. +unsafe impl AsBytes for GpuInfo {} + +struct GpuModels { + name: &'static str, + major: u32, + minor: u32, +} + +const GPU_MODELS: [GpuModels; 1] = [GpuModels { + name: "g610", + major: 10, + minor: 7, +}]; + +#[allow(dead_code)] +pub(crate) struct GpuId { + pub(crate) arch_major: u32, + pub(crate) arch_minor: u32, + pub(crate) arch_rev: u32, + pub(crate) prod_major: u32, + pub(crate) ver_major: u32, + pub(crate) ver_minor: u32, + pub(crate) ver_status: u32, +} + +impl From<u32> for GpuId { + fn from(value: u32) -> Self { + GpuId { + arch_major: (value & genmask_u32(28..=31)) >> 28, + arch_minor: (value & genmask_u32(24..=27)) >> 24, + arch_rev: (value & genmask_u32(20..=23)) >> 20, + prod_major: (value & genmask_u32(16..=19)) >> 16, + ver_major: (value & genmask_u32(12..=15)) >> 12, + ver_minor: (value & genmask_u32(4..=11)) >> 4, + ver_status: value & genmask_u32(0..=3), + } + } +} + +/// Powers on the l2 block. +pub(crate) fn l2_power_on(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result { + regs::L2_PWRON_LO.write(dev, iomem, 1)?; + + // TODO: We cannot poll, as there is no support in Rust currently, so we + // sleep. Change this when read_poll_timeout() is implemented in Rust. + kernel::time::delay::fsleep(time::Delta::from_millis(100)); + + if regs::L2_READY_LO.read(dev, iomem)? != 1 { + dev_err!(dev, "Failed to power on the GPU\n"); + return Err(EIO); + } + + Ok(()) +} diff --git a/drivers/gpu/drm/tyr/regs.rs b/drivers/gpu/drm/tyr/regs.rs new file mode 100644 index 000000000000..f46933aaa221 --- /dev/null +++ b/drivers/gpu/drm/tyr/regs.rs @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +// We don't expect that all the registers and fields will be used, even in the +// future. +// +// Nevertheless, it is useful to have most of them defined, like the C driver +// does. +#![allow(dead_code)] + +use kernel::bits::bit_u32; +use kernel::device::Bound; +use kernel::device::Device; +use kernel::devres::Devres; +use kernel::prelude::*; + +use crate::driver::IoMem; + +/// Represents a register in the Register Set +/// +/// TODO: Replace this with the Nova `register!()` macro when it is available. +/// In particular, this will automatically give us 64bit register reads and +/// writes. +pub(crate) struct Register<const OFFSET: usize>; + +impl<const OFFSET: usize> Register<OFFSET> { + #[inline] + pub(crate) fn read(&self, dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result<u32> { + let value = (*iomem).access(dev)?.read32(OFFSET); + Ok(value) + } + + #[inline] + pub(crate) fn write(&self, dev: &Device<Bound>, iomem: &Devres<IoMem>, value: u32) -> Result { + (*iomem).access(dev)?.write32(value, OFFSET); + Ok(()) + } +} + +pub(crate) const GPU_ID: Register<0x0> = Register; +pub(crate) const GPU_L2_FEATURES: Register<0x4> = Register; +pub(crate) const GPU_CORE_FEATURES: Register<0x8> = Register; +pub(crate) const GPU_CSF_ID: Register<0x1c> = Register; +pub(crate) const GPU_REVID: Register<0x280> = Register; +pub(crate) const GPU_TILER_FEATURES: Register<0xc> = Register; +pub(crate) const GPU_MEM_FEATURES: Register<0x10> = Register; +pub(crate) const GPU_MMU_FEATURES: Register<0x14> = Register; +pub(crate) const GPU_AS_PRESENT: Register<0x18> = Register; +pub(crate) const GPU_IRQ_RAWSTAT: Register<0x20> = Register; + +pub(crate) const GPU_IRQ_RAWSTAT_FAULT: u32 = bit_u32(0); +pub(crate) const GPU_IRQ_RAWSTAT_PROTECTED_FAULT: u32 = bit_u32(1); +pub(crate) const GPU_IRQ_RAWSTAT_RESET_COMPLETED: u32 = bit_u32(8); +pub(crate) const GPU_IRQ_RAWSTAT_POWER_CHANGED_SINGLE: u32 = bit_u32(9); +pub(crate) const GPU_IRQ_RAWSTAT_POWER_CHANGED_ALL: u32 = bit_u32(10); +pub(crate) const GPU_IRQ_RAWSTAT_CLEAN_CACHES_COMPLETED: u32 = bit_u32(17); +pub(crate) const GPU_IRQ_RAWSTAT_DOORBELL_STATUS: u32 = bit_u32(18); +pub(crate) const GPU_IRQ_RAWSTAT_MCU_STATUS: u32 = bit_u32(19); + +pub(crate) const GPU_IRQ_CLEAR: Register<0x24> = Register; +pub(crate) const GPU_IRQ_MASK: Register<0x28> = Register; +pub(crate) const GPU_IRQ_STAT: Register<0x2c> = Register; +pub(crate) const GPU_CMD: Register<0x30> = Register; +pub(crate) const GPU_CMD_SOFT_RESET: u32 = 1 | (1 << 8); +pub(crate) const GPU_CMD_HARD_RESET: u32 = 1 | (2 << 8); +pub(crate) const GPU_THREAD_FEATURES: Register<0xac> = Register; +pub(crate) const GPU_THREAD_MAX_THREADS: Register<0xa0> = Register; +pub(crate) const GPU_THREAD_MAX_WORKGROUP_SIZE: Register<0xa4> = Register; +pub(crate) const GPU_THREAD_MAX_BARRIER_SIZE: Register<0xa8> = Register; +pub(crate) const GPU_TEXTURE_FEATURES0: Register<0xb0> = Register; +pub(crate) const GPU_SHADER_PRESENT_LO: Register<0x100> = Register; +pub(crate) const GPU_SHADER_PRESENT_HI: Register<0x104> = Register; +pub(crate) const GPU_TILER_PRESENT_LO: Register<0x110> = Register; +pub(crate) const GPU_TILER_PRESENT_HI: Register<0x114> = Register; +pub(crate) const GPU_L2_PRESENT_LO: Register<0x120> = Register; +pub(crate) const GPU_L2_PRESENT_HI: Register<0x124> = Register; +pub(crate) const L2_READY_LO: Register<0x160> = Register; +pub(crate) const L2_READY_HI: Register<0x164> = Register; +pub(crate) const L2_PWRON_LO: Register<0x1a0> = Register; +pub(crate) const L2_PWRON_HI: Register<0x1a4> = Register; +pub(crate) const L2_PWRTRANS_LO: Register<0x220> = Register; +pub(crate) const L2_PWRTRANS_HI: Register<0x204> = Register; +pub(crate) const L2_PWRACTIVE_LO: Register<0x260> = Register; +pub(crate) const L2_PWRACTIVE_HI: Register<0x264> = Register; + +pub(crate) const MCU_CONTROL: Register<0x700> = Register; +pub(crate) const MCU_CONTROL_ENABLE: u32 = 1; +pub(crate) const MCU_CONTROL_AUTO: u32 = 2; +pub(crate) const MCU_CONTROL_DISABLE: u32 = 0; + +pub(crate) const MCU_STATUS: Register<0x704> = Register; +pub(crate) const MCU_STATUS_DISABLED: u32 = 0; +pub(crate) const MCU_STATUS_ENABLED: u32 = 1; +pub(crate) const MCU_STATUS_HALT: u32 = 2; +pub(crate) const MCU_STATUS_FATAL: u32 = 3; + +pub(crate) const GPU_COHERENCY_FEATURES: Register<0x300> = Register; + +pub(crate) const JOB_IRQ_RAWSTAT: Register<0x1000> = Register; +pub(crate) const JOB_IRQ_CLEAR: Register<0x1004> = Register; +pub(crate) const JOB_IRQ_MASK: Register<0x1008> = Register; +pub(crate) const JOB_IRQ_STAT: Register<0x100c> = Register; + +pub(crate) const JOB_IRQ_GLOBAL_IF: u32 = bit_u32(31); + +pub(crate) const MMU_IRQ_RAWSTAT: Register<0x2000> = Register; +pub(crate) const MMU_IRQ_CLEAR: Register<0x2004> = Register; +pub(crate) const MMU_IRQ_MASK: Register<0x2008> = Register; +pub(crate) const MMU_IRQ_STAT: Register<0x200c> = Register; diff --git a/drivers/gpu/drm/tyr/tyr.rs b/drivers/gpu/drm/tyr/tyr.rs new file mode 100644 index 000000000000..861d1db43072 --- /dev/null +++ b/drivers/gpu/drm/tyr/tyr.rs @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! Arm Mali Tyr DRM driver. +//! +//! The name "Tyr" is inspired by Norse mythology, reflecting Arm's tradition of +//! naming their GPUs after Nordic mythological figures and places. + +use crate::driver::TyrDriver; + +mod driver; +mod file; +mod gem; +mod gpu; +mod regs; + +kernel::module_platform_driver! { + type: TyrDriver, + name: "tyr", + authors: ["The Tyr driver authors"], + description: "Arm Mali Tyr DRM driver", + license: "Dual MIT/GPL", +} diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 987e4fe10538..bb135f457a6c 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -210,6 +210,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ display/xe_dsb_buffer.o \ display/xe_fb_pin.o \ display/xe_hdcp_gsc.o \ + display/xe_panic.o \ display/xe_plane_initial.o \ display/xe_tdf.o diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index d8cf68a0516d..1baa969aaa7c 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -117,6 +117,7 @@ enum xe_guc_action { XE_GUC_ACTION_ENTER_S_STATE = 0x501, XE_GUC_ACTION_EXIT_S_STATE = 0x502, XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506, + XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV = 0x509, XE_GUC_ACTION_SCHED_CONTEXT = 0x1000, XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001, XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 0e78351c6ef5..265a135e7061 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -17,6 +17,7 @@ * | 0 | 31:16 | **KEY** - KLV key identifier | * | | | - `GuC Self Config KLVs`_ | * | | | - `GuC Opt In Feature KLVs`_ | + * | | | - `GuC Scheduling Policies KLVs`_ | * | | | - `GuC VGT Policy KLVs`_ | * | | | - `GuC VF Configuration KLVs`_ | * | | | | @@ -153,6 +154,30 @@ enum { #define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_LEN 0u /** + * DOC: GuC Scheduling Policies KLVs + * + * `GuC KLV`_ keys available for use with UPDATE_SCHEDULING_POLICIES_KLV. + * + * _`GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD` : 0x1001 + * Some platforms do not allow concurrent execution of RCS and CCS + * workloads from different address spaces. By default, the GuC prioritizes + * RCS submissions over CCS ones, which can lead to CCS workloads being + * significantly (or completely) starved of execution time. This KLV allows + * the driver to specify a quantum (in ms) and a ratio (percentage value + * between 0 and 100), and the GuC will prioritize the CCS for that + * percentage of each quantum. For example, specifying 100ms and 30% will + * make the GuC prioritize the CCS for 30ms of every 100ms. + * Note that this does not necessarly mean that RCS and CCS engines will + * only be active for their percentage of the quantum, as the restriction + * only kicks in if both classes are fully busy with non-compatible address + * spaces; i.e., if one engine is idle or running the same address space, + * a pending job on the other engine will still be submitted to the HW no + * matter what the ratio is + */ +#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_KEY 0x1001 +#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_LEN 2u + +/** * DOC: GuC VGT Policy KLVs * * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY. diff --git a/drivers/gpu/drm/xe/display/intel_bo.c b/drivers/gpu/drm/xe/display/intel_bo.c index 910632f57c3d..27437c22bd70 100644 --- a/drivers/gpu/drm/xe/display/intel_bo.c +++ b/drivers/gpu/drm/xe/display/intel_bo.c @@ -1,12 +1,7 @@ // SPDX-License-Identifier: MIT /* Copyright © 2024 Intel Corporation */ -#include <drm/drm_cache.h> #include <drm/drm_gem.h> -#include <drm/drm_panic.h> - -#include "intel_fb.h" -#include "intel_display_types.h" #include "xe_bo.h" #include "intel_bo.h" @@ -64,89 +59,3 @@ void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj) { /* FIXME */ } - -struct xe_panic_data { - struct page **pages; - int page; - void *vaddr; -}; - -struct xe_framebuffer { - struct intel_framebuffer base; - struct xe_panic_data panic; -}; - -static inline struct xe_panic_data *to_xe_panic_data(struct intel_framebuffer *fb) -{ - return &container_of_const(fb, struct xe_framebuffer, base)->panic; -} - -static void xe_panic_kunmap(struct xe_panic_data *panic) -{ - if (panic->vaddr) { - drm_clflush_virt_range(panic->vaddr, PAGE_SIZE); - kunmap_local(panic->vaddr); - panic->vaddr = NULL; - } -} - -/* - * The scanout buffer pages are not mapped, so for each pixel, - * use kmap_local_page_try_from_panic() to map the page, and write the pixel. - * Try to keep the map from the previous pixel, to avoid too much map/unmap. - */ -static void xe_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int x, - unsigned int y, u32 color) -{ - struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; - struct xe_panic_data *panic = to_xe_panic_data(fb); - struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base)); - unsigned int new_page; - unsigned int offset; - - if (fb->panic_tiling) - offset = fb->panic_tiling(sb->width, x, y); - else - offset = y * sb->pitch[0] + x * sb->format->cpp[0]; - - new_page = offset >> PAGE_SHIFT; - offset = offset % PAGE_SIZE; - if (new_page != panic->page) { - xe_panic_kunmap(panic); - panic->page = new_page; - panic->vaddr = ttm_bo_kmap_try_from_panic(&bo->ttm, - panic->page); - } - if (panic->vaddr) { - u32 *pix = panic->vaddr + offset; - *pix = color; - } -} - -struct intel_framebuffer *intel_bo_alloc_framebuffer(void) -{ - struct xe_framebuffer *xe_fb; - - xe_fb = kzalloc(sizeof(*xe_fb), GFP_KERNEL); - if (xe_fb) - return &xe_fb->base; - return NULL; -} - -int intel_bo_panic_setup(struct drm_scanout_buffer *sb) -{ - struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; - struct xe_panic_data *panic = to_xe_panic_data(fb); - - panic->page = -1; - sb->set_pixel = xe_panic_page_set_pixel; - return 0; -} - -void intel_bo_panic_finish(struct intel_framebuffer *fb) -{ - struct xe_panic_data *panic = to_xe_panic_data(fb); - - xe_panic_kunmap(panic); - panic->page = -1; -} diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 8b68d70db6c8..19e691fccf8c 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -20,7 +20,7 @@ #include "intel_audio.h" #include "intel_bw.h" #include "intel_display.h" -#include "intel_display_core.h" +#include "intel_display_device.h" #include "intel_display_driver.h" #include "intel_display_irq.h" #include "intel_display_types.h" @@ -37,13 +37,6 @@ /* Xe device functions */ -static bool has_display(struct xe_device *xe) -{ - struct intel_display *display = xe->display; - - return HAS_DISPLAY(display); -} - /** * xe_display_driver_probe_defer - Detect if we need to wait for other drivers * early on @@ -290,7 +283,7 @@ static void xe_display_enable_d3cold(struct xe_device *xe) intel_dmc_suspend(display); - if (has_display(xe)) + if (intel_display_device_present(display)) intel_hpd_poll_enable(display); } @@ -303,14 +296,14 @@ static void xe_display_disable_d3cold(struct xe_device *xe) intel_dmc_resume(display); - if (has_display(xe)) + if (intel_display_device_present(display)) drm_mode_config_reset(&xe->drm); intel_display_driver_init_hw(display); intel_hpd_init(display); - if (has_display(xe)) + if (intel_display_device_present(display)) intel_hpd_poll_disable(display); intel_opregion_resume(display); @@ -333,7 +326,7 @@ void xe_display_pm_suspend(struct xe_device *xe) intel_power_domains_disable(display); drm_client_dev_suspend(&xe->drm, false); - if (has_display(xe)) { + if (intel_display_device_present(display)) { drm_kms_helper_poll_disable(&xe->drm); intel_display_driver_disable_user_access(display); intel_display_driver_suspend(display); @@ -345,7 +338,7 @@ void xe_display_pm_suspend(struct xe_device *xe) intel_hpd_cancel_work(display); - if (has_display(xe)) { + if (intel_display_device_present(display)) { intel_display_driver_suspend_access(display); intel_encoder_suspend_all(display); } @@ -365,7 +358,7 @@ void xe_display_pm_shutdown(struct xe_device *xe) intel_power_domains_disable(display); drm_client_dev_suspend(&xe->drm, false); - if (has_display(xe)) { + if (intel_display_device_present(display)) { drm_kms_helper_poll_disable(&xe->drm); intel_display_driver_disable_user_access(display); intel_display_driver_suspend(display); @@ -376,7 +369,7 @@ void xe_display_pm_shutdown(struct xe_device *xe) intel_encoder_block_all_hpds(display); intel_hpd_cancel_work(display); - if (has_display(xe)) + if (intel_display_device_present(display)) intel_display_driver_suspend_access(display); intel_encoder_suspend_all(display); @@ -465,25 +458,25 @@ void xe_display_pm_resume(struct xe_device *xe) intel_dmc_resume(display); - if (has_display(xe)) + if (intel_display_device_present(display)) drm_mode_config_reset(&xe->drm); intel_display_driver_init_hw(display); - if (has_display(xe)) + if (intel_display_device_present(display)) intel_display_driver_resume_access(display); intel_hpd_init(display); intel_encoder_unblock_all_hpds(display); - if (has_display(xe)) { + if (intel_display_device_present(display)) { intel_display_driver_resume(display); drm_kms_helper_poll_enable(&xe->drm); intel_display_driver_enable_user_access(display); } - if (has_display(xe)) + if (intel_display_device_present(display)) intel_hpd_poll_disable(display); intel_opregion_resume(display); @@ -548,7 +541,7 @@ int xe_display_probe(struct xe_device *xe) xe->display = display; - if (has_display(xe)) + if (intel_display_device_present(display)) return 0; no_display: diff --git a/drivers/gpu/drm/xe/display/xe_panic.c b/drivers/gpu/drm/xe/display/xe_panic.c new file mode 100644 index 000000000000..f32b23338331 --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_panic.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include <drm/drm_cache.h> +#include <drm/drm_panic.h> + +#include "intel_display_types.h" +#include "intel_fb.h" +#include "intel_panic.h" +#include "xe_bo.h" + +struct intel_panic { + struct page **pages; + int page; + void *vaddr; +}; + +static void xe_panic_kunmap(struct intel_panic *panic) +{ + if (panic->vaddr) { + drm_clflush_virt_range(panic->vaddr, PAGE_SIZE); + kunmap_local(panic->vaddr); + panic->vaddr = NULL; + } +} + +/* + * The scanout buffer pages are not mapped, so for each pixel, + * use kmap_local_page_try_from_panic() to map the page, and write the pixel. + * Try to keep the map from the previous pixel, to avoid too much map/unmap. + */ +static void xe_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int x, + unsigned int y, u32 color) +{ + struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; + struct intel_panic *panic = fb->panic; + struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base)); + unsigned int new_page; + unsigned int offset; + + if (fb->panic_tiling) + offset = fb->panic_tiling(sb->width, x, y); + else + offset = y * sb->pitch[0] + x * sb->format->cpp[0]; + + new_page = offset >> PAGE_SHIFT; + offset = offset % PAGE_SIZE; + if (new_page != panic->page) { + xe_panic_kunmap(panic); + panic->page = new_page; + panic->vaddr = ttm_bo_kmap_try_from_panic(&bo->ttm, + panic->page); + } + if (panic->vaddr) { + u32 *pix = panic->vaddr + offset; + *pix = color; + } +} + +struct intel_panic *intel_panic_alloc(void) +{ + struct intel_panic *panic; + + panic = kzalloc(sizeof(*panic), GFP_KERNEL); + + return panic; +} + +int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb) +{ + panic->page = -1; + sb->set_pixel = xe_panic_page_set_pixel; + return 0; +} + +void intel_panic_finish(struct intel_panic *panic) +{ + xe_panic_kunmap(panic); + panic->page = -1; +} diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index 6ee422594b56..b7f8fcfed8d8 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -311,12 +311,16 @@ int xe_device_sysfs_init(struct xe_device *xe) if (xe->info.platform == XE_BATTLEMAGE) { ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs); if (ret) - return ret; + goto cleanup; ret = late_bind_create_files(dev); if (ret) - return ret; + goto cleanup; } return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe); + +cleanup: + xe_device_sysfs_fini(xe); + return ret; } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 063c89d981e5..37b2b93b73d6 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -199,6 +199,16 @@ err_lrc: return err; } +static void __xe_exec_queue_fini(struct xe_exec_queue *q) +{ + int i; + + q->ops->fini(q); + + for (i = 0; i < q->width; ++i) + xe_lrc_put(q->lrc[i]); +} + struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, struct xe_hw_engine *hwe, u32 flags, @@ -229,11 +239,13 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (xe_exec_queue_uses_pxp(q)) { err = xe_pxp_exec_queue_add(xe->pxp, q); if (err) - goto err_post_alloc; + goto err_post_init; } return q; +err_post_init: + __xe_exec_queue_fini(q); err_post_alloc: __xe_exec_queue_free(q); return ERR_PTR(err); @@ -331,13 +343,11 @@ void xe_exec_queue_destroy(struct kref *ref) xe_exec_queue_put(eq); } - q->ops->fini(q); + q->ops->destroy(q); } void xe_exec_queue_fini(struct xe_exec_queue *q) { - int i; - /* * Before releasing our ref to lrc and xef, accumulate our run ticks * and wakeup any waiters. @@ -346,9 +356,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal)) wake_up_var(&q->xef->exec_queue.pending_removal); - for (i = 0; i < q->width; ++i) - xe_lrc_put(q->lrc[i]); - + __xe_exec_queue_fini(q); __xe_exec_queue_free(q); } diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index ba443a497b38..27b76cf9da89 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -181,8 +181,14 @@ struct xe_exec_queue_ops { int (*init)(struct xe_exec_queue *q); /** @kill: Kill inflight submissions for backend */ void (*kill)(struct xe_exec_queue *q); - /** @fini: Fini exec queue for submission backend */ + /** @fini: Undoes the init() for submission backend */ void (*fini)(struct xe_exec_queue *q); + /** + * @destroy: Destroy exec queue for submission backend. The backend + * function must call xe_exec_queue_fini() (which will in turn call the + * fini() backend function) to ensure the queue is properly cleaned up. + */ + void (*destroy)(struct xe_exec_queue *q); /** @set_priority: Set priority for exec queue */ int (*set_priority)(struct xe_exec_queue *q, enum xe_exec_queue_priority priority); diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 788f56b066b6..f83d421ac9d3 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -385,10 +385,20 @@ err_free: return err; } -static void execlist_exec_queue_fini_async(struct work_struct *w) +static void execlist_exec_queue_fini(struct xe_exec_queue *q) +{ + struct xe_execlist_exec_queue *exl = q->execlist; + + drm_sched_entity_fini(&exl->entity); + drm_sched_fini(&exl->sched); + + kfree(exl); +} + +static void execlist_exec_queue_destroy_async(struct work_struct *w) { struct xe_execlist_exec_queue *ee = - container_of(w, struct xe_execlist_exec_queue, fini_async); + container_of(w, struct xe_execlist_exec_queue, destroy_async); struct xe_exec_queue *q = ee->q; struct xe_execlist_exec_queue *exl = q->execlist; struct xe_device *xe = gt_to_xe(q->gt); @@ -401,10 +411,6 @@ static void execlist_exec_queue_fini_async(struct work_struct *w) list_del(&exl->active_link); spin_unlock_irqrestore(&exl->port->lock, flags); - drm_sched_entity_fini(&exl->entity); - drm_sched_fini(&exl->sched); - kfree(exl); - xe_exec_queue_fini(q); } @@ -413,10 +419,10 @@ static void execlist_exec_queue_kill(struct xe_exec_queue *q) /* NIY */ } -static void execlist_exec_queue_fini(struct xe_exec_queue *q) +static void execlist_exec_queue_destroy(struct xe_exec_queue *q) { - INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async); - queue_work(system_unbound_wq, &q->execlist->fini_async); + INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async); + queue_work(system_unbound_wq, &q->execlist->destroy_async); } static int execlist_exec_queue_set_priority(struct xe_exec_queue *q, @@ -467,6 +473,7 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = { .init = execlist_exec_queue_init, .kill = execlist_exec_queue_kill, .fini = execlist_exec_queue_fini, + .destroy = execlist_exec_queue_destroy, .set_priority = execlist_exec_queue_set_priority, .set_timeslice = execlist_exec_queue_set_timeslice, .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout, diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h index 415140936f11..92c4ba52db0c 100644 --- a/drivers/gpu/drm/xe/xe_execlist_types.h +++ b/drivers/gpu/drm/xe/xe_execlist_types.h @@ -42,7 +42,7 @@ struct xe_execlist_exec_queue { bool has_run; - struct work_struct fini_async; + struct work_struct destroy_async; enum xe_exec_queue_priority active_priority; struct list_head active_link; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 34505a6d93ed..3e0ad7e5b5df 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -98,7 +98,7 @@ void xe_gt_sanitize(struct xe_gt *gt) * FIXME: if xe_uc_sanitize is called here, on TGL driver will not * reload */ - gt->uc.guc.submission_state.enabled = false; + xe_guc_submit_disable(>->uc.guc); } static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index c8f0320d032f..4cae2d514306 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1636,7 +1636,6 @@ static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) u64 fair; fair = div_u64(available, num_vfs); - fair = rounddown_pow_of_two(fair); /* XXX: ttm_vram_mgr & drm_buddy limitation */ fair = ALIGN_DOWN(fair, alignment); #ifdef MAX_FAIR_LMEM fair = min_t(u64, MAX_FAIR_LMEM, fair); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 37d06c51180c..fb7bcb9185b7 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -888,9 +888,7 @@ int xe_guc_post_load_init(struct xe_guc *guc) return ret; } - guc->submission_state.enabled = true; - - return 0; + return xe_guc_submit_enable(guc); } int xe_guc_reset(struct xe_guc *guc) @@ -1602,7 +1600,7 @@ void xe_guc_sanitize(struct xe_guc *guc) { xe_uc_fw_sanitize(&guc->fw); xe_guc_ct_disable(&guc->ct); - guc->submission_state.enabled = false; + xe_guc_submit_disable(guc); } int xe_guc_reset_prepare(struct xe_guc *guc) diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h index a3f421e2adc0..c30c0e3ccbbb 100644 --- a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h @@ -35,8 +35,8 @@ struct xe_guc_exec_queue { struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE]; /** @lr_tdr: long running TDR worker */ struct work_struct lr_tdr; - /** @fini_async: do final fini async from this worker */ - struct work_struct fini_async; + /** @destroy_async: do final destroy async from this worker */ + struct work_struct destroy_async; /** @resume_time: time of last resume */ u64 resume_time; /** @state: GuC specific state for this xe_exec_queue */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 1185b23b1384..e377ba3a39b3 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -32,6 +32,7 @@ #include "xe_guc_ct.h" #include "xe_guc_exec_queue_types.h" #include "xe_guc_id_mgr.h" +#include "xe_guc_klv_helpers.h" #include "xe_guc_submit_types.h" #include "xe_hw_engine.h" #include "xe_hw_fence.h" @@ -316,6 +317,71 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); } +/* + * Given that we want to guarantee enough RCS throughput to avoid missing + * frames, we set the yield policy to 20% of each 80ms interval. + */ +#define RC_YIELD_DURATION 80 /* in ms */ +#define RC_YIELD_RATIO 20 /* in percent */ +static u32 *emit_render_compute_yield_klv(u32 *emit) +{ + *emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD); + *emit++ = RC_YIELD_DURATION; + *emit++ = RC_YIELD_RATIO; + + return emit; +} + +#define SCHEDULING_POLICY_MAX_DWORDS 16 +static int guc_init_global_schedule_policy(struct xe_guc *guc) +{ + u32 data[SCHEDULING_POLICY_MAX_DWORDS]; + u32 *emit = data; + u32 count = 0; + int ret; + + if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) + return 0; + + *emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; + + if (CCS_MASK(guc_to_gt(guc))) + emit = emit_render_compute_yield_klv(emit); + + count = emit - data; + if (count > 1) { + xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS); + + ret = xe_guc_ct_send_block(&guc->ct, data, count); + if (ret < 0) { + xe_gt_err(guc_to_gt(guc), + "failed to enable GuC sheduling policies: %pe\n", + ERR_PTR(ret)); + return ret; + } + } + + return 0; +} + +int xe_guc_submit_enable(struct xe_guc *guc) +{ + int ret; + + ret = guc_init_global_schedule_policy(guc); + if (ret) + return ret; + + guc->submission_state.enabled = true; + + return 0; +} + +void xe_guc_submit_disable(struct xe_guc *guc) +{ + guc->submission_state.enabled = false; +} + static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) { int i; @@ -1355,48 +1421,57 @@ rearm: return DRM_GPU_SCHED_STAT_NO_HANG; } -static void __guc_exec_queue_fini_async(struct work_struct *w) +static void guc_exec_queue_fini(struct xe_exec_queue *q) +{ + struct xe_guc_exec_queue *ge = q->guc; + struct xe_guc *guc = exec_queue_to_guc(q); + + release_guc_id(guc, q); + xe_sched_entity_fini(&ge->entity); + xe_sched_fini(&ge->sched); + + /* + * RCU free due sched being exported via DRM scheduler fences + * (timeline name). + */ + kfree_rcu(ge, rcu); +} + +static void __guc_exec_queue_destroy_async(struct work_struct *w) { struct xe_guc_exec_queue *ge = - container_of(w, struct xe_guc_exec_queue, fini_async); + container_of(w, struct xe_guc_exec_queue, destroy_async); struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); xe_pm_runtime_get(guc_to_xe(guc)); trace_xe_exec_queue_destroy(q); - release_guc_id(guc, q); if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&ge->sched.base.work_tdr); - xe_sched_entity_fini(&ge->entity); - xe_sched_fini(&ge->sched); - /* - * RCU free due sched being exported via DRM scheduler fences - * (timeline name). - */ - kfree_rcu(ge, rcu); xe_exec_queue_fini(q); + xe_pm_runtime_put(guc_to_xe(guc)); } -static void guc_exec_queue_fini_async(struct xe_exec_queue *q) +static void guc_exec_queue_destroy_async(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); - INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); + INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async); /* We must block on kernel engines so slabs are empty on driver unload */ if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) - __guc_exec_queue_fini_async(&q->guc->fini_async); + __guc_exec_queue_destroy_async(&q->guc->destroy_async); else - queue_work(xe->destroy_wq, &q->guc->fini_async); + queue_work(xe->destroy_wq, &q->guc->destroy_async); } -static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) +static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q) { /* * Might be done from within the GPU scheduler, need to do async as we @@ -1405,7 +1480,7 @@ static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) * this we and don't really care when everything is fini'd, just that it * is. */ - guc_exec_queue_fini_async(q); + guc_exec_queue_destroy_async(q); } static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) @@ -1419,7 +1494,7 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) if (exec_queue_registered(q)) disable_scheduling_deregister(guc, q); else - __guc_exec_queue_fini(guc, q); + __guc_exec_queue_destroy(guc, q); } static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) @@ -1652,14 +1727,14 @@ static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, #define STATIC_MSG_CLEANUP 0 #define STATIC_MSG_SUSPEND 1 #define STATIC_MSG_RESUME 2 -static void guc_exec_queue_fini(struct xe_exec_queue *q) +static void guc_exec_queue_destroy(struct xe_exec_queue *q) { struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) guc_exec_queue_add_msg(q, msg, CLEANUP); else - __guc_exec_queue_fini(exec_queue_to_guc(q), q); + __guc_exec_queue_destroy(exec_queue_to_guc(q), q); } static int guc_exec_queue_set_priority(struct xe_exec_queue *q, @@ -1789,6 +1864,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = { .init = guc_exec_queue_init, .kill = guc_exec_queue_kill, .fini = guc_exec_queue_fini, + .destroy = guc_exec_queue_destroy, .set_priority = guc_exec_queue_set_priority, .set_timeslice = guc_exec_queue_set_timeslice, .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, @@ -1810,7 +1886,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) xe_exec_queue_put(q); else if (exec_queue_destroyed(q)) - __guc_exec_queue_fini(guc, q); + __guc_exec_queue_destroy(guc, q); } if (q->guc->suspend_pending) { set_exec_queue_suspended(q); @@ -2139,7 +2215,7 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) xe_exec_queue_put(q); else - __guc_exec_queue_fini(guc, q); + __guc_exec_queue_destroy(guc, q); } int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 6b5df5d0956b..e20ccafdfab5 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -13,6 +13,8 @@ struct xe_exec_queue; struct xe_guc; int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids); +int xe_guc_submit_enable(struct xe_guc *guc); +void xe_guc_submit_disable(struct xe_guc *guc); int xe_guc_submit_reset_prepare(struct xe_guc *guc); void xe_guc_submit_reset_wait(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 32a76ae6e9dc..5ade08f90b89 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -286,7 +286,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg */ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *value) { - u64 reg_val = 0, min, max; + u32 reg_val = 0; struct xe_device *xe = hwmon->xe; struct xe_reg rapl_limit, pkg_power_sku; struct xe_mmio *mmio = xe_root_tile_mmio(xe); @@ -294,7 +294,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channe mutex_lock(&hwmon->hwmon_lock); if (hwmon->xe->info.has_mbx_power_limits) { - xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, (u32 *)®_val); + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, ®_val); } else { rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); @@ -304,19 +304,21 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channe /* Check if PL limits are disabled. */ if (!(reg_val & PWR_LIM_EN)) { *value = PL_DISABLE; - drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%016llx\n", + drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%08x\n", PWR_ATTR_TO_STR(attr), channel, reg_val); goto unlock; } reg_val = REG_FIELD_GET(PWR_LIM_VAL, reg_val); - *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); + *value = mul_u32_u32(reg_val, SF_POWER) >> hwmon->scl_shift_power; /* For platforms with mailbox power limit support clamping would be done by pcode. */ if (!hwmon->xe->info.has_mbx_power_limits) { - reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku); - min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); - max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); + u64 pkg_pwr, min, max; + + pkg_pwr = xe_mmio_read64_2x32(mmio, pkg_power_sku); + min = REG_FIELD_GET(PKG_MIN_PWR, pkg_pwr); + max = REG_FIELD_GET(PKG_MAX_PWR, pkg_pwr); min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); if (min && max) @@ -493,8 +495,8 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at { struct xe_hwmon *hwmon = dev_get_drvdata(dev); struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); - u32 x, y, x_w = 2; /* 2 bits */ - u64 r, tau4, out; + u32 reg_val, x, y, x_w = 2; /* 2 bits */ + u64 tau4, out; int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD; u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR; @@ -505,23 +507,24 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at mutex_lock(&hwmon->hwmon_lock); if (hwmon->xe->info.has_mbx_power_limits) { - ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r); + ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, ®_val); if (ret) { drm_err(&hwmon->xe->drm, - "power interval read fail, ch %d, attr %d, r 0%llx, ret %d\n", - channel, power_attr, r, ret); - r = 0; + "power interval read fail, ch %d, attr %d, val 0x%08x, ret %d\n", + channel, power_attr, reg_val, ret); + reg_val = 0; } } else { - r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel)); + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, + channel)); } mutex_unlock(&hwmon->hwmon_lock); xe_pm_runtime_put(hwmon->xe); - x = REG_FIELD_GET(PWR_LIM_TIME_X, r); - y = REG_FIELD_GET(PWR_LIM_TIME_Y, r); + x = REG_FIELD_GET(PWR_LIM_TIME_X, reg_val); + y = REG_FIELD_GET(PWR_LIM_TIME_Y, reg_val); /* * tau = (1 + (x / 4)) * power(2,y), x = bits(23:22), y = bits(21:17) diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c index 9ca4a5ae1693..33f4ac82fc80 100644 --- a/drivers/gpu/drm/xe/xe_nvm.c +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -35,6 +35,10 @@ static const struct intel_dg_nvm_region regions[INTEL_DG_NVM_REGIONS] = { static void xe_nvm_release_dev(struct device *dev) { + struct auxiliary_device *aux = container_of(dev, struct auxiliary_device, dev); + struct intel_dg_nvm_dev *nvm = container_of(aux, struct intel_dg_nvm_dev, aux_dev); + + kfree(nvm); } static bool xe_nvm_non_posted_erase(struct xe_device *xe) @@ -162,6 +166,5 @@ void xe_nvm_fini(struct xe_device *xe) auxiliary_device_delete(&nvm->aux_dev); auxiliary_device_uninit(&nvm->aux_dev); - kfree(nvm); xe->nvm = NULL; } diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c index b804234a6551..9e1236a9ec67 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -44,16 +44,18 @@ int xe_tile_sysfs_init(struct xe_tile *tile) kt->tile = tile; err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id); - if (err) { - kobject_put(&kt->base); - return err; - } + if (err) + goto err_object; tile->sysfs = &kt->base; err = xe_vram_freq_sysfs_init(tile); if (err) - return err; + goto err_object; return devm_add_action_or_reset(xe->drm.dev, tile_sysfs_fini, tile); + +err_object: + kobject_put(&kt->base); + return err; } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c00a5ff31817..db6b9a6651b7 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -241,8 +241,8 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) pfence = xe_preempt_fence_create(q, q->lr.context, ++q->lr.seqno); - if (!pfence) { - err = -ENOMEM; + if (IS_ERR(pfence)) { + err = PTR_ERR(pfence); goto out_fini; } diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index 274989ea1fb4..1380b47617f7 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -34,14 +34,19 @@ impl pci::Driver for NovaCore { pdev.enable_device_mem()?; pdev.set_master(); - let bar = Arc::pin_init( + let devres_bar = Arc::pin_init( pdev.iomap_region_sized::<BAR0_SIZE>(0, c_str!("nova-core/bar0")), GFP_KERNEL, )?; + // Used to provided a `&Bar0` to `Gpu::new` without tying it to the lifetime of + // `devres_bar`. + let bar_clone = Arc::clone(&devres_bar); + let bar = bar_clone.access(pdev.as_ref())?; + let this = KBox::pin_init( try_pin_init!(Self { - gpu <- Gpu::new(pdev, bar)?, + gpu <- Gpu::new(pdev, devres_bar, bar), _reg: auxiliary::Registration::new( pdev.as_ref(), c_str!("nova-drm"), @@ -54,4 +59,8 @@ impl pci::Driver for NovaCore { Ok(this) } + + fn unbind(pdev: &pci::Device<Core>, this: Pin<&Self>) { + this.gpu.unbind(pdev.as_ref()); + } } diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 50437c67c14a..37e6298195e4 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -4,16 +4,17 @@ use core::ops::Deref; use hal::FalconHal; -use kernel::bindings; use kernel::device; +use kernel::dma::DmaAddress; use kernel::prelude::*; +use kernel::sync::aref::ARef; use kernel::time::Delta; -use kernel::types::ARef; use crate::dma::DmaObject; use crate::driver::Bar0; use crate::gpu::Chipset; use crate::regs; +use crate::regs::macros::RegisterBase; use crate::util; pub(crate) mod gsp; @@ -274,14 +275,25 @@ impl From<bool> for FalconFbifMemType { } } -/// Trait defining the parameters of a given Falcon instance. -pub(crate) trait FalconEngine: Sync { - /// Base I/O address for the falcon, relative from which its registers are accessed. - const BASE: usize; +/// Type used to represent the `PFALCON` registers address base for a given falcon engine. +pub(crate) struct PFalconBase(()); + +/// Type used to represent the `PFALCON2` registers address base for a given falcon engine. +pub(crate) struct PFalcon2Base(()); + +/// Trait defining the parameters of a given Falcon engine. +/// +/// Each engine provides one base for `PFALCON` and `PFALCON2` registers. The `ID` constant is used +/// to identify a given Falcon instance with register I/O methods. +pub(crate) trait FalconEngine: + Send + Sync + RegisterBase<PFalconBase> + RegisterBase<PFalcon2Base> + Sized +{ + /// Singleton of the engine, used to identify it with register I/O methods. + const ID: Self; } /// Represents a portion of the firmware to be loaded into a particular memory (e.g. IMEM or DMEM). -#[derive(Debug)] +#[derive(Debug, Clone)] pub(crate) struct FalconLoadTarget { /// Offset from the start of the source object to copy from. pub(crate) src_start: u32, @@ -292,7 +304,7 @@ pub(crate) struct FalconLoadTarget { } /// Parameters for the falcon boot ROM. -#[derive(Debug)] +#[derive(Debug, Clone)] pub(crate) struct FalconBromParams { /// Offset in `DMEM`` of the firmware's signature. pub(crate) pkc_data_offset: u32, @@ -343,13 +355,13 @@ impl<E: FalconEngine + 'static> Falcon<E> { bar: &Bar0, need_riscv: bool, ) -> Result<Self> { - let hwcfg1 = regs::NV_PFALCON_FALCON_HWCFG1::read(bar, E::BASE); + let hwcfg1 = regs::NV_PFALCON_FALCON_HWCFG1::read(bar, &E::ID); // Check that the revision and security model contain valid values. let _ = hwcfg1.core_rev()?; let _ = hwcfg1.security_model()?; if need_riscv { - let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE); + let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); if !hwcfg2.riscv() { dev_err!( dev, @@ -369,7 +381,7 @@ impl<E: FalconEngine + 'static> Falcon<E> { fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { // TIMEOUT: memory scrubbing should complete in less than 20ms. util::wait_on(Delta::from_millis(20), || { - if regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE).mem_scrubbing_done() { + if regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID).mem_scrubbing_done() { Some(()) } else { None @@ -379,12 +391,12 @@ impl<E: FalconEngine + 'static> Falcon<E> { /// Reset the falcon engine. fn reset_eng(&self, bar: &Bar0) -> Result { - let _ = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE); + let _ = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); // According to OpenRM's `kflcnPreResetWait_GA102` documentation, HW sometimes does not set // RESET_READY so a non-failing timeout is used. let _ = util::wait_on(Delta::from_micros(150), || { - let r = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE); + let r = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); if r.reset_ready() { Some(()) } else { @@ -392,13 +404,13 @@ impl<E: FalconEngine + 'static> Falcon<E> { } }); - regs::NV_PFALCON_FALCON_ENGINE::alter(bar, E::BASE, |v| v.set_reset(true)); + regs::NV_PFALCON_FALCON_ENGINE::alter(bar, &E::ID, |v| v.set_reset(true)); // TODO[DLAY]: replace with udelay() or equivalent once available. // TIMEOUT: falcon engine should not take more than 10us to reset. let _: Result = util::wait_on(Delta::from_micros(10), || None); - regs::NV_PFALCON_FALCON_ENGINE::alter(bar, E::BASE, |v| v.set_reset(false)); + regs::NV_PFALCON_FALCON_ENGINE::alter(bar, &E::ID, |v| v.set_reset(false)); self.reset_wait_mem_scrubbing(bar)?; @@ -413,7 +425,7 @@ impl<E: FalconEngine + 'static> Falcon<E> { regs::NV_PFALCON_FALCON_RM::default() .set_value(regs::NV_PMC_BOOT_0::read(bar).into()) - .write(bar, E::BASE); + .write(bar, &E::ID); Ok(()) } @@ -443,7 +455,7 @@ impl<E: FalconEngine + 'static> Falcon<E> { fw.dma_handle_with_offset(load_offsets.src_start as usize)?, ), }; - if dma_start % bindings::dma_addr_t::from(DMA_LEN) > 0 { + if dma_start % DmaAddress::from(DMA_LEN) > 0 { dev_err!( self.dev, "DMA transfer start addresses must be a multiple of {}", @@ -451,44 +463,57 @@ impl<E: FalconEngine + 'static> Falcon<E> { ); return Err(EINVAL); } - if load_offsets.len % DMA_LEN > 0 { - dev_err!( - self.dev, - "DMA transfer length must be a multiple of {}", - DMA_LEN - ); - return Err(EINVAL); - } + + // DMA transfers can only be done in units of 256 bytes. Compute how many such transfers we + // need to perform. + let num_transfers = load_offsets.len.div_ceil(DMA_LEN); + + // Check that the area we are about to transfer is within the bounds of the DMA object. + // Upper limit of transfer is `(num_transfers * DMA_LEN) + load_offsets.src_start`. + match num_transfers + .checked_mul(DMA_LEN) + .and_then(|size| size.checked_add(load_offsets.src_start)) + { + None => { + dev_err!(self.dev, "DMA transfer length overflow"); + return Err(EOVERFLOW); + } + Some(upper_bound) if upper_bound as usize > fw.size() => { + dev_err!(self.dev, "DMA transfer goes beyond range of DMA object"); + return Err(EINVAL); + } + Some(_) => (), + }; // Set up the base source DMA address. regs::NV_PFALCON_FALCON_DMATRFBASE::default() .set_base((dma_start >> 8) as u32) - .write(bar, E::BASE); + .write(bar, &E::ID); regs::NV_PFALCON_FALCON_DMATRFBASE1::default() .set_base((dma_start >> 40) as u16) - .write(bar, E::BASE); + .write(bar, &E::ID); let cmd = regs::NV_PFALCON_FALCON_DMATRFCMD::default() .set_size(DmaTrfCmdSize::Size256B) .set_imem(target_mem == FalconMem::Imem) .set_sec(if sec { 1 } else { 0 }); - for pos in (0..load_offsets.len).step_by(DMA_LEN as usize) { + for pos in (0..num_transfers).map(|i| i * DMA_LEN) { // Perform a transfer of size `DMA_LEN`. regs::NV_PFALCON_FALCON_DMATRFMOFFS::default() .set_offs(load_offsets.dst_start + pos) - .write(bar, E::BASE); + .write(bar, &E::ID); regs::NV_PFALCON_FALCON_DMATRFFBOFFS::default() .set_offs(src_start + pos) - .write(bar, E::BASE); - cmd.write(bar, E::BASE); + .write(bar, &E::ID); + cmd.write(bar, &E::ID); // Wait for the transfer to complete. // TIMEOUT: arbitrarily large value, no DMA transfer to the falcon's small memories // should ever take that long. util::wait_on(Delta::from_secs(2), || { - let r = regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, E::BASE); + let r = regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, &E::ID); if r.idle() { Some(()) } else { @@ -502,9 +527,9 @@ impl<E: FalconEngine + 'static> Falcon<E> { /// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. pub(crate) fn dma_load<F: FalconFirmware<Target = E>>(&self, bar: &Bar0, fw: &F) -> Result { - regs::NV_PFALCON_FBIF_CTL::alter(bar, E::BASE, |v| v.set_allow_phys_no_ctx(true)); - regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, E::BASE); - regs::NV_PFALCON_FBIF_TRANSCFG::alter(bar, E::BASE, |v| { + regs::NV_PFALCON_FBIF_CTL::alter(bar, &E::ID, |v| v.set_allow_phys_no_ctx(true)); + regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); + regs::NV_PFALCON_FBIF_TRANSCFG::alter(bar, &E::ID, 0, |v| { v.set_target(FalconFbifTarget::CoherentSysmem) .set_mem_type(FalconFbifMemType::Physical) }); @@ -517,7 +542,7 @@ impl<E: FalconEngine + 'static> Falcon<E> { // Set `BootVec` to start of non-secure code. regs::NV_PFALCON_FALCON_BOOTVEC::default() .set_value(fw.boot_addr()) - .write(bar, E::BASE); + .write(bar, &E::ID); Ok(()) } @@ -538,27 +563,27 @@ impl<E: FalconEngine + 'static> Falcon<E> { if let Some(mbox0) = mbox0 { regs::NV_PFALCON_FALCON_MAILBOX0::default() .set_value(mbox0) - .write(bar, E::BASE); + .write(bar, &E::ID); } if let Some(mbox1) = mbox1 { regs::NV_PFALCON_FALCON_MAILBOX1::default() .set_value(mbox1) - .write(bar, E::BASE); + .write(bar, &E::ID); } - match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, E::BASE).alias_en() { + match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID).alias_en() { true => regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::default() .set_startcpu(true) - .write(bar, E::BASE), + .write(bar, &E::ID), false => regs::NV_PFALCON_FALCON_CPUCTL::default() .set_startcpu(true) - .write(bar, E::BASE), + .write(bar, &E::ID), } // TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds. util::wait_on(Delta::from_secs(2), || { - let r = regs::NV_PFALCON_FALCON_CPUCTL::read(bar, E::BASE); + let r = regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID); if r.halted() { Some(()) } else { @@ -567,8 +592,8 @@ impl<E: FalconEngine + 'static> Falcon<E> { })?; let (mbox0, mbox1) = ( - regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, E::BASE).value(), - regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, E::BASE).value(), + regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, &E::ID).value(), + regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, &E::ID).value(), ); Ok((mbox0, mbox1)) diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs index d622e9a64470..f17599cb49fa 100644 --- a/drivers/gpu/nova-core/falcon/gsp.rs +++ b/drivers/gpu/nova-core/falcon/gsp.rs @@ -2,23 +2,31 @@ use crate::{ driver::Bar0, - falcon::{Falcon, FalconEngine}, - regs, + falcon::{Falcon, FalconEngine, PFalcon2Base, PFalconBase}, + regs::{self, macros::RegisterBase}, }; /// Type specifying the `Gsp` falcon engine. Cannot be instantiated. pub(crate) struct Gsp(()); -impl FalconEngine for Gsp { +impl RegisterBase<PFalconBase> for Gsp { const BASE: usize = 0x00110000; } +impl RegisterBase<PFalcon2Base> for Gsp { + const BASE: usize = 0x00111000; +} + +impl FalconEngine for Gsp { + const ID: Self = Gsp(()); +} + impl Falcon<Gsp> { /// Clears the SWGEN0 bit in the Falcon's IRQ status clear register to /// allow GSP to signal CPU for processing new messages in message queue. pub(crate) fn clear_swgen0_intr(&self, bar: &Bar0) { regs::NV_PFALCON_FALCON_IRQSCLR::default() .set_swgen0(true) - .write(bar, Gsp::BASE); + .write(bar, &Gsp::ID); } } diff --git a/drivers/gpu/nova-core/falcon/hal.rs b/drivers/gpu/nova-core/falcon/hal.rs index b233bc365882..bba288455617 100644 --- a/drivers/gpu/nova-core/falcon/hal.rs +++ b/drivers/gpu/nova-core/falcon/hal.rs @@ -13,7 +13,7 @@ mod ga102; /// Implements chipset-specific low-level operations. The trait is generic against [`FalconEngine`] /// so its `BASE` parameter can be used in order to avoid runtime bound checks when accessing /// registers. -pub(crate) trait FalconHal<E: FalconEngine>: Sync { +pub(crate) trait FalconHal<E: FalconEngine>: Send + Sync { /// Activates the Falcon core if the engine is a risvc/falcon dual engine. fn select_core(&self, _falcon: &Falcon<E>, _bar: &Bar0) -> Result { Ok(()) diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs index 52c33d3f22a8..0b1cbe7853b3 100644 --- a/drivers/gpu/nova-core/falcon/hal/ga102.rs +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -16,15 +16,15 @@ use crate::util; use super::FalconHal; fn select_core_ga102<E: FalconEngine>(bar: &Bar0) -> Result { - let bcr_ctrl = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, E::BASE); + let bcr_ctrl = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID); if bcr_ctrl.core_select() != PeregrineCoreSelect::Falcon { regs::NV_PRISCV_RISCV_BCR_CTRL::default() .set_core_select(PeregrineCoreSelect::Falcon) - .write(bar, E::BASE); + .write(bar, &E::ID); // TIMEOUT: falcon core should take less than 10ms to report being enabled. util::wait_on(Delta::from_millis(10), || { - let r = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, E::BASE); + let r = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID); if r.valid() { Some(()) } else { @@ -42,50 +42,47 @@ fn signature_reg_fuse_version_ga102( engine_id_mask: u16, ucode_id: u8, ) -> Result<u32> { - // TODO[REGA]: The ucode fuse versions are contained in the - // FUSE_OPT_FPF_<ENGINE>_UCODE<X>_VERSION registers, which are an array. Our register - // definition macros do not allow us to manage them properly, so we need to hardcode their - // addresses for now. Clean this up once we support register arrays. + const NV_FUSE_OPT_FPF_SIZE: u8 = regs::NV_FUSE_OPT_FPF_SIZE as u8; // Each engine has 16 ucode version registers numbered from 1 to 16. - if ucode_id == 0 || ucode_id > 16 { - dev_err!(dev, "invalid ucode id {:#x}", ucode_id); - return Err(EINVAL); - } + let ucode_idx = match ucode_id { + 1..=NV_FUSE_OPT_FPF_SIZE => (ucode_id - 1) as usize, + _ => { + dev_err!(dev, "invalid ucode id {:#x}", ucode_id); + return Err(EINVAL); + } + }; - // Base address of the FUSE registers array corresponding to the engine. - let reg_fuse_base = if engine_id_mask & 0x0001 != 0 { - regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::OFFSET + // `ucode_idx` is guaranteed to be in the range [0..15], making the `read` calls provable valid + // at build-time. + let reg_fuse_version = if engine_id_mask & 0x0001 != 0 { + regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::read(bar, ucode_idx).data() } else if engine_id_mask & 0x0004 != 0 { - regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::OFFSET + regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::read(bar, ucode_idx).data() } else if engine_id_mask & 0x0400 != 0 { - regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::OFFSET + regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::read(bar, ucode_idx).data() } else { dev_err!(dev, "unexpected engine_id_mask {:#x}", engine_id_mask); return Err(EINVAL); }; - // Read `reg_fuse_base[ucode_id - 1]`. - let reg_fuse_version = - bar.read32(reg_fuse_base + ((ucode_id - 1) as usize * core::mem::size_of::<u32>())); - // TODO[NUMM]: replace with `last_set_bit` once it lands. - Ok(u32::BITS - reg_fuse_version.leading_zeros()) + Ok(u16::BITS - reg_fuse_version.leading_zeros()) } fn program_brom_ga102<E: FalconEngine>(bar: &Bar0, params: &FalconBromParams) -> Result { regs::NV_PFALCON2_FALCON_BROM_PARAADDR::default() .set_value(params.pkc_data_offset) - .write(bar, E::BASE); + .write(bar, &E::ID, 0); regs::NV_PFALCON2_FALCON_BROM_ENGIDMASK::default() .set_value(u32::from(params.engine_id_mask)) - .write(bar, E::BASE); + .write(bar, &E::ID); regs::NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID::default() .set_ucode_id(params.ucode_id) - .write(bar, E::BASE); + .write(bar, &E::ID); regs::NV_PFALCON2_FALCON_MOD_SEL::default() .set_algo(FalconModSelAlgo::Rsa3k) - .write(bar, E::BASE); + .write(bar, &E::ID); Ok(()) } diff --git a/drivers/gpu/nova-core/falcon/sec2.rs b/drivers/gpu/nova-core/falcon/sec2.rs index 5147d9e2a7fe..815786c8480d 100644 --- a/drivers/gpu/nova-core/falcon/sec2.rs +++ b/drivers/gpu/nova-core/falcon/sec2.rs @@ -1,10 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 -use crate::falcon::FalconEngine; +use crate::falcon::{FalconEngine, PFalcon2Base, PFalconBase}; +use crate::regs::macros::RegisterBase; /// Type specifying the `Sec2` falcon engine. Cannot be instantiated. pub(crate) struct Sec2(()); -impl FalconEngine for Sec2 { +impl RegisterBase<PFalconBase> for Sec2 { const BASE: usize = 0x00840000; } + +impl RegisterBase<PFalcon2Base> for Sec2 { + const BASE: usize = 0x00841000; +} + +impl FalconEngine for Sec2 { + const ID: Self = Sec2(()); +} diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs index 4a702525fff4..68559902ae78 100644 --- a/drivers/gpu/nova-core/fb.rs +++ b/drivers/gpu/nova-core/fb.rs @@ -4,7 +4,7 @@ use core::ops::Range; use kernel::prelude::*; use kernel::sizes::*; -use kernel::types::ARef; +use kernel::sync::aref::ARef; use kernel::{dev_warn, device}; use crate::dma::DmaObject; diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 2931912ddba0..4179a74a2342 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -4,48 +4,36 @@ //! to be loaded into a given execution unit. use core::marker::PhantomData; +use core::mem::size_of; use kernel::device; use kernel::firmware; use kernel::prelude::*; use kernel::str::CString; +use kernel::transmute::FromBytes; use crate::dma::DmaObject; use crate::falcon::FalconFirmware; use crate::gpu; -use crate::gpu::Chipset; +pub(crate) mod booter; pub(crate) mod fwsec; - -pub(crate) const FIRMWARE_VERSION: &str = "535.113.01"; - -/// Structure encapsulating the firmware blobs required for the GPU to operate. -#[expect(dead_code)] -pub(crate) struct Firmware { - booter_load: firmware::Firmware, - booter_unload: firmware::Firmware, - bootloader: firmware::Firmware, - gsp: firmware::Firmware, -} - -impl Firmware { - pub(crate) fn new(dev: &device::Device, chipset: Chipset, ver: &str) -> Result<Firmware> { - let mut chip_name = CString::try_from_fmt(fmt!("{chipset}"))?; - chip_name.make_ascii_lowercase(); - let chip_name = &*chip_name; - - let request = |name_| { - CString::try_from_fmt(fmt!("nvidia/{chip_name}/gsp/{name_}-{ver}.bin")) - .and_then(|path| firmware::Firmware::request(&path, dev)) - }; - - Ok(Firmware { - booter_load: request("booter_load")?, - booter_unload: request("booter_unload")?, - bootloader: request("bootloader")?, - gsp: request("gsp")?, - }) - } +pub(crate) mod gsp; +pub(crate) mod riscv; + +pub(crate) const FIRMWARE_VERSION: &str = "570.144"; + +/// Requests the GPU firmware `name` suitable for `chipset`, with version `ver`. +fn request_firmware( + dev: &device::Device, + chipset: gpu::Chipset, + name: &str, + ver: &str, +) -> Result<firmware::Firmware> { + let chip_name = chipset.name(); + + CString::try_from_fmt(fmt!("nvidia/{chip_name}/gsp/{name}-{ver}.bin")) + .and_then(|path| firmware::Firmware::request(&path, dev)) } /// Structure used to describe some firmwares, notably FWSEC-FRTS. @@ -150,6 +138,65 @@ impl<F: FalconFirmware> FirmwareDmaObject<F, Unsigned> { } } +/// Header common to most firmware files. +#[repr(C)] +#[derive(Debug, Clone)] +struct BinHdr { + /// Magic number, must be `0x10de`. + bin_magic: u32, + /// Version of the header. + bin_ver: u32, + /// Size in bytes of the binary (to be ignored). + bin_size: u32, + /// Offset of the start of the application-specific header. + header_offset: u32, + /// Offset of the start of the data payload. + data_offset: u32, + /// Size in bytes of the data payload. + data_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for BinHdr {} + +// A firmware blob starting with a `BinHdr`. +struct BinFirmware<'a> { + hdr: BinHdr, + fw: &'a [u8], +} + +impl<'a> BinFirmware<'a> { + /// Interpret `fw` as a firmware image starting with a [`BinHdr`], and returns the + /// corresponding [`BinFirmware`] that can be used to extract its payload. + fn new(fw: &'a firmware::Firmware) -> Result<Self> { + const BIN_MAGIC: u32 = 0x10de; + let fw = fw.data(); + + fw.get(0..size_of::<BinHdr>()) + // Extract header. + .and_then(BinHdr::from_bytes_copy) + // Validate header. + .and_then(|hdr| { + if hdr.bin_magic == BIN_MAGIC { + Some(hdr) + } else { + None + } + }) + .map(|hdr| Self { hdr, fw }) + .ok_or(EINVAL) + } + + /// Returns the data payload of the firmware, or `None` if the data range is out of bounds of + /// the firmware image. + fn data(&self) -> Option<&[u8]> { + let fw_start = self.hdr.data_offset as usize; + let fw_size = self.hdr.data_size as usize; + + self.fw.get(fw_start..fw_start + fw_size) + } +} + pub(crate) struct ModInfoBuilder<const N: usize>(firmware::ModInfoBuilder<N>); impl<const N: usize> ModInfoBuilder<N> { @@ -180,8 +227,8 @@ impl<const N: usize> ModInfoBuilder<N> { let mut this = Self(firmware::ModInfoBuilder::new(module_name)); let mut i = 0; - while i < gpu::Chipset::NAMES.len() { - this = this.make_entry_chipset(gpu::Chipset::NAMES[i]); + while i < gpu::Chipset::ALL.len() { + this = this.make_entry_chipset(gpu::Chipset::ALL[i].name()); i += 1; } diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs new file mode 100644 index 000000000000..b4ff1b17e4a0 --- /dev/null +++ b/drivers/gpu/nova-core/firmware/booter.rs @@ -0,0 +1,375 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Support for loading and patching the `Booter` firmware. `Booter` is a Heavy Secured firmware +//! running on [`Sec2`], that is used on Turing/Ampere to load the GSP firmware into the GSP falcon +//! (and optionally unload it through a separate firmware image). + +use core::marker::PhantomData; +use core::mem::size_of; +use core::ops::Deref; + +use kernel::device; +use kernel::prelude::*; +use kernel::transmute::FromBytes; + +use crate::dma::DmaObject; +use crate::driver::Bar0; +use crate::falcon::sec2::Sec2; +use crate::falcon::{Falcon, FalconBromParams, FalconFirmware, FalconLoadParams, FalconLoadTarget}; +use crate::firmware::{BinFirmware, FirmwareDmaObject, FirmwareSignature, Signed, Unsigned}; +use crate::gpu::Chipset; + +/// Local convenience function to return a copy of `S` by reinterpreting the bytes starting at +/// `offset` in `slice`. +fn frombytes_at<S: FromBytes + Sized>(slice: &[u8], offset: usize) -> Result<S> { + slice + .get(offset..offset + size_of::<S>()) + .and_then(S::from_bytes_copy) + .ok_or(EINVAL) +} + +/// Heavy-Secured firmware header. +/// +/// Such firmwares have an application-specific payload that needs to be patched with a given +/// signature. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsHeaderV2 { + /// Offset to the start of the signatures. + sig_prod_offset: u32, + /// Size in bytes of the signatures. + sig_prod_size: u32, + /// Offset to a `u32` containing the location at which to patch the signature in the microcode + /// image. + patch_loc_offset: u32, + /// Offset to a `u32` containing the index of the signature to patch. + patch_sig_offset: u32, + /// Start offset to the signature metadata. + meta_data_offset: u32, + /// Size in bytes of the signature metadata. + meta_data_size: u32, + /// Offset to a `u32` containing the number of signatures in the signatures section. + num_sig_offset: u32, + /// Offset of the application-specific header. + header_offset: u32, + /// Size in bytes of the application-specific header. + header_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsHeaderV2 {} + +/// Heavy-Secured Firmware image container. +/// +/// This provides convenient access to the fields of [`HsHeaderV2`] that are actually indices to +/// read from in the firmware data. +struct HsFirmwareV2<'a> { + hdr: HsHeaderV2, + fw: &'a [u8], +} + +impl<'a> HsFirmwareV2<'a> { + /// Interprets the header of `bin_fw` as a [`HsHeaderV2`] and returns an instance of + /// `HsFirmwareV2` for further parsing. + /// + /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. + fn new(bin_fw: &BinFirmware<'a>) -> Result<Self> { + frombytes_at::<HsHeaderV2>(bin_fw.fw, bin_fw.hdr.header_offset as usize) + .map(|hdr| Self { hdr, fw: bin_fw.fw }) + } + + /// Returns the location at which the signatures should be patched in the microcode image. + /// + /// Fails if the offset of the patch location is outside the bounds of the firmware + /// image. + fn patch_location(&self) -> Result<u32> { + frombytes_at::<u32>(self.fw, self.hdr.patch_loc_offset as usize) + } + + /// Returns an iterator to the signatures of the firmware. The iterator can be empty if the + /// firmware is unsigned. + /// + /// Fails if the pointed signatures are outside the bounds of the firmware image. + fn signatures_iter(&'a self) -> Result<impl Iterator<Item = BooterSignature<'a>>> { + let num_sig = frombytes_at::<u32>(self.fw, self.hdr.num_sig_offset as usize)?; + let iter = match self.hdr.sig_prod_size.checked_div(num_sig) { + // If there are no signatures, return an iterator that will yield zero elements. + None => (&[] as &[u8]).chunks_exact(1), + Some(sig_size) => { + let patch_sig = frombytes_at::<u32>(self.fw, self.hdr.patch_sig_offset as usize)?; + let signatures_start = (self.hdr.sig_prod_offset + patch_sig) as usize; + + self.fw + // Get signatures range. + .get(signatures_start..signatures_start + self.hdr.sig_prod_size as usize) + .ok_or(EINVAL)? + .chunks_exact(sig_size as usize) + } + }; + + // Map the byte slices into signatures. + Ok(iter.map(BooterSignature)) + } +} + +/// Signature parameters, as defined in the firmware. +#[repr(C)] +struct HsSignatureParams { + /// Fuse version to use. + fuse_ver: u32, + /// Mask of engine IDs this firmware applies to. + engine_id_mask: u32, + /// ID of the microcode. + ucode_id: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsSignatureParams {} + +impl HsSignatureParams { + /// Returns the signature parameters contained in `hs_fw`. + /// + /// Fails if the meta data parameter of `hs_fw` is outside the bounds of the firmware image, or + /// if its size doesn't match that of [`HsSignatureParams`]. + fn new(hs_fw: &HsFirmwareV2<'_>) -> Result<Self> { + let start = hs_fw.hdr.meta_data_offset as usize; + let end = start + .checked_add(hs_fw.hdr.meta_data_size as usize) + .ok_or(EINVAL)?; + + hs_fw + .fw + .get(start..end) + .and_then(Self::from_bytes_copy) + .ok_or(EINVAL) + } +} + +/// Header for code and data load offsets. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsLoadHeaderV2 { + // Offset at which the code starts. + os_code_offset: u32, + // Total size of the code, for all apps. + os_code_size: u32, + // Offset at which the data starts. + os_data_offset: u32, + // Size of the data. + os_data_size: u32, + // Number of apps following this header. Each app is described by a [`HsLoadHeaderV2App`]. + num_apps: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsLoadHeaderV2 {} + +impl HsLoadHeaderV2 { + /// Returns the load header contained in `hs_fw`. + /// + /// Fails if the header pointed at by `hs_fw` is not within the bounds of the firmware image. + fn new(hs_fw: &HsFirmwareV2<'_>) -> Result<Self> { + frombytes_at::<Self>(hs_fw.fw, hs_fw.hdr.header_offset as usize) + } +} + +/// Header for app code loader. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsLoadHeaderV2App { + /// Offset at which to load the app code. + offset: u32, + /// Length in bytes of the app code. + len: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsLoadHeaderV2App {} + +impl HsLoadHeaderV2App { + /// Returns the [`HsLoadHeaderV2App`] for app `idx` of `hs_fw`. + /// + /// Fails if `idx` is larger than the number of apps declared in `hs_fw`, or if the header is + /// not within the bounds of the firmware image. + fn new(hs_fw: &HsFirmwareV2<'_>, idx: u32) -> Result<Self> { + let load_hdr = HsLoadHeaderV2::new(hs_fw)?; + if idx >= load_hdr.num_apps { + Err(EINVAL) + } else { + frombytes_at::<Self>( + hs_fw.fw, + (hs_fw.hdr.header_offset as usize) + // Skip the load header... + .checked_add(size_of::<HsLoadHeaderV2>()) + // ... and jump to app header `idx`. + .and_then(|offset| { + offset.checked_add((idx as usize).checked_mul(size_of::<Self>())?) + }) + .ok_or(EINVAL)?, + ) + } + } +} + +/// Signature for Booter firmware. Their size is encoded into the header and not known a compile +/// time, so we just wrap a byte slices on which we can implement [`FirmwareSignature`]. +struct BooterSignature<'a>(&'a [u8]); + +impl<'a> AsRef<[u8]> for BooterSignature<'a> { + fn as_ref(&self) -> &[u8] { + self.0 + } +} + +impl<'a> FirmwareSignature<BooterFirmware> for BooterSignature<'a> {} + +/// The `Booter` loader firmware, responsible for loading the GSP. +pub(crate) struct BooterFirmware { + // Load parameters for `IMEM` falcon memory. + imem_load_target: FalconLoadTarget, + // Load parameters for `DMEM` falcon memory. + dmem_load_target: FalconLoadTarget, + // BROM falcon parameters. + brom_params: FalconBromParams, + // Device-mapped firmware image. + ucode: FirmwareDmaObject<Self, Signed>, +} + +impl FirmwareDmaObject<BooterFirmware, Unsigned> { + fn new_booter(dev: &device::Device<device::Bound>, data: &[u8]) -> Result<Self> { + DmaObject::from_data(dev, data).map(|ucode| Self(ucode, PhantomData)) + } +} + +#[derive(Copy, Clone, Debug, PartialEq)] +pub(crate) enum BooterKind { + Loader, + #[expect(unused)] + Unloader, +} + +impl BooterFirmware { + /// Parses the Booter firmware contained in `fw`, and patches the correct signature so it is + /// ready to be loaded and run on `falcon`. + pub(crate) fn new( + dev: &device::Device<device::Bound>, + kind: BooterKind, + chipset: Chipset, + ver: &str, + falcon: &Falcon<<Self as FalconFirmware>::Target>, + bar: &Bar0, + ) -> Result<Self> { + let fw_name = match kind { + BooterKind::Loader => "booter_load", + BooterKind::Unloader => "booter_unload", + }; + let fw = super::request_firmware(dev, chipset, fw_name, ver)?; + let bin_fw = BinFirmware::new(&fw)?; + + // The binary firmware embeds a Heavy-Secured firmware. + let hs_fw = HsFirmwareV2::new(&bin_fw)?; + + // The Heavy-Secured firmware embeds a firmware load descriptor. + let load_hdr = HsLoadHeaderV2::new(&hs_fw)?; + + // Offset in `ucode` where to patch the signature. + let patch_loc = hs_fw.patch_location()?; + + let sig_params = HsSignatureParams::new(&hs_fw)?; + let brom_params = FalconBromParams { + // `load_hdr.os_data_offset` is an absolute index, but `pkc_data_offset` is from the + // signature patch location. + pkc_data_offset: patch_loc + .checked_sub(load_hdr.os_data_offset) + .ok_or(EINVAL)?, + engine_id_mask: u16::try_from(sig_params.engine_id_mask).map_err(|_| EINVAL)?, + ucode_id: u8::try_from(sig_params.ucode_id).map_err(|_| EINVAL)?, + }; + let app0 = HsLoadHeaderV2App::new(&hs_fw, 0)?; + + // Object containing the firmware microcode to be signature-patched. + let ucode = bin_fw + .data() + .ok_or(EINVAL) + .and_then(|data| FirmwareDmaObject::<Self, _>::new_booter(dev, data))?; + + let ucode_signed = { + let mut signatures = hs_fw.signatures_iter()?.peekable(); + + if signatures.peek().is_none() { + // If there are no signatures, then the firmware is unsigned. + ucode.no_patch_signature() + } else { + // Obtain the version from the fuse register, and extract the corresponding + // signature. + let reg_fuse_version = falcon.signature_reg_fuse_version( + bar, + brom_params.engine_id_mask, + brom_params.ucode_id, + )?; + + // `0` means the last signature should be used. + const FUSE_VERSION_USE_LAST_SIG: u32 = 0; + let signature = match reg_fuse_version { + FUSE_VERSION_USE_LAST_SIG => signatures.last(), + // Otherwise hardware fuse version needs to be subtracted to obtain the index. + reg_fuse_version => { + let Some(idx) = sig_params.fuse_ver.checked_sub(reg_fuse_version) else { + dev_err!(dev, "invalid fuse version for Booter firmware\n"); + return Err(EINVAL); + }; + signatures.nth(idx as usize) + } + } + .ok_or(EINVAL)?; + + ucode.patch_signature(&signature, patch_loc as usize)? + } + }; + + Ok(Self { + imem_load_target: FalconLoadTarget { + src_start: app0.offset, + dst_start: 0, + len: app0.len, + }, + dmem_load_target: FalconLoadTarget { + src_start: load_hdr.os_data_offset, + dst_start: 0, + len: load_hdr.os_data_size, + }, + brom_params, + ucode: ucode_signed, + }) + } +} + +impl FalconLoadParams for BooterFirmware { + fn imem_load_params(&self) -> FalconLoadTarget { + self.imem_load_target.clone() + } + + fn dmem_load_params(&self) -> FalconLoadTarget { + self.dmem_load_target.clone() + } + + fn brom_params(&self) -> FalconBromParams { + self.brom_params.clone() + } + + fn boot_addr(&self) -> u32 { + self.imem_load_target.src_start + } +} + +impl Deref for BooterFirmware { + type Target = DmaObject; + + fn deref(&self) -> &Self::Target { + &self.ucode.0 + } +} + +impl FalconFirmware for BooterFirmware { + type Target = Sec2; +} diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index 0dff3cfa90af..8edbb5c0572c 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -202,9 +202,6 @@ pub(crate) struct FwsecFirmware { ucode: FirmwareDmaObject<Self, Signed>, } -// We need to load full DMEM pages. -const DMEM_LOAD_SIZE_ALIGN: u32 = 256; - impl FalconLoadParams for FwsecFirmware { fn imem_load_params(&self) -> FalconLoadTarget { FalconLoadTarget { @@ -218,11 +215,7 @@ impl FalconLoadParams for FwsecFirmware { FalconLoadTarget { src_start: self.desc.imem_load_size, dst_start: self.desc.dmem_phys_base, - // TODO[NUMM]: replace with `align_up` once it lands. - len: self - .desc - .dmem_load_size - .next_multiple_of(DMEM_LOAD_SIZE_ALIGN), + len: self.desc.dmem_load_size, } } @@ -253,8 +246,8 @@ impl FalconFirmware for FwsecFirmware { impl FirmwareDmaObject<FwsecFirmware, Unsigned> { fn new_fwsec(dev: &Device<device::Bound>, bios: &Vbios, cmd: FwsecCommand) -> Result<Self> { - let desc = bios.fwsec_image().header(dev)?; - let ucode = bios.fwsec_image().ucode(dev, desc)?; + let desc = bios.fwsec_image().header()?; + let ucode = bios.fwsec_image().ucode(desc)?; let mut dma_object = DmaObject::from_data(dev, ucode)?; let hdr_offset = (desc.imem_load_size + desc.interface_offset) as usize; @@ -343,7 +336,7 @@ impl FwsecFirmware { let ucode_dma = FirmwareDmaObject::<Self, _>::new_fwsec(dev, bios, cmd)?; // Patch signature if needed. - let desc = bios.fwsec_image().header(dev)?; + let desc = bios.fwsec_image().header()?; let ucode_signed = if desc.signature_count != 0 { let sig_base_img = (desc.imem_load_size + desc.pkc_data_offset) as usize; let desc_sig_versions = u32::from(desc.signature_versions); @@ -382,7 +375,7 @@ impl FwsecFirmware { dev_dbg!(dev, "patching signature with index {}\n", signature_idx); let signature = bios .fwsec_image() - .sigs(dev, desc) + .sigs(desc) .and_then(|sigs| sigs.get(signature_idx).ok_or(EINVAL))?; ucode_dma.patch_signature(signature, sig_base_img)? diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs new file mode 100644 index 000000000000..9b70095434c6 --- /dev/null +++ b/drivers/gpu/nova-core/firmware/gsp.rs @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::mem::size_of_val; + +use kernel::device; +use kernel::dma::{DataDirection, DmaAddress}; +use kernel::kvec; +use kernel::prelude::*; +use kernel::scatterlist::{Owned, SGTable}; + +use crate::dma::DmaObject; +use crate::firmware::riscv::RiscvFirmware; +use crate::gpu::{Architecture, Chipset}; +use crate::gsp::GSP_PAGE_SIZE; + +/// Ad-hoc and temporary module to extract sections from ELF images. +/// +/// Some firmware images are currently packaged as ELF files, where sections names are used as keys +/// to specific and related bits of data. Future firmware versions are scheduled to move away from +/// that scheme before nova-core becomes stable, which means this module will eventually be +/// removed. +mod elf { + use core::mem::size_of; + + use kernel::bindings; + use kernel::str::CStr; + use kernel::transmute::FromBytes; + + /// Newtype to provide a [`FromBytes`] implementation. + #[repr(transparent)] + struct Elf64Hdr(bindings::elf64_hdr); + // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. + unsafe impl FromBytes for Elf64Hdr {} + + #[repr(transparent)] + struct Elf64SHdr(bindings::elf64_shdr); + // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. + unsafe impl FromBytes for Elf64SHdr {} + + /// Tries to extract section with name `name` from the ELF64 image `elf`, and returns it. + pub(super) fn elf64_section<'a, 'b>(elf: &'a [u8], name: &'b str) -> Option<&'a [u8]> { + let hdr = &elf + .get(0..size_of::<bindings::elf64_hdr>()) + .and_then(Elf64Hdr::from_bytes)? + .0; + + // Get all the section headers. + let mut shdr = { + let shdr_num = usize::from(hdr.e_shnum); + let shdr_start = usize::try_from(hdr.e_shoff).ok()?; + let shdr_end = shdr_num + .checked_mul(size_of::<Elf64SHdr>()) + .and_then(|v| v.checked_add(shdr_start))?; + + elf.get(shdr_start..shdr_end) + .map(|slice| slice.chunks_exact(size_of::<Elf64SHdr>()))? + }; + + // Get the strings table. + let strhdr = shdr + .clone() + .nth(usize::from(hdr.e_shstrndx)) + .and_then(Elf64SHdr::from_bytes)?; + + // Find the section which name matches `name` and return it. + shdr.find(|&sh| { + let Some(hdr) = Elf64SHdr::from_bytes(sh) else { + return false; + }; + + let Some(name_idx) = strhdr + .0 + .sh_offset + .checked_add(u64::from(hdr.0.sh_name)) + .and_then(|idx| usize::try_from(idx).ok()) + else { + return false; + }; + + // Get the start of the name. + elf.get(name_idx..) + // Stop at the first `0`. + .and_then(|nstr| nstr.get(0..=nstr.iter().position(|b| *b == 0)?)) + // Convert into CStr. This should never fail because of the line above. + .and_then(|nstr| CStr::from_bytes_with_nul(nstr).ok()) + // Convert into str. + .and_then(|c_str| c_str.to_str().ok()) + // Check that the name matches. + .map(|str| str == name) + .unwrap_or(false) + }) + // Return the slice containing the section. + .and_then(|sh| { + let hdr = Elf64SHdr::from_bytes(sh)?; + let start = usize::try_from(hdr.0.sh_offset).ok()?; + let end = usize::try_from(hdr.0.sh_size) + .ok() + .and_then(|sh_size| start.checked_add(sh_size))?; + + elf.get(start..end) + }) + } +} + +/// GSP firmware with 3-level radix page tables for the GSP bootloader. +/// +/// The bootloader expects firmware to be mapped starting at address 0 in GSP's virtual address +/// space: +/// +/// ```text +/// Level 0: 1 page, 1 entry -> points to first level 1 page +/// Level 1: Multiple pages/entries -> each entry points to a level 2 page +/// Level 2: Multiple pages/entries -> each entry points to a firmware page +/// ``` +/// +/// Each page is 4KB, each entry is 8 bytes (64-bit DMA address). +/// Also known as "Radix3" firmware. +#[pin_data] +pub(crate) struct GspFirmware { + /// The GSP firmware inside a [`VVec`], device-mapped via a SG table. + #[pin] + fw: SGTable<Owned<VVec<u8>>>, + /// Level 2 page table whose entries contain DMA addresses of firmware pages. + #[pin] + level2: SGTable<Owned<VVec<u8>>>, + /// Level 1 page table whose entries contain DMA addresses of level 2 pages. + #[pin] + level1: SGTable<Owned<VVec<u8>>>, + /// Level 0 page table (single 4KB page) with one entry: DMA address of first level 1 page. + level0: DmaObject, + /// Size in bytes of the firmware contained in [`Self::fw`]. + size: usize, + /// Device-mapped GSP signatures matching the GPU's [`Chipset`]. + signatures: DmaObject, + /// GSP bootloader, verifies the GSP firmware before loading and running it. + bootloader: RiscvFirmware, +} + +impl GspFirmware { + /// Loads the GSP firmware binaries, map them into `dev`'s address-space, and creates the page + /// tables expected by the GSP bootloader to load it. + pub(crate) fn new<'a, 'b>( + dev: &'a device::Device<device::Bound>, + chipset: Chipset, + ver: &'b str, + ) -> Result<impl PinInit<Self, Error> + 'a> { + let fw = super::request_firmware(dev, chipset, "gsp", ver)?; + + let fw_section = elf::elf64_section(fw.data(), ".fwimage").ok_or(EINVAL)?; + + let sigs_section = match chipset.arch() { + Architecture::Ampere => ".fwsignature_ga10x", + _ => return Err(ENOTSUPP), + }; + let signatures = elf::elf64_section(fw.data(), sigs_section) + .ok_or(EINVAL) + .and_then(|data| DmaObject::from_data(dev, data))?; + + let size = fw_section.len(); + + // Move the firmware into a vmalloc'd vector and map it into the device address + // space. + let fw_vvec = VVec::with_capacity(fw_section.len(), GFP_KERNEL) + .and_then(|mut v| { + v.extend_from_slice(fw_section, GFP_KERNEL)?; + Ok(v) + }) + .map_err(|_| ENOMEM)?; + + let bl = super::request_firmware(dev, chipset, "bootloader", ver)?; + let bootloader = RiscvFirmware::new(dev, &bl)?; + + Ok(try_pin_init!(Self { + fw <- SGTable::new(dev, fw_vvec, DataDirection::ToDevice, GFP_KERNEL), + level2 <- { + // Allocate the level 2 page table, map the firmware onto it, and map it into the + // device address space. + VVec::<u8>::with_capacity( + fw.iter().count() * core::mem::size_of::<u64>(), + GFP_KERNEL, + ) + .map_err(|_| ENOMEM) + .and_then(|level2| map_into_lvl(&fw, level2)) + .map(|level2| SGTable::new(dev, level2, DataDirection::ToDevice, GFP_KERNEL))? + }, + level1 <- { + // Allocate the level 1 page table, map the level 2 page table onto it, and map it + // into the device address space. + VVec::<u8>::with_capacity( + level2.iter().count() * core::mem::size_of::<u64>(), + GFP_KERNEL, + ) + .map_err(|_| ENOMEM) + .and_then(|level1| map_into_lvl(&level2, level1)) + .map(|level1| SGTable::new(dev, level1, DataDirection::ToDevice, GFP_KERNEL))? + }, + level0: { + // Allocate the level 0 page table as a device-visible DMA object, and map the + // level 1 page table onto it. + + // Level 0 page table data. + let mut level0_data = kvec![0u8; GSP_PAGE_SIZE]?; + + // Fill level 1 page entry. + #[allow(clippy::useless_conversion)] + let level1_entry = u64::from(level1.iter().next().unwrap().dma_address()); + let dst = &mut level0_data[..size_of_val(&level1_entry)]; + dst.copy_from_slice(&level1_entry.to_le_bytes()); + + // Turn the level0 page table into a [`DmaObject`]. + DmaObject::from_data(dev, &level0_data)? + }, + size, + signatures, + bootloader, + })) + } + + #[expect(unused)] + /// Returns the DMA handle of the radix3 level 0 page table. + pub(crate) fn radix3_dma_handle(&self) -> DmaAddress { + self.level0.dma_handle() + } +} + +/// Build a page table from a scatter-gather list. +/// +/// Takes each DMA-mapped region from `sg_table` and writes page table entries +/// for all 4KB pages within that region. For example, a 16KB SG entry becomes +/// 4 consecutive page table entries. +fn map_into_lvl(sg_table: &SGTable<Owned<VVec<u8>>>, mut dst: VVec<u8>) -> Result<VVec<u8>> { + for sg_entry in sg_table.iter() { + // Number of pages we need to map. + let num_pages = (sg_entry.dma_len() as usize).div_ceil(GSP_PAGE_SIZE); + + for i in 0..num_pages { + let entry = sg_entry.dma_address() + (i as u64 * GSP_PAGE_SIZE as u64); + dst.extend_from_slice(&entry.to_le_bytes(), GFP_KERNEL)?; + } + } + + Ok(dst) +} diff --git a/drivers/gpu/nova-core/firmware/riscv.rs b/drivers/gpu/nova-core/firmware/riscv.rs new file mode 100644 index 000000000000..afb08f5bc4ba --- /dev/null +++ b/drivers/gpu/nova-core/firmware/riscv.rs @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Support for firmware binaries designed to run on a RISC-V core. Such firmwares files have a +//! dedicated header. + +use core::mem::size_of; + +use kernel::device; +use kernel::firmware::Firmware; +use kernel::prelude::*; +use kernel::transmute::FromBytes; + +use crate::dma::DmaObject; +use crate::firmware::BinFirmware; + +/// Descriptor for microcode running on a RISC-V core. +#[repr(C)] +#[derive(Debug)] +struct RmRiscvUCodeDesc { + version: u32, + bootloader_offset: u32, + bootloader_size: u32, + bootloader_param_offset: u32, + bootloader_param_size: u32, + riscv_elf_offset: u32, + riscv_elf_size: u32, + app_version: u32, + manifest_offset: u32, + manifest_size: u32, + monitor_data_offset: u32, + monitor_data_size: u32, + monitor_code_offset: u32, + monitor_code_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for RmRiscvUCodeDesc {} + +impl RmRiscvUCodeDesc { + /// Interprets the header of `bin_fw` as a [`RmRiscvUCodeDesc`] and returns it. + /// + /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. + fn new(bin_fw: &BinFirmware<'_>) -> Result<Self> { + let offset = bin_fw.hdr.header_offset as usize; + + bin_fw + .fw + .get(offset..offset + size_of::<Self>()) + .and_then(Self::from_bytes_copy) + .ok_or(EINVAL) + } +} + +/// A parsed firmware for a RISC-V core, ready to be loaded and run. +#[expect(unused)] +pub(crate) struct RiscvFirmware { + /// Offset at which the code starts in the firmware image. + code_offset: u32, + /// Offset at which the data starts in the firmware image. + data_offset: u32, + /// Offset at which the manifest starts in the firmware image. + manifest_offset: u32, + /// Application version. + app_version: u32, + /// Device-mapped firmware image. + ucode: DmaObject, +} + +impl RiscvFirmware { + /// Parses the RISC-V firmware image contained in `fw`. + pub(crate) fn new(dev: &device::Device<device::Bound>, fw: &Firmware) -> Result<Self> { + let bin_fw = BinFirmware::new(fw)?; + + let riscv_desc = RmRiscvUCodeDesc::new(&bin_fw)?; + + let ucode = { + let start = bin_fw.hdr.data_offset as usize; + let len = bin_fw.hdr.data_size as usize; + + DmaObject::from_data(dev, fw.data().get(start..start + len).ok_or(EINVAL)?)? + }; + + Ok(Self { + ucode, + code_offset: riscv_desc.monitor_code_offset, + data_offset: riscv_desc.monitor_data_offset, + manifest_offset: riscv_desc.manifest_offset, + app_version: riscv_desc.app_version, + }) + } +} diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index b5c9786619a9..5da9ad726483 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -3,15 +3,11 @@ use kernel::{device, devres::Devres, error::code::*, pci, prelude::*, sync::Arc}; use crate::driver::Bar0; -use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon}; -use crate::fb::FbLayout; +use crate::falcon::{gsp::Gsp as GspFalcon, sec2::Sec2 as Sec2Falcon, Falcon}; use crate::fb::SysmemFlush; -use crate::firmware::fwsec::{FwsecCommand, FwsecFirmware}; -use crate::firmware::{Firmware, FIRMWARE_VERSION}; use crate::gfw; +use crate::gsp::Gsp; use crate::regs; -use crate::util; -use crate::vbios::Vbios; use core::fmt; macro_rules! define_chipset { @@ -28,13 +24,23 @@ macro_rules! define_chipset { $( Chipset::$variant, )* ]; - pub(crate) const NAMES: [&'static str; Self::ALL.len()] = [ - $( util::const_bytes_to_str( - util::to_lowercase_bytes::<{ stringify!($variant).len() }>( - stringify!($variant) - ).as_slice() - ), )* - ]; + ::kernel::macros::paste!( + /// Returns the name of this chipset, in lowercase. + /// + /// # Examples + /// + /// ``` + /// let chipset = Chipset::GA102; + /// assert_eq!(chipset.name(), "ga102"); + /// ``` + pub(crate) const fn name(&self) -> &'static str { + match *self { + $( + Chipset::$variant => stringify!([<$variant:lower>]), + )* + } + } + ); } // TODO[FPRI]: replace with something like derive(FromPrimitive) @@ -163,150 +169,74 @@ impl Spec { } /// Structure holding the resources required to operate the GPU. -#[pin_data(PinnedDrop)] +#[pin_data] pub(crate) struct Gpu { spec: Spec, /// MMIO mapping of PCI BAR 0 bar: Arc<Devres<Bar0>>, - fw: Firmware, /// System memory page required for flushing all pending GPU-side memory writes done through /// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation). sysmem_flush: SysmemFlush, -} - -#[pinned_drop] -impl PinnedDrop for Gpu { - fn drop(self: Pin<&mut Self>) { - // Unregister the sysmem flush page before we release it. - self.bar - .try_access_with(|b| self.sysmem_flush.unregister(b)); - } + /// GSP falcon instance, used for GSP boot up and cleanup. + gsp_falcon: Falcon<GspFalcon>, + /// SEC2 falcon instance, used for GSP boot up and cleanup. + sec2_falcon: Falcon<Sec2Falcon>, + /// GSP runtime data. Temporarily an empty placeholder. + #[pin] + gsp: Gsp, } impl Gpu { - /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly - /// created the WPR2 region. - /// - /// TODO: this needs to be moved into a larger type responsible for booting the whole GSP - /// (`GspBooter`?). - fn run_fwsec_frts( - dev: &device::Device<device::Bound>, - falcon: &Falcon<Gsp>, - bar: &Bar0, - bios: &Vbios, - fb_layout: &FbLayout, - ) -> Result<()> { - // Check that the WPR2 region does not already exists - if it does, we cannot run - // FWSEC-FRTS until the GPU is reset. - if regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound() != 0 { - dev_err!( - dev, - "WPR2 region already exists - GPU needs to be reset to proceed\n" - ); - return Err(EBUSY); - } + pub(crate) fn new<'a>( + pdev: &'a pci::Device<device::Bound>, + devres_bar: Arc<Devres<Bar0>>, + bar: &'a Bar0, + ) -> impl PinInit<Self, Error> + 'a { + try_pin_init!(Self { + spec: Spec::new(bar).inspect(|spec| { + dev_info!( + pdev.as_ref(), + "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", + spec.chipset, + spec.chipset.arch(), + spec.revision + ); + })?, - let fwsec_frts = FwsecFirmware::new( - dev, - falcon, - bar, - bios, - FwsecCommand::Frts { - frts_addr: fb_layout.frts.start, - frts_size: fb_layout.frts.end - fb_layout.frts.start, + // We must wait for GFW_BOOT completion before doing any significant setup on the GPU. + _: { + gfw::wait_gfw_boot_completion(bar) + .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?; }, - )?; - // Run FWSEC-FRTS to create the WPR2 region. - fwsec_frts.run(dev, falcon, bar)?; + sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?, - // SCRATCH_E contains the error code for FWSEC-FRTS. - let frts_status = regs::NV_PBUS_SW_SCRATCH_0E::read(bar).frts_err_code(); - if frts_status != 0 { - dev_err!( - dev, - "FWSEC-FRTS returned with error code {:#x}", - frts_status - ); + gsp_falcon: Falcon::new( + pdev.as_ref(), + spec.chipset, + bar, + spec.chipset > Chipset::GA100, + ) + .inspect(|falcon| falcon.clear_swgen0_intr(bar))?, - return Err(EIO); - } + sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset, bar, true)?, - // Check that the WPR2 region has been created as we requested. - let (wpr2_lo, wpr2_hi) = ( - regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO::read(bar).lower_bound(), - regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound(), - ); + gsp <- Gsp::new(), - match (wpr2_lo, wpr2_hi) { - (_, 0) => { - dev_err!(dev, "WPR2 region not created after running FWSEC-FRTS\n"); + _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? }, - Err(EIO) - } - (wpr2_lo, _) if wpr2_lo != fb_layout.frts.start => { - dev_err!( - dev, - "WPR2 region created at unexpected address {:#x}; expected {:#x}\n", - wpr2_lo, - fb_layout.frts.start, - ); - - Err(EIO) - } - (wpr2_lo, wpr2_hi) => { - dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi); - dev_dbg!(dev, "GPU instance built\n"); - - Ok(()) - } - } + bar: devres_bar, + }) } - pub(crate) fn new( - pdev: &pci::Device<device::Bound>, - devres_bar: Arc<Devres<Bar0>>, - ) -> Result<impl PinInit<Self>> { - let bar = devres_bar.access(pdev.as_ref())?; - let spec = Spec::new(bar)?; - let fw = Firmware::new(pdev.as_ref(), spec.chipset, FIRMWARE_VERSION)?; - - dev_info!( - pdev.as_ref(), - "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", - spec.chipset, - spec.chipset.arch(), - spec.revision - ); - - // We must wait for GFW_BOOT completion before doing any significant setup on the GPU. - gfw::wait_gfw_boot_completion(bar) - .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?; - - let sysmem_flush = SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?; - - let gsp_falcon = Falcon::<Gsp>::new( - pdev.as_ref(), - spec.chipset, - bar, - spec.chipset > Chipset::GA100, - )?; - gsp_falcon.clear_swgen0_intr(bar); - - let _sec2_falcon = Falcon::<Sec2>::new(pdev.as_ref(), spec.chipset, bar, true)?; - - let fb_layout = FbLayout::new(spec.chipset, bar)?; - dev_dbg!(pdev.as_ref(), "{:#x?}\n", fb_layout); - - let bios = Vbios::new(pdev, bar)?; - - Self::run_fwsec_frts(pdev.as_ref(), &gsp_falcon, bar, &bios, &fb_layout)?; - - Ok(pin_init!(Self { - spec, - bar: devres_bar, - fw, - sysmem_flush, - })) + /// Called when the corresponding [`Device`](device::Device) is unbound. + /// + /// Note: This method must only be called from `Driver::unbind`. + pub(crate) fn unbind(&self, dev: &device::Device<device::Core>) { + kernel::warn_on!(self + .bar + .access(dev) + .inspect(|bar| self.sysmem_flush.unregister(bar)) + .is_err()); } } diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs new file mode 100644 index 000000000000..64e472e7a9d3 --- /dev/null +++ b/drivers/gpu/nova-core/gsp.rs @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 + +mod boot; + +use kernel::prelude::*; + +mod fw; + +pub(crate) const GSP_PAGE_SHIFT: usize = 12; +pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT; + +/// GSP runtime data. +/// +/// This is an empty pinned placeholder for now. +#[pin_data] +pub(crate) struct Gsp {} + +impl Gsp { + pub(crate) fn new() -> impl PinInit<Self> { + pin_init!(Self {}) + } +} diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs new file mode 100644 index 000000000000..2800f3aee37d --- /dev/null +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::device; +use kernel::pci; +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon}; +use crate::fb::FbLayout; +use crate::firmware::{ + booter::{BooterFirmware, BooterKind}, + fwsec::{FwsecCommand, FwsecFirmware}, + gsp::GspFirmware, + FIRMWARE_VERSION, +}; +use crate::gpu::Chipset; +use crate::regs; +use crate::vbios::Vbios; + +impl super::Gsp { + /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly + /// created the WPR2 region. + fn run_fwsec_frts( + dev: &device::Device<device::Bound>, + falcon: &Falcon<Gsp>, + bar: &Bar0, + bios: &Vbios, + fb_layout: &FbLayout, + ) -> Result<()> { + // Check that the WPR2 region does not already exists - if it does, we cannot run + // FWSEC-FRTS until the GPU is reset. + if regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound() != 0 { + dev_err!( + dev, + "WPR2 region already exists - GPU needs to be reset to proceed\n" + ); + return Err(EBUSY); + } + + let fwsec_frts = FwsecFirmware::new( + dev, + falcon, + bar, + bios, + FwsecCommand::Frts { + frts_addr: fb_layout.frts.start, + frts_size: fb_layout.frts.end - fb_layout.frts.start, + }, + )?; + + // Run FWSEC-FRTS to create the WPR2 region. + fwsec_frts.run(dev, falcon, bar)?; + + // SCRATCH_E contains the error code for FWSEC-FRTS. + let frts_status = regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR::read(bar).frts_err_code(); + if frts_status != 0 { + dev_err!( + dev, + "FWSEC-FRTS returned with error code {:#x}", + frts_status + ); + + return Err(EIO); + } + + // Check that the WPR2 region has been created as we requested. + let (wpr2_lo, wpr2_hi) = ( + regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO::read(bar).lower_bound(), + regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound(), + ); + + match (wpr2_lo, wpr2_hi) { + (_, 0) => { + dev_err!(dev, "WPR2 region not created after running FWSEC-FRTS\n"); + + Err(EIO) + } + (wpr2_lo, _) if wpr2_lo != fb_layout.frts.start => { + dev_err!( + dev, + "WPR2 region created at unexpected address {:#x}; expected {:#x}\n", + wpr2_lo, + fb_layout.frts.start, + ); + + Err(EIO) + } + (wpr2_lo, wpr2_hi) => { + dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi); + dev_dbg!(dev, "GPU instance built\n"); + + Ok(()) + } + } + } + + /// Attempt to boot the GSP. + /// + /// This is a GPU-dependent and complex procedure that involves loading firmware files from + /// user-space, patching them with signatures, and building firmware-specific intricate data + /// structures that the GSP will use at runtime. + /// + /// Upon return, the GSP is up and running, and its runtime object given as return value. + pub(crate) fn boot( + self: Pin<&mut Self>, + pdev: &pci::Device<device::Bound>, + bar: &Bar0, + chipset: Chipset, + gsp_falcon: &Falcon<Gsp>, + sec2_falcon: &Falcon<Sec2>, + ) -> Result { + let dev = pdev.as_ref(); + + let bios = Vbios::new(dev, bar)?; + + let _gsp_fw = KBox::pin_init( + GspFirmware::new(dev, chipset, FIRMWARE_VERSION)?, + GFP_KERNEL, + )?; + + let fb_layout = FbLayout::new(chipset, bar)?; + dev_dbg!(dev, "{:#x?}\n", fb_layout); + + Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?; + + let _booter_loader = BooterFirmware::new( + dev, + BooterKind::Loader, + chipset, + FIRMWARE_VERSION, + sec2_falcon, + bar, + )?; + + Ok(()) + } +} diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs new file mode 100644 index 000000000000..34226dd00982 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 + +mod r570_144; + +// Alias to avoid repeating the version number with every use. +#[expect(unused)] +use r570_144 as bindings; diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144.rs b/drivers/gpu/nova-core/gsp/fw/r570_144.rs new file mode 100644 index 000000000000..35cb0370a7c9 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw/r570_144.rs @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Firmware bindings. +//! +//! Imports the generated bindings by `bindgen`. +//! +//! This module may not be directly used. Please abstract or re-export the needed symbols in the +//! parent module instead. + +#![cfg_attr(test, allow(deref_nullptr))] +#![cfg_attr(test, allow(unaligned_references))] +#![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] +#![allow( + dead_code, + unused_imports, + clippy::all, + clippy::undocumented_unsafe_blocks, + clippy::ptr_as_ptr, + clippy::ref_as_ptr, + missing_docs, + non_camel_case_types, + non_upper_case_globals, + non_snake_case, + improper_ctypes, + unreachable_pub, + unsafe_op_in_unsafe_fn +)] +use kernel::ffi; +include!("r570_144/bindings.rs"); diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs new file mode 100644 index 000000000000..cec594032515 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -0,0 +1 @@ +// SPDX-License-Identifier: GPL-2.0 diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index cb2bbb30cba1..fffcaee2249f 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -9,6 +9,7 @@ mod fb; mod firmware; mod gfw; mod gpu; +mod gsp; mod regs; mod util; mod vbios; diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index d49fddf6a3c6..206dab2e1335 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -5,11 +5,11 @@ #![allow(non_camel_case_types)] #[macro_use] -mod macros; +pub(crate) mod macros; use crate::falcon::{ DmaTrfCmdSize, FalconCoreRev, FalconCoreRevSubversion, FalconFbifMemType, FalconFbifTarget, - FalconModSelAlgo, FalconSecurityModel, PeregrineCoreSelect, + FalconModSelAlgo, FalconSecurityModel, PFalcon2Base, PFalconBase, PeregrineCoreSelect, }; use crate::gpu::{Architecture, Chipset}; use kernel::prelude::*; @@ -28,7 +28,7 @@ impl NV_PMC_BOOT_0 { /// Combines `architecture_0` and `architecture_1` to obtain the architecture of the chip. pub(crate) fn architecture(self) -> Result<Architecture> { Architecture::try_from( - self.architecture_0() | (self.architecture_1() << Self::ARCHITECTURE_0.len()), + self.architecture_0() | (self.architecture_1() << Self::ARCHITECTURE_0_RANGE.len()), ) } @@ -36,7 +36,8 @@ impl NV_PMC_BOOT_0 { pub(crate) fn chipset(self) -> Result<Chipset> { self.architecture() .map(|arch| { - ((arch as u32) << Self::IMPLEMENTATION.len()) | u32::from(self.implementation()) + ((arch as u32) << Self::IMPLEMENTATION_RANGE.len()) + | u32::from(self.implementation()) }) .and_then(Chipset::try_from) } @@ -44,8 +45,10 @@ impl NV_PMC_BOOT_0 { // PBUS -// TODO[REGA]: this is an array of registers. -register!(NV_PBUS_SW_SCRATCH_0E@0x00001438 { +register!(NV_PBUS_SW_SCRATCH @ 0x00001400[64] {}); + +register!(NV_PBUS_SW_SCRATCH_0E_FRTS_ERR => NV_PBUS_SW_SCRATCH[0xe], + "scratch register 0xe used as FRTS firmware error code" { 31:16 frts_err_code as u16; }); @@ -123,13 +126,12 @@ register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128, 0:0 read_protection_level0 as bool, "Set after FWSEC lowers its protection level"; }); -// TODO[REGA]: This is an array of registers. -register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05 @ 0x00118234 { - 31:0 value as u32; -}); +// OpenRM defines this as a register array, but doesn't specify its size and only uses its first +// element. Be conservative until we know the actual size or need to use more registers. +register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05 @ 0x00118234[1] {}); register!( - NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05, + NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05[0], "Scratch group 05 register 0 used as GFW boot progress indicator" { 7:0 progress as u8, "Progress of GFW boot (0xff means completed)"; } @@ -180,38 +182,40 @@ impl NV_PDISP_VGA_WORKSPACE_BASE { // FUSE -register!(NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION @ 0x00824100 { +pub(crate) const NV_FUSE_OPT_FPF_SIZE: usize = 16; + +register!(NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION @ 0x00824100[NV_FUSE_OPT_FPF_SIZE] { 15:0 data as u16; }); -register!(NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION @ 0x00824140 { +register!(NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION @ 0x00824140[NV_FUSE_OPT_FPF_SIZE] { 15:0 data as u16; }); -register!(NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION @ 0x008241c0 { +register!(NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION @ 0x008241c0[NV_FUSE_OPT_FPF_SIZE] { 15:0 data as u16; }); // PFALCON -register!(NV_PFALCON_FALCON_IRQSCLR @ +0x00000004 { +register!(NV_PFALCON_FALCON_IRQSCLR @ PFalconBase[0x00000004] { 4:4 halt as bool; 6:6 swgen0 as bool; }); -register!(NV_PFALCON_FALCON_MAILBOX0 @ +0x00000040 { +register!(NV_PFALCON_FALCON_MAILBOX0 @ PFalconBase[0x00000040] { 31:0 value as u32; }); -register!(NV_PFALCON_FALCON_MAILBOX1 @ +0x00000044 { +register!(NV_PFALCON_FALCON_MAILBOX1 @ PFalconBase[0x00000044] { 31:0 value as u32; }); -register!(NV_PFALCON_FALCON_RM @ +0x00000084 { +register!(NV_PFALCON_FALCON_RM @ PFalconBase[0x00000084] { 31:0 value as u32; }); -register!(NV_PFALCON_FALCON_HWCFG2 @ +0x000000f4 { +register!(NV_PFALCON_FALCON_HWCFG2 @ PFalconBase[0x000000f4] { 10:10 riscv as bool; 12:12 mem_scrubbing as bool, "Set to 0 after memory scrubbing is completed"; 31:31 reset_ready as bool, "Signal indicating that reset is completed (GA102+)"; @@ -224,17 +228,17 @@ impl NV_PFALCON_FALCON_HWCFG2 { } } -register!(NV_PFALCON_FALCON_CPUCTL @ +0x00000100 { +register!(NV_PFALCON_FALCON_CPUCTL @ PFalconBase[0x00000100] { 1:1 startcpu as bool; 4:4 halted as bool; 6:6 alias_en as bool; }); -register!(NV_PFALCON_FALCON_BOOTVEC @ +0x00000104 { +register!(NV_PFALCON_FALCON_BOOTVEC @ PFalconBase[0x00000104] { 31:0 value as u32; }); -register!(NV_PFALCON_FALCON_DMACTL @ +0x0000010c { +register!(NV_PFALCON_FALCON_DMACTL @ PFalconBase[0x0000010c] { 0:0 require_ctx as bool; 1:1 dmem_scrubbing as bool; 2:2 imem_scrubbing as bool; @@ -242,15 +246,15 @@ register!(NV_PFALCON_FALCON_DMACTL @ +0x0000010c { 7:7 secure_stat as bool; }); -register!(NV_PFALCON_FALCON_DMATRFBASE @ +0x00000110 { +register!(NV_PFALCON_FALCON_DMATRFBASE @ PFalconBase[0x00000110] { 31:0 base as u32; }); -register!(NV_PFALCON_FALCON_DMATRFMOFFS @ +0x00000114 { +register!(NV_PFALCON_FALCON_DMATRFMOFFS @ PFalconBase[0x00000114] { 23:0 offs as u32; }); -register!(NV_PFALCON_FALCON_DMATRFCMD @ +0x00000118 { +register!(NV_PFALCON_FALCON_DMATRFCMD @ PFalconBase[0x00000118] { 0:0 full as bool; 1:1 idle as bool; 3:2 sec as u8; @@ -261,60 +265,62 @@ register!(NV_PFALCON_FALCON_DMATRFCMD @ +0x00000118 { 16:16 set_dmtag as u8; }); -register!(NV_PFALCON_FALCON_DMATRFFBOFFS @ +0x0000011c { +register!(NV_PFALCON_FALCON_DMATRFFBOFFS @ PFalconBase[0x0000011c] { 31:0 offs as u32; }); -register!(NV_PFALCON_FALCON_DMATRFBASE1 @ +0x00000128 { +register!(NV_PFALCON_FALCON_DMATRFBASE1 @ PFalconBase[0x00000128] { 8:0 base as u16; }); -register!(NV_PFALCON_FALCON_HWCFG1 @ +0x0000012c { +register!(NV_PFALCON_FALCON_HWCFG1 @ PFalconBase[0x0000012c] { 3:0 core_rev as u8 ?=> FalconCoreRev, "Core revision"; 5:4 security_model as u8 ?=> FalconSecurityModel, "Security model"; 7:6 core_rev_subversion as u8 ?=> FalconCoreRevSubversion, "Core revision subversion"; }); -register!(NV_PFALCON_FALCON_CPUCTL_ALIAS @ +0x00000130 { +register!(NV_PFALCON_FALCON_CPUCTL_ALIAS @ PFalconBase[0x00000130] { 1:1 startcpu as bool; }); // Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the falcon // instance. -register!(NV_PFALCON_FALCON_ENGINE @ +0x000003c0 { +register!(NV_PFALCON_FALCON_ENGINE @ PFalconBase[0x000003c0] { 0:0 reset as bool; }); -// TODO[REGA]: this is an array of registers. -register!(NV_PFALCON_FBIF_TRANSCFG @ +0x00000600 { +register!(NV_PFALCON_FBIF_TRANSCFG @ PFalconBase[0x00000600[8]] { 1:0 target as u8 ?=> FalconFbifTarget; 2:2 mem_type as bool => FalconFbifMemType; }); -register!(NV_PFALCON_FBIF_CTL @ +0x00000624 { +register!(NV_PFALCON_FBIF_CTL @ PFalconBase[0x00000624] { 7:7 allow_phys_no_ctx as bool; }); -register!(NV_PFALCON2_FALCON_MOD_SEL @ +0x00001180 { +/* PFALCON2 */ + +register!(NV_PFALCON2_FALCON_MOD_SEL @ PFalcon2Base[0x00000180] { 7:0 algo as u8 ?=> FalconModSelAlgo; }); -register!(NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID @ +0x00001198 { +register!(NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID @ PFalcon2Base[0x00000198] { 7:0 ucode_id as u8; }); -register!(NV_PFALCON2_FALCON_BROM_ENGIDMASK @ +0x0000119c { +register!(NV_PFALCON2_FALCON_BROM_ENGIDMASK @ PFalcon2Base[0x0000019c] { 31:0 value as u32; }); -// TODO[REGA]: this is an array of registers. -register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ +0x00001210 { +// OpenRM defines this as a register array, but doesn't specify its size and only uses its first +// element. Be conservative until we know the actual size or need to use more registers. +register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ PFalcon2Base[0x00000210[1]] { 31:0 value as u32; }); // PRISCV -register!(NV_PRISCV_RISCV_BCR_CTRL @ +0x00001668 { +register!(NV_PRISCV_RISCV_BCR_CTRL @ PFalconBase[0x00001668] { 0:0 valid as bool; 4:4 core_select as bool => PeregrineCoreSelect; 8:8 br_fetch as bool; diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index a3e6de1779d4..754c14ee7f40 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -1,17 +1,27 @@ // SPDX-License-Identifier: GPL-2.0 -//! Macro to define register layout and accessors. +//! `register!` macro to define register layout and accessors. //! //! A single register typically includes several fields, which are accessed through a combination //! of bit-shift and mask operations that introduce a class of potential mistakes, notably because //! not all possible field values are necessarily valid. //! -//! The macro in this module allow to define, using an intruitive and readable syntax, a dedicated -//! type for each register with its own field accessors that can return an error is a field's value -//! is invalid. +//! The `register!` macro in this module provides an intuitive and readable syntax for defining a +//! dedicated type for each register. Each such type comes with its own field accessors that can +//! return an error if a field's value is invalid. -/// Defines a dedicated type for a register with an absolute offset, alongside with getter and -/// setter methods for its fields and methods to read and write it from an `Io` region. +/// Trait providing a base address to be added to the offset of a relative register to obtain +/// its actual offset. +/// +/// The `T` generic argument is used to distinguish which base to use, in case a type provides +/// several bases. It is given to the `register!` macro to restrict the use of the register to +/// implementors of this particular variant. +pub(crate) trait RegisterBase<T> { + const BASE: usize; +} + +/// Defines a dedicated type for a register with an absolute offset, including getter and setter +/// methods for its fields and methods to read and write it from an `Io` region. /// /// Example: /// @@ -24,7 +34,7 @@ /// ``` /// /// This defines a `BOOT_0` type which can be read or written from offset `0x100` of an `Io` -/// region. It is composed of 3 fields, for instance `minor_revision` is made of the 4 less +/// region. It is composed of 3 fields, for instance `minor_revision` is made of the 4 least /// significant bits of the register. Each field can be accessed and modified using accessor /// methods: /// @@ -33,130 +43,344 @@ /// let boot0 = BOOT_0::read(&bar); /// pr_info!("chip revision: {}.{}", boot0.major_revision(), boot0.minor_revision()); /// -/// // `Chipset::try_from` will be called with the value of the field and returns an error if the -/// // value is invalid. +/// // `Chipset::try_from` is called with the value of the `chipset` field and returns an +/// // error if it is invalid. /// let chipset = boot0.chipset()?; /// /// // Update some fields and write the value back. /// boot0.set_major_revision(3).set_minor_revision(10).write(&bar); /// -/// // Or just read and update the register in a single step: +/// // Or, just read and update the register in a single step: /// BOOT_0::alter(&bar, |r| r.set_major_revision(3).set_minor_revision(10)); /// ``` /// -/// Fields can be defined as follows: +/// Fields are defined as follows: /// -/// - `as <type>` simply returns the field value casted as the requested integer type, typically -/// `u32`, `u16`, `u8` or `bool`. Note that `bool` fields must have a range of 1 bit. +/// - `as <type>` simply returns the field value casted to <type>, typically `u32`, `u16`, `u8` or +/// `bool`. Note that `bool` fields must have a range of 1 bit. /// - `as <type> => <into_type>` calls `<into_type>`'s `From::<<type>>` implementation and returns /// the result. /// - `as <type> ?=> <try_into_type>` calls `<try_into_type>`'s `TryFrom::<<type>>` implementation -/// and returns the result. This is useful on fields for which not all values are value. +/// and returns the result. This is useful with fields for which not all values are valid. /// /// The documentation strings are optional. If present, they will be added to the type's /// definition, or the field getter and setter methods they are attached to. /// -/// Putting a `+` before the address of the register makes it relative to a base: the `read` and -/// `write` methods take a `base` argument that is added to the specified address before access, -/// and `try_read` and `try_write` methods are also created, allowing access with offsets unknown -/// at compile-time: +/// It is also possible to create a alias register by using the `=> ALIAS` syntax. This is useful +/// for cases where a register's interpretation depends on the context: /// /// ```no_run -/// register!(CPU_CTL @ +0x0000010, "CPU core control" { -/// 0:0 start as bool, "Start the CPU core"; +/// register!(SCRATCH @ 0x00000200, "Scratch register" { +/// 31:0 value as u32, "Raw value"; /// }); /// -/// // Flip the `start` switch for the CPU core which base address is at `CPU_BASE`. -/// let cpuctl = CPU_CTL::read(&bar, CPU_BASE); -/// pr_info!("CPU CTL: {:#x}", cpuctl); -/// cpuctl.set_start(true).write(&bar, CPU_BASE); +/// register!(SCRATCH_BOOT_STATUS => SCRATCH, "Boot status of the firmware" { +/// 0:0 completed as bool, "Whether the firmware has completed booting"; +/// }); /// ``` /// -/// It is also possible to create a alias register by using the `=> ALIAS` syntax. This is useful -/// for cases where a register's interpretation depends on the context: +/// In this example, `SCRATCH_0_BOOT_STATUS` uses the same I/O address as `SCRATCH`, while also +/// providing its own `completed` field. +/// +/// ## Relative registers +/// +/// A register can be defined as being accessible from a fixed offset of a provided base. For +/// instance, imagine the following I/O space: +/// +/// ```text +/// +-----------------------------+ +/// | ... | +/// | | +/// 0x100--->+------------CPU0-------------+ +/// | | +/// 0x110--->+-----------------------------+ +/// | CPU_CTL | +/// +-----------------------------+ +/// | ... | +/// | | +/// | | +/// 0x200--->+------------CPU1-------------+ +/// | | +/// 0x210--->+-----------------------------+ +/// | CPU_CTL | +/// +-----------------------------+ +/// | ... | +/// +-----------------------------+ +/// ``` +/// +/// `CPU0` and `CPU1` both have a `CPU_CTL` register that starts at offset `0x10` of their I/O +/// space segment. Since both instances of `CPU_CTL` share the same layout, we don't want to define +/// them twice and would prefer a way to select which one to use from a single definition +/// +/// This can be done using the `Base[Offset]` syntax when specifying the register's address. +/// +/// `Base` is an arbitrary type (typically a ZST) to be used as a generic parameter of the +/// [`RegisterBase`] trait to provide the base as a constant, i.e. each type providing a base for +/// this register needs to implement `RegisterBase<Base>`. Here is the above example translated +/// into code: /// /// ```no_run -/// register!(SCRATCH_0 @ 0x0000100, "Scratch register 0" { -/// 31:0 value as u32, "Raw value"; +/// // Type used to identify the base. +/// pub(crate) struct CpuCtlBase; /// -/// register!(SCRATCH_0_BOOT_STATUS => SCRATCH_0, "Boot status of the firmware" { -/// 0:0 completed as bool, "Whether the firmware has completed booting"; +/// // ZST describing `CPU0`. +/// struct Cpu0; +/// impl RegisterBase<CpuCtlBase> for Cpu0 { +/// const BASE: usize = 0x100; +/// } +/// // Singleton of `CPU0` used to identify it. +/// const CPU0: Cpu0 = Cpu0; +/// +/// // ZST describing `CPU1`. +/// struct Cpu1; +/// impl RegisterBase<CpuCtlBase> for Cpu1 { +/// const BASE: usize = 0x200; +/// } +/// // Singleton of `CPU1` used to identify it. +/// const CPU1: Cpu1 = Cpu1; +/// +/// // This makes `CPU_CTL` accessible from all implementors of `RegisterBase<CpuCtlBase>`. +/// register!(CPU_CTL @ CpuCtlBase[0x10], "CPU core control" { +/// 0:0 start as bool, "Start the CPU core"; +/// }); +/// +/// // The `read`, `write` and `alter` methods of relative registers take an extra `base` argument +/// // that is used to resolve its final address by adding its `BASE` to the offset of the +/// // register. +/// +/// // Start `CPU0`. +/// CPU_CTL::alter(bar, &CPU0, |r| r.set_start(true)); +/// +/// // Start `CPU1`. +/// CPU_CTL::alter(bar, &CPU1, |r| r.set_start(true)); +/// +/// // Aliases can also be defined for relative register. +/// register!(CPU_CTL_ALIAS => CpuCtlBase[CPU_CTL], "Alias to CPU core control" { +/// 1:1 alias_start as bool, "Start the aliased CPU core"; +/// }); +/// +/// // Start the aliased `CPU0`. +/// CPU_CTL_ALIAS::alter(bar, &CPU0, |r| r.set_alias_start(true)); +/// ``` +/// +/// ## Arrays of registers +/// +/// Some I/O areas contain consecutive values that can be interpreted in the same way. These areas +/// can be defined as an array of identical registers, allowing them to be accessed by index with +/// compile-time or runtime bound checking. Simply define their address as `Address[Size]`, and add +/// an `idx` parameter to their `read`, `write` and `alter` methods: +/// +/// ```no_run +/// # fn no_run() -> Result<(), Error> { +/// # fn get_scratch_idx() -> usize { +/// # 0x15 +/// # } +/// // Array of 64 consecutive registers with the same layout starting at offset `0x80`. +/// register!(SCRATCH @ 0x00000080[64], "Scratch registers" { +/// 31:0 value as u32; +/// }); +/// +/// // Read scratch register 0, i.e. I/O address `0x80`. +/// let scratch_0 = SCRATCH::read(bar, 0).value(); +/// // Read scratch register 15, i.e. I/O address `0x80 + (15 * 4)`. +/// let scratch_15 = SCRATCH::read(bar, 15).value(); +/// +/// // This is out of bounds and won't build. +/// // let scratch_128 = SCRATCH::read(bar, 128).value(); +/// +/// // Runtime-obtained array index. +/// let scratch_idx = get_scratch_idx(); +/// // Access on a runtime index returns an error if it is out-of-bounds. +/// let some_scratch = SCRATCH::try_read(bar, scratch_idx)?.value(); +/// +/// // Alias to a particular register in an array. +/// // Here `SCRATCH[8]` is used to convey the firmware exit code. +/// register!(FIRMWARE_STATUS => SCRATCH[8], "Firmware exit status code" { +/// 7:0 status as u8; +/// }); +/// +/// let status = FIRMWARE_STATUS::read(bar).status(); +/// +/// // Non-contiguous register arrays can be defined by adding a stride parameter. +/// // Here, each of the 16 registers of the array are separated by 8 bytes, meaning that the +/// // registers of the two declarations below are interleaved. +/// register!(SCRATCH_INTERLEAVED_0 @ 0x000000c0[16 ; 8], "Scratch registers bank 0" { +/// 31:0 value as u32; +/// }); +/// register!(SCRATCH_INTERLEAVED_1 @ 0x000000c4[16 ; 8], "Scratch registers bank 1" { +/// 31:0 value as u32; +/// }); +/// # Ok(()) +/// # } /// ``` /// -/// In this example, `SCRATCH_0_BOOT_STATUS` uses the same I/O address as `SCRATCH_0`, while also -/// providing its own `completed` method. +/// ## Relative arrays of registers +/// +/// Combining the two features described in the sections above, arrays of registers accessible from +/// a base can also be defined: +/// +/// ```no_run +/// # fn no_run() -> Result<(), Error> { +/// # fn get_scratch_idx() -> usize { +/// # 0x15 +/// # } +/// // Type used as parameter of `RegisterBase` to specify the base. +/// pub(crate) struct CpuCtlBase; +/// +/// // ZST describing `CPU0`. +/// struct Cpu0; +/// impl RegisterBase<CpuCtlBase> for Cpu0 { +/// const BASE: usize = 0x100; +/// } +/// // Singleton of `CPU0` used to identify it. +/// const CPU0: Cpu0 = Cpu0; +/// +/// // ZST describing `CPU1`. +/// struct Cpu1; +/// impl RegisterBase<CpuCtlBase> for Cpu1 { +/// const BASE: usize = 0x200; +/// } +/// // Singleton of `CPU1` used to identify it. +/// const CPU1: Cpu1 = Cpu1; +/// +/// // 64 per-cpu scratch registers, arranged as an contiguous array. +/// register!(CPU_SCRATCH @ CpuCtlBase[0x00000080[64]], "Per-CPU scratch registers" { +/// 31:0 value as u32; +/// }); +/// +/// let cpu0_scratch_0 = CPU_SCRATCH::read(bar, &Cpu0, 0).value(); +/// let cpu1_scratch_15 = CPU_SCRATCH::read(bar, &Cpu1, 15).value(); +/// +/// // This won't build. +/// // let cpu0_scratch_128 = CPU_SCRATCH::read(bar, &Cpu0, 128).value(); +/// +/// // Runtime-obtained array index. +/// let scratch_idx = get_scratch_idx(); +/// // Access on a runtime value returns an error if it is out-of-bounds. +/// let cpu0_some_scratch = CPU_SCRATCH::try_read(bar, &Cpu0, scratch_idx)?.value(); +/// +/// // `SCRATCH[8]` is used to convey the firmware exit code. +/// register!(CPU_FIRMWARE_STATUS => CpuCtlBase[CPU_SCRATCH[8]], +/// "Per-CPU firmware exit status code" { +/// 7:0 status as u8; +/// }); +/// +/// let cpu0_status = CPU_FIRMWARE_STATUS::read(bar, &Cpu0).status(); +/// +/// // Non-contiguous register arrays can be defined by adding a stride parameter. +/// // Here, each of the 16 registers of the array are separated by 8 bytes, meaning that the +/// // registers of the two declarations below are interleaved. +/// register!(CPU_SCRATCH_INTERLEAVED_0 @ CpuCtlBase[0x00000d00[16 ; 8]], +/// "Scratch registers bank 0" { +/// 31:0 value as u32; +/// }); +/// register!(CPU_SCRATCH_INTERLEAVED_1 @ CpuCtlBase[0x00000d04[16 ; 8]], +/// "Scratch registers bank 1" { +/// 31:0 value as u32; +/// }); +/// # Ok(()) +/// # } +/// ``` macro_rules! register { // Creates a register at a fixed offset of the MMIO space. + ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_fixed $name @ $offset); + }; + + // Creates an alias register of fixed offset register `alias` with its own fields. + ($name:ident => $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_fixed $name @ $alias::OFFSET); + }; + + // Creates a register at a relative offset from a base address provider. + ($name:ident @ $base:ty [ $offset:literal ] $(, $comment:literal)? { $($fields:tt)* } ) => { + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative $name @ $base [ $offset ]); + }; + + // Creates an alias register of relative offset register `alias` with its own fields. + ($name:ident => $base:ty [ $alias:ident ] $(, $comment:literal)? { $($fields:tt)* }) => { + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative $name @ $base [ $alias::OFFSET ]); + }; + + // Creates an array of registers at a fixed offset of the MMIO space. ( - $name:ident @ $offset:literal $(, $comment:literal)? { + $name:ident @ $offset:literal [ $size:expr ; $stride:expr ] $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@common $name @ $offset $(, $comment)?); - register!(@field_accessors $name { $($fields)* }); - register!(@io $name @ $offset); + static_assert!(::core::mem::size_of::<u32>() <= $stride); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_array $name @ $offset [ $size ; $stride ]); }; - // Creates a alias register of fixed offset register `alias` with its own fields. + // Shortcut for contiguous array of registers (stride == size of element). ( - $name:ident => $alias:ident $(, $comment:literal)? { + $name:ident @ $offset:literal [ $size:expr ] $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@common $name @ $alias::OFFSET $(, $comment)?); - register!(@field_accessors $name { $($fields)* }); - register!(@io $name @ $alias::OFFSET); + register!($name @ $offset [ $size ; ::core::mem::size_of::<u32>() ] $(, $comment)? { + $($fields)* + } ); + }; + + // Creates an array of registers at a relative offset from a base address provider. + ( + $name:ident @ $base:ty [ $offset:literal [ $size:expr ; $stride:expr ] ] + $(, $comment:literal)? { $($fields:tt)* } + ) => { + static_assert!(::core::mem::size_of::<u32>() <= $stride); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative_array $name @ $base [ $offset [ $size ; $stride ] ]); }; - // Creates a register at a relative offset from a base address. + // Shortcut for contiguous array of relative registers (stride == size of element). ( - $name:ident @ + $offset:literal $(, $comment:literal)? { + $name:ident @ $base:ty [ $offset:literal [ $size:expr ] ] $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@common $name @ $offset $(, $comment)?); - register!(@field_accessors $name { $($fields)* }); - register!(@io$name @ + $offset); + register!($name @ $base [ $offset [ $size ; ::core::mem::size_of::<u32>() ] ] + $(, $comment)? { $($fields)* } ); }; - // Creates a alias register of relative offset register `alias` with its own fields. + // Creates an alias of register `idx` of relative array of registers `alias` with its own + // fields. ( - $name:ident => + $alias:ident $(, $comment:literal)? { + $name:ident => $base:ty [ $alias:ident [ $idx:expr ] ] $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@common $name @ $alias::OFFSET $(, $comment)?); - register!(@field_accessors $name { $($fields)* }); - register!(@io $name @ + $alias::OFFSET); + static_assert!($idx < $alias::SIZE); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative $name @ $base [ $alias::OFFSET + $idx * $alias::STRIDE ] ); + }; + + // Creates an alias of register `idx` of array of registers `alias` with its own fields. + // This rule belongs to the (non-relative) register arrays set, but needs to be put last + // to avoid it being interpreted in place of the relative register array alias rule. + ($name:ident => $alias:ident [ $idx:expr ] $(, $comment:literal)? { $($fields:tt)* }) => { + static_assert!($idx < $alias::SIZE); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_fixed $name @ $alias::OFFSET + $idx * $alias::STRIDE ); }; // All rules below are helpers. - // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, `BitOr`, - // and conversion to regular `u32`). - (@common $name:ident @ $offset:expr $(, $comment:literal)?) => { + // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, + // `Default`, `BitOr`, and conversion to the value type) and field accessor methods. + (@core $name:ident $(, $comment:literal)? { $($fields:tt)* }) => { $( #[doc=$comment] )? #[repr(transparent)] - #[derive(Clone, Copy, Default)] + #[derive(Clone, Copy)] pub(crate) struct $name(u32); - #[allow(dead_code)] - impl $name { - pub(crate) const OFFSET: usize = $offset; - } - - // TODO[REGA]: display the raw hex value, then the value of all the fields. This requires - // matching the fields, which will complexify the syntax considerably... - impl ::core::fmt::Debug for $name { - fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { - f.debug_tuple(stringify!($name)) - .field(&format_args!("0x{0:x}", &self.0)) - .finish() - } - } - impl ::core::ops::BitOr for $name { type Output = Self; @@ -170,6 +394,34 @@ macro_rules! register { reg.0 } } + + register!(@fields_dispatcher $name { $($fields)* }); + }; + + // Captures the fields and passes them to all the implementers that require field information. + // + // Used to simplify the matching rules for implementers, so they don't need to match the entire + // complex fields rule even though they only make use of part of it. + (@fields_dispatcher $name:ident { + $($hi:tt:$lo:tt $field:ident as $type:tt + $(?=> $try_into_type:ty)? + $(=> $into_type:ty)? + $(, $comment:literal)? + ; + )* + } + ) => { + register!(@field_accessors $name { + $( + $hi:$lo $field as $type + $(?=> $try_into_type)? + $(=> $into_type)? + $(, $comment)? + ; + )* + }); + register!(@debug $name { $($field;)* }); + register!(@default $name { $($field;)* }); }; // Defines all the field getter/methods methods for `$name`. @@ -228,7 +480,7 @@ macro_rules! register { $(, $comment:literal)?; ) => { register!( - @leaf_accessor $name $hi:$lo $field as bool + @leaf_accessor $name $hi:$lo $field { |f| <$into_type>::from(if f != 0 { true } else { false }) } $into_type => $into_type $(, $comment)?; ); @@ -246,7 +498,7 @@ macro_rules! register { @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt ?=> $try_into_type:ty $(, $comment:literal)?; ) => { - register!(@leaf_accessor $name $hi:$lo $field as $type + register!(@leaf_accessor $name $hi:$lo $field { |f| <$try_into_type>::try_from(f as $type) } $try_into_type => ::core::result::Result< $try_into_type, @@ -260,11 +512,11 @@ macro_rules! register { @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt => $into_type:ty $(, $comment:literal)?; ) => { - register!(@leaf_accessor $name $hi:$lo $field as $type + register!(@leaf_accessor $name $hi:$lo $field { |f| <$into_type>::from(f as $type) } $into_type => $into_type $(, $comment)?;); }; - // Shortcut for fields defined as non-`bool` without the `=>` or `?=>` syntax. + // Shortcut for non-boolean fields defined without the `=>` or `?=>` syntax. ( @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt $(, $comment:literal)?; @@ -274,11 +526,11 @@ macro_rules! register { // Generates the accessor methods for a single field. ( - @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:ty + @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident { $process:expr } $to_type:ty => $res_type:ty $(, $comment:literal)?; ) => { ::kernel::macros::paste!( - const [<$field:upper>]: ::core::ops::RangeInclusive<u8> = $lo..=$hi; + const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive<u8> = $lo..=$hi; const [<$field:upper _MASK>]: u32 = ((((1 << $hi) - 1) << 1) + 1) - ((1 << $lo) - 1); const [<$field:upper _SHIFT>]: u32 = Self::[<$field:upper _MASK>].trailing_zeros(); ); @@ -287,7 +539,7 @@ macro_rules! register { #[doc="Returns the value of this field:"] #[doc=$comment] )? - #[inline] + #[inline(always)] pub(crate) fn $field(self) -> $res_type { ::kernel::macros::paste!( const MASK: u32 = $name::[<$field:upper _MASK>]; @@ -303,7 +555,7 @@ macro_rules! register { #[doc="Sets the value of this field:"] #[doc=$comment] )? - #[inline] + #[inline(always)] pub(crate) fn [<set_ $field>](mut self, value: $to_type) -> Self { const MASK: u32 = $name::[<$field:upper _MASK>]; const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; @@ -315,25 +567,64 @@ macro_rules! register { ); }; - // Creates the IO accessors for a fixed offset register. - (@io $name:ident @ $offset:expr) => { + // Generates the `Debug` implementation for `$name`. + (@debug $name:ident { $($field:ident;)* }) => { + impl ::core::fmt::Debug for $name { + fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { + f.debug_struct(stringify!($name)) + .field("<raw>", &format_args!("{:#x}", &self.0)) + $( + .field(stringify!($field), &self.$field()) + )* + .finish() + } + } + }; + + // Generates the `Default` implementation for `$name`. + (@default $name:ident { $($field:ident;)* }) => { + /// Returns a value for the register where all fields are set to their default value. + impl ::core::default::Default for $name { + fn default() -> Self { + #[allow(unused_mut)] + let mut value = Self(Default::default()); + + ::kernel::macros::paste!( + $( + value.[<set_ $field>](Default::default()); + )* + ); + + value + } + } + }; + + // Generates the IO accessors for a fixed offset register. + (@io_fixed $name:ident @ $offset:expr) => { #[allow(dead_code)] impl $name { - #[inline] + pub(crate) const OFFSET: usize = $offset; + + /// Read the register from its address in `io`. + #[inline(always)] pub(crate) fn read<const SIZE: usize, T>(io: &T) -> Self where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { Self(io.read32($offset)) } - #[inline] + /// Write the value contained in `self` to the register address in `io`. + #[inline(always)] pub(crate) fn write<const SIZE: usize, T>(self, io: &T) where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { io.write32(self.0, $offset) } - #[inline] + /// Read the register from its address in `io` and run `f` on its value to obtain a new + /// value to write back. + #[inline(always)] pub(crate) fn alter<const SIZE: usize, T, F>( io: &T, f: F, @@ -347,76 +638,322 @@ macro_rules! register { } }; - // Create the IO accessors for a relative offset register. - (@io $name:ident @ + $offset:literal) => { + // Generates the IO accessors for a relative offset register. + (@io_relative $name:ident @ $base:ty [ $offset:expr ]) => { #[allow(dead_code)] impl $name { - #[inline] + pub(crate) const OFFSET: usize = $offset; + + /// Read the register from `io`, using the base address provided by `base` and adding + /// the register's offset to it. + #[inline(always)] + pub(crate) fn read<const SIZE: usize, T, B>( + io: &T, + #[allow(unused_variables)] + base: &B, + ) -> Self where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + const OFFSET: usize = $name::OFFSET; + + let value = io.read32( + <B as crate::regs::macros::RegisterBase<$base>>::BASE + OFFSET + ); + + Self(value) + } + + /// Write the value contained in `self` to `io`, using the base address provided by + /// `base` and adding the register's offset to it. + #[inline(always)] + pub(crate) fn write<const SIZE: usize, T, B>( + self, + io: &T, + #[allow(unused_variables)] + base: &B, + ) where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + const OFFSET: usize = $name::OFFSET; + + io.write32( + self.0, + <B as crate::regs::macros::RegisterBase<$base>>::BASE + OFFSET + ); + } + + /// Read the register from `io`, using the base address provided by `base` and adding + /// the register's offset to it, then run `f` on its value to obtain a new value to + /// write back. + #[inline(always)] + pub(crate) fn alter<const SIZE: usize, T, B, F>( + io: &T, + base: &B, + f: F, + ) where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + F: ::core::ops::FnOnce(Self) -> Self, + { + let reg = f(Self::read(io, base)); + reg.write(io, base); + } + } + }; + + // Generates the IO accessors for an array of registers. + (@io_array $name:ident @ $offset:literal [ $size:expr ; $stride:expr ]) => { + #[allow(dead_code)] + impl $name { + pub(crate) const OFFSET: usize = $offset; + pub(crate) const SIZE: usize = $size; + pub(crate) const STRIDE: usize = $stride; + + /// Read the array register at index `idx` from its address in `io`. + #[inline(always)] pub(crate) fn read<const SIZE: usize, T>( io: &T, - base: usize, + idx: usize, ) -> Self where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { - Self(io.read32(base + $offset)) + build_assert!(idx < Self::SIZE); + + let offset = Self::OFFSET + (idx * Self::STRIDE); + let value = io.read32(offset); + + Self(value) } - #[inline] + /// Write the value contained in `self` to the array register with index `idx` in `io`. + #[inline(always)] pub(crate) fn write<const SIZE: usize, T>( self, io: &T, - base: usize, + idx: usize ) where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { - io.write32(self.0, base + $offset) + build_assert!(idx < Self::SIZE); + + let offset = Self::OFFSET + (idx * Self::STRIDE); + + io.write32(self.0, offset); } - #[inline] + /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a + /// new value to write back. + #[inline(always)] pub(crate) fn alter<const SIZE: usize, T, F>( io: &T, - base: usize, + idx: usize, f: F, ) where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, F: ::core::ops::FnOnce(Self) -> Self, { - let reg = f(Self::read(io, base)); - reg.write(io, base); + let reg = f(Self::read(io, idx)); + reg.write(io, idx); } - #[inline] + /// Read the array register at index `idx` from its address in `io`. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] pub(crate) fn try_read<const SIZE: usize, T>( io: &T, - base: usize, + idx: usize, ) -> ::kernel::error::Result<Self> where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { - io.try_read32(base + $offset).map(Self) + if idx < Self::SIZE { + Ok(Self::read(io, idx)) + } else { + Err(EINVAL) + } } - #[inline] + /// Write the value contained in `self` to the array register with index `idx` in `io`. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] pub(crate) fn try_write<const SIZE: usize, T>( self, io: &T, - base: usize, - ) -> ::kernel::error::Result<()> where + idx: usize, + ) -> ::kernel::error::Result where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { - io.try_write32(self.0, base + $offset) + if idx < Self::SIZE { + Ok(self.write(io, idx)) + } else { + Err(EINVAL) + } } - #[inline] + /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a + /// new value to write back. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] pub(crate) fn try_alter<const SIZE: usize, T, F>( io: &T, - base: usize, + idx: usize, + f: F, + ) -> ::kernel::error::Result where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + F: ::core::ops::FnOnce(Self) -> Self, + { + if idx < Self::SIZE { + Ok(Self::alter(io, idx, f)) + } else { + Err(EINVAL) + } + } + } + }; + + // Generates the IO accessors for an array of relative registers. + ( + @io_relative_array $name:ident @ $base:ty + [ $offset:literal [ $size:expr ; $stride:expr ] ] + ) => { + #[allow(dead_code)] + impl $name { + pub(crate) const OFFSET: usize = $offset; + pub(crate) const SIZE: usize = $size; + pub(crate) const STRIDE: usize = $stride; + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it. + #[inline(always)] + pub(crate) fn read<const SIZE: usize, T, B>( + io: &T, + #[allow(unused_variables)] + base: &B, + idx: usize, + ) -> Self where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + build_assert!(idx < Self::SIZE); + + let offset = <B as crate::regs::macros::RegisterBase<$base>>::BASE + + Self::OFFSET + (idx * Self::STRIDE); + let value = io.read32(offset); + + Self(value) + } + + /// Write the value contained in `self` to `io`, using the base address provided by + /// `base` and adding the offset of array register `idx` to it. + #[inline(always)] + pub(crate) fn write<const SIZE: usize, T, B>( + self, + io: &T, + #[allow(unused_variables)] + base: &B, + idx: usize + ) where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + build_assert!(idx < Self::SIZE); + + let offset = <B as crate::regs::macros::RegisterBase<$base>>::BASE + + Self::OFFSET + (idx * Self::STRIDE); + + io.write32(self.0, offset); + } + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it, then run `f` on its value to + /// obtain a new value to write back. + #[inline(always)] + pub(crate) fn alter<const SIZE: usize, T, B, F>( + io: &T, + base: &B, + idx: usize, + f: F, + ) where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + F: ::core::ops::FnOnce(Self) -> Self, + { + let reg = f(Self::read(io, base, idx)); + reg.write(io, base, idx); + } + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_read<const SIZE: usize, T, B>( + io: &T, + base: &B, + idx: usize, + ) -> ::kernel::error::Result<Self> where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + if idx < Self::SIZE { + Ok(Self::read(io, base, idx)) + } else { + Err(EINVAL) + } + } + + /// Write the value contained in `self` to `io`, using the base address provided by + /// `base` and adding the offset of array register `idx` to it. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_write<const SIZE: usize, T, B>( + self, + io: &T, + base: &B, + idx: usize, + ) -> ::kernel::error::Result where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + if idx < Self::SIZE { + Ok(self.write(io, base, idx)) + } else { + Err(EINVAL) + } + } + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it, then run `f` on its value to + /// obtain a new value to write back. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_alter<const SIZE: usize, T, B, F>( + io: &T, + base: &B, + idx: usize, f: F, - ) -> ::kernel::error::Result<()> where + ) -> ::kernel::error::Result where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, F: ::core::ops::FnOnce(Self) -> Self, { - let reg = f(Self::try_read(io, base)?); - reg.try_write(io, base) + if idx < Self::SIZE { + Ok(Self::alter(io, base, idx, f)) + } else { + Err(EINVAL) + } } } }; diff --git a/drivers/gpu/nova-core/util.rs b/drivers/gpu/nova-core/util.rs index 76cedf3710d7..bf35f00cb732 100644 --- a/drivers/gpu/nova-core/util.rs +++ b/drivers/gpu/nova-core/util.rs @@ -3,26 +3,6 @@ use kernel::prelude::*; use kernel::time::{Delta, Instant, Monotonic}; -pub(crate) const fn to_lowercase_bytes<const N: usize>(s: &str) -> [u8; N] { - let src = s.as_bytes(); - let mut dst = [0; N]; - let mut i = 0; - - while i < src.len() && i < N { - dst[i] = (src[i] as char).to_ascii_lowercase() as u8; - i += 1; - } - - dst -} - -pub(crate) const fn const_bytes_to_str(bytes: &[u8]) -> &str { - match core::str::from_utf8(bytes) { - Ok(string) => string, - Err(_) => kernel::build_error!("Bytes are not valid UTF-8."), - } -} - /// Wait until `cond` is true or `timeout` elapsed. /// /// When `cond` evaluates to `Some`, its return value is returned. diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index 5b5d9f38cbb3..e6a060714205 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -8,8 +8,8 @@ use crate::firmware::FalconUCodeDescV3; use core::convert::TryFrom; use kernel::device; use kernel::error::Result; -use kernel::pci; use kernel::prelude::*; +use kernel::types::ARef; /// The offset of the VBIOS ROM in the BAR0 space. const ROM_OFFSET: usize = 0x300000; @@ -31,7 +31,7 @@ const FALCON_UCODE_ENTRY_APPID_FWSEC_PROD: u8 = 0x85; /// Vbios Reader for constructing the VBIOS data. struct VbiosIterator<'a> { - pdev: &'a pci::Device, + dev: &'a device::Device, bar0: &'a Bar0, /// VBIOS data vector: As BIOS images are scanned, they are added to this vector for reference /// or copying into other data structures. It is the entire scanned contents of the VBIOS which @@ -46,9 +46,9 @@ struct VbiosIterator<'a> { } impl<'a> VbiosIterator<'a> { - fn new(pdev: &'a pci::Device, bar0: &'a Bar0) -> Result<Self> { + fn new(dev: &'a device::Device, bar0: &'a Bar0) -> Result<Self> { Ok(Self { - pdev, + dev, bar0, data: KVec::new(), current_offset: 0, @@ -64,7 +64,7 @@ impl<'a> VbiosIterator<'a> { // Ensure length is a multiple of 4 for 32-bit reads if len % core::mem::size_of::<u32>() != 0 { dev_err!( - self.pdev.as_ref(), + self.dev, "VBIOS read length {} is not a multiple of 4\n", len ); @@ -89,7 +89,7 @@ impl<'a> VbiosIterator<'a> { /// Read bytes at a specific offset, filling any gap. fn read_more_at_offset(&mut self, offset: usize, len: usize) -> Result { if offset > BIOS_MAX_SCAN_LEN { - dev_err!(self.pdev.as_ref(), "Error: exceeded BIOS scan limit.\n"); + dev_err!(self.dev, "Error: exceeded BIOS scan limit.\n"); return Err(EINVAL); } @@ -115,7 +115,7 @@ impl<'a> VbiosIterator<'a> { if offset + len > data_len { self.read_more_at_offset(offset, len).inspect_err(|e| { dev_err!( - self.pdev.as_ref(), + self.dev, "Failed to read more at offset {:#x}: {:?}\n", offset, e @@ -123,9 +123,9 @@ impl<'a> VbiosIterator<'a> { })?; } - BiosImage::new(self.pdev, &self.data[offset..offset + len]).inspect_err(|err| { + BiosImage::new(self.dev, &self.data[offset..offset + len]).inspect_err(|err| { dev_err!( - self.pdev.as_ref(), + self.dev, "Failed to {} at offset {:#x}: {:?}\n", context, offset, @@ -146,10 +146,7 @@ impl<'a> Iterator for VbiosIterator<'a> { } if self.current_offset > BIOS_MAX_SCAN_LEN { - dev_err!( - self.pdev.as_ref(), - "Error: exceeded BIOS scan limit, stopping scan\n" - ); + dev_err!(self.dev, "Error: exceeded BIOS scan limit, stopping scan\n"); return None; } @@ -192,18 +189,18 @@ impl Vbios { /// Probe for VBIOS extraction. /// /// Once the VBIOS object is built, `bar0` is not read for [`Vbios`] purposes anymore. - pub(crate) fn new(pdev: &pci::Device, bar0: &Bar0) -> Result<Vbios> { + pub(crate) fn new(dev: &device::Device, bar0: &Bar0) -> Result<Vbios> { // Images to extract from iteration let mut pci_at_image: Option<PciAtBiosImage> = None; let mut first_fwsec_image: Option<FwSecBiosBuilder> = None; let mut second_fwsec_image: Option<FwSecBiosBuilder> = None; // Parse all VBIOS images in the ROM - for image_result in VbiosIterator::new(pdev, bar0)? { + for image_result in VbiosIterator::new(dev, bar0)? { let full_image = image_result?; dev_dbg!( - pdev.as_ref(), + dev, "Found BIOS image: size: {:#x}, type: {}, last: {}\n", full_image.image_size_bytes(), full_image.image_type_str(), @@ -234,14 +231,14 @@ impl Vbios { (second_fwsec_image, first_fwsec_image, pci_at_image) { second - .setup_falcon_data(pdev, &pci_at, &first) - .inspect_err(|e| dev_err!(pdev.as_ref(), "Falcon data setup failed: {:?}\n", e))?; + .setup_falcon_data(&pci_at, &first) + .inspect_err(|e| dev_err!(dev, "Falcon data setup failed: {:?}\n", e))?; Ok(Vbios { - fwsec_image: second.build(pdev)?, + fwsec_image: second.build()?, }) } else { dev_err!( - pdev.as_ref(), + dev, "Missing required images for falcon data setup, skipping\n" ); Err(EINVAL) @@ -284,9 +281,9 @@ struct PcirStruct { } impl PcirStruct { - fn new(pdev: &pci::Device, data: &[u8]) -> Result<Self> { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { if data.len() < core::mem::size_of::<PcirStruct>() { - dev_err!(pdev.as_ref(), "Not enough data for PcirStruct\n"); + dev_err!(dev, "Not enough data for PcirStruct\n"); return Err(EINVAL); } @@ -295,11 +292,7 @@ impl PcirStruct { // Signature should be "PCIR" (0x52494350) or "NPDS" (0x5344504e). if &signature != b"PCIR" && &signature != b"NPDS" { - dev_err!( - pdev.as_ref(), - "Invalid signature for PcirStruct: {:?}\n", - signature - ); + dev_err!(dev, "Invalid signature for PcirStruct: {:?}\n", signature); return Err(EINVAL); } @@ -308,7 +301,7 @@ impl PcirStruct { let image_len = u16::from_le_bytes([data[16], data[17]]); if image_len == 0 { - dev_err!(pdev.as_ref(), "Invalid image length: 0\n"); + dev_err!(dev, "Invalid image length: 0\n"); return Err(EINVAL); } @@ -345,7 +338,7 @@ impl PcirStruct { /// its header) is in the [`PciAtBiosImage`] and the falcon data it is pointing to is in the /// [`FwSecBiosImage`]. #[derive(Debug, Clone, Copy)] -#[expect(dead_code)] +#[repr(C)] struct BitHeader { /// 0h: BIT Header Identifier (BMP=0x7FFF/BIT=0xB8FF) id: u16, @@ -365,7 +358,7 @@ struct BitHeader { impl BitHeader { fn new(data: &[u8]) -> Result<Self> { - if data.len() < 12 { + if data.len() < core::mem::size_of::<Self>() { return Err(EINVAL); } @@ -467,7 +460,7 @@ struct PciRomHeader { } impl PciRomHeader { - fn new(pdev: &pci::Device, data: &[u8]) -> Result<Self> { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { if data.len() < 26 { // Need at least 26 bytes to read pciDataStrucPtr and sizeOfBlock. return Err(EINVAL); @@ -479,7 +472,7 @@ impl PciRomHeader { match signature { 0xAA55 | 0xBB77 | 0x4E56 => {} _ => { - dev_err!(pdev.as_ref(), "ROM signature unknown {:#x}\n", signature); + dev_err!(dev, "ROM signature unknown {:#x}\n", signature); return Err(EINVAL); } } @@ -538,9 +531,9 @@ struct NpdeStruct { } impl NpdeStruct { - fn new(pdev: &pci::Device, data: &[u8]) -> Option<Self> { + fn new(dev: &device::Device, data: &[u8]) -> Option<Self> { if data.len() < core::mem::size_of::<Self>() { - dev_dbg!(pdev.as_ref(), "Not enough data for NpdeStruct\n"); + dev_dbg!(dev, "Not enough data for NpdeStruct\n"); return None; } @@ -549,17 +542,13 @@ impl NpdeStruct { // Signature should be "NPDE" (0x4544504E). if &signature != b"NPDE" { - dev_dbg!( - pdev.as_ref(), - "Invalid signature for NpdeStruct: {:?}\n", - signature - ); + dev_dbg!(dev, "Invalid signature for NpdeStruct: {:?}\n", signature); return None; } let subimage_len = u16::from_le_bytes([data[8], data[9]]); if subimage_len == 0 { - dev_dbg!(pdev.as_ref(), "Invalid subimage length: 0\n"); + dev_dbg!(dev, "Invalid subimage length: 0\n"); return None; } @@ -584,7 +573,7 @@ impl NpdeStruct { /// Try to find NPDE in the data, the NPDE is right after the PCIR. fn find_in_data( - pdev: &pci::Device, + dev: &device::Device, data: &[u8], rom_header: &PciRomHeader, pcir: &PcirStruct, @@ -596,12 +585,12 @@ impl NpdeStruct { // Check if we have enough data if npde_start + core::mem::size_of::<Self>() > data.len() { - dev_dbg!(pdev.as_ref(), "Not enough data for NPDE\n"); + dev_dbg!(dev, "Not enough data for NPDE\n"); return None; } // Try to create NPDE from the data - NpdeStruct::new(pdev, &data[npde_start..]) + NpdeStruct::new(dev, &data[npde_start..]) } } @@ -669,10 +658,10 @@ impl BiosImage { /// Create a [`BiosImageBase`] from a byte slice and convert it to a [`BiosImage`] which /// triggers the constructor of the specific BiosImage enum variant. - fn new(pdev: &pci::Device, data: &[u8]) -> Result<Self> { - let base = BiosImageBase::new(pdev, data)?; + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { + let base = BiosImageBase::new(dev, data)?; let image = base.into_image().inspect_err(|e| { - dev_err!(pdev.as_ref(), "Failed to create BiosImage: {:?}\n", e); + dev_err!(dev, "Failed to create BiosImage: {:?}\n", e); })?; Ok(image) @@ -754,9 +743,10 @@ impl TryFrom<BiosImageBase> for BiosImage { /// /// Each BiosImage type has a BiosImageBase type along with other image-specific fields. Note that /// Rust favors composition of types over inheritance. -#[derive(Debug)] #[expect(dead_code)] struct BiosImageBase { + /// Used for logging. + dev: ARef<device::Device>, /// PCI ROM Expansion Header rom_header: PciRomHeader, /// PCI Data Structure @@ -773,16 +763,16 @@ impl BiosImageBase { } /// Creates a new BiosImageBase from raw byte data. - fn new(pdev: &pci::Device, data: &[u8]) -> Result<Self> { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { // Ensure we have enough data for the ROM header. if data.len() < 26 { - dev_err!(pdev.as_ref(), "Not enough data for ROM header\n"); + dev_err!(dev, "Not enough data for ROM header\n"); return Err(EINVAL); } // Parse the ROM header. - let rom_header = PciRomHeader::new(pdev, &data[0..26]) - .inspect_err(|e| dev_err!(pdev.as_ref(), "Failed to create PciRomHeader: {:?}\n", e))?; + let rom_header = PciRomHeader::new(dev, &data[0..26]) + .inspect_err(|e| dev_err!(dev, "Failed to create PciRomHeader: {:?}\n", e))?; // Get the PCI Data Structure using the pointer from the ROM header. let pcir_offset = rom_header.pci_data_struct_offset as usize; @@ -791,28 +781,29 @@ impl BiosImageBase { .ok_or(EINVAL) .inspect_err(|_| { dev_err!( - pdev.as_ref(), + dev, "PCIR offset {:#x} out of bounds (data length: {})\n", pcir_offset, data.len() ); dev_err!( - pdev.as_ref(), + dev, "Consider reading more data for construction of BiosImage\n" ); })?; - let pcir = PcirStruct::new(pdev, pcir_data) - .inspect_err(|e| dev_err!(pdev.as_ref(), "Failed to create PcirStruct: {:?}\n", e))?; + let pcir = PcirStruct::new(dev, pcir_data) + .inspect_err(|e| dev_err!(dev, "Failed to create PcirStruct: {:?}\n", e))?; // Look for NPDE structure if this is not an NBSI image (type != 0x70). - let npde = NpdeStruct::find_in_data(pdev, data, &rom_header, &pcir); + let npde = NpdeStruct::find_in_data(dev, data, &rom_header, &pcir); // Create a copy of the data. let mut data_copy = KVec::new(); data_copy.extend_from_slice(data, GFP_KERNEL)?; Ok(BiosImageBase { + dev: dev.into(), rom_header, pcir, npde, @@ -848,7 +839,7 @@ impl PciAtBiosImage { /// /// This is just a 4 byte structure that contains a pointer to the Falcon data in the FWSEC /// image. - fn falcon_data_ptr(&self, pdev: &pci::Device) -> Result<u32> { + fn falcon_data_ptr(&self) -> Result<u32> { let token = self.get_bit_token(BIT_TOKEN_ID_FALCON_DATA)?; // Make sure we don't go out of bounds @@ -859,14 +850,14 @@ impl PciAtBiosImage { // read the 4 bytes at the offset specified in the token let offset = token.data_offset as usize; let bytes: [u8; 4] = self.base.data[offset..offset + 4].try_into().map_err(|_| { - dev_err!(pdev.as_ref(), "Failed to convert data slice to array"); + dev_err!(self.base.dev, "Failed to convert data slice to array"); EINVAL })?; let data_ptr = u32::from_le_bytes(bytes); if (data_ptr as usize) < self.base.data.len() { - dev_err!(pdev.as_ref(), "Falcon data pointer out of bounds\n"); + dev_err!(self.base.dev, "Falcon data pointer out of bounds\n"); return Err(EINVAL); } @@ -892,7 +883,7 @@ impl TryFrom<BiosImageBase> for PciAtBiosImage { /// The [`PmuLookupTableEntry`] structure is a single entry in the [`PmuLookupTable`]. /// /// See the [`PmuLookupTable`] description for more information. -#[expect(dead_code)] +#[repr(C, packed)] struct PmuLookupTableEntry { application_id: u8, target_id: u8, @@ -901,7 +892,7 @@ struct PmuLookupTableEntry { impl PmuLookupTableEntry { fn new(data: &[u8]) -> Result<Self> { - if data.len() < 6 { + if data.len() < core::mem::size_of::<Self>() { return Err(EINVAL); } @@ -928,7 +919,7 @@ struct PmuLookupTable { } impl PmuLookupTable { - fn new(pdev: &pci::Device, data: &[u8]) -> Result<Self> { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { if data.len() < 4 { return Err(EINVAL); } @@ -940,10 +931,7 @@ impl PmuLookupTable { let required_bytes = header_len + (entry_count * entry_len); if data.len() < required_bytes { - dev_err!( - pdev.as_ref(), - "PmuLookupTable data length less than required\n" - ); + dev_err!(dev, "PmuLookupTable data length less than required\n"); return Err(EINVAL); } @@ -956,11 +944,7 @@ impl PmuLookupTable { // Debug logging of entries (dumps the table data to dmesg) for i in (header_len..required_bytes).step_by(entry_len) { - dev_dbg!( - pdev.as_ref(), - "PMU entry: {:02x?}\n", - &data[i..][..entry_len] - ); + dev_dbg!(dev, "PMU entry: {:02x?}\n", &data[i..][..entry_len]); } Ok(PmuLookupTable { @@ -997,11 +981,10 @@ impl PmuLookupTable { impl FwSecBiosBuilder { fn setup_falcon_data( &mut self, - pdev: &pci::Device, pci_at_image: &PciAtBiosImage, first_fwsec: &FwSecBiosBuilder, ) -> Result { - let mut offset = pci_at_image.falcon_data_ptr(pdev)? as usize; + let mut offset = pci_at_image.falcon_data_ptr()? as usize; let mut pmu_in_first_fwsec = false; // The falcon data pointer assumes that the PciAt and FWSEC images @@ -1024,10 +1007,15 @@ impl FwSecBiosBuilder { self.falcon_data_offset = Some(offset); if pmu_in_first_fwsec { - self.pmu_lookup_table = - Some(PmuLookupTable::new(pdev, &first_fwsec.base.data[offset..])?); + self.pmu_lookup_table = Some(PmuLookupTable::new( + &self.base.dev, + &first_fwsec.base.data[offset..], + )?); } else { - self.pmu_lookup_table = Some(PmuLookupTable::new(pdev, &self.base.data[offset..])?); + self.pmu_lookup_table = Some(PmuLookupTable::new( + &self.base.dev, + &self.base.data[offset..], + )?); } match self @@ -1040,7 +1028,7 @@ impl FwSecBiosBuilder { let mut ucode_offset = entry.data as usize; ucode_offset -= pci_at_image.base.data.len(); if ucode_offset < first_fwsec.base.data.len() { - dev_err!(pdev.as_ref(), "Falcon Ucode offset not in second Fwsec.\n"); + dev_err!(self.base.dev, "Falcon Ucode offset not in second Fwsec.\n"); return Err(EINVAL); } ucode_offset -= first_fwsec.base.data.len(); @@ -1048,7 +1036,7 @@ impl FwSecBiosBuilder { } Err(e) => { dev_err!( - pdev.as_ref(), + self.base.dev, "PmuLookupTableEntry not found, error: {:?}\n", e ); @@ -1059,7 +1047,7 @@ impl FwSecBiosBuilder { } /// Build the final FwSecBiosImage from this builder - fn build(self, pdev: &pci::Device) -> Result<FwSecBiosImage> { + fn build(self) -> Result<FwSecBiosImage> { let ret = FwSecBiosImage { base: self.base, falcon_ucode_offset: self.falcon_ucode_offset.ok_or(EINVAL)?, @@ -1067,8 +1055,8 @@ impl FwSecBiosBuilder { if cfg!(debug_assertions) { // Print the desc header for debugging - let desc = ret.header(pdev.as_ref())?; - dev_dbg!(pdev.as_ref(), "PmuLookupTableEntry desc: {:#?}\n", desc); + let desc = ret.header()?; + dev_dbg!(ret.base.dev, "PmuLookupTableEntry desc: {:#?}\n", desc); } Ok(ret) @@ -1077,13 +1065,16 @@ impl FwSecBiosBuilder { impl FwSecBiosImage { /// Get the FwSec header ([`FalconUCodeDescV3`]). - pub(crate) fn header(&self, dev: &device::Device) -> Result<&FalconUCodeDescV3> { + pub(crate) fn header(&self) -> Result<&FalconUCodeDescV3> { // Get the falcon ucode offset that was found in setup_falcon_data. let falcon_ucode_offset = self.falcon_ucode_offset; // Make sure the offset is within the data bounds. if falcon_ucode_offset + core::mem::size_of::<FalconUCodeDescV3>() > self.base.data.len() { - dev_err!(dev, "fwsec-frts header not contained within BIOS bounds\n"); + dev_err!( + self.base.dev, + "fwsec-frts header not contained within BIOS bounds\n" + ); return Err(ERANGE); } @@ -1095,7 +1086,7 @@ impl FwSecBiosImage { let ver = (hdr & 0xff00) >> 8; if ver != 3 { - dev_err!(dev, "invalid fwsec firmware version: {:?}\n", ver); + dev_err!(self.base.dev, "invalid fwsec firmware version: {:?}\n", ver); return Err(EINVAL); } @@ -1115,7 +1106,7 @@ impl FwSecBiosImage { } /// Get the ucode data as a byte slice - pub(crate) fn ucode(&self, dev: &device::Device, desc: &FalconUCodeDescV3) -> Result<&[u8]> { + pub(crate) fn ucode(&self, desc: &FalconUCodeDescV3) -> Result<&[u8]> { let falcon_ucode_offset = self.falcon_ucode_offset; // The ucode data follows the descriptor. @@ -1127,15 +1118,16 @@ impl FwSecBiosImage { .data .get(ucode_data_offset..ucode_data_offset + size) .ok_or(ERANGE) - .inspect_err(|_| dev_err!(dev, "fwsec ucode data not contained within BIOS bounds\n")) + .inspect_err(|_| { + dev_err!( + self.base.dev, + "fwsec ucode data not contained within BIOS bounds\n" + ) + }) } /// Get the signatures as a byte slice - pub(crate) fn sigs( - &self, - dev: &device::Device, - desc: &FalconUCodeDescV3, - ) -> Result<&[Bcrt30Rsa3kSignature]> { + pub(crate) fn sigs(&self, desc: &FalconUCodeDescV3) -> Result<&[Bcrt30Rsa3kSignature]> { // The signatures data follows the descriptor. let sigs_data_offset = self.falcon_ucode_offset + core::mem::size_of::<FalconUCodeDescV3>(); let sigs_size = @@ -1144,7 +1136,7 @@ impl FwSecBiosImage { // Make sure the data is within bounds. if sigs_data_offset + sigs_size > self.base.data.len() { dev_err!( - dev, + self.base.dev, "fwsec signatures data not contained within BIOS bounds\n" ); return Err(ERANGE); diff --git a/include/drm/bridge/samsung-dsim.h b/include/drm/bridge/samsung-dsim.h index 9764d6eb5beb..31d7ed589233 100644 --- a/include/drm/bridge/samsung-dsim.h +++ b/include/drm/bridge/samsung-dsim.h @@ -29,6 +29,7 @@ enum samsung_dsim_type { DSIM_TYPE_EXYNOS5410, DSIM_TYPE_EXYNOS5422, DSIM_TYPE_EXYNOS5433, + DSIM_TYPE_EXYNOS7870, DSIM_TYPE_IMX8MM, DSIM_TYPE_IMX8MP, DSIM_TYPE_COUNT, @@ -53,15 +54,29 @@ struct samsung_dsim_transfer { struct samsung_dsim_driver_data { const unsigned int *reg_ofs; unsigned int plltmr_reg; + unsigned int has_legacy_status_reg:1; unsigned int has_freqband:1; unsigned int has_clklane_stop:1; unsigned int has_broken_fifoctrl_emptyhdr:1; + unsigned int has_sfrctrl:1; + struct clk_bulk_data *clk_data; unsigned int num_clks; unsigned int min_freq; unsigned int max_freq; + unsigned int wait_for_hdr_fifo; unsigned int wait_for_reset; unsigned int num_bits_resol; + unsigned int video_mode_bit; + unsigned int pll_stable_bit; + unsigned int esc_clken_bit; + unsigned int byte_clken_bit; + unsigned int tx_req_hsclk_bit; + unsigned int lane_esc_clk_bit; + unsigned int lane_esc_data_offset; unsigned int pll_p_offset; + unsigned int pll_m_offset; + unsigned int pll_s_offset; + unsigned int main_vsa_offset; const unsigned int *reg_values; unsigned int pll_fin_min; unsigned int pll_fin_max; @@ -91,7 +106,6 @@ struct samsung_dsim { void __iomem *reg_base; struct phy *phy; - struct clk **clks; struct clk *pll_clk; struct regulator_bulk_data supplies[2]; int irq; diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 513837632b7d..04afd7c21a82 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -13,15 +13,6 @@ #include <drm/drm_print.h> -#define range_overflows(start, size, max) ({ \ - typeof(start) start__ = (start); \ - typeof(size) size__ = (size); \ - typeof(max) max__ = (max); \ - (void)(&start__ == &size__); \ - (void)(&start__ == &max__); \ - start__ >= max__ || size__ > max__ - start__; \ -}) - #define DRM_BUDDY_RANGE_ALLOCATION BIT(0) #define DRM_BUDDY_TOPDOWN_ALLOCATION BIT(1) #define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index 59fd3f4d5995..778b2cca6c49 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -193,16 +193,6 @@ struct drm_device { char *unique; /** - * @struct_mutex: - * - * Lock for others (not &drm_minor.master and &drm_file.is_master) - * - * TODO: This lock used to be the BKL of the DRM subsystem. Move the - * lock into i915, which is the only remaining user. - */ - struct mutex struct_mutex; - - /** * @master_mutex: * * Lock for &drm_minor.master and &drm_file.is_master diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 154ed0dbb43f..725f95f7e416 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -239,6 +239,76 @@ static inline bool __must_check __must_check_overflow(bool overflow) __overflows_type(n, T)) /** + * range_overflows() - Check if a range is out of bounds + * @start: Start of the range. + * @size: Size of the range. + * @max: Exclusive upper boundary. + * + * A strict check to determine if the range [@start, @start + @size) is + * invalid with respect to the allowable range [0, @max). Any range + * starting at or beyond @max is considered an overflow, even if @size is 0. + * + * Returns: true if the range is out of bounds. + */ +#define range_overflows(start, size, max) ({ \ + typeof(start) start__ = (start); \ + typeof(size) size__ = (size); \ + typeof(max) max__ = (max); \ + (void)(&start__ == &size__); \ + (void)(&start__ == &max__); \ + start__ >= max__ || size__ > max__ - start__; \ +}) + +/** + * range_overflows_t() - Check if a range is out of bounds + * @type: Data type to use. + * @start: Start of the range. + * @size: Size of the range. + * @max: Exclusive upper boundary. + * + * Same as range_overflows() but forcing the parameters to @type. + * + * Returns: true if the range is out of bounds. + */ +#define range_overflows_t(type, start, size, max) \ + range_overflows((type)(start), (type)(size), (type)(max)) + +/** + * range_end_overflows() - Check if a range's endpoint is out of bounds + * @start: Start of the range. + * @size: Size of the range. + * @max: Exclusive upper boundary. + * + * Checks only if the endpoint of a range (@start + @size) exceeds @max. + * Unlike range_overflows(), a zero-sized range at the boundary (@start == @max) + * is not considered an overflow. Useful for iterator-style checks. + * + * Returns: true if the endpoint exceeds the boundary. + */ +#define range_end_overflows(start, size, max) ({ \ + typeof(start) start__ = (start); \ + typeof(size) size__ = (size); \ + typeof(max) max__ = (max); \ + (void)(&start__ == &size__); \ + (void)(&start__ == &max__); \ + start__ > max__ || size__ > max__ - start__; \ +}) + +/** + * range_end_overflows_t() - Check if a range's endpoint is out of bounds + * @type: Data type to use. + * @start: Start of the range. + * @size: Size of the range. + * @max: Exclusive upper boundary. + * + * Same as range_end_overflows() but forcing the parameters to @type. + * + * Returns: true if the endpoint exceeds the boundary. + */ +#define range_end_overflows_t(type, start, size, max) \ + range_end_overflows((type)(start), (type)(size), (type)(max)) + +/** * castable_to_type - like __same_type(), but also allows for casted literals * * @n: variable or constant value diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 84d60635e8a9..c2cc52ee9945 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -47,6 +47,7 @@ #include <linux/cpumask.h> #include <linux/cred.h> #include <linux/device/faux.h> +#include <linux/dma-direction.h> #include <linux/dma-mapping.h> #include <linux/errname.h> #include <linux/ethtool.h> @@ -57,6 +58,7 @@ #include <linux/jiffies.h> #include <linux/jump_label.h> #include <linux/mdio.h> +#include <linux/mm.h> #include <linux/miscdevice.h> #include <linux/of_device.h> #include <linux/pci.h> diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index 7cf7fe95e41d..e94542bf6ea7 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -39,6 +39,7 @@ #include "rcu.c" #include "refcount.c" #include "regulator.c" +#include "scatterlist.c" #include "security.c" #include "signal.c" #include "slab.c" diff --git a/rust/helpers/scatterlist.c b/rust/helpers/scatterlist.c new file mode 100644 index 000000000000..80c956ee09ab --- /dev/null +++ b/rust/helpers/scatterlist.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/dma-direction.h> + +dma_addr_t rust_helper_sg_dma_address(struct scatterlist *sg) +{ + return sg_dma_address(sg); +} + +unsigned int rust_helper_sg_dma_len(struct scatterlist *sg) +{ + return sg_dma_len(sg); +} + +struct scatterlist *rust_helper_sg_next(struct scatterlist *sg) +{ + return sg_next(sg); +} + +void rust_helper_dma_unmap_sgtable(struct device *dev, struct sg_table *sgt, + enum dma_data_direction dir, unsigned long attrs) +{ + return dma_unmap_sgtable(dev, sgt, dir, attrs); +} diff --git a/rust/kernel/alloc/allocator.rs b/rust/kernel/alloc/allocator.rs index 2692cf90c948..84ee7e9d7b0e 100644 --- a/rust/kernel/alloc/allocator.rs +++ b/rust/kernel/alloc/allocator.rs @@ -15,8 +15,12 @@ use core::ptr::NonNull; use crate::alloc::{AllocError, Allocator}; use crate::bindings; +use crate::page; use crate::pr_warn; +mod iter; +pub use self::iter::VmallocPageIter; + /// The contiguous kernel allocator. /// /// `Kmalloc` is typically used for physically contiguous allocations up to page size, but also @@ -142,6 +146,54 @@ unsafe impl Allocator for Kmalloc { } } +impl Vmalloc { + /// Convert a pointer to a [`Vmalloc`] allocation to a [`page::BorrowedPage`]. + /// + /// # Examples + /// + /// ``` + /// # use core::ptr::{NonNull, from_mut}; + /// # use kernel::{page, prelude::*}; + /// use kernel::alloc::allocator::Vmalloc; + /// + /// let mut vbox = VBox::<[u8; page::PAGE_SIZE]>::new_uninit(GFP_KERNEL)?; + /// + /// { + /// // SAFETY: By the type invariant of `Box` the inner pointer of `vbox` is non-null. + /// let ptr = unsafe { NonNull::new_unchecked(from_mut(&mut *vbox)) }; + /// + /// // SAFETY: + /// // `ptr` is a valid pointer to a `Vmalloc` allocation. + /// // `ptr` is valid for the entire lifetime of `page`. + /// let page = unsafe { Vmalloc::to_page(ptr.cast()) }; + /// + /// // SAFETY: There is no concurrent read or write to the same page. + /// unsafe { page.fill_zero_raw(0, page::PAGE_SIZE)? }; + /// } + /// # Ok::<(), Error>(()) + /// ``` + /// + /// # Safety + /// + /// - `ptr` must be a valid pointer to a [`Vmalloc`] allocation. + /// - `ptr` must remain valid for the entire duration of `'a`. + pub unsafe fn to_page<'a>(ptr: NonNull<u8>) -> page::BorrowedPage<'a> { + // SAFETY: `ptr` is a valid pointer to `Vmalloc` memory. + let page = unsafe { bindings::vmalloc_to_page(ptr.as_ptr().cast()) }; + + // SAFETY: `vmalloc_to_page` returns a valid pointer to a `struct page` for a valid pointer + // to `Vmalloc` memory. + let page = unsafe { NonNull::new_unchecked(page) }; + + // SAFETY: + // - `page` is a valid pointer to a `struct page`, given that by the safety requirements of + // this function `ptr` is a valid pointer to a `Vmalloc` allocation. + // - By the safety requirements of this function `ptr` is valid for the entire lifetime of + // `'a`. + unsafe { page::BorrowedPage::from_raw(page) } + } +} + // SAFETY: `realloc` delegates to `ReallocFunc::call`, which guarantees that // - memory remains valid until it is explicitly freed, // - passing a pointer to a valid memory allocation is OK, diff --git a/rust/kernel/alloc/allocator/iter.rs b/rust/kernel/alloc/allocator/iter.rs new file mode 100644 index 000000000000..5759f86029b7 --- /dev/null +++ b/rust/kernel/alloc/allocator/iter.rs @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 + +use super::Vmalloc; +use crate::page; +use core::marker::PhantomData; +use core::ptr::NonNull; + +/// An [`Iterator`] of [`page::BorrowedPage`] items owned by a [`Vmalloc`] allocation. +/// +/// # Guarantees +/// +/// The pages iterated by the [`Iterator`] appear in the order as they are mapped in the CPU's +/// virtual address space ascendingly. +/// +/// # Invariants +/// +/// - `buf` is a valid and [`page::PAGE_SIZE`] aligned pointer into a [`Vmalloc`] allocation. +/// - `size` is the number of bytes from `buf` until the end of the [`Vmalloc`] allocation `buf` +/// points to. +pub struct VmallocPageIter<'a> { + /// The base address of the [`Vmalloc`] buffer. + buf: NonNull<u8>, + /// The size of the buffer pointed to by `buf` in bytes. + size: usize, + /// The current page index of the [`Iterator`]. + index: usize, + _p: PhantomData<page::BorrowedPage<'a>>, +} + +impl<'a> Iterator for VmallocPageIter<'a> { + type Item = page::BorrowedPage<'a>; + + fn next(&mut self) -> Option<Self::Item> { + let offset = self.index.checked_mul(page::PAGE_SIZE)?; + + // Even though `self.size()` may be smaller than `Self::page_count() * page::PAGE_SIZE`, it + // is always a number between `(Self::page_count() - 1) * page::PAGE_SIZE` and + // `Self::page_count() * page::PAGE_SIZE`, hence the check below is sufficient. + if offset < self.size() { + self.index += 1; + } else { + return None; + } + + // TODO: Use `NonNull::add()` instead, once the minimum supported compiler version is + // bumped to 1.80 or later. + // + // SAFETY: `offset` is in the interval `[0, (self.page_count() - 1) * page::PAGE_SIZE]`, + // hence the resulting pointer is guaranteed to be within the same allocation. + let ptr = unsafe { self.buf.as_ptr().add(offset) }; + + // SAFETY: `ptr` is guaranteed to be non-null given that it is derived from `self.buf`. + let ptr = unsafe { NonNull::new_unchecked(ptr) }; + + // SAFETY: + // - `ptr` is a valid pointer to a `Vmalloc` allocation. + // - `ptr` is valid for the duration of `'a`. + Some(unsafe { Vmalloc::to_page(ptr) }) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + let remaining = self.page_count().saturating_sub(self.index); + + (remaining, Some(remaining)) + } +} + +impl<'a> VmallocPageIter<'a> { + /// Creates a new [`VmallocPageIter`] instance. + /// + /// # Safety + /// + /// - `buf` must be a [`page::PAGE_SIZE`] aligned pointer into a [`Vmalloc`] allocation. + /// - `buf` must be valid for at least the lifetime of `'a`. + /// - `size` must be the number of bytes from `buf` until the end of the [`Vmalloc`] allocation + /// `buf` points to. + pub unsafe fn new(buf: NonNull<u8>, size: usize) -> Self { + // INVARIANT: By the safety requirements, `buf` is a valid and `page::PAGE_SIZE` aligned + // pointer into a [`Vmalloc`] allocation. + Self { + buf, + size, + index: 0, + _p: PhantomData, + } + } + + /// Returns the size of the backing [`Vmalloc`] allocation in bytes. + /// + /// Note that this is the size the [`Vmalloc`] allocation has been allocated with. Hence, this + /// number may be smaller than `[`Self::page_count`] * [`page::PAGE_SIZE`]`. + #[inline] + pub fn size(&self) -> usize { + self.size + } + + /// Returns the number of pages owned by the backing [`Vmalloc`] allocation. + #[inline] + pub fn page_count(&self) -> usize { + self.size().div_ceil(page::PAGE_SIZE) + } +} diff --git a/rust/kernel/alloc/allocator_test.rs b/rust/kernel/alloc/allocator_test.rs index 90dd987d40e4..7b10e276f621 100644 --- a/rust/kernel/alloc/allocator_test.rs +++ b/rust/kernel/alloc/allocator_test.rs @@ -12,8 +12,10 @@ use super::{flags::*, AllocError, Allocator, Flags}; use core::alloc::Layout; use core::cmp; +use core::marker::PhantomData; use core::ptr; use core::ptr::NonNull; +use kernel::page; /// The userspace allocator based on libc. pub struct Cmalloc; @@ -33,6 +35,33 @@ impl Cmalloc { } } +pub struct VmallocPageIter<'a> { + _p: PhantomData<page::BorrowedPage<'a>>, +} + +impl<'a> Iterator for VmallocPageIter<'a> { + type Item = page::BorrowedPage<'a>; + + fn next(&mut self) -> Option<Self::Item> { + None + } +} + +impl<'a> VmallocPageIter<'a> { + #[allow(clippy::missing_safety_doc)] + pub unsafe fn new(_buf: NonNull<u8>, _size: usize) -> Self { + Self { _p: PhantomData } + } + + pub fn size(&self) -> usize { + 0 + } + + pub fn page_count(&self) -> usize { + 0 + } +} + extern "C" { #[link_name = "aligned_alloc"] fn libc_aligned_alloc(align: usize, size: usize) -> *mut crate::ffi::c_void; diff --git a/rust/kernel/alloc/kbox.rs b/rust/kernel/alloc/kbox.rs index 856d05aa60f1..b69ff4a1d748 100644 --- a/rust/kernel/alloc/kbox.rs +++ b/rust/kernel/alloc/kbox.rs @@ -3,7 +3,7 @@ //! Implementation of [`Box`]. #[allow(unused_imports)] // Used in doc comments. -use super::allocator::{KVmalloc, Kmalloc, Vmalloc}; +use super::allocator::{KVmalloc, Kmalloc, Vmalloc, VmallocPageIter}; use super::{AllocError, Allocator, Flags}; use core::alloc::Layout; use core::borrow::{Borrow, BorrowMut}; @@ -18,6 +18,7 @@ use core::result::Result; use crate::ffi::c_void; use crate::init::InPlaceInit; +use crate::page::AsPageIter; use crate::types::ForeignOwnable; use pin_init::{InPlaceWrite, Init, PinInit, ZeroableOption}; @@ -598,3 +599,40 @@ where unsafe { A::free(self.0.cast(), layout) }; } } + +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::alloc::allocator::VmallocPageIter; +/// use kernel::page::{AsPageIter, PAGE_SIZE}; +/// +/// let mut vbox = VBox::new((), GFP_KERNEL)?; +/// +/// assert!(vbox.page_iter().next().is_none()); +/// +/// let mut vbox = VBox::<[u8; PAGE_SIZE]>::new_uninit(GFP_KERNEL)?; +/// +/// let page = vbox.page_iter().next().expect("At least one page should be available.\n"); +/// +/// // SAFETY: There is no concurrent read or write to the same page. +/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? }; +/// # Ok::<(), Error>(()) +/// ``` +impl<T> AsPageIter for VBox<T> { + type Iter<'a> + = VmallocPageIter<'a> + where + T: 'a; + + fn page_iter(&mut self) -> Self::Iter<'_> { + let ptr = self.0.cast(); + let size = core::mem::size_of::<T>(); + + // SAFETY: + // - `ptr` is a valid pointer to the beginning of a `Vmalloc` allocation. + // - `ptr` is guaranteed to be valid for the lifetime of `'a`. + // - `size` is the size of the `Vmalloc` allocation `ptr` points to. + unsafe { VmallocPageIter::new(ptr, size) } + } +} diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index 3c72e0bdddb8..ac438e70a1ed 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -3,10 +3,11 @@ //! Implementation of [`Vec`]. use super::{ - allocator::{KVmalloc, Kmalloc, Vmalloc}, + allocator::{KVmalloc, Kmalloc, Vmalloc, VmallocPageIter}, layout::ArrayLayout, AllocError, Allocator, Box, Flags, }; +use crate::page::AsPageIter; use core::{ borrow::{Borrow, BorrowMut}, fmt, @@ -1017,6 +1018,43 @@ where } } +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::alloc::allocator::VmallocPageIter; +/// use kernel::page::{AsPageIter, PAGE_SIZE}; +/// +/// let mut vec = VVec::<u8>::new(); +/// +/// assert!(vec.page_iter().next().is_none()); +/// +/// vec.reserve(PAGE_SIZE, GFP_KERNEL)?; +/// +/// let page = vec.page_iter().next().expect("At least one page should be available.\n"); +/// +/// // SAFETY: There is no concurrent read or write to the same page. +/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? }; +/// # Ok::<(), Error>(()) +/// ``` +impl<T> AsPageIter for VVec<T> { + type Iter<'a> + = VmallocPageIter<'a> + where + T: 'a; + + fn page_iter(&mut self) -> Self::Iter<'_> { + let ptr = self.ptr.cast(); + let size = self.layout.size(); + + // SAFETY: + // - `ptr` is a valid pointer to the beginning of a `Vmalloc` allocation. + // - `ptr` is guaranteed to be valid for the lifetime of `'a`. + // - `size` is the size of the `Vmalloc` allocation `ptr` points to. + unsafe { VmallocPageIter::new(ptr, size) } + } +} + /// An [`Iterator`] implementation for [`Vec`] that moves elements out of a vector. /// /// This structure is created by the [`Vec::into_iter`] method on [`Vec`] (provided by the diff --git a/rust/kernel/alloc/layout.rs b/rust/kernel/alloc/layout.rs index 93ed514f7cc7..666accb7859c 100644 --- a/rust/kernel/alloc/layout.rs +++ b/rust/kernel/alloc/layout.rs @@ -98,6 +98,11 @@ impl<T> ArrayLayout<T> { pub const fn is_empty(&self) -> bool { self.len == 0 } + + /// Returns the size of the [`ArrayLayout`] in bytes. + pub const fn size(&self) -> usize { + self.len() * core::mem::size_of::<T>() + } } impl<T> From<ArrayLayout<T>> for Layout { diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index d04e3fcebafb..aea6b7e31170 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -135,11 +135,9 @@ impl<T: Send> Devres<T> { T: 'a, Error: From<E>, { - let callback = Self::devres_callback; - try_pin_init!(&this in Self { dev: dev.into(), - callback, + callback: Self::devres_callback, // INVARIANT: `inner` is properly initialized. inner <- Opaque::pin_init(try_pin_init!(Inner { devm <- Completion::new(), @@ -160,7 +158,7 @@ impl<T: Send> Devres<T> { // properly initialized, because we require `dev` (i.e. the *bound* device) to // live at least as long as the returned `impl PinInit<Self, Error>`. to_result(unsafe { - bindings::devm_add_action(dev.as_raw(), Some(callback), inner.cast()) + bindings::devm_add_action(dev.as_raw(), Some(*callback), inner.cast()) }).inspect_err(|_| { let inner = Opaque::cast_into(inner); diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index 2bc8ab51ec28..b2a6282876da 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -13,6 +13,16 @@ use crate::{ types::ARef, }; +/// DMA address type. +/// +/// Represents a bus address used for Direct Memory Access (DMA) operations. +/// +/// This is an alias of the kernel's `dma_addr_t`, which may be `u32` or `u64` depending on +/// `CONFIG_ARCH_DMA_ADDR_T_64BIT`. +/// +/// Note that this may be `u64` even on 32-bit architectures. +pub type DmaAddress = bindings::dma_addr_t; + /// Trait to be implemented by DMA capable bus devices. /// /// The [`dma::Device`](Device) trait should be implemented by bus specific device representations, @@ -244,6 +254,74 @@ pub mod attrs { pub const DMA_ATTR_PRIVILEGED: Attrs = Attrs(bindings::DMA_ATTR_PRIVILEGED); } +/// DMA data direction. +/// +/// Corresponds to the C [`enum dma_data_direction`]. +/// +/// [`enum dma_data_direction`]: srctree/include/linux/dma-direction.h +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[repr(u32)] +pub enum DataDirection { + /// The DMA mapping is for bidirectional data transfer. + /// + /// This is used when the buffer can be both read from and written to by the device. + /// The cache for the corresponding memory region is both flushed and invalidated. + Bidirectional = Self::const_cast(bindings::dma_data_direction_DMA_BIDIRECTIONAL), + + /// The DMA mapping is for data transfer from memory to the device (write). + /// + /// The CPU has prepared data in the buffer, and the device will read it. + /// The cache for the corresponding memory region is flushed before device access. + ToDevice = Self::const_cast(bindings::dma_data_direction_DMA_TO_DEVICE), + + /// The DMA mapping is for data transfer from the device to memory (read). + /// + /// The device will write data into the buffer for the CPU to read. + /// The cache for the corresponding memory region is invalidated before CPU access. + FromDevice = Self::const_cast(bindings::dma_data_direction_DMA_FROM_DEVICE), + + /// The DMA mapping is not for data transfer. + /// + /// This is primarily for debugging purposes. With this direction, the DMA mapping API + /// will not perform any cache coherency operations. + None = Self::const_cast(bindings::dma_data_direction_DMA_NONE), +} + +impl DataDirection { + /// Casts the bindgen-generated enum type to a `u32` at compile time. + /// + /// This function will cause a compile-time error if the underlying value of the + /// C enum is out of bounds for `u32`. + const fn const_cast(val: bindings::dma_data_direction) -> u32 { + // CAST: The C standard allows compilers to choose different integer types for enums. + // To safely check the value, we cast it to a wide signed integer type (`i128`) + // which can hold any standard C integer enum type without truncation. + let wide_val = val as i128; + + // Check if the value is outside the valid range for the target type `u32`. + // CAST: `u32::MAX` is cast to `i128` to match the type of `wide_val` for the comparison. + if wide_val < 0 || wide_val > u32::MAX as i128 { + // Trigger a compile-time error in a const context. + build_error!("C enum value is out of bounds for the target type `u32`."); + } + + // CAST: This cast is valid because the check above guarantees that `wide_val` + // is within the representable range of `u32`. + wide_val as u32 + } +} + +impl From<DataDirection> for bindings::dma_data_direction { + /// Returns the raw representation of [`enum dma_data_direction`]. + fn from(direction: DataDirection) -> Self { + // CAST: `direction as u32` gets the underlying representation of our `#[repr(u32)]` enum. + // The subsequent cast to `Self` (the bindgen type) assumes the C enum is compatible + // with the enum variants of `DataDirection`, which is a valid assumption given our + // compile-time checks. + direction as u32 as Self + } +} + /// An abstraction of the `dma_alloc_coherent` API. /// /// This is an abstraction around the `dma_alloc_coherent` API which is used to allocate and map @@ -275,7 +353,7 @@ pub mod attrs { // entire `CoherentAllocation` including the allocated memory itself. pub struct CoherentAllocation<T: AsBytes + FromBytes> { dev: ARef<device::Device>, - dma_handle: bindings::dma_addr_t, + dma_handle: DmaAddress, count: usize, cpu_addr: *mut T, dma_attrs: Attrs, @@ -376,7 +454,7 @@ impl<T: AsBytes + FromBytes> CoherentAllocation<T> { /// Returns a DMA handle which may be given to the device as the DMA address base of /// the region. - pub fn dma_handle(&self) -> bindings::dma_addr_t { + pub fn dma_handle(&self) -> DmaAddress { self.dma_handle } @@ -384,13 +462,13 @@ impl<T: AsBytes + FromBytes> CoherentAllocation<T> { /// device as the DMA address base of the region. /// /// Returns `EINVAL` if `offset` is not within the bounds of the allocation. - pub fn dma_handle_with_offset(&self, offset: usize) -> Result<bindings::dma_addr_t> { + pub fn dma_handle_with_offset(&self, offset: usize) -> Result<DmaAddress> { if offset >= self.count { Err(EINVAL) } else { // INVARIANT: The type invariant of `Self` guarantees that `size_of::<T> * count` fits // into a `usize`, and `offset` is inferior to `count`. - Ok(self.dma_handle + (offset * core::mem::size_of::<T>()) as bindings::dma_addr_t) + Ok(self.dma_handle + (offset * core::mem::size_of::<T>()) as DmaAddress) } } diff --git a/rust/kernel/drm/driver.rs b/rust/kernel/drm/driver.rs index 8fefae41bcc6..91e13b6ca26a 100644 --- a/rust/kernel/drm/driver.rs +++ b/rust/kernel/drm/driver.rs @@ -86,6 +86,9 @@ pub struct AllocOps { /// Trait for memory manager implementations. Implemented internally. pub trait AllocImpl: super::private::Sealed + drm::gem::IntoGEMObject { + /// The [`Driver`] implementation for this [`AllocImpl`]. + type Driver: drm::Driver; + /// The C callback operations for this memory manager. const ALLOC_OPS: AllocOps; } diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs index a822aedee949..6ccbb25628a1 100644 --- a/rust/kernel/drm/gem/mod.rs +++ b/rust/kernel/drm/gem/mod.rs @@ -13,34 +13,34 @@ use crate::{ sync::aref::{ARef, AlwaysRefCounted}, types::Opaque, }; -use core::{mem, ops::Deref, ptr::NonNull}; +use core::{ops::Deref, ptr::NonNull}; + +/// A type alias for retrieving a [`Driver`]s [`DriverFile`] implementation from its +/// [`DriverObject`] implementation. +/// +/// [`Driver`]: drm::Driver +/// [`DriverFile`]: drm::file::DriverFile +pub type DriverFile<T> = drm::File<<<T as DriverObject>::Driver as drm::Driver>::File>; /// GEM object functions, which must be implemented by drivers. -pub trait BaseDriverObject<T: BaseObject>: Sync + Send + Sized { +pub trait DriverObject: Sync + Send + Sized { + /// Parent `Driver` for this object. + type Driver: drm::Driver; + /// Create a new driver data object for a GEM object of a given size. - fn new(dev: &drm::Device<T::Driver>, size: usize) -> impl PinInit<Self, Error>; + fn new(dev: &drm::Device<Self::Driver>, size: usize) -> impl PinInit<Self, Error>; /// Open a new handle to an existing object, associated with a File. - fn open( - _obj: &<<T as IntoGEMObject>::Driver as drm::Driver>::Object, - _file: &drm::File<<<T as IntoGEMObject>::Driver as drm::Driver>::File>, - ) -> Result { + fn open(_obj: &<Self::Driver as drm::Driver>::Object, _file: &DriverFile<Self>) -> Result { Ok(()) } /// Close a handle to an existing object, associated with a File. - fn close( - _obj: &<<T as IntoGEMObject>::Driver as drm::Driver>::Object, - _file: &drm::File<<<T as IntoGEMObject>::Driver as drm::Driver>::File>, - ) { - } + fn close(_obj: &<Self::Driver as drm::Driver>::Object, _file: &DriverFile<Self>) {} } /// Trait that represents a GEM object subtype pub trait IntoGEMObject: Sized + super::private::Sealed + AlwaysRefCounted { - /// Owning driver for this type - type Driver: drm::Driver; - /// Returns a reference to the raw `drm_gem_object` structure, which must be valid as long as /// this owning object is valid. fn as_raw(&self) -> *mut bindings::drm_gem_object; @@ -75,25 +75,16 @@ unsafe impl<T: IntoGEMObject> AlwaysRefCounted for T { } } -/// Trait which must be implemented by drivers using base GEM objects. -pub trait DriverObject: BaseDriverObject<Object<Self>> { - /// Parent `Driver` for this object. - type Driver: drm::Driver; -} - -extern "C" fn open_callback<T: BaseDriverObject<U>, U: BaseObject>( +extern "C" fn open_callback<T: DriverObject>( raw_obj: *mut bindings::drm_gem_object, raw_file: *mut bindings::drm_file, ) -> core::ffi::c_int { // SAFETY: `open_callback` is only ever called with a valid pointer to a `struct drm_file`. - let file = unsafe { - drm::File::<<<U as IntoGEMObject>::Driver as drm::Driver>::File>::from_raw(raw_file) - }; - // SAFETY: `open_callback` is specified in the AllocOps structure for `Object<T>`, ensuring that - // `raw_obj` is indeed contained within a `Object<T>`. - let obj = unsafe { - <<<U as IntoGEMObject>::Driver as drm::Driver>::Object as IntoGEMObject>::from_raw(raw_obj) - }; + let file = unsafe { DriverFile::<T>::from_raw(raw_file) }; + + // SAFETY: `open_callback` is specified in the AllocOps structure for `DriverObject<T>`, + // ensuring that `raw_obj` is contained within a `DriverObject<T>` + let obj = unsafe { <<T::Driver as drm::Driver>::Object as IntoGEMObject>::from_raw(raw_obj) }; match T::open(obj, file) { Err(e) => e.to_errno(), @@ -101,26 +92,21 @@ extern "C" fn open_callback<T: BaseDriverObject<U>, U: BaseObject>( } } -extern "C" fn close_callback<T: BaseDriverObject<U>, U: BaseObject>( +extern "C" fn close_callback<T: DriverObject>( raw_obj: *mut bindings::drm_gem_object, raw_file: *mut bindings::drm_file, ) { // SAFETY: `open_callback` is only ever called with a valid pointer to a `struct drm_file`. - let file = unsafe { - drm::File::<<<U as IntoGEMObject>::Driver as drm::Driver>::File>::from_raw(raw_file) - }; + let file = unsafe { DriverFile::<T>::from_raw(raw_file) }; + // SAFETY: `close_callback` is specified in the AllocOps structure for `Object<T>`, ensuring // that `raw_obj` is indeed contained within a `Object<T>`. - let obj = unsafe { - <<<U as IntoGEMObject>::Driver as drm::Driver>::Object as IntoGEMObject>::from_raw(raw_obj) - }; + let obj = unsafe { <<T::Driver as drm::Driver>::Object as IntoGEMObject>::from_raw(raw_obj) }; T::close(obj, file); } impl<T: DriverObject> IntoGEMObject for Object<T> { - type Driver = T::Driver; - fn as_raw(&self) -> *mut bindings::drm_gem_object { self.obj.get() } @@ -142,10 +128,12 @@ pub trait BaseObject: IntoGEMObject { /// Creates a new handle for the object associated with a given `File` /// (or returns an existing one). - fn create_handle( - &self, - file: &drm::File<<<Self as IntoGEMObject>::Driver as drm::Driver>::File>, - ) -> Result<u32> { + fn create_handle<D, F>(&self, file: &drm::File<F>) -> Result<u32> + where + Self: AllocImpl<Driver = D>, + D: drm::Driver<Object = Self, File = F>, + F: drm::file::DriverFile<Driver = D>, + { let mut handle: u32 = 0; // SAFETY: The arguments are all valid per the type invariants. to_result(unsafe { @@ -155,10 +143,12 @@ pub trait BaseObject: IntoGEMObject { } /// Looks up an object by its handle for a given `File`. - fn lookup_handle( - file: &drm::File<<<Self as IntoGEMObject>::Driver as drm::Driver>::File>, - handle: u32, - ) -> Result<ARef<Self>> { + fn lookup_handle<D, F>(file: &drm::File<F>, handle: u32) -> Result<ARef<Self>> + where + Self: AllocImpl<Driver = D>, + D: drm::Driver<Object = Self, File = F>, + F: drm::file::DriverFile<Driver = D>, + { // SAFETY: The arguments are all valid per the type invariants. let ptr = unsafe { bindings::drm_gem_object_lookup(file.as_raw().cast(), handle) }; if ptr.is_null() { @@ -208,13 +198,10 @@ pub struct Object<T: DriverObject + Send + Sync> { } impl<T: DriverObject> Object<T> { - /// The size of this object's structure. - pub const SIZE: usize = mem::size_of::<Self>(); - const OBJECT_FUNCS: bindings::drm_gem_object_funcs = bindings::drm_gem_object_funcs { free: Some(Self::free_callback), - open: Some(open_callback::<T, Object<T>>), - close: Some(close_callback::<T, Object<T>>), + open: Some(open_callback::<T>), + close: Some(close_callback::<T>), print_info: None, export: None, pin: None, @@ -297,6 +284,8 @@ impl<T: DriverObject> Deref for Object<T> { } impl<T: DriverObject> AllocImpl for Object<T> { + type Driver = T::Driver; + const ALLOC_OPS: AllocOps = AllocOps { gem_create_object: None, prime_handle_to_fd: None, diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index fef97f2a5098..44e4b8853ff3 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -18,6 +18,7 @@ // // Stable since Rust 1.79.0. #![feature(inline_const)] +#![feature(pointer_is_aligned)] // // Stable since Rust 1.81.0. #![feature(lint_reasons)] @@ -113,6 +114,7 @@ pub mod print; pub mod rbtree; pub mod regulator; pub mod revocable; +pub mod scatterlist; pub mod security; pub mod seq_file; pub mod sizes; diff --git a/rust/kernel/page.rs b/rust/kernel/page.rs index 7c1b17246ed5..75ef096075cb 100644 --- a/rust/kernel/page.rs +++ b/rust/kernel/page.rs @@ -9,7 +9,12 @@ use crate::{ error::Result, uaccess::UserSliceReader, }; -use core::ptr::{self, NonNull}; +use core::{ + marker::PhantomData, + mem::ManuallyDrop, + ops::Deref, + ptr::{self, NonNull}, +}; /// A bitwise shift for the page size. pub const PAGE_SHIFT: usize = bindings::PAGE_SHIFT as usize; @@ -30,6 +35,86 @@ pub const fn page_align(addr: usize) -> usize { (addr + (PAGE_SIZE - 1)) & PAGE_MASK } +/// Representation of a non-owning reference to a [`Page`]. +/// +/// This type provides a borrowed version of a [`Page`] that is owned by some other entity, e.g. a +/// [`Vmalloc`] allocation such as [`VBox`]. +/// +/// # Example +/// +/// ``` +/// # use kernel::{bindings, prelude::*}; +/// use kernel::page::{BorrowedPage, Page, PAGE_SIZE}; +/// # use core::{mem::MaybeUninit, ptr, ptr::NonNull }; +/// +/// fn borrow_page<'a>(vbox: &'a mut VBox<MaybeUninit<[u8; PAGE_SIZE]>>) -> BorrowedPage<'a> { +/// let ptr = ptr::from_ref(&**vbox); +/// +/// // SAFETY: `ptr` is a valid pointer to `Vmalloc` memory. +/// let page = unsafe { bindings::vmalloc_to_page(ptr.cast()) }; +/// +/// // SAFETY: `vmalloc_to_page` returns a valid pointer to a `struct page` for a valid +/// // pointer to `Vmalloc` memory. +/// let page = unsafe { NonNull::new_unchecked(page) }; +/// +/// // SAFETY: +/// // - `self.0` is a valid pointer to a `struct page`. +/// // - `self.0` is valid for the entire lifetime of `self`. +/// unsafe { BorrowedPage::from_raw(page) } +/// } +/// +/// let mut vbox = VBox::<[u8; PAGE_SIZE]>::new_uninit(GFP_KERNEL)?; +/// let page = borrow_page(&mut vbox); +/// +/// // SAFETY: There is no concurrent read or write to this page. +/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? }; +/// # Ok::<(), Error>(()) +/// ``` +/// +/// # Invariants +/// +/// The borrowed underlying pointer to a `struct page` is valid for the entire lifetime `'a`. +/// +/// [`VBox`]: kernel::alloc::VBox +/// [`Vmalloc`]: kernel::alloc::allocator::Vmalloc +pub struct BorrowedPage<'a>(ManuallyDrop<Page>, PhantomData<&'a Page>); + +impl<'a> BorrowedPage<'a> { + /// Constructs a [`BorrowedPage`] from a raw pointer to a `struct page`. + /// + /// # Safety + /// + /// - `ptr` must point to a valid `bindings::page`. + /// - `ptr` must remain valid for the entire lifetime `'a`. + pub unsafe fn from_raw(ptr: NonNull<bindings::page>) -> Self { + let page = Page { page: ptr }; + + // INVARIANT: The safety requirements guarantee that `ptr` is valid for the entire lifetime + // `'a`. + Self(ManuallyDrop::new(page), PhantomData) + } +} + +impl<'a> Deref for BorrowedPage<'a> { + type Target = Page; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +/// Trait to be implemented by types which provide an [`Iterator`] implementation of +/// [`BorrowedPage`] items, such as [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter). +pub trait AsPageIter { + /// The [`Iterator`] type, e.g. [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter). + type Iter<'a>: Iterator<Item = BorrowedPage<'a>> + where + Self: 'a; + + /// Returns an [`Iterator`] of [`BorrowedPage`] items over all pages owned by `self`. + fn page_iter(&mut self) -> Self::Iter<'_>; +} + /// A pointer to a page that owns the page allocation. /// /// # Invariants diff --git a/rust/kernel/scatterlist.rs b/rust/kernel/scatterlist.rs new file mode 100644 index 000000000000..9709dff60b5a --- /dev/null +++ b/rust/kernel/scatterlist.rs @@ -0,0 +1,491 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Abstractions for scatter-gather lists. +//! +//! C header: [`include/linux/scatterlist.h`](srctree/include/linux/scatterlist.h) +//! +//! Scatter-gather (SG) I/O is a memory access technique that allows devices to perform DMA +//! operations on data buffers that are not physically contiguous in memory. It works by creating a +//! "scatter-gather list", an array where each entry specifies the address and length of a +//! physically contiguous memory segment. +//! +//! The device's DMA controller can then read this list and process the segments sequentially as +//! part of one logical I/O request. This avoids the need for a single, large, physically contiguous +//! memory buffer, which can be difficult or impossible to allocate. +//! +//! This module provides safe Rust abstractions over the kernel's `struct scatterlist` and +//! `struct sg_table` types. +//! +//! The main entry point is the [`SGTable`] type, which represents a complete scatter-gather table. +//! It can be either: +//! +//! - An owned table ([`SGTable<Owned<P>>`]), created from a Rust memory buffer (e.g., [`VVec`]). +//! This type manages the allocation of the `struct sg_table`, the DMA mapping of the buffer, and +//! the automatic cleanup of all resources. +//! - A borrowed reference (&[`SGTable`]), which provides safe, read-only access to a table that was +//! allocated by other (e.g., C) code. +//! +//! Individual entries in the table are represented by [`SGEntry`], which can be accessed by +//! iterating over an [`SGTable`]. + +use crate::{ + alloc, + alloc::allocator::VmallocPageIter, + bindings, + device::{Bound, Device}, + devres::Devres, + dma, error, + io::resource::ResourceSize, + page, + prelude::*, + types::{ARef, Opaque}, +}; +use core::{ops::Deref, ptr::NonNull}; + +/// A single entry in a scatter-gather list. +/// +/// An `SGEntry` represents a single, physically contiguous segment of memory that has been mapped +/// for DMA. +/// +/// Instances of this struct are obtained by iterating over an [`SGTable`]. Drivers do not create +/// or own [`SGEntry`] objects directly. +#[repr(transparent)] +pub struct SGEntry(Opaque<bindings::scatterlist>); + +// SAFETY: `SGEntry` can be sent to any task. +unsafe impl Send for SGEntry {} + +// SAFETY: `SGEntry` has no interior mutability and can be accessed concurrently. +unsafe impl Sync for SGEntry {} + +impl SGEntry { + /// Convert a raw `struct scatterlist *` to a `&'a SGEntry`. + /// + /// # Safety + /// + /// Callers must ensure that the `struct scatterlist` pointed to by `ptr` is valid for the + /// lifetime `'a`. + #[inline] + unsafe fn from_raw<'a>(ptr: *mut bindings::scatterlist) -> &'a Self { + // SAFETY: The safety requirements of this function guarantee that `ptr` is a valid pointer + // to a `struct scatterlist` for the duration of `'a`. + unsafe { &*ptr.cast() } + } + + /// Obtain the raw `struct scatterlist *`. + #[inline] + fn as_raw(&self) -> *mut bindings::scatterlist { + self.0.get() + } + + /// Returns the DMA address of this SG entry. + /// + /// This is the address that the device should use to access the memory segment. + #[inline] + pub fn dma_address(&self) -> dma::DmaAddress { + // SAFETY: `self.as_raw()` is a valid pointer to a `struct scatterlist`. + unsafe { bindings::sg_dma_address(self.as_raw()) } + } + + /// Returns the length of this SG entry in bytes. + #[inline] + pub fn dma_len(&self) -> ResourceSize { + #[allow(clippy::useless_conversion)] + // SAFETY: `self.as_raw()` is a valid pointer to a `struct scatterlist`. + unsafe { bindings::sg_dma_len(self.as_raw()) }.into() + } +} + +/// The borrowed generic type of an [`SGTable`], representing a borrowed or externally managed +/// table. +#[repr(transparent)] +pub struct Borrowed(Opaque<bindings::sg_table>); + +// SAFETY: `Borrowed` can be sent to any task. +unsafe impl Send for Borrowed {} + +// SAFETY: `Borrowed` has no interior mutability and can be accessed concurrently. +unsafe impl Sync for Borrowed {} + +/// A scatter-gather table. +/// +/// This struct is a wrapper around the kernel's `struct sg_table`. It manages a list of DMA-mapped +/// memory segments that can be passed to a device for I/O operations. +/// +/// The generic parameter `T` is used as a generic type to distinguish between owned and borrowed +/// tables. +/// +/// - [`SGTable<Owned>`]: An owned table created and managed entirely by Rust code. It handles +/// allocation, DMA mapping, and cleanup of all associated resources. See [`SGTable::new`]. +/// - [`SGTable<Borrowed>`} (or simply [`SGTable`]): Represents a table whose lifetime is managed +/// externally. It can be used safely via a borrowed reference `&'a SGTable`, where `'a` is the +/// external lifetime. +/// +/// All [`SGTable`] variants can be iterated over the individual [`SGEntry`]s. +#[repr(transparent)] +#[pin_data] +pub struct SGTable<T: private::Sealed = Borrowed> { + #[pin] + inner: T, +} + +impl SGTable { + /// Creates a borrowed `&'a SGTable` from a raw `struct sg_table` pointer. + /// + /// This allows safe access to an `sg_table` that is managed elsewhere (for example, in C code). + /// + /// # Safety + /// + /// Callers must ensure that: + /// + /// - the `struct sg_table` pointed to by `ptr` is valid for the entire lifetime of `'a`, + /// - the data behind `ptr` is not modified concurrently for the duration of `'a`. + #[inline] + pub unsafe fn from_raw<'a>(ptr: *mut bindings::sg_table) -> &'a Self { + // SAFETY: The safety requirements of this function guarantee that `ptr` is a valid pointer + // to a `struct sg_table` for the duration of `'a`. + unsafe { &*ptr.cast() } + } + + #[inline] + fn as_raw(&self) -> *mut bindings::sg_table { + self.inner.0.get() + } + + /// Returns an [`SGTableIter`] bound to the lifetime of `self`. + pub fn iter(&self) -> SGTableIter<'_> { + // SAFETY: `self.as_raw()` is a valid pointer to a `struct sg_table`. + let nents = unsafe { (*self.as_raw()).nents }; + + let pos = if nents > 0 { + // SAFETY: `self.as_raw()` is a valid pointer to a `struct sg_table`. + let ptr = unsafe { (*self.as_raw()).sgl }; + + // SAFETY: `ptr` is guaranteed to be a valid pointer to a `struct scatterlist`. + Some(unsafe { SGEntry::from_raw(ptr) }) + } else { + None + }; + + SGTableIter { pos, nents } + } +} + +/// Represents the DMA mapping state of a `struct sg_table`. +/// +/// This is used as an inner type of [`Owned`] to manage the DMA mapping lifecycle. +/// +/// # Invariants +/// +/// - `sgt` is a valid pointer to a `struct sg_table` for the entire lifetime of the +/// [`DmaMappedSgt`]. +/// - `sgt` is always DMA mapped. +struct DmaMappedSgt { + sgt: NonNull<bindings::sg_table>, + dev: ARef<Device>, + dir: dma::DataDirection, +} + +// SAFETY: `DmaMappedSgt` can be sent to any task. +unsafe impl Send for DmaMappedSgt {} + +// SAFETY: `DmaMappedSgt` has no interior mutability and can be accessed concurrently. +unsafe impl Sync for DmaMappedSgt {} + +impl DmaMappedSgt { + /// # Safety + /// + /// - `sgt` must be a valid pointer to a `struct sg_table` for the entire lifetime of the + /// returned [`DmaMappedSgt`]. + /// - The caller must guarantee that `sgt` remains DMA mapped for the entire lifetime of + /// [`DmaMappedSgt`]. + unsafe fn new( + sgt: NonNull<bindings::sg_table>, + dev: &Device<Bound>, + dir: dma::DataDirection, + ) -> Result<Self> { + // SAFETY: + // - `dev.as_raw()` is a valid pointer to a `struct device`, which is guaranteed to be + // bound to a driver for the duration of this call. + // - `sgt` is a valid pointer to a `struct sg_table`. + error::to_result(unsafe { + bindings::dma_map_sgtable(dev.as_raw(), sgt.as_ptr(), dir.into(), 0) + })?; + + // INVARIANT: By the safety requirements of this function it is guaranteed that `sgt` is + // valid for the entire lifetime of this object instance. + Ok(Self { + sgt, + dev: dev.into(), + dir, + }) + } +} + +impl Drop for DmaMappedSgt { + #[inline] + fn drop(&mut self) { + // SAFETY: + // - `self.dev.as_raw()` is a pointer to a valid `struct device`. + // - `self.dev` is the same device the mapping has been created for in `Self::new()`. + // - `self.sgt.as_ptr()` is a valid pointer to a `struct sg_table` by the type invariants + // of `Self`. + // - `self.dir` is the same `dma::DataDirection` the mapping has been created with in + // `Self::new()`. + unsafe { + bindings::dma_unmap_sgtable(self.dev.as_raw(), self.sgt.as_ptr(), self.dir.into(), 0) + }; + } +} + +/// A transparent wrapper around a `struct sg_table`. +/// +/// While we could also create the `struct sg_table` in the constructor of [`Owned`], we can't tear +/// down the `struct sg_table` in [`Owned::drop`]; the drop order in [`Owned`] matters. +#[repr(transparent)] +struct RawSGTable(Opaque<bindings::sg_table>); + +// SAFETY: `RawSGTable` can be sent to any task. +unsafe impl Send for RawSGTable {} + +// SAFETY: `RawSGTable` has no interior mutability and can be accessed concurrently. +unsafe impl Sync for RawSGTable {} + +impl RawSGTable { + /// # Safety + /// + /// - `pages` must be a slice of valid `struct page *`. + /// - The pages pointed to by `pages` must remain valid for the entire lifetime of the returned + /// [`RawSGTable`]. + unsafe fn new( + pages: &mut [*mut bindings::page], + size: usize, + max_segment: u32, + flags: alloc::Flags, + ) -> Result<Self> { + // `sg_alloc_table_from_pages_segment()` expects at least one page, otherwise it + // produces a NPE. + if pages.is_empty() { + return Err(EINVAL); + } + + let sgt = Opaque::zeroed(); + // SAFETY: + // - `sgt.get()` is a valid pointer to uninitialized memory. + // - As by the check above, `pages` is not empty. + error::to_result(unsafe { + bindings::sg_alloc_table_from_pages_segment( + sgt.get(), + pages.as_mut_ptr(), + pages.len().try_into()?, + 0, + size, + max_segment, + flags.as_raw(), + ) + })?; + + Ok(Self(sgt)) + } + + #[inline] + fn as_raw(&self) -> *mut bindings::sg_table { + self.0.get() + } +} + +impl Drop for RawSGTable { + #[inline] + fn drop(&mut self) { + // SAFETY: `sgt` is a valid and initialized `struct sg_table`. + unsafe { bindings::sg_free_table(self.0.get()) }; + } +} + +/// The [`Owned`] generic type of an [`SGTable`]. +/// +/// A [`SGTable<Owned>`] signifies that the [`SGTable`] owns all associated resources: +/// +/// - The backing memory pages. +/// - The `struct sg_table` allocation (`sgt`). +/// - The DMA mapping, managed through a [`Devres`]-managed `DmaMappedSgt`. +/// +/// Users interact with this type through the [`SGTable`] handle and do not need to manage +/// [`Owned`] directly. +#[pin_data] +pub struct Owned<P> { + // Note: The drop order is relevant; we first have to unmap the `struct sg_table`, then free the + // `struct sg_table` and finally free the backing pages. + #[pin] + dma: Devres<DmaMappedSgt>, + sgt: RawSGTable, + _pages: P, +} + +// SAFETY: `Owned` can be sent to any task if `P` can be send to any task. +unsafe impl<P: Send> Send for Owned<P> {} + +// SAFETY: `Owned` has no interior mutability and can be accessed concurrently if `P` can be +// accessed concurrently. +unsafe impl<P: Sync> Sync for Owned<P> {} + +impl<P> Owned<P> +where + for<'a> P: page::AsPageIter<Iter<'a> = VmallocPageIter<'a>> + 'static, +{ + fn new( + dev: &Device<Bound>, + mut pages: P, + dir: dma::DataDirection, + flags: alloc::Flags, + ) -> Result<impl PinInit<Self, Error> + '_> { + let page_iter = pages.page_iter(); + let size = page_iter.size(); + + let mut page_vec: KVec<*mut bindings::page> = + KVec::with_capacity(page_iter.page_count(), flags)?; + + for page in page_iter { + page_vec.push(page.as_ptr(), flags)?; + } + + // `dma_max_mapping_size` returns `size_t`, but `sg_alloc_table_from_pages_segment()` takes + // an `unsigned int`. + // + // SAFETY: `dev.as_raw()` is a valid pointer to a `struct device`. + let max_segment = match unsafe { bindings::dma_max_mapping_size(dev.as_raw()) } { + 0 => u32::MAX, + max_segment => u32::try_from(max_segment).unwrap_or(u32::MAX), + }; + + Ok(try_pin_init!(&this in Self { + // SAFETY: + // - `page_vec` is a `KVec` of valid `struct page *` obtained from `pages`. + // - The pages contained in `pages` remain valid for the entire lifetime of the + // `RawSGTable`. + sgt: unsafe { RawSGTable::new(&mut page_vec, size, max_segment, flags) }?, + dma <- { + // SAFETY: `this` is a valid pointer to uninitialized memory. + let sgt = unsafe { &raw mut (*this.as_ptr()).sgt }.cast(); + + // SAFETY: `sgt` is guaranteed to be non-null. + let sgt = unsafe { NonNull::new_unchecked(sgt) }; + + // SAFETY: + // - It is guaranteed that the object returned by `DmaMappedSgt::new` won't out-live + // `sgt`. + // - `sgt` is never DMA unmapped manually. + Devres::new(dev, unsafe { DmaMappedSgt::new(sgt, dev, dir) }) + }, + _pages: pages, + })) + } +} + +impl<P> SGTable<Owned<P>> +where + for<'a> P: page::AsPageIter<Iter<'a> = VmallocPageIter<'a>> + 'static, +{ + /// Allocates a new scatter-gather table from the given pages and maps it for DMA. + /// + /// This constructor creates a new [`SGTable<Owned>`] that takes ownership of `P`. + /// It allocates a `struct sg_table`, populates it with entries corresponding to the physical + /// pages of `P`, and maps the table for DMA with the specified [`Device`] and + /// [`dma::DataDirection`]. + /// + /// The DMA mapping is managed through [`Devres`], ensuring that the DMA mapping is unmapped + /// once the associated [`Device`] is unbound, or when the [`SGTable<Owned>`] is dropped. + /// + /// # Parameters + /// + /// * `dev`: The [`Device`] that will be performing the DMA. + /// * `pages`: The entity providing the backing pages. It must implement [`page::AsPageIter`]. + /// The ownership of this entity is moved into the new [`SGTable<Owned>`]. + /// * `dir`: The [`dma::DataDirection`] of the DMA transfer. + /// * `flags`: Allocation flags for internal allocations (e.g., [`GFP_KERNEL`]). + /// + /// # Examples + /// + /// ``` + /// use kernel::{ + /// device::{Bound, Device}, + /// dma, page, + /// prelude::*, + /// scatterlist::{SGTable, Owned}, + /// }; + /// + /// fn test(dev: &Device<Bound>) -> Result { + /// let size = 4 * page::PAGE_SIZE; + /// let pages = VVec::<u8>::with_capacity(size, GFP_KERNEL)?; + /// + /// let sgt = KBox::pin_init(SGTable::new( + /// dev, + /// pages, + /// dma::DataDirection::ToDevice, + /// GFP_KERNEL, + /// ), GFP_KERNEL)?; + /// + /// Ok(()) + /// } + /// ``` + pub fn new( + dev: &Device<Bound>, + pages: P, + dir: dma::DataDirection, + flags: alloc::Flags, + ) -> impl PinInit<Self, Error> + '_ { + try_pin_init!(Self { + inner <- Owned::new(dev, pages, dir, flags)? + }) + } +} + +impl<P> Deref for SGTable<Owned<P>> { + type Target = SGTable; + + #[inline] + fn deref(&self) -> &Self::Target { + // SAFETY: + // - `self.inner.sgt.as_raw()` is a valid pointer to a `struct sg_table` for the entire + // lifetime of `self`. + // - The backing `struct sg_table` is not modified for the entire lifetime of `self`. + unsafe { SGTable::from_raw(self.inner.sgt.as_raw()) } + } +} + +mod private { + pub trait Sealed {} + + impl Sealed for super::Borrowed {} + impl<P> Sealed for super::Owned<P> {} +} + +/// An [`Iterator`] over the DMA mapped [`SGEntry`] items of an [`SGTable`]. +/// +/// Note that the existence of an [`SGTableIter`] does not guarantee that the [`SGEntry`] items +/// actually remain DMA mapped; they are prone to be unmapped on device unbind. +pub struct SGTableIter<'a> { + pos: Option<&'a SGEntry>, + /// The number of DMA mapped entries in a `struct sg_table`. + nents: c_uint, +} + +impl<'a> Iterator for SGTableIter<'a> { + type Item = &'a SGEntry; + + fn next(&mut self) -> Option<Self::Item> { + let entry = self.pos?; + self.nents = self.nents.saturating_sub(1); + + // SAFETY: `entry.as_raw()` is a valid pointer to a `struct scatterlist`. + let next = unsafe { bindings::sg_next(entry.as_raw()) }; + + self.pos = (!next.is_null() && self.nents > 0).then(|| { + // SAFETY: If `next` is not NULL, `sg_next()` guarantees to return a valid pointer to + // the next `struct scatterlist`. + unsafe { SGEntry::from_raw(next) } + }); + + Some(entry) + } +} diff --git a/rust/kernel/transmute.rs b/rust/kernel/transmute.rs index 1c7d43771a37..cfc37d81adf2 100644 --- a/rust/kernel/transmute.rs +++ b/rust/kernel/transmute.rs @@ -2,6 +2,8 @@ //! Traits for transmuting types. +use core::mem::size_of; + /// Types for which any bit pattern is valid. /// /// Not all types are valid for all values. For example, a `bool` must be either zero or one, so @@ -9,10 +11,93 @@ /// /// It's okay for the type to have padding, as initializing those bytes has no effect. /// +/// # Examples +/// +/// ``` +/// use kernel::transmute::FromBytes; +/// +/// # fn test() -> Option<()> { +/// let raw = [1, 2, 3, 4]; +/// +/// let result = u32::from_bytes(&raw)?; +/// +/// #[cfg(target_endian = "little")] +/// assert_eq!(*result, 0x4030201); +/// +/// #[cfg(target_endian = "big")] +/// assert_eq!(*result, 0x1020304); +/// +/// # Some(()) } +/// # test().ok_or(EINVAL)?; +/// # Ok::<(), Error>(()) +/// ``` +/// /// # Safety /// /// All bit-patterns must be valid for this type. This type must not have interior mutability. -pub unsafe trait FromBytes {} +pub unsafe trait FromBytes { + /// Converts a slice of bytes to a reference to `Self`. + /// + /// Succeeds if the reference is properly aligned, and the size of `bytes` is equal to that of + /// `T` and different from zero. + /// + /// Otherwise, returns [`None`]. + fn from_bytes(bytes: &[u8]) -> Option<&Self> + where + Self: Sized, + { + let slice_ptr = bytes.as_ptr().cast::<Self>(); + let size = size_of::<Self>(); + + #[allow(clippy::incompatible_msrv)] + if bytes.len() == size && slice_ptr.is_aligned() { + // SAFETY: Size and alignment were just checked. + unsafe { Some(&*slice_ptr) } + } else { + None + } + } + + /// Converts a mutable slice of bytes to a reference to `Self`. + /// + /// Succeeds if the reference is properly aligned, and the size of `bytes` is equal to that of + /// `T` and different from zero. + /// + /// Otherwise, returns [`None`]. + fn from_bytes_mut(bytes: &mut [u8]) -> Option<&mut Self> + where + Self: AsBytes + Sized, + { + let slice_ptr = bytes.as_mut_ptr().cast::<Self>(); + let size = size_of::<Self>(); + + #[allow(clippy::incompatible_msrv)] + if bytes.len() == size && slice_ptr.is_aligned() { + // SAFETY: Size and alignment were just checked. + unsafe { Some(&mut *slice_ptr) } + } else { + None + } + } + + /// Creates an owned instance of `Self` by copying `bytes`. + /// + /// Unlike [`FromBytes::from_bytes`], which requires aligned input, this method can be used on + /// non-aligned data at the cost of a copy. + fn from_bytes_copy(bytes: &[u8]) -> Option<Self> + where + Self: Sized, + { + if bytes.len() == size_of::<Self>() { + // SAFETY: we just verified that `bytes` has the same size as `Self`, and per the + // invariants of `FromBytes`, any byte sequence of the correct length is a valid value + // for `Self`. + Some(unsafe { core::ptr::read_unaligned(bytes.as_ptr().cast::<Self>()) }) + } else { + None + } + } +} macro_rules! impl_frombytes { ($($({$($generics:tt)*})? $t:ty, )*) => { @@ -47,7 +132,32 @@ impl_frombytes! { /// /// Values of this type may not contain any uninitialized bytes. This type must not have interior /// mutability. -pub unsafe trait AsBytes {} +pub unsafe trait AsBytes { + /// Returns `self` as a slice of bytes. + fn as_bytes(&self) -> &[u8] { + // CAST: `Self` implements `AsBytes` thus all bytes of `self` are initialized. + let data = core::ptr::from_ref(self).cast::<u8>(); + let len = core::mem::size_of_val(self); + + // SAFETY: `data` is non-null and valid for reads of `len * sizeof::<u8>()` bytes. + unsafe { core::slice::from_raw_parts(data, len) } + } + + /// Returns `self` as a mutable slice of bytes. + fn as_bytes_mut(&mut self) -> &mut [u8] + where + Self: FromBytes, + { + // CAST: `Self` implements both `AsBytes` and `FromBytes` thus making `Self` + // bi-directionally transmutable to `[u8; size_of_val(self)]`. + let data = core::ptr::from_mut(self).cast::<u8>(); + let len = core::mem::size_of_val(self); + + // SAFETY: `data` is non-null and valid for read and writes of `len * sizeof::<u8>()` + // bytes. + unsafe { core::slice::from_raw_parts_mut(data, len) } + } +} macro_rules! impl_asbytes { ($($({$($generics:tt)*})? $t:ty, )*) => { diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index b9343d5bc00f..706e833e9702 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -356,18 +356,11 @@ struct ClosureWork<T> { func: Option<T>, } -impl<T> ClosureWork<T> { - fn project(self: Pin<&mut Self>) -> &mut Option<T> { - // SAFETY: The `func` field is not structurally pinned. - unsafe { &mut self.get_unchecked_mut().func } - } -} - impl<T: FnOnce()> WorkItem for ClosureWork<T> { type Pointer = Pin<KBox<Self>>; fn run(mut this: Pin<KBox<Self>>) { - if let Some(func) = this.as_mut().project().take() { + if let Some(func) = this.as_mut().project().func.take() { (func)() } } diff --git a/rust/pin-init/README.md b/rust/pin-init/README.md index a4c01a8d78b2..723e275445d4 100644 --- a/rust/pin-init/README.md +++ b/rust/pin-init/README.md @@ -6,6 +6,18 @@  # `pin-init` +> [!NOTE] +> +> This crate was originally named [`pinned-init`], but the migration to +> `pin-init` is not yet complete. The `legcay` branch contains the current +> version of the `pinned-init` crate & the `main` branch already incorporates +> the rename to `pin-init`. +> +> There are still some changes needed on the kernel side before the migration +> can be completed. + +[`pinned-init`]: https://crates.io/crates/pinned-init + <!-- cargo-rdme start --> Library to safely and fallibly initialize pinned `struct`s using in-place constructors. diff --git a/rust/pin-init/examples/error.rs b/rust/pin-init/examples/error.rs index e0cc258746ce..8f4e135eb8ba 100644 --- a/rust/pin-init/examples/error.rs +++ b/rust/pin-init/examples/error.rs @@ -24,4 +24,6 @@ impl From<AllocError> for Error { } #[allow(dead_code)] -fn main() {} +fn main() { + let _ = Error; +} diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index 62e013a5cc20..dd553212836e 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -740,6 +740,8 @@ macro_rules! stack_try_pin_init { /// As already mentioned in the examples above, inside of `pin_init!` a `struct` initializer with /// the following modifications is expected: /// - Fields that you want to initialize in-place have to use `<-` instead of `:`. +/// - You can use `_: { /* run any user-code here */ },` anywhere where you can place fields in +/// order to run arbitrary code. /// - In front of the initializer you can write `&this in` to have access to a [`NonNull<Self>`] /// pointer named `this` inside of the initializer. /// - Using struct update syntax one can place `..Zeroable::init_zeroed()` at the very end of the @@ -994,7 +996,7 @@ macro_rules! try_init { /// } /// /// impl<T> Foo<T> { -/// fn project(self: Pin<&mut Self>) -> Pin<&mut T> { +/// fn project_this(self: Pin<&mut Self>) -> Pin<&mut T> { /// assert_pinned!(Foo<T>, elem, T, inline); /// /// // SAFETY: The field is structurally pinned. diff --git a/rust/pin-init/src/macros.rs b/rust/pin-init/src/macros.rs index 9ced630737b8..d6acf2cd291e 100644 --- a/rust/pin-init/src/macros.rs +++ b/rust/pin-init/src/macros.rs @@ -831,6 +831,17 @@ macro_rules! __pin_data { $($fields)* } + $crate::__pin_data!(make_pin_projections: + @vis($vis), + @name($name), + @impl_generics($($impl_generics)*), + @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), + @where($($whr)*), + @pinned($($pinned)*), + @not_pinned($($not_pinned)*), + ); + // We put the rest into this const item, because it then will not be accessible to anything // outside. const _: () = { @@ -980,6 +991,56 @@ macro_rules! __pin_data { stringify!($($rest)*), ); }; + (make_pin_projections: + @vis($vis:vis), + @name($name:ident), + @impl_generics($($impl_generics:tt)*), + @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), + @where($($whr:tt)*), + @pinned($($(#[$($p_attr:tt)*])* $pvis:vis $p_field:ident : $p_type:ty),* $(,)?), + @not_pinned($($(#[$($attr:tt)*])* $fvis:vis $field:ident : $type:ty),* $(,)?), + ) => { + $crate::macros::paste! { + #[doc(hidden)] + $vis struct [< $name Projection >] <'__pin, $($decl_generics)*> { + $($(#[$($p_attr)*])* $pvis $p_field : ::core::pin::Pin<&'__pin mut $p_type>,)* + $($(#[$($attr)*])* $fvis $field : &'__pin mut $type,)* + ___pin_phantom_data: ::core::marker::PhantomData<&'__pin mut ()>, + } + + impl<$($impl_generics)*> $name<$($ty_generics)*> + where $($whr)* + { + /// Pin-projects all fields of `Self`. + /// + /// These fields are structurally pinned: + $(#[doc = ::core::concat!(" - `", ::core::stringify!($p_field), "`")])* + /// + /// These fields are **not** structurally pinned: + $(#[doc = ::core::concat!(" - `", ::core::stringify!($field), "`")])* + #[inline] + $vis fn project<'__pin>( + self: ::core::pin::Pin<&'__pin mut Self>, + ) -> [< $name Projection >] <'__pin, $($ty_generics)*> { + // SAFETY: we only give access to `&mut` for fields not structurally pinned. + let this = unsafe { ::core::pin::Pin::get_unchecked_mut(self) }; + [< $name Projection >] { + $( + // SAFETY: `$p_field` is structurally pinned. + $(#[$($p_attr)*])* + $p_field : unsafe { ::core::pin::Pin::new_unchecked(&mut this.$p_field) }, + )* + $( + $(#[$($attr)*])* + $field : &mut this.$field, + )* + ___pin_phantom_data: ::core::marker::PhantomData, + } + } + } + } + }; (make_pin_data: @pin_data($pin_data:ident), @impl_generics($($impl_generics:tt)*), @@ -988,38 +1049,56 @@ macro_rules! __pin_data { @pinned($($(#[$($p_attr:tt)*])* $pvis:vis $p_field:ident : $p_type:ty),* $(,)?), @not_pinned($($(#[$($attr:tt)*])* $fvis:vis $field:ident : $type:ty),* $(,)?), ) => { - // For every field, we create a projection function according to its projection type. If a - // field is structurally pinned, then it must be initialized via `PinInit`, if it is not - // structurally pinned, then it can be initialized via `Init`. - // - // The functions are `unsafe` to prevent accidentally calling them. - #[allow(dead_code)] - #[expect(clippy::missing_safety_doc)] - impl<$($impl_generics)*> $pin_data<$($ty_generics)*> - where $($whr)* - { - $( - $(#[$($p_attr)*])* - $pvis unsafe fn $p_field<E>( - self, - slot: *mut $p_type, - init: impl $crate::PinInit<$p_type, E>, - ) -> ::core::result::Result<(), E> { - // SAFETY: TODO. - unsafe { $crate::PinInit::__pinned_init(init, slot) } - } - )* - $( - $(#[$($attr)*])* - $fvis unsafe fn $field<E>( - self, - slot: *mut $type, - init: impl $crate::Init<$type, E>, - ) -> ::core::result::Result<(), E> { - // SAFETY: TODO. - unsafe { $crate::Init::__init(init, slot) } - } - )* + $crate::macros::paste! { + // For every field, we create a projection function according to its projection type. If a + // field is structurally pinned, then it must be initialized via `PinInit`, if it is not + // structurally pinned, then it can be initialized via `Init`. + // + // The functions are `unsafe` to prevent accidentally calling them. + #[allow(dead_code)] + #[expect(clippy::missing_safety_doc)] + impl<$($impl_generics)*> $pin_data<$($ty_generics)*> + where $($whr)* + { + $( + $(#[$($p_attr)*])* + $pvis unsafe fn $p_field<E>( + self, + slot: *mut $p_type, + init: impl $crate::PinInit<$p_type, E>, + ) -> ::core::result::Result<(), E> { + // SAFETY: TODO. + unsafe { $crate::PinInit::__pinned_init(init, slot) } + } + + $(#[$($p_attr)*])* + $pvis unsafe fn [<__project_ $p_field>]<'__slot>( + self, + slot: &'__slot mut $p_type, + ) -> ::core::pin::Pin<&'__slot mut $p_type> { + ::core::pin::Pin::new_unchecked(slot) + } + )* + $( + $(#[$($attr)*])* + $fvis unsafe fn $field<E>( + self, + slot: *mut $type, + init: impl $crate::Init<$type, E>, + ) -> ::core::result::Result<(), E> { + // SAFETY: TODO. + unsafe { $crate::Init::__init(init, slot) } + } + + $(#[$($attr)*])* + $fvis unsafe fn [<__project_ $field>]<'__slot>( + self, + slot: &'__slot mut $type, + ) -> &'__slot mut $type { + slot + } + )* + } } }; } @@ -1202,6 +1281,21 @@ macro_rules! __init_internal { // have been initialized. Therefore we can now dismiss the guards by forgetting them. $(::core::mem::forget($guards);)* }; + (init_slot($($use_data:ident)?): + @data($data:ident), + @slot($slot:ident), + @guards($($guards:ident,)*), + // arbitrary code block + @munch_fields(_: { $($code:tt)* }, $($rest:tt)*), + ) => { + { $($code)* } + $crate::__init_internal!(init_slot($($use_data)?): + @data($data), + @slot($slot), + @guards($($guards,)*), + @munch_fields($($rest)*), + ); + }; (init_slot($use_data:ident): // `use_data` is present, so we use the `data` to init fields. @data($data:ident), @slot($slot:ident), @@ -1216,6 +1310,13 @@ macro_rules! __init_internal { // return when an error/panic occurs. // We also use the `data` to require the correct trait (`Init` or `PinInit`) for `$field`. unsafe { $data.$field(::core::ptr::addr_of_mut!((*$slot).$field), init)? }; + // SAFETY: + // - the project function does the correct field projection, + // - the field has been initialized, + // - the reference is only valid until the end of the initializer. + #[allow(unused_variables)] + let $field = $crate::macros::paste!(unsafe { $data.[< __project_ $field >](&mut (*$slot).$field) }); + // Create the drop guard: // // We rely on macro hygiene to make it impossible for users to access this local variable. @@ -1247,6 +1348,14 @@ macro_rules! __init_internal { // SAFETY: `slot` is valid, because we are inside of an initializer closure, we // return when an error/panic occurs. unsafe { $crate::Init::__init(init, ::core::ptr::addr_of_mut!((*$slot).$field))? }; + + // SAFETY: + // - the field is not structurally pinned, since the line above must compile, + // - the field has been initialized, + // - the reference is only valid until the end of the initializer. + #[allow(unused_variables)] + let $field = unsafe { &mut (*$slot).$field }; + // Create the drop guard: // // We rely on macro hygiene to make it impossible for users to access this local variable. @@ -1265,7 +1374,48 @@ macro_rules! __init_internal { ); } }; - (init_slot($($use_data:ident)?): + (init_slot(): // No `use_data`, so all fields are not structurally pinned + @data($data:ident), + @slot($slot:ident), + @guards($($guards:ident,)*), + // Init by-value. + @munch_fields($field:ident $(: $val:expr)?, $($rest:tt)*), + ) => { + { + $(let $field = $val;)? + // Initialize the field. + // + // SAFETY: The memory at `slot` is uninitialized. + unsafe { ::core::ptr::write(::core::ptr::addr_of_mut!((*$slot).$field), $field) }; + } + + #[allow(unused_variables)] + // SAFETY: + // - the field is not structurally pinned, since no `use_data` was required to create this + // initializer, + // - the field has been initialized, + // - the reference is only valid until the end of the initializer. + let $field = unsafe { &mut (*$slot).$field }; + + // Create the drop guard: + // + // We rely on macro hygiene to make it impossible for users to access this local variable. + // We use `paste!` to create new hygiene for `$field`. + $crate::macros::paste! { + // SAFETY: We forget the guard later when initialization has succeeded. + let [< __ $field _guard >] = unsafe { + $crate::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) + }; + + $crate::__init_internal!(init_slot(): + @data($data), + @slot($slot), + @guards([< __ $field _guard >], $($guards,)*), + @munch_fields($($rest)*), + ); + } + }; + (init_slot($use_data:ident): @data($data:ident), @slot($slot:ident), @guards($($guards:ident,)*), @@ -1279,6 +1429,13 @@ macro_rules! __init_internal { // SAFETY: The memory at `slot` is uninitialized. unsafe { ::core::ptr::write(::core::ptr::addr_of_mut!((*$slot).$field), $field) }; } + // SAFETY: + // - the project function does the correct field projection, + // - the field has been initialized, + // - the reference is only valid until the end of the initializer. + #[allow(unused_variables)] + let $field = $crate::macros::paste!(unsafe { $data.[< __project_ $field >](&mut (*$slot).$field) }); + // Create the drop guard: // // We rely on macro hygiene to make it impossible for users to access this local variable. @@ -1289,7 +1446,7 @@ macro_rules! __init_internal { $crate::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) }; - $crate::__init_internal!(init_slot($($use_data)?): + $crate::__init_internal!(init_slot($use_data): @data($data), @slot($slot), @guards([< __ $field _guard >], $($guards,)*), @@ -1300,6 +1457,20 @@ macro_rules! __init_internal { (make_initializer: @slot($slot:ident), @type_name($t:path), + @munch_fields(_: { $($code:tt)* }, $($rest:tt)*), + @acc($($acc:tt)*), + ) => { + // code blocks are ignored for the initializer check + $crate::__init_internal!(make_initializer: + @slot($slot), + @type_name($t), + @munch_fields($($rest)*), + @acc($($acc)*), + ); + }; + (make_initializer: + @slot($slot:ident), + @type_name($t:path), @munch_fields(..Zeroable::init_zeroed() $(,)?), @acc($($acc:tt)*), ) => { diff --git a/rust/uapi/uapi_helper.h b/rust/uapi/uapi_helper.h index 1409441359f5..d4a239cf2a64 100644 --- a/rust/uapi/uapi_helper.h +++ b/rust/uapi/uapi_helper.h @@ -9,6 +9,7 @@ #include <uapi/asm-generic/ioctl.h> #include <uapi/drm/drm.h> #include <uapi/drm/nova_drm.h> +#include <uapi/drm/panthor_drm.h> #include <uapi/linux/mdio.h> #include <uapi/linux/mii.h> #include <uapi/linux/ethtool.h> diff --git a/samples/rust/rust_dma.rs b/samples/rust/rust_dma.rs index c5e7cce68654..04007e29fd85 100644 --- a/samples/rust/rust_dma.rs +++ b/samples/rust/rust_dma.rs @@ -7,15 +7,19 @@ use kernel::{ bindings, device::Core, - dma::{CoherentAllocation, Device, DmaMask}, - pci, + dma::{CoherentAllocation, DataDirection, Device, DmaMask}, + page, pci, prelude::*, + scatterlist::{Owned, SGTable}, types::ARef, }; +#[pin_data(PinnedDrop)] struct DmaSampleDriver { pdev: ARef<pci::Device>, ca: CoherentAllocation<MyStruct>, + #[pin] + sgt: SGTable<Owned<VVec<u8>>>, } const TEST_VALUES: [(u32, u32); 5] = [ @@ -70,21 +74,30 @@ impl pci::Driver for DmaSampleDriver { kernel::dma_write!(ca[i] = MyStruct::new(value.0, value.1))?; } - let drvdata = KBox::new( - Self { + let size = 4 * page::PAGE_SIZE; + let pages = VVec::with_capacity(size, GFP_KERNEL)?; + + let sgt = SGTable::new(pdev.as_ref(), pages, DataDirection::ToDevice, GFP_KERNEL); + + let drvdata = KBox::pin_init( + try_pin_init!(Self { pdev: pdev.into(), ca, - }, + sgt <- sgt, + }), GFP_KERNEL, )?; - Ok(drvdata.into()) + Ok(drvdata) } } -impl Drop for DmaSampleDriver { - fn drop(&mut self) { - dev_info!(self.pdev.as_ref(), "Unload DMA test driver.\n"); +#[pinned_drop] +impl PinnedDrop for DmaSampleDriver { + fn drop(self: Pin<&mut Self>) { + let dev = self.pdev.as_ref(); + + dev_info!(dev, "Unload DMA test driver.\n"); for (i, value) in TEST_VALUES.into_iter().enumerate() { let val0 = kernel::dma_read!(self.ca[i].h); @@ -99,6 +112,10 @@ impl Drop for DmaSampleDriver { assert_eq!(val1, value.1); } } + + for (i, entry) in self.sgt.iter().enumerate() { + dev_info!(dev, "Entry[{}]: DMA address: {:#x}", i, entry.dma_address()); + } } } diff --git a/samples/rust/rust_driver_pci.rs b/samples/rust/rust_driver_pci.rs index 606946ff4d7f..1ac0b06fa3b3 100644 --- a/samples/rust/rust_driver_pci.rs +++ b/samples/rust/rust_driver_pci.rs @@ -78,8 +78,8 @@ impl pci::Driver for SampleDriver { let drvdata = KBox::pin_init( try_pin_init!(Self { - pdev: pdev.into(), bar <- pdev.iomap_region_sized::<{ Regs::END }>(0, c_str!("rust_driver_pci")), + pdev: pdev.into(), index: *info, }), GFP_KERNEL, |