summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2017-03-31 11:47:18 +1000
committerDave Airlie <airlied@redhat.com>2017-03-31 11:47:18 +1000
commit8cd3ac52963f2e99f4c21d1c9ce89531ce66c2d6 (patch)
tree94f7d4526fe19a32643308d6e00d0fc5442af277
parent8bcad07a45637fb88e799466e4eee83859e8ffd3 (diff)
parent60508d3df2d2052881190ac82802a12cabcef53c (diff)
Merge branch 'drm-next-4.12' of git://people.freedesktop.org/~agd5f/linux into drm-next
New stuff for 4.12: - Preliminary vega10 support - Support for multi-level page tables - GPU sensor stuff for mesa - job tracing improvements - PRT support for sparse buffers - Additional SR-IOV improvements - ttm improvements - misc bug fixes and code cleanups * 'drm-next-4.12' of git://people.freedesktop.org/~agd5f/linux: (315 commits) drm/amdgpu: Fix 32bit x86 compilation warning drm/amdgpu: just disallow reading untouched registers drm/amdgpu: remove duplicate allowed reg CP_CPF_BUSY_STAT drm/amdgpu/soc15: enable psp block for SRIOV drm/amdgpu/soc15: bypass pp block for vf drm/amdgpu/psp: add check sOS sign drm/amd/amdgpu: Correct ring wptr address in debugfs (v2) drm/amdgpu: Fix multi-level page table bugs for large BOs v3 drm/amdgpu: Fix Vega10 VM initialization drm/amdgpu: Make max_pfn 64-bit drm/amdgpu: drop GB_GPU_ID from the golden settings drm/amdgpu: fix vm pte pde flags to 64-bit for sdma (v3) drm/amd/amdgpu: fix Tonga S3 resume hang on rhel6.8 drm/ttm: decrease ttm bo priority number drm/amd/amdgpu: fix performance drop when VRAM pressure drm/amdgpu: Couple small warning fixes drm/amdgpu: Clean up GFX 9 VM fault messages drm/amdgpu: Register UTCL2 as a source of VM faults drm/amdgpu/soc15: drop support for reading some registers drm/amdgpu/soc15: return cached values for some registers (v2) ...
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h307
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c112
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c50
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c483
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c70
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c43
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c122
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h51
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c114
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c146
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c243
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c481
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h127
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c61
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_test.c81
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h94
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c55
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c112
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c1014
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.c90
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ci_dpm.c305
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ci_dpm.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_ih.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cikd.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h941
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cz_ih.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_virtual.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c473
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c4140
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c458
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c81
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c99
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c208
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c842
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/iceland_ih.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/kv_dpm.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c615
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h87
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c207
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c102
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c266
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h54
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h269
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v3_1.c521
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v3_1.h50
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c1616
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dpm.c45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_ih.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c871
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15_common.h57
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15d.h288
-rw-r--r--drivers/gpu/drm/amd/amdgpu/tonga_ih.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c1543
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c69
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c1141
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c424
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h3335
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c67
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.h112
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vid.h1
-rw-r--r--drivers/gpu/drm/amd/include/amd_acpi.h12
-rw-r--r--drivers/gpu/drm/amd/include/amd_pcie_helpers.h4
-rw-r--r--drivers/gpu/drm/amd/include/amd_shared.h20
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_6_0_sh_mask.h4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_d.h1
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/ATHUB/athub_1_0_default.h241
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/ATHUB/athub_1_0_offset.h453
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/ATHUB/athub_1_0_sh_mask.h2045
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/DC/dce_12_0_default.h9868
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/DC/dce_12_0_offset.h18193
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/DC/dce_12_0_sh_mask.h64636
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/GC/gc_9_0_default.h3873
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/GC/gc_9_0_offset.h7230
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/GC/gc_9_0_sh_mask.h29868
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/HDP/hdp_4_0_default.h117
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/HDP/hdp_4_0_offset.h209
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/HDP/hdp_4_0_sh_mask.h601
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/MMHUB/mmhub_1_0_default.h1011
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/MMHUB/mmhub_1_0_offset.h1967
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/MMHUB/mmhub_1_0_sh_mask.h10127
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/MP/mp_9_0_default.h342
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/MP/mp_9_0_offset.h375
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/MP/mp_9_0_sh_mask.h1463
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/NBIF/nbif_6_1_default.h1271
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/NBIF/nbif_6_1_offset.h1688
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/NBIF/nbif_6_1_sh_mask.h10281
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/NBIO/nbio_6_1_default.h22340
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/NBIO/nbio_6_1_offset.h3649
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/NBIO/nbio_6_1_sh_mask.h133884
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/OSSSYS/osssys_4_0_default.h176
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/OSSSYS/osssys_4_0_offset.h327
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/OSSSYS/osssys_4_0_sh_mask.h1196
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/SDMA0/sdma0_4_0_default.h286
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/SDMA0/sdma0_4_0_offset.h547
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/SDMA0/sdma0_4_0_sh_mask.h1852
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/SDMA1/sdma1_4_0_default.h282
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/SDMA1/sdma1_4_0_offset.h539
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/SDMA1/sdma1_4_0_sh_mask.h1810
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/SMUIO/smuio_9_0_default.h100
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/SMUIO/smuio_9_0_offset.h175
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/SMUIO/smuio_9_0_sh_mask.h258
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/THM/thm_9_0_default.h194
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/THM/thm_9_0_offset.h363
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/THM/thm_9_0_sh_mask.h1314
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/UVD/uvd_7_0_default.h127
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/UVD/uvd_7_0_offset.h222
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/UVD/uvd_7_0_sh_mask.h811
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/VCE/vce_4_0_default.h122
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/VCE/vce_4_0_offset.h208
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/VCE/vce_4_0_sh_mask.h488
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/soc15ip.h1343
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vega10/vega10_enum.h22531
-rw-r--r--drivers/gpu/drm/amd/include/atomfirmware.h2385
-rw-r--r--drivers/gpu/drm/amd/include/atomfirmwareid.h86
-rw-r--r--drivers/gpu/drm/amd/include/cgs_linux.h19
-rw-r--r--drivers/gpu/drm/amd/include/displayobject.h249
-rw-r--r--drivers/gpu/drm/amd/include/dm_pp_interface.h83
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h99
-rw-r--r--drivers/gpu/drm/amd/include/v9_structs.h743
-rw-r--r--drivers/gpu/drm/amd/include/vi_structs.h106
-rw-r--r--drivers/gpu/drm/amd/powerplay/amd_powerplay.c439
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/Makefile6
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c34
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c73
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c9
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h16
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c396
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h140
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c170
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c4450
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h434
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_inc.h44
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c137
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h65
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_pptable.h331
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c1056
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h34
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c761
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h83
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h46
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h43
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/hwmgr.h137
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/pp_debug.h2
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/pp_instance.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h48
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu9.h147
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h418
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smumgr.h8
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h131
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/Makefile2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c103
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.h3
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c1
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/iceland_smc.c14
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c68
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.h2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c1
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c19
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.c67
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.h2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c1
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c564
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h70
-rw-r--r--drivers/gpu/drm/amd/scheduler/gpu_scheduler.c6
-rw-r--r--drivers/gpu/drm/amd/scheduler/gpu_scheduler.h11
-rw-r--r--drivers/gpu/drm/radeon/atom.c46
-rw-r--r--drivers/gpu/drm/radeon/cik.c63
-rw-r--r--drivers/gpu/drm/radeon/cikd.h2
-rw-r--r--drivers/gpu/drm/radeon/evergreen.c2
-rw-r--r--drivers/gpu/drm/radeon/evergreen_cs.c7
-rw-r--r--drivers/gpu/drm/radeon/ni.c22
-rw-r--r--drivers/gpu/drm/radeon/r100.c18
-rw-r--r--drivers/gpu/drm/radeon/r200.c3
-rw-r--r--drivers/gpu/drm/radeon/r300.c13
-rw-r--r--drivers/gpu/drm/radeon/r420.c9
-rw-r--r--drivers/gpu/drm/radeon/r520.c3
-rw-r--r--drivers/gpu/drm/radeon/r600.c21
-rw-r--r--drivers/gpu/drm/radeon/r600_cs.c7
-rw-r--r--drivers/gpu/drm/radeon/r600_dpm.c71
-rw-r--r--drivers/gpu/drm/radeon/radeon.h3
-rw-r--r--drivers/gpu/drm/radeon/radeon_acpi.h12
-rw-r--r--drivers/gpu/drm/radeon/radeon_atpx_handler.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_audio.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_clocks.c2
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c13
-rw-r--r--drivers/gpu/drm/radeon/radeon_dp_auxch.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.c6
-rw-r--r--drivers/gpu/drm/radeon/radeon_fb.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_kms.c42
-rw-r--r--drivers/gpu/drm/radeon/radeon_test.c6
-rw-r--r--drivers/gpu/drm/radeon/rs400.c4
-rw-r--r--drivers/gpu/drm/radeon/rs690.c3
-rw-r--r--drivers/gpu/drm/radeon/rv515.c9
-rw-r--r--drivers/gpu/drm/radeon/si.c45
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c67
-rw-r--r--include/drm/ttm/ttm_bo_api.h60
-rw-r--r--include/drm/ttm/ttm_bo_driver.h2
-rw-r--r--include/uapi/drm/amdgpu_drm.h69
253 files changed, 398656 insertions, 1916 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2814aad81752..660786aba7d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
- amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
+ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o
# add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
@@ -34,12 +34,13 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
amdgpu-y += \
- vi.o mxgpu_vi.o
+ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o
# add GMC block
amdgpu-y += \
gmc_v7_0.o \
- gmc_v8_0.o
+ gmc_v8_0.o \
+ gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o
# add IH block
amdgpu-y += \
@@ -47,7 +48,13 @@ amdgpu-y += \
amdgpu_ih.o \
iceland_ih.o \
tonga_ih.o \
- cz_ih.o
+ cz_ih.o \
+ vega10_ih.o
+
+# add PSP block
+amdgpu-y += \
+ amdgpu_psp.o \
+ psp_v3_1.o
# add SMC block
amdgpu-y += \
@@ -63,23 +70,27 @@ amdgpu-y += \
# add GFX block
amdgpu-y += \
amdgpu_gfx.o \
- gfx_v8_0.o
+ gfx_v8_0.o \
+ gfx_v9_0.o
# add async DMA block
amdgpu-y += \
sdma_v2_4.o \
- sdma_v3_0.o
+ sdma_v3_0.o \
+ sdma_v4_0.o
# add UVD block
amdgpu-y += \
amdgpu_uvd.o \
uvd_v5_0.o \
- uvd_v6_0.o
+ uvd_v6_0.o \
+ uvd_v7_0.o
# add VCE block
amdgpu-y += \
amdgpu_vce.o \
- vce_v3_0.o
+ vce_v3_0.o \
+ vce_v4_0.o
# add amdkfd interfaces
amdgpu-y += \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c1b913541739..262056778f52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -52,6 +52,7 @@
#include "amdgpu_irq.h"
#include "amdgpu_ucode.h"
#include "amdgpu_ttm.h"
+#include "amdgpu_psp.h"
#include "amdgpu_gds.h"
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
@@ -59,6 +60,8 @@
#include "amd_powerplay.h"
#include "amdgpu_dpm.h"
#include "amdgpu_acp.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
#include "gpu_scheduler.h"
#include "amdgpu_virt.h"
@@ -79,7 +82,7 @@ extern int amdgpu_pcie_gen2;
extern int amdgpu_msi;
extern int amdgpu_lockup_timeout;
extern int amdgpu_dpm;
-extern int amdgpu_smc_load_fw;
+extern int amdgpu_fw_load_type;
extern int amdgpu_aspm;
extern int amdgpu_runtime_pm;
extern unsigned amdgpu_ip_block_mask;
@@ -101,6 +104,11 @@ extern char *amdgpu_disable_cu;
extern char *amdgpu_virtual_display;
extern unsigned amdgpu_pp_feature_mask;
extern int amdgpu_vram_page_split;
+extern int amdgpu_ngg;
+extern int amdgpu_prim_buf_per_se;
+extern int amdgpu_pos_buf_per_se;
+extern int amdgpu_cntl_sb_buf_per_se;
+extern int amdgpu_param_buf_per_se;
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
@@ -109,11 +117,16 @@ extern int amdgpu_vram_page_split;
#define AMDGPU_IB_POOL_SIZE 16
#define AMDGPU_DEBUGFS_MAX_COMPONENTS 32
#define AMDGPUFB_CONN_LIMIT 4
-#define AMDGPU_BIOS_NUM_SCRATCH 8
+#define AMDGPU_BIOS_NUM_SCRATCH 16
/* max number of IP instances */
#define AMDGPU_MAX_SDMA_INSTANCES 2
+/* max number of VMHUB */
+#define AMDGPU_MAX_VMHUBS 2
+#define AMDGPU_MMHUB 0
+#define AMDGPU_GFXHUB 1
+
/* hardcode that limit for now */
#define AMDGPU_VA_RESERVED_SIZE (8 << 20)
@@ -280,7 +293,7 @@ struct amdgpu_vm_pte_funcs {
void (*set_pte_pde)(struct amdgpu_ib *ib,
uint64_t pe,
uint64_t addr, unsigned count,
- uint32_t incr, uint32_t flags);
+ uint32_t incr, uint64_t flags);
};
/* provided by the gmc block */
@@ -293,7 +306,18 @@ struct amdgpu_gart_funcs {
void *cpu_pt_addr, /* cpu addr of page table */
uint32_t gpu_page_idx, /* pte/pde to update */
uint64_t addr, /* addr to write into pte/pde */
- uint32_t flags); /* access flags */
+ uint64_t flags); /* access flags */
+ /* enable/disable PRT support */
+ void (*set_prt)(struct amdgpu_device *adev, bool enable);
+ /* set pte flags based per asic */
+ uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
+ uint32_t flags);
+};
+
+/* provided by the mc block */
+struct amdgpu_mc_funcs {
+ /* adjust mc addr in fb for APU case */
+ u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr);
};
/* provided by the ih block */
@@ -522,6 +546,10 @@ struct amdgpu_gart {
struct page **pages;
#endif
bool ready;
+
+ /* Asic default pte flags */
+ uint64_t gart_pte_flags;
+
const struct amdgpu_gart_funcs *gart_funcs;
};
@@ -537,10 +565,25 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
int pages);
int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
int pages, struct page **pagelist,
- dma_addr_t *dma_addr, uint32_t flags);
+ dma_addr_t *dma_addr, uint64_t flags);
int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
/*
+ * VMHUB structures, functions & helpers
+ */
+struct amdgpu_vmhub {
+ uint32_t ctx0_ptb_addr_lo32;
+ uint32_t ctx0_ptb_addr_hi32;
+ uint32_t vm_inv_eng0_req;
+ uint32_t vm_inv_eng0_ack;
+ uint32_t vm_context0_cntl;
+ uint32_t vm_l2_pro_fault_status;
+ uint32_t vm_l2_pro_fault_cntl;
+ uint32_t (*get_invalidate_req)(unsigned int vm_id);
+ uint32_t (*get_vm_protection_bits)(void);
+};
+
+/*
* GPU MC structures, functions & helpers
*/
struct amdgpu_mc {
@@ -567,6 +610,15 @@ struct amdgpu_mc {
uint32_t vram_type;
uint32_t srbm_soft_reset;
struct amdgpu_mode_mc_save save;
+ bool prt_warning;
+ /* apertures */
+ u64 shared_aperture_start;
+ u64 shared_aperture_end;
+ u64 private_aperture_start;
+ u64 private_aperture_end;
+ /* protects concurrent invalidation */
+ spinlock_t invalidate_lock;
+ const struct amdgpu_mc_funcs *mc_funcs;
};
/*
@@ -601,6 +653,83 @@ struct amdgpu_doorbell {
u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */
};
+/*
+ * 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space
+ */
+typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
+{
+ /*
+ * All compute related doorbells: kiq, hiq, diq, traditional compute queue, user queue, should locate in
+ * a continues range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range.
+ * Compute related doorbells are allocated from 0x00 to 0x8a
+ */
+
+
+ /* kernel scheduling */
+ AMDGPU_DOORBELL64_KIQ = 0x00,
+
+ /* HSA interface queue and debug queue */
+ AMDGPU_DOORBELL64_HIQ = 0x01,
+ AMDGPU_DOORBELL64_DIQ = 0x02,
+
+ /* Compute engines */
+ AMDGPU_DOORBELL64_MEC_RING0 = 0x03,
+ AMDGPU_DOORBELL64_MEC_RING1 = 0x04,
+ AMDGPU_DOORBELL64_MEC_RING2 = 0x05,
+ AMDGPU_DOORBELL64_MEC_RING3 = 0x06,
+ AMDGPU_DOORBELL64_MEC_RING4 = 0x07,
+ AMDGPU_DOORBELL64_MEC_RING5 = 0x08,
+ AMDGPU_DOORBELL64_MEC_RING6 = 0x09,
+ AMDGPU_DOORBELL64_MEC_RING7 = 0x0a,
+
+ /* User queue doorbell range (128 doorbells) */
+ AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b,
+ AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a,
+
+ /* Graphics engine */
+ AMDGPU_DOORBELL64_GFX_RING0 = 0x8b,
+
+ /*
+ * Other graphics doorbells can be allocated here: from 0x8c to 0xef
+ * Graphics voltage island aperture 1
+ * default non-graphics QWORD index is 0xF0 - 0xFF inclusive
+ */
+
+ /* sDMA engines */
+ AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xF0,
+ AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1,
+ AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xF2,
+ AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3,
+
+ /* Interrupt handler */
+ AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */
+ AMDGPU_DOORBELL64_IH_RING1 = 0xF5, /* For page migration request log */
+ AMDGPU_DOORBELL64_IH_RING2 = 0xF6, /* For page migration translation/invalidation log */
+
+ /* VCN engine use 32 bits doorbell */
+ AMDGPU_DOORBELL64_VCN0_1 = 0xF8, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */
+ AMDGPU_DOORBELL64_VCN2_3 = 0xF9,
+ AMDGPU_DOORBELL64_VCN4_5 = 0xFA,
+ AMDGPU_DOORBELL64_VCN6_7 = 0xFB,
+
+ /* overlap the doorbell assignment with VCN as they are mutually exclusive
+ * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD
+ */
+ AMDGPU_DOORBELL64_RING0_1 = 0xF8,
+ AMDGPU_DOORBELL64_RING2_3 = 0xF9,
+ AMDGPU_DOORBELL64_RING4_5 = 0xFA,
+ AMDGPU_DOORBELL64_RING6_7 = 0xFB,
+
+ AMDGPU_DOORBELL64_UVD_RING0_1 = 0xFC,
+ AMDGPU_DOORBELL64_UVD_RING2_3 = 0xFD,
+ AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFE,
+ AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFF,
+
+ AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF,
+ AMDGPU_DOORBELL64_INVALID = 0xFFFF
+} AMDGPU_DOORBELL64_ASSIGNMENT;
+
+
void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
phys_addr_t *aperture_base,
size_t *aperture_size,
@@ -699,6 +828,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
struct amdgpu_fpriv {
struct amdgpu_vm vm;
+ struct amdgpu_bo_va *prt_va;
struct mutex bo_list_lock;
struct idr bo_list_handles;
struct amdgpu_ctx_mgr ctx_mgr;
@@ -776,9 +906,12 @@ struct amdgpu_rlc {
struct amdgpu_mec {
struct amdgpu_bo *hpd_eop_obj;
u64 hpd_eop_gpu_addr;
+ struct amdgpu_bo *mec_fw_obj;
+ u64 mec_fw_gpu_addr;
u32 num_pipe;
u32 num_mec;
u32 num_queue;
+ void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
};
struct amdgpu_kiq {
@@ -810,7 +943,16 @@ struct amdgpu_rb_config {
uint32_t raster_config_1;
};
-struct amdgpu_gca_config {
+struct gb_addr_config {
+ uint16_t pipe_interleave_size;
+ uint8_t num_pipes;
+ uint8_t max_compress_frags;
+ uint8_t num_banks;
+ uint8_t num_se;
+ uint8_t num_rb_per_se;
+};
+
+struct amdgpu_gfx_config {
unsigned max_shader_engines;
unsigned max_tile_pipes;
unsigned max_cu_per_sh;
@@ -839,7 +981,11 @@ struct amdgpu_gca_config {
uint32_t tile_mode_array[32];
uint32_t macrotile_mode_array[16];
+ struct gb_addr_config gb_addr_config_fields;
struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
+
+ /* gfx configure feature */
+ uint32_t double_offchip_lds_buf;
};
struct amdgpu_cu_info {
@@ -857,9 +1003,31 @@ struct amdgpu_gfx_funcs {
void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst);
};
+struct amdgpu_ngg_buf {
+ struct amdgpu_bo *bo;
+ uint64_t gpu_addr;
+ uint32_t size;
+ uint32_t bo_size;
+};
+
+enum {
+ PRIM = 0,
+ POS,
+ CNTL,
+ PARAM,
+ NGG_BUF_MAX
+};
+
+struct amdgpu_ngg {
+ struct amdgpu_ngg_buf buf[NGG_BUF_MAX];
+ uint32_t gds_reserve_addr;
+ uint32_t gds_reserve_size;
+ bool init;
+};
+
struct amdgpu_gfx {
struct mutex gpu_clock_mutex;
- struct amdgpu_gca_config config;
+ struct amdgpu_gfx_config config;
struct amdgpu_rlc rlc;
struct amdgpu_mec mec;
struct amdgpu_kiq kiq;
@@ -899,6 +1067,9 @@ struct amdgpu_gfx {
/* reset mask */
uint32_t grbm_soft_reset;
uint32_t srbm_soft_reset;
+ bool in_reset;
+ /* NGG */
+ struct amdgpu_ngg ngg;
};
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
@@ -1007,67 +1178,12 @@ struct amdgpu_wb {
int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb);
void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb);
+int amdgpu_wb_get_64bit(struct amdgpu_device *adev, u32 *wb);
+void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb);
void amdgpu_get_pcie_info(struct amdgpu_device *adev);
/*
- * UVD
- */
-#define AMDGPU_DEFAULT_UVD_HANDLES 10
-#define AMDGPU_MAX_UVD_HANDLES 40
-#define AMDGPU_UVD_STACK_SIZE (200*1024)
-#define AMDGPU_UVD_HEAP_SIZE (256*1024)
-#define AMDGPU_UVD_SESSION_SIZE (50*1024)
-#define AMDGPU_UVD_FIRMWARE_OFFSET 256
-
-struct amdgpu_uvd {
- struct amdgpu_bo *vcpu_bo;
- void *cpu_addr;
- uint64_t gpu_addr;
- unsigned fw_version;
- void *saved_bo;
- unsigned max_handles;
- atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
- struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
- struct delayed_work idle_work;
- const struct firmware *fw; /* UVD firmware */
- struct amdgpu_ring ring;
- struct amdgpu_irq_src irq;
- bool address_64_bit;
- bool use_ctx_buf;
- struct amd_sched_entity entity;
- uint32_t srbm_soft_reset;
-};
-
-/*
- * VCE
- */
-#define AMDGPU_MAX_VCE_HANDLES 16
-#define AMDGPU_VCE_FIRMWARE_OFFSET 256
-
-#define AMDGPU_VCE_HARVEST_VCE0 (1 << 0)
-#define AMDGPU_VCE_HARVEST_VCE1 (1 << 1)
-
-struct amdgpu_vce {
- struct amdgpu_bo *vcpu_bo;
- uint64_t gpu_addr;
- unsigned fw_version;
- unsigned fb_version;
- atomic_t handles[AMDGPU_MAX_VCE_HANDLES];
- struct drm_file *filp[AMDGPU_MAX_VCE_HANDLES];
- uint32_t img_size[AMDGPU_MAX_VCE_HANDLES];
- struct delayed_work idle_work;
- struct mutex idle_mutex;
- const struct firmware *fw; /* VCE firmware */
- struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS];
- struct amdgpu_irq_src irq;
- unsigned harvest_config;
- struct amd_sched_entity entity;
- uint32_t srbm_soft_reset;
- unsigned num_rings;
-};
-
-/*
* SDMA
*/
struct amdgpu_sdma_instance {
@@ -1095,11 +1211,22 @@ struct amdgpu_sdma {
/*
* Firmware
*/
+enum amdgpu_firmware_load_type {
+ AMDGPU_FW_LOAD_DIRECT = 0,
+ AMDGPU_FW_LOAD_SMU,
+ AMDGPU_FW_LOAD_PSP,
+};
+
struct amdgpu_firmware {
struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM];
- bool smu_load;
+ enum amdgpu_firmware_load_type load_type;
struct amdgpu_bo *fw_buf;
unsigned int fw_size;
+ unsigned int max_ucodes;
+ /* firmwares are loaded by psp instead of smu from vega10 */
+ const struct amdgpu_psp_funcs *funcs;
+ struct amdgpu_bo *rbuf;
+ struct mutex mutex;
};
/*
@@ -1112,10 +1239,6 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
* Testing
*/
void amdgpu_test_moves(struct amdgpu_device *adev);
-void amdgpu_test_ring_sync(struct amdgpu_device *adev,
- struct amdgpu_ring *cpA,
- struct amdgpu_ring *cpB);
-void amdgpu_test_syncing(struct amdgpu_device *adev);
/*
* MMU Notifier
@@ -1202,6 +1325,8 @@ struct amdgpu_asic_funcs {
/* static power management */
int (*get_pcie_lanes)(struct amdgpu_device *adev);
void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
+ /* get config memsize register */
+ u32 (*get_config_memsize)(struct amdgpu_device *adev);
};
/*
@@ -1342,9 +1467,11 @@ struct amdgpu_device {
bool have_disp_power_ref;
/* BIOS */
+ bool is_atom_fw;
uint8_t *bios;
uint32_t bios_size;
struct amdgpu_bo *stollen_vga_memory;
+ uint32_t bios_scratch_reg_offset;
uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH];
/* Register/doorbell mmio */
@@ -1391,6 +1518,7 @@ struct amdgpu_device {
struct amdgpu_gart gart;
struct amdgpu_dummy_page dummy_page;
struct amdgpu_vm_manager vm_manager;
+ struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS];
/* memory management */
struct amdgpu_mman mman;
@@ -1457,6 +1585,9 @@ struct amdgpu_device {
/* firmwares */
struct amdgpu_firmware firmware;
+ /* PSP */
+ struct psp_context psp;
+
/* GDS */
struct amdgpu_gds gds;
@@ -1501,23 +1632,32 @@ void amdgpu_device_fini(struct amdgpu_device *adev);
int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
- bool always_indirect);
+ uint32_t acc_flags);
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
- bool always_indirect);
+ uint32_t acc_flags);
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
+u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
+void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
/*
* Registers read & write functions.
*/
-#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), false)
-#define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), true)
-#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), false))
-#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), false)
-#define WREG32_IDX(reg, v) amdgpu_mm_wreg(adev, (reg), (v), true)
+
+#define AMDGPU_REGS_IDX (1<<0)
+#define AMDGPU_REGS_NO_KIQ (1<<1)
+
+#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
+#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
+
+#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0)
+#define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX)
+#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0))
+#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0)
+#define WREG32_IDX(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_IDX)
#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
@@ -1556,6 +1696,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
+#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
+#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))
#define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT
#define REG_FIELD_MASK(reg, field) reg##__##field##_MASK
@@ -1584,7 +1726,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
{
if (ring->count_dw <= 0)
DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
- ring->ring[ring->wptr++] = v;
+ ring->ring[ring->wptr++ & ring->buf_mask] = v;
ring->wptr &= ring->ptr_mask;
ring->count_dw--;
}
@@ -1597,9 +1739,9 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr
if (ring->count_dw < count_dw) {
DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
} else {
- occupied = ring->wptr & ring->ptr_mask;
+ occupied = ring->wptr & ring->buf_mask;
dst = (void *)&ring->ring[occupied];
- chunk1 = ring->ptr_mask + 1 - occupied;
+ chunk1 = ring->buf_mask + 1 - occupied;
chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
chunk2 = count_dw - chunk1;
chunk1 <<= 2;
@@ -1650,11 +1792,13 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
#define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
#define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
+#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
#define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
#define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
+#define amdgpu_vm_get_pte_flags(adev, flags) (adev)->gart.gart_funcs->get_vm_pte_flags((adev),(flags))
#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
@@ -1698,6 +1842,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
#define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
+#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
/* Common functions */
int amdgpu_gpu_reset(struct amdgpu_device *adev);
@@ -1723,7 +1868,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
int *last_invalidated);
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
-uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
+uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
struct ttm_mem_reg *mem);
void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base);
void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc);
@@ -1762,8 +1907,6 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev);
int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
void amdgpu_driver_postclose_kms(struct drm_device *dev,
struct drm_file *file_priv);
-void amdgpu_driver_preclose_kms(struct drm_device *dev,
- struct drm_file *file_priv);
int amdgpu_suspend(struct amdgpu_device *adev);
int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon);
int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
index 857ba0897159..3889486f71fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
@@ -74,9 +74,9 @@ static void amdgpu_afmt_calc_cts(uint32_t clock, int *CTS, int *N, int freq)
/* Check that we are in spec (not always possible) */
if (n < (128*freq/1500))
- printk(KERN_WARNING "Calculated ACR N value is too small. You may experience audio problems.\n");
+ pr_warn("Calculated ACR N value is too small. You may experience audio problems.\n");
if (n > (128*freq/300))
- printk(KERN_WARNING "Calculated ACR N value is too large. You may experience audio problems.\n");
+ pr_warn("Calculated ACR N value is too large. You may experience audio problems.\n");
*N = n;
*CTS = cts;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 56a86dd5789e..f52b1bf3d3d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1748,3 +1748,31 @@ void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
memcpy(dst, src, num_bytes);
#endif
}
+
+int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ int index = GetIndexIntoMasterTable(DATA, VRAM_UsageByFirmware);
+ uint16_t data_offset;
+ int usage_bytes = 0;
+ struct _ATOM_VRAM_USAGE_BY_FIRMWARE *firmware_usage;
+
+ if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
+ firmware_usage = (struct _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + data_offset);
+
+ DRM_DEBUG("atom firmware requested %08x %dkb\n",
+ le32_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware),
+ le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb));
+
+ usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024;
+ }
+ ctx->scratch_size_bytes = 0;
+ if (usage_bytes == 0)
+ usage_bytes = 20 * 1024;
+ /* allocate some scratch memory */
+ ctx->scratch = kzalloc(usage_bytes, GFP_KERNEL);
+ if (!ctx->scratch)
+ return -ENOMEM;
+ ctx->scratch_size_bytes = usage_bytes;
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
index 70e9acef5d9c..4e0f488487f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
@@ -215,4 +215,7 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
u8 voltage_type,
u8 *svd_gpio_id, u8 *svc_gpio_id);
+
+int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
new file mode 100644
index 000000000000..4b9abd68e04f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <drm/drmP.h>
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "atomfirmware.h"
+#include "amdgpu_atomfirmware.h"
+#include "atom.h"
+
+#define get_index_into_master_table(master_table, table_name) (offsetof(struct master_table, table_name) / sizeof(uint16_t))
+
+bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev)
+{
+ int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+ uint16_t data_offset;
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL,
+ NULL, NULL, &data_offset)) {
+ struct atom_firmware_info_v3_1 *firmware_info =
+ (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios +
+ data_offset);
+
+ if (le32_to_cpu(firmware_info->firmware_capability) &
+ ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION)
+ return true;
+ }
+ return false;
+}
+
+void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev)
+{
+ int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+ uint16_t data_offset;
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL,
+ NULL, NULL, &data_offset)) {
+ struct atom_firmware_info_v3_1 *firmware_info =
+ (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios +
+ data_offset);
+
+ adev->bios_scratch_reg_offset =
+ le32_to_cpu(firmware_info->bios_scratch_reg_startaddr);
+ }
+}
+
+void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++)
+ adev->bios_scratch[i] = RREG32(adev->bios_scratch_reg_offset + i);
+}
+
+void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++)
+ WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]);
+}
+
+int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ vram_usagebyfirmware);
+ uint16_t data_offset;
+ int usage_bytes = 0;
+
+ if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
+ struct vram_usagebyfirmware_v2_1 *firmware_usage =
+ (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
+
+ DRM_DEBUG("atom firmware requested %08x %dkb fw %dkb drv\n",
+ le32_to_cpu(firmware_usage->start_address_in_kb),
+ le16_to_cpu(firmware_usage->used_by_firmware_in_kb),
+ le16_to_cpu(firmware_usage->used_by_driver_in_kb));
+
+ usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) * 1024;
+ }
+ ctx->scratch_size_bytes = 0;
+ if (usage_bytes == 0)
+ usage_bytes = 20 * 1024;
+ /* allocate some scratch memory */
+ ctx->scratch = kzalloc(usage_bytes, GFP_KERNEL);
+ if (!ctx->scratch)
+ return -ENOMEM;
+ ctx->scratch_size_bytes = usage_bytes;
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
new file mode 100644
index 000000000000..d0c4dcd7fa96
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_ATOMFIRMWARE_H__
+#define __AMDGPU_ATOMFIRMWARE_H__
+
+bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev);
+void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
+void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev);
+void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 6c343a933182..c13c51af0b68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -583,8 +583,8 @@ static bool amdgpu_atpx_detect(void)
if (has_atpx && vga_count == 2) {
acpi_get_name(amdgpu_atpx_priv.atpx.handle, ACPI_FULL_PATHNAME, &buffer);
- printk(KERN_INFO "vga_switcheroo: detected switching method %s handle\n",
- acpi_method_name);
+ pr_info("vga_switcheroo: detected switching method %s handle\n",
+ acpi_method_name);
amdgpu_atpx_priv.atpx_detected = true;
amdgpu_atpx_priv.bridge_pm_usable = d3_supported;
amdgpu_atpx_init();
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 821f7cc2051f..365e735f6647 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -86,6 +86,18 @@ static bool check_atom_bios(uint8_t *bios, size_t size)
return false;
}
+static bool is_atom_fw(uint8_t *bios)
+{
+ uint16_t bios_header_start = bios[0x48] | (bios[0x49] << 8);
+ uint8_t frev = bios[bios_header_start + 2];
+ uint8_t crev = bios[bios_header_start + 3];
+
+ if ((frev < 3) ||
+ ((frev == 3) && (crev < 3)))
+ return false;
+
+ return true;
+}
/* If you boot an IGP board with a discrete card as the primary,
* the IGP rom is not accessible via the rom bar as the IGP rom is
@@ -419,26 +431,30 @@ static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
bool amdgpu_get_bios(struct amdgpu_device *adev)
{
if (amdgpu_atrm_get_bios(adev))
- return true;
+ goto success;
if (amdgpu_acpi_vfct_bios(adev))
- return true;
+ goto success;
if (igp_read_bios_from_vram(adev))
- return true;
+ goto success;
if (amdgpu_read_bios(adev))
- return true;
+ goto success;
if (amdgpu_read_bios_from_rom(adev))
- return true;
+ goto success;
if (amdgpu_read_disabled_bios(adev))
- return true;
+ goto success;
if (amdgpu_read_platform_bios(adev))
- return true;
+ goto success;
DRM_ERROR("Unable to locate a BIOS ROM\n");
return false;
+
+success:
+ adev->is_atom_fw = is_atom_fw(adev->bios);
+ return true;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index d9e5aa4a79ef..1c7e6c28f93a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -571,7 +571,9 @@ static const struct amdgpu_irq_src_funcs cgs_irq_funcs = {
.process = cgs_process_irq,
};
-static int amdgpu_cgs_add_irq_source(struct cgs_device *cgs_device, unsigned src_id,
+static int amdgpu_cgs_add_irq_source(void *cgs_device,
+ unsigned client_id,
+ unsigned src_id,
unsigned num_types,
cgs_irq_source_set_func_t set,
cgs_irq_handler_func_t handler,
@@ -597,7 +599,7 @@ static int amdgpu_cgs_add_irq_source(struct cgs_device *cgs_device, unsigned src
irq_params->handler = handler;
irq_params->private_data = private_data;
source->data = (void *)irq_params;
- ret = amdgpu_irq_add_id(adev, src_id, source);
+ ret = amdgpu_irq_add_id(adev, client_id, src_id, source);
if (ret) {
kfree(irq_params);
kfree(source);
@@ -606,16 +608,26 @@ static int amdgpu_cgs_add_irq_source(struct cgs_device *cgs_device, unsigned src
return ret;
}
-static int amdgpu_cgs_irq_get(struct cgs_device *cgs_device, unsigned src_id, unsigned type)
+static int amdgpu_cgs_irq_get(void *cgs_device, unsigned client_id,
+ unsigned src_id, unsigned type)
{
CGS_FUNC_ADEV;
- return amdgpu_irq_get(adev, adev->irq.sources[src_id], type);
+
+ if (!adev->irq.client[client_id].sources)
+ return -EINVAL;
+
+ return amdgpu_irq_get(adev, adev->irq.client[client_id].sources[src_id], type);
}
-static int amdgpu_cgs_irq_put(struct cgs_device *cgs_device, unsigned src_id, unsigned type)
+static int amdgpu_cgs_irq_put(void *cgs_device, unsigned client_id,
+ unsigned src_id, unsigned type)
{
CGS_FUNC_ADEV;
- return amdgpu_irq_put(adev, adev->irq.sources[src_id], type);
+
+ if (!adev->irq.client[client_id].sources)
+ return -EINVAL;
+
+ return amdgpu_irq_put(adev, adev->irq.client[client_id].sources[src_id], type);
}
static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device,
@@ -825,9 +837,8 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
uint32_t ucode_start_address;
const uint8_t *src;
const struct smc_firmware_header_v1_0 *hdr;
-
- if (CGS_UCODE_ID_SMU_SK == type)
- amdgpu_cgs_rel_firmware(cgs_device, CGS_UCODE_ID_SMU);
+ const struct common_firmware_header *header;
+ struct amdgpu_firmware_info *ucode = NULL;
if (!adev->pm.fw) {
switch (adev->asic_type) {
@@ -889,6 +900,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
case CHIP_POLARIS12:
strcpy(fw_name, "amdgpu/polaris12_smc.bin");
break;
+ case CHIP_VEGA10:
+ strcpy(fw_name, "amdgpu/vega10_smc.bin");
+ break;
default:
DRM_ERROR("SMC firmware not supported\n");
return -EINVAL;
@@ -907,6 +921,15 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
adev->pm.fw = NULL;
return err;
}
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC];
+ ucode->ucode_id = AMDGPU_UCODE_ID_SMC;
+ ucode->fw = adev->pm.fw;
+ header = (const struct common_firmware_header *)ucode->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ }
}
hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 99424cb8020b..97f661372a1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -82,6 +82,15 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
return -EINVAL;
}
break;
+ case AMDGPU_HW_IP_UVD_ENC:
+ if (ring < adev->uvd.num_enc_rings){
+ *out_ring = &adev->uvd.ring_enc[ring];
+ } else {
+ DRM_ERROR("only %d UVD ENC rings are supported\n",
+ adev->uvd.num_enc_rings);
+ return -EINVAL;
+ }
+ break;
}
if (!(*out_ring && (*out_ring)->adev)) {
@@ -759,23 +768,33 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
amdgpu_bo_unref(&parser->uf_entry.robj);
}
-static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
- struct amdgpu_vm *vm)
+static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
{
struct amdgpu_device *adev = p->adev;
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va *bo_va;
struct amdgpu_bo *bo;
int i, r;
- r = amdgpu_vm_update_page_directory(adev, vm);
+ r = amdgpu_vm_update_directories(adev, vm);
+ if (r)
+ return r;
+
+ r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_dir_update);
if (r)
return r;
- r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence);
+ r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
return r;
- r = amdgpu_vm_clear_freed(adev, vm);
+ r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+ if (r)
+ return r;
+
+ r = amdgpu_sync_fence(adev, &p->job->sync,
+ fpriv->prt_va->last_pt_update);
if (r)
return r;
@@ -853,9 +872,9 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
}
if (p->job->vm) {
- p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
+ p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.bo);
- r = amdgpu_bo_vm_update_pte(p, vm);
+ r = amdgpu_bo_vm_update_pte(p);
if (r)
return r;
}
@@ -869,7 +888,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
int i, j;
- int r;
+ int r, ce_preempt = 0, de_preempt = 0;
for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
struct amdgpu_cs_chunk *chunk;
@@ -884,13 +903,26 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
continue;
+ if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) {
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
+ ce_preempt++;
+ else
+ de_preempt++;
+ }
+
+ /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
+ if (ce_preempt > 1 || de_preempt > 1)
+ return -EINVAL;
+ }
+
r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
chunk_ib->ip_instance, chunk_ib->ring,
&ring);
if (r)
return r;
- if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
if (!parser->ctx->preamble_presented) {
parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index de0cf3315484..93061a439dbc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -40,6 +40,7 @@
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
+#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
@@ -48,9 +49,11 @@
#include "cik.h"
#endif
#include "vi.h"
+#include "soc15.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
+#include "amdgpu_pm.h"
static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev);
@@ -74,6 +77,7 @@ static const char *amdgpu_asic_name[] = {
"POLARIS10",
"POLARIS11",
"POLARIS12",
+ "VEGA10",
"LAST",
};
@@ -90,16 +94,16 @@ bool amdgpu_device_is_px(struct drm_device *dev)
* MMIO register access helper functions.
*/
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
- bool always_indirect)
+ uint32_t acc_flags)
{
uint32_t ret;
- if (amdgpu_sriov_runtime(adev)) {
+ if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
BUG_ON(in_interrupt());
return amdgpu_virt_kiq_rreg(adev, reg);
}
- if ((reg * 4) < adev->rmmio_size && !always_indirect)
+ if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
else {
unsigned long flags;
@@ -114,16 +118,16 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
}
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
- bool always_indirect)
+ uint32_t acc_flags)
{
trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
- if (amdgpu_sriov_runtime(adev)) {
+ if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
BUG_ON(in_interrupt());
return amdgpu_virt_kiq_wreg(adev, reg, v);
}
- if ((reg * 4) < adev->rmmio_size && !always_indirect)
+ if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
else {
unsigned long flags;
@@ -195,6 +199,44 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
}
/**
+ * amdgpu_mm_rdoorbell64 - read a doorbell Qword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ *
+ * Returns the value in the doorbell aperture at the
+ * requested doorbell index (VEGA10+).
+ */
+u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
+{
+ if (index < adev->doorbell.num_doorbells) {
+ return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
+ } else {
+ DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
+ return 0;
+ }
+}
+
+/**
+ * amdgpu_mm_wdoorbell64 - write a doorbell Qword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ * @v: value to write
+ *
+ * Writes @v to the doorbell aperture at the
+ * requested doorbell index (VEGA10+).
+ */
+void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
+{
+ if (index < adev->doorbell.num_doorbells) {
+ atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
+ } else {
+ DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
+ }
+}
+
+/**
* amdgpu_invalid_rreg - dummy reg read function
*
* @adev: amdgpu device pointer
@@ -516,6 +558,29 @@ int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb)
}
/**
+ * amdgpu_wb_get_64bit - Allocate a wb entry
+ *
+ * @adev: amdgpu_device pointer
+ * @wb: wb index
+ *
+ * Allocate a wb slot for use by the driver (all asics).
+ * Returns 0 on success or -EINVAL on failure.
+ */
+int amdgpu_wb_get_64bit(struct amdgpu_device *adev, u32 *wb)
+{
+ unsigned long offset = bitmap_find_next_zero_area_off(adev->wb.used,
+ adev->wb.num_wb, 0, 2, 7, 0);
+ if ((offset + 1) < adev->wb.num_wb) {
+ __set_bit(offset, adev->wb.used);
+ __set_bit(offset + 1, adev->wb.used);
+ *wb = offset;
+ return 0;
+ } else {
+ return -EINVAL;
+ }
+}
+
+/**
* amdgpu_wb_free - Free a wb entry
*
* @adev: amdgpu_device pointer
@@ -530,6 +595,22 @@ void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb)
}
/**
+ * amdgpu_wb_free_64bit - Free a wb entry
+ *
+ * @adev: amdgpu_device pointer
+ * @wb: wb index
+ *
+ * Free a wb slot allocated for use by the driver (all asics)
+ */
+void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb)
+{
+ if ((wb + 1) < adev->wb.num_wb) {
+ __clear_bit(wb, adev->wb.used);
+ __clear_bit(wb + 1, adev->wb.used);
+ }
+}
+
+/**
* amdgpu_vram_location - try to find VRAM location
* @adev: amdgpu device structure holding all necessary informations
* @mc: memory controller structure holding memory informations
@@ -602,7 +683,7 @@ void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc)
dev_warn(adev->dev, "limiting GTT\n");
mc->gtt_size = size_bf;
}
- mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
+ mc->gtt_start = 0;
} else {
if (mc->gtt_size > size_af) {
dev_warn(adev->dev, "limiting GTT\n");
@@ -636,9 +717,9 @@ bool amdgpu_need_post(struct amdgpu_device *adev)
return true;
}
/* then check MEM_SIZE, in case the crtcs are off */
- reg = RREG32(mmCONFIG_MEMSIZE);
+ reg = amdgpu_asic_get_config_memsize(adev);
- if (reg)
+ if ((reg != 0) && (reg != 0xffffffff))
return false;
return true;
@@ -915,8 +996,13 @@ static int amdgpu_atombios_init(struct amdgpu_device *adev)
}
mutex_init(&adev->mode_info.atom_context->mutex);
- amdgpu_atombios_scratch_regs_init(adev);
- amdgpu_atom_allocate_fb_scratch(adev->mode_info.atom_context);
+ if (adev->is_atom_fw) {
+ amdgpu_atomfirmware_scratch_regs_init(adev);
+ amdgpu_atomfirmware_allocate_fb_scratch(adev);
+ } else {
+ amdgpu_atombios_scratch_regs_init(adev);
+ amdgpu_atombios_allocate_fb_scratch(adev);
+ }
return 0;
}
@@ -954,6 +1040,45 @@ static bool amdgpu_check_pot_argument(int arg)
return (arg & (arg - 1)) == 0;
}
+static void amdgpu_get_block_size(struct amdgpu_device *adev)
+{
+ /* from AI, asic starts to support multiple level VMPT */
+ if (adev->asic_type >= CHIP_VEGA10) {
+ if (amdgpu_vm_block_size != 9)
+ dev_warn(adev->dev,
+ "Multi-VMPT limits block size to one page!\n");
+ amdgpu_vm_block_size = 9;
+ return;
+ }
+ /* defines number of bits in page table versus page directory,
+ * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
+ * page table and the remaining bits are in the page directory */
+ if (amdgpu_vm_block_size == -1) {
+
+ /* Total bits covered by PD + PTs */
+ unsigned bits = ilog2(amdgpu_vm_size) + 18;
+
+ /* Make sure the PD is 4K in size up to 8GB address space.
+ Above that split equal between PD and PTs */
+ if (amdgpu_vm_size <= 8)
+ amdgpu_vm_block_size = bits - 9;
+ else
+ amdgpu_vm_block_size = (bits + 3) / 2;
+
+ } else if (amdgpu_vm_block_size < 9) {
+ dev_warn(adev->dev, "VM page table size (%d) too small\n",
+ amdgpu_vm_block_size);
+ amdgpu_vm_block_size = 9;
+ }
+
+ if (amdgpu_vm_block_size > 24 ||
+ (amdgpu_vm_size * 1024) < (1ull << amdgpu_vm_block_size)) {
+ dev_warn(adev->dev, "VM page table size (%d) too large\n",
+ amdgpu_vm_block_size);
+ amdgpu_vm_block_size = 9;
+ }
+}
+
/**
* amdgpu_check_arguments - validate module params
*
@@ -1004,33 +1129,7 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
amdgpu_vm_size = 8;
}
- /* defines number of bits in page table versus page directory,
- * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
- * page table and the remaining bits are in the page directory */
- if (amdgpu_vm_block_size == -1) {
-
- /* Total bits covered by PD + PTs */
- unsigned bits = ilog2(amdgpu_vm_size) + 18;
-
- /* Make sure the PD is 4K in size up to 8GB address space.
- Above that split equal between PD and PTs */
- if (amdgpu_vm_size <= 8)
- amdgpu_vm_block_size = bits - 9;
- else
- amdgpu_vm_block_size = (bits + 3) / 2;
-
- } else if (amdgpu_vm_block_size < 9) {
- dev_warn(adev->dev, "VM page table size (%d) too small\n",
- amdgpu_vm_block_size);
- amdgpu_vm_block_size = 9;
- }
-
- if (amdgpu_vm_block_size > 24 ||
- (amdgpu_vm_size * 1024) < (1ull << amdgpu_vm_block_size)) {
- dev_warn(adev->dev, "VM page table size (%d) too large\n",
- amdgpu_vm_block_size);
- amdgpu_vm_block_size = 9;
- }
+ amdgpu_get_block_size(adev);
if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
!amdgpu_check_pot_argument(amdgpu_vram_page_split))) {
@@ -1059,7 +1158,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
if (state == VGA_SWITCHEROO_ON) {
unsigned d3_delay = dev->pdev->d3_delay;
- printk(KERN_INFO "amdgpu: switched on\n");
+ pr_info("amdgpu: switched on\n");
/* don't suspend or resume card normally */
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
@@ -1070,7 +1169,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
dev->switch_power_state = DRM_SWITCH_POWER_ON;
drm_kms_helper_poll_enable(dev);
} else {
- printk(KERN_INFO "amdgpu: switched off\n");
+ pr_info("amdgpu: switched off\n");
drm_kms_helper_poll_disable(dev);
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
amdgpu_device_suspend(dev, true, true);
@@ -1114,13 +1213,15 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
- if (adev->ip_blocks[i].version->type == block_type) {
- r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
- state);
- if (r)
- return r;
- break;
- }
+ if (adev->ip_blocks[i].version->type != block_type)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
+ (void *)adev, state);
+ if (r)
+ DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
}
return r;
}
@@ -1134,13 +1235,15 @@ int amdgpu_set_powergating_state(struct amdgpu_device *adev,
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
- if (adev->ip_blocks[i].version->type == block_type) {
- r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
- state);
- if (r)
- return r;
- break;
- }
+ if (adev->ip_blocks[i].version->type != block_type)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->set_powergating_state(
+ (void *)adev, state);
+ if (r)
+ DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
}
return r;
}
@@ -1345,6 +1448,13 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
return r;
break;
#endif
+ case CHIP_VEGA10:
+ adev->family = AMDGPU_FAMILY_AI;
+
+ r = soc15_set_ip_blocks(adev);
+ if (r)
+ return r;
+ break;
default:
/* FIXME: not supported yet */
return -EINVAL;
@@ -1476,6 +1586,9 @@ static int amdgpu_late_init(struct amdgpu_device *adev)
}
}
+ amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vce(adev, false);
+
return 0;
}
@@ -1607,6 +1720,53 @@ int amdgpu_suspend(struct amdgpu_device *adev)
return 0;
}
+static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
+ r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+
+ if (r) {
+ DRM_ERROR("resume of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH )
+ continue;
+
+ r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ if (r) {
+ DRM_ERROR("resume of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ }
+
+ return 0;
+}
+
static int amdgpu_resume(struct amdgpu_device *adev)
{
int i, r;
@@ -1627,8 +1787,13 @@ static int amdgpu_resume(struct amdgpu_device *adev)
static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
{
- if (amdgpu_atombios_has_gpu_virtualization_table(adev))
- adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
+ if (adev->is_atom_fw) {
+ if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
+ } else {
+ if (amdgpu_atombios_has_gpu_virtualization_table(adev))
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
+ }
}
/**
@@ -1693,6 +1858,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
* can recall function without having locking issues */
mutex_init(&adev->vm_manager.lock);
atomic_set(&adev->irq.ih.lock, 0);
+ mutex_init(&adev->firmware.mutex);
mutex_init(&adev->pm.mutex);
mutex_init(&adev->gfx.gpu_clock_mutex);
mutex_init(&adev->srbm_mutex);
@@ -1799,14 +1965,16 @@ int amdgpu_device_init(struct amdgpu_device *adev,
DRM_INFO("GPU post is not needed\n");
}
- /* Initialize clocks */
- r = amdgpu_atombios_get_clock_info(adev);
- if (r) {
- dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
- goto failed;
+ if (!adev->is_atom_fw) {
+ /* Initialize clocks */
+ r = amdgpu_atombios_get_clock_info(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
+ return r;
+ }
+ /* init i2c buses */
+ amdgpu_atombios_i2c_init(adev);
}
- /* init i2c buses */
- amdgpu_atombios_i2c_init(adev);
/* Fence driver */
r = amdgpu_fence_driver_init(adev);
@@ -1835,8 +2003,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* Get a log2 for easy divisions. */
adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
- amdgpu_fbdev_init(adev);
-
r = amdgpu_ib_pool_init(adev);
if (r) {
dev_err(adev->dev, "IB initialization failed (%d).\n", r);
@@ -1847,21 +2013,19 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
+ amdgpu_fbdev_init(adev);
+
r = amdgpu_gem_debugfs_init(adev);
- if (r) {
+ if (r)
DRM_ERROR("registering gem debugfs failed (%d).\n", r);
- }
r = amdgpu_debugfs_regs_init(adev);
- if (r) {
+ if (r)
DRM_ERROR("registering register debugfs failed (%d).\n", r);
- }
r = amdgpu_debugfs_firmware_init(adev);
- if (r) {
+ if (r)
DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
- return r;
- }
if ((amdgpu_testing & 1)) {
if (adev->accel_working)
@@ -1869,12 +2033,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
else
DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
}
- if ((amdgpu_testing & 2)) {
- if (adev->accel_working)
- amdgpu_test_syncing(adev);
- else
- DRM_INFO("amdgpu: acceleration disabled, skipping sync tests\n");
- }
if (amdgpu_benchmarking) {
if (adev->accel_working)
amdgpu_benchmark(adev, amdgpu_benchmarking);
@@ -2020,7 +2178,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
*/
amdgpu_bo_evict_vram(adev);
- amdgpu_atombios_scratch_regs_save(adev);
+ if (adev->is_atom_fw)
+ amdgpu_atomfirmware_scratch_regs_save(adev);
+ else
+ amdgpu_atombios_scratch_regs_save(adev);
pci_save_state(dev->pdev);
if (suspend) {
/* Shut down the device */
@@ -2072,7 +2233,10 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
return r;
}
}
- amdgpu_atombios_scratch_regs_restore(adev);
+ if (adev->is_atom_fw)
+ amdgpu_atomfirmware_scratch_regs_restore(adev);
+ else
+ amdgpu_atombios_scratch_regs_restore(adev);
/* post card */
if (amdgpu_need_post(adev)) {
@@ -2286,6 +2450,117 @@ err:
}
/**
+ * amdgpu_sriov_gpu_reset - reset the asic
+ *
+ * @adev: amdgpu device pointer
+ * @voluntary: if this reset is requested by guest.
+ * (true means by guest and false means by HYPERVISOR )
+ *
+ * Attempt the reset the GPU if it has hung (all asics).
+ * for SRIOV case.
+ * Returns 0 for success or an error on failure.
+ */
+int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
+{
+ int i, r = 0;
+ int resched;
+ struct amdgpu_bo *bo, *tmp;
+ struct amdgpu_ring *ring;
+ struct dma_fence *fence = NULL, *next = NULL;
+
+ mutex_lock(&adev->virt.lock_reset);
+ atomic_inc(&adev->gpu_reset_counter);
+ adev->gfx.in_reset = true;
+
+ /* block TTM */
+ resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
+
+ /* block scheduler */
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ ring = adev->rings[i];
+
+ if (!ring || !ring->sched.thread)
+ continue;
+
+ kthread_park(ring->sched.thread);
+ amd_sched_hw_job_reset(&ring->sched);
+ }
+
+ /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
+ amdgpu_fence_driver_force_completion(adev);
+
+ /* request to take full control of GPU before re-initialization */
+ if (voluntary)
+ amdgpu_virt_reset_gpu(adev);
+ else
+ amdgpu_virt_request_full_gpu(adev, true);
+
+
+ /* Resume IP prior to SMC */
+ amdgpu_sriov_reinit_early(adev);
+
+ /* we need recover gart prior to run SMC/CP/SDMA resume */
+ amdgpu_ttm_recover_gart(adev);
+
+ /* now we are okay to resume SMC/CP/SDMA */
+ amdgpu_sriov_reinit_late(adev);
+
+ amdgpu_irq_gpu_reset_resume_helper(adev);
+
+ if (amdgpu_ib_ring_tests(adev))
+ dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r);
+
+ /* release full control of GPU after ib test */
+ amdgpu_virt_release_full_gpu(adev, true);
+
+ DRM_INFO("recover vram bo from shadow\n");
+
+ ring = adev->mman.buffer_funcs_ring;
+ mutex_lock(&adev->shadow_list_lock);
+ list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
+ amdgpu_recover_vram_from_shadow(adev, ring, bo, &next);
+ if (fence) {
+ r = dma_fence_wait(fence, false);
+ if (r) {
+ WARN(r, "recovery from shadow isn't completed\n");
+ break;
+ }
+ }
+
+ dma_fence_put(fence);
+ fence = next;
+ }
+ mutex_unlock(&adev->shadow_list_lock);
+
+ if (fence) {
+ r = dma_fence_wait(fence, false);
+ if (r)
+ WARN(r, "recovery from shadow isn't completed\n");
+ }
+ dma_fence_put(fence);
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+ if (!ring || !ring->sched.thread)
+ continue;
+
+ amd_sched_job_recovery(&ring->sched);
+ kthread_unpark(ring->sched.thread);
+ }
+
+ drm_helper_resume_force_mode(adev->ddev);
+ ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
+ if (r) {
+ /* bad news, how to tell it to userspace ? */
+ dev_info(adev->dev, "GPU reset failed\n");
+ }
+
+ adev->gfx.in_reset = false;
+ mutex_unlock(&adev->virt.lock_reset);
+ return r;
+}
+
+/**
* amdgpu_gpu_reset - reset the asic
*
* @adev: amdgpu device pointer
@@ -2300,7 +2575,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
bool need_full_reset;
if (amdgpu_sriov_vf(adev))
- return 0;
+ return amdgpu_sriov_gpu_reset(adev, true);
if (!amdgpu_check_soft_reset(adev)) {
DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
@@ -2346,9 +2621,15 @@ retry:
amdgpu_display_stop_mc_access(adev, &save);
amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
}
- amdgpu_atombios_scratch_regs_save(adev);
+ if (adev->is_atom_fw)
+ amdgpu_atomfirmware_scratch_regs_save(adev);
+ else
+ amdgpu_atombios_scratch_regs_save(adev);
r = amdgpu_asic_reset(adev);
- amdgpu_atombios_scratch_regs_restore(adev);
+ if (adev->is_atom_fw)
+ amdgpu_atomfirmware_scratch_regs_restore(adev);
+ else
+ amdgpu_atombios_scratch_regs_restore(adev);
/* post card */
amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -2387,7 +2668,7 @@ retry:
if (fence) {
r = dma_fence_wait(fence, false);
if (r) {
- WARN(r, "recovery from shadow isn't comleted\n");
+ WARN(r, "recovery from shadow isn't completed\n");
break;
}
}
@@ -2399,7 +2680,7 @@ retry:
if (fence) {
r = dma_fence_wait(fence, false);
if (r)
- WARN(r, "recovery from shadow isn't comleted\n");
+ WARN(r, "recovery from shadow isn't completed\n");
}
dma_fence_put(fence);
}
@@ -2954,24 +3235,42 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
struct amdgpu_device *adev = file_inode(f)->i_private;
- int idx, r;
- int32_t value;
+ int idx, x, outsize, r, valuesize;
+ uint32_t values[16];
- if (size != 4 || *pos & 0x3)
+ if (size & 3 || *pos & 0x3)
+ return -EINVAL;
+
+ if (amdgpu_dpm == 0)
return -EINVAL;
/* convert offset to sensor number */
idx = *pos >> 2;
+ valuesize = sizeof(values);
if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor)
- r = adev->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, idx, &value);
+ r = adev->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, idx, &values[0], &valuesize);
+ else if (adev->pm.funcs && adev->pm.funcs->read_sensor)
+ r = adev->pm.funcs->read_sensor(adev, idx, &values[0],
+ &valuesize);
else
return -EINVAL;
- if (!r)
- r = put_user(value, (int32_t *)buf);
+ if (size > valuesize)
+ return -EINVAL;
+
+ outsize = 0;
+ x = 0;
+ if (!r) {
+ while (size) {
+ r = put_user(values[x++], (int32_t *)buf);
+ buf += 4;
+ size -= 4;
+ outsize += 4;
+ }
+ }
- return !r ? 4 : r;
+ return !r ? outsize : r;
}
static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index 6ca0333ca4c0..38e9b0d3659a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -31,86 +31,88 @@
void amdgpu_dpm_print_class_info(u32 class, u32 class2)
{
- printk("\tui class: ");
+ const char *s;
+
switch (class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) {
case ATOM_PPLIB_CLASSIFICATION_UI_NONE:
default:
- printk("none\n");
+ s = "none";
break;
case ATOM_PPLIB_CLASSIFICATION_UI_BATTERY:
- printk("battery\n");
+ s = "battery";
break;
case ATOM_PPLIB_CLASSIFICATION_UI_BALANCED:
- printk("balanced\n");
+ s = "balanced";
break;
case ATOM_PPLIB_CLASSIFICATION_UI_PERFORMANCE:
- printk("performance\n");
+ s = "performance";
break;
}
- printk("\tinternal class: ");
+ printk("\tui class: %s\n", s);
+ printk("\tinternal class:");
if (((class & ~ATOM_PPLIB_CLASSIFICATION_UI_MASK) == 0) &&
(class2 == 0))
- printk("none");
+ pr_cont(" none");
else {
if (class & ATOM_PPLIB_CLASSIFICATION_BOOT)
- printk("boot ");
+ pr_cont(" boot");
if (class & ATOM_PPLIB_CLASSIFICATION_THERMAL)
- printk("thermal ");
+ pr_cont(" thermal");
if (class & ATOM_PPLIB_CLASSIFICATION_LIMITEDPOWERSOURCE)
- printk("limited_pwr ");
+ pr_cont(" limited_pwr");
if (class & ATOM_PPLIB_CLASSIFICATION_REST)
- printk("rest ");
+ pr_cont(" rest");
if (class & ATOM_PPLIB_CLASSIFICATION_FORCED)
- printk("forced ");
+ pr_cont(" forced");
if (class & ATOM_PPLIB_CLASSIFICATION_3DPERFORMANCE)
- printk("3d_perf ");
+ pr_cont(" 3d_perf");
if (class & ATOM_PPLIB_CLASSIFICATION_OVERDRIVETEMPLATE)
- printk("ovrdrv ");
+ pr_cont(" ovrdrv");
if (class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE)
- printk("uvd ");
+ pr_cont(" uvd");
if (class & ATOM_PPLIB_CLASSIFICATION_3DLOW)
- printk("3d_low ");
+ pr_cont(" 3d_low");
if (class & ATOM_PPLIB_CLASSIFICATION_ACPI)
- printk("acpi ");
+ pr_cont(" acpi");
if (class & ATOM_PPLIB_CLASSIFICATION_HD2STATE)
- printk("uvd_hd2 ");
+ pr_cont(" uvd_hd2");
if (class & ATOM_PPLIB_CLASSIFICATION_HDSTATE)
- printk("uvd_hd ");
+ pr_cont(" uvd_hd");
if (class & ATOM_PPLIB_CLASSIFICATION_SDSTATE)
- printk("uvd_sd ");
+ pr_cont(" uvd_sd");
if (class2 & ATOM_PPLIB_CLASSIFICATION2_LIMITEDPOWERSOURCE_2)
- printk("limited_pwr2 ");
+ pr_cont(" limited_pwr2");
if (class2 & ATOM_PPLIB_CLASSIFICATION2_ULV)
- printk("ulv ");
+ pr_cont(" ulv");
if (class2 & ATOM_PPLIB_CLASSIFICATION2_MVC)
- printk("uvd_mvc ");
+ pr_cont(" uvd_mvc");
}
- printk("\n");
+ pr_cont("\n");
}
void amdgpu_dpm_print_cap_info(u32 caps)
{
- printk("\tcaps: ");
+ printk("\tcaps:");
if (caps & ATOM_PPLIB_SINGLE_DISPLAY_ONLY)
- printk("single_disp ");
+ pr_cont(" single_disp");
if (caps & ATOM_PPLIB_SUPPORTS_VIDEO_PLAYBACK)
- printk("video ");
+ pr_cont(" video");
if (caps & ATOM_PPLIB_DISALLOW_ON_DC)
- printk("no_dc ");
- printk("\n");
+ pr_cont(" no_dc");
+ pr_cont("\n");
}
void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev,
struct amdgpu_ps *rps)
{
- printk("\tstatus: ");
+ printk("\tstatus:");
if (rps == adev->pm.dpm.current_ps)
- printk("c ");
+ pr_cont(" c");
if (rps == adev->pm.dpm.requested_ps)
- printk("r ");
+ pr_cont(" r");
if (rps == adev->pm.dpm.boot_ps)
- printk("b ");
- printk("\n");
+ pr_cont(" b");
+ pr_cont("\n");
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index fa2b55681422..8c96a4caa715 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -270,8 +270,18 @@ struct amdgpu_dpm_funcs {
struct amdgpu_ps *cps,
struct amdgpu_ps *rps,
bool *equal);
+ int (*read_sensor)(struct amdgpu_device *adev, int idx, void *value,
+ int *size);
struct amd_vce_state* (*get_vce_clock_state)(struct amdgpu_device *adev, unsigned idx);
+ int (*reset_power_profile_state)(struct amdgpu_device *adev,
+ struct amd_pp_profile *request);
+ int (*get_power_profile_state)(struct amdgpu_device *adev,
+ struct amd_pp_profile *query);
+ int (*set_power_profile_state)(struct amdgpu_device *adev,
+ struct amd_pp_profile *request);
+ int (*switch_power_profile)(struct amdgpu_device *adev,
+ enum amd_pp_profile_type type);
};
#define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev))
@@ -282,10 +292,10 @@ struct amdgpu_dpm_funcs {
#define amdgpu_dpm_vblank_too_short(adev) (adev)->pm.funcs->vblank_too_short((adev))
#define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
-#define amdgpu_dpm_read_sensor(adev, idx, value) \
+#define amdgpu_dpm_read_sensor(adev, idx, value, size) \
((adev)->pp_enabled ? \
- (adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, (idx), (value)) : \
- -EINVAL)
+ (adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, (idx), (value), (size)) : \
+ (adev)->pm.funcs->read_sensor((adev), (idx), (value), (size)))
#define amdgpu_dpm_get_temperature(adev) \
((adev)->pp_enabled ? \
@@ -388,6 +398,22 @@ struct amdgpu_dpm_funcs {
(adev)->powerplay.pp_funcs->get_performance_level((adev)->powerplay.pp_handle) : \
(adev)->pm.dpm.forced_level)
+#define amdgpu_dpm_reset_power_profile_state(adev, request) \
+ ((adev)->powerplay.pp_funcs->reset_power_profile_state(\
+ (adev)->powerplay.pp_handle, request))
+
+#define amdgpu_dpm_get_power_profile_state(adev, query) \
+ ((adev)->powerplay.pp_funcs->get_power_profile_state(\
+ (adev)->powerplay.pp_handle, query))
+
+#define amdgpu_dpm_set_power_profile_state(adev, request) \
+ ((adev)->powerplay.pp_funcs->set_power_profile_state(\
+ (adev)->powerplay.pp_handle, request))
+
+#define amdgpu_dpm_switch_power_profile(adev, type) \
+ ((adev)->powerplay.pp_funcs->switch_power_profile(\
+ (adev)->powerplay.pp_handle, type))
+
struct amdgpu_dpm {
struct amdgpu_ps *ps;
/* number of valid power states */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b76cd699eb0d..400917fd7486 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -60,9 +60,12 @@
* - 3.8.0 - Add support raster config init in the kernel
* - 3.9.0 - Add support for memory query info about VRAM and GTT.
* - 3.10.0 - Add support for new fences ioctl, new gem ioctl flags
+ * - 3.11.0 - Add support for sensor query info (clocks, temp, etc).
+ * - 3.12.0 - Add query for double offchip LDS buffers
+ * - 3.13.0 - Add PRT support
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 10
+#define KMS_DRIVER_MINOR 13
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
@@ -77,7 +80,7 @@ int amdgpu_pcie_gen2 = -1;
int amdgpu_msi = -1;
int amdgpu_lockup_timeout = 0;
int amdgpu_dpm = -1;
-int amdgpu_smc_load_fw = 1;
+int amdgpu_fw_load_type = -1;
int amdgpu_aspm = -1;
int amdgpu_runtime_pm = -1;
unsigned amdgpu_ip_block_mask = 0xffffffff;
@@ -100,6 +103,11 @@ unsigned amdgpu_pg_mask = 0xffffffff;
char *amdgpu_disable_cu = NULL;
char *amdgpu_virtual_display = NULL;
unsigned amdgpu_pp_feature_mask = 0xffffffff;
+int amdgpu_ngg = 0;
+int amdgpu_prim_buf_per_se = 0;
+int amdgpu_pos_buf_per_se = 0;
+int amdgpu_cntl_sb_buf_per_se = 0;
+int amdgpu_param_buf_per_se = 0;
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -137,8 +145,8 @@ module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444);
MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(dpm, amdgpu_dpm, int, 0444);
-MODULE_PARM_DESC(smc_load_fw, "SMC firmware loading(1 = enable, 0 = disable)");
-module_param_named(smc_load_fw, amdgpu_smc_load_fw, int, 0444);
+MODULE_PARM_DESC(fw_load_type, "firmware loading type (0 = direct, 1 = SMU, 2 = PSP, -1 = auto)");
+module_param_named(fw_load_type, amdgpu_fw_load_type, int, 0444);
MODULE_PARM_DESC(aspm, "ASPM support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(aspm, amdgpu_aspm, int, 0444);
@@ -207,6 +215,22 @@ MODULE_PARM_DESC(virtual_display,
"Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)");
module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);
+MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))");
+module_param_named(ngg, amdgpu_ngg, int, 0444);
+
+MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)");
+module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444);
+
+MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)");
+module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444);
+
+MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)");
+module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444);
+
+MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)");
+module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
+
+
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -409,6 +433,7 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x67C2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
{0x1002, 0x67C4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
{0x1002, 0x67C7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67D0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
{0x1002, 0x67DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
{0x1002, 0x67C8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
{0x1002, 0x67C9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
@@ -423,7 +448,14 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
-
+ /* Vega 10 */
+ {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x6862, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x6863, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x6867, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
{0, 0, 0}
};
@@ -686,7 +718,6 @@ static struct drm_driver kms_driver = {
DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
.load = amdgpu_driver_load_kms,
.open = amdgpu_driver_open_kms,
- .preclose = amdgpu_driver_preclose_kms,
.postclose = amdgpu_driver_postclose_kms,
.lastclose = amdgpu_driver_lastclose_kms,
.set_busid = drm_pci_set_busid,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 72505b15dd13..a48142d930c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -147,11 +147,11 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
ret = amdgpu_gem_object_create(adev, aligned_size, 0,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+ AMDGPU_GEM_CREATE_VRAM_CLEARED,
true, &gobj);
if (ret) {
- printk(KERN_ERR "failed to allocate framebuffer (%d)\n",
- aligned_size);
+ pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
return -ENOMEM;
}
abo = gem_to_amdgpu_bo(gobj);
@@ -241,8 +241,6 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
/* setup helper */
rfbdev->helper.fb = fb;
- memset_io(abo->kptr, 0x0, amdgpu_bo_size(abo));
-
strcpy(info->fix.id, "amdgpudrmfb");
drm_fb_helper_fill_fix(info, fb->pitches[0], fb->format->depth);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 964d2a946ed5..6d691abe889c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -229,7 +229,8 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
unsigned p;
int i, j;
u64 page_base;
- uint32_t flags = AMDGPU_PTE_SYSTEM;
+ /* Starting from VEGA10, system bit must be 0 to mean invalid. */
+ uint64_t flags = 0;
if (!adev->gart.ready) {
WARN(1, "trying to unbind memory from uninitialized GART !\n");
@@ -271,7 +272,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
*/
int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
int pages, struct page **pagelist, dma_addr_t *dma_addr,
- uint32_t flags)
+ uint64_t flags)
{
unsigned t;
unsigned p;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 106cf83c2e6b..f85520d4e711 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -152,6 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct ttm_validate_buffer tv;
struct ww_acquire_ctx ticket;
struct amdgpu_bo_va *bo_va;
+ struct dma_fence *fence = NULL;
int r;
INIT_LIST_HEAD(&list);
@@ -173,6 +174,17 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
if (bo_va) {
if (--bo_va->ref_count == 0) {
amdgpu_vm_bo_rmv(adev, bo_va);
+
+ r = amdgpu_vm_clear_freed(adev, vm, &fence);
+ if (unlikely(r)) {
+ dev_err(adev->dev, "failed to clear page "
+ "tables on GEM object close (%d)\n", r);
+ }
+
+ if (fence) {
+ amdgpu_bo_fence(bo, fence, true);
+ dma_fence_put(fence);
+ }
}
}
ttm_eu_backoff_reservation(&ticket, &list);
@@ -507,14 +519,16 @@ static int amdgpu_gem_va_check(void *param, struct amdgpu_bo *bo)
* amdgpu_gem_va_update_vm -update the bo_va in its VM
*
* @adev: amdgpu_device pointer
+ * @vm: vm to update
* @bo_va: bo_va to update
* @list: validation list
- * @operation: map or unmap
+ * @operation: map, unmap or clear
*
* Update the bo_va directly after setting its address. Errors are not
* vital here, so they are not reported back to userspace.
*/
static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
struct amdgpu_bo_va *bo_va,
struct list_head *list,
uint32_t operation)
@@ -529,20 +543,21 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
goto error;
}
- r = amdgpu_vm_validate_pt_bos(adev, bo_va->vm, amdgpu_gem_va_check,
+ r = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_va_check,
NULL);
if (r)
goto error;
- r = amdgpu_vm_update_page_directory(adev, bo_va->vm);
+ r = amdgpu_vm_update_directories(adev, vm);
if (r)
goto error;
- r = amdgpu_vm_clear_freed(adev, bo_va->vm);
+ r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
goto error;
- if (operation == AMDGPU_VA_OP_MAP)
+ if (operation == AMDGPU_VA_OP_MAP ||
+ operation == AMDGPU_VA_OP_REPLACE)
r = amdgpu_vm_bo_update(adev, bo_va, false);
error:
@@ -553,6 +568,12 @@ error:
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
+ const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE |
+ AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
+ AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK;
+ const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE |
+ AMDGPU_VM_PAGE_PRT;
+
struct drm_amdgpu_gem_va *args = data;
struct drm_gem_object *gobj;
struct amdgpu_device *adev = dev->dev_private;
@@ -563,7 +584,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct ttm_validate_buffer tv;
struct ww_acquire_ctx ticket;
struct list_head list;
- uint32_t invalid_flags, va_flags = 0;
+ uint64_t va_flags;
int r = 0;
if (!adev->vm_manager.enabled)
@@ -577,17 +598,17 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
- invalid_flags = ~(AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_READABLE |
- AMDGPU_VM_PAGE_WRITEABLE | AMDGPU_VM_PAGE_EXECUTABLE);
- if ((args->flags & invalid_flags)) {
- dev_err(&dev->pdev->dev, "invalid flags 0x%08X vs 0x%08X\n",
- args->flags, invalid_flags);
+ if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) {
+ dev_err(&dev->pdev->dev, "invalid flags combination 0x%08X\n",
+ args->flags);
return -EINVAL;
}
switch (args->operation) {
case AMDGPU_VA_OP_MAP:
case AMDGPU_VA_OP_UNMAP:
+ case AMDGPU_VA_OP_CLEAR:
+ case AMDGPU_VA_OP_REPLACE:
break;
default:
dev_err(&dev->pdev->dev, "unsupported operation %d\n",
@@ -595,38 +616,47 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
- gobj = drm_gem_object_lookup(filp, args->handle);
- if (gobj == NULL)
- return -ENOENT;
- abo = gem_to_amdgpu_bo(gobj);
INIT_LIST_HEAD(&list);
- tv.bo = &abo->tbo;
- tv.shared = false;
- list_add(&tv.head, &list);
+ if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
+ !(args->flags & AMDGPU_VM_PAGE_PRT)) {
+ gobj = drm_gem_object_lookup(filp, args->handle);
+ if (gobj == NULL)
+ return -ENOENT;
+ abo = gem_to_amdgpu_bo(gobj);
+ tv.bo = &abo->tbo;
+ tv.shared = false;
+ list_add(&tv.head, &list);
+ } else {
+ gobj = NULL;
+ abo = NULL;
+ }
amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
- if (r) {
- drm_gem_object_unreference_unlocked(gobj);
- return r;
- }
+ if (r)
+ goto error_unref;
- bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo);
- if (!bo_va) {
- ttm_eu_backoff_reservation(&ticket, &list);
- drm_gem_object_unreference_unlocked(gobj);
- return -ENOENT;
+ if (abo) {
+ bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo);
+ if (!bo_va) {
+ r = -ENOENT;
+ goto error_backoff;
+ }
+ } else if (args->operation != AMDGPU_VA_OP_CLEAR) {
+ bo_va = fpriv->prt_va;
+ } else {
+ bo_va = NULL;
}
switch (args->operation) {
case AMDGPU_VA_OP_MAP:
- if (args->flags & AMDGPU_VM_PAGE_READABLE)
- va_flags |= AMDGPU_PTE_READABLE;
- if (args->flags & AMDGPU_VM_PAGE_WRITEABLE)
- va_flags |= AMDGPU_PTE_WRITEABLE;
- if (args->flags & AMDGPU_VM_PAGE_EXECUTABLE)
- va_flags |= AMDGPU_PTE_EXECUTABLE;
+ r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address,
+ args->map_size);
+ if (r)
+ goto error_backoff;
+
+ va_flags = amdgpu_vm_get_pte_flags(adev, args->flags);
r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
args->offset_in_bo, args->map_size,
va_flags);
@@ -634,14 +664,34 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
case AMDGPU_VA_OP_UNMAP:
r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address);
break;
+
+ case AMDGPU_VA_OP_CLEAR:
+ r = amdgpu_vm_bo_clear_mappings(adev, &fpriv->vm,
+ args->va_address,
+ args->map_size);
+ break;
+ case AMDGPU_VA_OP_REPLACE:
+ r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address,
+ args->map_size);
+ if (r)
+ goto error_backoff;
+
+ va_flags = amdgpu_vm_get_pte_flags(adev, args->flags);
+ r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
+ args->offset_in_bo, args->map_size,
+ va_flags);
+ break;
default:
break;
}
- if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
- !amdgpu_vm_debug)
- amdgpu_gem_va_update_vm(adev, bo_va, &list, args->operation);
+ if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
+ amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, &list,
+ args->operation);
+
+error_backoff:
ttm_eu_backoff_reservation(&ticket, &list);
+error_unref:
drm_gem_object_unreference_unlocked(gobj);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index e02a70dd37b5..aab857d89d03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -161,9 +161,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
return r;
}
- if (ring->funcs->init_cond_exec)
- patch_offset = amdgpu_ring_init_cond_exec(ring);
-
if (vm) {
r = amdgpu_vm_flush(ring, job);
if (r) {
@@ -172,7 +169,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
}
}
- if (ring->funcs->emit_hdp_flush)
+ if (ring->funcs->init_cond_exec)
+ patch_offset = amdgpu_ring_init_cond_exec(ring);
+
+ if (ring->funcs->emit_hdp_flush
+#ifdef CONFIG_X86_64
+ && !(adev->flags & AMD_IS_APU)
+#endif
+ )
amdgpu_ring_emit_hdp_flush(ring);
skip_preamble = ring->current_ctx == fence_ctx;
@@ -202,7 +206,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
need_ctx_switch = false;
}
- if (ring->funcs->emit_hdp_invalidate)
+ if (ring->funcs->emit_hdp_invalidate
+#ifdef CONFIG_X86_64
+ && !(adev->flags & AMD_IS_APU)
+#endif
+ )
amdgpu_ring_emit_hdp_invalidate(ring);
r = amdgpu_fence_emit(ring, f);
@@ -214,6 +222,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
return r;
}
+ if (ring->funcs->insert_end)
+ ring->funcs->insert_end(ring);
+
/* wrap the last IB with fence */
if (job && job->uf_addr) {
amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index ba38ae6a1463..a3da1a122fc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -25,6 +25,48 @@
#define __AMDGPU_IH_H__
struct amdgpu_device;
+ /*
+ * vega10+ IH clients
+ */
+enum amdgpu_ih_clientid
+{
+ AMDGPU_IH_CLIENTID_IH = 0x00,
+ AMDGPU_IH_CLIENTID_ACP = 0x01,
+ AMDGPU_IH_CLIENTID_ATHUB = 0x02,
+ AMDGPU_IH_CLIENTID_BIF = 0x03,
+ AMDGPU_IH_CLIENTID_DCE = 0x04,
+ AMDGPU_IH_CLIENTID_ISP = 0x05,
+ AMDGPU_IH_CLIENTID_PCIE0 = 0x06,
+ AMDGPU_IH_CLIENTID_RLC = 0x07,
+ AMDGPU_IH_CLIENTID_SDMA0 = 0x08,
+ AMDGPU_IH_CLIENTID_SDMA1 = 0x09,
+ AMDGPU_IH_CLIENTID_SE0SH = 0x0a,
+ AMDGPU_IH_CLIENTID_SE1SH = 0x0b,
+ AMDGPU_IH_CLIENTID_SE2SH = 0x0c,
+ AMDGPU_IH_CLIENTID_SE3SH = 0x0d,
+ AMDGPU_IH_CLIENTID_SYSHUB = 0x0e,
+ AMDGPU_IH_CLIENTID_THM = 0x0f,
+ AMDGPU_IH_CLIENTID_UVD = 0x10,
+ AMDGPU_IH_CLIENTID_VCE0 = 0x11,
+ AMDGPU_IH_CLIENTID_VMC = 0x12,
+ AMDGPU_IH_CLIENTID_XDMA = 0x13,
+ AMDGPU_IH_CLIENTID_GRBM_CP = 0x14,
+ AMDGPU_IH_CLIENTID_ATS = 0x15,
+ AMDGPU_IH_CLIENTID_ROM_SMUIO = 0x16,
+ AMDGPU_IH_CLIENTID_DF = 0x17,
+ AMDGPU_IH_CLIENTID_VCE1 = 0x18,
+ AMDGPU_IH_CLIENTID_PWR = 0x19,
+ AMDGPU_IH_CLIENTID_UTCL2 = 0x1b,
+ AMDGPU_IH_CLIENTID_EA = 0x1c,
+ AMDGPU_IH_CLIENTID_UTCL2LOG = 0x1d,
+ AMDGPU_IH_CLIENTID_MP0 = 0x1e,
+ AMDGPU_IH_CLIENTID_MP1 = 0x1f,
+
+ AMDGPU_IH_CLIENTID_MAX
+
+};
+
+#define AMDGPU_IH_CLIENTID_LEGACY 0
/*
* R6xx+ IH ring
@@ -46,12 +88,19 @@ struct amdgpu_ih_ring {
dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */
};
+#define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4
+
struct amdgpu_iv_entry {
+ unsigned client_id;
unsigned src_id;
- unsigned src_data;
unsigned ring_id;
unsigned vm_id;
+ unsigned vm_id_src;
+ uint64_t timestamp;
+ unsigned timestamp_src;
unsigned pas_id;
+ unsigned pasid_src;
+ unsigned src_data[AMDGPU_IH_SRC_DATA_MAX_SIZE_DW];
const uint32_t *iv_entry;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index e63ece049b05..13b487235a8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -33,6 +33,7 @@
#include "amdgpu_ih.h"
#include "atom.h"
#include "amdgpu_connectors.h"
+#include "amdgpu_trace.h"
#include <linux/pm_runtime.h>
@@ -89,23 +90,28 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
static void amdgpu_irq_disable_all(struct amdgpu_device *adev)
{
unsigned long irqflags;
- unsigned i, j;
+ unsigned i, j, k;
int r;
spin_lock_irqsave(&adev->irq.lock, irqflags);
- for (i = 0; i < AMDGPU_MAX_IRQ_SRC_ID; ++i) {
- struct amdgpu_irq_src *src = adev->irq.sources[i];
-
- if (!src || !src->funcs->set || !src->num_types)
+ for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) {
+ if (!adev->irq.client[i].sources)
continue;
- for (j = 0; j < src->num_types; ++j) {
- atomic_set(&src->enabled_types[j], 0);
- r = src->funcs->set(adev, src, j,
- AMDGPU_IRQ_STATE_DISABLE);
- if (r)
- DRM_ERROR("error disabling interrupt (%d)\n",
- r);
+ for (j = 0; j < AMDGPU_MAX_IRQ_SRC_ID; ++j) {
+ struct amdgpu_irq_src *src = adev->irq.client[i].sources[j];
+
+ if (!src || !src->funcs->set || !src->num_types)
+ continue;
+
+ for (k = 0; k < src->num_types; ++k) {
+ atomic_set(&src->enabled_types[k], 0);
+ r = src->funcs->set(adev, src, k,
+ AMDGPU_IRQ_STATE_DISABLE);
+ if (r)
+ DRM_ERROR("error disabling interrupt (%d)\n",
+ r);
+ }
}
}
spin_unlock_irqrestore(&adev->irq.lock, irqflags);
@@ -254,7 +260,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
*/
void amdgpu_irq_fini(struct amdgpu_device *adev)
{
- unsigned i;
+ unsigned i, j;
drm_vblank_cleanup(adev->ddev);
if (adev->irq.installed) {
@@ -266,19 +272,25 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
cancel_work_sync(&adev->reset_work);
}
- for (i = 0; i < AMDGPU_MAX_IRQ_SRC_ID; ++i) {
- struct amdgpu_irq_src *src = adev->irq.sources[i];
-
- if (!src)
+ for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) {
+ if (!adev->irq.client[i].sources)
continue;
- kfree(src->enabled_types);
- src->enabled_types = NULL;
- if (src->data) {
- kfree(src->data);
- kfree(src);
- adev->irq.sources[i] = NULL;
+ for (j = 0; j < AMDGPU_MAX_IRQ_SRC_ID; ++j) {
+ struct amdgpu_irq_src *src = adev->irq.client[i].sources[j];
+
+ if (!src)
+ continue;
+
+ kfree(src->enabled_types);
+ src->enabled_types = NULL;
+ if (src->data) {
+ kfree(src->data);
+ kfree(src);
+ adev->irq.client[i].sources[j] = NULL;
+ }
}
+ kfree(adev->irq.client[i].sources);
}
}
@@ -290,18 +302,30 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
* @source: irq source
*
*/
-int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned src_id,
+int amdgpu_irq_add_id(struct amdgpu_device *adev,
+ unsigned client_id, unsigned src_id,
struct amdgpu_irq_src *source)
{
- if (src_id >= AMDGPU_MAX_IRQ_SRC_ID)
+ if (client_id >= AMDGPU_IH_CLIENTID_MAX)
return -EINVAL;
- if (adev->irq.sources[src_id] != NULL)
+ if (src_id >= AMDGPU_MAX_IRQ_SRC_ID)
return -EINVAL;
if (!source->funcs)
return -EINVAL;
+ if (!adev->irq.client[client_id].sources) {
+ adev->irq.client[client_id].sources = kcalloc(AMDGPU_MAX_IRQ_SRC_ID,
+ sizeof(struct amdgpu_irq_src),
+ GFP_KERNEL);
+ if (!adev->irq.client[client_id].sources)
+ return -ENOMEM;
+ }
+
+ if (adev->irq.client[client_id].sources[src_id] != NULL)
+ return -EINVAL;
+
if (source->num_types && !source->enabled_types) {
atomic_t *types;
@@ -313,8 +337,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned src_id,
source->enabled_types = types;
}
- adev->irq.sources[src_id] = source;
-
+ adev->irq.client[client_id].sources[src_id] = source;
return 0;
}
@@ -329,10 +352,18 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned src_id,
void amdgpu_irq_dispatch(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
+ unsigned client_id = entry->client_id;
unsigned src_id = entry->src_id;
struct amdgpu_irq_src *src;
int r;
+ trace_amdgpu_iv(entry);
+
+ if (client_id >= AMDGPU_IH_CLIENTID_MAX) {
+ DRM_DEBUG("Invalid client_id in IV: %d\n", client_id);
+ return;
+ }
+
if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) {
DRM_DEBUG("Invalid src_id in IV: %d\n", src_id);
return;
@@ -341,7 +372,13 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
if (adev->irq.virq[src_id]) {
generic_handle_irq(irq_find_mapping(adev->irq.domain, src_id));
} else {
- src = adev->irq.sources[src_id];
+ if (!adev->irq.client[client_id].sources) {
+ DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n",
+ client_id, src_id);
+ return;
+ }
+
+ src = adev->irq.client[client_id].sources[src_id];
if (!src) {
DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id);
return;
@@ -385,13 +422,20 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
{
- int i, j;
- for (i = 0; i < AMDGPU_MAX_IRQ_SRC_ID; i++) {
- struct amdgpu_irq_src *src = adev->irq.sources[i];
- if (!src)
+ int i, j, k;
+
+ for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) {
+ if (!adev->irq.client[i].sources)
continue;
- for (j = 0; j < src->num_types; j++)
- amdgpu_irq_update(adev, src, j);
+
+ for (j = 0; j < AMDGPU_MAX_IRQ_SRC_ID; ++j) {
+ struct amdgpu_irq_src *src = adev->irq.client[i].sources[j];
+
+ if (!src)
+ continue;
+ for (k = 0; k < src->num_types; k++)
+ amdgpu_irq_update(adev, src, k);
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index 1642f4108297..0610cc4a9788 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -28,6 +28,7 @@
#include "amdgpu_ih.h"
#define AMDGPU_MAX_IRQ_SRC_ID 0x100
+#define AMDGPU_MAX_IRQ_CLIENT_ID 0x100
struct amdgpu_device;
struct amdgpu_iv_entry;
@@ -44,6 +45,10 @@ struct amdgpu_irq_src {
void *data;
};
+struct amdgpu_irq_client {
+ struct amdgpu_irq_src **sources;
+};
+
/* provided by interrupt generating IP blocks */
struct amdgpu_irq_src_funcs {
int (*set)(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
@@ -58,7 +63,7 @@ struct amdgpu_irq {
bool installed;
spinlock_t lock;
/* interrupt sources */
- struct amdgpu_irq_src *sources[AMDGPU_MAX_IRQ_SRC_ID];
+ struct amdgpu_irq_client client[AMDGPU_IH_CLIENTID_MAX];
/* status, etc. */
bool msi_enabled; /* msi enabled */
@@ -80,7 +85,8 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg);
int amdgpu_irq_init(struct amdgpu_device *adev);
void amdgpu_irq_fini(struct amdgpu_device *adev);
-int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned src_id,
+int amdgpu_irq_add_id(struct amdgpu_device *adev,
+ unsigned client_id, unsigned src_id,
struct amdgpu_irq_src *source);
void amdgpu_irq_dispatch(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 61d94c745672..5ded370a4b35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -208,6 +208,14 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->sdma.instance[query_fw->index].fw_version;
fw_info->feature = adev->sdma.instance[query_fw->index].feature_version;
break;
+ case AMDGPU_INFO_FW_SOS:
+ fw_info->ver = adev->psp.sos_fw_version;
+ fw_info->feature = adev->psp.sos_feature_version;
+ break;
+ case AMDGPU_INFO_FW_ASD:
+ fw_info->ver = adev->psp.asd_fw_version;
+ fw_info->feature = adev->psp.asd_feature_version;
+ break;
default:
return -EINVAL;
}
@@ -240,6 +248,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
uint32_t ui32 = 0;
uint64_t ui64 = 0;
int i, found;
+ int ui32_size = sizeof(ui32);
if (!info->return_size || !info->return_pointer)
return -EINVAL;
@@ -308,6 +317,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ib_size_alignment = 1;
break;
+ case AMDGPU_HW_IP_UVD_ENC:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+ for (i = 0; i < adev->uvd.num_enc_rings; i++)
+ ring_mask |= ((adev->uvd.ring_enc[i].ready ? 1 : 0) << i);
+ ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
+ ib_size_alignment = 1;
+ break;
default:
return -EINVAL;
}
@@ -347,6 +363,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
break;
+ case AMDGPU_HW_IP_UVD_ENC:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+ break;
default:
return -EINVAL;
}
@@ -527,6 +546,15 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.vram_type = adev->mc.vram_type;
dev_info.vram_bit_width = adev->mc.vram_width;
dev_info.vce_harvest_config = adev->vce.harvest_config;
+ dev_info.gc_double_offchip_lds_buf =
+ adev->gfx.config.double_offchip_lds_buf;
+
+ if (amdgpu_ngg) {
+ dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[PRIM].gpu_addr;
+ dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[POS].gpu_addr;
+ dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[CNTL].gpu_addr;
+ dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[PARAM].gpu_addr;
+ }
return copy_to_user(out, &dev_info,
min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0;
@@ -596,6 +624,80 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
return -EINVAL;
}
}
+ case AMDGPU_INFO_SENSOR: {
+ struct pp_gpu_power query = {0};
+ int query_size = sizeof(query);
+
+ if (amdgpu_dpm == 0)
+ return -ENOENT;
+
+ switch (info->sensor_info.type) {
+ case AMDGPU_INFO_SENSOR_GFX_SCLK:
+ /* get sclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GFX_SCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ case AMDGPU_INFO_SENSOR_GFX_MCLK:
+ /* get mclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GFX_MCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ case AMDGPU_INFO_SENSOR_GPU_TEMP:
+ /* get temperature in millidegrees C */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_TEMP,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ break;
+ case AMDGPU_INFO_SENSOR_GPU_LOAD:
+ /* get GPU load */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_LOAD,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ break;
+ case AMDGPU_INFO_SENSOR_GPU_AVG_POWER:
+ /* get average GPU power */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_POWER,
+ (void *)&query, &query_size)) {
+ return -EINVAL;
+ }
+ ui32 = query.average_gpu_power >> 8;
+ break;
+ case AMDGPU_INFO_SENSOR_VDDNB:
+ /* get VDDNB in millivolts */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_VDDNB,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ break;
+ case AMDGPU_INFO_SENSOR_VDDGFX:
+ /* get VDDGFX in millivolts */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_VDDGFX,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ break;
+ default:
+ DRM_DEBUG_KMS("Invalid request %d\n",
+ info->sensor_info.type);
+ return -EINVAL;
+ }
+ return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
+ }
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
@@ -655,6 +757,14 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
goto out_suspend;
}
+ fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
+ if (!fpriv->prt_va) {
+ r = -ENOMEM;
+ amdgpu_vm_fini(adev, &fpriv->vm);
+ kfree(fpriv);
+ goto out_suspend;
+ }
+
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_map_static_csa(adev, &fpriv->vm);
if (r)
@@ -694,11 +804,15 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
if (!fpriv)
return;
+ pm_runtime_get_sync(dev->dev);
+
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
amdgpu_uvd_free_handles(adev, file_priv);
amdgpu_vce_free_handles(adev, file_priv);
+ amdgpu_vm_bo_rmv(adev, fpriv->prt_va);
+
if (amdgpu_sriov_vf(adev)) {
/* TODO: how to handle reserve failure */
BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false));
@@ -722,21 +836,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
pm_runtime_put_autosuspend(dev->dev);
}
-/**
- * amdgpu_driver_preclose_kms - drm callback for pre close
- *
- * @dev: drm dev pointer
- * @file_priv: drm file
- *
- * On device pre close, tear down hyperz and cmask filps on r1xx-r5xx
- * (all asics).
- */
-void amdgpu_driver_preclose_kms(struct drm_device *dev,
- struct drm_file *file_priv)
-{
- pm_runtime_get_sync(dev->dev);
-}
-
/*
* VBlank related functions.
*/
@@ -989,6 +1088,23 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
fw_info.feature, fw_info.ver);
}
+ /* PSP SOS */
+ query_fw.fw_type = AMDGPU_INFO_FW_SOS;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "SOS feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+
+ /* PSP ASD */
+ query_fw.fw_type = AMDGPU_INFO_FW_ASD;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "ASD feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
/* SMC */
query_fw.fw_type = AMDGPU_INFO_FW_SMC;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index be80a4a68d7b..5aac350b007f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -395,32 +395,18 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
amdgpu_fill_placement_to_bo(bo, placement);
/* Kernel allocation are uninterruptible */
- if (!resv) {
- bool locked;
-
- reservation_object_init(&bo->tbo.ttm_resv);
- locked = ww_mutex_trylock(&bo->tbo.ttm_resv.lock);
- WARN_ON(!locked);
- }
-
initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
- r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
- &bo->placement, page_align, !kernel, NULL,
- acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
- &amdgpu_ttm_bo_destroy);
+ r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
+ &bo->placement, page_align, !kernel, NULL,
+ acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
amdgpu_cs_report_moved_bytes(adev,
atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved);
- if (unlikely(r != 0)) {
- if (!resv)
- ww_mutex_unlock(&bo->tbo.resv->lock);
+ if (unlikely(r != 0))
return r;
- }
- bo->tbo.priority = ilog2(bo->tbo.num_pages);
if (kernel)
- bo->tbo.priority *= 2;
- bo->tbo.priority = min(bo->tbo.priority, (unsigned)(TTM_MAX_BO_PRIORITY - 1));
+ bo->tbo.priority = 1;
if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
@@ -436,7 +422,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
dma_fence_put(fence);
}
if (!resv)
- ww_mutex_unlock(&bo->tbo.resv->lock);
+ amdgpu_bo_unreserve(bo);
*bo_ptr = bo;
trace_amdgpu_bo_create(bo);
@@ -827,7 +813,10 @@ int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
{
- if (AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6)
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+ if (adev->family <= AMDGPU_FAMILY_CZ &&
+ AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6)
return -EINVAL;
bo->tiling_flags = tiling_flags;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 346e80a7119b..990fde2cf4fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -43,16 +43,22 @@ static const struct cg_flag_name clocks[] = {
{AMD_CG_SUPPORT_GFX_CGTS_LS, "Graphics Coarse Grain Tree Shader Light Sleep"},
{AMD_CG_SUPPORT_GFX_CP_LS, "Graphics Command Processor Light Sleep"},
{AMD_CG_SUPPORT_GFX_RLC_LS, "Graphics Run List Controller Light Sleep"},
+ {AMD_CG_SUPPORT_GFX_3D_CGCG, "Graphics 3D Coarse Grain Clock Gating"},
+ {AMD_CG_SUPPORT_GFX_3D_CGLS, "Graphics 3D Coarse Grain memory Light Sleep"},
{AMD_CG_SUPPORT_MC_LS, "Memory Controller Light Sleep"},
{AMD_CG_SUPPORT_MC_MGCG, "Memory Controller Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_SDMA_LS, "System Direct Memory Access Light Sleep"},
{AMD_CG_SUPPORT_SDMA_MGCG, "System Direct Memory Access Medium Grain Clock Gating"},
+ {AMD_CG_SUPPORT_BIF_MGCG, "Bus Interface Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_BIF_LS, "Bus Interface Light Sleep"},
{AMD_CG_SUPPORT_UVD_MGCG, "Unified Video Decoder Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_VCE_MGCG, "Video Compression Engine Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_HDP_LS, "Host Data Path Light Sleep"},
{AMD_CG_SUPPORT_HDP_MGCG, "Host Data Path Medium Grain Clock Gating"},
+ {AMD_CG_SUPPORT_DRM_MGCG, "Digital Right Management Medium Grain Clock Gating"},
+ {AMD_CG_SUPPORT_DRM_LS, "Digital Right Management Light Sleep"},
{AMD_CG_SUPPORT_ROM_MGCG, "Rom Medium Grain Clock Gating"},
+ {AMD_CG_SUPPORT_DF_MGCG, "Data Fabric Medium Grain Clock Gating"},
{0, NULL},
};
@@ -610,6 +616,174 @@ fail:
return count;
}
+static ssize_t amdgpu_get_pp_power_profile(struct device *dev,
+ char *buf, struct amd_pp_profile *query)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ int ret = 0;
+
+ if (adev->pp_enabled)
+ ret = amdgpu_dpm_get_power_profile_state(
+ adev, query);
+ else if (adev->pm.funcs->get_power_profile_state)
+ ret = adev->pm.funcs->get_power_profile_state(
+ adev, query);
+
+ if (ret)
+ return ret;
+
+ return snprintf(buf, PAGE_SIZE,
+ "%d %d %d %d %d\n",
+ query->min_sclk / 100,
+ query->min_mclk / 100,
+ query->activity_threshold,
+ query->up_hyst,
+ query->down_hyst);
+}
+
+static ssize_t amdgpu_get_pp_gfx_power_profile(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amd_pp_profile query = {0};
+
+ query.type = AMD_PP_GFX_PROFILE;
+
+ return amdgpu_get_pp_power_profile(dev, buf, &query);
+}
+
+static ssize_t amdgpu_get_pp_compute_power_profile(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amd_pp_profile query = {0};
+
+ query.type = AMD_PP_COMPUTE_PROFILE;
+
+ return amdgpu_get_pp_power_profile(dev, buf, &query);
+}
+
+static ssize_t amdgpu_set_pp_power_profile(struct device *dev,
+ const char *buf,
+ size_t count,
+ struct amd_pp_profile *request)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ uint32_t loop = 0;
+ char *sub_str, buf_cpy[128], *tmp_str;
+ const char delimiter[3] = {' ', '\n', '\0'};
+ long int value;
+ int ret = 0;
+
+ if (strncmp("reset", buf, strlen("reset")) == 0) {
+ if (adev->pp_enabled)
+ ret = amdgpu_dpm_reset_power_profile_state(
+ adev, request);
+ else if (adev->pm.funcs->reset_power_profile_state)
+ ret = adev->pm.funcs->reset_power_profile_state(
+ adev, request);
+ if (ret) {
+ count = -EINVAL;
+ goto fail;
+ }
+ return count;
+ }
+
+ if (strncmp("set", buf, strlen("set")) == 0) {
+ if (adev->pp_enabled)
+ ret = amdgpu_dpm_set_power_profile_state(
+ adev, request);
+ else if (adev->pm.funcs->set_power_profile_state)
+ ret = adev->pm.funcs->set_power_profile_state(
+ adev, request);
+ if (ret) {
+ count = -EINVAL;
+ goto fail;
+ }
+ return count;
+ }
+
+ if (count + 1 >= 128) {
+ count = -EINVAL;
+ goto fail;
+ }
+
+ memcpy(buf_cpy, buf, count + 1);
+ tmp_str = buf_cpy;
+
+ while (tmp_str[0]) {
+ sub_str = strsep(&tmp_str, delimiter);
+ ret = kstrtol(sub_str, 0, &value);
+ if (ret) {
+ count = -EINVAL;
+ goto fail;
+ }
+
+ switch (loop) {
+ case 0:
+ /* input unit MHz convert to dpm table unit 10KHz*/
+ request->min_sclk = (uint32_t)value * 100;
+ break;
+ case 1:
+ /* input unit MHz convert to dpm table unit 10KHz*/
+ request->min_mclk = (uint32_t)value * 100;
+ break;
+ case 2:
+ request->activity_threshold = (uint16_t)value;
+ break;
+ case 3:
+ request->up_hyst = (uint8_t)value;
+ break;
+ case 4:
+ request->down_hyst = (uint8_t)value;
+ break;
+ default:
+ break;
+ }
+
+ loop++;
+ }
+
+ if (adev->pp_enabled)
+ ret = amdgpu_dpm_set_power_profile_state(
+ adev, request);
+ else if (adev->pm.funcs->set_power_profile_state)
+ ret = adev->pm.funcs->set_power_profile_state(
+ adev, request);
+
+ if (ret)
+ count = -EINVAL;
+
+fail:
+ return count;
+}
+
+static ssize_t amdgpu_set_pp_gfx_power_profile(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct amd_pp_profile request = {0};
+
+ request.type = AMD_PP_GFX_PROFILE;
+
+ return amdgpu_set_pp_power_profile(dev, buf, count, &request);
+}
+
+static ssize_t amdgpu_set_pp_compute_power_profile(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct amd_pp_profile request = {0};
+
+ request.type = AMD_PP_COMPUTE_PROFILE;
+
+ return amdgpu_set_pp_power_profile(dev, buf, count, &request);
+}
+
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
amdgpu_get_dpm_forced_performance_level,
@@ -637,6 +811,12 @@ static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR,
static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR,
amdgpu_get_pp_mclk_od,
amdgpu_set_pp_mclk_od);
+static DEVICE_ATTR(pp_gfx_power_profile, S_IRUGO | S_IWUSR,
+ amdgpu_get_pp_gfx_power_profile,
+ amdgpu_set_pp_gfx_power_profile);
+static DEVICE_ATTR(pp_compute_power_profile, S_IRUGO | S_IWUSR,
+ amdgpu_get_pp_compute_power_profile,
+ amdgpu_set_pp_compute_power_profile);
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
@@ -1142,11 +1322,11 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
/* XXX select vce level based on ring/task */
adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
mutex_unlock(&adev->pm.mutex);
- amdgpu_pm_compute_clocks(adev);
- amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
- AMD_PG_STATE_UNGATE);
amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
AMD_CG_STATE_UNGATE);
+ amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_UNGATE);
+ amdgpu_pm_compute_clocks(adev);
} else {
amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
AMD_PG_STATE_GATE);
@@ -1255,6 +1435,20 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
DRM_ERROR("failed to create device file pp_mclk_od\n");
return ret;
}
+ ret = device_create_file(adev->dev,
+ &dev_attr_pp_gfx_power_profile);
+ if (ret) {
+ DRM_ERROR("failed to create device file "
+ "pp_gfx_power_profile\n");
+ return ret;
+ }
+ ret = device_create_file(adev->dev,
+ &dev_attr_pp_compute_power_profile);
+ if (ret) {
+ DRM_ERROR("failed to create device file "
+ "pp_compute_power_profile\n");
+ return ret;
+ }
ret = amdgpu_debugfs_pm_init(adev);
if (ret) {
@@ -1284,6 +1478,10 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
device_remove_file(adev->dev, &dev_attr_pp_sclk_od);
device_remove_file(adev->dev, &dev_attr_pp_mclk_od);
+ device_remove_file(adev->dev,
+ &dev_attr_pp_gfx_power_profile);
+ device_remove_file(adev->dev,
+ &dev_attr_pp_compute_power_profile);
}
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
@@ -1340,7 +1538,9 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev)
{
- int32_t value;
+ uint32_t value;
+ struct pp_gpu_power query = {0};
+ int size;
/* sanity check PP is enabled */
if (!(adev->powerplay.pp_funcs &&
@@ -1348,47 +1548,60 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
return -EINVAL;
/* GPU Clocks */
+ size = sizeof(value);
seq_printf(m, "GFX Clocks and Power:\n");
- if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK, &value))
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK, (void *)&value, &size))
seq_printf(m, "\t%u MHz (MCLK)\n", value/100);
- if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, &value))
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&value, &size))
seq_printf(m, "\t%u MHz (SCLK)\n", value/100);
- if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, &value))
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&value, &size))
seq_printf(m, "\t%u mV (VDDGFX)\n", value);
- if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, &value))
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size))
seq_printf(m, "\t%u mV (VDDNB)\n", value);
+ size = sizeof(query);
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) {
+ seq_printf(m, "\t%u.%u W (VDDC)\n", query.vddc_power >> 8,
+ query.vddc_power & 0xff);
+ seq_printf(m, "\t%u.%u W (VDDCI)\n", query.vddci_power >> 8,
+ query.vddci_power & 0xff);
+ seq_printf(m, "\t%u.%u W (max GPU)\n", query.max_gpu_power >> 8,
+ query.max_gpu_power & 0xff);
+ seq_printf(m, "\t%u.%u W (average GPU)\n", query.average_gpu_power >> 8,
+ query.average_gpu_power & 0xff);
+ }
+ size = sizeof(value);
seq_printf(m, "\n");
/* GPU Temp */
- if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, &value))
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, (void *)&value, &size))
seq_printf(m, "GPU Temperature: %u C\n", value/1000);
/* GPU Load */
- if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, &value))
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size))
seq_printf(m, "GPU Load: %u %%\n", value);
seq_printf(m, "\n");
/* UVD clocks */
- if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, &value)) {
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) {
if (!value) {
seq_printf(m, "UVD: Disabled\n");
} else {
seq_printf(m, "UVD: Enabled\n");
- if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, &value))
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size))
seq_printf(m, "\t%u MHz (DCLK)\n", value/100);
- if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, &value))
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size))
seq_printf(m, "\t%u MHz (VCLK)\n", value/100);
}
}
seq_printf(m, "\n");
/* VCE clocks */
- if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, &value)) {
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, (void *)&value, &size)) {
if (!value) {
seq_printf(m, "VCE: Disabled\n");
} else {
seq_printf(m, "VCE: Enabled\n");
- if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, &value))
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, (void *)&value, &size))
seq_printf(m, "\t%u MHz (ECCLK)\n", value/100);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index 8856eccc37fa..f5ae871aa11c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -43,7 +43,7 @@ static int amdgpu_create_pp_handle(struct amdgpu_device *adev)
amd_pp = &(adev->powerplay);
pp_init.chip_family = adev->family;
pp_init.chip_id = adev->asic_type;
- pp_init.pm_en = amdgpu_dpm != 0 ? true : false;
+ pp_init.pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false;
pp_init.feature_mask = amdgpu_pp_feature_mask;
pp_init.device = amdgpu_cgs_create_device(adev);
ret = amd_powerplay_create(&pp_init, &(amd_pp->pp_handle));
@@ -71,6 +71,7 @@ static int amdgpu_pp_early_init(void *handle)
case CHIP_TOPAZ:
case CHIP_CARRIZO:
case CHIP_STONEY:
+ case CHIP_VEGA10:
adev->pp_enabled = true;
if (amdgpu_create_pp_handle(adev))
return -EINVAL;
@@ -163,7 +164,7 @@ static int amdgpu_pp_hw_init(void *handle)
int ret = 0;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (adev->pp_enabled && adev->firmware.smu_load)
+ if (adev->pp_enabled && adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
amdgpu_ucode_init_bo(adev);
if (adev->powerplay.ip_funcs->hw_init)
@@ -190,7 +191,7 @@ static int amdgpu_pp_hw_fini(void *handle)
ret = adev->powerplay.ip_funcs->hw_fini(
adev->powerplay.pp_handle);
- if (adev->pp_enabled && adev->firmware.smu_load)
+ if (adev->pp_enabled && adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
amdgpu_ucode_fini_bo(adev);
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
new file mode 100644
index 000000000000..4731015f6101
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -0,0 +1,481 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Huang Rui
+ *
+ */
+
+#include <linux/firmware.h>
+#include "drmP.h"
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v3_1.h"
+
+static void psp_set_funcs(struct amdgpu_device *adev);
+
+static int psp_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ psp_set_funcs(adev);
+
+ return 0;
+}
+
+static int psp_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct psp_context *psp = &adev->psp;
+ int ret;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ psp->init_microcode = psp_v3_1_init_microcode;
+ psp->bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv;
+ psp->bootloader_load_sos = psp_v3_1_bootloader_load_sos;
+ psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf;
+ psp->ring_init = psp_v3_1_ring_init;
+ psp->cmd_submit = psp_v3_1_cmd_submit;
+ psp->compare_sram_data = psp_v3_1_compare_sram_data;
+ psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ psp->adev = adev;
+
+ ret = psp_init_microcode(psp);
+ if (ret) {
+ DRM_ERROR("Failed to load psp firmware!\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int psp_sw_fini(void *handle)
+{
+ return 0;
+}
+
+int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
+ uint32_t reg_val, uint32_t mask, bool check_changed)
+{
+ uint32_t val;
+ int i;
+ struct amdgpu_device *adev = psp->adev;
+
+ val = RREG32(reg_index);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (check_changed) {
+ if (val != reg_val)
+ return 0;
+ } else {
+ if ((val & mask) == reg_val)
+ return 0;
+ }
+ udelay(1);
+ }
+
+ return -ETIME;
+}
+
+static int
+psp_cmd_submit_buf(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
+ struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr,
+ int index)
+{
+ int ret;
+ struct amdgpu_bo *cmd_buf_bo;
+ uint64_t cmd_buf_mc_addr;
+ struct psp_gfx_cmd_resp *cmd_buf_mem;
+ struct amdgpu_device *adev = psp->adev;
+
+ ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &cmd_buf_bo, &cmd_buf_mc_addr,
+ (void **)&cmd_buf_mem);
+ if (ret)
+ return ret;
+
+ memset(cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE);
+
+ memcpy(cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));
+
+ ret = psp_cmd_submit(psp, ucode, cmd_buf_mc_addr,
+ fence_mc_addr, index);
+
+ while (*((unsigned int *)psp->fence_buf) != index) {
+ msleep(1);
+ };
+
+ amdgpu_bo_free_kernel(&cmd_buf_bo,
+ &cmd_buf_mc_addr,
+ (void **)&cmd_buf_mem);
+
+ return ret;
+}
+
+static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint64_t tmr_mc, uint32_t size)
+{
+ cmd->cmd_id = GFX_CMD_ID_SETUP_TMR;
+ cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = (uint32_t)tmr_mc;
+ cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = (uint32_t)(tmr_mc >> 32);
+ cmd->cmd.cmd_setup_tmr.buf_size = size;
+}
+
+/* Set up Trusted Memory Region */
+static int psp_tmr_init(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ /*
+ * Allocate 3M memory aligned to 1M from Frame Buffer (local
+ * physical).
+ *
+ * Note: this memory need be reserved till the driver
+ * uninitializes.
+ */
+ ret = amdgpu_bo_create_kernel(psp->adev, 0x300000, 0x100000,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
+ if (ret)
+ goto failed;
+
+ psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, 0x300000);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr, 1);
+ if (ret)
+ goto failed_mem;
+
+ kfree(cmd);
+
+ return 0;
+
+failed_mem:
+ amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
+failed:
+ kfree(cmd);
+ return ret;
+}
+
+static void psp_prep_asd_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint64_t asd_mc, uint64_t asd_mc_shared,
+ uint32_t size, uint32_t shared_size)
+{
+ cmd->cmd_id = GFX_CMD_ID_LOAD_ASD;
+ cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(asd_mc);
+ cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(asd_mc);
+ cmd->cmd.cmd_load_ta.app_len = size;
+
+ cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(asd_mc_shared);
+ cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(asd_mc_shared);
+ cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
+}
+
+static int psp_asd_load(struct psp_context *psp)
+{
+ int ret;
+ struct amdgpu_bo *asd_bo, *asd_shared_bo;
+ uint64_t asd_mc_addr, asd_shared_mc_addr;
+ void *asd_buf, *asd_shared_buf;
+ struct psp_gfx_cmd_resp *cmd;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ /*
+ * Allocate 16k memory aligned to 4k from Frame Buffer (local
+ * physical) for shared ASD <-> Driver
+ */
+ ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &asd_shared_bo, &asd_shared_mc_addr, &asd_buf);
+ if (ret)
+ goto failed;
+
+ /*
+ * Allocate 256k memory aligned to 4k from Frame Buffer (local
+ * physical) for ASD firmware
+ */
+ ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_BIN_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &asd_bo, &asd_mc_addr, &asd_buf);
+ if (ret)
+ goto failed_mem;
+
+ memcpy(asd_buf, psp->asd_start_addr, psp->asd_ucode_size);
+
+ psp_prep_asd_cmd_buf(cmd, asd_mc_addr, asd_shared_mc_addr,
+ psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr, 2);
+ if (ret)
+ goto failed_mem1;
+
+ amdgpu_bo_free_kernel(&asd_bo, &asd_mc_addr, &asd_buf);
+ amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf);
+ kfree(cmd);
+
+ return 0;
+
+failed_mem1:
+ amdgpu_bo_free_kernel(&asd_bo, &asd_mc_addr, &asd_buf);
+failed_mem:
+ amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf);
+failed:
+ kfree(cmd);
+ return ret;
+}
+
+static int psp_load_fw(struct amdgpu_device *adev)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+ int i;
+ struct amdgpu_firmware_info *ucode;
+ struct psp_context *psp = &adev->psp;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ ret = psp_bootloader_load_sysdrv(psp);
+ if (ret)
+ goto failed;
+
+ ret = psp_bootloader_load_sos(psp);
+ if (ret)
+ goto failed;
+
+ ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
+ if (ret)
+ goto failed;
+
+ ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &psp->fence_buf_bo,
+ &psp->fence_buf_mc_addr,
+ &psp->fence_buf);
+ if (ret)
+ goto failed;
+
+ memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE);
+
+ ret = psp_tmr_init(psp);
+ if (ret)
+ goto failed_mem;
+
+ ret = psp_asd_load(psp);
+ if (ret)
+ goto failed_mem;
+
+ for (i = 0; i < adev->firmware.max_ucodes; i++) {
+ ucode = &adev->firmware.ucode[i];
+ if (!ucode->fw)
+ continue;
+
+ if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
+ psp_smu_reload_quirk(psp))
+ continue;
+
+ ret = psp_prep_cmd_buf(ucode, cmd);
+ if (ret)
+ goto failed_mem;
+
+ ret = psp_cmd_submit_buf(psp, ucode, cmd,
+ psp->fence_buf_mc_addr, i + 3);
+ if (ret)
+ goto failed_mem;
+
+#if 0
+ /* check if firmware loaded sucessfully */
+ if (!amdgpu_psp_check_fw_loading_status(adev, i))
+ return -EINVAL;
+#endif
+ }
+
+ amdgpu_bo_free_kernel(&psp->fence_buf_bo,
+ &psp->fence_buf_mc_addr, &psp->fence_buf);
+ kfree(cmd);
+
+ return 0;
+
+failed_mem:
+ amdgpu_bo_free_kernel(&psp->fence_buf_bo,
+ &psp->fence_buf_mc_addr, &psp->fence_buf);
+failed:
+ kfree(cmd);
+ return ret;
+}
+
+static int psp_hw_init(void *handle)
+{
+ int ret;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ return 0;
+
+ mutex_lock(&adev->firmware.mutex);
+ /*
+ * This sequence is just used on hw_init only once, no need on
+ * resume.
+ */
+ ret = amdgpu_ucode_init_bo(adev);
+ if (ret)
+ goto failed;
+
+ ret = psp_load_fw(adev);
+ if (ret) {
+ DRM_ERROR("PSP firmware loading failed\n");
+ goto failed;
+ }
+
+ mutex_unlock(&adev->firmware.mutex);
+ return 0;
+
+failed:
+ adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT;
+ mutex_unlock(&adev->firmware.mutex);
+ return -EINVAL;
+}
+
+static int psp_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct psp_context *psp = &adev->psp;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
+ amdgpu_ucode_fini_bo(adev);
+
+ if (psp->tmr_buf)
+ amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
+
+ return 0;
+}
+
+static int psp_suspend(void *handle)
+{
+ return 0;
+}
+
+static int psp_resume(void *handle)
+{
+ int ret;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ return 0;
+
+ mutex_lock(&adev->firmware.mutex);
+
+ ret = psp_load_fw(adev);
+ if (ret)
+ DRM_ERROR("PSP resume failed\n");
+
+ mutex_unlock(&adev->firmware.mutex);
+
+ return ret;
+}
+
+static bool psp_check_fw_loading_status(struct amdgpu_device *adev,
+ enum AMDGPU_UCODE_ID ucode_type)
+{
+ struct amdgpu_firmware_info *ucode = NULL;
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ DRM_INFO("firmware is not loaded by PSP\n");
+ return true;
+ }
+
+ if (!adev->firmware.fw_size)
+ return false;
+
+ ucode = &adev->firmware.ucode[ucode_type];
+ if (!ucode->fw || !ucode->ucode_size)
+ return false;
+
+ return psp_compare_sram_data(&adev->psp, ucode, ucode_type);
+}
+
+static int psp_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int psp_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+const struct amd_ip_funcs psp_ip_funcs = {
+ .name = "psp",
+ .early_init = psp_early_init,
+ .late_init = NULL,
+ .sw_init = psp_sw_init,
+ .sw_fini = psp_sw_fini,
+ .hw_init = psp_hw_init,
+ .hw_fini = psp_hw_fini,
+ .suspend = psp_suspend,
+ .resume = psp_resume,
+ .is_idle = NULL,
+ .wait_for_idle = NULL,
+ .soft_reset = NULL,
+ .set_clockgating_state = psp_set_clockgating_state,
+ .set_powergating_state = psp_set_powergating_state,
+};
+
+static const struct amdgpu_psp_funcs psp_funcs = {
+ .check_fw_loading_status = psp_check_fw_loading_status,
+};
+
+static void psp_set_funcs(struct amdgpu_device *adev)
+{
+ if (NULL == adev->firmware.funcs)
+ adev->firmware.funcs = &psp_funcs;
+}
+
+const struct amdgpu_ip_block_version psp_v3_1_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 3,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
new file mode 100644
index 000000000000..e9f35e025b59
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Huang Rui
+ *
+ */
+#ifndef __AMDGPU_PSP_H__
+#define __AMDGPU_PSP_H__
+
+#include "amdgpu.h"
+#include "psp_gfx_if.h"
+
+#define PSP_FENCE_BUFFER_SIZE 0x1000
+#define PSP_CMD_BUFFER_SIZE 0x1000
+#define PSP_ASD_BIN_SIZE 0x40000
+#define PSP_ASD_SHARED_MEM_SIZE 0x4000
+
+enum psp_ring_type
+{
+ PSP_RING_TYPE__INVALID = 0,
+ /*
+ * These values map to the way the PSP kernel identifies the
+ * rings.
+ */
+ PSP_RING_TYPE__UM = 1, /* User mode ring (formerly called RBI) */
+ PSP_RING_TYPE__KM = 2 /* Kernel mode ring (formerly called GPCOM) */
+};
+
+struct psp_ring
+{
+ enum psp_ring_type ring_type;
+ struct psp_gfx_rb_frame *ring_mem;
+ uint64_t ring_mem_mc_addr;
+ void *ring_mem_handle;
+ uint32_t ring_size;
+};
+
+struct psp_context
+{
+ struct amdgpu_device *adev;
+ struct psp_ring km_ring;
+
+ int (*init_microcode)(struct psp_context *psp);
+ int (*bootloader_load_sysdrv)(struct psp_context *psp);
+ int (*bootloader_load_sos)(struct psp_context *psp);
+ int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode,
+ struct psp_gfx_cmd_resp *cmd);
+ int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
+ int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode,
+ uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index);
+ bool (*compare_sram_data)(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
+ enum AMDGPU_UCODE_ID ucode_type);
+ bool (*smu_reload_quirk)(struct psp_context *psp);
+
+ /* sos firmware */
+ const struct firmware *sos_fw;
+ uint32_t sos_fw_version;
+ uint32_t sos_feature_version;
+ uint32_t sys_bin_size;
+ uint32_t sos_bin_size;
+ uint8_t *sys_start_addr;
+ uint8_t *sos_start_addr;
+
+ /* tmr buffer */
+ struct amdgpu_bo *tmr_bo;
+ uint64_t tmr_mc_addr;
+ void *tmr_buf;
+
+ /* asd firmware */
+ const struct firmware *asd_fw;
+ uint32_t asd_fw_version;
+ uint32_t asd_feature_version;
+ uint32_t asd_ucode_size;
+ uint8_t *asd_start_addr;
+
+ /* fence buffer */
+ struct amdgpu_bo *fence_buf_bo;
+ uint64_t fence_buf_mc_addr;
+ void *fence_buf;
+};
+
+struct amdgpu_psp_funcs {
+ bool (*check_fw_loading_status)(struct amdgpu_device *adev,
+ enum AMDGPU_UCODE_ID);
+};
+
+#define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type))
+#define psp_ring_init(psp, type) (psp)->ring_init((psp), (type))
+#define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \
+ (psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index))
+#define psp_compare_sram_data(psp, ucode, type) \
+ (psp)->compare_sram_data((psp), (ucode), (type))
+#define psp_init_microcode(psp) \
+ ((psp)->init_microcode ? (psp)->init_microcode((psp)) : 0)
+#define psp_bootloader_load_sysdrv(psp) \
+ ((psp)->bootloader_load_sysdrv ? (psp)->bootloader_load_sysdrv((psp)) : 0)
+#define psp_bootloader_load_sos(psp) \
+ ((psp)->bootloader_load_sos ? (psp)->bootloader_load_sos((psp)) : 0)
+#define psp_smu_reload_quirk(psp) \
+ ((psp)->smu_reload_quirk ? (psp)->smu_reload_quirk((psp)) : false)
+
+extern const struct amd_ip_funcs psp_ip_funcs;
+
+extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
+extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
+ uint32_t field_val, uint32_t mask, bool check_changed);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 7c842b7f1004..6a85db0c0bc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -182,16 +182,32 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
return r;
}
- r = amdgpu_wb_get(adev, &ring->rptr_offs);
- if (r) {
- dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
- return r;
- }
+ if (ring->funcs->support_64bit_ptrs) {
+ r = amdgpu_wb_get_64bit(adev, &ring->rptr_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
+ return r;
+ }
+
+ r = amdgpu_wb_get_64bit(adev, &ring->wptr_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
+ return r;
+ }
+
+ } else {
+ r = amdgpu_wb_get(adev, &ring->rptr_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
+ return r;
+ }
+
+ r = amdgpu_wb_get(adev, &ring->wptr_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
+ return r;
+ }
- r = amdgpu_wb_get(adev, &ring->wptr_offs);
- if (r) {
- dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
- return r;
}
r = amdgpu_wb_get(adev, &ring->fence_offs);
@@ -219,6 +235,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->ring_size = roundup_pow_of_two(max_dw * 4 *
amdgpu_sched_hw_submission);
+ ring->buf_mask = (ring->ring_size / 4) - 1;
+ ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
+ 0xffffffffffffffff : ring->buf_mask;
/* Allocate ring buffer */
if (ring->ring_obj == NULL) {
r = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
@@ -230,9 +249,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
dev_err(adev->dev, "(%d) ring create failed\n", r);
return r;
}
- memset((void *)ring->ring, 0, ring->ring_size);
+ amdgpu_ring_clear_ring(ring);
}
- ring->ptr_mask = (ring->ring_size / 4) - 1;
+
ring->max_dw = max_dw;
if (amdgpu_debugfs_ring_init(adev, ring)) {
@@ -253,10 +272,18 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
{
ring->ready = false;
- amdgpu_wb_free(ring->adev, ring->cond_exe_offs);
- amdgpu_wb_free(ring->adev, ring->fence_offs);
- amdgpu_wb_free(ring->adev, ring->rptr_offs);
- amdgpu_wb_free(ring->adev, ring->wptr_offs);
+ if (ring->funcs->support_64bit_ptrs) {
+ amdgpu_wb_free_64bit(ring->adev, ring->cond_exe_offs);
+ amdgpu_wb_free_64bit(ring->adev, ring->fence_offs);
+ amdgpu_wb_free_64bit(ring->adev, ring->rptr_offs);
+ amdgpu_wb_free_64bit(ring->adev, ring->wptr_offs);
+ } else {
+ amdgpu_wb_free(ring->adev, ring->cond_exe_offs);
+ amdgpu_wb_free(ring->adev, ring->fence_offs);
+ amdgpu_wb_free(ring->adev, ring->rptr_offs);
+ amdgpu_wb_free(ring->adev, ring->wptr_offs);
+ }
+
amdgpu_bo_free_kernel(&ring->ring_obj,
&ring->gpu_addr,
@@ -293,8 +320,8 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
if (*pos < 12) {
early[0] = amdgpu_ring_get_rptr(ring);
- early[1] = amdgpu_ring_get_wptr(ring);
- early[2] = ring->wptr;
+ early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
+ early[2] = ring->wptr & ring->buf_mask;
for (i = *pos / 4; i < 3 && size; i++) {
r = put_user(early[i], (uint32_t *)buf);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 2345b39878c6..63e56398ca9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -27,10 +27,11 @@
#include "gpu_scheduler.h"
/* max number of rings */
-#define AMDGPU_MAX_RINGS 16
+#define AMDGPU_MAX_RINGS 18
#define AMDGPU_MAX_GFX_RINGS 1
#define AMDGPU_MAX_COMPUTE_RINGS 8
#define AMDGPU_MAX_VCE_RINGS 3
+#define AMDGPU_MAX_UVD_ENC_RINGS 2
/* some special values for the owner field */
#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul)
@@ -45,7 +46,8 @@ enum amdgpu_ring_type {
AMDGPU_RING_TYPE_SDMA,
AMDGPU_RING_TYPE_UVD,
AMDGPU_RING_TYPE_VCE,
- AMDGPU_RING_TYPE_KIQ
+ AMDGPU_RING_TYPE_KIQ,
+ AMDGPU_RING_TYPE_UVD_ENC
};
struct amdgpu_device;
@@ -96,10 +98,11 @@ struct amdgpu_ring_funcs {
enum amdgpu_ring_type type;
uint32_t align_mask;
u32 nop;
+ bool support_64bit_ptrs;
/* ring read/write ptr handling */
- u32 (*get_rptr)(struct amdgpu_ring *ring);
- u32 (*get_wptr)(struct amdgpu_ring *ring);
+ u64 (*get_rptr)(struct amdgpu_ring *ring);
+ u64 (*get_wptr)(struct amdgpu_ring *ring);
void (*set_wptr)(struct amdgpu_ring *ring);
/* validating and patching of IBs */
int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
@@ -126,6 +129,7 @@ struct amdgpu_ring_funcs {
int (*test_ib)(struct amdgpu_ring *ring, long timeout);
/* insert NOP packets */
void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
+ void (*insert_end)(struct amdgpu_ring *ring);
/* pad the indirect buffer to the necessary number of dw */
void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
@@ -148,19 +152,23 @@ struct amdgpu_ring {
struct amdgpu_bo *ring_obj;
volatile uint32_t *ring;
unsigned rptr_offs;
- unsigned wptr;
- unsigned wptr_old;
+ u64 wptr;
+ u64 wptr_old;
unsigned ring_size;
unsigned max_dw;
int count_dw;
uint64_t gpu_addr;
- uint32_t ptr_mask;
+ uint64_t ptr_mask;
+ uint32_t buf_mask;
bool ready;
u32 idx;
u32 me;
u32 pipe;
u32 queue;
struct amdgpu_bo *mqd_obj;
+ uint64_t mqd_gpu_addr;
+ void *mqd_ptr;
+ uint64_t eop_gpu_addr;
u32 doorbell_index;
bool use_doorbell;
unsigned wptr_offs;
@@ -184,5 +192,12 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned ring_size, struct amdgpu_irq_src *irq_src,
unsigned irq_type);
void amdgpu_ring_fini(struct amdgpu_ring *ring);
+static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
+{
+ int i = 0;
+ while (i <= ring->buf_mask)
+ ring->ring[i++] = ring->funcs->nop;
+
+}
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index e05a24325eeb..15510dadde01 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -228,7 +228,7 @@ out_unref:
out_cleanup:
kfree(gtt_obj);
if (r) {
- printk(KERN_WARNING "Error while testing BO move.\n");
+ pr_warn("Error while testing BO move\n");
}
}
@@ -237,82 +237,3 @@ void amdgpu_test_moves(struct amdgpu_device *adev)
if (adev->mman.buffer_funcs)
amdgpu_do_test_moves(adev);
}
-
-void amdgpu_test_ring_sync(struct amdgpu_device *adev,
- struct amdgpu_ring *ringA,
- struct amdgpu_ring *ringB)
-{
-}
-
-static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
- struct amdgpu_ring *ringA,
- struct amdgpu_ring *ringB,
- struct amdgpu_ring *ringC)
-{
-}
-
-static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA,
- struct amdgpu_ring *ringB)
-{
- if (ringA == &ringA->adev->vce.ring[0] &&
- ringB == &ringB->adev->vce.ring[1])
- return false;
-
- return true;
-}
-
-void amdgpu_test_syncing(struct amdgpu_device *adev)
-{
- int i, j, k;
-
- for (i = 1; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ringA = adev->rings[i];
- if (!ringA || !ringA->ready)
- continue;
-
- for (j = 0; j < i; ++j) {
- struct amdgpu_ring *ringB = adev->rings[j];
- if (!ringB || !ringB->ready)
- continue;
-
- if (!amdgpu_test_sync_possible(ringA, ringB))
- continue;
-
- DRM_INFO("Testing syncing between rings %d and %d...\n", i, j);
- amdgpu_test_ring_sync(adev, ringA, ringB);
-
- DRM_INFO("Testing syncing between rings %d and %d...\n", j, i);
- amdgpu_test_ring_sync(adev, ringB, ringA);
-
- for (k = 0; k < j; ++k) {
- struct amdgpu_ring *ringC = adev->rings[k];
- if (!ringC || !ringC->ready)
- continue;
-
- if (!amdgpu_test_sync_possible(ringA, ringC))
- continue;
-
- if (!amdgpu_test_sync_possible(ringB, ringC))
- continue;
-
- DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k);
- amdgpu_test_ring_sync2(adev, ringA, ringB, ringC);
-
- DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, k, j);
- amdgpu_test_ring_sync2(adev, ringA, ringC, ringB);
-
- DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, i, k);
- amdgpu_test_ring_sync2(adev, ringB, ringA, ringC);
-
- DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, k, i);
- amdgpu_test_ring_sync2(adev, ringB, ringC, ringA);
-
- DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, i, j);
- amdgpu_test_ring_sync2(adev, ringC, ringA, ringB);
-
- DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, j, i);
- amdgpu_test_ring_sync2(adev, ringC, ringB, ringA);
- }
- }
- }
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index a18ae1e97860..a87de18160a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -11,6 +11,9 @@
#define TRACE_SYSTEM amdgpu
#define TRACE_INCLUDE_FILE amdgpu_trace
+#define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
+ job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished)
+
TRACE_EVENT(amdgpu_mm_rreg,
TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
TP_ARGS(did, reg, value),
@@ -49,6 +52,43 @@ TRACE_EVENT(amdgpu_mm_wreg,
(unsigned long)__entry->value)
);
+TRACE_EVENT(amdgpu_iv,
+ TP_PROTO(struct amdgpu_iv_entry *iv),
+ TP_ARGS(iv),
+ TP_STRUCT__entry(
+ __field(unsigned, client_id)
+ __field(unsigned, src_id)
+ __field(unsigned, ring_id)
+ __field(unsigned, vm_id)
+ __field(unsigned, vm_id_src)
+ __field(uint64_t, timestamp)
+ __field(unsigned, timestamp_src)
+ __field(unsigned, pas_id)
+ __array(unsigned, src_data, 4)
+ ),
+ TP_fast_assign(
+ __entry->client_id = iv->client_id;
+ __entry->src_id = iv->src_id;
+ __entry->ring_id = iv->ring_id;
+ __entry->vm_id = iv->vm_id;
+ __entry->vm_id_src = iv->vm_id_src;
+ __entry->timestamp = iv->timestamp;
+ __entry->timestamp_src = iv->timestamp_src;
+ __entry->pas_id = iv->pas_id;
+ __entry->src_data[0] = iv->src_data[0];
+ __entry->src_data[1] = iv->src_data[1];
+ __entry->src_data[2] = iv->src_data[2];
+ __entry->src_data[3] = iv->src_data[3];
+ ),
+ TP_printk("client_id:%u src_id:%u ring:%u vm_id:%u timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n",
+ __entry->client_id, __entry->src_id,
+ __entry->ring_id, __entry->vm_id,
+ __entry->timestamp, __entry->pas_id,
+ __entry->src_data[0], __entry->src_data[1],
+ __entry->src_data[2], __entry->src_data[3])
+);
+
+
TRACE_EVENT(amdgpu_bo_create,
TP_PROTO(struct amdgpu_bo *bo),
TP_ARGS(bo),
@@ -70,7 +110,7 @@ TRACE_EVENT(amdgpu_bo_create,
__entry->visible = bo->flags;
),
- TP_printk("bo=%p,pages=%u,type=%d,prefered=%d,allowed=%d,visible=%d",
+ TP_printk("bo=%p, pages=%u, type=%d, prefered=%d, allowed=%d, visible=%d",
__entry->bo, __entry->pages, __entry->type,
__entry->prefer, __entry->allow, __entry->visible)
);
@@ -101,50 +141,51 @@ TRACE_EVENT(amdgpu_cs_ioctl,
TP_PROTO(struct amdgpu_job *job),
TP_ARGS(job),
TP_STRUCT__entry(
- __field(struct amdgpu_device *, adev)
- __field(struct amd_sched_job *, sched_job)
- __field(struct amdgpu_ib *, ib)
+ __field(uint64_t, sched_job_id)
+ __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
+ __field(unsigned int, context)
+ __field(unsigned int, seqno)
__field(struct dma_fence *, fence)
__field(char *, ring_name)
__field(u32, num_ibs)
),
TP_fast_assign(
- __entry->adev = job->adev;
- __entry->sched_job = &job->base;
- __entry->ib = job->ibs;
- __entry->fence = &job->base.s_fence->finished;
+ __entry->sched_job_id = job->base.id;
+ __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
+ __entry->context = job->base.s_fence->finished.context;
+ __entry->seqno = job->base.s_fence->finished.seqno;
__entry->ring_name = job->ring->name;
__entry->num_ibs = job->num_ibs;
),
- TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
- __entry->adev, __entry->sched_job, __entry->ib,
- __entry->fence, __entry->ring_name, __entry->num_ibs)
+ TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
+ __entry->sched_job_id, __get_str(timeline), __entry->context,
+ __entry->seqno, __entry->ring_name, __entry->num_ibs)
);
TRACE_EVENT(amdgpu_sched_run_job,
TP_PROTO(struct amdgpu_job *job),
TP_ARGS(job),
TP_STRUCT__entry(
- __field(struct amdgpu_device *, adev)
- __field(struct amd_sched_job *, sched_job)
- __field(struct amdgpu_ib *, ib)
- __field(struct dma_fence *, fence)
+ __field(uint64_t, sched_job_id)
+ __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
+ __field(unsigned int, context)
+ __field(unsigned int, seqno)
__field(char *, ring_name)
__field(u32, num_ibs)
),
TP_fast_assign(
- __entry->adev = job->adev;
- __entry->sched_job = &job->base;
- __entry->ib = job->ibs;
- __entry->fence = &job->base.s_fence->finished;
+ __entry->sched_job_id = job->base.id;
+ __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
+ __entry->context = job->base.s_fence->finished.context;
+ __entry->seqno = job->base.s_fence->finished.seqno;
__entry->ring_name = job->ring->name;
__entry->num_ibs = job->num_ibs;
),
- TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
- __entry->adev, __entry->sched_job, __entry->ib,
- __entry->fence, __entry->ring_name, __entry->num_ibs)
+ TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
+ __entry->sched_job_id, __get_str(timeline), __entry->context,
+ __entry->seqno, __entry->ring_name, __entry->num_ibs)
);
@@ -184,7 +225,7 @@ TRACE_EVENT(amdgpu_vm_bo_map,
),
TP_fast_assign(
- __entry->bo = bo_va->bo;
+ __entry->bo = bo_va ? bo_va->bo : NULL;
__entry->start = mapping->it.start;
__entry->last = mapping->it.last;
__entry->offset = mapping->offset;
@@ -321,7 +362,7 @@ TRACE_EVENT(amdgpu_bo_list_set,
__entry->bo = bo;
__entry->bo_size = amdgpu_bo_size(bo);
),
- TP_printk("list=%p, bo=%p, bo_size = %Ld",
+ TP_printk("list=%p, bo=%p, bo_size=%Ld",
__entry->list,
__entry->bo,
__entry->bo_size)
@@ -339,7 +380,7 @@ TRACE_EVENT(amdgpu_cs_bo_status,
__entry->total_bo = total_bo;
__entry->total_size = total_size;
),
- TP_printk("total bo size = %Ld, total bo count = %Ld",
+ TP_printk("total_bo_size=%Ld, total_bo_count=%Ld",
__entry->total_bo, __entry->total_size)
);
@@ -359,11 +400,12 @@ TRACE_EVENT(amdgpu_ttm_bo_move,
__entry->new_placement = new_placement;
__entry->old_placement = old_placement;
),
- TP_printk("bo=%p from:%d to %d with size = %Ld",
+ TP_printk("bo=%p, from=%d, to=%d, size=%Ld",
__entry->bo, __entry->old_placement,
__entry->new_placement, __entry->bo_size)
);
+#undef AMDGPU_JOB_GET_TIMELINE_NAME
#endif
/* This part must be outside protection */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4c6094eefc51..244bb9aacf86 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -746,7 +746,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
{
struct ttm_tt *ttm = bo->ttm;
struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
- uint32_t flags;
+ uint64_t flags;
int r;
if (!ttm || amdgpu_ttm_is_bound(ttm))
@@ -1027,10 +1027,10 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
}
-uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
+uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
struct ttm_mem_reg *mem)
{
- uint32_t flags = 0;
+ uint64_t flags = 0;
if (mem && mem->mem_type != TTM_PL_SYSTEM)
flags |= AMDGPU_PTE_VALID;
@@ -1042,9 +1042,7 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
flags |= AMDGPU_PTE_SNOOPED;
}
- if (adev->asic_type >= CHIP_TONGA)
- flags |= AMDGPU_PTE_EXECUTABLE;
-
+ flags |= adev->gart.gart_pte_flags;
flags |= AMDGPU_PTE_READABLE;
if (!amdgpu_ttm_tt_is_readonly(ttm))
@@ -1160,27 +1158,33 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT;
adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT;
/* GDS Memory */
- r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
- adev->gds.mem.total_size >> PAGE_SHIFT);
- if (r) {
- DRM_ERROR("Failed initializing GDS heap.\n");
- return r;
+ if (adev->gds.mem.total_size) {
+ r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
+ adev->gds.mem.total_size >> PAGE_SHIFT);
+ if (r) {
+ DRM_ERROR("Failed initializing GDS heap.\n");
+ return r;
+ }
}
/* GWS */
- r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
- adev->gds.gws.total_size >> PAGE_SHIFT);
- if (r) {
- DRM_ERROR("Failed initializing gws heap.\n");
- return r;
+ if (adev->gds.gws.total_size) {
+ r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
+ adev->gds.gws.total_size >> PAGE_SHIFT);
+ if (r) {
+ DRM_ERROR("Failed initializing gws heap.\n");
+ return r;
+ }
}
/* OA */
- r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
- adev->gds.oa.total_size >> PAGE_SHIFT);
- if (r) {
- DRM_ERROR("Failed initializing oa heap.\n");
- return r;
+ if (adev->gds.oa.total_size) {
+ r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
+ adev->gds.oa.total_size >> PAGE_SHIFT);
+ if (r) {
+ DRM_ERROR("Failed initializing oa heap.\n");
+ return r;
+ }
}
r = amdgpu_ttm_debugfs_init(adev);
@@ -1208,9 +1212,12 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
}
ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
- ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
- ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
- ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
+ if (adev->gds.mem.total_size)
+ ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
+ if (adev->gds.gws.total_size)
+ ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
+ if (adev->gds.oa.total_size)
+ ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
ttm_bo_device_release(&adev->mman.bdev);
amdgpu_gart_fini(adev);
amdgpu_ttm_global_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 0f0b38191fac..a1891c93cdbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -217,10 +217,55 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
return true;
}
-static int amdgpu_ucode_init_single_fw(struct amdgpu_firmware_info *ucode,
- uint64_t mc_addr, void *kptr)
+enum amdgpu_firmware_load_type
+amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
+{
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ return AMDGPU_FW_LOAD_DIRECT;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_HAWAII:
+ case CHIP_MULLINS:
+ return AMDGPU_FW_LOAD_DIRECT;
+#endif
+ case CHIP_TOPAZ:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ if (!load_type)
+ return AMDGPU_FW_LOAD_DIRECT;
+ else
+ return AMDGPU_FW_LOAD_SMU;
+ case CHIP_VEGA10:
+ if (!load_type)
+ return AMDGPU_FW_LOAD_DIRECT;
+ else
+ return AMDGPU_FW_LOAD_PSP;
+ default:
+ DRM_ERROR("Unknow firmware load type\n");
+ }
+
+ return AMDGPU_FW_LOAD_DIRECT;
+}
+
+static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
+ struct amdgpu_firmware_info *ucode,
+ uint64_t mc_addr, void *kptr)
{
const struct common_firmware_header *header = NULL;
+ const struct gfx_firmware_header_v1_0 *cp_hdr = NULL;
if (NULL == ucode->fw)
return 0;
@@ -232,9 +277,36 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_firmware_info *ucode,
return 0;
header = (const struct common_firmware_header *)ucode->fw->data;
- memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
- le32_to_cpu(header->ucode_array_offset_bytes)),
- le32_to_cpu(header->ucode_size_bytes));
+
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP ||
+ (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
+ ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 &&
+ ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT &&
+ ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) {
+ ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
+
+ memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes)),
+ ucode->ucode_size);
+ } else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1 ||
+ ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2) {
+ ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+
+ memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes)),
+ ucode->ucode_size);
+ } else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT ||
+ ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT) {
+ ucode->ucode_size = le32_to_cpu(cp_hdr->jt_size) * 4;
+
+ memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes) +
+ le32_to_cpu(cp_hdr->jt_offset) * 4),
+ ucode->ucode_size);
+ }
return 0;
}
@@ -260,10 +332,11 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
(le32_to_cpu(header->jt_offset) * 4);
memcpy(dst_addr, src_addr, le32_to_cpu(header->jt_size) * 4);
+ ucode->ucode_size += le32_to_cpu(header->jt_size) * 4;
+
return 0;
}
-
int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
{
struct amdgpu_bo **bo = &adev->firmware.fw_buf;
@@ -303,20 +376,32 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
amdgpu_bo_unreserve(*bo);
- for (i = 0; i < AMDGPU_UCODE_ID_MAXIMUM; i++) {
+ memset(fw_buf_ptr, 0, adev->firmware.fw_size);
+
+ /*
+ * if SMU loaded firmware, it needn't add SMC, UVD, and VCE
+ * ucode info here
+ */
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 4;
+ else
+ adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM;
+
+ for (i = 0; i < adev->firmware.max_ucodes; i++) {
ucode = &adev->firmware.ucode[i];
if (ucode->fw) {
header = (const struct common_firmware_header *)ucode->fw->data;
- amdgpu_ucode_init_single_fw(ucode, fw_mc_addr + fw_offset,
- fw_buf_ptr + fw_offset);
- if (i == AMDGPU_UCODE_ID_CP_MEC1) {
+ amdgpu_ucode_init_single_fw(adev, ucode, fw_mc_addr + fw_offset,
+ (void *)((uint8_t *)fw_buf_ptr + fw_offset));
+ if (i == AMDGPU_UCODE_ID_CP_MEC1 &&
+ adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
const struct gfx_firmware_header_v1_0 *cp_hdr;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
amdgpu_ucode_patch_jt(ucode, fw_mc_addr + fw_offset,
fw_buf_ptr + fw_offset);
fw_offset += ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
}
- fw_offset += ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ fw_offset += ALIGN(ucode->ucode_size, PAGE_SIZE);
}
}
return 0;
@@ -328,7 +413,8 @@ failed_pin:
failed_reserve:
amdgpu_bo_unref(bo);
failed:
- adev->firmware.smu_load = false;
+ if (err)
+ adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT;
return err;
}
@@ -338,7 +424,7 @@ int amdgpu_ucode_fini_bo(struct amdgpu_device *adev)
int i;
struct amdgpu_firmware_info *ucode = NULL;
- for (i = 0; i < AMDGPU_UCODE_ID_MAXIMUM; i++) {
+ for (i = 0; i < adev->firmware.max_ucodes; i++) {
ucode = &adev->firmware.ucode[i];
if (ucode->fw) {
ucode->mc_addr = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index a8a4230729f9..758f03a1770d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -50,6 +50,14 @@ struct smc_firmware_header_v1_0 {
};
/* version_major=1, version_minor=0 */
+struct psp_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t sos_offset_bytes;
+ uint32_t sos_size_bytes;
+};
+
+/* version_major=1, version_minor=0 */
struct gfx_firmware_header_v1_0 {
struct common_firmware_header header;
uint32_t ucode_feature_version;
@@ -110,6 +118,7 @@ union amdgpu_firmware_header {
struct common_firmware_header common;
struct mc_firmware_header_v1_0 mc;
struct smc_firmware_header_v1_0 smc;
+ struct psp_firmware_header_v1_0 psp;
struct gfx_firmware_header_v1_0 gfx;
struct rlc_firmware_header_v1_0 rlc;
struct rlc_firmware_header_v2_0 rlc_v2_0;
@@ -128,9 +137,14 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_CP_PFP,
AMDGPU_UCODE_ID_CP_ME,
AMDGPU_UCODE_ID_CP_MEC1,
+ AMDGPU_UCODE_ID_CP_MEC1_JT,
AMDGPU_UCODE_ID_CP_MEC2,
+ AMDGPU_UCODE_ID_CP_MEC2_JT,
AMDGPU_UCODE_ID_RLC_G,
AMDGPU_UCODE_ID_STORAGE,
+ AMDGPU_UCODE_ID_SMC,
+ AMDGPU_UCODE_ID_UVD,
+ AMDGPU_UCODE_ID_VCE,
AMDGPU_UCODE_ID_MAXIMUM,
};
@@ -161,6 +175,8 @@ struct amdgpu_firmware_info {
uint64_t mc_addr;
/* kernel linear address */
void *kaddr;
+ /* ucode_size_bytes */
+ uint32_t ucode_size;
};
void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr);
@@ -174,4 +190,7 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
int amdgpu_ucode_init_bo(struct amdgpu_device *adev);
int amdgpu_ucode_fini_bo(struct amdgpu_device *adev);
+enum amdgpu_firmware_load_type
+amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 6d6ab7f11b4c..0b92dd0c1d70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -67,6 +67,14 @@
#define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin"
#define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin"
+#define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin"
+
+#define mmUVD_GPCOM_VCPU_DATA0_VEGA10 (0x03c4 + 0x7e00)
+#define mmUVD_GPCOM_VCPU_DATA1_VEGA10 (0x03c5 + 0x7e00)
+#define mmUVD_GPCOM_VCPU_CMD_VEGA10 (0x03c3 + 0x7e00)
+#define mmUVD_NO_OP_VEGA10 (0x03ff + 0x7e00)
+#define mmUVD_ENGINE_CNTL_VEGA10 (0x03c6 + 0x7e00)
+
/**
* amdgpu_uvd_cs_ctx - Command submission parser context
*
@@ -101,6 +109,8 @@ MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
+MODULE_FIRMWARE(FIRMWARE_VEGA10);
+
static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
@@ -151,6 +161,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
case CHIP_POLARIS11:
fw_name = FIRMWARE_POLARIS11;
break;
+ case CHIP_VEGA10:
+ fw_name = FIRMWARE_VEGA10;
+ break;
case CHIP_POLARIS12:
fw_name = FIRMWARE_POLARIS12;
break;
@@ -203,9 +216,11 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n",
version_major, version_minor);
- bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
- + AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE
+ bo_size = AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE
+ AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo,
&adev->uvd.gpu_addr, &adev->uvd.cpu_addr);
@@ -319,11 +334,13 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
unsigned offset;
hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
- offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
- memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset,
- le32_to_cpu(hdr->ucode_size_bytes));
- size -= le32_to_cpu(hdr->ucode_size_bytes);
- ptr += le32_to_cpu(hdr->ucode_size_bytes);
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+ memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset,
+ le32_to_cpu(hdr->ucode_size_bytes));
+ size -= le32_to_cpu(hdr->ucode_size_bytes);
+ ptr += le32_to_cpu(hdr->ucode_size_bytes);
+ }
memset_io(ptr, 0, size);
}
@@ -936,6 +953,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
struct dma_fence *f = NULL;
struct amdgpu_device *adev = ring->adev;
uint64_t addr;
+ uint32_t data[4];
int i, r;
memset(&tv, 0, sizeof(tv));
@@ -961,16 +979,28 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
if (r)
goto err;
+ if (adev->asic_type >= CHIP_VEGA10) {
+ data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0_VEGA10, 0);
+ data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1_VEGA10, 0);
+ data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD_VEGA10, 0);
+ data[3] = PACKET0(mmUVD_NO_OP_VEGA10, 0);
+ } else {
+ data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
+ data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0);
+ data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
+ data[3] = PACKET0(mmUVD_NO_OP, 0);
+ }
+
ib = &job->ibs[0];
addr = amdgpu_bo_gpu_offset(bo);
- ib->ptr[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
+ ib->ptr[0] = data[0];
ib->ptr[1] = addr;
- ib->ptr[2] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0);
+ ib->ptr[2] = data[1];
ib->ptr[3] = addr >> 32;
- ib->ptr[4] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
+ ib->ptr[4] = data[2];
ib->ptr[5] = 0;
for (i = 6; i < 16; i += 2) {
- ib->ptr[i] = PACKET0(mmUVD_NO_OP, 0);
+ ib->ptr[i] = data[3];
ib->ptr[i+1] = 0;
}
ib->length_dw = 16;
@@ -1108,6 +1138,9 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
container_of(work, struct amdgpu_device, uvd.idle_work.work);
unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
+ if (amdgpu_sriov_vf(adev))
+ return;
+
if (fences == 0) {
if (adev->pm.dpm_enabled) {
amdgpu_dpm_enable_uvd(adev, false);
@@ -1129,6 +1162,9 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
bool set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
+ if (amdgpu_sriov_vf(adev))
+ return;
+
if (set_clocks) {
if (adev->pm.dpm_enabled) {
amdgpu_dpm_enable_uvd(adev, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index c10682baccae..3553b92bf69a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -24,6 +24,35 @@
#ifndef __AMDGPU_UVD_H__
#define __AMDGPU_UVD_H__
+#define AMDGPU_DEFAULT_UVD_HANDLES 10
+#define AMDGPU_MAX_UVD_HANDLES 40
+#define AMDGPU_UVD_STACK_SIZE (200*1024)
+#define AMDGPU_UVD_HEAP_SIZE (256*1024)
+#define AMDGPU_UVD_SESSION_SIZE (50*1024)
+#define AMDGPU_UVD_FIRMWARE_OFFSET 256
+
+struct amdgpu_uvd {
+ struct amdgpu_bo *vcpu_bo;
+ void *cpu_addr;
+ uint64_t gpu_addr;
+ unsigned fw_version;
+ void *saved_bo;
+ unsigned max_handles;
+ atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
+ struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
+ struct delayed_work idle_work;
+ const struct firmware *fw; /* UVD firmware */
+ struct amdgpu_ring ring;
+ struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS];
+ struct amdgpu_irq_src irq;
+ bool address_64_bit;
+ bool use_ctx_buf;
+ struct amd_sched_entity entity;
+ struct amd_sched_entity entity_enc;
+ uint32_t srbm_soft_reset;
+ unsigned num_enc_rings;
+};
+
int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
int amdgpu_uvd_sw_fini(struct amdgpu_device *adev);
int amdgpu_uvd_suspend(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index e2c06780ce49..0184197eb000 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -54,6 +54,8 @@
#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin"
#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin"
+#define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin"
+
#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
@@ -69,6 +71,8 @@ MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
+MODULE_FIRMWARE(FIRMWARE_VEGA10);
+
static void amdgpu_vce_idle_work_handler(struct work_struct *work);
/**
@@ -123,6 +127,9 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
case CHIP_POLARIS11:
fw_name = FIRMWARE_POLARIS11;
break;
+ case CHIP_VEGA10:
+ fw_name = FIRMWARE_VEGA10;
+ break;
case CHIP_POLARIS12:
fw_name = FIRMWARE_POLARIS12;
break;
@@ -313,6 +320,9 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work)
container_of(work, struct amdgpu_device, vce.idle_work.work);
unsigned i, count = 0;
+ if (amdgpu_sriov_vf(adev))
+ return;
+
for (i = 0; i < adev->vce.num_rings; i++)
count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);
@@ -343,6 +353,9 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
bool set_clocks;
+ if (amdgpu_sriov_vf(adev))
+ return;
+
mutex_lock(&adev->vce.idle_mutex);
set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
if (set_clocks) {
@@ -944,6 +957,10 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
unsigned i;
int r;
+ /* TODO: remove it if VCE can work for sriov */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
r = amdgpu_ring_alloc(ring, 16);
if (r) {
DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n",
@@ -982,6 +999,10 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct dma_fence *fence = NULL;
long r;
+ /* TODO: remove it if VCE can work for sriov */
+ if (amdgpu_sriov_vf(ring->adev))
+ return 0;
+
/* skip vce ring1/2 ib test for now, since it's not reliable */
if (ring != &ring->adev->vce.ring[0])
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index d98041f7508d..0a7f18c461e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -24,6 +24,31 @@
#ifndef __AMDGPU_VCE_H__
#define __AMDGPU_VCE_H__
+#define AMDGPU_MAX_VCE_HANDLES 16
+#define AMDGPU_VCE_FIRMWARE_OFFSET 256
+
+#define AMDGPU_VCE_HARVEST_VCE0 (1 << 0)
+#define AMDGPU_VCE_HARVEST_VCE1 (1 << 1)
+
+struct amdgpu_vce {
+ struct amdgpu_bo *vcpu_bo;
+ uint64_t gpu_addr;
+ unsigned fw_version;
+ unsigned fb_version;
+ atomic_t handles[AMDGPU_MAX_VCE_HANDLES];
+ struct drm_file *filp[AMDGPU_MAX_VCE_HANDLES];
+ uint32_t img_size[AMDGPU_MAX_VCE_HANDLES];
+ struct delayed_work idle_work;
+ struct mutex idle_mutex;
+ const struct firmware *fw; /* VCE firmware */
+ struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS];
+ struct amdgpu_irq_src irq;
+ unsigned harvest_config;
+ struct amd_sched_entity entity;
+ uint32_t srbm_soft_reset;
+ unsigned num_rings;
+};
+
int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size);
int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
int amdgpu_vce_suspend(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index dcfb7df3caf4..ecef35a1fe33 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -75,6 +75,15 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
return -ENOMEM;
}
+ r = amdgpu_vm_alloc_pts(adev, bo_va->vm, AMDGPU_CSA_VADDR,
+ AMDGPU_CSA_SIZE);
+ if (r) {
+ DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
+ amdgpu_vm_bo_rmv(adev, bo_va);
+ ttm_eu_backoff_reservation(&ticket, &list);
+ return r;
+ }
+
r = amdgpu_vm_bo_map(adev, bo_va, AMDGPU_CSA_VADDR, 0,AMDGPU_CSA_SIZE,
AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
AMDGPU_PTE_EXECUTABLE);
@@ -97,7 +106,8 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
adev->mode_info.num_crtc = 1;
adev->enable_virtual_display = true;
- mutex_init(&adev->virt.lock);
+ mutex_init(&adev->virt.lock_kiq);
+ mutex_init(&adev->virt.lock_reset);
}
uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
@@ -110,14 +120,14 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
BUG_ON(!ring->funcs->emit_rreg);
- mutex_lock(&adev->virt.lock);
+ mutex_lock(&adev->virt.lock_kiq);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_hdp_flush(ring);
amdgpu_ring_emit_rreg(ring, reg);
amdgpu_ring_emit_hdp_invalidate(ring);
amdgpu_fence_emit(ring, &f);
amdgpu_ring_commit(ring);
- mutex_unlock(&adev->virt.lock);
+ mutex_unlock(&adev->virt.lock_kiq);
r = dma_fence_wait(f, false);
if (r)
@@ -138,14 +148,14 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
BUG_ON(!ring->funcs->emit_wreg);
- mutex_lock(&adev->virt.lock);
+ mutex_lock(&adev->virt.lock_kiq);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_hdp_flush(ring);
amdgpu_ring_emit_wreg(ring, reg, v);
amdgpu_ring_emit_hdp_invalidate(ring);
amdgpu_fence_emit(ring, &f);
amdgpu_ring_commit(ring);
- mutex_unlock(&adev->virt.lock);
+ mutex_unlock(&adev->virt.lock_kiq);
r = dma_fence_wait(f, false);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 675e12c42532..1ee0a190b33b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -30,6 +30,12 @@
#define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* thw whole GPU is pass through for VM */
#define AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */
+struct amdgpu_mm_table {
+ struct amdgpu_bo *bo;
+ uint32_t *cpu_addr;
+ uint64_t gpu_addr;
+};
+
/**
* struct amdgpu_virt_ops - amdgpu device virt operations
*/
@@ -46,10 +52,12 @@ struct amdgpu_virt {
uint64_t csa_vmid0_addr;
bool chained_ib_support;
uint32_t reg_val_offs;
- struct mutex lock;
+ struct mutex lock_kiq;
+ struct mutex lock_reset;
struct amdgpu_irq_src ack_irq;
struct amdgpu_irq_src rcv_irq;
- struct delayed_work flr_work;
+ struct work_struct flr_work;
+ struct amdgpu_mm_table mm_table;
const struct amdgpu_virt_ops *ops;
};
@@ -89,5 +97,6 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
+int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index bd0d33125c18..0235d7933efd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -57,6 +57,8 @@
struct amdgpu_pte_update_params {
/* amdgpu device we do this update for */
struct amdgpu_device *adev;
+ /* optional amdgpu_vm we do this update for */
+ struct amdgpu_vm *vm;
/* address where to copy page table entries from */
uint64_t src;
/* indirect buffer to fill with commands */
@@ -64,33 +66,49 @@ struct amdgpu_pte_update_params {
/* Function which actually does the update */
void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe,
uint64_t addr, unsigned count, uint32_t incr,
- uint32_t flags);
+ uint64_t flags);
/* indicate update pt or its shadow */
bool shadow;
};
+/* Helper to disable partial resident texture feature from a fence callback */
+struct amdgpu_prt_cb {
+ struct amdgpu_device *adev;
+ struct dma_fence_cb cb;
+};
+
/**
- * amdgpu_vm_num_pde - return the number of page directory entries
+ * amdgpu_vm_num_entries - return the number of entries in a PD/PT
*
* @adev: amdgpu_device pointer
*
- * Calculate the number of page directory entries.
+ * Calculate the number of entries in a page directory or page table.
*/
-static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev)
+static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
+ unsigned level)
{
- return adev->vm_manager.max_pfn >> amdgpu_vm_block_size;
+ if (level == 0)
+ /* For the root directory */
+ return adev->vm_manager.max_pfn >>
+ (amdgpu_vm_block_size * adev->vm_manager.num_level);
+ else if (level == adev->vm_manager.num_level)
+ /* For the page tables on the leaves */
+ return AMDGPU_VM_PTE_COUNT;
+ else
+ /* Everything in between */
+ return 1 << amdgpu_vm_block_size;
}
/**
- * amdgpu_vm_directory_size - returns the size of the page directory in bytes
+ * amdgpu_vm_bo_size - returns the size of the BOs in bytes
*
* @adev: amdgpu_device pointer
*
- * Calculate the size of the page directory in bytes.
+ * Calculate the size of the BO for a page directory or page table in bytes.
*/
-static unsigned amdgpu_vm_directory_size(struct amdgpu_device *adev)
+static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
{
- return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_pdes(adev) * 8);
+ return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
}
/**
@@ -107,15 +125,56 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
struct list_head *validated,
struct amdgpu_bo_list_entry *entry)
{
- entry->robj = vm->page_directory;
+ entry->robj = vm->root.bo;
entry->priority = 0;
- entry->tv.bo = &vm->page_directory->tbo;
+ entry->tv.bo = &entry->robj->tbo;
entry->tv.shared = true;
entry->user_pages = NULL;
list_add(&entry->tv.head, validated);
}
/**
+ * amdgpu_vm_validate_layer - validate a single page table level
+ *
+ * @parent: parent page table level
+ * @validate: callback to do the validation
+ * @param: parameter for the validation callback
+ *
+ * Validate the page table BOs on command submission if neccessary.
+ */
+static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
+ int (*validate)(void *, struct amdgpu_bo *),
+ void *param)
+{
+ unsigned i;
+ int r;
+
+ if (!parent->entries)
+ return 0;
+
+ for (i = 0; i <= parent->last_entry_used; ++i) {
+ struct amdgpu_vm_pt *entry = &parent->entries[i];
+
+ if (!entry->bo)
+ continue;
+
+ r = validate(param, entry->bo);
+ if (r)
+ return r;
+
+ /*
+ * Recurse into the sub directory. This is harmless because we
+ * have only a maximum of 5 layers.
+ */
+ r = amdgpu_vm_validate_level(entry, validate, param);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+/**
* amdgpu_vm_validate_pt_bos - validate the page table BOs
*
* @adev: amdgpu device pointer
@@ -130,8 +189,6 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
void *param)
{
uint64_t num_evictions;
- unsigned i;
- int r;
/* We only need to validate the page tables
* if they aren't already valid.
@@ -140,19 +197,33 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (num_evictions == vm->last_eviction_counter)
return 0;
- /* add the vm page table to the list */
- for (i = 0; i <= vm->max_pde_used; ++i) {
- struct amdgpu_bo *bo = vm->page_tables[i].bo;
+ return amdgpu_vm_validate_level(&vm->root, validate, param);
+}
- if (!bo)
+/**
+ * amdgpu_vm_move_level_in_lru - move one level of PT BOs to the LRU tail
+ *
+ * @adev: amdgpu device instance
+ * @vm: vm providing the BOs
+ *
+ * Move the PT BOs to the tail of the LRU.
+ */
+static void amdgpu_vm_move_level_in_lru(struct amdgpu_vm_pt *parent)
+{
+ unsigned i;
+
+ if (!parent->entries)
+ return;
+
+ for (i = 0; i <= parent->last_entry_used; ++i) {
+ struct amdgpu_vm_pt *entry = &parent->entries[i];
+
+ if (!entry->bo)
continue;
- r = validate(param, bo);
- if (r)
- return r;
+ ttm_bo_move_to_lru_tail(&entry->bo->tbo);
+ amdgpu_vm_move_level_in_lru(entry);
}
-
- return 0;
}
/**
@@ -167,18 +238,131 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
struct ttm_bo_global *glob = adev->mman.bdev.glob;
- unsigned i;
spin_lock(&glob->lru_lock);
- for (i = 0; i <= vm->max_pde_used; ++i) {
- struct amdgpu_bo *bo = vm->page_tables[i].bo;
+ amdgpu_vm_move_level_in_lru(&vm->root);
+ spin_unlock(&glob->lru_lock);
+}
- if (!bo)
- continue;
+ /**
+ * amdgpu_vm_alloc_levels - allocate the PD/PT levels
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @saddr: start of the address range
+ * @eaddr: end of the address range
+ *
+ * Make sure the page directories and page tables are allocated
+ */
+static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_vm_pt *parent,
+ uint64_t saddr, uint64_t eaddr,
+ unsigned level)
+{
+ unsigned shift = (adev->vm_manager.num_level - level) *
+ amdgpu_vm_block_size;
+ unsigned pt_idx, from, to;
+ int r;
+
+ if (!parent->entries) {
+ unsigned num_entries = amdgpu_vm_num_entries(adev, level);
- ttm_bo_move_to_lru_tail(&bo->tbo);
+ parent->entries = drm_calloc_large(num_entries,
+ sizeof(struct amdgpu_vm_pt));
+ if (!parent->entries)
+ return -ENOMEM;
+ memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
}
- spin_unlock(&glob->lru_lock);
+
+ from = saddr >> shift;
+ to = eaddr >> shift;
+ if (from >= amdgpu_vm_num_entries(adev, level) ||
+ to >= amdgpu_vm_num_entries(adev, level))
+ return -EINVAL;
+
+ if (to > parent->last_entry_used)
+ parent->last_entry_used = to;
+
+ ++level;
+ saddr = saddr & ((1 << shift) - 1);
+ eaddr = eaddr & ((1 << shift) - 1);
+
+ /* walk over the address space and allocate the page tables */
+ for (pt_idx = from; pt_idx <= to; ++pt_idx) {
+ struct reservation_object *resv = vm->root.bo->tbo.resv;
+ struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
+ struct amdgpu_bo *pt;
+
+ if (!entry->bo) {
+ r = amdgpu_bo_create(adev,
+ amdgpu_vm_bo_size(adev, level),
+ AMDGPU_GPU_PAGE_SIZE, true,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
+ AMDGPU_GEM_CREATE_SHADOW |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+ AMDGPU_GEM_CREATE_VRAM_CLEARED,
+ NULL, resv, &pt);
+ if (r)
+ return r;
+
+ /* Keep a reference to the root directory to avoid
+ * freeing them up in the wrong order.
+ */
+ pt->parent = amdgpu_bo_ref(vm->root.bo);
+
+ entry->bo = pt;
+ entry->addr = 0;
+ }
+
+ if (level < adev->vm_manager.num_level) {
+ uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
+ uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
+ ((1 << shift) - 1);
+ r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
+ sub_eaddr, level);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_alloc_pts - Allocate page tables.
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM to allocate page tables for
+ * @saddr: Start address which needs to be allocated
+ * @size: Size from start address we need.
+ *
+ * Make sure the page tables are allocated.
+ */
+int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint64_t saddr, uint64_t size)
+{
+ uint64_t last_pfn;
+ uint64_t eaddr;
+
+ /* validate the parameters */
+ if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
+ return -EINVAL;
+
+ eaddr = saddr + size - 1;
+ last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
+ if (last_pfn >= adev->vm_manager.max_pfn) {
+ dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
+ last_pfn, adev->vm_manager.max_pfn);
+ return -EINVAL;
+ }
+
+ saddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr /= AMDGPU_GPU_PAGE_SIZE;
+
+ return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0);
}
static bool amdgpu_vm_is_gpu_reset(struct amdgpu_device *adev,
@@ -369,6 +553,16 @@ static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
return false;
}
+static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
+{
+ u64 addr = mc_addr;
+
+ if (adev->mc.mc_funcs && adev->mc.mc_funcs->adjust_mc_addr)
+ addr = adev->mc.mc_funcs->adjust_mc_addr(adev, addr);
+
+ return addr;
+}
+
/**
* amdgpu_vm_flush - hardware flush the vm
*
@@ -391,41 +585,59 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
id->oa_size != job->oa_size);
int r;
- if (ring->funcs->emit_pipeline_sync && (
- job->vm_needs_flush || gds_switch_needed ||
- amdgpu_vm_ring_has_compute_vm_bug(ring)))
- amdgpu_ring_emit_pipeline_sync(ring);
+ if (job->vm_needs_flush || gds_switch_needed ||
+ amdgpu_vm_is_gpu_reset(adev, id) ||
+ amdgpu_vm_ring_has_compute_vm_bug(ring)) {
+ unsigned patch_offset = 0;
- if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
- amdgpu_vm_is_gpu_reset(adev, id))) {
- struct dma_fence *fence;
+ if (ring->funcs->init_cond_exec)
+ patch_offset = amdgpu_ring_init_cond_exec(ring);
- trace_amdgpu_vm_flush(job->vm_pd_addr, ring->idx, job->vm_id);
- amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);
+ if (ring->funcs->emit_pipeline_sync &&
+ (job->vm_needs_flush || gds_switch_needed ||
+ amdgpu_vm_ring_has_compute_vm_bug(ring)))
+ amdgpu_ring_emit_pipeline_sync(ring);
- r = amdgpu_fence_emit(ring, &fence);
- if (r)
- return r;
+ if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
+ amdgpu_vm_is_gpu_reset(adev, id))) {
+ struct dma_fence *fence;
+ u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
- mutex_lock(&adev->vm_manager.lock);
- dma_fence_put(id->last_flush);
- id->last_flush = fence;
- mutex_unlock(&adev->vm_manager.lock);
- }
+ trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
+ amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
- if (gds_switch_needed) {
- id->gds_base = job->gds_base;
- id->gds_size = job->gds_size;
- id->gws_base = job->gws_base;
- id->gws_size = job->gws_size;
- id->oa_base = job->oa_base;
- id->oa_size = job->oa_size;
- amdgpu_ring_emit_gds_switch(ring, job->vm_id,
- job->gds_base, job->gds_size,
- job->gws_base, job->gws_size,
- job->oa_base, job->oa_size);
- }
+ r = amdgpu_fence_emit(ring, &fence);
+ if (r)
+ return r;
+
+ mutex_lock(&adev->vm_manager.lock);
+ dma_fence_put(id->last_flush);
+ id->last_flush = fence;
+ mutex_unlock(&adev->vm_manager.lock);
+ }
+
+ if (gds_switch_needed) {
+ id->gds_base = job->gds_base;
+ id->gds_size = job->gds_size;
+ id->gws_base = job->gws_base;
+ id->gws_size = job->gws_size;
+ id->oa_base = job->oa_base;
+ id->oa_size = job->oa_size;
+ amdgpu_ring_emit_gds_switch(ring, job->vm_id,
+ job->gds_base, job->gds_size,
+ job->gws_base, job->gws_size,
+ job->oa_base, job->oa_size);
+ }
+
+ if (ring->funcs->patch_cond_exec)
+ amdgpu_ring_patch_cond_exec(ring, patch_offset);
+ /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
+ if (ring->funcs->emit_switch_buffer) {
+ amdgpu_ring_emit_switch_buffer(ring);
+ amdgpu_ring_emit_switch_buffer(ring);
+ }
+ }
return 0;
}
@@ -490,7 +702,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
uint64_t pe, uint64_t addr,
unsigned count, uint32_t incr,
- uint32_t flags)
+ uint64_t flags)
{
trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
@@ -519,7 +731,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
uint64_t pe, uint64_t addr,
unsigned count, uint32_t incr,
- uint32_t flags)
+ uint64_t flags)
{
uint64_t src = (params->src + (addr >> 12) * 8);
@@ -554,24 +766,24 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
}
/*
- * amdgpu_vm_update_pdes - make sure that page directory is valid
+ * amdgpu_vm_update_level - update a single level in the hierarchy
*
* @adev: amdgpu_device pointer
* @vm: requested vm
- * @start: start of GPU address range
- * @end: end of GPU address range
+ * @parent: parent directory
*
- * Allocates new page tables if necessary
- * and updates the page directory.
+ * Makes sure all entries in @parent are up to date.
* Returns 0 for success, error for failure.
*/
-int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
+static int amdgpu_vm_update_level(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_vm_pt *parent,
+ unsigned level)
{
struct amdgpu_bo *shadow;
struct amdgpu_ring *ring;
uint64_t pd_addr, shadow_addr;
- uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
+ uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
unsigned count = 0, pt_idx, ndw;
struct amdgpu_job *job;
@@ -580,16 +792,19 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
int r;
+ if (!parent->entries)
+ return 0;
ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
- shadow = vm->page_directory->shadow;
/* padding, etc. */
ndw = 64;
/* assume the worst case */
- ndw += vm->max_pde_used * 6;
+ ndw += parent->last_entry_used * 6;
+
+ pd_addr = amdgpu_bo_gpu_offset(parent->bo);
- pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
+ shadow = parent->bo->shadow;
if (shadow) {
r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
if (r)
@@ -608,9 +823,9 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
params.adev = adev;
params.ib = &job->ibs[0];
- /* walk over the address space and update the page directory */
- for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
- struct amdgpu_bo *bo = vm->page_tables[pt_idx].bo;
+ /* walk over the address space and update the directory */
+ for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
+ struct amdgpu_bo *bo = parent->entries[pt_idx].bo;
uint64_t pde, pt;
if (bo == NULL)
@@ -626,10 +841,10 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
}
pt = amdgpu_bo_gpu_offset(bo);
- if (vm->page_tables[pt_idx].addr == pt)
+ if (parent->entries[pt_idx].addr == pt)
continue;
- vm->page_tables[pt_idx].addr = pt;
+ parent->entries[pt_idx].addr = pt;
pde = pd_addr + pt_idx * 8;
if (((last_pde + 8 * count) != pde) ||
@@ -637,15 +852,18 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
(count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
if (count) {
+ uint64_t pt_addr =
+ amdgpu_vm_adjust_mc_addr(adev, last_pt);
+
if (shadow)
amdgpu_vm_do_set_ptes(&params,
last_shadow,
- last_pt, count,
+ pt_addr, count,
incr,
AMDGPU_PTE_VALID);
amdgpu_vm_do_set_ptes(&params, last_pde,
- last_pt, count, incr,
+ pt_addr, count, incr,
AMDGPU_PTE_VALID);
}
@@ -659,36 +877,51 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
}
if (count) {
- if (vm->page_directory->shadow)
- amdgpu_vm_do_set_ptes(&params, last_shadow, last_pt,
+ uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
+
+ if (vm->root.bo->shadow)
+ amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
count, incr, AMDGPU_PTE_VALID);
- amdgpu_vm_do_set_ptes(&params, last_pde, last_pt,
+ amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
count, incr, AMDGPU_PTE_VALID);
}
if (params.ib->length_dw == 0) {
amdgpu_job_free(job);
- return 0;
- }
-
- amdgpu_ring_pad_ib(ring, params.ib);
- amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
- AMDGPU_FENCE_OWNER_VM);
- if (shadow)
- amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv,
+ } else {
+ amdgpu_ring_pad_ib(ring, params.ib);
+ amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv,
AMDGPU_FENCE_OWNER_VM);
+ if (shadow)
+ amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv,
+ AMDGPU_FENCE_OWNER_VM);
- WARN_ON(params.ib->length_dw > ndw);
- r = amdgpu_job_submit(job, ring, &vm->entity,
- AMDGPU_FENCE_OWNER_VM, &fence);
- if (r)
- goto error_free;
+ WARN_ON(params.ib->length_dw > ndw);
+ r = amdgpu_job_submit(job, ring, &vm->entity,
+ AMDGPU_FENCE_OWNER_VM, &fence);
+ if (r)
+ goto error_free;
+
+ amdgpu_bo_fence(parent->bo, fence, true);
+ dma_fence_put(vm->last_dir_update);
+ vm->last_dir_update = dma_fence_get(fence);
+ dma_fence_put(fence);
+ }
+ /*
+ * Recurse into the subdirectories. This recursion is harmless because
+ * we only have a maximum of 5 layers.
+ */
+ for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
+ struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
- amdgpu_bo_fence(vm->page_directory, fence, true);
- dma_fence_put(vm->page_directory_fence);
- vm->page_directory_fence = dma_fence_get(fence);
- dma_fence_put(fence);
+ if (!entry->bo)
+ continue;
+
+ r = amdgpu_vm_update_level(adev, vm, entry, level + 1);
+ if (r)
+ return r;
+ }
return 0;
@@ -697,6 +930,47 @@ error_free:
return r;
}
+/*
+ * amdgpu_vm_update_directories - make sure that all directories are valid
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ *
+ * Makes sure all directories are up to date.
+ * Returns 0 for success, error for failure.
+ */
+int amdgpu_vm_update_directories(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ return amdgpu_vm_update_level(adev, vm, &vm->root, 0);
+}
+
+/**
+ * amdgpu_vm_find_pt - find the page table for an address
+ *
+ * @p: see amdgpu_pte_update_params definition
+ * @addr: virtual address in question
+ *
+ * Find the page table BO for a virtual address, return NULL when none found.
+ */
+static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
+ uint64_t addr)
+{
+ struct amdgpu_vm_pt *entry = &p->vm->root;
+ unsigned idx, level = p->adev->vm_manager.num_level;
+
+ while (entry->entries) {
+ idx = addr >> (amdgpu_vm_block_size * level--);
+ idx %= amdgpu_bo_size(entry->bo) / 8;
+ entry = &entry->entries[idx];
+ }
+
+ if (level)
+ return NULL;
+
+ return entry->bo;
+}
+
/**
* amdgpu_vm_update_ptes - make sure that page tables are valid
*
@@ -710,23 +984,25 @@ error_free:
* Update the page tables in the range @start - @end.
*/
static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
- struct amdgpu_vm *vm,
uint64_t start, uint64_t end,
- uint64_t dst, uint32_t flags)
+ uint64_t dst, uint64_t flags)
{
const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
uint64_t cur_pe_start, cur_nptes, cur_dst;
uint64_t addr; /* next GPU address to be updated */
- uint64_t pt_idx;
struct amdgpu_bo *pt;
unsigned nptes; /* next number of ptes to be updated */
uint64_t next_pe_start;
/* initialize the variables */
addr = start;
- pt_idx = addr >> amdgpu_vm_block_size;
- pt = vm->page_tables[pt_idx].bo;
+ pt = amdgpu_vm_get_pt(params, addr);
+ if (!pt) {
+ pr_err("PT not found, aborting update_ptes\n");
+ return;
+ }
+
if (params->shadow) {
if (!pt->shadow)
return;
@@ -748,8 +1024,12 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
/* walk over the address space and update the page tables */
while (addr < end) {
- pt_idx = addr >> amdgpu_vm_block_size;
- pt = vm->page_tables[pt_idx].bo;
+ pt = amdgpu_vm_get_pt(params, addr);
+ if (!pt) {
+ pr_err("PT not found, aborting update_ptes\n");
+ return;
+ }
+
if (params->shadow) {
if (!pt->shadow)
return;
@@ -800,9 +1080,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
* @flags: hw mapping flags
*/
static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
- struct amdgpu_vm *vm,
uint64_t start, uint64_t end,
- uint64_t dst, uint32_t flags)
+ uint64_t dst, uint64_t flags)
{
/**
* The MC L1 TLB supports variable sized pages, based on a fragment
@@ -834,25 +1113,25 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
if (params->src || !(flags & AMDGPU_PTE_VALID) ||
(frag_start >= frag_end)) {
- amdgpu_vm_update_ptes(params, vm, start, end, dst, flags);
+ amdgpu_vm_update_ptes(params, start, end, dst, flags);
return;
}
/* handle the 4K area at the beginning */
if (start != frag_start) {
- amdgpu_vm_update_ptes(params, vm, start, frag_start,
+ amdgpu_vm_update_ptes(params, start, frag_start,
dst, flags);
dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
}
/* handle the area in the middle */
- amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst,
+ amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
flags | frag_flags);
/* handle the 4K area at the end */
if (frag_end != end) {
dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
- amdgpu_vm_update_ptes(params, vm, frag_end, end, dst, flags);
+ amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
}
}
@@ -879,7 +1158,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
dma_addr_t *pages_addr,
struct amdgpu_vm *vm,
uint64_t start, uint64_t last,
- uint32_t flags, uint64_t addr,
+ uint64_t flags, uint64_t addr,
struct dma_fence **fence)
{
struct amdgpu_ring *ring;
@@ -892,14 +1171,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
memset(&params, 0, sizeof(params));
params.adev = adev;
+ params.vm = vm;
params.src = src;
ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
- memset(&params, 0, sizeof(params));
- params.adev = adev;
- params.src = src;
-
/* sync to everything on unmapping */
if (!(flags & AMDGPU_PTE_VALID))
owner = AMDGPU_FENCE_OWNER_UNDEFINED;
@@ -967,19 +1243,19 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
if (r)
goto error_free;
- r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
+ r = amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.resv,
owner);
if (r)
goto error_free;
- r = reservation_object_reserve_shared(vm->page_directory->tbo.resv);
+ r = reservation_object_reserve_shared(vm->root.bo->tbo.resv);
if (r)
goto error_free;
params.shadow = true;
- amdgpu_vm_frag_ptes(&params, vm, start, last + 1, addr, flags);
+ amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
params.shadow = false;
- amdgpu_vm_frag_ptes(&params, vm, start, last + 1, addr, flags);
+ amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
amdgpu_ring_pad_ib(ring, params.ib);
WARN_ON(params.ib->length_dw > ndw);
@@ -988,12 +1264,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
if (r)
goto error_free;
- amdgpu_bo_fence(vm->page_directory, f, true);
- if (fence) {
- dma_fence_put(*fence);
- *fence = dma_fence_get(f);
- }
- dma_fence_put(f);
+ amdgpu_bo_fence(vm->root.bo, f, true);
+ dma_fence_put(*fence);
+ *fence = f;
return 0;
error_free:
@@ -1020,11 +1293,11 @@ error_free:
*/
static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
struct dma_fence *exclusive,
- uint32_t gtt_flags,
+ uint64_t gtt_flags,
dma_addr_t *pages_addr,
struct amdgpu_vm *vm,
struct amdgpu_bo_va_mapping *mapping,
- uint32_t flags,
+ uint64_t flags,
struct drm_mm_node *nodes,
struct dma_fence **fence)
{
@@ -1039,6 +1312,12 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
flags &= ~AMDGPU_PTE_WRITEABLE;
+ flags &= ~AMDGPU_PTE_EXECUTABLE;
+ flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+
+ flags &= ~AMDGPU_PTE_MTYPE_MASK;
+ flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK);
+
trace_amdgpu_vm_bo_update(mapping);
pfn = mapping->offset >> PAGE_SHIFT;
@@ -1111,13 +1390,13 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_vm *vm = bo_va->vm;
struct amdgpu_bo_va_mapping *mapping;
dma_addr_t *pages_addr = NULL;
- uint32_t gtt_flags, flags;
+ uint64_t gtt_flags, flags;
struct ttm_mem_reg *mem;
struct drm_mm_node *nodes;
struct dma_fence *exclusive;
int r;
- if (clear) {
+ if (clear || !bo_va->bo) {
mem = NULL;
nodes = NULL;
exclusive = NULL;
@@ -1134,9 +1413,15 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv);
}
- flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
- gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
- adev == amdgpu_ttm_adev(bo_va->bo->tbo.bdev)) ? flags : 0;
+ if (bo_va->bo) {
+ flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
+ gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
+ adev == amdgpu_ttm_adev(bo_va->bo->tbo.bdev)) ?
+ flags : 0;
+ } else {
+ flags = 0x0;
+ gtt_flags = ~0x0;
+ }
spin_lock(&vm->status_lock);
if (!list_empty(&bo_va->vm_status))
@@ -1171,10 +1456,142 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
}
/**
+ * amdgpu_vm_update_prt_state - update the global PRT state
+ */
+static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
+{
+ unsigned long flags;
+ bool enable;
+
+ spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
+ enable = !!atomic_read(&adev->vm_manager.num_prt_users);
+ adev->gart.gart_funcs->set_prt(adev, enable);
+ spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
+}
+
+/**
+ * amdgpu_vm_prt_get - add a PRT user
+ */
+static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
+{
+ if (!adev->gart.gart_funcs->set_prt)
+ return;
+
+ if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
+ amdgpu_vm_update_prt_state(adev);
+}
+
+/**
+ * amdgpu_vm_prt_put - drop a PRT user
+ */
+static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
+{
+ if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0)
+ amdgpu_vm_update_prt_state(adev);
+}
+
+/**
+ * amdgpu_vm_prt_cb - callback for updating the PRT status
+ */
+static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
+{
+ struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);
+
+ amdgpu_vm_prt_put(cb->adev);
+ kfree(cb);
+}
+
+/**
+ * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
+ */
+static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
+ struct dma_fence *fence)
+{
+ struct amdgpu_prt_cb *cb;
+
+ if (!adev->gart.gart_funcs->set_prt)
+ return;
+
+ cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
+ if (!cb) {
+ /* Last resort when we are OOM */
+ if (fence)
+ dma_fence_wait(fence, false);
+
+ amdgpu_vm_prt_put(cb->adev);
+ } else {
+ cb->adev = adev;
+ if (!fence || dma_fence_add_callback(fence, &cb->cb,
+ amdgpu_vm_prt_cb))
+ amdgpu_vm_prt_cb(fence, &cb->cb);
+ }
+}
+
+/**
+ * amdgpu_vm_free_mapping - free a mapping
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @mapping: mapping to be freed
+ * @fence: fence of the unmap operation
+ *
+ * Free a mapping and make sure we decrease the PRT usage count if applicable.
+ */
+static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo_va_mapping *mapping,
+ struct dma_fence *fence)
+{
+ if (mapping->flags & AMDGPU_PTE_PRT)
+ amdgpu_vm_add_prt_cb(adev, fence);
+ kfree(mapping);
+}
+
+/**
+ * amdgpu_vm_prt_fini - finish all prt mappings
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ *
+ * Register a cleanup callback to disable PRT support after VM dies.
+ */
+static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ struct reservation_object *resv = vm->root.bo->tbo.resv;
+ struct dma_fence *excl, **shared;
+ unsigned i, shared_count;
+ int r;
+
+ r = reservation_object_get_fences_rcu(resv, &excl,
+ &shared_count, &shared);
+ if (r) {
+ /* Not enough memory to grab the fence list, as last resort
+ * block for all the fences to complete.
+ */
+ reservation_object_wait_timeout_rcu(resv, true, false,
+ MAX_SCHEDULE_TIMEOUT);
+ return;
+ }
+
+ /* Add a callback for each fence in the reservation object */
+ amdgpu_vm_prt_get(adev);
+ amdgpu_vm_add_prt_cb(adev, excl);
+
+ for (i = 0; i < shared_count; ++i) {
+ amdgpu_vm_prt_get(adev);
+ amdgpu_vm_add_prt_cb(adev, shared[i]);
+ }
+
+ kfree(shared);
+}
+
+/**
* amdgpu_vm_clear_freed - clear freed BOs in the PT
*
* @adev: amdgpu_device pointer
* @vm: requested vm
+ * @fence: optional resulting fence (unchanged if no work needed to be done
+ * or if an error occurred)
*
* Make sure all freed BOs are cleared in the PT.
* Returns 0 for success.
@@ -1182,9 +1599,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
* PTs have to be reserved and mutex must be locked!
*/
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
+ struct amdgpu_vm *vm,
+ struct dma_fence **fence)
{
struct amdgpu_bo_va_mapping *mapping;
+ struct dma_fence *f = NULL;
int r;
while (!list_empty(&vm->freed)) {
@@ -1193,12 +1612,21 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
list_del(&mapping->list);
r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping,
- 0, 0, NULL);
- kfree(mapping);
- if (r)
+ 0, 0, &f);
+ amdgpu_vm_free_mapping(adev, vm, mapping, f);
+ if (r) {
+ dma_fence_put(f);
return r;
+ }
+ }
+ if (fence && f) {
+ dma_fence_put(*fence);
+ *fence = f;
+ } else {
+ dma_fence_put(f);
}
+
return 0;
}
@@ -1271,7 +1699,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
INIT_LIST_HEAD(&bo_va->invalids);
INIT_LIST_HEAD(&bo_va->vm_status);
- list_add_tail(&bo_va->bo_list, &bo->va);
+ if (bo)
+ list_add_tail(&bo_va->bo_list, &bo->va);
return bo_va;
}
@@ -1298,9 +1727,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_vm *vm = bo_va->vm;
struct interval_tree_node *it;
- unsigned last_pfn, pt_idx;
uint64_t eaddr;
- int r;
/* validate the parameters */
if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
@@ -1309,15 +1736,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
/* make sure object fit at this offset */
eaddr = saddr + size - 1;
- if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo)))
- return -EINVAL;
-
- last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
- if (last_pfn >= adev->vm_manager.max_pfn) {
- dev_err(adev->dev, "va above limit (0x%08X >= 0x%08X)\n",
- last_pfn, adev->vm_manager.max_pfn);
+ if (saddr >= eaddr ||
+ (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo)))
return -EINVAL;
- }
saddr /= AMDGPU_GPU_PAGE_SIZE;
eaddr /= AMDGPU_GPU_PAGE_SIZE;
@@ -1330,15 +1751,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
"0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr,
tmp->it.start, tmp->it.last + 1);
- r = -EINVAL;
- goto error;
+ return -EINVAL;
}
mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
- if (!mapping) {
- r = -ENOMEM;
- goto error;
- }
+ if (!mapping)
+ return -ENOMEM;
INIT_LIST_HEAD(&mapping->list);
mapping->it.start = saddr;
@@ -1349,53 +1767,74 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
list_add(&mapping->list, &bo_va->invalids);
interval_tree_insert(&mapping->it, &vm->va);
- /* Make sure the page tables are allocated */
- saddr >>= amdgpu_vm_block_size;
- eaddr >>= amdgpu_vm_block_size;
-
- BUG_ON(eaddr >= amdgpu_vm_num_pdes(adev));
+ if (flags & AMDGPU_PTE_PRT)
+ amdgpu_vm_prt_get(adev);
- if (eaddr > vm->max_pde_used)
- vm->max_pde_used = eaddr;
+ return 0;
+}
- /* walk over the address space and allocate the page tables */
- for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
- struct reservation_object *resv = vm->page_directory->tbo.resv;
- struct amdgpu_bo *pt;
+/**
+ * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: bo_va to store the address
+ * @saddr: where to map the BO
+ * @offset: requested offset in the BO
+ * @flags: attributes of pages (read/write/valid/etc.)
+ *
+ * Add a mapping of the BO at the specefied addr into the VM. Replace existing
+ * mappings as we do so.
+ * Returns 0 for success, error for failure.
+ *
+ * Object has to be reserved and unreserved outside!
+ */
+int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t saddr, uint64_t offset,
+ uint64_t size, uint64_t flags)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_vm *vm = bo_va->vm;
+ uint64_t eaddr;
+ int r;
- if (vm->page_tables[pt_idx].bo)
- continue;
+ /* validate the parameters */
+ if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
+ size == 0 || size & AMDGPU_GPU_PAGE_MASK)
+ return -EINVAL;
- r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
- AMDGPU_GPU_PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
- AMDGPU_GEM_CREATE_SHADOW |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
- AMDGPU_GEM_CREATE_VRAM_CLEARED,
- NULL, resv, &pt);
- if (r)
- goto error_free;
+ /* make sure object fit at this offset */
+ eaddr = saddr + size - 1;
+ if (saddr >= eaddr ||
+ (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo)))
+ return -EINVAL;
- /* Keep a reference to the page table to avoid freeing
- * them up in the wrong order.
- */
- pt->parent = amdgpu_bo_ref(vm->page_directory);
+ /* Allocate all the needed memory */
+ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+ if (!mapping)
+ return -ENOMEM;
- vm->page_tables[pt_idx].bo = pt;
- vm->page_tables[pt_idx].addr = 0;
+ r = amdgpu_vm_bo_clear_mappings(adev, bo_va->vm, saddr, size);
+ if (r) {
+ kfree(mapping);
+ return r;
}
- return 0;
+ saddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr /= AMDGPU_GPU_PAGE_SIZE;
-error_free:
- list_del(&mapping->list);
- interval_tree_remove(&mapping->it, &vm->va);
- trace_amdgpu_vm_bo_unmap(bo_va, mapping);
- kfree(mapping);
+ mapping->it.start = saddr;
+ mapping->it.last = eaddr;
+ mapping->offset = offset;
+ mapping->flags = flags;
-error:
- return r;
+ list_add(&mapping->list, &bo_va->invalids);
+ interval_tree_insert(&mapping->it, &vm->va);
+
+ if (flags & AMDGPU_PTE_PRT)
+ amdgpu_vm_prt_get(adev);
+
+ return 0;
}
/**
@@ -1444,7 +1883,109 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
if (valid)
list_add(&mapping->list, &vm->freed);
else
- kfree(mapping);
+ amdgpu_vm_free_mapping(adev, vm, mapping,
+ bo_va->last_pt_update);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM structure to use
+ * @saddr: start of the range
+ * @size: size of the range
+ *
+ * Remove all mappings in a range, split them as appropriate.
+ * Returns 0 for success, error for failure.
+ */
+int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint64_t saddr, uint64_t size)
+{
+ struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
+ struct interval_tree_node *it;
+ LIST_HEAD(removed);
+ uint64_t eaddr;
+
+ eaddr = saddr + size - 1;
+ saddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr /= AMDGPU_GPU_PAGE_SIZE;
+
+ /* Allocate all the needed memory */
+ before = kzalloc(sizeof(*before), GFP_KERNEL);
+ if (!before)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&before->list);
+
+ after = kzalloc(sizeof(*after), GFP_KERNEL);
+ if (!after) {
+ kfree(before);
+ return -ENOMEM;
+ }
+ INIT_LIST_HEAD(&after->list);
+
+ /* Now gather all removed mappings */
+ it = interval_tree_iter_first(&vm->va, saddr, eaddr);
+ while (it) {
+ tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
+ it = interval_tree_iter_next(it, saddr, eaddr);
+
+ /* Remember mapping split at the start */
+ if (tmp->it.start < saddr) {
+ before->it.start = tmp->it.start;
+ before->it.last = saddr - 1;
+ before->offset = tmp->offset;
+ before->flags = tmp->flags;
+ list_add(&before->list, &tmp->list);
+ }
+
+ /* Remember mapping split at the end */
+ if (tmp->it.last > eaddr) {
+ after->it.start = eaddr + 1;
+ after->it.last = tmp->it.last;
+ after->offset = tmp->offset;
+ after->offset += after->it.start - tmp->it.start;
+ after->flags = tmp->flags;
+ list_add(&after->list, &tmp->list);
+ }
+
+ list_del(&tmp->list);
+ list_add(&tmp->list, &removed);
+ }
+
+ /* And free them up */
+ list_for_each_entry_safe(tmp, next, &removed, list) {
+ interval_tree_remove(&tmp->it, &vm->va);
+ list_del(&tmp->list);
+
+ if (tmp->it.start < saddr)
+ tmp->it.start = saddr;
+ if (tmp->it.last > eaddr)
+ tmp->it.last = eaddr;
+
+ list_add(&tmp->list, &vm->freed);
+ trace_amdgpu_vm_bo_unmap(NULL, tmp);
+ }
+
+ /* Insert partial mapping before the range */
+ if (!list_empty(&before->list)) {
+ interval_tree_insert(&before->it, &vm->va);
+ if (before->flags & AMDGPU_PTE_PRT)
+ amdgpu_vm_prt_get(adev);
+ } else {
+ kfree(before);
+ }
+
+ /* Insert partial mapping after the range */
+ if (!list_empty(&after->list)) {
+ interval_tree_insert(&after->it, &vm->va);
+ if (after->flags & AMDGPU_PTE_PRT)
+ amdgpu_vm_prt_get(adev);
+ } else {
+ kfree(after);
+ }
return 0;
}
@@ -1480,7 +2021,8 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
list_del(&mapping->list);
interval_tree_remove(&mapping->it, &vm->va);
- kfree(mapping);
+ amdgpu_vm_free_mapping(adev, vm, mapping,
+ bo_va->last_pt_update);
}
dma_fence_put(bo_va->last_pt_update);
@@ -1521,7 +2063,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
AMDGPU_VM_PTE_COUNT * 8);
- unsigned pd_size, pd_entries;
unsigned ring_instance;
struct amdgpu_ring *ring;
struct amd_sched_rq *rq;
@@ -1536,16 +2077,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
INIT_LIST_HEAD(&vm->cleared);
INIT_LIST_HEAD(&vm->freed);
- pd_size = amdgpu_vm_directory_size(adev);
- pd_entries = amdgpu_vm_num_pdes(adev);
-
- /* allocate page table array */
- vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
- if (vm->page_tables == NULL) {
- DRM_ERROR("Cannot allocate memory for page table array\n");
- return -ENOMEM;
- }
-
/* create scheduler entity for page table updates */
ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
@@ -1555,44 +2086,64 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
r = amd_sched_entity_init(&ring->sched, &vm->entity,
rq, amdgpu_sched_jobs);
if (r)
- goto err;
+ return r;
- vm->page_directory_fence = NULL;
+ vm->last_dir_update = NULL;
- r = amdgpu_bo_create(adev, pd_size, align, true,
+ r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
AMDGPU_GEM_CREATE_SHADOW |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_VRAM_CLEARED,
- NULL, NULL, &vm->page_directory);
+ NULL, NULL, &vm->root.bo);
if (r)
goto error_free_sched_entity;
- r = amdgpu_bo_reserve(vm->page_directory, false);
+ r = amdgpu_bo_reserve(vm->root.bo, false);
if (r)
- goto error_free_page_directory;
+ goto error_free_root;
vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
- amdgpu_bo_unreserve(vm->page_directory);
+ amdgpu_bo_unreserve(vm->root.bo);
return 0;
-error_free_page_directory:
- amdgpu_bo_unref(&vm->page_directory->shadow);
- amdgpu_bo_unref(&vm->page_directory);
- vm->page_directory = NULL;
+error_free_root:
+ amdgpu_bo_unref(&vm->root.bo->shadow);
+ amdgpu_bo_unref(&vm->root.bo);
+ vm->root.bo = NULL;
error_free_sched_entity:
amd_sched_entity_fini(&ring->sched, &vm->entity);
-err:
- drm_free_large(vm->page_tables);
-
return r;
}
/**
+ * amdgpu_vm_free_levels - free PD/PT levels
+ *
+ * @level: PD/PT starting level to free
+ *
+ * Free the page directory or page table level and all sub levels.
+ */
+static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
+{
+ unsigned i;
+
+ if (level->bo) {
+ amdgpu_bo_unref(&level->bo->shadow);
+ amdgpu_bo_unref(&level->bo);
+ }
+
+ if (level->entries)
+ for (i = 0; i <= level->last_entry_used; i++)
+ amdgpu_vm_free_levels(&level->entries[i]);
+
+ drm_free_large(level->entries);
+}
+
+/**
* amdgpu_vm_fini - tear down a vm instance
*
* @adev: amdgpu_device pointer
@@ -1604,7 +2155,7 @@ err:
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
struct amdgpu_bo_va_mapping *mapping, *tmp;
- int i;
+ bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
amd_sched_entity_fini(vm->entity.sched, &vm->entity);
@@ -1617,24 +2168,17 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
kfree(mapping);
}
list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
- list_del(&mapping->list);
- kfree(mapping);
- }
-
- for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) {
- struct amdgpu_bo *pt = vm->page_tables[i].bo;
-
- if (!pt)
- continue;
+ if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
+ amdgpu_vm_prt_fini(adev, vm);
+ prt_fini_needed = false;
+ }
- amdgpu_bo_unref(&pt->shadow);
- amdgpu_bo_unref(&pt);
+ list_del(&mapping->list);
+ amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
}
- drm_free_large(vm->page_tables);
- amdgpu_bo_unref(&vm->page_directory->shadow);
- amdgpu_bo_unref(&vm->page_directory);
- dma_fence_put(vm->page_directory_fence);
+ amdgpu_vm_free_levels(&vm->root);
+ dma_fence_put(vm->last_dir_update);
}
/**
@@ -1665,6 +2209,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
atomic64_set(&adev->vm_manager.client_counter, 0);
+ spin_lock_init(&adev->vm_manager.prt_lock);
+ atomic_set(&adev->vm_manager.num_prt_users, 0);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 18c72c0b478d..fbe17bf73a00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -53,17 +53,23 @@ struct amdgpu_bo_list_entry;
/* LOG2 number of continuous pages for the fragment field */
#define AMDGPU_LOG2_PAGES_PER_FRAG 4
-#define AMDGPU_PTE_VALID (1 << 0)
-#define AMDGPU_PTE_SYSTEM (1 << 1)
-#define AMDGPU_PTE_SNOOPED (1 << 2)
+#define AMDGPU_PTE_VALID (1ULL << 0)
+#define AMDGPU_PTE_SYSTEM (1ULL << 1)
+#define AMDGPU_PTE_SNOOPED (1ULL << 2)
/* VI only */
-#define AMDGPU_PTE_EXECUTABLE (1 << 4)
+#define AMDGPU_PTE_EXECUTABLE (1ULL << 4)
-#define AMDGPU_PTE_READABLE (1 << 5)
-#define AMDGPU_PTE_WRITEABLE (1 << 6)
+#define AMDGPU_PTE_READABLE (1ULL << 5)
+#define AMDGPU_PTE_WRITEABLE (1ULL << 6)
-#define AMDGPU_PTE_FRAG(x) ((x & 0x1f) << 7)
+#define AMDGPU_PTE_FRAG(x) ((x & 0x1fULL) << 7)
+
+#define AMDGPU_PTE_PRT (1ULL << 63)
+
+/* VEGA10 only */
+#define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57)
+#define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL)
/* How to programm VM fault handling */
#define AMDGPU_VM_FAULT_STOP_NEVER 0
@@ -73,6 +79,10 @@ struct amdgpu_bo_list_entry;
struct amdgpu_vm_pt {
struct amdgpu_bo *bo;
uint64_t addr;
+
+ /* array of page tables, one for each directory entry */
+ struct amdgpu_vm_pt *entries;
+ unsigned last_entry_used;
};
struct amdgpu_vm {
@@ -92,14 +102,10 @@ struct amdgpu_vm {
struct list_head freed;
/* contains the page directory */
- struct amdgpu_bo *page_directory;
- unsigned max_pde_used;
- struct dma_fence *page_directory_fence;
+ struct amdgpu_vm_pt root;
+ struct dma_fence *last_dir_update;
uint64_t last_eviction_counter;
- /* array of page tables, one for each page directory entry */
- struct amdgpu_vm_pt *page_tables;
-
/* for id and flush management per ring */
struct amdgpu_vm_id *ids[AMDGPU_MAX_RINGS];
@@ -147,7 +153,8 @@ struct amdgpu_vm_manager {
u64 fence_context;
unsigned seqno[AMDGPU_MAX_RINGS];
- uint32_t max_pfn;
+ uint64_t max_pfn;
+ uint32_t num_level;
/* vram base address for page table entry */
u64 vram_base_offset;
/* is vm enabled? */
@@ -159,6 +166,10 @@ struct amdgpu_vm_manager {
atomic_t vm_pte_next_ring;
/* client id counter */
atomic64_t client_counter;
+
+ /* partial resident texture handling */
+ spinlock_t prt_lock;
+ atomic_t num_prt_users;
};
void amdgpu_vm_manager_init(struct amdgpu_device *adev);
@@ -173,15 +184,19 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
void *param);
void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
+int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint64_t saddr, uint64_t size);
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync, struct dma_fence *fence,
struct amdgpu_job *job);
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
-int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
+int amdgpu_vm_update_directories(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
+ struct amdgpu_vm *vm,
+ struct dma_fence **fence);
int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_sync *sync);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
@@ -198,9 +213,16 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t addr, uint64_t offset,
uint64_t size, uint64_t flags);
+int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t addr, uint64_t offset,
+ uint64_t size, uint64_t flags);
int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t addr);
+int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint64_t saddr, uint64_t size);
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va);
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index 1b50e6c13fb3..d69aa2e179bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -166,7 +166,7 @@ static uint32_t atom_iio_execute(struct atom_context *ctx, int base,
case ATOM_IIO_END:
return temp;
default:
- printk(KERN_INFO "Unknown IIO opcode.\n");
+ pr_info("Unknown IIO opcode\n");
return 0;
}
}
@@ -190,22 +190,19 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
val = gctx->card->reg_read(gctx->card, idx);
break;
case ATOM_IO_PCI:
- printk(KERN_INFO
- "PCI registers are not implemented.\n");
+ pr_info("PCI registers are not implemented\n");
return 0;
case ATOM_IO_SYSIO:
- printk(KERN_INFO
- "SYSIO registers are not implemented.\n");
+ pr_info("SYSIO registers are not implemented\n");
return 0;
default:
if (!(gctx->io_mode & 0x80)) {
- printk(KERN_INFO "Bad IO mode.\n");
+ pr_info("Bad IO mode\n");
return 0;
}
if (!gctx->iio[gctx->io_mode & 0x7F]) {
- printk(KERN_INFO
- "Undefined indirect IO read method %d.\n",
- gctx->io_mode & 0x7F);
+ pr_info("Undefined indirect IO read method %d\n",
+ gctx->io_mode & 0x7F);
return 0;
}
val =
@@ -469,22 +466,19 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr,
gctx->card->reg_write(gctx->card, idx, val);
break;
case ATOM_IO_PCI:
- printk(KERN_INFO
- "PCI registers are not implemented.\n");
+ pr_info("PCI registers are not implemented\n");
return;
case ATOM_IO_SYSIO:
- printk(KERN_INFO
- "SYSIO registers are not implemented.\n");
+ pr_info("SYSIO registers are not implemented\n");
return;
default:
if (!(gctx->io_mode & 0x80)) {
- printk(KERN_INFO "Bad IO mode.\n");
+ pr_info("Bad IO mode\n");
return;
}
if (!gctx->iio[gctx->io_mode & 0xFF]) {
- printk(KERN_INFO
- "Undefined indirect IO write method %d.\n",
- gctx->io_mode & 0x7F);
+ pr_info("Undefined indirect IO write method %d\n",
+ gctx->io_mode & 0x7F);
return;
}
atom_iio_execute(gctx, gctx->iio[gctx->io_mode & 0xFF],
@@ -850,17 +844,17 @@ static void atom_op_postcard(atom_exec_context *ctx, int *ptr, int arg)
static void atom_op_repeat(atom_exec_context *ctx, int *ptr, int arg)
{
- printk(KERN_INFO "unimplemented!\n");
+ pr_info("unimplemented!\n");
}
static void atom_op_restorereg(atom_exec_context *ctx, int *ptr, int arg)
{
- printk(KERN_INFO "unimplemented!\n");
+ pr_info("unimplemented!\n");
}
static void atom_op_savereg(atom_exec_context *ctx, int *ptr, int arg)
{
- printk(KERN_INFO "unimplemented!\n");
+ pr_info("unimplemented!\n");
}
static void atom_op_setdatablock(atom_exec_context *ctx, int *ptr, int arg)
@@ -1023,7 +1017,7 @@ static void atom_op_switch(atom_exec_context *ctx, int *ptr, int arg)
}
(*ptr) += 2;
} else {
- printk(KERN_INFO "Bad case.\n");
+ pr_info("Bad case\n");
return;
}
(*ptr) += 2;
@@ -1306,8 +1300,7 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
struct atom_context *ctx =
kzalloc(sizeof(struct atom_context), GFP_KERNEL);
char *str;
- char name[512];
- int i;
+ u16 idx;
if (!ctx)
return NULL;
@@ -1316,14 +1309,14 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
ctx->bios = bios;
if (CU16(0) != ATOM_BIOS_MAGIC) {
- printk(KERN_INFO "Invalid BIOS magic.\n");
+ pr_info("Invalid BIOS magic\n");
kfree(ctx);
return NULL;
}
if (strncmp
(CSTR(ATOM_ATI_MAGIC_PTR), ATOM_ATI_MAGIC,
strlen(ATOM_ATI_MAGIC))) {
- printk(KERN_INFO "Invalid ATI magic.\n");
+ pr_info("Invalid ATI magic\n");
kfree(ctx);
return NULL;
}
@@ -1332,7 +1325,7 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
if (strncmp
(CSTR(base + ATOM_ROM_MAGIC_PTR), ATOM_ROM_MAGIC,
strlen(ATOM_ROM_MAGIC))) {
- printk(KERN_INFO "Invalid ATOM magic.\n");
+ pr_info("Invalid ATOM magic\n");
kfree(ctx);
return NULL;
}
@@ -1345,18 +1338,13 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
return NULL;
}
- str = CSTR(CU16(base + ATOM_ROM_MSG_PTR));
- while (*str && ((*str == '\n') || (*str == '\r')))
- str++;
- /* name string isn't always 0 terminated */
- for (i = 0; i < 511; i++) {
- name[i] = str[i];
- if (name[i] < '.' || name[i] > 'z') {
- name[i] = 0;
- break;
- }
- }
- printk(KERN_INFO "ATOM BIOS: %s\n", name);
+ idx = CU16(ATOM_ROM_PART_NUMBER_PTR);
+ if (idx == 0)
+ idx = 0x80;
+
+ str = CSTR(idx);
+ if (*str != '\0')
+ pr_info("ATOM BIOS: %s\n", str);
return ctx;
}
@@ -1429,29 +1417,3 @@ bool amdgpu_atom_parse_cmd_header(struct atom_context *ctx, int index, uint8_t *
return true;
}
-int amdgpu_atom_allocate_fb_scratch(struct atom_context *ctx)
-{
- int index = GetIndexIntoMasterTable(DATA, VRAM_UsageByFirmware);
- uint16_t data_offset;
- int usage_bytes = 0;
- struct _ATOM_VRAM_USAGE_BY_FIRMWARE *firmware_usage;
-
- if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
- firmware_usage = (struct _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + data_offset);
-
- DRM_DEBUG("atom firmware requested %08x %dkb\n",
- le32_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware),
- le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb));
-
- usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024;
- }
- ctx->scratch_size_bytes = 0;
- if (usage_bytes == 0)
- usage_bytes = 20 * 1024;
- /* allocate some scratch memory */
- ctx->scratch = kzalloc(usage_bytes, GFP_KERNEL);
- if (!ctx->scratch)
- return -ENOMEM;
- ctx->scratch_size_bytes = usage_bytes;
- return 0;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h
index 49daf6d723e5..ddd8045accf3 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.h
+++ b/drivers/gpu/drm/amd/amdgpu/atom.h
@@ -32,6 +32,7 @@
#define ATOM_ATI_MAGIC_PTR 0x30
#define ATOM_ATI_MAGIC " 761295520"
#define ATOM_ROM_TABLE_PTR 0x48
+#define ATOM_ROM_PART_NUMBER_PTR 0x6E
#define ATOM_ROM_MAGIC "ATOM"
#define ATOM_ROM_MAGIC_PTR 4
@@ -151,7 +152,6 @@ bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index, uint16_t
uint8_t *frev, uint8_t *crev, uint16_t *data_start);
bool amdgpu_atom_parse_cmd_header(struct atom_context *ctx, int index,
uint8_t *frev, uint8_t *crev);
-int amdgpu_atom_allocate_fb_scratch(struct atom_context *ctx);
#include "atom-types.h"
#include "atombios.h"
#include "ObjectID.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index f97ecb49972e..11ccda83d767 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -3681,6 +3681,40 @@ static int ci_find_boot_level(struct ci_single_dpm_table *table,
return ret;
}
+static void ci_save_default_power_profile(struct amdgpu_device *adev)
+{
+ struct ci_power_info *pi = ci_get_pi(adev);
+ struct SMU7_Discrete_GraphicsLevel *levels =
+ pi->smc_state_table.GraphicsLevel;
+ uint32_t min_level = 0;
+
+ pi->default_gfx_power_profile.activity_threshold =
+ be16_to_cpu(levels[0].ActivityLevel);
+ pi->default_gfx_power_profile.up_hyst = levels[0].UpH;
+ pi->default_gfx_power_profile.down_hyst = levels[0].DownH;
+ pi->default_gfx_power_profile.type = AMD_PP_GFX_PROFILE;
+
+ pi->default_compute_power_profile = pi->default_gfx_power_profile;
+ pi->default_compute_power_profile.type = AMD_PP_COMPUTE_PROFILE;
+
+ /* Optimize compute power profile: Use only highest
+ * 2 power levels (if more than 2 are available), Hysteresis:
+ * 0ms up, 5ms down
+ */
+ if (pi->smc_state_table.GraphicsDpmLevelCount > 2)
+ min_level = pi->smc_state_table.GraphicsDpmLevelCount - 2;
+ else if (pi->smc_state_table.GraphicsDpmLevelCount == 2)
+ min_level = 1;
+ pi->default_compute_power_profile.min_sclk =
+ be32_to_cpu(levels[min_level].SclkFrequency);
+
+ pi->default_compute_power_profile.up_hyst = 0;
+ pi->default_compute_power_profile.down_hyst = 5;
+
+ pi->gfx_power_profile = pi->default_gfx_power_profile;
+ pi->compute_power_profile = pi->default_compute_power_profile;
+}
+
static int ci_init_smc_table(struct amdgpu_device *adev)
{
struct ci_power_info *pi = ci_get_pi(adev);
@@ -3826,6 +3860,8 @@ static int ci_init_smc_table(struct amdgpu_device *adev)
if (ret)
return ret;
+ ci_save_default_power_profile(adev);
+
return 0;
}
@@ -5804,9 +5840,7 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev)
out:
if (err) {
- printk(KERN_ERR
- "cik_smc: Failed to load firmware \"%s\"\n",
- fw_name);
+ pr_err("cik_smc: Failed to load firmware \"%s\"\n", fw_name);
release_firmware(adev->pm.fw);
adev->pm.fw = NULL;
}
@@ -6250,11 +6284,13 @@ static int ci_dpm_sw_init(void *handle)
int ret;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- ret = amdgpu_irq_add_id(adev, 230, &adev->pm.dpm.thermal.irq);
+ ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 230,
+ &adev->pm.dpm.thermal.irq);
if (ret)
return ret;
- ret = amdgpu_irq_add_id(adev, 231, &adev->pm.dpm.thermal.irq);
+ ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 231,
+ &adev->pm.dpm.thermal.irq);
if (ret)
return ret;
@@ -6688,6 +6724,260 @@ static int ci_dpm_set_mclk_od(struct amdgpu_device *adev, uint32_t value)
return 0;
}
+static int ci_dpm_get_power_profile_state(struct amdgpu_device *adev,
+ struct amd_pp_profile *query)
+{
+ struct ci_power_info *pi = ci_get_pi(adev);
+
+ if (!pi || !query)
+ return -EINVAL;
+
+ if (query->type == AMD_PP_GFX_PROFILE)
+ memcpy(query, &pi->gfx_power_profile,
+ sizeof(struct amd_pp_profile));
+ else if (query->type == AMD_PP_COMPUTE_PROFILE)
+ memcpy(query, &pi->compute_power_profile,
+ sizeof(struct amd_pp_profile));
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ci_populate_requested_graphic_levels(struct amdgpu_device *adev,
+ struct amd_pp_profile *request)
+{
+ struct ci_power_info *pi = ci_get_pi(adev);
+ struct ci_dpm_table *dpm_table = &(pi->dpm_table);
+ struct SMU7_Discrete_GraphicsLevel *levels =
+ pi->smc_state_table.GraphicsLevel;
+ uint32_t array = pi->dpm_table_start +
+ offsetof(SMU7_Discrete_DpmTable, GraphicsLevel);
+ uint32_t array_size = sizeof(struct SMU7_Discrete_GraphicsLevel) *
+ SMU7_MAX_LEVELS_GRAPHICS;
+ uint32_t i;
+
+ for (i = 0; i < dpm_table->sclk_table.count; i++) {
+ levels[i].ActivityLevel =
+ cpu_to_be16(request->activity_threshold);
+ levels[i].EnabledForActivity = 1;
+ levels[i].UpH = request->up_hyst;
+ levels[i].DownH = request->down_hyst;
+ }
+
+ return amdgpu_ci_copy_bytes_to_smc(adev, array, (uint8_t *)levels,
+ array_size, pi->sram_end);
+}
+
+static void ci_find_min_clock_masks(struct amdgpu_device *adev,
+ uint32_t *sclk_mask, uint32_t *mclk_mask,
+ uint32_t min_sclk, uint32_t min_mclk)
+{
+ struct ci_power_info *pi = ci_get_pi(adev);
+ struct ci_dpm_table *dpm_table = &(pi->dpm_table);
+ uint32_t i;
+
+ for (i = 0; i < dpm_table->sclk_table.count; i++) {
+ if (dpm_table->sclk_table.dpm_levels[i].enabled &&
+ dpm_table->sclk_table.dpm_levels[i].value >= min_sclk)
+ *sclk_mask |= 1 << i;
+ }
+
+ for (i = 0; i < dpm_table->mclk_table.count; i++) {
+ if (dpm_table->mclk_table.dpm_levels[i].enabled &&
+ dpm_table->mclk_table.dpm_levels[i].value >= min_mclk)
+ *mclk_mask |= 1 << i;
+ }
+}
+
+static int ci_set_power_profile_state(struct amdgpu_device *adev,
+ struct amd_pp_profile *request)
+{
+ struct ci_power_info *pi = ci_get_pi(adev);
+ int tmp_result, result = 0;
+ uint32_t sclk_mask = 0, mclk_mask = 0;
+
+ tmp_result = ci_freeze_sclk_mclk_dpm(adev);
+ if (tmp_result) {
+ DRM_ERROR("Failed to freeze SCLK MCLK DPM!");
+ result = tmp_result;
+ }
+
+ tmp_result = ci_populate_requested_graphic_levels(adev,
+ request);
+ if (tmp_result) {
+ DRM_ERROR("Failed to populate requested graphic levels!");
+ result = tmp_result;
+ }
+
+ tmp_result = ci_unfreeze_sclk_mclk_dpm(adev);
+ if (tmp_result) {
+ DRM_ERROR("Failed to unfreeze SCLK MCLK DPM!");
+ result = tmp_result;
+ }
+
+ ci_find_min_clock_masks(adev, &sclk_mask, &mclk_mask,
+ request->min_sclk, request->min_mclk);
+
+ if (sclk_mask) {
+ if (!pi->sclk_dpm_key_disabled)
+ amdgpu_ci_send_msg_to_smc_with_parameter(
+ adev,
+ PPSMC_MSG_SCLKDPM_SetEnabledMask,
+ pi->dpm_level_enable_mask.
+ sclk_dpm_enable_mask &
+ sclk_mask);
+ }
+
+ if (mclk_mask) {
+ if (!pi->mclk_dpm_key_disabled)
+ amdgpu_ci_send_msg_to_smc_with_parameter(
+ adev,
+ PPSMC_MSG_MCLKDPM_SetEnabledMask,
+ pi->dpm_level_enable_mask.
+ mclk_dpm_enable_mask &
+ mclk_mask);
+ }
+
+
+ return result;
+}
+
+static int ci_dpm_set_power_profile_state(struct amdgpu_device *adev,
+ struct amd_pp_profile *request)
+{
+ struct ci_power_info *pi = ci_get_pi(adev);
+ int ret = -1;
+
+ if (!pi || !request)
+ return -EINVAL;
+
+ if (adev->pm.dpm.forced_level !=
+ AMD_DPM_FORCED_LEVEL_AUTO)
+ return -EINVAL;
+
+ if (request->min_sclk ||
+ request->min_mclk ||
+ request->activity_threshold ||
+ request->up_hyst ||
+ request->down_hyst) {
+ if (request->type == AMD_PP_GFX_PROFILE)
+ memcpy(&pi->gfx_power_profile, request,
+ sizeof(struct amd_pp_profile));
+ else if (request->type == AMD_PP_COMPUTE_PROFILE)
+ memcpy(&pi->compute_power_profile, request,
+ sizeof(struct amd_pp_profile));
+ else
+ return -EINVAL;
+
+ if (request->type == pi->current_power_profile)
+ ret = ci_set_power_profile_state(
+ adev,
+ request);
+ } else {
+ /* set power profile if it exists */
+ switch (request->type) {
+ case AMD_PP_GFX_PROFILE:
+ ret = ci_set_power_profile_state(
+ adev,
+ &pi->gfx_power_profile);
+ break;
+ case AMD_PP_COMPUTE_PROFILE:
+ ret = ci_set_power_profile_state(
+ adev,
+ &pi->compute_power_profile);
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ if (!ret)
+ pi->current_power_profile = request->type;
+
+ return 0;
+}
+
+static int ci_dpm_reset_power_profile_state(struct amdgpu_device *adev,
+ struct amd_pp_profile *request)
+{
+ struct ci_power_info *pi = ci_get_pi(adev);
+
+ if (!pi || !request)
+ return -EINVAL;
+
+ if (request->type == AMD_PP_GFX_PROFILE) {
+ pi->gfx_power_profile = pi->default_gfx_power_profile;
+ return ci_dpm_set_power_profile_state(adev,
+ &pi->gfx_power_profile);
+ } else if (request->type == AMD_PP_COMPUTE_PROFILE) {
+ pi->compute_power_profile =
+ pi->default_compute_power_profile;
+ return ci_dpm_set_power_profile_state(adev,
+ &pi->compute_power_profile);
+ } else
+ return -EINVAL;
+}
+
+static int ci_dpm_switch_power_profile(struct amdgpu_device *adev,
+ enum amd_pp_profile_type type)
+{
+ struct ci_power_info *pi = ci_get_pi(adev);
+ struct amd_pp_profile request = {0};
+
+ if (!pi)
+ return -EINVAL;
+
+ if (pi->current_power_profile != type) {
+ request.type = type;
+ return ci_dpm_set_power_profile_state(adev, &request);
+ }
+
+ return 0;
+}
+
+static int ci_dpm_read_sensor(struct amdgpu_device *adev, int idx,
+ void *value, int *size)
+{
+ u32 activity_percent = 50;
+ int ret;
+
+ /* size must be at least 4 bytes for all sensors */
+ if (*size < 4)
+ return -EINVAL;
+
+ switch (idx) {
+ case AMDGPU_PP_SENSOR_GFX_SCLK:
+ *((uint32_t *)value) = ci_get_average_sclk_freq(adev);
+ *size = 4;
+ return 0;
+ case AMDGPU_PP_SENSOR_GFX_MCLK:
+ *((uint32_t *)value) = ci_get_average_mclk_freq(adev);
+ *size = 4;
+ return 0;
+ case AMDGPU_PP_SENSOR_GPU_TEMP:
+ *((uint32_t *)value) = ci_dpm_get_temp(adev);
+ *size = 4;
+ return 0;
+ case AMDGPU_PP_SENSOR_GPU_LOAD:
+ ret = ci_read_smc_soft_register(adev,
+ offsetof(SMU7_SoftRegisters,
+ AverageGraphicsA),
+ &activity_percent);
+ if (ret == 0) {
+ activity_percent += 0x80;
+ activity_percent >>= 8;
+ activity_percent =
+ activity_percent > 100 ? 100 : activity_percent;
+ }
+ *((uint32_t *)value) = activity_percent;
+ *size = 4;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
const struct amd_ip_funcs ci_dpm_ip_funcs = {
.name = "ci_dpm",
.early_init = ci_dpm_early_init,
@@ -6730,6 +7020,11 @@ static const struct amdgpu_dpm_funcs ci_dpm_funcs = {
.set_mclk_od = ci_dpm_set_mclk_od,
.check_state_equal = ci_check_state_equal,
.get_vce_clock_state = amdgpu_get_vce_clock_state,
+ .get_power_profile_state = ci_dpm_get_power_profile_state,
+ .set_power_profile_state = ci_dpm_set_power_profile_state,
+ .reset_power_profile_state = ci_dpm_reset_power_profile_state,
+ .switch_power_profile = ci_dpm_switch_power_profile,
+ .read_sensor = ci_dpm_read_sensor,
};
static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
index 91be2996ae7c..84cbc9c45f4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
@@ -295,6 +295,13 @@ struct ci_power_info {
bool fan_is_controlled_by_smc;
u32 t_min;
u32 fan_ctrl_default_mode;
+
+ /* power profile */
+ struct amd_pp_profile gfx_power_profile;
+ struct amd_pp_profile compute_power_profile;
+ struct amd_pp_profile default_gfx_power_profile;
+ struct amd_pp_profile default_compute_power_profile;
+ enum amd_pp_profile_type current_power_profile;
};
#define CISLANDS_VOLTAGE_CONTROL_NONE 0x0
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index c4d4b35e54ec..9d33e5641419 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1212,6 +1212,11 @@ static int cik_asic_reset(struct amdgpu_device *adev)
return r;
}
+static u32 cik_get_config_memsize(struct amdgpu_device *adev)
+{
+ return RREG32(mmCONFIG_MEMSIZE);
+}
+
static int cik_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
u32 cntl_reg, u32 status_reg)
{
@@ -1641,6 +1646,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.get_xclk = &cik_get_xclk,
.set_uvd_clocks = &cik_set_uvd_clocks,
.set_vce_clocks = &cik_set_vce_clocks,
+ .get_config_memsize = &cik_get_config_memsize,
};
static int cik_common_early_init(void *handle)
@@ -1779,6 +1785,8 @@ static int cik_common_early_init(void *handle)
return -EINVAL;
}
+ adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
+
amdgpu_get_pcie_info(adev);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index 319b32cdea84..c57c3f18af01 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -248,8 +248,9 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev,
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
+ entry->client_id = AMDGPU_IH_CLIENTID_LEGACY;
entry->src_id = dw[0] & 0xff;
- entry->src_data = dw[1] & 0xfffffff;
+ entry->src_data[0] = dw[1] & 0xfffffff;
entry->ring_id = dw[2] & 0xff;
entry->vm_id = (dw[2] >> 8) & 0xff;
entry->pas_id = (dw[2] >> 16) & 0xffff;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 810bba533975..c216e16826c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -142,9 +142,7 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev)
}
out:
if (err) {
- printk(KERN_ERR
- "cik_sdma: Failed to load firmware \"%s\"\n",
- fw_name);
+ pr_err("cik_sdma: Failed to load firmware \"%s\"\n", fw_name);
for (i = 0; i < adev->sdma.num_instances; i++) {
release_firmware(adev->sdma.instance[i].fw);
adev->sdma.instance[i].fw = NULL;
@@ -160,7 +158,7 @@ out:
*
* Get the current rptr from the hardware (CIK+).
*/
-static uint32_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring)
+static uint64_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring)
{
u32 rptr;
@@ -176,7 +174,7 @@ static uint32_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring)
*
* Get the current wptr from the hardware (CIK+).
*/
-static uint32_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring)
+static uint64_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
@@ -196,7 +194,8 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me],
+ (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
}
static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -227,7 +226,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
u32 extra_bits = vm_id & 0xf;
/* IB packet must end on a 8 DW boundary */
- cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8);
+ cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8);
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
@@ -434,7 +433,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
ring->wptr = 0;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
/* enable DMA RB */
WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
@@ -750,14 +749,14 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
*/
static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
uint64_t addr, unsigned count,
- uint32_t incr, uint32_t flags)
+ uint32_t incr, uint64_t flags)
{
/* for physically contiguous pages (vram) */
ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
- ib->ptr[ib->length_dw++] = flags; /* mask */
- ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
ib->ptr[ib->length_dw++] = upper_32_bits(addr);
ib->ptr[ib->length_dw++] = incr; /* increment size */
@@ -924,17 +923,20 @@ static int cik_sdma_sw_init(void *handle)
}
/* SDMA trap event */
- r = amdgpu_irq_add_id(adev, 224, &adev->sdma.trap_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224,
+ &adev->sdma.trap_irq);
if (r)
return r;
/* SDMA Privileged inst */
- r = amdgpu_irq_add_id(adev, 241, &adev->sdma.illegal_inst_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241,
+ &adev->sdma.illegal_inst_irq);
if (r)
return r;
/* SDMA Privileged inst */
- r = amdgpu_irq_add_id(adev, 247, &adev->sdma.illegal_inst_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247,
+ &adev->sdma.illegal_inst_irq);
if (r)
return r;
@@ -1211,6 +1213,7 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
.type = AMDGPU_RING_TYPE_SDMA,
.align_mask = 0xf,
.nop = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0),
+ .support_64bit_ptrs = false,
.get_rptr = cik_sdma_ring_get_rptr,
.get_wptr = cik_sdma_ring_get_wptr,
.set_wptr = cik_sdma_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index 6cbd913fd12e..6a9e38a3d2a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -502,7 +502,7 @@
# define SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW 6
#define SDMA_OPCODE_WRITE 2
# define SDMA_WRITE_SUB_OPCODE_LINEAR 0
-# define SDMA_WRTIE_SUB_OPCODE_TILED 1
+# define SDMA_WRITE_SUB_OPCODE_TILED 1
#define SDMA_OPCODE_INDIRECT_BUFFER 4
#define SDMA_OPCODE_FENCE 5
#define SDMA_OPCODE_TRAP 6
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
new file mode 100644
index 000000000000..18fd01f3e4b2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
@@ -0,0 +1,941 @@
+
+/*
+***************************************************************************************************
+*
+* Trade secret of Advanced Micro Devices, Inc.
+* Copyright (c) 2010 Advanced Micro Devices, Inc. (unpublished)
+*
+* All rights reserved. This notice is intended as a precaution against inadvertent publication and
+* does not imply publication or any waiver of confidentiality. The year included in the foregoing
+* notice is the year of creation of the work.
+*
+***************************************************************************************************
+*/
+/**
+***************************************************************************************************
+* @brief gfx9 Clearstate Definitions
+***************************************************************************************************
+*
+* Do not edit! This is a machine-generated file!
+*
+*/
+
+static const unsigned int gfx9_SECT_CONTEXT_def_1[] =
+{
+ 0x00000000, // DB_RENDER_CONTROL
+ 0x00000000, // DB_COUNT_CONTROL
+ 0x00000000, // DB_DEPTH_VIEW
+ 0x00000000, // DB_RENDER_OVERRIDE
+ 0x00000000, // DB_RENDER_OVERRIDE2
+ 0x00000000, // DB_HTILE_DATA_BASE
+ 0x00000000, // DB_HTILE_DATA_BASE_HI
+ 0x00000000, // DB_DEPTH_SIZE
+ 0x00000000, // DB_DEPTH_BOUNDS_MIN
+ 0x00000000, // DB_DEPTH_BOUNDS_MAX
+ 0x00000000, // DB_STENCIL_CLEAR
+ 0x00000000, // DB_DEPTH_CLEAR
+ 0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+ 0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+ 0x00000000, // DB_Z_INFO
+ 0x00000000, // DB_STENCIL_INFO
+ 0x00000000, // DB_Z_READ_BASE
+ 0x00000000, // DB_Z_READ_BASE_HI
+ 0x00000000, // DB_STENCIL_READ_BASE
+ 0x00000000, // DB_STENCIL_READ_BASE_HI
+ 0x00000000, // DB_Z_WRITE_BASE
+ 0x00000000, // DB_Z_WRITE_BASE_HI
+ 0x00000000, // DB_STENCIL_WRITE_BASE
+ 0x00000000, // DB_STENCIL_WRITE_BASE_HI
+ 0x00000000, // DB_DFSM_CONTROL
+ 0x00000000, // DB_RENDER_FILTER
+ 0x00000000, // DB_Z_INFO2
+ 0x00000000, // DB_STENCIL_INFO2
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // TA_BC_BASE_ADDR
+ 0x00000000, // TA_BC_BASE_ADDR_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // COHER_DEST_BASE_HI_0
+ 0x00000000, // COHER_DEST_BASE_HI_1
+ 0x00000000, // COHER_DEST_BASE_HI_2
+ 0x00000000, // COHER_DEST_BASE_HI_3
+ 0x00000000, // COHER_DEST_BASE_2
+ 0x00000000, // COHER_DEST_BASE_3
+ 0x00000000, // PA_SC_WINDOW_OFFSET
+ 0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+ 0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+ 0x0000ffff, // PA_SC_CLIPRECT_RULE
+ 0x00000000, // PA_SC_CLIPRECT_0_TL
+ 0x40004000, // PA_SC_CLIPRECT_0_BR
+ 0x00000000, // PA_SC_CLIPRECT_1_TL
+ 0x40004000, // PA_SC_CLIPRECT_1_BR
+ 0x00000000, // PA_SC_CLIPRECT_2_TL
+ 0x40004000, // PA_SC_CLIPRECT_2_BR
+ 0x00000000, // PA_SC_CLIPRECT_3_TL
+ 0x40004000, // PA_SC_CLIPRECT_3_BR
+ 0xaa99aaaa, // PA_SC_EDGERULE
+ 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0xffffffff, // CB_TARGET_MASK
+ 0xffffffff, // CB_SHADER_MASK
+ 0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+ 0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+ 0x00000000, // COHER_DEST_BASE_0
+ 0x00000000, // COHER_DEST_BASE_1
+ 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+ 0x00000000, // PA_SC_VPORT_ZMIN_0
+ 0x3f800000, // PA_SC_VPORT_ZMAX_0
+ 0x00000000, // PA_SC_VPORT_ZMIN_1
+ 0x3f800000, // PA_SC_VPORT_ZMAX_1
+ 0x00000000, // PA_SC_VPORT_ZMIN_2
+ 0x3f800000, // PA_SC_VPORT_ZMAX_2
+ 0x00000000, // PA_SC_VPORT_ZMIN_3
+ 0x3f800000, // PA_SC_VPORT_ZMAX_3
+ 0x00000000, // PA_SC_VPORT_ZMIN_4
+ 0x3f800000, // PA_SC_VPORT_ZMAX_4
+ 0x00000000, // PA_SC_VPORT_ZMIN_5
+ 0x3f800000, // PA_SC_VPORT_ZMAX_5
+ 0x00000000, // PA_SC_VPORT_ZMIN_6
+ 0x3f800000, // PA_SC_VPORT_ZMAX_6
+ 0x00000000, // PA_SC_VPORT_ZMIN_7
+ 0x3f800000, // PA_SC_VPORT_ZMAX_7
+ 0x00000000, // PA_SC_VPORT_ZMIN_8
+ 0x3f800000, // PA_SC_VPORT_ZMAX_8
+ 0x00000000, // PA_SC_VPORT_ZMIN_9
+ 0x3f800000, // PA_SC_VPORT_ZMAX_9
+ 0x00000000, // PA_SC_VPORT_ZMIN_10
+ 0x3f800000, // PA_SC_VPORT_ZMAX_10
+ 0x00000000, // PA_SC_VPORT_ZMIN_11
+ 0x3f800000, // PA_SC_VPORT_ZMAX_11
+ 0x00000000, // PA_SC_VPORT_ZMIN_12
+ 0x3f800000, // PA_SC_VPORT_ZMAX_12
+ 0x00000000, // PA_SC_VPORT_ZMIN_13
+ 0x3f800000, // PA_SC_VPORT_ZMAX_13
+ 0x00000000, // PA_SC_VPORT_ZMIN_14
+ 0x3f800000, // PA_SC_VPORT_ZMAX_14
+ 0x00000000, // PA_SC_VPORT_ZMIN_15
+ 0x3f800000, // PA_SC_VPORT_ZMAX_15
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_2[] =
+{
+ 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
+ 0x00000000, // PA_SC_TILE_STEERING_OVERRIDE
+ 0x00000000, // CP_PERFMON_CNTX_CNTL
+ 0x00000000, // CP_RINGID
+ 0x00000000, // CP_VMID
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_RIGHT_VERT_GRID
+ 0x00000000, // PA_SC_LEFT_VERT_GRID
+ 0x00000000, // PA_SC_HORIZ_GRID
+ 0x00000000, // PA_SC_FOV_WINDOW_LR
+ 0x00000000, // PA_SC_FOV_WINDOW_TB
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0, // HOLE
+ 0x00000000, // CB_BLEND_RED
+ 0x00000000, // CB_BLEND_GREEN
+ 0x00000000, // CB_BLEND_BLUE
+ 0x00000000, // CB_BLEND_ALPHA
+ 0x00000000, // CB_DCC_CONTROL
+ 0, // HOLE
+ 0x00000000, // DB_STENCIL_CONTROL
+ 0x01000000, // DB_STENCILREFMASK
+ 0x01000000, // DB_STENCILREFMASK_BF
+ 0, // HOLE
+ 0x00000000, // PA_CL_VPORT_XSCALE
+ 0x00000000, // PA_CL_VPORT_XOFFSET
+ 0x00000000, // PA_CL_VPORT_YSCALE
+ 0x00000000, // PA_CL_VPORT_YOFFSET
+ 0x00000000, // PA_CL_VPORT_ZSCALE
+ 0x00000000, // PA_CL_VPORT_ZOFFSET
+ 0x00000000, // PA_CL_VPORT_XSCALE_1
+ 0x00000000, // PA_CL_VPORT_XOFFSET_1
+ 0x00000000, // PA_CL_VPORT_YSCALE_1
+ 0x00000000, // PA_CL_VPORT_YOFFSET_1
+ 0x00000000, // PA_CL_VPORT_ZSCALE_1
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_1
+ 0x00000000, // PA_CL_VPORT_XSCALE_2
+ 0x00000000, // PA_CL_VPORT_XOFFSET_2
+ 0x00000000, // PA_CL_VPORT_YSCALE_2
+ 0x00000000, // PA_CL_VPORT_YOFFSET_2
+ 0x00000000, // PA_CL_VPORT_ZSCALE_2
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_2
+ 0x00000000, // PA_CL_VPORT_XSCALE_3
+ 0x00000000, // PA_CL_VPORT_XOFFSET_3
+ 0x00000000, // PA_CL_VPORT_YSCALE_3
+ 0x00000000, // PA_CL_VPORT_YOFFSET_3
+ 0x00000000, // PA_CL_VPORT_ZSCALE_3
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_3
+ 0x00000000, // PA_CL_VPORT_XSCALE_4
+ 0x00000000, // PA_CL_VPORT_XOFFSET_4
+ 0x00000000, // PA_CL_VPORT_YSCALE_4
+ 0x00000000, // PA_CL_VPORT_YOFFSET_4
+ 0x00000000, // PA_CL_VPORT_ZSCALE_4
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_4
+ 0x00000000, // PA_CL_VPORT_XSCALE_5
+ 0x00000000, // PA_CL_VPORT_XOFFSET_5
+ 0x00000000, // PA_CL_VPORT_YSCALE_5
+ 0x00000000, // PA_CL_VPORT_YOFFSET_5
+ 0x00000000, // PA_CL_VPORT_ZSCALE_5
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_5
+ 0x00000000, // PA_CL_VPORT_XSCALE_6
+ 0x00000000, // PA_CL_VPORT_XOFFSET_6
+ 0x00000000, // PA_CL_VPORT_YSCALE_6
+ 0x00000000, // PA_CL_VPORT_YOFFSET_6
+ 0x00000000, // PA_CL_VPORT_ZSCALE_6
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_6
+ 0x00000000, // PA_CL_VPORT_XSCALE_7
+ 0x00000000, // PA_CL_VPORT_XOFFSET_7
+ 0x00000000, // PA_CL_VPORT_YSCALE_7
+ 0x00000000, // PA_CL_VPORT_YOFFSET_7
+ 0x00000000, // PA_CL_VPORT_ZSCALE_7
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_7
+ 0x00000000, // PA_CL_VPORT_XSCALE_8
+ 0x00000000, // PA_CL_VPORT_XOFFSET_8
+ 0x00000000, // PA_CL_VPORT_YSCALE_8
+ 0x00000000, // PA_CL_VPORT_YOFFSET_8
+ 0x00000000, // PA_CL_VPORT_ZSCALE_8
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_8
+ 0x00000000, // PA_CL_VPORT_XSCALE_9
+ 0x00000000, // PA_CL_VPORT_XOFFSET_9
+ 0x00000000, // PA_CL_VPORT_YSCALE_9
+ 0x00000000, // PA_CL_VPORT_YOFFSET_9
+ 0x00000000, // PA_CL_VPORT_ZSCALE_9
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_9
+ 0x00000000, // PA_CL_VPORT_XSCALE_10
+ 0x00000000, // PA_CL_VPORT_XOFFSET_10
+ 0x00000000, // PA_CL_VPORT_YSCALE_10
+ 0x00000000, // PA_CL_VPORT_YOFFSET_10
+ 0x00000000, // PA_CL_VPORT_ZSCALE_10
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_10
+ 0x00000000, // PA_CL_VPORT_XSCALE_11
+ 0x00000000, // PA_CL_VPORT_XOFFSET_11
+ 0x00000000, // PA_CL_VPORT_YSCALE_11
+ 0x00000000, // PA_CL_VPORT_YOFFSET_11
+ 0x00000000, // PA_CL_VPORT_ZSCALE_11
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_11
+ 0x00000000, // PA_CL_VPORT_XSCALE_12
+ 0x00000000, // PA_CL_VPORT_XOFFSET_12
+ 0x00000000, // PA_CL_VPORT_YSCALE_12
+ 0x00000000, // PA_CL_VPORT_YOFFSET_12
+ 0x00000000, // PA_CL_VPORT_ZSCALE_12
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_12
+ 0x00000000, // PA_CL_VPORT_XSCALE_13
+ 0x00000000, // PA_CL_VPORT_XOFFSET_13
+ 0x00000000, // PA_CL_VPORT_YSCALE_13
+ 0x00000000, // PA_CL_VPORT_YOFFSET_13
+ 0x00000000, // PA_CL_VPORT_ZSCALE_13
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_13
+ 0x00000000, // PA_CL_VPORT_XSCALE_14
+ 0x00000000, // PA_CL_VPORT_XOFFSET_14
+ 0x00000000, // PA_CL_VPORT_YSCALE_14
+ 0x00000000, // PA_CL_VPORT_YOFFSET_14
+ 0x00000000, // PA_CL_VPORT_ZSCALE_14
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_14
+ 0x00000000, // PA_CL_VPORT_XSCALE_15
+ 0x00000000, // PA_CL_VPORT_XOFFSET_15
+ 0x00000000, // PA_CL_VPORT_YSCALE_15
+ 0x00000000, // PA_CL_VPORT_YOFFSET_15
+ 0x00000000, // PA_CL_VPORT_ZSCALE_15
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_15
+ 0x00000000, // PA_CL_UCP_0_X
+ 0x00000000, // PA_CL_UCP_0_Y
+ 0x00000000, // PA_CL_UCP_0_Z
+ 0x00000000, // PA_CL_UCP_0_W
+ 0x00000000, // PA_CL_UCP_1_X
+ 0x00000000, // PA_CL_UCP_1_Y
+ 0x00000000, // PA_CL_UCP_1_Z
+ 0x00000000, // PA_CL_UCP_1_W
+ 0x00000000, // PA_CL_UCP_2_X
+ 0x00000000, // PA_CL_UCP_2_Y
+ 0x00000000, // PA_CL_UCP_2_Z
+ 0x00000000, // PA_CL_UCP_2_W
+ 0x00000000, // PA_CL_UCP_3_X
+ 0x00000000, // PA_CL_UCP_3_Y
+ 0x00000000, // PA_CL_UCP_3_Z
+ 0x00000000, // PA_CL_UCP_3_W
+ 0x00000000, // PA_CL_UCP_4_X
+ 0x00000000, // PA_CL_UCP_4_Y
+ 0x00000000, // PA_CL_UCP_4_Z
+ 0x00000000, // PA_CL_UCP_4_W
+ 0x00000000, // PA_CL_UCP_5_X
+ 0x00000000, // PA_CL_UCP_5_Y
+ 0x00000000, // PA_CL_UCP_5_Z
+ 0x00000000, // PA_CL_UCP_5_W
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_CNTL_0
+ 0x00000000, // SPI_PS_INPUT_CNTL_1
+ 0x00000000, // SPI_PS_INPUT_CNTL_2
+ 0x00000000, // SPI_PS_INPUT_CNTL_3
+ 0x00000000, // SPI_PS_INPUT_CNTL_4
+ 0x00000000, // SPI_PS_INPUT_CNTL_5
+ 0x00000000, // SPI_PS_INPUT_CNTL_6
+ 0x00000000, // SPI_PS_INPUT_CNTL_7
+ 0x00000000, // SPI_PS_INPUT_CNTL_8
+ 0x00000000, // SPI_PS_INPUT_CNTL_9
+ 0x00000000, // SPI_PS_INPUT_CNTL_10
+ 0x00000000, // SPI_PS_INPUT_CNTL_11
+ 0x00000000, // SPI_PS_INPUT_CNTL_12
+ 0x00000000, // SPI_PS_INPUT_CNTL_13
+ 0x00000000, // SPI_PS_INPUT_CNTL_14
+ 0x00000000, // SPI_PS_INPUT_CNTL_15
+ 0x00000000, // SPI_PS_INPUT_CNTL_16
+ 0x00000000, // SPI_PS_INPUT_CNTL_17
+ 0x00000000, // SPI_PS_INPUT_CNTL_18
+ 0x00000000, // SPI_PS_INPUT_CNTL_19
+ 0x00000000, // SPI_PS_INPUT_CNTL_20
+ 0x00000000, // SPI_PS_INPUT_CNTL_21
+ 0x00000000, // SPI_PS_INPUT_CNTL_22
+ 0x00000000, // SPI_PS_INPUT_CNTL_23
+ 0x00000000, // SPI_PS_INPUT_CNTL_24
+ 0x00000000, // SPI_PS_INPUT_CNTL_25
+ 0x00000000, // SPI_PS_INPUT_CNTL_26
+ 0x00000000, // SPI_PS_INPUT_CNTL_27
+ 0x00000000, // SPI_PS_INPUT_CNTL_28
+ 0x00000000, // SPI_PS_INPUT_CNTL_29
+ 0x00000000, // SPI_PS_INPUT_CNTL_30
+ 0x00000000, // SPI_PS_INPUT_CNTL_31
+ 0x00000000, // SPI_VS_OUT_CONFIG
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_ENA
+ 0x00000000, // SPI_PS_INPUT_ADDR
+ 0x00000000, // SPI_INTERP_CONTROL_0
+ 0x00000002, // SPI_PS_IN_CONTROL
+ 0, // HOLE
+ 0x00000000, // SPI_BARYC_CNTL
+ 0, // HOLE
+ 0x00000000, // SPI_TMPRING_SIZE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_SHADER_POS_FORMAT
+ 0x00000000, // SPI_SHADER_Z_FORMAT
+ 0x00000000, // SPI_SHADER_COL_FORMAT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SX_PS_DOWNCONVERT
+ 0x00000000, // SX_BLEND_OPT_EPSILON
+ 0x00000000, // SX_BLEND_OPT_CONTROL
+ 0x00000000, // SX_MRT0_BLEND_OPT
+ 0x00000000, // SX_MRT1_BLEND_OPT
+ 0x00000000, // SX_MRT2_BLEND_OPT
+ 0x00000000, // SX_MRT3_BLEND_OPT
+ 0x00000000, // SX_MRT4_BLEND_OPT
+ 0x00000000, // SX_MRT5_BLEND_OPT
+ 0x00000000, // SX_MRT6_BLEND_OPT
+ 0x00000000, // SX_MRT7_BLEND_OPT
+ 0x00000000, // CB_BLEND0_CONTROL
+ 0x00000000, // CB_BLEND1_CONTROL
+ 0x00000000, // CB_BLEND2_CONTROL
+ 0x00000000, // CB_BLEND3_CONTROL
+ 0x00000000, // CB_BLEND4_CONTROL
+ 0x00000000, // CB_BLEND5_CONTROL
+ 0x00000000, // CB_BLEND6_CONTROL
+ 0x00000000, // CB_BLEND7_CONTROL
+ 0x00000000, // CB_MRT0_EPITCH
+ 0x00000000, // CB_MRT1_EPITCH
+ 0x00000000, // CB_MRT2_EPITCH
+ 0x00000000, // CB_MRT3_EPITCH
+ 0x00000000, // CB_MRT4_EPITCH
+ 0x00000000, // CB_MRT5_EPITCH
+ 0x00000000, // CB_MRT6_EPITCH
+ 0x00000000, // CB_MRT7_EPITCH
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_3[] =
+{
+ 0x00000000, // PA_CL_POINT_X_RAD
+ 0x00000000, // PA_CL_POINT_Y_RAD
+ 0x00000000, // PA_CL_POINT_SIZE
+ 0x00000000, // PA_CL_POINT_CULL_RAD
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_4[] =
+{
+ 0x00000000, // DB_DEPTH_CONTROL
+ 0x00000000, // DB_EQAA
+ 0x00000000, // CB_COLOR_CONTROL
+ 0x00000000, // DB_SHADER_CONTROL
+ 0x00090000, // PA_CL_CLIP_CNTL
+ 0x00000004, // PA_SU_SC_MODE_CNTL
+ 0x00000000, // PA_CL_VTE_CNTL
+ 0x00000000, // PA_CL_VS_OUT_CNTL
+ 0x00000000, // PA_CL_NANINF_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+ 0x00000000, // PA_SU_PRIM_FILTER_CNTL
+ 0x00000000, // PA_SU_SMALL_PRIM_FILTER_CNTL
+ 0x00000000, // PA_CL_OBJPRIM_ID_CNTL
+ 0x00000000, // PA_CL_NGG_CNTL
+ 0x00000000, // PA_SU_OVER_RASTERIZATION_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SU_POINT_SIZE
+ 0x00000000, // PA_SU_POINT_MINMAX
+ 0x00000000, // PA_SU_LINE_CNTL
+ 0x00000000, // PA_SC_LINE_STIPPLE
+ 0x00000000, // VGT_OUTPUT_PATH_CNTL
+ 0x00000000, // VGT_HOS_CNTL
+ 0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+ 0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+ 0x00000000, // VGT_HOS_REUSE_DEPTH
+ 0x00000000, // VGT_GROUP_PRIM_TYPE
+ 0x00000000, // VGT_GROUP_FIRST_DECR
+ 0x00000000, // VGT_GROUP_DECR
+ 0x00000000, // VGT_GROUP_VECT_0_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_CNTL
+ 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
+ 0x00000000, // VGT_GS_MODE
+ 0x00000000, // VGT_GS_ONCHIP_CNTL
+ 0x00000000, // PA_SC_MODE_CNTL_0
+ 0x00000000, // PA_SC_MODE_CNTL_1
+ 0x00000000, // VGT_ENHANCE
+ 0x00000100, // VGT_GS_PER_ES
+ 0x00000080, // VGT_ES_PER_GS
+ 0x00000002, // VGT_GS_PER_VS
+ 0x00000000, // VGT_GSVS_RING_OFFSET_1
+ 0x00000000, // VGT_GSVS_RING_OFFSET_2
+ 0x00000000, // VGT_GSVS_RING_OFFSET_3
+ 0x00000000, // VGT_GS_OUT_PRIM_TYPE
+ 0x00000000, // IA_ENHANCE
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_5[] =
+{
+ 0x00000000, // WD_ENHANCE
+ 0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_6[] =
+{
+ 0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_7[] =
+{
+ 0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP
+ 0x00000000, // VGT_DRAW_PAYLOAD_CNTL
+ 0x00000000, // VGT_INDEX_PAYLOAD_CNTL
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_0
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_1
+ 0, // HOLE
+ 0x00000000, // VGT_ESGS_RING_ITEMSIZE
+ 0x00000000, // VGT_GSVS_RING_ITEMSIZE
+ 0x00000000, // VGT_REUSE_OFF
+ 0x00000000, // VGT_VTX_CNT_EN
+ 0x00000000, // DB_HTILE_SURFACE
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE0
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE1
+ 0x00000000, // DB_PRELOAD_CONTROL
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_MAX_VERT_OUT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_TESS_DISTRIBUTION
+ 0x00000000, // VGT_SHADER_STAGES_EN
+ 0x00000000, // VGT_LS_HS_CONFIG
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_1
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_2
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_3
+ 0x00000000, // VGT_TF_PARAM
+ 0x00000000, // DB_ALPHA_TO_MASK
+ 0x00000000, // VGT_DISPATCH_DRAW_INDEX
+ 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x00000000, // VGT_GS_INSTANCE_CNT
+ 0x00000000, // VGT_STRMOUT_CONFIG
+ 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_8[] =
+{
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_0
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_1
+ 0x00001000, // PA_SC_LINE_CNTL
+ 0x00000000, // PA_SC_AA_CONFIG
+ 0x00000005, // PA_SU_VTX_CNTL
+ 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+ 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+ 0x00000000, // PA_SC_SHADER_CONTROL
+ 0x00000003, // PA_SC_BINNER_CNTL_0
+ 0x00000000, // PA_SC_BINNER_CNTL_1
+ 0x00000000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
+ 0x00000000, // PA_SC_NGG_MODE_CNTL
+ 0, // HOLE
+ 0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL
+ 0x00000020, // VGT_OUT_DEALLOC_CNTL
+ 0x00000000, // CB_COLOR0_BASE
+ 0x00000000, // CB_COLOR0_BASE_EXT
+ 0x00000000, // CB_COLOR0_ATTRIB2
+ 0x00000000, // CB_COLOR0_VIEW
+ 0x00000000, // CB_COLOR0_INFO
+ 0x00000000, // CB_COLOR0_ATTRIB
+ 0x00000000, // CB_COLOR0_DCC_CONTROL
+ 0x00000000, // CB_COLOR0_CMASK
+ 0x00000000, // CB_COLOR0_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR0_FMASK
+ 0x00000000, // CB_COLOR0_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR0_CLEAR_WORD0
+ 0x00000000, // CB_COLOR0_CLEAR_WORD1
+ 0x00000000, // CB_COLOR0_DCC_BASE
+ 0x00000000, // CB_COLOR0_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR1_BASE
+ 0x00000000, // CB_COLOR1_BASE_EXT
+ 0x00000000, // CB_COLOR1_ATTRIB2
+ 0x00000000, // CB_COLOR1_VIEW
+ 0x00000000, // CB_COLOR1_INFO
+ 0x00000000, // CB_COLOR1_ATTRIB
+ 0x00000000, // CB_COLOR1_DCC_CONTROL
+ 0x00000000, // CB_COLOR1_CMASK
+ 0x00000000, // CB_COLOR1_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR1_FMASK
+ 0x00000000, // CB_COLOR1_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR1_CLEAR_WORD0
+ 0x00000000, // CB_COLOR1_CLEAR_WORD1
+ 0x00000000, // CB_COLOR1_DCC_BASE
+ 0x00000000, // CB_COLOR1_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR2_BASE
+ 0x00000000, // CB_COLOR2_BASE_EXT
+ 0x00000000, // CB_COLOR2_ATTRIB2
+ 0x00000000, // CB_COLOR2_VIEW
+ 0x00000000, // CB_COLOR2_INFO
+ 0x00000000, // CB_COLOR2_ATTRIB
+ 0x00000000, // CB_COLOR2_DCC_CONTROL
+ 0x00000000, // CB_COLOR2_CMASK
+ 0x00000000, // CB_COLOR2_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR2_FMASK
+ 0x00000000, // CB_COLOR2_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR2_CLEAR_WORD0
+ 0x00000000, // CB_COLOR2_CLEAR_WORD1
+ 0x00000000, // CB_COLOR2_DCC_BASE
+ 0x00000000, // CB_COLOR2_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR3_BASE
+ 0x00000000, // CB_COLOR3_BASE_EXT
+ 0x00000000, // CB_COLOR3_ATTRIB2
+ 0x00000000, // CB_COLOR3_VIEW
+ 0x00000000, // CB_COLOR3_INFO
+ 0x00000000, // CB_COLOR3_ATTRIB
+ 0x00000000, // CB_COLOR3_DCC_CONTROL
+ 0x00000000, // CB_COLOR3_CMASK
+ 0x00000000, // CB_COLOR3_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR3_FMASK
+ 0x00000000, // CB_COLOR3_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR3_CLEAR_WORD0
+ 0x00000000, // CB_COLOR3_CLEAR_WORD1
+ 0x00000000, // CB_COLOR3_DCC_BASE
+ 0x00000000, // CB_COLOR3_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR4_BASE
+ 0x00000000, // CB_COLOR4_BASE_EXT
+ 0x00000000, // CB_COLOR4_ATTRIB2
+ 0x00000000, // CB_COLOR4_VIEW
+ 0x00000000, // CB_COLOR4_INFO
+ 0x00000000, // CB_COLOR4_ATTRIB
+ 0x00000000, // CB_COLOR4_DCC_CONTROL
+ 0x00000000, // CB_COLOR4_CMASK
+ 0x00000000, // CB_COLOR4_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR4_FMASK
+ 0x00000000, // CB_COLOR4_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR4_CLEAR_WORD0
+ 0x00000000, // CB_COLOR4_CLEAR_WORD1
+ 0x00000000, // CB_COLOR4_DCC_BASE
+ 0x00000000, // CB_COLOR4_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR5_BASE
+ 0x00000000, // CB_COLOR5_BASE_EXT
+ 0x00000000, // CB_COLOR5_ATTRIB2
+ 0x00000000, // CB_COLOR5_VIEW
+ 0x00000000, // CB_COLOR5_INFO
+ 0x00000000, // CB_COLOR5_ATTRIB
+ 0x00000000, // CB_COLOR5_DCC_CONTROL
+ 0x00000000, // CB_COLOR5_CMASK
+ 0x00000000, // CB_COLOR5_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR5_FMASK
+ 0x00000000, // CB_COLOR5_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR5_CLEAR_WORD0
+ 0x00000000, // CB_COLOR5_CLEAR_WORD1
+ 0x00000000, // CB_COLOR5_DCC_BASE
+ 0x00000000, // CB_COLOR5_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR6_BASE
+ 0x00000000, // CB_COLOR6_BASE_EXT
+ 0x00000000, // CB_COLOR6_ATTRIB2
+ 0x00000000, // CB_COLOR6_VIEW
+ 0x00000000, // CB_COLOR6_INFO
+ 0x00000000, // CB_COLOR6_ATTRIB
+ 0x00000000, // CB_COLOR6_DCC_CONTROL
+ 0x00000000, // CB_COLOR6_CMASK
+ 0x00000000, // CB_COLOR6_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR6_FMASK
+ 0x00000000, // CB_COLOR6_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR6_CLEAR_WORD0
+ 0x00000000, // CB_COLOR6_CLEAR_WORD1
+ 0x00000000, // CB_COLOR6_DCC_BASE
+ 0x00000000, // CB_COLOR6_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR7_BASE
+ 0x00000000, // CB_COLOR7_BASE_EXT
+ 0x00000000, // CB_COLOR7_ATTRIB2
+ 0x00000000, // CB_COLOR7_VIEW
+ 0x00000000, // CB_COLOR7_INFO
+ 0x00000000, // CB_COLOR7_ATTRIB
+ 0x00000000, // CB_COLOR7_DCC_CONTROL
+ 0x00000000, // CB_COLOR7_CMASK
+ 0x00000000, // CB_COLOR7_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR7_FMASK
+ 0x00000000, // CB_COLOR7_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR7_CLEAR_WORD0
+ 0x00000000, // CB_COLOR7_CLEAR_WORD1
+ 0x00000000, // CB_COLOR7_DCC_BASE
+ 0x00000000, // CB_COLOR7_DCC_BASE_EXT
+};
+static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] =
+{
+ {gfx9_SECT_CONTEXT_def_1, 0x0000a000, 212 },
+ {gfx9_SECT_CONTEXT_def_2, 0x0000a0d6, 282 },
+ {gfx9_SECT_CONTEXT_def_3, 0x0000a1f5, 4 },
+ {gfx9_SECT_CONTEXT_def_4, 0x0000a200, 157 },
+ {gfx9_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
+ {gfx9_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+ {gfx9_SECT_CONTEXT_def_7, 0x0000a2a5, 66 },
+ {gfx9_SECT_CONTEXT_def_8, 0x0000a2f5, 155 },
+ { 0, 0, 0 }
+};
+static const struct cs_section_def gfx9_cs_data[] = {
+ { gfx9_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index fe7cbb24da7b..a5f294ebff5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -227,8 +227,9 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev,
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
+ entry->client_id = AMDGPU_IH_CLIENTID_LEGACY;
entry->src_id = dw[0] & 0xff;
- entry->src_data = dw[1] & 0xfffffff;
+ entry->src_data[0] = dw[1] & 0xfffffff;
entry->ring_id = dw[2] & 0xff;
entry->vm_id = (dw[2] >> 8) & 0xff;
entry->pas_id = (dw[2] >> 16) & 0xffff;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index d4452d8f76ca..f525ae4e0576 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -2947,19 +2947,19 @@ static int dce_v10_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = amdgpu_irq_add_id(adev, i + 1, &adev->crtc_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
if (r)
return r;
}
for (i = 8; i < 20; i += 2) {
- r = amdgpu_irq_add_id(adev, i, &adev->pageflip_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq);
if (r)
return r;
}
/* HPD hotplug */
- r = amdgpu_irq_add_id(adev, 42, &adev->hpd_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq);
if (r)
return r;
@@ -3398,7 +3398,7 @@ static int dce_v10_0_crtc_irq(struct amdgpu_device *adev,
uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc);
- switch (entry->src_data) {
+ switch (entry->src_data[0]) {
case 0: /* vblank */
if (disp_int & interrupt_status_offsets[crtc].vblank)
dce_v10_0_crtc_vblank_int_ack(adev, crtc);
@@ -3421,7 +3421,7 @@ static int dce_v10_0_crtc_irq(struct amdgpu_device *adev,
break;
default:
- DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data);
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
break;
}
@@ -3435,12 +3435,12 @@ static int dce_v10_0_hpd_irq(struct amdgpu_device *adev,
uint32_t disp_int, mask;
unsigned hpd;
- if (entry->src_data >= adev->mode_info.num_hpd) {
- DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data);
+ if (entry->src_data[0] >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
return 0;
}
- hpd = entry->src_data;
+ hpd = entry->src_data[0];
disp_int = RREG32(interrupt_status_offsets[hpd].reg);
mask = interrupt_status_offsets[hpd].hpd;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 5b24e89552ec..3eac27f24d94 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -3007,19 +3007,19 @@ static int dce_v11_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = amdgpu_irq_add_id(adev, i + 1, &adev->crtc_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
if (r)
return r;
}
for (i = 8; i < 20; i += 2) {
- r = amdgpu_irq_add_id(adev, i, &adev->pageflip_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq);
if (r)
return r;
}
/* HPD hotplug */
- r = amdgpu_irq_add_id(adev, 42, &adev->hpd_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq);
if (r)
return r;
@@ -3462,7 +3462,7 @@ static int dce_v11_0_crtc_irq(struct amdgpu_device *adev,
uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc);
- switch (entry->src_data) {
+ switch (entry->src_data[0]) {
case 0: /* vblank */
if (disp_int & interrupt_status_offsets[crtc].vblank)
dce_v11_0_crtc_vblank_int_ack(adev, crtc);
@@ -3485,7 +3485,7 @@ static int dce_v11_0_crtc_irq(struct amdgpu_device *adev,
break;
default:
- DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data);
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
break;
}
@@ -3499,12 +3499,12 @@ static int dce_v11_0_hpd_irq(struct amdgpu_device *adev,
uint32_t disp_int, mask;
unsigned hpd;
- if (entry->src_data >= adev->mode_info.num_hpd) {
- DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data);
+ if (entry->src_data[0] >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
return 0;
}
- hpd = entry->src_data;
+ hpd = entry->src_data[0];
disp_int = RREG32(interrupt_status_offsets[hpd].reg);
mask = interrupt_status_offsets[hpd].hpd;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 809aa94a0cc1..838cf1a778f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -2295,19 +2295,19 @@ static int dce_v6_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = amdgpu_irq_add_id(adev, i + 1, &adev->crtc_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
if (r)
return r;
}
for (i = 8; i < 20; i += 2) {
- r = amdgpu_irq_add_id(adev, i, &adev->pageflip_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq);
if (r)
return r;
}
/* HPD hotplug */
- r = amdgpu_irq_add_id(adev, 42, &adev->hpd_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq);
if (r)
return r;
@@ -2592,7 +2592,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc);
- switch (entry->src_data) {
+ switch (entry->src_data[0]) {
case 0: /* vblank */
if (disp_int & interrupt_status_offsets[crtc].vblank)
WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_ACK);
@@ -2613,7 +2613,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
DRM_DEBUG("IH: D%d vline\n", crtc + 1);
break;
default:
- DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data);
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
break;
}
@@ -2703,12 +2703,12 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
uint32_t disp_int, mask, tmp;
unsigned hpd;
- if (entry->src_data >= adev->mode_info.num_hpd) {
- DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data);
+ if (entry->src_data[0] >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
return 0;
}
- hpd = entry->src_data;
+ hpd = entry->src_data[0];
disp_int = RREG32(interrupt_status_offsets[hpd].reg);
mask = interrupt_status_offsets[hpd].hpd;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index d2590d75aa11..1b0717b11efe 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -2794,19 +2794,19 @@ static int dce_v8_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = amdgpu_irq_add_id(adev, i + 1, &adev->crtc_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
if (r)
return r;
}
for (i = 8; i < 20; i += 2) {
- r = amdgpu_irq_add_id(adev, i, &adev->pageflip_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq);
if (r)
return r;
}
/* HPD hotplug */
- r = amdgpu_irq_add_id(adev, 42, &adev->hpd_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq);
if (r)
return r;
@@ -3159,7 +3159,7 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev,
uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc);
- switch (entry->src_data) {
+ switch (entry->src_data[0]) {
case 0: /* vblank */
if (disp_int & interrupt_status_offsets[crtc].vblank)
WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], LB_VBLANK_STATUS__VBLANK_ACK_MASK);
@@ -3180,7 +3180,7 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev,
DRM_DEBUG("IH: D%d vline\n", crtc + 1);
break;
default:
- DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data);
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
break;
}
@@ -3270,12 +3270,12 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
uint32_t disp_int, mask, tmp;
unsigned hpd;
- if (entry->src_data >= adev->mode_info.num_hpd) {
- DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data);
+ if (entry->src_data[0] >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
return 0;
}
- hpd = entry->src_data;
+ hpd = entry->src_data[0];
disp_int = RREG32(interrupt_status_offsets[hpd].reg);
mask = interrupt_status_offsets[hpd].hpd;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index e9a176891e13..5c51f9a97811 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -122,8 +122,9 @@ static void dce_virtual_stop_mc_access(struct amdgpu_device *adev,
break;
case CHIP_CARRIZO:
case CHIP_STONEY:
- case CHIP_POLARIS11:
case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
dce_v11_0_disable_dce(adev);
break;
case CHIP_TOPAZ:
@@ -203,6 +204,9 @@ static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode)
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
unsigned type;
+ if (amdgpu_sriov_vf(adev))
+ return;
+
switch (mode) {
case DRM_MODE_DPMS_ON:
amdgpu_crtc->enabled = true;
@@ -463,7 +467,7 @@ static int dce_virtual_sw_init(void *handle)
int r, i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_irq_add_id(adev, 229, &adev->crtc_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 229, &adev->crtc_irq);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 2086e7e68de4..4c4874fdf59f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -378,9 +378,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
out:
if (err) {
- printk(KERN_ERR
- "gfx6: Failed to load firmware \"%s\"\n",
- fw_name);
+ pr_err("gfx6: Failed to load firmware \"%s\"\n", fw_name);
release_firmware(adev->gfx.pfp_fw);
adev->gfx.pfp_fw = NULL;
release_firmware(adev->gfx.me_fw);
@@ -1579,6 +1577,11 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev)
mutex_unlock(&adev->grbm_idx_mutex);
}
+static void gfx_v6_0_config_init(struct amdgpu_device *adev)
+{
+ adev->gfx.config.double_offchip_lds_buf = 1;
+}
+
static void gfx_v6_0_gpu_init(struct amdgpu_device *adev)
{
u32 gb_addr_config = 0;
@@ -1736,6 +1739,7 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev)
gfx_v6_0_setup_spi(adev);
gfx_v6_0_get_cu_info(adev);
+ gfx_v6_0_config_init(adev);
WREG32(mmCP_QUEUE_THRESHOLDS, ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) |
(0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT)));
@@ -2188,12 +2192,12 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev)
return 0;
}
-static u32 gfx_v6_0_ring_get_rptr(struct amdgpu_ring *ring)
+static u64 gfx_v6_0_ring_get_rptr(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->rptr_offs];
}
-static u32 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
+static u64 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
@@ -2211,7 +2215,7 @@ static void gfx_v6_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- WREG32(mmCP_RB0_WPTR, ring->wptr);
+ WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
(void)RREG32(mmCP_RB0_WPTR);
}
@@ -2220,10 +2224,10 @@ static void gfx_v6_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring == &adev->gfx.compute_ring[0]) {
- WREG32(mmCP_RB1_WPTR, ring->wptr);
+ WREG32(mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
(void)RREG32(mmCP_RB1_WPTR);
} else if (ring == &adev->gfx.compute_ring[1]) {
- WREG32(mmCP_RB2_WPTR, ring->wptr);
+ WREG32(mmCP_RB2_WPTR, lower_32_bits(ring->wptr));
(void)RREG32(mmCP_RB2_WPTR);
} else {
BUG();
@@ -3238,15 +3242,15 @@ static int gfx_v6_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i, r;
- r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, &adev->gfx.priv_reg_irq);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, &adev->gfx.priv_inst_irq);
if (r)
return r;
@@ -3304,10 +3308,6 @@ static int gfx_v6_0_sw_fini(void *handle)
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
- amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
- amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
-
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
@@ -3627,6 +3627,7 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
.type = AMDGPU_RING_TYPE_GFX,
.align_mask = 0xff,
.nop = 0x80000000,
+ .support_64bit_ptrs = false,
.get_rptr = gfx_v6_0_ring_get_rptr,
.get_wptr = gfx_v6_0_ring_get_wptr,
.set_wptr = gfx_v6_0_ring_set_wptr_gfx,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 1f9354541f29..8a8bc2fe6f2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -972,9 +972,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
out:
if (err) {
- printk(KERN_ERR
- "gfx7: Failed to load firmware \"%s\"\n",
- fw_name);
+ pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name);
release_firmware(adev->gfx.pfp_fw);
adev->gfx.pfp_fw = NULL;
release_firmware(adev->gfx.me_fw);
@@ -1876,6 +1874,11 @@ static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
}
+static void gfx_v7_0_config_init(struct amdgpu_device *adev)
+{
+ adev->gfx.config.double_offchip_lds_buf = 1;
+}
+
/**
* gfx_v7_0_gpu_init - setup the 3D engine
*
@@ -1886,7 +1889,8 @@ static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev)
*/
static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
{
- u32 tmp, sh_mem_cfg;
+ u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
+ u32 tmp;
int i;
WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
@@ -1899,6 +1903,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
gfx_v7_0_setup_rb(adev);
gfx_v7_0_get_cu_info(adev);
+ gfx_v7_0_config_init(adev);
/* set HW defaults for 3D engine */
WREG32(mmCP_MEQ_THRESHOLDS,
@@ -1916,15 +1921,32 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
/* where to put LDS, scratch, GPUVM in FSA64 space */
sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, DEFAULT_MTYPE,
+ MTYPE_NC);
+ sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, APE1_MTYPE,
+ MTYPE_UC);
+ sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, PRIVATE_ATC, 0);
+
+ sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
+ SWIZZLE_ENABLE, 1);
+ sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
+ ELEMENT_SIZE, 1);
+ sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
+ INDEX_STRIDE, 3);
mutex_lock(&adev->srbm_mutex);
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < adev->vm_manager.num_ids; i++) {
+ if (i == 0)
+ sh_mem_base = 0;
+ else
+ sh_mem_base = adev->mc.shared_aperture_start >> 48;
cik_srbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
WREG32(mmSH_MEM_APE1_BASE, 1);
WREG32(mmSH_MEM_APE1_LIMIT, 0);
- WREG32(mmSH_MEM_BASES, 0);
+ WREG32(mmSH_MEM_BASES, sh_mem_base);
+ WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
}
cik_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@@ -2607,7 +2629,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
/* Initialize the ring buffer's read and write pointers */
WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
ring->wptr = 0;
- WREG32(mmCP_RB0_WPTR, ring->wptr);
+ WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
/* set the wb address wether it's enabled or not */
rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
@@ -2636,12 +2658,12 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
return 0;
}
-static u32 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
+static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->rptr_offs];
}
-static u32 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
@@ -2652,11 +2674,11 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- WREG32(mmCP_RB0_WPTR, ring->wptr);
+ WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
(void)RREG32(mmCP_RB0_WPTR);
}
-static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
/* XXX check if swapping is necessary on BE */
return ring->adev->wb.wb[ring->wptr_offs];
@@ -2667,8 +2689,8 @@ static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = ring->wptr;
- WDOORBELL32(ring->doorbell_index, ring->wptr);
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
/**
@@ -3138,7 +3160,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
ring->wptr = 0;
- mqd->queue_state.cp_hqd_pq_wptr = ring->wptr;
+ mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
@@ -4647,17 +4669,19 @@ static int gfx_v7_0_sw_init(void *handle)
int i, r;
/* EOP Event */
- r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
if (r)
return r;
/* Privileged reg */
- r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
+ &adev->gfx.priv_reg_irq);
if (r)
return r;
/* Privileged inst */
- r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
+ &adev->gfx.priv_inst_irq);
if (r)
return r;
@@ -5184,6 +5208,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
.type = AMDGPU_RING_TYPE_GFX,
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = false,
.get_rptr = gfx_v7_0_ring_get_rptr,
.get_wptr = gfx_v7_0_ring_get_wptr_gfx,
.set_wptr = gfx_v7_0_ring_set_wptr_gfx,
@@ -5214,6 +5239,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
.type = AMDGPU_RING_TYPE_COMPUTE,
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = false,
.get_rptr = gfx_v7_0_ring_get_rptr,
.get_wptr = gfx_v7_0_ring_get_wptr_compute,
.set_wptr = gfx_v7_0_ring_set_wptr_compute,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 67afc901905c..e0fa0d30e162 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -659,6 +659,8 @@ static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
+static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
+static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
@@ -1038,7 +1040,7 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
}
}
- if (adev->firmware.smu_load) {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
info->fw = adev->gfx.pfp_fw;
@@ -1375,13 +1377,12 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq)
{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
int r = 0;
- if (amdgpu_sriov_vf(adev)) {
- r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
- if (r)
- return r;
- }
+ r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
+ if (r)
+ return r;
ring->adev = NULL;
ring->ring_obj = NULL;
@@ -1395,8 +1396,8 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
ring->pipe = 1;
}
- irq->data = ring;
ring->queue = 0;
+ ring->eop_gpu_addr = kiq->eop_gpu_addr;
sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
r = amdgpu_ring_init(adev, ring, 1024,
irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
@@ -1405,15 +1406,11 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
return r;
}
-
static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq)
{
- if (amdgpu_sriov_vf(ring->adev))
- amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
-
+ amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
amdgpu_ring_fini(ring);
- irq->data = NULL;
}
#define MEC_HPD_SIZE 2048
@@ -1475,7 +1472,6 @@ static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
- kiq->eop_obj = NULL;
}
static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
@@ -1494,7 +1490,11 @@ static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
memset(hpd, 0, MEC_HPD_SIZE);
+ r = amdgpu_bo_reserve(kiq->eop_obj, false);
+ if (unlikely(r != 0))
+ dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
amdgpu_bo_kunmap(kiq->eop_obj);
+ amdgpu_bo_unreserve(kiq->eop_obj);
return 0;
}
@@ -2079,22 +2079,24 @@ static int gfx_v8_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* KIQ event */
- r = amdgpu_irq_add_id(adev, 178, &adev->gfx.kiq.irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
if (r)
return r;
/* EOP Event */
- r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
if (r)
return r;
/* Privileged reg */
- r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
+ &adev->gfx.priv_reg_irq);
if (r)
return r;
/* Privileged inst */
- r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
+ &adev->gfx.priv_inst_irq);
if (r)
return r;
@@ -2120,17 +2122,6 @@ static int gfx_v8_0_sw_init(void *handle)
return r;
}
- r = gfx_v8_0_kiq_init(adev);
- if (r) {
- DRM_ERROR("Failed to init KIQ BOs!\n");
- return r;
- }
-
- kiq = &adev->gfx.kiq;
- r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
- if (r)
- return r;
-
/* set up the gfx ring */
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
@@ -2164,6 +2155,7 @@ static int gfx_v8_0_sw_init(void *handle)
ring->me = 1; /* first MEC */
ring->pipe = i / 8;
ring->queue = i % 8;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
/* type-2 packets are deprecated on MEC, use type-3 instead */
@@ -2173,6 +2165,24 @@ static int gfx_v8_0_sw_init(void *handle)
return r;
}
+ if (amdgpu_sriov_vf(adev)) {
+ r = gfx_v8_0_kiq_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ kiq = &adev->gfx.kiq;
+ r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+ if (r)
+ return r;
+
+ /* create MQD for all compute queues as wel as KIQ for SRIOV case */
+ r = gfx_v8_0_compute_mqd_sw_init(adev);
+ if (r)
+ return r;
+ }
+
/* reserve GDS, GWS and OA resource for gfx */
r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
@@ -2214,9 +2224,13 @@ static int gfx_v8_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
- gfx_v8_0_kiq_fini(adev);
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v8_0_compute_mqd_sw_fini(adev);
+ gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+ gfx_v8_0_kiq_fini(adev);
+ }
+
gfx_v8_0_mec_fini(adev);
gfx_v8_0_rlc_fini(adev);
gfx_v8_0_free_microcode(adev);
@@ -3839,9 +3853,22 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
}
+static void gfx_v8_0_config_init(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ default:
+ adev->gfx.config.double_offchip_lds_buf = 1;
+ break;
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ adev->gfx.config.double_offchip_lds_buf = 0;
+ break;
+ }
+}
+
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
- u32 tmp;
+ u32 tmp, sh_static_mem_cfg;
int i;
WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
@@ -3852,11 +3879,18 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
gfx_v8_0_tiling_mode_table_init(adev);
gfx_v8_0_setup_rb(adev);
gfx_v8_0_get_cu_info(adev);
+ gfx_v8_0_config_init(adev);
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
+ sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
+ SWIZZLE_ENABLE, 1);
+ sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
+ ELEMENT_SIZE, 1);
+ sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
+ INDEX_STRIDE, 3);
mutex_lock(&adev->srbm_mutex);
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < adev->vm_manager.num_ids; i++) {
vi_srbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
if (i == 0) {
@@ -3865,17 +3899,20 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED);
WREG32(mmSH_MEM_CONFIG, tmp);
+ WREG32(mmSH_MEM_BASES, 0);
} else {
tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
- tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED);
WREG32(mmSH_MEM_CONFIG, tmp);
+ tmp = adev->mc.shared_aperture_start >> 48;
+ WREG32(mmSH_MEM_BASES, tmp);
}
WREG32(mmSH_MEM_APE1_BASE, 1);
WREG32(mmSH_MEM_APE1_LIMIT, 0);
- WREG32(mmSH_MEM_BASES, 0);
+ WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
}
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@@ -4069,10 +4106,8 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
data = mmRLC_SRM_INDEX_CNTL_DATA_0;
for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
if (unique_indices[i] != 0) {
- amdgpu_mm_wreg(adev, temp + i,
- unique_indices[i] & 0x3FFFF, false);
- amdgpu_mm_wreg(adev, data + i,
- unique_indices[i] >> 20, false);
+ WREG32(temp + i, unique_indices[i] & 0x3FFFF);
+ WREG32(data + i, unique_indices[i] >> 20);
}
}
kfree(register_list_format);
@@ -4218,7 +4253,7 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
gfx_v8_0_init_pg(adev);
if (!adev->pp_enabled) {
- if (!adev->firmware.smu_load) {
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
/* legacy rlc firmware loading */
r = gfx_v8_0_rlc_load_microcode(adev);
if (r)
@@ -4464,7 +4499,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
/* Initialize the ring buffer's read and write pointers */
WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
ring->wptr = 0;
- WREG32(mmCP_RB0_WPTR, ring->wptr);
+ WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
/* set the wb address wether it's enabled or not */
rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
@@ -4510,6 +4545,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
}
/* start the ring */
+ amdgpu_ring_clear_ring(ring);
gfx_v8_0_cp_gfx_start(adev);
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
@@ -4596,6 +4632,8 @@ static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
amdgpu_bo_unref(&ring->mqd_obj);
ring->mqd_obj = NULL;
+ ring->mqd_ptr = NULL;
+ ring->mqd_gpu_addr = 0;
}
}
}
@@ -4656,12 +4694,10 @@ static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
udelay(50);
}
-static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
- struct vi_mqd *mqd,
- uint64_t mqd_gpu_addr,
- uint64_t eop_gpu_addr,
- struct amdgpu_ring *ring)
+static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
+ struct amdgpu_device *adev = ring->adev;
+ struct vi_mqd *mqd = ring->mqd_ptr;
uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
uint32_t tmp;
@@ -4673,7 +4709,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
mqd->compute_misc_reserved = 0x00000003;
- eop_base_addr = eop_gpu_addr >> 8;
+ eop_base_addr = ring->eop_gpu_addr >> 8;
mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
@@ -4702,8 +4738,8 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
mqd->cp_hqd_pq_wptr = 0;
/* set the pointer to the MQD */
- mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
- mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
/* set MQD vmid to 0 */
tmp = RREG32(mmCP_MQD_CONTROL);
@@ -4776,10 +4812,10 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev,
- struct vi_mqd *mqd,
- struct amdgpu_ring *ring)
+static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
{
+ struct amdgpu_device *adev = ring->adev;
+ struct vi_mqd *mqd = ring->mqd_ptr;
uint32_t tmp;
int j;
@@ -4867,35 +4903,49 @@ static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
- struct vi_mqd *mqd,
- u64 mqd_gpu_addr)
+static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- uint64_t eop_gpu_addr;
- bool is_kiq = false;
-
- if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
- is_kiq = true;
+ struct vi_mqd *mqd = ring->mqd_ptr;
+ bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
+ int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
if (is_kiq) {
- eop_gpu_addr = kiq->eop_gpu_addr;
gfx_v8_0_kiq_setting(&kiq->ring);
- } else
- eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
- ring->queue * MEC_HPD_SIZE;
-
- mutex_lock(&adev->srbm_mutex);
- vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ } else {
+ mqd_idx = ring - &adev->gfx.compute_ring[0];
+ }
- gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
+ if (!adev->gfx.in_reset) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v8_0_mqd_init(ring);
+ if (is_kiq)
+ gfx_v8_0_kiq_init_register(ring);
+ vi_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
- if (is_kiq)
- gfx_v8_0_kiq_init_register(adev, mqd, ring);
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else { /* for GPU_RESET case */
+ /* reset MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
- vi_srbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+
+ if (is_kiq) {
+ mutex_lock(&adev->srbm_mutex);
+ vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v8_0_kiq_init_register(ring);
+ vi_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+ }
if (is_kiq)
gfx_v8_0_kiq_enable(ring);
@@ -4905,86 +4955,60 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
return 0;
}
-static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev)
+static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring = NULL;
- int i;
+ int r = 0, i;
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
- amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
- ring->mqd_obj = NULL;
- }
+ gfx_v8_0_cp_compute_enable(adev, true);
ring = &adev->gfx.kiq.ring;
- amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
- ring->mqd_obj = NULL;
-}
-static int gfx_v8_0_kiq_setup_queue(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- struct vi_mqd *mqd;
- u64 mqd_gpu_addr;
- u32 *buf;
- int r = 0;
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto done;
- r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
- &mqd_gpu_addr, (void **)&buf);
- if (r) {
- dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
- return r;
+ r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
+ if (!r) {
+ r = gfx_v8_0_kiq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
}
-
- /* init the mqd struct */
- memset(buf, 0, sizeof(struct vi_mqd));
- mqd = (struct vi_mqd *)buf;
-
- r = gfx_v8_0_kiq_init_queue(ring, mqd, mqd_gpu_addr);
- if (r)
- return r;
-
- amdgpu_bo_kunmap(ring->mqd_obj);
-
- return 0;
-}
-
-static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *ring = NULL;
- int r, i;
-
- ring = &adev->gfx.kiq.ring;
- r = gfx_v8_0_kiq_setup_queue(adev, ring);
+ amdgpu_bo_unreserve(ring->mqd_obj);
if (r)
- return r;
+ goto done;
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
- r = gfx_v8_0_kiq_setup_queue(adev, ring);
- if (r)
- return r;
+ ring->ready = true;
+ r = amdgpu_ring_test_ring(ring);
+ if (r) {
+ ring->ready = false;
+ goto done;
}
- gfx_v8_0_cp_compute_enable(adev, true);
-
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto done;
+ r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
+ if (!r) {
+ r = gfx_v8_0_kiq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ }
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ if (r)
+ goto done;
+
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r)
ring->ready = false;
}
- ring = &adev->gfx.kiq.ring;
- ring->ready = true;
- r = amdgpu_ring_test_ring(ring);
- if (r)
- ring->ready = false;
-
- return 0;
+done:
+ return r;
}
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
@@ -5185,7 +5209,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
ring->wptr = 0;
- mqd->cp_hqd_pq_wptr = ring->wptr;
+ mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
@@ -5245,7 +5269,7 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
gfx_v8_0_enable_gui_idle_interrupt(adev, false);
if (!adev->pp_enabled) {
- if (!adev->firmware.smu_load) {
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
/* legacy firmware loading */
r = gfx_v8_0_cp_gfx_load_microcode(adev);
if (r)
@@ -5329,7 +5353,6 @@ static int gfx_v8_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
if (amdgpu_sriov_vf(adev)) {
- gfx_v8_0_kiq_free_queue(adev);
pr_debug("For SRIOV client, shouldn't do anything.\n");
return 0;
}
@@ -5839,7 +5862,10 @@ static int gfx_v8_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
switch (adev->asic_type) {
case CHIP_CARRIZO:
@@ -5898,6 +5924,9 @@ static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
/* AMD_CG_SUPPORT_GFX_MGCG */
data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
@@ -6411,18 +6440,22 @@ static int gfx_v8_0_set_clockgating_state(void *handle,
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
switch (adev->asic_type) {
case CHIP_FIJI:
case CHIP_CARRIZO:
case CHIP_STONEY:
gfx_v8_0_update_gfx_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
case CHIP_TONGA:
gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
break;
case CHIP_POLARIS10:
case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
break;
default:
@@ -6431,12 +6464,12 @@ static int gfx_v8_0_set_clockgating_state(void *handle,
return 0;
}
-static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
+static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->rptr_offs];
}
-static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
@@ -6453,10 +6486,10 @@ static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = ring->wptr;
- WDOORBELL32(ring->doorbell_index, ring->wptr);
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
- WREG32(mmCP_RB0_WPTR, ring->wptr);
+ WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
(void)RREG32(mmCP_RB0_WPTR);
}
}
@@ -6531,6 +6564,9 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
control |= ib->length_dw | (vm_id << 24);
+ if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT)
+ control |= INDIRECT_BUFFER_PRE_ENB(1);
+
amdgpu_ring_write(ring, header);
amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
@@ -6639,12 +6675,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
/* sync PFP to ME, otherwise we might get invalid PFP reads */
amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
amdgpu_ring_write(ring, 0x0);
- /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
- amdgpu_ring_insert_nop(ring, 128);
}
}
-static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->wptr_offs];
}
@@ -6654,8 +6688,8 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = ring->wptr;
- WDOORBELL32(ring->doorbell_index, ring->wptr);
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
@@ -6748,6 +6782,34 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
}
+static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+ amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ ret = ring->wptr & ring->buf_mask;
+ amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ return ret;
+}
+
+static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
+{
+ unsigned cur;
+
+ BUG_ON(offset > ring->buf_mask);
+ BUG_ON(ring->ring[offset] != 0x55aa55aa);
+
+ cur = (ring->wptr & ring->buf_mask) - 1;
+ if (likely(cur > offset))
+ ring->ring[offset] = cur - offset;
+ else
+ ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
+}
+
+
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
struct amdgpu_device *adev = ring->adev;
@@ -6925,9 +6987,9 @@ static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
uint32_t tmp, target;
- struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data;
+ struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
- BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
+ BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
if (ring->me == 1)
target = mmCP_ME1_PIPE0_INT_CNTL;
@@ -6971,9 +7033,9 @@ static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
u8 me_id, pipe_id, queue_id;
- struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data;
+ struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
- BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
+ BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
@@ -7010,18 +7072,28 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.type = AMDGPU_RING_TYPE_GFX,
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = false,
.get_rptr = gfx_v8_0_ring_get_rptr,
.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
- .emit_frame_size =
- 20 + /* gfx_v8_0_ring_emit_gds_switch */
- 7 + /* gfx_v8_0_ring_emit_hdp_flush */
- 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
- 6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
- 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
- 128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
- 2 + /* gfx_v8_ring_emit_sb */
- 3 + 4 + 29, /* gfx_v8_ring_emit_cntxcntl including vgt flush/meta-data */
+ .emit_frame_size = /* maximum 215dw if count 16 IBs in */
+ 5 + /* COND_EXEC */
+ 7 + /* PIPELINE_SYNC */
+ 19 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 20 + /* GDS switch */
+ 4 + /* double SWITCH_BUFFER,
+ the first COND_EXEC jump to the place just
+ prior to this double SWITCH_BUFFER */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 14 + /* CE_META */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 8 + 8 + /* FENCE x2 */
+ 2, /* SWITCH_BUFFER */
.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
@@ -7036,12 +7108,15 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v8_ring_emit_sb,
.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
+ .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
+ .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.type = AMDGPU_RING_TYPE_COMPUTE,
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = false,
.get_rptr = gfx_v8_0_ring_get_rptr,
.get_wptr = gfx_v8_0_ring_get_wptr_compute,
.set_wptr = gfx_v8_0_ring_set_wptr_compute,
@@ -7070,6 +7145,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
.type = AMDGPU_RING_TYPE_KIQ,
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = false,
.get_rptr = gfx_v8_0_ring_get_rptr,
.get_wptr = gfx_v8_0_ring_get_wptr_compute,
.set_wptr = gfx_v8_0_ring_set_wptr_compute,
@@ -7266,15 +7342,15 @@ static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t c
uint64_t ce_payload_addr;
int cnt_ce;
static union {
- struct amdgpu_ce_ib_state regular;
- struct amdgpu_ce_ib_state_chained_ib chained;
+ struct vi_ce_ib_state regular;
+ struct vi_ce_ib_state_chained_ib chained;
} ce_payload = {};
if (ring->adev->virt.chained_ib_support) {
- ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, ce_payload);
+ ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
} else {
- ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, ce_payload);
+ ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload);
cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
}
@@ -7293,20 +7369,20 @@ static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t c
uint64_t de_payload_addr, gds_addr;
int cnt_de;
static union {
- struct amdgpu_de_ib_state regular;
- struct amdgpu_de_ib_state_chained_ib chained;
+ struct vi_de_ib_state regular;
+ struct vi_de_ib_state_chained_ib chained;
} de_payload = {};
gds_addr = csa_addr + 4096;
if (ring->adev->virt.chained_ib_support) {
de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
- de_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, de_payload);
+ de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
} else {
de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
- de_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, de_payload);
+ de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
}
@@ -7319,3 +7395,68 @@ static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t c
amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}
+
+/* create MQD for each compute queue */
+static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = NULL;
+ int r, i;
+
+ /* create MQD for KIQ */
+ ring = &adev->gfx.kiq.ring;
+ if (!ring->mqd_obj) {
+ r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ &ring->mqd_gpu_addr, &ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
+ return r;
+ }
+
+ /* prepare MQD backup */
+ adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
+ if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
+ dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+ }
+
+ /* create MQD for each KCQ */
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (!ring->mqd_obj) {
+ r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ &ring->mqd_gpu_addr, &ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
+ return r;
+ }
+
+ /* prepare MQD backup */
+ adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
+ if (!adev->gfx.mec.mqd_backup[i])
+ dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+ }
+ }
+
+ return 0;
+}
+
+static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = NULL;
+ int i;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ kfree(adev->gfx.mec.mqd_backup[i]);
+ amdgpu_bo_free_kernel(&ring->mqd_obj,
+ &ring->mqd_gpu_addr,
+ &ring->mqd_ptr);
+ }
+
+ ring = &adev->gfx.kiq.ring;
+ kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
+ amdgpu_bo_free_kernel(&ring->mqd_obj,
+ &ring->mqd_gpu_addr,
+ &ring->mqd_ptr);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
new file mode 100644
index 000000000000..669bb98fc45d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -0,0 +1,4140 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include "drmP.h"
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "soc15.h"
+#include "soc15d.h"
+
+#include "vega10/soc15ip.h"
+#include "vega10/GC/gc_9_0_offset.h"
+#include "vega10/GC/gc_9_0_sh_mask.h"
+#include "vega10/vega10_enum.h"
+#include "vega10/HDP/hdp_4_0_offset.h"
+
+#include "soc15_common.h"
+#include "clearstate_gfx9.h"
+#include "v9_structs.h"
+
+#define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_COMPUTE_RINGS 8
+#define GFX9_NUM_SE 4
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x2000
+
+MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
+MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vega10_me.bin");
+MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
+MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
+
+static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
+{
+ {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0)},
+ {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE),
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1)},
+ {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE),
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2)},
+ {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE),
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID3), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID3)},
+ {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_SIZE),
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID4), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID4)},
+ {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_SIZE),
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID5), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID5)},
+ {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_SIZE),
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID6), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID6)},
+ {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_SIZE),
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID7), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID7)},
+ {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_SIZE),
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID8), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID8)},
+ {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_SIZE),
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID9), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID9)},
+ {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_SIZE),
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID10), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID10)},
+ {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_SIZE),
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID11), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID11)},
+ {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_SIZE),
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID12), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID12)},
+ {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_SIZE),
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID13), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID13)},
+ {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_SIZE),
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID14), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID14)},
+ {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_SIZE),
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID15), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID15)}
+};
+
+static const u32 golden_settings_gc_9_0[] =
+{
+ SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00ffeff, 0x00000400,
+ SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024,
+ SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001,
+ SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000,
+ SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000,
+ SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68,
+ SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197,
+ SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff
+};
+
+static const u32 golden_settings_gc_9_0_vg10[] =
+{
+ SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0x0000f000, 0x00012107,
+ SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000,
+ SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x2a114042,
+ SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042,
+ SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000,
+ SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000,
+ SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800,
+ SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1),0x0000000f, 0x00000007
+};
+
+#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
+
+static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
+static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
+static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
+static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
+
+static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ amdgpu_program_register_sequence(adev,
+ golden_settings_gc_9_0,
+ (const u32)ARRAY_SIZE(golden_settings_gc_9_0));
+ amdgpu_program_register_sequence(adev,
+ golden_settings_gc_9_0_vg10,
+ (const u32)ARRAY_SIZE(golden_settings_gc_9_0_vg10));
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
+{
+ adev->gfx.scratch.num_reg = 7;
+ adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
+ adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
+}
+
+static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
+ bool wc, uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
+ WRITE_DATA_DST_SEL(0) |
+ (wc ? WR_CONFIRM : 0));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t scratch;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ r = amdgpu_gfx_scratch_get(adev, &scratch);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
+ return r;
+ }
+ WREG32(scratch, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
+ ring->idx, r);
+ amdgpu_gfx_scratch_free(adev, scratch);
+ return r;
+ }
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ DRM_UDELAY(1);
+ }
+ if (i < adev->usec_timeout) {
+ DRM_INFO("ring test on %d succeeded in %d usecs\n",
+ ring->idx, i);
+ } else {
+ DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
+ ring->idx, scratch, tmp);
+ r = -EINVAL;
+ }
+ amdgpu_gfx_scratch_free(adev, scratch);
+ return r;
+}
+
+static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ uint32_t scratch;
+ uint32_t tmp = 0;
+ long r;
+
+ r = amdgpu_gfx_scratch_get(adev, &scratch);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
+ return r;
+ }
+ WREG32(scratch, 0xCAFEDEAD);
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
+ }
+ ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
+ ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
+ ib.ptr[2] = 0xDEADBEEF;
+ ib.length_dw = 3;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err2;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ DRM_ERROR("amdgpu: IB test timed out.\n");
+ r = -ETIMEDOUT;
+ goto err2;
+ } else if (r < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ goto err2;
+ }
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF) {
+ DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+ r = 0;
+ } else {
+ DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
+ scratch, tmp);
+ r = -EINVAL;
+ }
+err2:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err1:
+ amdgpu_gfx_scratch_free(adev, scratch);
+ return r;
+}
+
+static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ char fw_name[30];
+ int err;
+ struct amdgpu_firmware_info *info = NULL;
+ const struct common_firmware_header *header = NULL;
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ chip_name = "vega10";
+ break;
+ default:
+ BUG();
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
+ err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+ err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
+ err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+ err = amdgpu_ucode_validate(adev->gfx.me_fw);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
+ err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+ err = amdgpu_ucode_validate(adev->gfx.ce_fw);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
+ adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+ err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+ err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
+ err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+ err = amdgpu_ucode_validate(adev->gfx.mec_fw);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
+ err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
+ if (!err) {
+ err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec2_fw->data;
+ adev->gfx.mec2_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec2_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ } else {
+ err = 0;
+ adev->gfx.mec2_fw = NULL;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
+ info->fw = adev->gfx.pfp_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
+ info->fw = adev->gfx.me_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
+ info->fw = adev->gfx.ce_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
+ info->fw = adev->gfx.rlc_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
+ info->fw = adev->gfx.mec_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
+ info->fw = adev->gfx.mec_fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
+
+ if (adev->gfx.mec2_fw) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+ info->fw = adev->gfx.mec2_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
+ info->fw = adev->gfx.mec2_fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
+ }
+
+ }
+
+out:
+ if (err) {
+ dev_err(adev->dev,
+ "gfx9: Failed to load firmware \"%s\"\n",
+ fw_name);
+ release_firmware(adev->gfx.pfp_fw);
+ adev->gfx.pfp_fw = NULL;
+ release_firmware(adev->gfx.me_fw);
+ adev->gfx.me_fw = NULL;
+ release_firmware(adev->gfx.ce_fw);
+ adev->gfx.ce_fw = NULL;
+ release_firmware(adev->gfx.rlc_fw);
+ adev->gfx.rlc_fw = NULL;
+ release_firmware(adev->gfx.mec_fw);
+ adev->gfx.mec_fw = NULL;
+ release_firmware(adev->gfx.mec2_fw);
+ adev->gfx.mec2_fw = NULL;
+ }
+ return err;
+}
+
+static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->gfx.mec.hpd_eop_obj) {
+ r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
+ if (unlikely(r != 0))
+ dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
+ amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+
+ amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
+ adev->gfx.mec.hpd_eop_obj = NULL;
+ }
+ if (adev->gfx.mec.mec_fw_obj) {
+ r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false);
+ if (unlikely(r != 0))
+ dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r);
+ amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+
+ amdgpu_bo_unref(&adev->gfx.mec.mec_fw_obj);
+ adev->gfx.mec.mec_fw_obj = NULL;
+ }
+}
+
+#define MEC_HPD_SIZE 2048
+
+static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
+{
+ int r;
+ u32 *hpd;
+ const __le32 *fw_data;
+ unsigned fw_size;
+ u32 *fw;
+
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+
+ /*
+ * we assign only 1 pipe because all other pipes will
+ * be handled by KFD
+ */
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe = 1;
+ adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
+
+ if (adev->gfx.mec.hpd_eop_obj == NULL) {
+ r = amdgpu_bo_create(adev,
+ adev->gfx.mec.num_queue * MEC_HPD_SIZE,
+ PAGE_SIZE, true,
+ AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
+ &adev->gfx.mec.hpd_eop_obj);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ return r;
+ }
+ }
+
+ r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
+ if (unlikely(r != 0)) {
+ gfx_v9_0_mec_fini(adev);
+ return r;
+ }
+ r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_gpu_addr);
+ if (r) {
+ dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
+ gfx_v9_0_mec_fini(adev);
+ return r;
+ }
+ r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
+ gfx_v9_0_mec_fini(adev);
+ return r;
+ }
+
+ memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
+
+ if (adev->gfx.mec.mec_fw_obj == NULL) {
+ r = amdgpu_bo_create(adev,
+ mec_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, true,
+ AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
+ &adev->gfx.mec.mec_fw_obj);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
+ return r;
+ }
+ }
+
+ r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false);
+ if (unlikely(r != 0)) {
+ gfx_v9_0_mec_fini(adev);
+ return r;
+ }
+ r = amdgpu_bo_pin(adev->gfx.mec.mec_fw_obj, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_gpu_addr);
+ if (r) {
+ dev_warn(adev->dev, "(%d) pin mec firmware bo failed\n", r);
+ gfx_v9_0_mec_fini(adev);
+ return r;
+ }
+ r = amdgpu_bo_kmap(adev->gfx.mec.mec_fw_obj, (void **)&fw);
+ if (r) {
+ dev_warn(adev->dev, "(%d) map firmware bo failed\n", r);
+ gfx_v9_0_mec_fini(adev);
+ return r;
+ }
+ memcpy(fw, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+
+
+ return 0;
+}
+
+static void gfx_v9_0_kiq_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+ amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
+}
+
+static int gfx_v9_0_kiq_init(struct amdgpu_device *adev)
+{
+ int r;
+ u32 *hpd;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+ r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
+ &kiq->eop_gpu_addr, (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
+ return r;
+ }
+
+ memset(hpd, 0, MEC_HPD_SIZE);
+
+ r = amdgpu_bo_reserve(kiq->eop_obj, false);
+ if (unlikely(r != 0))
+ dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
+ amdgpu_bo_kunmap(kiq->eop_obj);
+ amdgpu_bo_unreserve(kiq->eop_obj);
+
+ return 0;
+}
+
+static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_irq_src *irq)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ int r = 0;
+
+ r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
+ if (r)
+ return r;
+
+ ring->adev = NULL;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
+ if (adev->gfx.mec2_fw) {
+ ring->me = 2;
+ ring->pipe = 0;
+ } else {
+ ring->me = 1;
+ ring->pipe = 1;
+ }
+
+ irq->data = ring;
+ ring->queue = 0;
+ ring->eop_gpu_addr = kiq->eop_gpu_addr;
+ sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
+ r = amdgpu_ring_init(adev, ring, 1024,
+ irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
+ if (r)
+ dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
+
+ return r;
+}
+static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring,
+ struct amdgpu_irq_src *irq)
+{
+ amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
+ amdgpu_ring_fini(ring);
+ irq->data = NULL;
+}
+
+/* create MQD for each compute queue */
+static int gfx_v9_0_compute_mqd_sw_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = NULL;
+ int r, i;
+
+ /* create MQD for KIQ */
+ ring = &adev->gfx.kiq.ring;
+ if (!ring->mqd_obj) {
+ r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
+ return r;
+ }
+
+ /*TODO: prepare MQD backup */
+ }
+
+ /* create MQD for each KCQ */
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (!ring->mqd_obj) {
+ r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
+ return r;
+ }
+
+ /* TODO: prepare MQD backup */
+ }
+ }
+
+ return 0;
+}
+
+static void gfx_v9_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = NULL;
+ int i;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
+ }
+
+ ring = &adev->gfx.kiq.ring;
+ amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
+}
+
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
+{
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX),
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK));
+ return RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA));
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t regno, uint32_t num, uint32_t *out)
+{
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX),
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA));
+}
+
+static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+{
+ /* type 1 wave data */
+ dst[(*no_fields)++] = 1;
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
+}
+
+static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ wave_read_regs(
+ adev, simd, wave, 0,
+ start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
+}
+
+
+static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v9_0_select_se_sh,
+ .read_wave_data = &gfx_v9_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
+};
+
+static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
+{
+ u32 gb_addr_config;
+
+ adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ adev->gfx.config.max_shader_engines = 4;
+ adev->gfx.config.max_tile_pipes = 8; //??
+ adev->gfx.config.max_cu_per_sh = 16;
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_backends_per_se = 4;
+ adev->gfx.config.max_texture_channel_caches = 16;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 32;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ adev->gfx.config.gb_addr_config = gb_addr_config;
+
+ adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_PIPES);
+ adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_BANKS);
+ adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ MAX_COMPRESSED_FRAGS);
+ adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_RB_PER_SE);
+ adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_SHADER_ENGINES);
+ adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ PIPE_INTERLEAVE_SIZE));
+}
+
+static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
+ struct amdgpu_ngg_buf *ngg_buf,
+ int size_se,
+ int default_size_se)
+{
+ int r;
+
+ if (size_se < 0) {
+ dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
+ return -EINVAL;
+ }
+ size_se = size_se ? size_se : default_size_se;
+
+ ngg_buf->size = size_se * GFX9_NUM_SE;
+ r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
+ &ngg_buf->bo,
+ &ngg_buf->gpu_addr,
+ NULL);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
+ return r;
+ }
+ ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
+
+ return r;
+}
+
+static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < NGG_BUF_MAX; i++)
+ amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
+ &adev->gfx.ngg.buf[i].gpu_addr,
+ NULL);
+
+ memset(&adev->gfx.ngg.buf[0], 0,
+ sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
+
+ adev->gfx.ngg.init = false;
+
+ return 0;
+}
+
+static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!amdgpu_ngg || adev->gfx.ngg.init == true)
+ return 0;
+
+ /* GDS reserve memory: 64 bytes alignment */
+ adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
+ adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
+ adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
+ adev->gfx.ngg.gds_reserve_addr = amdgpu_gds_reg_offset[0].mem_base;
+ adev->gfx.ngg.gds_reserve_addr += adev->gds.mem.gfx_partition_size;
+
+ /* Primitive Buffer */
+ r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PRIM],
+ amdgpu_prim_buf_per_se,
+ 64 * 1024);
+ if (r) {
+ dev_err(adev->dev, "Failed to create Primitive Buffer\n");
+ goto err;
+ }
+
+ /* Position Buffer */
+ r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[POS],
+ amdgpu_pos_buf_per_se,
+ 256 * 1024);
+ if (r) {
+ dev_err(adev->dev, "Failed to create Position Buffer\n");
+ goto err;
+ }
+
+ /* Control Sideband */
+ r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[CNTL],
+ amdgpu_cntl_sb_buf_per_se,
+ 256);
+ if (r) {
+ dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
+ goto err;
+ }
+
+ /* Parameter Cache, not created by default */
+ if (amdgpu_param_buf_per_se <= 0)
+ goto out;
+
+ r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PARAM],
+ amdgpu_param_buf_per_se,
+ 512 * 1024);
+ if (r) {
+ dev_err(adev->dev, "Failed to create Parameter Cache\n");
+ goto err;
+ }
+
+out:
+ adev->gfx.ngg.init = true;
+ return 0;
+err:
+ gfx_v9_0_ngg_fini(adev);
+ return r;
+}
+
+static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
+ int r;
+ u32 data;
+ u32 size;
+ u32 base;
+
+ if (!amdgpu_ngg)
+ return 0;
+
+ /* Program buffer size */
+ data = 0;
+ size = adev->gfx.ngg.buf[PRIM].size / 256;
+ data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, size);
+
+ size = adev->gfx.ngg.buf[POS].size / 256;
+ data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, size);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_1), data);
+
+ data = 0;
+ size = adev->gfx.ngg.buf[CNTL].size / 256;
+ data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, size);
+
+ size = adev->gfx.ngg.buf[PARAM].size / 1024;
+ data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, size);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_2), data);
+
+ /* Program buffer base address */
+ base = lower_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr);
+ data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE), data);
+
+ base = upper_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr);
+ data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE_HI), data);
+
+ base = lower_32_bits(adev->gfx.ngg.buf[POS].gpu_addr);
+ data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE), data);
+
+ base = upper_32_bits(adev->gfx.ngg.buf[POS].gpu_addr);
+ data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE_HI), data);
+
+ base = lower_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr);
+ data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE), data);
+
+ base = upper_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr);
+ data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI), data);
+
+ /* Clear GDS reserved memory */
+ r = amdgpu_ring_alloc(ring, 17);
+ if (r) {
+ DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n",
+ ring->idx, r);
+ return r;
+ }
+
+ gfx_v9_0_write_data_to_reg(ring, 0, false,
+ amdgpu_gds_reg_offset[0].mem_size,
+ (adev->gds.mem.total_size +
+ adev->gfx.ngg.gds_reserve_size) >>
+ AMDGPU_GDS_SHIFT);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
+ amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
+ PACKET3_DMA_DATA_SRC_SEL(2)));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_size);
+
+
+ gfx_v9_0_write_data_to_reg(ring, 0, false,
+ amdgpu_gds_reg_offset[0].mem_size, 0);
+
+ amdgpu_ring_commit(ring);
+
+ return 0;
+}
+
+static int gfx_v9_0_sw_init(void *handle)
+{
+ int i, r;
+ struct amdgpu_ring *ring;
+ struct amdgpu_kiq *kiq;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* KIQ event */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
+ if (r)
+ return r;
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 184,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 185,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ gfx_v9_0_scratch_init(adev);
+
+ r = gfx_v9_0_init_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load gfx firmware!\n");
+ return r;
+ }
+
+ r = gfx_v9_0_mec_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ ring->ring_obj = NULL;
+ sprintf(ring->name, "gfx");
+ ring->use_doorbell = true;
+ ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1;
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
+ if (r)
+ return r;
+ }
+
+ /* set up the compute queues */
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ unsigned irq_type;
+
+ /* max 32 queues per MEC */
+ if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
+ DRM_ERROR("Too many (%d) compute rings!\n", i);
+ break;
+ }
+ ring = &adev->gfx.compute_ring[i];
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + i) << 1;
+ ring->me = 1; /* first MEC */
+ ring->pipe = i / 8;
+ ring->queue = i % 8;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
+ sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->gfx.eop_irq, irq_type);
+ if (r)
+ return r;
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = gfx_v9_0_kiq_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ kiq = &adev->gfx.kiq;
+ r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+ if (r)
+ return r;
+
+ /* create MQD for all compute queues as wel as KIQ for SRIOV case */
+ r = gfx_v9_0_compute_mqd_sw_init(adev);
+ if (r)
+ return r;
+ }
+
+ /* reserve GDS, GWS and OA resource for gfx */
+ r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
+ &adev->gds.gds_gfx_bo, NULL, NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
+ &adev->gds.gws_gfx_bo, NULL, NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
+ &adev->gds.oa_gfx_bo, NULL, NULL);
+ if (r)
+ return r;
+
+ adev->gfx.ce_ram_size = 0x8000;
+
+ gfx_v9_0_gpu_early_init(adev);
+
+ r = gfx_v9_0_ngg_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+
+static int gfx_v9_0_sw_fini(void *handle)
+{
+ int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v9_0_compute_mqd_sw_fini(adev);
+ gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+ gfx_v9_0_kiq_fini(adev);
+ }
+
+ gfx_v9_0_mec_fini(adev);
+ gfx_v9_0_ngg_fini(adev);
+
+ return 0;
+}
+
+
+static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
+{
+ /* TODO */
+}
+
+static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
+{
+ u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+
+ if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
+ } else if (se_num == 0xffffffff) {
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
+ } else if (sh_num == 0xffffffff) {
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+ } else {
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+ }
+ WREG32( SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
+}
+
+static u32 gfx_v9_0_create_bitmask(u32 bit_width)
+{
+ return (u32)((1ULL << bit_width) - 1);
+}
+
+static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 data, mask;
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_RB_BACKEND_DISABLE));
+ data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_RB_BACKEND_DISABLE));
+
+ data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
+ data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
+
+ mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se);
+
+ return (~data) & mask;
+}
+
+static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
+{
+ int i, j;
+ u32 data;
+ u32 active_rbs = 0;
+ u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+ data = gfx_v9_0_get_rb_active_bitmap(adev);
+ active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
+ rb_bitmap_width_per_sh);
+ }
+ }
+ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ adev->gfx.config.backend_enable_mask = active_rbs;
+ adev->gfx.config.num_rbs = hweight32(active_rbs);
+}
+
+#define DEFAULT_SH_MEM_BASES (0x6000)
+#define FIRST_COMPUTE_VMID (8)
+#define LAST_COMPUTE_VMID (16)
+static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t sh_mem_config;
+ uint32_t sh_mem_bases;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
+
+ sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+ soc15_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+ }
+ soc15_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ int i;
+
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_CNTL));
+ tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_CNTL), tmp);
+
+ gfx_v9_0_tiling_mode_table_init(adev);
+
+ gfx_v9_0_setup_rb(adev);
+ gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < 16; i++) {
+ soc15_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ tmp = 0;
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), tmp);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), 0);
+ }
+ soc15_grbm_select(adev, 0, 0, 0, 0);
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v9_0_init_compute_vmid(adev);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ /*
+ * making sure that the following register writes will be broadcasted
+ * to all the shaders
+ */
+ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FIFO_SIZE),
+ (adev->gfx.config.sc_prim_fifo_size_frontend <<
+ PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
+ (adev->gfx.config.sc_prim_fifo_size_backend <<
+ PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
+ (adev->gfx.config.sc_hiz_tile_fifo_size <<
+ PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
+ (adev->gfx.config.sc_earlyz_tile_fifo_size <<
+ PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+}
+
+static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
+{
+ u32 i, j, k;
+ u32 mask;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+ for (k = 0; k < adev->usec_timeout; k++) {
+ if (RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY)) == 0)
+ break;
+ udelay(1);
+ }
+ }
+ }
+ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
+ for (k = 0; k < adev->usec_timeout; k++) {
+ if ((RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY)) & mask) == 0)
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
+
+ if (enable)
+ return;
+
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), tmp);
+}
+
+void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
+
+ tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL), tmp);
+
+ gfx_v9_0_enable_gui_idle_interrupt(adev, false);
+
+ gfx_v9_0_wait_for_rlc_serdes(adev);
+}
+
+static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
+
+ tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp);
+ udelay(50);
+ tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp);
+ udelay(50);
+}
+
+static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
+{
+#ifdef AMDGPU_RLC_DEBUG_RETRY
+ u32 rlc_ucode_ver;
+#endif
+ u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
+
+ tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL), tmp);
+
+ /* carrizo do enable cp interrupt after cp inited */
+ if (!(adev->flags & AMD_IS_APU))
+ gfx_v9_0_enable_gui_idle_interrupt(adev, true);
+
+ udelay(50);
+
+#ifdef AMDGPU_RLC_DEBUG_RETRY
+ /* RLC_GPM_GENERAL_6 : RLC Ucode version */
+ rlc_ucode_ver = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_6));
+ if(rlc_ucode_ver == 0x108) {
+ DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
+ rlc_ucode_ver, adev->gfx.rlc_fw_version);
+ /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
+ * default is 0x9C4 to create a 100us interval */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_TIMER_INT_3), 0x9C4);
+ /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
+ * to disable the page fault retry interrupts, default is
+ * 0x100 (256) */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_12), 0x100);
+ }
+#endif
+}
+
+static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ if (!adev->gfx.rlc_fw)
+ return -EINVAL;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR),
+ RLCG_UCODE_LOADING_START_ADDRESS);
+ for (i = 0; i < fw_size; i++)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA), le32_to_cpup(fw_data++));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR), adev->gfx.rlc_fw_version);
+
+ return 0;
+}
+
+static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ gfx_v9_0_rlc_stop(adev);
+
+ /* disable CG */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), 0);
+
+ /* disable PG */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), 0);
+
+ gfx_v9_0_rlc_reset(adev);
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ /* legacy rlc firmware loading */
+ r = gfx_v9_0_rlc_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v9_0_rlc_start(adev);
+
+ return 0;
+}
+
+static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+ int i;
+ u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL));
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].ready = false;
+ }
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL), tmp);
+ udelay(50);
+}
+
+static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v1_0 *pfp_hdr;
+ const struct gfx_firmware_header_v1_0 *ce_hdr;
+ const struct gfx_firmware_header_v1_0 *me_hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
+ return -EINVAL;
+
+ pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+ ce_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.ce_fw->data;
+ me_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+ amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+ gfx_v9_0_cp_gfx_enable(adev, false);
+
+ /* PFP */
+ fw_data = (const __le32 *)
+ (adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), 0);
+ for (i = 0; i < fw_size; i++)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA), le32_to_cpup(fw_data++));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), adev->gfx.pfp_fw_version);
+
+ /* CE */
+ fw_data = (const __le32 *)
+ (adev->gfx.ce_fw->data +
+ le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), 0);
+ for (i = 0; i < fw_size; i++)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA), le32_to_cpup(fw_data++));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), adev->gfx.ce_fw_version);
+
+ /* ME */
+ fw_data = (const __le32 *)
+ (adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), 0);
+ for (i = 0; i < fw_size; i++)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_DATA), le32_to_cpup(fw_data++));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), adev->gfx.me_fw_version);
+
+ return 0;
+}
+
+static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
+{
+ u32 count = 0;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ /* begin clear state */
+ count += 2;
+ /* context control state */
+ count += 3;
+
+ for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT)
+ count += 2 + ext->reg_count;
+ else
+ return 0;
+ }
+ }
+ /* pa_sc_raster_config/pa_sc_raster_config1 */
+ count += 4;
+ /* end clear state */
+ count += 2;
+ /* clear state */
+ count += 2;
+
+ return count;
+}
+
+static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ int r, i;
+
+ /* init the CP */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MAX_CONTEXT), adev->gfx.config.max_hw_contexts - 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_DEVICE_ID), 1);
+
+ gfx_v9_0_cp_gfx_enable(adev, true);
+
+ r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, 0x80000000);
+ amdgpu_ring_write(ring, 0x80000000);
+
+ for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_SET_CONTEXT_REG,
+ ext->reg_count));
+ amdgpu_ring_write(ring,
+ ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ amdgpu_ring_write(ring, ext->extent[i]);
+ }
+ }
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
+ amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
+ amdgpu_ring_write(ring, 0x8000);
+ amdgpu_ring_write(ring, 0x8000);
+
+ amdgpu_ring_commit(ring);
+
+ return 0;
+}
+
+static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 tmp;
+ u32 rb_bufsz;
+ u64 rb_addr, rptr_addr, wptr_gpu_addr;
+
+ /* Set the write pointer delay */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_DELAY), 0);
+
+ /* set the RB to use vmid 0 */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_VMID), 0);
+
+ /* Set ring buffer size */
+ ring = &adev->gfx.gfx_ring[0];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
+#endif
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp);
+
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr));
+
+ /* set the wb address wether it's enabled or not */
+ rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR), lower_32_bits(rptr_addr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR_HI), upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO), lower_32_bits(wptr_gpu_addr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI), upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE), rb_addr);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE_HI), upper_32_bits(rb_addr));
+
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL));
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
+ }
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL), tmp);
+
+ tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+ DOORBELL_RANGE_LOWER, ring->doorbell_index);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER), tmp);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER),
+ CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+
+
+ /* start the ring */
+ gfx_v9_0_cp_gfx_start(adev);
+ ring->ready = true;
+
+ return 0;
+}
+
+static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
+{
+ int i;
+
+ if (enable) {
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL), 0);
+ } else {
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL),
+ (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ adev->gfx.compute_ring[i].ready = false;
+ adev->gfx.kiq.ring.ready = false;
+ }
+ udelay(50);
+}
+
+static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev)
+{
+ gfx_v9_0_cp_compute_enable(adev, true);
+
+ return 0;
+}
+
+static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+ const __le32 *fw_data;
+ unsigned i;
+ u32 tmp;
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ gfx_v9_0_cp_compute_enable(adev, false);
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ tmp = 0;
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_CNTL), tmp);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_LO),
+ adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_HI),
+ upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+
+ /* MEC1 */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR),
+ mec_hdr->jt_offset);
+ for (i = 0; i < mec_hdr->jt_size; i++)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA),
+ le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR),
+ adev->gfx.mec_fw_version);
+ /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
+
+ return 0;
+}
+
+static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+
+ if (ring->mqd_obj) {
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
+
+ amdgpu_bo_unpin(ring->mqd_obj);
+ amdgpu_bo_unreserve(ring->mqd_obj);
+
+ amdgpu_bo_unref(&ring->mqd_obj);
+ ring->mqd_obj = NULL;
+ }
+ }
+}
+
+static int gfx_v9_0_init_queue(struct amdgpu_ring *ring);
+
+static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+ if (gfx_v9_0_init_queue(ring))
+ dev_warn(adev->dev, "compute queue %d init failed!\n", i);
+ }
+
+ r = gfx_v9_0_cp_compute_start(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/* KIQ functions */
+static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), tmp);
+ tmp |= 0x80;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), tmp);
+}
+
+static void gfx_v9_0_kiq_enable(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_alloc(ring, 8);
+ /* set resources */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
+ amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */
+ amdgpu_ring_write(ring, 0); /* queue mask hi */
+ amdgpu_ring_write(ring, 0); /* gws mask lo */
+ amdgpu_ring_write(ring, 0); /* gws mask hi */
+ amdgpu_ring_write(ring, 0); /* oac mask */
+ amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */
+ amdgpu_ring_commit(ring);
+ udelay(50);
+}
+
+static void gfx_v9_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ uint64_t mqd_addr, wptr_addr;
+
+ mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ amdgpu_ring_alloc(kiq_ring, 8);
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ (0 << 4) | /* Queue_Sel */
+ (0 << 8) | /* VMID */
+ (ring->queue << 13 ) |
+ (ring->pipe << 16) |
+ ((ring->me == 1 ? 0 : 1) << 18) |
+ (0 << 21) | /*queue_type: normal compute queue */
+ (1 << 24) | /* alloc format: all_on_one_pipe */
+ (0 << 26) | /* engine_sel: compute */
+ (1 << 29)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+ amdgpu_ring_commit(kiq_ring);
+ udelay(50);
+}
+
+static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000003;
+
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(MEC_HPD_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell? */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
+
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ }
+ else
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ ring->wptr = 0;
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL));
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
+#endif
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ tmp = 0;
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* activate the queue */
+ mqd->cp_hqd_active = 1;
+
+ return 0;
+}
+
+static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ uint32_t tmp;
+ int j;
+
+ /* disable wptr polling */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL));
+ tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL), tmp);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL),
+ mqd->cp_hqd_eop_control);
+
+ /* enable doorbell? */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* disable the queue if it's active */
+ if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) {
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
+ mqd->cp_hqd_dequeue_request);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR),
+ mqd->cp_hqd_pq_rptr);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR),
+ mqd->cp_mqd_base_addr_lo);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI),
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL),
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE),
+ mqd->cp_hqd_pq_base_lo);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI),
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL),
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR),
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI),
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER),
+ (AMDGPU_DOORBELL64_KIQ *2) << 2);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER),
+ (AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2);
+ }
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE),
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE),
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell) {
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_STATUS));
+ tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_STATUS), tmp);
+ }
+
+ return 0;
+}
+
+static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
+ int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
+
+ if (is_kiq) {
+ gfx_v9_0_kiq_setting(&kiq->ring);
+ } else {
+ mqd_idx = ring - &adev->gfx.compute_ring[0];
+ }
+
+ if (!adev->gfx.in_reset) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v9_0_mqd_init(ring);
+ if (is_kiq)
+ gfx_v9_0_kiq_init_register(ring);
+ soc15_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ } else { /* for GPU_RESET case */
+ /* reset MQD to a clean status */
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+
+ if (is_kiq) {
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v9_0_kiq_init_register(ring);
+ soc15_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+ }
+
+ if (is_kiq)
+ gfx_v9_0_kiq_enable(ring);
+ else
+ gfx_v9_0_map_queue_enable(&kiq->ring, ring);
+
+ return 0;
+}
+
+static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = NULL;
+ int r = 0, i;
+
+ gfx_v9_0_cp_compute_enable(adev, true);
+
+ ring = &adev->gfx.kiq.ring;
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto done;
+
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (!r) {
+ r = gfx_v9_0_kiq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ }
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ if (r)
+ goto done;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto done;
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (!r) {
+ r = gfx_v9_0_kiq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ }
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ if (r)
+ goto done;
+ }
+
+done:
+ return r;
+}
+
+static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
+{
+ int r,i;
+ struct amdgpu_ring *ring;
+
+ if (!(adev->flags & AMD_IS_APU))
+ gfx_v9_0_enable_gui_idle_interrupt(adev, false);
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ /* legacy firmware loading */
+ r = gfx_v9_0_cp_gfx_load_microcode(adev);
+ if (r)
+ return r;
+
+ r = gfx_v9_0_cp_compute_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v9_0_cp_gfx_resume(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_sriov_vf(adev))
+ r = gfx_v9_0_kiq_resume(adev);
+ else
+ r = gfx_v9_0_cp_compute_resume(adev);
+ if (r)
+ return r;
+
+ ring = &adev->gfx.gfx_ring[0];
+ r = amdgpu_ring_test_ring(ring);
+ if (r) {
+ ring->ready = false;
+ return r;
+ }
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+
+ ring->ready = true;
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ ring->ready = false;
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ ring = &adev->gfx.kiq.ring;
+ ring->ready = true;
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ ring->ready = false;
+ }
+
+ gfx_v9_0_enable_gui_idle_interrupt(adev, true);
+
+ return 0;
+}
+
+static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
+{
+ gfx_v9_0_cp_gfx_enable(adev, enable);
+ gfx_v9_0_cp_compute_enable(adev, enable);
+}
+
+static int gfx_v9_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gfx_v9_0_init_golden_registers(adev);
+
+ gfx_v9_0_gpu_init(adev);
+
+ r = gfx_v9_0_rlc_resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v9_0_cp_resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v9_0_ngg_en(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static int gfx_v9_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ if (amdgpu_sriov_vf(adev)) {
+ pr_debug("For SRIOV client, shouldn't do anything.\n");
+ return 0;
+ }
+ gfx_v9_0_cp_enable(adev, false);
+ gfx_v9_0_rlc_stop(adev);
+ gfx_v9_0_cp_compute_fini(adev);
+
+ return 0;
+}
+
+static int gfx_v9_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return gfx_v9_0_hw_fini(adev);
+}
+
+static int gfx_v9_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return gfx_v9_0_hw_init(adev);
+}
+
+static bool gfx_v9_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (REG_GET_FIELD(RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)),
+ GRBM_STATUS, GUI_ACTIVE))
+ return false;
+ else
+ return true;
+}
+
+static int gfx_v9_0_wait_for_idle(void *handle)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read MC_STATUS */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)) &
+ GRBM_STATUS__GUI_ACTIVE_MASK;
+
+ if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static void gfx_v9_0_print_status(void *handle)
+{
+ int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ dev_info(adev->dev, "GFX 9.x registers\n");
+ dev_info(adev->dev, " GRBM_STATUS=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)));
+ dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2)));
+ dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE0)));
+ dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE1)));
+ dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE2)));
+ dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE3)));
+ dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_STAT)));
+ dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT1)));
+ dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT2)));
+ dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT3)));
+ dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPF_BUSY_STAT)));
+ dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STALLED_STAT1)));
+ dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STATUS)));
+ dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_BUSY_STAT)));
+ dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STALLED_STAT1)));
+ dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STATUS)));
+
+ for (i = 0; i < 32; i++) {
+ dev_info(adev->dev, " GB_TILE_MODE%d=0x%08X\n",
+ i, RREG32(SOC15_REG_OFFSET(GC, 0, mmGB_TILE_MODE0 ) + i*4));
+ }
+ for (i = 0; i < 16; i++) {
+ dev_info(adev->dev, " GB_MACROTILE_MODE%d=0x%08X\n",
+ i, RREG32(SOC15_REG_OFFSET(GC, 0, mmGB_MACROTILE_MODE0) + i*4));
+ }
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ dev_info(adev->dev, " se: %d\n", i);
+ gfx_v9_0_select_se_sh(adev, i, 0xffffffff, 0xffffffff);
+ dev_info(adev->dev, " PA_SC_RASTER_CONFIG=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_RASTER_CONFIG)));
+ dev_info(adev->dev, " PA_SC_RASTER_CONFIG_1=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_RASTER_CONFIG_1)));
+ }
+ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+
+ dev_info(adev->dev, " GB_ADDR_CONFIG=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)));
+
+ dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEQ_THRESHOLDS)));
+ dev_info(adev->dev, " SX_DEBUG_1=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSX_DEBUG_1)));
+ dev_info(adev->dev, " TA_CNTL_AUX=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX)));
+ dev_info(adev->dev, " SPI_CONFIG_CNTL=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL)));
+ dev_info(adev->dev, " SQ_CONFIG=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG)));
+ dev_info(adev->dev, " DB_DEBUG=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG)));
+ dev_info(adev->dev, " DB_DEBUG2=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2)));
+ dev_info(adev->dev, " DB_DEBUG3=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG3)));
+ dev_info(adev->dev, " CB_HW_CONTROL=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL)));
+ dev_info(adev->dev, " SPI_CONFIG_CNTL_1=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1)));
+ dev_info(adev->dev, " PA_SC_FIFO_SIZE=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FIFO_SIZE)));
+ dev_info(adev->dev, " VGT_NUM_INSTANCES=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmVGT_NUM_INSTANCES)));
+ dev_info(adev->dev, " CP_PERFMON_CNTL=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL)));
+ dev_info(adev->dev, " PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FORCE_EOV_MAX_CNTS)));
+ dev_info(adev->dev, " VGT_CACHE_INVALIDATION=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION)));
+ dev_info(adev->dev, " VGT_GS_VERTEX_REUSE=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmVGT_GS_VERTEX_REUSE)));
+ dev_info(adev->dev, " PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE)));
+ dev_info(adev->dev, " PA_CL_ENHANCE=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_CL_ENHANCE)));
+ dev_info(adev->dev, " PA_SC_ENHANCE=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE)));
+
+ dev_info(adev->dev, " CP_ME_CNTL=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL)));
+ dev_info(adev->dev, " CP_MAX_CONTEXT=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MAX_CONTEXT)));
+ dev_info(adev->dev, " CP_DEVICE_ID=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_DEVICE_ID)));
+
+ dev_info(adev->dev, " CP_SEM_WAIT_TIMER=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_SEM_WAIT_TIMER)));
+
+ dev_info(adev->dev, " CP_RB_WPTR_DELAY=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_DELAY)));
+ dev_info(adev->dev, " CP_RB_VMID=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_VMID)));
+ dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL)));
+ dev_info(adev->dev, " CP_RB0_WPTR=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR)));
+ dev_info(adev->dev, " CP_RB0_RPTR_ADDR=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR)));
+ dev_info(adev->dev, " CP_RB0_RPTR_ADDR_HI=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR_HI)));
+ dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL)));
+ dev_info(adev->dev, " CP_RB0_BASE=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE)));
+ dev_info(adev->dev, " CP_RB0_BASE_HI=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE_HI)));
+ dev_info(adev->dev, " CP_MEC_CNTL=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL)));
+
+ dev_info(adev->dev, " SCRATCH_ADDR=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_ADDR)));
+ dev_info(adev->dev, " SCRATCH_UMSK=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_UMSK)));
+
+ dev_info(adev->dev, " CP_INT_CNTL_RING0=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0)));
+ dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_CNTL)));
+ dev_info(adev->dev, " RLC_CNTL=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL)));
+ dev_info(adev->dev, " RLC_CGCG_CGLS_CTRL=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)));
+ dev_info(adev->dev, " RLC_LB_CNTR_INIT=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_CNTR_INIT)));
+ dev_info(adev->dev, " RLC_LB_CNTR_MAX=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_CNTR_MAX)));
+ dev_info(adev->dev, " RLC_LB_INIT_CU_MASK=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_INIT_CU_MASK)));
+ dev_info(adev->dev, " RLC_LB_PARAMS=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_PARAMS)));
+ dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_CNTL)));
+ dev_info(adev->dev, " RLC_UCODE_CNTL=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_UCODE_CNTL)));
+
+ dev_info(adev->dev, " RLC_GPM_GENERAL_6=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_6)));
+ dev_info(adev->dev, " RLC_GPM_GENERAL_12=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_12)));
+ dev_info(adev->dev, " RLC_GPM_TIMER_INT_3=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_TIMER_INT_3)));
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < 16; i++) {
+ soc15_grbm_select(adev, 0, 0, 0, i);
+ dev_info(adev->dev, " VM %d:\n", i);
+ dev_info(adev->dev, " SH_MEM_CONFIG=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG)));
+ dev_info(adev->dev, " SH_MEM_BASES=0x%08X\n",
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES)));
+ }
+ soc15_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static int gfx_v9_0_soft_reset(void *handle)
+{
+ u32 grbm_soft_reset = 0;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* GRBM_STATUS */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS));
+ if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
+ GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
+ GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
+ GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
+ GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
+ GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
+ }
+
+ if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
+ }
+
+ /* GRBM_STATUS2 */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2));
+ if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+
+
+ if (grbm_soft_reset ) {
+ gfx_v9_0_print_status((void *)adev);
+ /* stop the rlc */
+ gfx_v9_0_rlc_stop(adev);
+
+ /* Disable GFX parsing/prefetching */
+ gfx_v9_0_cp_gfx_enable(adev, false);
+
+ /* Disable MEC parsing/prefetching */
+ gfx_v9_0_cp_compute_enable(adev, false);
+
+ if (grbm_soft_reset) {
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp);
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
+
+ udelay(50);
+
+ tmp &= ~grbm_soft_reset;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp);
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
+ }
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ gfx_v9_0_print_status((void *)adev);
+ }
+ return 0;
+}
+
+static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock;
+
+ mutex_lock(&adev->gfx.gpu_clock_mutex);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT), 1);
+ clock = (uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB)) |
+ ((uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB)) << 32ULL);
+ mutex_unlock(&adev->gfx.gpu_clock_mutex);
+ return clock;
+}
+
+static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
+{
+ gds_base = gds_base >> AMDGPU_GDS_SHIFT;
+ gds_size = gds_size >> AMDGPU_GDS_SHIFT;
+
+ gws_base = gws_base >> AMDGPU_GWS_SHIFT;
+ gws_size = gws_size >> AMDGPU_GWS_SHIFT;
+
+ oa_base = oa_base >> AMDGPU_OA_SHIFT;
+ oa_size = oa_size >> AMDGPU_OA_SHIFT;
+
+ /* GDS Base */
+ gfx_v9_0_write_data_to_reg(ring, 0, false,
+ amdgpu_gds_reg_offset[vmid].mem_base,
+ gds_base);
+
+ /* GDS Size */
+ gfx_v9_0_write_data_to_reg(ring, 0, false,
+ amdgpu_gds_reg_offset[vmid].mem_size,
+ gds_size);
+
+ /* GWS */
+ gfx_v9_0_write_data_to_reg(ring, 0, false,
+ amdgpu_gds_reg_offset[vmid].gws,
+ gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
+
+ /* OA */
+ gfx_v9_0_write_data_to_reg(ring, 0, false,
+ amdgpu_gds_reg_offset[vmid].oa,
+ (1 << (oa_size + oa_base)) - (1 << oa_base));
+}
+
+static int gfx_v9_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
+ adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS;
+ gfx_v9_0_set_ring_funcs(adev);
+ gfx_v9_0_set_irq_funcs(adev);
+ gfx_v9_0_set_gds_init(adev);
+ gfx_v9_0_set_rlc_funcs(adev);
+
+ return 0;
+}
+
+static int gfx_v9_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
+{
+ uint32_t rlc_setting, data;
+ unsigned i;
+
+ if (adev->gfx.rlc.in_safe_mode)
+ return;
+
+ /* if RLC is not enabled, do nothing */
+ rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
+ if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
+ return;
+
+ if (adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG)) {
+ data = RLC_SAFE_MODE__CMD_MASK;
+ data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+ adev->gfx.rlc.in_safe_mode = true;
+ }
+}
+
+static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
+{
+ uint32_t rlc_setting, data;
+
+ if (!adev->gfx.rlc.in_safe_mode)
+ return;
+
+ /* if RLC is not enabled, do nothing */
+ rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
+ if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
+ return;
+
+ if (adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
+ /*
+ * Try to exit safe mode only if it is already in safe
+ * mode.
+ */
+ data = RLC_SAFE_MODE__CMD_MASK;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data);
+ adev->gfx.rlc.in_safe_mode = false;
+ }
+}
+
+static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+
+ /* It is disabled by HW by default */
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+ /* 1 - RLC_CGTT_MGCG_OVERRIDE */
+ def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+
+ /* only for Vega10 & Raven1 */
+ data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
+
+ /* MGLS is a global flag to control all MGLS in GFX */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+ /* 2 - RLC memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
+ def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
+ data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data);
+ }
+ /* 3 - CP memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
+ def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
+ data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data);
+ }
+ }
+ } else {
+ /* 1 - MGCG_OVERRIDE */
+ def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+ data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
+
+ /* 2 - disable MGLS in RLC */
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
+ data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data);
+ }
+
+ /* 3 - disable MGLS in CP */
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
+ data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data);
+ }
+ }
+}
+
+static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+
+ adev->gfx.rlc.funcs->enter_safe_mode(adev);
+
+ /* Enable 3D CGCG/CGLS */
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
+ /* write cmd to clear cgcg/cgls ov */
+ def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+ /* unset CGCG override */
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+ /* update CGCG and CGLS override bits */
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
+ /* enable 3Dcgcg FSM(0x0020003f) */
+ def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
+ data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
+ data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
+ } else {
+ /* Disable CGCG/CGLS */
+ def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
+ /* disable cgcg, cgls should be disabled */
+ data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
+ RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
+ /* disable cgcg and cgls in FSM */
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data);
+ }
+
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
+}
+
+static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ adev->gfx.rlc.funcs->enter_safe_mode(adev);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
+ def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+ /* unset CGCG override */
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ /* update CGCG and CGLS override bits */
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
+
+ /* enable cgcg FSM(0x0020003F) */
+ def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
+ data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
+ data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
+ } else {
+ def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
+ /* reset CGCG/CGLS bits */
+ data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
+ /* disable cgcg and cgls in FSM */
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data);
+ }
+
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
+}
+
+static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ if (enable) {
+ /* CGCG/CGLS should be enabled after MGCG/MGLS
+ * === MGCG + MGLS ===
+ */
+ gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
+ /* === CGCG /CGLS for GFX 3D Only === */
+ gfx_v9_0_update_3d_clock_gating(adev, enable);
+ /* === CGCG + CGLS === */
+ gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
+ } else {
+ /* CGCG/CGLS should be disabled before MGCG/MGLS
+ * === CGCG + CGLS ===
+ */
+ gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
+ /* === CGCG /CGLS for GFX 3D Only === */
+ gfx_v9_0_update_3d_clock_gating(adev, enable);
+ /* === MGCG + MGLS === */
+ gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
+ }
+ return 0;
+}
+
+static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
+ .enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode,
+ .exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode
+};
+
+static int gfx_v9_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static int gfx_v9_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ gfx_v9_0_update_gfx_clock_gating(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_GFX_MGCG */
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGCG */
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
+ if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+ /* AMD_CG_SUPPORT_GFX_RLC_LS */
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+ /* AMD_CG_SUPPORT_GFX_CP_LS */
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGCG */
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
+}
+
+static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+ return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
+}
+
+static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
+ } else {
+ wptr = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR));
+ wptr += (u64)RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI)) << 32;
+ }
+
+ return wptr;
+}
+
+static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr));
+ }
+}
+
+static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ u32 ref_and_mask, reg_mem_engine;
+ struct nbio_hdp_flush_reg *nbio_hf_reg;
+
+ if (ring->adev->asic_type == CHIP_VEGA10)
+ nbio_hf_reg = &nbio_v6_1_hdp_flush_reg;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ reg_mem_engine = 0;
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ reg_mem_engine = 1; /* pfp */
+ }
+
+ gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
+ nbio_hf_reg->hdp_flush_req_offset,
+ nbio_hf_reg->hdp_flush_done_offset,
+ ref_and_mask, ref_and_mask, 0x20);
+}
+
+static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+ gfx_v9_0_write_data_to_reg(ring, 0, true,
+ SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 1);
+}
+
+static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ struct amdgpu_ib *ib,
+ unsigned vm_id, bool ctx_switch)
+{
+ u32 header, control = 0;
+
+ if (ib->flags & AMDGPU_IB_FLAG_CE)
+ header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
+ else
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+ control |= ib->length_dw | (vm_id << 24);
+
+ if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT))
+ control |= INDIRECT_BUFFER_PRE_ENB(1);
+
+ amdgpu_ring_write(ring, header);
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+#define INDIRECT_BUFFER_VALID (1 << 23)
+
+static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_ib *ib,
+ unsigned vm_id, bool ctx_switch)
+{
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+ amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
+ EOP_TC_WB_ACTION_EN |
+ EOP_TC_MD_ACTION_EN |
+ EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
+
+ /*
+ * the address should be Qword aligned if 64bit write, Dword
+ * aligned if only send 32bit data low (discard data high)
+ */
+ if (write64bit)
+ BUG_ON(addr & 0x7);
+ else
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
+ lower_32_bits(addr), upper_32_bits(addr),
+ seq, 0xffffffff, 4);
+}
+
+static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vm_id, uint64_t pd_addr)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ unsigned eng = ring->idx;
+ unsigned i;
+
+ pd_addr = pd_addr | 0x1; /* valid bit */
+ /* now only use physical base address of PDE and valid */
+ BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+
+ for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
+ uint32_t req = hub->get_invalidate_req(vm_id);
+
+ gfx_v9_0_write_data_to_reg(ring, usepfp, true,
+ hub->ctx0_ptb_addr_lo32
+ + (2 * vm_id),
+ lower_32_bits(pd_addr));
+
+ gfx_v9_0_write_data_to_reg(ring, usepfp, true,
+ hub->ctx0_ptb_addr_hi32
+ + (2 * vm_id),
+ upper_32_bits(pd_addr));
+
+ gfx_v9_0_write_data_to_reg(ring, usepfp, true,
+ hub->vm_inv_eng0_req + eng, req);
+
+ /* wait for the invalidate to complete */
+ gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
+ eng, 0, 1 << vm_id, 1 << vm_id, 0x20);
+ }
+
+ /* compute doesn't have PFP */
+ if (usepfp) {
+ /* sync PFP to ME, otherwise we might get invalid PFP reads */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+ amdgpu_ring_write(ring, 0x0);
+ }
+}
+
+static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+ return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
+}
+
+static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
+ else
+ BUG();
+ return wptr;
+}
+
+static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else{
+ BUG(); /* only DOORBELL method supported on gfx9 now */
+ }
+}
+
+static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ /* we only allocate 32bit for each seq wb address */
+ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* write fence seq to the "addr" */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* set register to trigger INT */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ }
+}
+
+static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
+{
+ static struct v9_ce_ib_state ce_payload = {0};
+ uint64_t csa_addr;
+ int cnt;
+
+ cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
+ csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
+ WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) |
+ WRITE_DATA_CACHE_POLICY(0));
+ amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
+ amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
+ amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
+}
+
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
+{
+ static struct v9_de_ib_state de_payload = {0};
+ uint64_t csa_addr, gds_addr;
+ int cnt;
+
+ csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
+ gds_addr = csa_addr + 4096;
+ de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
+ de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
+
+ cnt = (sizeof(de_payload) >> 2) + 4 - 2;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
+ WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) |
+ WRITE_DATA_CACHE_POLICY(0));
+ amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
+ amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
+ amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
+}
+
+static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ if (amdgpu_sriov_vf(ring->adev))
+ gfx_v9_0_ring_emit_ce_meta(ring);
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs for GFX */
+ dw2 |= 0x10002;
+
+ /* set load_ce_ram if preamble presented */
+ if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
+ dw2 |= 0x10000000;
+ } else {
+ /* still load_ce_ram if this is the first time preamble presented
+ * although there is no context switch happens.
+ */
+ if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
+ dw2 |= 0x10000000;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+
+ if (amdgpu_sriov_vf(ring->adev))
+ gfx_v9_0_ring_emit_de_meta(ring);
+}
+
+static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+{
+ unsigned ret;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+ amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ ret = ring->wptr & ring->buf_mask;
+ amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ return ret;
+}
+
+static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
+{
+ unsigned cur;
+ BUG_ON(offset > ring->buf_mask);
+ BUG_ON(ring->ring[offset] != 0x55aa55aa);
+
+ cur = (ring->wptr & ring->buf_mask) - 1;
+ if (likely(cur > offset))
+ ring->ring[offset] = cur - offset;
+ else
+ ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
+}
+
+static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+ amdgpu_ring_write(ring, 0 | /* src: register*/
+ (5 << 8) | /* dst: memory */
+ (1 << 20)); /* write confirm */
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+ adev->virt.reg_val_offs * 4));
+ amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+ adev->virt.reg_val_offs * 4));
+}
+
+static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
+ cp_int_cntl =
+ REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+ int me, int pipe,
+ enum amdgpu_interrupt_state state)
+{
+ u32 mec_int_cntl, mec_int_cntl_reg;
+
+ /*
+ * amdgpu controls only pipe 0 of MEC1. That's why this function only
+ * handles the setting of interrupts for this specific pipe. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+
+ if (me == 1) {
+ switch (pipe) {
+ case 0:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 0);
+ WREG32(mec_int_cntl_reg, mec_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 1);
+ WREG32(mec_int_cntl_reg, mec_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE, 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE, 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CP_IRQ_GFX_EOP:
+ gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
+ gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
+ gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
+ gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
+ gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int i;
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+
+ DRM_DEBUG("IH: CP EOP\n");
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead of per queue.
+ */
+ if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ schedule_work(&adev->reset_work);
+ return 0;
+}
+
+static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ schedule_work(&adev->reset_work);
+ return 0;
+}
+
+static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t tmp, target;
+ struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data;
+
+ BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
+
+ if (ring->me == 1)
+ target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ else
+ target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL);
+ target += ring->pipe;
+
+ switch (type) {
+ case AMDGPU_CP_KIQ_IRQ_DRIVER0:
+ if (state == AMDGPU_IRQ_STATE_DISABLE) {
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL));
+ tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
+ GENERIC2_INT_ENABLE, 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), tmp);
+
+ tmp = RREG32(target);
+ tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
+ GENERIC2_INT_ENABLE, 0);
+ WREG32(target, tmp);
+ } else {
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL));
+ tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
+ GENERIC2_INT_ENABLE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), tmp);
+
+ tmp = RREG32(target);
+ tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
+ GENERIC2_INT_ENABLE, 1);
+ WREG32(target, tmp);
+ }
+ break;
+ default:
+ BUG(); /* kiq only support GENERIC2_INT now */
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data;
+
+ BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+ DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
+ me_id, pipe_id, queue_id);
+
+ amdgpu_fence_process(ring);
+ return 0;
+}
+
+const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
+ .name = "gfx_v9_0",
+ .early_init = gfx_v9_0_early_init,
+ .late_init = gfx_v9_0_late_init,
+ .sw_init = gfx_v9_0_sw_init,
+ .sw_fini = gfx_v9_0_sw_fini,
+ .hw_init = gfx_v9_0_hw_init,
+ .hw_fini = gfx_v9_0_hw_fini,
+ .suspend = gfx_v9_0_suspend,
+ .resume = gfx_v9_0_resume,
+ .is_idle = gfx_v9_0_is_idle,
+ .wait_for_idle = gfx_v9_0_wait_for_idle,
+ .soft_reset = gfx_v9_0_soft_reset,
+ .set_clockgating_state = gfx_v9_0_set_clockgating_state,
+ .set_powergating_state = gfx_v9_0_set_powergating_state,
+ .get_clockgating_state = gfx_v9_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
+ .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ 5 + /* COND_EXEC */
+ 7 + /* PIPELINE_SYNC */
+ 46 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 20 + /* GDS switch */
+ 4 + /* double SWITCH_BUFFER,
+ the first COND_EXEC jump to the place just
+ prior to this double SWITCH_BUFFER */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 14 + /* CE_META */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 8 + 8 + /* FENCE x2 */
+ 2, /* SWITCH_BUFFER */
+ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v9_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
+ .test_ring = gfx_v9_0_ring_test_ring,
+ .test_ib = gfx_v9_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_switch_buffer = gfx_v9_ring_emit_sb,
+ .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
+ .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
+ .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v9_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v9_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v9_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v9_0_ring_emit_gds_switch */
+ 7 + /* gfx_v9_0_ring_emit_hdp_flush */
+ 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
+ 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
+ 64 + /* gfx_v9_0_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v9_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v9_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
+ .test_ring = gfx_v9_0_ring_test_ring,
+ .test_ib = gfx_v9_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v9_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v9_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v9_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v9_0_ring_emit_gds_switch */
+ 7 + /* gfx_v9_0_ring_emit_hdp_flush */
+ 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
+ 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
+ 64 + /* gfx_v9_0_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v9_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
+ .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
+ .test_ring = gfx_v9_0_ring_test_ring,
+ .test_ib = gfx_v9_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v9_0_ring_emit_rreg,
+ .emit_wreg = gfx_v9_0_ring_emit_wreg,
+};
+
+static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v9_0_kiq_irq_funcs = {
+ .set = gfx_v9_0_kiq_set_interrupt_state,
+ .process = gfx_v9_0_kiq_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
+ .set = gfx_v9_0_set_eop_interrupt_state,
+ .process = gfx_v9_0_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
+ .set = gfx_v9_0_set_priv_reg_fault_state,
+ .process = gfx_v9_0_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
+ .set = gfx_v9_0_set_priv_inst_fault_state,
+ .process = gfx_v9_0_priv_inst_irq,
+};
+
+static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
+
+ adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
+ adev->gfx.kiq.irq.funcs = &gfx_v9_0_kiq_irq_funcs;
+}
+
+static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
+{
+ /* init asci gds info */
+ adev->gds.mem.total_size = RREG32(SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE));
+ adev->gds.gws.total_size = 64;
+ adev->gds.oa.total_size = 16;
+
+ if (adev->gds.mem.total_size == 64 * 1024) {
+ adev->gds.mem.gfx_partition_size = 4096;
+ adev->gds.mem.cs_partition_size = 4096;
+
+ adev->gds.gws.gfx_partition_size = 4;
+ adev->gds.gws.cs_partition_size = 4;
+
+ adev->gds.oa.gfx_partition_size = 4;
+ adev->gds.oa.cs_partition_size = 1;
+ } else {
+ adev->gds.mem.gfx_partition_size = 1024;
+ adev->gds.mem.cs_partition_size = 1024;
+
+ adev->gds.gws.gfx_partition_size = 16;
+ adev->gds.gws.cs_partition_size = 16;
+
+ adev->gds.oa.gfx_partition_size = 4;
+ adev->gds.oa.cs_partition_size = 4;
+ }
+}
+
+static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 data, mask;
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG));
+ data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG));
+
+ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+
+ mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
+
+ return (~data) & mask;
+}
+
+static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ memset(cu_info, 0, sizeof(*cu_info));
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ mask = 1;
+ ao_bitmap = 0;
+ counter = 0;
+ gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+ bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
+ cu_info->bitmap[i][j] = bitmap;
+
+ for (k = 0; k < 16; k ++) {
+ if (bitmap & mask) {
+ if (counter < 2)
+ ao_bitmap |= mask;
+ counter ++;
+ }
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+ }
+ }
+ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->ao_cu_mask = ao_cu_mask;
+
+ return 0;
+}
+
+static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
+{
+ int r, j;
+ u32 tmp;
+ bool use_doorbell = true;
+ u64 hqd_gpu_addr;
+ u64 mqd_gpu_addr;
+ u64 eop_gpu_addr;
+ u64 wb_gpu_addr;
+ u32 *buf;
+ struct v9_mqd *mqd;
+ struct amdgpu_device *adev;
+
+ adev = ring->adev;
+ if (ring->mqd_obj == NULL) {
+ r = amdgpu_bo_create(adev,
+ sizeof(struct v9_mqd),
+ PAGE_SIZE,true,
+ AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
+ NULL, &ring->mqd_obj);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
+ return r;
+ }
+ }
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0)) {
+ gfx_v9_0_cp_compute_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
+ &mqd_gpu_addr);
+ if (r) {
+ dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
+ gfx_v9_0_cp_compute_fini(adev);
+ return r;
+ }
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
+ if (r) {
+ dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
+ gfx_v9_0_cp_compute_fini(adev);
+ return r;
+ }
+
+ /* init the mqd struct */
+ memset(buf, 0, sizeof(struct v9_mqd));
+
+ mqd = (struct v9_mqd *)buf;
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000003;
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me,
+ ring->pipe,
+ ring->queue, 0);
+ /* disable wptr polling */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL));
+ tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL), tmp);
+
+ /* write the EOP addr */
+ BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't handle other cases eop address */
+ eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE);
+ eop_gpu_addr >>= 8;
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR), lower_32_bits(eop_gpu_addr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), upper_32_bits(eop_gpu_addr));
+ mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr);
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(MEC_HPD_SIZE / 4) - 1));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL), tmp);
+
+ /* enable doorbell? */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
+ if (use_doorbell)
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), tmp);
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ ring->wptr = 0;
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+ if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) {
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), mqd->cp_hqd_dequeue_request);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR), mqd->cp_hqd_pq_rptr);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR), mqd->cp_mqd_base_addr_lo);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI), mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL));
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL), tmp);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE), mqd->cp_hqd_pq_base_lo);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI), mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
+#endif
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL), tmp);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address wether it's enabled or not */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR),
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI),
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (use_doorbell) {
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER),
+ (AMDGPU_DOORBELL64_KIQ * 2) << 2);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER),
+ (AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2);
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ } else {
+ mqd->cp_hqd_pq_doorbell_control = 0;
+ }
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid);
+
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE), tmp);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* activate the queue */
+ mqd->cp_hqd_active = 1;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), mqd->cp_hqd_active);
+
+ soc15_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ amdgpu_bo_unreserve(ring->mqd_obj);
+
+ if (use_doorbell) {
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_STATUS));
+ tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_STATUS), tmp);
+ }
+
+ return 0;
+}
+
+const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 9,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfx_v9_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
new file mode 100644
index 000000000000..56ef652a575d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V9_0_H__
+#define __GFX_V9_0_H__
+
+extern const struct amd_ip_funcs gfx_v9_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block;
+
+void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num);
+
+uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
+int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
new file mode 100644
index 000000000000..30ef3126c8a9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -0,0 +1,458 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "gfxhub_v1_0.h"
+
+#include "vega10/soc15ip.h"
+#include "vega10/GC/gc_9_0_offset.h"
+#include "vega10/GC/gc_9_0_sh_mask.h"
+#include "vega10/GC/gc_9_0_default.h"
+#include "vega10/vega10_enum.h"
+
+#include "soc15_common.h"
+
+int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ u64 value;
+ u32 i;
+
+ /* Program MC. */
+ /* Update configuration */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR),
+ adev->mc.vram_start >> 18);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR),
+ adev->mc.vram_end >> 18);
+
+ value = adev->vram_scratch.gpu_addr - adev->mc.vram_start
+ + adev->vm_manager.vram_base_offset;
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB),
+ (u32)(value >> 12));
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB),
+ (u32)(value >> 44));
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are VF copy registers so
+ vbios post doesn't program them, for SRIOV driver need to program them */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_BASE),
+ adev->mc.vram_start >> 24);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_TOP),
+ adev->mc.vram_end >> 24);
+ }
+
+ /* Disable AGP. */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BASE), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_TOP), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BOT), 0xFFFFFFFF);
+
+ /* GART Enable. */
+
+ /* Setup TLB control */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL));
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp,
+ MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_ACCESS_MODE,
+ 3);
+ tmp = REG_SET_FIELD(tmp,
+ MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL,
+ 1);
+ tmp = REG_SET_FIELD(tmp,
+ MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS,
+ 0);
+ tmp = REG_SET_FIELD(tmp,
+ MC_VM_MX_L1_TLB_CNTL,
+ ECO_BITS,
+ 0);
+ tmp = REG_SET_FIELD(tmp,
+ MC_VM_MX_L1_TLB_CNTL,
+ MTYPE,
+ MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp,
+ MC_VM_MX_L1_TLB_CNTL,
+ ATC_EN,
+ 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL));
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp,
+ VM_L2_CNTL,
+ ENABLE_L2_FRAGMENT_PROCESSING,
+ 0);
+ tmp = REG_SET_FIELD(tmp,
+ VM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);/* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1);
+ tmp = REG_SET_FIELD(tmp,
+ VM_L2_CNTL,
+ CONTEXT1_IDENTITY_ACCESS_MODE,
+ 1);
+ tmp = REG_SET_FIELD(tmp,
+ VM_L2_CNTL,
+ IDENTITY_MODE_FRAGMENT_SIZE,
+ 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp);
+
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2));
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2), tmp);
+
+ tmp = mmVM_L2_CNTL3_DEFAULT;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), tmp);
+
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4));
+ tmp = REG_SET_FIELD(tmp,
+ VM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL,
+ 0);
+ tmp = REG_SET_FIELD(tmp,
+ VM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL,
+ 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4), tmp);
+
+ /* setup context0 */
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32),
+ (u32)(adev->mc.gtt_start >> 12));
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32),
+ (u32)(adev->mc.gtt_start >> 44));
+
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32),
+ (u32)(adev->mc.gtt_end >> 12));
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32),
+ (u32)(adev->mc.gtt_end >> 44));
+
+ BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
+ value = adev->gart.table_addr - adev->mc.vram_start
+ + adev->vm_manager.vram_base_offset;
+ value &= 0x0000FFFFFFFFF000ULL;
+ value |= 0x1; /*valid bit*/
+
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32),
+ (u32)value);
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32),
+ (u32)(value >> 32));
+
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32),
+ (u32)(adev->dummy_page.addr >> 12));
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32),
+ (u32)((u64)adev->dummy_page.addr >> 44));
+
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2));
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY,
+ 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp);
+
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL));
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL), tmp);
+
+ /* Disable identity aperture.*/
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF);
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0);
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ amdgpu_vm_block_size - 9);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i, tmp);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+
+ return 0;
+}
+
+void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL) + i, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL));
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp,
+ MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL,
+ 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL));
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), 0);
+}
+
+/**
+ * gfxhub_v1_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL));
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ VM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp);
+}
+
+static uint32_t gfxhub_v1_0_get_invalidate_req(unsigned int vm_id)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vm_id*/
+ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vm_id);
+ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0);
+ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static uint32_t gfxhub_v1_0_get_vm_protection_bits(void)
+{
+ return (VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK);
+}
+
+static int gfxhub_v1_0_early_init(void *handle)
+{
+ return 0;
+}
+
+static int gfxhub_v1_0_late_init(void *handle)
+{
+ return 0;
+}
+
+static int gfxhub_v1_0_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->get_invalidate_req = gfxhub_v1_0_get_invalidate_req;
+ hub->get_vm_protection_bits = gfxhub_v1_0_get_vm_protection_bits;
+
+ return 0;
+}
+
+static int gfxhub_v1_0_sw_fini(void *handle)
+{
+ return 0;
+}
+
+static int gfxhub_v1_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned i;
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) +
+ 2 * i, 0xffffffff);
+ WREG32(SOC15_REG_OFFSET(GC, 0,
+ mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) +
+ 2 * i, 0x1f);
+ }
+
+ return 0;
+}
+
+static int gfxhub_v1_0_hw_fini(void *handle)
+{
+ return 0;
+}
+
+static int gfxhub_v1_0_suspend(void *handle)
+{
+ return 0;
+}
+
+static int gfxhub_v1_0_resume(void *handle)
+{
+ return 0;
+}
+
+static bool gfxhub_v1_0_is_idle(void *handle)
+{
+ return true;
+}
+
+static int gfxhub_v1_0_wait_for_idle(void *handle)
+{
+ return 0;
+}
+
+static int gfxhub_v1_0_soft_reset(void *handle)
+{
+ return 0;
+}
+
+static int gfxhub_v1_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int gfxhub_v1_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+const struct amd_ip_funcs gfxhub_v1_0_ip_funcs = {
+ .name = "gfxhub_v1_0",
+ .early_init = gfxhub_v1_0_early_init,
+ .late_init = gfxhub_v1_0_late_init,
+ .sw_init = gfxhub_v1_0_sw_init,
+ .sw_fini = gfxhub_v1_0_sw_fini,
+ .hw_init = gfxhub_v1_0_hw_init,
+ .hw_fini = gfxhub_v1_0_hw_fini,
+ .suspend = gfxhub_v1_0_suspend,
+ .resume = gfxhub_v1_0_resume,
+ .is_idle = gfxhub_v1_0_is_idle,
+ .wait_for_idle = gfxhub_v1_0_wait_for_idle,
+ .soft_reset = gfxhub_v1_0_soft_reset,
+ .set_clockgating_state = gfxhub_v1_0_set_clockgating_state,
+ .set_powergating_state = gfxhub_v1_0_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_GFXHUB,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfxhub_v1_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h
new file mode 100644
index 000000000000..5129a8ff0932
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V1_0_H__
+#define __GFXHUB_V1_0_H__
+
+int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev);
+void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev);
+void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value);
+
+extern const struct amd_ip_funcs gfxhub_v1_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 0635829b18cf..d9586601a437 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -367,7 +367,7 @@ static int gmc_v6_0_gart_set_pte_pde(struct amdgpu_device *adev,
void *cpu_pt_addr,
uint32_t gpu_page_idx,
uint64_t addr,
- uint32_t flags)
+ uint64_t flags)
{
void __iomem *ptr = (void *)cpu_pt_addr;
uint64_t value;
@@ -379,6 +379,21 @@ static int gmc_v6_0_gart_set_pte_pde(struct amdgpu_device *adev,
return 0;
}
+static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev,
+ uint32_t flags)
+{
+ uint64_t pte_flag = 0;
+
+ if (flags & AMDGPU_VM_PAGE_READABLE)
+ pte_flag |= AMDGPU_PTE_READABLE;
+ if (flags & AMDGPU_VM_PAGE_WRITEABLE)
+ pte_flag |= AMDGPU_PTE_WRITEABLE;
+ if (flags & AMDGPU_VM_PAGE_PRT)
+ pte_flag |= AMDGPU_PTE_PRT;
+
+ return pte_flag;
+}
+
static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
@@ -400,6 +415,60 @@ static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
WREG32(mmVM_CONTEXT1_CNTL, tmp);
}
+ /**
+ + * gmc_v8_0_set_prt - set PRT VM fault
+ + *
+ + * @adev: amdgpu_device pointer
+ + * @enable: enable/disable VM fault handling for PRT
+ +*/
+static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
+{
+ u32 tmp;
+
+ if (enable && !adev->mc.prt_warning) {
+ dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n");
+ adev->mc.prt_warning = true;
+ }
+
+ tmp = RREG32(mmVM_PRT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS,
+ enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ TC_DISABLE_FAULT_ON_UNMAPPED_ACCESS,
+ enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ L2_CACHE_STORE_INVALID_ENTRIES,
+ enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ L1_TLB_STORE_INVALID_ENTRIES,
+ enable);
+ WREG32(mmVM_PRT_CNTL, tmp);
+
+ if (enable) {
+ uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
+ uint32_t high = adev->vm_manager.max_pfn;
+
+ WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, high);
+ } else {
+ WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, 0x0);
+ }
+}
+
static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
{
int r, i;
@@ -500,6 +569,7 @@ static int gmc_v6_0_gart_init(struct amdgpu_device *adev)
if (r)
return r;
adev->gart.table_size = adev->gart.num_gpu_pages * 8;
+ adev->gart.gart_pte_flags = 0;
return amdgpu_gart_table_vram_alloc(adev);
}
@@ -551,6 +621,7 @@ static int gmc_v6_0_vm_init(struct amdgpu_device *adev)
* amdkfd will use VMIDs 8-15
*/
adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+ adev->vm_manager.num_level = 1;
amdgpu_vm_manager_init(adev);
/* base offset of vram pages */
@@ -769,11 +840,11 @@ static int gmc_v6_0_sw_init(void *handle)
int dma_bits;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, 147, &adev->mc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault);
if (r)
return r;
@@ -1039,7 +1110,7 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
if (printk_ratelimit()) {
dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
- entry->src_id, entry->src_data);
+ entry->src_id, entry->src_data[0]);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
@@ -1082,6 +1153,8 @@ static const struct amd_ip_funcs gmc_v6_0_ip_funcs = {
static const struct amdgpu_gart_funcs gmc_v6_0_gart_funcs = {
.flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb,
.set_pte_pde = gmc_v6_0_gart_set_pte_pde,
+ .set_prt = gmc_v6_0_set_prt,
+ .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
};
static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 8d05e0c4e3d7..0c0a6015cca5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -161,9 +161,7 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
out:
if (err) {
- printk(KERN_ERR
- "cik_mc: Failed to load firmware \"%s\"\n",
- fw_name);
+ pr_err("cik_mc: Failed to load firmware \"%s\"\n", fw_name);
release_firmware(adev->mc.fw);
adev->mc.fw = NULL;
}
@@ -441,7 +439,7 @@ static int gmc_v7_0_gart_set_pte_pde(struct amdgpu_device *adev,
void *cpu_pt_addr,
uint32_t gpu_page_idx,
uint64_t addr,
- uint32_t flags)
+ uint64_t flags)
{
void __iomem *ptr = (void *)cpu_pt_addr;
uint64_t value;
@@ -453,6 +451,21 @@ static int gmc_v7_0_gart_set_pte_pde(struct amdgpu_device *adev,
return 0;
}
+static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev,
+ uint32_t flags)
+{
+ uint64_t pte_flag = 0;
+
+ if (flags & AMDGPU_VM_PAGE_READABLE)
+ pte_flag |= AMDGPU_PTE_READABLE;
+ if (flags & AMDGPU_VM_PAGE_WRITEABLE)
+ pte_flag |= AMDGPU_PTE_WRITEABLE;
+ if (flags & AMDGPU_VM_PAGE_PRT)
+ pte_flag |= AMDGPU_PTE_PRT;
+
+ return pte_flag;
+}
+
/**
* gmc_v8_0_set_fault_enable_default - update VM fault handling
*
@@ -481,6 +494,62 @@ static void gmc_v7_0_set_fault_enable_default(struct amdgpu_device *adev,
}
/**
+ * gmc_v7_0_set_prt - set PRT VM fault
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable VM fault handling for PRT
+ */
+static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
+{
+ uint32_t tmp;
+
+ if (enable && !adev->mc.prt_warning) {
+ dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n");
+ adev->mc.prt_warning = true;
+ }
+
+ tmp = RREG32(mmVM_PRT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ CB_DISABLE_READ_FAULT_ON_UNMAPPED_ACCESS, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ CB_DISABLE_WRITE_FAULT_ON_UNMAPPED_ACCESS, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ TC_DISABLE_READ_FAULT_ON_UNMAPPED_ACCESS, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ TC_DISABLE_WRITE_FAULT_ON_UNMAPPED_ACCESS, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ L2_CACHE_STORE_INVALID_ENTRIES, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ L1_TLB_STORE_INVALID_ENTRIES, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ MASK_PDE0_FAULT, enable);
+ WREG32(mmVM_PRT_CNTL, tmp);
+
+ if (enable) {
+ uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
+ uint32_t high = adev->vm_manager.max_pfn;
+
+ WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, high);
+ } else {
+ WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, 0x0);
+ }
+}
+
+/**
* gmc_v7_0_gart_enable - gart enable
*
* @adev: amdgpu_device pointer
@@ -604,6 +673,7 @@ static int gmc_v7_0_gart_init(struct amdgpu_device *adev)
if (r)
return r;
adev->gart.table_size = adev->gart.num_gpu_pages * 8;
+ adev->gart.gart_pte_flags = 0;
return amdgpu_gart_table_vram_alloc(adev);
}
@@ -672,6 +742,7 @@ static int gmc_v7_0_vm_init(struct amdgpu_device *adev)
* amdkfd will use VMIDs 8-15
*/
adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+ adev->vm_manager.num_level = 1;
amdgpu_vm_manager_init(adev);
/* base offset of vram pages */
@@ -880,6 +951,14 @@ static int gmc_v7_0_early_init(void *handle)
gmc_v7_0_set_gart_funcs(adev);
gmc_v7_0_set_irq_funcs(adev);
+ adev->mc.shared_aperture_start = 0x2000000000000000ULL;
+ adev->mc.shared_aperture_end =
+ adev->mc.shared_aperture_start + (4ULL << 30) - 1;
+ adev->mc.private_aperture_start =
+ adev->mc.shared_aperture_end + 1;
+ adev->mc.private_aperture_end =
+ adev->mc.private_aperture_start + (4ULL << 30) - 1;
+
return 0;
}
@@ -907,11 +986,11 @@ static int gmc_v7_0_sw_init(void *handle)
adev->mc.vram_type = gmc_v7_0_convert_vram_type(tmp);
}
- r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, 147, &adev->mc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault);
if (r)
return r;
@@ -938,12 +1017,12 @@ static int gmc_v7_0_sw_init(void *handle)
if (r) {
adev->need_dma32 = true;
dma_bits = 32;
- printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
+ pr_warn("amdgpu: No suitable DMA available\n");
}
r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
if (r) {
pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
- printk(KERN_WARNING "amdgpu: No coherent DMA available.\n");
+ pr_warn("amdgpu: No coherent DMA available\n");
}
r = gmc_v7_0_init_microcode(adev);
@@ -1202,7 +1281,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
if (printk_ratelimit()) {
dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
- entry->src_id, entry->src_data);
+ entry->src_id, entry->src_data[0]);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
@@ -1259,6 +1338,8 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
static const struct amdgpu_gart_funcs gmc_v7_0_gart_funcs = {
.flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb,
.set_pte_pde = gmc_v7_0_gart_set_pte_pde,
+ .set_prt = gmc_v7_0_set_prt,
+ .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags
};
static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 7669b3259f35..d19d1c5e2847 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -245,9 +245,7 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
out:
if (err) {
- printk(KERN_ERR
- "mc: Failed to load firmware \"%s\"\n",
- fw_name);
+ pr_err("mc: Failed to load firmware \"%s\"\n", fw_name);
release_firmware(adev->mc.fw);
adev->mc.fw = NULL;
}
@@ -255,14 +253,14 @@ out:
}
/**
- * gmc_v8_0_mc_load_microcode - load MC ucode into the hw
+ * gmc_v8_0_tonga_mc_load_microcode - load tonga MC ucode into the hw
*
* @adev: amdgpu_device pointer
*
* Load the GDDR MC ucode into the hw (CIK).
* Returns 0 on success, error on failure.
*/
-static int gmc_v8_0_mc_load_microcode(struct amdgpu_device *adev)
+static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev)
{
const struct mc_firmware_header_v1_0 *hdr;
const __le32 *fw_data = NULL;
@@ -270,9 +268,6 @@ static int gmc_v8_0_mc_load_microcode(struct amdgpu_device *adev)
u32 running;
int i, ucode_size, regs_size;
- if (!adev->mc.fw)
- return -EINVAL;
-
/* Skip MC ucode loading on SR-IOV capable boards.
* vbios does this for us in asic_init in that case.
* Skip MC ucode loading on VF, because hypervisor will do that
@@ -281,6 +276,9 @@ static int gmc_v8_0_mc_load_microcode(struct amdgpu_device *adev)
if (amdgpu_sriov_bios(adev))
return 0;
+ if (!adev->mc.fw)
+ return -EINVAL;
+
hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data;
amdgpu_ucode_print_mc_hdr(&hdr->header);
@@ -331,6 +329,76 @@ static int gmc_v8_0_mc_load_microcode(struct amdgpu_device *adev)
return 0;
}
+static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev)
+{
+ const struct mc_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data = NULL;
+ const __le32 *io_mc_regs = NULL;
+ u32 data, vbios_version;
+ int i, ucode_size, regs_size;
+
+ /* Skip MC ucode loading on SR-IOV capable boards.
+ * vbios does this for us in asic_init in that case.
+ * Skip MC ucode loading on VF, because hypervisor will do that
+ * for this adaptor.
+ */
+ if (amdgpu_sriov_bios(adev))
+ return 0;
+
+ WREG32(mmMC_SEQ_IO_DEBUG_INDEX, 0x9F);
+ data = RREG32(mmMC_SEQ_IO_DEBUG_DATA);
+ vbios_version = data & 0xf;
+
+ if (vbios_version == 0)
+ return 0;
+
+ if (!adev->mc.fw)
+ return -EINVAL;
+
+ hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data;
+ amdgpu_ucode_print_mc_hdr(&hdr->header);
+
+ adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+ regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
+ io_mc_regs = (const __le32 *)
+ (adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
+ ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+ fw_data = (const __le32 *)
+ (adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ data = RREG32(mmMC_SEQ_MISC0);
+ data &= ~(0x40);
+ WREG32(mmMC_SEQ_MISC0, data);
+
+ /* load mc io regs */
+ for (i = 0; i < regs_size; i++) {
+ WREG32(mmMC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(io_mc_regs++));
+ WREG32(mmMC_SEQ_IO_DEBUG_DATA, le32_to_cpup(io_mc_regs++));
+ }
+
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008);
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000010);
+
+ /* load the MC ucode */
+ for (i = 0; i < ucode_size; i++)
+ WREG32(mmMC_SEQ_SUP_PGM, le32_to_cpup(fw_data++));
+
+ /* put the engine back into the active state */
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008);
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000004);
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000001);
+
+ /* wait for training to complete */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ data = RREG32(mmMC_SEQ_MISC0);
+ if (data & 0x80)
+ break;
+ udelay(1);
+ }
+
+ return 0;
+}
+
static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
struct amdgpu_mc *mc)
{
@@ -533,7 +601,7 @@ static int gmc_v8_0_gart_set_pte_pde(struct amdgpu_device *adev,
void *cpu_pt_addr,
uint32_t gpu_page_idx,
uint64_t addr,
- uint32_t flags)
+ uint64_t flags)
{
void __iomem *ptr = (void *)cpu_pt_addr;
uint64_t value;
@@ -565,6 +633,23 @@ static int gmc_v8_0_gart_set_pte_pde(struct amdgpu_device *adev,
return 0;
}
+static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev,
+ uint32_t flags)
+{
+ uint64_t pte_flag = 0;
+
+ if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ pte_flag |= AMDGPU_PTE_EXECUTABLE;
+ if (flags & AMDGPU_VM_PAGE_READABLE)
+ pte_flag |= AMDGPU_PTE_READABLE;
+ if (flags & AMDGPU_VM_PAGE_WRITEABLE)
+ pte_flag |= AMDGPU_PTE_WRITEABLE;
+ if (flags & AMDGPU_VM_PAGE_PRT)
+ pte_flag |= AMDGPU_PTE_PRT;
+
+ return pte_flag;
+}
+
/**
* gmc_v8_0_set_fault_enable_default - update VM fault handling
*
@@ -595,6 +680,62 @@ static void gmc_v8_0_set_fault_enable_default(struct amdgpu_device *adev,
}
/**
+ * gmc_v8_0_set_prt - set PRT VM fault
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable VM fault handling for PRT
+*/
+static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
+{
+ u32 tmp;
+
+ if (enable && !adev->mc.prt_warning) {
+ dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n");
+ adev->mc.prt_warning = true;
+ }
+
+ tmp = RREG32(mmVM_PRT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ CB_DISABLE_READ_FAULT_ON_UNMAPPED_ACCESS, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ CB_DISABLE_WRITE_FAULT_ON_UNMAPPED_ACCESS, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ TC_DISABLE_READ_FAULT_ON_UNMAPPED_ACCESS, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ TC_DISABLE_WRITE_FAULT_ON_UNMAPPED_ACCESS, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ L2_CACHE_STORE_INVALID_ENTRIES, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ L1_TLB_STORE_INVALID_ENTRIES, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ MASK_PDE0_FAULT, enable);
+ WREG32(mmVM_PRT_CNTL, tmp);
+
+ if (enable) {
+ uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
+ uint32_t high = adev->vm_manager.max_pfn;
+
+ WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, high);
+ } else {
+ WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, 0x0);
+ }
+}
+
+/**
* gmc_v8_0_gart_enable - gart enable
*
* @adev: amdgpu_device pointer
@@ -735,6 +876,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev)
if (r)
return r;
adev->gart.table_size = adev->gart.num_gpu_pages * 8;
+ adev->gart.gart_pte_flags = AMDGPU_PTE_EXECUTABLE;
return amdgpu_gart_table_vram_alloc(adev);
}
@@ -803,6 +945,7 @@ static int gmc_v8_0_vm_init(struct amdgpu_device *adev)
* amdkfd will use VMIDs 8-15
*/
adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+ adev->vm_manager.num_level = 1;
amdgpu_vm_manager_init(adev);
/* base offset of vram pages */
@@ -885,6 +1028,14 @@ static int gmc_v8_0_early_init(void *handle)
gmc_v8_0_set_gart_funcs(adev);
gmc_v8_0_set_irq_funcs(adev);
+ adev->mc.shared_aperture_start = 0x2000000000000000ULL;
+ adev->mc.shared_aperture_end =
+ adev->mc.shared_aperture_start + (4ULL << 30) - 1;
+ adev->mc.private_aperture_start =
+ adev->mc.shared_aperture_end + 1;
+ adev->mc.private_aperture_end =
+ adev->mc.private_aperture_start + (4ULL << 30) - 1;
+
return 0;
}
@@ -919,11 +1070,11 @@ static int gmc_v8_0_sw_init(void *handle)
adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp);
}
- r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, 147, &adev->mc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault);
if (r)
return r;
@@ -950,12 +1101,12 @@ static int gmc_v8_0_sw_init(void *handle)
if (r) {
adev->need_dma32 = true;
dma_bits = 32;
- printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
+ pr_warn("amdgpu: No suitable DMA available\n");
}
r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
if (r) {
pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
- printk(KERN_WARNING "amdgpu: No coherent DMA available.\n");
+ pr_warn("amdgpu: No coherent DMA available\n");
}
r = gmc_v8_0_init_microcode(adev);
@@ -1015,7 +1166,15 @@ static int gmc_v8_0_hw_init(void *handle)
gmc_v8_0_mc_program(adev);
if (adev->asic_type == CHIP_TONGA) {
- r = gmc_v8_0_mc_load_microcode(adev);
+ r = gmc_v8_0_tonga_mc_load_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load MC firmware!\n");
+ return r;
+ }
+ } else if (adev->asic_type == CHIP_POLARIS11 ||
+ adev->asic_type == CHIP_POLARIS10 ||
+ adev->asic_type == CHIP_POLARIS12) {
+ r = gmc_v8_0_polaris_mc_load_microcode(adev);
if (r) {
DRM_ERROR("Failed to load MC firmware!\n");
return r;
@@ -1237,6 +1396,13 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
{
u32 addr, status, mc_client;
+ if (amdgpu_sriov_vf(adev)) {
+ dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
+ entry->src_id, entry->src_data[0]);
+ dev_err(adev->dev, " Can't decode VM fault info here on SRIOV VF\n");
+ return 0;
+ }
+
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
mc_client = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
@@ -1251,7 +1417,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
if (printk_ratelimit()) {
dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
- entry->src_id, entry->src_data);
+ entry->src_id, entry->src_data[0]);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
@@ -1427,12 +1593,15 @@ static int gmc_v8_0_set_clockgating_state(void *handle,
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
switch (adev->asic_type) {
case CHIP_FIJI:
fiji_update_mc_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
fiji_update_mc_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -1451,6 +1620,9 @@ static void gmc_v8_0_get_clockgating_state(void *handle, u32 *flags)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
/* AMD_CG_SUPPORT_MC_MGCG */
data = RREG32(mmMC_HUB_MISC_HUB_CG);
if (data & MC_HUB_MISC_HUB_CG__ENABLE_MASK)
@@ -1485,6 +1657,8 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
static const struct amdgpu_gart_funcs gmc_v8_0_gart_funcs = {
.flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb,
.set_pte_pde = gmc_v8_0_gart_set_pte_pde,
+ .set_prt = gmc_v8_0_set_prt,
+ .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags
};
static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
new file mode 100644
index 000000000000..df69aae99df4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -0,0 +1,842 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "gmc_v9_0.h"
+
+#include "vega10/soc15ip.h"
+#include "vega10/HDP/hdp_4_0_offset.h"
+#include "vega10/HDP/hdp_4_0_sh_mask.h"
+#include "vega10/GC/gc_9_0_sh_mask.h"
+#include "vega10/vega10_enum.h"
+
+#include "soc15_common.h"
+
+#include "nbio_v6_1.h"
+#include "gfxhub_v1_0.h"
+#include "mmhub_v1_0.h"
+
+#define mmDF_CS_AON0_DramBaseAddress0 0x0044
+#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX 0
+//DF_CS_AON0_DramBaseAddress0
+#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0
+#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1
+#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT 0x4
+#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT 0x8
+#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT 0xc
+#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK 0x00000001L
+#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK 0x00000002L
+#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L
+#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L
+#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L
+
+/* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/
+#define AMDGPU_NUM_OF_VMIDS 8
+
+static const u32 golden_settings_vega10_hdp[] =
+{
+ 0xf64, 0x0fffffff, 0x00000000,
+ 0xf65, 0x0fffffff, 0x00000000,
+ 0xf66, 0x0fffffff, 0x00000000,
+ 0xf67, 0x0fffffff, 0x00000000,
+ 0xf68, 0x0fffffff, 0x00000000,
+ 0xf6a, 0x0fffffff, 0x00000000,
+ 0xf6b, 0x0fffffff, 0x00000000,
+ 0xf6c, 0x0fffffff, 0x00000000,
+ 0xf6d, 0x0fffffff, 0x00000000,
+ 0xf6e, 0x0fffffff, 0x00000000,
+};
+
+static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ struct amdgpu_vmhub *hub;
+ u32 tmp, reg, bits, i;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ /* MM HUB */
+ hub = &adev->vmhub[AMDGPU_MMHUB];
+ bits = hub->get_vm_protection_bits();
+ for (i = 0; i< 16; i++) {
+ reg = hub->vm_context0_cntl + i;
+ tmp = RREG32(reg);
+ tmp &= ~bits;
+ WREG32(reg, tmp);
+ }
+
+ /* GFX HUB */
+ hub = &adev->vmhub[AMDGPU_GFXHUB];
+ bits = hub->get_vm_protection_bits();
+ for (i = 0; i < 16; i++) {
+ reg = hub->vm_context0_cntl + i;
+ tmp = RREG32(reg);
+ tmp &= ~bits;
+ WREG32(reg, tmp);
+ }
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ /* MM HUB */
+ hub = &adev->vmhub[AMDGPU_MMHUB];
+ bits = hub->get_vm_protection_bits();
+ for (i = 0; i< 16; i++) {
+ reg = hub->vm_context0_cntl + i;
+ tmp = RREG32(reg);
+ tmp |= bits;
+ WREG32(reg, tmp);
+ }
+
+ /* GFX HUB */
+ hub = &adev->vmhub[AMDGPU_GFXHUB];
+ bits = hub->get_vm_protection_bits();
+ for (i = 0; i < 16; i++) {
+ reg = hub->vm_context0_cntl + i;
+ tmp = RREG32(reg);
+ tmp |= bits;
+ WREG32(reg, tmp);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct amdgpu_vmhub *gfxhub = &adev->vmhub[AMDGPU_GFXHUB];
+ struct amdgpu_vmhub *mmhub = &adev->vmhub[AMDGPU_MMHUB];
+ uint32_t status = 0;
+ u64 addr;
+
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (entry->vm_id_src) {
+ status = RREG32(mmhub->vm_l2_pro_fault_status);
+ WREG32_P(mmhub->vm_l2_pro_fault_cntl, 1, ~1);
+ } else {
+ status = RREG32(gfxhub->vm_l2_pro_fault_status);
+ WREG32_P(gfxhub->vm_l2_pro_fault_cntl, 1, ~1);
+ }
+ }
+
+ if (printk_ratelimit()) {
+ dev_err(adev->dev,
+ "[%s] VMC page fault (src_id:%u ring:%u vm_id:%u pas_id:%u)\n",
+ entry->vm_id_src ? "mmhub" : "gfxhub",
+ entry->src_id, entry->ring_id, entry->vm_id,
+ entry->pas_id);
+ dev_err(adev->dev, " at page 0x%016llx from %d\n",
+ addr, entry->client_id);
+ if (!amdgpu_sriov_vf(adev))
+ dev_err(adev->dev,
+ "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
+ .set = gmc_v9_0_vm_fault_interrupt_state,
+ .process = gmc_v9_0_process_interrupt,
+};
+
+static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->mc.vm_fault.num_types = 1;
+ adev->mc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
+}
+
+/*
+ * GART
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the amdgpu vm/hsa code.
+ */
+
+/**
+ * gmc_v9_0_gart_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ *
+ * Flush the TLB for the requested page table.
+ */
+static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
+ uint32_t vmid)
+{
+ /* Use register 17 for GART */
+ const unsigned eng = 17;
+ unsigned i, j;
+
+ /* flush hdp cache */
+ nbio_v6_1_hdp_flush(adev);
+
+ spin_lock(&adev->mc.invalidate_lock);
+
+ for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+ struct amdgpu_vmhub *hub = &adev->vmhub[i];
+ u32 tmp = hub->get_invalidate_req(vmid);
+
+ WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
+
+ /* Busy wait for ACK.*/
+ for (j = 0; j < 100; j++) {
+ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
+ tmp &= 1 << vmid;
+ if (tmp)
+ break;
+ cpu_relax();
+ }
+ if (j < 100)
+ continue;
+
+ /* Wait for ACK with a delay.*/
+ for (j = 0; j < adev->usec_timeout; j++) {
+ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
+ tmp &= 1 << vmid;
+ if (tmp)
+ break;
+ udelay(1);
+ }
+ if (j < adev->usec_timeout)
+ continue;
+
+ DRM_ERROR("Timeout waiting for VM flush ACK!\n");
+ }
+
+ spin_unlock(&adev->mc.invalidate_lock);
+}
+
+/**
+ * gmc_v9_0_gart_set_pte_pde - update the page tables using MMIO
+ *
+ * @adev: amdgpu_device pointer
+ * @cpu_pt_addr: cpu address of the page table
+ * @gpu_page_idx: entry in the page table to update
+ * @addr: dst addr to write into pte/pde
+ * @flags: access flags
+ *
+ * Update the page tables using the CPU.
+ */
+static int gmc_v9_0_gart_set_pte_pde(struct amdgpu_device *adev,
+ void *cpu_pt_addr,
+ uint32_t gpu_page_idx,
+ uint64_t addr,
+ uint64_t flags)
+{
+ void __iomem *ptr = (void *)cpu_pt_addr;
+ uint64_t value;
+
+ /*
+ * PTE format on VEGA 10:
+ * 63:59 reserved
+ * 58:57 mtype
+ * 56 F
+ * 55 L
+ * 54 P
+ * 53 SW
+ * 52 T
+ * 50:48 reserved
+ * 47:12 4k physical page base address
+ * 11:7 fragment
+ * 6 write
+ * 5 read
+ * 4 exe
+ * 3 Z
+ * 2 snooped
+ * 1 system
+ * 0 valid
+ *
+ * PDE format on VEGA 10:
+ * 63:59 block fragment size
+ * 58:55 reserved
+ * 54 P
+ * 53:48 reserved
+ * 47:6 physical base address of PD or PTE
+ * 5:3 reserved
+ * 2 C
+ * 1 system
+ * 0 valid
+ */
+
+ /*
+ * The following is for PTE only. GART does not have PDEs.
+ */
+ value = addr & 0x0000FFFFFFFFF000ULL;
+ value |= flags;
+ writeq(value, ptr + (gpu_page_idx * 8));
+ return 0;
+}
+
+static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
+ uint32_t flags)
+
+{
+ uint64_t pte_flag = 0;
+
+ if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ pte_flag |= AMDGPU_PTE_EXECUTABLE;
+ if (flags & AMDGPU_VM_PAGE_READABLE)
+ pte_flag |= AMDGPU_PTE_READABLE;
+ if (flags & AMDGPU_VM_PAGE_WRITEABLE)
+ pte_flag |= AMDGPU_PTE_WRITEABLE;
+
+ switch (flags & AMDGPU_VM_MTYPE_MASK) {
+ case AMDGPU_VM_MTYPE_DEFAULT:
+ pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_NC:
+ pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_WC:
+ pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_WC);
+ break;
+ case AMDGPU_VM_MTYPE_CC:
+ pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_CC);
+ break;
+ case AMDGPU_VM_MTYPE_UC:
+ pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_UC);
+ break;
+ default:
+ pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC);
+ break;
+ }
+
+ if (flags & AMDGPU_VM_PAGE_PRT)
+ pte_flag |= AMDGPU_PTE_PRT;
+
+ return pte_flag;
+}
+
+static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
+ .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb,
+ .set_pte_pde = gmc_v9_0_gart_set_pte_pde,
+ .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags
+};
+
+static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev)
+{
+ if (adev->gart.gart_funcs == NULL)
+ adev->gart.gart_funcs = &gmc_v9_0_gart_funcs;
+}
+
+static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
+{
+ return adev->vm_manager.vram_base_offset + mc_addr - adev->mc.vram_start;
+}
+
+static const struct amdgpu_mc_funcs gmc_v9_0_mc_funcs = {
+ .adjust_mc_addr = gmc_v9_0_adjust_mc_addr,
+};
+
+static void gmc_v9_0_set_mc_funcs(struct amdgpu_device *adev)
+{
+ adev->mc.mc_funcs = &gmc_v9_0_mc_funcs;
+}
+
+static int gmc_v9_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v9_0_set_gart_funcs(adev);
+ gmc_v9_0_set_mc_funcs(adev);
+ gmc_v9_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int gmc_v9_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
+}
+
+static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
+ struct amdgpu_mc *mc)
+{
+ u64 base = 0;
+ if (!amdgpu_sriov_vf(adev))
+ base = mmhub_v1_0_get_fb_location(adev);
+ amdgpu_vram_location(adev, &adev->mc, base);
+ adev->mc.gtt_base_align = 0;
+ amdgpu_gtt_location(adev, mc);
+}
+
+/**
+ * gmc_v9_0_mc_init - initialize the memory controller driver params
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the amount of vram, vram width, and decide how to place
+ * vram and gart within the GPU's physical address space.
+ * Returns 0 for success.
+ */
+static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ int chansize, numchan;
+
+ /* hbm memory channel size */
+ chansize = 128;
+
+ tmp = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_CS_AON0_DramBaseAddress0));
+ tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+ tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
+ switch (tmp) {
+ case 0:
+ default:
+ numchan = 1;
+ break;
+ case 1:
+ numchan = 2;
+ break;
+ case 2:
+ numchan = 0;
+ break;
+ case 3:
+ numchan = 4;
+ break;
+ case 4:
+ numchan = 0;
+ break;
+ case 5:
+ numchan = 8;
+ break;
+ case 6:
+ numchan = 0;
+ break;
+ case 7:
+ numchan = 16;
+ break;
+ case 8:
+ numchan = 2;
+ break;
+ }
+ adev->mc.vram_width = numchan * chansize;
+
+ /* Could aper size report 0 ? */
+ adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
+ adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
+ /* size in MB on si */
+ adev->mc.mc_vram_size =
+ nbio_v6_1_get_memsize(adev) * 1024ULL * 1024ULL;
+ adev->mc.real_vram_size = adev->mc.mc_vram_size;
+ adev->mc.visible_vram_size = adev->mc.aper_size;
+
+ /* In case the PCI BAR is larger than the actual amount of vram */
+ if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
+ adev->mc.visible_vram_size = adev->mc.real_vram_size;
+
+ /* unless the user had overridden it, set the gart
+ * size equal to the 1024 or vram, whichever is larger.
+ */
+ if (amdgpu_gart_size == -1)
+ adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size);
+ else
+ adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
+
+ gmc_v9_0_vram_gtt_location(adev, &adev->mc);
+
+ return 0;
+}
+
+static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->gart.robj) {
+ WARN(1, "VEGA10 PCIE GART already initialized\n");
+ return 0;
+ }
+ /* Initialize common gart structure */
+ r = amdgpu_gart_init(adev);
+ if (r)
+ return r;
+ adev->gart.table_size = adev->gart.num_gpu_pages * 8;
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE(MTYPE_UC) |
+ AMDGPU_PTE_EXECUTABLE;
+ return amdgpu_gart_table_vram_alloc(adev);
+}
+
+/*
+ * vm
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the amdgpu vm/hsa code.
+ */
+/**
+ * gmc_v9_0_vm_init - vm init callback
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Inits vega10 specific vm parameters (number of VMs, base of vram for
+ * VMIDs 1-15) (vega10).
+ * Returns 0 for success.
+ */
+static int gmc_v9_0_vm_init(struct amdgpu_device *adev)
+{
+ /*
+ * number of VMs
+ * VMID 0 is reserved for System
+ * amdgpu graphics/compute will use VMIDs 1-7
+ * amdkfd will use VMIDs 8-15
+ */
+ adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+ adev->vm_manager.num_level = 3;
+ amdgpu_vm_manager_init(adev);
+
+ /* base offset of vram pages */
+ /*XXX This value is not zero for APU*/
+ adev->vm_manager.vram_base_offset = 0;
+
+ return 0;
+}
+
+/**
+ * gmc_v9_0_vm_fini - vm fini callback
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down any asic specific VM setup.
+ */
+static void gmc_v9_0_vm_fini(struct amdgpu_device *adev)
+{
+ return;
+}
+
+static int gmc_v9_0_sw_init(void *handle)
+{
+ int r;
+ int dma_bits;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ spin_lock_init(&adev->mc.invalidate_lock);
+
+ if (adev->flags & AMD_IS_APU) {
+ adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+ } else {
+ /* XXX Don't know how to get VRAM type yet. */
+ adev->mc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+ }
+
+ /* This interrupt is VMC page fault.*/
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0,
+ &adev->mc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_UTCL2, 0,
+ &adev->mc.vm_fault);
+
+ if (r)
+ return r;
+
+ /* Because of four level VMPTs, vm size is at least 512GB.
+ * The maximum size is 256TB (48bit).
+ */
+ if (amdgpu_vm_size < 512) {
+ DRM_WARN("VM size is at least 512GB!\n");
+ amdgpu_vm_size = 512;
+ }
+ adev->vm_manager.max_pfn = (uint64_t)amdgpu_vm_size << 18;
+
+ /* Set the internal MC address mask
+ * This is the max address of the GPU's
+ * internal address space.
+ */
+ adev->mc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
+
+ /* set DMA mask + need_dma32 flags.
+ * PCIE - can handle 44-bits.
+ * IGP - can handle 44-bits
+ * PCI - dma32 for legacy pci gart, 44 bits on vega10
+ */
+ adev->need_dma32 = false;
+ dma_bits = adev->need_dma32 ? 32 : 44;
+ r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
+ if (r) {
+ adev->need_dma32 = true;
+ dma_bits = 32;
+ printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
+ }
+ r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
+ if (r) {
+ pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
+ printk(KERN_WARNING "amdgpu: No coherent DMA available.\n");
+ }
+
+ r = gmc_v9_0_mc_init(adev);
+ if (r)
+ return r;
+
+ /* Memory manager */
+ r = amdgpu_bo_init(adev);
+ if (r)
+ return r;
+
+ r = gmc_v9_0_gart_init(adev);
+ if (r)
+ return r;
+
+ if (!adev->vm_manager.enabled) {
+ r = gmc_v9_0_vm_init(adev);
+ if (r) {
+ dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
+ return r;
+ }
+ adev->vm_manager.enabled = true;
+ }
+ return r;
+}
+
+/**
+ * gmc_v8_0_gart_fini - vm fini callback
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tears down the driver GART/VM setup (CIK).
+ */
+static void gmc_v9_0_gart_fini(struct amdgpu_device *adev)
+{
+ amdgpu_gart_table_vram_free(adev);
+ amdgpu_gart_fini(adev);
+}
+
+static int gmc_v9_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (adev->vm_manager.enabled) {
+ amdgpu_vm_manager_fini(adev);
+ gmc_v9_0_vm_fini(adev);
+ adev->vm_manager.enabled = false;
+ }
+ gmc_v9_0_gart_fini(adev);
+ amdgpu_gem_force_release(adev);
+ amdgpu_bo_fini(adev);
+
+ return 0;
+}
+
+static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * gmc_v9_0_gart_enable - gart enable
+ *
+ * @adev: amdgpu_device pointer
+ */
+static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
+{
+ int r;
+ bool value;
+ u32 tmp;
+
+ amdgpu_program_register_sequence(adev,
+ golden_settings_vega10_hdp,
+ (const u32)ARRAY_SIZE(golden_settings_vega10_hdp));
+
+ if (adev->gart.robj == NULL) {
+ dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
+ return -EINVAL;
+ }
+ r = amdgpu_gart_table_vram_pin(adev);
+ if (r)
+ return r;
+
+ /* After HDP is initialized, flush HDP.*/
+ nbio_v6_1_hdp_flush(adev);
+
+ r = gfxhub_v1_0_gart_enable(adev);
+ if (r)
+ return r;
+
+ r = mmhub_v1_0_gart_enable(adev);
+ if (r)
+ return r;
+
+ tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL));
+ tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK;
+ WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL), tmp);
+
+ tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL));
+ WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL), tmp);
+
+
+ if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
+ value = false;
+ else
+ value = true;
+
+ gfxhub_v1_0_set_fault_enable_default(adev, value);
+ mmhub_v1_0_set_fault_enable_default(adev, value);
+
+ gmc_v9_0_gart_flush_gpu_tlb(adev, 0);
+
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned)(adev->mc.gtt_size >> 20),
+ (unsigned long long)adev->gart.table_addr);
+ adev->gart.ready = true;
+ return 0;
+}
+
+static int gmc_v9_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* The sequence of these two function calls matters.*/
+ gmc_v9_0_init_golden_registers(adev);
+
+ r = gmc_v9_0_gart_enable(adev);
+
+ return r;
+}
+
+/**
+ * gmc_v9_0_gart_disable - gart disable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This disables all VM page table.
+ */
+static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
+{
+ gfxhub_v1_0_gart_disable(adev);
+ mmhub_v1_0_gart_disable(adev);
+ amdgpu_gart_table_vram_unpin(adev);
+}
+
+static int gmc_v9_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_put(adev, &adev->mc.vm_fault, 0);
+ gmc_v9_0_gart_disable(adev);
+
+ return 0;
+}
+
+static int gmc_v9_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (adev->vm_manager.enabled) {
+ gmc_v9_0_vm_fini(adev);
+ adev->vm_manager.enabled = false;
+ }
+ gmc_v9_0_hw_fini(adev);
+
+ return 0;
+}
+
+static int gmc_v9_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = gmc_v9_0_hw_init(adev);
+ if (r)
+ return r;
+
+ if (!adev->vm_manager.enabled) {
+ r = gmc_v9_0_vm_init(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "vm manager initialization failed (%d).\n", r);
+ return r;
+ }
+ adev->vm_manager.enabled = true;
+ }
+
+ return r;
+}
+
+static bool gmc_v9_0_is_idle(void *handle)
+{
+ /* MC is always ready in GMC v9.*/
+ return true;
+}
+
+static int gmc_v9_0_wait_for_idle(void *handle)
+{
+ /* There is no need to wait for MC idle in GMC v9.*/
+ return 0;
+}
+
+static int gmc_v9_0_soft_reset(void *handle)
+{
+ /* XXX for emulation.*/
+ return 0;
+}
+
+static int gmc_v9_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;