summaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c225
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c10
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c15
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c54
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_stream.c40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c81
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h20
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c35
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c77
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h15
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h16
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/os_types.h2
-rw-r--r--drivers/gpu/drm/amd/powerplay/amd_powerplay.c15
-rw-r--r--drivers/gpu/drm/amd/powerplay/amdgpu_smu.c8
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c26
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c5
-rw-r--r--drivers/gpu/drm/amd/powerplay/renoir_ppt.c7
-rw-r--r--drivers/gpu/drm/amd/powerplay/smu_v11_0.c6
-rw-r--r--drivers/gpu/drm/drm_edid.c3
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c4
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_perfmon.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c14
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power.c12
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c5
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c1
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdmi.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite.c17
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_domain.c7
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_tiling.c20
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_pages.c12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h35
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.c15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c149
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c152
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_workarounds.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/display.c49
-rw-r--r--drivers/gpu/drm/i915/gvt/kvmgt.c46
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c10
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h2
-rw-r--r--drivers/gpu/drm/i915/i915_gem_evict.c26
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c12
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c22
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c71
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c84
-rw-r--r--drivers/gpu/drm/i915/i915_priolist_types.h2
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h5
-rw-r--r--drivers/gpu/drm/i915/i915_request.c12
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler.c6
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler.h3
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler_types.h1
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c35
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_vma.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp108.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c16
-rw-r--r--drivers/gpu/drm/tegra/drm.c3
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_ioctl.c1
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_kms.c1
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.h2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_fence.c2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_surface.c2
-rw-r--r--drivers/gpu/host1x/dev.c59
106 files changed, 1227 insertions, 694 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2992a49ad4a5..8ac1581a6b53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -945,6 +945,7 @@ struct amdgpu_device {
/* s3/s4 mask */
bool in_suspend;
+ bool in_hibernate;
/* record last mm index being written through WREG32*/
unsigned long last_mm_index;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 9dff792c9290..6a5b91d23fd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1343,7 +1343,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
}
/* Free the BO*/
- amdgpu_bo_unref(&mem->bo);
+ drm_gem_object_put_unlocked(&mem->bo->tbo.base);
mutex_destroy(&mem->lock);
kfree(mem);
@@ -1688,7 +1688,8 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
- (*mem)->bo = amdgpu_bo_ref(bo);
+ drm_gem_object_get(&bo->tbo.base);
+ (*mem)->bo = bo;
(*mem)->va = va;
(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 559dc24ef436..affde2de2a0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2008,8 +2008,24 @@ static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
*/
static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
{
- return !!memcmp(adev->gart.ptr, adev->reset_magic,
- AMDGPU_RESET_MAGIC_NUM);
+ if (memcmp(adev->gart.ptr, adev->reset_magic,
+ AMDGPU_RESET_MAGIC_NUM))
+ return true;
+
+ if (!adev->in_gpu_reset)
+ return false;
+
+ /*
+ * For all ASICs with baco/mode1 reset, the VRAM is
+ * always assumed to be lost.
+ */
+ switch (amdgpu_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_BACO:
+ case AMD_RESET_METHOD_MODE1:
+ return true;
+ default:
+ return false;
+ }
}
/**
@@ -2340,6 +2356,8 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
{
int i, r;
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
@@ -3354,15 +3372,12 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
}
}
- amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
- amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
-
- amdgpu_amdkfd_suspend(adev, !fbcon);
-
amdgpu_ras_suspend(adev);
r = amdgpu_device_ip_suspend_phase1(adev);
+ amdgpu_amdkfd_suspend(adev, !fbcon);
+
/* evict vram memory */
amdgpu_bo_evict_vram(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8ea86ffdea0d..a735d79a717b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -85,9 +85,10 @@
* - 3.34.0 - Non-DC can flip correctly between buffers with different pitches
* - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask
* - 3.36.0 - Allow reading more status registers on si/cik
+ * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 36
+#define KMS_DRIVER_MINOR 37
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
@@ -1180,7 +1181,9 @@ static int amdgpu_pmops_freeze(struct device *dev)
struct amdgpu_device *adev = drm_dev->dev_private;
int r;
+ adev->in_hibernate = true;
r = amdgpu_device_suspend(drm_dev, true);
+ adev->in_hibernate = false;
if (r)
return r;
return amdgpu_asic_reset(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 9ae7b61f696a..25ddb482466a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -133,8 +133,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
u32 cpp;
u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
- AMDGPU_GEM_CREATE_VRAM_CLEARED |
- AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ AMDGPU_GEM_CREATE_VRAM_CLEARED;
info = drm_get_format_info(adev->ddev, mode_cmd);
cpp = info->cpp[0];
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 006f21ef7ddf..62635e58e45e 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1358,8 +1358,6 @@ static int cik_asic_reset(struct amdgpu_device *adev)
int r;
if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
- if (!adev->in_suspend)
- amdgpu_inc_vram_lost(adev);
r = amdgpu_dpm_baco_reset(adev);
} else {
r = cik_asic_pci_config_reset(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index d78059fd2c72..0e0daf0021b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -279,7 +279,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] =
#define DEFAULT_SH_MEM_CONFIG \
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
- (SH_MEM_ALIGNMENT_MODE_DWORD << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
+ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
(SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \
(3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
@@ -4273,7 +4273,7 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
/* === CGCG /CGLS for GFX 3D Only === */
gfx_v10_0_update_3d_clock_gating(adev, enable);
/* === MGCG + MGLS === */
- /* gfx_v10_0_update_medium_grain_clock_gating(adev, enable); */
+ gfx_v10_0_update_medium_grain_clock_gating(adev, enable);
}
if (adev->cg_flags &
@@ -4353,11 +4353,7 @@ static int gfx_v10_0_set_powergating_state(void *handle,
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
- if (!enable) {
- amdgpu_gfx_off_ctrl(adev, false);
- cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
- } else
- amdgpu_gfx_off_ctrl(adev, true);
+ amdgpu_gfx_off_ctrl(adev, enable);
break;
default:
break;
@@ -4918,6 +4914,19 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
ref, mask);
}
+static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring,
+ unsigned vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t value = 0;
+
+ value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
+ value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
+ value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
+ value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ WREG32_SOC15(GC, 0, mmSQ_CMD, value);
+}
+
static void
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
uint32_t me, uint32_t pipe,
@@ -5309,6 +5318,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v10_0_ring_soft_recovery,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e6b113ed2f40..d2d9dce68c2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1234,6 +1234,10 @@ struct amdgpu_gfxoff_quirk {
static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
+ { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
+ /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
+ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
{ 0, 0, 0, 0, 0 },
};
@@ -5023,10 +5027,9 @@ static int gfx_v9_0_set_powergating_state(void *handle,
switch (adev->asic_type) {
case CHIP_RAVEN:
case CHIP_RENOIR:
- if (!enable) {
+ if (!enable)
amdgpu_gfx_off_ctrl(adev, false);
- cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
- }
+
if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
@@ -5050,12 +5053,7 @@ static int gfx_v9_0_set_powergating_state(void *handle,
amdgpu_gfx_off_ctrl(adev, true);
break;
case CHIP_VEGA12:
- if (!enable) {
- amdgpu_gfx_off_ctrl(adev, false);
- cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
- } else {
- amdgpu_gfx_off_ctrl(adev, true);
- }
+ amdgpu_gfx_off_ctrl(adev, enable);
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
index 074a9a09c0a7..a5b60c9a2418 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
@@ -73,6 +73,22 @@
#define SDMA_OP_AQL_COPY 0
#define SDMA_OP_AQL_BARRIER_OR 0
+#define SDMA_GCR_RANGE_IS_PA (1 << 18)
+#define SDMA_GCR_SEQ(x) (((x) & 0x3) << 16)
+#define SDMA_GCR_GL2_WB (1 << 15)
+#define SDMA_GCR_GL2_INV (1 << 14)
+#define SDMA_GCR_GL2_DISCARD (1 << 13)
+#define SDMA_GCR_GL2_RANGE(x) (((x) & 0x3) << 11)
+#define SDMA_GCR_GL2_US (1 << 10)
+#define SDMA_GCR_GL1_INV (1 << 9)
+#define SDMA_GCR_GLV_INV (1 << 8)
+#define SDMA_GCR_GLK_INV (1 << 7)
+#define SDMA_GCR_GLK_WB (1 << 6)
+#define SDMA_GCR_GLM_INV (1 << 5)
+#define SDMA_GCR_GLM_WB (1 << 4)
+#define SDMA_GCR_GL1_RANGE(x) (((x) & 0x3) << 2)
+#define SDMA_GCR_GLI_INV(x) (((x) & 0x3) << 0)
+
/*define for op field*/
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask 0x000000FF
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 033cbbca2072..52318b03c424 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -351,8 +351,6 @@ static int nv_asic_reset(struct amdgpu_device *adev)
struct smu_context *smu = &adev->smu;
if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
- if (!adev->in_suspend)
- amdgpu_inc_vram_lost(adev);
ret = smu_baco_enter(smu);
if (ret)
return ret;
@@ -360,8 +358,6 @@ static int nv_asic_reset(struct amdgpu_device *adev)
if (ret)
return ret;
} else {
- if (!adev->in_suspend)
- amdgpu_inc_vram_lost(adev);
ret = nv_asic_mode1_reset(adev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index ebfd2cdf4e65..d2840c2f6286 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -382,6 +382,18 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
+ /* Invalidate L2, because if we don't do it, we might get stale cache
+ * lines from previous IBs.
+ */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV |
+ SDMA_GCR_GL2_WB |
+ SDMA_GCR_GLM_INV |
+ SDMA_GCR_GLM_WB) << 16);
+ amdgpu_ring_write(ring, 0xffffff80);
+ amdgpu_ring_write(ring, 0xffff);
+
/* An IB packet must end on a 8 DW boundary--the next dword
* must be on a 8-dword boundary. Our IB packet below is 6
* dwords long, thus add x number of NOPs, such that, in
@@ -1595,7 +1607,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
- .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
+ .emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */
.emit_ib = sdma_v5_0_ring_emit_ib,
.emit_fence = sdma_v5_0_ring_emit_fence,
.emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index a40499d51c93..d42a8d8a0dea 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -569,14 +569,10 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
switch (soc15_asic_reset_method(adev)) {
case AMD_RESET_METHOD_BACO:
- if (!adev->in_suspend)
- amdgpu_inc_vram_lost(adev);
return soc15_asic_baco_reset(adev);
case AMD_RESET_METHOD_MODE2:
return amdgpu_dpm_mode2_reset(adev);
default:
- if (!adev->in_suspend)
- amdgpu_inc_vram_lost(adev);
return soc15_asic_mode1_reset(adev);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 78b35901643b..3ce10e05d0d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -765,8 +765,6 @@ static int vi_asic_reset(struct amdgpu_device *adev)
int r;
if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
- if (!adev->in_suspend)
- amdgpu_inc_vram_lost(adev);
r = amdgpu_dpm_baco_reset(adev);
} else {
r = vi_asic_pci_config_reset(adev);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f7c5cdc10a70..7fc15b82fe48 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -441,7 +441,7 @@ static void dm_vupdate_high_irq(void *interrupt_params)
/**
* dm_crtc_high_irq() - Handles CRTC interrupt
- * @interrupt_params: ignored
+ * @interrupt_params: used for determining the CRTC instance
*
* Handles the CRTC/VSYNC interrupt by notfying DRM's VBLANK
* event handler.
@@ -455,70 +455,6 @@ static void dm_crtc_high_irq(void *interrupt_params)
unsigned long flags;
acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK);
-
- if (acrtc) {
- acrtc_state = to_dm_crtc_state(acrtc->base.state);
-
- DRM_DEBUG_VBL("crtc:%d, vupdate-vrr:%d\n",
- acrtc->crtc_id,
- amdgpu_dm_vrr_active(acrtc_state));
-
- /* Core vblank handling at start of front-porch is only possible
- * in non-vrr mode, as only there vblank timestamping will give
- * valid results while done in front-porch. Otherwise defer it
- * to dm_vupdate_high_irq after end of front-porch.
- */
- if (!amdgpu_dm_vrr_active(acrtc_state))
- drm_crtc_handle_vblank(&acrtc->base);
-
- /* Following stuff must happen at start of vblank, for crc
- * computation and below-the-range btr support in vrr mode.
- */
- amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
-
- if (acrtc_state->stream && adev->family >= AMDGPU_FAMILY_AI &&
- acrtc_state->vrr_params.supported &&
- acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) {
- spin_lock_irqsave(&adev->ddev->event_lock, flags);
- mod_freesync_handle_v_update(
- adev->dm.freesync_module,
- acrtc_state->stream,
- &acrtc_state->vrr_params);
-
- dc_stream_adjust_vmin_vmax(
- adev->dm.dc,
- acrtc_state->stream,
- &acrtc_state->vrr_params.adjust);
- spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
- }
- }
-}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-/**
- * dm_dcn_crtc_high_irq() - Handles VStartup interrupt for DCN generation ASICs
- * @interrupt params - interrupt parameters
- *
- * Notify DRM's vblank event handler at VSTARTUP
- *
- * Unlike DCE hardware, we trigger the handler at VSTARTUP. at which:
- * * We are close enough to VUPDATE - the point of no return for hw
- * * We are in the fixed portion of variable front porch when vrr is enabled
- * * We are before VUPDATE, where double-buffered vrr registers are swapped
- *
- * It is therefore the correct place to signal vblank, send user flip events,
- * and update VRR.
- */
-static void dm_dcn_crtc_high_irq(void *interrupt_params)
-{
- struct common_irq_params *irq_params = interrupt_params;
- struct amdgpu_device *adev = irq_params->adev;
- struct amdgpu_crtc *acrtc;
- struct dm_crtc_state *acrtc_state;
- unsigned long flags;
-
- acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK);
-
if (!acrtc)
return;
@@ -528,22 +464,35 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
amdgpu_dm_vrr_active(acrtc_state),
acrtc_state->active_planes);
+ /**
+ * Core vblank handling at start of front-porch is only possible
+ * in non-vrr mode, as only there vblank timestamping will give
+ * valid results while done in front-porch. Otherwise defer it
+ * to dm_vupdate_high_irq after end of front-porch.
+ */
+ if (!amdgpu_dm_vrr_active(acrtc_state))
+ drm_crtc_handle_vblank(&acrtc->base);
+
+ /**
+ * Following stuff must happen at start of vblank, for crc
+ * computation and below-the-range btr support in vrr mode.
+ */
amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
- drm_crtc_handle_vblank(&acrtc->base);
+
+ /* BTR updates need to happen before VUPDATE on Vega and above. */
+ if (adev->family < AMDGPU_FAMILY_AI)
+ return;
spin_lock_irqsave(&adev->ddev->event_lock, flags);
- if (acrtc_state->vrr_params.supported &&
+ if (acrtc_state->stream && acrtc_state->vrr_params.supported &&
acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) {
- mod_freesync_handle_v_update(
- adev->dm.freesync_module,
- acrtc_state->stream,
- &acrtc_state->vrr_params);
+ mod_freesync_handle_v_update(adev->dm.freesync_module,
+ acrtc_state->stream,
+ &acrtc_state->vrr_params);
- dc_stream_adjust_vmin_vmax(
- adev->dm.dc,
- acrtc_state->stream,
- &acrtc_state->vrr_params.adjust);
+ dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc_state->stream,
+ &acrtc_state->vrr_params.adjust);
}
/*
@@ -556,7 +505,8 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
* avoid race conditions between flip programming and completion,
* which could cause too early flip completion events.
*/
- if (acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED &&
+ if (adev->family >= AMDGPU_FAMILY_RV &&
+ acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED &&
acrtc_state->active_planes == 0) {
if (acrtc->event) {
drm_crtc_send_vblank_event(&acrtc->base, acrtc->event);
@@ -568,7 +518,6 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
}
-#endif
static int dm_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
@@ -2008,17 +1957,22 @@ void amdgpu_dm_update_connector_after_detect(
dc_sink_retain(aconnector->dc_sink);
if (sink->dc_edid.length == 0) {
aconnector->edid = NULL;
- drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux);
+ if (aconnector->dc_link->aux_mode) {
+ drm_dp_cec_unset_edid(
+ &aconnector->dm_dp_aux.aux);
+ }
} else {
aconnector->edid =
- (struct edid *) sink->dc_edid.raw_edid;
-
+ (struct edid *)sink->dc_edid.raw_edid;
drm_connector_update_edid_property(connector,
- aconnector->edid);
- drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux,
- aconnector->edid);
+ aconnector->edid);
+
+ if (aconnector->dc_link->aux_mode)
+ drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux,
+ aconnector->edid);
}
+
amdgpu_dm_update_freesync_caps(connector, aconnector->edid);
update_connector_ext_caps(aconnector);
} else {
@@ -2440,8 +2394,36 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
+ amdgpu_dm_irq_register_interrupt(
+ adev, &int_params, dm_crtc_high_irq, c_irq_params);
+ }
+
+ /* Use VUPDATE_NO_LOCK interrupt on DCN, which seems to correspond to
+ * the regular VUPDATE interrupt on DCE. We want DC_IRQ_SOURCE_VUPDATEx
+ * to trigger at end of each vblank, regardless of state of the lock,
+ * matching DCE behaviour.
+ */
+ for (i = DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT;
+ i <= DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT + adev->mode_info.num_crtc - 1;
+ i++) {
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->vupdate_irq);
+
+ if (r) {
+ DRM_ERROR("Failed to add vupdate irq id!\n");
+ return r;
+ }
+
+ int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT;
+ int_params.irq_source =
+ dc_interrupt_to_irq_source(dc, i, 0);
+
+ c_irq_params = &adev->dm.vupdate_params[int_params.irq_source - DC_IRQ_SOURCE_VUPDATE1];
+
+ c_irq_params->adev = adev;
+ c_irq_params->irq_src = int_params.irq_source;
+
amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_dcn_crtc_high_irq, c_irq_params);
+ dm_vupdate_high_irq, c_irq_params);
}
/* Use GRPH_PFLIP interrupt */
@@ -3340,7 +3322,8 @@ fill_plane_dcc_attributes(struct amdgpu_device *adev,
const union dc_tiling_info *tiling_info,
const uint64_t info,
struct dc_plane_dcc_param *dcc,
- struct dc_plane_address *address)
+ struct dc_plane_address *address,
+ bool force_disable_dcc)
{
struct dc *dc = adev->dm.dc;
struct dc_dcc_surface_param input;
@@ -3352,6 +3335,9 @@ fill_plane_dcc_attributes(struct amdgpu_device *adev,
memset(&input, 0, sizeof(input));
memset(&output, 0, sizeof(output));
+ if (force_disable_dcc)
+ return 0;
+
if (!offset)
return 0;
@@ -3401,7 +3387,8 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev,
union dc_tiling_info *tiling_info,
struct plane_size *plane_size,
struct dc_plane_dcc_param *dcc,
- struct dc_plane_address *address)
+ struct dc_plane_address *address,
+ bool force_disable_dcc)
{
const struct drm_framebuffer *fb = &afb->base;
int ret;
@@ -3507,7 +3494,8 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev,
ret = fill_plane_dcc_attributes(adev, afb, format, rotation,
plane_size, tiling_info,
- tiling_flags, dcc, address);
+ tiling_flags, dcc, address,
+ force_disable_dcc);
if (ret)
return ret;
}
@@ -3599,7 +3587,8 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
const struct drm_plane_state *plane_state,
const uint64_t tiling_flags,
struct dc_plane_info *plane_info,
- struct dc_plane_address *address)
+ struct dc_plane_address *address,
+ bool force_disable_dcc)
{
const struct drm_framebuffer *fb = plane_state->fb;
const struct amdgpu_framebuffer *afb =
@@ -3681,7 +3670,8 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
plane_info->rotation, tiling_flags,
&plane_info->tiling_info,
&plane_info->plane_size,
- &plane_info->dcc, address);
+ &plane_info->dcc, address,
+ force_disable_dcc);
if (ret)
return ret;
@@ -3704,6 +3694,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
struct dc_plane_info plane_info;
uint64_t tiling_flags;
int ret;
+ bool force_disable_dcc = false;
ret = fill_dc_scaling_info(plane_state, &scaling_info);
if (ret)
@@ -3718,9 +3709,11 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
if (ret)
return ret;
+ force_disable_dcc = adev->asic_type == CHIP_RAVEN && adev->in_suspend;
ret = fill_dc_plane_info_and_addr(adev, plane_state, tiling_flags,
&plane_info,
- &dc_plane_state->address);
+ &dc_plane_state->address,
+ force_disable_dcc);
if (ret)
return ret;
@@ -4437,10 +4430,6 @@ static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable)
struct amdgpu_device *adev = crtc->dev->dev_private;
int rc;
- /* Do not set vupdate for DCN hardware */
- if (adev->family > AMDGPU_FAMILY_AI)
- return 0;
-
irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst;
rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
@@ -4664,6 +4653,7 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
i2c_del_adapter(&aconnector->i2c->base);
kfree(aconnector->i2c);
}
+ kfree(aconnector->dm_dp_aux.aux.name);
kfree(connector);
}
@@ -4723,10 +4713,19 @@ amdgpu_dm_connector_atomic_duplicate_state(struct drm_connector *connector)
static int
amdgpu_dm_connector_late_register(struct drm_connector *connector)
{
-#if defined(CONFIG_DEBUG_FS)
struct amdgpu_dm_connector *amdgpu_dm_connector =
to_amdgpu_dm_connector(connector);
+ int r;
+ if ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) ||
+ (connector->connector_type == DRM_MODE_CONNECTOR_eDP)) {
+ amdgpu_dm_connector->dm_dp_aux.aux.dev = connector->kdev;
+ r = drm_dp_aux_register(&amdgpu_dm_connector->dm_dp_aux.aux);
+ if (r)
+ return r;
+ }
+
+#if defined(CONFIG_DEBUG_FS)
connector_debugfs_init(amdgpu_dm_connector);
#endif
@@ -5332,6 +5331,7 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
uint64_t tiling_flags;
uint32_t domain;
int r;
+ bool force_disable_dcc = false;
dm_plane_state_old = to_dm_plane_state(plane->state);
dm_plane_state_new = to_dm_plane_state(new_state);
@@ -5390,11 +5390,13 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
dm_plane_state_old->dc_state != dm_plane_state_new->dc_state) {
struct dc_plane_state *plane_state = dm_plane_state_new->dc_state;
+ force_disable_dcc = adev->asic_type == CHIP_RAVEN && adev->in_suspend;
fill_plane_buffer_attributes(
adev, afb, plane_state->format, plane_state->rotation,
tiling_flags, &plane_state->tiling_info,
&plane_state->plane_size, &plane_state->dcc,
- &plane_state->address);
+ &plane_state->address,
+ force_disable_dcc);
}
return 0;
@@ -6092,7 +6094,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
if (connector_type == DRM_MODE_CONNECTOR_DisplayPort
|| connector_type == DRM_MODE_CONNECTOR_eDP)
- amdgpu_dm_initialize_dp_connector(dm, aconnector);
+ amdgpu_dm_initialize_dp_connector(dm, aconnector, link->link_index);
out_free:
if (res) {
@@ -6666,7 +6668,12 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
fill_dc_plane_info_and_addr(
dm->adev, new_plane_state, tiling_flags,
&bundle->plane_infos[planes_count],
- &bundle->flip_addrs[planes_count].address);
+ &bundle->flip_addrs[planes_count].address,
+ false);
+
+ DRM_DEBUG_DRIVER("plane: id=%d dcc_en=%d\n",
+ new_plane_state->plane->index,
+ bundle->plane_infos[planes_count].dcc.enable);
bundle->surface_updates[planes_count].plane_info =
&bundle->plane_infos[planes_count];
@@ -7848,6 +7855,7 @@ static int dm_update_plane_state(struct dc *dc,
struct drm_crtc_state *old_crtc_state, *new_crtc_state;
struct dm_crtc_state *dm_new_crtc_state, *dm_old_crtc_state;
struct dm_plane_state *dm_new_plane_state, *dm_old_plane_state;
+ struct amdgpu_crtc *new_acrtc;
bool needs_reset;
int ret = 0;
@@ -7857,9 +7865,23 @@ static int dm_update_plane_state(struct dc *dc,
dm_new_plane_state = to_dm_plane_state(new_plane_state);
dm_old_plane_state = to_dm_plane_state(old_plane_state);
- /*TODO Implement atomic check for cursor plane */
- if (plane->type == DRM_PLANE_TYPE_CURSOR)
+ /*TODO Implement better atomic check for cursor plane */
+ if (plane->type == DRM_PLANE_TYPE_CURSOR) {
+ if (!enable || !new_plane_crtc ||
+ drm_atomic_plane_disabling(plane->state, new_plane_state))
+ return 0;
+
+ new_acrtc = to_amdgpu_crtc(new_plane_crtc);
+
+ if ((new_plane_state->crtc_w > new_acrtc->max_cursor_width) ||
+ (new_plane_state->crtc_h > new_acrtc->max_cursor_height)) {
+ DRM_DEBUG_ATOMIC("Bad cursor size %d x %d\n",
+ new_plane_state->crtc_w, new_plane_state->crtc_h);
+ return -EINVAL;
+ }
+
return 0;
+ }
needs_reset = should_reset_plane(state, plane, old_plane_state,
new_plane_state);
@@ -8086,7 +8108,8 @@ dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm,
ret = fill_dc_plane_info_and_addr(
dm->adev, new_plane_state, tiling_flags,
plane_info,
- &flip_addr->address);
+ &flip_addr->address,
+ false);
if (ret)
goto cleanup;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index 78e1c11d4ae5..dcf84a61de37 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -398,15 +398,15 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
struct mod_hdcp_display *display = &hdcp_work[link_index].display;
struct mod_hdcp_link *link = &hdcp_work[link_index].link;
- memset(display, 0, sizeof(*display));
- memset(link, 0, sizeof(*link));
-
- display->index = aconnector->base.index;
-
if (config->dpms_off) {
hdcp_remove_display(hdcp_work, link_index, aconnector);
return;
}
+
+ memset(display, 0, sizeof(*display));
+ memset(link, 0, sizeof(*link));
+
+ display->index = aconnector->base.index;
display->state = MOD_HDCP_DISPLAY_ACTIVE;
if (aconnector->dc_sink != NULL)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index fabbe78d5aef..d2917759b7ab 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -156,16 +156,16 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector *connector)
to_amdgpu_dm_connector(connector);
int r;
- amdgpu_dm_connector->dm_dp_aux.aux.dev = connector->kdev;
- r = drm_dp_aux_register(&amdgpu_dm_connector->dm_dp_aux.aux);
- if (r)
+ r = drm_dp_mst_connector_late_register(connector,
+ amdgpu_dm_connector->port);
+ if (r < 0)
return r;
#if defined(CONFIG_DEBUG_FS)
connector_debugfs_init(amdgpu_dm_connector);
#endif
- return r;
+ return 0;
}
static void
@@ -472,9 +472,12 @@ static const struct drm_dp_mst_topology_cbs dm_mst_cbs = {
};
void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
- struct amdgpu_dm_connector *aconnector)
+ struct amdgpu_dm_connector *aconnector,
+ int link_index)
{
- aconnector->dm_dp_aux.aux.name = "dmdc";
+ aconnector->dm_dp_aux.aux.name =
+ kasprintf(GFP_KERNEL, "AMDGPU DM aux hw bus %d",
+ link_index);
aconnector->dm_dp_aux.aux.transfer = dm_dp_aux_transfer;
aconnector->dm_dp_aux.ddc_service = aconnector->dc_link->ddc;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
index d6813ce67bbd..d2c56579a2cc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
@@ -32,7 +32,8 @@ struct amdgpu_dm_connector;
int dm_mst_get_pbn_divider(struct dc_link *link);
void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
- struct amdgpu_dm_connector *aconnector);
+ struct amdgpu_dm_connector *aconnector,
+ int link_index);
#if defined(CONFIG_DRM_AMD_DC_DCN)
bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 8489f1e56892..47431ca6986d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -834,11 +834,10 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context)
{
int i;
- int count = 0;
- struct pipe_ctx *pipe;
PERF_TRACE();
for (i = 0; i < MAX_PIPES; i++) {
- pipe = &context->res_ctx.pipe_ctx[i];
+ int count = 0;
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
if (!pipe->plane_state)
continue;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 7cbb1efb4f68..caa090d0b6ac 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -220,6 +220,30 @@ static enum dpcd_training_patterns
return dpcd_tr_pattern;
}
+static uint8_t dc_dp_initialize_scrambling_data_symbols(
+ struct dc_link *link,
+ enum dc_dp_training_pattern pattern)
+{
+ uint8_t disable_scrabled_data_symbols = 0;
+
+ switch (pattern) {
+ case DP_TRAINING_PATTERN_SEQUENCE_1:
+ case DP_TRAINING_PATTERN_SEQUENCE_2:
+ case DP_TRAINING_PATTERN_SEQUENCE_3:
+ disable_scrabled_data_symbols = 1;
+ break;
+ case DP_TRAINING_PATTERN_SEQUENCE_4:
+ disable_scrabled_data_symbols = 0;
+ break;
+ default:
+ ASSERT(0);
+ DC_LOG_HW_LINK_TRAINING("%s: Invalid HW Training pattern: %d\n",
+ __func__, pattern);
+ break;
+ }
+ return disable_scrabled_data_symbols;
+}
+
static inline bool is_repeater(struct dc_link *link, uint32_t offset)
{
return (!link->is_lttpr_mode_transparent && offset != 0);
@@ -252,6 +276,9 @@ static void dpcd_set_lt_pattern_and_lane_settings(
dpcd_pattern.v1_4.TRAINING_PATTERN_SET =
dc_dp_training_pattern_to_dpcd_training_pattern(link, pattern);
+ dpcd_pattern.v1_4.SCRAMBLING_DISABLE =
+ dc_dp_initialize_scrambling_data_symbols(link, pattern);
+
dpcd_lt_buffer[DP_TRAINING_PATTERN_SET - DP_TRAINING_PATTERN_SET]
= dpcd_pattern.raw;
@@ -2911,6 +2938,12 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link)
+ link->dc->hwss.blank_stream(pipe_ctx);
+ }
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
+ if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link)
break;
}
@@ -2927,6 +2960,12 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd
if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
dc_link_reallocate_mst_payload(link);
+ for (i = 0; i < MAX_PIPES; i++) {
+ pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
+ if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link)
+ link->dc->hwss.unblank_stream(pipe_ctx, &previous_link_settings);
+ }
+
status = false;
if (out_link_loss)
*out_link_loss = true;
@@ -4227,6 +4266,21 @@ void dp_set_fec_enable(struct dc_link *link, bool enable)
void dpcd_set_source_specific_data(struct dc_link *link)
{
const uint32_t post_oui_delay = 30; // 30ms
+ uint8_t dspc = 0;
+ enum dc_status ret = DC_ERROR_UNEXPECTED;
+
+ ret = core_link_read_dpcd(link, DP_DOWN_STREAM_PORT_COUNT, &dspc,
+ sizeof(dspc));
+
+ if (ret != DC_OK) {
+ DC_LOG_ERROR("Error in DP aux read transaction,"
+ " not writing source specific data\n");
+ return;
+ }
+
+ /* Return if OUI unsupported */
+ if (!(dspc & DP_OUI_SUPPORT))
+ return;
if (!link->dc->vendor_signature.is_valid) {
struct dpcd_amd_signature amd_signature;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 6ddbb00ed37a..4f0e7203dba4 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -231,34 +231,6 @@ struct dc_stream_status *dc_stream_get_status(
return dc_stream_get_status_from_state(dc->current_state, stream);
}
-static void delay_cursor_until_vupdate(struct pipe_ctx *pipe_ctx, struct dc *dc)
-{
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- unsigned int vupdate_line;
- unsigned int lines_to_vupdate, us_to_vupdate, vpos, nvpos;
- struct dc_stream_state *stream = pipe_ctx->stream;
- unsigned int us_per_line;
-
- if (stream->ctx->asic_id.chip_family == FAMILY_RV &&
- ASICREV_IS_RAVEN(stream->ctx->asic_id.hw_internal_rev)) {
-
- vupdate_line = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx);
- if (!dc_stream_get_crtc_position(dc, &stream, 1, &vpos, &nvpos))
- return;
-
- if (vpos >= vupdate_line)
- return;
-
- us_per_line = stream->timing.h_total * 10000 / stream->timing.pix_clk_100hz;
- lines_to_vupdate = vupdate_line - vpos;
- us_to_vupdate = lines_to_vupdate * us_per_line;
-
- /* 70 us is a conservative estimate of cursor update time*/
- if (us_to_vupdate < 70)
- udelay(us_to_vupdate);
- }
-#endif
-}
/**
* dc_stream_set_cursor_attributes() - Update cursor attributes and set cursor surface address
@@ -298,9 +270,7 @@ bool dc_stream_set_cursor_attributes(
if (!pipe_to_program) {
pipe_to_program = pipe_ctx;
-
- delay_cursor_until_vupdate(pipe_ctx, dc);
- dc->hwss.pipe_control_lock(dc, pipe_to_program, true);
+ dc->hwss.cursor_lock(dc, pipe_to_program, true);
}
dc->hwss.set_cursor_attribute(pipe_ctx);
@@ -309,7 +279,7 @@ bool dc_stream_set_cursor_attributes(
}
if (pipe_to_program)
- dc->hwss.pipe_control_lock(dc, pipe_to_program, false);
+ dc->hwss.cursor_lock(dc, pipe_to_program, false);
return true;
}
@@ -349,16 +319,14 @@ bool dc_stream_set_cursor_position(
if (!pipe_to_program) {
pipe_to_program = pipe_ctx;
-
- delay_cursor_until_vupdate(pipe_ctx, dc);
- dc->hwss.pipe_control_lock(dc, pipe_to_program, true);
+ dc->hwss.cursor_lock(dc, pipe_to_program, true);
}
dc->hwss.set_cursor_position(pipe_ctx);
}
if (pipe_to_program)
- dc->hwss.pipe_control_lock(dc, pipe_to_program, false);
+ dc->hwss.cursor_lock(dc, pipe_to_program, false);
return true;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index c279982947e1..10527593868c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -2757,6 +2757,7 @@ static const struct hw_sequencer_funcs dce110_funcs = {
.disable_plane = dce110_power_down_fe,
.pipe_control_lock = dce_pipe_control_lock,
.interdependent_update_lock = NULL,
+ .cursor_lock = dce_pipe_control_lock,
.prepare_bandwidth = dce110_prepare_bandwidth,
.optimize_bandwidth = dce110_optimize_bandwidth,
.set_drr = set_drr,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index b0357546471b..416afb99529d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -1625,6 +1625,85 @@ void dcn10_pipe_control_lock(
hws->funcs.verify_allow_pstate_change_high(dc);
}
+/**
+ * delay_cursor_until_vupdate() - Delay cursor update if too close to VUPDATE.
+ *
+ * Software keepout workaround to prevent cursor update locking from stalling
+ * out cursor updates indefinitely or from old values from being retained in
+ * the case where the viewport changes in the same frame as the cursor.
+ *
+ * The idea is to calculate the remaining time from VPOS to VUPDATE. If it's
+ * too close to VUPDATE, then stall out until VUPDATE finishes.
+ *
+ * TODO: Optimize cursor programming to be once per frame before VUPDATE
+ * to avoid the need for this workaround.
+ */
+static void delay_cursor_until_vupdate(struct dc *dc, struct pipe_ctx *pipe_ctx)
+{
+ struct dc_stream_state *stream = pipe_ctx->stream;
+ struct crtc_position position;
+ uint32_t vupdate_start, vupdate_end;
+ unsigned int lines_to_vupdate, us_to_vupdate, vpos;
+ unsigned int us_per_line, us_vupdate;
+
+ if (!dc->hwss.calc_vupdate_position || !dc->hwss.get_position)
+ return;
+
+ if (!pipe_ctx->stream_res.stream_enc || !pipe_ctx->stream_res.tg)
+ return;
+
+ dc->hwss.calc_vupdate_position(dc, pipe_ctx, &vupdate_start,
+ &vupdate_end);
+
+ dc->hwss.get_position(&pipe_ctx, 1, &position);
+ vpos = position.vertical_count;
+
+ /* Avoid wraparound calculation issues */
+ vupdate_start += stream->timing.v_total;
+ vupdate_end += stream->timing.v_total;
+ vpos += stream->timing.v_total;
+
+ if (vpos <= vupdate_start) {
+ /* VPOS is in VACTIVE or back porch. */
+ lines_to_vupdate = vupdate_start - vpos;
+ } else if (vpos > vupdate_end) {
+ /* VPOS is in the front porch. */
+ return;
+ } else {
+ /* VPOS is in VUPDATE. */
+ lines_to_vupdate = 0;
+ }
+
+ /* Calculate time until VUPDATE in microseconds. */
+ us_per_line =
+ stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz;
+ us_to_vupdate = lines_to_vupdate * us_per_line;
+
+ /* 70 us is a conservative estimate of cursor update time*/
+ if (us_to_vupdate > 70)
+ return;
+
+ /* Stall out until the cursor update completes. */
+ if (vupdate_end < vupdate_start)
+ vupdate_end += stream->timing.v_total;
+ us_vupdate = (vupdate_end - vupdate_start + 1) * us_per_line;
+ udelay(us_to_vupdate + us_vupdate);
+}
+
+void dcn10_cursor_lock(struct dc *dc, struct pipe_ctx *pipe, bool lock)
+{
+ /* cursor lock is per MPCC tree, so only need to lock one pipe per stream */
+ if (!pipe || pipe->top_pipe)
+ return;
+
+ /* Prevent cursor lock from stalling out cursor updates. */
+ if (lock)
+ delay_cursor_until_vupdate(dc, pipe);
+
+ dc->res_pool->mpc->funcs->cursor_lock(dc->res_pool->mpc,
+ pipe->stream_res.opp->inst, lock);
+}
+
static bool wait_for_reset_trigger_to_occur(
struct dc_context *dc_ctx,
struct timing_generator *tg)
@@ -3226,7 +3305,7 @@ int dcn10_get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx)
return vertical_line_start;
}
-static void dcn10_calc_vupdate_position(
+void dcn10_calc_vupdate_position(
struct dc *dc,
struct pipe_ctx *pipe_ctx,
uint32_t *start_line,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
index 16a50e05ffbf..42b6e016d71e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
@@ -34,6 +34,11 @@ struct dc;
void dcn10_hw_sequencer_construct(struct dc *dc);
int dcn10_get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx);
+void dcn10_calc_vupdate_position(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ uint32_t *start_line,
+ uint32_t *end_line);
void dcn10_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx);
enum dc_status dcn10_enable_stream_timing(
struct pipe_ctx *pipe_ctx,
@@ -49,6 +54,7 @@ void dcn10_pipe_control_lock(
struct dc *dc,
struct pipe_ctx *pipe,
bool lock);
+void dcn10_cursor_lock(struct dc *dc, struct pipe_ctx *pipe, bool lock);
void dcn10_blank_pixel_data(
struct dc *dc,
struct pipe_ctx *pipe_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
index dd02d3983695..9e8e32629e47 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
@@ -50,6 +50,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
.disable_audio_stream = dce110_disable_audio_stream,
.disable_plane = dcn10_disable_plane,
.pipe_control_lock = dcn10_pipe_control_lock,
+ .cursor_lock = dcn10_cursor_lock,
.interdependent_update_lock = dcn10_lock_all_pipes,
.prepare_bandwidth = dcn10_prepare_bandwidth,
.optimize_bandwidth = dcn10_optimize_bandwidth,
@@ -71,6 +72,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
.set_clock = dcn10_set_clock,
.get_clock = dcn10_get_clock,
.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
+ .calc_vupdate_position = dcn10_calc_vupdate_position,
};
static const struct hwseq_private_funcs dcn10_private_funcs = {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
index 04f863499cfb..3fcd408e9103 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
@@ -223,6 +223,9 @@ struct mpcc *mpc1_insert_plane(
REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, dpp_id);
REG_SET(MPCC_OPP_ID[mpcc_id], 0, MPCC_OPP_ID, tree->opp_id);
+ /* Configure VUPDATE lock set for this MPCC to map to the OPP */
+ REG_SET(MPCC_UPDATE_LOCK_SEL[mpcc_id], 0, MPCC_UPDATE_LOCK_SEL, tree->opp_id);
+
/* update mpc tree mux setting */
if (tree->opp_list == insert_above_mpcc) {
/* insert the toppest mpcc */
@@ -318,6 +321,7 @@ void mpc1_remove_mpcc(
REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, 0xf);
REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, 0xf);
REG_SET(MPCC_OPP_ID[mpcc_id], 0, MPCC_OPP_ID, 0xf);
+ REG_SET(MPCC_UPDATE_LOCK_SEL[mpcc_id], 0, MPCC_UPDATE_LOCK_SEL, 0xf);
/* mark this mpcc as not in use */
mpc10->mpcc_in_use_mask &= ~(1 << mpcc_id);
@@ -328,6 +332,7 @@ void mpc1_remove_mpcc(
REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, 0xf);
REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, 0xf);
REG_SET(MPCC_OPP_ID[mpcc_id], 0, MPCC_OPP_ID, 0xf);
+ REG_SET(MPCC_UPDATE_LOCK_SEL[mpcc_id], 0, MPCC_UPDATE_LOCK_SEL, 0xf);
}
}
@@ -361,6 +366,7 @@ void mpc1_mpc_init(struct mpc *mpc)
REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, 0xf);
REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, 0xf);
REG_SET(MPCC_OPP_ID[mpcc_id], 0, MPCC_OPP_ID, 0xf);
+ REG_SET(MPCC_UPDATE_LOCK_SEL[mpcc_id], 0, MPCC_UPDATE_LOCK_SEL, 0xf);
mpc1_init_mpcc(&(mpc->mpcc_array[mpcc_id]), mpcc_id);
}
@@ -381,6 +387,7 @@ void mpc1_mpc_init_single_inst(struct mpc *mpc, unsigned int mpcc_id)
REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, 0xf);
REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, 0xf);
REG_SET(MPCC_OPP_ID[mpcc_id], 0, MPCC_OPP_ID, 0xf);
+ REG_SET(MPCC_UPDATE_LOCK_SEL[mpcc_id], 0, MPCC_UPDATE_LOCK_SEL, 0xf);
mpc1_init_mpcc(&(mpc->mpcc_array[mpcc_id]), mpcc_id);
@@ -453,6 +460,13 @@ void mpc1_read_mpcc_state(
MPCC_BUSY, &s->busy);
}
+void mpc1_cursor_lock(struct mpc *mpc, int opp_id, bool lock)
+{
+ struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
+
+ REG_SET(CUR[opp_id], 0, CUR_VUPDATE_LOCK_SET, lock ? 1 : 0);
+}
+
static const struct mpc_funcs dcn10_mpc_funcs = {
.read_mpcc_state = mpc1_read_mpcc_state,
.insert_plane = mpc1_insert_plane,
@@ -464,6 +478,7 @@ static const struct mpc_funcs dcn10_mpc_funcs = {
.assert_mpcc_idle_before_connect = mpc1_assert_mpcc_idle_before_connect,
.init_mpcc_list_from_hw = mpc1_init_mpcc_list_from_hw,
.update_blending = mpc1_update_blending,
+ .cursor_lock = mpc1_cursor_lock,
.set_denorm = NULL,
.set_denorm_clamp = NULL,
.set_output_csc = NULL,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h
index 962a68e322ee..66a4719c22a0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h
@@ -39,11 +39,12 @@
SRII(MPCC_BG_G_Y, MPCC, inst),\
SRII(MPCC_BG_R_CR, MPCC, inst),\
SRII(MPCC_BG_B_CB, MPCC, inst),\
- SRII(MPCC_BG_B_CB, MPCC, inst),\
- SRII(MPCC_SM_CONTROL, MPCC, inst)
+ SRII(MPCC_SM_CONTROL, MPCC, inst),\
+ SRII(MPCC_UPDATE_LOCK_SEL, MPCC, inst)
#define MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0(inst) \
- SRII(MUX, MPC_OUT, inst)
+ SRII(MUX, MPC_OUT, inst),\
+ VUPDATE_SRII(CUR, VUPDATE_LOCK_SET, inst)
#define MPC_COMMON_REG_VARIABLE_LIST \
uint32_t MPCC_TOP_SEL[MAX_MPCC]; \
@@ -55,7 +56,9 @@
uint32_t MPCC_BG_R_CR[MAX_MPCC]; \
uint32_t MPCC_BG_B_CB[MAX_MPCC]; \
uint32_t MPCC_SM_CONTROL[MAX_MPCC]; \
- uint32_t MUX[MAX_OPP];
+ uint32_t MUX[MAX_OPP]; \
+ uint32_t MPCC_UPDATE_LOCK_SEL[MAX_MPCC]; \
+ uint32_t CUR[MAX_OPP];
#define MPC_COMMON_MASK_SH_LIST_DCN1_0(mask_sh)\
SF(MPCC0_MPCC_TOP_SEL, MPCC_TOP_SEL, mask_sh),\
@@ -78,7 +81,8 @@
SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_FIELD_ALT, mask_sh),\
SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_FORCE_NEXT_FRAME_POL, mask_sh),\
SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_FORCE_NEXT_TOP_POL, mask_sh),\
- SF(MPC_OUT0_MUX, MPC_OUT_MUX, mask_sh)
+ SF(MPC_OUT0_MUX, MPC_OUT_MUX, mask_sh),\
+ SF(MPCC0_MPCC_UPDATE_LOCK_SEL, MPCC_UPDATE_LOCK_SEL, mask_sh)
#define MPC_REG_FIELD_LIST(type) \
type MPCC_TOP_SEL;\
@@ -101,7 +105,9 @@
type MPCC_SM_FIELD_ALT;\
type MPCC_SM_FORCE_NEXT_FRAME_POL;\
type MPCC_SM_FORCE_NEXT_TOP_POL;\
- type MPC_OUT_MUX;
+ type MPC_OUT_MUX;\
+ type MPCC_UPDATE_LOCK_SEL;\
+ type CUR_VUPDATE_LOCK_SET;
struct dcn_mpc_registers {
MPC_COMMON_REG_VARIABLE_LIST
@@ -192,4 +198,6 @@ void mpc1_read_mpcc_state(
int mpcc_inst,
struct mpcc_state *s);
+void mpc1_cursor_lock(struct mpc *mpc, int opp_id, bool lock);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 07265ca7d28c..ba849aa31e6e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -181,6 +181,14 @@ enum dcn10_clk_src_array_id {
.reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
mm ## block ## id ## _ ## reg_name
+#define VUPDATE_SRII(reg_name, block, id)\
+ .reg_name[id] = BASE(mm ## reg_name ## 0 ## _ ## block ## id ## _BASE_IDX) + \
+ mm ## reg_name ## 0 ## _ ## block ## id
+
+/* set field/register/bitfield name */
+#define SFRB(field_name, reg_name, bitfield, post_fix)\
+ .field_name = reg_name ## __ ## bitfield ## post_fix
+
/* NBIO */
#define NBIO_BASE_INNER(seg) \
NBIF_BASE__INST0_SEG ## seg
@@ -419,11 +427,13 @@ static const struct dcn_mpc_registers mpc_regs = {
};
static const struct dcn_mpc_shift mpc_shift = {
- MPC_COMMON_MASK_SH_LIST_DCN1_0(__SHIFT)
+ MPC_COMMON_MASK_SH_LIST_DCN1_0(__SHIFT),\
+ SFRB(CUR_VUPDATE_LOCK_SET, CUR0_VUPDATE_LOCK_SET0, CUR0_VUPDATE_LOCK_SET, __SHIFT)
};
static const struct dcn_mpc_mask mpc_mask = {
- MPC_COMMON_MASK_SH_LIST_DCN1_0(_MASK),
+ MPC_COMMON_MASK_SH_LIST_DCN1_0(_MASK),\
+ SFRB(CUR_VUPDATE_LOCK_SET, CUR0_VUPDATE_LOCK_SET0, CUR0_VUPDATE_LOCK_SET, _MASK)
};
#define tg_regs(id)\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index 22f421e82733..a023a4d59f41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -2294,7 +2294,8 @@ void dcn20_fpga_init_hw(struct dc *dc)
REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, 2);
REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, 1);
- REG_WRITE(REFCLK_CNTL, 0);
+ if (REG(REFCLK_CNTL))
+ REG_WRITE(REFCLK_CNTL, 0);
//
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
index 1e73357eda34..8334bbd6eabb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
@@ -52,6 +52,7 @@ static const struct hw_sequencer_funcs dcn20_funcs = {
.disable_plane = dcn20_disable_plane,
.pipe_control_lock = dcn20_pipe_control_lock,
.interdependent_update_lock = dcn10_lock_all_pipes,
+ .cursor_lock = dcn10_cursor_lock,
.prepare_bandwidth = dcn20_prepare_bandwidth,
.optimize_bandwidth = dcn20_optimize_bandwidth,
.update_bandwidth = dcn20_update_bandwidth,
@@ -82,6 +83,7 @@ static const struct hw_sequencer_funcs dcn20_funcs = {
.init_vm_ctx = dcn20_init_vm_ctx,
.set_flip_control_gsl = dcn20_set_flip_control_gsl,
.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
+ .calc_vupdate_position = dcn10_calc_vupdate_position,
};
static const struct hwseq_private_funcs dcn20_private_funcs = {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
index de9c857ab3e9..570dfd9a243f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
@@ -545,6 +545,7 @@ const struct mpc_funcs dcn20_mpc_funcs = {
.mpc_init = mpc1_mpc_init,
.mpc_init_single_inst = mpc1_mpc_init_single_inst,
.update_blending = mpc2_update_blending,
+ .cursor_lock = mpc1_cursor_lock,
.get_mpcc_for_dpp = mpc2_get_mpcc_for_dpp,
.wait_for_idle = mpc2_assert_idle_mpcc,
.assert_mpcc_idle_before_connect = mpc2_assert_mpcc_idle_before_connect,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h
index c78fd5123497..496658f420db 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h
@@ -179,7 +179,8 @@
SF(MPC_OUT0_DENORM_CLAMP_G_Y, MPC_OUT_DENORM_CLAMP_MAX_G_Y, mask_sh),\
SF(MPC_OUT0_DENORM_CLAMP_G_Y, MPC_OUT_DENORM_CLAMP_MIN_G_Y, mask_sh),\
SF(MPC_OUT0_DENORM_CLAMP_B_CB, MPC_OUT_DENORM_CLAMP_MAX_B_CB, mask_sh),\
- SF(MPC_OUT0_DENORM_CLAMP_B_CB, MPC_OUT_DENORM_CLAMP_MIN_B_CB, mask_sh)
+ SF(MPC_OUT0_DENORM_CLAMP_B_CB, MPC_OUT_DENORM_CLAMP_MIN_B_CB, mask_sh),\
+ SF(CUR_VUPDATE_LOCK_SET0, CUR_VUPDATE_LOCK_SET, mask_sh)
/*
* DCN2 MPC_OCSC debug status register:
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 5cdbba0cd873..e4348e3b6389 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -508,6 +508,10 @@ enum dcn20_clk_src_array_id {
.block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
mm ## block ## id ## _ ## reg_name
+#define VUPDATE_SRII(reg_name, block, id)\
+ .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \
+ mm ## reg_name ## _ ## block ## id
+
/* NBIO */
#define NBIO_BASE_INNER(seg) \
NBIO_BASE__INST0_SEG ## seg
@@ -3064,25 +3068,32 @@ validate_out:
return out;
}
-
-bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
- bool fast_validate)
+/*
+ * This must be noinline to ensure anything that deals with FP registers
+ * is contained within this call; previously our compiling with hard-float
+ * would result in fp instructions being emitted outside of the boundaries
+ * of the DC_FP_START/END macros, which makes sense as the compiler has no
+ * idea about what is wrapped and what is not
+ *
+ * This is largely just a workaround to avoid breakage introduced with 5.6,
+ * ideally all fp-using code should be moved into its own file, only that
+ * should be compiled with hard-float, and all code exported from there
+ * should be strictly wrapped with DC_FP_START/END
+ */
+static noinline bool dcn20_validate_bandwidth_fp(struct dc *dc,
+ struct dc_state *context, bool fast_validate)
{
bool voltage_supported = false;
bool full_pstate_supported = false;
bool dummy_pstate_supported = false;
double p_state_latency_us;
- DC_FP_START();
p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us;
context->bw_ctx.dml.soc.disable_dram_clock_change_vactive_support =
dc->debug.disable_dram_clock_change_vactive_support;
if (fast_validate) {
- voltage_supported = dcn20_validate_bandwidth_internal(dc, context, true);
-
- DC_FP_END();
- return voltage_supported;
+ return dcn20_validate_bandwidth_internal(dc, context, true);
}
// Best case, we support full UCLK switch latency
@@ -3111,7 +3122,15 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
restore_dml_state:
context->bw_ctx.dml.soc.dram_clock_change_latency_us = p_state_latency_us;
+ return voltage_supported;
+}
+bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
+ bool fast_validate)
+{
+ bool voltage_supported = false;
+ DC_FP_START();
+ voltage_supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate);
DC_FP_END();
return voltage_supported;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
index b9ff9767e08f..4dd634118df2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
@@ -53,6 +53,7 @@ static const struct hw_sequencer_funcs dcn21_funcs = {
.disable_plane = dcn20_disable_plane,
.pipe_control_lock = dcn20_pipe_control_lock,
.interdependent_update_lock = dcn10_lock_all_pipes,
+ .cursor_lock = dcn10_cursor_lock,
.prepare_bandwidth = dcn20_prepare_bandwidth,
.optimize_bandwidth = dcn20_optimize_bandwidth,
.update_bandwidth = dcn20_update_bandwidth,
@@ -85,6 +86,7 @@ static const struct hw_sequencer_funcs dcn21_funcs = {
.optimize_pwr_state = dcn21_optimize_pwr_state,
.exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
+ .calc_vupdate_position = dcn10_calc_vupdate_position,
.set_cursor_position = dcn10_set_cursor_position,
.set_cursor_attribute = dcn10_set_cursor_attribute,
.set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
index b25484aa8222..a721bb401ef0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
@@ -284,7 +284,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
.dram_channel_width_bytes = 4,
.fabric_datapath_to_dcn_data_return_bytes = 32,
.dcn_downspread_percent = 0.5,
- .downspread_percent = 0.5,
+ .downspread_percent = 0.38,
.dram_page_open_time_ns = 50.0,
.dram_rw_turnaround_time_ns = 17.5,
.dram_return_buffer_per_channel_bytes = 8192,
@@ -340,6 +340,10 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
.block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
mm ## block ## id ## _ ## reg_name
+#define VUPDATE_SRII(reg_name, block, id)\
+ .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \
+ mm ## reg_name ## _ ## block ## id
+
/* NBIO */
#define NBIO_BASE_INNER(seg) \
NBIF0_BASE__INST0_SEG ## seg
@@ -1374,64 +1378,49 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
{
struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool);
struct clk_limit_table *clk_table = &bw_params->clk_table;
- unsigned int i, j, k;
- int closest_clk_lvl;
+ struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
+ unsigned int i, j, closest_clk_lvl;
// Default clock levels are used for diags, which may lead to overclocking.
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) && !IS_DIAG_DC(dc->ctx->dce_environment)) {
+ if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
dcn2_1_ip.max_num_otg = pool->base.res_cap->num_timing_generator;
dcn2_1_ip.max_num_dpp = pool->base.pipe_count;
dcn2_1_soc.num_chans = bw_params->num_channels;
- /* Vmin: leave lowest DCN clocks, override with dcfclk, fclk, memclk from fuse */
- dcn2_1_soc.clock_limits[0].state = 0;
- dcn2_1_soc.clock_limits[0].dcfclk_mhz = clk_table->entries[0].dcfclk_mhz;
- dcn2_1_soc.clock_limits[0].fabricclk_mhz = clk_table->entries[0].fclk_mhz;
- dcn2_1_soc.clock_limits[0].socclk_mhz = clk_table->entries[0].socclk_mhz;
- dcn2_1_soc.clock_limits[0].dram_speed_mts = clk_table->entries[0].memclk_mhz * 2;
-
- /*
- * Other levels: find closest DCN clocks that fit the given clock limit using dcfclk
- * as indicator
- */
-
- closest_clk_lvl = -1;
- /* index currently being filled */
- k = 1;
- for (i = 1; i < clk_table->num_entries; i++) {
- /* loop backwards, skip duplicate state*/
- for (j = dcn2_1_soc.num_states - 1; j >= k; j--) {
+ ASSERT(clk_table->num_entries);
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ for (closest_clk_lvl = 0, j = dcn2_1_soc.num_states - 1; j >= 0; j--) {
if ((unsigned int) dcn2_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
closest_clk_lvl = j;
break;
}
}
- /* if found a lvl that fits, use the DCN clks from it, if not, go to next clk limit*/
- if (closest_clk_lvl != -1) {
- dcn2_1_soc.clock_limits[k].state = i;
- dcn2_1_soc.clock_limits[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
- dcn2_1_soc.clock_limits[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
- dcn2_1_soc.clock_limits[k].socclk_mhz = clk_table->entries[i].socclk_mhz;
- dcn2_1_soc.clock_limits[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2;
-
- dcn2_1_soc.clock_limits[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
- dcn2_1_soc.clock_limits[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
- dcn2_1_soc.clock_limits[k].dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
- dcn2_1_soc.clock_limits[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
- dcn2_1_soc.clock_limits[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
- dcn2_1_soc.clock_limits[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
- dcn2_1_soc.clock_limits[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
- k++;
- }
+ clock_limits[i].state = i;
+ clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2;
+
+ clock_limits[i].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+ clock_limits[i].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+ clock_limits[i].dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ clock_limits[i].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ clock_limits[i].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ clock_limits[i].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ clock_limits[i].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ }
+ for (i = 0; i < clk_table->num_entries; i++)
+ dcn2_1_soc.clock_limits[i] = clock_limits[i];
+ if (clk_table->num_entries) {
+ dcn2_1_soc.num_states = clk_table->num_entries;
+ /* duplicate last level */
+ dcn2_1_soc.clock_limits[dcn2_1_soc.num_states] = dcn2_1_soc.clock_limits[dcn2_1_soc.num_states - 1];
+ dcn2_1_soc.clock_limits[dcn2_1_soc.num_states].state = dcn2_1_soc.num_states;
}
- dcn2_1_soc.num_states = k;
}
- /* duplicate last level */
- dcn2_1_soc.clock_limits[dcn2_1_soc.num_states] = dcn2_1_soc.clock_limits[dcn2_1_soc.num_states - 1];
- dcn2_1_soc.clock_limits[dcn2_1_soc.num_states].state = dcn2_1_soc.num_states;
-
dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 7ee8b8460a9b..e34c3376efc1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -63,10 +63,8 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags)
endif
CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dml_common_defs.o := $(dml_ccflags)
DML = display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o \
- dml_common_defs.o
ifdef CONFIG_DRM_AMD_DC_DCN
DML += display_mode_vba.o dcn20/display_rq_dlg_calc_20.o dcn20/display_mode_vba_20.o
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h
index 8c86b63ddf07..1e557ddcb638 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h
@@ -26,7 +26,6 @@
#ifndef __DML20_DISPLAY_RQ_DLG_CALC_H__
#define __DML20_DISPLAY_RQ_DLG_CALC_H__
-#include "../dml_common_defs.h"
#include "../display_rq_dlg_helpers.h"
struct display_mode_lib;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h
index 0378406bf7e7..0d53e871a9d1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h
@@ -26,7 +26,6 @@
#ifndef __DML20V2_DISPLAY_RQ_DLG_CALC_H__
#define __DML20V2_DISPLAY_RQ_DLG_CALC_H__
-#include "../dml_common_defs.h"
#include "../display_rq_dlg_helpers.h"
struct display_mode_lib;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
index a38baa73d484..b8ec08e3b7a3 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
@@ -1200,7 +1200,7 @@ static void dml_rq_dlg_get_dlg_params(
min_hratio_fact_l = 1.0;
min_hratio_fact_c = 1.0;
- if (htaps_l <= 1)
+ if (hratio_l <= 1)
min_hratio_fact_l = 2.0;
else if (htaps_l <= 6) {
if ((hratio_l * 2.0) > 4.0)
@@ -1216,7 +1216,7 @@ static void dml_rq_dlg_get_dlg_params(
hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz;
- if (htaps_c <= 1)
+ if (hratio_c <= 1)
min_hratio_fact_c = 2.0;
else if (htaps_c <= 6) {
if ((hratio_c * 2.0) > 4.0)
@@ -1522,8 +1522,8 @@ static void dml_rq_dlg_get_dlg_params(
disp_dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
disp_dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
- disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
- disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
+ disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10);
+ disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10);
// Clamp to max for now
if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)dml_pow(2, 23))
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h
index 83e95f8cbff2..e8f7785e3fc6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h
@@ -26,7 +26,7 @@
#ifndef __DML21_DISPLAY_RQ_DLG_CALC_H__
#define __DML21_DISPLAY_RQ_DLG_CALC_H__
-#include "../dml_common_defs.h"
+#include "dm_services.h"
#include "../display_rq_dlg_helpers.h"
struct display_mode_lib;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
index cf2758ca5b02..c77c3d827e4a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
@@ -25,8 +25,10 @@
#ifndef __DISPLAY_MODE_LIB_H__
#define __DISPLAY_MODE_LIB_H__
-
-#include "dml_common_defs.h"
+#include "dm_services.h"
+#include "dc_features.h"
+#include "display_mode_structs.h"
+#include "display_mode_enums.h"
#include "display_mode_vba.h"
enum dml_project {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 5d82fc5a7ed7..3a734171f083 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -27,8 +27,6 @@
#ifndef __DML2_DISPLAY_MODE_VBA_H__
#define __DML2_DISPLAY_MODE_VBA_H__
-#include "dml_common_defs.h"
-
struct display_mode_lib;
void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h
index 1f24db830737..2555ef0358c2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h
@@ -26,7 +26,6 @@
#ifndef __DISPLAY_RQ_DLG_HELPERS_H__
#define __DISPLAY_RQ_DLG_HELPERS_H__
-#include "dml_common_defs.h"
#include "display_mode_lib.h"
/* Function: Printer functions
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.h b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.h
index 304164986bd8..9c06913ad767 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.h
@@ -26,8 +26,6 @@
#ifndef __DISPLAY_RQ_DLG_CALC_H__
#define __DISPLAY_RQ_DLG_CALC_H__
-#include "dml_common_defs.h"
-
struct display_mode_lib;
#include "display_rq_dlg_helpers.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c b/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c
deleted file mode 100644
index 723af0b2dda0..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright 2017 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "dml_common_defs.h"
-#include "dcn_calc_math.h"
-
-#include "dml_inline_defs.h"
-
-double dml_round(double a)
-{
- double round_pt = 0.5;
- double ceil = dml_ceil(a, 1);
- double floor = dml_floor(a, 1);
-
- if (a - floor >= round_pt)
- return ceil;
- else
- return floor;
-}
-
-
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.h
deleted file mode 100644
index f78cbae9db88..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright 2017 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#ifndef __DC_COMMON_DEFS_H__
-#define __DC_COMMON_DEFS_H__
-
-#include "dm_services.h"
-#include "dc_features.h"
-#include "display_mode_structs.h"
-#include "display_mode_enums.h"
-
-
-double dml_round(double a);
-
-#endif /* __DC_COMMON_DEFS_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
index ded71ea82413..02e06c9b3230 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
@@ -26,7 +26,6 @@
#ifndef __DML_INLINE_DEFS_H__
#define __DML_INLINE_DEFS_H__
-#include "dml_common_defs.h"
#include "dcn_calc_math.h"
#include "dml_logger.h"
@@ -75,6 +74,18 @@ static inline double dml_floor(double a, double granularity)
return (double) dcn_bw_floor2(a, granularity);
}
+static inline double dml_round(double a)
+{
+ double round_pt = 0.5;
+ double ceil = dml_ceil(a, 1);
+ double floor = dml_floor(a, 1);
+
+ if (a - floor >= round_pt)
+ return ceil;
+ else
+ return floor;
+}
+
static inline int dml_log2(double x)
{
return dml_round((double)dcn_bw_log(x, 2));
@@ -112,7 +123,7 @@ static inline double dml_log(double x, double base)
static inline unsigned int dml_round_to_multiple(unsigned int num,
unsigned int multiple,
- bool up)
+ unsigned char up)
{
unsigned int remainder;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
index 094afc4c8173..50ee8aa7ec3b 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
@@ -210,6 +210,22 @@ struct mpc_funcs {
struct mpcc_blnd_cfg *blnd_cfg,
int mpcc_id);
+ /*
+ * Lock cursor updates for the specified OPP.
+ * OPP defines the set of MPCC that are locked together for cursor.
+ *
+ * Parameters:
+ * [in] mpc - MPC context.
+ * [in] opp_id - The OPP to lock cursor updates on
+ * [in] lock - lock/unlock the OPP
+ *
+ * Return: void
+ */
+ void (*cursor_lock)(
+ struct mpc *mpc,
+ int opp_id,
+ bool lock);
+
struct mpcc* (*get_mpcc_for_dpp)(
struct mpc_tree *tree,
int dpp_id);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
index d4c1fb242c63..08307f3796e3 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
@@ -86,11 +86,17 @@ struct hw_sequencer_funcs {
struct dc_state *context, bool lock);
void (*set_flip_control_gsl)(struct pipe_ctx *pipe_ctx,
bool flip_immediate);
+ void (*cursor_lock)(struct dc *dc, struct pipe_ctx *pipe, bool lock);
/* Timing Related */
void (*get_position)(struct pipe_ctx **pipe_ctx, int num_pipes,
struct crtc_position *position);
int (*get_vupdate_offset_from_vsync)(struct pipe_ctx *pipe_ctx);
+ void (*calc_vupdate_position)(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ uint32_t *start_line,
+ uint32_t *end_line);
void (*enable_per_frame_crtc_position_reset)(struct dc *dc,
int group_size, struct pipe_ctx *grouped_pipes[]);
void (*enable_timing_synchronization)(struct dc *dc,
diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h
index c34eba19860a..6d7bca562eec 100644
--- a/drivers/gpu/drm/amd/display/dc/os_types.h
+++ b/drivers/gpu/drm/amd/display/dc/os_types.h
@@ -108,7 +108,7 @@
#define ASSERT(expr) ASSERT_CRITICAL(expr)
#else
-#define ASSERT(expr) WARN_ON(!(expr))
+#define ASSERT(expr) WARN_ON_ONCE(!(expr))
#endif
#define BREAK_TO_DEBUGGER() ASSERT(0)
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 2a12614a12c2..8e2acb4df860 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -319,12 +319,12 @@ static void pp_dpm_en_umd_pstate(struct pp_hwmgr *hwmgr,
if (*level & profile_mode_mask) {
hwmgr->saved_dpm_level = hwmgr->dpm_level;
hwmgr->en_umd_pstate = true;
- amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
- AMD_IP_BLOCK_TYPE_GFX,
- AMD_CG_STATE_UNGATE);
amdgpu_device_ip_set_powergating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_GFX,
AMD_PG_STATE_UNGATE);
+ amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
+ AMD_IP_BLOCK_TYPE_GFX,
+ AMD_CG_STATE_UNGATE);
}
} else {
/* exit umd pstate, restore level, enable gfx cg*/
@@ -1435,7 +1435,8 @@ static int pp_get_asic_baco_capability(void *handle, bool *cap)
if (!hwmgr)
return -EINVAL;
- if (!hwmgr->pm_en || !hwmgr->hwmgr_func->get_asic_baco_capability)
+ if (!(hwmgr->not_vf && amdgpu_dpm) ||
+ !hwmgr->hwmgr_func->get_asic_baco_capability)
return 0;
mutex_lock(&hwmgr->smu_lock);
@@ -1452,8 +1453,7 @@ static int pp_get_asic_baco_state(void *handle, int *state)
if (!hwmgr)
return -EINVAL;
- if (!(hwmgr->not_vf && amdgpu_dpm) ||
- !hwmgr->hwmgr_func->get_asic_baco_state)
+ if (!hwmgr->pm_en || !hwmgr->hwmgr_func->get_asic_baco_state)
return 0;
mutex_lock(&hwmgr->smu_lock);
@@ -1470,7 +1470,8 @@ static int pp_set_asic_baco_state(void *handle, int state)
if (!hwmgr)
return -EINVAL;
- if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_asic_baco_state)
+ if (!(hwmgr->not_vf && amdgpu_dpm) ||
+ !hwmgr->hwmgr_func->set_asic_baco_state)
return 0;
mutex_lock(&hwmgr->smu_lock);
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index e8b27fab6aa1..e77046931e4c 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1476,7 +1476,7 @@ static int smu_disable_dpm(struct smu_context *smu)
bool use_baco = !smu->is_apu &&
((adev->in_gpu_reset &&
(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
- (adev->in_runpm && amdgpu_asic_supports_baco(adev)));
+ ((adev->in_runpm || adev->in_hibernate) && amdgpu_asic_supports_baco(adev)));
ret = smu_get_smc_version(smu, NULL, &smu_version);
if (ret) {
@@ -1744,12 +1744,12 @@ static int smu_enable_umd_pstate(void *handle,
if (*level & profile_mode_mask) {
smu_dpm_ctx->saved_dpm_level = smu_dpm_ctx->dpm_level;
smu_dpm_ctx->enable_umd_pstate = true;
- amdgpu_device_ip_set_clockgating_state(smu->adev,
- AMD_IP_BLOCK_TYPE_GFX,
- AMD_CG_STATE_UNGATE);
amdgpu_device_ip_set_powergating_state(smu->adev,
AMD_IP_BLOCK_TYPE_GFX,
AMD_PG_STATE_UNGATE);
+ amdgpu_device_ip_set_clockgating_state(smu->adev,
+ AMD_IP_BLOCK_TYPE_GFX,
+ AMD_CG_STATE_UNGATE);
}
} else {
/* exit umd pstate, restore level, enable gfx cg*/
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
index 77c14671866c..719597c5d27d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
@@ -984,6 +984,32 @@ static int init_thermal_controller(
struct pp_hwmgr *hwmgr,
const ATOM_PPLIB_POWERPLAYTABLE *powerplay_table)
{
+ hwmgr->thermal_controller.ucType =
+ powerplay_table->sThermalController.ucType;
+ hwmgr->thermal_controller.ucI2cLine =
+ powerplay_table->sThermalController.ucI2cLine;
+ hwmgr->thermal_controller.ucI2cAddress =
+ powerplay_table->sThermalController.ucI2cAddress;
+
+ hwmgr->thermal_controller.fanInfo.bNoFan =
+ (0 != (powerplay_table->sThermalController.ucFanParameters &
+ ATOM_PP_FANPARAMETERS_NOFAN));
+
+ hwmgr->thermal_controller.fanInfo.ucTachometerPulsesPerRevolution =
+ powerplay_table->sThermalController.ucFanParameters &
+ ATOM_PP_FANPARAMETERS_TACHOMETER_PULSES_PER_REVOLUTION_MASK;
+
+ hwmgr->thermal_controller.fanInfo.ulMinRPM
+ = powerplay_table->sThermalController.ucFanMinRPM * 100UL;
+ hwmgr->thermal_controller.fanInfo.ulMaxRPM
+ = powerplay_table->sThermalController.ucFanMaxRPM * 100UL;
+
+ set_hw_cap(hwmgr,
+ ATOM_PP_THERMALCONTROLLER_NONE != hwmgr->thermal_controller.ucType,
+ PHM_PlatformCaps_ThermalController);
+
+ hwmgr->thermal_controller.use_hw_fan_control = 1;
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 7740488999df..4795eb66b2b2 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -3804,9 +3804,12 @@ static int smu7_trim_single_dpm_states(struct pp_hwmgr *hwmgr,
{
uint32_t i;
+ /* force the trim if mclk_switching is disabled to prevent flicker */
+ bool force_trim = (low_limit == high_limit);
for (i = 0; i < dpm_table->count; i++) {
/*skip the trim if od is enabled*/
- if (!hwmgr->od_enabled && (dpm_table->dpm_levels[i].value < low_limit
+ if ((!hwmgr->od_enabled || force_trim)
+ && (dpm_table->dpm_levels[i].value < low_limit
|| dpm_table->dpm_levels[i].value > high_limit))
dpm_table->dpm_levels[i].enabled = false;
else
diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
index ff73a735b888..b0ed1b3fe79a 100644
--- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
@@ -895,12 +895,17 @@ static int renoir_read_sensor(struct smu_context *smu,
static bool renoir_is_dpm_running(struct smu_context *smu)
{
+ struct amdgpu_device *adev = smu->adev;
+
/*
* Util now, the pmfw hasn't exported the interface of SMU
* feature mask to APU SKU so just force on all the feature
* at early initial stage.
*/
- return true;
+ if (adev->in_suspend)
+ return false;
+ else
+ return true;
}
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index 541c932a6005..655ba4fb05dc 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -1718,6 +1718,12 @@ int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state)
if (ret)
goto out;
+ if (ras && ras->supported) {
+ ret = smu_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
+ if (ret)
+ goto out;
+ }
+
/* clear vbios scratch 6 and 7 for coming asic reinit */
WREG32(adev->bios_scratch_reg_offset + 6, 0);
WREG32(adev->bios_scratch_reg_offset + 7, 0);
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 4ede08a84e37..d96e3ce3e535 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -191,10 +191,11 @@ static const struct edid_quirk {
{ "HVR", 0xaa01, EDID_QUIRK_NON_DESKTOP },
{ "HVR", 0xaa02, EDID_QUIRK_NON_DESKTOP },
- /* Oculus Rift DK1, DK2, and CV1 VR Headsets */
+ /* Oculus Rift DK1, DK2, CV1 and Rift S VR Headsets */
{ "OVR", 0x0001, EDID_QUIRK_NON_DESKTOP },
{ "OVR", 0x0003, EDID_QUIRK_NON_DESKTOP },
{ "OVR", 0x0004, EDID_QUIRK_NON_DESKTOP },
+ { "OVR", 0x0012, EDID_QUIRK_NON_DESKTOP },
/* Windows Mixed Reality Headsets */
{ "ACR", 0x7fce, EDID_QUIRK_NON_DESKTOP },
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 3b0afa156d92..54def341c1db 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -238,8 +238,10 @@ static int submit_pin_objects(struct etnaviv_gem_submit *submit)
}
if ((submit->flags & ETNA_SUBMIT_SOFTPIN) &&
- submit->bos[i].va != mapping->iova)
+ submit->bos[i].va != mapping->iova) {
+ etnaviv_gem_mapping_unreference(mapping);
return -EINVAL;
+ }
atomic_inc(&etnaviv_obj->gpu_active);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
index e6795bafcbb9..75f9db8f7bec 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
@@ -453,7 +453,7 @@ static const struct etnaviv_pm_domain *pm_domain(const struct etnaviv_gpu *gpu,
if (!(gpu->identity.features & meta->feature))
continue;
- if (meta->nr_domains < (index - offset)) {
+ if (index - offset >= meta->nr_domains) {
offset += meta->nr_domains;
continue;
}
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 2c617c98db3a..52db7852827b 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3141,9 +3141,6 @@ static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder,
intel_dp_set_link_params(intel_dp, crtc_state->port_clock,
crtc_state->lane_count, is_mst);
- intel_dp->regs.dp_tp_ctl = DP_TP_CTL(port);
- intel_dp->regs.dp_tp_status = DP_TP_STATUS(port);
-
intel_edp_panel_on(intel_dp);
intel_ddi_clk_select(encoder, crtc_state);
@@ -3848,12 +3845,18 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc);
enum transcoder cpu_transcoder = pipe_config->cpu_transcoder;
+ struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
u32 temp, flags = 0;
/* XXX: DSI transcoder paranoia */
if (drm_WARN_ON(&dev_priv->drm, transcoder_is_dsi(cpu_transcoder)))
return;
+ if (INTEL_GEN(dev_priv) >= 12) {
+ intel_dp->regs.dp_tp_ctl = TGL_DP_TP_CTL(cpu_transcoder);
+ intel_dp->regs.dp_tp_status = TGL_DP_TP_STATUS(cpu_transcoder);
+ }
+
intel_dsc_get_config(encoder, pipe_config);
temp = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder));
@@ -4173,6 +4176,7 @@ static const struct drm_encoder_funcs intel_ddi_funcs = {
static struct intel_connector *
intel_ddi_init_dp_connector(struct intel_digital_port *intel_dig_port)
{
+ struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev);
struct intel_connector *connector;
enum port port = intel_dig_port->base.port;
@@ -4183,6 +4187,10 @@ intel_ddi_init_dp_connector(struct intel_digital_port *intel_dig_port)
intel_dig_port->dp.output_reg = DDI_BUF_CTL(port);
intel_dig_port->dp.prepare_link_retrain =
intel_ddi_prepare_link_retrain;
+ if (INTEL_GEN(dev_priv) < 12) {
+ intel_dig_port->dp.regs.dp_tp_ctl = DP_TP_CTL(port);
+ intel_dig_port->dp.regs.dp_tp_status = DP_TP_STATUS(port);
+ }
if (!intel_dp_init_connector(intel_dig_port, connector)) {
kfree(connector);
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index 246e406bb385..84ecf8e58523 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -4140,7 +4140,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX D TBT1",
.domains = TGL_AUX_D_TBT1_IO_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
+ .ops = &icl_tc_phy_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4151,7 +4151,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX E TBT2",
.domains = TGL_AUX_E_TBT2_IO_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
+ .ops = &icl_tc_phy_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4162,7 +4162,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX F TBT3",
.domains = TGL_AUX_F_TBT3_IO_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
+ .ops = &icl_tc_phy_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4173,7 +4173,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX G TBT4",
.domains = TGL_AUX_G_TBT4_IO_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
+ .ops = &icl_tc_phy_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4184,7 +4184,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX H TBT5",
.domains = TGL_AUX_H_TBT5_IO_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
+ .ops = &icl_tc_phy_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4195,7 +4195,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX I TBT6",
.domains = TGL_AUX_I_TBT6_IO_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
+ .ops = &icl_tc_phy_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 804b1d966f66..a2fafd4499f2 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -2517,9 +2517,6 @@ static void intel_dp_prepare(struct intel_encoder *encoder,
intel_crtc_has_type(pipe_config,
INTEL_OUTPUT_DP_MST));
- intel_dp->regs.dp_tp_ctl = DP_TP_CTL(port);
- intel_dp->regs.dp_tp_status = DP_TP_STATUS(port);
-
/*
* There are four kinds of DP registers:
*
@@ -7836,6 +7833,8 @@ bool intel_dp_init(struct drm_i915_private *dev_priv,
intel_dig_port->dp.output_reg = output_reg;
intel_dig_port->max_lanes = 4;
+ intel_dig_port->dp.regs.dp_tp_ctl = DP_TP_CTL(port);
+ intel_dig_port->dp.regs.dp_tp_status = DP_TP_STATUS(port);
intel_encoder->type = INTEL_OUTPUT_DP;
intel_encoder->power_domain = intel_port_to_power_domain(port);
diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
index 3e706bb850a8..dbfa6895795b 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
@@ -342,6 +342,7 @@ int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector)
*/
if (dev_priv->vbt.backlight.type !=
INTEL_BACKLIGHT_VESA_EDP_AUX_INTERFACE &&
+ i915_modparams.enable_dpcd_backlight != 1 &&
!drm_dp_has_quirk(&intel_dp->desc, intel_dp->edid_quirks,
DP_QUIRK_FORCE_DPCD_BACKLIGHT)) {
DRM_DEV_INFO(dev->dev,
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 2e5d835a9eaa..c125ca9ab9b3 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -485,8 +485,7 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv,
if (!ret)
goto err_llb;
else if (ret > 1) {
- DRM_INFO("Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n");
-
+ DRM_INFO_ONCE("Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n");
}
fbc->threshold = ret;
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index 39930232b253..821411b93dac 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -1536,7 +1536,8 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port)
intel_de_write(i915, HDCP_RPRIME(i915, cpu_transcoder, port), ri.reg);
/* Wait for Ri prime match */
- if (wait_for(intel_de_read(i915, HDCP_STATUS(i915, cpu_transcoder, port)) &
+ if (wait_for((intel_de_read(i915, HDCP_STATUS(i915, cpu_transcoder, port)) &
+ (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC)) ==
(HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) {
DRM_ERROR("Ri' mismatch detected, link check failed (%x)\n",
intel_de_read(i915, HDCP_STATUS(i915, cpu_transcoder, port)));
diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c
index deda351719db..33d886141138 100644
--- a/drivers/gpu/drm/i915/display/intel_sprite.c
+++ b/drivers/gpu/drm/i915/display/intel_sprite.c
@@ -2817,19 +2817,25 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane,
}
}
-static bool gen12_plane_supports_mc_ccs(enum plane_id plane_id)
+static bool gen12_plane_supports_mc_ccs(struct drm_i915_private *dev_priv,
+ enum plane_id plane_id)
{
+ /* Wa_14010477008:tgl[a0..c0] */
+ if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_C0))
+ return false;
+
return plane_id < PLANE_SPRITE4;
}
static bool gen12_plane_format_mod_supported(struct drm_plane *_plane,
u32 format, u64 modifier)
{
+ struct drm_i915_private *dev_priv = to_i915(_plane->dev);
struct intel_plane *plane = to_intel_plane(_plane);
switch (modifier) {
case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
- if (!gen12_plane_supports_mc_ccs(plane->id))
+ if (!gen12_plane_supports_mc_ccs(dev_priv, plane->id))
return false;
/* fall through */
case DRM_FORMAT_MOD_LINEAR:
@@ -2998,9 +3004,10 @@ static const u32 *icl_get_plane_formats(struct drm_i915_private *dev_priv,
}
}
-static const u64 *gen12_get_plane_modifiers(enum plane_id plane_id)
+static const u64 *gen12_get_plane_modifiers(struct drm_i915_private *dev_priv,
+ enum plane_id plane_id)
{
- if (gen12_plane_supports_mc_ccs(plane_id))
+ if (gen12_plane_supports_mc_ccs(dev_priv, plane_id))
return gen12_plane_format_modifiers_mc_ccs;
else
return gen12_plane_format_modifiers_rc_ccs;
@@ -3070,7 +3077,7 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv,
plane->has_ccs = skl_plane_has_ccs(dev_priv, pipe, plane_id);
if (INTEL_GEN(dev_priv) >= 12) {
- modifiers = gen12_get_plane_modifiers(plane_id);
+ modifiers = gen12_get_plane_modifiers(dev_priv, plane_id);
plane_funcs = &gen12_plane_funcs;
} else {
if (plane->has_ccs)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 0cc40e77bbd2..4f96c8788a2e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -368,7 +368,6 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_vma *vma;
- GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
if (!atomic_read(&obj->bind_count))
return;
@@ -400,12 +399,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
void
i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
- struct drm_i915_gem_object *obj = vma->obj;
-
- assert_object_held(obj);
-
/* Bump the LRU to try and avoid premature eviction whilst flipping */
- i915_gem_object_bump_inactive_ggtt(obj);
+ i915_gem_object_bump_inactive_ggtt(vma->obj);
i915_vma_unpin(vma);
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index 37f77aee1212..0158e49bf9bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -182,21 +182,35 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
int tiling_mode, unsigned int stride)
{
struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
- struct i915_vma *vma;
+ struct i915_vma *vma, *vn;
+ LIST_HEAD(unbind);
int ret = 0;
if (tiling_mode == I915_TILING_NONE)
return 0;
mutex_lock(&ggtt->vm.mutex);
+
+ spin_lock(&obj->vma.lock);
for_each_ggtt_vma(vma, obj) {
+ GEM_BUG_ON(vma->vm != &ggtt->vm);
+
if (i915_vma_fence_prepare(vma, tiling_mode, stride))
continue;
+ list_move(&vma->vm_link, &unbind);
+ }
+ spin_unlock(&obj->vma.lock);
+
+ list_for_each_entry_safe(vma, vn, &unbind, vm_link) {
ret = __i915_vma_unbind(vma);
- if (ret)
+ if (ret) {
+ /* Restore the remaining vma on an error */
+ list_splice(&unbind, &ggtt->vm.bound_list);
break;
+ }
}
+
mutex_unlock(&ggtt->vm.mutex);
return ret;
@@ -268,6 +282,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
}
mutex_unlock(&obj->mm.lock);
+ spin_lock(&obj->vma.lock);
for_each_ggtt_vma(vma, obj) {
vma->fence_size =
i915_gem_fence_size(i915, vma->size, tiling, stride);
@@ -278,6 +293,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
if (vma->fence)
vma->fence->dirty = true;
}
+ spin_unlock(&obj->vma.lock);
obj->tiling_and_stride = tiling | stride;
i915_gem_object_unlock(obj);
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 2d0fd50c5312..d4f94ca9ae0d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1477,8 +1477,10 @@ static int igt_ppgtt_pin_update(void *arg)
unsigned int page_size = BIT(first);
obj = i915_gem_object_create_internal(dev_priv, page_size);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_vm;
+ }
vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
@@ -1531,8 +1533,10 @@ static int igt_ppgtt_pin_update(void *arg)
}
obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_vm;
+ }
vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 07cb83a0d017..ca0d4f4f3615 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -69,7 +69,13 @@ struct intel_context {
#define CONTEXT_NOPREEMPT 7
u32 *lrc_reg_state;
- u64 lrc_desc;
+ union {
+ struct {
+ u32 lrca;
+ u32 ccid;
+ };
+ u64 desc;
+ } lrc;
u32 tag; /* cookie passed to HW to track this context on submission */
/* Time on GPU as tracked by the hw. */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index b469de0dd9b6..a1aa0d3e8be1 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -333,13 +333,4 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
return intel_engine_has_preemption(engine);
}
-static inline bool
-intel_engine_has_timeslices(const struct intel_engine_cs *engine)
-{
- if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
- return false;
-
- return intel_engine_has_semaphores(engine);
-}
-
#endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 3aa8a652c16d..883a9b7fe88d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1295,6 +1295,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
+ if (HAS_EXECLISTS(dev_priv)) {
+ drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
+ ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
+ drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
+ ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
+ }
drm_printf(m, "\tRING_START: 0x%08x\n",
ENGINE_READ(engine, RING_START));
drm_printf(m, "\tRING_HEAD: 0x%08x\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 80cdde712842..0be674ae1cf6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -157,6 +157,20 @@ struct intel_engine_execlists {
struct i915_priolist default_priolist;
/**
+ * @ccid: identifier for contexts submitted to this engine
+ */
+ u32 ccid;
+
+ /**
+ * @yield: CCID at the time of the last semaphore-wait interrupt.
+ *
+ * Instead of leaving a semaphore busy-spinning on an engine, we would
+ * like to switch to another ready context, i.e. yielding the semaphore
+ * timeslice.
+ */
+ u32 yield;
+
+ /**
* @error_interrupt: CS Master EIR
*
* The CS generates an interrupt when it detects an error. We capture
@@ -295,8 +309,7 @@ struct intel_engine_cs {
u32 context_size;
u32 mmio_base;
- unsigned int context_tag;
-#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)
+ unsigned long context_tag;
struct rb_node uabi_node;
@@ -483,10 +496,11 @@ struct intel_engine_cs {
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
-#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
-#define I915_ENGINE_IS_VIRTUAL BIT(5)
-#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
-#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
+#define I915_ENGINE_HAS_TIMESLICES BIT(4)
+#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5)
+#define I915_ENGINE_IS_VIRTUAL BIT(6)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
unsigned int flags;
/*
@@ -585,6 +599,15 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine)
}
static inline bool
+intel_engine_has_timeslices(const struct intel_engine_cs *engine)
+{
+ if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ return false;
+
+ return engine->flags & I915_ENGINE_HAS_TIMESLICES;
+}
+
+static inline bool
intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
{
return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index f0e7fd95165a..0cc7dd54f4f9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -39,6 +39,15 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
}
}
+ if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+ WRITE_ONCE(engine->execlists.yield,
+ ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
+ ENGINE_TRACE(engine, "semaphore yield: %08x\n",
+ engine->execlists.yield);
+ if (del_timer(&engine->execlists.timer))
+ tasklet = true;
+ }
+
if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
tasklet = true;
@@ -228,7 +237,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
const u32 irqs =
GT_CS_MASTER_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT |
- GT_CONTEXT_SWITCH_INTERRUPT;
+ GT_CONTEXT_SWITCH_INTERRUPT |
+ GT_WAIT_SEMAPHORE_INTERRUPT;
struct intel_uncore *uncore = gt->uncore;
const u32 dmask = irqs << 16 | irqs;
const u32 smask = irqs << 16;
@@ -366,7 +376,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
const u32 irqs =
GT_CS_MASTER_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT |
- GT_CONTEXT_SWITCH_INTERRUPT;
+ GT_CONTEXT_SWITCH_INTERRUPT |
+ GT_WAIT_SEMAPHORE_INTERRUPT;
const u32 gt_interrupts[] = {
irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 683014e7bc51..7c369737d1cf 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -456,10 +456,10 @@ assert_priority_queue(const struct i915_request *prev,
* engine info, SW context ID and SW counter need to form a unique number
* (Context ID) per lrc.
*/
-static u64
+static u32
lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
{
- u64 desc;
+ u32 desc;
desc = INTEL_LEGACY_32B_CONTEXT;
if (i915_vm_is_4lvl(ce->vm))
@@ -470,21 +470,7 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
if (IS_GEN(engine->i915, 8))
desc |= GEN8_CTX_L3LLC_COHERENT;
- desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
- /*
- * The following 32bits are copied into the OA reports (dword 2).
- * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
- * anything below.
- */
- if (INTEL_GEN(engine->i915) >= 11) {
- desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
- /* bits 48-53 */
-
- desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
- /* bits 61-63 */
- }
-
- return desc;
+ return i915_ggtt_offset(ce->state) | desc;
}
static inline unsigned int dword_in_page(void *addr)
@@ -1192,7 +1178,7 @@ static void reset_active(struct i915_request *rq,
__execlists_update_reg_state(ce, engine, head);
/* We've switched away, so this should be a no-op, but intent matters */
- ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
}
static u32 intel_context_get_runtime(const struct intel_context *ce)
@@ -1251,18 +1237,23 @@ __execlists_schedule_in(struct i915_request *rq)
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
execlists_check_context(ce, engine);
- ce->lrc_desc &= ~GENMASK_ULL(47, 37);
if (ce->tag) {
/* Use a fixed tag for OA and friends */
- ce->lrc_desc |= (u64)ce->tag << 32;
+ GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
+ ce->lrc.ccid = ce->tag;
} else {
/* We don't need a strict matching tag, just different values */
- ce->lrc_desc |=
- (u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
- GEN11_SW_CTX_ID_SHIFT;
- BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
+ unsigned int tag = ffs(engine->context_tag);
+
+ GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
+ clear_bit(tag - 1, &engine->context_tag);
+ ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
+
+ BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
}
+ ce->lrc.ccid |= engine->execlists.ccid;
+
__intel_gt_pm_get(engine->gt);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
intel_engine_context_in(engine);
@@ -1302,7 +1293,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
static inline void
__execlists_schedule_out(struct i915_request *rq,
- struct intel_engine_cs * const engine)
+ struct intel_engine_cs * const engine,
+ unsigned int ccid)
{
struct intel_context * const ce = rq->context;
@@ -1320,6 +1312,14 @@ __execlists_schedule_out(struct i915_request *rq,
i915_request_completed(rq))
intel_engine_add_retire(engine, ce->timeline);
+ ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
+ ccid &= GEN12_MAX_CONTEXT_HW_ID;
+ if (ccid < BITS_PER_LONG) {
+ GEM_BUG_ON(ccid == 0);
+ GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
+ set_bit(ccid - 1, &engine->context_tag);
+ }
+
intel_context_update_runtime(ce);
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
@@ -1345,15 +1345,17 @@ execlists_schedule_out(struct i915_request *rq)
{
struct intel_context * const ce = rq->context;
struct intel_engine_cs *cur, *old;
+ u32 ccid;
trace_i915_request_out(rq);
+ ccid = rq->context->lrc.ccid;
old = READ_ONCE(ce->inflight);
do
cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
while (!try_cmpxchg(&ce->inflight, &old, cur));
if (!cur)
- __execlists_schedule_out(rq, old);
+ __execlists_schedule_out(rq, old, ccid);
i915_request_put(rq);
}
@@ -1361,7 +1363,7 @@ execlists_schedule_out(struct i915_request *rq)
static u64 execlists_update_context(struct i915_request *rq)
{
struct intel_context *ce = rq->context;
- u64 desc = ce->lrc_desc;
+ u64 desc = ce->lrc.desc;
u32 tail, prev;
/*
@@ -1400,7 +1402,7 @@ static u64 execlists_update_context(struct i915_request *rq)
*/
wmb();
- ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
+ ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
return desc;
}
@@ -1719,6 +1721,9 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
+ if (p->flags & I915_DEPENDENCY_WEAK)
+ continue;
+
/* Leave semaphores spinning on the other engines */
if (w->engine != rq->engine)
continue;
@@ -1754,7 +1759,9 @@ static void defer_active(struct intel_engine_cs *engine)
}
static bool
-need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+need_timeslice(const struct intel_engine_cs *engine,
+ const struct i915_request *rq,
+ const struct rb_node *rb)
{
int hint;
@@ -1762,12 +1769,57 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
return false;
hint = engine->execlists.queue_priority_hint;
+
+ if (rb) {
+ const struct virtual_engine *ve =
+ rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+ const struct intel_engine_cs *inflight =
+ intel_context_inflight(&ve->context);
+
+ if (!inflight || inflight == engine) {
+ struct i915_request *next;
+
+ rcu_read_lock();
+ next = READ_ONCE(ve->request);
+ if (next)
+ hint = max(hint, rq_prio(next));
+ rcu_read_unlock();
+ }
+ }
+
if (!list_is_last(&rq->sched.link, &engine->active.requests))
hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
+ GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE);
return hint >= effective_prio(rq);
}
+static bool
+timeslice_yield(const struct intel_engine_execlists *el,
+ const struct i915_request *rq)
+{
+ /*
+ * Once bitten, forever smitten!
+ *
+ * If the active context ever busy-waited on a semaphore,
+ * it will be treated as a hog until the end of its timeslice (i.e.
+ * until it is scheduled out and replaced by a new submission,
+ * possibly even its own lite-restore). The HW only sends an interrupt
+ * on the first miss, and we do know if that semaphore has been
+ * signaled, or even if it is now stuck on another semaphore. Play
+ * safe, yield if it might be stuck -- it will be given a fresh
+ * timeslice in the near future.
+ */
+ return rq->context->lrc.ccid == READ_ONCE(el->yield);
+}
+
+static bool
+timeslice_expired(const struct intel_engine_execlists *el,
+ const struct i915_request *rq)
+{
+ return timer_expired(&el->timer) || timeslice_yield(el, rq);
+}
+
static int
switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
{
@@ -1783,8 +1835,7 @@ timeslice(const struct intel_engine_cs *engine)
return READ_ONCE(engine->props.timeslice_duration_ms);
}
-static unsigned long
-active_timeslice(const struct intel_engine_cs *engine)
+static unsigned long active_timeslice(const struct intel_engine_cs *engine)
{
const struct intel_engine_execlists *execlists = &engine->execlists;
const struct i915_request *rq = *execlists->active;
@@ -1806,10 +1857,9 @@ static void set_timeslice(struct intel_engine_cs *engine)
set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
}
-static void start_timeslice(struct intel_engine_cs *engine)
+static void start_timeslice(struct intel_engine_cs *engine, int prio)
{
struct intel_engine_execlists *execlists = &engine->execlists;
- int prio = queue_prio(execlists);
WRITE_ONCE(execlists->switch_priority_hint, prio);
if (prio == INT_MIN)
@@ -1945,14 +1995,15 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
__unwind_incomplete_requests(engine);
last = NULL;
- } else if (need_timeslice(engine, last) &&
- timer_expired(&engine->execlists.timer)) {
+ } else if (need_timeslice(engine, last, rb) &&
+ timeslice_expired(execlists, last)) {
ENGINE_TRACE(engine,
- "expired last=%llx:%lld, prio=%d, hint=%d\n",
+ "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
last->fence.context,
last->fence.seqno,
last->sched.attr.priority,
- execlists->queue_priority_hint);
+ execlists->queue_priority_hint,
+ yesno(timeslice_yield(execlists, last)));
ring_set_paused(engine, 1);
defer_active(engine);
@@ -1987,7 +2038,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* Even if ELSP[1] is occupied and not worthy
* of timeslices, our queue might be.
*/
- start_timeslice(engine);
+ start_timeslice(engine, queue_prio(execlists));
return;
}
}
@@ -2022,7 +2073,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.active.lock);
- start_timeslice(engine);
+ start_timeslice(engine, rq_prio(rq));
return; /* leave this for another sibling */
}
@@ -2213,6 +2264,7 @@ done:
}
clear_ports(port + 1, last_port - port);
+ WRITE_ONCE(execlists->yield, -1);
execlists_submit_ports(engine);
set_preempt_timeout(engine, *active);
} else {
@@ -3043,7 +3095,7 @@ __execlists_context_pin(struct intel_context *ce,
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
- ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
+ ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
__execlists_update_reg_state(ce, engine, ce->ring->tail);
@@ -3072,7 +3124,7 @@ static void execlists_context_reset(struct intel_context *ce)
ce, ce->engine, ce->ring, true);
__execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
- ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
}
static const struct intel_context_ops execlists_context_ops = {
@@ -3541,7 +3593,7 @@ static void enable_execlists(struct intel_engine_cs *engine)
enable_error_interrupt(engine);
- engine->context_tag = 0;
+ engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
}
static bool unexpected_starting_state(struct intel_engine_cs *engine)
@@ -3753,7 +3805,7 @@ out_replay:
head, ce->ring->tail);
__execlists_reset_reg_state(ce, engine);
__execlists_update_reg_state(ce, engine, head);
- ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
+ ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
unwind:
/* Push back any incomplete requests for replay after the reset. */
@@ -4369,8 +4421,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
if (!intel_vgpu_active(engine->i915)) {
engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
- if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
+ if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+ if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ engine->flags |= I915_ENGINE_HAS_TIMESLICES;
+ }
}
if (INTEL_GEN(engine->i915) >= 12)
@@ -4449,6 +4504,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
+ engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
}
static void rcs_submission_override(struct intel_engine_cs *engine)
@@ -4516,6 +4572,11 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
else
execlists->csb_size = GEN11_CSB_ENTRIES;
+ if (INTEL_GEN(engine->i915) >= 11) {
+ execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
+ execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
+ }
+
reset_csb_pointers(engine);
/* Finally, take ownership and responsibility for cleanup! */
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index cfaf141bac4d..19542fd9e207 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -81,13 +81,14 @@ static void rps_enable_interrupts(struct intel_rps *rps)
events = (GEN6_PM_RP_UP_THRESHOLD |
GEN6_PM_RP_DOWN_THRESHOLD |
GEN6_PM_RP_DOWN_TIMEOUT);
-
WRITE_ONCE(rps->pm_events, events);
+
spin_lock_irq(&gt->irq_lock);
gen6_gt_pm_enable_irq(gt, rps->pm_events);
spin_unlock_irq(&gt->irq_lock);
- set(gt->uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, rps->cur_freq));
+ intel_uncore_write(gt->uncore,
+ GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq));
}
static void gen6_rps_reset_interrupts(struct intel_rps *rps)
@@ -120,7 +121,9 @@ static void rps_disable_interrupts(struct intel_rps *rps)
struct intel_gt *gt = rps_to_gt(rps);
WRITE_ONCE(rps->pm_events, 0);
- set(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
+
+ intel_uncore_write(gt->uncore,
+ GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
spin_lock_irq(&gt->irq_lock);
gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 91debbc97c9a..08b56d7ab4f4 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -521,6 +521,8 @@ int intel_timeline_read_hwsp(struct i915_request *from,
rcu_read_lock();
cl = rcu_dereference(from->hwsp_cacheline);
+ if (i915_request_completed(from)) /* confirm cacheline is valid */
+ goto unlock;
if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
goto unlock; /* seqno wrapped and completed! */
if (unlikely(i915_request_completed(from)))
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 6f06ba750a0a..904193960584 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -811,7 +811,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
}
}
- err = release_queue(outer, vma, n, INT_MAX);
+ err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
if (err)
goto out;
@@ -929,7 +929,7 @@ create_rewinder(struct intel_context *ce,
goto err;
}
- cs = intel_ring_begin(rq, 10);
+ cs = intel_ring_begin(rq, 14);
if (IS_ERR(cs)) {
err = PTR_ERR(cs);
goto err;
@@ -941,8 +941,8 @@ create_rewinder(struct intel_context *ce,
*cs++ = MI_SEMAPHORE_WAIT |
MI_SEMAPHORE_GLOBAL_GTT |
MI_SEMAPHORE_POLL |
- MI_SEMAPHORE_SAD_NEQ_SDD;
- *cs++ = 0;
+ MI_SEMAPHORE_SAD_GTE_SDD;
+ *cs++ = idx;
*cs++ = offset;
*cs++ = 0;
@@ -951,6 +951,11 @@ create_rewinder(struct intel_context *ce,
*cs++ = offset + idx * sizeof(u32);
*cs++ = 0;
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = offset;
+ *cs++ = 0;
+ *cs++ = idx + 1;
+
intel_ring_advance(rq, cs);
rq->sched.attr.priority = I915_PRIORITY_MASK;
@@ -984,7 +989,7 @@ static int live_timeslice_rewind(void *arg)
for_each_engine(engine, gt, id) {
enum { A1, A2, B1 };
- enum { X = 1, Y, Z };
+ enum { X = 1, Z, Y };
struct i915_request *rq[3] = {};
struct intel_context *ce;
unsigned long heartbeat;
@@ -1017,13 +1022,13 @@ static int live_timeslice_rewind(void *arg)
goto err;
}
- rq[0] = create_rewinder(ce, NULL, slot, 1);
+ rq[0] = create_rewinder(ce, NULL, slot, X);
if (IS_ERR(rq[0])) {
intel_context_put(ce);
goto err;
}
- rq[1] = create_rewinder(ce, NULL, slot, 2);
+ rq[1] = create_rewinder(ce, NULL, slot, Y);
intel_context_put(ce);
if (IS_ERR(rq[1]))
goto err;
@@ -1041,7 +1046,7 @@ static int live_timeslice_rewind(void *arg)
goto err;
}
- rq[2] = create_rewinder(ce, rq[0], slot, 3);
+ rq[2] = create_rewinder(ce, rq[0], slot, Z);
intel_context_put(ce);
if (IS_ERR(rq[2]))
goto err;
@@ -1055,15 +1060,12 @@ static int live_timeslice_rewind(void *arg)
GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
- GEM_BUG_ON(!i915_request_is_active(rq[A1]));
- GEM_BUG_ON(!i915_request_is_active(rq[A2]));
- GEM_BUG_ON(!i915_request_is_active(rq[B1]));
-
- /* Wait for the timeslice to kick in */
- del_timer(&engine->execlists.timer);
- tasklet_hi_schedule(&engine->execlists.tasklet);
- intel_engine_flush_submission(engine);
-
+ if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
+ /* Wait for the timeslice to kick in */
+ del_timer(&engine->execlists.timer);
+ tasklet_hi_schedule(&engine->execlists.tasklet);
+ intel_engine_flush_submission(engine);
+ }
/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
GEM_BUG_ON(!i915_request_is_active(rq[A1]));
GEM_BUG_ON(!i915_request_is_active(rq[B1]));
@@ -1274,6 +1276,121 @@ err_obj:
return err;
}
+static int live_timeslice_nopreempt(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ /*
+ * We should not timeslice into a request that is marked with
+ * I915_REQUEST_NOPREEMPT.
+ */
+ if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ return 0;
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+ struct i915_request *rq;
+ unsigned long timeslice;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
+ engine_heartbeat_disable(engine);
+ timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
+
+ /* Create an unpreemptible spinner */
+
+ rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_heartbeat;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ i915_request_put(rq);
+ err = -ETIME;
+ goto out_spin;
+ }
+
+ set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
+ i915_request_put(rq);
+
+ /* Followed by a maximum priority barrier (heartbeat) */
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(rq);
+ goto out_spin;
+ }
+
+ rq = intel_context_create_request(ce);
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_spin;
+ }
+
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ /*
+ * Wait until the barrier is in ELSP, and we know timeslicing
+ * will have been activated.
+ */
+ if (wait_for_submit(engine, rq, HZ / 2)) {
+ i915_request_put(rq);
+ err = -ETIME;
+ goto out_spin;
+ }
+
+ /*
+ * Since the ELSP[0] request is unpreemptible, it should not
+ * allow the maximum priority barrier through. Wait long
+ * enough to see if it is timesliced in by mistake.
+ */
+ if (i915_request_wait(rq, 0, timeslice_threshold(engine)) >= 0) {
+ pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
+ engine->name);
+ err = -EINVAL;
+ }
+ i915_request_put(rq);
+
+out_spin:
+ igt_spinner_end(&spin);
+out_heartbeat:
+ xchg(&engine->props.timeslice_duration_ms, timeslice);
+ engine_heartbeat_enable(engine);
+ if (err)
+ break;
+
+ if (igt_flush_test(gt->i915)) {
+ err = -EIO;
+ break;
+ }
+ }
+
+ igt_spinner_fini(&spin);
+ return err;
+}
+
static int live_busywait_preempt(void *arg)
{
struct intel_gt *gt = arg;
@@ -3960,6 +4077,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_timeslice_preempt),
SUBTEST(live_timeslice_rewind),
SUBTEST(live_timeslice_queue),
+ SUBTEST(live_timeslice_nopreempt),
SUBTEST(live_busywait_preempt),
SUBTEST(live_preempt),
SUBTEST(live_late_preempt),
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 5ed323254ee1..32785463ec9e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -623,6 +623,8 @@ err_request:
err = -EINVAL;
goto out_unpin;
}
+ } else {
+ rsvd = 0;
}
expect = results[0];
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index fe7778c28d2d..aa6d56e25a10 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -217,7 +217,7 @@ static void guc_wq_item_append(struct intel_guc *guc,
static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
struct intel_engine_cs *engine = rq->engine;
- u32 ctx_desc = lower_32_bits(rq->context->lrc_desc);
+ u32 ctx_desc = rq->context->lrc.ccid;
u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
guc_wq_item_append(guc, engine->guc_id, ctx_desc,
diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index a83df2f84eb9..a1696e9ce4b6 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -208,14 +208,41 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
SKL_FUSE_PG_DIST_STATUS(SKL_PG0) |
SKL_FUSE_PG_DIST_STATUS(SKL_PG1) |
SKL_FUSE_PG_DIST_STATUS(SKL_PG2);
- vgpu_vreg_t(vgpu, LCPLL1_CTL) |=
- LCPLL_PLL_ENABLE |
- LCPLL_PLL_LOCK;
- vgpu_vreg_t(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE;
-
+ /*
+ * Only 1 PIPE enabled in current vGPU display and PIPE_A is
+ * tied to TRANSCODER_A in HW, so it's safe to assume PIPE_A,
+ * TRANSCODER_A can be enabled. PORT_x depends on the input of
+ * setup_virtual_dp_monitor, we can bind DPLL0 to any PORT_x
+ * so we fixed to DPLL0 here.
+ * Setup DPLL0: DP link clk 1620 MHz, non SSC, DP Mode
+ */
+ vgpu_vreg_t(vgpu, DPLL_CTRL1) =
+ DPLL_CTRL1_OVERRIDE(DPLL_ID_SKL_DPLL0);
+ vgpu_vreg_t(vgpu, DPLL_CTRL1) |=
+ DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, DPLL_ID_SKL_DPLL0);
+ vgpu_vreg_t(vgpu, LCPLL1_CTL) =
+ LCPLL_PLL_ENABLE | LCPLL_PLL_LOCK;
+ vgpu_vreg_t(vgpu, DPLL_STATUS) = DPLL_LOCK(DPLL_ID_SKL_DPLL0);
+ /*
+ * Golden M/N are calculated based on:
+ * 24 bpp, 4 lanes, 154000 pixel clk (from virtual EDID),
+ * DP link clk 1620 MHz and non-constant_n.
+ * TODO: calculate DP link symbol clk and stream clk m/n.
+ */
+ vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) = 63 << TU_SIZE_SHIFT;
+ vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) |= 0x5b425e;
+ vgpu_vreg_t(vgpu, PIPE_DATA_N1(TRANSCODER_A)) = 0x800000;
+ vgpu_vreg_t(vgpu, PIPE_LINK_M1(TRANSCODER_A)) = 0x3cd6e;
+ vgpu_vreg_t(vgpu, PIPE_LINK_N1(TRANSCODER_A)) = 0x80000;
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
+ ~DPLL_CTRL2_DDI_CLK_OFF(PORT_B);
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+ DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_B);
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+ DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_B);
vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED;
vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
@@ -236,6 +263,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
+ ~DPLL_CTRL2_DDI_CLK_OFF(PORT_C);
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+ DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_C);
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+ DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_C);
vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT;
vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
@@ -256,6 +289,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) {
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
+ ~DPLL_CTRL2_DDI_CLK_OFF(PORT_D);
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+ DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_D);
+ vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+ DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_D);
vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT;
vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 074c4efb58eb..eee530453aa6 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -131,6 +131,7 @@ struct kvmgt_vdev {
struct work_struct release_work;
atomic_t released;
struct vfio_device *vfio_device;
+ struct vfio_group *vfio_group;
};
static inline struct kvmgt_vdev *kvmgt_vdev(struct intel_vgpu *vgpu)
@@ -151,6 +152,7 @@ static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
unsigned long size)
{
struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+ struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
int total_pages;
int npage;
int ret;
@@ -160,7 +162,7 @@ static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
for (npage = 0; npage < total_pages; npage++) {
unsigned long cur_gfn = gfn + npage;
- ret = vfio_unpin_pages(mdev_dev(kvmgt_vdev(vgpu)->mdev), &cur_gfn, 1);
+ ret = vfio_group_unpin_pages(vdev->vfio_group, &cur_gfn, 1);
drm_WARN_ON(&i915->drm, ret != 1);
}
}
@@ -169,6 +171,7 @@ static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
unsigned long size, struct page **page)
{
+ struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
unsigned long base_pfn = 0;
int total_pages;
int npage;
@@ -183,8 +186,8 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
unsigned long cur_gfn = gfn + npage;
unsigned long pfn;
- ret = vfio_pin_pages(mdev_dev(kvmgt_vdev(vgpu)->mdev), &cur_gfn, 1,
- IOMMU_READ | IOMMU_WRITE, &pfn);
+ ret = vfio_group_pin_pages(vdev->vfio_group, &cur_gfn, 1,
+ IOMMU_READ | IOMMU_WRITE, &pfn);
if (ret != 1) {
gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n",
cur_gfn, ret);
@@ -792,6 +795,7 @@ static int intel_vgpu_open(struct mdev_device *mdev)
struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
unsigned long events;
int ret;
+ struct vfio_group *vfio_group;
vdev->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
vdev->group_notifier.notifier_call = intel_vgpu_group_notifier;
@@ -814,6 +818,14 @@ static int intel_vgpu_open(struct mdev_device *mdev)
goto undo_iommu;
}
+ vfio_group = vfio_group_get_external_user_from_dev(mdev_dev(mdev));
+ if (IS_ERR_OR_NULL(vfio_group)) {
+ ret = !vfio_group ? -EFAULT : PTR_ERR(vfio_group);
+ gvt_vgpu_err("vfio_group_get_external_user_from_dev failed\n");
+ goto undo_register;
+ }
+ vdev->vfio_group = vfio_group;
+
/* Take a module reference as mdev core doesn't take
* a reference for vendor driver.
*/
@@ -830,6 +842,10 @@ static int intel_vgpu_open(struct mdev_device *mdev)
return ret;
undo_group:
+ vfio_group_put_external_user(vdev->vfio_group);
+ vdev->vfio_group = NULL;
+
+undo_register:
vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
&vdev->group_notifier);
@@ -884,6 +900,7 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
kvmgt_guest_exit(info);
intel_vgpu_release_msi_eventfd_ctx(vgpu);
+ vfio_group_put_external_user(vdev->vfio_group);
vdev->kvm = NULL;
vgpu->handle = 0;
@@ -2035,33 +2052,14 @@ static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
void *buf, unsigned long len, bool write)
{
struct kvmgt_guest_info *info;
- struct kvm *kvm;
- int idx, ret;
- bool kthread = current->mm == NULL;
if (!handle_valid(handle))
return -ESRCH;
info = (struct kvmgt_guest_info *)handle;
- kvm = info->kvm;
-
- if (kthread) {
- if (!mmget_not_zero(kvm->mm))
- return -EFAULT;
- use_mm(kvm->mm);
- }
-
- idx = srcu_read_lock(&kvm->srcu);
- ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
- kvm_read_guest(kvm, gpa, buf, len);
- srcu_read_unlock(&kvm->srcu, idx);
-
- if (kthread) {
- unuse_mm(kvm->mm);
- mmput(kvm->mm);
- }
- return ret;
+ return vfio_dma_rw(kvmgt_vdev(info->vgpu)->vfio_group,
+ gpa, buf, len, write);
}
static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index cb11c3184085..e92ed96c9b23 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -290,7 +290,7 @@ static void
shadow_context_descriptor_update(struct intel_context *ce,
struct intel_vgpu_workload *workload)
{
- u64 desc = ce->lrc_desc;
+ u64 desc = ce->lrc.desc;
/*
* Update bits 0-11 of the context descriptor which includes flags
@@ -300,7 +300,7 @@ shadow_context_descriptor_update(struct intel_context *ce,
desc |= (u64)workload->ctx_desc.addressing_mode <<
GEN8_CTX_ADDRESSING_MODE_SHIFT;
- ce->lrc_desc = desc;
+ ce->lrc.desc = desc;
}
static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
@@ -379,7 +379,11 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) {
struct i915_page_directory * const pd =
i915_pd_entry(ppgtt->pd, i);
-
+ /* skip now as current i915 ppgtt alloc won't allocate
+ top level pdp for non 4-level table, won't impact
+ shadow ppgtt. */
+ if (!pd)
+ break;
px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i];
}
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1f5b9a584f71..62b901ffabf9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1507,6 +1507,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
(IS_ICELAKE(p) && IS_REVID(p, since, until))
#define TGL_REVID_A0 0x0
+#define TGL_REVID_B0 0x1
+#define TGL_REVID_C0 0x2
#define IS_TGL_REVID(p, since, until) \
(IS_TIGERLAKE(p) && IS_REVID(p, since, until))
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 4518b9b35c3d..02ad1acd117c 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -128,6 +128,13 @@ search_again:
active = NULL;
INIT_LIST_HEAD(&eviction_list);
list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
+ if (vma == active) { /* now seen this vma twice */
+ if (flags & PIN_NONBLOCK)
+ break;
+
+ active = ERR_PTR(-EAGAIN);
+ }
+
/*
* We keep this list in a rough least-recently scanned order
* of active elements (inactive elements are cheap to reap).
@@ -143,21 +150,12 @@ search_again:
* To notice when we complete one full cycle, we record the
* first active element seen, before moving it to the tail.
*/
- if (i915_vma_is_active(vma)) {
- if (vma == active) {
- if (flags & PIN_NONBLOCK)
- break;
-
- active = ERR_PTR(-EAGAIN);
- }
-
- if (active != ERR_PTR(-EAGAIN)) {
- if (!active)
- active = vma;
+ if (active != ERR_PTR(-EAGAIN) && i915_vma_is_active(vma)) {
+ if (!active)
+ active = vma;
- list_move_tail(&vma->vm_link, &vm->bound_list);
- continue;
- }
+ list_move_tail(&vma->vm_link, &vm->bound_list);
+ continue;
}
if (mark_free(&scan, vma, flags, &eviction_list))
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 2a4cd0ba5464..5c8e51d2ba5b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1207,8 +1207,6 @@ static void engine_record_registers(struct intel_engine_coredump *ee)
static void record_request(const struct i915_request *request,
struct i915_request_coredump *erq)
{
- const struct i915_gem_context *ctx;
-
erq->flags = request->fence.flags;
erq->context = request->fence.context;
erq->seqno = request->fence.seqno;
@@ -1219,9 +1217,13 @@ static void record_request(const struct i915_request *request,
erq->pid = 0;
rcu_read_lock();
- ctx = rcu_dereference(request->context->gem_context);
- if (ctx)
- erq->pid = pid_nr(ctx->pid);
+ if (!intel_context_is_closed(request->context)) {
+ const struct i915_gem_context *ctx;
+
+ ctx = rcu_dereference(request->context->gem_context);
+ if (ctx)
+ erq->pid = pid_nr(ctx->pid);
+ }
rcu_read_unlock();
}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9f0653cf0510..8a2b83807ffc 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3358,9 +3358,10 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
{
struct intel_uncore *uncore = &dev_priv->uncore;
- u32 de_pipe_masked = GEN8_PIPE_CDCLK_CRC_DONE;
+ u32 de_pipe_masked = gen8_de_pipe_fault_mask(dev_priv) |
+ GEN8_PIPE_CDCLK_CRC_DONE;
u32 de_pipe_enables;
- u32 de_port_masked = GEN8_AUX_CHANNEL_A;
+ u32 de_port_masked = gen8_de_port_aux_mask(dev_priv);
u32 de_port_enables;
u32 de_misc_masked = GEN8_DE_EDP_PSR;
enum pipe pipe;
@@ -3368,21 +3369,8 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
if (INTEL_GEN(dev_priv) <= 10)
de_misc_masked |= GEN8_DE_MISC_GSE;
- if (INTEL_GEN(dev_priv) >= 9) {
- de_pipe_masked |= GEN9_DE_PIPE_IRQ_FAULT_ERRORS;
- de_port_masked |= GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C |
- GEN9_AUX_CHANNEL_D;
- if (IS_GEN9_LP(dev_priv))
- de_port_masked |= BXT_DE_PORT_GMBUS;
- } else {
- de_pipe_masked |= GEN8_DE_PIPE_IRQ_FAULT_ERRORS;
- }
-
- if (INTEL_GEN(dev_priv) >= 11)
- de_port_masked |= ICL_AUX_CHANNEL_E;
-
- if (IS_CNL_WITH_PORT_F(dev_priv) || INTEL_GEN(dev_priv) >= 11)
- de_port_masked |= CNL_AUX_CHANNEL_F;
+ if (IS_GEN9_LP(dev_priv))
+ de_port_masked |= BXT_DE_PORT_GMBUS;
de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK |
GEN8_PIPE_FIFO_UNDERRUN;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 551be589d6f4..cf2c01f17da8 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1310,8 +1310,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
* dropped by GuC. They won't be part of the context
* ID in the OA reports, so squash those lower bits.
*/
- stream->specific_ctx_id =
- lower_32_bits(ce->lrc_desc) >> 12;
+ stream->specific_ctx_id = ce->lrc.lrca >> 12;
/*
* GuC uses the top bit to signal proxy submission, so
@@ -1328,11 +1327,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
/*
* Pick an unused context id
- * 0 - (NUM_CONTEXT_TAG - 1) are used by other contexts
+ * 0 - BITS_PER_LONG are used by other contexts
* GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
*/
stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
- BUILD_BUG_ON((GEN12_MAX_CONTEXT_HW_ID - 1) < NUM_CONTEXT_TAG);
break;
}
@@ -2941,49 +2939,6 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
}
/**
- * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
- * @stream: An i915 perf stream
- * @file: An i915 perf stream file
- * @buf: destination buffer given by userspace
- * @count: the number of bytes userspace wants to read
- * @ppos: (inout) file seek position (unused)
- *
- * Besides wrapping &i915_perf_stream_ops->read this provides a common place to
- * ensure that if we've successfully copied any data then reporting that takes
- * precedence over any internal error status, so the data isn't lost.
- *
- * For example ret will be -ENOSPC whenever there is more buffered data than
- * can be copied to userspace, but that's only interesting if we weren't able
- * to copy some data because it implies the userspace buffer is too small to
- * receive a single record (and we never split records).
- *
- * Another case with ret == -EFAULT is more of a grey area since it would seem
- * like bad form for userspace to ask us to overrun its buffer, but the user
- * knows best:
- *
- * http://yarchive.net/comp/linux/partial_reads_writes.html
- *
- * Returns: The number of bytes copied or a negative error code on failure.
- */
-static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
- struct file *file,
- char __user *buf,
- size_t count,
- loff_t *ppos)
-{
- /* Note we keep the offset (aka bytes read) separate from any
- * error status so that the final check for whether we return
- * the bytes read with a higher precedence than any error (see
- * comment below) doesn't need to be handled/duplicated in
- * stream->ops->read() implementations.
- */
- size_t offset = 0;
- int ret = stream->ops->read(stream, buf, count, &offset);
-
- return offset ?: (ret ?: -EAGAIN);
-}
-
-/**
* i915_perf_read - handles read() FOP for i915 perf stream FDs
* @file: An i915 perf stream file
* @buf: destination buffer given by userspace
@@ -3008,7 +2963,8 @@ static ssize_t i915_perf_read(struct file *file,
{
struct i915_perf_stream *stream = file->private_data;
struct i915_perf *perf = stream->perf;
- ssize_t ret;
+ size_t offset = 0;
+ int ret;
/* To ensure it's handled consistently we simply treat all reads of a
* disabled stream as an error. In particular it might otherwise lead
@@ -3031,13 +2987,12 @@ static ssize_t i915_perf_read(struct file *file,
return ret;
mutex_lock(&perf->lock);
- ret = i915_perf_read_locked(stream, file,
- buf, count, ppos);
+ ret = stream->ops->read(stream, buf, count, &offset);
mutex_unlock(&perf->lock);
- } while (ret == -EAGAIN);
+ } while (!offset && !ret);
} else {
mutex_lock(&perf->lock);
- ret = i915_perf_read_locked(stream, file, buf, count, ppos);
+ ret = stream->ops->read(stream, buf, count, &offset);
mutex_unlock(&perf->lock);
}
@@ -3048,15 +3003,15 @@ static ssize_t i915_perf_read(struct file *file,
* and read() returning -EAGAIN. Clearing the oa.pollin state here
* effectively ensures we back off until the next hrtimer callback
* before reporting another EPOLLIN event.
+ * The exception to this is if ops->read() returned -ENOSPC which means
+ * that more OA data is available than could fit in the user provided
+ * buffer. In this case we want the next poll() call to not block.
*/
- if (ret >= 0 || ret == -EAGAIN) {
- /* Maybe make ->pollin per-stream state if we support multiple
- * concurrent streams in the future.
- */
+ if (ret != -ENOSPC)
stream->pollin = false;
- }
- return ret;
+ /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, ... */
+ return offset ?: (ret ?: -EAGAIN);
}
static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 2c062534eac1..569a87d0d799 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -269,12 +269,48 @@ static bool exclusive_mmio_access(const struct drm_i915_private *i915)
return IS_GEN(i915, 7);
}
+static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
+{
+ struct intel_engine_pmu *pmu = &engine->pmu;
+ bool busy;
+ u32 val;
+
+ val = ENGINE_READ_FW(engine, RING_CTL);
+ if (val == 0) /* powerwell off => engine idle */
+ return;
+
+ if (val & RING_WAIT)
+ add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
+ if (val & RING_WAIT_SEMAPHORE)
+ add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
+
+ /* No need to sample when busy stats are supported. */
+ if (intel_engine_supports_stats(engine))
+ return;
+
+ /*
+ * While waiting on a semaphore or event, MI_MODE reports the
+ * ring as idle. However, previously using the seqno, and with
+ * execlists sampling, we account for the ring waiting as the
+ * engine being busy. Therefore, we record the sample as being
+ * busy if either waiting or !idle.
+ */
+ busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
+ if (!busy) {
+ val = ENGINE_READ_FW(engine, RING_MI_MODE);
+ busy = !(val & MODE_IDLE);
+ }
+ if (busy)
+ add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
+}
+
static void
engines_sample(struct intel_gt *gt, unsigned int period_ns)
{
struct drm_i915_private *i915 = gt->i915;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ unsigned long flags;
if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
return;
@@ -283,53 +319,17 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
return;
for_each_engine(engine, gt, id) {
- struct intel_engine_pmu *pmu = &engine->pmu;
- spinlock_t *mmio_lock;
- unsigned long flags;
- bool busy;
- u32 val;
-
if (!intel_engine_pm_get_if_awake(engine))
continue;
- mmio_lock = NULL;
- if (exclusive_mmio_access(i915))
- mmio_lock = &engine->uncore->lock;
-
- if (unlikely(mmio_lock))
- spin_lock_irqsave(mmio_lock, flags);
-
- val = ENGINE_READ_FW(engine, RING_CTL);
- if (val == 0) /* powerwell off => engine idle */
- goto skip;
-
- if (val & RING_WAIT)
- add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
- if (val & RING_WAIT_SEMAPHORE)
- add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
-
- /* No need to sample when busy stats are supported. */
- if (intel_engine_supports_stats(engine))
- goto skip;
-
- /*
- * While waiting on a semaphore or event, MI_MODE reports the
- * ring as idle. However, previously using the seqno, and with
- * execlists sampling, we account for the ring waiting as the
- * engine being busy. Therefore, we record the sample as being
- * busy if either waiting or !idle.
- */
- busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
- if (!busy) {
- val = ENGINE_READ_FW(engine, RING_MI_MODE);
- busy = !(val & MODE_IDLE);
+ if (exclusive_mmio_access(i915)) {
+ spin_lock_irqsave(&engine->uncore->lock, flags);
+ engine_sample(engine, period_ns);
+ spin_unlock_irqrestore(&engine->uncore->lock, flags);
+ } else {
+ engine_sample(engine, period_ns);
}
- if (busy)
- add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
-skip:
- if (unlikely(mmio_lock))
- spin_unlock_irqrestore(mmio_lock, flags);
intel_engine_pm_put_async(engine);
}
}
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index 732aad148881..eb97d626f9ad 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -45,7 +45,7 @@ enum {
* active request.
*/
#define I915_PRIORITY_UNPREEMPTABLE INT_MAX
-#define I915_PRIORITY_BARRIER INT_MAX
+#define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1)
#define __NO_PREEMPTION (I915_PRIORITY_WAIT)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 59e64acc2c56..6e12000c4b6b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -34,8 +34,8 @@
* Follow the style described here for new macros, and while changing existing
* macros. Do **not** mass change existing definitions just to update the style.
*
- * Layout
- * ~~~~~~
+ * File Layout
+ * ~~~~~~~~~~~
*
* Keep helper macros near the top. For example, _PIPE() and friends.
*
@@ -3094,6 +3094,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define GT_BSD_CS_ERROR_INTERRUPT (1 << 15)
#define GT_BSD_USER_INTERRUPT (1 << 12)
#define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
+#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) /* bdw+ */
#define GT_CONTEXT_SWITCH_INTERRUPT (1 << 8)
#define GT_RENDER_L3_PARITY_ERROR_INTERRUPT (1 << 5) /* !snb */
#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c0df71d7d0ff..e2b78db685ea 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1017,11 +1017,15 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
GEM_BUG_ON(to == from);
GEM_BUG_ON(to->timeline == from->timeline);
- if (i915_request_completed(from))
+ if (i915_request_completed(from)) {
+ i915_sw_fence_set_error_once(&to->submit, from->fence.error);
return 0;
+ }
if (to->engine->schedule) {
- ret = i915_sched_node_add_dependency(&to->sched, &from->sched);
+ ret = i915_sched_node_add_dependency(&to->sched,
+ &from->sched,
+ I915_DEPENDENCY_EXTERNAL);
if (ret < 0)
return ret;
}
@@ -1183,7 +1187,9 @@ __i915_request_await_execution(struct i915_request *to,
/* Couple the dependency tree for PI on this exposed to->fence */
if (to->engine->schedule) {
- err = i915_sched_node_add_dependency(&to->sched, &from->sched);
+ err = i915_sched_node_add_dependency(&to->sched,
+ &from->sched,
+ I915_DEPENDENCY_WEAK);
if (err < 0)
return err;
}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 68b06a7ba667..f0a9e8958ca0 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -456,7 +456,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
}
int i915_sched_node_add_dependency(struct i915_sched_node *node,
- struct i915_sched_node *signal)
+ struct i915_sched_node *signal,
+ unsigned long flags)
{
struct i915_dependency *dep;
@@ -465,8 +466,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
return -ENOMEM;
if (!__i915_sched_node_add_dependency(node, signal, dep,
- I915_DEPENDENCY_EXTERNAL |
- I915_DEPENDENCY_ALLOC))
+ flags | I915_DEPENDENCY_ALLOC))
i915_dependency_free(dep);
return 0;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index d1dc4efef77b..6f0bf00fc569 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -34,7 +34,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
unsigned long flags);
int i915_sched_node_add_dependency(struct i915_sched_node *node,
- struct i915_sched_node *signal);
+ struct i915_sched_node *signal,
+ unsigned long flags);
void i915_sched_node_fini(struct i915_sched_node *node);
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index d18e70550054..7186875088a0 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -78,6 +78,7 @@ struct i915_dependency {
unsigned long flags;
#define I915_DEPENDENCY_ALLOC BIT(0)
#define I915_DEPENDENCY_EXTERNAL BIT(1)
+#define I915_DEPENDENCY_WEAK BIT(2)
};
#endif /* _I915_SCHEDULER_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 08699fa069aa..2cd7a7e87c0a 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -158,16 +158,18 @@ vma_create(struct drm_i915_gem_object *obj,
GEM_BUG_ON(!IS_ALIGNED(vma->size, I915_GTT_PAGE_SIZE));
+ spin_lock(&obj->vma.lock);
+
if (i915_is_ggtt(vm)) {
if (unlikely(overflows_type(vma->size, u32)))
- goto err_vma;
+ goto err_unlock;
vma->fence_size = i915_gem_fence_size(vm->i915, vma->size,
i915_gem_object_get_tiling(obj),
i915_gem_object_get_stride(obj));
if (unlikely(vma->fence_size < vma->size || /* overflow */
vma->fence_size > vm->total))
- goto err_vma;
+ goto err_unlock;
GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I915_GTT_MIN_ALIGNMENT));
@@ -179,8 +181,6 @@ vma_create(struct drm_i915_gem_object *obj,
__set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma));
}
- spin_lock(&obj->vma.lock);
-
rb = NULL;
p = &obj->vma.tree.rb_node;
while (*p) {
@@ -225,6 +225,8 @@ vma_create(struct drm_i915_gem_object *obj,
return vma;
+err_unlock:
+ spin_unlock(&obj->vma.lock);
err_vma:
i915_vma_free(vma);
return ERR_PTR(-E2BIG);
@@ -1226,18 +1228,6 @@ int __i915_vma_unbind(struct i915_vma *vma)
lockdep_assert_held(&vma->vm->mutex);
- /*
- * First wait upon any activity as retiring the request may
- * have side-effects such as unpinning or even unbinding this vma.
- *
- * XXX Actually waiting under the vm->mutex is a hinderance and
- * should be pipelined wherever possible. In cases where that is
- * unavoidable, we should lift the wait to before the mutex.
- */
- ret = i915_vma_sync(vma);
- if (ret)
- return ret;
-
if (i915_vma_is_pinned(vma)) {
vma_print_allocator(vma, "is pinned");
return -EAGAIN;
@@ -1311,15 +1301,20 @@ int i915_vma_unbind(struct i915_vma *vma)
if (!drm_mm_node_allocated(&vma->node))
return 0;
- if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
- /* XXX not always required: nop_clear_range */
- wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
-
/* Optimistic wait before taking the mutex */
err = i915_vma_sync(vma);
if (err)
goto out_rpm;
+ if (i915_vma_is_pinned(vma)) {
+ vma_print_allocator(vma, "is pinned");
+ return -EAGAIN;
+ }
+
+ if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+ /* XXX not always required: nop_clear_range */
+ wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
+
err = mutex_lock_interruptible(&vm->mutex);
if (err)
goto out_rpm;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 8375054ba27d..a52986a9e7a6 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4992,7 +4992,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
* WaIncreaseLatencyIPCEnabled: kbl,cfl
* Display WA #1141: kbl,cfl
*/
- if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) ||
+ if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
dev_priv->ipc_enabled)
latency += 4;
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index 58b5f40a07dd..af89c7fc8f59 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -173,7 +173,7 @@ static int igt_vma_create(void *arg)
}
nc = 0;
- for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) {
+ for_each_prime_number(num_ctx, 2 * BITS_PER_LONG) {
for (; nc < num_ctx; nc++) {
ctx = mock_context(i915, "mock");
if (!ctx)
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp108.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp108.c
index 232a9d7c51e5..e770c9497871 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp108.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp108.c
@@ -25,6 +25,9 @@
MODULE_FIRMWARE("nvidia/gp108/sec2/desc.bin");
MODULE_FIRMWARE("nvidia/gp108/sec2/image.bin");
MODULE_FIRMWARE("nvidia/gp108/sec2/sig.bin");
+MODULE_FIRMWARE("nvidia/gv100/sec2/desc.bin");
+MODULE_FIRMWARE("nvidia/gv100/sec2/image.bin");
+MODULE_FIRMWARE("nvidia/gv100/sec2/sig.bin");
static const struct nvkm_sec2_fwif
gp108_sec2_fwif[] = {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c
index b6ebd95c9ba1..a8295653ceab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c
@@ -56,6 +56,22 @@ tu102_sec2_nofw(struct nvkm_sec2 *sec2, int ver,
return 0;
}
+MODULE_FIRMWARE("nvidia/tu102/sec2/desc.bin");
+MODULE_FIRMWARE("nvidia/tu102/sec2/image.bin");
+MODULE_FIRMWARE("nvidia/tu102/sec2/sig.bin");
+MODULE_FIRMWARE("nvidia/tu104/sec2/desc.bin");
+MODULE_FIRMWARE("nvidia/tu104/sec2/image.bin");
+MODULE_FIRMWARE("nvidia/tu104/sec2/sig.bin");
+MODULE_FIRMWARE("nvidia/tu106/sec2/desc.bin");
+MODULE_FIRMWARE("nvidia/tu106/sec2/image.bin");
+MODULE_FIRMWARE("nvidia/tu106/sec2/sig.bin");
+MODULE_FIRMWARE("nvidia/tu116/sec2/desc.bin");
+MODULE_FIRMWARE("nvidia/tu116/sec2/image.bin");
+MODULE_FIRMWARE("nvidia/tu116/sec2/sig.bin");
+MODULE_FIRMWARE("nvidia/tu117/sec2/desc.bin");
+MODULE_FIRMWARE("nvidia/tu117/sec2/image.bin");
+MODULE_FIRMWARE("nvidia/tu117/sec2/sig.bin");
+
static const struct nvkm_sec2_fwif
tu102_sec2_fwif[] = {
{ 0, gp102_sec2_load, &tu102_sec2, &gp102_sec2_acr_1 },
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index bd268028fb3d..583cd6e0ae27 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1039,6 +1039,7 @@ void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt,
static bool host1x_drm_wants_iommu(struct host1x_device *dev)
{
+ struct host1x *host1x = dev_get_drvdata(dev->dev.parent);
struct iommu_domain *domain;
/*
@@ -1076,7 +1077,7 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev)
* sufficient and whether or not the host1x is attached to an IOMMU
* doesn't matter.
*/
- if (!domain && dma_get_mask(dev->dev.parent) <= DMA_BIT_MASK(32))
+ if (!domain && host1x_get_dma_mask(host1x) <= DMA_BIT_MASK(32))
return true;
return domain != NULL;
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index 70ba13e8e58a..512daff92038 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -27,6 +27,7 @@
#include <linux/file.h>
#include <linux/sync_file.h>
+#include <linux/uaccess.h>
#include <drm/drm_file.h>
#include <drm/virtgpu_drm.h>
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index a5038298c6ae..0a5c8cf409fb 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -25,6 +25,7 @@
#include <linux/virtio.h>
#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
#include <drm/drm_file.h>
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 8cdcd6e5f9e1..3596f3923ea3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -850,7 +850,7 @@ extern void vmw_bo_bo_free(struct ttm_buffer_object *bo);
extern int vmw_bo_init(struct vmw_private *dev_priv,
struct vmw_buffer_object *vmw_bo,
size_t size, struct ttm_placement *placement,
- bool interuptable,
+ bool interruptible,
void (*bo_free)(struct ttm_buffer_object *bo));
extern int vmw_user_bo_verify_access(struct ttm_buffer_object *bo,
struct ttm_object_file *tfile);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 178a6cd1a06f..0f8d29397157 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -515,7 +515,7 @@ bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence)
struct vmw_fence_manager *fman = fman_from_fence(fence);
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags))
- return 1;
+ return true;
vmw_fences_update(fman);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index 7ef51fa84b01..126f93c0b0b8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -1651,7 +1651,7 @@ vmw_gb_surface_reference_internal(struct drm_device *dev,
struct vmw_surface_metadata *metadata;
struct ttm_base_object *base;
uint32_t backup_handle;
- int ret = -EINVAL;
+ int ret;
ret = vmw_surface_handle_reference(dev_priv, file_priv, req->sid,
req->handle_type, &base);
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 388bcc2889aa..d24344e91922 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -192,17 +192,55 @@ static void host1x_setup_sid_table(struct host1x *host)
}
}
+static bool host1x_wants_iommu(struct host1x *host1x)
+{
+ /*
+ * If we support addressing a maximum of 32 bits of physical memory
+ * and if the host1x firewall is enabled, there's no need to enable
+ * IOMMU support. This can happen for example on Tegra20, Tegra30
+ * and Tegra114.
+ *
+ * Tegra124 and later can address up to 34 bits of physical memory and
+ * many platforms come equipped with more than 2 GiB of system memory,
+ * which requires crossing the 4 GiB boundary. But there's a catch: on
+ * SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can
+ * only address up to 32 bits of memory in GATHER opcodes, which means
+ * that command buffers need to either be in the first 2 GiB of system
+ * memory (which could quickly lead to memory exhaustion), or command
+ * buffers need to be treated differently from other buffers (which is
+ * not possible with the current ABI).
+ *
+ * A third option is to use the IOMMU in these cases to make sure all
+ * buffers will be mapped into a 32-bit IOVA space that host1x can
+ * address. This allows all of the system memory to be used and works
+ * within the limitations of the host1x on these SoCs.
+ *
+ * In summary, default to enable IOMMU on Tegra124 and later. For any
+ * of the earlier SoCs, only use the IOMMU for additional safety when
+ * the host1x firewall is disabled.
+ */
+ if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) {
+ if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+ return false;
+ }
+
+ return true;
+}
+
static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
int err;
/*
- * If the host1x firewall is enabled, there's no need to enable IOMMU
- * support. Similarly, if host1x is already attached to an IOMMU (via
- * the DMA API), don't try to attach again.
+ * We may not always want to enable IOMMU support (for example if the
+ * host1x firewall is already enabled and we don't support addressing
+ * more than 32 bits of physical memory), so check for that first.
+ *
+ * Similarly, if host1x is already attached to an IOMMU (via the DMA
+ * API), don't try to attach again.
*/
- if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) || domain)
+ if (!host1x_wants_iommu(host) || domain)
return domain;
host->group = iommu_group_get(host->dev);
@@ -502,6 +540,19 @@ static void __exit tegra_host1x_exit(void)
}
module_exit(tegra_host1x_exit);
+/**
+ * host1x_get_dma_mask() - query the supported DMA mask for host1x
+ * @host1x: host1x instance
+ *
+ * Note that this returns the supported DMA mask for host1x, which can be
+ * different from the applicable DMA mask under certain circumstances.
+ */
+u64 host1x_get_dma_mask(struct host1x *host1x)
+{
+ return host1x->info->dma_mask;
+}
+EXPORT_SYMBOL(host1x_get_dma_mask);
+
MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>");
MODULE_DESCRIPTION("Host1x driver for Tegra products");