Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
69 files changed, 1989 insertions(+), 489 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3d2625beacf7..327a0daf4a1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -193,6 +193,7 @@ static const bool debug_evictions; /* = false */ #endif extern int amdgpu_tmz; +extern int amdgpu_reset_method; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -1010,10 +1011,10 @@ int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev); void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, uint32_t *buf, size_t size, bool write); -uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, - uint32_t acc_flags); -void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, +uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); +void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, + uint32_t acc_flags); void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags); void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value); @@ -1032,8 +1033,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); */ #define AMDGPU_REGS_NO_KIQ (1<<1) -#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) -#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) +#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) +#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) #define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg)) #define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v)) @@ -1041,9 +1042,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) -#define RREG32(reg) amdgpu_device_rreg(adev, (reg), 0) -#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_device_rreg(adev, (reg), 0)) -#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0) +#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0) +#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0)) +#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) @@ -1080,7 +1081,16 @@ int emu_soc_asic_init(struct amdgpu_device *adev); tmp_ |= ((val) & ~(mask)); \ WREG32_PLL(reg, tmp_); \ } while (0) -#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_device_rreg((adev), (reg), false)) + +#define WREG32_SMC_P(_Reg, _Val, _Mask) \ + do { \ + u32 tmp = RREG32_SMC(_Reg); \ + tmp &= (_Mask); \ + tmp |= ((_Val) & ~(_Mask)); \ + WREG32_SMC(_Reg, tmp); \ + } while (0) + +#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_mm_rreg((adev), (reg), false)) #define RREG32_IO(reg) amdgpu_io_rreg(adev, (reg)) #define WREG32_IO(reg, v) amdgpu_io_wreg(adev, (reg), (v)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index ad59ac4423b8..1b865fed74ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -31,8 +31,6 @@ #include "amdgpu_xgmi.h" #include <uapi/linux/kfd_ioctl.h> -static const unsigned int compute_vmid_bitmap = 0xFF00; - /* Total memory size in 
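The WREG32_SMC_P macro added above follows the same masked read-modify-write idiom as the existing WREG32_P and WREG32_PLL_P: bits set in _Mask are preserved from the current register contents, while bits outside the mask are taken from _Val. A minimal standalone sketch of that idiom, with reg_read()/reg_write() as hypothetical stand-ins for RREG32_SMC/WREG32_SMC:

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_reg = 0xAABBCCDD; /* simulated hardware register */

static uint32_t reg_read(void) { return fake_reg; }
static void reg_write(uint32_t v) { fake_reg = v; }

/* bits set in mask are kept; bits clear in mask are replaced by val */
static void reg_write_masked(uint32_t val, uint32_t mask)
{
    uint32_t tmp = reg_read();

    tmp &= mask;
    tmp |= (val & ~mask);
    reg_write(tmp);
}

int main(void)
{
    /* replace the low 16 bits, preserve the high 16 bits */
    reg_write_masked(0x00001234, 0xFFFF0000);
    printf("0x%08X\n", fake_reg); /* prints 0xAABB1234 */
    return 0;
}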
system memory and all GPU VRAM. Used to * estimate worst case amount of memory to reserve for page tables */ @@ -113,7 +111,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) if (adev->kfd.dev) { struct kgd2kfd_shared_resources gpu_resources = { - .compute_vmid_bitmap = compute_vmid_bitmap, + .compute_vmid_bitmap = + ((1 << AMDGPU_NUM_VMID) - 1) - + ((1 << adev->vm_manager.first_kfd_vmid) - 1), .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, .gpuvm_size = min(adev->vm_manager.max_pfn @@ -637,10 +637,8 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) { - if (adev->kfd.dev) { - if ((1 << vmid) & compute_vmid_bitmap) - return true; - } + if (adev->kfd.dev) + return vmid >= adev->vm_manager.first_kfd_vmid; return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index e249b22fef54..1279053324f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -111,6 +111,7 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) union igp_info { struct atom_integrated_system_info_v1_11 v11; + struct atom_integrated_system_info_v1_12 v12; }; union umc_info { @@ -214,6 +215,15 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, if (vram_type) *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); break; + case 12: + mem_channel_number = igp_info->v12.umachannelnumber; + /* channel width is 64 */ + if (vram_width) + *vram_width = mem_channel_number * 64; + mem_type = igp_info->v12.memorytype; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index aeada7c9fbea..a3fa1560de96 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1343,27 +1343,37 @@ static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched) static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring) { struct amdgpu_job *job; - struct drm_sched_job *s_job; + struct drm_sched_job *s_job, *tmp; uint32_t preempt_seq; struct dma_fence *fence, **ptr; struct amdgpu_fence_driver *drv = &ring->fence_drv; struct drm_gpu_scheduler *sched = &ring->sched; + bool preempted = true; if (ring->funcs->type != AMDGPU_RING_TYPE_GFX) return; preempt_seq = le32_to_cpu(*(drv->cpu_addr + 2)); - if (preempt_seq <= atomic_read(&drv->last_seq)) - return; + if (preempt_seq <= atomic_read(&drv->last_seq)) { + preempted = false; + goto no_preempt; + } preempt_seq &= drv->num_fences_mask; ptr = &drv->fences[preempt_seq]; fence = rcu_dereference_protected(*ptr, 1); +no_preempt: spin_lock(&sched->job_list_lock); - list_for_each_entry(s_job, &sched->ring_mirror_list, node) { + list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { + if (dma_fence_is_signaled(&s_job->s_fence->finished)) { + /* remove job from ring_mirror_list */ + list_del_init(&s_job->node); + sched->ops->free_job(s_job); + continue; + } job = to_amdgpu_job(s_job); - if (job->fence == fence) + if (preempted && job->fence == fence) /* mark the job as preempted */ job->preemption_status |= AMDGPU_IB_PREEMPTED; } @@ -1461,10 +1471,12 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val) } if 
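The compute_vmid_bitmap expression above replaces the old hard-coded 0xFF00 with a mask derived from first_kfd_vmid. A small sketch of the arithmetic, assuming AMDGPU_NUM_VMID is 16 and first_kfd_vmid is 8 (the split that produced the old constant); it also verifies that the simplified range test in amdgpu_amdkfd_is_kfd_vmid() agrees with the bitmap:

#include <stdint.h>
#include <stdio.h>

#define NUM_VMID 16 /* stand-in for AMDGPU_NUM_VMID */

int main(void)
{
    unsigned int first_kfd_vmid = 8; /* adev->vm_manager.first_kfd_vmid */
    uint32_t all_vmids = (1u << NUM_VMID) - 1;       /* 0xFFFF */
    uint32_t gfx_vmids = (1u << first_kfd_vmid) - 1; /* 0x00FF */
    uint32_t compute_vmid_bitmap = all_vmids - gfx_vmids;

    printf("compute_vmid_bitmap = 0x%04X\n", compute_vmid_bitmap); /* 0xFF00 */

    /* the new "vmid >= first_kfd_vmid" check matches the bitmap exactly */
    for (unsigned int vmid = 0; vmid < NUM_VMID; vmid++) {
        int in_bitmap = !!((1u << vmid) & compute_vmid_bitmap);
        int by_range = (vmid >= first_kfd_vmid);

        if (in_bitmap != by_range)
            printf("mismatch at vmid %u\n", vmid);
    }
    return 0;
}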
(is_support_sw_smu(adev)) { - ret = smu_get_dpm_freq_range(&adev->smu, SMU_SCLK, &min_freq, &max_freq, true); + ret = smu_get_dpm_freq_range(&adev->smu, SMU_SCLK, &min_freq, &max_freq); if (ret || val > max_freq || val < min_freq) return -EINVAL; - ret = smu_set_soft_freq_range(&adev->smu, SMU_SCLK, (uint32_t)val, (uint32_t)val, true); + ret = smu_set_soft_freq_range(&adev->smu, SMU_SCLK, (uint32_t)val, (uint32_t)val); + } else { + return 0; } pm_runtime_mark_last_busy(adev->ddev->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a649e40fd96f..aa5b54e5a1d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -81,6 +81,7 @@ MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/sienna_cichlid_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 @@ -114,6 +115,7 @@ const char *amdgpu_asic_name[] = { "NAVI14", "NAVI12", "SIENNA_CICHLID", + "NAVY_FLOUNDER", "LAST", }; @@ -301,10 +303,10 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, } /* - * device register access helper functions. + * MMIO register access helper functions. */ /** - * amdgpu_device_rreg - read a register + * amdgpu_mm_rreg - read a memory mapped IO register * * @adev: amdgpu_device pointer * @reg: dword aligned register offset @@ -312,8 +314,8 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, * * Returns the 32 bit value from the offset specified. */ -uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, - uint32_t acc_flags) +uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, + uint32_t acc_flags) { uint32_t ret; @@ -322,9 +324,15 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, if ((reg * 4) < adev->rmmio_size) ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); - else - ret = adev->pcie_rreg(adev, (reg * 4)); - trace_amdgpu_device_rreg(adev->pdev->device, reg, ret); + else { + unsigned long flags; + + spin_lock_irqsave(&adev->mmio_idx_lock, flags); + writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4)); + ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4)); + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + } + trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret); return ret; } @@ -370,19 +378,24 @@ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) BUG(); } -void static inline amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg, - uint32_t v, uint32_t acc_flags) +void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags) { - trace_amdgpu_device_wreg(adev->pdev->device, reg, v); + trace_amdgpu_mm_wreg(adev->pdev->device, reg, v); if ((reg * 4) < adev->rmmio_size) writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); - else - adev->pcie_wreg(adev, (reg * 4), v); + else { + unsigned long flags; + + spin_lock_irqsave(&adev->mmio_idx_lock, flags); + writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4)); + writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4)); + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + } } /** - * amdgpu_device_wreg - write to a register + * amdgpu_mm_wreg - write to a memory mapped IO register * * @adev: amdgpu_device pointer * @reg: dword aligned register offset @@ -391,13 
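For register offsets beyond the mapped MMIO aperture, the reworked amdgpu_mm_rreg()/amdgpu_mm_wreg_mmio() above program mmMM_INDEX with the byte offset and then access mmMM_DATA, holding mmio_idx_lock because the two-step sequence is not atomic. A userspace sketch of that window pattern, with a pthread mutex standing in for the irqsave spinlock; the names and the simulated aperture are illustrative:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define MM_INDEX 0 /* dword offsets of the window registers */
#define MM_DATA  1

static uint32_t mmio[4];        /* tiny simulated aperture */
static uint32_t backing[1024];  /* registers reachable only via the window */
static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;

static void mmio_write(uint32_t reg, uint32_t v)
{
    if (reg == MM_DATA)
        backing[mmio[MM_INDEX] / 4] = v; /* window forwards to backing store */
    else
        mmio[reg] = v;
}

static uint32_t mmio_read(uint32_t reg)
{
    return (reg == MM_DATA) ? backing[mmio[MM_INDEX] / 4] : mmio[reg];
}

static uint32_t indirect_rreg(uint32_t reg)
{
    uint32_t ret;

    pthread_mutex_lock(&idx_lock); /* keep the INDEX write and DATA read paired */
    mmio_write(MM_INDEX, reg * 4); /* byte offset, as in the driver */
    ret = mmio_read(MM_DATA);
    pthread_mutex_unlock(&idx_lock);
    return ret;
}

int main(void)
{
    backing[100] = 0xDEADBEEF;
    printf("reg 100 = 0x%08X\n", indirect_rreg(100));
    return 0;
}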
+404,13 @@ void static inline amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_ * * Writes the value specified to the offset specified. */ -void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, - uint32_t acc_flags) +void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, + uint32_t acc_flags) { if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) return amdgpu_kiq_wreg(adev, reg, v); - amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags); + amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags); } /* @@ -416,7 +429,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v); } - amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags); + amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags); } /** @@ -1621,6 +1634,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: chip_name = "sienna_cichlid"; break; + case CHIP_NAVY_FLOUNDER: + chip_name = "navy_flounder"; + break; } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); @@ -1722,6 +1738,12 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) amdgpu_device_enable_virtual_display(adev); + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_request_full_gpu(adev, true); + if (r) + return r; + } + switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_SI case CHIP_VERDE: @@ -1788,6 +1810,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->family = AMDGPU_FAMILY_NV; r = nv_set_ip_blocks(adev); @@ -1801,31 +1824,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) amdgpu_amdkfd_device_probe(adev); - if (amdgpu_sriov_vf(adev)) { - /* handle vbios stuff prior full access mode for new handshake */ - if (adev->virt.req_init_data_ver == 1) { - if (!amdgpu_get_bios(adev)) { - DRM_ERROR("failed to get vbios\n"); - return -EINVAL; - } - - r = amdgpu_atombios_init(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atombios_init failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); - return r; - } - } - } - - /* we need to send REQ_GPU here for legacy handshaker otherwise the vbios - * will not be prepared by host for this VF */ - if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) { - r = amdgpu_virt_request_full_gpu(adev, true); - if (r) - return r; - } - adev->pm.pp_feature = amdgpu_pp_feature_mask; if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; @@ -1857,10 +1855,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (r) return r; - /* skip vbios handling for new handshake */ - if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1) - continue; - /* Read BIOS */ if (!amdgpu_get_bios(adev)) return -EINVAL; @@ -1987,12 +1981,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) return r; - if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) { - r = amdgpu_virt_request_full_gpu(adev, true); - if (r) - return -EAGAIN; - } - for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; @@ -2474,18 +2462,21 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.valid) continue; + /* displays are handled separately */ - if 
(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { - /* XXX handle errors */ - r = adev->ip_blocks[i].version->funcs->suspend(adev); - /* XXX handle errors */ - if (r) { - DRM_ERROR("suspend of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; - } - adev->ip_blocks[i].status.hw = false; + if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE) + continue; + + /* XXX handle errors */ + r = adev->ip_blocks[i].version->funcs->suspend(adev); + /* XXX handle errors */ + if (r) { + DRM_ERROR("suspend of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; } + + adev->ip_blocks[i].status.hw = false; } return 0; @@ -2817,6 +2808,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) #endif #if defined(CONFIG_DRM_AMD_DC_DCN3_0) case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: #endif return amdgpu_dc != 0; #endif @@ -3324,6 +3316,9 @@ fence_driver_init: queue_delayed_work(system_wq, &adev->delayed_init_work, msecs_to_jiffies(AMDGPU_RESUME_MS)); + if (amdgpu_sriov_vf(adev)) + flush_delayed_work(&adev->delayed_init_work); + r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); if (r) { dev_err(adev->dev, "Could not create amdgpu device attr\n"); @@ -3951,6 +3946,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: break; default: goto disabled; @@ -4256,18 +4252,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_hive_info *hive = NULL; struct amdgpu_device *tmp_adev = NULL; int i, r = 0; - bool in_ras_intr = amdgpu_ras_intr_triggered(); - bool use_baco = - (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ? - true : false; + bool need_emergency_restart = false; bool audio_suspended = false; + /** + * Special case: RAS triggered and full reset isn't supported + */ + need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); + /* * Flush RAM to disk so that after reboot * the user can read log and see why the system rebooted. */ - if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) { - + if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) { DRM_WARN("Emergency reboot."); ksys_sync_helper(); @@ -4275,7 +4272,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, } dev_info(adev->dev, "GPU %s begin!\n", - (in_ras_intr && !use_baco) ? "jobs stop":"reset"); + need_emergency_restart ? "jobs stop":"reset"); /* * Here we trylock to avoid chain of resets executing from @@ -4347,7 +4344,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_fbdev_set_suspend(tmp_adev, 1); /* disable ras on ALL IPs */ - if (!(in_ras_intr && !use_baco) && + if (!need_emergency_restart && amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); @@ -4359,12 +4356,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, drm_sched_stop(&ring->sched, job ? 
&job->base : NULL); - if (in_ras_intr && !use_baco) + if (need_emergency_restart) amdgpu_job_stop_all_jobs_on_sched(&ring->sched); } } - if (in_ras_intr && !use_baco) + if (need_emergency_restart) goto skip_sched_resume; /* @@ -4441,7 +4438,7 @@ skip_hw_reset: skip_sched_resume: list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { /*unlock kfd: SRIOV would do it separately */ - if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev)) + if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev)) amdgpu_amdkfd_post_reset(tmp_adev); if (audio_suspended) amdgpu_device_resume_display_audio(tmp_adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 4ef38c2411ae..a50ff2306504 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -133,7 +133,7 @@ static int hw_id_map[MAX_HWIP] = { static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary) { uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; - uint64_t pos = vram_size - adev->discovery_tmr_size; + uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, adev->discovery_tmr_size, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index d50d597c45ed..8f6183801cb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -24,7 +24,8 @@ #ifndef __AMDGPU_DISCOVERY__ #define __AMDGPU_DISCOVERY__ -#define DISCOVERY_TMR_SIZE (64 << 10) +#define DISCOVERY_TMR_SIZE (4 << 10) +#define DISCOVERY_TMR_OFFSET (64 << 10) void amdgpu_discovery_fini(struct amdgpu_device *adev); int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 6a39996487b9..2082c0acd216 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -911,8 +911,7 @@ int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low) if (is_support_sw_smu(adev)) { ret = smu_get_dpm_freq_range(&adev->smu, SMU_GFXCLK, low ? &clk_freq : NULL, - !low ? &clk_freq : NULL, - true); + !low ? &clk_freq : NULL); if (ret) return 0; return clk_freq * 100; @@ -929,8 +928,7 @@ int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low) if (is_support_sw_smu(adev)) { ret = smu_get_dpm_freq_range(&adev->smu, SMU_UCLK, low ? &clk_freq : NULL, - !low ? &clk_freq : NULL, - true); + !low ? 
&clk_freq : NULL); if (ret) return 0; return clk_freq * 100; @@ -1141,6 +1139,26 @@ int amdgpu_dpm_baco_reset(struct amdgpu_device *adev) return 0; } +bool amdgpu_dpm_is_mode1_reset_supported(struct amdgpu_device *adev) +{ + struct smu_context *smu = &adev->smu; + + if (is_support_sw_smu(adev)) + return smu_mode1_reset_is_support(smu); + + return false; +} + +int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev) +{ + struct smu_context *smu = &adev->smu; + + if (is_support_sw_smu(adev)) + return smu_mode1_reset(smu); + + return -EOPNOTSUPP; +} + int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, enum PP_SMC_POWER_PROFILE type, bool en) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 6a8aae70a0e6..7f3cd7185650 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -529,6 +529,9 @@ int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev); bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev); +bool amdgpu_dpm_is_mode1_reset_supported(struct amdgpu_device *adev); +int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev); + int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, enum pp_mp1_state mp1_state); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 2f12f3ae3c7f..2eacf1f51bbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -149,6 +149,7 @@ int amdgpu_mes = 0; int amdgpu_noretry; int amdgpu_force_asic_type = -1; int amdgpu_tmz = 0; +int amdgpu_reset_method = -1; /* auto */ struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), @@ -757,6 +758,13 @@ module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)"); module_param_named(tmz, amdgpu_tmz, int, 0444); +/** + * DOC: reset_method (int) + * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) + */ +MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)"); +module_param_named(reset_method, amdgpu_reset_method, int, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 8d84975885cd..58d4c219178a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -424,9 +424,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, ring->fence_drv.irq_type = irq_type; ring->fence_drv.initialized = true; - DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr " - "0x%016llx, cpu addr 0x%p\n", ring->name, - ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr); + DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr 0x%016llx\n", + ring->name, ring->fence_drv.gpu_addr); return 0; } @@ -782,8 +781,10 @@ int amdgpu_debugfs_fence_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) if (amdgpu_sriov_vf(adev)) - return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list_sriov, 1); - return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2); + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list_sriov, + ARRAY_SIZE(amdgpu_debugfs_fence_list_sriov)); + return amdgpu_debugfs_add_files(adev, 
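The new reset_method module parameter and the mode1-reset wrappers above combine into a selection policy: honor an explicit user choice when the ASIC supports it, otherwise warn and fall back to the automatic decision, as cik_asic_reset_method() does later in this diff. A hedged sketch of that pattern; the constants and capability callbacks below are illustrative stand-ins, not the driver's real API:

#include <stdio.h>

enum reset_method {
    RESET_AUTO = -1, RESET_LEGACY, RESET_MODE0,
    RESET_MODE1, RESET_MODE2, RESET_BACO
};

static int reset_param = RESET_MODE1; /* stand-in for amdgpu_reset_method */

static int mode1_supported(void) { return 1; } /* e.g. smu_mode1_reset_is_support() */
static int baco_supported(void)  { return 0; } /* e.g. amdgpu_dpm_is_baco_supported() */

static enum reset_method pick_reset_method(void)
{
    if (reset_param == RESET_MODE1 && mode1_supported())
        return RESET_MODE1;
    if (reset_param == RESET_BACO && baco_supported())
        return RESET_BACO;
    if (reset_param != RESET_AUTO)
        printf("requested reset %d unsupported, using AUTO instead\n",
               reset_param);

    /* automatic policy: prefer BACO when available */
    return baco_supported() ? RESET_BACO : RESET_MODE1;
}

int main(void)
{
    printf("selected reset method: %d\n", pick_reset_method());
    return 0;
}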
amdgpu_debugfs_fence_list, + ARRAY_SIZE(amdgpu_debugfs_fence_list)); #else return 0; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index de9784b0c19b..7f9e50247413 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -930,7 +930,8 @@ static const struct drm_info_list amdgpu_debugfs_gem_list[] = { int amdgpu_debugfs_gem_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) - return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_gem_list, 1); + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_gem_list, + ARRAY_SIZE(amdgpu_debugfs_gem_list)); #endif return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 2bd9423c1dab..acdb61cfa24c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -83,6 +83,15 @@ struct amdgpu_vmhub { uint32_t vm_context0_cntl; uint32_t vm_l2_pro_fault_status; uint32_t vm_l2_pro_fault_cntl; + + /* + * store the register distances between two continuous context domain + * and invalidation engine. + */ + uint32_t ctx_distance; + uint32_t ctx_addr_distance; /* include LO32/HI32 */ + uint32_t eng_distance; + uint32_t eng_addr_distance; /* include LO32/HI32 */ }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 4ffc32b78745..dcd492170598 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -468,7 +468,8 @@ static const struct drm_info_list amdgpu_debugfs_sa_list[] = { int amdgpu_debugfs_sa_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) - return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_sa_list, 1); + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_sa_list, + ARRAY_SIZE(amdgpu_debugfs_sa_list)); #else return 0; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 267fa45ddb66..7521f4ab55de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -574,6 +574,9 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) INIT_LIST_HEAD(&id_mgr->ids_lru); atomic_set(&id_mgr->reserved_vmid_num, 0); + /* manage only VMIDs not used by KFD */ + id_mgr->num_ids = adev->vm_manager.first_kfd_vmid; + /* skip over VMID 0, since it is the system VM */ for (j = 1; j < id_mgr->num_ids; ++j) { amdgpu_vmid_reset(adev, i, j); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 2975c4a6e581..937029ad5271 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -37,7 +37,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) memset(&ti, 0, sizeof(struct amdgpu_task_info)); - if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { + if (amdgpu_gpu_recovery && + amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { DRM_ERROR("ring %s timeout, but soft recovered\n", s_job->sched->name); return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 347b06d3c140..20f39aa04fb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1105,7 +1105,7 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, 
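The ctx_distance, ctx_addr_distance, eng_distance and eng_addr_distance fields added to amdgpu_vmhub let shared GMC code compute per-context and per-invalidation-engine register addresses as base + index * stride, instead of hard-coding the spacing for every hardware generation. A sketch of that addressing; the offsets and strides are made up, not taken from a real ASIC:

#include <stdint.h>
#include <stdio.h>

struct vmhub {
    uint32_t vm_context0_cntl; /* base of the per-context CNTL registers */
    uint32_t vm_inv_eng0_req;  /* base of the per-engine invalidate registers */
    uint32_t ctx_distance;     /* stride between consecutive contexts */
    uint32_t eng_distance;     /* stride between invalidation engines */
};

static uint32_t ctx_cntl_reg(const struct vmhub *hub, unsigned int ctx)
{
    return hub->vm_context0_cntl + ctx * hub->ctx_distance;
}

static uint32_t inv_req_reg(const struct vmhub *hub, unsigned int eng)
{
    return hub->vm_inv_eng0_req + eng * hub->eng_distance;
}

int main(void)
{
    struct vmhub hub = { 0x1000, 0x2000, 1, 2 }; /* illustrative values */

    for (unsigned int i = 0; i < 4; i++)
        printf("ctx%u CNTL @ 0x%04X, eng%u REQ @ 0x%04X\n",
               i, ctx_cntl_reg(&hub, i), i, inv_req_reg(&hub, i));
    return 0;
}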
SMU_SCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); @@ -1173,7 +1173,7 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); @@ -1241,7 +1241,7 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask); else @@ -1311,7 +1311,7 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask); else @@ -1381,7 +1381,7 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask); else @@ -1451,7 +1451,7 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); else @@ -2960,7 +2960,7 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, if (r) return r; - return snprintf(buf, PAGE_SIZE, "%d\n", sclk * 10 * 1000); + return snprintf(buf, PAGE_SIZE, "%u\n", sclk * 10 * 1000); } static ssize_t amdgpu_hwmon_show_sclk_label(struct device *dev, @@ -2997,7 +2997,7 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, if (r) return r; - return snprintf(buf, PAGE_SIZE, "%d\n", mclk * 10 * 1000); + return snprintf(buf, PAGE_SIZE, "%u\n", mclk * 10 * 1000); } static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, @@ -3558,21 +3558,34 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) { int ret = 0; - ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); - if (ret) - DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", - enable ? "enable" : "disable", ret); - - /* enable/disable Low Memory PState for UVD (4k videos) */ - if (adev->asic_type == CHIP_STONEY && - adev->uvd.decode_image_width >= WIDTH_4K) { - struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + if (adev->family == AMDGPU_FAMILY_SI) { + mutex_lock(&adev->pm.mutex); + if (enable) { + adev->pm.dpm.uvd_active = true; + adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD; + } else { + adev->pm.dpm.uvd_active = false; + } + mutex_unlock(&adev->pm.mutex); - if (hwmgr && hwmgr->hwmgr_func && - hwmgr->hwmgr_func->update_nbdpm_pstate) - hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr, - !enable, - true); + amdgpu_pm_compute_clocks(adev); + } else { + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); + if (ret) + DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", + enable ? 
"enable" : "disable", ret); + + /* enable/disable Low Memory PState for UVD (4k videos) */ + if (adev->asic_type == CHIP_STONEY && + adev->uvd.decode_image_width >= WIDTH_4K) { + struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + + if (hwmgr && hwmgr->hwmgr_func && + hwmgr->hwmgr_func->update_nbdpm_pstate) + hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr, + !enable, + true); + } } } @@ -3580,10 +3593,24 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) { int ret = 0; - ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); - if (ret) - DRM_ERROR("Dpm %s vce failed, ret = %d. \n", - enable ? "enable" : "disable", ret); + if (adev->family == AMDGPU_FAMILY_SI) { + mutex_lock(&adev->pm.mutex); + if (enable) { + adev->pm.dpm.vce_active = true; + /* XXX select vce level based on ring/task */ + adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; + } else { + adev->pm.dpm.vce_active = false; + } + mutex_unlock(&adev->pm.mutex); + + amdgpu_pm_compute_clocks(adev); + } else { + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); + if (ret) + DRM_ERROR("Dpm %s vce failed, ret = %d. \n", + enable ? "enable" : "disable", ret); + } } void amdgpu_pm_print_power_states(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 9342a9e8cadf..aa80cf799e42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -99,6 +99,7 @@ static int psp_early_init(void *handle) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: psp_v11_0_set_psp_funcs(psp); psp->autoload_supported = true; break; @@ -430,6 +431,52 @@ static int psp_tmr_load(struct psp_context *psp) return ret; } +static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp, + struct psp_gfx_cmd_resp *cmd) +{ + if (amdgpu_sriov_vf(psp->adev)) + cmd->cmd_id = GFX_CMD_ID_DESTROY_VMR; + else + cmd->cmd_id = GFX_CMD_ID_DESTROY_TMR; +} + +static int psp_tmr_unload(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_tmr_unload_cmd_buf(psp, cmd); + DRM_INFO("free PSP TMR buffer\n"); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static int psp_tmr_terminate(struct psp_context *psp) +{ + int ret; + void *tmr_buf; + void **pptr; + + ret = psp_tmr_unload(psp); + if (ret) + return ret; + + /* free TMR memory buffer */ + pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; + amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); + + return 0; +} + static void psp_prep_asd_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint64_t asd_mc, uint32_t size) { @@ -452,7 +499,9 @@ static int psp_asd_load(struct psp_context *psp) * add workaround to bypass it for sriov now. 
* TODO: add version check to make it common */ - if (amdgpu_sriov_vf(psp->adev) || (psp->adev->asic_type == CHIP_SIENNA_CICHLID)) + if (amdgpu_sriov_vf(psp->adev) || + (psp->adev->asic_type == CHIP_SIENNA_CICHLID) || + (psp->adev->asic_type == CHIP_NAVY_FLOUNDER)) return 0; cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); @@ -1717,7 +1766,8 @@ static int psp_np_fw_load(struct psp_context *psp) continue; if (psp->autoload_supported && - adev->asic_type == CHIP_SIENNA_CICHLID && + (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) && (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3)) @@ -1866,8 +1916,6 @@ static int psp_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - void *tmr_buf; - void **pptr; int ret; if (psp->adev->psp.ta_fw) { @@ -1883,10 +1931,9 @@ static int psp_hw_fini(void *handle) return ret; } + psp_tmr_terminate(psp); psp_ring_destroy(psp, PSP_RING_TYPE__KM); - pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; - amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); amdgpu_bo_free_kernel(&psp->fw_pri_bo, &psp->fw_pri_mc_addr, &psp->fw_pri_buf); amdgpu_bo_free_kernel(&psp->fence_buf_bo, @@ -1933,6 +1980,18 @@ static int psp_suspend(void *handle) } } + ret = psp_asd_unload(psp); + if (ret) { + DRM_ERROR("Failed to unload asd\n"); + return ret; + } + + ret = psp_tmr_terminate(psp); + if (ret) { + DRM_ERROR("Failed to terminate tmr\n"); + return ret; + } + ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); if (ret) { DRM_ERROR("PSP ring stop failed\n"); @@ -2222,6 +2281,107 @@ out: return err; } +int parse_ta_bin_descriptor(struct psp_context *psp, + const struct ta_fw_bin_desc *desc, + const struct ta_firmware_header_v2_0 *ta_hdr) +{ + uint8_t *ucode_start_addr = NULL; + + if (!psp || !desc || !ta_hdr) + return -EINVAL; + + ucode_start_addr = (uint8_t *)ta_hdr + + le32_to_cpu(desc->offset_bytes) + + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + + switch (desc->fw_type) { + case TA_FW_TYPE_PSP_ASD: + psp->asd_fw_version = le32_to_cpu(desc->fw_version); + psp->asd_feature_version = le32_to_cpu(desc->fw_version); + psp->asd_ucode_size = le32_to_cpu(desc->size_bytes); + psp->asd_start_addr = ucode_start_addr; + break; + case TA_FW_TYPE_PSP_XGMI: + psp->ta_xgmi_ucode_version = le32_to_cpu(desc->fw_version); + psp->ta_xgmi_ucode_size = le32_to_cpu(desc->size_bytes); + psp->ta_xgmi_start_addr = ucode_start_addr; + break; + case TA_FW_TYPE_PSP_RAS: + psp->ta_ras_ucode_version = le32_to_cpu(desc->fw_version); + psp->ta_ras_ucode_size = le32_to_cpu(desc->size_bytes); + psp->ta_ras_start_addr = ucode_start_addr; + break; + case TA_FW_TYPE_PSP_HDCP: + psp->ta_hdcp_ucode_version = le32_to_cpu(desc->fw_version); + psp->ta_hdcp_ucode_size = le32_to_cpu(desc->size_bytes); + psp->ta_hdcp_start_addr = ucode_start_addr; + break; + case TA_FW_TYPE_PSP_DTM: + psp->ta_dtm_ucode_version = le32_to_cpu(desc->fw_version); + psp->ta_dtm_ucode_size = le32_to_cpu(desc->size_bytes); + psp->ta_dtm_start_addr = ucode_start_addr; + break; + default: + dev_warn(psp->adev->dev, "Unsupported TA type: %d\n", desc->fw_type); + break; + } + + return 0; +} + +int psp_init_ta_microcode(struct psp_context *psp, + const char *chip_name) +{ + struct amdgpu_device *adev = psp->adev; + char fw_name[30]; + const struct ta_firmware_header_v2_0 *ta_hdr; + int err = 0; + int ta_index = 0; + + if (!chip_name) { + 
dev_err(adev->dev, "invalid chip name for ta microcode\n"); + return -EINVAL; + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); + err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); + if (err) + goto out; + + err = amdgpu_ucode_validate(adev->psp.ta_fw); + if (err) + goto out; + + ta_hdr = (const struct ta_firmware_header_v2_0 *)adev->psp.ta_fw->data; + + if (le16_to_cpu(ta_hdr->header.header_version_major) != 2) { + dev_err(adev->dev, "unsupported TA header version\n"); + err = -EINVAL; + goto out; + } + + if (le32_to_cpu(ta_hdr->ta_fw_bin_count) >= UCODE_MAX_TA_PACKAGING) { + dev_err(adev->dev, "packed TA count exceeds maximum limit\n"); + err = -EINVAL; + goto out; + } + + for (ta_index = 0; ta_index < le32_to_cpu(ta_hdr->ta_fw_bin_count); ta_index++) { + err = parse_ta_bin_descriptor(psp, + &ta_hdr->ta_fw_bin[ta_index], + ta_hdr); + if (err) + goto out; + } + + return 0; +out: + dev_err(adev->dev, "fail to initialize ta microcode\n"); + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + return err; +} + static int psp_set_clockgating_state(void *handle, enum amd_clockgating_state state) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 1513887e7a68..623888bf30cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -371,4 +371,6 @@ int psp_init_asd_microcode(struct psp_context *psp, const char *chip_name); int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name); +int psp_init_ta_microcode(struct psp_context *psp, + const char *chip_name); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index ab8e7c91c645..e10f02ed3f65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2131,3 +2131,14 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) amdgpu_ras_reset_gpu(adev); } } + +bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev) +{ + if (adev->asic_type == CHIP_VEGA20 && + adev->pm.fw_version <= 0x283400) { + return !(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) && + amdgpu_ras_intr_triggered(); + } + + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index e7df5d8429f8..b2667342cf67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -633,4 +633,5 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev); void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready); +bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 5da20fc166d9..63e734a125fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -35,7 +35,7 @@ #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \ job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished) -TRACE_EVENT(amdgpu_device_rreg, +TRACE_EVENT(amdgpu_mm_rreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), TP_STRUCT__entry( @@ -54,7 +54,7 @@ TRACE_EVENT(amdgpu_device_rreg, (unsigned long)__entry->value) ); -TRACE_EVENT(amdgpu_device_wreg, +TRACE_EVENT(amdgpu_mm_wreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), TP_STRUCT__entry( diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 62fa1e691800..fcff5671f6f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1869,7 +1869,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) adev->discovery_tmr_size = amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); if (!adev->discovery_tmr_size) - adev->discovery_tmr_size = DISCOVERY_TMR_SIZE; + adev->discovery_tmr_size = DISCOVERY_TMR_OFFSET; if (mem_train_support) { /* reserve vram for mem train according to TMR location */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 744404a05fee..183743c5fb7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -390,11 +390,11 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; - default: DRM_ERROR("Unknown firmware load type\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index df402c7b3233..12a8bc8fca0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -124,6 +124,29 @@ struct ta_firmware_header_v1_0 { uint32_t ta_dtm_size_bytes; }; +enum ta_fw_type { + TA_FW_TYPE_UNKOWN, + TA_FW_TYPE_PSP_ASD, + TA_FW_TYPE_PSP_XGMI, + TA_FW_TYPE_PSP_RAS, + TA_FW_TYPE_PSP_HDCP, + TA_FW_TYPE_PSP_DTM, +}; + +struct ta_fw_bin_desc { + uint32_t fw_type; + uint32_t fw_version; + uint32_t offset_bytes; + uint32_t size_bytes; +}; + +/* version_major=2, version_minor=0 */ +struct ta_firmware_header_v2_0 { + struct common_firmware_header header; + uint32_t ta_fw_bin_count; + struct ta_fw_bin_desc ta_fw_bin[]; +}; + /* version_major=1, version_minor=0 */ struct gfx_firmware_header_v1_0 { struct common_firmware_header header; @@ -276,6 +299,7 @@ union amdgpu_firmware_header { struct psp_firmware_header_v1_1 psp_v1_1; struct psp_firmware_header_v1_3 psp_v1_3; struct ta_firmware_header_v1_0 ta; + struct ta_firmware_header_v2_0 ta_v2_0; struct gfx_firmware_header_v1_0 gfx; struct rlc_firmware_header_v1_0 rlc; struct rlc_firmware_header_v2_0 rlc_v2_0; @@ -288,6 +312,8 @@ union amdgpu_firmware_header { uint8_t raw[0x100]; }; +#define UCODE_MAX_TA_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct ta_fw_bin_desc)) + /* * fw loading support */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 15ff30c53e24..a777d585db84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -43,6 +43,7 @@ #define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin" #define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin" #define FIRMWARE_SIENNA_CICHLID "amdgpu/sienna_cichlid_vcn.bin" +#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); @@ -53,6 +54,7 @@ MODULE_FIRMWARE(FIRMWARE_NAVI10); MODULE_FIRMWARE(FIRMWARE_NAVI14); MODULE_FIRMWARE(FIRMWARE_NAVI12); MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID); +MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -115,6 +117,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) 
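The v2 TA header above ends in a flexible array of ta_fw_bin_desc entries, and UCODE_MAX_TA_PACKAGING bounds how many descriptors can fit inside the 0x100-byte amdgpu_firmware_header union after the common header and the 4-byte ta_fw_bin_count. A userspace sketch of the count validation and descriptor walk performed by psp_init_ta_microcode(); the layout and bound below are simplified stand-ins:

#include <stdint.h>
#include <stdio.h>

struct common_hdr { uint32_t size_bytes; uint32_t ucode_array_offset_bytes; };

struct ta_desc { uint32_t fw_type, fw_version, offset_bytes, size_bytes; };

struct ta_hdr_v2 {
    struct common_hdr header;
    uint32_t ta_fw_bin_count;
    struct ta_desc ta_fw_bin[]; /* flexible array, as in the patch */
};

#define MAX_TA_PACKAGING 4u /* stand-in for UCODE_MAX_TA_PACKAGING */

static int parse(const void *fw, size_t len)
{
    const struct ta_hdr_v2 *hdr = fw;

    /* reject counts the header union could not actually hold */
    if (len < sizeof(*hdr) || hdr->ta_fw_bin_count >= MAX_TA_PACKAGING)
        return -1;

    for (uint32_t i = 0; i < hdr->ta_fw_bin_count; i++)
        printf("TA %u: type=%u version=0x%x size=%u\n", i,
               hdr->ta_fw_bin[i].fw_type,
               hdr->ta_fw_bin[i].fw_version,
               hdr->ta_fw_bin[i].size_bytes);
    return 0;
}

int main(void)
{
    uint32_t buf[16] = {0}; /* word-aligned scratch "firmware image" */
    struct ta_hdr_v2 *hdr = (struct ta_hdr_v2 *)buf;

    hdr->ta_fw_bin_count = 2;
    hdr->ta_fw_bin[0] = (struct ta_desc){ 1, 0x10, 0, 256 };
    hdr->ta_fw_bin[1] = (struct ta_desc){ 3, 0x20, 256, 512 };
    return parse(buf, sizeof(buf));
}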
adev->vcn.indirect_sram = true; break; + case CHIP_NAVY_FLOUNDER: + fw_name = FIRMWARE_NAVY_FLOUNDER; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; default: return -EINVAL; } @@ -421,6 +429,10 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; + /* VCN in SRIOV does not support direct register read/write */ + if (amdgpu_sriov_vf(adev)) + return 0; + WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index da233a9e429d..1203c20491e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -27,6 +27,9 @@ #include "amdgpu.h" #include "amdgpu_ras.h" +#include "vi.h" +#include "soc15.h" +#include "nv.h" bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) { @@ -513,6 +516,31 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) if (is_virtual_machine()) /* passthrough mode exclus sriov mod */ adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } + + /* we have the ability to check now */ + if (amdgpu_sriov_vf(adev)) { + switch (adev->asic_type) { + case CHIP_TONGA: + case CHIP_FIJI: + vi_set_virt_ops(adev); + break; + case CHIP_VEGA10: + case CHIP_VEGA20: + case CHIP_ARCTURUS: + soc15_set_virt_ops(adev); + break; + case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + nv_set_virt_ops(adev); + /* try send GPU_INIT_DATA request to host */ + amdgpu_virt_request_init_data(adev); + break; + default: /* other chip doesn't support SRIOV */ + DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type); + break; + } + } } static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index c8e68d7890bf..770025a5e500 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -324,6 +324,7 @@ struct amdgpu_vm { struct amdgpu_vm_manager { /* Handling of VMIDs */ struct amdgpu_vmid_mgr id_mgr[AMDGPU_MAX_VMHUBS]; + unsigned int first_kfd_vmid; /* Handling of VM fences */ u64 fence_context; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c index 0219bd6ce1b2..939eca63b094 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c @@ -73,6 +73,7 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev, switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: athub_v2_1_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? 
true : false); athub_v2_1_update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index fe306d0f73f7..c2c67ab68a43 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1326,6 +1326,14 @@ cik_asic_reset_method(struct amdgpu_device *adev) { bool baco_reset; + if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) + return amdgpu_reset_method; + + if (amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + switch (adev->asic_type) { case CHIP_BONAIRE: case CHIP_HAWAII: diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 5f3f6ebfb387..55982c0064b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -54,8 +54,6 @@ #define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001 #define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003 -#define AMDGPU_NUM_OF_VMIDS 8 - #define PIPEID(x) ((x) << 0) #define MEID(x) ((x) << 2) #define VMID(x) ((x) << 4) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 323285eb1457..61e89247faf3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -145,6 +145,13 @@ MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec.bin"); MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec2.bin"); MODULE_FIRMWARE("amdgpu/sienna_cichlid_rlc.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_ce.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_pfp.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_me.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_mec.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_mec2.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_rlc.bin"); + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -3114,6 +3121,48 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] = /* Pending on emulation bring up */ }; +static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffffffff, 0x00070104), + SOC15_REG_GOLDEN_VALUE(GC, 0, 
mmSPI_START_PHASE, 0x000000ff, 0x00000004), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xe07df47f, 0x00180070), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xffffffff, 0x010b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -3302,6 +3351,12 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_10_3_sienna_cichlid, (const u32)ARRAY_SIZE(golden_settings_gc_10_3_sienna_cichlid)); break; + case CHIP_NAVY_FLOUNDER: + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_2, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_2)); + break; + default: break; } @@ -3483,6 +3538,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) adev->gfx.cp_fw_write_wait = true; break; case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->gfx.cp_fw_write_wait = true; break; default: @@ -3578,6 +3634,9 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: chip_name = "sienna_cichlid"; break; + case CHIP_NAVY_FLOUNDER: + chip_name = "navy_flounder"; + break; default: BUG(); } @@ -4108,6 +4167,7 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); break; case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -4230,6 +4290,7 @@ static int gfx_v10_0_sw_init(void *handle) adev->gfx.mec.num_queue_per_pipe = 8; break; case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -4484,7 +4545,8 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade /* for ASICs that integrates GFX v10.3 * pa_sc_tile_steering_override should be set to 0 */ - if (adev->asic_type == CHIP_SIENNA_CICHLID) 
+ if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) return 0; /* init num_sc */ @@ -4512,8 +4574,6 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade } #define DEFAULT_SH_MEM_BASES (0x6000) -#define FIRST_COMPUTE_VMID (8) -#define LAST_COMPUTE_VMID (16) static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) { @@ -4529,7 +4589,7 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); mutex_lock(&adev->srbm_mutex); - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { nv_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); @@ -4540,7 +4600,7 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) /* Initialize all compute VMIDs to have no GDS, GWS, or OA acccess. These should be enabled by FW for target VMIDs. */ - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); @@ -4712,12 +4772,19 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev) adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ - WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI, - adev->gfx.rlc.clear_state_gpu_addr >> 32); - WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO, - adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); - WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); - + if (adev->asic_type == CHIP_NAVI12) { + WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI, + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO, + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); + } else { + WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI, + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO, + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); + } return 0; } @@ -5325,7 +5392,12 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 
0 : 1); - WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); + + if (adev->asic_type == CHIP_NAVI12) { + WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); + } else { + WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); + } for (i = 0; i < adev->usec_timeout; i++) { if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) @@ -5710,6 +5782,7 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index); WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); @@ -5842,6 +5915,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) if (enable) { switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0); break; default: @@ -5851,6 +5925,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) } else { switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); @@ -5944,6 +6019,7 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) /* tell RLC which is KIQ queue */ switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); @@ -6647,6 +6723,7 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) * has been remapped to mmVGT_ESGS_RING_SIZE */ switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid); WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, 0); WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern); @@ -6685,6 +6762,7 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) << GRBM_CAM_DATA__CAM_ADDR__SHIFT) | @@ -6975,6 +7053,7 @@ static int gfx_v10_0_soft_reset(void *handle) tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid)) grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, @@ -7073,6 +7152,7 @@ static int gfx_v10_0_early_init(void *handle) adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X; break; case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid; break; default: @@ -7125,6 +7205,7 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); /* wait for RLC_SAFE_MODE */ @@ -7156,6 +7237,7 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) data = RLC_SAFE_MODE__CMD_MASK; switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); break; default: @@ -7468,6 +7550,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle, case CHIP_NAVI14: case CHIP_NAVI12: case 
CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: gfx_v10_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -7483,12 +7566,12 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags) int data; /* AMD_CG_SUPPORT_GFX_MGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) *flags |= AMD_CG_SUPPORT_GFX_MGCG; /* AMD_CG_SUPPORT_GFX_CGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CGCG; @@ -7497,17 +7580,17 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags) *flags |= AMD_CG_SUPPORT_GFX_CGLS; /* AMD_CG_SUPPORT_GFX_RLC_LS */ - data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; /* AMD_CG_SUPPORT_GFX_CP_LS */ - data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; /* AMD_CG_SUPPORT_GFX_3D_CGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; @@ -7685,14 +7768,9 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring, static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { - struct amdgpu_device *adev = ring->adev; bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; - /* Interrupt not work fine on GFX10.1 model yet. 
Use fallback instead */ - if (adev->pdev->device == 0x50) - int_sel = false; - /* RELEASE_MEM - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | @@ -7843,12 +7921,17 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); return -ENOMEM; + } /* assert preemption condition */ amdgpu_ring_set_preempt_cond_exec(ring, false); @@ -7859,6 +7942,8 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) ++ring->trail_seq); amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + /* poll the trailing fence */ for (i = 0; i < adev->usec_timeout; i++) { if (ring->trail_seq == @@ -8567,6 +8652,7 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; break; case CHIP_NAVI12: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 4aec76049a60..04eaf3a8fddb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1850,8 +1850,6 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) * */ #define DEFAULT_SH_MEM_BASES (0x6000) -#define FIRST_COMPUTE_VMID (8) -#define LAST_COMPUTE_VMID (16) static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -1869,7 +1867,7 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT; mutex_lock(&adev->srbm_mutex); - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { cik_srbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32(mmSH_MEM_CONFIG, sh_mem_config); @@ -1882,7 +1880,7 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) /* Initialize all compute VMIDs to have no GDS, GWS, or OA acccess. These should be enabled by FW for target VMIDs. 
*/ - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); WREG32(amdgpu_gds_reg_offset[i].gws, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index efb759b62d21..33f1c4a46ebe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3686,8 +3686,6 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) * */ #define DEFAULT_SH_MEM_BASES (0x6000) -#define FIRST_COMPUTE_VMID (8) -#define LAST_COMPUTE_VMID (16) static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -3710,7 +3708,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) SH_MEM_CONFIG__PRIVATE_ATC_MASK; mutex_lock(&adev->srbm_mutex); - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { vi_srbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32(mmSH_MEM_CONFIG, sh_mem_config); @@ -3723,7 +3721,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) /* Initialize all compute VMIDs to have no GDS, GWS, or OA acccess. These should be enabled by FW for target VMIDs. */ - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); WREG32(amdgpu_gds_reg_offset[i].gws, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 99ffc3e1fddc..cb9d60a4e05e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2463,8 +2463,6 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) } #define DEFAULT_SH_MEM_BASES (0x6000) -#define FIRST_COMPUTE_VMID (8) -#define LAST_COMPUTE_VMID (16) static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -2484,7 +2482,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; mutex_lock(&adev->srbm_mutex); - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { soc15_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); @@ -2495,7 +2493,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) /* Initialize all compute VMIDs to have no GDS, GWS, or OA acccess. These should be enabled by FW for target VMIDs. 
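The same substitution repeats across the gfx v7/v8/v9/v10 hunks here: the hard-coded FIRST_COMPUTE_VMID (8) / LAST_COMPUTE_VMID (16) bounds give way to the per-device adev->vm_manager.first_kfd_vmid, which lets compute-only ASICs start the KFD range lower. A minimal standalone sketch of the resulting partitioning loop, assuming stub types in place of the real amdgpu structures (program_vmid() is an illustrative stand-in for the per-ASIC WREG32 sequences):

/* Hedged sketch: compute-VMID partitioning after the change. Stub types
 * only; program_vmid() stands in for the per-ASIC WREG32 sequences. */
#include <stdio.h>

#define AMDGPU_NUM_VMID 16	/* VMID 0 is reserved for the system */

struct vm_manager {
	unsigned int first_kfd_vmid;
};

static void program_vmid(unsigned int vmid)
{
	printf("init compute VMID %u for KFD\n", vmid);
}

int main(void)
{
	/* 8 on GPUs with graphics; 3 on compute-only parts such as Arcturus */
	struct vm_manager mgr = { .first_kfd_vmid = 8 };
	unsigned int i;

	/* replaces: for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) */
	for (i = mgr.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++)
		program_vmid(i);
	return 0;
}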
*/ - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 6682b843bafe..529e46386a50 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -38,15 +38,15 @@ u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev) void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - /* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */ - int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - offset * vmid, lower_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - offset * vmid, upper_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); } static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) @@ -207,6 +207,7 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; unsigned num_level, block_size; uint32_t tmp; int i; @@ -245,25 +246,31 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp); - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, - upper_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); } } static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; unsigned i; for (i = 0 ; i < 18; ++i) { WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - 2 * i, 0xffffffff); + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - 2 * i, 0x1f); + i * hub->eng_addr_distance, 0x1f); } } @@ -299,12 +306,14 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; u32 
tmp; u32 i; /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL, i, 0); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL); @@ -360,7 +369,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, CRASH_ON_NO_RETRY_FAULT, 1); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, CRASH_ON_RETRY_FAULT, 1); - } + } WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); } @@ -386,4 +395,11 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS); hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = mmVM_CONTEXT1_CNTL - mmVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = mmVM_INVALIDATE_ENG1_REQ - mmVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = mmVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 6939edfc5232..394e6f56948a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -49,15 +49,15 @@ u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev) void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - /* two registers distance between mmGCVM_CONTEXT0_* to mmGCVM_CONTEXT1_* */ - int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - offset * vmid, lower_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - offset * vmid, upper_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); } static void gfxhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) @@ -218,6 +218,7 @@ static void gfxhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; int i; uint32_t tmp; @@ -247,25 +248,31 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i, tmp); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, - upper_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + 
lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); } } static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; unsigned i; for (i = 0 ; i < 18; ++i) { WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - 2 * i, 0xffffffff); + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - 2 * i, 0x1f); + i * hub->eng_addr_distance, 0x1f); } } @@ -287,12 +294,14 @@ int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev) void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; u32 tmp; u32 i; /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL); @@ -373,4 +382,12 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS); hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = mmGCVM_CONTEXT1_CNTL - mmGCVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = mmGCVM_INVALIDATE_ENG1_REQ - + mmGCVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = mmGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index fcc4c1912513..fa0bca3e1f73 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -49,15 +49,15 @@ u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev) void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - /* two registers distance between mmGCVM_CONTEXT0_* to mmGCVM_CONTEXT1_* */ - int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - offset * vmid, lower_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - offset * vmid, upper_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); } static void gfxhub_v2_1_init_gart_aperture_regs(struct amdgpu_device *adev) @@ -207,6 +207,7 @@ static void gfxhub_v2_1_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; int i; uint32_t tmp; @@ -236,25 +237,31 @@ static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i, tmp); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); - WREG32_SOC15_OFFSET(GC, 0, 
mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, - upper_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); } } static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; unsigned i; for (i = 0 ; i < 18; ++i) { WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - 2 * i, 0xffffffff); + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - 2 * i, 0x1f); + i * hub->eng_addr_distance, 0x1f); } } @@ -288,12 +295,14 @@ int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev) void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; u32 tmp; u32 i; /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL); @@ -372,6 +381,14 @@ void gfxhub_v2_1_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS); hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = mmGCVM_CONTEXT1_CNTL - mmGCVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = mmGCVM_INVALIDATE_ENG1_REQ - + mmGCVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = mmGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index f7e66bf0f647..ec90c62078d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -49,8 +49,6 @@ #include "mmhub_v2_0.h" #include "athub_v2_0.h" #include "athub_v2_1.h" -/* XXX Move this macro to navi10 header file, which is like vid.h for VI.*/ -#define AMDGPU_NUM_OF_VMIDS 8 #if 0 static const struct soc15_reg_golden golden_settings_navi10_hdp[] = @@ -88,7 +86,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* MM HUB */ hub = &adev->vmhub[AMDGPU_MMHUB_0]; for (i = 0; i < 16; i++) { - reg = hub->vm_context0_cntl + i; + reg = hub->vm_context0_cntl + hub->ctx_distance * i; tmp = RREG32(reg); tmp &= ~bits[AMDGPU_MMHUB_0]; WREG32(reg, tmp); @@ -97,7 +95,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* GFX HUB */ hub = &adev->vmhub[AMDGPU_GFXHUB_0]; for (i = 0; i < 16; i++) { - reg = hub->vm_context0_cntl + i; + reg = hub->vm_context0_cntl + hub->ctx_distance * i; tmp = RREG32(reg); tmp &= ~bits[AMDGPU_GFXHUB_0]; WREG32(reg, tmp); @@ -107,7 +105,7 @@ 
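The gmc_v10_0_vm_fault_interrupt_state hunks that follow apply the same correction in four places: the per-context VM_CONTEXT*_CNTL registers are no longer assumed to sit at consecutive offsets, so the loop index is scaled by hub->ctx_distance. A compilable sketch of the pattern, with a plain array standing in for RREG32/WREG32 and illustrative register offsets:

/* Hedged sketch: toggling a fault-interrupt bit across all 16 VM contexts,
 * scaling by ctx_distance instead of assuming consecutive registers.
 * The regs[] array and offsets are illustrative; real code uses RREG32/WREG32. */
#include <stdint.h>
#include <stdbool.h>

struct vmhub {
	uint32_t vm_context0_cntl;
	uint32_t ctx_distance;	/* e.g. mmVM_CONTEXT1_CNTL - mmVM_CONTEXT0_CNTL */
};

static uint32_t regs[4096];	/* simulated register file */

static void set_fault_interrupts(const struct vmhub *hub, uint32_t bits,
				 bool enable)
{
	unsigned int i;

	for (i = 0; i < 16; i++) {
		/* was: reg = hub->vm_context0_cntl + i; */
		uint32_t reg = hub->vm_context0_cntl + hub->ctx_distance * i;

		if (enable)
			regs[reg] |= bits;
		else
			regs[reg] &= ~bits;
	}
}

int main(void)
{
	struct vmhub hub = { .vm_context0_cntl = 0x100, .ctx_distance = 0x20 };

	set_fault_interrupts(&hub, 1u << 3, true);
	return 0;
}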
gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* MM HUB */ hub = &adev->vmhub[AMDGPU_MMHUB_0]; for (i = 0; i < 16; i++) { - reg = hub->vm_context0_cntl + i; + reg = hub->vm_context0_cntl + hub->ctx_distance * i; tmp = RREG32(reg); tmp |= bits[AMDGPU_MMHUB_0]; WREG32(reg, tmp); @@ -116,7 +114,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* GFX HUB */ hub = &adev->vmhub[AMDGPU_GFXHUB_0]; for (i = 0; i < 16; i++) { - reg = hub->vm_context0_cntl + i; + reg = hub->vm_context0_cntl + hub->ctx_distance * i; tmp = RREG32(reg); tmp |= bits[AMDGPU_GFXHUB_0]; WREG32(reg, tmp); @@ -285,7 +283,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, if (use_semaphore) { for (i = 0; i < adev->usec_timeout; i++) { /* a read return value of 1 means semaphore acuqire */ - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng); if (tmp & 0x1) break; udelay(1); @@ -295,18 +294,19 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); } - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); /* * Issue a dummy read to wait for the ACK register to be cleared * to avoid a false ACK due to the new fast GRBM interface. */ if (vmhub == AMDGPU_GFXHUB_0) - RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); + RREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng); /* Wait for ACK with a delay.*/ for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + + hub->eng_distance * eng); tmp &= 1 << vmid; if (tmp) break; @@ -320,7 +320,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, * add semaphore release after invalidation, * write with 0 means semaphore release */ - WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0); spin_unlock(&adev->gmc.invalidate_lock); @@ -360,8 +361,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type); - u32 req = hub->vm_inv_eng0_req + eng; - u32 ack = hub->vm_inv_eng0_ack + eng; + u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng; + u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid); @@ -504,16 +505,21 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, if (use_semaphore) /* a read return value of 1 means semaphore acuqire */ amdgpu_ring_emit_reg_wait(ring, - hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0x1, 0x1); - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + + (hub->ctx_addr_distance * vmid), lower_32_bits(pd_addr)); - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + + (hub->ctx_addr_distance * vmid), upper_32_bits(pd_addr)); - amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, - hub->vm_inv_eng0_ack + eng, + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + + hub->eng_distance * eng, + hub->vm_inv_eng0_ack + + hub->eng_distance * eng, req, 1 
<< vmid); /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ @@ -522,7 +528,8 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, * add semaphore release after invalidation, * write with 0 means semaphore release */ - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0); return pd_addr; } @@ -686,7 +693,8 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, { u64 base = 0; - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) base = gfxhub_v2_1_get_fb_location(adev); else base = gfxhub_v2_0_get_fb_location(adev); @@ -698,7 +706,8 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, amdgpu_gmc_gart_location(adev, mc); /* base offset of vram pages */ - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) adev->vm_manager.vram_base_offset = gfxhub_v2_1_get_mc_fb_offset(adev); else adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev); @@ -746,6 +755,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: default: adev->gmc.gart_size = 512ULL << 20; break; @@ -814,7 +824,8 @@ static int gmc_v10_0_sw_init(void *handle) int r, vram_width = 0, vram_type = 0, vram_vendor = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) gfxhub_v2_1_init(adev); else gfxhub_v2_0_init(adev); @@ -840,6 +851,7 @@ static int gmc_v10_0_sw_init(void *handle) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->num_vmhubs = 2; /* * To fulfill 4-level page support, @@ -905,8 +917,7 @@ static int gmc_v10_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.first_kfd_vmid = 8; amdgpu_vm_manager_init(adev); @@ -945,6 +956,7 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: break; default: break; @@ -971,7 +983,8 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) if (r) return r; - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) r = gfxhub_v2_1_gart_enable(adev); else r = gfxhub_v2_0_gart_enable(adev); @@ -995,7 +1008,8 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 
false : true; - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) gfxhub_v2_1_set_fault_enable_default(adev, value); else gfxhub_v2_0_set_fault_enable_default(adev, value); @@ -1036,7 +1050,8 @@ static int gmc_v10_0_hw_init(void *handle) */ static void gmc_v10_0_gart_disable(struct amdgpu_device *adev) { - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) gfxhub_v2_1_gart_disable(adev); else gfxhub_v2_0_gart_disable(adev); @@ -1110,7 +1125,8 @@ static int gmc_v10_0_set_clockgating_state(void *handle, if (r) return r; - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) return athub_v2_1_set_clockgating(adev, state); else return athub_v2_0_set_clockgating(adev, state); @@ -1122,7 +1138,8 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags) mmhub_v2_0_get_clockgating(adev, flags); - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) athub_v2_1_get_clockgating(adev, flags); else athub_v2_0_get_clockgating(adev, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index a75e472b4a81..538e7ee35cdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -878,7 +878,7 @@ static int gmc_v6_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.first_kfd_vmid = 8; amdgpu_vm_manager_init(adev); /* base offset of vram pages */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index bcd4baecfe11..e18296dc1386 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1052,7 +1052,7 @@ static int gmc_v7_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.first_kfd_vmid = 8; amdgpu_vm_manager_init(adev); /* base offset of vram pages */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 26976e50e2a2..a9e722b8a458 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1177,7 +1177,7 @@ static int gmc_v8_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.first_kfd_vmid = 8; amdgpu_vm_manager_init(adev); /* base offset of vram pages */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 11e93a82131d..6e4f3ff4810f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -68,9 +68,6 @@ #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L -/* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/ -#define AMDGPU_NUM_OF_VMIDS 8 - static const u32 golden_settings_vega10_hdp[] = { 0xf64, 0x0fffffff, 0x00000000, @@ -505,11 +502,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if 
(adev->gfx.kiq.ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && !adev->in_gpu_reset) { - uint32_t req = hub->vm_inv_eng0_req + eng; - uint32_t ack = hub->vm_inv_eng0_ack + eng; + uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; + uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + 1 << vmid); return; } @@ -526,7 +523,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (use_semaphore) { for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acuqire */ - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng); if (tmp & 0x1) break; udelay(1); @@ -537,7 +535,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } do { - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + + hub->eng_distance * eng, inv_req); /* * Issue a dummy read to wait for the ACK register to @@ -545,10 +544,12 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * GRBM interface. */ if (vmhub == AMDGPU_GFXHUB_0) - RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); + RREG32_NO_KIQ(hub->vm_inv_eng0_req + + hub->eng_distance * eng); for (j = 0; j < adev->usec_timeout; j++) { - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + + hub->eng_distance * eng); if (tmp & (1 << vmid)) break; udelay(1); @@ -564,7 +565,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * add semaphore release after invalidation, * write with 0 means semaphore release */ - WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0); spin_unlock(&adev->gmc.invalidate_lock); @@ -679,16 +681,21 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, if (use_semaphore) /* a read return value of 1 means semaphore acuqire */ amdgpu_ring_emit_reg_wait(ring, - hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0x1, 0x1); - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + + (hub->ctx_addr_distance * vmid), lower_32_bits(pd_addr)); - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + + (hub->ctx_addr_distance * vmid), upper_32_bits(pd_addr)); - amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, - hub->vm_inv_eng0_ack + eng, + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + + hub->eng_distance * eng, + hub->vm_inv_eng0_ack + + hub->eng_distance * eng, req, 1 << vmid); /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. 
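The flush path in gmc_v9_0_flush_gpu_tlb shows the full handshake with the new stride-based addressing: acquire the optional semaphore, write the invalidate request at vm_inv_eng0_req + hub->eng_distance * eng, then poll the matching ACK register for the VMID bit. A self-contained sketch of the request/ack portion, with a simulated register file in place of (W)REG32_NO_KIQ and illustrative offsets:

/* Hedged sketch of the stride-based invalidation handshake. A plain array
 * simulates the register file; real code uses (W)REG32_NO_KIQ and udelay(). */
#include <stdint.h>
#include <stdbool.h>

struct vmhub {
	uint32_t vm_inv_eng0_req;
	uint32_t vm_inv_eng0_ack;
	uint32_t eng_distance;	/* e.g. mmVM_INVALIDATE_ENG1_REQ - mmVM_INVALIDATE_ENG0_REQ */
};

static uint32_t regs[4096];	/* simulated register file */

static bool flush_vmid(const struct vmhub *hub, unsigned int eng,
		       unsigned int vmid, uint32_t inv_req,
		       unsigned int usec_timeout)
{
	unsigned int i;

	/* issue the invalidate request on engine 'eng' */
	regs[hub->vm_inv_eng0_req + hub->eng_distance * eng] = inv_req;

	/* poll the matching ACK register until this VMID's bit is set */
	for (i = 0; i < usec_timeout; i++) {
		uint32_t tmp = regs[hub->vm_inv_eng0_ack +
				    hub->eng_distance * eng];
		if (tmp & (1u << vmid))
			return true;
		/* real code delays 1us per iteration here */
	}
	return false;
}

int main(void)
{
	struct vmhub hub = {
		.vm_inv_eng0_req = 0x200,
		.vm_inv_eng0_ack = 0x240,
		.eng_distance = 1,	/* illustrative stride */
	};

	/* pretend the hardware already ACKed VMID 2 on engine 17 */
	regs[hub.vm_inv_eng0_ack + hub.eng_distance * 17] = 1u << 2;
	return flush_vmid(&hub, 17, 2, 0x1, 100) ? 0 : 1;
}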
*/ @@ -697,7 +704,8 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, * add semaphore release after invalidation, * write with 0 means semaphore release */ - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0); return pd_addr; } @@ -1248,12 +1256,15 @@ static int gmc_v9_0_sw_init(void *handle) /* * number of VMs * VMID 0 is reserved for System - * amdgpu graphics/compute will use VMIDs 1-7 - * amdkfd will use VMIDs 8-15 + * amdgpu graphics/compute will use VMIDs 1..n-1 + * amdkfd will use VMIDs n..15 + * + * The first KFD VMID is 8 for GPUs with graphics, 3 for + * compute-only GPUs. On compute-only GPUs that leaves 2 VMIDs + * for video processing. */ - adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB_1].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.first_kfd_vmid = + adev->asic_type == CHIP_ARCTURUS ? 3 : 8; amdgpu_vm_manager_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index 4c6c7544b400..bc300283b6ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -377,7 +377,7 @@ static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for register write */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; jpeg_v1_0_decode_ring_emit_reg_wait(ring, data0, data1, mask); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index c5f49129a300..94caf5204c8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -629,7 +629,7 @@ void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for register write */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; jpeg_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 405767208a4d..dffcb93ecee5 100755 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -54,15 +54,15 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - /* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */ - int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - offset * vmid, lower_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - offset * vmid, upper_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); } static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) @@ -230,6 +230,7 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device 
*adev) static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; unsigned num_level, block_size; uint32_t tmp; int i; @@ -268,25 +269,31 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i, tmp); - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, - upper_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); } } static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; unsigned i; for (i = 0; i < 18; ++i) { WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - 2 * i, 0xffffffff); + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - 2 * i, 0x1f); + i * hub->eng_addr_distance, 0x1f); } } @@ -333,12 +340,14 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; u32 tmp; u32 i; /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL, i, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL); @@ -429,6 +438,12 @@ void mmhub_v1_0_init(struct amdgpu_device *adev) hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); + hub->ctx_distance = mmVM_CONTEXT1_CNTL - mmVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = mmVM_INVALIDATE_ENG1_REQ - mmVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = mmVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index af0866af63a5..757fa8e83f5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -39,15 +39,15 @@ void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - /* two registers distance between mmMMVM_CONTEXT0_* to mmMMVM_CONTEXT1_* */ - int offset = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = 
&adev->vmhub[AMDGPU_MMHUB_0]; WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - offset * vmid, lower_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - offset * vmid, upper_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); } static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) @@ -209,6 +209,7 @@ static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; int i; uint32_t tmp; @@ -239,25 +240,31 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i, tmp); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, - upper_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); } } static void mmhub_v2_0_program_invalidation(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; unsigned i; for (i = 0; i < 18; ++i) { WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - 2 * i, 0xffffffff); + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - 2 * i, 0x1f); + i * hub->eng_addr_distance, 0x1f); } } @@ -279,12 +286,14 @@ int mmhub_v2_0_gart_enable(struct amdgpu_device *adev) void mmhub_v2_0_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; u32 tmp; u32 i; /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, i, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL); @@ -365,6 +374,13 @@ void mmhub_v2_0_init(struct amdgpu_device *adev) hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL); + hub->ctx_distance = mmMMVM_CONTEXT1_CNTL - mmMMVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = mmMMVM_INVALIDATE_ENG1_REQ - + mmMMVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = mmMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } static void mmhub_v2_0_update_medium_grain_clock_gating(struct 
amdgpu_device *adev, @@ -374,6 +390,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid); break; @@ -406,6 +423,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: if (def != data) WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data); if (def1 != data1) @@ -427,6 +445,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); break; default: @@ -442,6 +461,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade if (def != data) { switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data); break; default: @@ -462,6 +482,7 @@ int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: mmhub_v2_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); mmhub_v2_0_update_medium_grain_light_sleep(adev, @@ -483,6 +504,7 @@ void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid); break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index c0e3efcb09bf..9979f54fef57 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -57,20 +57,16 @@ u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev) static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid, uint32_t vmid, uint64_t value) { - /* two registers distance between mmVML2VC0_VM_CONTEXT0_* to - * mmVML2VC0_VM_CONTEXT1_* - */ - int dist = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + hub->ctx_addr_distance * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, lower_32_bits(value)); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + hub->ctx_addr_distance * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, upper_32_bits(value)); } @@ -301,6 +297,7 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev, static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; uint32_t tmp; int i; @@ -335,21 +332,25 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i, - tmp); + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * 
hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0); + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->ctx_addr_distance, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0); + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->ctx_addr_distance, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->ctx_addr_distance, lower_32_bits(adev->vm_manager.max_pfn - 1)); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->ctx_addr_distance, upper_32_bits(adev->vm_manager.max_pfn - 1)); } } @@ -357,16 +358,19 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev, int hubid) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; unsigned i; for (i = 0; i < 18; ++i) { WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->eng_addr_distance, 0x1f); } } @@ -395,6 +399,7 @@ int mmhub_v9_4_gart_enable(struct amdgpu_device *adev) void mmhub_v9_4_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; u32 tmp; u32 i, j; @@ -404,7 +409,7 @@ void mmhub_v9_4_gart_disable(struct amdgpu_device *adev) WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, j * MMHUB_INSTANCE_REGISTER_OFFSET + - i, 0); + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, @@ -534,6 +539,15 @@ void mmhub_v9_4_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL) + i * MMHUB_INSTANCE_REGISTER_OFFSET; + + hub[i]->ctx_distance = mmVML2VC0_VM_CONTEXT1_CNTL - + mmVML2VC0_VM_CONTEXT0_CNTL; + hub[i]->ctx_addr_distance = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub[i]->eng_distance = mmVML2VC0_VM_INVALIDATE_ENG1_REQ - + mmVML2VC0_VM_INVALIDATE_ENG0_REQ; + hub[i]->eng_addr_distance = mmVML2VC0_VM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } } diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h new file mode 100644 index 000000000000..3e4e858a6965 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h @@ -0,0 +1,130 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MMSCH_V3_0_H__ +#define __MMSCH_V3_0_H__ + +#include "amdgpu_vcn.h" + +#define MMSCH_VERSION_MAJOR 3 +#define MMSCH_VERSION_MINOR 0 +#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR) + +enum mmsch_v3_0_command_type { + MMSCH_COMMAND__DIRECT_REG_WRITE = 0, + MMSCH_COMMAND__DIRECT_REG_POLLING = 2, + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3, + MMSCH_COMMAND__INDIRECT_REG_WRITE = 8, + MMSCH_COMMAND__END = 0xf +}; + +struct mmsch_v3_0_table_info { + uint32_t init_status; + uint32_t table_offset; + uint32_t table_size; +}; + +struct mmsch_v3_0_init_header { + uint32_t version; + uint32_t total_size; + struct mmsch_v3_0_table_info inst[AMDGPU_MAX_VCN_INSTANCES]; +}; + +struct mmsch_v3_0_cmd_direct_reg_header { + uint32_t reg_offset : 28; + uint32_t command_type : 4; +}; + +struct mmsch_v3_0_cmd_indirect_reg_header { + uint32_t reg_offset : 20; + uint32_t reg_idx_space : 8; + uint32_t command_type : 4; +}; + +struct mmsch_v3_0_cmd_direct_write { + struct mmsch_v3_0_cmd_direct_reg_header cmd_header; + uint32_t reg_value; +}; + +struct mmsch_v3_0_cmd_direct_read_modify_write { + struct mmsch_v3_0_cmd_direct_reg_header cmd_header; + uint32_t write_data; + uint32_t mask_value; +}; + +struct mmsch_v3_0_cmd_direct_polling { + struct mmsch_v3_0_cmd_direct_reg_header cmd_header; + uint32_t mask_value; + uint32_t wait_value; +}; + +struct mmsch_v3_0_cmd_end { + struct mmsch_v3_0_cmd_direct_reg_header cmd_header; +}; + +struct mmsch_v3_0_cmd_indirect_write { + struct mmsch_v3_0_cmd_indirect_reg_header cmd_header; + uint32_t reg_value; +}; + +#define MMSCH_V3_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \ + size = sizeof(struct mmsch_v3_0_cmd_direct_read_modify_write); \ + size_dw = size / 4; \ + direct_rd_mod_wt.cmd_header.reg_offset = reg; \ + direct_rd_mod_wt.mask_value = mask; \ + direct_rd_mod_wt.write_data = data; \ + memcpy((void *)table_loc, &direct_rd_mod_wt, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V3_0_INSERT_DIRECT_WT(reg, value) { \ + size = sizeof(struct mmsch_v3_0_cmd_direct_write); \ + size_dw = size / 4; \ + direct_wt.cmd_header.reg_offset = reg; \ + direct_wt.reg_value = value; \ + memcpy((void *)table_loc, &direct_wt, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V3_0_INSERT_DIRECT_POLL(reg, mask, wait) { \ + size = sizeof(struct mmsch_v3_0_cmd_direct_polling); \ + size_dw = size / 4; \ + 
direct_poll.cmd_header.reg_offset = reg; \ + direct_poll.mask_value = mask; \ + direct_poll.wait_value = wait; \ + memcpy((void *)table_loc, &direct_poll, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V3_0_INSERT_END() { \ + size = sizeof(struct mmsch_v3_0_cmd_end); \ + size_dw = size / 4; \ + memcpy((void *)table_loc, &end, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 471dc82fd1aa..fdabaf0db3e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -270,6 +270,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) if (ih->use_bus_addr) { switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_Sienna_Cichlid); ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index a7cfe3ac7cb6..479991b71295 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -265,17 +265,21 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev) amdgpu_atombios_scratch_regs_engine_hung(adev, true); - dev_info(adev->dev, "GPU mode1 reset\n"); - /* disable BM */ pci_clear_master(adev->pdev); pci_save_state(adev->pdev); - ret = psp_gpu_reset(adev); + if (amdgpu_dpm_is_mode1_reset_supported(adev)) { + dev_info(adev->dev, "GPU smu mode1 reset\n"); + ret = amdgpu_dpm_mode1_reset(adev); + } else { + dev_info(adev->dev, "GPU psp mode1 reset\n"); + ret = psp_gpu_reset(adev); + } + if (ret) dev_err(adev->dev, "GPU mode1 reset failed\n"); - pci_restore_state(adev->pdev); /* wait for asic to come out of reset */ @@ -307,7 +311,15 @@ nv_asic_reset_method(struct amdgpu_device *adev) { struct smu_context *smu = &adev->smu; - if (!amdgpu_sriov_vf(adev) && smu_baco_is_support(smu)) + if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) + return amdgpu_reset_method; + + if (amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + + if (smu_baco_is_support(smu)) return AMD_RESET_METHOD_BACO; else return AMD_RESET_METHOD_MODE1; @@ -319,15 +331,16 @@ static int nv_asic_reset(struct amdgpu_device *adev) struct smu_context *smu = &adev->smu; if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { + dev_info(adev->dev, "GPU BACO reset\n"); + ret = smu_baco_enter(smu); if (ret) return ret; ret = smu_baco_exit(smu); if (ret) return ret; - } else { + } else ret = nv_asic_mode1_reset(adev); - } return ret; } @@ -411,6 +424,7 @@ legacy_init: navi12_reg_base_init(adev); break; case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: sienna_cichlid_reg_base_init(adev); break; default: @@ -420,6 +434,11 @@ legacy_init: return 0; } +void nv_set_virt_ops(struct amdgpu_device *adev) +{ + adev->virt.ops = &xgpu_nv_virt_ops; +} + int nv_set_ip_blocks(struct amdgpu_device *adev) { int r; @@ -427,12 +446,6 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.funcs = &nbio_v2_3_funcs; adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; - if (amdgpu_sriov_vf(adev)) { - adev->virt.ops = &xgpu_nv_virt_ops; - /* try send GPU_INIT_DATA request to host */ - amdgpu_virt_request_init_data(adev); - } - /* Set IP register base before any HW register access */ r = nv_reg_base_init(adev); if (r) @@ 
-504,10 +517,35 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); - amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); + if (!amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); + if (adev->enable_mes) amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); break; + case CHIP_NAVY_FLOUNDER: + amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && + is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + break; default: return -EINVAL; } @@ -708,8 +746,7 @@ static int nv_common_early_init(void *handle) adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG | - AMD_PG_SUPPORT_ATHUB | - AMD_PG_SUPPORT_MMHUB; + AMD_PG_SUPPORT_ATHUB; /* guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0, * as a consequence, the rev_id and external_rev_id are wrong. * workaround it by hardcoding rev_id to 0 (default value). 
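[Editor's note — not part of the patch] The nv.c reset hunks above, and the matching hunks in si.c, soc15.c and vi.c further down, all apply the same policy: an explicitly requested amdgpu_reset_method module parameter wins when the ASIC supports it, any other explicit value draws a warning, and -1 (AUTO) falls through to the driver's own choice. A minimal sketch of that pattern, reusing the nv.c names shown in this patch:

	/* Sketch only; mirrors the nv_asic_reset_method() hunk above. */
	static enum amd_reset_method
	example_asic_reset_method(struct amdgpu_device *adev)
	{
		/* an explicitly requested, supported method wins */
		if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
		    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
			return amdgpu_reset_method;

		/* any other explicit value (-1 means AUTO) gets a warning */
		if (amdgpu_reset_method != -1)
			dev_warn(adev->dev,
				 "Specified reset method:%d isn't supported, using AUTO instead.\n",
				 amdgpu_reset_method);

		/* AUTO: prefer BACO when the SMU advertises support */
		return smu_baco_is_support(&adev->smu) ?
			AMD_RESET_METHOD_BACO : AMD_RESET_METHOD_MODE1;
	}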
@@ -732,9 +769,34 @@ static int nv_common_early_init(void *handle)
 		adev->pg_flags = AMD_PG_SUPPORT_VCN |
 			AMD_PG_SUPPORT_VCN_DPG |
 			AMD_PG_SUPPORT_JPEG |
-			AMD_PG_SUPPORT_ATHUB;
+			AMD_PG_SUPPORT_ATHUB |
+			AMD_PG_SUPPORT_MMHUB;
+		if (amdgpu_sriov_vf(adev)) {
+			/* the hypervisor controls CG and PG enablement */
+			adev->cg_flags = 0;
+			adev->pg_flags = 0;
+		}
 		adev->external_rev_id = adev->rev_id + 0x28;
 		break;
+	case CHIP_NAVY_FLOUNDER:
+		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+			AMD_CG_SUPPORT_GFX_CGCG |
+			AMD_CG_SUPPORT_GFX_3D_CGCG |
+			AMD_CG_SUPPORT_VCN_MGCG |
+			AMD_CG_SUPPORT_JPEG_MGCG |
+			AMD_CG_SUPPORT_MC_MGCG |
+			AMD_CG_SUPPORT_MC_LS |
+			AMD_CG_SUPPORT_HDP_MGCG |
+			AMD_CG_SUPPORT_HDP_LS |
+			AMD_CG_SUPPORT_IH_CG;
+		adev->pg_flags = AMD_PG_SUPPORT_VCN |
+			AMD_PG_SUPPORT_VCN_DPG |
+			AMD_PG_SUPPORT_JPEG |
+			AMD_PG_SUPPORT_ATHUB |
+			AMD_PG_SUPPORT_MMHUB;
+		adev->external_rev_id = adev->rev_id + 0x32;
+		break;
+
 	default:
 		/* FIXME: not supported yet */
 		return -EINVAL;
@@ -961,6 +1023,7 @@ static int nv_common_set_clockgating_state(void *handle,
 	case CHIP_NAVI14:
 	case CHIP_NAVI12:
 	case CHIP_SIENNA_CICHLID:
+	case CHIP_NAVY_FLOUNDER:
 		adev->nbio.funcs->update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE);
 		adev->nbio.funcs->update_medium_grain_light_sleep(adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h
index b6a95f0122fb..aeef50a6a54b 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/nv.h
@@ -28,6 +28,7 @@
 void nv_grbm_select(struct amdgpu_device *adev,
 		    u32 me, u32 pipe, u32 queue, u32 vmid);
+void nv_set_virt_ops(struct amdgpu_device *adev);
 int nv_set_ip_blocks(struct amdgpu_device *adev);
 int navi10_reg_base_init(struct amdgpu_device *adev);
 int navi14_reg_base_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 423386272920..77f99811cd85 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -57,6 +57,8 @@ MODULE_FIRMWARE("amdgpu/arcturus_asd.bin");
 MODULE_FIRMWARE("amdgpu/arcturus_ta.bin");
 MODULE_FIRMWARE("amdgpu/sienna_cichlid_sos.bin");
 MODULE_FIRMWARE("amdgpu/sienna_cichlid_asd.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_sos.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_asd.bin");
 
 /* address block */
 #define smnMP1_FIRMWARE_FLAGS		0x3010024
@@ -100,6 +102,9 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
 	case CHIP_SIENNA_CICHLID:
 		chip_name = "sienna_cichlid";
 		break;
+	case CHIP_NAVY_FLOUNDER:
+		chip_name = "navy_flounder";
+		break;
 	default:
 		BUG();
 	}
@@ -108,7 +113,8 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
 	if (err)
 		return err;
 
-	if (adev->asic_type != CHIP_SIENNA_CICHLID) {
+	if (adev->asic_type != CHIP_SIENNA_CICHLID &&
+	    adev->asic_type != CHIP_NAVY_FLOUNDER) {
 		err = psp_init_asd_microcode(psp, chip_name);
 		if (err)
 			return err;
@@ -173,6 +179,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
 		}
 		break;
 	case CHIP_SIENNA_CICHLID:
+	case CHIP_NAVY_FLOUNDER:
 		break;
 	default:
 		BUG();
@@ -397,7 +404,8 @@ static int psp_v11_0_ring_init(struct psp_context *psp,
 	struct amdgpu_device *adev = psp->adev;
 
 	if ((!amdgpu_sriov_vf(adev)) &&
-	    (adev->asic_type != CHIP_SIENNA_CICHLID))
+	    (adev->asic_type != CHIP_SIENNA_CICHLID) &&
+	    (adev->asic_type != CHIP_NAVY_FLOUNDER))
		psp_v11_0_reroute_ih(psp);
 
 	ring = &psp->km_ring;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 1baeddf2f1e6..e2232dd12d8e 100644
---
a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -315,30 +315,20 @@ static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring) static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u64 *wptr = NULL; - uint64_t local_wptr = 0; + u64 wptr; if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]); - DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr); - *wptr = (*wptr) >> 2; - DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr); + wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); + DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); } else { - u32 lowbit, highbit; - - wptr = &local_wptr; - lowbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2; - highbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; - - DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", - ring->me, highbit, lowbit); - *wptr = highbit; - *wptr = (*wptr) << 32; - *wptr |= lowbit; + wptr = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)); + wptr = wptr << 32; + wptr |= RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)); + DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr); } - return *wptr; + return wptr >> 2; } /** @@ -485,7 +475,6 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { - struct amdgpu_device *adev = ring->adev; bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; /* write the fence */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | @@ -508,8 +497,7 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, upper_32_bits(seq)); } - /* Interrupt not work fine on GFX10.1 model yet. 
Use fallback instead */ - if ((flags & AMDGPU_FENCE_FLAG_INT) && adev->pdev->device != 0x50) { + if (flags & AMDGPU_FENCE_FLAG_INT) { /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); @@ -887,10 +875,6 @@ static int sdma_v5_0_start(struct amdgpu_device *adev) r = sdma_v5_0_load_microcode(adev); if (r) return r; - - /* The value of mmSDMA_F32_CNTL is invalid the moment after loading fw */ - if (amdgpu_emu_mode == 1 && adev->pdev->device == 0x4d) - msleep(1000); } /* unhalt the MEs */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 318d32e2bbf6..46a9617fee5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -45,6 +45,7 @@ #include "sdma_v5_2.h" MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_sdma.bin"); #define SDMA1_REG_OFFSET 0x600 #define SDMA3_REG_OFFSET 0x400 @@ -85,6 +86,7 @@ static void sdma_v5_2_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: break; default: break; @@ -152,6 +154,9 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: chip_name = "sienna_cichlid"; break; + case CHIP_NAVY_FLOUNDER: + chip_name = "navy_flounder"; + break; default: BUG(); } @@ -167,7 +172,8 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) goto out; for (i = 1; i < adev->sdma.num_instances; i++) { - if (adev->asic_type == CHIP_SIENNA_CICHLID) { + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) { memcpy((void*)&adev->sdma.instance[i], (void*)&adev->sdma.instance[0], sizeof(struct amdgpu_sdma_instance)); @@ -262,30 +268,20 @@ static uint64_t sdma_v5_2_ring_get_rptr(struct amdgpu_ring *ring) static uint64_t sdma_v5_2_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u64 *wptr = NULL; - uint64_t local_wptr = 0; + u64 wptr; if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]); - DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr); - *wptr = (*wptr) >> 2; - DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr); + wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); + DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); } else { - u32 lowbit, highbit; - - wptr = &local_wptr; - lowbit = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2; - highbit = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; - - DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", - ring->me, highbit, lowbit); - *wptr = highbit; - *wptr = (*wptr) << 32; - *wptr |= lowbit; + wptr = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)); + wptr = wptr << 32; + wptr |= RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)); + DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr); } - return *wptr; + return wptr >> 2; } /** @@ -417,7 +413,6 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { - struct amdgpu_device *adev = ring->adev; bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; /* write the fence */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | @@ -440,8 
+435,7 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, upper_32_bits(seq)); } - /* Interrupt not work fine on GFX10.1 model yet. Use fallback instead */ - if ((flags & AMDGPU_FENCE_FLAG_INT) && adev->pdev->device != 0x50) { + if (flags & AMDGPU_FENCE_FLAG_INT) { /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); @@ -1167,7 +1161,16 @@ static int sdma_v5_2_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->sdma.num_instances = 4; + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + adev->sdma.num_instances = 4; + break; + case CHIP_NAVY_FLOUNDER: + adev->sdma.num_instances = 2; + break; + default: + break; + } sdma_v5_2_set_ring_funcs(adev); sdma_v5_2_set_buffer_funcs(adev); @@ -1177,6 +1180,40 @@ static int sdma_v5_2_early_init(void *handle) return 0; } +static unsigned sdma_v5_2_seq_to_irq_id(int seq_num) +{ + switch (seq_num) { + case 0: + return SOC15_IH_CLIENTID_SDMA0; + case 1: + return SOC15_IH_CLIENTID_SDMA1; + case 2: + return SOC15_IH_CLIENTID_SDMA2; + case 3: + return SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid; + default: + break; + } + return -EINVAL; +} + +static unsigned sdma_v5_2_seq_to_trap_id(int seq_num) +{ + switch (seq_num) { + case 0: + return SDMA0_5_0__SRCID__SDMA_TRAP; + case 1: + return SDMA1_5_0__SRCID__SDMA_TRAP; + case 2: + return SDMA2_5_0__SRCID__SDMA_TRAP; + case 3: + return SDMA3_5_0__SRCID__SDMA_TRAP; + default: + break; + } + return -EINVAL; +} + static int sdma_v5_2_sw_init(void *handle) { struct amdgpu_ring *ring; @@ -1184,32 +1221,13 @@ static int sdma_v5_2_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, - SDMA0_5_0__SRCID__SDMA_TRAP, - &adev->sdma.trap_irq); - if (r) - return r; - - /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, - SDMA1_5_0__SRCID__SDMA_TRAP, - &adev->sdma.trap_irq); - if (r) - return r; - - /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA2, - SDMA2_5_0__SRCID__SDMA_TRAP, - &adev->sdma.trap_irq); - if (r) - return r; - - /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid, - SDMA3_5_0__SRCID__SDMA_TRAP, - &adev->sdma.trap_irq); - if (r) - return r; + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_add_id(adev, sdma_v5_2_seq_to_irq_id(i), + sdma_v5_2_seq_to_trap_id(i), + &adev->sdma.trap_irq); + if (r) + return r; + } r = sdma_v5_2_init_microcode(adev); if (r) { @@ -1545,6 +1563,7 @@ static int sdma_v5_2_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: sdma_v5_2_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? 
true : false);
 		sdma_v5_2_update_medium_grain_light_sleep(adev,
@@ -1572,7 +1591,7 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags)
 		*flags = 0;
 
 	/* AMD_CG_SUPPORT_SDMA_LS */
-	data = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
+	data = RREG32_KIQ(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
 	if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
 		*flags |= AMD_CG_SUPPORT_SDMA_LS;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 9b12285177e3..1b449291f068 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1229,6 +1229,11 @@ static bool si_asic_supports_baco(struct amdgpu_device *adev)
 static enum amd_reset_method
 si_asic_reset_method(struct amdgpu_device *adev)
 {
+	if (amdgpu_reset_method != AMD_RESET_METHOD_LEGACY &&
+	    amdgpu_reset_method != -1)
+		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+			 amdgpu_reset_method);
+
 	return AMD_RESET_METHOD_LEGACY;
 }
 
@@ -1267,12 +1272,6 @@ static u32 si_get_xclk(struct amdgpu_device *adev)
 	return reference_clock;
 }
 
-//xxx:not implemented
-static int si_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
-{
-	return 0;
-}
-
 static void si_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
 {
 	if (!ring || !ring->funcs->emit_wreg) {
@@ -1428,6 +1427,358 @@ static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev)
 	return (nak_r + nak_g);
 }
 
+static int si_uvd_send_upll_ctlreq(struct amdgpu_device *adev,
+				   unsigned cg_upll_func_cntl)
+{
+	unsigned i;
+
+	/* Make sure UPLL_CTLREQ is deasserted */
+	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);
+
+	mdelay(10);
+
+	/* Assert UPLL_CTLREQ */
+	WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
+
+	/* Wait for CTLACK and CTLACK2 to get asserted */
+	for (i = 0; i < SI_MAX_CTLACKS_ASSERTION_WAIT; ++i) {
+		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
+
+		if ((RREG32(cg_upll_func_cntl) & mask) == mask)
+			break;
+		mdelay(10);
+	}
+
+	/* Deassert UPLL_CTLREQ */
+	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);
+
+	if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) {
+		DRM_ERROR("Timeout setting UVD clocks!\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static unsigned si_uvd_calc_upll_post_div(unsigned vco_freq,
+					  unsigned target_freq,
+					  unsigned pd_min,
+					  unsigned pd_even)
+{
+	unsigned post_div = vco_freq / target_freq;
+
+	/* Adjust to post divider minimum value */
+	if (post_div < pd_min)
+		post_div = pd_min;
+
+	/* We always need a frequency less than or equal to the target */
+	if ((vco_freq / post_div) > target_freq)
+		post_div += 1;
+
+	/* Post dividers above a certain value must be even */
+	if (post_div > pd_even && post_div % 2)
+		post_div += 1;
+
+	return post_div;
+}
+
+/**
+ * si_calc_upll_dividers - calc UPLL clock dividers
+ *
+ * @adev: amdgpu_device pointer
+ * @vclk: wanted VCLK
+ * @dclk: wanted DCLK
+ * @vco_min: minimum VCO frequency
+ * @vco_max: maximum VCO frequency
+ * @fb_factor: factor to multiply vco freq with
+ * @fb_mask: limit and bitmask for feedback divider
+ * @pd_min: post divider minimum
+ * @pd_max: post divider maximum
+ * @pd_even: post divider must be even above this value
+ * @optimal_fb_div: resulting feedback divider
+ * @optimal_vclk_div: resulting vclk post divider
+ * @optimal_dclk_div: resulting dclk post divider
+ *
+ * Calculate dividers for the UVD's UPLL (except APUs).
+ * Returns zero on success; -EINVAL on error.
+ */
+static int si_calc_upll_dividers(struct amdgpu_device *adev,
+				 unsigned vclk, unsigned dclk,
+				 unsigned vco_min, unsigned vco_max,
+				 unsigned fb_factor, unsigned fb_mask,
+				 unsigned pd_min, unsigned pd_max,
+				 unsigned pd_even,
+				 unsigned *optimal_fb_div,
+				 unsigned *optimal_vclk_div,
+				 unsigned *optimal_dclk_div)
+{
+	unsigned vco_freq, ref_freq = adev->clock.spll.reference_freq;
+
+	/* Start off with something large */
+	unsigned optimal_score = ~0;
+
+	/* Loop through vco from low to high */
+	vco_min = max(max(vco_min, vclk), dclk);
+	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {
+		uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
+		unsigned vclk_div, dclk_div, score;
+
+		do_div(fb_div, ref_freq);
+
+		/* fb div out of range? */
+		if (fb_div > fb_mask)
+			break; /* It can only get worse */
+
+		fb_div &= fb_mask;
+
+		/* Calc vclk divider with current vco freq */
+		vclk_div = si_uvd_calc_upll_post_div(vco_freq, vclk,
+						     pd_min, pd_even);
+		if (vclk_div > pd_max)
+			break; /* vco is too big, it has to stop */
+
+		/* Calc dclk divider with current vco freq */
+		dclk_div = si_uvd_calc_upll_post_div(vco_freq, dclk,
+						     pd_min, pd_even);
+		if (dclk_div > pd_max)
+			break; /* vco is too big, it has to stop */
+
+		/* Calc score with current vco freq */
+		score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);
+
+		/* Determine if this vco setting is better than current optimal settings */
+		if (score < optimal_score) {
+			*optimal_fb_div = fb_div;
+			*optimal_vclk_div = vclk_div;
+			*optimal_dclk_div = dclk_div;
+			optimal_score = score;
+			if (optimal_score == 0)
+				break; /* It can't get better than this */
+		}
+	}
+
+	/* Did we find a valid setup? */
+	if (optimal_score == ~0)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int si_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
+{
+	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
+	int r;
+
+	/* Bypass vclk and dclk with bclk */
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
+		 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
+
+	/* Put PLL in bypass mode */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
+
+	if (!vclk || !dclk) {
+		/* Keep the Bypass mode */
+		return 0;
+	}
+
+	r = si_calc_upll_dividers(adev, vclk, dclk, 125000, 250000,
+				  16384, 0x03FFFFFF, 0, 128, 5,
+				  &fb_div, &vclk_div, &dclk_div);
+	if (r)
+		return r;
+
+	/* Set RESET_ANTI_MUX to 0 */
+	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
+
+	/* Set VCO_MODE to 1 */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
+
+	/* Disable sleep mode */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
+
+	/* Deassert UPLL_RESET */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
+
+	mdelay(1);
+
+	r = si_uvd_send_upll_ctlreq(adev, CG_UPLL_FUNC_CNTL);
+	if (r)
+		return r;
+
+	/* Assert UPLL_RESET again */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
+
+	/* Disable spread spectrum. */
+	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
+
+	/* Set feedback divider */
+	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
+
+	/* Set ref divider to 0 */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
+
+	if (fb_div < 307200)
+		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
+	else
+		WREG32_P(CG_UPLL_FUNC_CNTL_4,
+			 UPLL_SPARE_ISPARE9,
+			 ~UPLL_SPARE_ISPARE9);
+
+	/* Set PDIV_A and PDIV_B */
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		 UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
+		 ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
+
+	/* Give the PLL some time to settle */
+	mdelay(15);
+
+	/* Deassert PLL_RESET */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
+
+	mdelay(15);
+
+	/* Switch from bypass mode to normal mode */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
+
+	r = si_uvd_send_upll_ctlreq(adev, CG_UPLL_FUNC_CNTL);
+	if (r)
+		return r;
+
+	/* Switch VCLK and DCLK selection */
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
+		 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
+
+	mdelay(100);
+
+	return 0;
+}
+
+static int si_vce_send_vcepll_ctlreq(struct amdgpu_device *adev)
+{
+	unsigned i;
+
+	/* Make sure VCEPLL_CTLREQ is deasserted */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
+
+	mdelay(10);
+
+	/* Assert UPLL_CTLREQ */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
+
+	/* Wait for CTLACK and CTLACK2 to get asserted */
+	for (i = 0; i < SI_MAX_CTLACKS_ASSERTION_WAIT; ++i) {
+		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
+
+		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
+			break;
+		mdelay(10);
+	}
+
+	/* Deassert UPLL_CTLREQ */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
+
+	if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) {
+		DRM_ERROR("Timeout setting VCE clocks!\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int si_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
+{
+	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
+	int r;
+
+	/* Bypass evclk and ecclk with bclk */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
+		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
+		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
+
+	/* Put PLL in bypass mode */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
+		     ~VCEPLL_BYPASS_EN_MASK);
+
+	if (!evclk || !ecclk) {
+		/* Keep the Bypass mode, put PLL to sleep */
+		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
+			     ~VCEPLL_SLEEP_MASK);
+		return 0;
+	}
+
+	r = si_calc_upll_dividers(adev, evclk, ecclk, 125000, 250000,
+				  16384, 0x03FFFFFF, 0, 128, 5,
+				  &fb_div, &evclk_div, &ecclk_div);
+	if (r)
+		return r;
+
+	/* Set RESET_ANTI_MUX to 0 */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
+
+	/* Set VCO_MODE to 1 */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
+		     ~VCEPLL_VCO_MODE_MASK);
+
+	/* Toggle VCEPLL_SLEEP to 1 then back to 0 */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
+		     ~VCEPLL_SLEEP_MASK);
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
+
+	/* Deassert VCEPLL_RESET */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
+
+	mdelay(1);
+
+	r = si_vce_send_vcepll_ctlreq(adev);
+	if (r)
+		return r;
+
+	/* Assert VCEPLL_RESET again */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
+
+	/* Disable spread spectrum. */
+	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
+
+	/* Set feedback divider */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3,
+		     VCEPLL_FB_DIV(fb_div),
+		     ~VCEPLL_FB_DIV_MASK);
+
+	/* Set ref divider to 0 */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
+
+	/* Set PDIV_A and PDIV_B */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
+		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
+		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
+
+	/* Give the PLL some time to settle */
+	mdelay(15);
+
+	/* Deassert PLL_RESET */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
+
+	mdelay(15);
+
+	/* Switch from bypass mode to normal mode */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
+
+	r = si_vce_send_vcepll_ctlreq(adev);
+	if (r)
+		return r;
+
+	/* Switch EVCLK and ECCLK selection */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
+		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
+		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
+
+	mdelay(100);
+
+	return 0;
+}
+
 static const struct amdgpu_asic_funcs si_asic_funcs =
 {
 	.read_disabled_bios = &si_read_disabled_bios,
@@ -1438,7 +1789,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
 	.set_vga_state = &si_vga_set_state,
 	.get_xclk = &si_get_xclk,
 	.set_uvd_clocks = &si_set_uvd_clocks,
-	.set_vce_clocks = NULL,
+	.set_vce_clocks = &si_set_vce_clocks,
 	.get_pcie_lanes = &si_get_pcie_lanes,
 	.set_pcie_lanes = &si_set_pcie_lanes,
 	.get_config_memsize = &si_get_config_memsize,
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index c00ba4b23c9a..ea914b256ebd 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -6953,6 +6953,24 @@ static int si_power_control_set_level(struct amdgpu_device *adev)
 	return 0;
 }
 
+static void si_set_vce_clock(struct amdgpu_device *adev,
+			     struct amdgpu_ps *new_rps,
+			     struct amdgpu_ps *old_rps)
+{
+	if ((old_rps->evclk != new_rps->evclk) ||
+	    (old_rps->ecclk != new_rps->ecclk)) {
+		/* Turn the clocks on when encoding, off otherwise */
+		if (new_rps->evclk || new_rps->ecclk) {
+			/* Place holder for future VCE1.0 porting to amdgpu
+			vce_v1_0_enable_mgcg(adev, false, false);*/
+		} else {
+			/* Place holder for future VCE1.0 porting to amdgpu
+			vce_v1_0_enable_mgcg(adev, true, false);
+			amdgpu_asic_set_vce_clocks(adev, new_rps->evclk, new_rps->ecclk);*/
+		}
+	}
+}
+
 static int si_dpm_set_power_state(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -7029,6 +7047,7 @@ static int si_dpm_set_power_state(void *handle)
 		return ret;
 	}
 	ni_set_uvd_clock_after_set_eng_clock(adev, new_ps, old_ps);
+	si_set_vce_clock(adev, new_ps, old_ps);
 	if (eg_pi->pcie_performance_request)
 		si_notify_link_speed_change_after_state_change(adev, new_ps, old_ps);
 	ret = si_set_power_state_conditionally_enable_ulv(adev, new_ps);
diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h
index 790ba46eaebb..4e935baa7b91 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_enums.h
+++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h
@@ -121,7 +121,6 @@
 #define CURSOR_UPDATE_LOCK             (1 << 16)
 #define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24)
 
-#define AMDGPU_NUM_OF_VMIDS 8
 #define SI_CRTC0_REGISTER_OFFSET 0
 #define SI_CRTC1_REGISTER_OFFSET 0x300
 #define SI_CRTC2_REGISTER_OFFSET 0x2600
diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h
index 5f660f0c819f..9a39cbfe6db9 100644
--- a/drivers/gpu/drm/amd/amdgpu/sid.h
+++ b/drivers/gpu/drm/amd/amdgpu/sid.h
@@ -47,8 +47,7 @@
 #define SI_MAX_LDS_NUM 0xFFFF
#define SI_MAX_TCC 16
 #define SI_MAX_TCC_MASK 0xFFFF
-
-#define AMDGPU_NUM_OF_VMIDS 8
+#define SI_MAX_CTLACKS_ASSERTION_WAIT 100
 
 /* SMC IND accessor regs */
 #define SMC_IND_INDEX_0 0x80
@@ -2460,4 +2459,36 @@
 #define MC_VM_FB_OFFSET 0x81a
 
+/* Discrete VCE clocks */
+#define CG_VCEPLL_FUNC_CNTL 0xc0030600
+#define VCEPLL_RESET_MASK 0x00000001
+#define VCEPLL_SLEEP_MASK 0x00000002
+#define VCEPLL_BYPASS_EN_MASK 0x00000004
+#define VCEPLL_CTLREQ_MASK 0x00000008
+#define VCEPLL_VCO_MODE_MASK 0x00000600
+#define VCEPLL_REF_DIV_MASK 0x003F0000
+#define VCEPLL_CTLACK_MASK 0x40000000
+#define VCEPLL_CTLACK2_MASK 0x80000000
+
+#define CG_VCEPLL_FUNC_CNTL_2 0xc0030601
+#define VCEPLL_PDIV_A(x) ((x) << 0)
+#define VCEPLL_PDIV_A_MASK 0x0000007F
+#define VCEPLL_PDIV_B(x) ((x) << 8)
+#define VCEPLL_PDIV_B_MASK 0x00007F00
+#define EVCLK_SRC_SEL(x) ((x) << 20)
+#define EVCLK_SRC_SEL_MASK 0x01F00000
+#define ECCLK_SRC_SEL(x) ((x) << 25)
+#define ECCLK_SRC_SEL_MASK 0x3E000000
+
+#define CG_VCEPLL_FUNC_CNTL_3 0xc0030602
+#define VCEPLL_FB_DIV(x) ((x) << 0)
+#define VCEPLL_FB_DIV_MASK 0x01FFFFFF
+
+#define CG_VCEPLL_FUNC_CNTL_4 0xc0030603
+
+#define CG_VCEPLL_FUNC_CNTL_5 0xc0030604
+#define CG_VCEPLL_SPREAD_SPECTRUM 0xc0030606
+#define VCEPLL_SSEN_MASK 0x00000001
+
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 3e406eeeaff6..84d811b6e48b 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -532,6 +532,15 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
 	bool baco_reset = false;
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
+	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
+	    amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
+	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+		return amdgpu_reset_method;
+
+	if (amdgpu_reset_method != -1)
+		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+			 amdgpu_reset_method);
+
 	switch (adev->asic_type) {
 	case CHIP_RAVEN:
 	case CHIP_RENOIR:
@@ -669,7 +678,7 @@ static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
 	return adev->nbio.funcs->get_rev_id(adev);
 }
 
-int soc15_set_ip_blocks(struct amdgpu_device *adev)
+static void soc15_reg_base_init(struct amdgpu_device *adev)
 {
 	int r;
 
@@ -681,6 +690,8 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 		vega10_reg_base_init(adev);
 		break;
 	case CHIP_RENOIR:
+		/* It's safe to do ip discovery here for Renoir,
+		 * it doesn't support SRIOV. */
 		if (amdgpu_discovery) {
 			r = amdgpu_discovery_reg_base_init(adev);
 			if (r) {
@@ -697,8 +708,26 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 		arct_reg_base_init(adev);
 		break;
 	default:
-		return -EINVAL;
+		DRM_ERROR("Unsupported asic type: %d!\n", adev->asic_type);
+		break;
 	}
+}
+
+void soc15_set_virt_ops(struct amdgpu_device *adev)
+{
+	adev->virt.ops = &xgpu_ai_virt_ops;
+
+	/* init soc15 reg base early enough so we can
+	 * request full access for SRIOV before
+	 * set_ip_blocks.
*/ + soc15_reg_base_init(adev); +} + +int soc15_set_ip_blocks(struct amdgpu_device *adev) +{ + /* for bare metal case */ + if (!amdgpu_sriov_vf(adev)) + soc15_reg_base_init(adev); if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) adev->gmc.xgmi.supported = true; @@ -722,9 +751,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) adev->rev_id = soc15_get_rev_id(adev); - if (amdgpu_sriov_vf(adev)) - adev->virt.ops = &xgpu_ai_virt_ops; - switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index b03f950c486c..8f38f047265b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -90,6 +90,7 @@ struct soc15_ras_field_entry { void soc15_grbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); +void soc15_set_virt_ops(struct amdgpu_device *adev); int soc15_set_ip_blocks(struct amdgpu_device *adev); void soc15_program_register_sequence(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 7a55457e6f9e..e07e3fae99b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1375,7 +1375,7 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for reg writes */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask); @@ -1417,7 +1417,8 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for reg writes */ - uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, lower_32_bits(pd_addr), 0xffffffff); } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index a0fb119240f4..37fa163393fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -991,7 +991,8 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for reg writes */ - vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, lower_32_bits(pd_addr), 0xffffffff); } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 6dcc3ce0c00a..927c330fad21 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1539,7 +1539,7 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for register write */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; vcn_v1_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); @@ -1679,7 +1679,8 @@ static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for reg writes */ - vcn_v1_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + 
vcn_v1_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, lower_32_bits(pd_addr), 0xffffffff); } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index c0e4133a6dd5..23a9eb5b2c8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -1505,7 +1505,7 @@ void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for register write */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; vcn_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); @@ -1660,7 +1660,8 @@ void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for reg writes */ - vcn_v2_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + vcn_v2_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, lower_32_bits(pd_addr), 0xffffffff); } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 90fe95f345e3..910a4a32ff78 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -28,6 +28,7 @@ #include "soc15.h" #include "soc15d.h" #include "vcn_v2_0.h" +#include "mmsch_v3_0.h" #include "vcn/vcn_3_0_0_offset.h" #include "vcn/vcn_3_0_0_sh_mask.h" @@ -48,6 +49,17 @@ #define VCN_INSTANCES_SIENNA_CICHLID 2 +static int amdgpu_ih_clientid_vcns[] = { + SOC15_IH_CLIENTID_VCN, + SOC15_IH_CLIENTID_VCN1 +}; + +static int amdgpu_ucode_id_vcns[] = { + AMDGPU_UCODE_ID_VCN, + AMDGPU_UCODE_ID_VCN1 +}; + +static int vcn_v3_0_start_sriov(struct amdgpu_device *adev); static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev); @@ -56,10 +68,8 @@ static int vcn_v3_0_set_powergating_state(void *handle, static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); -static int amdgpu_ih_clientid_vcns[] = { - SOC15_IH_CLIENTID_VCN, - SOC15_IH_CLIENTID_VCN1 -}; +static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring); +static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring); /** * vcn_v3_0_early_init - set function pointers @@ -71,25 +81,33 @@ static int amdgpu_ih_clientid_vcns[] = { static int vcn_v3_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->asic_type == CHIP_SIENNA_CICHLID) { - u32 harvest; - int i; + if (amdgpu_sriov_vf(adev)) { adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING); - if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) - adev->vcn.harvest_config |= 1 << i; - } + adev->vcn.harvest_config = 0; + adev->vcn.num_enc_rings = 1; + + } else { + if (adev->asic_type == CHIP_SIENNA_CICHLID) { + u32 harvest; + int i; + + adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING); + if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) + adev->vcn.harvest_config |= 1 << i; + } - if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 | - AMDGPU_VCN_HARVEST_VCN1)) - /* both instances 
are harvested, disable the block */
-			return -ENOENT;
-	} else
-		adev->vcn.num_vcn_inst = 1;
+			if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
+						AMDGPU_VCN_HARVEST_VCN1))
+				/* both instances are harvested, disable the block */
+				return -ENOENT;
+		} else
+			adev->vcn.num_vcn_inst = 1;
 
-	adev->vcn.num_enc_rings = 2;
+		adev->vcn.num_enc_rings = 2;
+	}
 
 	vcn_v3_0_set_dec_ring_funcs(adev);
 	vcn_v3_0_set_enc_ring_funcs(adev);
@@ -109,6 +127,7 @@ static int vcn_v3_0_sw_init(void *handle)
 {
 	struct amdgpu_ring *ring;
 	int i, j, r;
+	int vcn_doorbell_index = 0;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	r = amdgpu_vcn_sw_init(adev);
@@ -136,6 +155,12 @@ static int vcn_v3_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	if (amdgpu_sriov_vf(adev)) {
+		vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1;
+		/* get DWORD offset */
+		vcn_doorbell_index = vcn_doorbell_index << 1;
+	}
+
 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
@@ -166,7 +191,13 @@ static int vcn_v3_0_sw_init(void *handle)
 
 		ring = &adev->vcn.inst[i].ring_dec;
 		ring->use_doorbell = true;
-		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
+		if (amdgpu_sriov_vf(adev)) {
+			ring->doorbell_index = vcn_doorbell_index;
+			/* NOTE: increment so the next VCN engine uses the next doorbell DWORD */
+			vcn_doorbell_index++;
+		} else {
+			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
+		}
 		if (i != 0)
 			ring->no_scheduler = true;
 		sprintf(ring->name, "vcn_dec_%d", i);
@@ -184,7 +215,13 @@ static int vcn_v3_0_sw_init(void *handle)
 
 			ring = &adev->vcn.inst[i].ring_enc[j];
 			ring->use_doorbell = true;
-			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
+			if (amdgpu_sriov_vf(adev)) {
+				ring->doorbell_index = vcn_doorbell_index;
+				/* NOTE: increment so the next VCN engine uses the next doorbell DWORD */
+				vcn_doorbell_index++;
+			} else {
+				ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
+			}
 			if (i != 1)
 				ring->no_scheduler = true;
 			sprintf(ring->name, "vcn_enc_%d.%d", i, j);
@@ -195,6 +232,11 @@ static int vcn_v3_0_sw_init(void *handle)
 		}
 	}
 
+	if (amdgpu_sriov_vf(adev)) {
+		r = amdgpu_virt_alloc_mm_table(adev);
+		if (r)
+			return r;
+	}
 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
 		adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode;
 
@@ -213,6 +255,9 @@ static int vcn_v3_0_sw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int r;
 
+	if (amdgpu_sriov_vf(adev))
+		amdgpu_virt_free_mm_table(adev);
+
 	r = amdgpu_vcn_suspend(adev);
 	if (r)
 		return r;
@@ -235,24 +280,50 @@ static int vcn_v3_0_hw_init(void *handle)
 	struct amdgpu_ring *ring;
 	int i, j, r;
 
-	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-		if (adev->vcn.harvest_config & (1 << i))
-			continue;
+	if (amdgpu_sriov_vf(adev)) {
+		r = vcn_v3_0_start_sriov(adev);
+		if (r)
+			goto done;
 
-		ring = &adev->vcn.inst[i].ring_dec;
+		/* initialize VCN dec and enc ring buffers */
+		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+			if (adev->vcn.harvest_config & (1 << i))
+				continue;
+
+			ring = &adev->vcn.inst[i].ring_dec;
+			ring->wptr = 0;
+			ring->wptr_old = 0;
+			vcn_v3_0_dec_ring_set_wptr(ring);
+			ring->sched.ready = true;
+
+			for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+				ring = &adev->vcn.inst[i].ring_enc[j];
+				ring->wptr = 0;
+				ring->wptr_old = 0;
+				vcn_v3_0_enc_ring_set_wptr(ring);
+				ring->sched.ready = true;
+			}
+		}
+	} else {
+		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+			if (adev->vcn.harvest_config & (1 << i))
+
continue; - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - ring->doorbell_index, i); + ring = &adev->vcn.inst[i].ring_dec; - r = amdgpu_ring_test_helper(ring); - if (r) - goto done; + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ring->doorbell_index, i); - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { - ring = &adev->vcn.inst[i].ring_enc[j]; r = amdgpu_ring_test_helper(ring); if (r) goto done; + + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + } } } @@ -283,11 +354,13 @@ static int vcn_v3_0_hw_fini(void *handle) ring = &adev->vcn.inst[i].ring_dec; - if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || - (adev->vcn.cur_state != AMD_PG_STATE_GATE && - RREG32_SOC15(VCN, i, mmUVD_STATUS))) - vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE); - + if (!amdgpu_sriov_vf(adev)) { + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, mmUVD_STATUS))) { + vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + } + } ring->sched.ready = false; for (j = 0; j < adev->vcn.num_enc_rings; ++j) { @@ -1137,6 +1210,221 @@ static int vcn_v3_0_start(struct amdgpu_device *adev) return 0; } +static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) +{ + int i, j; + struct amdgpu_ring *ring; + uint64_t cache_addr; + uint64_t rb_addr; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t offset, cache_size; + uint32_t tmp, timeout; + uint32_t id; + + struct amdgpu_mm_table *table = &adev->virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw; + + struct mmsch_v3_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v3_0_cmd_direct_read_modify_write + direct_rd_mod_wt = { {0} }; + struct mmsch_v3_0_cmd_direct_polling + direct_poll = { {0} }; + struct mmsch_v3_0_cmd_end end = { {0} }; + struct mmsch_v3_0_init_header header; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + direct_rd_mod_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; + direct_poll.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_POLLING; + end.cmd_header.command_type = + MMSCH_COMMAND__END; + + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v3_0_init_header) >> 2; + for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) { + header.inst[i].init_status = 0; + header.inst[i].table_offset = 0; + header.inst[i].table_size = 0; + } + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + table_size = 0; + + MMSCH_V3_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_STATUS), + ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); + + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + id = amdgpu_ucode_id_vcns[i]; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + adev->firmware.ucode[id].tmr_mc_addr_lo); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + adev->firmware.ucode[id].tmr_mc_addr_hi); + offset = 0; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_OFFSET0), + 0); + } else { + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + 
lower_32_bits(adev->vcn.inst[i].gpu_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[i].gpu_addr)); + offset = cache_size; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_SIZE0), + cache_size); + + cache_addr = adev->vcn.inst[i].gpu_addr + offset; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(cache_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(cache_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_OFFSET1), + 0); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_SIZE1), + AMDGPU_VCN_STACK_SIZE); + + cache_addr = adev->vcn.inst[i].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(cache_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(cache_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_OFFSET2), + 0); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_SIZE2), + AMDGPU_VCN_CONTEXT_SIZE); + + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; + ring->wptr = 0; + rb_addr = ring->gpu_addr; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_RB_BASE_LO), + lower_32_bits(rb_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_RB_BASE_HI), + upper_32_bits(rb_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_RB_SIZE), + ring->ring_size / 4); + } + + ring = &adev->vcn.inst[i].ring_dec; + ring->wptr = 0; + rb_addr = ring->gpu_addr; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_RBC_RB_64BIT_BAR_LOW), + lower_32_bits(rb_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH), + upper_32_bits(rb_addr)); + /* force RBC into idle state */ + tmp = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, tmp); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_RBC_RB_CNTL), + tmp); + + /* add end packet */ + MMSCH_V3_0_INSERT_END(); + + /* refine header */ + header.inst[i].init_status = 1; + header.inst[i].table_offset = header.total_size; + header.inst[i].table_size = table_size; + header.total_size += table_size; + } + + /* Update init table header in memory */ + size = sizeof(struct mmsch_v3_0_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, &header, size); + + /* message MMSCH (in VCN[0]) to initialize this client + * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr + * of memory descriptor location + */ + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + /* 2, update vmid of descriptor */ + tmp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID); + tmp &= 
~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+	/* use domain0 for MM scheduler */
+	tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+	WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, tmp);
+
+	/* 3, notify mmsch about the size of this descriptor */
+	size = header.total_size;
+	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size);
+
+	/* 4, set resp to zero */
+	WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
+
+	/* 5, kick off the initialization and wait until
+	 * MMSCH_VF_MAILBOX_RESP becomes non-zero
+	 */
+	param = 0x10000001;
+	WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, param);
+	tmp = 0;
+	timeout = 1000;
+	resp = 0;
+	expected = param + 1;
+	while (resp != expected) {
+		resp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
+		if (resp == expected)
+			break;
+
+		udelay(10);
+		tmp = tmp + 10;
+		if (tmp >= timeout) {
+			DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+				" waiting for mmMMSCH_VF_MAILBOX_RESP "\
+				"(expected=0x%08x, readback=0x%08x)\n",
+				tmp, expected, resp);
+			return -EBUSY;
+		}
+	}
+
+	return 0;
+}
+
 static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
 {
 	uint32_t tmp;
@@ -1575,6 +1863,15 @@ static int vcn_v3_0_set_powergating_state(void *handle,
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int ret;
 
+	/* for SRIOV, the guest should not control VCN power gating;
+	 * the MMSCH FW controls power gating and clock gating, so the
+	 * guest should avoid touching CGC and PG
+	 */
+	if (amdgpu_sriov_vf(adev)) {
+		adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+		return 0;
+	}
+
 	if(state == adev->vcn.cur_state)
 		return 0;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index af8986a55354..f6f2ed0830b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -710,6 +710,14 @@ vi_asic_reset_method(struct amdgpu_device *adev)
 {
 	bool baco_reset;
 
+	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
+	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+		return amdgpu_reset_method;
+
+	if (amdgpu_reset_method != -1)
+		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+			 amdgpu_reset_method);
+
 	switch (adev->asic_type) {
 	case CHIP_FIJI:
 	case CHIP_TONGA:
@@ -1705,11 +1713,13 @@ static const struct amdgpu_ip_block_version vi_common_ip_block =
 	.funcs = &vi_common_ip_funcs,
 };
 
-int vi_set_ip_blocks(struct amdgpu_device *adev)
+void vi_set_virt_ops(struct amdgpu_device *adev)
 {
-	if (amdgpu_sriov_vf(adev))
-		adev->virt.ops = &xgpu_vi_virt_ops;
+	adev->virt.ops = &xgpu_vi_virt_ops;
+}
 
+int vi_set_ip_blocks(struct amdgpu_device *adev)
+{
 	switch (adev->asic_type) {
 	case CHIP_TOPAZ:
 		/* topaz has no DCE, UVD, VCE */
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h
index defb4aaf929a..9718f98f8533 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.h
+++ b/drivers/gpu/drm/amd/amdgpu/vi.h
@@ -28,6 +28,7 @@
 void vi_srbm_select(struct amdgpu_device *adev,
 		    u32 me, u32 pipe, u32 queue, u32 vmid);
+void vi_set_virt_ops(struct amdgpu_device *adev);
 int vi_set_ip_blocks(struct amdgpu_device *adev);
 
 void legacy_doorbell_index_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index 7a01e6133798..80ce42aacc0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -67,8 +67,6 @@
 #define HPD4_REGISTER_OFFSET                 (0x18b8 - 0x1898)
 #define HPD5_REGISTER_OFFSET                 (0x18c0 - 0x1898)
 
-#define AMDGPU_NUM_OF_VMIDS			8
-
 #define PIPEID(x)					((x) << 0)
 #define MEID(x)						((x) << 2)
 #define VMID(x)						((x) << 4)
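
[Editor's note — not part of the patch] The scoring loop in si_calc_upll_dividers() (si.c above) is easiest to follow with numbers. A worked example with hypothetical inputs, assuming the 10 kHz units implied by vco_min = 125000 (~1.25 GHz) and a reference clock of 2700 (~27 MHz):

	/*
	 * Hypothetical request: vclk = 50000 (500 MHz), dclk = 40000 (400 MHz),
	 * candidate vco_freq = 150000 (1.5 GHz), fb_factor = 16384, ref = 2700:
	 *
	 *   fb_div   = 150000 * 16384 / 2700 = 910222  (i.e. 910222/16384 ~= 55.6 x ref)
	 *   vclk_div = 150000 / 50000 = 3  -> 150000 / 3 = 50000, exact
	 *   dclk_div = 150000 / 40000 = 3  -> 150000 / 3 = 50000 > 40000, bump to 4
	 *                                  -> 150000 / 4 = 37500
	 *   score    = (50000 - 50000) + (40000 - 37500) = 2500
	 *
	 * The loop steps vco_freq by 100 and keeps the lowest score; a score of 0
	 * means both targets are met exactly and the search stops early.
	 */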
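
[Editor's note — not part of the patch] The MMSCH_V3_0_INSERT_* macros added in mmsch_v3_0.h expand in place and rely on locals the caller must declare (size, size_dw, table_loc, table_size and the command structs); vcn_v3_0_start_sriov() above is the real user. A minimal caller skeleton, with a hypothetical register offset and value:

	/* Sketch only; 'table' would be &adev->virt.mm_table inside a function. */
	uint32_t *table_loc = (uint32_t *)table->cpu_addr; /* append cursor, DWORD units */
	uint32_t table_size = 0;   /* DWORDs emitted for this instance's table */
	uint32_t size, size_dw;    /* scratch consumed inside the macros */
	struct mmsch_v3_0_cmd_direct_write direct_wt = { {0} };
	struct mmsch_v3_0_cmd_end end = { {0} };

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	/* each macro copies one command packet and advances the cursor */
	MMSCH_V3_0_INSERT_DIRECT_WT(0x1234 /* hypothetical reg offset */, 0xcafe);
	MMSCH_V3_0_INSERT_END();

	/* header.inst[i].table_size then records table_size, and
	 * header.total_size grows by the same amount (both in DWORDs). */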