diff options
Diffstat (limited to 'drivers/gpu/drm/msm/adreno/a6xx_gpu.c')
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 242 |
1 files changed, 195 insertions, 47 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 45dd5fd1c2bf..b8f8ae940b55 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -16,6 +16,97 @@ #define GPU_PAS_ID 13 +static u64 read_gmu_ao_counter(struct a6xx_gpu *a6xx_gpu) +{ + u64 count_hi, count_lo, temp; + + do { + count_hi = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H); + count_lo = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L); + temp = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H); + } while (unlikely(count_hi != temp)); + + return (count_hi << 32) | count_lo; +} + +static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask) +{ + /* Success if !writedropped0/1 */ + if (!(status & mask)) + return true; + + udelay(10); + + /* Try to update fenced register again */ + gpu_write(gpu, offset, value); + + /* We can't do a posted write here because the power domain could be + * in collapse state. So use the heaviest barrier instead + */ + mb(); + return false; +} + +static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask) +{ + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + u32 status; + + gpu_write(gpu, offset, value); + + /* Nothing else to be done in the case of no-GMU */ + if (adreno_has_gmu_wrapper(adreno_gpu)) + return 0; + + /* We can't do a posted write here because the power domain could be + * in collapse state. So use the heaviest barrier instead + */ + mb(); + + if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status, + fence_status_check(gpu, offset, value, status, mask), 0, 1000)) + return 0; + + /* Try again for another 1ms before failing */ + gpu_write(gpu, offset, value); + mb(); + + if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status, + fence_status_check(gpu, offset, value, status, mask), 0, 1000)) { + /* + * The 'delay' warning is here because the pause to print this + * warning will allow gpu to move to power collapse which + * defeats the purpose of continuous polling for 2 ms + */ + dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n", + offset); + return 0; + } + + dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n", + offset); + + return -ETIMEDOUT; +} + +int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b) +{ + int ret; + + ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask); + if (ret) + return ret; + + if (!is_64b) + return 0; + + ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask); + + return ret; +} + static inline bool _a6xx_check_idle(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -86,7 +177,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) /* Update HW if this is the current ring and we are not in preempt*/ if (!a6xx_in_preempt(a6xx_gpu)) { if (a6xx_gpu->cur_ring == ring) - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false); else ring->restore_wptr = true; } else { @@ -173,8 +264,8 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, * Needed for preemption */ OUT_PKT7(ring, CP_MEM_WRITE, 5); - OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr))); - OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr))); + OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_LO(lower_32_bits(memptr))); + OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_HI(upper_32_bits(memptr))); OUT_RING(ring, lower_32_bits(ttbr)); OUT_RING(ring, upper_32_bits(ttbr)); OUT_RING(ring, ctx->seqno); @@ -204,9 +295,9 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, */ OUT_PKT7(ring, CP_WAIT_REG_MEM, 6); OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ)); - OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO( + OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO( REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS)); - OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0)); + OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0)); OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1)); OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1)); OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0)); @@ -298,8 +389,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, submit->seqno); - trace_msm_gpu_submit_flush(submit, - gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER)); + trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu)); a6xx_flush(gpu, ring); } @@ -499,8 +589,7 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) } - trace_msm_gpu_submit_flush(submit, - gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER)); + trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu)); a6xx_flush(gpu, ring); @@ -739,11 +828,10 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) u32 *dest = (u32 *)&lock->regs[0]; int i; - reglist = adreno_gpu->info->a6xx->pwrup_reglist; - lock->gpu_req = lock->cpu_req = lock->turn = 0; - lock->ifpc_list_len = 0; - lock->preemption_list_len = reglist->count; + + reglist = adreno_gpu->info->a6xx->ifpc_reglist; + lock->ifpc_list_len = reglist->count; /* * For each entry in each of the lists, write the offset and the current @@ -754,6 +842,14 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) *dest++ = gpu_read(gpu, reglist->regs[i]); } + reglist = adreno_gpu->info->a6xx->pwrup_reglist; + lock->preemption_list_len = reglist->count; + + for (i = 0; i < reglist->count; i++) { + *dest++ = reglist->regs[i]; + *dest++ = gpu_read(gpu, reglist->regs[i]); + } + /* * The overall register list is composed of * 1. Static IFPC-only registers @@ -1241,14 +1337,14 @@ static int hw_init(struct msm_gpu *gpu) /* Set weights for bicubic filtering */ if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) { - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0); + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0); } @@ -1448,21 +1544,25 @@ static void a6xx_recover(struct msm_gpu *gpu) adreno_dump_info(gpu); - for (i = 0; i < 8; i++) - DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i, - gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i))); + if (a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) { + /* Sometimes crashstate capture is skipped, so SQE should be halted here again */ + gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); + + for (i = 0; i < 8; i++) + DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i, + gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i))); - if (hang_debug) - a6xx_dump(gpu); + if (hang_debug) + a6xx_dump(gpu); + + } /* * To handle recovery specific sequences during the rpm suspend we are * about to trigger */ - a6xx_gpu->hung = true; - /* Halt SQE first */ - gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); + a6xx_gpu->hung = true; pm_runtime_dont_use_autosuspend(&gpu->pdev->dev); @@ -1693,8 +1793,6 @@ static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu) static void a6xx_fault_detect_irq(struct msm_gpu *gpu) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); /* @@ -1706,13 +1804,6 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT) return; - /* - * Force the GPU to stay on until after we finish - * collecting information - */ - if (!adreno_has_gmu_wrapper(adreno_gpu)) - gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1); - DRM_DEV_ERROR(&gpu->pdev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0, @@ -1727,6 +1818,9 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) /* Turn off the hangcheck timer to keep it from bothering us */ timer_delete(&gpu->hangcheck_timer); + /* Turn off interrupts to avoid triggering recovery again */ + gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 0); + kthread_queue_work(gpu->worker, &gpu->recover_work); } @@ -1751,9 +1845,49 @@ static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) } } +static void a6xx_gpu_keepalive_vote(struct msm_gpu *gpu, bool on) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + if (adreno_has_gmu_wrapper(adreno_gpu)) + return; + + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, on); +} + +static int irq_poll_fence(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + u32 status; + + if (adreno_has_gmu_wrapper(adreno_gpu)) + return 0; + + if (gmu_poll_timeout_atomic(gmu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, status, !status, 1, 100)) { + u32 rbbm_unmasked = gmu_read(gmu, REG_A6XX_GMU_RBBM_INT_UNMASKED_STATUS); + + dev_err_ratelimited(&gpu->pdev->dev, + "irq fence poll timeout, fence_ctrl=0x%x, unmasked_status=0x%x\n", + status, rbbm_unmasked); + return -ETIMEDOUT; + } + + return 0; +} + static irqreturn_t a6xx_irq(struct msm_gpu *gpu) { struct msm_drm_private *priv = gpu->dev->dev_private; + + /* Set keepalive vote to avoid power collapse after RBBM_INT_0_STATUS is read */ + a6xx_gpu_keepalive_vote(gpu, true); + + if (irq_poll_fence(gpu)) + goto done; + u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS); gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status); @@ -1790,6 +1924,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) if (status & A6XX_RBBM_INT_0_MASK_CP_SW) a6xx_preempt_irq(gpu); +done: + a6xx_gpu_keepalive_vote(gpu, false); + return IRQ_HANDLED; } @@ -2179,16 +2316,7 @@ static int a6xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - mutex_lock(&a6xx_gpu->gmu.lock); - - /* Force the GPU power on so we can read this register */ - a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); - - *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER); - - a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); - - mutex_unlock(&a6xx_gpu->gmu.lock); + *value = read_gmu_ao_counter(a6xx_gpu); return 0; } @@ -2298,18 +2426,36 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) return a6xx_gpu->shadow[ring->id]; + /* + * This is true only on an A6XX_GEN1 with GMU, has IFPC enabled and a super old SQE firmware + * without 'whereami' support + */ + WARN_ONCE((to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC), + "Can't read CP_RB_RPTR register reliably\n"); + return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR); } static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { - struct msm_cp_state cp_state = { + struct msm_cp_state cp_state; + bool progress; + + /* + * With IFPC, KMD doesn't know whether GX power domain is collapsed + * or not. So, we can't blindly read the below registers in GX domain. + * Lets trust the hang detection in HW and lie to the caller that + * there was progress. + */ + if (to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC) + return true; + + cp_state = (struct msm_cp_state) { .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE), }; - bool progress; /* * Adjust the remaining data to account for what has already been @@ -2408,6 +2554,7 @@ static const struct adreno_gpu_funcs funcs = { .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, + .sysprof_setup = a6xx_gmu_sysprof_setup, }, .get_timestamp = a6xx_gmu_get_timestamp, }; @@ -2468,6 +2615,7 @@ static const struct adreno_gpu_funcs funcs_a7xx = { .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, + .sysprof_setup = a6xx_gmu_sysprof_setup, }, .get_timestamp = a6xx_gmu_get_timestamp, }; |