summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c222
1 files changed, 107 insertions, 115 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 21f38d882335..ecc70a730a54 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -48,8 +48,7 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
-static const u32 golden_settings_sdma_4[] =
-{
+static const u32 golden_settings_sdma_4[] = {
SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831f07,
SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xff000ff0, 0x3f000100,
SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0100, 0x00000100,
@@ -76,8 +75,7 @@ static const u32 golden_settings_sdma_4[] =
SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_UTCL1_PAGE), 0x000003ff, 0x000003c0
};
-static const u32 golden_settings_sdma_vg10[] =
-{
+static const u32 golden_settings_sdma_vg10[] = {
SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00104002,
SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002,
SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG), 0x0018773f, 0x00104002,
@@ -87,16 +85,17 @@ static const u32 golden_settings_sdma_vg10[] =
static u32 sdma_v4_0_get_reg_offset(u32 instance, u32 internal_offset)
{
u32 base = 0;
+
switch (instance) {
- case 0:
- base = SDMA0_BASE.instance[0].segment[0];
- break;
- case 1:
- base = SDMA1_BASE.instance[0].segment[0];
- break;
- default:
- BUG();
- break;
+ case 0:
+ base = SDMA0_BASE.instance[0].segment[0];
+ break;
+ case 1:
+ base = SDMA1_BASE.instance[0].segment[0];
+ break;
+ default:
+ BUG();
+ break;
}
return base + internal_offset;
@@ -159,7 +158,8 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
case CHIP_VEGA10:
chip_name = "vega10";
break;
- default: BUG();
+ default:
+ BUG();
}
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -179,7 +179,7 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
if (adev->sdma.instance[i].feature_version >= 20)
adev->sdma.instance[i].burst_nop = true;
DRM_DEBUG("psp_load == '%s'\n",
- adev->firmware.load_type == AMDGPU_FW_LOAD_PSP? "true": "false");
+ adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
@@ -192,9 +192,7 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
}
out:
if (err) {
- printk(KERN_ERR
- "sdma_v4_0: Failed to load firmware \"%s\"\n",
- fw_name);
+ DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name);
for (i = 0; i < adev->sdma.num_instances; i++) {
release_firmware(adev->sdma.instance[i].fw);
adev->sdma.instance[i].fw = NULL;
@@ -212,10 +210,10 @@ out:
*/
static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
- u64* rptr;
+ u64 *rptr;
/* XXX check if swapping is necessary on BE */
- rptr =((u64*)&ring->adev->wb.wb[ring->rptr_offs]);
+ rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
return ((*rptr) >> 2);
@@ -231,19 +229,20 @@ static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- u64* wptr = NULL;
- uint64_t local_wptr=0;
+ u64 *wptr = NULL;
+ uint64_t local_wptr = 0;
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- wptr = ((u64*)&adev->wb.wb[ring->wptr_offs]);
+ wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]);
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr);
*wptr = (*wptr) >> 2;
DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr);
} else {
u32 lowbit, highbit;
int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- wptr=&local_wptr;
+
+ wptr = &local_wptr;
lowbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR)) >> 2;
highbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
@@ -285,12 +284,13 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
} else {
int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
+
DRM_DEBUG("Not using doorbell -- "
"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
- "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x \n",
- me,
+ "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
me,
lower_32_bits(ring->wptr << 2),
+ me,
upper_32_bits(ring->wptr << 2));
WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
@@ -319,22 +319,22 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
* Schedule an IB in the DMA ring (VEGA10).
*/
static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
- struct amdgpu_ib *ib,
- unsigned vm_id, bool ctx_switch)
+ struct amdgpu_ib *ib,
+ unsigned vm_id, bool ctx_switch)
{
- u32 vmid = vm_id & 0xf;
+ u32 vmid = vm_id & 0xf;
- /* IB packet must end on a 8 DW boundary */
- sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ /* IB packet must end on a 8 DW boundary */
+ sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
- SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
- /* base must be 32 byte aligned */
- amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
- amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, ib->length_dw);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
+ SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
}
@@ -523,7 +523,7 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
u32 doorbell;
u32 doorbell_offset;
u32 temp;
- int i,r;
+ int i, r;
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
@@ -572,7 +572,7 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
doorbell = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL));
doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET));
- if (ring->use_doorbell){
+ if (ring->use_doorbell) {
doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
OFFSET, ring->doorbell_index);
@@ -694,9 +694,7 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
for (j = 0; j < fw_size; j++)
- {
WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
- }
WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
}
@@ -744,10 +742,8 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
if (r)
return r;
r = sdma_v4_0_rlc_resume(adev);
- if (r)
- return r;
- return 0;
+ return r;
}
/**
@@ -797,9 +793,8 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
for (i = 0; i < adev->usec_timeout; i++) {
tmp = le32_to_cpu(adev->wb.wb[index]);
- if (tmp == 0xDEADBEEF) {
+ if (tmp == 0xDEADBEEF)
break;
- }
DRM_UDELAY(1);
}
@@ -864,29 +859,29 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
if (r)
goto err1;
- r = dma_fence_wait_timeout(f, false, timeout);
- if (r == 0) {
- DRM_ERROR("amdgpu: IB test timed out\n");
- r = -ETIMEDOUT;
- goto err1;
- } else if (r < 0) {
- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
- goto err1;
- }
- tmp = le32_to_cpu(adev->wb.wb[index]);
- if (tmp == 0xDEADBEEF) {
- DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
- r = 0;
- } else {
- DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
- r = -EINVAL;
- }
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ DRM_ERROR("amdgpu: IB test timed out\n");
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ goto err1;
+ }
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF) {
+ DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+ r = 0;
+ } else {
+ DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
+ r = -EINVAL;
+ }
err1:
- amdgpu_ib_free(adev, &ib, NULL);
- dma_fence_put(f);
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
err0:
- amdgpu_wb_free(adev, index);
- return r;
+ amdgpu_wb_free(adev, index);
+ return r;
}
@@ -1039,44 +1034,40 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vm_id, uint64_t pd_addr)
{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
- unsigned eng = ring->idx;
- unsigned i;
+ unsigned eng = ring->vm_inv_eng;
pd_addr = pd_addr | 0x1; /* valid bit */
/* now only use physical base address of PDE and valid */
BUG_ON(pd_addr & 0xFFFF00000000003EULL);
- for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
- amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2);
- amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
- amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2);
- amdgpu_ring_write(ring, upper_32_bits(pd_addr));
-
- /* flush TLB */
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
- amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng);
- amdgpu_ring_write(ring, req);
-
- /* wait for flush */
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
- SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
- SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
- amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, 1 << vm_id); /* reference */
- amdgpu_ring_write(ring, 1 << vm_id); /* mask */
- amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
- SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
- }
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2);
+ amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2);
+ amdgpu_ring_write(ring, upper_32_bits(pd_addr));
+
+ /* flush TLB */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng);
+ amdgpu_ring_write(ring, req);
+
+ /* wait for flush */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
+ amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 1 << vm_id); /* reference */
+ amdgpu_ring_write(ring, 1 << vm_id); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
}
static int sdma_v4_0_early_init(void *handle)
@@ -1162,8 +1153,6 @@ static int sdma_v4_0_hw_init(void *handle)
sdma_v4_0_init_golden_registers(adev);
r = sdma_v4_0_start(adev);
- if (r)
- return r;
return r;
}
@@ -1199,10 +1188,12 @@ static bool sdma_v4_0_is_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 i;
+
for (i = 0; i < adev->sdma.num_instances; i++) {
u32 tmp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_STATUS_REG));
+
if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
- return false;
+ return false;
}
return true;
@@ -1211,8 +1202,9 @@ static bool sdma_v4_0_is_idle(void *handle)
static int sdma_v4_0_wait_for_idle(void *handle)
{
unsigned i;
- u32 sdma0,sdma1;
+ u32 sdma0, sdma1;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
for (i = 0; i < adev->usec_timeout; i++) {
sdma0 = RREG32(sdma_v4_0_get_reg_offset(0, mmSDMA0_STATUS_REG));
sdma1 = RREG32(sdma_v4_0_get_reg_offset(1, mmSDMA0_STATUS_REG));
@@ -1240,7 +1232,7 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ?
sdma_v4_0_get_reg_offset(0, mmSDMA0_CNTL) :
- sdma_v4_0_get_reg_offset(1, mmSDMA0_CNTL);
+ sdma_v4_0_get_reg_offset(1, mmSDMA0_CNTL);
sdma_cntl = RREG32(reg_offset);
sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
@@ -1332,7 +1324,7 @@ static void sdma_v4_0_update_medium_grain_clock_gating(
SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);
- if(def != data)
+ if (def != data)
WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data);
}
} else {
@@ -1382,17 +1374,17 @@ static void sdma_v4_0_update_medium_grain_light_sleep(
/* 1-not override: enable sdma1 mem light sleep */
if (adev->asic_type == CHIP_VEGA10) {
- def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
- data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
- if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
+ def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
+ data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
}
} else {
/* 0-override:disable sdma0 mem light sleep */
def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
if (def != data)
- WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
+ WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
/* 0-override:disable sdma1 mem light sleep */
if (adev->asic_type == CHIP_VEGA10) {
@@ -1473,6 +1465,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
.align_mask = 0xf,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
+ .vmhub = AMDGPU_MMHUB,
.get_rptr = sdma_v4_0_ring_get_rptr,
.get_wptr = sdma_v4_0_ring_get_wptr,
.set_wptr = sdma_v4_0_ring_set_wptr,
@@ -1480,7 +1473,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
6 + /* sdma_v4_0_ring_emit_hdp_flush */
3 + /* sdma_v4_0_ring_emit_hdp_invalidate */
6 + /* sdma_v4_0_ring_emit_pipeline_sync */
- 36 + /* sdma_v4_0_ring_emit_vm_flush */
+ 18 + /* sdma_v4_0_ring_emit_vm_flush */
10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
.emit_ib = sdma_v4_0_ring_emit_ib,
@@ -1606,8 +1599,7 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
}
}
-const struct amdgpu_ip_block_version sdma_v4_0_ip_block =
-{
+const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_SDMA,
.major = 4,
.minor = 0,