summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c130
1 files changed, 110 insertions, 20 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 27ab4e754b2a..71b6691edab4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -123,6 +123,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
case AMDGPU_PL_GWS:
case AMDGPU_PL_OA:
case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
placement->num_placement = 0;
return;
@@ -226,7 +227,8 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4 + num_bytes,
- AMDGPU_IB_POOL_DELAYED, &job);
+ AMDGPU_IB_POOL_DELAYED, &job,
+ AMDGPU_KERNEL_JOB_ID_TTM_MAP_BUFFER);
if (r)
return r;
@@ -406,7 +408,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
struct dma_fence *wipe_fence = NULL;
r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
- false);
+ false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
if (r) {
goto error;
} else if (wipe_fence) {
@@ -447,7 +449,8 @@ bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
return false;
if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
- res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL)
+ res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL ||
+ res->mem_type == AMDGPU_PL_MMIO_REMAP)
return true;
if (res->mem_type != TTM_PL_VRAM)
@@ -538,10 +541,12 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
old_mem->mem_type == AMDGPU_PL_GWS ||
old_mem->mem_type == AMDGPU_PL_OA ||
old_mem->mem_type == AMDGPU_PL_DOORBELL ||
+ old_mem->mem_type == AMDGPU_PL_MMIO_REMAP ||
new_mem->mem_type == AMDGPU_PL_GDS ||
new_mem->mem_type == AMDGPU_PL_GWS ||
new_mem->mem_type == AMDGPU_PL_OA ||
- new_mem->mem_type == AMDGPU_PL_DOORBELL) {
+ new_mem->mem_type == AMDGPU_PL_DOORBELL ||
+ new_mem->mem_type == AMDGPU_PL_MMIO_REMAP) {
/* Nothing to save here */
amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
@@ -629,6 +634,12 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
mem->bus.is_iomem = true;
mem->bus.caching = ttm_uncached;
break;
+ case AMDGPU_PL_MMIO_REMAP:
+ mem->bus.offset = mem->start << PAGE_SHIFT;
+ mem->bus.offset += adev->rmmio_remap.bus_addr;
+ mem->bus.is_iomem = true;
+ mem->bus.caching = ttm_uncached;
+ break;
default:
return -EINVAL;
}
@@ -646,6 +657,8 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
if (bo->resource->mem_type == AMDGPU_PL_DOORBELL)
return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT;
+ else if (bo->resource->mem_type == AMDGPU_PL_MMIO_REMAP)
+ return ((uint64_t)(adev->rmmio_remap.bus_addr + cursor.start)) >> PAGE_SHIFT;
return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
}
@@ -1355,7 +1368,8 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
if (mem && (mem->mem_type == TTM_PL_TT ||
mem->mem_type == AMDGPU_PL_DOORBELL ||
- mem->mem_type == AMDGPU_PL_PREEMPT)) {
+ mem->mem_type == AMDGPU_PL_PREEMPT ||
+ mem->mem_type == AMDGPU_PL_MMIO_REMAP)) {
flags |= AMDGPU_PTE_SYSTEM;
if (ttm->caching == ttm_cached)
@@ -1510,7 +1524,8 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4, AMDGPU_IB_POOL_DELAYED,
- &job);
+ &job,
+ AMDGPU_KERNEL_JOB_ID_TTM_ACCESS_MEMORY_SDMA);
if (r)
goto out;
@@ -1841,6 +1856,59 @@ static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
adev->mman.ttm_pools = NULL;
}
+/**
+ * amdgpu_ttm_mmio_remap_bo_init - Allocate the singleton 4K MMIO_REMAP BO
+ * @adev: amdgpu device
+ *
+ * Allocates a one-page (4K) GEM BO in AMDGPU_GEM_DOMAIN_MMIO_REMAP when the
+ * hardware exposes a remap base (adev->rmmio_remap.bus_addr) and the host
+ * PAGE_SIZE is <= AMDGPU_GPU_PAGE_SIZE (4K). The BO is created as a regular
+ * GEM object (amdgpu_bo_create).
+ *
+ * Return:
+ * * 0 on success or intentional skip (feature not present/unsupported)
+ * * negative errno on allocation failure
+ */
+static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_bo_param bp;
+ int r;
+
+ /* Skip if HW doesn't expose remap, or if PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE (4K). */
+ if (!adev->rmmio_remap.bus_addr || PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE)
+ return 0;
+
+ memset(&bp, 0, sizeof(bp));
+
+ /* Create exactly one GEM BO in the MMIO_REMAP domain. */
+ bp.type = ttm_bo_type_device; /* userspace-mappable GEM */
+ bp.size = AMDGPU_GPU_PAGE_SIZE; /* 4K */
+ bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_MMIO_REMAP;
+ bp.flags = 0;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create(adev, &bp, &adev->rmmio_remap.bo);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * amdgpu_ttm_mmio_remap_bo_fini - Free the singleton MMIO_REMAP BO
+ * @adev: amdgpu device
+ *
+ * Frees the kernel-owned MMIO_REMAP BO if it was allocated by
+ * amdgpu_ttm_mmio_remap_bo_init().
+ */
+static void amdgpu_ttm_mmio_remap_bo_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_unref(&adev->rmmio_remap.bo);
+ adev->rmmio_remap.bo = NULL;
+}
+
/*
* amdgpu_ttm_init - Init the memory management (ttm) as well as various
* gtt/vram related fields.
@@ -1877,11 +1945,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
}
adev->mman.initialized = true;
- /* Initialize VRAM pool with all of VRAM divided into pages */
- r = amdgpu_vram_mgr_init(adev);
- if (r) {
- dev_err(adev->dev, "Failed initializing VRAM heap.\n");
- return r;
+ if (!adev->gmc.is_app_apu) {
+ /* Initialize VRAM pool with all of VRAM divided into pages */
+ r = amdgpu_vram_mgr_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing VRAM heap.\n");
+ return r;
+ }
}
/* Change the size here instead of the init above so only lpfn is affected */
@@ -2008,6 +2078,18 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r;
}
+ /* Initialize MMIO-remap pool (single page 4K) */
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_MMIO_REMAP, 1);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing MMIO-remap heap.\n");
+ return r;
+ }
+
+ /* Allocate the singleton MMIO_REMAP BO (4K) if supported */
+ r = amdgpu_ttm_mmio_remap_bo_init(adev);
+ if (r)
+ return r;
+
/* Initialize preemptible memory pool */
r = amdgpu_preempt_mgr_init(adev);
if (r) {
@@ -2070,6 +2152,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
}
amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
&adev->mman.sdma_access_ptr);
+
+ amdgpu_ttm_mmio_remap_bo_fini(adev);
amdgpu_ttm_fw_reserve_vram_fini(adev);
amdgpu_ttm_drv_reserve_vram_fini(adev);
@@ -2082,7 +2166,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
drm_dev_exit(idx);
}
- amdgpu_vram_mgr_fini(adev);
+ if (!adev->gmc.is_app_apu)
+ amdgpu_vram_mgr_fini(adev);
amdgpu_gtt_mgr_fini(adev);
amdgpu_preempt_mgr_fini(adev);
amdgpu_doorbell_fini(adev);
@@ -2091,6 +2176,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_MMIO_REMAP);
ttm_device_fini(&adev->mman.bdev);
adev->mman.initialized = false;
dev_info(adev->dev, "amdgpu: ttm finalized\n");
@@ -2167,7 +2253,7 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
struct dma_resv *resv,
bool vm_needs_flush,
struct amdgpu_job **job,
- bool delayed)
+ bool delayed, u64 k_job_id)
{
enum amdgpu_ib_pool_type pool = direct_submit ?
AMDGPU_IB_POOL_DIRECT :
@@ -2177,7 +2263,7 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
&adev->mman.high_pr;
r = amdgpu_job_alloc_with_ib(adev, entity,
AMDGPU_FENCE_OWNER_UNDEFINED,
- num_dw * 4, pool, job);
+ num_dw * 4, pool, job, k_job_id);
if (r)
return r;
@@ -2217,7 +2303,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
- resv, vm_needs_flush, &job, false);
+ resv, vm_needs_flush, &job, false,
+ AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
if (r)
return r;
@@ -2252,7 +2339,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
uint64_t dst_addr, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence,
- bool vm_needs_flush, bool delayed)
+ bool vm_needs_flush, bool delayed,
+ u64 k_job_id)
{
struct amdgpu_device *adev = ring->adev;
unsigned int num_loops, num_dw;
@@ -2265,7 +2353,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
- &job, delayed);
+ &job, delayed, k_job_id);
if (r)
return r;
@@ -2335,7 +2423,8 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
goto err;
r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
- &next, true, true);
+ &next, true, true,
+ AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
if (r)
goto err;
@@ -2354,7 +2443,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,
struct dma_fence **f,
- bool delayed)
+ bool delayed,
+ u64 k_job_id)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
@@ -2384,7 +2474,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
goto error;
r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
- &next, true, delayed);
+ &next, true, delayed, k_job_id);
if (r)
goto error;