author	Thomas Hellström <thomas.hellstrom@linux.intel.com>	2021-06-20 18:14:55 +0200
committer	Thomas Hellström <thomas.hellstrom@linux.intel.com>	2021-06-22 10:31:00 +0200
commit	92a0588687348af1917f0075e66c7fd401ba6050 (patch)
tree	54c8cb305661ad83da0ce16fa66684d278652af4
parent	95a1cc9be09cd41dada2f5d5c776393d64cea60f (diff)
drm/i915/ttm: Asynchronous clears, swapins and sort of swapouts (branch: topic/migration_fence_2)
Don't explicitly sync before or after migration blits. This makes the
migration blits, and with them LMEM management, asynchronous, with the
important exception that vma unbinding is still a synchronisation point,
which is relevant for swapouts and migrations.
Possibly we will perform vma unbinding asynchronously with an upcoming
change and make sure the unbind operation is included in the
migration fence together with the migration blit.
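Condensed from the diff below (a sketch, not the complete change): instead
of a synchronous ttm_bo_wait_ctx() before the blit, the previous exclusive
fence, or bo->moving, is looked up and handed to the copy engine as an
input dependency:

static struct dma_fence *check_fence(struct dma_fence *fence)
{
	int err;

	if (!fence)
		return NULL;

	/* Still pending: the blit has to wait for it. */
	if (!dma_fence_is_signaled(fence))
		return fence;

	/* Already signaled: propagate a possible error, drop our reference. */
	err = fence->error;
	dma_fence_put(fence);
	return ERR_PTR(err);
}

static struct dma_fence *prev_fence(struct ttm_buffer_object *bo)
{
	struct dma_fence *fence;

	/* A previous move or write must complete before the migration blit. */
	fence = check_fence(dma_resv_get_excl_unlocked(bo->base.resv));
	if (fence)
		return fence;

	return check_fence(dma_fence_get(bo->moving));
}

The resulting blit request is then handed to ttm_bo_move_accel_cleanup(),
so TTM pipelines the move on that fence rather than blocking.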
With this change we start to see ghost objects in the driver, resulting
from migrations (and, due to a TTM flaw, unfortunately also from clears),
so adjust some callbacks to ignore them.
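Condensed from the diff below: a ghost object is recognised because its
destroy callback isn't ours, and the affected callbacks bail out early:

static inline struct drm_i915_gem_object *
i915_ttm_to_gem(struct ttm_buffer_object *bo)
{
	/* Foreign TTM object, typically a ghost object created by TTM. */
	if (bo->destroy != i915_ttm_bo_destroy)
		return NULL;

	return container_of(bo, struct drm_i915_gem_object, __do_not_access);
}

static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
{
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);

	if (unlikely(!obj))
		return;

	/* ... */
}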
Since we now also attach migration fences to objects without any vmas
(it is the vma activity trackers that hold an object reference while
active), we also start to truly hit the TTM delayed destroy path. That
means delete_mem_notify() might be called twice, so to avoid calling
__i915_gem_free_object() twice, move it to the bo destroy callback
instead.
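Condensed from the diff below, freeing now happens exactly once, from the
bo destroy callback:

void i915_ttm_bo_destroy(struct ttm_buffer_object *bo)
{
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);

	/* ... */

	if (obj->ttm.created) {
		/* Free here, no longer from delete_mem_notify(). */
		__i915_gem_free_object(obj);
		call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
	}
}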
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
-rw-r--r--	drivers/gpu/drm/i915/gem/i915_gem_ttm.c	113
-rw-r--r--	drivers/gpu/drm/i915/gem/i915_gem_ttm.h	3
2 files changed, 93 insertions, 23 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 07097f150065..ebeea05b005b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -224,6 +224,14 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
 {
 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
 
+	/*
+	 * If it's a ghost object, TTM looks to swap out a system object
+	 * waiting to be destroyed. That's generally a bad idea, so
+	 * return false here.
+	 */
+	if (likely(!obj))
+		return false;
+
 	/* Will do for now. Our pinned objects are still on TTM's LRU lists */
 	return i915_gem_object_evictable(obj);
 }
@@ -343,8 +351,12 @@ static void i915_ttm_purge(struct drm_i915_gem_object *obj)
 static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
 {
 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-	int ret = i915_ttm_move_notify(bo);
+	int ret;
+
+	if (unlikely(!obj))
+		return;
 
+	ret = i915_ttm_move_notify(bo);
 	GEM_WARN_ON(ret);
 	GEM_WARN_ON(obj->ttm.cached_io_st);
 	if (!ret && obj->mm.madv != I915_MADV_WILLNEED)
@@ -355,11 +367,8 @@ static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
 {
 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
 
-	if (likely(obj)) {
-		/* This releases all gem object bindings to the backend. */
+	if (likely(obj))
 		i915_ttm_free_cached_io_st(obj);
-		__i915_gem_free_object(obj);
-	}
 }
 
 static struct intel_memory_region *
@@ -429,9 +438,43 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
 	return intel_region_ttm_resource_to_st(obj->mm.region, res);
 }
 
+static struct dma_fence *check_fence(struct dma_fence *fence)
+{
+	int err;
+
+	if (!fence)
+		return NULL;
+
+	if (!dma_fence_is_signaled(fence))
+		return fence;
+
+	err = fence->error;
+	dma_fence_put(fence);
+	return ERR_PTR(err);
+}
+
+static struct dma_fence *prev_fence(struct ttm_buffer_object *bo)
+{
+	struct dma_fence *fence;
+
+	/*
+	 * We need to wait for a previous move or a previous write
+	 * operation before starting the migration copy.
+	 * For read operations, it's sufficient that we sync *after*
+	 * the migration copy. Currently we sync when we unbind vmas, so
+	 * we don't have to bother about that. Yet.
+	 */
+	fence = check_fence(dma_resv_get_excl_unlocked(bo->base.resv));
+	if (fence)
+		return fence;
+
+	return check_fence(dma_fence_get(bo->moving));
+}
+
 static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
 			       struct ttm_resource *dst_mem,
-			       struct sg_table *dst_st)
+			       struct sg_table *dst_st,
+			       bool evict)
 {
 	struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
 						     bdev);
@@ -449,40 +492,65 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
 	dst_level = i915_ttm_cache_level(i915, dst_mem, ttm);
 	if (!ttm || !ttm_tt_is_populated(ttm)) {
+		struct dma_fence *fence;
+
 		if (bo->type == ttm_bo_type_kernel)
 			return -EINVAL;
 
 		if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC))
 			return 0;
 
+		fence = prev_fence(bo);
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
 		intel_engine_pm_get(i915->gt.migrate.context->engine);
-		ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL,
+		ret = intel_context_migrate_clear(i915->gt.migrate.context,
+						  fence,
 						  dst_st->sgl, dst_level,
 						  gpu_binds_iomem(dst_mem),
 						  0, &rq);
-
-		if (!ret && rq) {
+		intel_engine_pm_put(i915->gt.migrate.context->engine);
+		dma_fence_put(fence);
+		if (ret && rq) {
 			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
 			i915_request_put(rq);
+			rq = NULL;
 		}
-		intel_engine_pm_put(i915->gt.migrate.context->engine);
 	} else {
+		struct dma_fence *fence;
+
 		src_st = src_man->use_tt ? i915_ttm_tt_get_st(ttm) :
 			obj->ttm.cached_io_st;
 
 		src_level = i915_ttm_cache_level(i915, bo->resource, ttm);
+		fence = prev_fence(bo);
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
+
 		intel_engine_pm_get(i915->gt.migrate.context->engine);
 		ret = intel_context_migrate_copy(i915->gt.migrate.context,
-						 NULL, src_st->sgl, src_level,
+						 fence, src_st->sgl, src_level,
 						 gpu_binds_iomem(bo->resource),
 						 dst_st->sgl, dst_level,
 						 gpu_binds_iomem(dst_mem),
 						 &rq);
-		if (!ret && rq) {
+		intel_engine_pm_put(i915->gt.migrate.context->engine);
+		dma_fence_put(fence);
+		if (ret && rq) {
 			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
 			i915_request_put(rq);
+			rq = NULL;
 		}
-		intel_engine_pm_put(i915->gt.migrate.context->engine);
+	}
+
+	if (rq) {
+		GEM_BUG_ON(ret);
+		ret = ttm_bo_move_accel_cleanup(bo, &rq->fence, evict, true,
+						dst_mem);
+		if (ret) {
+			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+			ret = rq->fence.error;
+		}
+		i915_request_put(rq);
 	}
 
 	return ret;
@@ -509,11 +577,6 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 	src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
 	GEM_BUG_ON(!dst_reg || !src_reg);
 
-	/* Sync for now. We could do the actual copy async. */
-	ret = ttm_bo_wait_ctx(bo, ctx);
-	if (ret)
-		return ret;
-
 	ret = i915_ttm_move_notify(bo);
 	if (ret)
 		return ret;
@@ -536,7 +599,7 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 	if (IS_ERR(dst_st))
 		return PTR_ERR(dst_st);
 
-	ret = i915_ttm_accel_move(bo, dst_mem, dst_st);
+	ret = i915_ttm_accel_move(bo, dst_mem, dst_st, evict);
 	if (ret) {
 		/* If we start mapping GGTT, we can no longer use man::use_tt here. */
 		dst_iter = !cpu_maps_iomem(dst_mem) ?
@@ -551,9 +614,9 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 					  src_reg->region.start);
 
 		ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter);
+		ttm_bo_move_sync_cleanup(bo, dst_mem);
 	}
 
 	/* Below dst_mem becomes bo->resource. */
-	ttm_bo_move_sync_cleanup(bo, dst_mem);
 	i915_ttm_adjust_domains_after_move(obj);
 	i915_ttm_free_cached_io_st(obj);
 
@@ -823,8 +886,14 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo)
 	i915_gem_object_release_memory_region(obj);
 	mutex_destroy(&obj->ttm.get_io_page.lock);
 
-	if (obj->ttm.created)
+	if (obj->ttm.created) {
+		/*
+		 * Once we detach the vmas from the object, we can call
+		 * this __i915_gem_free_object() earlier.
+		 */
+		__i915_gem_free_object(obj);
 		call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
+	}
 }
 
 /**
@@ -894,7 +963,7 @@ i915_gem_ttm_system_setup(struct drm_i915_private *i915,
 
 	mr = intel_memory_region_create(i915, 0,
 					totalram_pages() << PAGE_SHIFT,
-					PAGE_SIZE, 0,
+					PAGE_SIZE, 1,
 					type, instance,
 					&ttm_system_region_ops);
 	if (IS_ERR(mr))
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
index b8d3dcbb50df..eafac2d74d79 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
@@ -35,7 +35,8 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo);
 static inline struct drm_i915_gem_object *
 i915_ttm_to_gem(struct ttm_buffer_object *bo)
 {
-	if (GEM_WARN_ON(bo->destroy != i915_ttm_bo_destroy))
+	/* Foreign TTM object (typically ghost object) */
+	if (bo->destroy != i915_ttm_bo_destroy)
 		return NULL;
 
 	return container_of(bo, struct drm_i915_gem_object, __do_not_access);