diff options
Diffstat (limited to 'src/gallium/drivers/iris/iris_batch.c')
-rw-r--r-- | src/gallium/drivers/iris/iris_batch.c | 615 |
1 files changed, 340 insertions, 275 deletions
diff --git a/src/gallium/drivers/iris/iris_batch.c b/src/gallium/drivers/iris/iris_batch.c index d21e2ec27c5..354cccbf8be 100644 --- a/src/gallium/drivers/iris/iris_batch.c +++ b/src/gallium/drivers/iris/iris_batch.c @@ -41,20 +41,25 @@ #include "iris_bufmgr.h" #include "iris_context.h" #include "iris_fence.h" - -#include "drm-uapi/i915_drm.h" +#include "iris_kmd_backend.h" +#include "iris_utrace.h" +#include "i915/iris_batch.h" +#include "xe/iris_batch.h" #include "common/intel_aux_map.h" #include "intel/common/intel_gem.h" +#include "intel/compiler/brw_compiler.h" +#include "intel/compiler/elk/elk_compiler.h" +#include "intel/ds/intel_tracepoints.h" #include "util/hash_table.h" +#include "util/u_debug.h" #include "util/set.h" #include "util/u_upload_mgr.h" -#include "main/macros.h" #include <errno.h> #include <xf86drm.h> -#if HAVE_VALGRIND +#ifdef HAVE_VALGRIND #include <valgrind.h> #include <memcheck.h> #define VG(x) x @@ -67,27 +72,26 @@ static void iris_batch_reset(struct iris_batch *batch); -static unsigned -num_fences(struct iris_batch *batch) +unsigned +iris_batch_num_fences(struct iris_batch *batch) { return util_dynarray_num_elements(&batch->exec_fences, - struct drm_i915_gem_exec_fence); + struct iris_batch_fence); } /** * Debugging code to dump the fence list, used by INTEL_DEBUG=submit. */ -static void -dump_fence_list(struct iris_batch *batch) +void +iris_dump_fence_list(struct iris_batch *batch) { - fprintf(stderr, "Fence list (length %u): ", num_fences(batch)); + fprintf(stderr, "Fence list (length %u): ", iris_batch_num_fences(batch)); - util_dynarray_foreach(&batch->exec_fences, - struct drm_i915_gem_exec_fence, f) { + util_dynarray_foreach(&batch->exec_fences, struct iris_batch_fence, f) { fprintf(stderr, "%s%u%s ", - (f->flags & I915_EXEC_FENCE_WAIT) ? "..." : "", + (f->flags & IRIS_BATCH_FENCE_WAIT) ? "..." : "", f->handle, - (f->flags & I915_EXEC_FENCE_SIGNAL) ? "!" : ""); + (f->flags & IRIS_BATCH_FENCE_SIGNAL) ? "!" : ""); } fprintf(stderr, "\n"); @@ -96,23 +100,30 @@ dump_fence_list(struct iris_batch *batch) /** * Debugging code to dump the validation list, used by INTEL_DEBUG=submit. */ -static void -dump_validation_list(struct iris_batch *batch, - struct drm_i915_gem_exec_object2 *validation_list) +void +iris_dump_bo_list(struct iris_batch *batch) { - fprintf(stderr, "Validation list (length %d):\n", batch->exec_count); + fprintf(stderr, "BO list (length %d):\n", batch->exec_count); for (int i = 0; i < batch->exec_count; i++) { - uint64_t flags = validation_list[i].flags; - assert(validation_list[i].handle == batch->exec_bos[i]->gem_handle); - fprintf(stderr, "[%2d]: %2d %-14s @ 0x%"PRIx64" (%"PRIu64"B)\t %2d refs %s\n", + struct iris_bo *bo = batch->exec_bos[i]; + struct iris_bo *backing = iris_get_backing_bo(bo); + bool written = BITSET_TEST(batch->bos_written, i); + bool exported = iris_bo_is_exported(bo); + bool imported = iris_bo_is_imported(bo); + + fprintf(stderr, "[%2d]: %3d (%3d) %-14s @ 0x%016"PRIx64" (%-15s %8"PRIu64"B) %2d refs %s%s%s\n", i, - validation_list[i].handle, - batch->exec_bos[i]->name, - (uint64_t)validation_list[i].offset, - batch->exec_bos[i]->size, - batch->exec_bos[i]->refcount, - (flags & EXEC_OBJECT_WRITE) ? " (write)" : ""); + bo->gem_handle, + backing->gem_handle, + bo->name, + bo->address, + iris_heap_to_string[backing->real.heap], + bo->size, + bo->refcount, + written ? " write" : "", + exported ? " exported" : "", + imported ? " imported" : ""); } } @@ -132,10 +143,13 @@ decode_get_bo(void *v_batch, bool ppgtt, uint64_t address) uint64_t bo_address = bo->address & (~0ull >> 16); if (address >= bo_address && address < bo_address + bo->size) { + if (bo->real.mmap_mode == IRIS_MMAP_NONE) + return (struct intel_batch_decode_bo) { }; + return (struct intel_batch_decode_bo) { .addr = bo_address, .size = bo->size, - .map = iris_bo_map(batch->dbg, bo, MAP_READ), + .map = iris_bo_map(batch->dbg, bo, MAP_READ | MAP_ASYNC), }; } } @@ -158,28 +172,34 @@ decode_get_state_size(void *v_batch, /** * Decode the current batch. */ -static void -decode_batch(struct iris_batch *batch) +void +iris_batch_decode_batch(struct iris_batch *batch) { void *map = iris_bo_map(batch->dbg, batch->exec_bos[0], MAP_READ); intel_print_batch(&batch->decoder, map, batch->primary_batch_size, batch->exec_bos[0]->address, false); } -void +static void iris_init_batch(struct iris_context *ice, - enum iris_batch_name name, - int priority) + enum iris_batch_name name) { struct iris_batch *batch = &ice->batches[name]; struct iris_screen *screen = (void *) ice->ctx.screen; - batch->screen = screen; + /* Note: screen, ctx_id, exec_flags and has_engines_context fields are + * initialized at an earlier phase when contexts are created. + * + * See iris_init_batches(), which calls either iris_init_engines_context() + * or iris_init_non_engine_contexts(). + */ + batch->dbg = &ice->dbg; batch->reset = &ice->reset; batch->state_sizes = ice->state.sizes; batch->name = name; batch->ice = ice; + batch->screen = screen; batch->contains_fence_signal = false; batch->fine_fences.uploader = @@ -187,56 +207,89 @@ iris_init_batch(struct iris_context *ice, PIPE_USAGE_STAGING, 0); iris_fine_fence_init(batch); - batch->hw_ctx_id = iris_create_hw_context(screen->bufmgr); - assert(batch->hw_ctx_id); - - iris_hw_context_set_priority(screen->bufmgr, batch->hw_ctx_id, priority); - util_dynarray_init(&batch->exec_fences, ralloc_context(NULL)); util_dynarray_init(&batch->syncobjs, ralloc_context(NULL)); batch->exec_count = 0; + batch->max_gem_handle = 0; batch->exec_array_size = 128; batch->exec_bos = malloc(batch->exec_array_size * sizeof(batch->exec_bos[0])); batch->bos_written = rzalloc_array(NULL, BITSET_WORD, BITSET_WORDS(batch->exec_array_size)); - batch->cache.render = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + batch->bo_aux_modes = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + batch->num_other_batches = 0; memset(batch->other_batches, 0, sizeof(batch->other_batches)); - for (int i = 0, j = 0; i < IRIS_BATCH_COUNT; i++) { - if (i != name) - batch->other_batches[j++] = &ice->batches[i]; + iris_foreach_batch(ice, other_batch) { + if (batch != other_batch) + batch->other_batches[batch->num_other_batches++] = other_batch; } - if (INTEL_DEBUG) { - const unsigned decode_flags = - INTEL_BATCH_DECODE_FULL | - ((INTEL_DEBUG & DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0) | - INTEL_BATCH_DECODE_OFFSETS | - INTEL_BATCH_DECODE_FLOATS; - - intel_batch_decode_ctx_init(&batch->decoder, &screen->devinfo, - stderr, decode_flags, NULL, - decode_get_bo, decode_get_state_size, batch); + if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS)) { + const unsigned decode_flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS | + (INTEL_DEBUG(DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0); + + if (screen->brw) { + intel_batch_decode_ctx_init_brw(&batch->decoder, &screen->brw->isa, + screen->devinfo, + stderr, decode_flags, NULL, + decode_get_bo, decode_get_state_size, batch); + } else { + assert(screen->elk); + intel_batch_decode_ctx_init_elk(&batch->decoder, &screen->elk->isa, + screen->devinfo, + stderr, decode_flags, NULL, + decode_get_bo, decode_get_state_size, batch); + } batch->decoder.dynamic_base = IRIS_MEMZONE_DYNAMIC_START; batch->decoder.instruction_base = IRIS_MEMZONE_SHADER_START; + batch->decoder.surface_base = IRIS_MEMZONE_BINDER_START; batch->decoder.max_vbo_decoded_lines = 32; + if (batch->name == IRIS_BATCH_BLITTER) + batch->decoder.engine = INTEL_ENGINE_CLASS_COPY; } iris_init_batch_measure(ice, batch); + u_trace_init(&batch->trace, &ice->ds.trace_context); + iris_batch_reset(batch); } +void +iris_init_batches(struct iris_context *ice) +{ + struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; + struct iris_bufmgr *bufmgr = screen->bufmgr; + const struct intel_device_info *devinfo = iris_bufmgr_get_device_info(bufmgr); + + switch (devinfo->kmd_type) { + case INTEL_KMD_TYPE_I915: + iris_i915_init_batches(ice); + break; + case INTEL_KMD_TYPE_XE: + iris_xe_init_batches(ice); + break; + default: + unreachable("missing"); + } + + iris_foreach_batch(ice, batch) + iris_init_batch(ice, batch - &ice->batches[0]); +} + static int find_exec_index(struct iris_batch *batch, struct iris_bo *bo) { unsigned index = READ_ONCE(bo->index); + if (index == -1) + return -1; + if (index < batch->exec_count && batch->exec_bos[index] == bo) return index; @@ -281,6 +334,45 @@ add_bo_to_batch(struct iris_batch *batch, struct iris_bo *bo, bool writable) bo->index = batch->exec_count; batch->exec_count++; batch->aperture_space += bo->size; + + batch->max_gem_handle = + MAX2(batch->max_gem_handle, iris_get_backing_bo(bo)->gem_handle); +} + +static void +flush_for_cross_batch_dependencies(struct iris_batch *batch, + struct iris_bo *bo, + bool writable) +{ + if (batch->measure && bo == batch->measure->bo) + return; + + /* When a batch uses a buffer for the first time, or newly writes a buffer + * it had already referenced, we may need to flush other batches in order + * to correctly synchronize them. + */ + for (int b = 0; b < batch->num_other_batches; b++) { + struct iris_batch *other_batch = batch->other_batches[b]; + int other_index = find_exec_index(other_batch, bo); + + /* If the buffer is referenced by another batch, and either batch + * intends to write it, then flush the other batch and synchronize. + * + * Consider these cases: + * + * 1. They read, we read => No synchronization required. + * 2. They read, we write => Synchronize (they need the old value) + * 3. They write, we read => Synchronize (we need their new value) + * 4. They write, we write => Synchronize (order writes) + * + * The read/read case is very common, as multiple batches usually + * share a streaming state buffer or shader assembly buffer, and + * we want to avoid synchronizing in this case. + */ + if (other_index != -1 && + (writable || BITSET_TEST(other_batch->bos_written, other_index))) + iris_batch_flush(other_batch); + } } /** @@ -294,7 +386,6 @@ iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo, bool writable, enum iris_domain access) { - assert(bo->kflags & EXEC_OBJECT_PINNED); assert(bo != batch->bo); /* Never mark the workaround BO with EXEC_OBJECT_WRITE. We don't care @@ -313,44 +404,17 @@ iris_use_pinned_bo(struct iris_batch *batch, int existing_index = find_exec_index(batch, bo); - if (existing_index != -1) { - /* The BO is already in the list; mark it writable */ - if (writable) - BITSET_SET(batch->bos_written, existing_index); + if (existing_index == -1) { + flush_for_cross_batch_dependencies(batch, bo, writable); - return; - } + ensure_exec_obj_space(batch, 1); + add_bo_to_batch(batch, bo, writable); + } else if (writable && !BITSET_TEST(batch->bos_written, existing_index)) { + flush_for_cross_batch_dependencies(batch, bo, writable); - if (!batch->measure || bo != batch->measure->bo) { - /* This is the first time our batch has seen this BO. Before we use it, - * we may need to flush and synchronize with other batches. - */ - for (int b = 0; b < ARRAY_SIZE(batch->other_batches); b++) { - struct iris_batch *other_batch = batch->other_batches[b]; - int other_index = find_exec_index(other_batch, bo); - - /* If the buffer is referenced by another batch, and either batch - * intends to write it, then flush the other batch and synchronize. - * - * Consider these cases: - * - * 1. They read, we read => No synchronization required. - * 2. They read, we write => Synchronize (they need the old value) - * 3. They write, we read => Synchronize (we need their new value) - * 4. They write, we write => Synchronize (order writes) - * - * The read/read case is very common, as multiple batches usually - * share a streaming state buffer or shader assembly buffer, and - * we want to avoid synchronizing in this case. - */ - if (other_index != -1 && - (writable || BITSET_TEST(other_batch->bos_written, other_index))) - iris_batch_flush(other_batch); - } + /* The BO is already in the list; mark it writable */ + BITSET_SET(batch->bos_written, existing_index); } - - ensure_exec_obj_space(batch, 1); - add_bo_to_batch(batch, bo, writable); } static void @@ -359,10 +423,11 @@ create_batch(struct iris_batch *batch) struct iris_screen *screen = batch->screen; struct iris_bufmgr *bufmgr = screen->bufmgr; + /* TODO: We probably could suballocate batches... */ batch->bo = iris_bo_alloc(bufmgr, "command buffer", - BATCH_SZ + BATCH_RESERVED, 1, - IRIS_MEMZONE_OTHER, 0); - batch->bo->kflags |= EXEC_OBJECT_CAPTURE; + BATCH_SZ + BATCH_RESERVED, 8, + IRIS_MEMZONE_OTHER, + BO_ALLOC_NO_SUBALLOC | BO_ALLOC_CAPTURE); batch->map = iris_bo_map(NULL, batch->bo, MAP_READ | MAP_WRITE); batch->map_next = batch->map; @@ -393,19 +458,28 @@ iris_batch_reset(struct iris_batch *batch) { struct iris_screen *screen = batch->screen; struct iris_bufmgr *bufmgr = screen->bufmgr; + const struct intel_device_info *devinfo = screen->devinfo; + + u_trace_fini(&batch->trace); iris_bo_unreference(batch->bo); batch->primary_batch_size = 0; batch->total_chained_batch_size = 0; batch->contains_draw = false; batch->contains_fence_signal = false; - batch->decoder.surface_base = batch->last_surface_base_address; + if (devinfo->ver < 11) + batch->decoder.surface_base = batch->last_binder_address; + else + batch->decoder.bt_pool_base = batch->last_binder_address; create_batch(batch); assert(batch->bo->index == 0); + memset(batch->bos_written, 0, + sizeof(BITSET_WORD) * BITSET_WORDS(batch->exec_array_size)); + struct iris_syncobj *syncobj = iris_create_syncobj(bufmgr); - iris_batch_add_syncobj(batch, syncobj, I915_EXEC_FENCE_SIGNAL); + iris_batch_add_syncobj(batch, syncobj, IRIS_BATCH_FENCE_SIGNAL); iris_syncobj_reference(bufmgr, &syncobj, NULL); assert(!batch->sync_region_depth); @@ -418,13 +492,17 @@ iris_batch_reset(struct iris_batch *batch) add_bo_to_batch(batch, screen->workaround_bo, false); iris_batch_maybe_noop(batch); + + u_trace_init(&batch->trace, &batch->ice->ds.trace_context); + batch->begin_trace_recorded = false; } -void -iris_batch_free(struct iris_batch *batch) +static void +iris_batch_free(const struct iris_context *ice, struct iris_batch *batch) { struct iris_screen *screen = batch->screen; struct iris_bufmgr *bufmgr = screen->bufmgr; + const struct intel_device_info *devinfo = iris_bufmgr_get_device_info(bufmgr); for (int i = 0; i < batch->exec_count; i++) { iris_bo_unreference(batch->exec_bos[i]); @@ -448,17 +526,45 @@ iris_batch_free(struct iris_batch *batch) batch->map = NULL; batch->map_next = NULL; - iris_destroy_hw_context(bufmgr, batch->hw_ctx_id); + switch (devinfo->kmd_type) { + case INTEL_KMD_TYPE_I915: + iris_i915_destroy_batch(batch); + break; + case INTEL_KMD_TYPE_XE: + iris_xe_destroy_batch(batch); + break; + default: + unreachable("missing"); + } iris_destroy_batch_measure(batch->measure); batch->measure = NULL; - _mesa_hash_table_destroy(batch->cache.render, NULL); + u_trace_fini(&batch->trace); - if (INTEL_DEBUG) + _mesa_hash_table_destroy(batch->bo_aux_modes, NULL); + + if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS)) intel_batch_decode_ctx_finish(&batch->decoder); } +void +iris_destroy_batches(struct iris_context *ice) +{ + iris_foreach_batch(ice, batch) + iris_batch_free(ice, batch); +} + +void iris_batch_maybe_begin_frame(struct iris_batch *batch) +{ + struct iris_context *ice = batch->ice; + + if (ice->utrace.begin_frame != ice->frame) { + trace_intel_begin_frame(&batch->trace, batch); + ice->utrace.begin_frame = ice->utrace.end_frame = ice->frame; + } +} + /** * If we've chained to a secondary batch, or are getting near to the end, * then flush. This should only be called between draws. @@ -523,7 +629,7 @@ add_aux_map_bos_to_batch(struct iris_batch *batch) static void finish_seqno(struct iris_batch *batch) { - struct iris_fine_fence *sq = iris_fine_fence_new(batch, IRIS_FENCE_END); + struct iris_fine_fence *sq = iris_fine_fence_new(batch); if (!sq) return; @@ -537,7 +643,7 @@ finish_seqno(struct iris_batch *batch) static void iris_finish_batch(struct iris_batch *batch) { - const struct intel_device_info *devinfo = &batch->screen->devinfo; + const struct intel_device_info *devinfo = batch->screen->devinfo; if (devinfo->ver == 12 && batch->name == IRIS_BATCH_RENDER) { /* We re-emit constants at the beginning of every batch as a hardware @@ -555,6 +661,14 @@ iris_finish_batch(struct iris_batch *batch) finish_seqno(batch); + trace_intel_end_batch(&batch->trace, batch->name); + + struct iris_context *ice = batch->ice; + if (ice->utrace.end_frame != ice->frame) { + trace_intel_end_frame(&batch->trace, batch, ice->utrace.end_frame); + ice->utrace.end_frame = ice->frame; + } + /* Emit MI_BATCH_BUFFER_END to finish our batch. */ uint32_t *map = batch->map_next; @@ -569,54 +683,43 @@ iris_finish_batch(struct iris_batch *batch) * Replace our current GEM context with a new one (in case it got banned). */ static bool -replace_hw_ctx(struct iris_batch *batch) +replace_kernel_ctx(struct iris_batch *batch) { struct iris_screen *screen = batch->screen; struct iris_bufmgr *bufmgr = screen->bufmgr; + const struct intel_device_info *devinfo = iris_bufmgr_get_device_info(bufmgr); - uint32_t new_ctx = iris_clone_hw_context(bufmgr, batch->hw_ctx_id); - if (!new_ctx) - return false; - - iris_destroy_hw_context(bufmgr, batch->hw_ctx_id); - batch->hw_ctx_id = new_ctx; + threaded_context_unwrap_sync(&batch->ice->ctx); - /* Notify the context that state must be re-initialized. */ - iris_lost_context_state(batch); - - return true; + switch (devinfo->kmd_type) { + case INTEL_KMD_TYPE_I915: + return iris_i915_replace_batch(batch); + case INTEL_KMD_TYPE_XE: + return iris_xe_replace_batch(batch); + default: + unreachable("missing"); + return false; + } } enum pipe_reset_status iris_batch_check_for_reset(struct iris_batch *batch) { struct iris_screen *screen = batch->screen; + struct iris_bufmgr *bufmgr = screen->bufmgr; + struct iris_context *ice = batch->ice; + const struct iris_kmd_backend *backend; enum pipe_reset_status status = PIPE_NO_RESET; - struct drm_i915_reset_stats stats = { .ctx_id = batch->hw_ctx_id }; - if (intel_ioctl(screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats)) - DBG("DRM_IOCTL_I915_GET_RESET_STATS failed: %s\n", strerror(errno)); + /* Banned context was already signalled to application */ + if (ice->context_reset_signaled) + return status; - if (stats.batch_active != 0) { - /* A reset was observed while a batch from this hardware context was - * executing. Assume that this context was at fault. - */ - status = PIPE_GUILTY_CONTEXT_RESET; - } else if (stats.batch_pending != 0) { - /* A reset was observed while a batch from this context was in progress, - * but the batch was not executing. In this case, assume that the - * context was not at fault. - */ - status = PIPE_INNOCENT_CONTEXT_RESET; - } + backend = iris_bufmgr_get_kernel_driver_backend(bufmgr); + status = backend->batch_check_for_reset(batch); - if (status != PIPE_NO_RESET) { - /* Our context is likely banned, or at least in an unknown state. - * Throw it away and start with a fresh context. Ideally this may - * catch the problem before our next execbuf fails with -EIO. - */ - replace_hw_ctx(batch); - } + if (status != PIPE_NO_RESET) + ice->context_reset_signaled = true; return status; } @@ -624,7 +727,7 @@ iris_batch_check_for_reset(struct iris_batch *batch) static void move_syncobj_to_batch(struct iris_batch *batch, struct iris_syncobj **p_syncobj, - unsigned flags) + uint32_t flags) { struct iris_bufmgr *bufmgr = batch->screen->bufmgr; @@ -650,11 +753,15 @@ update_bo_syncobjs(struct iris_batch *batch, struct iris_bo *bo, bool write) { struct iris_screen *screen = batch->screen; struct iris_bufmgr *bufmgr = screen->bufmgr; + struct iris_context *ice = batch->ice; + + simple_mtx_assert_locked(iris_bufmgr_get_bo_deps_lock(bufmgr)); /* Make sure bo->deps is big enough */ if (screen->id >= bo->deps_size) { int new_size = screen->id + 1; - bo->deps= realloc(bo->deps, new_size * sizeof(bo->deps[0])); + bo->deps = realloc(bo->deps, new_size * sizeof(bo->deps[0])); + assert(bo->deps); memset(&bo->deps[bo->deps_size], 0, sizeof(bo->deps[0]) * (new_size - bo->deps_size)); @@ -668,63 +775,49 @@ update_bo_syncobjs(struct iris_batch *batch, struct iris_bo *bo, bool write) * our code may need to care about all the operations done by every batch * on every screen. */ - struct iris_bo_screen_deps *deps = &bo->deps[screen->id]; + struct iris_bo_screen_deps *bo_deps = &bo->deps[screen->id]; int batch_idx = batch->name; -#if IRIS_BATCH_COUNT == 2 - /* Due to the above, we exploit the fact that IRIS_NUM_BATCHES is actually - * 2, which means there's only one other batch we need to care about. - */ - int other_batch_idx = 1 - batch_idx; -#else - /* For IRIS_BATCH_COUNT == 3 we can do: - * int other_batch_idxs[IRIS_BATCH_COUNT - 1] = { - * (batch_idx ^ 1) & 1, - * (batch_idx ^ 2) & 2, - * }; - * For IRIS_BATCH_COUNT == 4 we can do: - * int other_batch_idxs[IRIS_BATCH_COUNT - 1] = { - * (batch_idx + 1) & 3, - * (batch_idx + 2) & 3, - * (batch_idx + 3) & 3, - * }; + /* Make our batch depend on additional syncobjs depending on what other + * batches have been doing to this bo. + * + * We also look at the dependencies set by our own batch since those could + * have come from a different context, and apps don't like it when we don't + * do inter-context tracking. */ -#error "Implement me." -#endif - - /* If it is being written to by others, wait on it. */ - if (deps->write_syncobjs[other_batch_idx]) - move_syncobj_to_batch(batch, &deps->write_syncobjs[other_batch_idx], - I915_EXEC_FENCE_WAIT); + iris_foreach_batch(ice, batch_i) { + unsigned i = batch_i->name; + + /* If the bo is being written to by others, wait for them. */ + if (bo_deps->write_syncobjs[i]) + move_syncobj_to_batch(batch, &bo_deps->write_syncobjs[i], + IRIS_BATCH_FENCE_WAIT); + + /* If we're writing to the bo, wait on the reads from other batches. */ + if (write) + move_syncobj_to_batch(batch, &bo_deps->read_syncobjs[i], + IRIS_BATCH_FENCE_WAIT); + } - struct iris_syncobj *batch_syncobj = iris_batch_get_signal_syncobj(batch); + struct iris_syncobj *batch_syncobj = + iris_batch_get_signal_syncobj(batch); + /* Update bo_deps depending on what we're doing with the bo in this batch + * by putting the batch's syncobj in the bo_deps lists accordingly. Only + * keep track of the last time we wrote to or read the BO. + */ if (write) { - /* If we're writing to it, set our batch's syncobj as write_syncobj so - * others can wait on us. Also wait every reader we care about before - * writing. - */ - iris_syncobj_reference(bufmgr, &deps->write_syncobjs[batch_idx], - batch_syncobj); - - move_syncobj_to_batch(batch, &deps->read_syncobjs[other_batch_idx], - I915_EXEC_FENCE_WAIT); - + iris_syncobj_reference(bufmgr, &bo_deps->write_syncobjs[batch_idx], + batch_syncobj); } else { - /* If we're reading, replace the other read from our batch index. */ - iris_syncobj_reference(bufmgr, &deps->read_syncobjs[batch_idx], + iris_syncobj_reference(bufmgr, &bo_deps->read_syncobjs[batch_idx], batch_syncobj); } } -static void -update_batch_syncobjs(struct iris_batch *batch) +void +iris_batch_update_syncobjs(struct iris_batch *batch) { - struct iris_bufmgr *bufmgr = batch->screen->bufmgr; - simple_mtx_t *bo_deps_lock = iris_bufmgr_get_bo_deps_lock(bufmgr); - - simple_mtx_lock(bo_deps_lock); - for (int i = 0; i < batch->exec_count; i++) { struct iris_bo *bo = batch->exec_bos[i]; bool write = BITSET_TEST(batch->bos_written, i); @@ -734,107 +827,65 @@ update_batch_syncobjs(struct iris_batch *batch) update_bo_syncobjs(batch, bo, write); } - simple_mtx_unlock(bo_deps_lock); } /** - * Submit the batch to the GPU via execbuffer2. + * Convert the syncobj which will be signaled when this batch completes + * to a SYNC_FILE object, for use with import/export sync ioctls. */ -static int -submit_batch(struct iris_batch *batch) +bool +iris_batch_syncobj_to_sync_file_fd(struct iris_batch *batch, int *out_fd) { - iris_bo_unmap(batch->bo); + int drm_fd = batch->screen->fd; - struct drm_i915_gem_exec_object2 *validation_list = - malloc(batch->exec_count * sizeof(*validation_list)); + struct iris_syncobj *batch_syncobj = + iris_batch_get_signal_syncobj(batch); - for (int i = 0; i < batch->exec_count; i++) { - struct iris_bo *bo = batch->exec_bos[i]; - bool written = BITSET_TEST(batch->bos_written, i); - unsigned extra_flags = 0; - - if (written) - extra_flags |= EXEC_OBJECT_WRITE; - if (!iris_bo_is_external(bo)) - extra_flags |= EXEC_OBJECT_ASYNC; - - validation_list[i] = (struct drm_i915_gem_exec_object2) { - .handle = bo->gem_handle, - .offset = bo->address, - .flags = bo->kflags | extra_flags, - }; - } - - if (INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT)) { - dump_fence_list(batch); - dump_validation_list(batch, validation_list); - } - - if (INTEL_DEBUG & DEBUG_BATCH) { - decode_batch(batch); - } - - /* The requirement for using I915_EXEC_NO_RELOC are: - * - * The addresses written in the objects must match the corresponding - * reloc.address which in turn must match the corresponding - * execobject.offset. - * - * Any render targets written to in the batch must be flagged with - * EXEC_OBJECT_WRITE. - * - * To avoid stalling, execobject.offset should match the current - * address of that object within the active context. - */ - struct drm_i915_gem_execbuffer2 execbuf = { - .buffers_ptr = (uintptr_t) validation_list, - .buffer_count = batch->exec_count, - .batch_start_offset = 0, - /* This must be QWord aligned. */ - .batch_len = ALIGN(batch->primary_batch_size, 8), - .flags = I915_EXEC_RENDER | - I915_EXEC_NO_RELOC | - I915_EXEC_BATCH_FIRST | - I915_EXEC_HANDLE_LUT, - .rsvd1 = batch->hw_ctx_id, /* rsvd1 is actually the context ID */ + struct drm_syncobj_handle syncobj_to_fd_ioctl = { + .handle = batch_syncobj->handle, + .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE, + .fd = -1, }; - - if (num_fences(batch)) { - execbuf.flags |= I915_EXEC_FENCE_ARRAY; - execbuf.num_cliprects = num_fences(batch); - execbuf.cliprects_ptr = - (uintptr_t)util_dynarray_begin(&batch->exec_fences); - } - - int ret = 0; - if (!batch->screen->devinfo.no_hw && - intel_ioctl(batch->screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf)) - ret = -errno; - - for (int i = 0; i < batch->exec_count; i++) { - struct iris_bo *bo = batch->exec_bos[i]; - - bo->idle = false; - bo->index = -1; - - iris_bo_unreference(bo); + if (intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, + &syncobj_to_fd_ioctl)) { + fprintf(stderr, "DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD ioctl failed (%d)\n", + errno); + return false; } - free(validation_list); + assert(syncobj_to_fd_ioctl.fd >= 0); + *out_fd = syncobj_to_fd_ioctl.fd; - return ret; + return true; } -static const char * -batch_name_to_string(enum iris_batch_name name) +const char * +iris_batch_name_to_string(enum iris_batch_name name) { const char *names[IRIS_BATCH_COUNT] = { [IRIS_BATCH_RENDER] = "render", [IRIS_BATCH_COMPUTE] = "compute", + [IRIS_BATCH_BLITTER] = "blitter", }; return names[name]; } +bool +iris_batch_is_banned(struct iris_bufmgr *bufmgr, int ret) +{ + enum intel_kmd_type kmd_type = iris_bufmgr_get_device_info(bufmgr)->kmd_type; + + assert(ret < 0); + /* In i915 EIO means our context is banned, while on Xe ECANCELED means + * our exec queue was banned + */ + if ((kmd_type == INTEL_KMD_TYPE_I915 && ret == -EIO) || + (kmd_type == INTEL_KMD_TYPE_XE && ret == -ECANCELED)) + return true; + + return false; +} + /** * Flush the batch buffer, submitting it to the GPU and resetting it so * we're ready to emit the next batch. @@ -843,33 +894,39 @@ void _iris_batch_flush(struct iris_batch *batch, const char *file, int line) { struct iris_screen *screen = batch->screen; + struct iris_context *ice = batch->ice; + struct iris_bufmgr *bufmgr = screen->bufmgr; /* If a fence signals we need to flush it. */ if (iris_batch_bytes_used(batch) == 0 && !batch->contains_fence_signal) return; - iris_measure_batch_end(batch->ice, batch); + iris_measure_batch_end(ice, batch); iris_finish_batch(batch); - update_batch_syncobjs(batch); - - if (INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT | DEBUG_PIPE_CONTROL)) { + if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_SUBMIT | DEBUG_PIPE_CONTROL)) { const char *basefile = strstr(file, "iris/"); if (basefile) file = basefile + 5; + enum intel_kmd_type kmd_type = iris_bufmgr_get_device_info(bufmgr)->kmd_type; + uint32_t batch_ctx_id = kmd_type == INTEL_KMD_TYPE_I915 ? + batch->i915.ctx_id : batch->xe.exec_queue_id; fprintf(stderr, "%19s:%-3d: %s batch [%u] flush with %5db (%0.1f%%) " "(cmds), %4d BOs (%0.1fMb aperture)\n", - file, line, batch_name_to_string(batch->name), batch->hw_ctx_id, - batch->total_chained_batch_size, + file, line, iris_batch_name_to_string(batch->name), + batch_ctx_id, batch->total_chained_batch_size, 100.0f * batch->total_chained_batch_size / BATCH_SZ, batch->exec_count, (float) batch->aperture_space / (1024 * 1024)); } - int ret = submit_batch(batch); + uint64_t start_ts = intel_ds_begin_submit(&batch->ds); + uint64_t submission_id = batch->ds.submission_id; + int ret = iris_bufmgr_get_kernel_driver_backend(bufmgr)->batch_submit(batch); + intel_ds_end_submit(&batch->ds, start_ts); /* When batch submission fails, our end-of-batch syncobj remains * unsignalled, and in fact is not even considered submitted. @@ -887,6 +944,7 @@ _iris_batch_flush(struct iris_batch *batch, const char *file, int line) iris_syncobj_signal(screen->bufmgr, iris_batch_get_signal_syncobj(batch)); batch->exec_count = 0; + batch->max_gem_handle = 0; batch->aperture_space = 0; util_dynarray_foreach(&batch->syncobjs, struct iris_syncobj *, s) @@ -895,32 +953,39 @@ _iris_batch_flush(struct iris_batch *batch, const char *file, int line) util_dynarray_clear(&batch->exec_fences); - if (INTEL_DEBUG & DEBUG_SYNC) { + if (INTEL_DEBUG(DEBUG_SYNC)) { dbg_printf("waiting for idle\n"); iris_bo_wait_rendering(batch->bo); /* if execbuf failed; this is a nop */ } + if (u_trace_should_process(&ice->ds.trace_context)) + iris_utrace_flush(batch, submission_id); + /* Start a new batch buffer. */ iris_batch_reset(batch); - /* EIO means our context is banned. In this case, try and replace it + /* Check if context or engine was banned, if yes try to replace it * with a new logical context, and inform iris_context that all state * has been lost and needs to be re-initialized. If this succeeds, * dubiously claim success... - * Also handle ENOMEM here. */ - if ((ret == -EIO || ret == -ENOMEM) && replace_hw_ctx(batch)) { + if (ret && iris_batch_is_banned(bufmgr, ret)) { + enum pipe_reset_status status = iris_batch_check_for_reset(batch); + + if (status != PIPE_NO_RESET || ice->context_reset_signaled) + replace_kernel_ctx(batch); + if (batch->reset->reset) { /* Tell gallium frontends the device is lost and it was our fault. */ - batch->reset->reset(batch->reset->data, PIPE_GUILTY_CONTEXT_RESET); + batch->reset->reset(batch->reset->data, status); } ret = 0; } if (ret < 0) { -#ifdef DEBUG - const bool color = INTEL_DEBUG & DEBUG_COLOR; +#if MESA_DEBUG + const bool color = INTEL_DEBUG(DEBUG_COLOR); fprintf(stderr, "%siris: Failed to submit batchbuffer: %-80s%s\n", color ? "\e[1;41m" : "", strerror(-ret), color ? "\e[0m" : ""); #endif |