1 files changed, 340 insertions, 275 deletions
diff --git a/src/gallium/drivers/iris/iris_batch.c b/src/gallium/drivers/iris/iris_batch.c
index d21e2ec27c5..354cccbf8be 100644
--- a/src/gallium/drivers/iris/iris_batch.c
+++ b/src/gallium/drivers/iris/iris_batch.c
@@ -41,20 +41,25 @@
 #include "iris_bufmgr.h"
 #include "iris_context.h"
 #include "iris_fence.h"
-
-#include "drm-uapi/i915_drm.h"
+#include "iris_kmd_backend.h"
+#include "iris_utrace.h"
+#include "i915/iris_batch.h"
+#include "xe/iris_batch.h"
 
 #include "common/intel_aux_map.h"
 #include "intel/common/intel_gem.h"
+#include "intel/compiler/brw_compiler.h"
+#include "intel/compiler/elk/elk_compiler.h"
+#include "intel/ds/intel_tracepoints.h"
 #include "util/hash_table.h"
+#include "util/u_debug.h"
 #include "util/set.h"
 #include "util/u_upload_mgr.h"
-#include "main/macros.h"
 
 #include <errno.h>
 #include <xf86drm.h>
 
-#if HAVE_VALGRIND
+#ifdef HAVE_VALGRIND
 #include <valgrind.h>
 #include <memcheck.h>
 #define VG(x) x
@@ -67,27 +72,26 @@
 static void
 iris_batch_reset(struct iris_batch *batch);
 
-static unsigned
-num_fences(struct iris_batch *batch)
+unsigned
+iris_batch_num_fences(struct iris_batch *batch)
 {
    return util_dynarray_num_elements(&batch->exec_fences,
-                                     struct drm_i915_gem_exec_fence);
+                                     struct iris_batch_fence);
 }
 
 /**
  * Debugging code to dump the fence list, used by INTEL_DEBUG=submit.
  */
-static void
-dump_fence_list(struct iris_batch *batch)
+void
+iris_dump_fence_list(struct iris_batch *batch)
 {
-   fprintf(stderr, "Fence list (length %u):      ", num_fences(batch));
+   fprintf(stderr, "Fence list (length %u):      ", iris_batch_num_fences(batch));
 
-   util_dynarray_foreach(&batch->exec_fences,
-                         struct drm_i915_gem_exec_fence, f) {
+   util_dynarray_foreach(&batch->exec_fences, struct iris_batch_fence, f) {
       fprintf(stderr, "%s%u%s ",
-              (f->flags & I915_EXEC_FENCE_WAIT) ? "..." : "",
+              (f->flags & IRIS_BATCH_FENCE_WAIT) ? "..." : "",
               f->handle,
-              (f->flags & I915_EXEC_FENCE_SIGNAL) ? "!" : "");
+              (f->flags & IRIS_BATCH_FENCE_SIGNAL) ? "!" : "");
    }
 
    fprintf(stderr, "\n");
@@ -96,23 +100,30 @@ dump_fence_list(struct iris_batch *batch)
 /**
  * Debugging code to dump the validation list, used by INTEL_DEBUG=submit.
  */
-static void
-dump_validation_list(struct iris_batch *batch,
-                     struct drm_i915_gem_exec_object2 *validation_list)
+void
+iris_dump_bo_list(struct iris_batch *batch)
 {
-   fprintf(stderr, "Validation list (length %d):\n", batch->exec_count);
+   fprintf(stderr, "BO list (length %d):\n", batch->exec_count);
 
    for (int i = 0; i < batch->exec_count; i++) {
-      uint64_t flags = validation_list[i].flags;
-      assert(validation_list[i].handle == batch->exec_bos[i]->gem_handle);
-      fprintf(stderr, "[%2d]: %2d %-14s @ 0x%"PRIx64" (%"PRIu64"B)\t %2d refs %s\n",
+      struct iris_bo *bo = batch->exec_bos[i];
+      struct iris_bo *backing = iris_get_backing_bo(bo);
+      bool written = BITSET_TEST(batch->bos_written, i);
+      bool exported = iris_bo_is_exported(bo);
+      bool imported = iris_bo_is_imported(bo);
+
+      fprintf(stderr, "[%2d]: %3d (%3d) %-14s @ 0x%016"PRIx64" (%-15s %8"PRIu64"B) %2d refs %s%s%s\n",
               i,
-              validation_list[i].handle,
-              batch->exec_bos[i]->name,
-              (uint64_t)validation_list[i].offset,
-              batch->exec_bos[i]->size,
-              batch->exec_bos[i]->refcount,
-              (flags & EXEC_OBJECT_WRITE) ? " (write)" : "");
+              bo->gem_handle,
+              backing->gem_handle,
+              bo->name,
+              bo->address,
+              iris_heap_to_string[backing->real.heap],
+              bo->size,
+              bo->refcount,
+              written ? " write" : "",
+              exported ? " exported" : "",
+              imported ? " imported" : "");
    }
 }
 
@@ -132,10 +143,13 @@ decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
       uint64_t bo_address = bo->address & (~0ull >> 16);
 
       if (address >= bo_address && address < bo_address + bo->size) {
+         if (bo->real.mmap_mode == IRIS_MMAP_NONE)
+            return (struct intel_batch_decode_bo) { };
+
          return (struct intel_batch_decode_bo) {
             .addr = bo_address,
             .size = bo->size,
-            .map = iris_bo_map(batch->dbg, bo, MAP_READ),
+            .map = iris_bo_map(batch->dbg, bo, MAP_READ | MAP_ASYNC),
          };
       }
    }
@@ -158,28 +172,34 @@ decode_get_state_size(void *v_batch,
 /**
  * Decode the current batch.
  */
-static void
-decode_batch(struct iris_batch *batch)
+void
+iris_batch_decode_batch(struct iris_batch *batch)
 {
    void *map = iris_bo_map(batch->dbg, batch->exec_bos[0], MAP_READ);
    intel_print_batch(&batch->decoder, map, batch->primary_batch_size,
                      batch->exec_bos[0]->address, false);
 }
 
-void
+static void
 iris_init_batch(struct iris_context *ice,
-                enum iris_batch_name name,
-                int priority)
+                enum iris_batch_name name)
 {
    struct iris_batch *batch = &ice->batches[name];
    struct iris_screen *screen = (void *) ice->ctx.screen;
 
-   batch->screen = screen;
+   /* Note: screen, ctx_id, exec_flags and has_engines_context fields are
+    * initialized at an earlier phase when contexts are created.
+    *
+    * See iris_init_batches(), which first runs the KMD-specific setup
+    * (iris_i915_init_batches() or iris_xe_init_batches()) and then this.
+    */
+
    batch->dbg = &ice->dbg;
    batch->reset = &ice->reset;
    batch->state_sizes = ice->state.sizes;
    batch->name = name;
    batch->ice = ice;
+   batch->screen = screen;
    batch->contains_fence_signal = false;
 
    batch->fine_fences.uploader =
@@ -187,56 +207,89 @@ iris_init_batch(struct iris_context *ice,
                       PIPE_USAGE_STAGING, 0);
    iris_fine_fence_init(batch);
 
-   batch->hw_ctx_id = iris_create_hw_context(screen->bufmgr);
-   assert(batch->hw_ctx_id);
-
-   iris_hw_context_set_priority(screen->bufmgr, batch->hw_ctx_id, priority);
-
    util_dynarray_init(&batch->exec_fences, ralloc_context(NULL));
    util_dynarray_init(&batch->syncobjs, ralloc_context(NULL));
 
    batch->exec_count = 0;
+   batch->max_gem_handle = 0;
    batch->exec_array_size = 128;
    batch->exec_bos =
       malloc(batch->exec_array_size * sizeof(batch->exec_bos[0]));
    batch->bos_written =
       rzalloc_array(NULL, BITSET_WORD, BITSET_WORDS(batch->exec_array_size));
 
-   batch->cache.render = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+   batch->bo_aux_modes = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                  _mesa_key_pointer_equal);
 
+   batch->num_other_batches = 0;
    memset(batch->other_batches, 0, sizeof(batch->other_batches));
 
-   for (int i = 0, j = 0; i < IRIS_BATCH_COUNT; i++) {
-      if (i != name)
-         batch->other_batches[j++] = &ice->batches[i];
+   iris_foreach_batch(ice, other_batch) {
+      if (batch != other_batch)
+         batch->other_batches[batch->num_other_batches++] = other_batch;
    }
 
-   if (INTEL_DEBUG) {
-      const unsigned decode_flags =
-         INTEL_BATCH_DECODE_FULL |
-         ((INTEL_DEBUG & DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0) |
-         INTEL_BATCH_DECODE_OFFSETS |
-         INTEL_BATCH_DECODE_FLOATS;
-
-      intel_batch_decode_ctx_init(&batch->decoder, &screen->devinfo,
-                                  stderr, decode_flags, NULL,
-                                  decode_get_bo, decode_get_state_size, batch);
+   if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS)) {
+      const unsigned decode_flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS |
+         (INTEL_DEBUG(DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0);
+
+      if (screen->brw) {
+         intel_batch_decode_ctx_init_brw(&batch->decoder, &screen->brw->isa,
+                                         screen->devinfo,
+                                         stderr, decode_flags, NULL,
+                                         decode_get_bo, decode_get_state_size, batch);
+      } else {
+         assert(screen->elk);
+         intel_batch_decode_ctx_init_elk(&batch->decoder, &screen->elk->isa,
+                                         screen->devinfo,
+                                         stderr, decode_flags, NULL,
+                                         decode_get_bo, decode_get_state_size, batch);
+      }
       batch->decoder.dynamic_base = IRIS_MEMZONE_DYNAMIC_START;
       batch->decoder.instruction_base = IRIS_MEMZONE_SHADER_START;
+      batch->decoder.surface_base = IRIS_MEMZONE_BINDER_START;
       batch->decoder.max_vbo_decoded_lines = 32;
+      if (batch->name == IRIS_BATCH_BLITTER)
+         batch->decoder.engine = INTEL_ENGINE_CLASS_COPY;
    }
 
    iris_init_batch_measure(ice, batch);
 
+   u_trace_init(&batch->trace, &ice->ds.trace_context);
+
    iris_batch_reset(batch);
 }
 
+void
+iris_init_batches(struct iris_context *ice)
+{
+   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
+   struct iris_bufmgr *bufmgr = screen->bufmgr;
+   const struct intel_device_info *devinfo = iris_bufmgr_get_device_info(bufmgr);
+   /* Kernel-driver-specific batch setup runs first, selected by KMD type. */
+   switch (devinfo->kmd_type) {
+   case INTEL_KMD_TYPE_I915:
+      iris_i915_init_batches(ice);
+      break;
+   case INTEL_KMD_TYPE_XE:
+      iris_xe_init_batches(ice);
+      break;
+   default:
+      unreachable("missing");
+   }
+   /* iris_init_batch() assigns batch->name, so pass the array index. */
+   iris_foreach_batch(ice, batch)
+      iris_init_batch(ice, batch - &ice->batches[0]);
+}
+
 static int
 find_exec_index(struct iris_batch *batch, struct iris_bo *bo)
 {
    unsigned index = READ_ONCE(bo->index);
 
+   if (index == -1)
+      return -1;
+
    if (index < batch->exec_count && batch->exec_bos[index] == bo)
       return index;
 
@@ -281,6 +334,45 @@ add_bo_to_batch(struct iris_batch *batch, struct iris_bo *bo, bool writable)
    bo->index = batch->exec_count;
    batch->exec_count++;
    batch->aperture_space += bo->size;
+
+   batch->max_gem_handle =
+      MAX2(batch->max_gem_handle, iris_get_backing_bo(bo)->gem_handle);
+}
+
+static void
+flush_for_cross_batch_dependencies(struct iris_batch *batch,
+                                   struct iris_bo *bo,
+                                   bool writable)
+{
+   if (batch->measure && bo == batch->measure->bo)
+      return; /* the measure BO never triggers a flush of other batches */
+
+   /* When a batch uses a buffer for the first time, or newly writes a buffer
+    * it had already referenced, we may need to flush other batches in order
+    * to correctly synchronize them.
+    */
+   for (int b = 0; b < batch->num_other_batches; b++) {
+      struct iris_batch *other_batch = batch->other_batches[b];
+      int other_index = find_exec_index(other_batch, bo); /* -1: not used */
+
+      /* If the buffer is referenced by another batch, and either batch
+       * intends to write it, then flush the other batch and synchronize.
+       *
+       * Consider these cases:
+       *
+       * 1. They read, we read   =>  No synchronization required.
+       * 2. They read, we write  =>  Synchronize (they need the old value)
+       * 3. They write, we read  =>  Synchronize (we need their new value)
+       * 4. They write, we write =>  Synchronize (order writes)
+       *
+       * The read/read case is very common, as multiple batches usually
+       * share a streaming state buffer or shader assembly buffer, and
+       * we want to avoid synchronizing in this case.
+       */
+      if (other_index != -1 &&
+          (writable || BITSET_TEST(other_batch->bos_written, other_index)))
+         iris_batch_flush(other_batch);
+   }
 }
 
 /**
@@ -294,7 +386,6 @@ iris_use_pinned_bo(struct iris_batch *batch,
                    struct iris_bo *bo,
                    bool writable, enum iris_domain access)
 {
-   assert(bo->kflags & EXEC_OBJECT_PINNED);
    assert(bo != batch->bo);
 
    /* Never mark the workaround BO with EXEC_OBJECT_WRITE.  We don't care
@@ -313,44 +404,17 @@ iris_use_pinned_bo(struct iris_batch *batch,
 
    int existing_index = find_exec_index(batch, bo);
 
-   if (existing_index != -1) {
-      /* The BO is already in the list; mark it writable */
-      if (writable)
-         BITSET_SET(batch->bos_written, existing_index);
+   if (existing_index == -1) {
+      flush_for_cross_batch_dependencies(batch, bo, writable);
 
-      return;
-   }
+      ensure_exec_obj_space(batch, 1);
+      add_bo_to_batch(batch, bo, writable);
+   } else if (writable && !BITSET_TEST(batch->bos_written, existing_index)) {
+      flush_for_cross_batch_dependencies(batch, bo, writable);
 
-   if (!batch->measure || bo != batch->measure->bo) {
-      /* This is the first time our batch has seen this BO.  Before we use it,
-       * we may need to flush and synchronize with other batches.
-       */
-      for (int b = 0; b < ARRAY_SIZE(batch->other_batches); b++) {
-         struct iris_batch *other_batch = batch->other_batches[b];
-         int other_index = find_exec_index(other_batch, bo);
-
-         /* If the buffer is referenced by another batch, and either batch
-          * intends to write it, then flush the other batch and synchronize.
-          *
-          * Consider these cases:
-          *
-          * 1. They read, we read   =>  No synchronization required.
-          * 2. They read, we write  =>  Synchronize (they need the old value)
-          * 3. They write, we read  =>  Synchronize (we need their new value)
-          * 4. They write, we write =>  Synchronize (order writes)
-          *
-          * The read/read case is very common, as multiple batches usually
-          * share a streaming state buffer or shader assembly buffer, and
-          * we want to avoid synchronizing in this case.
-          */
-         if (other_index != -1 &&
-             (writable || BITSET_TEST(other_batch->bos_written, other_index)))
-            iris_batch_flush(other_batch);
-      }
+      /* The BO is already in the list; mark it writable */
+      BITSET_SET(batch->bos_written, existing_index);
    }
-
-   ensure_exec_obj_space(batch, 1);
-   add_bo_to_batch(batch, bo, writable);
 }
 
 static void
@@ -359,10 +423,11 @@ create_batch(struct iris_batch *batch)
    struct iris_screen *screen = batch->screen;
    struct iris_bufmgr *bufmgr = screen->bufmgr;
 
+   /* TODO: We probably could suballocate batches... */
    batch->bo = iris_bo_alloc(bufmgr, "command buffer",
-                             BATCH_SZ + BATCH_RESERVED, 1,
-                             IRIS_MEMZONE_OTHER, 0);
-   batch->bo->kflags |= EXEC_OBJECT_CAPTURE;
+                             BATCH_SZ + BATCH_RESERVED, 8,
+                             IRIS_MEMZONE_OTHER,
+                             BO_ALLOC_NO_SUBALLOC | BO_ALLOC_CAPTURE);
    batch->map = iris_bo_map(NULL, batch->bo, MAP_READ | MAP_WRITE);
    batch->map_next = batch->map;
 
@@ -393,19 +458,28 @@ iris_batch_reset(struct iris_batch *batch)
 {
    struct iris_screen *screen = batch->screen;
    struct iris_bufmgr *bufmgr = screen->bufmgr;
+   const struct intel_device_info *devinfo = screen->devinfo;
+
+   u_trace_fini(&batch->trace);
 
    iris_bo_unreference(batch->bo);
    batch->primary_batch_size = 0;
    batch->total_chained_batch_size = 0;
    batch->contains_draw = false;
    batch->contains_fence_signal = false;
-   batch->decoder.surface_base = batch->last_surface_base_address;
+   if (devinfo->ver < 11)
+      batch->decoder.surface_base = batch->last_binder_address;
+   else
+      batch->decoder.bt_pool_base = batch->last_binder_address;
 
    create_batch(batch);
    assert(batch->bo->index == 0);
 
+   memset(batch->bos_written, 0,
+          sizeof(BITSET_WORD) * BITSET_WORDS(batch->exec_array_size));
+
    struct iris_syncobj *syncobj = iris_create_syncobj(bufmgr);
-   iris_batch_add_syncobj(batch, syncobj, I915_EXEC_FENCE_SIGNAL);
+   iris_batch_add_syncobj(batch, syncobj, IRIS_BATCH_FENCE_SIGNAL);
    iris_syncobj_reference(bufmgr, &syncobj, NULL);
 
    assert(!batch->sync_region_depth);
@@ -418,13 +492,17 @@ iris_batch_reset(struct iris_batch *batch)
    add_bo_to_batch(batch, screen->workaround_bo, false);
 
    iris_batch_maybe_noop(batch);
+
+   u_trace_init(&batch->trace, &batch->ice->ds.trace_context);
+   batch->begin_trace_recorded = false;
 }
 
-void
-iris_batch_free(struct iris_batch *batch)
+static void
+iris_batch_free(const struct iris_context *ice, struct iris_batch *batch)
 {
    struct iris_screen *screen = batch->screen;
    struct iris_bufmgr *bufmgr = screen->bufmgr;
+   const struct intel_device_info *devinfo = iris_bufmgr_get_device_info(bufmgr);
 
    for (int i = 0; i < batch->exec_count; i++) {
       iris_bo_unreference(batch->exec_bos[i]);
@@ -448,17 +526,45 @@ iris_batch_free(struct iris_batch *batch)
    batch->map = NULL;
    batch->map_next = NULL;
 
-   iris_destroy_hw_context(bufmgr, batch->hw_ctx_id);
+   switch (devinfo->kmd_type) {
+   case INTEL_KMD_TYPE_I915:
+      iris_i915_destroy_batch(batch);
+      break;
+   case INTEL_KMD_TYPE_XE:
+      iris_xe_destroy_batch(batch);
+      break;
+   default:
+      unreachable("missing");
+   }
 
    iris_destroy_batch_measure(batch->measure);
    batch->measure = NULL;
 
-   _mesa_hash_table_destroy(batch->cache.render, NULL);
+   u_trace_fini(&batch->trace);
 
-   if (INTEL_DEBUG)
+   _mesa_hash_table_destroy(batch->bo_aux_modes, NULL);
+
+   if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS))
       intel_batch_decode_ctx_finish(&batch->decoder);
 }
 
+void
+iris_destroy_batches(struct iris_context *ice)
+{
+   iris_foreach_batch(ice, batch)
+      iris_batch_free(ice, batch); /* includes the KMD-specific destroy */
+}
+
+void iris_batch_maybe_begin_frame(struct iris_batch *batch)
+{
+   struct iris_context *ice = batch->ice;
+   /* Record the begin-frame tracepoint once per value of ice->frame. */
+   if (ice->utrace.begin_frame != ice->frame) {
+      trace_intel_begin_frame(&batch->trace, batch);
+      ice->utrace.begin_frame = ice->utrace.end_frame = ice->frame;
+   }
+}
+
 /**
  * If we've chained to a secondary batch, or are getting near to the end,
  * then flush.  This should only be called between draws.
@@ -523,7 +629,7 @@ add_aux_map_bos_to_batch(struct iris_batch *batch)
 static void
 finish_seqno(struct iris_batch *batch)
 {
-   struct iris_fine_fence *sq = iris_fine_fence_new(batch, IRIS_FENCE_END);
+   struct iris_fine_fence *sq = iris_fine_fence_new(batch);
    if (!sq)
       return;
 
@@ -537,7 +643,7 @@ finish_seqno(struct iris_batch *batch)
 static void
 iris_finish_batch(struct iris_batch *batch)
 {
-   const struct intel_device_info *devinfo = &batch->screen->devinfo;
+   const struct intel_device_info *devinfo = batch->screen->devinfo;
 
    if (devinfo->ver == 12 && batch->name == IRIS_BATCH_RENDER) {
       /* We re-emit constants at the beginning of every batch as a hardware
@@ -555,6 +661,14 @@ iris_finish_batch(struct iris_batch *batch)
 
    finish_seqno(batch);
 
+   trace_intel_end_batch(&batch->trace, batch->name);
+
+   struct iris_context *ice = batch->ice;
+   if (ice->utrace.end_frame != ice->frame) {
+      trace_intel_end_frame(&batch->trace, batch, ice->utrace.end_frame);
+      ice->utrace.end_frame = ice->frame;
+   }
+
    /* Emit MI_BATCH_BUFFER_END to finish our batch. */
    uint32_t *map = batch->map_next;
 
@@ -569,54 +683,43 @@ iris_finish_batch(struct iris_batch *batch)
  * Replace our current GEM context with a new one (in case it got banned).
  */
 static bool
-replace_hw_ctx(struct iris_batch *batch)
+replace_kernel_ctx(struct iris_batch *batch)
 {
    struct iris_screen *screen = batch->screen;
    struct iris_bufmgr *bufmgr = screen->bufmgr;
+   const struct intel_device_info *devinfo = iris_bufmgr_get_device_info(bufmgr);
 
-   uint32_t new_ctx = iris_clone_hw_context(bufmgr, batch->hw_ctx_id);
-   if (!new_ctx)
-      return false;
-
-   iris_destroy_hw_context(bufmgr, batch->hw_ctx_id);
-   batch->hw_ctx_id = new_ctx;
+   threaded_context_unwrap_sync(&batch->ice->ctx);
 
-   /* Notify the context that state must be re-initialized. */
-   iris_lost_context_state(batch);
-
-   return true;
+   switch (devinfo->kmd_type) {
+   case INTEL_KMD_TYPE_I915:
+      return iris_i915_replace_batch(batch);
+   case INTEL_KMD_TYPE_XE:
+      return iris_xe_replace_batch(batch);
+   default:
+      unreachable("missing");
+      return false;
+   }
 }
 
 enum pipe_reset_status
 iris_batch_check_for_reset(struct iris_batch *batch)
 {
    struct iris_screen *screen = batch->screen;
+   struct iris_bufmgr *bufmgr = screen->bufmgr;
+   struct iris_context *ice = batch->ice;
+   const struct iris_kmd_backend *backend;
    enum pipe_reset_status status = PIPE_NO_RESET;
-   struct drm_i915_reset_stats stats = { .ctx_id = batch->hw_ctx_id };
 
-   if (intel_ioctl(screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats))
-      DBG("DRM_IOCTL_I915_GET_RESET_STATS failed: %s\n", strerror(errno));
+   /* Banned context was already signalled to application */
+   if (ice->context_reset_signaled)
+      return status;
 
-   if (stats.batch_active != 0) {
-      /* A reset was observed while a batch from this hardware context was
-       * executing.  Assume that this context was at fault.
-       */
-      status = PIPE_GUILTY_CONTEXT_RESET;
-   } else if (stats.batch_pending != 0) {
-      /* A reset was observed while a batch from this context was in progress,
-       * but the batch was not executing.  In this case, assume that the
-       * context was not at fault.
-       */
-      status = PIPE_INNOCENT_CONTEXT_RESET;
-   }
+   backend = iris_bufmgr_get_kernel_driver_backend(bufmgr);
+   status = backend->batch_check_for_reset(batch);
 
-   if (status != PIPE_NO_RESET) {
-      /* Our context is likely banned, or at least in an unknown state.
-       * Throw it away and start with a fresh context.  Ideally this may
-       * catch the problem before our next execbuf fails with -EIO.
-       */
-      replace_hw_ctx(batch);
-   }
+   if (status != PIPE_NO_RESET)
+      ice->context_reset_signaled = true;
 
    return status;
 }
@@ -624,7 +727,7 @@ iris_batch_check_for_reset(struct iris_batch *batch)
 static void
 move_syncobj_to_batch(struct iris_batch *batch,
                       struct iris_syncobj **p_syncobj,
-                      unsigned flags)
+                      uint32_t flags)
 {
    struct iris_bufmgr *bufmgr = batch->screen->bufmgr;
 
@@ -650,11 +753,15 @@ update_bo_syncobjs(struct iris_batch *batch, struct iris_bo *bo, bool write)
 {
    struct iris_screen *screen = batch->screen;
    struct iris_bufmgr *bufmgr = screen->bufmgr;
+   struct iris_context *ice = batch->ice;
+
+   simple_mtx_assert_locked(iris_bufmgr_get_bo_deps_lock(bufmgr));
 
    /* Make sure bo->deps is big enough */
    if (screen->id >= bo->deps_size) {
       int new_size = screen->id + 1;
-      bo->deps= realloc(bo->deps, new_size * sizeof(bo->deps[0]));
+      bo->deps = realloc(bo->deps, new_size * sizeof(bo->deps[0]));
+      assert(bo->deps);
       memset(&bo->deps[bo->deps_size], 0,
              sizeof(bo->deps[0]) * (new_size - bo->deps_size));
 
@@ -668,63 +775,49 @@ update_bo_syncobjs(struct iris_batch *batch, struct iris_bo *bo, bool write)
     * our code may need to care about all the operations done by every batch
     * on every screen.
     */
-   struct iris_bo_screen_deps *deps = &bo->deps[screen->id];
+   struct iris_bo_screen_deps *bo_deps = &bo->deps[screen->id];
    int batch_idx = batch->name;
 
-#if IRIS_BATCH_COUNT == 2
-   /* Due to the above, we exploit the fact that IRIS_NUM_BATCHES is actually
-    * 2, which means there's only one other batch we need to care about.
-    */
-   int other_batch_idx = 1 - batch_idx;
-#else
-   /* For IRIS_BATCH_COUNT == 3 we can do:
-    *   int other_batch_idxs[IRIS_BATCH_COUNT - 1] = {
-    *      (batch_idx ^ 1) & 1,
-    *      (batch_idx ^ 2) & 2,
-    *   };
-    * For IRIS_BATCH_COUNT == 4 we can do:
-    *   int other_batch_idxs[IRIS_BATCH_COUNT - 1] = {
-    *      (batch_idx + 1) & 3,
-    *      (batch_idx + 2) & 3,
-    *      (batch_idx + 3) & 3,
-    *   };
+   /* Make our batch depend on additional syncobjs depending on what other
+    * batches have been doing to this bo.
+    *
+    * We also look at the dependencies set by our own batch since those could
+    * have come from a different context, and apps don't like it when we don't
+    * do inter-context tracking.
     */
-#error "Implement me."
-#endif
-
-   /* If it is being written to by others, wait on it. */
-   if (deps->write_syncobjs[other_batch_idx])
-      move_syncobj_to_batch(batch, &deps->write_syncobjs[other_batch_idx],
-                            I915_EXEC_FENCE_WAIT);
+   iris_foreach_batch(ice, batch_i) {
+      unsigned i = batch_i->name;
+
+      /* If a write to the bo by batch i is tracked, wait on it. */
+      if (bo_deps->write_syncobjs[i])
+         move_syncobj_to_batch(batch, &bo_deps->write_syncobjs[i],
+                               IRIS_BATCH_FENCE_WAIT);
+
+      /* If we're writing to the bo, also wait on the reads from batch i. */
+      if (write)
+         move_syncobj_to_batch(batch, &bo_deps->read_syncobjs[i],
+                               IRIS_BATCH_FENCE_WAIT);
+   }
 
-   struct iris_syncobj *batch_syncobj = iris_batch_get_signal_syncobj(batch);
+   struct iris_syncobj *batch_syncobj =
+      iris_batch_get_signal_syncobj(batch);
 
+   /* Update bo_deps depending on what we're doing with the bo in this batch
+    * by putting the batch's syncobj in the bo_deps lists accordingly. Only
+    * keep track of the last time we wrote to or read the BO.
+    */
    if (write) {
-      /* If we're writing to it, set our batch's syncobj as write_syncobj so
-       * others can wait on us. Also wait every reader we care about before
-       * writing.
-       */
-      iris_syncobj_reference(bufmgr, &deps->write_syncobjs[batch_idx],
-                              batch_syncobj);
-
-      move_syncobj_to_batch(batch, &deps->read_syncobjs[other_batch_idx],
-                           I915_EXEC_FENCE_WAIT);
-
+      iris_syncobj_reference(bufmgr, &bo_deps->write_syncobjs[batch_idx],
+                             batch_syncobj);
    } else {
-      /* If we're reading, replace the other read from our batch index. */
-      iris_syncobj_reference(bufmgr, &deps->read_syncobjs[batch_idx],
+      iris_syncobj_reference(bufmgr, &bo_deps->read_syncobjs[batch_idx],
                              batch_syncobj);
    }
 }
 
-static void
-update_batch_syncobjs(struct iris_batch *batch)
+void
+iris_batch_update_syncobjs(struct iris_batch *batch)
 {
-   struct iris_bufmgr *bufmgr = batch->screen->bufmgr;
-   simple_mtx_t *bo_deps_lock = iris_bufmgr_get_bo_deps_lock(bufmgr);
-
-   simple_mtx_lock(bo_deps_lock);
-
    for (int i = 0; i < batch->exec_count; i++) {
       struct iris_bo *bo = batch->exec_bos[i];
       bool write = BITSET_TEST(batch->bos_written, i);
@@ -734,107 +827,65 @@ update_batch_syncobjs(struct iris_batch *batch)
 
       update_bo_syncobjs(batch, bo, write);
    }
-   simple_mtx_unlock(bo_deps_lock);
 }
 
 /**
- * Submit the batch to the GPU via execbuffer2.
+ * Convert the syncobj which will be signaled when this batch completes
+ * to a SYNC_FILE object, for use with import/export sync ioctls.
  */
-static int
-submit_batch(struct iris_batch *batch)
+bool
+iris_batch_syncobj_to_sync_file_fd(struct iris_batch *batch, int *out_fd)
 {
-   iris_bo_unmap(batch->bo);
+   int drm_fd = batch->screen->fd;
 
-   struct drm_i915_gem_exec_object2 *validation_list =
-      malloc(batch->exec_count * sizeof(*validation_list));
+   struct iris_syncobj *batch_syncobj =
+      iris_batch_get_signal_syncobj(batch);
 
-   for (int i = 0; i < batch->exec_count; i++) {
-      struct iris_bo *bo = batch->exec_bos[i];
-      bool written = BITSET_TEST(batch->bos_written, i);
-      unsigned extra_flags = 0;
-
-      if (written)
-         extra_flags |= EXEC_OBJECT_WRITE;
-      if (!iris_bo_is_external(bo))
-         extra_flags |= EXEC_OBJECT_ASYNC;
-
-      validation_list[i] = (struct drm_i915_gem_exec_object2) {
-         .handle = bo->gem_handle,
-         .offset = bo->address,
-         .flags  = bo->kflags | extra_flags,
-      };
-   }
-
-   if (INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT)) {
-      dump_fence_list(batch);
-      dump_validation_list(batch, validation_list);
-   }
-
-   if (INTEL_DEBUG & DEBUG_BATCH) {
-      decode_batch(batch);
-   }
-
-   /* The requirement for using I915_EXEC_NO_RELOC are:
-    *
-    *   The addresses written in the objects must match the corresponding
-    *   reloc.address which in turn must match the corresponding
-    *   execobject.offset.
-    *
-    *   Any render targets written to in the batch must be flagged with
-    *   EXEC_OBJECT_WRITE.
-    *
-    *   To avoid stalling, execobject.offset should match the current
-    *   address of that object within the active context.
-    */
-   struct drm_i915_gem_execbuffer2 execbuf = {
-      .buffers_ptr = (uintptr_t) validation_list,
-      .buffer_count = batch->exec_count,
-      .batch_start_offset = 0,
-      /* This must be QWord aligned. */
-      .batch_len = ALIGN(batch->primary_batch_size, 8),
-      .flags = I915_EXEC_RENDER |
-               I915_EXEC_NO_RELOC |
-               I915_EXEC_BATCH_FIRST |
-               I915_EXEC_HANDLE_LUT,
-      .rsvd1 = batch->hw_ctx_id, /* rsvd1 is actually the context ID */
+   struct drm_syncobj_handle syncobj_to_fd_ioctl = {
+      .handle = batch_syncobj->handle,
+      .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
+      .fd = -1,
    };
-
-   if (num_fences(batch)) {
-      execbuf.flags |= I915_EXEC_FENCE_ARRAY;
-      execbuf.num_cliprects = num_fences(batch);
-      execbuf.cliprects_ptr =
-         (uintptr_t)util_dynarray_begin(&batch->exec_fences);
-   }
-
-   int ret = 0;
-   if (!batch->screen->devinfo.no_hw &&
-       intel_ioctl(batch->screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf))
-      ret = -errno;
-
-   for (int i = 0; i < batch->exec_count; i++) {
-      struct iris_bo *bo = batch->exec_bos[i];
-
-      bo->idle = false;
-      bo->index = -1;
-
-      iris_bo_unreference(bo);
+   if (intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD,
+                   &syncobj_to_fd_ioctl)) {
+      fprintf(stderr, "DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD ioctl failed (%d)\n",
+              errno);
+      return false;
    }
 
-   free(validation_list);
+   assert(syncobj_to_fd_ioctl.fd >= 0);
+   *out_fd = syncobj_to_fd_ioctl.fd;
 
-   return ret;
+   return true;
 }
 
-static const char *
-batch_name_to_string(enum iris_batch_name name)
+const char *
+iris_batch_name_to_string(enum iris_batch_name name)
 {
    const char *names[IRIS_BATCH_COUNT] = {
       [IRIS_BATCH_RENDER]  = "render",
       [IRIS_BATCH_COMPUTE] = "compute",
+      [IRIS_BATCH_BLITTER] = "blitter",
    };
    return names[name];
 }
 
+bool
+iris_batch_is_banned(struct iris_bufmgr *bufmgr, int ret)
+{
+   enum intel_kmd_type kmd_type = iris_bufmgr_get_device_info(bufmgr)->kmd_type;
+   /* ret is the negative error code returned by a failed batch submit. */
+   assert(ret < 0);
+   /* On i915, EIO means our context is banned, while on Xe, ECANCELED means
+    * our exec queue was banned.
+    */
+   if ((kmd_type == INTEL_KMD_TYPE_I915 && ret == -EIO) ||
+       (kmd_type == INTEL_KMD_TYPE_XE && ret == -ECANCELED))
+      return true;
+
+   return false;
+}
+
 /**
  * Flush the batch buffer, submitting it to the GPU and resetting it so
  * we're ready to emit the next batch.
@@ -843,33 +894,39 @@ void
 _iris_batch_flush(struct iris_batch *batch, const char *file, int line)
 {
    struct iris_screen *screen = batch->screen;
+   struct iris_context *ice = batch->ice;
+   struct iris_bufmgr *bufmgr = screen->bufmgr;
 
    /* If a fence signals we need to flush it. */
    if (iris_batch_bytes_used(batch) == 0 && !batch->contains_fence_signal)
       return;
 
-   iris_measure_batch_end(batch->ice, batch);
+   iris_measure_batch_end(ice, batch);
 
    iris_finish_batch(batch);
 
-   update_batch_syncobjs(batch);
-
-   if (INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT | DEBUG_PIPE_CONTROL)) {
+   if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_SUBMIT | DEBUG_PIPE_CONTROL)) {
       const char *basefile = strstr(file, "iris/");
       if (basefile)
          file = basefile + 5;
 
+      enum intel_kmd_type kmd_type = iris_bufmgr_get_device_info(bufmgr)->kmd_type;
+      uint32_t batch_ctx_id = kmd_type == INTEL_KMD_TYPE_I915 ?
+                              batch->i915.ctx_id : batch->xe.exec_queue_id;
       fprintf(stderr, "%19s:%-3d: %s batch [%u] flush with %5db (%0.1f%%) "
               "(cmds), %4d BOs (%0.1fMb aperture)\n",
-              file, line, batch_name_to_string(batch->name), batch->hw_ctx_id,
-              batch->total_chained_batch_size,
+              file, line, iris_batch_name_to_string(batch->name),
+              batch_ctx_id, batch->total_chained_batch_size,
               100.0f * batch->total_chained_batch_size / BATCH_SZ,
               batch->exec_count,
               (float) batch->aperture_space / (1024 * 1024));
 
    }
 
-   int ret = submit_batch(batch);
+   uint64_t start_ts = intel_ds_begin_submit(&batch->ds);
+   uint64_t submission_id = batch->ds.submission_id;
+   int ret = iris_bufmgr_get_kernel_driver_backend(bufmgr)->batch_submit(batch);
+   intel_ds_end_submit(&batch->ds, start_ts);
 
    /* When batch submission fails, our end-of-batch syncobj remains
     * unsignalled, and in fact is not even considered submitted.
@@ -887,6 +944,7 @@ _iris_batch_flush(struct iris_batch *batch, const char *file, int line)
       iris_syncobj_signal(screen->bufmgr, iris_batch_get_signal_syncobj(batch));
 
    batch->exec_count = 0;
+   batch->max_gem_handle = 0;
    batch->aperture_space = 0;
 
    util_dynarray_foreach(&batch->syncobjs, struct iris_syncobj *, s)
@@ -895,32 +953,39 @@ _iris_batch_flush(struct iris_batch *batch, const char *file, int line)
 
    util_dynarray_clear(&batch->exec_fences);
 
-   if (INTEL_DEBUG & DEBUG_SYNC) {
+   if (INTEL_DEBUG(DEBUG_SYNC)) {
       dbg_printf("waiting for idle\n");
       iris_bo_wait_rendering(batch->bo); /* if execbuf failed; this is a nop */
    }
 
+   if (u_trace_should_process(&ice->ds.trace_context))
+      iris_utrace_flush(batch, submission_id);
+
    /* Start a new batch buffer. */
    iris_batch_reset(batch);
 
-   /* EIO means our context is banned.  In this case, try and replace it
+   /* Check if context or engine was banned, if yes try to replace it
     * with a new logical context, and inform iris_context that all state
     * has been lost and needs to be re-initialized.  If this succeeds,
     * dubiously claim success...
-    * Also handle ENOMEM here.
     */
-   if ((ret == -EIO || ret == -ENOMEM) && replace_hw_ctx(batch)) {
+   if (ret && iris_batch_is_banned(bufmgr, ret)) {
+      enum pipe_reset_status status = iris_batch_check_for_reset(batch);
+
+      if (status != PIPE_NO_RESET || ice->context_reset_signaled)
+         replace_kernel_ctx(batch);
+
       if (batch->reset->reset) {
          /* Tell gallium frontends the device is lost and it was our fault. */
-         batch->reset->reset(batch->reset->data, PIPE_GUILTY_CONTEXT_RESET);
+         batch->reset->reset(batch->reset->data, status);
       }
 
       ret = 0;
    }
 
    if (ret < 0) {
-#ifdef DEBUG
-      const bool color = INTEL_DEBUG & DEBUG_COLOR;
+#if MESA_DEBUG
+      const bool color = INTEL_DEBUG(DEBUG_COLOR);
       fprintf(stderr, "%siris: Failed to submit batchbuffer: %-80s%s\n",
               color ? "\e[1;41m" : "", strerror(-ret), color ? "\e[0m" : "");
 #endif