Diffstat (limited to 'src/gallium/drivers/freedreno/freedreno_batch.h')
-rw-r--r--  src/gallium/drivers/freedreno/freedreno_batch.h  200
 1 file changed, 143 insertions(+), 57 deletions(-)
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h
index 6123016f759..b419d8ec690 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch.h
@@ -28,7 +28,6 @@
#define FREEDRENO_BATCH_H_
#include "util/list.h"
-#include "util/set.h"
#include "util/simple_mtx.h"
#include "util/u_inlines.h"
#include "util/u_queue.h"
@@ -46,7 +45,53 @@ struct fd_resource;
struct fd_batch_key;
struct fd_batch_result;
-/* A batch tracks everything about a cmdstream batch/submit, including the
+/**
+ * A subpass is a fragment of a batch potentially starting with a clear.
+ * If the app does a mid-batch clear, that clear and subsequent draws
+ * can be split out into another sub-pass. At gmem time, the appropriate
+ * sysmem or gmem clears can be interleaved with the CP_INDIRECT_BUFFER
+ * to the subpass's draw cmdstream.
+ *
+ * For depth clears, a replacement LRZ buffer can be allocated (the
+ * clear is still inserted into the prologue cmdstream since it needs
+ * to be executed even in sysmem or if we aren't binning, because
+ * later batches could depend on the LRZ state).  The alternative
+ * would be to invalidate LRZ for draws after the start of the new
+ * subpass.
+ */
+struct fd_batch_subpass {
+ struct list_head node;
+
+ /** draw pass cmdstream: */
+ struct fd_ringbuffer *draw;
+
+ /** for the gmem code to stash per-tile, per-subpass clears */
+ struct fd_ringbuffer *subpass_clears;
+
+ BITMASK_ENUM(fd_buffer_mask) fast_cleared;
+
+ union pipe_color_union clear_color[MAX_RENDER_TARGETS];
+ double clear_depth;
+ unsigned clear_stencil;
+
+ /**
+ * The number of draws emitted to this subpass. If it is greater than
+ * zero, a clear triggers creating a new subpass (because clears must
+ * always come at the start of a subpass).
+ */
+ unsigned num_draws;
+
+ /**
+ * If a subpass starts with an LRZ clear, it gets a new LRZ buffer.
+ * fd_resource::lrz always tracks the current lrz buffer, but at
+ * binning/gmem time we need to know which lrz buffer was current
+ * at the time draws were emitted to the subpass.  That is tracked
+ * here.
+ */
+ struct fd_bo *lrz;
+};
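As a rough illustration of the num_draws rule above, here is a hedged sketch (not the driver's actual clear path) of how a mid-batch clear could be routed to a fresh subpass via fd_batch_create_subpass(); the example_handle_clear() name and the plain 'unsigned' mask type are illustrative only:

static void
example_handle_clear(struct fd_batch *batch, unsigned buffers /* FD_BUFFER_* mask */)
{
   struct fd_batch_subpass *subpass = batch->subpass;

   /* Clears must come at the start of a subpass, so if draws have
    * already been emitted, split off a new subpass first:
    */
   if (subpass->num_draws > 0)
      subpass = fd_batch_create_subpass(batch);

   /* record the fast-clear for the (possibly new) subpass: */
   subpass->fast_cleared |= buffers;
}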
+
+/**
+ * A batch tracks everything about a cmdstream batch/submit, including the
* ringbuffers used for binning, draw, and gmem cmds, list of associated
* fd_resource-s, etc.
*/
@@ -71,20 +116,13 @@ struct fd_batch {
* restore. Masks of PIPE_CLEAR_*
*
* The 'cleared' bits will be set for buffers which are *entirely*
- * cleared, and 'partial_cleared' bits will be set if you must
- * check cleared_scissor.
+ * cleared.
*
* The 'invalidated' bits are set for cleared buffers, and buffers
* where the contents are undefined, ie. what we don't need to restore
* to gmem.
*/
- enum {
- /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
- FD_BUFFER_COLOR = PIPE_CLEAR_COLOR,
- FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
- FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
- FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
- } invalidated, cleared, fast_cleared, restore, resolve;
+ BITMASK_ENUM(fd_buffer_mask) invalidated, cleared, restore, resolve;
/* is this a non-draw batch (ie compute/blit which has no pfb state)? */
bool nondraw : 1;
@@ -103,13 +141,16 @@ struct fd_batch {
* color_logic_Op (since those functions are disabled when by-
* passing GMEM.
*/
- enum fd_gmem_reason gmem_reason;
+ BITMASK_ENUM(fd_gmem_reason) gmem_reason;
/* At submit time, once we've decided that this batch will use GMEM
* rendering, the appropriate gmem state is looked up:
*/
const struct fd_gmem_stateobj *gmem_state;
+ /* Driver-specific barrier/flush flags: */
+ unsigned barrier;
+
/* A calculated "draw cost" value for the batch, which tries to
* estimate the bandwidth-per-sample of all the draws according
* to:
@@ -157,6 +198,9 @@ struct fd_batch {
/* Track the maximal bounds of the scissor of all the draws within a
* batch. Used at the tile rendering step (fd_gmem_render_tiles(),
* mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
+ *
+ * Note that unlike gallium state, maxx/maxy are inclusive (for a
+ * fully covered 512x512 target the scissor would be 0,0 to 511,511)
*/
struct pipe_scissor_state max_scissor;
@@ -192,7 +236,24 @@ struct fd_batch {
struct fd_submit *submit;
- /** draw pass cmdstream: */
+ /**
+ * List of fd_batch_subpass.
+ */
+ struct list_head subpasses;
+
+#define foreach_subpass(subpass, batch) \
+ list_for_each_entry (struct fd_batch_subpass, subpass, &batch->subpasses, node)
+#define foreach_subpass_safe(subpass, batch) \
+ list_for_each_entry_safe (struct fd_batch_subpass, subpass, &batch->subpasses, node)
+
+ /**
+ * The current subpass.
+ */
+ struct fd_batch_subpass *subpass;
+
+ /**
+ * A reference to the current subpass's draw cmds, kept for backwards compat.
+ */
struct fd_ringbuffer *draw;
/** binning pass cmdstream: */
struct fd_ringbuffer *binning;
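As a hedged sketch of how the gmem code might walk this list per tile, interleaving per-subpass clears with the subpass draw IBs (emit_ib() stands in for whatever CP_INDIRECT_BUFFER helper the backend actually uses):

static void
example_emit_tile_draws(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   foreach_subpass (subpass, batch) {
      /* per-tile clears for this subpass, if any, go first: */
      if (subpass->subpass_clears)
         emit_ib(ring, subpass->subpass_clears);   /* hypothetical helper */

      /* then the subpass's draw cmdstream: */
      emit_ib(ring, subpass->draw);
   }
}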
@@ -203,14 +264,13 @@ struct fd_batch {
struct fd_ringbuffer *prologue;
/** epilogue cmdstream (executed after each tile): */
- struct fd_ringbuffer *epilogue;
+ struct fd_ringbuffer *tile_epilogue;
- struct fd_ringbuffer *tile_setup;
- struct fd_ringbuffer *tile_fini;
+ /** epilogue cmdstream (executed after all tiles): */
+ struct fd_ringbuffer *epilogue;
- union pipe_color_union clear_color[MAX_RENDER_TARGETS];
- double clear_depth;
- unsigned clear_stencil;
+ struct fd_ringbuffer *tile_loads;
+ struct fd_ringbuffer *tile_store;
/**
* hw query related state:
@@ -221,6 +281,13 @@ struct fd_batch {
*/
uint32_t next_sample_offset;
+ /* The # of pipeline-stats queries running. In case of nested
+ * queries using {START/STOP}_{PRIMITIVE,FRAGMENT,COMPUTE}_CNTRS,
+ * we need to start only on the first one and stop only on the
+ * last one.
+ */
+ uint8_t pipeline_stats_queries_active[3];
+
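A hedged sketch of the start-first/stop-last bookkeeping these counters enable; the array index and the emit_*_cntrs() helpers are illustrative names, not the driver's actual ones:

static void
example_begin_prim_query(struct fd_batch *batch)
{
   /* index 0 standing in for the PRIMITIVE counter group: */
   if (batch->pipeline_stats_queries_active[0]++ == 0)
      emit_start_primitive_cntrs(batch);   /* hypothetical helper */
}

static void
example_end_prim_query(struct fd_batch *batch)
{
   /* only stop the hw counters when the last query ends: */
   if (--batch->pipeline_stats_queries_active[0] == 0)
      emit_stop_primitive_cntrs(batch);    /* hypothetical helper */
}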
/* cached samples (in case multiple queries need to reference
* the same sample snapshot)
*/
@@ -240,39 +307,27 @@ struct fd_batch {
uint32_t query_tile_stride;
/*@}*/
- /* Set of references to resources used by currently-unsubmitted batch (read
- * or write). Note that this set may not include all BOs referenced by the
- * batch due to fd_bc_resource_invalidate().
+ /* Set of resources used by currently-unsubmitted batch (read or
+ * write).. does not hold a reference to the resource.
*/
struct set *resources;
- BITSET_WORD *bos;
- uint32_t bos_size;
-
/** key in batch-cache (if not null): */
struct fd_batch_key *key;
uint32_t hash;
/** set of dependent batches.. holds refs to dependent batches: */
- struct set *dependents;
-
- /* Buffer for tessellation engine input
- */
- struct fd_bo *tessfactor_bo;
- uint32_t tessfactor_size;
-
- /* Buffer for passing parameters between TCS and TES
- */
- struct fd_bo *tessparam_bo;
- uint32_t tessparam_size;
-
- struct fd_ringbuffer *tess_addrs_constobj;
+ uint32_t dependents_mask;
};
struct fd_batch *fd_batch_create(struct fd_context *ctx, bool nondraw);
-void fd_batch_reset(struct fd_batch *batch) assert_dt;
+struct fd_batch_subpass *fd_batch_create_subpass(struct fd_batch *batch) assert_dt;
+
+void fd_batch_set_fb(struct fd_batch *batch, const struct pipe_framebuffer_state *pfb) assert_dt;
+
void fd_batch_flush(struct fd_batch *batch) assert_dt;
+bool fd_batch_has_dep(struct fd_batch *batch, struct fd_batch *dep) assert_dt;
void fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep) assert_dt;
void fd_batch_resource_write(struct fd_batch *batch,
struct fd_resource *rsc) assert_dt;
@@ -287,34 +342,52 @@ struct fd_batch_key *fd_batch_key_clone(void *mem_ctx,
/* not called directly: */
void __fd_batch_describe(char *buf, const struct fd_batch *batch) assert_dt;
+void __fd_batch_destroy_locked(struct fd_batch *batch);
void __fd_batch_destroy(struct fd_batch *batch);
+/*
+ * NOTE the rule is, you need to hold the screen->lock when destroying
+ * a batch.. so either use fd_batch_reference() (which grabs the lock
+ * for you) if you don't hold the lock, or fd_batch_reference_locked()
+ * if you do hold the lock.
+ *
+ * WARNING the _locked() version can briefly drop the lock. Without
+ * recursive mutexes, I'm not sure there is much else we can do (since
+ * __fd_batch_destroy() needs to unref resources)
+ *
+ * WARNING you must acquire the screen->lock and use the _locked()
+ * version if the batch being ref'd can disappear out from under
+ * you.
+ */
+
static inline void
-fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch)
+fd_batch_reference_locked(struct fd_batch **ptr, struct fd_batch *batch)
{
struct fd_batch *old_batch = *ptr;
+ /* only need lock if a reference is dropped: */
+ if (old_batch)
+ fd_screen_assert_locked(old_batch->ctx->screen);
+
if (pipe_reference_described(
&(*ptr)->reference, &batch->reference,
(debug_reference_descriptor)__fd_batch_describe))
- __fd_batch_destroy(old_batch);
+ __fd_batch_destroy_locked(old_batch);
*ptr = batch;
}
static inline void
-fd_batch_unlock_submit(struct fd_batch *batch)
+fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch)
{
-}
+ struct fd_batch *old_batch = *ptr;
-/**
- * Returns true if emit-lock was acquired, false if failed to acquire lock,
- * ie. batch already flushed.
- */
-static inline bool MUST_CHECK
-fd_batch_lock_submit(struct fd_batch *batch)
-{
- return !batch->flushed;
+ if (pipe_reference_described(
+ &(*ptr)->reference, &batch->reference,
+ (debug_reference_descriptor)__fd_batch_describe))
+ __fd_batch_destroy(old_batch);
+
+ *ptr = batch;
}
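A small usage sketch of the locking rule described above, assuming only what is visible in this header (the example_* wrappers are illustrative):

static void
example_drop_batch_ref(struct fd_batch **batchp)
{
   /* not holding screen->lock: the plain variant takes it as needed */
   fd_batch_reference(batchp, NULL);
}

static void
example_drop_batch_ref_locked(struct fd_context *ctx, struct fd_batch **batchp)
{
   /* caller already holds screen->lock: */
   fd_screen_assert_locked(ctx->screen);
   fd_batch_reference_locked(batchp, NULL);
}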
/**
@@ -325,7 +398,7 @@ static inline void
fd_batch_needs_flush(struct fd_batch *batch)
{
batch->needs_flush = true;
- fd_fence_ref(&batch->ctx->last_fence, NULL);
+ fd_pipe_fence_ref(&batch->ctx->last_fence, NULL);
}
/* Since we reorder batches and can pause/resume queries (notably for disabling
@@ -337,8 +410,10 @@ fd_batch_update_queries(struct fd_batch *batch) assert_dt
{
struct fd_context *ctx = batch->ctx;
- if (ctx->query_update_batch)
- ctx->query_update_batch(batch, false);
+ if (!(ctx->dirty & FD_DIRTY_QUERY))
+ return;
+
+ ctx->query_update_batch(batch, false);
}
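The FD_DIRTY_QUERY check above implies that query begin/end paths flag the context dirty, so only draws following a query state change pay for query_update_batch(). A hedged sketch of that side of the contract (where this bit actually gets set is not shown in this header):

static inline void
example_mark_query_state_dirty(struct fd_context *ctx)
{
   /* assumes FD_DIRTY_QUERY is one of the ctx->dirty bits, as the
    * check in fd_batch_update_queries() implies:
    */
   ctx->dirty |= FD_DIRTY_QUERY;
}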
static inline void
@@ -346,8 +421,7 @@ fd_batch_finish_queries(struct fd_batch *batch) assert_dt
{
struct fd_context *ctx = batch->ctx;
- if (ctx->query_update_batch)
- ctx->query_update_batch(batch, true);
+ ctx->query_update_batch(batch, true);
}
static inline void
@@ -371,11 +445,23 @@ fd_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
/* Get per-tile epilogue */
static inline struct fd_ringbuffer *
+fd_batch_get_tile_epilogue(struct fd_batch *batch)
+{
+ if (batch->tile_epilogue == NULL) {
+ batch->tile_epilogue = fd_submit_new_ringbuffer(batch->submit, 0x1000,
+ FD_RINGBUFFER_GROWABLE);
+ }
+
+ return batch->tile_epilogue;
+}
+
+/* Get epilogue run after all tiles */
+static inline struct fd_ringbuffer *
fd_batch_get_epilogue(struct fd_batch *batch)
{
if (batch->epilogue == NULL) {
batch->epilogue = fd_submit_new_ringbuffer(batch->submit, 0x1000,
- (enum fd_ringbuffer_flags)0);
+ FD_RINGBUFFER_GROWABLE);
}
return batch->epilogue;