Diffstat (limited to 'src/gallium/drivers/iris/iris_pipe_control.c')
 src/gallium/drivers/iris/iris_pipe_control.c | 104 ++++++++++++++++++-----
 1 file changed, 87 insertions(+), 17 deletions(-)
diff --git a/src/gallium/drivers/iris/iris_pipe_control.c b/src/gallium/drivers/iris/iris_pipe_control.c
index 33c904740da..32dd33ba1b9 100644
--- a/src/gallium/drivers/iris/iris_pipe_control.c
+++ b/src/gallium/drivers/iris/iris_pipe_control.c
@@ -184,25 +184,42 @@ iris_emit_buffer_barrier_for(struct iris_batch *batch,
struct iris_bo *bo,
enum iris_domain access)
{
+ const struct intel_device_info *devinfo = batch->screen->devinfo;
+
+ const bool access_via_l3 = iris_domain_is_l3_coherent(devinfo, access);
+
const uint32_t all_flush_bits = (PIPE_CONTROL_CACHE_FLUSH_BITS |
PIPE_CONTROL_STALL_AT_SCOREBOARD |
PIPE_CONTROL_FLUSH_ENABLE);
const uint32_t flush_bits[NUM_IRIS_DOMAINS] = {
[IRIS_DOMAIN_RENDER_WRITE] = PIPE_CONTROL_RENDER_TARGET_FLUSH,
[IRIS_DOMAIN_DEPTH_WRITE] = PIPE_CONTROL_DEPTH_CACHE_FLUSH,
- [IRIS_DOMAIN_DATA_WRITE] = PIPE_CONTROL_DATA_CACHE_FLUSH,
- [IRIS_DOMAIN_OTHER_WRITE] = PIPE_CONTROL_FLUSH_ENABLE,
+ [IRIS_DOMAIN_DATA_WRITE] = PIPE_CONTROL_FLUSH_HDC,
+ /* OTHER_WRITE includes "VF Cache Invalidate" to make sure that any
+ * stream output writes are finished. CS stall is added implicitly.
+ */
+ [IRIS_DOMAIN_OTHER_WRITE] = PIPE_CONTROL_FLUSH_ENABLE |
+ PIPE_CONTROL_VF_CACHE_INVALIDATE,
[IRIS_DOMAIN_VF_READ] = PIPE_CONTROL_STALL_AT_SCOREBOARD,
+ [IRIS_DOMAIN_SAMPLER_READ] = PIPE_CONTROL_STALL_AT_SCOREBOARD,
+ [IRIS_DOMAIN_PULL_CONSTANT_READ] = PIPE_CONTROL_STALL_AT_SCOREBOARD,
[IRIS_DOMAIN_OTHER_READ] = PIPE_CONTROL_STALL_AT_SCOREBOARD,
};
const uint32_t invalidate_bits[NUM_IRIS_DOMAINS] = {
[IRIS_DOMAIN_RENDER_WRITE] = PIPE_CONTROL_RENDER_TARGET_FLUSH,
[IRIS_DOMAIN_DEPTH_WRITE] = PIPE_CONTROL_DEPTH_CACHE_FLUSH,
- [IRIS_DOMAIN_DATA_WRITE] = PIPE_CONTROL_DATA_CACHE_FLUSH,
+ [IRIS_DOMAIN_DATA_WRITE] = PIPE_CONTROL_FLUSH_HDC,
[IRIS_DOMAIN_OTHER_WRITE] = PIPE_CONTROL_FLUSH_ENABLE,
[IRIS_DOMAIN_VF_READ] = PIPE_CONTROL_VF_CACHE_INVALIDATE,
- [IRIS_DOMAIN_OTHER_READ] = (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
- PIPE_CONTROL_CONST_CACHE_INVALIDATE),
+ [IRIS_DOMAIN_SAMPLER_READ] = PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE,
+ [IRIS_DOMAIN_PULL_CONSTANT_READ] = PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+ (iris_indirect_ubos_use_sampler(batch->screen) ?
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE :
+ PIPE_CONTROL_DATA_CACHE_FLUSH),
+ };
+ const uint32_t l3_flush_bits[NUM_IRIS_DOMAINS] = {
+ [IRIS_DOMAIN_RENDER_WRITE] = PIPE_CONTROL_TILE_CACHE_FLUSH,
+ [IRIS_DOMAIN_DEPTH_WRITE] = PIPE_CONTROL_TILE_CACHE_FLUSH,
+ [IRIS_DOMAIN_DATA_WRITE] = PIPE_CONTROL_DATA_CACHE_FLUSH,
};
uint32_t bits = 0;
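
As a reading aid for the tables above: the writing domain selects its flush bits, the reading domain selects its invalidate bits, and l3_flush_bits is only needed when dirty data has to travel past L3 out to memory. A minimal standalone sketch of that lookup, using simplified stand-in types rather than the driver's actual enums:

   #include <stdint.h>
   #include <stdbool.h>

   /* Stand-in domain enum; the real driver uses enum iris_domain. */
   enum domain { DOM_RENDER_WRITE, DOM_DEPTH_WRITE, DOM_DATA_WRITE, NUM_DOMS };

   /* The writer domain picks the flush bits, the reader domain picks the
    * invalidate bits, and the writer's L3 flush bits are only added when
    * the reader bypasses L3, so the data must reach memory.
    */
   static uint32_t
   barrier_bits(enum domain writer, enum domain reader, bool reader_via_l3,
                const uint32_t flush[NUM_DOMS],
                const uint32_t invalidate[NUM_DOMS],
                const uint32_t l3_flush[NUM_DOMS])
   {
      uint32_t bits = flush[writer] | invalidate[reader];
      if (!reader_via_l3)
         bits |= l3_flush[writer];
      return bits;
   }
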
@@ -212,6 +229,8 @@ iris_emit_buffer_barrier_for(struct iris_batch *batch,
*/
for (unsigned i = 0; i < IRIS_DOMAIN_OTHER_WRITE; i++) {
assert(!iris_domain_is_read_only(i));
+ assert(iris_domain_is_l3_coherent(devinfo, i));
+
if (i != access) {
const uint64_t seqno = READ_ONCE(bo->last_seqnos[i]);
@@ -223,8 +242,19 @@ iris_emit_buffer_barrier_for(struct iris_batch *batch,
if (seqno > batch->coherent_seqnos[access][i]) {
bits |= invalidate_bits[access];
- if (seqno > batch->coherent_seqnos[i][i])
- bits |= flush_bits[i];
+ if (access_via_l3) {
+ /* Both domains share L3. If the most recent read/write access
+ * in domain `i' isn't visible to L3, then flush it to L3.
+ */
+ if (seqno > batch->l3_coherent_seqnos[i])
+ bits |= flush_bits[i];
+ } else {
+ /* Domain `i' is L3 coherent but the specified domain is not.
+ * Flush both this cache and L3 out to memory.
+ */
+ if (seqno > batch->coherent_seqnos[i][i])
+ bits |= flush_bits[i] | l3_flush_bits[i];
+ }
}
}
}
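
The comparisons above are the core of the seqno-based cache tracking: bo->last_seqnos[d] is bumped by every access from domain d, batch->coherent_seqnos[a][d] records the most recent access from d that is already visible to a, and batch->l3_coherent_seqnos[d] records the most recent access from d that is already visible to L3. A self-contained sketch of the flush decision, with simplified stand-in structures:

   #include <stdint.h>
   #include <stdbool.h>

   #define N_DOMS 8 /* stand-in for NUM_IRIS_DOMAINS */

   struct bo_state { uint64_t last_seqnos[N_DOMS]; };

   struct batch_state {
      uint64_t coherent_seqnos[N_DOMS][N_DOMS]; /* visible to [a] from [d] */
      uint64_t l3_coherent_seqnos[N_DOMS];      /* visible to L3 from [d]  */
   };

   /* A write from domain `d' must be flushed for an access from domain `a'
    * when it happened after the last point known to be visible: visibility
    * to L3 suffices if `a' reads through L3, otherwise the write has to
    * have reached memory.
    */
   static bool
   needs_flush(const struct bo_state *bo, const struct batch_state *batch,
               unsigned a, unsigned d, bool access_via_l3)
   {
      const uint64_t seqno = bo->last_seqnos[d];

      if (seqno <= batch->coherent_seqnos[a][d])
         return false; /* already visible to the accessing domain */

      const uint64_t visible = access_via_l3 ?
         batch->l3_coherent_seqnos[d] : batch->coherent_seqnos[d][d];

      return seqno > visible;
   }
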
@@ -239,10 +269,14 @@ iris_emit_buffer_barrier_for(struct iris_batch *batch,
assert(iris_domain_is_read_only(i));
const uint64_t seqno = READ_ONCE(bo->last_seqnos[i]);
+ const uint64_t last_visible_seqno =
+ iris_domain_is_l3_coherent(devinfo, i) ?
+ batch->l3_coherent_seqnos[i] : batch->coherent_seqnos[i][i];
+
/* Flush if the most recent access from this domain occurred
* after its most recent flush.
*/
- if (seqno > batch->coherent_seqnos[i][i])
+ if (seqno > last_visible_seqno)
bits |= flush_bits[i];
}
}
@@ -255,6 +289,8 @@ iris_emit_buffer_barrier_for(struct iris_batch *batch,
const unsigned i = IRIS_DOMAIN_OTHER_WRITE;
const uint64_t seqno = READ_ONCE(bo->last_seqnos[i]);
+ assert(!iris_domain_is_l3_coherent(devinfo, i));
+
/* Invalidate unless the most recent read/write access from this
* domain is already guaranteed to be visible to the specified
* domain. Flush if the most recent access from this domain
@@ -263,25 +299,46 @@ iris_emit_buffer_barrier_for(struct iris_batch *batch,
if (seqno > batch->coherent_seqnos[access][i]) {
bits |= invalidate_bits[access];
+ /* There is a non-L3-coherent write that isn't visible to the
+ * specified domain. If the access is via L3, then it might see
+ * stale L3 data that was loaded before that write. In this case,
+ * we try to invalidate all read-only sections of the L3 cache.
+ */
+ if (access_via_l3 && seqno > batch->l3_coherent_seqnos[i])
+ bits |= PIPE_CONTROL_L3_RO_INVALIDATE_BITS;
+
if (seqno > batch->coherent_seqnos[i][i])
bits |= flush_bits[i];
}
if (bits) {
+ /* Stall-at-scoreboard is not supported by the compute pipeline; use the
+ * documented sequence of two PIPE_CONTROLs, with PIPE_CONTROL_FLUSH_ENABLE
+ * set in the second one, to obtain a similar effect.
+ */
+ const bool compute_stall_sequence = batch->name == IRIS_BATCH_COMPUTE &&
+ (bits & PIPE_CONTROL_STALL_AT_SCOREBOARD) &&
+ !(bits & PIPE_CONTROL_CACHE_FLUSH_BITS);
+
/* Stall-at-scoreboard is not expected to work in combination with other
* flush bits.
*/
if (bits & PIPE_CONTROL_CACHE_FLUSH_BITS)
bits &= ~PIPE_CONTROL_STALL_AT_SCOREBOARD;
+ if (batch->name == IRIS_BATCH_COMPUTE)
+ bits &= ~PIPE_CONTROL_GRAPHICS_BITS;
+
/* Emit any required flushes and invalidations. */
- if (bits & all_flush_bits)
+ if ((bits & all_flush_bits) || compute_stall_sequence)
iris_emit_end_of_pipe_sync(batch, "cache tracker: flush",
bits & all_flush_bits);
- if (bits & ~all_flush_bits)
+ if ((bits & ~all_flush_bits) || compute_stall_sequence)
iris_emit_pipe_control_flush(batch, "cache tracker: invalidate",
- bits & ~all_flush_bits);
+ (bits & ~all_flush_bits) |
+ (compute_stall_sequence ?
+ PIPE_CONTROL_FLUSH_ENABLE : 0));
}
}
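
When compute_stall_sequence is set and no other bits survive the masking, the emit path above reduces to the documented two-PIPE_CONTROL shape. A distilled sketch reusing the driver helpers shown in the diff (not additional patch code):

   if (compute_stall_sequence) {
      /* First PIPE_CONTROL: end-of-pipe sync, drains outstanding work. */
      iris_emit_end_of_pipe_sync(batch, "cache tracker: flush", 0);
      /* Second PIPE_CONTROL: FLUSH_ENABLE waits on the first one, which
       * approximates STALL_AT_SCOREBOARD on the compute pipeline.
       */
      iris_emit_pipe_control_flush(batch, "cache tracker: invalidate",
                                   PIPE_CONTROL_FLUSH_ENABLE);
   }
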
@@ -351,16 +408,29 @@ iris_memory_barrier(struct pipe_context *ctx, unsigned flags)
PIPE_CONTROL_CONST_CACHE_INVALIDATE;
}
- if (flags & (PIPE_BARRIER_TEXTURE | PIPE_BARRIER_FRAMEBUFFER)) {
+ if (flags & PIPE_BARRIER_TEXTURE)
+ bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+
+ if (flags & PIPE_BARRIER_FRAMEBUFFER) {
+ /* The caller may have issued a render target read and a data cache data
+ * port write in the same draw call. Depending on the hardware, iris
+ * performs render target reads with either the sampler or the render
+ * cache data port. If the next framebuffer access is a render target
+ * read, the previously affected caches must be invalidated.
+ */
bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_RENDER_TARGET_FLUSH;
}
- for (int i = 0; i < IRIS_BATCH_COUNT; i++) {
- if (ice->batches[i].contains_draw) {
- iris_batch_maybe_flush(&ice->batches[i], 24);
- iris_emit_pipe_control_flush(&ice->batches[i], "API: memory barrier",
- bits);
+ iris_foreach_batch(ice, batch) {
+ const unsigned allowed_bits =
+ batch->name == IRIS_BATCH_COMPUTE ? ~PIPE_CONTROL_GRAPHICS_BITS : ~0u;
+
+ if (batch->contains_draw) {
+ iris_batch_maybe_flush(batch, 24);
+ iris_emit_pipe_control_flush(batch,
+ "API: memory barrier",
+ bits & allowed_bits);
}
}
}
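
For context, iris_memory_barrier is installed as the Gallium memory_barrier hook. A minimal, hypothetical caller-side sketch (not part of this patch) of how a frontend reaches it:

   #include "pipe/p_context.h"
   #include "pipe/p_defines.h"

   /* E.g. a frontend handling an API-level barrier: translate the API bits
    * into PIPE_BARRIER_* flags and let the driver turn them into the
    * appropriate PIPE_CONTROL bits per batch, as implemented above.
    */
   static void
   texture_and_framebuffer_barrier(struct pipe_context *ctx)
   {
      ctx->memory_barrier(ctx, PIPE_BARRIER_TEXTURE | PIPE_BARRIER_FRAMEBUFFER);
   }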