summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Romanick <ian.d.romanick@intel.com>2022-01-14 17:12:35 -0800
committerMarge Bot <emma+marge@anholt.net>2022-02-11 17:25:33 +0000
commit46a9df6aac1c82fa985cf6b844553ce95c09cf34 (patch)
treee8c3e3a19374d40e12d0d7c8f9e54a46252e9a4e
parent05703a49f9e8b536c2de966ef531710b00d9aea2 (diff)
glsl: Don't try to emit the "linear sequence" in lower_variable_index_to_cond_assign
When there are four or fewer elements left in the array partition, the strategy changes from a binary search of nested flow control to sequence of conditional assignments like (assign, dest, src[constant_i+0], index == constant_i+0) (assign, dest, src[constant_i+1], index == constant_i+1) (assign, dest, src[constant_i+2], index == constant_i+2) (assign, dest, src[constant_i+3], index == constant_i+3) or (assign, dest[constant_i+0], src, index == constant_i+0) (assign, dest[constant_i+1], src, index == constant_i+1) (assign, dest[constant_i+2], src, index == constant_i+2) (assign, dest[constant_i+3], src, index == constant_i+3) Realistically, the first case should use ir_triop_csel instead. The second case will either get turned back into flow control like if (index == constant_i+0) (assign, dest[constant_i+0], src) if (index == constant_i+1) (assign, dest[constant_i+1], src) if (index == constant_i+2) (assign, dest[constant_i+2], src) if (index == constant_i+3) (assign, dest[constant_i+3], src) or a sequence of conditional selects like (assign, dest[constant_i+0], (csel, index == constant_i+0, src, dest[constant_i+0])) (assign, dest[constant_i+1], (csel, index == constant_i+1, src, dest[constant_i+1])) (assign, dest[constant_i+2], (csel, index == constant_i+2, src, dest[constant_i+2])) (assign, dest[constant_i+3], (csel, index == constant_i+3, src, dest[constant_i+3])) The former case should continue to use the binary search. The later case could be generated from the binary search by other lowering passes. At the end of the day, conditional assignments don't really help anything here, so stop using them. Radeon R430 total instructions in shared programs: 2398683 -> 2398419 (-0.01%) instructions in affected programs: 5143 -> 4879 (-5.13%) helped: 9 HURT: 8 total vinst in shared programs: 616292 -> 616010 (-0.05%) vinst in affected programs: 4467 -> 4185 (-6.31%) helped: 9 HURT: 8 total sinst in shared programs: 315417 -> 315667 (0.08%) sinst in affected programs: 2568 -> 2818 (9.74%) helped: 2 HURT: 15 total flowcontrol in shared programs: 1049 -> 1048 (-0.10%) flowcontrol in affected programs: 7 -> 6 (-14.29%) helped: 1 HURT: 0 total presub in shared programs: 47027 -> 47027 (0.00%) presub in affected programs: 127 -> 127 (0.00%) helped: 1 HURT: 1 total omod in shared programs: 3618 -> 3615 (-0.08%) omod in affected programs: 8 -> 5 (-37.50%) helped: 3 HURT: 0 total temps in shared programs: 450757 -> 451312 (0.12%) temps in affected programs: 837 -> 1392 (66.31%) helped: 8 HURT: 6 total consts in shared programs: 1031928 -> 1031920 (<.01%) consts in affected programs: 1211 -> 1203 (-0.66%) helped: 6 HURT: 7 The shaders that were hurt for temps... are all lies. None of those shaders should have compiled as all 6 had more than 32 temps to begin with. Reviewed-by: Matt Turner <mattst88@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14573>
-rw-r--r--src/compiler/glsl/lower_variable_index_to_cond_assign.cpp121
1 files changed, 12 insertions, 109 deletions
diff --git a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
index c22789c39e3..1c5076076ce 100644
--- a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
@@ -56,58 +56,6 @@
using namespace ir_builder;
-/**
- * Generate a comparison value for a block of indices
- *
- * Lowering passes for non-constant indexing of arrays, matrices, or vectors
- * can use this to generate blocks of index comparison values.
- *
- * \param instructions List where new instructions will be appended
- * \param index \c ir_variable containing the desired index
- * \param base Base value for this block of comparisons
- * \param components Number of unique index values to compare. This must
- * be on the range [1, 4].
- * \param mem_ctx ralloc memory context to be used for all allocations.
- *
- * \returns
- * An \c ir_variable containing the per-component comparison results. This
- * must be dereferenced per use.
- */
-ir_variable *
-compare_index_block(ir_factory &body, ir_variable *index,
- unsigned base, unsigned components)
-{
- assert(index->type->is_scalar());
- assert(index->type->base_type == GLSL_TYPE_INT ||
- index->type->base_type == GLSL_TYPE_UINT);
- assert(components >= 1 && components <= 4);
-
- ir_rvalue *const broadcast_index = components > 1
- ? swizzle(index, SWIZZLE_XXXX, components)
- : operand(index).val;
-
- /* Compare the desired index value with the next block of four indices.
- */
- ir_constant_data test_indices_data;
- memset(&test_indices_data, 0, sizeof(test_indices_data));
- test_indices_data.i[0] = base;
- test_indices_data.i[1] = base + 1;
- test_indices_data.i[2] = base + 2;
- test_indices_data.i[3] = base + 3;
-
- ir_constant *const test_indices =
- new(body.mem_ctx) ir_constant(broadcast_index->type, &test_indices_data);
-
- ir_rvalue *const condition_val = equal(broadcast_index, test_indices);
-
- ir_variable *const condition = body.make_temp(condition_val->type,
- "dereference_condition");
-
- body.emit(assign(condition, condition_val));
-
- return condition;
-}
-
static inline bool
is_array_or_matrix(const ir_rvalue *ir)
{
@@ -193,7 +141,7 @@ struct assignment_generator
{
}
- void generate(unsigned i, ir_rvalue* condition, ir_factory &body) const
+ void generate(unsigned i, ir_factory &body) const
{
/* Clone the old r-value in its entirety. Then replace any occurances of
* the old variable index with the new constant index.
@@ -204,12 +152,9 @@ struct assignment_generator
element->accept(&r);
assert(r.progress);
- /* Generate a conditional assignment to (or from) the constant indexed
- * array dereference.
- */
ir_assignment *const assignment = (is_write)
- ? assign(element, this->var, condition, write_mask)
- : assign(this->var, element, condition);
+ ? assign(element, this->var, write_mask)
+ : assign(this->var, element);
body.emit(assignment);
}
@@ -222,60 +167,15 @@ struct switch_generator
const TFunction& generator;
ir_variable* index;
- unsigned linear_sequence_max_length;
- unsigned condition_components;
void *mem_ctx;
- switch_generator(const TFunction& generator, ir_variable *index,
- unsigned linear_sequence_max_length,
- unsigned condition_components)
- : generator(generator), index(index),
- linear_sequence_max_length(linear_sequence_max_length),
- condition_components(condition_components)
+ switch_generator(const TFunction& generator, ir_variable *index)
+ : generator(generator), index(index)
{
this->mem_ctx = ralloc_parent(index);
}
- void linear_sequence(unsigned begin, unsigned end, ir_factory &body)
- {
- if (begin == end)
- return;
-
- /* If the array access is a read, read the first element of this subregion
- * unconditionally. The remaining tests will possibly overwrite this
- * value with one of the other array elements.
- *
- * This optimization cannot be done for writes because it will cause the
- * first element of the subregion to be written possibly *in addition* to
- * one of the other elements.
- */
- unsigned first;
- if (!this->generator.is_write) {
- this->generator.generate(begin, 0, body);
- first = begin + 1;
- } else {
- first = begin;
- }
-
- for (unsigned i = first; i < end; i += 4) {
- const unsigned comps = MIN2(condition_components, end - i);
- ir_variable *const cond = compare_index_block(body, index, i, comps);
-
- if (comps == 1) {
- this->generator.generate(i,
- operand(cond).val,
- body);
- } else {
- for (unsigned j = 0; j < comps; j++) {
- this->generator.generate(i + j,
- swizzle(cond, j, 1),
- body);
- }
- }
- }
- }
-
void bisect(unsigned begin, unsigned end, ir_factory &body)
{
unsigned middle = (begin + end) >> 1;
@@ -298,11 +198,14 @@ struct switch_generator
void generate(unsigned begin, unsigned end, ir_factory &body)
{
+ if (begin == end)
+ return;
+
unsigned length = end - begin;
- if (length <= this->linear_sequence_max_length)
- return linear_sequence(begin, end, body);
+ if (length == 1)
+ generator.generate(begin, body);
else
- return bisect(begin, end, body);
+ bisect(begin, end, body);
}
};
@@ -476,7 +379,7 @@ public:
ag.is_write = false;
}
- switch_generator sg(ag, index, 4, 4);
+ switch_generator sg(ag, index);
/* If the original assignment has a condition, respect that original
* condition! This is acomplished by wrapping the new conditional