From 96b22fb080894ba1840af2372f28a46cc0f40c76 Mon Sep 17 00:00:00 2001
From: Kristian Høgsberg Kristensen <krh@bitplanet.net>
Date: Wed, 4 Nov 2015 14:58:54 -0800
Subject: glsl: Use array deref for access to vector components
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We've assumed that we could lower per-component vector access from

  vec[i] = scalar

to

  vec = ir_triop_vector_insert(vec, scalar, i)

but with SSBOs (and compute shader SLM and tesselation outputs) this is
no longer valid. If a vector is "externally visible", multiple threads
can write independent components simultaneously. With lowering to
ir_triop_vector_insert, each thread read the entire vector, changes one
component, then writes out the entire vector. This is racy.

Instead of generating a ir_binop_vector_extract when we see v[i], we
generate ir_dereference_array. We then add a lowering pass to lower the
ir_dereference_array to ir_binop_vector_extract for rvalues and for to
vector_insert for lvalues in a separate lowering pass.

The resulting IR is the same as before, but we now have a window between
ast->ir conversion and the lowering pass where v[i] appears in the IR as
an array deref. This lets us run lowering passes that lower the vector
access to I/O (eg for SSBO load/store) before we lower the per-component
access to full vector writes.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Signed-off-by: Kristian Høgsberg Kristensen <krh@bitplanet.net>
---
 src/glsl/Makefile.sources        |   1 +
 src/glsl/ast_array_index.cpp     |   5 +-
 src/glsl/ast_function.cpp        |  24 +++------
 src/glsl/ast_to_hir.cpp          |  43 ----------------
 src/glsl/ir_optimization.h       |   1 +
 src/glsl/ir_validate.cpp         |   7 +--
 src/glsl/linker.cpp              |   2 +
 src/glsl/lower_ubo_reference.cpp |  14 +++++-
 src/glsl/lower_vector_derefs.cpp | 104 +++++++++++++++++++++++++++++++++++++++
 src/glsl/opt_dead_code_local.cpp |   5 ++
 10 files changed, 138 insertions(+), 68 deletions(-)
 create mode 100644 src/glsl/lower_vector_derefs.cpp

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 0266f290ccb..78d295b8e91 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -176,6 +176,7 @@ LIBGLSL_FILES = \
 	lower_vec_index_to_cond_assign.cpp \
 	lower_vec_index_to_swizzle.cpp \
 	lower_vector.cpp \
+	lower_vector_derefs.cpp \
 	lower_vector_insert.cpp \
 	lower_vertex_id.cpp \
 	lower_output_reads.cpp \
diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp
index 74d403fdb65..ca7a9a10c36 100644
--- a/src/glsl/ast_array_index.cpp
+++ b/src/glsl/ast_array_index.cpp
@@ -319,10 +319,9 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
     * expression.
     */
    if (array->type->is_array()
-       || array->type->is_matrix()) {
+       || array->type->is_matrix()
+       || array->type->is_vector()) {
       return new(mem_ctx) ir_dereference_array(array, idx);
-   } else if (array->type->is_vector()) {
-      return new(mem_ctx) ir_expression(ir_binop_vector_extract, array, idx);
    } else if (array->type->is_error()) {
       return array;
    } else {
diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index e4e4a3fe148..55844706d35 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -256,18 +256,10 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
 			     actual->variable_referenced()->name);
 	    return false;
 	 } else if (!actual->is_lvalue()) {
-            /* Even though ir_binop_vector_extract is not an l-value, let it
-             * slop through.  generate_call will handle it correctly.
-             */
-            ir_expression *const expr = ((ir_rvalue *) actual)->as_expression();
-            if (expr == NULL
-                || expr->operation != ir_binop_vector_extract
-                || !expr->operands[0]->is_lvalue()) {
-               _mesa_glsl_error(&loc, state,
-                                "function parameter '%s %s' is not an lvalue",
-                                mode, formal->name);
-               return false;
-            }
+            _mesa_glsl_error(&loc, state,
+                             "function parameter '%s %s' is not an lvalue",
+                             mode, formal->name);
+            return false;
 	 }
       }
 
@@ -376,12 +368,8 @@ fix_parameter(void *mem_ctx, ir_rvalue *actual, const glsl_type *formal_type,
 
    ir_rvalue *lhs = actual;
    if (expr != NULL && expr->operation == ir_binop_vector_extract) {
-      rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
-                                       expr->operands[0]->type,
-                                       expr->operands[0]->clone(mem_ctx, NULL),
-                                       rhs,
-                                       expr->operands[1]->clone(mem_ctx, NULL));
-      lhs = expr->operands[0]->clone(mem_ctx, NULL);
+      lhs == new(mem_ctx) ir_dereference_array(expr->operands[0]->clone(mem_ctx, NULL),
+                                               expr->operands[1]->clone(mem_ctx, NULL));
    }
 
    ir_assignment *const assignment_2 = new(mem_ctx) ir_assignment(lhs, rhs);
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 6f5f3c1b245..9d341e8cf92 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -850,43 +850,6 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
 {
    void *ctx = state;
    bool error_emitted = (lhs->type->is_error() || rhs->type->is_error());
-   ir_rvalue *extract_channel = NULL;
-
-   /* If the assignment LHS comes back as an ir_binop_vector_extract
-    * expression, move it to the RHS as an ir_triop_vector_insert.
-    */
-   if (lhs->ir_type == ir_type_expression) {
-      ir_expression *const lhs_expr = lhs->as_expression();
-
-      if (unlikely(lhs_expr->operation == ir_binop_vector_extract)) {
-         ir_rvalue *new_rhs =
-            validate_assignment(state, lhs_loc, lhs,
-                                rhs, is_initializer);
-
-         if (new_rhs == NULL) {
-            return lhs;
-         } else {
-            /* This converts:
-             * - LHS: (expression float vector_extract <vec> <channel>)
-             * - RHS: <scalar>
-             * into:
-             * - LHS: <vec>
-             * - RHS: (expression vec2 vector_insert <vec> <channel> <scalar>)
-             *
-             * The LHS type is now a vector instead of a scalar.  Since GLSL
-             * allows assignments to be used as rvalues, we need to re-extract
-             * the channel from assignment_temp when returning the rvalue.
-             */
-            extract_channel = lhs_expr->operands[1];
-            rhs = new(ctx) ir_expression(ir_triop_vector_insert,
-                                         lhs_expr->operands[0]->type,
-                                         lhs_expr->operands[0],
-                                         new_rhs,
-                                         extract_channel);
-            lhs = lhs_expr->operands[0]->clone(ctx, NULL);
-         }
-      }
-   }
 
    ir_variable *lhs_var = lhs->variable_referenced();
    if (lhs_var)
@@ -984,12 +947,6 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
       }
       ir_rvalue *rvalue = new(ctx) ir_dereference_variable(var);
 
-      if (extract_channel) {
-         rvalue = new(ctx) ir_expression(ir_binop_vector_extract,
-                                         rvalue,
-                                         extract_channel->clone(ctx, NULL));
-      }
-
       *out_rvalue = rvalue;
    } else {
       if (!error_emitted)
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 6d19a6ca476..2fee81c09c2 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -129,6 +129,7 @@ void lower_packed_varyings(void *mem_ctx,
                            unsigned locations_used, ir_variable_mode mode,
                            unsigned gs_input_vertices, gl_shader *shader);
 bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index);
+bool lower_vector_derefs(gl_shader *shader);
 void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader);
 bool optimize_redundant_jumps(exec_list *instructions);
 bool optimize_split_arrays(exec_list *instructions, bool linked);
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index 935571ae1d6..e63b5c318e3 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -110,9 +110,10 @@ ir_validate::visit(ir_dereference_variable *ir)
 ir_visitor_status
 ir_validate::visit_enter(class ir_dereference_array *ir)
 {
-   if (!ir->array->type->is_array() && !ir->array->type->is_matrix()) {
-      printf("ir_dereference_array @ %p does not specify an array or a "
-             "matrix\n",
+   if (!ir->array->type->is_array() && !ir->array->type->is_matrix() &&
+      !ir->array->type->is_vector()) {
+      printf("ir_dereference_array @ %p does not specify an array, a vector "
+             "or a matrix\n",
              (void *) ir);
       ir->print();
       printf("\n");
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index a8baee07f10..db00f8febc6 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -4451,6 +4451,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
 
       if (ctx->Const.ShaderCompilerOptions[i].LowerBufferInterfaceBlocks)
          lower_ubo_reference(prog->_LinkedShaders[i]);
+
+      lower_vector_derefs(prog->_LinkedShaders[i]);
    }
 
 done:
diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 24806ac6ce9..b74aa3d0630 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -390,7 +390,19 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
       case ir_type_dereference_array: {
          ir_dereference_array *deref_array = (ir_dereference_array *) deref;
          unsigned array_stride;
-         if (deref_array->array->type->is_matrix() && *row_major) {
+         if (deref_array->array->type->is_vector()) {
+            /* We get this when storing or loading a component out of a vector
+             * with a non-constant index. This happens for v[i] = f where v is
+             * a vector (or m[i][j] = f where m is a matrix). If we don't
+             * lower that here, it gets turned into v = vector_insert(v, i,
+             * f), which loads the entire vector, modifies one component and
+             * then write the entire thing back.  That breaks if another
+             * thread or SIMD channel is modifying the same vector.
+             */
+            array_stride = 4;
+            if (deref_array->array->type->is_double())
+               array_stride *= 2;
+         } else if (deref_array->array->type->is_matrix() && *row_major) {
             /* When loading a vector out of a row major matrix, the
              * step between the columns (vectors) is the size of a
              * float, while the step between the rows (elements of a
diff --git a/src/glsl/lower_vector_derefs.cpp b/src/glsl/lower_vector_derefs.cpp
new file mode 100644
index 00000000000..4a5d6f0da4c
--- /dev/null
+++ b/src/glsl/lower_vector_derefs.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "ir.h"
+#include "ir_builder.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_optimization.h"
+
+using namespace ir_builder;
+
+namespace {
+
+class vector_deref_visitor : public ir_rvalue_enter_visitor {
+public:
+   vector_deref_visitor()
+      : progress(false)
+   {
+   }
+
+   virtual ~vector_deref_visitor()
+   {
+   }
+
+   virtual void handle_rvalue(ir_rvalue **rv);
+   virtual ir_visitor_status visit_enter(ir_assignment *ir);
+
+   bool progress;
+};
+
+} /* anonymous namespace */
+
+ir_visitor_status
+vector_deref_visitor::visit_enter(ir_assignment *ir)
+{
+   if (!ir->lhs || ir->lhs->ir_type != ir_type_dereference_array)
+      return ir_rvalue_enter_visitor::visit_enter(ir);
+
+   ir_dereference_array *const deref = (ir_dereference_array *) ir->lhs;
+   if (!deref->array->type->is_vector())
+      return ir_rvalue_enter_visitor::visit_enter(ir);
+
+   ir_dereference *const new_lhs = (ir_dereference *) deref->array;
+   ir->set_lhs(new_lhs);
+
+   ir_constant *old_index_constant = deref->array_index->constant_expression_value();
+   void *mem_ctx = ralloc_parent(ir);
+   if (!old_index_constant) {
+      ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
+                                           new_lhs->type,
+                                           new_lhs->clone(mem_ctx, NULL),
+                                           ir->rhs,
+                                           deref->array_index);
+      ir->write_mask = (1 << new_lhs->type->vector_elements) - 1;
+   } else {
+      ir->write_mask = 1 << old_index_constant->get_int_component(0);
+   }
+
+   return ir_rvalue_enter_visitor::visit_enter(ir);
+}
+
+void
+vector_deref_visitor::handle_rvalue(ir_rvalue **rv)
+{
+   if (*rv == NULL || (*rv)->ir_type != ir_type_dereference_array)
+      return;
+
+   ir_dereference_array *const deref = (ir_dereference_array *) *rv;
+   if (!deref->array->type->is_vector())
+      return;
+
+   void *mem_ctx = ralloc_parent(deref);
+   *rv = new(mem_ctx) ir_expression(ir_binop_vector_extract,
+                                    deref->array,
+                                    deref->array_index);
+}
+
+bool
+lower_vector_derefs(gl_shader *shader)
+{
+   vector_deref_visitor v;
+
+   visit_list_elements(&v, shader->ir);
+
+   return v.progress;
+}
diff --git a/src/glsl/opt_dead_code_local.cpp b/src/glsl/opt_dead_code_local.cpp
index 4770fcff2ea..ee9f22c0373 100644
--- a/src/glsl/opt_dead_code_local.cpp
+++ b/src/glsl/opt_dead_code_local.cpp
@@ -197,6 +197,11 @@ process_assignment(void *ctx, ir_assignment *ir, exec_list *assignments)
 	    if (entry->lhs != var)
 	       continue;
 
+            /* Skip if the assignment we're trying to eliminate isn't a plain
+             * variable deref. */
+            if (entry->ir->lhs->ir_type != ir_type_dereference_variable)
+               continue;
+
 	    int remove = entry->unused & ir->write_mask;
 	    if (debug) {
 	       printf("%s 0x%01x - 0x%01x = 0x%01x\n",
-- 
cgit v1.2.3