glsl: Vectorize multiple scalar assignments

Reduces vertex shader instruction counts in DOTA2 by 6.42%, L4D2 by 4.61%, and CS:GO by 5.71%.

total instructions in shared programs: 1500153 -> 1498191 (-0.13%)
instructions in affected programs: 59919 -> 57957 (-3.27%)

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
author: Matt Turner <mattst88@gmail.com> 2013-12-21 11:28:05 -0800
committer: Matt Turner <mattst88@gmail.com> 2014-01-21 14:20:44 -0800
commit: 4bd6e0d7c69b304be88996a6c2b96ce7d996e627 (patch)
tree: 6ed796744e87637628cba67217f31da8f78fea68
parent: 5e82d8a9da9eeab33e2819c2d90f1419e42cb33d (diff)
4 files changed, 325 insertions, 0 deletions
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 2e81deddabb..e69c1ac612b 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -99,6 +99,7 @@ LIBGLSL_FILES = \
        $(GLSL_SRCDIR)/opt_structure_splitting.cpp \
        $(GLSL_SRCDIR)/opt_swizzle_swizzle.cpp \
        $(GLSL_SRCDIR)/opt_tree_grafting.cpp \
+        $(GLSL_SRCDIR)/opt_vectorize.cpp \
        $(GLSL_SRCDIR)/s_expression.cpp \
        $(GLSL_SRCDIR)/strtod.c
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 35d5679b58b..9a1e0a249ed 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -1564,6 +1564,10 @@ do_common_optimization(exec_list *ir, bool linked,
   if (options->OptimizeForAOS && !linked)
      progress = opt_flip_matrices(ir) || progress;
+   if (linked && options->OptimizeForAOS) {
+      progress = do_vectorize(ir) || progress;
+   }
   if (linked)
      progress = do_dead_code(ir, uniform_locations_assigned) || progress;
   else
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 3ca9f574453..055d65547ba 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -98,6 +98,7 @@ bool do_mat_op_to_vec(exec_list *instructions);
 bool do_noop_swizzle(exec_list *instructions);
 bool do_structure_splitting(exec_list *instructions);
 bool do_swizzle_swizzle(exec_list *instructions);
+bool do_vectorize(exec_list *instructions);
 bool do_tree_grafting(exec_list *instructions);
 bool do_vec_index_to_cond_assign(exec_list *instructions);
 bool do_vec_index_to_swizzle(exec_list *instructions);
diff --git a/src/glsl/opt_vectorize.cpp b/src/glsl/opt_vectorize.cpp
new file mode 100644
index 00000000000..9ca811a8603
--- /dev/null
+++ b/src/glsl/opt_vectorize.cpp
@@ -0,0 +1,319 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+/**
+ * \file opt_vectorize.cpp
+ *
+ * Combines scalar assignments of the same expression (modulo swizzle) to
+ * multiple channels of the same variable into a single vectorized expression
+ * and assignment.
+ *
+ * Many generated shaders contain scalarized code. That is, they contain
+ *
+ * r1.x = log2(v0.x);
+ * r1.y = log2(v0.y);
+ * r1.z = log2(v0.z);
+ *
+ * rather than
+ *
+ * r1.xyz = log2(v0.xyz);
+ *
+ * We look for consecutive assignments of the same expression (modulo swizzle)
+ * to each channel of the same variable.
+ *
+ * For instance, we want to convert these three scalar operations
+ *
+ * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0))))
+ * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0))))
+ * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0))))
+ *
+ * into a single vector operation
+ *
+ * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0))))
+ */
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_optimization.h"
+#include "glsl_types.h"
+#include "program/prog_instruction.h"
+namespace {
+/**
+ * Hierarchical IR visitor that collects a run of single-channel assignments
+ * and hands it to try_vectorize() to be merged into one vector assignment.
+ *
+ * The run state (everything reset by clear()) describes the assignments
+ * currently under consideration; ::progress is sticky across runs.
+ */
+class ir_vectorize_visitor : public ir_hierarchical_visitor {
+public:
+   ir_vectorize_visitor()
+   {
+      clear();
+      progress = false;
+   }
+   /**
+    * Forgets the run being tracked. Deliberately leaves ::progress alone so
+    * that a merge performed earlier is still reported to the caller.
+    */
+   void clear()
+   {
+      for (unsigned i = 0; i < 4; i++)
+         assignment[i] = NULL;
+      current_assignment = NULL;
+      last_assignment = NULL;
+      channels = 0;
+      has_swizzle = false;
+   }
+   virtual ir_visitor_status visit_enter(ir_assignment *);
+   virtual ir_visitor_status visit_enter(ir_swizzle *);
+   virtual ir_visitor_status visit_leave(ir_assignment *);
+   void try_vectorize();
+   /** Recorded assignment for each destination channel (x, y, z, w). */
+   ir_assignment *assignment[4];
+   /** Assignment whose subtree is currently being visited, if still a candidate. */
+   ir_assignment *current_assignment;
+   /** Most recently recorded assignment; later ones are compared against it. */
+   ir_assignment *last_assignment;
+   /** Number of assignments recorded in the current run. */
+   unsigned channels;
+   /** Set once a swizzle matching the current assignment's write mask is seen. */
+   bool has_swizzle;
+   /** True once any run has been merged into a vector assignment. */
+   bool progress;
+};
+} /* unnamed namespace */
+/**
+ * visit_tree() callback that widens one node of an assignment's right-hand
+ * side from scalar to vector form.
+ *
+ * \param ir    node being visited; only ir_swizzle and ir_expression nodes
+ *              are modified, every other node type is left untouched.
+ * \param data  pointer to the ir_swizzle_mask describing the vector form.
+ *
+ * From the example at the top of the file, the tree
+ * (expression float log2 (swiz z   (var_ref v0))) is rewritten into
+ * (expression vec3  log2 (swiz xyz (var_ref v0))).
+ *
+ * Expressions get a new type with mask->num_components components, and each
+ * operand that is a scalar dereference is wrapped in a broadcasting swizzle.
+ * For example, when combining
+ *
+ * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0)) (var_ref v1)))
+ * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0)) (var_ref v1)))
+ *
+ * where v1 is a scalar, a swizzle is inserted on (var_ref v1) so that the
+ * final result is
+ *
+ * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0))
+ *                                              (swiz xx (var_ref v1))))
+ *
+ * Swizzles get a new type as well. Their mask is overwritten with *mask only
+ * when the swizzled value is a vector: a swizzle of a scalar was inserted by
+ * the expression case above and already has the right (broadcast) mask.
+ */
+static void
+rewrite_swizzle(ir_instruction *ir, void *data)
+{
+   const ir_swizzle_mask *new_mask = static_cast<ir_swizzle_mask *>(data);
+   const unsigned width = new_mask->num_components;
+   if (ir->ir_type == ir_type_swizzle) {
+      ir_swizzle *swizzle = static_cast<ir_swizzle *>(ir);
+      if (swizzle->val->type->is_vector())
+         swizzle->mask = *new_mask;
+      swizzle->type = glsl_type::get_instance(swizzle->type->base_type,
+                                              width, 1);
+      return;
+   }
+   if (ir->ir_type != ir_type_expression)
+      return;
+   ir_expression *expression = static_cast<ir_expression *>(ir);
+   expression->type = glsl_type::get_instance(expression->type->base_type,
+                                              width, 1);
+   for (unsigned i = 0; i < 4; i++) {
+      ir_rvalue *operand = expression->operands[i];
+      if (operand == NULL)
+         continue;
+      ir_dereference *deref = operand->as_dereference();
+      if (deref == NULL || !deref->type->is_scalar())
+         continue;
+      /* Broadcast the scalar to every component of the widened expression. */
+      expression->operands[i] = new(ir) ir_swizzle(deref, 0, 0, 0, 0, width);
+   }
+}
+/**
+ * Attempt to vectorize the previously saved assignments, and clear them from
+ * consideration.
+ *
+ * If at least two distinct channels were collected, the last assignment seen
+ * is modified in place into the equivalent vector assignment and the other,
+ * now obsolete, scalar assignments are removed from the instruction stream.
+ *
+ * The source swizzle is built from the channels that were actually
+ * collected, so merging e.g. the x and z channels yields
+ * (assign (xz) ... (swiz xz ...)) rather than reading channels x and y.
+ * The component count is likewise taken from the populated slots rather than
+ * from ::channels, so that it always agrees with the resulting write mask.
+ */
+void
+ir_vectorize_visitor::try_vectorize()
+{
+   unsigned populated = 0;
+   for (unsigned i = 0; i < 4; i++) {
+      if (this->assignment[i])
+         populated++;
+   }
+   if (this->last_assignment && populated > 1) {
+      ir_swizzle_mask mask = {0, 0, 0, 0, 0, 0};
+      mask.num_components = populated;
+      this->last_assignment->write_mask = 0;
+      for (unsigned i = 0, j = 0; i < 4; i++) {
+         if (!this->assignment[i])
+            continue;
+         this->last_assignment->write_mask |= 1 << i;
+         if (this->assignment[i] != this->last_assignment) {
+            this->assignment[i]->remove();
+         }
+         /* The j-th written channel must read source channel i. */
+         switch (j) {
+         case 0: mask.x = i; break;
+         case 1: mask.y = i; break;
+         case 2: mask.z = i; break;
+         case 3: mask.w = i; break;
+         }
+         j++;
+      }
+      /* Rewrite the RHS only once the complete mask is known. */
+      visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask);
+      this->progress = true;
+   }
+   clear();
+}
+/**
+ * Tells whether exactly one bit is set in \p write_mask, i.e. whether the
+ * mask selects a single channel. An empty mask yields false.
+ */
+static bool
+single_channel_write_mask(unsigned write_mask)
+{
+   if (write_mask == 0)
+      return false;
+   /* Isolate the lowest set bit; the mask is a single bit iff that is all. */
+   const unsigned lowest_bit = write_mask & (0u - write_mask);
+   return lowest_bit == write_mask;
+}
+/**
+ * Maps a write mask selecting exactly one channel to the swizzle value of
+ * that channel. Any other mask is a caller error and trips the assertion.
+ */
+static unsigned
+write_mask_to_swizzle(unsigned write_mask)
+{
+   if (write_mask == WRITEMASK_X)
+      return SWIZZLE_X;
+   if (write_mask == WRITEMASK_Y)
+      return SWIZZLE_Y;
+   if (write_mask == WRITEMASK_Z)
+      return SWIZZLE_Z;
+   if (write_mask == WRITEMASK_W)
+      return SWIZZLE_W;
+   assert(!"not reached");
+   unreachable();
+}
+/**
+ * Tells whether the first component selected by \p swz is the channel
+ * written by the single-channel \p write_mask. Only swz->mask.x is
+ * inspected; a mask that is not exactly one of X/Y/Z/W never matches.
+ */
+static bool
+write_mask_matches_swizzle(unsigned write_mask,
+                           const ir_swizzle *swz)
+{
+   const unsigned first_component = swz->mask.x;
+   switch (write_mask) {
+   case WRITEMASK_X: return first_component == SWIZZLE_X;
+   case WRITEMASK_Y: return first_component == SWIZZLE_Y;
+   case WRITEMASK_Z: return first_component == SWIZZLE_Z;
+   case WRITEMASK_W: return first_component == SWIZZLE_W;
+   default:          return false;
+   }
+}
+/**
+ * Upon entering an ir_assignment, attempt to vectorize the currently tracked
+ * assignments if the new assignment cannot join them, then start tracking
+ * the new assignment if it is a candidate at all.
+ *
+ * An assignment that is conditional, or that does not write exactly one
+ * channel, ends the current run and is itself never tracked: merging a
+ * conditional assignment into an unconditional one would drop its condition.
+ *
+ * A candidate assignment ends the current run (and starts a new one) when
+ * the run is already full, when its channel is already present in the run,
+ * or when its left- or right-hand side differs (modulo swizzle) from the
+ * last recorded assignment.
+ */
+ir_visitor_status
+ir_vectorize_visitor::visit_enter(ir_assignment *ir)
+{
+   if (ir->condition || !single_channel_write_mask(ir->write_mask)) {
+      try_vectorize();
+      /* Leave ::current_assignment unset so visit_leave() records nothing. */
+      this->current_assignment = NULL;
+      return visit_continue;
+   }
+   ir_dereference *lhs = this->last_assignment != NULL ?
+                         this->last_assignment->lhs : NULL;
+   ir_rvalue *rhs = this->last_assignment != NULL ?
+                    this->last_assignment->rhs : NULL;
+   if (this->channels >= 4 ||
+       /* A second write to a tracked channel must not overwrite its slot. */
+       this->assignment[write_mask_to_swizzle(ir->write_mask)] != NULL ||
+       (lhs && !ir->lhs->equals(lhs)) ||
+       (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) {
+      try_vectorize();
+   }
+   this->current_assignment = ir;
+   return visit_continue;
+}
+/**
+ * Upon entering an ir_swizzle while an assignment is being tracked, compare
+ * the swizzle with that assignment's write mask.
+ *
+ * A matching swizzle sets ::has_swizzle. A mismatching one drops the
+ * assignment from consideration by clearing ::current_assignment, which also
+ * makes any further swizzles under the same assignment be ignored.
+ */
+ir_visitor_status
+ir_vectorize_visitor::visit_enter(ir_swizzle *ir)
+{
+   ir_assignment *const tracked = this->current_assignment;
+   if (tracked == NULL)
+      return visit_continue;
+   if (!write_mask_matches_swizzle(tracked->write_mask, ir)) {
+      this->current_assignment = NULL;
+      return visit_continue;
+   }
+   this->has_swizzle = true;
+   return visit_continue;
+}
+/**
+ * Upon leaving an ir_assignment, record it if it is still a candidate, that
+ * is, if at least one matching swizzle was seen and no mismatching one
+ * cleared ::current_assignment.
+ *
+ * A recorded assignment is stored in ::assignment[] under the channel it
+ * writes, counted in ::channels, and remembered in ::last_assignment so that
+ * later assignments can be compared with it.
+ *
+ * ::current_assignment and ::has_swizzle are reset in either case.
+ */
+ir_visitor_status
+ir_vectorize_visitor::visit_leave(ir_assignment *ir)
+{
+   const bool still_candidate = this->current_assignment && this->has_swizzle;
+   if (still_candidate) {
+      assert(this->current_assignment == ir);
+      const unsigned slot =
+         write_mask_to_swizzle(this->current_assignment->write_mask);
+      this->assignment[slot] = ir;
+      this->last_assignment = this->current_assignment;
+      this->channels++;
+   }
+   this->has_swizzle = false;
+   this->current_assignment = NULL;
+   return visit_continue;
+}
+/**
+ * Entry point of the pass: merges scalar assignments of the same expression
+ * (modulo swizzle) to several channels of the same variable into a single
+ * vector expression and assignment.
+ *
+ * \param instructions  instruction list to optimize in place.
+ * \return true if at least one group of assignments was merged.
+ */
+bool
+do_vectorize(exec_list *instructions)
+{
+   ir_vectorize_visitor visitor;
+   visitor.run(instructions);
+   /* The traversal only flushes a run when a later assignment ends it, so
+    * the final run is still pending here.
+    */
+   visitor.try_vectorize();
+   return visitor.progress;
+}
author	Matt Turner <mattst88@gmail.com>	2013-12-21 11:28:05 -0800
committer	Matt Turner <mattst88@gmail.com>	2014-01-21 14:20:44 -0800
commit	4bd6e0d7c69b304be88996a6c2b96ce7d996e627 (patch)
tree	6ed796744e87637628cba67217f31da8f78fea68
parent	5e82d8a9da9eeab33e2819c2d90f1419e42cb33d (diff)

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 2e81deddabb..e69c1ac612b 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources
@@ -99,6 +99,7 @@ LIBGLSL_FILES = \
99	$(GLSL_SRCDIR)/opt_structure_splitting.cpp \	99	$(GLSL_SRCDIR)/opt_structure_splitting.cpp \
100	$(GLSL_SRCDIR)/opt_swizzle_swizzle.cpp \	100	$(GLSL_SRCDIR)/opt_swizzle_swizzle.cpp \
101	$(GLSL_SRCDIR)/opt_tree_grafting.cpp \	101	$(GLSL_SRCDIR)/opt_tree_grafting.cpp \
		102	$(GLSL_SRCDIR)/opt_vectorize.cpp \
102	$(GLSL_SRCDIR)/s_expression.cpp \	103	$(GLSL_SRCDIR)/s_expression.cpp \
103	$(GLSL_SRCDIR)/strtod.c	104	$(GLSL_SRCDIR)/strtod.c
104		105


diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 35d5679b58b..9a1e0a249ed 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp
@@ -1564,6 +1564,10 @@ do_common_optimization(exec_list *ir, bool linked,
1564	if (options->OptimizeForAOS && !linked)	1564	if (options->OptimizeForAOS && !linked)
1565	progress = opt_flip_matrices(ir) || progress;	1565	progress = opt_flip_matrices(ir) || progress;
1566		1566
		1567	if (linked && options->OptimizeForAOS) {
		1568	progress = do_vectorize(ir) || progress;
		1569	}
		1570
1567	if (linked)	1571	if (linked)
1568	progress = do_dead_code(ir, uniform_locations_assigned) || progress;	1572	progress = do_dead_code(ir, uniform_locations_assigned) || progress;
1569	else	1573	else


diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 3ca9f574453..055d65547ba 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h
@@ -98,6 +98,7 @@ bool do_mat_op_to_vec(exec_list *instructions);
98	bool do_noop_swizzle(exec_list *instructions);	98	bool do_noop_swizzle(exec_list *instructions);
99	bool do_structure_splitting(exec_list *instructions);	99	bool do_structure_splitting(exec_list *instructions);
100	bool do_swizzle_swizzle(exec_list *instructions);	100	bool do_swizzle_swizzle(exec_list *instructions);
		101	bool do_vectorize(exec_list *instructions);
101	bool do_tree_grafting(exec_list *instructions);	102	bool do_tree_grafting(exec_list *instructions);
102	bool do_vec_index_to_cond_assign(exec_list *instructions);	103	bool do_vec_index_to_cond_assign(exec_list *instructions);
103	bool do_vec_index_to_swizzle(exec_list *instructions);	104	bool do_vec_index_to_swizzle(exec_list *instructions);


diff --git a/src/glsl/opt_vectorize.cpp b/src/glsl/opt_vectorize.cpp new file mode 100644 index 00000000000..9ca811a8603 --- /dev/null +++ b/src/glsl/opt_vectorize.cpp
@@ -0,0 +1,319 @@
		1	/*
		2	* Copyright © 2013 Intel Corporation
		3	*
		4	* Permission is hereby granted, free of charge, to any person obtaining a
		5	* copy of this software and associated documentation files (the "Software"),
		6	* to deal in the Software without restriction, including without limitation
		7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		8	* and/or sell copies of the Software, and to permit persons to whom the
		9	* Software is furnished to do so, subject to the following conditions:
		10	*
		11	* The above copyright notice and this permission notice (including the next
		12	* paragraph) shall be included in all copies or substantial portions of the
		13	* Software.
		14	*
		15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
		19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
		20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
		21	* DEALINGS IN THE SOFTWARE.
		22	*/
		23
		24	/**
		25	* \file opt_vectorize.cpp
		26	*
		27	* Combines scalar assignments of the same expression (modulo swizzle) to
		28	* multiple channels of the same variable into a single vectorized expression
		29	* and assignment.
		30	*
		31	* Many generated shaders contain scalarized code. That is, they contain
		32	*
		33	* r1.x = log2(v0.x);
		34	* r1.y = log2(v0.y);
		35	* r1.z = log2(v0.z);
		36	*
		37	* rather than
		38	*
		39	* r1.xyz = log2(v0.xyz);
		40	*
		41	* We look for consecutive assignments of the same expression (modulo swizzle)
		42	* to each channel of the same variable.
		43	*
		44	* For instance, we want to convert these three scalar operations
		45	*
		46	* (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0))))
		47	* (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0))))
		48	* (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0))))
		49	*
		50	* into a single vector operation
		51	*
		52	* (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0))))
		53	*/
		54
		55	#include "ir.h"
		56	#include "ir_visitor.h"
		57	#include "ir_optimization.h"
		58	#include "glsl_types.h"
		59	#include "program/prog_instruction.h"
		60
		61	namespace {
		62
		63	class ir_vectorize_visitor : public ir_hierarchical_visitor {
		64	public:
		65	void clear()
		66	{
		67	assignment[0] = NULL;
		68	assignment[1] = NULL;
		69	assignment[2] = NULL;
		70	assignment[3] = NULL;
		71	current_assignment = NULL;
		72	last_assignment = NULL;
		73	channels = 0;
		74	has_swizzle = false;
		75	}
		76
		77	ir_vectorize_visitor()
		78	{
		79	clear();
		80	progress = false;
		81	}
		82
		83	virtual ir_visitor_status visit_enter(ir_assignment *);
		84	virtual ir_visitor_status visit_enter(ir_swizzle *);
		85
		86	virtual ir_visitor_status visit_leave(ir_assignment *);
		87
		88	void try_vectorize();
		89
		90	ir_assignment *assignment[4];
		91	ir_assignment *current_assignment, *last_assignment;
		92	unsigned channels;
		93	bool has_swizzle;
		94
		95	bool progress;
		96	};
		97
		98	} /* unnamed namespace */
		99
		100	/**
		101	* Rewrites the swizzles and types of a right-hand side of an assignment.
		102	*
		103	* From the example above, this function would be called (by visit_tree()) on
		104	* the nodes of the tree (expression float log2 (swiz z (var_ref v0))),
		105	* rewriting it into (expression vec3 log2 (swiz xyz (var_ref v0))).
		106	*
		107	* The function modifies only ir_expressions and ir_swizzles. For expressions
		108	* it sets a new type and swizzles any scalar dereferences into appropriately
		109	* sized vector arguments. For example, if combining
		110	*
		111	* (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0)) (var_ref v1)))
		112	* (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0)) (var_ref v1)))
		113	*
		114	* where v1 is a scalar, rewrite_swizzle() would insert a swizzle on
		115	* (var_ref v1) such that the final result was
		116	*
		117	* (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0))
		118	* (swiz xx (var_ref v1))))
		119	*
		120	* For swizzles, it sets a new type, and if the variable being swizzled is a
		121	* vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the
		122	* data parameter. If the swizzled variable is scalar, then the swizzle was
		123	* added by an earlier call to rewrite_swizzle() on an expression, so the
		124	* mask should not be modified.
		125	*/
		126	static void
		127	rewrite_swizzle(ir_instruction *ir, void *data)
		128	{
		129	ir_swizzle_mask *mask = (ir_swizzle_mask *)data;
		130
		131	switch (ir->ir_type) {
		132	case ir_type_swizzle: {
		133	ir_swizzle *swz = (ir_swizzle *)ir;
		134	if (swz->val->type->is_vector()) {
		135	swz->mask = *mask;
		136	}
		137	swz->type = glsl_type::get_instance(swz->type->base_type,
		138	mask->num_components, 1);
		139	break;
		140	}
		141	case ir_type_expression: {
		142	ir_expression *expr = (ir_expression *)ir;
		143	expr->type = glsl_type::get_instance(expr->type->base_type,
		144	mask->num_components, 1);
		145	for (unsigned i = 0; i < 4; i++) {
		146	if (expr->operands[i]) {
		147	ir_dereference *deref = expr->operands[i]->as_dereference();
		148	if (deref && deref->type->is_scalar()) {
		149	expr->operands[i] = new(ir) ir_swizzle(deref, 0, 0, 0, 0,
		150	mask->num_components);
		151	}
		152	}
		153	}
		154	break;
		155	}
		156	default:
		157	break;
		158	}
		159	}
		160
		161	/**
		162	* Attempt to vectorize the previously saved assignments, and clear them from
		163	* consideration.
		164	*
		165	* If the assignments are able to be combined, it modifies in-place the last
		166	* assignment seen to be an equivalent vector form of the scalar assignments.
		167	* It then removes the other now obsolete scalar assignments.
		168	*/
		169	void
		170	ir_vectorize_visitor::try_vectorize()
		171	{
		172	if (this->last_assignment && this->channels > 1) {
		173	ir_swizzle_mask mask = {0, 1, 2, 3, channels, 0};
		174
		175	visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask);
		176
		177	this->last_assignment->write_mask = 0;
		178
		179	for (unsigned i = 0; i < 4; i++) {
		180	if (this->assignment[i]) {
		181	this->last_assignment->write_mask |= 1 << i;
		182
		183	if (this->assignment[i] != this->last_assignment) {
		184	this->assignment[i]->remove();
		185	}
		186	}
		187	}
		188
		189	this->progress = true;
		190	}
		191	clear();
		192	}
		193
		194	/**
		195	* Returns whether the write mask is a single channel.
		196	*/
		197	static bool
		198	single_channel_write_mask(unsigned write_mask)
		199	{
		200	return write_mask != 0 && (write_mask & (write_mask - 1)) == 0;
		201	}
		202
		203	/**
		204	* Translates single-channeled write mask to single-channeled swizzle.
		205	*/
		206	static unsigned
		207	write_mask_to_swizzle(unsigned write_mask)
		208	{
		209	switch (write_mask) {
		210	case WRITEMASK_X: return SWIZZLE_X;
		211	case WRITEMASK_Y: return SWIZZLE_Y;
		212	case WRITEMASK_Z: return SWIZZLE_Z;
		213	case WRITEMASK_W: return SWIZZLE_W;
		214	}
		215	assert(!"not reached");
		216	unreachable();
		217	}
		218
		219	/**
		220	* Returns whether a single-channeled write mask matches a swizzle.
		221	*/
		222	static bool
		223	write_mask_matches_swizzle(unsigned write_mask,
		224	const ir_swizzle *swz)
		225	{
		226	return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) ||
		227	(write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) ||
		228	(write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) ||
		229	(write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W));
		230	}
		231
		232	/**
		233	* Upon entering an ir_assignment, attempt to vectorize the currently tracked
		234	* assignments if the current assignment is not suitable. Keep a pointer to
		235	* the current assignment.
		236	*/
		237	ir_visitor_status
		238	ir_vectorize_visitor::visit_enter(ir_assignment *ir)
		239	{
		240	ir_dereference *lhs = this->last_assignment != NULL ?
		241	this->last_assignment->lhs : NULL;
		242	ir_rvalue *rhs = this->last_assignment != NULL ?
		243	this->last_assignment->rhs : NULL;
		244
		245	if (ir->condition ||
		246	this->channels >= 4 ||
		247	!single_channel_write_mask(ir->write_mask) ||
		248	(lhs && !ir->lhs->equals(lhs)) ||
		249	(rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) {
		250	try_vectorize();
		251	}
		252
		253	this->current_assignment = ir;
		254
		255	return visit_continue;
		256	}
		257
		258	/**
		259	* Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an
		260	* ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask
		261	* matches the current assignment's write mask.
		262	*
		263	* If the write mask doesn't match the swizzle mask, remove the current
		264	* assignment from further consideration.
		265	*/
		266	ir_visitor_status
		267	ir_vectorize_visitor::visit_enter(ir_swizzle *ir)
		268	{
		269	if (this->current_assignment) {
		270	if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) {
		271	this->has_swizzle = true;
		272	} else {
		273	this->current_assignment = NULL;
		274	}
		275	}
		276	return visit_continue;
		277	}
		278
		279	/**
		280	* Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if
		281	* the swizzle mask(s) found were appropriate. Also save a pointer in
		282	* ::last_assignment so that we can compare future assignments with it.
		283	*
		284	* Finally, clear ::current_assignment and ::has_swizzle.
		285	*/
		286	ir_visitor_status
		287	ir_vectorize_visitor::visit_leave(ir_assignment *ir)
		288	{
		289	if (this->has_swizzle && this->current_assignment) {
		290	assert(this->current_assignment == ir);
		291
		292	unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask);
		293	this->assignment[channel] = ir;
		294	this->channels++;
		295
		296	this->last_assignment = this->current_assignment;
		297	}
		298	this->current_assignment = NULL;
		299	this->has_swizzle = false;
		300	return visit_continue;
		301	}
		302
		303	/**
		304	* Combines scalar assignments of the same expression (modulo swizzle) to
		305	* multiple channels of the same variable into a single vectorized expression
		306	* and assignment.
		307	*/
		308	bool
		309	do_vectorize(exec_list *instructions)
		310	{
		311	ir_vectorize_visitor v;
		312
		313	v.run(instructions);
		314
		315	/* Try to vectorize the last assignments seen. */
		316	v.try_vectorize();
		317
		318	return v.progress;
		319	}