summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Turner <mattst88@gmail.com>2013-12-21 11:28:05 -0800
committerMatt Turner <mattst88@gmail.com>2014-01-21 14:20:44 -0800
commit4bd6e0d7c69b304be88996a6c2b96ce7d996e627 (patch)
tree6ed796744e87637628cba67217f31da8f78fea68
parent5e82d8a9da9eeab33e2819c2d90f1419e42cb33d (diff)
glsl: Vectorize multiple scalar assignments
Reduces vertex shader instruction counts in DOTA2 by 6.42%, L4D2 by 4.61%, and CS:GO by 5.71%. total instructions in shared programs: 1500153 -> 1498191 (-0.13%) instructions in affected programs: 59919 -> 57957 (-3.27%) Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
-rw-r--r--src/glsl/Makefile.sources1
-rw-r--r--src/glsl/glsl_parser_extras.cpp4
-rw-r--r--src/glsl/ir_optimization.h1
-rw-r--r--src/glsl/opt_vectorize.cpp319
4 files changed, 325 insertions, 0 deletions
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 2e81deddabb..e69c1ac612b 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -99,6 +99,7 @@ LIBGLSL_FILES = \
99 $(GLSL_SRCDIR)/opt_structure_splitting.cpp \ 99 $(GLSL_SRCDIR)/opt_structure_splitting.cpp \
100 $(GLSL_SRCDIR)/opt_swizzle_swizzle.cpp \ 100 $(GLSL_SRCDIR)/opt_swizzle_swizzle.cpp \
101 $(GLSL_SRCDIR)/opt_tree_grafting.cpp \ 101 $(GLSL_SRCDIR)/opt_tree_grafting.cpp \
102 $(GLSL_SRCDIR)/opt_vectorize.cpp \
102 $(GLSL_SRCDIR)/s_expression.cpp \ 103 $(GLSL_SRCDIR)/s_expression.cpp \
103 $(GLSL_SRCDIR)/strtod.c 104 $(GLSL_SRCDIR)/strtod.c
104 105
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 35d5679b58b..9a1e0a249ed 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -1564,6 +1564,10 @@ do_common_optimization(exec_list *ir, bool linked,
1564 if (options->OptimizeForAOS && !linked) 1564 if (options->OptimizeForAOS && !linked)
1565 progress = opt_flip_matrices(ir) || progress; 1565 progress = opt_flip_matrices(ir) || progress;
1566 1566
1567 if (linked && options->OptimizeForAOS) {
1568 progress = do_vectorize(ir) || progress;
1569 }
1570
1567 if (linked) 1571 if (linked)
1568 progress = do_dead_code(ir, uniform_locations_assigned) || progress; 1572 progress = do_dead_code(ir, uniform_locations_assigned) || progress;
1569 else 1573 else
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 3ca9f574453..055d65547ba 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -98,6 +98,7 @@ bool do_mat_op_to_vec(exec_list *instructions);
98bool do_noop_swizzle(exec_list *instructions); 98bool do_noop_swizzle(exec_list *instructions);
99bool do_structure_splitting(exec_list *instructions); 99bool do_structure_splitting(exec_list *instructions);
100bool do_swizzle_swizzle(exec_list *instructions); 100bool do_swizzle_swizzle(exec_list *instructions);
101bool do_vectorize(exec_list *instructions);
101bool do_tree_grafting(exec_list *instructions); 102bool do_tree_grafting(exec_list *instructions);
102bool do_vec_index_to_cond_assign(exec_list *instructions); 103bool do_vec_index_to_cond_assign(exec_list *instructions);
103bool do_vec_index_to_swizzle(exec_list *instructions); 104bool do_vec_index_to_swizzle(exec_list *instructions);
diff --git a/src/glsl/opt_vectorize.cpp b/src/glsl/opt_vectorize.cpp
new file mode 100644
index 00000000000..9ca811a8603
--- /dev/null
+++ b/src/glsl/opt_vectorize.cpp
@@ -0,0 +1,319 @@
1/*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file opt_vectorize.cpp
26 *
27 * Combines scalar assignments of the same expression (modulo swizzle) to
28 * multiple channels of the same variable into a single vectorized expression
29 * and assignment.
30 *
31 * Many generated shaders contain scalarized code. That is, they contain
32 *
33 * r1.x = log2(v0.x);
34 * r1.y = log2(v0.y);
35 * r1.z = log2(v0.z);
36 *
37 * rather than
38 *
39 * r1.xyz = log2(v0.xyz);
40 *
41 * We look for consecutive assignments of the same expression (modulo swizzle)
42 * to each channel of the same variable.
43 *
44 * For instance, we want to convert these three scalar operations
45 *
46 * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0))))
47 * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0))))
48 * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0))))
49 *
50 * into a single vector operation
51 *
52 * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0))))
53 */
54
55#include "ir.h"
56#include "ir_visitor.h"
57#include "ir_optimization.h"
58#include "glsl_types.h"
59#include "program/prog_instruction.h"
60
61namespace {
62
63class ir_vectorize_visitor : public ir_hierarchical_visitor {
64public:
65 void clear()
66 {
67 assignment[0] = NULL;
68 assignment[1] = NULL;
69 assignment[2] = NULL;
70 assignment[3] = NULL;
71 current_assignment = NULL;
72 last_assignment = NULL;
73 channels = 0;
74 has_swizzle = false;
75 }
76
77 ir_vectorize_visitor()
78 {
79 clear();
80 progress = false;
81 }
82
83 virtual ir_visitor_status visit_enter(ir_assignment *);
84 virtual ir_visitor_status visit_enter(ir_swizzle *);
85
86 virtual ir_visitor_status visit_leave(ir_assignment *);
87
88 void try_vectorize();
89
90 ir_assignment *assignment[4];
91 ir_assignment *current_assignment, *last_assignment;
92 unsigned channels;
93 bool has_swizzle;
94
95 bool progress;
96};
97
98} /* unnamed namespace */
99
100/**
101 * Rewrites the swizzles and types of a right-hand side of an assignment.
102 *
103 * From the example above, this function would be called (by visit_tree()) on
104 * the nodes of the tree (expression float log2 (swiz z (var_ref v0))),
105 * rewriting it into (expression vec3 log2 (swiz xyz (var_ref v0))).
106 *
107 * The function modifies only ir_expressions and ir_swizzles. For expressions
108 * it sets a new type and swizzles any scalar dereferences into appropriately
109 * sized vector arguments. For example, if combining
110 *
111 * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1))))
112 * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1))))
113 *
114 * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on
115 * (var_ref v1) such that the final result was
116 *
117 * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0))
118 * (swiz xx (var_ref v1))))
119 *
120 * For swizzles, it sets a new type, and if the variable being swizzled is a
121 * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the
122 * data parameter. If the swizzled variable is scalar, then the swizzle was
123 * added by an earlier call to rewrite_swizzle() on an expression, so the
124 * mask should not be modified.
125 */
126static void
127rewrite_swizzle(ir_instruction *ir, void *data)
128{
129 ir_swizzle_mask *mask = (ir_swizzle_mask *)data;
130
131 switch (ir->ir_type) {
132 case ir_type_swizzle: {
133 ir_swizzle *swz = (ir_swizzle *)ir;
134 if (swz->val->type->is_vector()) {
135 swz->mask = *mask;
136 }
137 swz->type = glsl_type::get_instance(swz->type->base_type,
138 mask->num_components, 1);
139 break;
140 }
141 case ir_type_expression: {
142 ir_expression *expr = (ir_expression *)ir;
143 expr->type = glsl_type::get_instance(expr->type->base_type,
144 mask->num_components, 1);
145 for (unsigned i = 0; i < 4; i++) {
146 if (expr->operands[i]) {
147 ir_dereference *deref = expr->operands[i]->as_dereference();
148 if (deref && deref->type->is_scalar()) {
149 expr->operands[i] = new(ir) ir_swizzle(deref, 0, 0, 0, 0,
150 mask->num_components);
151 }
152 }
153 }
154 break;
155 }
156 default:
157 break;
158 }
159}
160
161/**
162 * Attempt to vectorize the previously saved assignments, and clear them from
163 * consideration.
164 *
165 * If the assignments are able to be combined, it modifies in-place the last
166 * assignment seen to be an equivalent vector form of the scalar assignments.
167 * It then removes the other now obsolete scalar assignments.
168 */
169void
170ir_vectorize_visitor::try_vectorize()
171{
172 if (this->last_assignment && this->channels > 1) {
173 ir_swizzle_mask mask = {0, 1, 2, 3, channels, 0};
174
175 visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask);
176
177 this->last_assignment->write_mask = 0;
178
179 for (unsigned i = 0; i < 4; i++) {
180 if (this->assignment[i]) {
181 this->last_assignment->write_mask |= 1 << i;
182
183 if (this->assignment[i] != this->last_assignment) {
184 this->assignment[i]->remove();
185 }
186 }
187 }
188
189 this->progress = true;
190 }
191 clear();
192}
193
/**
 * Returns whether exactly one bit of the write mask is set, i.e. whether the
 * mask selects a single channel. An empty mask yields false.
 */
static bool
single_channel_write_mask(unsigned write_mask)
{
   if (write_mask == 0)
      return false;

   /* Isolate the lowest set bit; it equals the mask iff no other bit is set. */
   const unsigned lowest_bit = write_mask & (0u - write_mask);
   return lowest_bit == write_mask;
}
202
203/**
204 * Translates single-channeled write mask to single-channeled swizzle.
205 */
206static unsigned
207write_mask_to_swizzle(unsigned write_mask)
208{
209 switch (write_mask) {
210 case WRITEMASK_X: return SWIZZLE_X;
211 case WRITEMASK_Y: return SWIZZLE_Y;
212 case WRITEMASK_Z: return SWIZZLE_Z;
213 case WRITEMASK_W: return SWIZZLE_W;
214 }
215 assert(!"not reached");
216 unreachable();
217}
218
219/**
220 * Returns whether a single-channeled write mask matches a swizzle.
221 */
222static bool
223write_mask_matches_swizzle(unsigned write_mask,
224 const ir_swizzle *swz)
225{
226 return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) ||
227 (write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) ||
228 (write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) ||
229 (write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W));
230}
231
232/**
233 * Upon entering an ir_assignment, attempt to vectorize the currently tracked
234 * assignments if the current assignment is not suitable. Keep a pointer to
235 * the current assignment.
236 */
237ir_visitor_status
238ir_vectorize_visitor::visit_enter(ir_assignment *ir)
239{
240 ir_dereference *lhs = this->last_assignment != NULL ?
241 this->last_assignment->lhs : NULL;
242 ir_rvalue *rhs = this->last_assignment != NULL ?
243 this->last_assignment->rhs : NULL;
244
245 if (ir->condition ||
246 this->channels >= 4 ||
247 !single_channel_write_mask(ir->write_mask) ||
248 (lhs && !ir->lhs->equals(lhs)) ||
249 (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) {
250 try_vectorize();
251 }
252
253 this->current_assignment = ir;
254
255 return visit_continue;
256}
257
258/**
259 * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an
260 * ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask
261 * matches the current assignment's write mask.
262 *
263 * If the write mask doesn't match the swizzle mask, remove the current
264 * assignment from further consideration.
265 */
266ir_visitor_status
267ir_vectorize_visitor::visit_enter(ir_swizzle *ir)
268{
269 if (this->current_assignment) {
270 if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) {
271 this->has_swizzle = true;
272 } else {
273 this->current_assignment = NULL;
274 }
275 }
276 return visit_continue;
277}
278
279/**
280 * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if
281 * the swizzle mask(s) found were appropriate. Also save a pointer in
282 * ::last_assignment so that we can compare future assignments with it.
283 *
284 * Finally, clear ::current_assignment and ::has_swizzle.
285 */
286ir_visitor_status
287ir_vectorize_visitor::visit_leave(ir_assignment *ir)
288{
289 if (this->has_swizzle && this->current_assignment) {
290 assert(this->current_assignment == ir);
291
292 unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask);
293 this->assignment[channel] = ir;
294 this->channels++;
295
296 this->last_assignment = this->current_assignment;
297 }
298 this->current_assignment = NULL;
299 this->has_swizzle = false;
300 return visit_continue;
301}
302
303/**
304 * Combines scalar assignments of the same expression (modulo swizzle) to
305 * multiple channels of the same variable into a single vectorized expression
306 * and assignment.
307 */
308bool
309do_vectorize(exec_list *instructions)
310{
311 ir_vectorize_visitor v;
312
313 v.run(instructions);
314
315 /* Try to vectorize the last assignments seen. */
316 v.try_vectorize();
317
318 return v.progress;
319}