diff options
author | Matt Turner <mattst88@gmail.com> | 2013-12-21 11:28:05 -0800 |
---|---|---|
committer | Matt Turner <mattst88@gmail.com> | 2014-01-21 14:20:44 -0800 |
commit | 4bd6e0d7c69b304be88996a6c2b96ce7d996e627 (patch) | |
tree | 6ed796744e87637628cba67217f31da8f78fea68 | |
parent | 5e82d8a9da9eeab33e2819c2d90f1419e42cb33d (diff) |
glsl: Vectorize multiple scalar assignments
Reduces vertex shader instruction counts in DOTA2 by 6.42%, L4D2 by
4.61%, and CS:GO by 5.71%.
total instructions in shared programs: 1500153 -> 1498191 (-0.13%)
instructions in affected programs: 59919 -> 57957 (-3.27%)
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
-rw-r--r-- | src/glsl/Makefile.sources | 1 | ||||
-rw-r--r-- | src/glsl/glsl_parser_extras.cpp | 4 | ||||
-rw-r--r-- | src/glsl/ir_optimization.h | 1 | ||||
-rw-r--r-- | src/glsl/opt_vectorize.cpp | 319 |
4 files changed, 325 insertions, 0 deletions
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 2e81deddabb..e69c1ac612b 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources | |||
@@ -99,6 +99,7 @@ LIBGLSL_FILES = \ | |||
99 | $(GLSL_SRCDIR)/opt_structure_splitting.cpp \ | 99 | $(GLSL_SRCDIR)/opt_structure_splitting.cpp \ |
100 | $(GLSL_SRCDIR)/opt_swizzle_swizzle.cpp \ | 100 | $(GLSL_SRCDIR)/opt_swizzle_swizzle.cpp \ |
101 | $(GLSL_SRCDIR)/opt_tree_grafting.cpp \ | 101 | $(GLSL_SRCDIR)/opt_tree_grafting.cpp \ |
102 | $(GLSL_SRCDIR)/opt_vectorize.cpp \ | ||
102 | $(GLSL_SRCDIR)/s_expression.cpp \ | 103 | $(GLSL_SRCDIR)/s_expression.cpp \ |
103 | $(GLSL_SRCDIR)/strtod.c | 104 | $(GLSL_SRCDIR)/strtod.c |
104 | 105 | ||
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 35d5679b58b..9a1e0a249ed 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp | |||
@@ -1564,6 +1564,10 @@ do_common_optimization(exec_list *ir, bool linked, | |||
1564 | if (options->OptimizeForAOS && !linked) | 1564 | if (options->OptimizeForAOS && !linked) |
1565 | progress = opt_flip_matrices(ir) || progress; | 1565 | progress = opt_flip_matrices(ir) || progress; |
1566 | 1566 | ||
1567 | if (linked && options->OptimizeForAOS) { | ||
1568 | progress = do_vectorize(ir) || progress; | ||
1569 | } | ||
1570 | |||
1567 | if (linked) | 1571 | if (linked) |
1568 | progress = do_dead_code(ir, uniform_locations_assigned) || progress; | 1572 | progress = do_dead_code(ir, uniform_locations_assigned) || progress; |
1569 | else | 1573 | else |
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 3ca9f574453..055d65547ba 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h | |||
@@ -98,6 +98,7 @@ bool do_mat_op_to_vec(exec_list *instructions); | |||
98 | bool do_noop_swizzle(exec_list *instructions); | 98 | bool do_noop_swizzle(exec_list *instructions); |
99 | bool do_structure_splitting(exec_list *instructions); | 99 | bool do_structure_splitting(exec_list *instructions); |
100 | bool do_swizzle_swizzle(exec_list *instructions); | 100 | bool do_swizzle_swizzle(exec_list *instructions); |
101 | bool do_vectorize(exec_list *instructions); | ||
101 | bool do_tree_grafting(exec_list *instructions); | 102 | bool do_tree_grafting(exec_list *instructions); |
102 | bool do_vec_index_to_cond_assign(exec_list *instructions); | 103 | bool do_vec_index_to_cond_assign(exec_list *instructions); |
103 | bool do_vec_index_to_swizzle(exec_list *instructions); | 104 | bool do_vec_index_to_swizzle(exec_list *instructions); |
diff --git a/src/glsl/opt_vectorize.cpp b/src/glsl/opt_vectorize.cpp new file mode 100644 index 00000000000..9ca811a8603 --- /dev/null +++ b/src/glsl/opt_vectorize.cpp | |||
@@ -0,0 +1,319 @@ | |||
1 | /* | ||
2 | * Copyright © 2013 Intel Corporation | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice (including the next | ||
12 | * paragraph) shall be included in all copies or substantial portions of the | ||
13 | * Software. | ||
14 | * | ||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
21 | * DEALINGS IN THE SOFTWARE. | ||
22 | */ | ||
23 | |||
24 | /** | ||
25 | * \file opt_vectorize.cpp | ||
26 | * | ||
27 | * Combines scalar assignments of the same expression (modulo swizzle) to | ||
28 | * multiple channels of the same variable into a single vectorized expression | ||
29 | * and assignment. | ||
30 | * | ||
31 | * Many generated shaders contain scalarized code. That is, they contain | ||
32 | * | ||
33 | * r1.x = log2(v0.x); | ||
34 | * r1.y = log2(v0.y); | ||
35 | * r1.z = log2(v0.z); | ||
36 | * | ||
37 | * rather than | ||
38 | * | ||
39 | * r1.xyz = log2(v0.xyz); | ||
40 | * | ||
41 | * We look for consecutive assignments of the same expression (modulo swizzle) | ||
42 | * to each channel of the same variable. | ||
43 | * | ||
44 | * For instance, we want to convert these three scalar operations | ||
45 | * | ||
46 | * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0)))) | ||
47 | * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0)))) | ||
48 | * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0)))) | ||
49 | * | ||
50 | * into a single vector operation | ||
51 | * | ||
52 | * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0)))) | ||
53 | */ | ||
54 | |||
55 | #include "ir.h" | ||
56 | #include "ir_visitor.h" | ||
57 | #include "ir_optimization.h" | ||
58 | #include "glsl_types.h" | ||
59 | #include "program/prog_instruction.h" | ||
60 | |||
61 | namespace { | ||
62 | |||
63 | class ir_vectorize_visitor : public ir_hierarchical_visitor { | ||
64 | public: | ||
65 | void clear() | ||
66 | { | ||
67 | assignment[0] = NULL; | ||
68 | assignment[1] = NULL; | ||
69 | assignment[2] = NULL; | ||
70 | assignment[3] = NULL; | ||
71 | current_assignment = NULL; | ||
72 | last_assignment = NULL; | ||
73 | channels = 0; | ||
74 | has_swizzle = false; | ||
75 | } | ||
76 | |||
77 | ir_vectorize_visitor() | ||
78 | { | ||
79 | clear(); | ||
80 | progress = false; | ||
81 | } | ||
82 | |||
83 | virtual ir_visitor_status visit_enter(ir_assignment *); | ||
84 | virtual ir_visitor_status visit_enter(ir_swizzle *); | ||
85 | |||
86 | virtual ir_visitor_status visit_leave(ir_assignment *); | ||
87 | |||
88 | void try_vectorize(); | ||
89 | |||
90 | ir_assignment *assignment[4]; | ||
91 | ir_assignment *current_assignment, *last_assignment; | ||
92 | unsigned channels; | ||
93 | bool has_swizzle; | ||
94 | |||
95 | bool progress; | ||
96 | }; | ||
97 | |||
98 | } /* unnamed namespace */ | ||
99 | |||
100 | /** | ||
101 | * Rewrites the swizzles and types of a right-hand side of an assignment. | ||
102 | * | ||
103 | * From the example above, this function would be called (by visit_tree()) on | ||
104 | * the nodes of the tree (expression float log2 (swiz z (var_ref v0))), | ||
105 | * rewriting it into (expression vec3 log2 (swiz xyz (var_ref v0))). | ||
106 | * | ||
107 | * The function modifies only ir_expressions and ir_swizzles. For expressions | ||
108 | * it sets a new type and swizzles any scalar dereferences into appropriately | ||
109 | * sized vector arguments. For example, if combining | ||
110 | * | ||
111 | * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1)))) | ||
112 | * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1)))) | ||
113 | * | ||
114 | * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on | ||
115 | * (var_ref v1) such that the final result was | ||
116 | * | ||
117 | * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0)) | ||
118 | * (swiz xx (var_ref v1)))) | ||
119 | * | ||
120 | * For swizzles, it sets a new type, and if the variable being swizzled is a | ||
121 | * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the | ||
122 | * data parameter. If the swizzled variable is scalar, then the swizzle was | ||
123 | * added by an earlier call to rewrite_swizzle() on an expression, so the | ||
124 | * mask should not be modified. | ||
125 | */ | ||
126 | static void | ||
127 | rewrite_swizzle(ir_instruction *ir, void *data) | ||
128 | { | ||
129 | ir_swizzle_mask *mask = (ir_swizzle_mask *)data; | ||
130 | |||
131 | switch (ir->ir_type) { | ||
132 | case ir_type_swizzle: { | ||
133 | ir_swizzle *swz = (ir_swizzle *)ir; | ||
134 | if (swz->val->type->is_vector()) { | ||
135 | swz->mask = *mask; | ||
136 | } | ||
137 | swz->type = glsl_type::get_instance(swz->type->base_type, | ||
138 | mask->num_components, 1); | ||
139 | break; | ||
140 | } | ||
141 | case ir_type_expression: { | ||
142 | ir_expression *expr = (ir_expression *)ir; | ||
143 | expr->type = glsl_type::get_instance(expr->type->base_type, | ||
144 | mask->num_components, 1); | ||
145 | for (unsigned i = 0; i < 4; i++) { | ||
146 | if (expr->operands[i]) { | ||
147 | ir_dereference *deref = expr->operands[i]->as_dereference(); | ||
148 | if (deref && deref->type->is_scalar()) { | ||
149 | expr->operands[i] = new(ir) ir_swizzle(deref, 0, 0, 0, 0, | ||
150 | mask->num_components); | ||
151 | } | ||
152 | } | ||
153 | } | ||
154 | break; | ||
155 | } | ||
156 | default: | ||
157 | break; | ||
158 | } | ||
159 | } | ||
160 | |||
161 | /** | ||
162 | * Attempt to vectorize the previously saved assignments, and clear them from | ||
163 | * consideration. | ||
164 | * | ||
165 | * If the assignments are able to be combined, it modifies in-place the last | ||
166 | * assignment seen to be an equivalent vector form of the scalar assignments. | ||
167 | * It then removes the other now obsolete scalar assignments. | ||
168 | */ | ||
169 | void | ||
170 | ir_vectorize_visitor::try_vectorize() | ||
171 | { | ||
172 | if (this->last_assignment && this->channels > 1) { | ||
173 | ir_swizzle_mask mask = {0, 1, 2, 3, channels, 0}; | ||
174 | |||
175 | visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask); | ||
176 | |||
177 | this->last_assignment->write_mask = 0; | ||
178 | |||
179 | for (unsigned i = 0; i < 4; i++) { | ||
180 | if (this->assignment[i]) { | ||
181 | this->last_assignment->write_mask |= 1 << i; | ||
182 | |||
183 | if (this->assignment[i] != this->last_assignment) { | ||
184 | this->assignment[i]->remove(); | ||
185 | } | ||
186 | } | ||
187 | } | ||
188 | |||
189 | this->progress = true; | ||
190 | } | ||
191 | clear(); | ||
192 | } | ||
193 | |||
/**
 * Reports whether \p write_mask has exactly one bit set, i.e. whether it
 * selects a single channel.  An empty mask is not a single channel.
 */
static bool
single_channel_write_mask(unsigned write_mask)
{
   if (write_mask == 0)
      return false;

   /* Clearing the lowest set bit leaves zero only if it was the sole bit. */
   const unsigned without_lowest_bit = write_mask & (write_mask - 1);
   return without_lowest_bit == 0;
}
202 | |||
203 | /** | ||
204 | * Translates single-channeled write mask to single-channeled swizzle. | ||
205 | */ | ||
206 | static unsigned | ||
207 | write_mask_to_swizzle(unsigned write_mask) | ||
208 | { | ||
209 | switch (write_mask) { | ||
210 | case WRITEMASK_X: return SWIZZLE_X; | ||
211 | case WRITEMASK_Y: return SWIZZLE_Y; | ||
212 | case WRITEMASK_Z: return SWIZZLE_Z; | ||
213 | case WRITEMASK_W: return SWIZZLE_W; | ||
214 | } | ||
215 | assert(!"not reached"); | ||
216 | unreachable(); | ||
217 | } | ||
218 | |||
219 | /** | ||
220 | * Returns whether a single-channeled write mask matches a swizzle. | ||
221 | */ | ||
222 | static bool | ||
223 | write_mask_matches_swizzle(unsigned write_mask, | ||
224 | const ir_swizzle *swz) | ||
225 | { | ||
226 | return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) || | ||
227 | (write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) || | ||
228 | (write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) || | ||
229 | (write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W)); | ||
230 | } | ||
231 | |||
232 | /** | ||
233 | * Upon entering an ir_assignment, attempt to vectorize the currently tracked | ||
234 | * assignments if the current assignment is not suitable. Keep a pointer to | ||
235 | * the current assignment. | ||
236 | */ | ||
237 | ir_visitor_status | ||
238 | ir_vectorize_visitor::visit_enter(ir_assignment *ir) | ||
239 | { | ||
240 | ir_dereference *lhs = this->last_assignment != NULL ? | ||
241 | this->last_assignment->lhs : NULL; | ||
242 | ir_rvalue *rhs = this->last_assignment != NULL ? | ||
243 | this->last_assignment->rhs : NULL; | ||
244 | |||
245 | if (ir->condition || | ||
246 | this->channels >= 4 || | ||
247 | !single_channel_write_mask(ir->write_mask) || | ||
248 | (lhs && !ir->lhs->equals(lhs)) || | ||
249 | (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) { | ||
250 | try_vectorize(); | ||
251 | } | ||
252 | |||
253 | this->current_assignment = ir; | ||
254 | |||
255 | return visit_continue; | ||
256 | } | ||
257 | |||
258 | /** | ||
259 | * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an | ||
260 | * ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask | ||
261 | * matches the current assignment's write mask. | ||
262 | * | ||
263 | * If the write mask doesn't match the swizzle mask, remove the current | ||
264 | * assignment from further consideration. | ||
265 | */ | ||
266 | ir_visitor_status | ||
267 | ir_vectorize_visitor::visit_enter(ir_swizzle *ir) | ||
268 | { | ||
269 | if (this->current_assignment) { | ||
270 | if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) { | ||
271 | this->has_swizzle = true; | ||
272 | } else { | ||
273 | this->current_assignment = NULL; | ||
274 | } | ||
275 | } | ||
276 | return visit_continue; | ||
277 | } | ||
278 | |||
279 | /** | ||
280 | * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if | ||
281 | * the swizzle mask(s) found were appropriate. Also save a pointer in | ||
282 | * ::last_assignment so that we can compare future assignments with it. | ||
283 | * | ||
284 | * Finally, clear ::current_assignment and ::has_swizzle. | ||
285 | */ | ||
286 | ir_visitor_status | ||
287 | ir_vectorize_visitor::visit_leave(ir_assignment *ir) | ||
288 | { | ||
289 | if (this->has_swizzle && this->current_assignment) { | ||
290 | assert(this->current_assignment == ir); | ||
291 | |||
292 | unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask); | ||
293 | this->assignment[channel] = ir; | ||
294 | this->channels++; | ||
295 | |||
296 | this->last_assignment = this->current_assignment; | ||
297 | } | ||
298 | this->current_assignment = NULL; | ||
299 | this->has_swizzle = false; | ||
300 | return visit_continue; | ||
301 | } | ||
302 | |||
303 | /** | ||
304 | * Combines scalar assignments of the same expression (modulo swizzle) to | ||
305 | * multiple channels of the same variable into a single vectorized expression | ||
306 | * and assignment. | ||
307 | */ | ||
308 | bool | ||
309 | do_vectorize(exec_list *instructions) | ||
310 | { | ||
311 | ir_vectorize_visitor v; | ||
312 | |||
313 | v.run(instructions); | ||
314 | |||
315 | /* Try to vectorize the last assignments seen. */ | ||
316 | v.try_vectorize(); | ||
317 | |||
318 | return v.progress; | ||
319 | } | ||