diff options
Diffstat (limited to 'src/gallium/drivers/cell/spu/spu_exec.c')
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_exec.c | 1870 |
1 files changed, 0 insertions, 1870 deletions
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c deleted file mode 100644 index e4ebeb595ce..00000000000 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ /dev/null | |||
@@ -1,1870 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | /** | ||
29 | * TGSI interpreter/executor. | |
30 | * | ||
31 | * Flow control information: | ||
32 | * | ||
33 | * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) | ||
34 | * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special | ||
35 | * care since a condition may be true for some quad components but false | ||
36 | * for other components. | ||
37 | * | ||
38 | * We basically execute all statements (even if they're in the part of | ||
39 | * an IF/ELSE clause that's "not taken") and use a special mask to | ||
40 | * control writing to destination registers. This is the ExecMask. | ||
41 | * See store_dest(). | ||
42 | * | ||
43 | * The ExecMask is computed from four other masks (CondMask, LoopMask, | |
44 | * ContMask and FuncMask; see UPDATE_EXEC_MASK) which are controlled by the | |
45 | * flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). | |
46 | * | ||
47 | * | ||
48 | * Authors: | ||
49 | * Michal Krol | ||
50 | * Brian Paul | ||
51 | */ | ||
52 | |||
53 | #include <transpose_matrix4x4.h> | ||
54 | #include <simdmath/ceilf4.h> | ||
55 | #include <simdmath/cosf4.h> | ||
56 | #include <simdmath/divf4.h> | ||
57 | #include <simdmath/floorf4.h> | ||
58 | #include <simdmath/log2f4.h> | ||
59 | #include <simdmath/powf4.h> | ||
60 | #include <simdmath/sinf4.h> | ||
61 | #include <simdmath/sqrtf4.h> | ||
62 | #include <simdmath/truncf4.h> | ||
63 | |||
64 | #include "pipe/p_compiler.h" | ||
65 | #include "pipe/p_state.h" | ||
66 | #include "pipe/p_shader_tokens.h" | ||
67 | #include "tgsi/tgsi_parse.h" | ||
68 | #include "tgsi/tgsi_util.h" | ||
69 | #include "spu_exec.h" | ||
70 | #include "spu_main.h" | ||
71 | #include "spu_vertex_shader.h" | ||
72 | #include "spu_dcache.h" | ||
73 | #include "cell/common.h" | ||
74 | |||
75 | #define TILE_TOP_LEFT 0 | ||
76 | #define TILE_TOP_RIGHT 1 | ||
77 | #define TILE_BOTTOM_LEFT 2 | ||
78 | #define TILE_BOTTOM_RIGHT 3 | ||
79 | |||
80 | /* | ||
81 | * Shorthand locations of various utility registers (_I = Index, _C = Channel) | ||
82 | */ | ||
83 | #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I | ||
84 | #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C | ||
85 | #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I | ||
86 | #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C | ||
87 | #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I | ||
88 | #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C | ||
89 | #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I | ||
90 | #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C | ||
91 | #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I | ||
92 | #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C | ||
93 | #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I | ||
94 | #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C | ||
95 | #define TEMP_128_I TGSI_EXEC_TEMP_128_I | ||
96 | #define TEMP_128_C TGSI_EXEC_TEMP_128_C | ||
97 | #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I | ||
98 | #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C | ||
99 | #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I | ||
100 | #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C | ||
101 | #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I | ||
102 | #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C | ||
103 | #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I | ||
104 | #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C | ||
105 | #define TEMP_R0 TGSI_EXEC_TEMP_R0 | ||
106 | |||
107 | #define FOR_EACH_CHANNEL(CHAN)\ | ||
108 | for (CHAN = 0; CHAN < 4; CHAN++) | ||
109 | |||
110 | #define IS_CHANNEL_ENABLED(INST, CHAN)\ | ||
111 | ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) | ||
112 | |||
113 | #define IS_CHANNEL_ENABLED2(INST, CHAN)\ | ||
114 | ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) | ||
115 | |||
116 | #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ | ||
117 | FOR_EACH_CHANNEL( CHAN )\ | ||
118 | if (IS_CHANNEL_ENABLED( INST, CHAN )) | ||
119 | |||
120 | #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ | ||
121 | FOR_EACH_CHANNEL( CHAN )\ | ||
122 | if (IS_CHANNEL_ENABLED2( INST, CHAN )) | ||
123 | |||
124 | |||
/**
 * Recompute the execution mask as the AND of the conditional, loop,
 * continue and function masks of MACH.
 * MACH is parenthesized so that any pointer expression may be passed.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
128 | |||
129 | |||
130 | #define CHAN_X 0 | ||
131 | #define CHAN_Y 1 | ||
132 | #define CHAN_Z 2 | ||
133 | #define CHAN_W 3 | ||
134 | |||
135 | |||
136 | |||
137 | /** | ||
138 | * Initialize machine state by expanding tokens to full instructions, | ||
139 | * allocating temporary storage, setting up constants, etc. | ||
140 | * After this, we can call spu_exec_machine_run() many times. | ||
141 | */ | ||
142 | void | ||
143 | spu_exec_machine_init(struct spu_exec_machine *mach, | ||
144 | uint numSamplers, | ||
145 | struct spu_sampler *samplers, | ||
146 | unsigned processor) | ||
147 | { | ||
148 | const qword zero = si_il(0); | ||
149 | const qword not_zero = si_il(~0); | ||
150 | |||
151 | (void) numSamplers; | ||
152 | mach->Samplers = samplers; | ||
153 | mach->Processor = processor; | ||
154 | mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; | ||
155 | |||
156 | /* Setup constants. */ | ||
157 | mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; | ||
158 | mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; | ||
159 | mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1); | ||
160 | mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31); | ||
161 | |||
162 | mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f); | ||
163 | mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f); | ||
164 | mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f); | ||
165 | mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f); | ||
166 | } | ||
167 | |||
168 | |||
169 | static INLINE qword | ||
170 | micro_abs(qword src) | ||
171 | { | ||
172 | return si_rotmi(si_shli(src, 1), -1); | ||
173 | } | ||
174 | |||
175 | static INLINE qword | ||
176 | micro_ceil(qword src) | ||
177 | { | ||
178 | return (qword) _ceilf4((vec_float4) src); | ||
179 | } | ||
180 | |||
181 | static INLINE qword | ||
182 | micro_cos(qword src) | ||
183 | { | ||
184 | return (qword) _cosf4((vec_float4) src); | ||
185 | } | ||
186 | |||
187 | static const qword br_shuf = { | ||
188 | TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, | ||
189 | TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, | ||
190 | TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, | ||
191 | TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, | ||
192 | TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, | ||
193 | TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, | ||
194 | TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, | ||
195 | TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, | ||
196 | }; | ||
197 | |||
198 | static const qword bl_shuf = { | ||
199 | TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, | ||
200 | TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, | ||
201 | TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, | ||
202 | TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, | ||
203 | TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, | ||
204 | TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, | ||
205 | TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, | ||
206 | TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, | ||
207 | }; | ||
208 | |||
209 | static const qword tl_shuf = { | ||
210 | TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, | ||
211 | TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, | ||
212 | TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, | ||
213 | TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, | ||
214 | TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, | ||
215 | TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, | ||
216 | TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, | ||
217 | TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, | ||
218 | }; | ||
219 | |||
220 | static qword | ||
221 | micro_ddx(qword src) | ||
222 | { | ||
223 | qword bottom_right = si_shufb(src, src, br_shuf); | ||
224 | qword bottom_left = si_shufb(src, src, bl_shuf); | ||
225 | |||
226 | return si_fs(bottom_right, bottom_left); | ||
227 | } | ||
228 | |||
229 | static qword | ||
230 | micro_ddy(qword src) | ||
231 | { | ||
232 | qword top_left = si_shufb(src, src, tl_shuf); | ||
233 | qword bottom_left = si_shufb(src, src, bl_shuf); | ||
234 | |||
235 | return si_fs(top_left, bottom_left); | ||
236 | } | ||
237 | |||
238 | static INLINE qword | ||
239 | micro_div(qword src0, qword src1) | ||
240 | { | ||
241 | return (qword) _divf4((vec_float4) src0, (vec_float4) src1); | ||
242 | } | ||
243 | |||
244 | static qword | ||
245 | micro_flr(qword src) | ||
246 | { | ||
247 | return (qword) _floorf4((vec_float4) src); | ||
248 | } | ||
249 | |||
250 | static qword | ||
251 | micro_frc(qword src) | ||
252 | { | ||
253 | return si_fs(src, (qword) _floorf4((vec_float4) src)); | ||
254 | } | ||
255 | |||
256 | static INLINE qword | ||
257 | micro_ge(qword src0, qword src1) | ||
258 | { | ||
259 | return si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); | ||
260 | } | ||
261 | |||
262 | static qword | ||
263 | micro_lg2(qword src) | ||
264 | { | ||
265 | return (qword) _log2f4((vec_float4) src); | ||
266 | } | ||
267 | |||
268 | static INLINE qword | ||
269 | micro_lt(qword src0, qword src1) | ||
270 | { | ||
271 | const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); | ||
272 | |||
273 | return si_xori(tmp, 0xff); | ||
274 | } | ||
275 | |||
276 | static INLINE qword | ||
277 | micro_max(qword src0, qword src1) | ||
278 | { | ||
279 | return si_selb(src1, src0, si_fcgt(src0, src1)); | ||
280 | } | ||
281 | |||
282 | static INLINE qword | ||
283 | micro_min(qword src0, qword src1) | ||
284 | { | ||
285 | return si_selb(src0, src1, si_fcgt(src0, src1)); | ||
286 | } | ||
287 | |||
288 | static qword | ||
289 | micro_neg(qword src) | ||
290 | { | ||
291 | return si_xor(src, (qword) spu_splats(0x80000000)); | ||
292 | } | ||
293 | |||
294 | static qword | ||
295 | micro_set_sign(qword src) | ||
296 | { | ||
297 | return si_or(src, (qword) spu_splats(0x80000000)); | ||
298 | } | ||
299 | |||
300 | static qword | ||
301 | micro_pow(qword src0, qword src1) | ||
302 | { | ||
303 | return (qword) _powf4((vec_float4) src0, (vec_float4) src1); | ||
304 | } | ||
305 | |||
306 | static qword | ||
307 | micro_rnd(qword src) | ||
308 | { | ||
309 | const qword half = (qword) spu_splats(0.5f); | ||
310 | |||
311 | /* May be able to use _roundf4. There may be some difference, though. | ||
312 | */ | ||
313 | return (qword) _floorf4((vec_float4) si_fa(src, half)); | ||
314 | } | ||
315 | |||
316 | static INLINE qword | ||
317 | micro_ishr(qword src0, qword src1) | ||
318 | { | ||
319 | return si_rotma(src0, si_sfi(src1, 0)); | ||
320 | } | ||
321 | |||
322 | static qword | ||
323 | micro_trunc(qword src) | ||
324 | { | ||
325 | return (qword) _truncf4((vec_float4) src); | ||
326 | } | ||
327 | |||
328 | static qword | ||
329 | micro_sin(qword src) | ||
330 | { | ||
331 | return (qword) _sinf4((vec_float4) src); | ||
332 | } | ||
333 | |||
334 | static INLINE qword | ||
335 | micro_sqrt(qword src) | ||
336 | { | ||
337 | return (qword) _sqrtf4((vec_float4) src); | ||
338 | } | ||
339 | |||
340 | static void | ||
341 | fetch_src_file_channel( | ||
342 | const struct spu_exec_machine *mach, | ||
343 | const uint file, | ||
344 | const uint swizzle, | ||
345 | const union spu_exec_channel *index, | ||
346 | union spu_exec_channel *chan ) | ||
347 | { | ||
348 | switch( swizzle ) { | ||
349 | case TGSI_SWIZZLE_X: | ||
350 | case TGSI_SWIZZLE_Y: | ||
351 | case TGSI_SWIZZLE_Z: | ||
352 | case TGSI_SWIZZLE_W: | ||
353 | switch( file ) { | ||
354 | case TGSI_FILE_CONSTANT: { | ||
355 | unsigned i; | ||
356 | |||
357 | for (i = 0; i < 4; i++) { | ||
358 | const float *ptr = mach->Consts[index->i[i]]; | ||
359 | float tmp[4]; | ||
360 | |||
361 | spu_dcache_fetch_unaligned((qword *) tmp, | ||
362 | (uintptr_t)(ptr + swizzle), | ||
363 | sizeof(float)); | ||
364 | |||
365 | chan->f[i] = tmp[0]; | ||
366 | } | ||
367 | break; | ||
368 | } | ||
369 | |||
370 | case TGSI_FILE_INPUT: | ||
371 | chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; | ||
372 | chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; | ||
373 | chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; | ||
374 | chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; | ||
375 | break; | ||
376 | |||
377 | case TGSI_FILE_TEMPORARY: | ||
378 | chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; | ||
379 | chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; | ||
380 | chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; | ||
381 | chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; | ||
382 | break; | ||
383 | |||
384 | case TGSI_FILE_IMMEDIATE: | ||
385 | ASSERT( index->i[0] < (int) mach->ImmLimit ); | ||
386 | ASSERT( index->i[1] < (int) mach->ImmLimit ); | ||
387 | ASSERT( index->i[2] < (int) mach->ImmLimit ); | ||
388 | ASSERT( index->i[3] < (int) mach->ImmLimit ); | ||
389 | |||
390 | chan->f[0] = mach->Imms[index->i[0]][swizzle]; | ||
391 | chan->f[1] = mach->Imms[index->i[1]][swizzle]; | ||
392 | chan->f[2] = mach->Imms[index->i[2]][swizzle]; | ||
393 | chan->f[3] = mach->Imms[index->i[3]][swizzle]; | ||
394 | break; | ||
395 | |||
396 | case TGSI_FILE_ADDRESS: | ||
397 | chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; | ||
398 | chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; | ||
399 | chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; | ||
400 | chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; | ||
401 | break; | ||
402 | |||
403 | case TGSI_FILE_OUTPUT: | ||
404 | /* vertex/fragment output vars can be read too */ | ||
405 | chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; | ||
406 | chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; | ||
407 | chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; | ||
408 | chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; | ||
409 | break; | ||
410 | |||
411 | default: | ||
412 | ASSERT( 0 ); | ||
413 | } | ||
414 | break; | ||
415 | |||
416 | default: | ||
417 | ASSERT( 0 ); | ||
418 | } | ||
419 | } | ||
420 | |||
421 | static void | ||
422 | fetch_source( | ||
423 | const struct spu_exec_machine *mach, | ||
424 | union spu_exec_channel *chan, | ||
425 | const struct tgsi_full_src_register *reg, | ||
426 | const uint chan_index ) | ||
427 | { | ||
428 | union spu_exec_channel index; | ||
429 | uint swizzle; | ||
430 | |||
431 | index.i[0] = | ||
432 | index.i[1] = | ||
433 | index.i[2] = | ||
434 | index.i[3] = reg->Register.Index; | ||
435 | |||
436 | if (reg->Register.Indirect) { | ||
437 | union spu_exec_channel index2; | ||
438 | union spu_exec_channel indir_index; | ||
439 | |||
440 | index2.i[0] = | ||
441 | index2.i[1] = | ||
442 | index2.i[2] = | ||
443 | index2.i[3] = reg->Indirect.Index; | ||
444 | |||
445 | swizzle = tgsi_util_get_src_register_swizzle(®->Indirect, | ||
446 | CHAN_X); | ||
447 | fetch_src_file_channel( | ||
448 | mach, | ||
449 | reg->Indirect.File, | ||
450 | swizzle, | ||
451 | &index2, | ||
452 | &indir_index ); | ||
453 | |||
454 | index.q = si_a(index.q, indir_index.q); | ||
455 | } | ||
456 | |||
457 | if( reg->Register.Dimension ) { | ||
458 | switch( reg->Register.File ) { | ||
459 | case TGSI_FILE_INPUT: | ||
460 | index.q = si_mpyi(index.q, 17); | ||
461 | break; | ||
462 | case TGSI_FILE_CONSTANT: | ||
463 | index.q = si_shli(index.q, 12); | ||
464 | break; | ||
465 | default: | ||
466 | ASSERT( 0 ); | ||
467 | } | ||
468 | |||
469 | index.i[0] += reg->Dimension.Index; | ||
470 | index.i[1] += reg->Dimension.Index; | ||
471 | index.i[2] += reg->Dimension.Index; | ||
472 | index.i[3] += reg->Dimension.Index; | ||
473 | |||
474 | if (reg->Dimension.Indirect) { | ||
475 | union spu_exec_channel index2; | ||
476 | union spu_exec_channel indir_index; | ||
477 | |||
478 | index2.i[0] = | ||
479 | index2.i[1] = | ||
480 | index2.i[2] = | ||
481 | index2.i[3] = reg->DimIndirect.Index; | ||
482 | |||
483 | swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); | ||
484 | fetch_src_file_channel( | ||
485 | mach, | ||
486 | reg->DimIndirect.File, | ||
487 | swizzle, | ||
488 | &index2, | ||
489 | &indir_index ); | ||
490 | |||
491 | index.q = si_a(index.q, indir_index.q); | ||
492 | } | ||
493 | } | ||
494 | |||
495 | swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); | ||
496 | fetch_src_file_channel( | ||
497 | mach, | ||
498 | reg->Register.File, | ||
499 | swizzle, | ||
500 | &index, | ||
501 | chan ); | ||
502 | |||
503 | switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { | ||
504 | case TGSI_UTIL_SIGN_CLEAR: | ||
505 | chan->q = micro_abs(chan->q); | ||
506 | break; | ||
507 | |||
508 | case TGSI_UTIL_SIGN_SET: | ||
509 | chan->q = micro_set_sign(chan->q); | ||
510 | break; | ||
511 | |||
512 | case TGSI_UTIL_SIGN_TOGGLE: | ||
513 | chan->q = micro_neg(chan->q); | ||
514 | break; | ||
515 | |||
516 | case TGSI_UTIL_SIGN_KEEP: | ||
517 | break; | ||
518 | } | ||
519 | |||
520 | if (reg->RegisterExtMod.Complement) { | ||
521 | chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); | ||
522 | } | ||
523 | } | ||
524 | |||
525 | static void | ||
526 | store_dest( | ||
527 | struct spu_exec_machine *mach, | ||
528 | const union spu_exec_channel *chan, | ||
529 | const struct tgsi_full_dst_register *reg, | ||
530 | const struct tgsi_full_instruction *inst, | ||
531 | uint chan_index ) | ||
532 | { | ||
533 | union spu_exec_channel *dst; | ||
534 | |||
535 | switch( reg->Register.File ) { | ||
536 | case TGSI_FILE_NULL: | ||
537 | return; | ||
538 | |||
539 | case TGSI_FILE_OUTPUT: | ||
540 | dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] | ||
541 | + reg->Register.Index].xyzw[chan_index]; | ||
542 | break; | ||
543 | |||
544 | case TGSI_FILE_TEMPORARY: | ||
545 | dst = &mach->Temps[reg->Register.Index].xyzw[chan_index]; | ||
546 | break; | ||
547 | |||
548 | case TGSI_FILE_ADDRESS: | ||
549 | dst = &mach->Addrs[reg->Register.Index].xyzw[chan_index]; | ||
550 | break; | ||
551 | |||
552 | default: | ||
553 | ASSERT( 0 ); | ||
554 | return; | ||
555 | } | ||
556 | |||
557 | switch (inst->Instruction.Saturate) | ||
558 | { | ||
559 | case TGSI_SAT_NONE: | ||
560 | if (mach->ExecMask & 0x1) | ||
561 | dst->i[0] = chan->i[0]; | ||
562 | if (mach->ExecMask & 0x2) | ||
563 | dst->i[1] = chan->i[1]; | ||
564 | if (mach->ExecMask & 0x4) | ||
565 | dst->i[2] = chan->i[2]; | ||
566 | if (mach->ExecMask & 0x8) | ||
567 | dst->i[3] = chan->i[3]; | ||
568 | break; | ||
569 | |||
570 | case TGSI_SAT_ZERO_ONE: | ||
571 | /* XXX need to obey ExecMask here */ | ||
572 | dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); | ||
573 | dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q); | ||
574 | break; | ||
575 | |||
576 | case TGSI_SAT_MINUS_PLUS_ONE: | ||
577 | ASSERT( 0 ); | ||
578 | break; | ||
579 | |||
580 | default: | ||
581 | ASSERT( 0 ); | ||
582 | } | ||
583 | } | ||
584 | |||
585 | #define FETCH(VAL,INDEX,CHAN)\ | ||
586 | fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) | ||
587 | |||
588 | #define STORE(VAL,INDEX,CHAN)\ | ||
589 | store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) | ||
590 | |||
591 | |||
592 | /** | ||
593 | * Execute ARB-style KIL which is predicated by a src register. | ||
594 | * Kill fragment if any of the four values is less than zero. | ||
595 | */ | ||
596 | static void | ||
597 | exec_kil(struct spu_exec_machine *mach, | ||
598 | const struct tgsi_full_instruction *inst) | ||
599 | { | ||
600 | uint uniquemask; | ||
601 | uint chan_index; | ||
602 | uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ | ||
603 | union spu_exec_channel r[1]; | ||
604 | |||
605 | /* This mask stores component bits that were already tested. */ | ||
606 | uniquemask = 0; | ||
607 | |||
608 | for (chan_index = 0; chan_index < 4; chan_index++) | ||
609 | { | ||
610 | uint swizzle; | ||
611 | uint i; | ||
612 | |||
613 | /* unswizzle channel */ | ||
614 | swizzle = tgsi_util_get_full_src_register_swizzle ( | ||
615 | &inst->Src[0], | ||
616 | chan_index); | ||
617 | |||
618 | /* check if the component has not been already tested */ | ||
619 | if (uniquemask & (1 << swizzle)) | ||
620 | continue; | ||
621 | uniquemask |= 1 << swizzle; | ||
622 | |||
623 | FETCH(&r[0], 0, chan_index); | ||
624 | for (i = 0; i < 4; i++) | ||
625 | if (r[0].f[i] < 0.0f) | ||
626 | kilmask |= 1 << i; | ||
627 | } | ||
628 | |||
629 | mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; | ||
630 | } | ||
631 | |||
632 | /** | ||
633 | * Execute NVIDIA-style KIL which is predicated by a condition code. | ||
634 | * Kill fragment if the condition code is TRUE. | ||
635 | */ | ||
636 | static void | ||
637 | exec_kilp(struct spu_exec_machine *mach, | ||
638 | const struct tgsi_full_instruction *inst) | ||
639 | { | ||
640 | uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ | ||
641 | |||
642 | /* TODO: build kilmask from CC mask */ | ||
643 | |||
644 | mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; | ||
645 | } | ||
646 | |||
647 | /* | ||
648 | * Fetch a texel using STR texture coordinates. | ||
649 | */ | ||
650 | static void | ||
651 | fetch_texel( struct spu_sampler *sampler, | ||
652 | const union spu_exec_channel *s, | ||
653 | const union spu_exec_channel *t, | ||
654 | const union spu_exec_channel *p, | ||
655 | float lodbias, /* XXX should be float[4] */ | ||
656 | union spu_exec_channel *r, | ||
657 | union spu_exec_channel *g, | ||
658 | union spu_exec_channel *b, | ||
659 | union spu_exec_channel *a ) | ||
660 | { | ||
661 | qword rgba[4]; | ||
662 | qword out[4]; | ||
663 | |||
664 | sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, | ||
665 | (float (*)[4]) rgba); | ||
666 | |||
667 | _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba); | ||
668 | r->q = out[0]; | ||
669 | g->q = out[1]; | ||
670 | b->q = out[2]; | ||
671 | a->q = out[3]; | ||
672 | } | ||
673 | |||
674 | |||
675 | static void | ||
676 | exec_tex(struct spu_exec_machine *mach, | ||
677 | const struct tgsi_full_instruction *inst, | ||
678 | boolean biasLod, boolean projected) | ||
679 | { | ||
680 | const uint unit = inst->Src[1].Register.Index; | ||
681 | union spu_exec_channel r[8]; | ||
682 | uint chan_index; | ||
683 | float lodBias; | ||
684 | |||
685 | /* printf("Sampler %u unit %u\n", sampler, unit); */ | ||
686 | |||
687 | switch (inst->InstructionExtTexture.Texture) { | ||
688 | case TGSI_TEXTURE_1D: | ||
689 | |||
690 | FETCH(&r[0], 0, CHAN_X); | ||
691 | |||
692 | if (projected) { | ||
693 | FETCH(&r[1], 0, CHAN_W); | ||
694 | r[0].q = micro_div(r[0].q, r[1].q); | ||
695 | } | ||
696 | |||
697 | if (biasLod) { | ||
698 | FETCH(&r[1], 0, CHAN_W); | ||
699 | lodBias = r[2].f[0]; | ||
700 | } | ||
701 | else | ||
702 | lodBias = 0.0; | ||
703 | |||
704 | fetch_texel(&mach->Samplers[unit], | ||
705 | &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ | ||
706 | &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ | ||
707 | break; | ||
708 | |||
709 | case TGSI_TEXTURE_2D: | ||
710 | case TGSI_TEXTURE_RECT: | ||
711 | |||
712 | FETCH(&r[0], 0, CHAN_X); | ||
713 | FETCH(&r[1], 0, CHAN_Y); | ||
714 | FETCH(&r[2], 0, CHAN_Z); | ||
715 | |||
716 | if (projected) { | ||
717 | FETCH(&r[3], 0, CHAN_W); | ||
718 | r[0].q = micro_div(r[0].q, r[3].q); | ||
719 | r[1].q = micro_div(r[1].q, r[3].q); | ||
720 | r[2].q = micro_div(r[2].q, r[3].q); | ||
721 | } | ||
722 | |||
723 | if (biasLod) { | ||
724 | FETCH(&r[3], 0, CHAN_W); | ||
725 | lodBias = r[3].f[0]; | ||
726 | } | ||
727 | else | ||
728 | lodBias = 0.0; | ||
729 | |||
730 | fetch_texel(&mach->Samplers[unit], | ||
731 | &r[0], &r[1], &r[2], lodBias, /* inputs */ | ||
732 | &r[0], &r[1], &r[2], &r[3]); /* outputs */ | ||
733 | break; | ||
734 | |||
735 | case TGSI_TEXTURE_3D: | ||
736 | case TGSI_TEXTURE_CUBE: | ||
737 | |||
738 | FETCH(&r[0], 0, CHAN_X); | ||
739 | FETCH(&r[1], 0, CHAN_Y); | ||
740 | FETCH(&r[2], 0, CHAN_Z); | ||
741 | |||
742 | if (projected) { | ||
743 | FETCH(&r[3], 0, CHAN_W); | ||
744 | r[0].q = micro_div(r[0].q, r[3].q); | ||
745 | r[1].q = micro_div(r[1].q, r[3].q); | ||
746 | r[2].q = micro_div(r[2].q, r[3].q); | ||
747 | } | ||
748 | |||
749 | if (biasLod) { | ||
750 | FETCH(&r[3], 0, CHAN_W); | ||
751 | lodBias = r[3].f[0]; | ||
752 | } | ||
753 | else | ||
754 | lodBias = 0.0; | ||
755 | |||
756 | fetch_texel(&mach->Samplers[unit], | ||
757 | &r[0], &r[1], &r[2], lodBias, | ||
758 | &r[0], &r[1], &r[2], &r[3]); | ||
759 | break; | ||
760 | |||
761 | default: | ||
762 | ASSERT (0); | ||
763 | } | ||
764 | |||
765 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
766 | STORE( &r[chan_index], 0, chan_index ); | ||
767 | } | ||
768 | } | ||
769 | |||
770 | |||
771 | |||
772 | static void | ||
773 | constant_interpolation( | ||
774 | struct spu_exec_machine *mach, | ||
775 | unsigned attrib, | ||
776 | unsigned chan ) | ||
777 | { | ||
778 | unsigned i; | ||
779 | |||
780 | for( i = 0; i < QUAD_SIZE; i++ ) { | ||
781 | mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; | ||
782 | } | ||
783 | } | ||
784 | |||
785 | static void | ||
786 | linear_interpolation( | ||
787 | struct spu_exec_machine *mach, | ||
788 | unsigned attrib, | ||
789 | unsigned chan ) | ||
790 | { | ||
791 | const float x = mach->QuadPos.xyzw[0].f[0]; | ||
792 | const float y = mach->QuadPos.xyzw[1].f[0]; | ||
793 | const float dadx = mach->InterpCoefs[attrib].dadx[chan]; | ||
794 | const float dady = mach->InterpCoefs[attrib].dady[chan]; | ||
795 | const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; | ||
796 | mach->Inputs[attrib].xyzw[chan].f[0] = a0; | ||
797 | mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; | ||
798 | mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; | ||
799 | mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; | ||
800 | } | ||
801 | |||
802 | static void | ||
803 | perspective_interpolation( | ||
804 | struct spu_exec_machine *mach, | ||
805 | unsigned attrib, | ||
806 | unsigned chan ) | ||
807 | { | ||
808 | const float x = mach->QuadPos.xyzw[0].f[0]; | ||
809 | const float y = mach->QuadPos.xyzw[1].f[0]; | ||
810 | const float dadx = mach->InterpCoefs[attrib].dadx[chan]; | ||
811 | const float dady = mach->InterpCoefs[attrib].dady[chan]; | ||
812 | const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; | ||
813 | const float *w = mach->QuadPos.xyzw[3].f; | ||
814 | /* divide by W here */ | ||
815 | mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; | ||
816 | mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; | ||
817 | mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; | ||
818 | mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; | ||
819 | } | ||
820 | |||
821 | |||
822 | typedef void (* interpolation_func)( | ||
823 | struct spu_exec_machine *mach, | ||
824 | unsigned attrib, | ||
825 | unsigned chan ); | ||
826 | |||
827 | static void | ||
828 | exec_declaration(struct spu_exec_machine *mach, | ||
829 | const struct tgsi_full_declaration *decl) | ||
830 | { | ||
831 | if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { | ||
832 | if( decl->Declaration.File == TGSI_FILE_INPUT ) { | ||
833 | unsigned first, last, mask; | ||
834 | interpolation_func interp; | ||
835 | |||
836 | first = decl->Range.First; | ||
837 | last = decl->Range.Last; | ||
838 | mask = decl->Declaration.UsageMask; | ||
839 | |||
840 | switch( decl->Declaration.Interpolate ) { | ||
841 | case TGSI_INTERPOLATE_CONSTANT: | ||
842 | interp = constant_interpolation; | ||
843 | break; | ||
844 | |||
845 | case TGSI_INTERPOLATE_LINEAR: | ||
846 | interp = linear_interpolation; | ||
847 | break; | ||
848 | |||
849 | case TGSI_INTERPOLATE_PERSPECTIVE: | ||
850 | interp = perspective_interpolation; | ||
851 | break; | ||
852 | |||
853 | default: | ||
854 | ASSERT( 0 ); | ||
855 | } | ||
856 | |||
857 | if( mask == TGSI_WRITEMASK_XYZW ) { | ||
858 | unsigned i, j; | ||
859 | |||
860 | for( i = first; i <= last; i++ ) { | ||
861 | for( j = 0; j < NUM_CHANNELS; j++ ) { | ||
862 | interp( mach, i, j ); | ||
863 | } | ||
864 | } | ||
865 | } | ||
866 | else { | ||
867 | unsigned i, j; | ||
868 | |||
869 | for( j = 0; j < NUM_CHANNELS; j++ ) { | ||
870 | if( mask & (1 << j) ) { | ||
871 | for( i = first; i <= last; i++ ) { | ||
872 | interp( mach, i, j ); | ||
873 | } | ||
874 | } | ||
875 | } | ||
876 | } | ||
877 | } | ||
878 | } | ||
879 | } | ||
880 | |||
881 | static void | ||
882 | exec_instruction( | ||
883 | struct spu_exec_machine *mach, | ||
884 | const struct tgsi_full_instruction *inst, | ||
885 | int *pc ) | ||
886 | { | ||
887 | uint chan_index; | ||
888 | union spu_exec_channel r[8]; | ||
889 | |||
890 | (*pc)++; | ||
891 | |||
892 | switch (inst->Instruction.Opcode) { | ||
893 | case TGSI_OPCODE_ARL: | ||
894 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
895 | FETCH( &r[0], 0, chan_index ); | ||
896 | r[0].q = si_cflts(r[0].q, 0); | ||
897 | STORE( &r[0], 0, chan_index ); | ||
898 | } | ||
899 | break; | ||
900 | |||
901 | case TGSI_OPCODE_MOV: | ||
902 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
903 | FETCH( &r[0], 0, chan_index ); | ||
904 | STORE( &r[0], 0, chan_index ); | ||
905 | } | ||
906 | break; | ||
907 | |||
908 | case TGSI_OPCODE_LIT: | ||
909 | if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { | ||
910 | STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); | ||
911 | } | ||
912 | |||
913 | if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { | ||
914 | FETCH( &r[0], 0, CHAN_X ); | ||
915 | if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { | ||
916 | r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); | ||
917 | STORE( &r[0], 0, CHAN_Y ); | ||
918 | } | ||
919 | |||
920 | if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { | ||
921 | FETCH( &r[1], 0, CHAN_Y ); | ||
922 | r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); | ||
923 | |||
924 | FETCH( &r[2], 0, CHAN_W ); | ||
925 | r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q); | ||
926 | r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q); | ||
927 | r[1].q = micro_pow(r[1].q, r[2].q); | ||
928 | |||
929 | /* r0 = (r0 > 0.0) ? r1 : 0.0 | ||
930 | */ | ||
931 | r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); | ||
932 | r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q, | ||
933 | r[0].q); | ||
934 | STORE( &r[0], 0, CHAN_Z ); | ||
935 | } | ||
936 | } | ||
937 | |||
938 | if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { | ||
939 | STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); | ||
940 | } | ||
941 | break; | ||
942 | |||
943 | case TGSI_OPCODE_RCP: | ||
944 | FETCH( &r[0], 0, CHAN_X ); | ||
945 | r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); | ||
946 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
947 | STORE( &r[0], 0, chan_index ); | ||
948 | } | ||
949 | break; | ||
950 | |||
951 | case TGSI_OPCODE_RSQ: | ||
952 | FETCH( &r[0], 0, CHAN_X ); | ||
953 | r[0].q = micro_sqrt(r[0].q); | ||
954 | r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); | ||
955 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
956 | STORE( &r[0], 0, chan_index ); | ||
957 | } | ||
958 | break; | ||
959 | |||
960 | case TGSI_OPCODE_EXP: | ||
961 | ASSERT (0); | ||
962 | break; | ||
963 | |||
964 | case TGSI_OPCODE_LOG: | ||
965 | ASSERT (0); | ||
966 | break; | ||
967 | |||
968 | case TGSI_OPCODE_MUL: | ||
969 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) | ||
970 | { | ||
971 | FETCH(&r[0], 0, chan_index); | ||
972 | FETCH(&r[1], 1, chan_index); | ||
973 | |||
974 | r[0].q = si_fm(r[0].q, r[1].q); | ||
975 | |||
976 | STORE(&r[0], 0, chan_index); | ||
977 | } | ||
978 | break; | ||
979 | |||
980 | case TGSI_OPCODE_ADD: | ||
981 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
982 | FETCH( &r[0], 0, chan_index ); | ||
983 | FETCH( &r[1], 1, chan_index ); | ||
984 | r[0].q = si_fa(r[0].q, r[1].q); | ||
985 | STORE( &r[0], 0, chan_index ); | ||
986 | } | ||
987 | break; | ||
988 | |||
989 | case TGSI_OPCODE_DP3: | ||
990 | /* TGSI_OPCODE_DOT3 */ | ||
991 | FETCH( &r[0], 0, CHAN_X ); | ||
992 | FETCH( &r[1], 1, CHAN_X ); | ||
993 | r[0].q = si_fm(r[0].q, r[1].q); | ||
994 | |||
995 | FETCH( &r[1], 0, CHAN_Y ); | ||
996 | FETCH( &r[2], 1, CHAN_Y ); | ||
997 | r[0].q = si_fma(r[1].q, r[2].q, r[0].q); | ||
998 | |||
999 | |||
1000 | FETCH( &r[1], 0, CHAN_Z ); | ||
1001 | FETCH( &r[2], 1, CHAN_Z ); | ||
1002 | r[0].q = si_fma(r[1].q, r[2].q, r[0].q); | ||
1003 | |||
1004 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1005 | STORE( &r[0], 0, chan_index ); | ||
1006 | } | ||
1007 | break; | ||
1008 | |||
1009 | case TGSI_OPCODE_DP4: | ||
1010 | /* TGSI_OPCODE_DOT4 */ | ||
1011 | FETCH(&r[0], 0, CHAN_X); | ||
1012 | FETCH(&r[1], 1, CHAN_X); | ||
1013 | |||
1014 | r[0].q = si_fm(r[0].q, r[1].q); | ||
1015 | |||
1016 | FETCH(&r[1], 0, CHAN_Y); | ||
1017 | FETCH(&r[2], 1, CHAN_Y); | ||
1018 | |||
1019 | r[0].q = si_fma(r[1].q, r[2].q, r[0].q); | ||
1020 | |||
1021 | FETCH(&r[1], 0, CHAN_Z); | ||
1022 | FETCH(&r[2], 1, CHAN_Z); | ||
1023 | |||
1024 | r[0].q = si_fma(r[1].q, r[2].q, r[0].q); | ||
1025 | |||
1026 | FETCH(&r[1], 0, CHAN_W); | ||
1027 | FETCH(&r[2], 1, CHAN_W); | ||
1028 | |||
1029 | r[0].q = si_fma(r[1].q, r[2].q, r[0].q); | ||
1030 | |||
1031 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1032 | STORE( &r[0], 0, chan_index ); | ||
1033 | } | ||
1034 | break; | ||
1035 | |||
1036 | case TGSI_OPCODE_DST: | ||
1037 | if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { | ||
1038 | STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); | ||
1039 | } | ||
1040 | |||
1041 | if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { | ||
1042 | FETCH( &r[0], 0, CHAN_Y ); | ||
1043 | FETCH( &r[1], 1, CHAN_Y); | ||
1044 | r[0].q = si_fm(r[0].q, r[1].q); | ||
1045 | STORE( &r[0], 0, CHAN_Y ); | ||
1046 | } | ||
1047 | |||
1048 | if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { | ||
1049 | FETCH( &r[0], 0, CHAN_Z ); | ||
1050 | STORE( &r[0], 0, CHAN_Z ); | ||
1051 | } | ||
1052 | |||
1053 | if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { | ||
1054 | FETCH( &r[0], 1, CHAN_W ); | ||
1055 | STORE( &r[0], 0, CHAN_W ); | ||
1056 | } | ||
1057 | break; | ||
1058 | |||
1059 | case TGSI_OPCODE_MIN: | ||
1060 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1061 | FETCH(&r[0], 0, chan_index); | ||
1062 | FETCH(&r[1], 1, chan_index); | ||
1063 | |||
1064 | r[0].q = micro_min(r[0].q, r[1].q); | ||
1065 | |||
1066 | STORE(&r[0], 0, chan_index); | ||
1067 | } | ||
1068 | break; | ||
1069 | |||
1070 | case TGSI_OPCODE_MAX: | ||
1071 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1072 | FETCH(&r[0], 0, chan_index); | ||
1073 | FETCH(&r[1], 1, chan_index); | ||
1074 | |||
1075 | r[0].q = micro_max(r[0].q, r[1].q); | ||
1076 | |||
1077 | STORE(&r[0], 0, chan_index ); | ||
1078 | } | ||
1079 | break; | ||
1080 | |||
1081 | case TGSI_OPCODE_SLT: | ||
1082 | /* TGSI_OPCODE_SETLT */ | ||
1083 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1084 | FETCH( &r[0], 0, chan_index ); | ||
1085 | FETCH( &r[1], 1, chan_index ); | ||
1086 | |||
1087 | r[0].q = micro_ge(r[0].q, r[1].q); | ||
1088 | r[0].q = si_xori(r[0].q, 0xff); | ||
1089 | |||
1090 | STORE( &r[0], 0, chan_index ); | ||
1091 | } | ||
1092 | break; | ||
1093 | |||
1094 | case TGSI_OPCODE_SGE: | ||
1095 | /* TGSI_OPCODE_SETGE */ | ||
1096 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1097 | FETCH( &r[0], 0, chan_index ); | ||
1098 | FETCH( &r[1], 1, chan_index ); | ||
1099 | r[0].q = micro_ge(r[0].q, r[1].q); | ||
1100 | STORE( &r[0], 0, chan_index ); | ||
1101 | } | ||
1102 | break; | ||
1103 | |||
1104 | case TGSI_OPCODE_MAD: | ||
1105 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1106 | FETCH( &r[0], 0, chan_index ); | ||
1107 | FETCH( &r[1], 1, chan_index ); | ||
1108 | FETCH( &r[2], 2, chan_index ); | ||
1109 | r[0].q = si_fma(r[0].q, r[1].q, r[2].q); | ||
1110 | STORE( &r[0], 0, chan_index ); | ||
1111 | } | ||
1112 | break; | ||
1113 | |||
1114 | case TGSI_OPCODE_SUB: | ||
1115 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1116 | FETCH(&r[0], 0, chan_index); | ||
1117 | FETCH(&r[1], 1, chan_index); | ||
1118 | |||
1119 | r[0].q = si_fs(r[0].q, r[1].q); | ||
1120 | |||
1121 | STORE(&r[0], 0, chan_index); | ||
1122 | } | ||
1123 | break; | ||
1124 | |||
1125 | case TGSI_OPCODE_LRP: | ||
1126 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1127 | FETCH(&r[0], 0, chan_index); | ||
1128 | FETCH(&r[1], 1, chan_index); | ||
1129 | FETCH(&r[2], 2, chan_index); | ||
1130 | |||
1131 | r[1].q = si_fs(r[1].q, r[2].q); | ||
1132 | r[0].q = si_fma(r[0].q, r[1].q, r[2].q); | ||
1133 | |||
1134 | STORE(&r[0], 0, chan_index); | ||
1135 | } | ||
1136 | break; | ||
1137 | |||
1138 | case TGSI_OPCODE_CND: | ||
1139 | ASSERT (0); | ||
1140 | break; | ||
1141 | |||
1142 | case TGSI_OPCODE_DP2A: | ||
1143 | ASSERT (0); | ||
1144 | break; | ||
1145 | |||
1146 | case TGSI_OPCODE_FRC: | ||
1147 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1148 | FETCH( &r[0], 0, chan_index ); | ||
1149 | r[0].q = micro_frc(r[0].q); | ||
1150 | STORE( &r[0], 0, chan_index ); | ||
1151 | } | ||
1152 | break; | ||
1153 | |||
1154 | case TGSI_OPCODE_CLAMP: | ||
1155 | ASSERT (0); | ||
1156 | break; | ||
1157 | |||
1158 | case TGSI_OPCODE_FLR: | ||
1159 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1160 | FETCH( &r[0], 0, chan_index ); | ||
1161 | r[0].q = micro_flr(r[0].q); | ||
1162 | STORE( &r[0], 0, chan_index ); | ||
1163 | } | ||
1164 | break; | ||
1165 | |||
1166 | case TGSI_OPCODE_ROUND: | ||
1167 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1168 | FETCH( &r[0], 0, chan_index ); | ||
1169 | r[0].q = micro_rnd(r[0].q); | ||
1170 | STORE( &r[0], 0, chan_index ); | ||
1171 | } | ||
1172 | break; | ||
1173 | |||
1174 | case TGSI_OPCODE_EX2: | ||
1175 | FETCH(&r[0], 0, CHAN_X); | ||
1176 | |||
1177 | r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q); | ||
1178 | |||
1179 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1180 | STORE( &r[0], 0, chan_index ); | ||
1181 | } | ||
1182 | break; | ||
1183 | |||
1184 | case TGSI_OPCODE_LG2: | ||
1185 | FETCH( &r[0], 0, CHAN_X ); | ||
1186 | r[0].q = micro_lg2(r[0].q); | ||
1187 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1188 | STORE( &r[0], 0, chan_index ); | ||
1189 | } | ||
1190 | break; | ||
1191 | |||
1192 | case TGSI_OPCODE_POW: | ||
1193 | FETCH(&r[0], 0, CHAN_X); | ||
1194 | FETCH(&r[1], 1, CHAN_X); | ||
1195 | |||
1196 | r[0].q = micro_pow(r[0].q, r[1].q); | ||
1197 | |||
1198 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1199 | STORE( &r[0], 0, chan_index ); | ||
1200 | } | ||
1201 | break; | ||
1202 | |||
1203 | case TGSI_OPCODE_XPD: | ||
1204 | /* TGSI_OPCODE_XPD */ | ||
1205 | FETCH(&r[0], 0, CHAN_Y); | ||
1206 | FETCH(&r[1], 1, CHAN_Z); | ||
1207 | FETCH(&r[3], 0, CHAN_Z); | ||
1208 | FETCH(&r[4], 1, CHAN_Y); | ||
1209 | |||
1210 | /* r2 = (r0 * r1) - (r3 * r5) | ||
1211 | */ | ||
1212 | r[2].q = si_fm(r[3].q, r[5].q); | ||
1213 | r[2].q = si_fms(r[0].q, r[1].q, r[2].q); | ||
1214 | |||
1215 | if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { | ||
1216 | STORE( &r[2], 0, CHAN_X ); | ||
1217 | } | ||
1218 | |||
1219 | FETCH(&r[2], 1, CHAN_X); | ||
1220 | FETCH(&r[5], 0, CHAN_X); | ||
1221 | |||
1222 | /* r3 = (r3 * r2) - (r1 * r5) | ||
1223 | */ | ||
1224 | r[1].q = si_fm(r[1].q, r[5].q); | ||
1225 | r[3].q = si_fms(r[3].q, r[2].q, r[1].q); | ||
1226 | |||
1227 | if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { | ||
1228 | STORE( &r[3], 0, CHAN_Y ); | ||
1229 | } | ||
1230 | |||
1231 | /* r5 = (r5 * r4) - (r0 * r2) | ||
1232 | */ | ||
1233 | r[0].q = si_fm(r[0].q, r[2].q); | ||
1234 | r[5].q = si_fms(r[5].q, r[4].q, r[0].q); | ||
1235 | |||
1236 | if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { | ||
1237 | STORE( &r[5], 0, CHAN_Z ); | ||
1238 | } | ||
1239 | |||
1240 | if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { | ||
1241 | STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); | ||
1242 | } | ||
1243 | break; | ||
1244 | |||
1245 | case TGSI_OPCODE_ABS: | ||
1246 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1247 | FETCH(&r[0], 0, chan_index); | ||
1248 | |||
1249 | r[0].q = micro_abs(r[0].q); | ||
1250 | |||
1251 | STORE(&r[0], 0, chan_index); | ||
1252 | } | ||
1253 | break; | ||
1254 | |||
1255 | case TGSI_OPCODE_RCC: | ||
1256 | ASSERT (0); | ||
1257 | break; | ||
1258 | |||
1259 | case TGSI_OPCODE_DPH: | ||
1260 | FETCH(&r[0], 0, CHAN_X); | ||
1261 | FETCH(&r[1], 1, CHAN_X); | ||
1262 | |||
1263 | r[0].q = si_fm(r[0].q, r[1].q); | ||
1264 | |||
1265 | FETCH(&r[1], 0, CHAN_Y); | ||
1266 | FETCH(&r[2], 1, CHAN_Y); | ||
1267 | |||
1268 | r[0].q = si_fma(r[1].q, r[2].q, r[0].q); | ||
1269 | |||
1270 | FETCH(&r[1], 0, CHAN_Z); | ||
1271 | FETCH(&r[2], 1, CHAN_Z); | ||
1272 | |||
1273 | r[0].q = si_fma(r[1].q, r[2].q, r[0].q); | ||
1274 | |||
1275 | FETCH(&r[1], 1, CHAN_W); | ||
1276 | |||
1277 | r[0].q = si_fa(r[0].q, r[1].q); | ||
1278 | |||
1279 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1280 | STORE( &r[0], 0, chan_index ); | ||
1281 | } | ||
1282 | break; | ||
1283 | |||
1284 | case TGSI_OPCODE_COS: | ||
1285 | FETCH(&r[0], 0, CHAN_X); | ||
1286 | |||
1287 | r[0].q = micro_cos(r[0].q); | ||
1288 | |||
1289 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1290 | STORE( &r[0], 0, chan_index ); | ||
1291 | } | ||
1292 | break; | ||
1293 | |||
1294 | case TGSI_OPCODE_DDX: | ||
1295 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1296 | FETCH( &r[0], 0, chan_index ); | ||
1297 | r[0].q = micro_ddx(r[0].q); | ||
1298 | STORE( &r[0], 0, chan_index ); | ||
1299 | } | ||
1300 | break; | ||
1301 | |||
1302 | case TGSI_OPCODE_DDY: | ||
1303 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1304 | FETCH( &r[0], 0, chan_index ); | ||
1305 | r[0].q = micro_ddy(r[0].q); | ||
1306 | STORE( &r[0], 0, chan_index ); | ||
1307 | } | ||
1308 | break; | ||
1309 | |||
1310 | case TGSI_OPCODE_KILP: | ||
1311 | exec_kilp (mach, inst); | ||
1312 | break; | ||
1313 | |||
1314 | case TGSI_OPCODE_KIL: | ||
1315 | exec_kil (mach, inst); | ||
1316 | break; | ||
1317 | |||
1318 | case TGSI_OPCODE_PK2H: | ||
1319 | ASSERT (0); | ||
1320 | break; | ||
1321 | |||
1322 | case TGSI_OPCODE_PK2US: | ||
1323 | ASSERT (0); | ||
1324 | break; | ||
1325 | |||
1326 | case TGSI_OPCODE_PK4B: | ||
1327 | ASSERT (0); | ||
1328 | break; | ||
1329 | |||
1330 | case TGSI_OPCODE_PK4UB: | ||
1331 | ASSERT (0); | ||
1332 | break; | ||
1333 | |||
1334 | case TGSI_OPCODE_RFL: | ||
1335 | ASSERT (0); | ||
1336 | break; | ||
1337 | |||
1338 | case TGSI_OPCODE_SEQ: | ||
1339 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1340 | FETCH( &r[0], 0, chan_index ); | ||
1341 | FETCH( &r[1], 1, chan_index ); | ||
1342 | |||
1343 | r[0].q = si_fceq(r[0].q, r[1].q); | ||
1344 | |||
1345 | STORE( &r[0], 0, chan_index ); | ||
1346 | } | ||
1347 | break; | ||
1348 | |||
1349 | case TGSI_OPCODE_SFL: | ||
1350 | ASSERT (0); | ||
1351 | break; | ||
1352 | |||
1353 | case TGSI_OPCODE_SGT: | ||
1354 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1355 | FETCH( &r[0], 0, chan_index ); | ||
1356 | FETCH( &r[1], 1, chan_index ); | ||
1357 | r[0].q = si_fcgt(r[0].q, r[1].q); | ||
1358 | STORE( &r[0], 0, chan_index ); | ||
1359 | } | ||
1360 | break; | ||
1361 | |||
1362 | case TGSI_OPCODE_SIN: | ||
1363 | FETCH( &r[0], 0, CHAN_X ); | ||
1364 | r[0].q = micro_sin(r[0].q); | ||
1365 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1366 | STORE( &r[0], 0, chan_index ); | ||
1367 | } | ||
1368 | break; | ||
1369 | |||
1370 | case TGSI_OPCODE_SLE: | ||
1371 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1372 | FETCH( &r[0], 0, chan_index ); | ||
1373 | FETCH( &r[1], 1, chan_index ); | ||
1374 | |||
1375 | r[0].q = si_fcgt(r[0].q, r[1].q); | ||
1376 | r[0].q = si_xori(r[0].q, 0xff); | ||
1377 | |||
1378 | STORE( &r[0], 0, chan_index ); | ||
1379 | } | ||
1380 | break; | ||
1381 | |||
1382 | case TGSI_OPCODE_SNE: | ||
1383 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1384 | FETCH( &r[0], 0, chan_index ); | ||
1385 | FETCH( &r[1], 1, chan_index ); | ||
1386 | |||
1387 | r[0].q = si_fceq(r[0].q, r[1].q); | ||
1388 | r[0].q = si_xori(r[0].q, 0xff); | ||
1389 | |||
1390 | STORE( &r[0], 0, chan_index ); | ||
1391 | } | ||
1392 | break; | ||
1393 | |||
1394 | case TGSI_OPCODE_STR: | ||
1395 | ASSERT (0); | ||
1396 | break; | ||
1397 | |||
1398 | case TGSI_OPCODE_TEX: | ||
1399 | /* simple texture lookup */ | ||
1400 | /* src[0] = texcoord */ | ||
1401 | /* src[1] = sampler unit */ | ||
1402 | exec_tex(mach, inst, FALSE, FALSE); | ||
1403 | break; | ||
1404 | |||
1405 | case TGSI_OPCODE_TXB: | ||
1406 | /* Texture lookup with lod bias */ | ||
1407 | /* src[0] = texcoord (src[0].w = load bias) */ | ||
1408 | /* src[1] = sampler unit */ | ||
1409 | exec_tex(mach, inst, TRUE, FALSE); | ||
1410 | break; | ||
1411 | |||
1412 | case TGSI_OPCODE_TXD: | ||
1413 | /* Texture lookup with explict partial derivatives */ | ||
1414 | /* src[0] = texcoord */ | ||
1415 | /* src[1] = d[strq]/dx */ | ||
1416 | /* src[2] = d[strq]/dy */ | ||
1417 | /* src[3] = sampler unit */ | ||
1418 | ASSERT (0); | ||
1419 | break; | ||
1420 | |||
1421 | case TGSI_OPCODE_TXL: | ||
1422 | /* Texture lookup with explit LOD */ | ||
1423 | /* src[0] = texcoord (src[0].w = load bias) */ | ||
1424 | /* src[1] = sampler unit */ | ||
1425 | exec_tex(mach, inst, TRUE, FALSE); | ||
1426 | break; | ||
1427 | |||
1428 | case TGSI_OPCODE_TXP: | ||
1429 | /* Texture lookup with projection */ | ||
1430 | /* src[0] = texcoord (src[0].w = projection) */ | ||
1431 | /* src[1] = sampler unit */ | ||
1432 | exec_tex(mach, inst, TRUE, TRUE); | ||
1433 | break; | ||
1434 | |||
1435 | case TGSI_OPCODE_UP2H: | ||
1436 | ASSERT (0); | ||
1437 | break; | ||
1438 | |||
1439 | case TGSI_OPCODE_UP2US: | ||
1440 | ASSERT (0); | ||
1441 | break; | ||
1442 | |||
1443 | case TGSI_OPCODE_UP4B: | ||
1444 | ASSERT (0); | ||
1445 | break; | ||
1446 | |||
1447 | case TGSI_OPCODE_UP4UB: | ||
1448 | ASSERT (0); | ||
1449 | break; | ||
1450 | |||
1451 | case TGSI_OPCODE_X2D: | ||
1452 | ASSERT (0); | ||
1453 | break; | ||
1454 | |||
1455 | case TGSI_OPCODE_ARA: | ||
1456 | ASSERT (0); | ||
1457 | break; | ||
1458 | |||
1459 | case TGSI_OPCODE_ARR: | ||
1460 | ASSERT (0); | ||
1461 | break; | ||
1462 | |||
1463 | case TGSI_OPCODE_BRA: | ||
1464 | ASSERT (0); | ||
1465 | break; | ||
1466 | |||
1467 | case TGSI_OPCODE_CAL: | ||
1468 | /* skip the call if no execution channels are enabled */ | ||
1469 | if (mach->ExecMask) { | ||
1470 | /* do the call */ | ||
1471 | |||
1472 | /* push the Cond, Loop, Cont stacks */ | ||
1473 | ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); | ||
1474 | mach->CondStack[mach->CondStackTop++] = mach->CondMask; | ||
1475 | ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); | ||
1476 | mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; | ||
1477 | ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); | ||
1478 | mach->ContStack[mach->ContStackTop++] = mach->ContMask; | ||
1479 | |||
1480 | ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); | ||
1481 | mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; | ||
1482 | |||
1483 | /* note that PC was already incremented above */ | ||
1484 | mach->CallStack[mach->CallStackTop++] = *pc; | ||
1485 | *pc = inst->InstructionExtLabel.Label; | ||
1486 | } | ||
1487 | break; | ||
1488 | |||
1489 | case TGSI_OPCODE_RET: | ||
1490 | mach->FuncMask &= ~mach->ExecMask; | ||
1491 | UPDATE_EXEC_MASK(mach); | ||
1492 | |||
1493 | if (mach->ExecMask == 0x0) { | ||
1494 | /* really return now (otherwise, keep executing */ | ||
1495 | |||
1496 | if (mach->CallStackTop == 0) { | ||
1497 | /* returning from main() */ | ||
1498 | *pc = -1; | ||
1499 | return; | ||
1500 | } | ||
1501 | *pc = mach->CallStack[--mach->CallStackTop]; | ||
1502 | |||
1503 | /* pop the Cond, Loop, Cont stacks */ | ||
1504 | ASSERT(mach->CondStackTop > 0); | ||
1505 | mach->CondMask = mach->CondStack[--mach->CondStackTop]; | ||
1506 | ASSERT(mach->LoopStackTop > 0); | ||
1507 | mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; | ||
1508 | ASSERT(mach->ContStackTop > 0); | ||
1509 | mach->ContMask = mach->ContStack[--mach->ContStackTop]; | ||
1510 | ASSERT(mach->FuncStackTop > 0); | ||
1511 | mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; | ||
1512 | |||
1513 | UPDATE_EXEC_MASK(mach); | ||
1514 | } | ||
1515 | break; | ||
1516 | |||
1517 | case TGSI_OPCODE_SSG: | ||
1518 | ASSERT (0); | ||
1519 | break; | ||
1520 | |||
1521 | case TGSI_OPCODE_CMP: | ||
1522 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1523 | FETCH(&r[0], 0, chan_index); | ||
1524 | FETCH(&r[1], 1, chan_index); | ||
1525 | FETCH(&r[2], 2, chan_index); | ||
1526 | |||
1527 | /* r0 = (r0 < 0.0) ? r1 : r2 | ||
1528 | */ | ||
1529 | r[3].q = si_xor(r[3].q, r[3].q); | ||
1530 | r[0].q = micro_lt(r[0].q, r[3].q); | ||
1531 | r[0].q = si_selb(r[1].q, r[2].q, r[0].q); | ||
1532 | |||
1533 | STORE(&r[0], 0, chan_index); | ||
1534 | } | ||
1535 | break; | ||
1536 | |||
1537 | case TGSI_OPCODE_SCS: | ||
1538 | if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { | ||
1539 | FETCH( &r[0], 0, CHAN_X ); | ||
1540 | } | ||
1541 | if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { | ||
1542 | r[1].q = micro_cos(r[0].q); | ||
1543 | STORE( &r[1], 0, CHAN_X ); | ||
1544 | } | ||
1545 | if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { | ||
1546 | r[1].q = micro_sin(r[0].q); | ||
1547 | STORE( &r[1], 0, CHAN_Y ); | ||
1548 | } | ||
1549 | if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { | ||
1550 | STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); | ||
1551 | } | ||
1552 | if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { | ||
1553 | STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); | ||
1554 | } | ||
1555 | break; | ||
1556 | |||
1557 | case TGSI_OPCODE_NRM: | ||
1558 | ASSERT (0); | ||
1559 | break; | ||
1560 | |||
1561 | case TGSI_OPCODE_DIV: | ||
1562 | ASSERT( 0 ); | ||
1563 | break; | ||
1564 | |||
1565 | case TGSI_OPCODE_DP2: | ||
1566 | FETCH( &r[0], 0, CHAN_X ); | ||
1567 | FETCH( &r[1], 1, CHAN_X ); | ||
1568 | r[0].q = si_fm(r[0].q, r[1].q); | ||
1569 | |||
1570 | FETCH( &r[1], 0, CHAN_Y ); | ||
1571 | FETCH( &r[2], 1, CHAN_Y ); | ||
1572 | r[0].q = si_fma(r[1].q, r[2].q, r[0].q); | ||
1573 | |||
1574 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1575 | STORE( &r[0], 0, chan_index ); | ||
1576 | } | ||
1577 | break; | ||
1578 | |||
1579 | case TGSI_OPCODE_IF: | ||
1580 | /* push CondMask */ | ||
1581 | ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); | ||
1582 | mach->CondStack[mach->CondStackTop++] = mach->CondMask; | ||
1583 | FETCH( &r[0], 0, CHAN_X ); | ||
1584 | /* update CondMask */ | ||
1585 | if( ! r[0].u[0] ) { | ||
1586 | mach->CondMask &= ~0x1; | ||
1587 | } | ||
1588 | if( ! r[0].u[1] ) { | ||
1589 | mach->CondMask &= ~0x2; | ||
1590 | } | ||
1591 | if( ! r[0].u[2] ) { | ||
1592 | mach->CondMask &= ~0x4; | ||
1593 | } | ||
1594 | if( ! r[0].u[3] ) { | ||
1595 | mach->CondMask &= ~0x8; | ||
1596 | } | ||
1597 | UPDATE_EXEC_MASK(mach); | ||
1598 | /* Todo: If CondMask==0, jump to ELSE */ | ||
1599 | break; | ||
1600 | |||
1601 | case TGSI_OPCODE_ELSE: | ||
1602 | /* invert CondMask wrt previous mask */ | ||
1603 | { | ||
1604 | uint prevMask; | ||
1605 | ASSERT(mach->CondStackTop > 0); | ||
1606 | prevMask = mach->CondStack[mach->CondStackTop - 1]; | ||
1607 | mach->CondMask = ~mach->CondMask & prevMask; | ||
1608 | UPDATE_EXEC_MASK(mach); | ||
1609 | /* Todo: If CondMask==0, jump to ENDIF */ | ||
1610 | } | ||
1611 | break; | ||
1612 | |||
1613 | case TGSI_OPCODE_ENDIF: | ||
1614 | /* pop CondMask */ | ||
1615 | ASSERT(mach->CondStackTop > 0); | ||
1616 | mach->CondMask = mach->CondStack[--mach->CondStackTop]; | ||
1617 | UPDATE_EXEC_MASK(mach); | ||
1618 | break; | ||
1619 | |||
1620 | case TGSI_OPCODE_END: | ||
1621 | /* halt execution */ | ||
1622 | *pc = -1; | ||
1623 | break; | ||
1624 | |||
1625 | case TGSI_OPCODE_PUSHA: | ||
1626 | ASSERT (0); | ||
1627 | break; | ||
1628 | |||
1629 | case TGSI_OPCODE_POPA: | ||
1630 | ASSERT (0); | ||
1631 | break; | ||
1632 | |||
1633 | case TGSI_OPCODE_CEIL: | ||
1634 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1635 | FETCH( &r[0], 0, chan_index ); | ||
1636 | r[0].q = micro_ceil(r[0].q); | ||
1637 | STORE( &r[0], 0, chan_index ); | ||
1638 | } | ||
1639 | break; | ||
1640 | |||
1641 | case TGSI_OPCODE_I2F: | ||
1642 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1643 | FETCH( &r[0], 0, chan_index ); | ||
1644 | r[0].q = si_csflt(r[0].q, 0); | ||
1645 | STORE( &r[0], 0, chan_index ); | ||
1646 | } | ||
1647 | break; | ||
1648 | |||
1649 | case TGSI_OPCODE_NOT: | ||
1650 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1651 | FETCH( &r[0], 0, chan_index ); | ||
1652 | r[0].q = si_xorbi(r[0].q, 0xff); | ||
1653 | STORE( &r[0], 0, chan_index ); | ||
1654 | } | ||
1655 | break; | ||
1656 | |||
1657 | case TGSI_OPCODE_TRUNC: | ||
1658 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1659 | FETCH( &r[0], 0, chan_index ); | ||
1660 | r[0].q = micro_trunc(r[0].q); | ||
1661 | STORE( &r[0], 0, chan_index ); | ||
1662 | } | ||
1663 | break; | ||
1664 | |||
1665 | case TGSI_OPCODE_SHL: | ||
1666 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1667 | FETCH( &r[0], 0, chan_index ); | ||
1668 | FETCH( &r[1], 1, chan_index ); | ||
1669 | |||
1670 | r[0].q = si_shl(r[0].q, r[1].q); | ||
1671 | |||
1672 | STORE( &r[0], 0, chan_index ); | ||
1673 | } | ||
1674 | break; | ||
1675 | |||
1676 | case TGSI_OPCODE_ISHR: | ||
1677 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1678 | FETCH( &r[0], 0, chan_index ); | ||
1679 | FETCH( &r[1], 1, chan_index ); | ||
1680 | r[0].q = micro_ishr(r[0].q, r[1].q); | ||
1681 | STORE( &r[0], 0, chan_index ); | ||
1682 | } | ||
1683 | break; | ||
1684 | |||
1685 | case TGSI_OPCODE_AND: | ||
1686 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1687 | FETCH( &r[0], 0, chan_index ); | ||
1688 | FETCH( &r[1], 1, chan_index ); | ||
1689 | r[0].q = si_and(r[0].q, r[1].q); | ||
1690 | STORE( &r[0], 0, chan_index ); | ||
1691 | } | ||
1692 | break; | ||
1693 | |||
1694 | case TGSI_OPCODE_OR: | ||
1695 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1696 | FETCH( &r[0], 0, chan_index ); | ||
1697 | FETCH( &r[1], 1, chan_index ); | ||
1698 | r[0].q = si_or(r[0].q, r[1].q); | ||
1699 | STORE( &r[0], 0, chan_index ); | ||
1700 | } | ||
1701 | break; | ||
1702 | |||
1703 | case TGSI_OPCODE_MOD: | ||
1704 | ASSERT (0); | ||
1705 | break; | ||
1706 | |||
1707 | case TGSI_OPCODE_XOR: | ||
1708 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1709 | FETCH( &r[0], 0, chan_index ); | ||
1710 | FETCH( &r[1], 1, chan_index ); | ||
1711 | r[0].q = si_xor(r[0].q, r[1].q); | ||
1712 | STORE( &r[0], 0, chan_index ); | ||
1713 | } | ||
1714 | break; | ||
1715 | |||
1716 | case TGSI_OPCODE_SAD: | ||
1717 | ASSERT (0); | ||
1718 | break; | ||
1719 | |||
1720 | case TGSI_OPCODE_TXF: | ||
1721 | ASSERT (0); | ||
1722 | break; | ||
1723 | |||
1724 | case TGSI_OPCODE_TXQ: | ||
1725 | ASSERT (0); | ||
1726 | break; | ||
1727 | |||
1728 | case TGSI_OPCODE_EMIT: | ||
1729 | mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; | ||
1730 | mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; | ||
1731 | break; | ||
1732 | |||
1733 | case TGSI_OPCODE_ENDPRIM: | ||
1734 | mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; | ||
1735 | mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; | ||
1736 | break; | ||
1737 | |||
1738 | case TGSI_OPCODE_BGNLOOP: | ||
1739 | /* push LoopMask and ContMasks */ | ||
1740 | ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); | ||
1741 | mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; | ||
1742 | ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); | ||
1743 | mach->ContStack[mach->ContStackTop++] = mach->ContMask; | ||
1744 | break; | ||
1745 | |||
1746 | case TGSI_OPCODE_ENDLOOP: | ||
1747 | /* Restore ContMask, but don't pop */ | ||
1748 | ASSERT(mach->ContStackTop > 0); | ||
1749 | mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; | ||
1750 | if (mach->LoopMask) { | ||
1751 | /* repeat loop: jump to instruction just past BGNLOOP */ | ||
1752 | *pc = inst->InstructionExtLabel.Label + 1; | ||
1753 | } | ||
1754 | else { | ||
1755 | /* exit loop: pop LoopMask */ | ||
1756 | ASSERT(mach->LoopStackTop > 0); | ||
1757 | mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; | ||
1758 | /* pop ContMask */ | ||
1759 | ASSERT(mach->ContStackTop > 0); | ||
1760 | mach->ContMask = mach->ContStack[--mach->ContStackTop]; | ||
1761 | } | ||
1762 | UPDATE_EXEC_MASK(mach); | ||
1763 | break; | ||
1764 | |||
1765 | case TGSI_OPCODE_BRK: | ||
1766 | /* turn off loop channels for each enabled exec channel */ | ||
1767 | mach->LoopMask &= ~mach->ExecMask; | ||
1768 | /* Todo: if mach->LoopMask == 0, jump to end of loop */ | ||
1769 | UPDATE_EXEC_MASK(mach); | ||
1770 | break; | ||
1771 | |||
1772 | case TGSI_OPCODE_CONT: | ||
1773 | /* turn off cont channels for each enabled exec channel */ | ||
1774 | mach->ContMask &= ~mach->ExecMask; | ||
1775 | /* Todo: if mach->LoopMask == 0, jump to end of loop */ | ||
1776 | UPDATE_EXEC_MASK(mach); | ||
1777 | break; | ||
1778 | |||
1779 | case TGSI_OPCODE_BGNSUB: | ||
1780 | /* no-op */ | ||
1781 | break; | ||
1782 | |||
1783 | case TGSI_OPCODE_ENDSUB: | ||
1784 | /* no-op */ | ||
1785 | break; | ||
1786 | |||
1787 | case TGSI_OPCODE_NOP: | ||
1788 | break; | ||
1789 | |||
1790 | default: | ||
1791 | ASSERT( 0 ); | ||
1792 | } | ||
1793 | } | ||
1794 | |||
1795 | |||
1796 | /** | ||
1797 | * Run TGSI interpreter. | ||
1798 | * \return bitmask of "alive" quad components | ||
1799 | */ | ||
1800 | uint | ||
1801 | spu_exec_machine_run( struct spu_exec_machine *mach ) | ||
1802 | { | ||
1803 | uint i; | ||
1804 | int pc = 0; | ||
1805 | |||
1806 | mach->CondMask = 0xf; | ||
1807 | mach->LoopMask = 0xf; | ||
1808 | mach->ContMask = 0xf; | ||
1809 | mach->FuncMask = 0xf; | ||
1810 | mach->ExecMask = 0xf; | ||
1811 | |||
1812 | mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */ | ||
1813 | ASSERT(mach->CondStackTop == 0); | ||
1814 | ASSERT(mach->LoopStackTop == 0); | ||
1815 | ASSERT(mach->ContStackTop == 0); | ||
1816 | ASSERT(mach->CallStackTop == 0); | ||
1817 | |||
1818 | mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; | ||
1819 | mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; | ||
1820 | |||
1821 | if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { | ||
1822 | mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; | ||
1823 | mach->Primitives[0] = 0; | ||
1824 | } | ||
1825 | |||
1826 | |||
1827 | /* execute declarations (interpolants) */ | ||
1828 | if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { | ||
1829 | for (i = 0; i < mach->NumDeclarations; i++) { | ||
1830 | PIPE_ALIGN_VAR(16) | ||
1831 | union { | ||
1832 | struct tgsi_full_declaration decl; | ||
1833 | qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; | ||
1834 | } d; | ||
1835 | unsigned ea = (unsigned) (mach->Declarations + pc); | ||
1836 | |||
1837 | spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); | ||
1838 | |||
1839 | exec_declaration( mach, &d.decl ); | ||
1840 | } | ||
1841 | } | ||
1842 | |||
1843 | /* execute instructions, until pc is set to -1 */ | ||
1844 | while (pc != -1) { | ||
1845 | PIPE_ALIGN_VAR(16) | ||
1846 | union { | ||
1847 | struct tgsi_full_instruction inst; | ||
1848 | qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; | ||
1849 | } i; | ||
1850 | unsigned ea = (unsigned) (mach->Instructions + pc); | ||
1851 | |||
1852 | spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); | ||
1853 | exec_instruction( mach, & i.inst, &pc ); | ||
1854 | } | ||
1855 | |||
1856 | #if 0 | ||
1857 | /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ | ||
1858 | if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { | ||
1859 | /* | ||
1860 | * Scale back depth component. | ||
1861 | */ | ||
1862 | for (i = 0; i < 4; i++) | ||
1863 | mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; | ||
1864 | } | ||
1865 | #endif | ||
1866 | |||
1867 | return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; | ||
1868 | } | ||
1869 | |||
1870 | |||