diff options
author | Kai Wasserbäch <kai@dev.carbon-project.org> | 2011-11-29 18:17:47 +0100 |
---|---|---|
committer | José Fonseca <jfonseca@vmware.com> | 2011-11-29 20:26:53 +0000 |
commit | ccd4d4367f2b4e5aebfc59b832599812a4a1c7d8 (patch) | |
tree | b5e510235d4be8dd4644a72c721693eaae02c1b5 | |
parent | 09e67706e9a74600e16fe012ecfd192b0d31960a (diff) |
gallium/cell: Remove the driver.
Complicates Gallium3D development and doesn't seem to have active users.
Signed-off-by: Kai Wasserbäch <kai@dev.carbon-project.org>
Signed-off-by: José Fonseca <jfonseca@vmware.com>
91 files changed, 9 insertions, 21010 deletions
@@ -112,8 +112,6 @@ linux \ | |||
112 | linux-i965 \ | 112 | linux-i965 \ |
113 | linux-alpha \ | 113 | linux-alpha \ |
114 | linux-alpha-static \ | 114 | linux-alpha-static \ |
115 | linux-cell \ | ||
116 | linux-cell-debug \ | ||
117 | linux-debug \ | 115 | linux-debug \ |
118 | linux-dri \ | 116 | linux-dri \ |
119 | linux-dri-debug \ | 117 | linux-dri-debug \ |
@@ -83,7 +83,7 @@ def AddOptions(opts): | |||
83 | opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine, | 83 | opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine, |
84 | allowed_values=('generic', 'ppc', 'x86', 'x86_64'))) | 84 | allowed_values=('generic', 'ppc', 'x86', 'x86_64'))) |
85 | opts.Add(EnumOption('platform', 'target platform', host_platform, | 85 | opts.Add(EnumOption('platform', 'target platform', host_platform, |
86 | allowed_values=('linux', 'cell', 'windows', 'darwin', 'cygwin', 'sunos', 'freebsd8'))) | 86 | allowed_values=('linux', 'windows', 'darwin', 'cygwin', 'sunos', 'freebsd8'))) |
87 | opts.Add(BoolOption('embedded', 'embedded build', 'no')) | 87 | opts.Add(BoolOption('embedded', 'embedded build', 'no')) |
88 | opts.Add('toolchain', 'compiler toolchain', default_toolchain) | 88 | opts.Add('toolchain', 'compiler toolchain', default_toolchain) |
89 | opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', 'no')) | 89 | opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', 'no')) |
diff --git a/configs/linux-cell b/configs/linux-cell deleted file mode 100644 index 7f38da971d1..00000000000 --- a/configs/linux-cell +++ /dev/null | |||
@@ -1,71 +0,0 @@ | |||
1 | # linux-cell (non-debug build) | ||
2 | |||
3 | include $(TOP)/configs/linux | ||
4 | |||
5 | CONFIG_NAME = linux-cell | ||
6 | |||
7 | |||
8 | # Omitting other gallium drivers: | ||
9 | GALLIUM_DRIVERS_DIRS = cell softpipe trace rbug identity | ||
10 | |||
11 | |||
12 | # Compiler and flags | ||
13 | CC = ppu32-gcc | ||
14 | CXX = ppu32-g++ | ||
15 | HOST_CC = gcc | ||
16 | APP_CC = gcc | ||
17 | APP_CXX = g++ | ||
18 | |||
19 | OPT_FLAGS = -O3 | ||
20 | |||
21 | # Cell SDK location | ||
22 | ## For SDK 2.1: (plus, remove -DSPU_MAIN_PARAM_LONG_LONG below) | ||
23 | #SDK = /opt/ibm/cell-sdk/prototype/sysroot/usr | ||
24 | ## For SDK 3.0: | ||
25 | SDK = /opt/cell/sdk/usr | ||
26 | |||
27 | |||
28 | |||
29 | COMMON_C_CPP_FLAGS = $(OPT_FLAGS) -Wall -Winline \ | ||
30 | -fPIC -m32 -mabi=altivec -maltivec \ | ||
31 | -I. -I$(SDK)/include \ | ||
32 | -DGALLIUM_CELL $(DEFINES) | ||
33 | |||
34 | CFLAGS = $(COMMON_C_CPP_FLAGS) -Wmissing-prototypes -std=c99 | ||
35 | |||
36 | CXXFLAGS = $(COMMON_C_CPP_FLAGS) | ||
37 | |||
38 | |||
39 | SRC_DIRS = glsl mapi/glapi mapi/vgapi mesa \ | ||
40 | gallium gallium/winsys gallium/targets glu | ||
41 | |||
42 | # Build no traditional Mesa drivers: | ||
43 | DRIVER_DIRS = | ||
44 | |||
45 | |||
46 | MKDEP_OPTIONS = -fdepend -Y | ||
47 | |||
48 | |||
49 | GL_LIB_DEPS = $(EXTRA_LIB_PATH) -lX11 -lXext -lm -lpthread \ | ||
50 | -L$(SDK)/lib -m32 -Wl,-m,elf32ppc -R$(SDK)/lib -lspe2 | ||
51 | |||
52 | |||
53 | CELL_SPU_LIB = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a | ||
54 | |||
55 | |||
56 | ### SPU stuff | ||
57 | |||
58 | SPU_CC = spu-gcc | ||
59 | |||
60 | SPU_CFLAGS = $(OPT_FLAGS) -W -Wall -Winline -Wmissing-prototypes -Wno-main \ | ||
61 | -I. -I$(SDK)/spu/include -I$(TOP)/src/mesa/ $(INCLUDE_DIRS) \ | ||
62 | -DSPU_MAIN_PARAM_LONG_LONG \ | ||
63 | -include spu_intrinsics.h | ||
64 | |||
65 | SPU_LFLAGS = -L$(SDK)/spu/lib -Wl,-N -lmisc -lm | ||
66 | |||
67 | SPU_AR = ppu-ar | ||
68 | SPU_AR_FLAGS = -qcs | ||
69 | |||
70 | SPU_EMBED = ppu32-embedspu | ||
71 | SPU_EMBED_FLAGS = -m32 | ||
diff --git a/configs/linux-cell-debug b/configs/linux-cell-debug deleted file mode 100644 index 42f3245edc9..00000000000 --- a/configs/linux-cell-debug +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | # linux-cell-debug | ||
2 | |||
3 | include $(TOP)/configs/linux-cell | ||
4 | |||
5 | # just override name and OPT_FLAGS here: | ||
6 | |||
7 | CONFIG_NAME = linux-cell-debug | ||
8 | |||
9 | OPT_FLAGS = -g -DDEBUG | ||
10 | |||
diff --git a/docs/cell.html b/docs/cell.html deleted file mode 100644 index 30626b60b42..00000000000 --- a/docs/cell.html +++ /dev/null | |||
@@ -1,138 +0,0 @@ | |||
1 | <HTML> | ||
2 | |||
3 | <TITLE>Cell Driver</TITLE> | ||
4 | |||
5 | <link rel="stylesheet" type="text/css" href="mesa.css"></head> | ||
6 | |||
7 | <BODY> | ||
8 | |||
9 | <H1>Mesa/Gallium Cell Driver</H1> | ||
10 | |||
11 | <p> | ||
12 | The Mesa | ||
13 | <a href="http://en.wikipedia.org/wiki/Cell_%28microprocessor%29" target="_parent">Cell</a> | ||
14 | driver is part of the | ||
15 | <a href="http://wiki.freedesktop.org/wiki/Software/gallium" target="_parent">Gallium3D</a> | ||
16 | architecture. | ||
17 | Tungsten Graphics did the original implementation of the Cell driver. | ||
18 | </p> | ||
19 | |||
20 | |||
21 | <H2>Source Code</H2> | ||
22 | |||
23 | <p> | ||
24 | The latest Cell driver source code is on the master branch of the Mesa | ||
25 | git repository. | ||
26 | </p> | ||
27 | <p> | ||
28 | To build the driver you'll need the IBM Cell SDK (version 2.1 or 3.0). | ||
29 | To use the driver you'll need a Cell system, such as a PS3 running Linux, | ||
30 | or the Cell Simulator (untested, though). | ||
31 | </p> | ||
32 | |||
33 | <p> | ||
34 | If using Cell SDK 2.1, see the configs/linux-cell file for some | ||
35 | special changes. | ||
36 | </p> | ||
37 | |||
38 | <p> | ||
39 | To compile the code, run <code>make linux-cell</code>. | ||
40 | Or to build in debug mode, run <code>make linux-cell-debug</code>. | ||
41 | </p> | ||
42 | |||
43 | <p> | ||
44 | To use the library, make sure your current directory is the top of the | ||
45 | Mesa tree, then set <code>LD_LIBRARY_PATH</code> like this: | ||
46 | <pre> | ||
47 | export LD_LIBRARY_PATH=$PWD/lib/gallium:$PWD/lib/ | ||
48 | </pre> | ||
49 | |||
50 | <p> | ||
51 | Verify that the Cell driver is being used by running | ||
52 | <code>progs/xdemos/glxinfo</code> and looking for: | ||
53 | <pre> | ||
54 | OpenGL renderer string: Gallium 0.3, Cell on Xlib | ||
55 | </pre> | ||
56 | |||
57 | |||
58 | <H2>Driver Implementation Summary</H2> | ||
59 | |||
60 | <p> | ||
61 | Rasterization is parallelized across the SPUs in a tile-based manner. | ||
62 | Batches of transformed triangles are sent to the SPUs (actually, pulled from | ||
63 | main memory by the SPUs). | ||
64 | Each SPU loops over a set of 32x32-pixel screen tiles, rendering the triangles | ||
65 | into each tile. | ||
66 | Because of the limited SPU memory, framebuffer tiles are paged in/out of | ||
67 | SPU local store as needed. | ||
68 | Similarly, textures are tiled and brought into local store as needed. | ||
69 | </p> | ||
70 | |||
71 | |||
72 | <H2>Status</H2> | ||
73 | |||
74 | <p> | ||
75 | As of October 2008, the driver runs quite a few OpenGL demos. | ||
76 | Features that work include: | ||
77 | </p> | ||
78 | <ul> | ||
79 | <li>Point/line/triangle rendering, glDrawPixels | ||
80 | <li>2D, NPOT and cube texture maps with nearest/linear/mipmap filtering | ||
81 | <li>Dynamic SPU code generation for fragment shaders, but not complete | ||
82 | <li>Dynamic SPU code generation for fragment ops (blend, Z-test, etc), but not complete | ||
83 | <li>Dynamic PPU/PPC code generation for vertex shaders, but not complete | ||
84 | </ul> | ||
85 | <p> | ||
86 | Performance has recently improved with the addition of PPC code generation | ||
87 | for vertex shaders, but the code quality isn't too great yet. | ||
88 | </p> | ||
89 | <p> | ||
90 | Another bottleneck is SwapBuffers. It may be the limiting factor for | ||
91 | many simple GL tests. | ||
92 | </p> | ||
93 | |||
94 | |||
95 | |||
96 | <H2>Debug Options</H2> | ||
97 | |||
98 | <p> | ||
99 | The CELL_DEBUG env var can be set to a comma-separated list of one or | ||
100 | more of the following debug options: | ||
101 | </p> | ||
102 | <ul> | ||
103 | <li><b>checker</b> - use a different background clear color for each SPU. | ||
104 | This lets you see which SPU is rendering which screen tiles. | ||
105 | <li><b>sync</b> - wait/synchronize after each DMA transfer | ||
106 | <li><b>asm</b> - print generated SPU assembly code to stdout | ||
107 | <li><b>fragops</b> - emit fragment ops debug messages | ||
108 | <li><b>fragopfallback</b> - don't use codegen for fragment ops | ||
109 | <li><b>cmd</b> - print SPU commands as they're received | ||
110 | <li><b>cache</b> - print texture cache statistics when program exits | ||
111 | </ul> | ||
112 | <p> | ||
113 | Note that some of these options may only work for linux-cell-debug builds. | ||
114 | </p> | ||
115 | |||
116 | <p> | ||
117 | If the GALLIUM_NOPPC env var is set, PPC code generation will not be used | ||
118 | and vertex shaders will be run with the TGSI interpreter. | ||
119 | </p> | ||
120 | <p> | ||
121 | If the GALLIUM_NOCELL env var is set, the softpipe driver will be used | ||
122 | instead of the Cell driver. | ||
123 | This is useful for comparison/validation. | ||
124 | </p> | ||
125 | |||
126 | |||
127 | |||
128 | <H2>Contributing</H2> | ||
129 | |||
130 | <p> | ||
131 | If you're interested in contributing to the effort, familiarize yourself | ||
132 | with the code, join the <a href="lists.html">mesa3d-dev mailing list</a>, | ||
133 | and describe what you'd like to do. | ||
134 | </p> | ||
135 | |||
136 | |||
137 | </BODY> | ||
138 | </HTML> | ||
diff --git a/docs/contents.html b/docs/contents.html index 8882e731879..e3cea2a7ce3 100644 --- a/docs/contents.html +++ b/docs/contents.html | |||
@@ -78,8 +78,7 @@ a:visited { | |||
78 | <li><a href="devinfo.html" target="MainFrame">Development Notes</a> | 78 | <li><a href="devinfo.html" target="MainFrame">Development Notes</a> |
79 | <li><a href="sourcedocs.html" target="MainFrame">Source Documentation</a> | 79 | <li><a href="sourcedocs.html" target="MainFrame">Source Documentation</a> |
80 | <li><a href="subset.html" target="MainFrame">Mesa Subset Driver</a> | 80 | <li><a href="subset.html" target="MainFrame">Mesa Subset Driver</a> |
81 | <LI><A HREF="dispatch.html" target="MainFrame">GL Dispatch</A> | 81 | <li><a HREF="dispatch.html" target="MainFrame">GL Dispatch</a> |
82 | <li><a href="cell.html" target="MainFrame">Cell Driver</A> | ||
83 | </ul> | 82 | </ul> |
84 | 83 | ||
85 | <b>Links</b> | 84 | <b>Links</b> |
diff --git a/docs/news.html b/docs/news.html index 91284922880..d6a2aa8e6dd 100644 --- a/docs/news.html +++ b/docs/news.html | |||
@@ -217,7 +217,7 @@ This is a bug-fix release. | |||
217 | <h2>January 24, 2008</h2> | 217 | <h2>January 24, 2008</h2> |
218 | 218 | ||
219 | <p> | 219 | <p> |
220 | Added a new page describing the <a href="cell.html">Mesa Cell driver</a>. | 220 | Added a new page describing the Mesa Cell driver. |
221 | </p> | 221 | </p> |
222 | 222 | ||
223 | 223 | ||
diff --git a/docs/relnotes-7.12.html b/docs/relnotes-7.12.html index 0d2211358cf..393b1124e58 100644 --- a/docs/relnotes-7.12.html +++ b/docs/relnotes-7.12.html | |||
@@ -74,6 +74,8 @@ tbd | |||
74 | by the gallium drivers for this hardware.</li> | 74 | by the gallium drivers for this hardware.</li> |
75 | <li>Removed the i965g driver, which was broken and with nobody in sight to fix | 75 | <li>Removed the i965g driver, which was broken and with nobody in sight to fix |
76 | the situation</li> | 76 | the situation</li> |
77 | <li>Removed the Gallium cell driver, it was just a burden on Gallium | ||
78 | development and nobody seems to use it.</li> | ||
77 | </ul> | 79 | </ul> |
78 | 80 | ||
79 | 81 | ||
diff --git a/docs/relnotes-7.5.html b/docs/relnotes-7.5.html index 56deca6a86c..a25ca8efc11 100644 --- a/docs/relnotes-7.5.html +++ b/docs/relnotes-7.5.html | |||
@@ -61,7 +61,7 @@ baa7a1e850b6e39bae58868fd0684004 MesaGLUT-7.5.tar.bz2 | |||
61 | <ul> | 61 | <ul> |
62 | <li>softpipe - a software/reference driver | 62 | <li>softpipe - a software/reference driver |
63 | <li>i915 - Intel 915/945 driver | 63 | <li>i915 - Intel 915/945 driver |
64 | <li><a href="cell.html">Cell</a> - IBM/Sony/Toshiba Cell processor driver | 64 | <li>Cell - IBM/Sony/Toshiba Cell processor driver |
65 | <li>nouveau (for NVIDIA GPUs) and R300 for (AMD/ATI R300). | 65 | <li>nouveau (for NVIDIA GPUs) and R300 for (AMD/ATI R300). |
66 | <b>PLEASE NOTE: these drivers are incomplete and still under development. | 66 | <b>PLEASE NOTE: these drivers are incomplete and still under development. |
67 | It's probably NOT worthwhile to report any bugs unless you have patches. | 67 | It's probably NOT worthwhile to report any bugs unless you have patches. |
diff --git a/docs/sourcetree.html b/docs/sourcetree.html index 3f100df49e1..e26c653abbe 100644 --- a/docs/sourcetree.html +++ b/docs/sourcetree.html | |||
@@ -86,7 +86,6 @@ each directory. | |||
86 | interfaces | 86 | interfaces |
87 | <li><b>drivers</b> - Gallium3D device drivers | 87 | <li><b>drivers</b> - Gallium3D device drivers |
88 | <ul> | 88 | <ul> |
89 | <li><b>cell</b> - Driver for Cell processor. | ||
90 | <li><b>i915</b> - Driver for Intel i915/i945. | 89 | <li><b>i915</b> - Driver for Intel i915/i945. |
91 | <li><b>llvmpipe</b> - Software driver using LLVM for runtime code generation. | 90 | <li><b>llvmpipe</b> - Software driver using LLVM for runtime code generation. |
92 | <li><b>nv*</b> - Drivers for NVIDIA GPUs. | 91 | <li><b>nv*</b> - Drivers for NVIDIA GPUs. |
diff --git a/doxygen/gallium.doc b/doxygen/gallium.doc index f0ff36075a5..e81b02e1aa6 100644 --- a/doxygen/gallium.doc +++ b/doxygen/gallium.doc | |||
@@ -34,7 +34,6 @@ | |||
34 | - Pipe drivers: | 34 | - Pipe drivers: |
35 | - \ref softpipe | 35 | - \ref softpipe |
36 | - \ref i915g | 36 | - \ref i915g |
37 | - Cell driver (cell_context.h, cell_winsys.h) | ||
38 | - \ref failover | 37 | - \ref failover |
39 | 38 | ||
40 | - Winsys drivers: | 39 | - Winsys drivers: |
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index 15de20cb3a3..5c65533308c 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources | |||
@@ -63,7 +63,6 @@ C_SOURCES := \ | |||
63 | rtasm/rtasm_cpu.c \ | 63 | rtasm/rtasm_cpu.c \ |
64 | rtasm/rtasm_execmem.c \ | 64 | rtasm/rtasm_execmem.c \ |
65 | rtasm/rtasm_ppc.c \ | 65 | rtasm/rtasm_ppc.c \ |
66 | rtasm/rtasm_ppc_spe.c \ | ||
67 | rtasm/rtasm_x86sse.c \ | 66 | rtasm/rtasm_x86sse.c \ |
68 | tgsi/tgsi_build.c \ | 67 | tgsi/tgsi_build.c \ |
69 | tgsi/tgsi_dump.c \ | 68 | tgsi/tgsi_dump.c \ |
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c deleted file mode 100644 index 53a0e722cff..00000000000 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ /dev/null | |||
@@ -1,1067 +0,0 @@ | |||
1 | /* | ||
2 | * (C) Copyright IBM Corporation 2008 | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
6 | * copy of this software and associated documentation files (the "Software"), | ||
7 | * to deal in the Software without restriction, including without limitation | ||
8 | * on the rights to use, copy, modify, merge, publish, distribute, sub | ||
9 | * license, and/or sell copies of the Software, and to permit persons to whom | ||
10 | * the Software is furnished to do so, subject to the following conditions: | ||
11 | * | ||
12 | * The above copyright notice and this permission notice (including the next | ||
13 | * paragraph) shall be included in all copies or substantial portions of the | ||
14 | * Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL | ||
19 | * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, | ||
20 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | ||
21 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | ||
22 | * USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | /** | ||
26 | * \file | ||
27 | * Real-time assembly generation interface for Cell B.E. SPEs. | ||
28 | * | ||
29 | * \author Ian Romanick <idr@us.ibm.com> | ||
30 | * \author Brian Paul | ||
31 | */ | ||
32 | |||
33 | |||
34 | #include <stdio.h> | ||
35 | #include "pipe/p_compiler.h" | ||
36 | #include "util/u_memory.h" | ||
37 | #include "rtasm_ppc_spe.h" | ||
38 | |||
39 | |||
40 | #ifdef GALLIUM_CELL | ||
41 | /** | ||
42 | * SPE instruction types | ||
43 | * | ||
44 | * There are 6 primary instruction encodings used on the Cell's SPEs. Each of | ||
45 | * the following unions encodes one type. | ||
46 | * | ||
47 | * \bug | ||
48 | * If, at some point, we start generating SPE code from a little-endian host | ||
49 | * these unions will not work. | ||
50 | */ | ||
51 | /*@{*/ | ||
52 | /** | ||
53 | * Encode one output register with two input registers | ||
54 | */ | ||
55 | union spe_inst_RR { | ||
56 | uint32_t bits; | ||
57 | struct { | ||
58 | unsigned op:11; | ||
59 | unsigned rB:7; | ||
60 | unsigned rA:7; | ||
61 | unsigned rT:7; | ||
62 | } inst; | ||
63 | }; | ||
64 | |||
65 | |||
66 | /** | ||
67 | * Encode one output register with three input registers | ||
68 | */ | ||
69 | union spe_inst_RRR { | ||
70 | uint32_t bits; | ||
71 | struct { | ||
72 | unsigned op:4; | ||
73 | unsigned rT:7; | ||
74 | unsigned rB:7; | ||
75 | unsigned rA:7; | ||
76 | unsigned rC:7; | ||
77 | } inst; | ||
78 | }; | ||
79 | |||
80 | |||
81 | /** | ||
82 | * Encode one output register with one input reg. and a 7-bit signed immed | ||
83 | */ | ||
84 | union spe_inst_RI7 { | ||
85 | uint32_t bits; | ||
86 | struct { | ||
87 | unsigned op:11; | ||
88 | unsigned i7:7; | ||
89 | unsigned rA:7; | ||
90 | unsigned rT:7; | ||
91 | } inst; | ||
92 | }; | ||
93 | |||
94 | |||
95 | /** | ||
96 | * Encode one output register with one input reg. and an 8-bit signed immed | ||
97 | */ | ||
98 | union spe_inst_RI8 { | ||
99 | uint32_t bits; | ||
100 | struct { | ||
101 | unsigned op:10; | ||
102 | unsigned i8:8; | ||
103 | unsigned rA:7; | ||
104 | unsigned rT:7; | ||
105 | } inst; | ||
106 | }; | ||
107 | |||
108 | |||
109 | /** | ||
110 | * Encode one output register with one input reg. and a 10-bit signed immed | ||
111 | */ | ||
112 | union spe_inst_RI10 { | ||
113 | uint32_t bits; | ||
114 | struct { | ||
115 | unsigned op:8; | ||
116 | unsigned i10:10; | ||
117 | unsigned rA:7; | ||
118 | unsigned rT:7; | ||
119 | } inst; | ||
120 | }; | ||
121 | |||
122 | |||
123 | /** | ||
124 | * Encode one output register with a 16-bit signed immediate | ||
125 | */ | ||
126 | union spe_inst_RI16 { | ||
127 | uint32_t bits; | ||
128 | struct { | ||
129 | unsigned op:9; | ||
130 | unsigned i16:16; | ||
131 | unsigned rT:7; | ||
132 | } inst; | ||
133 | }; | ||
134 | |||
135 | |||
136 | /** | ||
137 | * Encode one output register with an 18-bit signed immediate | ||
138 | */ | ||
139 | union spe_inst_RI18 { | ||
140 | uint32_t bits; | ||
141 | struct { | ||
142 | unsigned op:7; | ||
143 | unsigned i18:18; | ||
144 | unsigned rT:7; | ||
145 | } inst; | ||
146 | }; | ||
147 | /*@}*/ | ||
148 | |||
149 | |||
150 | static void | ||
151 | indent(const struct spe_function *p) | ||
152 | { | ||
153 | int i; | ||
154 | for (i = 0; i < p->indent; i++) { | ||
155 | putchar(' '); | ||
156 | } | ||
157 | } | ||
158 | |||
159 | |||
160 | static const char * | ||
161 | rem_prefix(const char *longname) | ||
162 | { | ||
163 | return longname + 4; | ||
164 | } | ||
165 | |||
166 | |||
167 | static const char * | ||
168 | reg_name(int reg) | ||
169 | { | ||
170 | switch (reg) { | ||
171 | case SPE_REG_SP: | ||
172 | return "$sp"; | ||
173 | case SPE_REG_RA: | ||
174 | return "$lr"; | ||
175 | default: | ||
176 | { | ||
177 | /* cycle through four buffers to handle multiple calls per printf */ | ||
178 | static char buf[4][10]; | ||
179 | static int b = 0; | ||
180 | b = (b + 1) % 4; | ||
181 | sprintf(buf[b], "$%d", reg); | ||
182 | return buf[b]; | ||
183 | } | ||
184 | } | ||
185 | } | ||
186 | |||
187 | |||
188 | static void | ||
189 | emit_instruction(struct spe_function *p, uint32_t inst_bits) | ||
190 | { | ||
191 | if (!p->store) | ||
192 | return; /* out of memory, drop the instruction */ | ||
193 | |||
194 | if (p->num_inst == p->max_inst) { | ||
195 | /* allocate larger buffer */ | ||
196 | uint32_t *newbuf; | ||
197 | p->max_inst *= 2; /* 2x larger */ | ||
198 | newbuf = align_malloc(p->max_inst * SPE_INST_SIZE, 16); | ||
199 | if (newbuf) { | ||
200 | memcpy(newbuf, p->store, p->num_inst * SPE_INST_SIZE); | ||
201 | } | ||
202 | align_free(p->store); | ||
203 | p->store = newbuf; | ||
204 | if (!p->store) { | ||
205 | /* out of memory */ | ||
206 | p->num_inst = 0; | ||
207 | return; | ||
208 | } | ||
209 | } | ||
210 | |||
211 | p->store[p->num_inst++] = inst_bits; | ||
212 | } | ||
213 | |||
214 | |||
215 | |||
216 | static void emit_RR(struct spe_function *p, unsigned op, int rT, | ||
217 | int rA, int rB, const char *name) | ||
218 | { | ||
219 | union spe_inst_RR inst; | ||
220 | inst.inst.op = op; | ||
221 | inst.inst.rB = rB; | ||
222 | inst.inst.rA = rA; | ||
223 | inst.inst.rT = rT; | ||
224 | emit_instruction(p, inst.bits); | ||
225 | if (p->print) { | ||
226 | indent(p); | ||
227 | printf("%s\t%s, %s, %s\n", | ||
228 | rem_prefix(name), reg_name(rT), reg_name(rA), reg_name(rB)); | ||
229 | } | ||
230 | } | ||
231 | |||
232 | |||
233 | static void emit_RRR(struct spe_function *p, unsigned op, int rT, | ||
234 | int rA, int rB, int rC, const char *name) | ||
235 | { | ||
236 | union spe_inst_RRR inst; | ||
237 | inst.inst.op = op; | ||
238 | inst.inst.rT = rT; | ||
239 | inst.inst.rB = rB; | ||
240 | inst.inst.rA = rA; | ||
241 | inst.inst.rC = rC; | ||
242 | emit_instruction(p, inst.bits); | ||
243 | if (p->print) { | ||
244 | indent(p); | ||
245 | printf("%s\t%s, %s, %s, %s\n", rem_prefix(name), reg_name(rT), | ||
246 | reg_name(rA), reg_name(rB), reg_name(rC)); | ||
247 | } | ||
248 | } | ||
249 | |||
250 | |||
251 | static void emit_RI7(struct spe_function *p, unsigned op, int rT, | ||
252 | int rA, int imm, const char *name) | ||
253 | { | ||
254 | union spe_inst_RI7 inst; | ||
255 | inst.inst.op = op; | ||
256 | inst.inst.i7 = imm; | ||
257 | inst.inst.rA = rA; | ||
258 | inst.inst.rT = rT; | ||
259 | emit_instruction(p, inst.bits); | ||
260 | if (p->print) { | ||
261 | indent(p); | ||
262 | printf("%s\t%s, %s, 0x%x\n", | ||
263 | rem_prefix(name), reg_name(rT), reg_name(rA), imm); | ||
264 | } | ||
265 | } | ||
266 | |||
267 | |||
268 | |||
269 | static void emit_RI8(struct spe_function *p, unsigned op, int rT, | ||
270 | int rA, int imm, const char *name) | ||
271 | { | ||
272 | union spe_inst_RI8 inst; | ||
273 | inst.inst.op = op; | ||
274 | inst.inst.i8 = imm; | ||
275 | inst.inst.rA = rA; | ||
276 | inst.inst.rT = rT; | ||
277 | emit_instruction(p, inst.bits); | ||
278 | if (p->print) { | ||
279 | indent(p); | ||
280 | printf("%s\t%s, %s, 0x%x\n", | ||
281 | rem_prefix(name), reg_name(rT), reg_name(rA), imm); | ||
282 | } | ||
283 | } | ||
284 | |||
285 | |||
286 | |||
287 | static void emit_RI10(struct spe_function *p, unsigned op, int rT, | ||
288 | int rA, int imm, const char *name) | ||
289 | { | ||
290 | union spe_inst_RI10 inst; | ||
291 | inst.inst.op = op; | ||
292 | inst.inst.i10 = imm; | ||
293 | inst.inst.rA = rA; | ||
294 | inst.inst.rT = rT; | ||
295 | emit_instruction(p, inst.bits); | ||
296 | if (p->print) { | ||
297 | indent(p); | ||
298 | printf("%s\t%s, %s, 0x%x\n", | ||
299 | rem_prefix(name), reg_name(rT), reg_name(rA), imm); | ||
300 | } | ||
301 | } | ||
302 | |||
303 | |||
304 | /** As above, but do range checking on signed immediate value */ | ||
305 | static void emit_RI10s(struct spe_function *p, unsigned op, int rT, | ||
306 | int rA, int imm, const char *name) | ||
307 | { | ||
308 | assert(imm <= 511); | ||
309 | assert(imm >= -512); | ||
310 | emit_RI10(p, op, rT, rA, imm, name); | ||
311 | } | ||
312 | |||
313 | |||
314 | static void emit_RI16(struct spe_function *p, unsigned op, int rT, | ||
315 | int imm, const char *name) | ||
316 | { | ||
317 | union spe_inst_RI16 inst; | ||
318 | inst.inst.op = op; | ||
319 | inst.inst.i16 = imm; | ||
320 | inst.inst.rT = rT; | ||
321 | emit_instruction(p, inst.bits); | ||
322 | if (p->print) { | ||
323 | indent(p); | ||
324 | printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm); | ||
325 | } | ||
326 | } | ||
327 | |||
328 | |||
329 | static void emit_RI18(struct spe_function *p, unsigned op, int rT, | ||
330 | int imm, const char *name) | ||
331 | { | ||
332 | union spe_inst_RI18 inst; | ||
333 | inst.inst.op = op; | ||
334 | inst.inst.i18 = imm; | ||
335 | inst.inst.rT = rT; | ||
336 | emit_instruction(p, inst.bits); | ||
337 | if (p->print) { | ||
338 | indent(p); | ||
339 | printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm); | ||
340 | } | ||
341 | } | ||
342 | |||
343 | |||
344 | #define EMIT(_name, _op) \ | ||
345 | void _name (struct spe_function *p) \ | ||
346 | { \ | ||
347 | emit_RR(p, _op, 0, 0, 0, __FUNCTION__); \ | ||
348 | } | ||
349 | |||
350 | #define EMIT_(_name, _op) \ | ||
351 | void _name (struct spe_function *p, int rT) \ | ||
352 | { \ | ||
353 | emit_RR(p, _op, rT, 0, 0, __FUNCTION__); \ | ||
354 | } | ||
355 | |||
356 | #define EMIT_R(_name, _op) \ | ||
357 | void _name (struct spe_function *p, int rT, int rA) \ | ||
358 | { \ | ||
359 | emit_RR(p, _op, rT, rA, 0, __FUNCTION__); \ | ||
360 | } | ||
361 | |||
362 | #define EMIT_RR(_name, _op) \ | ||
363 | void _name (struct spe_function *p, int rT, int rA, int rB) \ | ||
364 | { \ | ||
365 | emit_RR(p, _op, rT, rA, rB, __FUNCTION__); \ | ||
366 | } | ||
367 | |||
368 | #define EMIT_RRR(_name, _op) \ | ||
369 | void _name (struct spe_function *p, int rT, int rA, int rB, int rC) \ | ||
370 | { \ | ||
371 | emit_RRR(p, _op, rT, rA, rB, rC, __FUNCTION__); \ | ||
372 | } | ||
373 | |||
374 | #define EMIT_RI7(_name, _op) \ | ||
375 | void _name (struct spe_function *p, int rT, int rA, int imm) \ | ||
376 | { \ | ||
377 | emit_RI7(p, _op, rT, rA, imm, __FUNCTION__); \ | ||
378 | } | ||
379 | |||
380 | #define EMIT_RI8(_name, _op, bias) \ | ||
381 | void _name (struct spe_function *p, int rT, int rA, int imm) \ | ||
382 | { \ | ||
383 | emit_RI8(p, _op, rT, rA, bias - imm, __FUNCTION__); \ | ||
384 | } | ||
385 | |||
386 | #define EMIT_RI10(_name, _op) \ | ||
387 | void _name (struct spe_function *p, int rT, int rA, int imm) \ | ||
388 | { \ | ||
389 | emit_RI10(p, _op, rT, rA, imm, __FUNCTION__); \ | ||
390 | } | ||
391 | |||
392 | #define EMIT_RI10s(_name, _op) \ | ||
393 | void _name (struct spe_function *p, int rT, int rA, int imm) \ | ||
394 | { \ | ||
395 | emit_RI10s(p, _op, rT, rA, imm, __FUNCTION__); \ | ||
396 | } | ||
397 | |||
398 | #define EMIT_RI16(_name, _op) \ | ||
399 | void _name (struct spe_function *p, int rT, int imm) \ | ||
400 | { \ | ||
401 | emit_RI16(p, _op, rT, imm, __FUNCTION__); \ | ||
402 | } | ||
403 | |||
404 | #define EMIT_RI18(_name, _op) \ | ||
405 | void _name (struct spe_function *p, int rT, int imm) \ | ||
406 | { \ | ||
407 | emit_RI18(p, _op, rT, imm, __FUNCTION__); \ | ||
408 | } | ||
409 | |||
410 | #define EMIT_I16(_name, _op) \ | ||
411 | void _name (struct spe_function *p, int imm) \ | ||
412 | { \ | ||
413 | emit_RI16(p, _op, 0, imm, __FUNCTION__); \ | ||
414 | } | ||
415 | |||
416 | #include "rtasm_ppc_spe.h" | ||
417 | |||
418 | |||
419 | |||
420 | /** | ||
421 | * Initialize an spe_function. | ||
422 | * \param code_size initial size of instruction buffer to allocate, in bytes. | ||
423 | * If zero, use a default. | ||
424 | */ | ||
425 | void spe_init_func(struct spe_function *p, unsigned code_size) | ||
426 | { | ||
427 | uint i; | ||
428 | |||
429 | if (!code_size) | ||
430 | code_size = 64; | ||
431 | |||
432 | p->num_inst = 0; | ||
433 | p->max_inst = code_size / SPE_INST_SIZE; | ||
434 | p->store = align_malloc(code_size, 16); | ||
435 | |||
436 | p->set_count = 0; | ||
437 | memset(p->regs, 0, SPE_NUM_REGS * sizeof(p->regs[0])); | ||
438 | |||
439 | /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile. | ||
440 | */ | ||
441 | p->regs[0] = p->regs[1] = p->regs[2] = 1; | ||
442 | for (i = 80; i <= 127; i++) { | ||
443 | p->regs[i] = 1; | ||
444 | } | ||
445 | |||
446 | p->print = FALSE; | ||
447 | p->indent = 0; | ||
448 | } | ||
449 | |||
450 | |||
451 | void spe_release_func(struct spe_function *p) | ||
452 | { | ||
453 | assert(p->num_inst <= p->max_inst); | ||
454 | if (p->store != NULL) { | ||
455 | align_free(p->store); | ||
456 | } | ||
457 | p->store = NULL; | ||
458 | } | ||
459 | |||
460 | |||
461 | /** Return current code size in bytes. */ | ||
462 | unsigned spe_code_size(const struct spe_function *p) | ||
463 | { | ||
464 | return p->num_inst * SPE_INST_SIZE; | ||
465 | } | ||
466 | |||
467 | |||
468 | /** | ||
469 | * Allocate a SPE register. | ||
470 | * \return register index or -1 if none left. | ||
471 | */ | ||
472 | int spe_allocate_available_register(struct spe_function *p) | ||
473 | { | ||
474 | unsigned i; | ||
475 | for (i = 0; i < SPE_NUM_REGS; i++) { | ||
476 | if (p->regs[i] == 0) { | ||
477 | p->regs[i] = 1; | ||
478 | return i; | ||
479 | } | ||
480 | } | ||
481 | |||
482 | return -1; | ||
483 | } | ||
484 | |||
485 | |||
486 | /** | ||
487 | * Mark the given SPE register as "allocated". | ||
488 | */ | ||
489 | int spe_allocate_register(struct spe_function *p, int reg) | ||
490 | { | ||
491 | assert(reg < SPE_NUM_REGS); | ||
492 | assert(p->regs[reg] == 0); | ||
493 | p->regs[reg] = 1; | ||
494 | return reg; | ||
495 | } | ||
496 | |||
497 | |||
498 | /** | ||
499 | * Mark the given SPE register as "unallocated". Note that this should | ||
500 | * only be used on registers allocated in the current register set; an | ||
501 | * assertion will fail if an attempt is made to deallocate a register | ||
502 | * allocated in an earlier register set. | ||
503 | */ | ||
504 | void spe_release_register(struct spe_function *p, int reg) | ||
505 | { | ||
506 | assert(reg >= 0); | ||
507 | assert(reg < SPE_NUM_REGS); | ||
508 | assert(p->regs[reg] == 1); | ||
509 | |||
510 | p->regs[reg] = 0; | ||
511 | } | ||
512 | |||
513 | /** | ||
514 | * Start a new set of registers. This can be called if | ||
515 | * it will be difficult later to determine exactly what | ||
516 | * registers were actually allocated during a code generation | ||
517 | * sequence, and you really just want to deallocate all of them. | ||
518 | */ | ||
519 | void spe_allocate_register_set(struct spe_function *p) | ||
520 | { | ||
521 | uint i; | ||
522 | |||
523 | /* Keep track of the set count. If it ever wraps around to 0, | ||
524 | * we're in trouble. | ||
525 | */ | ||
526 | p->set_count++; | ||
527 | assert(p->set_count > 0); | ||
528 | |||
529 | /* Increment the allocation count of all registers currently | ||
530 | * allocated. Then any registers that are allocated in this set | ||
531 | * will be the only ones with a count of 1; they'll all be released | ||
532 | * when the register set is released. | ||
533 | */ | ||
534 | for (i = 0; i < SPE_NUM_REGS; i++) { | ||
535 | if (p->regs[i] > 0) | ||
536 | p->regs[i]++; | ||
537 | } | ||
538 | } | ||
539 | |||
540 | void spe_release_register_set(struct spe_function *p) | ||
541 | { | ||
542 | uint i; | ||
543 | |||
544 | /* If the set count drops below zero, we're in trouble. */ | ||
545 | assert(p->set_count > 0); | ||
546 | p->set_count--; | ||
547 | |||
548 | /* Drop the allocation level of all registers. Any allocated | ||
549 | * during this register set will drop to 0 and then become | ||
550 | * available. | ||
551 | */ | ||
552 | for (i = 0; i < SPE_NUM_REGS; i++) { | ||
553 | if (p->regs[i] > 0) | ||
554 | p->regs[i]--; | ||
555 | } | ||
556 | } | ||
557 | |||
558 | |||
559 | unsigned | ||
560 | spe_get_registers_used(const struct spe_function *p, ubyte used[]) | ||
561 | { | ||
562 | unsigned i, num = 0; | ||
563 | /* only count registers in the range available to callers */ | ||
564 | for (i = 2; i < 80; i++) { | ||
565 | if (p->regs[i]) { | ||
566 | used[num++] = i; | ||
567 | } | ||
568 | } | ||
569 | return num; | ||
570 | } | ||
571 | |||
572 | |||
/**
 * Turn printing of emitted instructions on or off.
 *
 * \param p       function being generated
 * \param enable  new value of p->print, the flag the emit helpers in
 *                this file test before printing
 */
void
spe_print_code(struct spe_function *p, boolean enable)
{
   p->print = enable;
}
578 | |||
579 | |||
580 | void | ||
581 | spe_indent(struct spe_function *p, int spaces) | ||
582 | { | ||
583 | p->indent += spaces; | ||
584 | } | ||
585 | |||
586 | |||
587 | void | ||
588 | spe_comment(struct spe_function *p, int rel_indent, const char *s) | ||
589 | { | ||
590 | if (p->print) { | ||
591 | p->indent += rel_indent; | ||
592 | indent(p); | ||
593 | p->indent -= rel_indent; | ||
594 | printf("# %s\n", s); | ||
595 | } | ||
596 | } | ||
597 | |||
598 | |||
599 | /** | ||
600 | * Load quad word. | ||
601 | * NOTE: offset is in bytes and the least significant 4 bits must be zero! | ||
602 | */ | ||
603 | void spe_lqd(struct spe_function *p, int rT, int rA, int offset) | ||
604 | { | ||
605 | const boolean pSave = p->print; | ||
606 | |||
607 | /* offset must be a multiple of 16 */ | ||
608 | assert(offset % 16 == 0); | ||
609 | /* offset must fit in 10-bit signed int field, after shifting */ | ||
610 | assert((offset >> 4) <= 511); | ||
611 | assert((offset >> 4) >= -512); | ||
612 | |||
613 | p->print = FALSE; | ||
614 | emit_RI10(p, 0x034, rT, rA, offset >> 4, "spe_lqd"); | ||
615 | p->print = pSave; | ||
616 | |||
617 | if (p->print) { | ||
618 | indent(p); | ||
619 | printf("lqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA)); | ||
620 | } | ||
621 | } | ||
622 | |||
623 | |||
624 | /** | ||
625 | * Store quad word. | ||
626 | * NOTE: offset is in bytes and the least significant 4 bits must be zero! | ||
627 | */ | ||
628 | void spe_stqd(struct spe_function *p, int rT, int rA, int offset) | ||
629 | { | ||
630 | const boolean pSave = p->print; | ||
631 | |||
632 | /* offset must be a multiple of 16 */ | ||
633 | assert(offset % 16 == 0); | ||
634 | /* offset must fit in 10-bit signed int field, after shifting */ | ||
635 | assert((offset >> 4) <= 511); | ||
636 | assert((offset >> 4) >= -512); | ||
637 | |||
638 | p->print = FALSE; | ||
639 | emit_RI10(p, 0x024, rT, rA, offset >> 4, "spe_stqd"); | ||
640 | p->print = pSave; | ||
641 | |||
642 | if (p->print) { | ||
643 | indent(p); | ||
644 | printf("stqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA)); | ||
645 | } | ||
646 | } | ||
647 | |||
648 | |||
649 | /** | ||
650 | * For branch instructions: | ||
651 | * \param d if 1, disable interrupts if branch is taken | ||
652 | * \param e if 1, enable interrupts if branch is taken | ||
653 | * If d and e are both zero, don't change interrupt status (right?) | ||
654 | */ | ||
655 | |||
/**
 * Branch Indirect to the address in rA.
 *
 * \param d  if 1, disable interrupts when the branch is taken
 * \param e  if 1, enable interrupts when the branch is taken
 */
void spe_bi(struct spe_function *p, int rA, int d, int e)
{
   /* d and e occupy bits 5 and 4 of the 7-bit immediate field */
   const int irq_bits = (d << 5) | (e << 4);

   emit_RI7(p, 0x1a8, 0, rA, irq_bits, __FUNCTION__);
}
661 | |||
/**
 * Interrupt Return.
 *
 * \param d  if 1, disable interrupts when the branch is taken
 * \param e  if 1, enable interrupts when the branch is taken
 */
void spe_iret(struct spe_function *p, int rA, int d, int e)
{
   /* d and e occupy bits 5 and 4 of the 7-bit immediate field */
   const int irq_bits = (d << 5) | (e << 4);

   emit_RI7(p, 0x1aa, 0, rA, irq_bits, __FUNCTION__);
}
667 | |||
/**
 * Branch indirect and set link on external data.
 *
 * \param d  if 1, disable interrupts when the branch is taken
 * \param e  if 1, enable interrupts when the branch is taken
 */
void spe_bisled(struct spe_function *p, int rT, int rA, int d, int e)
{
   /* d and e occupy bits 5 and 4 of the 7-bit immediate field */
   const int irq_bits = (d << 5) | (e << 4);

   emit_RI7(p, 0x1ab, rT, rA, irq_bits, __FUNCTION__);
}
674 | |||
/**
 * Branch indirect and set link: save the PC in rT, jump to rA.
 *
 * \param d  if 1, disable interrupts when the branch is taken
 * \param e  if 1, enable interrupts when the branch is taken
 */
void spe_bisl(struct spe_function *p, int rT, int rA, int d, int e)
{
   /* d and e occupy bits 5 and 4 of the 7-bit immediate field */
   const int irq_bits = (d << 5) | (e << 4);

   emit_RI7(p, 0x1a9, rT, rA, irq_bits, __FUNCTION__);
}
681 | |||
/**
 * Branch indirect if zero word: if rT.word[0] == 0, jump to rA.
 *
 * \param d  if 1, disable interrupts when the branch is taken
 * \param e  if 1, enable interrupts when the branch is taken
 */
void spe_biz(struct spe_function *p, int rT, int rA, int d, int e)
{
   /* d and e occupy bits 5 and 4 of the 7-bit immediate field */
   const int irq_bits = (d << 5) | (e << 4);

   emit_RI7(p, 0x128, rT, rA, irq_bits, __FUNCTION__);
}
687 | |||
/**
 * Branch indirect if non-zero word: if rT.word[0] != 0, jump to rA.
 *
 * \param d  if 1, disable interrupts when the branch is taken
 * \param e  if 1, enable interrupts when the branch is taken
 */
void spe_binz(struct spe_function *p, int rT, int rA, int d, int e)
{
   /* d and e occupy bits 5 and 4 of the 7-bit immediate field */
   const int irq_bits = (d << 5) | (e << 4);

   emit_RI7(p, 0x129, rT, rA, irq_bits, __FUNCTION__);
}
693 | |||
/**
 * Branch indirect if zero halfword: if rT.halfword[1] == 0, jump to rA.
 *
 * \param d  if 1, disable interrupts when the branch is taken
 * \param e  if 1, enable interrupts when the branch is taken
 */
void spe_bihz(struct spe_function *p, int rT, int rA, int d, int e)
{
   /* d and e occupy bits 5 and 4 of the 7-bit immediate field */
   const int irq_bits = (d << 5) | (e << 4);

   emit_RI7(p, 0x12a, rT, rA, irq_bits, __FUNCTION__);
}
699 | |||
/**
 * Branch indirect if non-zero halfword: if rT.halfword[1] != 0, jump
 * to rA.
 *
 * \param d  if 1, disable interrupts when the branch is taken
 * \param e  if 1, enable interrupts when the branch is taken
 */
void spe_bihnz(struct spe_function *p, int rT, int rA, int d, int e)
{
   /* d and e occupy bits 5 and 4 of the 7-bit immediate field */
   const int irq_bits = (d << 5) | (e << 4);

   emit_RI7(p, 0x12b, rT, rA, irq_bits, __FUNCTION__);
}
705 | |||
706 | |||
707 | /* Hint-for-branch instructions | ||
708 | */ | ||
709 | #if 0 | ||
710 | hbr; | ||
711 | hbra; | ||
712 | hbrr; | ||
713 | #endif | ||
714 | |||
715 | |||
716 | /* Control instructions | ||
717 | */ | ||
718 | #if 0 | ||
719 | stop; | ||
720 | EMIT_RR (spe_stopd, 0x140); | ||
721 | EMIT_ (spe_nop, 0x201); | ||
722 | sync; | ||
723 | EMIT_ (spe_dsync, 0x003); | ||
724 | EMIT_R (spe_mfspr, 0x00c); | ||
725 | EMIT_R (spe_mtspr, 0x10c); | ||
726 | #endif | ||
727 | |||
728 | |||
729 | /** | ||
730 | ** Helper / "macro" instructions. | ||
731 | ** Use somewhat verbose names as a reminder that these aren't native | ||
732 | ** SPE instructions. | ||
733 | **/ | ||
734 | |||
735 | |||
/**
 * Load/splat an immediate float into rT.
 *
 * The decision is made on the value's bit pattern rather than with
 * floating-point comparisons, so that -0.0f (0x80000000) keeps its sign
 * bit instead of being loaded as +0.0f.
 *
 * Emits a single instruction when the low halfword of the pattern is
 * zero (this covers 0.0f, 0.5f, 1.0f and -1.0f), otherwise ilhu
 * followed by iohl.
 *
 * \param p   function being generated
 * \param rT  destination register
 * \param x   value to load
 */
void
spe_load_float(struct spe_function *p, int rT, float x)
{
   union {
      float f;
      unsigned u;
   } bits;
   unsigned upper, lower;

   bits.f = x;
   upper = bits.u >> 16;
   lower = bits.u & 0xffff;

   if (bits.u == 0) {
      /* exactly +0.0f */
      spe_il(p, rT, 0x0);
   }
   else {
      spe_ilhu(p, rT, upper);
      /* as in spe_load_uint(): skip iohl when the low halfword is zero */
      if (lower)
         spe_iohl(p, rT, lower);
   }
}
761 | |||
762 | |||
/**
 * Load/splat an immediate signed int into rT.
 *
 * Values that fit in a signed 16-bit immediate use a single il;
 * anything else uses ilhu for the upper halfword, plus iohl when the
 * lower halfword is non-zero.
 *
 * \param p   function being generated
 * \param rT  destination register
 * \param i   value to load
 */
void
spe_load_int(struct spe_function *p, int rT, int i)
{
   const int low_half = i & 0xffff;

   if (i >= -32768 && i <= 32767) {
      spe_il(p, rT, i);
      return;
   }

   spe_ilhu(p, rT, i >> 16);
   if (low_half != 0)
      spe_iohl(p, rT, low_half);
}
775 | |||
776 | void spe_load_uint(struct spe_function *p, int rT, uint ui) | ||
777 | { | ||
778 | /* If the whole value is in the lower 18 bits, use ila, which | ||
779 | * doesn't sign-extend. Otherwise, if the two halfwords of | ||
780 | * the constant are identical, use ilh. Otherwise, if every byte of | ||
781 | * the desired value is 0x00 or 0xff, we can use Form Select Mask for | ||
782 | * Bytes Immediate (fsmbi) to load the value in a single instruction. | ||
783 | * Otherwise, in the general case, we have to use ilhu followed by iohl. | ||
784 | */ | ||
785 | if ((ui & 0x0003ffff) == ui) { | ||
786 | spe_ila(p, rT, ui); | ||
787 | } | ||
788 | else if ((ui >> 16) == (ui & 0xffff)) { | ||
789 | spe_ilh(p, rT, ui & 0xffff); | ||
790 | } | ||
791 | else if ( | ||
792 | ((ui & 0x000000ff) == 0 || (ui & 0x000000ff) == 0x000000ff) && | ||
793 | ((ui & 0x0000ff00) == 0 || (ui & 0x0000ff00) == 0x0000ff00) && | ||
794 | ((ui & 0x00ff0000) == 0 || (ui & 0x00ff0000) == 0x00ff0000) && | ||
795 | ((ui & 0xff000000) == 0 || (ui & 0xff000000) == 0xff000000) | ||
796 | ) { | ||
797 | uint mask = 0; | ||
798 | /* fsmbi duplicates each bit in the given mask eight times, | ||
799 | * using a 16-bit value to initialize a 16-byte quadword. | ||
800 | * Each 4-bit nybble of the mask corresponds to a full word | ||
801 | * of the result; look at the value and figure out the mask | ||
802 | * (replicated for each word in the quadword), and then | ||
803 | * form the "select mask" to get the value. | ||
804 | */ | ||
805 | if ((ui & 0x000000ff) == 0x000000ff) mask |= 0x1111; | ||
806 | if ((ui & 0x0000ff00) == 0x0000ff00) mask |= 0x2222; | ||
807 | if ((ui & 0x00ff0000) == 0x00ff0000) mask |= 0x4444; | ||
808 | if ((ui & 0xff000000) == 0xff000000) mask |= 0x8888; | ||
809 | spe_fsmbi(p, rT, mask); | ||
810 | } | ||
811 | else { | ||
812 | /* The general case: this usually uses two instructions, but | ||
813 | * may use only one if the low-order 16 bits of each word are 0. | ||
814 | */ | ||
815 | spe_ilhu(p, rT, ui >> 16); | ||
816 | if (ui & 0xffff) | ||
817 | spe_iohl(p, rT, ui & 0xffff); | ||
818 | } | ||
819 | } | ||
820 | |||
821 | /** | ||
822 | * This function is constructed identically to spe_xor_uint() below. | ||
823 | * Changes to one should be made in the other. | ||
824 | */ | ||
825 | void | ||
826 | spe_and_uint(struct spe_function *p, int rT, int rA, uint ui) | ||
827 | { | ||
828 | /* If we can, emit a single instruction, either And Byte Immediate | ||
829 | * (which uses the same constant across each byte), And Halfword Immediate | ||
830 | * (which sign-extends a 10-bit immediate to 16 bits and uses that | ||
831 | * across each halfword), or And Word Immediate (which sign-extends | ||
832 | * a 10-bit immediate to 32 bits). | ||
833 | * | ||
834 | * Otherwise, we'll need to use a temporary register. | ||
835 | */ | ||
836 | uint tmp; | ||
837 | |||
838 | /* If the upper 23 bits are all 0s or all 1s, sign extension | ||
839 | * will work and we can use And Word Immediate | ||
840 | */ | ||
841 | tmp = ui & 0xfffffe00; | ||
842 | if (tmp == 0xfffffe00 || tmp == 0) { | ||
843 | spe_andi(p, rT, rA, ui & 0x000003ff); | ||
844 | return; | ||
845 | } | ||
846 | |||
847 | /* If the ui field is symmetric along halfword boundaries and | ||
848 | * the upper 7 bits of each halfword are all 0s or 1s, we | ||
849 | * can use And Halfword Immediate | ||
850 | */ | ||
851 | tmp = ui & 0xfe00fe00; | ||
852 | if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { | ||
853 | spe_andhi(p, rT, rA, ui & 0x000003ff); | ||
854 | return; | ||
855 | } | ||
856 | |||
857 | /* If the ui field is symmetric in each byte, then we can use | ||
858 | * the And Byte Immediate instruction. | ||
859 | */ | ||
860 | tmp = ui & 0x000000ff; | ||
861 | if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { | ||
862 | spe_andbi(p, rT, rA, tmp); | ||
863 | return; | ||
864 | } | ||
865 | |||
866 | /* Otherwise, we'll have to use a temporary register. */ | ||
867 | int tmp_reg = spe_allocate_available_register(p); | ||
868 | spe_load_uint(p, tmp_reg, ui); | ||
869 | spe_and(p, rT, rA, tmp_reg); | ||
870 | spe_release_register(p, tmp_reg); | ||
871 | } | ||
872 | |||
873 | |||
874 | /** | ||
875 | * This function is constructed identically to spe_and_uint() above. | ||
876 | * Changes to one should be made in the other. | ||
877 | */ | ||
878 | void | ||
879 | spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui) | ||
880 | { | ||
881 | /* If we can, emit a single instruction, either Exclusive Or Byte | ||
882 | * Immediate (which uses the same constant across each byte), Exclusive | ||
883 | * Or Halfword Immediate (which sign-extends a 10-bit immediate to | ||
884 | * 16 bits and uses that across each halfword), or Exclusive Or Word | ||
885 | * Immediate (which sign-extends a 10-bit immediate to 32 bits). | ||
886 | * | ||
887 | * Otherwise, we'll need to use a temporary register. | ||
888 | */ | ||
889 | uint tmp; | ||
890 | |||
891 | /* If the upper 23 bits are all 0s or all 1s, sign extension | ||
892 | * will work and we can use Exclusive Or Word Immediate | ||
893 | */ | ||
894 | tmp = ui & 0xfffffe00; | ||
895 | if (tmp == 0xfffffe00 || tmp == 0) { | ||
896 | spe_xori(p, rT, rA, ui & 0x000003ff); | ||
897 | return; | ||
898 | } | ||
899 | |||
900 | /* If the ui field is symmetric along halfword boundaries and | ||
901 | * the upper 7 bits of each halfword are all 0s or 1s, we | ||
902 | * can use Exclusive Or Halfword Immediate | ||
903 | */ | ||
904 | tmp = ui & 0xfe00fe00; | ||
905 | if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { | ||
906 | spe_xorhi(p, rT, rA, ui & 0x000003ff); | ||
907 | return; | ||
908 | } | ||
909 | |||
910 | /* If the ui field is symmetric in each byte, then we can use | ||
911 | * the Exclusive Or Byte Immediate instruction. | ||
912 | */ | ||
913 | tmp = ui & 0x000000ff; | ||
914 | if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { | ||
915 | spe_xorbi(p, rT, rA, tmp); | ||
916 | return; | ||
917 | } | ||
918 | |||
919 | /* Otherwise, we'll have to use a temporary register. */ | ||
920 | int tmp_reg = spe_allocate_available_register(p); | ||
921 | spe_load_uint(p, tmp_reg, ui); | ||
922 | spe_xor(p, rT, rA, tmp_reg); | ||
923 | spe_release_register(p, tmp_reg); | ||
924 | } | ||
925 | |||
926 | void | ||
927 | spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui) | ||
928 | { | ||
929 | /* If the comparison value is 9 bits or less, it fits inside a | ||
930 | * Compare Equal Word Immediate instruction. | ||
931 | */ | ||
932 | if ((ui & 0x000001ff) == ui) { | ||
933 | spe_ceqi(p, rT, rA, ui); | ||
934 | } | ||
935 | /* Otherwise, we're going to have to load a word first. */ | ||
936 | else { | ||
937 | int tmp_reg = spe_allocate_available_register(p); | ||
938 | spe_load_uint(p, tmp_reg, ui); | ||
939 | spe_ceq(p, rT, rA, tmp_reg); | ||
940 | spe_release_register(p, tmp_reg); | ||
941 | } | ||
942 | } | ||
943 | |||
944 | void | ||
945 | spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui) | ||
946 | { | ||
947 | /* If the comparison value is 10 bits or less, it fits inside a | ||
948 | * Compare Logical Greater Than Word Immediate instruction. | ||
949 | */ | ||
950 | if ((ui & 0x000003ff) == ui) { | ||
951 | spe_clgti(p, rT, rA, ui); | ||
952 | } | ||
953 | /* Otherwise, we're going to have to load a word first. */ | ||
954 | else { | ||
955 | int tmp_reg = spe_allocate_available_register(p); | ||
956 | spe_load_uint(p, tmp_reg, ui); | ||
957 | spe_clgt(p, rT, rA, tmp_reg); | ||
958 | spe_release_register(p, tmp_reg); | ||
959 | } | ||
960 | } | ||
961 | |||
/**
 * Replicate word 0 of rA across all four words of rT.
 *
 * The shuffle pattern lives in a scratch register rather than in rT, so
 * the operation stays correct when rT == rA.
 */
void
spe_splat(struct spe_function *p, int rT, int rA)
{
   const int pattern_reg = spe_allocate_available_register(p);

   /* Every word of the pattern is 0x00010203, selecting source bytes
    * 0, 1, 2 and 3 for each word of the result.
    */
   spe_ila(p, pattern_reg, 0x00010203);
   spe_shufb(p, rT, rA, rA, pattern_reg);

   spe_release_register(p, pattern_reg);
}
972 | |||
973 | |||
/**
 * rT = ~rA (bitwise complement), emitted as a NOR of rA with itself.
 */
void
spe_complement(struct spe_function *p, int rT, int rA)
{
   spe_nor(p, rT, rA, rA);
}
979 | |||
980 | |||
981 | void | ||
982 | spe_move(struct spe_function *p, int rT, int rA) | ||
983 | { | ||
984 | /* Use different instructions depending on the instruction address | ||
985 | * to take advantage of the dual pipelines. | ||
986 | */ | ||
987 | if (p->num_inst & 1) | ||
988 | spe_shlqbyi(p, rT, rA, 0); /* odd pipe */ | ||
989 | else | ||
990 | spe_ori(p, rT, rA, 0); /* even pipe */ | ||
991 | } | ||
992 | |||
993 | |||
/**
 * rT = {0,0,0,0}, emitted as an XOR of rT with itself.
 */
void
spe_zero(struct spe_function *p, int rT)
{
   spe_xor(p, rT, rT, rT);
}
999 | |||
1000 | |||
/**
 * rT = splat(rA, word): replicate the selected word of rA across all
 * four words of rT.
 *
 * rT doubles as the holder of the shuffle pattern, so the pattern must
 * not be written before rA has been read.  Otherwise a call with
 * rT == rA would shuffle the pattern itself instead of the source data.
 *
 * \param p     function being generated
 * \param rT    destination register (may be the same as rA)
 * \param rA    source register
 * \param word  index of the word to replicate, 0..3
 */
void
spe_splat_word(struct spe_function *p, int rT, int rA, int word)
{
   assert(word >= 0);
   assert(word <= 3);

   if (word == 0) {
      if (rT == rA) {
         /* Aliased: spe_splat() keeps the pattern in a scratch register. */
         spe_splat(p, rT, rA);
      }
      else {
         spe_ila(p, rT, 0x00010203);
         spe_shufb(p, rT, rA, rA, rT);
      }
   }
   else {
      /* XXX review this, we may not need the rotqbyi instruction */
      int rotated = spe_allocate_available_register(p);

      /* Rotate the wanted word into slot 0 first: this reads rA before
       * rT is overwritten with the shuffle pattern.
       */
      spe_rotqbyi(p, rotated, rA, 4 * word);
      spe_ila(p, rT, 0x00010203);
      spe_shufb(p, rT, rotated, rotated, rT);

      spe_release_register(p, rotated);
   }
}
1024 | |||
/**
 * For each 32-bit float element of rA and rB, choose the smaller of the
 * two, compositing them into the rT register.
 *
 * Float Compare Greater Than (fcgt) sets the mask to 1s where rA > rB
 * and to 0s where rA <= rB.  Select Bits (selb) then takes bits from rA
 * where the mask is 0 and from rB where it is 1; i.e. rA where
 * rA <= rB and rB where rA > rB, which is the "min" operation.
 *
 * The mask could often live in rT, but not when rT == rA or rT == rB.
 * Since "x = min(x, a)" is a common construction, a separate register
 * is always allocated to be safe.
 */
void
spe_float_min(struct spe_function *p, int rT, int rA, int rB)
{
   const int gt_mask = spe_allocate_available_register(p);

   spe_fcgt(p, gt_mask, rA, rB);
   spe_selb(p, rT, rA, rB, gt_mask);

   spe_release_register(p, gt_mask);
}
1049 | |||
/**
 * For each 32-bit float element of rA and rB, choose the greater of the
 * two, compositing them into the rT register.
 *
 * Same scheme as spe_float_min(): fcgt builds a mask of 1s where
 * rA > rB, but the selb sources are swapped, so rA is taken where
 * rA > rB and rB elsewhere.
 */
void
spe_float_max(struct spe_function *p, int rT, int rA, int rB)
{
   const int gt_mask = spe_allocate_available_register(p);

   spe_fcgt(p, gt_mask, rA, rB);
   spe_selb(p, rT, rB, rA, gt_mask);

   spe_release_register(p, gt_mask);
}
1066 | |||
1067 | #endif /* GALLIUM_CELL */ | ||
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h deleted file mode 100644 index 65d9c774154..00000000000 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ /dev/null | |||
@@ -1,433 +0,0 @@ | |||
1 | /* | ||
2 | * (C) Copyright IBM Corporation 2008 | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
6 | * copy of this software and associated documentation files (the "Software"), | ||
7 | * to deal in the Software without restriction, including without limitation | ||
8 | * on the rights to use, copy, modify, merge, publish, distribute, sub | ||
9 | * license, and/or sell copies of the Software, and to permit persons to whom | ||
10 | * the Software is furnished to do so, subject to the following conditions: | ||
11 | * | ||
12 | * The above copyright notice and this permission notice (including the next | ||
13 | * paragraph) shall be included in all copies or substantial portions of the | ||
14 | * Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL | ||
19 | * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, | ||
20 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | ||
21 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | ||
22 | * USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | /** | ||
26 | * \file | ||
27 | * Real-time assembly generation interface for Cell B.E. SPEs. | ||
28 | * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf | ||
29 | * | ||
30 | * \author Ian Romanick <idr@us.ibm.com> | ||
31 | * \author Brian Paul | ||
32 | */ | ||
33 | |||
34 | #ifndef RTASM_PPC_SPE_H | ||
35 | #define RTASM_PPC_SPE_H | ||
36 | |||
37 | /** 4 bytes per instruction */ | ||
38 | #define SPE_INST_SIZE 4 | ||
39 | |||
40 | /** number of general-purpose SIMD registers */ | ||
41 | #define SPE_NUM_REGS 128 | ||
42 | |||
43 | /** Return Address register (aka $lr / Link Register) */ | ||
44 | #define SPE_REG_RA 0 | ||
45 | |||
46 | /** Stack Pointer register (aka $sp) */ | ||
47 | #define SPE_REG_SP 1 | ||
48 | |||
49 | |||
/**
 * State of one SPE function under construction: the instruction buffer,
 * the register allocation table and the instruction-printing settings.
 */
struct spe_function
{
   uint32_t *store; /**< instruction buffer */
   uint num_inst;   /**< instruction count; spe_move() tests its parity */
   uint max_inst;

   /**
    * Number of register sets currently open (the nesting depth).
    * Incremented by spe_allocate_register_set() and decremented by
    * spe_release_register_set().  If the counter ever wrapped around,
    * register allocation would become confused, which is critical
    * enough that both functions assert on it.
    */
   unsigned char set_count;

   /**
    * Flags for used and unused registers.  Each byte corresponds to a
    * register; a 0 in that byte means that the register is available.
    * A value of 1 means that the register was allocated in the current
    * register set.  A larger value N means that the register was
    * allocated N-1 register sets ago (the value is incremented each
    * time a new set is opened).
    *
    * \sa
    * spe_allocate_register, spe_allocate_available_register,
    * spe_allocate_register_set, spe_release_register_set, spe_release_register
    */
   unsigned char regs[SPE_NUM_REGS];

   boolean print; /**< print/dump instructions as they're emitted? */
   int indent; /**< number of spaces to indent */
};
80 | |||
81 | |||
82 | extern void spe_init_func(struct spe_function *p, uint code_size); | ||
83 | extern void spe_release_func(struct spe_function *p); | ||
84 | extern uint spe_code_size(const struct spe_function *p); | ||
85 | |||
86 | extern int spe_allocate_available_register(struct spe_function *p); | ||
87 | extern int spe_allocate_register(struct spe_function *p, int reg); | ||
88 | extern void spe_release_register(struct spe_function *p, int reg); | ||
89 | extern void spe_allocate_register_set(struct spe_function *p); | ||
90 | extern void spe_release_register_set(struct spe_function *p); | ||
91 | |||
92 | extern uint spe_get_registers_used(const struct spe_function *p, ubyte used[]); | ||
93 | |||
94 | extern void spe_print_code(struct spe_function *p, boolean enable); | ||
95 | extern void spe_indent(struct spe_function *p, int spaces); | ||
96 | extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); | ||
97 | |||
98 | |||
99 | #endif /* RTASM_PPC_SPE_H */ | ||
100 | |||
/*
 * Declaration forms of the EMIT* macros.
 *
 * Unless the including file has already defined EMIT, each macro below
 * expands to the extern prototype of one instruction-emitting function;
 * the opcode argument (_op, and bias for EMIT_RI8) is ignored in this
 * form.  The suffix names the operand shape: registers only (_, R, RR,
 * RRR), registers plus an immediate (RI7, RI8, RI10, RI10s, RI16, RI18),
 * or an immediate only (I16).
 *
 * UNDEF_EMIT_MACROS records that the macros were defined here,
 * presumably so they can be #undef'd later (not visible in this part
 * of the header).
 */
#ifndef EMIT
#define EMIT(_name, _op) \
   extern void _name (struct spe_function *p);
#define EMIT_(_name, _op) \
   extern void _name (struct spe_function *p, int rT);
#define EMIT_R(_name, _op) \
   extern void _name (struct spe_function *p, int rT, int rA);
#define EMIT_RR(_name, _op) \
   extern void _name (struct spe_function *p, int rT, int rA, int rB);
#define EMIT_RRR(_name, _op) \
   extern void _name (struct spe_function *p, int rT, int rA, int rB, int rC);
#define EMIT_RI7(_name, _op) \
   extern void _name (struct spe_function *p, int rT, int rA, int imm);
#define EMIT_RI8(_name, _op, bias) \
   extern void _name (struct spe_function *p, int rT, int rA, int imm);
#define EMIT_RI10(_name, _op) \
   extern void _name (struct spe_function *p, int rT, int rA, int imm);
#define EMIT_RI10s(_name, _op) \
   extern void _name (struct spe_function *p, int rT, int rA, int imm);
#define EMIT_RI16(_name, _op) \
   extern void _name (struct spe_function *p, int rT, int imm);
#define EMIT_RI18(_name, _op) \
   extern void _name (struct spe_function *p, int rT, int imm);
#define EMIT_I16(_name, _op) \
   extern void _name (struct spe_function *p, int imm);
#define UNDEF_EMIT_MACROS
#endif /* EMIT */
128 | |||
129 | |||
130 | /* Memory load / store instructions | ||
131 | */ | ||
132 | EMIT_RR (spe_lqx, 0x1c4) | ||
133 | EMIT_RI16(spe_lqa, 0x061) | ||
134 | EMIT_RI16(spe_lqr, 0x067) | ||
135 | EMIT_RR (spe_stqx, 0x144) | ||
136 | EMIT_RI16(spe_stqa, 0x041) | ||
137 | EMIT_RI16(spe_stqr, 0x047) | ||
138 | EMIT_RI7 (spe_cbd, 0x1f4) | ||
139 | EMIT_RR (spe_cbx, 0x1d4) | ||
140 | EMIT_RI7 (spe_chd, 0x1f5) | ||
141 | EMIT_RI7 (spe_chx, 0x1d5) | ||
142 | EMIT_RI7 (spe_cwd, 0x1f6) | ||
143 | EMIT_RI7 (spe_cwx, 0x1d6) | ||
144 | EMIT_RI7 (spe_cdd, 0x1f7) | ||
145 | EMIT_RI7 (spe_cdx, 0x1d7) | ||
146 | |||
147 | |||
148 | /* Constant formation instructions | ||
149 | */ | ||
150 | EMIT_RI16(spe_ilh, 0x083) | ||
151 | EMIT_RI16(spe_ilhu, 0x082) | ||
152 | EMIT_RI16(spe_il, 0x081) | ||
153 | EMIT_RI18(spe_ila, 0x021) | ||
154 | EMIT_RI16(spe_iohl, 0x0c1) | ||
155 | EMIT_RI16(spe_fsmbi, 0x065) | ||
156 | |||
157 | |||
158 | |||
159 | /* Integer and logical instructions | ||
160 | */ | ||
161 | EMIT_RR (spe_ah, 0x0c8) | ||
162 | EMIT_RI10(spe_ahi, 0x01d) | ||
163 | EMIT_RR (spe_a, 0x0c0) | ||
164 | EMIT_RI10s(spe_ai, 0x01c) | ||
165 | EMIT_RR (spe_sfh, 0x048) | ||
166 | EMIT_RI10(spe_sfhi, 0x00d) | ||
167 | EMIT_RR (spe_sf, 0x040) | ||
168 | EMIT_RI10(spe_sfi, 0x00c) | ||
169 | EMIT_RR (spe_addx, 0x340) | ||
170 | EMIT_RR (spe_cg, 0x0c2) | ||
171 | EMIT_RR (spe_cgx, 0x342) | ||
172 | EMIT_RR (spe_sfx, 0x341) | ||
173 | EMIT_RR (spe_bg, 0x042) | ||
174 | EMIT_RR (spe_bgx, 0x343) | ||
175 | EMIT_RR (spe_mpy, 0x3c4) | ||
176 | EMIT_RR (spe_mpyu, 0x3cc) | ||
177 | EMIT_RI10(spe_mpyi, 0x074) | ||
178 | EMIT_RI10(spe_mpyui, 0x075) | ||
179 | EMIT_RRR (spe_mpya, 0x00c) | ||
180 | EMIT_RR (spe_mpyh, 0x3c5) | ||
181 | EMIT_RR (spe_mpys, 0x3c7) | ||
182 | EMIT_RR (spe_mpyhh, 0x3c6) | ||
183 | EMIT_RR (spe_mpyhha, 0x346) | ||
184 | EMIT_RR (spe_mpyhhu, 0x3ce) | ||
185 | EMIT_RR (spe_mpyhhau, 0x34e) | ||
186 | EMIT_R (spe_clz, 0x2a5) | ||
187 | EMIT_R (spe_cntb, 0x2b4) | ||
188 | EMIT_R (spe_fsmb, 0x1b6) | ||
189 | EMIT_R (spe_fsmh, 0x1b5) | ||
190 | EMIT_R (spe_fsm, 0x1b4) | ||
191 | EMIT_R (spe_gbb, 0x1b2) | ||
192 | EMIT_R (spe_gbh, 0x1b1) | ||
193 | EMIT_R (spe_gb, 0x1b0) | ||
194 | EMIT_RR (spe_avgb, 0x0d3) | ||
195 | EMIT_RR (spe_absdb, 0x053) | ||
196 | EMIT_RR (spe_sumb, 0x253) | ||
197 | EMIT_R (spe_xsbh, 0x2b6) | ||
198 | EMIT_R (spe_xshw, 0x2ae) | ||
199 | EMIT_R (spe_xswd, 0x2a6) | ||
200 | EMIT_RR (spe_and, 0x0c1) | ||
201 | EMIT_RR (spe_andc, 0x2c1) | ||
202 | EMIT_RI10s(spe_andbi, 0x016) | ||
203 | EMIT_RI10s(spe_andhi, 0x015) | ||
204 | EMIT_RI10s(spe_andi, 0x014) | ||
205 | EMIT_RR (spe_or, 0x041) | ||
206 | EMIT_RR (spe_orc, 0x2c9) | ||
207 | EMIT_RI10s(spe_orbi, 0x006) | ||
208 | EMIT_RI10s(spe_orhi, 0x005) | ||
209 | EMIT_RI10s(spe_ori, 0x004) | ||
210 | EMIT_R (spe_orx, 0x1f0) | ||
211 | EMIT_RR (spe_xor, 0x241) | ||
212 | EMIT_RI10s(spe_xorbi, 0x046) | ||
213 | EMIT_RI10s(spe_xorhi, 0x045) | ||
214 | EMIT_RI10s(spe_xori, 0x044) | ||
215 | EMIT_RR (spe_nand, 0x0c9) | ||
216 | EMIT_RR (spe_nor, 0x049) | ||
217 | EMIT_RR (spe_eqv, 0x249) | ||
218 | EMIT_RRR (spe_selb, 0x008) | ||
219 | EMIT_RRR (spe_shufb, 0x00b) | ||
220 | |||
221 | |||
222 | /* Shift and rotate instructions | ||
223 | */ | ||
224 | EMIT_RR (spe_shlh, 0x05f) | ||
225 | EMIT_RI7 (spe_shlhi, 0x07f) | ||
226 | EMIT_RR (spe_shl, 0x05b) | ||
227 | EMIT_RI7 (spe_shli, 0x07b) | ||
228 | EMIT_RR (spe_shlqbi, 0x1db) | ||
229 | EMIT_RI7 (spe_shlqbii, 0x1fb) | ||
230 | EMIT_RR (spe_shlqby, 0x1df) | ||
231 | EMIT_RI7 (spe_shlqbyi, 0x1ff) | ||
232 | EMIT_RR (spe_shlqbybi, 0x1cf) | ||
233 | EMIT_RR (spe_roth, 0x05c) | ||
234 | EMIT_RI7 (spe_rothi, 0x07c) | ||
235 | EMIT_RR (spe_rot, 0x058) | ||
236 | EMIT_RI7 (spe_roti, 0x078) | ||
237 | EMIT_RR (spe_rotqby, 0x1dc) | ||
238 | EMIT_RI7 (spe_rotqbyi, 0x1fc) | ||
239 | EMIT_RR (spe_rotqbybi, 0x1cc) | ||
240 | EMIT_RR (spe_rotqbi, 0x1d8) | ||
241 | EMIT_RI7 (spe_rotqbii, 0x1f8) | ||
242 | EMIT_RR (spe_rothm, 0x05d) | ||
243 | EMIT_RI7 (spe_rothmi, 0x07d) | ||
244 | EMIT_RR (spe_rotm, 0x059) | ||
245 | EMIT_RI7 (spe_rotmi, 0x079) | ||
246 | EMIT_RR (spe_rotqmby, 0x1dd) | ||
247 | EMIT_RI7 (spe_rotqmbyi, 0x1fd) | ||
248 | EMIT_RR (spe_rotqmbybi, 0x1cd) | ||
249 | EMIT_RR (spe_rotqmbi, 0x1c9) | ||
250 | EMIT_RI7 (spe_rotqmbii, 0x1f9) | ||
251 | EMIT_RR (spe_rotmah, 0x05e) | ||
252 | EMIT_RI7 (spe_rotmahi, 0x07e) | ||
253 | EMIT_RR (spe_rotma, 0x05a) | ||
254 | EMIT_RI7 (spe_rotmai, 0x07a) | ||
255 | |||
256 | |||
257 | /* Compare, branch, and halt instructions | ||
258 | */ | ||
259 | EMIT_RR (spe_heq, 0x3d8) | ||
260 | EMIT_RI10(spe_heqi, 0x07f) | ||
261 | EMIT_RR (spe_hgt, 0x258) | ||
262 | EMIT_RI10(spe_hgti, 0x04f) | ||
263 | EMIT_RR (spe_hlgt, 0x2d8) | ||
264 | EMIT_RI10(spe_hlgti, 0x05f) | ||
265 | EMIT_RR (spe_ceqb, 0x3d0) | ||
266 | EMIT_RI10(spe_ceqbi, 0x07e) | ||
267 | EMIT_RR (spe_ceqh, 0x3c8) | ||
268 | EMIT_RI10(spe_ceqhi, 0x07d) | ||
269 | EMIT_RR (spe_ceq, 0x3c0) | ||
270 | EMIT_RI10(spe_ceqi, 0x07c) | ||
271 | EMIT_RR (spe_cgtb, 0x250) | ||
272 | EMIT_RI10(spe_cgtbi, 0x04e) | ||
273 | EMIT_RR (spe_cgth, 0x248) | ||
274 | EMIT_RI10(spe_cgthi, 0x04d) | ||
275 | EMIT_RR (spe_cgt, 0x240) | ||
276 | EMIT_RI10(spe_cgti, 0x04c) | ||
277 | EMIT_RR (spe_clgtb, 0x2d0) | ||
278 | EMIT_RI10(spe_clgtbi, 0x05e) | ||
279 | EMIT_RR (spe_clgth, 0x2c8) | ||
280 | EMIT_RI10(spe_clgthi, 0x05d) | ||
281 | EMIT_RR (spe_clgt, 0x2c0) | ||
282 | EMIT_RI10(spe_clgti, 0x05c) | ||
283 | EMIT_I16 (spe_br, 0x064) | ||
284 | EMIT_I16 (spe_bra, 0x060) | ||
285 | EMIT_RI16(spe_brsl, 0x066) | ||
286 | EMIT_RI16(spe_brasl, 0x062) | ||
287 | EMIT_RI16(spe_brnz, 0x042) | ||
288 | EMIT_RI16(spe_brz, 0x040) | ||
289 | EMIT_RI16(spe_brhnz, 0x046) | ||
290 | EMIT_RI16(spe_brhz, 0x044) | ||
291 | |||
292 | /* Control instructions | ||
293 | */ | ||
294 | EMIT (spe_lnop, 0x001) | ||
295 | |||
296 | extern void | ||
297 | spe_lqd(struct spe_function *p, int rT, int rA, int offset); | ||
298 | |||
299 | extern void | ||
300 | spe_stqd(struct spe_function *p, int rT, int rA, int offset); | ||
301 | |||
302 | extern void spe_bi(struct spe_function *p, int rA, int d, int e); | ||
303 | extern void spe_iret(struct spe_function *p, int rA, int d, int e); | ||
304 | extern void spe_bisled(struct spe_function *p, int rT, int rA, | ||
305 | int d, int e); | ||
306 | extern void spe_bisl(struct spe_function *p, int rT, int rA, | ||
307 | int d, int e); | ||
308 | extern void spe_biz(struct spe_function *p, int rT, int rA, | ||
309 | int d, int e); | ||
310 | extern void spe_binz(struct spe_function *p, int rT, int rA, | ||
311 | int d, int e); | ||
312 | extern void spe_bihz(struct spe_function *p, int rT, int rA, | ||
313 | int d, int e); | ||
314 | extern void spe_bihnz(struct spe_function *p, int rT, int rA, | ||
315 | int d, int e); | ||
316 | |||
317 | |||
318 | /** Load/splat immediate float into rT. */ | ||
319 | extern void | ||
320 | spe_load_float(struct spe_function *p, int rT, float x); | ||
321 | |||
322 | /** Load/splat immediate int into rT. */ | ||
323 | extern void | ||
324 | spe_load_int(struct spe_function *p, int rT, int i); | ||
325 | |||
326 | /** Load/splat immediate unsigned int into rT. */ | ||
327 | extern void | ||
328 | spe_load_uint(struct spe_function *p, int rT, uint ui); | ||
329 | |||
330 | /** And immediate value into rT. */ | ||
331 | extern void | ||
332 | spe_and_uint(struct spe_function *p, int rT, int rA, uint ui); | ||
333 | |||
334 | /** Xor immediate value into rT. */ | ||
335 | extern void | ||
336 | spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui); | ||
337 | |||
338 | /** Compare equal with immediate value. */ | ||
339 | extern void | ||
340 | spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui); | ||
341 | |||
342 | /** Compare greater with immediate value. */ | ||
343 | extern void | ||
344 | spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui); | ||
345 | |||
346 | /** Replicate word 0 of rA across rT. */ | ||
347 | extern void | ||
348 | spe_splat(struct spe_function *p, int rT, int rA); | ||
349 | |||
350 | /** rT = complement_all_bits(rA). */ | ||
351 | extern void | ||
352 | spe_complement(struct spe_function *p, int rT, int rA); | ||
353 | |||
354 | /** rT = rA. */ | ||
355 | extern void | ||
356 | spe_move(struct spe_function *p, int rT, int rA); | ||
357 | |||
358 | /** rT = {0,0,0,0}. */ | ||
359 | extern void | ||
360 | spe_zero(struct spe_function *p, int rT); | ||
361 | |||
362 | /** rT = splat(rA, word) */ | ||
363 | extern void | ||
364 | spe_splat_word(struct spe_function *p, int rT, int rA, int word); | ||
365 | |||
366 | /** rT = float min(rA, rB) */ | ||
367 | extern void | ||
368 | spe_float_min(struct spe_function *p, int rT, int rA, int rB); | ||
369 | |||
370 | /** rT = float max(rA, rB) */ | ||
371 | extern void | ||
372 | spe_float_max(struct spe_function *p, int rT, int rA, int rB); | ||
373 | |||
374 | |||
375 | /* Floating-point instructions | ||
376 | */ | ||
377 | EMIT_RR (spe_fa, 0x2c4) | ||
378 | EMIT_RR (spe_dfa, 0x2cc) | ||
379 | EMIT_RR (spe_fs, 0x2c5) | ||
380 | EMIT_RR (spe_dfs, 0x2cd) | ||
381 | EMIT_RR (spe_fm, 0x2c6) | ||
382 | EMIT_RR (spe_dfm, 0x2ce) | ||
383 | EMIT_RRR (spe_fma, 0x00e) | ||
384 | EMIT_RR (spe_dfma, 0x35c) | ||
385 | EMIT_RRR (spe_fnms, 0x00d) | ||
386 | EMIT_RR (spe_dfnms, 0x35e) | ||
387 | EMIT_RRR (spe_fms, 0x00f) | ||
388 | EMIT_RR (spe_dfms, 0x35d) | ||
389 | EMIT_RR (spe_dfnma, 0x35f) | ||
390 | EMIT_R (spe_frest, 0x1b8) | ||
391 | EMIT_R (spe_frsqest, 0x1b9) | ||
392 | EMIT_RR (spe_fi, 0x3d4) | ||
393 | EMIT_RI8 (spe_csflt, 0x1da, 155) | ||
394 | EMIT_RI8 (spe_cflts, 0x1d8, 173) | ||
395 | EMIT_RI8 (spe_cuflt, 0x1db, 155) | ||
396 | EMIT_RI8 (spe_cfltu, 0x1d9, 173) | ||
397 | EMIT_R (spe_frds, 0x3b9) | ||
398 | EMIT_R (spe_fesd, 0x3b8) | ||
399 | EMIT_RR (spe_dfceq, 0x3c3) | ||
400 | EMIT_RR (spe_dfcmeq, 0x3cb) | ||
401 | EMIT_RR (spe_dfcgt, 0x2c3) | ||
402 | EMIT_RR (spe_dfcmgt, 0x2cb) | ||
403 | EMIT_RI7 (spe_dftsv, 0x3bf) | ||
404 | EMIT_RR (spe_fceq, 0x3c2) | ||
405 | EMIT_RR (spe_fcmeq, 0x3ca) | ||
406 | EMIT_RR (spe_fcgt, 0x2c2) | ||
407 | EMIT_RR (spe_fcmgt, 0x2ca) | ||
408 | EMIT_R (spe_fscrwr, 0x3ba) | ||
409 | EMIT_ (spe_fscrrd, 0x398) | ||
410 | |||
411 | |||
412 | /* Channel instructions | ||
413 | */ | ||
414 | EMIT_R (spe_rdch, 0x00d) | ||
415 | EMIT_R (spe_rdchcnt, 0x00f) | ||
416 | EMIT_R (spe_wrch, 0x10d) | ||
417 | |||
418 | |||
419 | #ifdef UNDEF_EMIT_MACROS | ||
420 | #undef EMIT | ||
421 | #undef EMIT_ | ||
422 | #undef EMIT_R | ||
423 | #undef EMIT_RR | ||
424 | #undef EMIT_RRR | ||
425 | #undef EMIT_RI7 | ||
426 | #undef EMIT_RI8 | ||
427 | #undef EMIT_RI10 | ||
428 | #undef EMIT_RI10s | ||
429 | #undef EMIT_RI16 | ||
430 | #undef EMIT_RI18 | ||
431 | #undef EMIT_I16 | ||
432 | #undef UNDEF_EMIT_MACROS | ||
433 | #endif /* UNDEF_EMIT_MACROS */ | ||
diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h index 34bfa527db0..596c691e9c1 100644 --- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h | |||
@@ -8,7 +8,7 @@ | |||
8 | 8 | ||
9 | 9 | ||
10 | /* Helper function to choose and instantiate one of the software rasterizers: | 10 | /* Helper function to choose and instantiate one of the software rasterizers: |
11 | * cell, llvmpipe, softpipe. | 11 | * llvmpipe, softpipe. |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #ifdef GALLIUM_SOFTPIPE | 14 | #ifdef GALLIUM_SOFTPIPE |
@@ -19,21 +19,12 @@ | |||
19 | #include "llvmpipe/lp_public.h" | 19 | #include "llvmpipe/lp_public.h" |
20 | #endif | 20 | #endif |
21 | 21 | ||
22 | #ifdef GALLIUM_CELL | ||
23 | #include "cell/ppu/cell_public.h" | ||
24 | #endif | ||
25 | |||
26 | 22 | ||
27 | static INLINE struct pipe_screen * | 23 | static INLINE struct pipe_screen * |
28 | sw_screen_create_named(struct sw_winsys *winsys, const char *driver) | 24 | sw_screen_create_named(struct sw_winsys *winsys, const char *driver) |
29 | { | 25 | { |
30 | struct pipe_screen *screen = NULL; | 26 | struct pipe_screen *screen = NULL; |
31 | 27 | ||
32 | #if defined(GALLIUM_CELL) | ||
33 | if (screen == NULL && strcmp(driver, "cell") == 0) | ||
34 | screen = cell_create_screen(winsys); | ||
35 | #endif | ||
36 | |||
37 | #if defined(GALLIUM_LLVMPIPE) | 28 | #if defined(GALLIUM_LLVMPIPE) |
38 | if (screen == NULL && strcmp(driver, "llvmpipe") == 0) | 29 | if (screen == NULL && strcmp(driver, "llvmpipe") == 0) |
39 | screen = llvmpipe_create_screen(winsys); | 30 | screen = llvmpipe_create_screen(winsys); |
@@ -54,9 +45,7 @@ sw_screen_create(struct sw_winsys *winsys) | |||
54 | const char *default_driver; | 45 | const char *default_driver; |
55 | const char *driver; | 46 | const char *driver; |
56 | 47 | ||
57 | #if defined(GALLIUM_CELL) | 48 | #if defined(GALLIUM_LLVMPIPE) |
58 | default_driver = "cell"; | ||
59 | #elif defined(GALLIUM_LLVMPIPE) | ||
60 | default_driver = "llvmpipe"; | 49 | default_driver = "llvmpipe"; |
61 | #elif defined(GALLIUM_SOFTPIPE) | 50 | #elif defined(GALLIUM_SOFTPIPE) |
62 | default_driver = "softpipe"; | 51 | default_driver = "softpipe"; |
diff --git a/src/gallium/drivers/cell/Makefile b/src/gallium/drivers/cell/Makefile deleted file mode 100644 index 47aef7b05f6..00000000000 --- a/src/gallium/drivers/cell/Makefile +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | # Cell Gallium driver Makefile | ||
2 | |||
3 | |||
4 | default: | ||
5 | ( cd spu ; make ) | ||
6 | ( cd ppu ; make ) | ||
7 | |||
8 | |||
9 | |||
10 | clean: | ||
11 | ( cd spu ; make clean ) | ||
12 | ( cd ppu ; make clean ) | ||
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h deleted file mode 100644 index a8cdde34aa7..00000000000 --- a/src/gallium/drivers/cell/common.h +++ /dev/null | |||
@@ -1,377 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | /** | ||
29 | * Types and tokens which are common to the SPU and PPU code. | ||
30 | */ | ||
31 | |||
32 | |||
33 | #ifndef CELL_COMMON_H | ||
34 | #define CELL_COMMON_H | ||
35 | |||
36 | #include "pipe/p_compiler.h" | ||
37 | #include "pipe/p_format.h" | ||
38 | #include "pipe/p_state.h" | ||
39 | #include <stdio.h> | ||
40 | |||
41 | /** The standard assert macro doesn't seem to work reliably */ | ||
42 | #define ASSERT(x) \ | ||
43 | if (!(x)) { \ | ||
44 | ubyte *p = NULL; \ | ||
45 | fprintf(stderr, "%s:%d: %s(): assertion %s failed.\n", \ | ||
46 | __FILE__, __LINE__, __FUNCTION__, #x); \ | ||
47 | *p = 0; \ | ||
48 | exit(1); \ | ||
49 | } | ||
50 | |||
51 | |||
52 | #define JOIN(x, y) JOIN_AGAIN(x, y) | ||
53 | #define JOIN_AGAIN(x, y) x ## y | ||
54 | |||
55 | #define STATIC_ASSERT(e) \ | ||
56 | {typedef char JOIN(assertion_failed_at_line_, __LINE__) [(e) ? 1 : -1];} | ||
57 | |||
58 | |||
59 | |||
60 | /** for sanity checking */ | ||
61 | #define ASSERT_ALIGN16(ptr) \ | ||
62 | ASSERT((((unsigned long) (ptr)) & 0xf) == 0); | ||
63 | |||
64 | |||
65 | /** round up value to next multiple of 4 */ | ||
66 | #define ROUNDUP4(k) (((k) + 0x3) & ~0x3) | ||
67 | |||
68 | /** round up value to next multiple of 8 */ | ||
69 | #define ROUNDUP8(k) (((k) + 0x7) & ~0x7) | ||
70 | |||
71 | /** round up value to next multiple of 16 */ | ||
72 | #define ROUNDUP16(k) (((k) + 0xf) & ~0xf) | ||
73 | |||
74 | |||
75 | #define CELL_MAX_SPUS 8 | ||
76 | |||
77 | #define CELL_MAX_SAMPLERS 4 | ||
78 | #define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */ | ||
79 | #define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */ | ||
80 | #define CELL_MAX_WIDTH 1024 /**< max framebuffer width */ | ||
81 | #define CELL_MAX_HEIGHT 1024 /**< max framebuffer height */ | ||
82 | |||
83 | #define TILE_SIZE 32 | ||
84 | |||
85 | |||
86 | /** | ||
87 | * The low byte of a mailbox word contains the command opcode. | ||
88 | * Remaining higher bytes are command specific. | ||
89 | */ | ||
90 | #define CELL_CMD_OPCODE_MASK 0xff | ||
91 | |||
92 | #define CELL_CMD_EXIT 1 | ||
93 | #define CELL_CMD_CLEAR_SURFACE 2 | ||
94 | #define CELL_CMD_FINISH 3 | ||
95 | #define CELL_CMD_RENDER 4 | ||
96 | #define CELL_CMD_BATCH 5 | ||
97 | #define CELL_CMD_RELEASE_VERTS 6 | ||
98 | #define CELL_CMD_STATE_FRAMEBUFFER 10 | ||
99 | #define CELL_CMD_STATE_FRAGMENT_OPS 11 | ||
100 | #define CELL_CMD_STATE_SAMPLER 12 | ||
101 | #define CELL_CMD_STATE_TEXTURE 13 | ||
102 | #define CELL_CMD_STATE_VERTEX_INFO 14 | ||
103 | #define CELL_CMD_STATE_VIEWPORT 15 | ||
104 | #define CELL_CMD_STATE_UNIFORMS 16 | ||
105 | #define CELL_CMD_STATE_VS_ARRAY_INFO 17 | ||
106 | #define CELL_CMD_STATE_BIND_VS 18 | ||
107 | #define CELL_CMD_STATE_FRAGMENT_PROGRAM 19 | ||
108 | #define CELL_CMD_STATE_ATTRIB_FETCH 20 | ||
109 | #define CELL_CMD_STATE_FS_CONSTANTS 21 | ||
110 | #define CELL_CMD_STATE_RASTERIZER 22 | ||
111 | #define CELL_CMD_VS_EXECUTE 23 | ||
112 | #define CELL_CMD_FLUSH_BUFFER_RANGE 24 | ||
113 | #define CELL_CMD_FENCE 25 | ||
114 | |||
115 | |||
116 | /** Command/batch buffers */ | ||
117 | #define CELL_NUM_BUFFERS 4 | ||
118 | #define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */ | ||
119 | |||
120 | #define CELL_BUFFER_STATUS_FREE 10 | ||
121 | #define CELL_BUFFER_STATUS_USED 20 | ||
122 | |||
123 | /** Debug flags */ | ||
124 | #define CELL_DEBUG_CHECKER (1 << 0) | ||
125 | #define CELL_DEBUG_ASM (1 << 1) | ||
126 | #define CELL_DEBUG_SYNC (1 << 2) | ||
127 | #define CELL_DEBUG_FRAGMENT_OPS (1 << 3) | ||
128 | #define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4) | ||
129 | #define CELL_DEBUG_CMD (1 << 5) | ||
130 | #define CELL_DEBUG_CACHE (1 << 6) | ||
131 | |||
132 | #define CELL_FENCE_IDLE 0 | ||
133 | #define CELL_FENCE_EMITTED 1 | ||
134 | #define CELL_FENCE_SIGNALLED 2 | ||
135 | |||
136 | #define CELL_FACING_FRONT 0 | ||
137 | #define CELL_FACING_BACK 1 | ||
138 | |||
139 | struct cell_fence | ||
140 | { | ||
141 | /** There's a 16-byte status qword per SPU */ | ||
142 | volatile uint status[CELL_MAX_SPUS][4]; | ||
143 | }; | ||
144 | |||
145 | #ifdef __SPU__ | ||
146 | typedef vector unsigned int opcode_t; | ||
147 | #else | ||
148 | typedef unsigned int opcode_t[4]; | ||
149 | #endif | ||
150 | |||
151 | /** | ||
152 | * Fence command sent to SPUs. In response, the SPUs will write | ||
153 | * CELL_FENCE_SIGNALLED back to the fence status word in main memory. | ||
154 | */ | ||
155 | struct cell_command_fence | ||
156 | { | ||
157 | opcode_t opcode; /**< CELL_CMD_FENCE */ | ||
158 | struct cell_fence *fence; | ||
159 | uint32_t pad_[3]; | ||
160 | }; | ||
161 | |||
162 | |||
163 | /** | ||
164 | * Command to specify per-fragment operations state and generated code. | ||
165 | * Note that this is a variant-length structure, allocated with as | ||
166 | * much memory as needed to hold the generated code; the "code" | ||
167 | * field *must* be the last field in the structure. Also, the entire | ||
168 | * length of the structure (including the variant code field) must be | ||
169 | * a multiple of 8 bytes; we require that this structure itself be | ||
170 | * a multiple of 8 bytes, and that the generated code also be a multiple | ||
171 | * of 8 bytes. | ||
172 | * | ||
173 | * Also note that the dsa, blend, blend_color fields are really only needed | ||
174 | * for the fallback/C per-pixel code. They're not used when we generate | ||
175 | * dynamic SPU fragment code (which is the normal case), and will eventually | ||
176 | * be removed from this structure. | ||
177 | */ | ||
178 | struct cell_command_fragment_ops | ||
179 | { | ||
180 | opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */ | ||
181 | |||
182 | /* Fields for the fallback case */ | ||
183 | struct pipe_depth_stencil_alpha_state dsa; | ||
184 | struct pipe_blend_state blend; | ||
185 | struct pipe_blend_color blend_color; | ||
186 | |||
187 | /* Fields for the generated SPU code */ | ||
188 | unsigned total_code_size; | ||
189 | unsigned front_code_index; | ||
190 | unsigned back_code_index; | ||
191 | /* this field has variant length, and must be the last field in | ||
192 | * the structure | ||
193 | */ | ||
194 | unsigned code[0]; | ||
195 | }; | ||
196 | |||
197 | |||
198 | /** Max instructions for fragment programs */ | ||
199 | #define SPU_MAX_FRAGMENT_PROGRAM_INSTS 512 | ||
200 | |||
201 | /** | ||
202 | * Command to send a fragment program to SPUs. | ||
203 | */ | ||
204 | struct cell_command_fragment_program | ||
205 | { | ||
206 | opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */ | ||
207 | uint num_inst; /**< Number of instructions */ | ||
208 | uint32_t pad[3]; | ||
209 | unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; | ||
210 | }; | ||
211 | |||
212 | |||
213 | /** | ||
214 | * Tell SPUs about the framebuffer size, location | ||
215 | */ | ||
216 | struct cell_command_framebuffer | ||
217 | { | ||
218 | opcode_t opcode; /**< CELL_CMD_STATE_FRAMEBUFFER */ | ||
219 | int width, height; | ||
220 | void *color_start, *depth_start; | ||
221 | enum pipe_format color_format, depth_format; | ||
222 | uint32_t pad_[2]; | ||
223 | }; | ||
224 | |||
225 | |||
226 | /** | ||
227 | * Tell SPUs about rasterizer state. | ||
228 | */ | ||
229 | struct cell_command_rasterizer | ||
230 | { | ||
231 | opcode_t opcode; /**< CELL_CMD_STATE_RASTERIZER */ | ||
232 | struct pipe_rasterizer_state rasterizer; | ||
233 | /*uint32_t pad[1];*/ | ||
234 | }; | ||
235 | |||
236 | |||
237 | /** | ||
238 | * Clear framebuffer to the given value/color. | ||
239 | */ | ||
240 | struct cell_command_clear_surface | ||
241 | { | ||
242 | opcode_t opcode; /**< CELL_CMD_CLEAR_SURFACE */ | ||
243 | uint surface; /**< Temporary: 0=color, 1=Z */ | ||
244 | uint value; | ||
245 | uint32_t pad[2]; | ||
246 | }; | ||
247 | |||
248 | |||
249 | /** | ||
250 | * Array info used by the vertex shader's vertex puller. | ||
251 | */ | ||
252 | struct cell_array_info | ||
253 | { | ||
254 | uint64_t base; /**< Base address of the 0th element. */ | ||
255 | uint attr; /**< Attribute that this state is for. */ | ||
256 | uint pitch; /**< Byte pitch from one entry to the next. */ | ||
257 | uint size; | ||
258 | uint function_offset; | ||
259 | }; | ||
260 | |||
261 | |||
262 | struct cell_attribute_fetch_code | ||
263 | { | ||
264 | uint64_t base; | ||
265 | uint size; | ||
266 | }; | ||
267 | |||
268 | |||
269 | struct cell_buffer_range | ||
270 | { | ||
271 | uint64_t base; | ||
272 | unsigned size; | ||
273 | }; | ||
274 | |||
275 | |||
276 | struct cell_shader_info | ||
277 | { | ||
278 | uint64_t declarations; | ||
279 | uint64_t instructions; | ||
280 | uint64_t immediates; | ||
281 | |||
282 | unsigned num_outputs; | ||
283 | unsigned num_declarations; | ||
284 | unsigned num_instructions; | ||
285 | unsigned num_immediates; | ||
286 | }; | ||
287 | |||
288 | |||
289 | #define SPU_VERTS_PER_BATCH 64 | ||
290 | struct cell_command_vs | ||
291 | { | ||
292 | opcode_t opcode; /**< CELL_CMD_VS_EXECUTE */ | ||
293 | uint64_t vOut[SPU_VERTS_PER_BATCH]; | ||
294 | unsigned num_elts; | ||
295 | unsigned elts[SPU_VERTS_PER_BATCH]; | ||
296 | float plane[12][4]; | ||
297 | unsigned nr_planes; | ||
298 | unsigned nr_attrs; | ||
299 | }; | ||
300 | |||
301 | |||
302 | struct cell_command_render | ||
303 | { | ||
304 | opcode_t opcode; /**< CELL_CMD_RENDER */ | ||
305 | uint prim_type; /**< PIPE_PRIM_x */ | ||
306 | uint num_verts; | ||
307 | uint vertex_size; /**< bytes per vertex */ | ||
308 | uint num_indexes; | ||
309 | uint vertex_buf; /**< which cell->buffer[] contains the vertex data */ | ||
310 | float xmin, ymin, xmax, ymax; /* XXX another dummy field */ | ||
311 | uint min_index; | ||
312 | boolean inline_verts; | ||
313 | uint32_t pad_[1]; | ||
314 | }; | ||
315 | |||
316 | |||
317 | struct cell_command_release_verts | ||
318 | { | ||
319 | opcode_t opcode; /**< CELL_CMD_RELEASE_VERTS */ | ||
320 | uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ | ||
321 | uint32_t pad_[3]; | ||
322 | }; | ||
323 | |||
324 | |||
325 | struct cell_command_sampler | ||
326 | { | ||
327 | opcode_t opcode; /**< CELL_CMD_STATE_SAMPLER */ | ||
328 | uint unit; | ||
329 | struct pipe_sampler_state state; | ||
330 | uint32_t pad_[3]; | ||
331 | }; | ||
332 | |||
333 | |||
334 | struct cell_command_texture | ||
335 | { | ||
336 | opcode_t opcode; /**< CELL_CMD_STATE_TEXTURE */ | ||
337 | uint target; /**< PIPE_TEXTURE_x */ | ||
338 | uint unit; | ||
339 | void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */ | ||
340 | ushort width[CELL_MAX_TEXTURE_LEVELS]; | ||
341 | ushort height[CELL_MAX_TEXTURE_LEVELS]; | ||
342 | ushort depth[CELL_MAX_TEXTURE_LEVELS]; | ||
343 | }; | ||
344 | |||
345 | |||
346 | #define MAX_SPU_FUNCTIONS 12 | ||
347 | /** | ||
348 | * Used to tell the PPU about the address of particular functions in the | ||
349 | * SPU's address space. | ||
350 | */ | ||
351 | struct cell_spu_function_info | ||
352 | { | ||
353 | uint num; | ||
354 | char names[MAX_SPU_FUNCTIONS][16]; | ||
355 | uint addrs[MAX_SPU_FUNCTIONS]; | ||
356 | char pad[12]; /**< Pad struct to multiple of 16 bytes (256 currently) */ | ||
357 | }; | ||
358 | |||
359 | |||
360 | /** This is the object passed to spe_create_thread() */ | ||
361 | PIPE_ALIGN_TYPE(16, | ||
362 | struct cell_init_info | ||
363 | { | ||
364 | unsigned id; | ||
365 | unsigned num_spus; | ||
366 | unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */ | ||
367 | float inv_timebase; /**< 1.0/timebase, for perf measurement */ | ||
368 | |||
369 | /** Buffers for command batches, vertex/index data */ | ||
370 | ubyte *buffers[CELL_NUM_BUFFERS]; | ||
371 | uint *buffer_status; /**< points at cell_context->buffer_status */ | ||
372 | |||
373 | struct cell_spu_function_info *spu_functions; | ||
374 | }); | ||
375 | |||
376 | |||
377 | #endif /* CELL_COMMON_H */ | ||
diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile deleted file mode 100644 index c92f8e5cba2..00000000000 --- a/src/gallium/drivers/cell/ppu/Makefile +++ /dev/null | |||
@@ -1,86 +0,0 @@ | |||
1 | # Gallium3D Cell driver: PPU code | ||
2 | |||
3 | # This makefile builds the libcell.a library which gets pulled into | ||
4 | # the main libGL.so library | ||
5 | |||
6 | |||
7 | TOP = ../../../../.. | ||
8 | include $(TOP)/configs/current | ||
9 | |||
10 | |||
11 | # This is the "top-level" cell PPU driver code, will get pulled into libGL.so | ||
12 | # by the winsys Makefile. | ||
13 | CELL_LIB = ../libcell.a | ||
14 | |||
15 | |||
16 | # This is the SPU code. We'd like to be able to put this into the libcell.a | ||
17 | # archive with the PPU code, but nesting .a libs doesn't seem to work. | ||
18 | # So, it's pulled into libGL.so in gallium/winsys/xlib/Makefile | ||
19 | SPU_CODE_MODULE = ../spu/g3d_spu.a | ||
20 | |||
21 | |||
22 | SOURCES = \ | ||
23 | cell_batch.c \ | ||
24 | cell_clear.c \ | ||
25 | cell_context.c \ | ||
26 | cell_draw_arrays.c \ | ||
27 | cell_fence.c \ | ||
28 | cell_flush.c \ | ||
29 | cell_gen_fragment.c \ | ||
30 | cell_gen_fp.c \ | ||
31 | cell_state_derived.c \ | ||
32 | cell_state_emit.c \ | ||
33 | cell_state_shader.c \ | ||
34 | cell_pipe_state.c \ | ||
35 | cell_screen.c \ | ||
36 | cell_state_vertex.c \ | ||
37 | cell_spu.c \ | ||
38 | cell_surface.c \ | ||
39 | cell_texture.c \ | ||
40 | cell_vbuf.c \ | ||
41 | cell_vertex_fetch.c \ | ||
42 | cell_vertex_shader.c | ||
43 | |||
44 | |||
45 | OBJECTS = $(SOURCES:.c=.o) \ | ||
46 | |||
47 | INCLUDE_DIRS = \ | ||
48 | -I$(TOP)/src/mesa \ | ||
49 | -I$(TOP)/src/gallium/include \ | ||
50 | -I$(TOP)/src/gallium/auxiliary \ | ||
51 | -I$(TOP)/src/gallium/drivers | ||
52 | |||
53 | .c.o: | ||
54 | $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ | ||
55 | |||
56 | |||
57 | .c.s: | ||
58 | $(CC) -S $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ | ||
59 | |||
60 | |||
61 | default: $(CELL_LIB) | ||
62 | |||
63 | |||
64 | $(CELL_LIB): $(OBJECTS) $(SPU_CODE_MODULE) | ||
65 | # ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) # doesn't work | ||
66 | ar -ru $(CELL_LIB) $(OBJECTS) | ||
67 | |||
68 | #$(PROG): $(PPU_OBJECTS) | ||
69 | # $(CC) -o $(PROG) $(PPU_OBJECTS) $(SPU_CODE_MODULE) $(PPU_LFLAGS) | ||
70 | |||
71 | |||
72 | |||
73 | clean: | ||
74 | rm -f *.o *~ $(CELL_LIB) | ||
75 | |||
76 | |||
77 | |||
78 | depend: $(SOURCES) | ||
79 | rm -f depend | ||
80 | touch depend | ||
81 | $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null | ||
82 | |||
83 | include depend | ||
84 | |||
85 | |||
86 | |||
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c deleted file mode 100644 index fe144f8b849..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_batch.c +++ /dev/null | |||
@@ -1,260 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | #include "cell_context.h" | ||
30 | #include "cell_batch.h" | ||
31 | #include "cell_fence.h" | ||
32 | #include "cell_spu.h" | ||
33 | |||
34 | |||
35 | |||
36 | /** | ||
37 | * Search the buffer pool for an empty/free buffer and return its index. | ||
38 | * Buffers are used for storing vertex data, state and commands which | ||
39 | * will be sent to the SPUs. | ||
40 | * If no empty buffers are available, wait for one. | ||
41 | * \return buffer index in [0, CELL_NUM_BUFFERS-1] | ||
42 | */ | ||
43 | uint | ||
44 | cell_get_empty_buffer(struct cell_context *cell) | ||
45 | { | ||
46 | static uint prev_buffer = 0; | ||
47 | uint buf = (prev_buffer + 1) % CELL_NUM_BUFFERS; | ||
48 | uint tries = 0; | ||
49 | |||
50 | /* Find a buffer that's marked as free by all SPUs */ | ||
51 | while (1) { | ||
52 | uint spu, num_free = 0; | ||
53 | |||
54 | for (spu = 0; spu < cell->num_spus; spu++) { | ||
55 | if (cell->buffer_status[spu][buf][0] == CELL_BUFFER_STATUS_FREE) { | ||
56 | num_free++; | ||
57 | |||
58 | if (num_free == cell->num_spus) { | ||
59 | /* found a free buffer, now mark status as used */ | ||
60 | for (spu = 0; spu < cell->num_spus; spu++) { | ||
61 | cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; | ||
62 | } | ||
63 | /* | ||
64 | printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries); | ||
65 | */ | ||
66 | prev_buffer = buf; | ||
67 | |||
68 | /* release tex buffer associated w/ prev use of this batch buf */ | ||
69 | cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]); | ||
70 | |||
71 | return buf; | ||
72 | } | ||
73 | } | ||
74 | else { | ||
75 | break; | ||
76 | } | ||
77 | } | ||
78 | |||
79 | /* try next buf */ | ||
80 | buf = (buf + 1) % CELL_NUM_BUFFERS; | ||
81 | |||
82 | tries++; | ||
83 | if (tries == 100) { | ||
84 | /* | ||
85 | printf("PPU WAITING for buffer...\n"); | ||
86 | */ | ||
87 | } | ||
88 | } | ||
89 | } | ||
90 | |||
91 | |||
92 | /** | ||
93 | * Append a fence command to the current batch buffer. | ||
94 | * Note that we're sure there's always room for this because of the | ||
95 | * adjusted size check in cell_batch_free_space(). | ||
96 | */ | ||
97 | static void | ||
98 | emit_fence(struct cell_context *cell) | ||
99 | { | ||
100 | const uint batch = cell->cur_batch; | ||
101 | const uint size = cell->buffer_size[batch]; | ||
102 | struct cell_command_fence *fence_cmd; | ||
103 | struct cell_fence *fence = &cell->fenced_buffers[batch].fence; | ||
104 | uint i; | ||
105 | |||
106 | /* set fence status to emitted, not yet signalled */ | ||
107 | for (i = 0; i < cell->num_spus; i++) { | ||
108 | fence->status[i][0] = CELL_FENCE_EMITTED; | ||
109 | } | ||
110 | |||
111 | STATIC_ASSERT(sizeof(struct cell_command_fence) % 16 == 0); | ||
112 | ASSERT(size % 16 == 0); | ||
113 | ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE); | ||
114 | |||
115 | fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size); | ||
116 | fence_cmd->opcode[0] = CELL_CMD_FENCE; | ||
117 | fence_cmd->fence = fence; | ||
118 | |||
119 | /* update batch buffer size */ | ||
120 | cell->buffer_size[batch] = size + sizeof(struct cell_command_fence); | ||
121 | } | ||
122 | |||
123 | |||
124 | /** | ||
125 | * Flush the current batch buffer to the SPUs. | ||
126 | * An empty buffer will be found and set as the new current batch buffer | ||
127 | * for subsequent commands/data. | ||
128 | */ | ||
129 | void | ||
130 | cell_batch_flush(struct cell_context *cell) | ||
131 | { | ||
132 | static boolean flushing = FALSE; | ||
133 | uint batch = cell->cur_batch; | ||
134 | uint size = cell->buffer_size[batch]; | ||
135 | uint spu, cmd_word; | ||
136 | |||
137 | assert(!flushing); | ||
138 | |||
139 | if (size == 0) | ||
140 | return; | ||
141 | |||
142 | /* Before we use this batch buffer, make sure any fenced texture buffers | ||
143 | * are released. | ||
144 | */ | ||
145 | if (cell->fenced_buffers[batch].head) { | ||
146 | emit_fence(cell); | ||
147 | size = cell->buffer_size[batch]; | ||
148 | } | ||
149 | |||
150 | flushing = TRUE; | ||
151 | |||
152 | assert(batch < CELL_NUM_BUFFERS); | ||
153 | |||
154 | /* | ||
155 | printf("cell_batch_dispatch: buf %u at %p, size %u\n", | ||
156 | batch, &cell->buffer[batch][0], size); | ||
157 | */ | ||
158 | |||
159 | /* | ||
160 | * Build "BATCH" command and send to all SPUs. | ||
161 | */ | ||
162 | cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16); | ||
163 | |||
164 | for (spu = 0; spu < cell->num_spus; spu++) { | ||
165 | assert(cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_USED); | ||
166 | send_mbox_message(cell_global.spe_contexts[spu], cmd_word); | ||
167 | } | ||
168 | |||
169 | /* When the SPUs are done copying the buffer into their local stores | ||
170 | * they'll write a BUFFER_STATUS_FREE message into the buffer_status[] | ||
171 | * array indicating that the PPU can re-use the buffer. | ||
172 | */ | ||
173 | |||
174 | batch = cell_get_empty_buffer(cell); | ||
175 | |||
176 | cell->buffer_size[batch] = 0; /* empty */ | ||
177 | cell->cur_batch = batch; | ||
178 | |||
179 | flushing = FALSE; | ||
180 | } | ||
181 | |||
182 | |||
183 | /** | ||
184 | * Return the number of bytes free in the current batch buffer. | ||
185 | */ | ||
186 | uint | ||
187 | cell_batch_free_space(const struct cell_context *cell) | ||
188 | { | ||
189 | uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch]; | ||
190 | free -= sizeof(struct cell_command_fence); | ||
191 | return free; | ||
192 | } | ||
193 | |||
194 | |||
195 | /** | ||
196 | * Allocate 'bytes' bytes of space in the current batch buffer. | ||
197 | * Bytes must be a multiple of 16 bytes. Allocation will be 16 byte aligned. | ||
198 | * \return address in batch buffer to put data | ||
199 | */ | ||
200 | void * | ||
201 | cell_batch_alloc16(struct cell_context *cell, uint bytes) | ||
202 | { | ||
203 | void *pos; | ||
204 | uint size; | ||
205 | |||
206 | ASSERT(bytes % 16 == 0); | ||
207 | ASSERT(bytes <= CELL_BUFFER_SIZE); | ||
208 | ASSERT(cell->cur_batch >= 0); | ||
209 | |||
210 | #ifdef ASSERT | ||
211 | { | ||
212 | uint spu; | ||
213 | for (spu = 0; spu < cell->num_spus; spu++) { | ||
214 | ASSERT(cell->buffer_status[spu][cell->cur_batch][0] | ||
215 | == CELL_BUFFER_STATUS_USED); | ||
216 | } | ||
217 | } | ||
218 | #endif | ||
219 | |||
220 | size = cell->buffer_size[cell->cur_batch]; | ||
221 | |||
222 | if (bytes > cell_batch_free_space(cell)) { | ||
223 | cell_batch_flush(cell); | ||
224 | size = 0; | ||
225 | } | ||
226 | |||
227 | ASSERT(size % 16 == 0); | ||
228 | ASSERT(size + bytes <= CELL_BUFFER_SIZE); | ||
229 | |||
230 | pos = (void *) (cell->buffer[cell->cur_batch] + size); | ||
231 | |||
232 | cell->buffer_size[cell->cur_batch] = size + bytes; | ||
233 | |||
234 | return pos; | ||
235 | } | ||
236 | |||
237 | |||
238 | /** | ||
239 | * One-time init of batch buffers. | ||
240 | */ | ||
241 | void | ||
242 | cell_init_batch_buffers(struct cell_context *cell) | ||
243 | { | ||
244 | uint spu, buf; | ||
245 | |||
246 | /* init command, vertex/index buffer info */ | ||
247 | for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) { | ||
248 | cell->buffer_size[buf] = 0; | ||
249 | |||
250 | /* init batch buffer status values, | ||
251 | * mark 0th buffer as used, rest as free. | ||
252 | */ | ||
253 | for (spu = 0; spu < cell->num_spus; spu++) { | ||
254 | if (buf == 0) | ||
255 | cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; | ||
256 | else | ||
257 | cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE; | ||
258 | } | ||
259 | } | ||
260 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h deleted file mode 100644 index 290136031a1..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_batch.h +++ /dev/null | |||
@@ -1,54 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | #ifndef CELL_BATCH_H | ||
30 | #define CELL_BATCH_H | ||
31 | |||
32 | #include "pipe/p_compiler.h" | ||
33 | |||
34 | |||
35 | struct cell_context; | ||
36 | |||
37 | |||
38 | extern uint | ||
39 | cell_get_empty_buffer(struct cell_context *cell); | ||
40 | |||
41 | extern void | ||
42 | cell_batch_flush(struct cell_context *cell); | ||
43 | |||
44 | extern uint | ||
45 | cell_batch_free_space(const struct cell_context *cell); | ||
46 | |||
47 | extern void * | ||
48 | cell_batch_alloc16(struct cell_context *cell, uint bytes); | ||
49 | |||
50 | extern void | ||
51 | cell_init_batch_buffers(struct cell_context *cell); | ||
52 | |||
53 | |||
54 | #endif /* CELL_BATCH_H */ | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c deleted file mode 100644 index 6a525ef4e41..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ /dev/null | |||
@@ -1,93 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | /** | ||
29 | * Authors | ||
30 | * Brian Paul | ||
31 | */ | ||
32 | |||
33 | #include <stdio.h> | ||
34 | #include <assert.h> | ||
35 | #include <stdint.h> | ||
36 | #include "util/u_inlines.h" | ||
37 | #include "util/u_memory.h" | ||
38 | #include "util/u_pack_color.h" | ||
39 | #include "cell/common.h" | ||
40 | #include "cell_clear.h" | ||
41 | #include "cell_context.h" | ||
42 | #include "cell_batch.h" | ||
43 | #include "cell_flush.h" | ||
44 | #include "cell_spu.h" | ||
45 | #include "cell_state.h" | ||
46 | |||
47 | |||
48 | /** | ||
49 | * Called via pipe->clear() | ||
50 | */ | ||
51 | void | ||
52 | cell_clear(struct pipe_context *pipe, unsigned buffers, | ||
53 | const pipe_color_union *color, | ||
54 | double depth, unsigned stencil) | ||
55 | { | ||
56 | struct cell_context *cell = cell_context(pipe); | ||
57 | |||
58 | if (cell->dirty) | ||
59 | cell_update_derived(cell); | ||
60 | |||
61 | if (buffers & PIPE_CLEAR_COLOR) { | ||
62 | uint surfIndex = 0; | ||
63 | union util_color uc; | ||
64 | |||
65 | util_pack_color(color->f, cell->framebuffer.cbufs[0]->format, &uc); | ||
66 | |||
67 | /* Build a CLEAR command and place it in the current batch buffer */ | ||
68 | STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0); | ||
69 | struct cell_command_clear_surface *clr | ||
70 | = (struct cell_command_clear_surface *) | ||
71 | cell_batch_alloc16(cell, sizeof(*clr)); | ||
72 | clr->opcode[0] = CELL_CMD_CLEAR_SURFACE; | ||
73 | clr->surface = surfIndex; | ||
74 | clr->value = uc.ui; | ||
75 | } | ||
76 | |||
77 | if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { | ||
78 | uint surfIndex = 1; | ||
79 | uint clearValue; | ||
80 | |||
81 | clearValue = util_pack_z_stencil(cell->framebuffer.zsbuf->format, | ||
82 | depth, stencil); | ||
83 | |||
84 | /* Build a CLEAR command and place it in the current batch buffer */ | ||
85 | STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0); | ||
86 | struct cell_command_clear_surface *clr | ||
87 | = (struct cell_command_clear_surface *) | ||
88 | cell_batch_alloc16(cell, sizeof(*clr)); | ||
89 | clr->opcode[0] = CELL_CMD_CLEAR_SURFACE; | ||
90 | clr->surface = surfIndex; | ||
91 | clr->value = clearValue; | ||
92 | } | ||
93 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_clear.h b/src/gallium/drivers/cell/ppu/cell_clear.h deleted file mode 100644 index a365feb0f00..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_clear.h +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | #ifndef CELL_CLEAR_H | ||
30 | #define CELL_CLEAR_H | ||
31 | |||
32 | |||
33 | struct pipe_context; | ||
34 | |||
35 | |||
/* pipe_context::clear() implementation for the Cell driver.
 * 'buffers' is tested against PIPE_CLEAR_COLOR and PIPE_CLEAR_DEPTHSTENCIL.
 */
void
cell_clear(struct pipe_context *pipe,
           unsigned buffers,
           const union pipe_color_union *color,
           double depth,
           unsigned stencil);
40 | |||
41 | |||
42 | #endif /* CELL_CLEAR_H */ | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c deleted file mode 100644 index 58e647a39fa..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ /dev/null | |||
@@ -1,190 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | /** | ||
29 | * Authors | ||
30 | * Brian Paul | ||
31 | */ | ||
32 | |||
33 | |||
34 | #include <stdio.h> | ||
35 | |||
36 | #include "pipe/p_defines.h" | ||
37 | #include "pipe/p_format.h" | ||
38 | #include "util/u_memory.h" | ||
39 | #include "pipe/p_screen.h" | ||
40 | #include "util/u_inlines.h" | ||
41 | |||
42 | #include "draw/draw_context.h" | ||
43 | #include "draw/draw_private.h" | ||
44 | |||
45 | #include "cell/common.h" | ||
46 | #include "cell_batch.h" | ||
47 | #include "cell_clear.h" | ||
48 | #include "cell_context.h" | ||
49 | #include "cell_draw_arrays.h" | ||
50 | #include "cell_fence.h" | ||
51 | #include "cell_flush.h" | ||
52 | #include "cell_state.h" | ||
53 | #include "cell_surface.h" | ||
54 | #include "cell_spu.h" | ||
55 | #include "cell_pipe_state.h" | ||
56 | #include "cell_texture.h" | ||
57 | #include "cell_vbuf.h" | ||
58 | |||
59 | |||
60 | |||
61 | static void | ||
62 | cell_destroy_context( struct pipe_context *pipe ) | ||
63 | { | ||
64 | struct cell_context *cell = cell_context(pipe); | ||
65 | unsigned i; | ||
66 | |||
67 | for (i = 0; i < cell->num_vertex_buffers; i++) { | ||
68 | pipe_resource_reference(&cell->vertex_buffer[i].buffer, NULL); | ||
69 | } | ||
70 | |||
71 | util_delete_keymap(cell->fragment_ops_cache, NULL); | ||
72 | |||
73 | cell_spu_exit(cell); | ||
74 | |||
75 | align_free(cell); | ||
76 | } | ||
77 | |||
78 | |||
79 | static struct draw_context * | ||
80 | cell_draw_create(struct cell_context *cell) | ||
81 | { | ||
82 | struct draw_context *draw = draw_create(&cell->pipe); | ||
83 | |||
84 | #if 0 /* broken */ | ||
85 | if (getenv("GALLIUM_CELL_VS")) { | ||
86 | /* plug in SPU-based vertex transformation code */ | ||
87 | draw->shader_queue_flush = cell_vertex_shader_queue_flush; | ||
88 | draw->driver_private = cell; | ||
89 | } | ||
90 | #endif | ||
91 | |||
92 | return draw; | ||
93 | } | ||
94 | |||
95 | |||
96 | static const struct debug_named_value cell_debug_flags[] = { | ||
97 | {"checker", CELL_DEBUG_CHECKER, NULL},/**< modulate tile clear color by SPU ID */ | ||
98 | {"asm", CELL_DEBUG_ASM, NULL}, /**< dump SPU asm code */ | ||
99 | {"sync", CELL_DEBUG_SYNC, NULL}, /**< SPUs do synchronous DMA */ | ||
100 | {"fragops", CELL_DEBUG_FRAGMENT_OPS, NULL}, /**< SPUs emit fragment ops debug messages*/ | ||
101 | {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK, NULL}, /**< SPUs use reference implementation for fragment ops*/ | ||
102 | {"cmd", CELL_DEBUG_CMD, NULL}, /**< SPUs dump command buffer info */ | ||
103 | {"cache", CELL_DEBUG_CACHE, NULL}, /**< report texture cache stats on exit */ | ||
104 | DEBUG_NAMED_VALUE_END | ||
105 | }; | ||
106 | |||
107 | |||
108 | struct pipe_context * | ||
109 | cell_create_context(struct pipe_screen *screen, | ||
110 | void *priv ) | ||
111 | { | ||
112 | struct cell_context *cell; | ||
113 | uint i; | ||
114 | |||
115 | /* some fields need to be 16-byte aligned, so align the whole object */ | ||
116 | cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); | ||
117 | if (!cell) | ||
118 | return NULL; | ||
119 | |||
120 | memset(cell, 0, sizeof(*cell)); | ||
121 | |||
122 | cell->winsys = NULL; /* XXX: fixme - get this from screen? */ | ||
123 | cell->pipe.winsys = NULL; | ||
124 | cell->pipe.screen = screen; | ||
125 | cell->pipe.priv = priv; | ||
126 | cell->pipe.destroy = cell_destroy_context; | ||
127 | |||
128 | cell->pipe.clear = cell_clear; | ||
129 | cell->pipe.flush = cell_flush; | ||
130 | |||
131 | #if 0 | ||
132 | cell->pipe.begin_query = cell_begin_query; | ||
133 | cell->pipe.end_query = cell_end_query; | ||
134 | cell->pipe.wait_query = cell_wait_query; | ||
135 | #endif | ||
136 | |||
137 | cell_init_draw_functions(cell); | ||
138 | cell_init_state_functions(cell); | ||
139 | cell_init_shader_functions(cell); | ||
140 | cell_init_surface_functions(cell); | ||
141 | cell_init_vertex_functions(cell); | ||
142 | cell_init_texture_transfer_funcs(cell); | ||
143 | |||
144 | cell->draw = cell_draw_create(cell); | ||
145 | |||
146 | /* Create cache of fragment ops generated code */ | ||
147 | cell->fragment_ops_cache = | ||
148 | util_new_keymap(sizeof(struct cell_fragment_ops_key), ~0, NULL); | ||
149 | |||
150 | cell_init_vbuf(cell); | ||
151 | |||
152 | draw_set_rasterize_stage(cell->draw, cell->vbuf); | ||
153 | |||
154 | /* convert all points/lines to tris for the time being */ | ||
155 | draw_wide_point_threshold(cell->draw, 0.0); | ||
156 | draw_wide_line_threshold(cell->draw, 0.0); | ||
157 | |||
158 | /* get env vars or read config file to get debug flags */ | ||
159 | cell->debug_flags = debug_get_flags_option("CELL_DEBUG", | ||
160 | cell_debug_flags, | ||
161 | 0 ); | ||
162 | |||
163 | for (i = 0; i < CELL_NUM_BUFFERS; i++) | ||
164 | cell_fence_init(&cell->fenced_buffers[i].fence); | ||
165 | |||
166 | |||
167 | /* | ||
168 | * SPU stuff | ||
169 | */ | ||
170 | /* This call only works with SDK 3.0. Anyone still using 2.1??? */ | ||
171 | cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1); | ||
172 | cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1); | ||
173 | if (cell->debug_flags) { | ||
174 | printf("Cell: found %d Cell(s) with %u SPUs\n", | ||
175 | cell->num_cells, cell->num_spus); | ||
176 | } | ||
177 | if (getenv("CELL_NUM_SPUS")) { | ||
178 | cell->num_spus = atoi(getenv("CELL_NUM_SPUS")); | ||
179 | assert(cell->num_spus > 0); | ||
180 | } | ||
181 | |||
182 | cell_start_spus(cell); | ||
183 | |||
184 | cell_init_batch_buffers(cell); | ||
185 | |||
186 | /* make sure SPU initializations are done before proceeding */ | ||
187 | cell_flush_int(cell, CELL_FLUSH_WAIT); | ||
188 | |||
189 | return &cell->pipe; | ||
190 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h deleted file mode 100644 index d1aee62ba1e..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ /dev/null | |||
@@ -1,210 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | #ifndef CELL_CONTEXT_H | ||
30 | #define CELL_CONTEXT_H | ||
31 | |||
32 | |||
33 | #include "pipe/p_context.h" | ||
34 | #include "pipe/p_defines.h" | ||
35 | #include "draw/draw_vertex.h" | ||
36 | #include "draw/draw_vbuf.h" | ||
37 | /*#include "cell_winsys.h"*/ | ||
38 | #include "cell/common.h" | ||
39 | #include "rtasm/rtasm_ppc_spe.h" | ||
40 | #include "tgsi/tgsi_scan.h" | ||
41 | #include "util/u_keymap.h" | ||
42 | |||
43 | |||
44 | struct cell_vbuf_render; | ||
45 | |||
46 | |||
/**
 * Cell vertex shader state, subclass of pipe_shader_state.
 *
 * The pipe_shader_state member comes first, which is what makes this a
 * "subclass" in the C sense.
 */
struct cell_vertex_shader_state
{
   struct pipe_shader_state shader;  /* base state; first member */
   struct tgsi_shader_info info;
   void *draw_data;  /* NOTE(review): presumably the draw module's shader object -- confirm */
};
56 | |||
57 | |||
/**
 * Cell fragment shader state, subclass of pipe_shader_state.
 *
 * The pipe_shader_state member comes first, which is what makes this a
 * "subclass" in the C sense.
 */
struct cell_fragment_shader_state
{
   struct pipe_shader_state shader;  /* base state; first member */
   struct tgsi_shader_info info;
   struct spe_function code;  /* NOTE(review): presumably the generated SPU code for this shader -- confirm */
   void *data;
};
68 | |||
69 | |||
/**
 * Key for mapping per-fragment state to cached SPU machine code.
 * keymap(cell_fragment_ops_key) => cell_command_fragment_ops
 *
 * cell_create_context() passes sizeof(struct cell_fragment_ops_key) as the
 * key size when creating cell_context::fragment_ops_cache.
 */
struct cell_fragment_ops_key
{
   struct pipe_blend_state blend;
   struct pipe_blend_color blend_color;
   struct pipe_depth_stencil_alpha_state dsa;
   enum pipe_format color_format;
   enum pipe_format zs_format;
};
82 | |||
83 | |||
84 | struct cell_buffer_node; | ||
85 | |||
/**
 * Fenced buffer list.  List of buffers which can be unreferenced after
 * the fence has been executed/signalled.
 *
 * cell_context holds one of these per batch buffer (fenced_buffers[]).
 */
struct cell_buffer_list
{
   PIPE_ALIGN_VAR(16) struct cell_fence fence;  /* declared 16-byte aligned */
   struct cell_buffer_node *head;
};
95 | |||
/**
 * Vertex element state: an array of pipe_vertex_element with its count.
 */
struct cell_velems_state
{
   unsigned count;  /* NOTE(review): presumably the number of valid velem[] entries -- confirm */
   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
};
101 | |||
/**
 * Per-context state, subclass of pipe_context.
 *
 * The pipe_context must remain the first member: cell_context() converts
 * a pipe_context pointer to a cell_context pointer with a plain cast.
 * Instances are allocated 16-byte aligned by cell_create_context() because
 * several members are declared with PIPE_ALIGN_VAR(16).
 */
struct cell_context
{
   struct pipe_context pipe;  /* base class; must be first */

   struct cell_winsys *winsys;  /* set to NULL by cell_create_context() */

   const struct pipe_blend_state *blend;
   const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
   uint num_samplers;
   const struct pipe_depth_stencil_alpha_state *depth_stencil;
   const struct pipe_rasterizer_state *rasterizer;
   const struct cell_vertex_shader_state *vs;
   const struct cell_fragment_shader_state *fs;
   const struct cell_velems_state *velems;

   struct spe_function logic_op;

   struct pipe_blend_color blend_color;
   struct pipe_stencil_ref stencil_ref;
   struct pipe_clip_state clip;
   struct pipe_resource *constants[2];
   struct pipe_framebuffer_state framebuffer;
   struct pipe_poly_stipple poly_stipple;
   struct pipe_scissor_state scissor;
   struct cell_resource *texture[PIPE_MAX_SAMPLERS];
   struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
   uint num_textures;
   struct pipe_viewport_state viewport;
   /* references to these buffers are dropped when the context is destroyed */
   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
   uint num_vertex_buffers;
   struct pipe_index_buffer index_buffer;

   ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS];
   ubyte *zsbuf_map;

   /* when nonzero, cell_update_derived() is run before clearing/drawing */
   uint dirty;
   uint dirty_textures;  /* bitmask of texture units */
   uint dirty_samplers;  /* bitmask of sampler units */

   /** Cache of code generated for per-fragment ops */
   struct keymap *fragment_ops_cache;

   /** The primitive drawing context */
   struct draw_context *draw;
   struct draw_stage *render_stage;

   /** For post-transformed vertex buffering: */
   struct cell_vbuf_render *vbuf_render;
   struct draw_stage *vbuf;  /* installed as the draw module's rasterize stage */

   struct vertex_info vertex_info;

   /** Mapped constant buffers */
   const void *mapped_constants[PIPE_SHADER_TYPES];

   PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions;

   /* counts obtained from spe_cpu_info_get() in cell_create_context();
    * num_spus may be overridden with the CELL_NUM_SPUS environment variable
    */
   uint num_cells, num_spus;

   /** Buffers for command batches, vertex/index data */
   uint buffer_size[CELL_NUM_BUFFERS];  /* amount of each buffer in use */
   PIPE_ALIGN_VAR(16) ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE];

   int cur_batch;  /**< which buffer is being filled w/ commands */

   /** [4] to ensure 16-byte alignment for each status word.
    * Word 0 of each entry holds a CELL_BUFFER_STATUS_x value.
    */
   PIPE_ALIGN_VAR(16) uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4];


   /** Associated with each command/batch buffer is a list of pipe_buffers
    * that are fenced.  When the last command in a buffer is executed, the
    * fence will be signalled, indicating that any pipe_buffers preceding
    * that fence can be unreferenced (and probably freed).
    */
   struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS];


   struct spe_function attrib_fetch;
   unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS];

   /* flag bits parsed from the CELL_DEBUG option (see cell_debug_flags[]) */
   unsigned debug_flags;
};
187 | |||
188 | |||
189 | |||
190 | |||
191 | static INLINE struct cell_context * | ||
192 | cell_context(struct pipe_context *pipe) | ||
193 | { | ||
194 | return (struct cell_context *) pipe; | ||
195 | } | ||
196 | |||
197 | |||
/* Create a Cell context for 'screen'; 'priv' is stored in the returned
 * pipe_context.  Returns NULL if the context cannot be allocated.
 */
extern struct pipe_context *
cell_create_context(struct pipe_screen *screen, void *priv);

/* NOTE(review): only referenced from disabled (#if 0) code in
 * cell_context.c as far as visible here -- confirm it is still defined.
 */
extern void
cell_vertex_shader_queue_flush(struct draw_context *draw);


/* XXX find a better home for this */
extern void
cell_update_vertex_fetch(struct draw_context *draw);
208 | |||
209 | |||
210 | #endif /* CELL_CONTEXT_H */ | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c deleted file mode 100644 index a367fa3fe15..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ /dev/null | |||
@@ -1,113 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | /* Author: | ||
29 | * Brian Paul | ||
30 | * Keith Whitwell | ||
31 | */ | ||
32 | |||
33 | |||
34 | #include "pipe/p_defines.h" | ||
35 | #include "pipe/p_context.h" | ||
36 | #include "util/u_inlines.h" | ||
37 | |||
38 | #include "cell_context.h" | ||
39 | #include "cell_draw_arrays.h" | ||
40 | #include "cell_state.h" | ||
41 | #include "cell_flush.h" | ||
42 | #include "cell_texture.h" | ||
43 | |||
44 | #include "draw/draw_context.h" | ||
45 | |||
46 | |||
47 | |||
48 | |||
49 | |||
50 | |||
51 | /** | ||
52 | * Draw vertex arrays, with optional indexing. | ||
53 | * Basically, map the vertex buffers (and drawing surfaces), then hand off | ||
54 | * the drawing to the 'draw' module. | ||
55 | * | ||
56 | * XXX should the element buffer be specified/bound with a separate function? | ||
57 | */ | ||
58 | static void | ||
59 | cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) | ||
60 | { | ||
61 | struct cell_context *cell = cell_context(pipe); | ||
62 | struct draw_context *draw = cell->draw; | ||
63 | void *mapped_indices = NULL; | ||
64 | unsigned i; | ||
65 | |||
66 | if (cell->dirty) | ||
67 | cell_update_derived( cell ); | ||
68 | |||
69 | #if 0 | ||
70 | cell_map_surfaces(cell); | ||
71 | #endif | ||
72 | |||
73 | /* | ||
74 | * Map vertex buffers | ||
75 | */ | ||
76 | for (i = 0; i < cell->num_vertex_buffers; i++) { | ||
77 | void *buf = cell_resource(cell->vertex_buffer[i].buffer)->data; | ||
78 | draw_set_mapped_vertex_buffer(draw, i, buf); | ||
79 | } | ||
80 | /* Map index buffer, if present */ | ||
81 | if (info->indexed && cell->index_buffer.buffer) | ||
82 | mapped_indices = cell_resource(cell->index_buffer.buffer)->data; | ||
83 | |||
84 | draw_set_mapped_index_buffer(draw, mapped_indices); | ||
85 | |||
86 | /* draw! */ | ||
87 | draw_vbo(draw, info); | ||
88 | |||
89 | /* | ||
90 | * unmap vertex/index buffers - will cause draw module to flush | ||
91 | */ | ||
92 | for (i = 0; i < cell->num_vertex_buffers; i++) { | ||
93 | draw_set_mapped_vertex_buffer(draw, i, NULL); | ||
94 | } | ||
95 | if (mapped_indices) { | ||
96 | draw_set_mapped_index_buffer(draw, NULL); | ||
97 | } | ||
98 | |||
99 | /* | ||
100 | * TODO: Flush only when a user vertex/index buffer is present | ||
101 | * (or even better, modify draw module to do this | ||
102 | * internally when this condition is seen?) | ||
103 | */ | ||
104 | draw_flush(draw); | ||
105 | } | ||
106 | |||
107 | |||
108 | void | ||
109 | cell_init_draw_functions(struct cell_context *cell) | ||
110 | { | ||
111 | cell->pipe.draw_vbo = cell_draw_vbo; | ||
112 | } | ||
113 | |||
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h deleted file mode 100644 index 148873aa675..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h +++ /dev/null | |||
@@ -1,36 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #ifndef CELL_DRAW_ARRAYS_H | ||
29 | #define CELL_DRAW_ARRAYS_H | ||
30 | |||
31 | |||
/* Forward declaration so this header is self-contained: without it,
 * 'struct cell_context' in the prototype below is a type local to that
 * prototype whenever cell_context.h has not been included first.
 */
struct cell_context;


/* Install the drawing entry point (pipe_context::draw_vbo) on the context. */
extern void
cell_init_draw_functions(struct cell_context *cell);
34 | |||
35 | |||
36 | #endif /* CELL_DRAW_ARRAYS_H */ | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c deleted file mode 100644 index 181fef44f45..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_fence.c +++ /dev/null | |||
@@ -1,172 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #include <unistd.h> | ||
29 | #include "util/u_memory.h" | ||
30 | #include "util/u_inlines.h" | ||
31 | #include "cell_context.h" | ||
32 | #include "cell_batch.h" | ||
33 | #include "cell_fence.h" | ||
34 | #include "cell_texture.h" | ||
35 | |||
36 | |||
37 | void | ||
38 | cell_fence_init(struct cell_fence *fence) | ||
39 | { | ||
40 | uint i; | ||
41 | ASSERT_ALIGN16(fence->status); | ||
42 | for (i = 0; i < CELL_MAX_SPUS; i++) { | ||
43 | fence->status[i][0] = CELL_FENCE_IDLE; | ||
44 | } | ||
45 | } | ||
46 | |||
47 | |||
48 | boolean | ||
49 | cell_fence_signalled(const struct cell_context *cell, | ||
50 | const struct cell_fence *fence) | ||
51 | { | ||
52 | uint i; | ||
53 | for (i = 0; i < cell->num_spus; i++) { | ||
54 | if (fence->status[i][0] != CELL_FENCE_SIGNALLED) | ||
55 | return FALSE; | ||
56 | /*assert(fence->status[i][0] == CELL_FENCE_EMITTED);*/ | ||
57 | } | ||
58 | return TRUE; | ||
59 | } | ||
60 | |||
61 | |||
62 | boolean | ||
63 | cell_fence_finish(const struct cell_context *cell, | ||
64 | const struct cell_fence *fence, | ||
65 | uint64_t timeout) | ||
66 | { | ||
67 | while (!cell_fence_signalled(cell, fence)) { | ||
68 | usleep(10); | ||
69 | } | ||
70 | |||
71 | #ifdef DEBUG | ||
72 | { | ||
73 | uint i; | ||
74 | for (i = 0; i < cell->num_spus; i++) { | ||
75 | assert(fence->status[i][0] == CELL_FENCE_SIGNALLED); | ||
76 | } | ||
77 | } | ||
78 | #endif | ||
79 | return TRUE; | ||
80 | } | ||
81 | |||
82 | |||
83 | |||
84 | |||
/**
 * Node of a singly-linked list of buffers.  cell_free_fenced_buffers()
 * walks such a list, dropping the reference held in 'buffer' and freeing
 * each node.
 */
struct cell_buffer_node
{
   struct pipe_resource *buffer;   /**< referenced resource */
   struct cell_buffer_node *next;  /**< next node, or NULL at the tail */
};
90 | |||
91 | |||
#if 0
/**
 * Push a new node that references 'buffer' onto the head of 'list'.
 * If the node cannot be allocated the list is left unchanged.
 * (Currently compiled out.)
 */
static void
cell_add_buffer_to_list(struct cell_context *cell,
                        struct cell_buffer_list *list,
                        struct pipe_resource *buffer)
{
   struct cell_buffer_node *entry = CALLOC_STRUCT(cell_buffer_node);

   if (!entry)
      return;

   /* take a reference on the buffer, then link the node in at the head */
   pipe_resource_reference(&entry->buffer, buffer);
   entry->next = list->head;
   list->head = entry;
}
#endif
107 | |||
108 | |||
109 | /** | ||
110 | * Wait for completion of the given fence, then unreference any buffers | ||
111 | * on the list. | ||
112 | * This typically unrefs/frees texture buffers after any rendering which uses | ||
113 | * them has completed. | ||
114 | */ | ||
115 | void | ||
116 | cell_free_fenced_buffers(struct cell_context *cell, | ||
117 | struct cell_buffer_list *list) | ||
118 | { | ||
119 | if (list->head) { | ||
120 | /*struct pipe_screen *ps = cell->pipe.screen;*/ | ||
121 | struct cell_buffer_node *node; | ||
122 | |||
123 | cell_fence_finish(cell, &list->fence); | ||
124 | |||
125 | /* traverse the list, unreferencing buffers, freeing nodes */ | ||
126 | node = list->head; | ||
127 | while (node) { | ||
128 | struct cell_buffer_node *next = node->next; | ||
129 | assert(node->buffer); | ||
130 | /* XXX need this? pipe_buffer_unmap(ps, node->buffer);*/ | ||
131 | #if 0 | ||
132 | printf("Unref buffer %p\n", node->buffer); | ||
133 | if (node->buffer->reference.count == 1) | ||
134 | printf(" Delete!\n"); | ||
135 | #endif | ||
136 | pipe_resource_reference(&node->buffer, NULL); | ||
137 | FREE(node); | ||
138 | node = next; | ||
139 | } | ||
140 | list->head = NULL; | ||
141 | } | ||
142 | } | ||
143 | |||
144 | |||
145 | /** | ||
146 | * This should be called for each render command. | ||
147 | * Any texture buffers that are current bound will be added to a fenced | ||
148 | * list to be freed later when the fence is executed/signalled. | ||
149 | */ | ||
150 | void | ||
151 | cell_add_fenced_textures(struct cell_context *cell) | ||
152 | { | ||
153 | /*struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch];*/ | ||
154 | uint i; | ||
155 | |||
156 | for (i = 0; i < cell->num_textures; i++) { | ||
157 | struct cell_resource *ct = cell->texture[i]; | ||
158 | if (ct) { | ||
159 | #if 0 | ||
160 | printf("Adding texture %p buffer %p to list\n", | ||
161 | ct, ct->tiled_buffer[level]); | ||
162 | #endif | ||
163 | #if 00 | ||
164 | /* XXX this needs to be fixed/restored! | ||
165 | * Maybe keep pointers to textures, not buffers. | ||
166 | */ | ||
167 | if (ct->base.buffer) | ||
168 | cell_add_buffer_to_list(cell, list, ct->buffer); | ||
169 | #endif | ||
170 | } | ||
171 | } | ||
172 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h deleted file mode 100644 index 3568230b1c0..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_fence.h +++ /dev/null | |||
@@ -1,60 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | #ifndef CELL_FENCE_H | ||
30 | #define CELL_FENCE_H | ||
31 | |||
32 | |||
33 | extern void | ||
34 | cell_fence_init(struct cell_fence *fence); | ||
35 | |||
36 | |||
37 | extern boolean | ||
38 | cell_fence_signalled(const struct cell_context *cell, | ||
39 | const struct cell_fence *fence, | ||
40 | unsigned flags); | ||
41 | |||
42 | |||
43 | extern boolean | ||
44 | cell_fence_finish(const struct cell_context *cell, | ||
45 | const struct cell_fence *fence, | ||
46 | unsigned flags, | ||
47 | uint64_t timeout); | ||
48 | |||
49 | |||
50 | |||
51 | extern void | ||
52 | cell_free_fenced_buffers(struct cell_context *cell, | ||
53 | struct cell_buffer_list *list); | ||
54 | |||
55 | |||
56 | extern void | ||
57 | cell_add_fenced_textures(struct cell_context *cell); | ||
58 | |||
59 | |||
60 | #endif /* CELL_FENCE_H */ | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c deleted file mode 100644 index 463f4d03eb9..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ /dev/null | |||
@@ -1,109 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | #include "cell_context.h" | ||
30 | #include "cell_batch.h" | ||
31 | #include "cell_flush.h" | ||
32 | #include "cell_spu.h" | ||
33 | #include "cell_render.h" | ||
34 | #include "draw/draw_context.h" | ||
35 | |||
36 | |||
37 | /** | ||
38 | * Called via pipe->flush() | ||
39 | */ | ||
40 | void | ||
41 | cell_flush(struct pipe_context *pipe, | ||
42 | struct pipe_fence_handle **fence) | ||
43 | { | ||
44 | struct cell_context *cell = cell_context(pipe); | ||
45 | |||
46 | if (fence) { | ||
47 | *fence = NULL; | ||
48 | } | ||
49 | |||
50 | flags |= CELL_FLUSH_WAIT; | ||
51 | |||
52 | draw_flush( cell->draw ); | ||
53 | cell_flush_int(cell, flags); | ||
54 | } | ||
55 | |||
56 | |||
57 | /** | ||
58 | * Cell internal flush function. Send the current batch buffer to all SPUs. | ||
59 | * If flags & CELL_FLUSH_WAIT, do not return until the SPUs are idle. | ||
60 | * \param flags bitmask of flags CELL_FLUSH_WAIT, or zero | ||
61 | */ | ||
62 | void | ||
63 | cell_flush_int(struct cell_context *cell, unsigned flags) | ||
64 | { | ||
65 | static boolean flushing = FALSE; /* recursion catcher */ | ||
66 | uint i; | ||
67 | |||
68 | ASSERT(!flushing); | ||
69 | flushing = TRUE; | ||
70 | |||
71 | if (flags & CELL_FLUSH_WAIT) { | ||
72 | STATIC_ASSERT(sizeof(opcode_t) % 16 == 0); | ||
73 | opcode_t *cmd = (opcode_t*) cell_batch_alloc16(cell, sizeof(opcode_t)); | ||
74 | *cmd[0] = CELL_CMD_FINISH; | ||
75 | } | ||
76 | |||
77 | cell_batch_flush(cell); | ||
78 | |||
79 | #if 0 | ||
80 | /* Send CMD_FINISH to all SPUs */ | ||
81 | for (i = 0; i < cell->num_spus; i++) { | ||
82 | send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH); | ||
83 | } | ||
84 | #endif | ||
85 | |||
86 | if (flags & CELL_FLUSH_WAIT) { | ||
87 | /* Wait for ack */ | ||
88 | for (i = 0; i < cell->num_spus; i++) { | ||
89 | uint k = wait_mbox_message(cell_global.spe_contexts[i]); | ||
90 | assert(k == CELL_CMD_FINISH); | ||
91 | } | ||
92 | } | ||
93 | |||
94 | flushing = FALSE; | ||
95 | } | ||
96 | |||
97 | |||
98 | void | ||
99 | cell_flush_buffer_range(struct cell_context *cell, void *ptr, | ||
100 | unsigned size) | ||
101 | { | ||
102 | STATIC_ASSERT((sizeof(opcode_t) + sizeof(struct cell_buffer_range)) % 16 == 0); | ||
103 | uint32_t *batch = (uint32_t*)cell_batch_alloc16(cell, | ||
104 | sizeof(opcode_t) + sizeof(struct cell_buffer_range)); | ||
105 | struct cell_buffer_range *br = (struct cell_buffer_range *) &batch[4]; | ||
106 | batch[0] = CELL_CMD_FLUSH_BUFFER_RANGE; | ||
107 | br->base = (uintptr_t) ptr; | ||
108 | br->size = size; | ||
109 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h deleted file mode 100644 index 509ae6239ac..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_flush.h +++ /dev/null | |||
@@ -1,45 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | #ifndef CELL_FLUSH | ||
30 | #define CELL_FLUSH | ||
31 | |||
32 | #define CELL_FLUSH_WAIT 0x80000000 | ||
33 | |||
/**
 * pipe->flush() entry point.  The prototype matches the definition in
 * cell_flush.c, which takes (pipe, fence) and no 'flags' argument.
 */
extern void
cell_flush(struct pipe_context *pipe,
           struct pipe_fence_handle **fence);
37 | |||
38 | extern void | ||
39 | cell_flush_int(struct cell_context *cell, unsigned flags); | ||
40 | |||
41 | extern void | ||
42 | cell_flush_buffer_range(struct cell_context *cell, void *ptr, | ||
43 | unsigned size); | ||
44 | |||
45 | #endif | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c deleted file mode 100644 index 1d8a11a4ac9..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ /dev/null | |||
@@ -1,2036 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * Copyright 2009 VMware, Inc. All rights reserved. | ||
6 | * | ||
7 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
8 | * copy of this software and associated documentation files (the | ||
9 | * "Software"), to deal in the Software without restriction, including | ||
10 | * without limitation the rights to use, copy, modify, merge, publish, | ||
11 | * distribute, sub license, and/or sell copies of the Software, and to | ||
12 | * permit persons to whom the Software is furnished to do so, subject to | ||
13 | * the following conditions: | ||
14 | * | ||
15 | * The above copyright notice and this permission notice (including the | ||
16 | * next paragraph) shall be included in all copies or substantial portions | ||
17 | * of the Software. | ||
18 | * | ||
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
22 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
23 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
24 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
25 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
26 | * | ||
27 | **************************************************************************/ | ||
28 | |||
29 | |||
30 | |||
31 | /** | ||
32 | * Generate SPU fragment program/shader code. | ||
33 | * | ||
34 | * Note that we generate SOA-style code here. So each TGSI instruction | ||
35 | * operates on four pixels (and is translated into four SPU instructions, | ||
36 | * generally speaking). | ||
37 | * | ||
38 | * \author Brian Paul | ||
39 | */ | ||
40 | |||
41 | #include <math.h> | ||
42 | #include "pipe/p_defines.h" | ||
43 | #include "pipe/p_state.h" | ||
44 | #include "pipe/p_shader_tokens.h" | ||
45 | #include "tgsi/tgsi_parse.h" | ||
46 | #include "tgsi/tgsi_util.h" | ||
47 | #include "tgsi/tgsi_exec.h" | ||
48 | #include "tgsi/tgsi_dump.h" | ||
49 | #include "rtasm/rtasm_ppc_spe.h" | ||
50 | #include "util/u_memory.h" | ||
51 | #include "cell_context.h" | ||
52 | #include "cell_gen_fp.h" | ||
53 | |||
54 | |||
55 | #define MAX_TEMPS 16 | ||
56 | #define MAX_IMMED 8 | ||
57 | |||
58 | #define CHAN_X 0 | ||
59 | #define CHAN_Y 1 | ||
60 | #define CHAN_Z 2 | ||
61 | #define CHAN_W 3 | ||
62 | |||
/**
 * Context needed during code generation.
 *
 * The lazily-created registers (one_reg, addr_reg, exec_mask_reg,
 * cond_mask_reg, loop_mask_reg) are treated as "not yet allocated" while
 * their value is <= 0; see the corresponding get_*_reg() helpers.
 */
struct codegen
{
   struct cell_context *cell;   /**< NOTE(review): presumably the owning context; not read in the helpers nearby */
   int inputs_reg;      /**< 1st function parameter */
   int outputs_reg;     /**< 2nd function parameter */
   int constants_reg;   /**< 3rd function parameter */
   int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */
   int imm_regs[MAX_IMMED][4];  /**< maps TGSI immediates to SPE registers */

   int num_imm;  /**< number of immediates */

   int one_reg;  /**< register containing {1.0, 1.0, 1.0, 1.0} */

   int addr_reg; /**< address register, integer values */

   /** Per-instruction temps / intermediate temps */
   int num_itemps;   /* number of valid entries in itemps[] */
   int itemps[12];   /* registers handed out by get_itemp(), released by free_itemps() */

   /** Current IF/ELSE/ENDIF nesting level */
   int if_nesting;
   /** Current BGNLOOP/ENDLOOP nesting level */
   int loop_nesting;
   /** Location of start of current loop */
   int loop_start;

   /** Index of if/conditional mask register */
   int cond_mask_reg;
   /** Index of loop mask register */
   int loop_mask_reg;

   /** Index of master execution mask register */
   int exec_mask_reg;

   /** KIL mask: indicates which fragments have been killed */
   int kill_mask_reg;

   int frame_size;  /**< Stack frame size, in words */

   struct spe_function *f;   /**< SPE function being emitted; passed to every spe_*() call */
   boolean error;            /**< NOTE(review): presumably set when code generation fails -- confirm against users */
};
108 | |||
109 | |||
110 | /** | ||
111 | * Allocate an intermediate temporary register. | ||
112 | */ | ||
113 | static int | ||
114 | get_itemp(struct codegen *gen) | ||
115 | { | ||
116 | int t = spe_allocate_available_register(gen->f); | ||
117 | assert(gen->num_itemps < Elements(gen->itemps)); | ||
118 | gen->itemps[gen->num_itemps++] = t; | ||
119 | return t; | ||
120 | } | ||
121 | |||
122 | /** | ||
123 | * Free all intermediate temporary registers. To be called after each | ||
124 | * instruction has been emitted. | ||
125 | */ | ||
126 | static void | ||
127 | free_itemps(struct codegen *gen) | ||
128 | { | ||
129 | int i; | ||
130 | for (i = 0; i < gen->num_itemps; i++) { | ||
131 | spe_release_register(gen->f, gen->itemps[i]); | ||
132 | } | ||
133 | gen->num_itemps = 0; | ||
134 | } | ||
135 | |||
136 | |||
137 | /** | ||
138 | * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}. | ||
139 | * The register is allocated and initialized upon the first call. | ||
140 | */ | ||
141 | static int | ||
142 | get_const_one_reg(struct codegen *gen) | ||
143 | { | ||
144 | if (gen->one_reg <= 0) { | ||
145 | gen->one_reg = spe_allocate_available_register(gen->f); | ||
146 | |||
147 | spe_indent(gen->f, 4); | ||
148 | spe_comment(gen->f, -4, "init constant reg = 1.0:"); | ||
149 | |||
150 | /* one = {1.0, 1.0, 1.0, 1.0} */ | ||
151 | spe_load_float(gen->f, gen->one_reg, 1.0f); | ||
152 | |||
153 | spe_indent(gen->f, -4); | ||
154 | } | ||
155 | |||
156 | return gen->one_reg; | ||
157 | } | ||
158 | |||
159 | |||
160 | /** | ||
161 | * Return index of the address register. | ||
162 | * Used for indirect register loads/stores. | ||
163 | */ | ||
164 | static int | ||
165 | get_address_reg(struct codegen *gen) | ||
166 | { | ||
167 | if (gen->addr_reg <= 0) { | ||
168 | gen->addr_reg = spe_allocate_available_register(gen->f); | ||
169 | |||
170 | spe_indent(gen->f, 4); | ||
171 | spe_comment(gen->f, -4, "init address reg = 0:"); | ||
172 | |||
173 | /* init addr = {0, 0, 0, 0} */ | ||
174 | spe_zero(gen->f, gen->addr_reg); | ||
175 | |||
176 | spe_indent(gen->f, -4); | ||
177 | } | ||
178 | |||
179 | return gen->addr_reg; | ||
180 | } | ||
181 | |||
182 | |||
183 | /** | ||
184 | * Return index of the master execution mask. | ||
185 | * The register is allocated an initialized upon the first call. | ||
186 | * | ||
187 | * The master execution mask controls which pixels in a quad are | ||
188 | * modified, according to surrounding conditionals, loops, etc. | ||
189 | */ | ||
190 | static int | ||
191 | get_exec_mask_reg(struct codegen *gen) | ||
192 | { | ||
193 | if (gen->exec_mask_reg <= 0) { | ||
194 | gen->exec_mask_reg = spe_allocate_available_register(gen->f); | ||
195 | |||
196 | /* XXX this may not be needed */ | ||
197 | spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0"); | ||
198 | spe_load_int(gen->f, gen->exec_mask_reg, ~0); | ||
199 | } | ||
200 | |||
201 | return gen->exec_mask_reg; | ||
202 | } | ||
203 | |||
204 | |||
205 | /** Return index of the conditional (if/else) execution mask register */ | ||
206 | static int | ||
207 | get_cond_mask_reg(struct codegen *gen) | ||
208 | { | ||
209 | if (gen->cond_mask_reg <= 0) { | ||
210 | gen->cond_mask_reg = spe_allocate_available_register(gen->f); | ||
211 | } | ||
212 | |||
213 | return gen->cond_mask_reg; | ||
214 | } | ||
215 | |||
216 | |||
217 | /** Return index of the loop execution mask register */ | ||
218 | static int | ||
219 | get_loop_mask_reg(struct codegen *gen) | ||
220 | { | ||
221 | if (gen->loop_mask_reg <= 0) { | ||
222 | gen->loop_mask_reg = spe_allocate_available_register(gen->f); | ||
223 | } | ||
224 | |||
225 | return gen->loop_mask_reg; | ||
226 | } | ||
227 | |||
228 | |||
229 | |||
230 | static boolean | ||
231 | is_register_src(struct codegen *gen, int channel, | ||
232 | const struct tgsi_full_src_register *src) | ||
233 | { | ||
234 | int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel); | ||
235 | int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); | ||
236 | |||
237 | if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) { | ||
238 | return FALSE; | ||
239 | } | ||
240 | if (src->Register.File == TGSI_FILE_TEMPORARY || | ||
241 | src->Register.File == TGSI_FILE_IMMEDIATE) { | ||
242 | return TRUE; | ||
243 | } | ||
244 | return FALSE; | ||
245 | } | ||
246 | |||
247 | |||
248 | static boolean | ||
249 | is_memory_dst(struct codegen *gen, int channel, | ||
250 | const struct tgsi_full_dst_register *dst) | ||
251 | { | ||
252 | if (dst->Register.File == TGSI_FILE_OUTPUT) { | ||
253 | return TRUE; | ||
254 | } | ||
255 | else { | ||
256 | return FALSE; | ||
257 | } | ||
258 | } | ||
259 | |||
260 | |||
261 | /** | ||
262 | * Return the index of the SPU temporary containing the named TGSI | ||
263 | * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we | ||
264 | * just return the corresponding SPE register. If the TGIS register | ||
265 | * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register | ||
266 | * and emit an SPE load instruction. | ||
267 | */ | ||
268 | static int | ||
269 | get_src_reg(struct codegen *gen, | ||
270 | int channel, | ||
271 | const struct tgsi_full_src_register *src) | ||
272 | { | ||
273 | int reg = -1; | ||
274 | int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel); | ||
275 | boolean reg_is_itemp = FALSE; | ||
276 | uint sign_op; | ||
277 | |||
278 | assert(swizzle >= TGSI_SWIZZLE_X); | ||
279 | assert(swizzle <= TGSI_SWIZZLE_W); | ||
280 | |||
281 | { | ||
282 | int index = src->Register.Index; | ||
283 | |||
284 | assert(swizzle < 4); | ||
285 | |||
286 | if (src->Register.Indirect) { | ||
287 | /* XXX unfinished */ | ||
288 | } | ||
289 | |||
290 | switch (src->Register.File) { | ||
291 | case TGSI_FILE_TEMPORARY: | ||
292 | reg = gen->temp_regs[index][swizzle]; | ||
293 | break; | ||
294 | case TGSI_FILE_INPUT: | ||
295 | { | ||
296 | /* offset is measured in quadwords, not bytes */ | ||
297 | int offset = index * 4 + swizzle; | ||
298 | reg = get_itemp(gen); | ||
299 | reg_is_itemp = TRUE; | ||
300 | /* Load: reg = memory[(machine_reg) + offset] */ | ||
301 | spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16); | ||
302 | } | ||
303 | break; | ||
304 | case TGSI_FILE_IMMEDIATE: | ||
305 | reg = gen->imm_regs[index][swizzle]; | ||
306 | break; | ||
307 | case TGSI_FILE_CONSTANT: | ||
308 | { | ||
309 | /* offset is measured in quadwords, not bytes */ | ||
310 | int offset = index * 4 + swizzle; | ||
311 | reg = get_itemp(gen); | ||
312 | reg_is_itemp = TRUE; | ||
313 | /* Load: reg = memory[(machine_reg) + offset] */ | ||
314 | spe_lqd(gen->f, reg, gen->constants_reg, offset * 16); | ||
315 | } | ||
316 | break; | ||
317 | default: | ||
318 | assert(0); | ||
319 | } | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * Handle absolute value, negate or set-negative of src register. | ||
324 | */ | ||
325 | sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); | ||
326 | if (sign_op != TGSI_UTIL_SIGN_KEEP) { | ||
327 | /* | ||
328 | * All sign ops are done by manipulating bit 31, the IEEE float sign bit. | ||
329 | */ | ||
330 | const int bit31mask_reg = get_itemp(gen); | ||
331 | int result_reg; | ||
332 | |||
333 | if (reg_is_itemp) { | ||
334 | /* re-use 'reg' for the result */ | ||
335 | result_reg = reg; | ||
336 | } | ||
337 | else { | ||
338 | /* alloc a new reg for the result */ | ||
339 | result_reg = get_itemp(gen); | ||
340 | } | ||
341 | |||
342 | /* mask with bit 31 set, the rest cleared */ | ||
343 | spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); | ||
344 | |||
345 | if (sign_op == TGSI_UTIL_SIGN_CLEAR) { | ||
346 | spe_andc(gen->f, result_reg, reg, bit31mask_reg); | ||
347 | } | ||
348 | else if (sign_op == TGSI_UTIL_SIGN_SET) { | ||
349 | spe_and(gen->f, result_reg, reg, bit31mask_reg); | ||
350 | } | ||
351 | else { | ||
352 | assert(sign_op == TGSI_UTIL_SIGN_TOGGLE); | ||
353 | spe_xor(gen->f, result_reg, reg, bit31mask_reg); | ||
354 | } | ||
355 | |||
356 | reg = result_reg; | ||
357 | } | ||
358 | |||
359 | return reg; | ||
360 | } | ||
361 | |||
362 | |||
363 | /** | ||
364 | * Return the index of an SPE register to use for the given TGSI register. | ||
365 | * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the | ||
366 | * corresponding SPE register is returned. If the TGSI register is | ||
367 | * TGSI_FILE_OUTPUT we allocate an intermediate temporary register. | ||
368 | * See store_dest_reg() below... | ||
369 | */ | ||
370 | static int | ||
371 | get_dst_reg(struct codegen *gen, | ||
372 | int channel, | ||
373 | const struct tgsi_full_dst_register *dest) | ||
374 | { | ||
375 | int reg = -1; | ||
376 | |||
377 | switch (dest->Register.File) { | ||
378 | case TGSI_FILE_TEMPORARY: | ||
379 | if (gen->if_nesting > 0 || gen->loop_nesting > 0) | ||
380 | reg = get_itemp(gen); | ||
381 | else | ||
382 | reg = gen->temp_regs[dest->Register.Index][channel]; | ||
383 | break; | ||
384 | case TGSI_FILE_OUTPUT: | ||
385 | reg = get_itemp(gen); | ||
386 | break; | ||
387 | default: | ||
388 | assert(0); | ||
389 | } | ||
390 | |||
391 | return reg; | ||
392 | } | ||
393 | |||
394 | |||
395 | /** | ||
396 | * When a TGSI instruction is writing to an output register, this | ||
397 | * function emits the SPE store instruction to store the value_reg. | ||
398 | * \param value_reg the SPE register containing the value to store. | ||
399 | * This would have been returned by get_dst_reg(). | ||
400 | */ | ||
401 | static void | ||
402 | store_dest_reg(struct codegen *gen, | ||
403 | int value_reg, int channel, | ||
404 | const struct tgsi_full_dst_register *dest) | ||
405 | { | ||
406 | /* | ||
407 | * XXX need to implement dst reg clamping/saturation | ||
408 | */ | ||
409 | #if 0 | ||
410 | switch (inst->Instruction.Saturate) { | ||
411 | case TGSI_SAT_NONE: | ||
412 | break; | ||
413 | case TGSI_SAT_ZERO_ONE: | ||
414 | break; | ||
415 | case TGSI_SAT_MINUS_PLUS_ONE: | ||
416 | break; | ||
417 | default: | ||
418 | assert( 0 ); | ||
419 | } | ||
420 | #endif | ||
421 | |||
422 | switch (dest->Register.File) { | ||
423 | case TGSI_FILE_TEMPORARY: | ||
424 | if (gen->if_nesting > 0 || gen->loop_nesting > 0) { | ||
425 | int d_reg = gen->temp_regs[dest->Register.Index][channel]; | ||
426 | int exec_reg = get_exec_mask_reg(gen); | ||
427 | /* Mix d with new value according to exec mask: | ||
428 | * d[i] = mask_reg[i] ? value_reg : d_reg | ||
429 | */ | ||
430 | spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg); | ||
431 | } | ||
432 | else { | ||
433 | /* we're not inside a condition or loop: do nothing special */ | ||
434 | |||
435 | } | ||
436 | break; | ||
437 | case TGSI_FILE_OUTPUT: | ||
438 | { | ||
439 | /* offset is measured in quadwords, not bytes */ | ||
440 | int offset = dest->Register.Index * 4 + channel; | ||
441 | if (gen->if_nesting > 0 || gen->loop_nesting > 0) { | ||
442 | int exec_reg = get_exec_mask_reg(gen); | ||
443 | int curval_reg = get_itemp(gen); | ||
444 | /* First read the current value from memory: | ||
445 | * Load: curval = memory[(machine_reg) + offset] | ||
446 | */ | ||
447 | spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); | ||
448 | /* Mix curval with newvalue according to exec mask: | ||
449 | * d[i] = mask_reg[i] ? value_reg : d_reg | ||
450 | */ | ||
451 | spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg); | ||
452 | /* Store: memory[(machine_reg) + offset] = curval */ | ||
453 | spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); | ||
454 | } | ||
455 | else { | ||
456 | /* Store: memory[(machine_reg) + offset] = reg */ | ||
457 | spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16); | ||
458 | } | ||
459 | } | ||
460 | break; | ||
461 | default: | ||
462 | assert(0); | ||
463 | } | ||
464 | } | ||
465 | |||
466 | |||
467 | |||
468 | static void | ||
469 | emit_prologue(struct codegen *gen) | ||
470 | { | ||
471 | gen->frame_size = 1024; /* XXX temporary, should be dynamic */ | ||
472 | |||
473 | spe_comment(gen->f, 0, "Function prologue:"); | ||
474 | |||
475 | /* save $lr on stack # stqd $lr,16($sp) */ | ||
476 | spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); | ||
477 | |||
478 | if (gen->frame_size >= 512) { | ||
479 | /* offset is too large for ai instruction */ | ||
480 | int offset_reg = spe_allocate_available_register(gen->f); | ||
481 | int sp_reg = spe_allocate_available_register(gen->f); | ||
482 | /* offset = -framesize */ | ||
483 | spe_load_int(gen->f, offset_reg, -gen->frame_size); | ||
484 | /* sp = $sp */ | ||
485 | spe_move(gen->f, sp_reg, SPE_REG_SP); | ||
486 | /* $sp = $sp + offset_reg */ | ||
487 | spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); | ||
488 | /* save $sp in stack frame */ | ||
489 | spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0); | ||
490 | /* clean up */ | ||
491 | spe_release_register(gen->f, offset_reg); | ||
492 | spe_release_register(gen->f, sp_reg); | ||
493 | } | ||
494 | else { | ||
495 | /* save stack pointer # stqd $sp,-frameSize($sp) */ | ||
496 | spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); | ||
497 | |||
498 | /* adjust stack pointer # ai $sp,$sp,-frameSize */ | ||
499 | spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); | ||
500 | } | ||
501 | } | ||
502 | |||
503 | |||
504 | static void | ||
505 | emit_epilogue(struct codegen *gen) | ||
506 | { | ||
507 | const int return_reg = 3; | ||
508 | |||
509 | spe_comment(gen->f, 0, "Function epilogue:"); | ||
510 | |||
511 | spe_comment(gen->f, 0, "return the killed mask"); | ||
512 | if (gen->kill_mask_reg > 0) { | ||
513 | /* shader called KIL, return the "alive" mask */ | ||
514 | spe_move(gen->f, return_reg, gen->kill_mask_reg); | ||
515 | } | ||
516 | else { | ||
517 | /* return {0,0,0,0} */ | ||
518 | spe_load_uint(gen->f, return_reg, 0); | ||
519 | } | ||
520 | |||
521 | spe_comment(gen->f, 0, "restore stack and return"); | ||
522 | if (gen->frame_size >= 512) { | ||
523 | /* offset is too large for ai instruction */ | ||
524 | int offset_reg = spe_allocate_available_register(gen->f); | ||
525 | /* offset = framesize */ | ||
526 | spe_load_int(gen->f, offset_reg, gen->frame_size); | ||
527 | /* $sp = $sp + offset */ | ||
528 | spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); | ||
529 | /* clean up */ | ||
530 | spe_release_register(gen->f, offset_reg); | ||
531 | } | ||
532 | else { | ||
533 | /* restore stack pointer # ai $sp,$sp,frameSize */ | ||
534 | spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size); | ||
535 | } | ||
536 | |||
537 | /* restore $lr # lqd $lr,16($sp) */ | ||
538 | spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); | ||
539 | |||
540 | /* return from function call */ | ||
541 | spe_bi(gen->f, SPE_REG_RA, 0, 0); | ||
542 | } | ||
543 | |||
544 | |||
/**
 * Loop \p ch over channels 0..3 and execute the statement that follows
 * only for channels whose bit is set in the write mask of the
 * instruction's first destination register.
 *
 * Both arguments are parenthesized so that arbitrary expressions (for
 * example "&local_inst") expand correctly.
 *
 * NOTE: this expands to "for (...) if (...)", so an "else" placed directly
 * after the controlled statement would bind to the hidden "if".
 */
#define FOR_EACH_ENABLED_CHANNEL(inst, ch)                       \
   for ((ch) = 0; (ch) < 4; (ch)++)                              \
      if ((inst)->Dst[0].Register.WriteMask & (1 << (ch)))
548 | |||
549 | |||
550 | static boolean | ||
551 | emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
552 | { | ||
553 | int ch = 0, src_reg, addr_reg; | ||
554 | |||
555 | src_reg = get_src_reg(gen, ch, &inst->Src[0]); | ||
556 | addr_reg = get_address_reg(gen); | ||
557 | |||
558 | /* convert float to int */ | ||
559 | spe_cflts(gen->f, addr_reg, src_reg, 0); | ||
560 | |||
561 | free_itemps(gen); | ||
562 | |||
563 | return TRUE; | ||
564 | } | ||
565 | |||
566 | |||
567 | static boolean | ||
568 | emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
569 | { | ||
570 | int ch, src_reg[4], dst_reg[4]; | ||
571 | |||
572 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
573 | src_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); | ||
574 | dst_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
575 | } | ||
576 | |||
577 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
578 | if (is_register_src(gen, ch, &inst->Src[0]) && | ||
579 | is_memory_dst(gen, ch, &inst->Dst[0])) { | ||
580 | /* special-case: register to memory store */ | ||
581 | store_dest_reg(gen, src_reg[ch], ch, &inst->Dst[0]); | ||
582 | } | ||
583 | else { | ||
584 | spe_move(gen->f, dst_reg[ch], src_reg[ch]); | ||
585 | store_dest_reg(gen, dst_reg[ch], ch, &inst->Dst[0]); | ||
586 | } | ||
587 | } | ||
588 | |||
589 | free_itemps(gen); | ||
590 | |||
591 | return TRUE; | ||
592 | } | ||
593 | |||
594 | /** | ||
595 | * Emit binary operation | ||
596 | */ | ||
597 | static boolean | ||
598 | emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
599 | { | ||
600 | int ch, s1_reg[4], s2_reg[4], d_reg[4]; | ||
601 | |||
602 | /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */ | ||
603 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
604 | s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); | ||
605 | s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); | ||
606 | d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
607 | } | ||
608 | |||
609 | /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */ | ||
610 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
611 | /* Emit actual SPE instruction: d = s1 + s2 */ | ||
612 | switch (inst->Instruction.Opcode) { | ||
613 | case TGSI_OPCODE_ADD: | ||
614 | spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); | ||
615 | break; | ||
616 | case TGSI_OPCODE_SUB: | ||
617 | spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); | ||
618 | break; | ||
619 | case TGSI_OPCODE_MUL: | ||
620 | spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); | ||
621 | break; | ||
622 | default: | ||
623 | ; | ||
624 | } | ||
625 | } | ||
626 | |||
627 | /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ | ||
628 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
629 | store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); | ||
630 | } | ||
631 | |||
632 | /* Free any intermediate temps we allocated */ | ||
633 | free_itemps(gen); | ||
634 | |||
635 | return TRUE; | ||
636 | } | ||
637 | |||
638 | |||
639 | /** | ||
640 | * Emit multiply add. See emit_ADD for comments. | ||
641 | */ | ||
642 | static boolean | ||
643 | emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
644 | { | ||
645 | int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4]; | ||
646 | |||
647 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
648 | s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); | ||
649 | s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); | ||
650 | s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]); | ||
651 | d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
652 | } | ||
653 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
654 | spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]); | ||
655 | } | ||
656 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
657 | store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); | ||
658 | } | ||
659 | free_itemps(gen); | ||
660 | return TRUE; | ||
661 | } | ||
662 | |||
663 | |||
664 | /** | ||
665 | * Emit linear interpolate. See emit_ADD for comments. | ||
666 | */ | ||
667 | static boolean | ||
668 | emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
669 | { | ||
670 | int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4]; | ||
671 | |||
672 | /* setup/get src/dst/temp regs */ | ||
673 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
674 | s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); | ||
675 | s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); | ||
676 | s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]); | ||
677 | d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
678 | tmp_reg[ch] = get_itemp(gen); | ||
679 | } | ||
680 | |||
681 | /* d = s3 + s1(s2 - s3) */ | ||
682 | /* do all subtracts, then all fma, then all stores to better pipeline */ | ||
683 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
684 | spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]); | ||
685 | } | ||
686 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
687 | spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]); | ||
688 | } | ||
689 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
690 | store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); | ||
691 | } | ||
692 | free_itemps(gen); | ||
693 | return TRUE; | ||
694 | } | ||
695 | |||
696 | |||
697 | |||
698 | /** | ||
699 | * Emit reciprocal or recip sqrt. | ||
700 | */ | ||
701 | static boolean | ||
702 | emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
703 | { | ||
704 | int ch, s1_reg[4], d_reg[4], tmp_reg[4]; | ||
705 | |||
706 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
707 | s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); | ||
708 | d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
709 | tmp_reg[ch] = get_itemp(gen); | ||
710 | } | ||
711 | |||
712 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
713 | if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) { | ||
714 | /* tmp = 1/s1 */ | ||
715 | spe_frest(gen->f, tmp_reg[ch], s1_reg[ch]); | ||
716 | } | ||
717 | else { | ||
718 | /* tmp = 1/sqrt(s1) */ | ||
719 | spe_frsqest(gen->f, tmp_reg[ch], s1_reg[ch]); | ||
720 | } | ||
721 | } | ||
722 | |||
723 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
724 | /* d = float_interp(s1, tmp) */ | ||
725 | spe_fi(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]); | ||
726 | } | ||
727 | |||
728 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
729 | store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); | ||
730 | } | ||
731 | |||
732 | free_itemps(gen); | ||
733 | return TRUE; | ||
734 | } | ||
735 | |||
736 | |||
737 | /** | ||
738 | * Emit absolute value. See emit_ADD for comments. | ||
739 | */ | ||
740 | static boolean | ||
741 | emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
742 | { | ||
743 | int ch, s1_reg[4], d_reg[4]; | ||
744 | const int bit31mask_reg = get_itemp(gen); | ||
745 | |||
746 | /* mask with bit 31 set, the rest cleared */ | ||
747 | spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); | ||
748 | |||
749 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
750 | s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); | ||
751 | d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
752 | } | ||
753 | |||
754 | /* d = sign bit cleared in s1 */ | ||
755 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
756 | spe_andc(gen->f, d_reg[ch], s1_reg[ch], bit31mask_reg); | ||
757 | } | ||
758 | |||
759 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
760 | store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); | ||
761 | } | ||
762 | |||
763 | free_itemps(gen); | ||
764 | return TRUE; | ||
765 | } | ||
766 | |||
767 | /** | ||
768 | * Emit 3 component dot product. See emit_ADD for comments. | ||
769 | */ | ||
770 | static boolean | ||
771 | emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
772 | { | ||
773 | int ch; | ||
774 | int s1x_reg, s1y_reg, s1z_reg; | ||
775 | int s2x_reg, s2y_reg, s2z_reg; | ||
776 | int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); | ||
777 | |||
778 | s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); | ||
779 | s2x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); | ||
780 | s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); | ||
781 | s2y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); | ||
782 | s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); | ||
783 | s2z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); | ||
784 | |||
785 | /* t0 = x0 * x1 */ | ||
786 | spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg); | ||
787 | |||
788 | /* t1 = y0 * y1 */ | ||
789 | spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg); | ||
790 | |||
791 | /* t0 = z0 * z1 + t0 */ | ||
792 | spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg); | ||
793 | |||
794 | /* t0 = t0 + t1 */ | ||
795 | spe_fa(gen->f, t0_reg, t0_reg, t1_reg); | ||
796 | |||
797 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
798 | int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
799 | spe_move(gen->f, d_reg, t0_reg); | ||
800 | store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); | ||
801 | } | ||
802 | |||
803 | free_itemps(gen); | ||
804 | return TRUE; | ||
805 | } | ||
806 | |||
807 | /** | ||
808 | * Emit 4 component dot product. See emit_ADD for comments. | ||
809 | */ | ||
810 | static boolean | ||
811 | emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
812 | { | ||
813 | int ch; | ||
814 | int s0x_reg, s0y_reg, s0z_reg, s0w_reg; | ||
815 | int s1x_reg, s1y_reg, s1z_reg, s1w_reg; | ||
816 | int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); | ||
817 | |||
818 | s0x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); | ||
819 | s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); | ||
820 | s0y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); | ||
821 | s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); | ||
822 | s0z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); | ||
823 | s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); | ||
824 | s0w_reg = get_src_reg(gen, CHAN_W, &inst->Src[0]); | ||
825 | s1w_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]); | ||
826 | |||
827 | /* t0 = x0 * x1 */ | ||
828 | spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg); | ||
829 | |||
830 | /* t1 = y0 * y1 */ | ||
831 | spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg); | ||
832 | |||
833 | /* t0 = z0 * z1 + t0 */ | ||
834 | spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg); | ||
835 | |||
836 | /* t1 = w0 * w1 + t1 */ | ||
837 | spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg); | ||
838 | |||
839 | /* t0 = t0 + t1 */ | ||
840 | spe_fa(gen->f, t0_reg, t0_reg, t1_reg); | ||
841 | |||
842 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
843 | int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
844 | spe_move(gen->f, d_reg, t0_reg); | ||
845 | store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); | ||
846 | } | ||
847 | |||
848 | free_itemps(gen); | ||
849 | return TRUE; | ||
850 | } | ||
851 | |||
852 | /** | ||
853 | * Emit homogeneous dot product. See emit_ADD for comments. | ||
854 | */ | ||
855 | static boolean | ||
856 | emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
857 | { | ||
858 | /* XXX rewrite this function to look more like DP3/DP4 */ | ||
859 | int ch; | ||
860 | int s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); | ||
861 | int s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); | ||
862 | int tmp_reg = get_itemp(gen); | ||
863 | |||
864 | /* t = x0 * x1 */ | ||
865 | spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); | ||
866 | |||
867 | s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); | ||
868 | s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); | ||
869 | /* t = y0 * y1 + t */ | ||
870 | spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); | ||
871 | |||
872 | s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); | ||
873 | s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); | ||
874 | /* t = z0 * z1 + t */ | ||
875 | spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); | ||
876 | |||
877 | s2_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]); | ||
878 | /* t = w1 + t */ | ||
879 | spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg); | ||
880 | |||
881 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
882 | int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
883 | spe_move(gen->f, d_reg, tmp_reg); | ||
884 | store_dest_reg(gen, tmp_reg, ch, &inst->Dst[0]); | ||
885 | } | ||
886 | |||
887 | free_itemps(gen); | ||
888 | return TRUE; | ||
889 | } | ||
890 | |||
891 | /** | ||
892 | * Emit 3-component vector normalize. | ||
893 | */ | ||
894 | static boolean | ||
895 | emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
896 | { | ||
897 | int ch; | ||
898 | int src_reg[3]; | ||
899 | int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); | ||
900 | |||
901 | src_reg[0] = get_src_reg(gen, CHAN_X, &inst->Src[0]); | ||
902 | src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->Src[0]); | ||
903 | src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->Src[0]); | ||
904 | |||
905 | /* t0 = x * x */ | ||
906 | spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]); | ||
907 | |||
908 | /* t1 = y * y */ | ||
909 | spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]); | ||
910 | |||
911 | /* t0 = z * z + t0 */ | ||
912 | spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg); | ||
913 | |||
914 | /* t0 = t0 + t1 */ | ||
915 | spe_fa(gen->f, t0_reg, t0_reg, t1_reg); | ||
916 | |||
917 | /* t1 = 1.0 / sqrt(t0) */ | ||
918 | spe_frsqest(gen->f, t1_reg, t0_reg); | ||
919 | spe_fi(gen->f, t1_reg, t0_reg, t1_reg); | ||
920 | |||
921 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
922 | int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
923 | /* dst = src[ch] * t1 */ | ||
924 | spe_fm(gen->f, d_reg, src_reg[ch], t1_reg); | ||
925 | store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); | ||
926 | } | ||
927 | |||
928 | free_itemps(gen); | ||
929 | return TRUE; | ||
930 | } | ||
931 | |||
932 | |||
933 | /** | ||
934 | * Emit cross product. See emit_ADD for comments. | ||
935 | */ | ||
936 | static boolean | ||
937 | emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
938 | { | ||
939 | int s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); | ||
940 | int s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); | ||
941 | int tmp_reg = get_itemp(gen); | ||
942 | |||
943 | /* t = z0 * y1 */ | ||
944 | spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); | ||
945 | |||
946 | s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); | ||
947 | s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); | ||
948 | /* t = y0 * z1 - t */ | ||
949 | spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); | ||
950 | |||
951 | if (inst->Dst[0].Register.WriteMask & (1 << CHAN_X)) { | ||
952 | store_dest_reg(gen, tmp_reg, CHAN_X, &inst->Dst[0]); | ||
953 | } | ||
954 | |||
955 | s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); | ||
956 | s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); | ||
957 | /* t = x0 * z1 */ | ||
958 | spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); | ||
959 | |||
960 | s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); | ||
961 | s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); | ||
962 | /* t = z0 * x1 - t */ | ||
963 | spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); | ||
964 | |||
965 | if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Y)) { | ||
966 | store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->Dst[0]); | ||
967 | } | ||
968 | |||
969 | s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); | ||
970 | s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); | ||
971 | /* t = y0 * x1 */ | ||
972 | spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); | ||
973 | |||
974 | s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); | ||
975 | s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); | ||
976 | /* t = x0 * y1 - t */ | ||
977 | spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); | ||
978 | |||
979 | if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Z)) { | ||
980 | store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->Dst[0]); | ||
981 | } | ||
982 | |||
983 | free_itemps(gen); | ||
984 | return TRUE; | ||
985 | } | ||
986 | |||
987 | |||
988 | /** | ||
989 | * Emit inequality instruction. | ||
990 | * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as | ||
991 | * the result but OpenGL/TGSI needs 0.0 and 1.0 results. | ||
992 | * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND. | ||
993 | */ | ||
994 | static boolean | ||
995 | emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
996 | { | ||
997 | int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg; | ||
998 | boolean complement = FALSE; | ||
999 | |||
1000 | one_reg = get_const_one_reg(gen); | ||
1001 | |||
1002 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1003 | s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); | ||
1004 | s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); | ||
1005 | d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
1006 | } | ||
1007 | |||
1008 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1009 | switch (inst->Instruction.Opcode) { | ||
1010 | case TGSI_OPCODE_SGT: | ||
1011 | spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); | ||
1012 | break; | ||
1013 | case TGSI_OPCODE_SLT: | ||
1014 | spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); | ||
1015 | break; | ||
1016 | case TGSI_OPCODE_SGE: | ||
1017 | spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); | ||
1018 | complement = TRUE; | ||
1019 | break; | ||
1020 | case TGSI_OPCODE_SLE: | ||
1021 | spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); | ||
1022 | complement = TRUE; | ||
1023 | break; | ||
1024 | case TGSI_OPCODE_SEQ: | ||
1025 | spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); | ||
1026 | break; | ||
1027 | case TGSI_OPCODE_SNE: | ||
1028 | spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); | ||
1029 | complement = TRUE; | ||
1030 | break; | ||
1031 | default: | ||
1032 | assert(0); | ||
1033 | } | ||
1034 | } | ||
1035 | |||
1036 | /* convert d from 0x0/0xffffffff to 0.0/1.0 */ | ||
1037 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1038 | /* d = d & one_reg */ | ||
1039 | if (complement) | ||
1040 | spe_andc(gen->f, d_reg[ch], one_reg, d_reg[ch]); | ||
1041 | else | ||
1042 | spe_and(gen->f, d_reg[ch], one_reg, d_reg[ch]); | ||
1043 | } | ||
1044 | |||
1045 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1046 | store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); | ||
1047 | } | ||
1048 | |||
1049 | free_itemps(gen); | ||
1050 | return TRUE; | ||
1051 | } | ||
1052 | |||
1053 | |||
1054 | /** | ||
1055 | * Emit compare. | ||
1056 | */ | ||
1057 | static boolean | ||
1058 | emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
1059 | { | ||
1060 | int ch; | ||
1061 | |||
1062 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1063 | int s1_reg = get_src_reg(gen, ch, &inst->Src[0]); | ||
1064 | int s2_reg = get_src_reg(gen, ch, &inst->Src[1]); | ||
1065 | int s3_reg = get_src_reg(gen, ch, &inst->Src[2]); | ||
1066 | int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
1067 | int zero_reg = get_itemp(gen); | ||
1068 | |||
1069 | spe_zero(gen->f, zero_reg); | ||
1070 | |||
1071 | /* d = (s1 < 0) ? s2 : s3 */ | ||
1072 | spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); | ||
1073 | spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg); | ||
1074 | |||
1075 | store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); | ||
1076 | free_itemps(gen); | ||
1077 | } | ||
1078 | |||
1079 | return TRUE; | ||
1080 | } | ||
1081 | |||
1082 | /** | ||
1083 | * Emit trunc. | ||
1084 | * Convert float to signed int | ||
1085 | * Convert signed int to float | ||
1086 | */ | ||
1087 | static boolean | ||
1088 | emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
1089 | { | ||
1090 | int ch, s1_reg[4], d_reg[4]; | ||
1091 | |||
1092 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1093 | s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); | ||
1094 | d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
1095 | } | ||
1096 | |||
1097 | /* Convert float to int */ | ||
1098 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1099 | spe_cflts(gen->f, d_reg[ch], s1_reg[ch], 0); | ||
1100 | } | ||
1101 | |||
1102 | /* Convert int to float */ | ||
1103 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1104 | spe_csflt(gen->f, d_reg[ch], d_reg[ch], 0); | ||
1105 | } | ||
1106 | |||
1107 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1108 | store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); | ||
1109 | } | ||
1110 | |||
1111 | free_itemps(gen); | ||
1112 | return TRUE; | ||
1113 | } | ||
1114 | |||
1115 | |||
1116 | /** | ||
1117 | * Emit floor. | ||
1118 | * If negative int subtract one | ||
1119 | * Convert float to signed int | ||
1120 | * Convert signed int to float | ||
1121 | */ | ||
1122 | static boolean | ||
1123 | emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
1124 | { | ||
1125 | int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; | ||
1126 | |||
1127 | zero_reg = get_itemp(gen); | ||
1128 | spe_zero(gen->f, zero_reg); | ||
1129 | one_reg = get_const_one_reg(gen); | ||
1130 | |||
1131 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1132 | s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); | ||
1133 | d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
1134 | tmp_reg[ch] = get_itemp(gen); | ||
1135 | } | ||
1136 | |||
1137 | /* If negative, subtract 1.0 */ | ||
1138 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1139 | spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]); | ||
1140 | } | ||
1141 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1142 | spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]); | ||
1143 | } | ||
1144 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1145 | spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]); | ||
1146 | } | ||
1147 | |||
1148 | /* Convert float to int */ | ||
1149 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1150 | spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0); | ||
1151 | } | ||
1152 | |||
1153 | /* Convert int to float */ | ||
1154 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1155 | spe_csflt(gen->f, d_reg[ch], tmp_reg[ch], 0); | ||
1156 | } | ||
1157 | |||
1158 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1159 | store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); | ||
1160 | } | ||
1161 | |||
1162 | free_itemps(gen); | ||
1163 | return TRUE; | ||
1164 | } | ||
1165 | |||
1166 | |||
1167 | /** | ||
1168 | * Compute frac = Input - FLR(Input) | ||
1169 | */ | ||
1170 | static boolean | ||
1171 | emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
1172 | { | ||
1173 | int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; | ||
1174 | |||
1175 | zero_reg = get_itemp(gen); | ||
1176 | spe_zero(gen->f, zero_reg); | ||
1177 | one_reg = get_const_one_reg(gen); | ||
1178 | |||
1179 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1180 | s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); | ||
1181 | d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
1182 | tmp_reg[ch] = get_itemp(gen); | ||
1183 | } | ||
1184 | |||
1185 | /* If negative, subtract 1.0 */ | ||
1186 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1187 | spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]); | ||
1188 | } | ||
1189 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1190 | spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]); | ||
1191 | } | ||
1192 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1193 | spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]); | ||
1194 | } | ||
1195 | |||
1196 | /* Convert float to int */ | ||
1197 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1198 | spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0); | ||
1199 | } | ||
1200 | |||
1201 | /* Convert int to float */ | ||
1202 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1203 | spe_csflt(gen->f, tmp_reg[ch], tmp_reg[ch], 0); | ||
1204 | } | ||
1205 | |||
1206 | /* d = s1 - FLR(s1) */ | ||
1207 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1208 | spe_fs(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]); | ||
1209 | } | ||
1210 | |||
1211 | /* store result */ | ||
1212 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1213 | store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); | ||
1214 | } | ||
1215 | |||
1216 | free_itemps(gen); | ||
1217 | return TRUE; | ||
1218 | } | ||
1219 | |||
1220 | |||
#if 0
/**
 * Debug helper (compiled out): print index, name and address of every
 * entry in the context's SPU function table.
 */
static void
print_functions(struct cell_context *cell)
{
   struct cell_spu_function_info *table = &cell->spu_functions;
   uint idx = 0;

   while (idx < table->num) {
      printf("SPU func %u: %s at %u\n",
             idx, table->names[idx], table->addrs[idx]);
      idx++;
   }
}
#endif
1233 | |||
1234 | |||
1235 | static uint | ||
1236 | lookup_function(struct cell_context *cell, const char *funcname) | ||
1237 | { | ||
1238 | const struct cell_spu_function_info *funcs = &cell->spu_functions; | ||
1239 | uint i, addr = 0; | ||
1240 | for (i = 0; i < funcs->num; i++) { | ||
1241 | if (strcmp(funcs->names[i], funcname) == 0) { | ||
1242 | addr = funcs->addrs[i]; | ||
1243 | } | ||
1244 | } | ||
1245 | assert(addr && "spu function not found"); | ||
1246 | return addr / 4; /* discard 2 least significant bits */ | ||
1247 | } | ||
1248 | |||
1249 | |||
1250 | /** | ||
1251 | * Emit code to call a SPU function. | ||
1252 | * Used to implement instructions like SIN/COS/POW/TEX/etc. | ||
1253 | * If scalar, only the X components of the src regs are used, and the | ||
1254 | * result is replicated across the dest register's XYZW components. | ||
1255 | */ | ||
1256 | static boolean | ||
1257 | emit_function_call(struct codegen *gen, | ||
1258 | const struct tgsi_full_instruction *inst, | ||
1259 | char *funcname, uint num_args, boolean scalar) | ||
1260 | { | ||
1261 | const uint addr = lookup_function(gen->cell, funcname); | ||
1262 | char comment[100]; | ||
1263 | int s_regs[3]; | ||
1264 | int func_called = FALSE; | ||
1265 | uint a, ch; | ||
1266 | int retval_reg = -1; | ||
1267 | |||
1268 | assert(num_args <= 3); | ||
1269 | |||
1270 | snprintf(comment, sizeof(comment), "CALL %s:", funcname); | ||
1271 | spe_comment(gen->f, -4, comment); | ||
1272 | |||
1273 | if (scalar) { | ||
1274 | for (a = 0; a < num_args; a++) { | ||
1275 | s_regs[a] = get_src_reg(gen, CHAN_X, &inst->Src[a]); | ||
1276 | } | ||
1277 | /* we'll call the function, put the return value in this register, | ||
1278 | * then replicate it across all write-enabled components in d_reg. | ||
1279 | */ | ||
1280 | retval_reg = spe_allocate_available_register(gen->f); | ||
1281 | } | ||
1282 | |||
1283 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1284 | int d_reg; | ||
1285 | ubyte usedRegs[SPE_NUM_REGS]; | ||
1286 | uint i, numUsed; | ||
1287 | |||
1288 | if (!scalar) { | ||
1289 | for (a = 0; a < num_args; a++) { | ||
1290 | s_regs[a] = get_src_reg(gen, ch, &inst->Src[a]); | ||
1291 | } | ||
1292 | } | ||
1293 | |||
1294 | d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
1295 | |||
1296 | if (!scalar || !func_called) { | ||
1297 | /* for a scalar function, we'll really only call the function once */ | ||
1298 | |||
1299 | numUsed = spe_get_registers_used(gen->f, usedRegs); | ||
1300 | assert(numUsed < gen->frame_size / 16 - 2); | ||
1301 | |||
1302 | /* save registers to stack */ | ||
1303 | for (i = 0; i < numUsed; i++) { | ||
1304 | uint reg = usedRegs[i]; | ||
1305 | int offset = 2 + i; | ||
1306 | spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); | ||
1307 | } | ||
1308 | |||
1309 | /* setup function arguments */ | ||
1310 | for (a = 0; a < num_args; a++) { | ||
1311 | spe_move(gen->f, 3 + a, s_regs[a]); | ||
1312 | } | ||
1313 | |||
1314 | /* branch to function, save return addr */ | ||
1315 | spe_brasl(gen->f, SPE_REG_RA, addr); | ||
1316 | |||
1317 | /* save function's return value */ | ||
1318 | if (scalar) | ||
1319 | spe_move(gen->f, retval_reg, 3); | ||
1320 | else | ||
1321 | spe_move(gen->f, d_reg, 3); | ||
1322 | |||
1323 | /* restore registers from stack */ | ||
1324 | for (i = 0; i < numUsed; i++) { | ||
1325 | uint reg = usedRegs[i]; | ||
1326 | if (reg != d_reg && reg != retval_reg) { | ||
1327 | int offset = 2 + i; | ||
1328 | spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); | ||
1329 | } | ||
1330 | } | ||
1331 | |||
1332 | func_called = TRUE; | ||
1333 | } | ||
1334 | |||
1335 | if (scalar) { | ||
1336 | spe_move(gen->f, d_reg, retval_reg); | ||
1337 | } | ||
1338 | |||
1339 | store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); | ||
1340 | free_itemps(gen); | ||
1341 | } | ||
1342 | |||
1343 | if (scalar) { | ||
1344 | spe_release_register(gen->f, retval_reg); | ||
1345 | } | ||
1346 | |||
1347 | return TRUE; | ||
1348 | } | ||
1349 | |||
1350 | |||
1351 | static boolean | ||
1352 | emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
1353 | { | ||
1354 | const uint target = inst->Texture.Texture; | ||
1355 | const uint unit = inst->Src[1].Register.Index; | ||
1356 | uint addr; | ||
1357 | int ch; | ||
1358 | int coord_regs[4], d_regs[4]; | ||
1359 | |||
1360 | switch (target) { | ||
1361 | case TGSI_TEXTURE_1D: | ||
1362 | case TGSI_TEXTURE_2D: | ||
1363 | addr = lookup_function(gen->cell, "spu_tex_2d"); | ||
1364 | break; | ||
1365 | case TGSI_TEXTURE_3D: | ||
1366 | addr = lookup_function(gen->cell, "spu_tex_3d"); | ||
1367 | break; | ||
1368 | case TGSI_TEXTURE_CUBE: | ||
1369 | addr = lookup_function(gen->cell, "spu_tex_cube"); | ||
1370 | break; | ||
1371 | default: | ||
1372 | ASSERT(0 && "unsupported texture target"); | ||
1373 | return FALSE; | ||
1374 | } | ||
1375 | |||
1376 | assert(inst->Src[1].Register.File == TGSI_FILE_SAMPLER); | ||
1377 | |||
1378 | spe_comment(gen->f, -4, "CALL tex:"); | ||
1379 | |||
1380 | /* get src/dst reg info */ | ||
1381 | for (ch = 0; ch < 4; ch++) { | ||
1382 | coord_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]); | ||
1383 | d_regs[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
1384 | } | ||
1385 | |||
1386 | { | ||
1387 | ubyte usedRegs[SPE_NUM_REGS]; | ||
1388 | uint i, numUsed; | ||
1389 | |||
1390 | numUsed = spe_get_registers_used(gen->f, usedRegs); | ||
1391 | assert(numUsed < gen->frame_size / 16 - 2); | ||
1392 | |||
1393 | /* save registers to stack */ | ||
1394 | for (i = 0; i < numUsed; i++) { | ||
1395 | uint reg = usedRegs[i]; | ||
1396 | int offset = 2 + i; | ||
1397 | spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); | ||
1398 | } | ||
1399 | |||
1400 | /* setup function arguments (XXX depends on target) */ | ||
1401 | for (i = 0; i < 4; i++) { | ||
1402 | spe_move(gen->f, 3 + i, coord_regs[i]); | ||
1403 | } | ||
1404 | spe_load_uint(gen->f, 7, unit); /* sampler unit */ | ||
1405 | |||
1406 | /* branch to function, save return addr */ | ||
1407 | spe_brasl(gen->f, SPE_REG_RA, addr); | ||
1408 | |||
1409 | /* save function's return values (four pixel's colors) */ | ||
1410 | for (i = 0; i < 4; i++) { | ||
1411 | spe_move(gen->f, d_regs[i], 3 + i); | ||
1412 | } | ||
1413 | |||
1414 | /* restore registers from stack */ | ||
1415 | for (i = 0; i < numUsed; i++) { | ||
1416 | uint reg = usedRegs[i]; | ||
1417 | if (reg != d_regs[0] && | ||
1418 | reg != d_regs[1] && | ||
1419 | reg != d_regs[2] && | ||
1420 | reg != d_regs[3]) { | ||
1421 | int offset = 2 + i; | ||
1422 | spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); | ||
1423 | } | ||
1424 | } | ||
1425 | } | ||
1426 | |||
1427 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1428 | store_dest_reg(gen, d_regs[ch], ch, &inst->Dst[0]); | ||
1429 | free_itemps(gen); | ||
1430 | } | ||
1431 | |||
1432 | return TRUE; | ||
1433 | } | ||
1434 | |||
1435 | |||
1436 | /** | ||
1437 | * KILL if any of src reg values are less than zero. | ||
1438 | */ | ||
1439 | static boolean | ||
1440 | emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
1441 | { | ||
1442 | int ch; | ||
1443 | int s_regs[4], kil_reg = -1, cmp_reg, zero_reg; | ||
1444 | |||
1445 | spe_comment(gen->f, -4, "CALL kil:"); | ||
1446 | |||
1447 | /* zero = {0,0,0,0} */ | ||
1448 | zero_reg = get_itemp(gen); | ||
1449 | spe_zero(gen->f, zero_reg); | ||
1450 | |||
1451 | cmp_reg = get_itemp(gen); | ||
1452 | |||
1453 | /* get src regs */ | ||
1454 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1455 | s_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]); | ||
1456 | } | ||
1457 | |||
1458 | /* test if any src regs are < 0 */ | ||
1459 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1460 | if (kil_reg >= 0) { | ||
1461 | /* cmp = 0 > src ? : ~0 : 0 */ | ||
1462 | spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]); | ||
1463 | /* kil = kil | cmp */ | ||
1464 | spe_or(gen->f, kil_reg, kil_reg, cmp_reg); | ||
1465 | } | ||
1466 | else { | ||
1467 | kil_reg = get_itemp(gen); | ||
1468 | /* kil = 0 > src ? : ~0 : 0 */ | ||
1469 | spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]); | ||
1470 | } | ||
1471 | } | ||
1472 | |||
1473 | if (gen->if_nesting || gen->loop_nesting) { | ||
1474 | /* may have been a conditional kil */ | ||
1475 | spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg); | ||
1476 | } | ||
1477 | |||
1478 | /* allocate the kill mask reg if needed */ | ||
1479 | if (gen->kill_mask_reg <= 0) { | ||
1480 | gen->kill_mask_reg = spe_allocate_available_register(gen->f); | ||
1481 | spe_move(gen->f, gen->kill_mask_reg, kil_reg); | ||
1482 | } | ||
1483 | else { | ||
1484 | spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg); | ||
1485 | } | ||
1486 | |||
1487 | free_itemps(gen); | ||
1488 | |||
1489 | return TRUE; | ||
1490 | } | ||
1491 | |||
1492 | |||
1493 | |||
1494 | /** | ||
1495 | * Emit min or max. | ||
1496 | */ | ||
1497 | static boolean | ||
1498 | emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
1499 | { | ||
1500 | int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; | ||
1501 | |||
1502 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1503 | s0_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); | ||
1504 | s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); | ||
1505 | d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
1506 | tmp_reg[ch] = get_itemp(gen); | ||
1507 | } | ||
1508 | |||
1509 | /* d = (s0 > s1) ? s0 : s1 */ | ||
1510 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1511 | if (inst->Instruction.Opcode == TGSI_OPCODE_MAX) | ||
1512 | spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]); | ||
1513 | else | ||
1514 | spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]); | ||
1515 | } | ||
1516 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1517 | spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]); | ||
1518 | } | ||
1519 | |||
1520 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1521 | store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); | ||
1522 | } | ||
1523 | |||
1524 | free_itemps(gen); | ||
1525 | return TRUE; | ||
1526 | } | ||
1527 | |||
1528 | |||
1529 | /** | ||
1530 | * Emit code to update the execution mask. | ||
1531 | * This needs to be done whenever the execution status of a conditional | ||
1532 | * or loop is changed. | ||
1533 | */ | ||
1534 | static void | ||
1535 | emit_update_exec_mask(struct codegen *gen) | ||
1536 | { | ||
1537 | const int exec_reg = get_exec_mask_reg(gen); | ||
1538 | const int cond_reg = gen->cond_mask_reg; | ||
1539 | const int loop_reg = gen->loop_mask_reg; | ||
1540 | |||
1541 | spe_comment(gen->f, 0, "Update master execution mask"); | ||
1542 | |||
1543 | if (gen->if_nesting > 0 && gen->loop_nesting > 0) { | ||
1544 | /* exec_mask = cond_mask & loop_mask */ | ||
1545 | assert(cond_reg > 0); | ||
1546 | assert(loop_reg > 0); | ||
1547 | spe_and(gen->f, exec_reg, cond_reg, loop_reg); | ||
1548 | } | ||
1549 | else if (gen->if_nesting > 0) { | ||
1550 | assert(cond_reg > 0); | ||
1551 | spe_move(gen->f, exec_reg, cond_reg); | ||
1552 | } | ||
1553 | else if (gen->loop_nesting > 0) { | ||
1554 | assert(loop_reg > 0); | ||
1555 | spe_move(gen->f, exec_reg, loop_reg); | ||
1556 | } | ||
1557 | else { | ||
1558 | spe_load_int(gen->f, exec_reg, ~0x0); | ||
1559 | } | ||
1560 | } | ||
1561 | |||
1562 | |||
1563 | static boolean | ||
1564 | emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
1565 | { | ||
1566 | const int channel = 0; | ||
1567 | int cond_reg; | ||
1568 | |||
1569 | cond_reg = get_cond_mask_reg(gen); | ||
1570 | |||
1571 | /* XXX push cond exec mask */ | ||
1572 | |||
1573 | spe_comment(gen->f, 0, "init conditional exec mask = ~0:"); | ||
1574 | spe_load_int(gen->f, cond_reg, ~0); | ||
1575 | |||
1576 | /* update conditional execution mask with the predicate register */ | ||
1577 | int tmp_reg = get_itemp(gen); | ||
1578 | int s1_reg = get_src_reg(gen, channel, &inst->Src[0]); | ||
1579 | |||
1580 | /* tmp = (s1_reg == 0) */ | ||
1581 | spe_ceqi(gen->f, tmp_reg, s1_reg, 0); | ||
1582 | /* tmp = !tmp */ | ||
1583 | spe_complement(gen->f, tmp_reg, tmp_reg); | ||
1584 | /* cond_mask = cond_mask & tmp */ | ||
1585 | spe_and(gen->f, cond_reg, cond_reg, tmp_reg); | ||
1586 | |||
1587 | gen->if_nesting++; | ||
1588 | |||
1589 | /* update the master execution mask */ | ||
1590 | emit_update_exec_mask(gen); | ||
1591 | |||
1592 | free_itemps(gen); | ||
1593 | |||
1594 | return TRUE; | ||
1595 | } | ||
1596 | |||
1597 | |||
1598 | static boolean | ||
1599 | emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
1600 | { | ||
1601 | const int cond_reg = get_cond_mask_reg(gen); | ||
1602 | |||
1603 | spe_comment(gen->f, 0, "cond exec mask = !cond exec mask"); | ||
1604 | spe_complement(gen->f, cond_reg, cond_reg); | ||
1605 | emit_update_exec_mask(gen); | ||
1606 | |||
1607 | return TRUE; | ||
1608 | } | ||
1609 | |||
1610 | |||
1611 | static boolean | ||
1612 | emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
1613 | { | ||
1614 | /* XXX todo: pop cond exec mask */ | ||
1615 | |||
1616 | gen->if_nesting--; | ||
1617 | |||
1618 | emit_update_exec_mask(gen); | ||
1619 | |||
1620 | return TRUE; | ||
1621 | } | ||
1622 | |||
1623 | |||
1624 | static boolean | ||
1625 | emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
1626 | { | ||
1627 | int exec_reg, loop_reg; | ||
1628 | |||
1629 | exec_reg = get_exec_mask_reg(gen); | ||
1630 | loop_reg = get_loop_mask_reg(gen); | ||
1631 | |||
1632 | /* XXX push loop_exec mask */ | ||
1633 | |||
1634 | spe_comment(gen->f, 0*-4, "initialize loop exec mask = ~0"); | ||
1635 | spe_load_int(gen->f, loop_reg, ~0x0); | ||
1636 | |||
1637 | gen->loop_nesting++; | ||
1638 | gen->loop_start = spe_code_size(gen->f); /* in bytes */ | ||
1639 | |||
1640 | return TRUE; | ||
1641 | } | ||
1642 | |||
1643 | |||
1644 | static boolean | ||
1645 | emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
1646 | { | ||
1647 | const int loop_reg = get_loop_mask_reg(gen); | ||
1648 | const int tmp_reg = get_itemp(gen); | ||
1649 | int offset; | ||
1650 | |||
1651 | /* tmp_reg = exec[0] | exec[1] | exec[2] | exec[3] */ | ||
1652 | spe_orx(gen->f, tmp_reg, loop_reg); | ||
1653 | |||
1654 | offset = gen->loop_start - spe_code_size(gen->f); /* in bytes */ | ||
1655 | |||
1656 | /* branch back to top of loop if tmp_reg != 0 */ | ||
1657 | spe_brnz(gen->f, tmp_reg, offset / 4); | ||
1658 | |||
1659 | /* XXX pop loop_exec mask */ | ||
1660 | |||
1661 | gen->loop_nesting--; | ||
1662 | |||
1663 | emit_update_exec_mask(gen); | ||
1664 | |||
1665 | return TRUE; | ||
1666 | } | ||
1667 | |||
1668 | |||
1669 | static boolean | ||
1670 | emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
1671 | { | ||
1672 | const int exec_reg = get_exec_mask_reg(gen); | ||
1673 | const int loop_reg = get_loop_mask_reg(gen); | ||
1674 | |||
1675 | assert(gen->loop_nesting > 0); | ||
1676 | |||
1677 | spe_comment(gen->f, 0, "loop exec mask &= ~master exec mask"); | ||
1678 | spe_andc(gen->f, loop_reg, loop_reg, exec_reg); | ||
1679 | |||
1680 | emit_update_exec_mask(gen); | ||
1681 | |||
1682 | return TRUE; | ||
1683 | } | ||
1684 | |||
1685 | |||
1686 | static boolean | ||
1687 | emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst) | ||
1688 | { | ||
1689 | assert(gen->loop_nesting > 0); | ||
1690 | |||
1691 | return TRUE; | ||
1692 | } | ||
1693 | |||
1694 | |||
1695 | static boolean | ||
1696 | emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, | ||
1697 | boolean ddx) | ||
1698 | { | ||
1699 | int ch; | ||
1700 | |||
1701 | FOR_EACH_ENABLED_CHANNEL(inst, ch) { | ||
1702 | int s_reg = get_src_reg(gen, ch, &inst->Src[0]); | ||
1703 | int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); | ||
1704 | |||
1705 | int t1_reg = get_itemp(gen); | ||
1706 | int t2_reg = get_itemp(gen); | ||
1707 | |||
1708 | spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */ | ||
1709 | if (ddx) { | ||
1710 | spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */ | ||
1711 | } | ||
1712 | else { | ||
1713 | spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */ | ||
1714 | } | ||
1715 | spe_fs(gen->f, d_reg, t2_reg, t1_reg); | ||
1716 | |||
1717 | free_itemps(gen); | ||
1718 | } | ||
1719 | |||
1720 | return TRUE; | ||
1721 | } | ||
1722 | |||
1723 | |||
1724 | |||
1725 | |||
1726 | /** | ||
1727 | * Emit END instruction. | ||
1728 | * We just return from the shader function at this point. | ||
1729 | * | ||
1730 | * Note that there may be more code after this that would be | ||
1731 | * called by TGSI_OPCODE_CALL. | ||
1732 | */ | ||
1733 | static boolean | ||
1734 | emit_END(struct codegen *gen) | ||
1735 | { | ||
1736 | emit_epilogue(gen); | ||
1737 | return TRUE; | ||
1738 | } | ||
1739 | |||
1740 | |||
1741 | /** | ||
1742 | * Emit code for the given instruction. Just a big switch stmt. | ||
1743 | */ | ||
1744 | static boolean | ||
1745 | emit_instruction(struct codegen *gen, | ||
1746 | const struct tgsi_full_instruction *inst) | ||
1747 | { | ||
1748 | switch (inst->Instruction.Opcode) { | ||
1749 | case TGSI_OPCODE_ARL: | ||
1750 | return emit_ARL(gen, inst); | ||
1751 | case TGSI_OPCODE_MOV: | ||
1752 | return emit_MOV(gen, inst); | ||
1753 | case TGSI_OPCODE_ADD: | ||
1754 | case TGSI_OPCODE_SUB: | ||
1755 | case TGSI_OPCODE_MUL: | ||
1756 | return emit_binop(gen, inst); | ||
1757 | case TGSI_OPCODE_MAD: | ||
1758 | return emit_MAD(gen, inst); | ||
1759 | case TGSI_OPCODE_LRP: | ||
1760 | return emit_LRP(gen, inst); | ||
1761 | case TGSI_OPCODE_DP3: | ||
1762 | return emit_DP3(gen, inst); | ||
1763 | case TGSI_OPCODE_DP4: | ||
1764 | return emit_DP4(gen, inst); | ||
1765 | case TGSI_OPCODE_DPH: | ||
1766 | return emit_DPH(gen, inst); | ||
1767 | case TGSI_OPCODE_NRM: | ||
1768 | return emit_NRM3(gen, inst); | ||
1769 | case TGSI_OPCODE_XPD: | ||
1770 | return emit_XPD(gen, inst); | ||
1771 | case TGSI_OPCODE_RCP: | ||
1772 | case TGSI_OPCODE_RSQ: | ||
1773 | return emit_RCP_RSQ(gen, inst); | ||
1774 | case TGSI_OPCODE_ABS: | ||
1775 | return emit_ABS(gen, inst); | ||
1776 | case TGSI_OPCODE_SGT: | ||
1777 | case TGSI_OPCODE_SLT: | ||
1778 | case TGSI_OPCODE_SGE: | ||
1779 | case TGSI_OPCODE_SLE: | ||
1780 | case TGSI_OPCODE_SEQ: | ||
1781 | case TGSI_OPCODE_SNE: | ||
1782 | return emit_inequality(gen, inst); | ||
1783 | case TGSI_OPCODE_CMP: | ||
1784 | return emit_CMP(gen, inst); | ||
1785 | case TGSI_OPCODE_MIN: | ||
1786 | case TGSI_OPCODE_MAX: | ||
1787 | return emit_MIN_MAX(gen, inst); | ||
1788 | case TGSI_OPCODE_TRUNC: | ||
1789 | return emit_TRUNC(gen, inst); | ||
1790 | case TGSI_OPCODE_FLR: | ||
1791 | return emit_FLR(gen, inst); | ||
1792 | case TGSI_OPCODE_FRC: | ||
1793 | return emit_FRC(gen, inst); | ||
1794 | case TGSI_OPCODE_END: | ||
1795 | return emit_END(gen); | ||
1796 | |||
1797 | case TGSI_OPCODE_COS: | ||
1798 | return emit_function_call(gen, inst, "spu_cos", 1, TRUE); | ||
1799 | case TGSI_OPCODE_SIN: | ||
1800 | return emit_function_call(gen, inst, "spu_sin", 1, TRUE); | ||
1801 | case TGSI_OPCODE_POW: | ||
1802 | return emit_function_call(gen, inst, "spu_pow", 2, TRUE); | ||
1803 | case TGSI_OPCODE_EX2: | ||
1804 | return emit_function_call(gen, inst, "spu_exp2", 1, TRUE); | ||
1805 | case TGSI_OPCODE_LG2: | ||
1806 | return emit_function_call(gen, inst, "spu_log2", 1, TRUE); | ||
1807 | case TGSI_OPCODE_TEX: | ||
1808 | /* fall-through for now */ | ||
1809 | case TGSI_OPCODE_TXD: | ||
1810 | /* fall-through for now */ | ||
1811 | case TGSI_OPCODE_TXB: | ||
1812 | /* fall-through for now */ | ||
1813 | case TGSI_OPCODE_TXL: | ||
1814 | /* fall-through for now */ | ||
1815 | case TGSI_OPCODE_TXP: | ||
1816 | return emit_TEX(gen, inst); | ||
1817 | case TGSI_OPCODE_KIL: | ||
1818 | return emit_KIL(gen, inst); | ||
1819 | |||
1820 | case TGSI_OPCODE_IF: | ||
1821 | return emit_IF(gen, inst); | ||
1822 | case TGSI_OPCODE_ELSE: | ||
1823 | return emit_ELSE(gen, inst); | ||
1824 | case TGSI_OPCODE_ENDIF: | ||
1825 | return emit_ENDIF(gen, inst); | ||
1826 | |||
1827 | case TGSI_OPCODE_BGNLOOP: | ||
1828 | return emit_BGNLOOP(gen, inst); | ||
1829 | case TGSI_OPCODE_ENDLOOP: | ||
1830 | return emit_ENDLOOP(gen, inst); | ||
1831 | case TGSI_OPCODE_BRK: | ||
1832 | return emit_BRK(gen, inst); | ||
1833 | case TGSI_OPCODE_CONT: | ||
1834 | return emit_CONT(gen, inst); | ||
1835 | |||
1836 | case TGSI_OPCODE_DDX: | ||
1837 | return emit_DDX_DDY(gen, inst, TRUE); | ||
1838 | case TGSI_OPCODE_DDY: | ||
1839 | return emit_DDX_DDY(gen, inst, FALSE); | ||
1840 | |||
1841 | /* XXX lots more cases to do... */ | ||
1842 | |||
1843 | default: | ||
1844 | fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n", | ||
1845 | inst->Instruction.Opcode); | ||
1846 | return FALSE; | ||
1847 | } | ||
1848 | |||
1849 | return TRUE; | ||
1850 | } | ||
1851 | |||
1852 | |||
1853 | |||
1854 | /** | ||
1855 | * Emit code for a TGSI immediate value (vector of four floats). | ||
1856 | * This involves register allocation and initialization. | ||
1857 | * XXX the initialization should be done by a "prepare" stage, not | ||
1858 | * per quad execution! | ||
1859 | */ | ||
1860 | static boolean | ||
1861 | emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) | ||
1862 | { | ||
1863 | int ch; | ||
1864 | |||
1865 | assert(gen->num_imm < MAX_TEMPS); | ||
1866 | |||
1867 | for (ch = 0; ch < 4; ch++) { | ||
1868 | float val = immed->u[ch].Float; | ||
1869 | |||
1870 | if (ch > 0 && val == immed->u[ch - 1].Float) { | ||
1871 | /* re-use previous register */ | ||
1872 | gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1]; | ||
1873 | } | ||
1874 | else { | ||
1875 | char str[100]; | ||
1876 | int reg = spe_allocate_available_register(gen->f); | ||
1877 | |||
1878 | if (reg < 0) | ||
1879 | return FALSE; | ||
1880 | |||
1881 | sprintf(str, "init $%d = %f", reg, val); | ||
1882 | spe_comment(gen->f, 0, str); | ||
1883 | |||
1884 | /* update immediate map */ | ||
1885 | gen->imm_regs[gen->num_imm][ch] = reg; | ||
1886 | |||
1887 | /* emit initializer instruction */ | ||
1888 | spe_load_float(gen->f, reg, val); | ||
1889 | } | ||
1890 | } | ||
1891 | |||
1892 | gen->num_imm++; | ||
1893 | |||
1894 | return TRUE; | ||
1895 | } | ||
1896 | |||
1897 | |||
1898 | |||
1899 | /** | ||
1900 | * Emit "code" for a TGSI declaration. | ||
1901 | * We only care about TGSI TEMPORARY register declarations at this time. | ||
1902 | * For each TGSI TEMPORARY we allocate four SPE registers. | ||
1903 | */ | ||
1904 | static boolean | ||
1905 | emit_declaration(struct cell_context *cell, | ||
1906 | struct codegen *gen, const struct tgsi_full_declaration *decl) | ||
1907 | { | ||
1908 | int i, ch; | ||
1909 | |||
1910 | switch (decl->Declaration.File) { | ||
1911 | case TGSI_FILE_TEMPORARY: | ||
1912 | for (i = decl->Range.First; | ||
1913 | i <= decl->Range.Last; | ||
1914 | i++) { | ||
1915 | assert(i < MAX_TEMPS); | ||
1916 | for (ch = 0; ch < 4; ch++) { | ||
1917 | gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f); | ||
1918 | if (gen->temp_regs[i][ch] < 0) | ||
1919 | return FALSE; /* out of regs */ | ||
1920 | } | ||
1921 | |||
1922 | /* XXX if we run out of SPE registers, we need to spill | ||
1923 | * to SPU memory. someday... | ||
1924 | */ | ||
1925 | |||
1926 | { | ||
1927 | char buf[100]; | ||
1928 | sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i, | ||
1929 | gen->temp_regs[i][0], gen->temp_regs[i][1], | ||
1930 | gen->temp_regs[i][2], gen->temp_regs[i][3]); | ||
1931 | spe_comment(gen->f, 0, buf); | ||
1932 | } | ||
1933 | } | ||
1934 | break; | ||
1935 | default: | ||
1936 | ; /* ignore */ | ||
1937 | } | ||
1938 | |||
1939 | return TRUE; | ||
1940 | } | ||
1941 | |||
1942 | |||
1943 | |||
1944 | /** | ||
1945 | * Translate TGSI shader code to SPE instructions. This is done when | ||
1946 | * the state tracker gives us a new shader (via pipe->create_fs_state()). | ||
1947 | * | ||
1948 | * \param cell the rendering context (in) | ||
1949 | * \param tokens the TGSI shader (in) | ||
1950 | * \param f the generated function (out) | ||
1951 | */ | ||
1952 | boolean | ||
1953 | cell_gen_fragment_program(struct cell_context *cell, | ||
1954 | const struct tgsi_token *tokens, | ||
1955 | struct spe_function *f) | ||
1956 | { | ||
1957 | struct tgsi_parse_context parse; | ||
1958 | struct codegen gen; | ||
1959 | uint ic = 0; | ||
1960 | |||
1961 | memset(&gen, 0, sizeof(gen)); | ||
1962 | gen.cell = cell; | ||
1963 | gen.f = f; | ||
1964 | |||
1965 | /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ | ||
1966 | gen.inputs_reg = 3; /* pointer to inputs array */ | ||
1967 | gen.outputs_reg = 4; /* pointer to outputs array */ | ||
1968 | gen.constants_reg = 5; /* pointer to constants array */ | ||
1969 | |||
1970 | spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); | ||
1971 | spe_allocate_register(f, gen.inputs_reg); | ||
1972 | spe_allocate_register(f, gen.outputs_reg); | ||
1973 | spe_allocate_register(f, gen.constants_reg); | ||
1974 | |||
1975 | if (cell->debug_flags & CELL_DEBUG_ASM) { | ||
1976 | spe_print_code(f, TRUE); | ||
1977 | spe_indent(f, 2*8); | ||
1978 | printf("Begin %s\n", __FUNCTION__); | ||
1979 | tgsi_dump(tokens, 0); | ||
1980 | } | ||
1981 | |||
1982 | tgsi_parse_init(&parse, tokens); | ||
1983 | |||
1984 | emit_prologue(&gen); | ||
1985 | |||
1986 | while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) { | ||
1987 | tgsi_parse_token(&parse); | ||
1988 | |||
1989 | switch (parse.FullToken.Token.Type) { | ||
1990 | case TGSI_TOKEN_TYPE_IMMEDIATE: | ||
1991 | if (f->print) { | ||
1992 | _debug_printf(" # "); | ||
1993 | tgsi_dump_immediate(&parse.FullToken.FullImmediate); | ||
1994 | } | ||
1995 | if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) | ||
1996 | gen.error = TRUE; | ||
1997 | break; | ||
1998 | |||
1999 | case TGSI_TOKEN_TYPE_DECLARATION: | ||
2000 | if (f->print) { | ||
2001 | _debug_printf(" # "); | ||
2002 | tgsi_dump_declaration(&parse.FullToken.FullDeclaration); | ||
2003 | } | ||
2004 | if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration)) | ||
2005 | gen.error = TRUE; | ||
2006 | break; | ||
2007 | |||
2008 | case TGSI_TOKEN_TYPE_INSTRUCTION: | ||
2009 | if (f->print) { | ||
2010 | _debug_printf(" # "); | ||
2011 | ic++; | ||
2012 | tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic); | ||
2013 | } | ||
2014 | if (!emit_instruction(&gen, &parse.FullToken.FullInstruction)) | ||
2015 | gen.error = TRUE; | ||
2016 | break; | ||
2017 | |||
2018 | default: | ||
2019 | assert(0); | ||
2020 | } | ||
2021 | } | ||
2022 | |||
2023 | if (gen.error) { | ||
2024 | /* terminate the SPE code */ | ||
2025 | return emit_END(&gen); | ||
2026 | } | ||
2027 | |||
2028 | if (cell->debug_flags & CELL_DEBUG_ASM) { | ||
2029 | printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst); | ||
2030 | printf("End %s\n", __FUNCTION__); | ||
2031 | } | ||
2032 | |||
2033 | tgsi_parse_free( &parse ); | ||
2034 | |||
2035 | return !gen.error; | ||
2036 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.h b/src/gallium/drivers/cell/ppu/cell_gen_fp.h deleted file mode 100644 index 99faea70462..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.h +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | |||
30 | #ifndef CELL_GEN_FP_H | ||
31 | #define CELL_GEN_FP_H | ||
32 | |||
33 | |||
34 | |||
35 | extern boolean | ||
36 | cell_gen_fragment_program(struct cell_context *cell, | ||
37 | const struct tgsi_token *tokens, | ||
38 | struct spe_function *f); | ||
39 | |||
40 | |||
41 | #endif /* CELL_GEN_FP_H */ | ||
42 | |||
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c deleted file mode 100644 index 76a85178007..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ /dev/null | |||
@@ -1,2189 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * Copyright 2009 VMware, Inc. All Rights Reserved. | ||
6 | * | ||
7 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
8 | * copy of this software and associated documentation files (the | ||
9 | * "Software"), to deal in the Software without restriction, including | ||
10 | * without limitation the rights to use, copy, modify, merge, publish, | ||
11 | * distribute, sub license, and/or sell copies of the Software, and to | ||
12 | * permit persons to whom the Software is furnished to do so, subject to | ||
13 | * the following conditions: | ||
14 | * | ||
15 | * The above copyright notice and this permission notice (including the | ||
16 | * next paragraph) shall be included in all copies or substantial portions | ||
17 | * of the Software. | ||
18 | * | ||
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
22 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
23 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
24 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
25 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
26 | * | ||
27 | **************************************************************************/ | ||
28 | |||
29 | /** | ||
30 | * Generate SPU per-fragment code (actually per-quad code). | ||
31 | * \author Brian Paul | ||
32 | * \author Bob Ellison | ||
33 | */ | ||
34 | |||
35 | |||
36 | #include "pipe/p_defines.h" | ||
37 | #include "pipe/p_state.h" | ||
38 | #include "rtasm/rtasm_ppc_spe.h" | ||
39 | #include "cell_context.h" | ||
40 | #include "cell_gen_fragment.h" | ||
41 | |||
42 | |||
43 | |||
44 | /** Do extra optimizations? */ | ||
45 | #define OPTIMIZATIONS 1 | ||
46 | |||
47 | |||
48 | /** | ||
49 | * Generate SPE code to perform Z/depth testing. | ||
50 | * | ||
51 | * \param dsa Gallium depth/stencil/alpha state to gen code for | ||
52 | * \param f SPE function to append instruction onto. | ||
53 | * \param mask_reg register containing quad/pixel "alive" mask (in/out) | ||
54 | * \param ifragZ_reg register containing integer fragment Z values (in) | ||
55 | * \param ifbZ_reg register containing integer frame buffer Z values (in/out) | ||
56 | * \param zmask_reg register containing result of Z test/comparison (out) | ||
57 | * | ||
58 | * Returns TRUE if the Z-buffer needs to be updated. | ||
59 | */ | ||
60 | static boolean | ||
61 | gen_depth_test(struct spe_function *f, | ||
62 | const struct pipe_depth_stencil_alpha_state *dsa, | ||
63 | int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) | ||
64 | { | ||
65 | /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_ | ||
66 | * quantities. This only makes a difference for 32-bit Z values though. | ||
67 | */ | ||
68 | ASSERT(dsa->depth.enabled); | ||
69 | |||
70 | switch (dsa->depth.func) { | ||
71 | case PIPE_FUNC_EQUAL: | ||
72 | /* zmask = (ifragZ == ref) */ | ||
73 | spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); | ||
74 | /* mask = (mask & zmask) */ | ||
75 | spe_and(f, mask_reg, mask_reg, zmask_reg); | ||
76 | break; | ||
77 | |||
78 | case PIPE_FUNC_NOTEQUAL: | ||
79 | /* zmask = (ifragZ == ref) */ | ||
80 | spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); | ||
81 | /* mask = (mask & ~zmask) */ | ||
82 | spe_andc(f, mask_reg, mask_reg, zmask_reg); | ||
83 | break; | ||
84 | |||
85 | case PIPE_FUNC_GREATER: | ||
86 | /* zmask = (ifragZ > ref) */ | ||
87 | spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); | ||
88 | /* mask = (mask & zmask) */ | ||
89 | spe_and(f, mask_reg, mask_reg, zmask_reg); | ||
90 | break; | ||
91 | |||
92 | case PIPE_FUNC_LESS: | ||
93 | /* zmask = (ref > ifragZ) */ | ||
94 | spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); | ||
95 | /* mask = (mask & zmask) */ | ||
96 | spe_and(f, mask_reg, mask_reg, zmask_reg); | ||
97 | break; | ||
98 | |||
99 | case PIPE_FUNC_LEQUAL: | ||
100 | /* zmask = (ifragZ > ref) */ | ||
101 | spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); | ||
102 | /* mask = (mask & ~zmask) */ | ||
103 | spe_andc(f, mask_reg, mask_reg, zmask_reg); | ||
104 | break; | ||
105 | |||
106 | case PIPE_FUNC_GEQUAL: | ||
107 | /* zmask = (ref > ifragZ) */ | ||
108 | spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); | ||
109 | /* mask = (mask & ~zmask) */ | ||
110 | spe_andc(f, mask_reg, mask_reg, zmask_reg); | ||
111 | break; | ||
112 | |||
113 | case PIPE_FUNC_NEVER: | ||
114 | spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */ | ||
115 | spe_move(f, zmask_reg, mask_reg); /* zmask = mask */ | ||
116 | break; | ||
117 | |||
118 | case PIPE_FUNC_ALWAYS: | ||
119 | /* mask unchanged */ | ||
120 | spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */ | ||
121 | break; | ||
122 | |||
123 | default: | ||
124 | ASSERT(0); | ||
125 | break; | ||
126 | } | ||
127 | |||
128 | if (dsa->depth.writemask) { | ||
129 | /* | ||
130 | * If (ztest passed) { | ||
131 | * framebufferZ = fragmentZ; | ||
132 | * } | ||
133 | * OR, | ||
134 | * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ; | ||
135 | */ | ||
136 | spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg); | ||
137 | return TRUE; | ||
138 | } | ||
139 | |||
140 | return FALSE; | ||
141 | } | ||
142 | |||
143 | |||
144 | /** | ||
145 | * Generate SPE code to perform alpha testing. | ||
146 | * | ||
147 | * \param dsa Gallium depth/stencil/alpha state to gen code for | ||
148 | * \param f SPE function to append instruction onto. | ||
149 | * \param mask_reg register containing quad/pixel "alive" mask (in/out) | ||
150 | * \param fragA_reg register containing four fragment alpha values (in) | ||
151 | */ | ||
152 | static void | ||
153 | gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, | ||
154 | struct spe_function *f, int mask_reg, int fragA_reg) | ||
155 | { | ||
156 | int ref_reg = spe_allocate_available_register(f); | ||
157 | int amask_reg = spe_allocate_available_register(f); | ||
158 | |||
159 | ASSERT(dsa->alpha.enabled); | ||
160 | |||
161 | if ((dsa->alpha.func != PIPE_FUNC_NEVER) && | ||
162 | (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { | ||
163 | /* load/splat the alpha reference float value */ | ||
164 | spe_load_float(f, ref_reg, dsa->alpha.ref_value); | ||
165 | } | ||
166 | |||
167 | /* emit code to do the alpha comparison, updating 'mask' */ | ||
168 | switch (dsa->alpha.func) { | ||
169 | case PIPE_FUNC_EQUAL: | ||
170 | /* amask = (fragA == ref) */ | ||
171 | spe_fceq(f, amask_reg, fragA_reg, ref_reg); | ||
172 | /* mask = (mask & amask) */ | ||
173 | spe_and(f, mask_reg, mask_reg, amask_reg); | ||
174 | break; | ||
175 | |||
176 | case PIPE_FUNC_NOTEQUAL: | ||
177 | /* amask = (fragA == ref) */ | ||
178 | spe_fceq(f, amask_reg, fragA_reg, ref_reg); | ||
179 | /* mask = (mask & ~amask) */ | ||
180 | spe_andc(f, mask_reg, mask_reg, amask_reg); | ||
181 | break; | ||
182 | |||
183 | case PIPE_FUNC_GREATER: | ||
184 | /* amask = (fragA > ref) */ | ||
185 | spe_fcgt(f, amask_reg, fragA_reg, ref_reg); | ||
186 | /* mask = (mask & amask) */ | ||
187 | spe_and(f, mask_reg, mask_reg, amask_reg); | ||
188 | break; | ||
189 | |||
190 | case PIPE_FUNC_LESS: | ||
191 | /* amask = (ref > fragA) */ | ||
192 | spe_fcgt(f, amask_reg, ref_reg, fragA_reg); | ||
193 | /* mask = (mask & amask) */ | ||
194 | spe_and(f, mask_reg, mask_reg, amask_reg); | ||
195 | break; | ||
196 | |||
197 | case PIPE_FUNC_LEQUAL: | ||
198 | /* amask = (fragA > ref) */ | ||
199 | spe_fcgt(f, amask_reg, fragA_reg, ref_reg); | ||
200 | /* mask = (mask & ~amask) */ | ||
201 | spe_andc(f, mask_reg, mask_reg, amask_reg); | ||
202 | break; | ||
203 | |||
204 | case PIPE_FUNC_GEQUAL: | ||
205 | /* amask = (ref > fragA) */ | ||
206 | spe_fcgt(f, amask_reg, ref_reg, fragA_reg); | ||
207 | /* mask = (mask & ~amask) */ | ||
208 | spe_andc(f, mask_reg, mask_reg, amask_reg); | ||
209 | break; | ||
210 | |||
211 | case PIPE_FUNC_NEVER: | ||
212 | spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */ | ||
213 | break; | ||
214 | |||
215 | case PIPE_FUNC_ALWAYS: | ||
216 | /* no-op, mask unchanged */ | ||
217 | break; | ||
218 | |||
219 | default: | ||
220 | ASSERT(0); | ||
221 | break; | ||
222 | } | ||
223 | |||
224 | #if OPTIMIZATIONS | ||
225 | /* if mask == {0,0,0,0} we're all done, return */ | ||
226 | { | ||
227 | /* re-use amask reg here */ | ||
228 | int tmp_reg = amask_reg; | ||
229 | /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */ | ||
230 | spe_orx(f, tmp_reg, mask_reg); | ||
231 | /* if tmp[0] == 0 then return from function call */ | ||
232 | spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0); | ||
233 | } | ||
234 | #endif | ||
235 | |||
236 | spe_release_register(f, ref_reg); | ||
237 | spe_release_register(f, amask_reg); | ||
238 | } | ||
239 | |||
240 | |||
241 | /** | ||
242 | * This pair of functions is used inline to allocate and deallocate | ||
243 | * optional constant registers. Once a constant is discovered to be | ||
244 | * needed, we will likely need it again, so we don't want to deallocate | ||
245 | * it and have to allocate and load it again unnecessarily. | ||
246 | */ | ||
247 | static INLINE void | ||
248 | setup_optional_register(struct spe_function *f, | ||
249 | int *r) | ||
250 | { | ||
251 | if (*r < 0) | ||
252 | *r = spe_allocate_available_register(f); | ||
253 | } | ||
254 | |||
255 | static INLINE void | ||
256 | release_optional_register(struct spe_function *f, | ||
257 | int r) | ||
258 | { | ||
259 | if (r >= 0) | ||
260 | spe_release_register(f, r); | ||
261 | } | ||
262 | |||
263 | static INLINE void | ||
264 | setup_const_register(struct spe_function *f, | ||
265 | int *r, | ||
266 | float value) | ||
267 | { | ||
268 | if (*r >= 0) | ||
269 | return; | ||
270 | setup_optional_register(f, r); | ||
271 | spe_load_float(f, *r, value); | ||
272 | } | ||
273 | |||
274 | static INLINE void | ||
275 | release_const_register(struct spe_function *f, | ||
276 | int r) | ||
277 | { | ||
278 | release_optional_register(f, r); | ||
279 | } | ||
280 | |||
281 | |||
282 | |||
283 | /** | ||
284 | * Unpack/convert framebuffer colors from four 32-bit packed colors | ||
285 | * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA). | ||
286 | * Each 8-bit color component is expanded into a float in [0.0, 1.0]. | ||
287 | */ | ||
288 | static void | ||
289 | unpack_colors(struct spe_function *f, | ||
290 | enum pipe_format color_format, | ||
291 | int fbRGBA_reg, | ||
292 | int fbR_reg, int fbG_reg, int fbB_reg, int fbA_reg) | ||
293 | { | ||
294 | int mask0_reg = spe_allocate_available_register(f); | ||
295 | int mask1_reg = spe_allocate_available_register(f); | ||
296 | int mask2_reg = spe_allocate_available_register(f); | ||
297 | int mask3_reg = spe_allocate_available_register(f); | ||
298 | |||
299 | spe_load_int(f, mask0_reg, 0xff); | ||
300 | spe_load_int(f, mask1_reg, 0xff00); | ||
301 | spe_load_int(f, mask2_reg, 0xff0000); | ||
302 | spe_load_int(f, mask3_reg, 0xff000000); | ||
303 | |||
304 | spe_comment(f, 0, "Unpack framebuffer colors, convert to floats"); | ||
305 | |||
306 | switch (color_format) { | ||
307 | case PIPE_FORMAT_B8G8R8A8_UNORM: | ||
308 | /* fbB = fbRGBA & mask */ | ||
309 | spe_and(f, fbB_reg, fbRGBA_reg, mask0_reg); | ||
310 | |||
311 | /* fbG = fbRGBA & mask */ | ||
312 | spe_and(f, fbG_reg, fbRGBA_reg, mask1_reg); | ||
313 | |||
314 | /* fbR = fbRGBA & mask */ | ||
315 | spe_and(f, fbR_reg, fbRGBA_reg, mask2_reg); | ||
316 | |||
317 | /* fbA = fbRGBA & mask */ | ||
318 | spe_and(f, fbA_reg, fbRGBA_reg, mask3_reg); | ||
319 | |||
320 | /* fbG = fbG >> 8 */ | ||
321 | spe_roti(f, fbG_reg, fbG_reg, -8); | ||
322 | |||
323 | /* fbR = fbR >> 16 */ | ||
324 | spe_roti(f, fbR_reg, fbR_reg, -16); | ||
325 | |||
326 | /* fbA = fbA >> 24 */ | ||
327 | spe_roti(f, fbA_reg, fbA_reg, -24); | ||
328 | break; | ||
329 | |||
330 | case PIPE_FORMAT_A8R8G8B8_UNORM: | ||
331 | /* fbA = fbRGBA & mask */ | ||
332 | spe_and(f, fbA_reg, fbRGBA_reg, mask0_reg); | ||
333 | |||
334 | /* fbR = fbRGBA & mask */ | ||
335 | spe_and(f, fbR_reg, fbRGBA_reg, mask1_reg); | ||
336 | |||
337 | /* fbG = fbRGBA & mask */ | ||
338 | spe_and(f, fbG_reg, fbRGBA_reg, mask2_reg); | ||
339 | |||
340 | /* fbB = fbRGBA & mask */ | ||
341 | spe_and(f, fbB_reg, fbRGBA_reg, mask3_reg); | ||
342 | |||
343 | /* fbR = fbR >> 8 */ | ||
344 | spe_roti(f, fbR_reg, fbR_reg, -8); | ||
345 | |||
346 | /* fbG = fbG >> 16 */ | ||
347 | spe_roti(f, fbG_reg, fbG_reg, -16); | ||
348 | |||
349 | /* fbB = fbB >> 24 */ | ||
350 | spe_roti(f, fbB_reg, fbB_reg, -24); | ||
351 | break; | ||
352 | |||
353 | default: | ||
354 | ASSERT(0); | ||
355 | } | ||
356 | |||
357 | /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */ | ||
358 | spe_cuflt(f, fbR_reg, fbR_reg, 8); | ||
359 | spe_cuflt(f, fbG_reg, fbG_reg, 8); | ||
360 | spe_cuflt(f, fbB_reg, fbB_reg, 8); | ||
361 | spe_cuflt(f, fbA_reg, fbA_reg, 8); | ||
362 | |||
363 | spe_release_register(f, mask0_reg); | ||
364 | spe_release_register(f, mask1_reg); | ||
365 | spe_release_register(f, mask2_reg); | ||
366 | spe_release_register(f, mask3_reg); | ||
367 | } | ||
368 | |||
369 | |||
370 | /** | ||
371 | * Generate SPE code to implement the given blend mode for a quad of pixels. | ||
372 | * \param f SPE function to append instruction onto. | ||
373 | * \param fragR_reg register with fragment red values (float) (in/out) | ||
374 | * \param fragG_reg register with fragment green values (float) (in/out) | ||
375 | * \param fragB_reg register with fragment blue values (float) (in/out) | ||
376 | * \param fragA_reg register with fragment alpha values (float) (in/out) | ||
377 | * \param fbRGBA_reg register with packed framebuffer colors (integer) (in) | ||
378 | */ | ||
379 | static void | ||
380 | gen_blend(const struct pipe_blend_state *blend, | ||
381 | const struct pipe_blend_color *blend_color, | ||
382 | struct spe_function *f, | ||
383 | enum pipe_format color_format, | ||
384 | int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, | ||
385 | int fbRGBA_reg) | ||
386 | { | ||
387 | int term1R_reg = spe_allocate_available_register(f); | ||
388 | int term1G_reg = spe_allocate_available_register(f); | ||
389 | int term1B_reg = spe_allocate_available_register(f); | ||
390 | int term1A_reg = spe_allocate_available_register(f); | ||
391 | |||
392 | int term2R_reg = spe_allocate_available_register(f); | ||
393 | int term2G_reg = spe_allocate_available_register(f); | ||
394 | int term2B_reg = spe_allocate_available_register(f); | ||
395 | int term2A_reg = spe_allocate_available_register(f); | ||
396 | |||
397 | int fbR_reg = spe_allocate_available_register(f); | ||
398 | int fbG_reg = spe_allocate_available_register(f); | ||
399 | int fbB_reg = spe_allocate_available_register(f); | ||
400 | int fbA_reg = spe_allocate_available_register(f); | ||
401 | |||
402 | int tmp_reg = spe_allocate_available_register(f); | ||
403 | |||
404 | /* Optional constant registers we might or might not end up using; | ||
405 | * if we do use them, make sure we only allocate them once by | ||
406 | * keeping a flag on each one. | ||
407 | */ | ||
408 | int one_reg = -1; | ||
409 | int constR_reg = -1, constG_reg = -1, constB_reg = -1, constA_reg = -1; | ||
410 | |||
411 | ASSERT(blend->rt[0].blend_enable); | ||
412 | |||
413 | /* packed RGBA -> float colors */ | ||
414 | unpack_colors(f, color_format, fbRGBA_reg, | ||
415 | fbR_reg, fbG_reg, fbB_reg, fbA_reg); | ||
416 | |||
417 | /* | ||
418 | * Compute Src RGB terms. We're actually looking for the value | ||
419 | * of (the appropriate RGB factors) * (the incoming source RGB color), | ||
420 | * because in some cases (like PIPE_BLENDFACTOR_ONE and | ||
421 | * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math. | ||
422 | */ | ||
423 | switch (blend->rt[0].rgb_src_factor) { | ||
424 | case PIPE_BLENDFACTOR_ONE: | ||
425 | /* factors = (1,1,1), so term = (R,G,B) */ | ||
426 | spe_move(f, term1R_reg, fragR_reg); | ||
427 | spe_move(f, term1G_reg, fragG_reg); | ||
428 | spe_move(f, term1B_reg, fragB_reg); | ||
429 | break; | ||
430 | case PIPE_BLENDFACTOR_ZERO: | ||
431 | /* factors = (0,0,0), so term = (0,0,0) */ | ||
432 | spe_load_float(f, term1R_reg, 0.0f); | ||
433 | spe_load_float(f, term1G_reg, 0.0f); | ||
434 | spe_load_float(f, term1B_reg, 0.0f); | ||
435 | break; | ||
436 | case PIPE_BLENDFACTOR_SRC_COLOR: | ||
437 | /* factors = (R,G,B), so term = (R*R, G*G, B*B) */ | ||
438 | spe_fm(f, term1R_reg, fragR_reg, fragR_reg); | ||
439 | spe_fm(f, term1G_reg, fragG_reg, fragG_reg); | ||
440 | spe_fm(f, term1B_reg, fragB_reg, fragB_reg); | ||
441 | break; | ||
442 | case PIPE_BLENDFACTOR_SRC_ALPHA: | ||
443 | /* factors = (A,A,A), so term = (R*A, G*A, B*A) */ | ||
444 | spe_fm(f, term1R_reg, fragR_reg, fragA_reg); | ||
445 | spe_fm(f, term1G_reg, fragG_reg, fragA_reg); | ||
446 | spe_fm(f, term1B_reg, fragB_reg, fragA_reg); | ||
447 | break; | ||
448 | case PIPE_BLENDFACTOR_INV_SRC_COLOR: | ||
449 | /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) | ||
450 | * or in other words term = (R-R*R, G-G*G, B-B*B) | ||
451 | * fnms(a,b,c,d) computes a = d - b*c | ||
452 | */ | ||
453 | spe_fnms(f, term1R_reg, fragR_reg, fragR_reg, fragR_reg); | ||
454 | spe_fnms(f, term1G_reg, fragG_reg, fragG_reg, fragG_reg); | ||
455 | spe_fnms(f, term1B_reg, fragB_reg, fragB_reg, fragB_reg); | ||
456 | break; | ||
457 | case PIPE_BLENDFACTOR_DST_COLOR: | ||
458 | /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */ | ||
459 | spe_fm(f, term1R_reg, fragR_reg, fbR_reg); | ||
460 | spe_fm(f, term1G_reg, fragG_reg, fbG_reg); | ||
461 | spe_fm(f, term1B_reg, fragB_reg, fbB_reg); | ||
462 | break; | ||
463 | case PIPE_BLENDFACTOR_INV_DST_COLOR: | ||
464 | /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) | ||
465 | * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb) | ||
466 | * fnms(a,b,c,d) computes a = d - b*c | ||
467 | */ | ||
468 | spe_fnms(f, term1R_reg, fragR_reg, fbR_reg, fragR_reg); | ||
469 | spe_fnms(f, term1G_reg, fragG_reg, fbG_reg, fragG_reg); | ||
470 | spe_fnms(f, term1B_reg, fragB_reg, fbB_reg, fragB_reg); | ||
471 | break; | ||
472 | case PIPE_BLENDFACTOR_INV_SRC_ALPHA: | ||
473 | /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) | ||
474 | * or term = (R-R*A,G-G*A,B-B*A) | ||
475 | * fnms(a,b,c,d) computes a = d - b*c | ||
476 | */ | ||
477 | spe_fnms(f, term1R_reg, fragR_reg, fragA_reg, fragR_reg); | ||
478 | spe_fnms(f, term1G_reg, fragG_reg, fragA_reg, fragG_reg); | ||
479 | spe_fnms(f, term1B_reg, fragB_reg, fragA_reg, fragB_reg); | ||
480 | break; | ||
481 | case PIPE_BLENDFACTOR_DST_ALPHA: | ||
482 | /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */ | ||
483 | spe_fm(f, term1R_reg, fragR_reg, fbA_reg); | ||
484 | spe_fm(f, term1G_reg, fragG_reg, fbA_reg); | ||
485 | spe_fm(f, term1B_reg, fragB_reg, fbA_reg); | ||
486 | break; | ||
487 | case PIPE_BLENDFACTOR_INV_DST_ALPHA: | ||
488 | /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) | ||
489 | * or term = (R-R*Afb,G-G*Afb,b-B*Afb) | ||
490 | * fnms(a,b,c,d) computes a = d - b*c | ||
491 | */ | ||
492 | spe_fnms(f, term1R_reg, fragR_reg, fbA_reg, fragR_reg); | ||
493 | spe_fnms(f, term1G_reg, fragG_reg, fbA_reg, fragG_reg); | ||
494 | spe_fnms(f, term1B_reg, fragB_reg, fbA_reg, fragB_reg); | ||
495 | break; | ||
496 | case PIPE_BLENDFACTOR_CONST_COLOR: | ||
497 | /* We need the optional constant color registers */ | ||
498 | setup_const_register(f, &constR_reg, blend_color->color[0]); | ||
499 | setup_const_register(f, &constG_reg, blend_color->color[1]); | ||
500 | setup_const_register(f, &constB_reg, blend_color->color[2]); | ||
501 | /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */ | ||
502 | spe_fm(f, term1R_reg, fragR_reg, constR_reg); | ||
503 | spe_fm(f, term1G_reg, fragG_reg, constG_reg); | ||
504 | spe_fm(f, term1B_reg, fragB_reg, constB_reg); | ||
505 | break; | ||
506 | case PIPE_BLENDFACTOR_CONST_ALPHA: | ||
507 | /* we'll need the optional constant alpha register */ | ||
508 | setup_const_register(f, &constA_reg, blend_color->color[3]); | ||
509 | /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */ | ||
510 | spe_fm(f, term1R_reg, fragR_reg, constA_reg); | ||
511 | spe_fm(f, term1G_reg, fragG_reg, constA_reg); | ||
512 | spe_fm(f, term1B_reg, fragB_reg, constA_reg); | ||
513 | break; | ||
514 | case PIPE_BLENDFACTOR_INV_CONST_COLOR: | ||
515 | /* We need the optional constant color registers */ | ||
516 | setup_const_register(f, &constR_reg, blend_color->color[0]); | ||
517 | setup_const_register(f, &constG_reg, blend_color->color[1]); | ||
518 | setup_const_register(f, &constB_reg, blend_color->color[2]); | ||
519 | /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) | ||
520 | * or term = (R-R*Rc, G-G*Gc, B-B*Bc) | ||
521 | * fnms(a,b,c,d) computes a = d - b*c | ||
522 | */ | ||
523 | spe_fnms(f, term1R_reg, fragR_reg, constR_reg, fragR_reg); | ||
524 | spe_fnms(f, term1G_reg, fragG_reg, constG_reg, fragG_reg); | ||
525 | spe_fnms(f, term1B_reg, fragB_reg, constB_reg, fragB_reg); | ||
526 | break; | ||
527 | case PIPE_BLENDFACTOR_INV_CONST_ALPHA: | ||
528 | /* We need the optional constant color registers */ | ||
529 | setup_const_register(f, &constR_reg, blend_color->color[0]); | ||
530 | setup_const_register(f, &constG_reg, blend_color->color[1]); | ||
531 | setup_const_register(f, &constB_reg, blend_color->color[2]); | ||
532 | /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) | ||
533 | * or term = (R-R*Ac,G-G*Ac,B-B*Ac) | ||
534 | * fnms(a,b,c,d) computes a = d - b*c | ||
535 | */ | ||
536 | spe_fnms(f, term1R_reg, fragR_reg, constA_reg, fragR_reg); | ||
537 | spe_fnms(f, term1G_reg, fragG_reg, constA_reg, fragG_reg); | ||
538 | spe_fnms(f, term1B_reg, fragB_reg, constA_reg, fragB_reg); | ||
539 | break; | ||
540 | case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: | ||
541 | /* We'll need the optional {1,1,1,1} register */ | ||
542 | setup_const_register(f, &one_reg, 1.0f); | ||
543 | /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so | ||
544 | * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb)) | ||
545 | * We could expand the term (as a*min(b,c) == min(a*b,a*c) | ||
546 | * as long as a is positive), but then we'd have to do three | ||
547 | * spe_float_min() functions instead of one, so this is simpler. | ||
548 | */ | ||
549 | /* tmp = 1 - Afb */ | ||
550 | spe_fs(f, tmp_reg, one_reg, fbA_reg); | ||
551 | /* tmp = min(A,tmp) */ | ||
552 | spe_float_min(f, tmp_reg, fragA_reg, tmp_reg); | ||
553 | /* term = R*tmp */ | ||
554 | spe_fm(f, term1R_reg, fragR_reg, tmp_reg); | ||
555 | spe_fm(f, term1G_reg, fragG_reg, tmp_reg); | ||
556 | spe_fm(f, term1B_reg, fragB_reg, tmp_reg); | ||
557 | break; | ||
558 | |||
559 | /* These are special D3D cases involving a second color output | ||
560 | * from the fragment shader. I'm not sure we can support them | ||
561 | * yet... XXX | ||
562 | */ | ||
563 | case PIPE_BLENDFACTOR_SRC1_COLOR: | ||
564 | case PIPE_BLENDFACTOR_SRC1_ALPHA: | ||
565 | case PIPE_BLENDFACTOR_INV_SRC1_COLOR: | ||
566 | case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: | ||
567 | |||
568 | default: | ||
569 | ASSERT(0); | ||
570 | } | ||
571 | |||
572 | /* | ||
573 | * Compute Src Alpha term. Like the above, we're looking for | ||
574 | * the full term A*factor, not just the factor itself, because | ||
575 | * in many cases we can avoid doing unnecessary multiplies. | ||
576 | */ | ||
577 | switch (blend->rt[0].alpha_src_factor) { | ||
578 | case PIPE_BLENDFACTOR_ZERO: | ||
579 | /* factor = 0, so term = 0 */ | ||
580 | spe_load_float(f, term1A_reg, 0.0f); | ||
581 | break; | ||
582 | |||
583 | case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* fall through */ | ||
584 | case PIPE_BLENDFACTOR_ONE: | ||
585 | /* factor = 1, so term = A */ | ||
586 | spe_move(f, term1A_reg, fragA_reg); | ||
587 | break; | ||
588 | |||
589 | case PIPE_BLENDFACTOR_SRC_COLOR: | ||
590 | /* factor = A, so term = A*A */ | ||
591 | spe_fm(f, term1A_reg, fragA_reg, fragA_reg); | ||
592 | break; | ||
593 | case PIPE_BLENDFACTOR_SRC_ALPHA: | ||
594 | spe_fm(f, term1A_reg, fragA_reg, fragA_reg); | ||
595 | break; | ||
596 | |||
597 | case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ | ||
598 | case PIPE_BLENDFACTOR_INV_SRC_COLOR: | ||
599 | /* factor = 1-A, so term = A*(1-A) = A-A*A */ | ||
600 | /* fnms(a,b,c,d) computes a = d - b*c */ | ||
601 | spe_fnms(f, term1A_reg, fragA_reg, fragA_reg, fragA_reg); | ||
602 | break; | ||
603 | |||
604 | case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ | ||
605 | case PIPE_BLENDFACTOR_DST_COLOR: | ||
606 | /* factor = Afb, so term = A*Afb */ | ||
607 | spe_fm(f, term1A_reg, fragA_reg, fbA_reg); | ||
608 | break; | ||
609 | |||
610 | case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ | ||
611 | case PIPE_BLENDFACTOR_INV_DST_COLOR: | ||
612 | /* factor = 1-Afb, so term = A*(1-Afb) = A - A*Afb */ | ||
613 | /* fnms(a,b,c,d) computes a = d - b*c */ | ||
614 | spe_fnms(f, term1A_reg, fragA_reg, fbA_reg, fragA_reg); | ||
615 | break; | ||
616 | |||
617 | case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ | ||
618 | case PIPE_BLENDFACTOR_CONST_COLOR: | ||
619 | /* We need the optional constA_reg register */ | ||
620 | setup_const_register(f, &constA_reg, blend_color->color[3]); | ||
621 | /* factor = Ac, so term = A*Ac */ | ||
622 | spe_fm(f, term1A_reg, fragA_reg, constA_reg); | ||
623 | break; | ||
624 | |||
625 | case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ | ||
626 | case PIPE_BLENDFACTOR_INV_CONST_COLOR: | ||
627 | /* We need the optional constA_reg register */ | ||
628 | setup_const_register(f, &constA_reg, blend_color->color[3]); | ||
629 | /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */ | ||
630 | /* fnms(a,b,c,d) computes a = d - b*c */ | ||
631 | spe_fnms(f, term1A_reg, fragA_reg, constA_reg, fragA_reg); | ||
632 | break; | ||
633 | |||
634 | /* These are special D3D cases involving a second color output | ||
635 | * from the fragment shader. I'm not sure we can support them | ||
636 | * yet... XXX | ||
637 | */ | ||
638 | case PIPE_BLENDFACTOR_SRC1_COLOR: | ||
639 | case PIPE_BLENDFACTOR_SRC1_ALPHA: | ||
640 | case PIPE_BLENDFACTOR_INV_SRC1_COLOR: | ||
641 | case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: | ||
642 | default: | ||
643 | ASSERT(0); | ||
644 | } | ||
645 | |||
646 | /* | ||
647 | * Compute Dest RGB term. Like the above, we're looking for | ||
648 | * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because | ||
649 | * in many cases we can avoid doing unnecessary multiplies. | ||
650 | */ | ||
651 | switch (blend->rt[0].rgb_dst_factor) { | ||
652 | case PIPE_BLENDFACTOR_ONE: | ||
653 | /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */ | ||
654 | spe_move(f, term2R_reg, fbR_reg); | ||
655 | spe_move(f, term2G_reg, fbG_reg); | ||
656 | spe_move(f, term2B_reg, fbB_reg); | ||
657 | break; | ||
658 | case PIPE_BLENDFACTOR_ZERO: | ||
659 | /* factor s= (0,0,0), so term = (0,0,0) */ | ||
660 | spe_load_float(f, term2R_reg, 0.0f); | ||
661 | spe_load_float(f, term2G_reg, 0.0f); | ||
662 | spe_load_float(f, term2B_reg, 0.0f); | ||
663 | break; | ||
664 | case PIPE_BLENDFACTOR_SRC_COLOR: | ||
665 | /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */ | ||
666 | spe_fm(f, term2R_reg, fbR_reg, fragR_reg); | ||
667 | spe_fm(f, term2G_reg, fbG_reg, fragG_reg); | ||
668 | spe_fm(f, term2B_reg, fbB_reg, fragB_reg); | ||
669 | break; | ||
670 | case PIPE_BLENDFACTOR_INV_SRC_COLOR: | ||
671 | /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B)) | ||
672 | * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B) | ||
673 | * fnms(a,b,c,d) computes a = d - b*c | ||
674 | */ | ||
675 | spe_fnms(f, term2R_reg, fragR_reg, fbR_reg, fbR_reg); | ||
676 | spe_fnms(f, term2G_reg, fragG_reg, fbG_reg, fbG_reg); | ||
677 | spe_fnms(f, term2B_reg, fragB_reg, fbB_reg, fbB_reg); | ||
678 | break; | ||
679 | case PIPE_BLENDFACTOR_SRC_ALPHA: | ||
680 | /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */ | ||
681 | spe_fm(f, term2R_reg, fbR_reg, fragA_reg); | ||
682 | spe_fm(f, term2G_reg, fbG_reg, fragA_reg); | ||
683 | spe_fm(f, term2B_reg, fbB_reg, fragA_reg); | ||
684 | break; | ||
685 | case PIPE_BLENDFACTOR_INV_SRC_ALPHA: | ||
686 | /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */ | ||
687 | /* fnms(a,b,c,d) computes a = d - b*c */ | ||
688 | spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg); | ||
689 | spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg); | ||
690 | spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg); | ||
691 | break; | ||
692 | case PIPE_BLENDFACTOR_DST_COLOR: | ||
693 | /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */ | ||
694 | spe_fm(f, term2R_reg, fbR_reg, fbR_reg); | ||
695 | spe_fm(f, term2G_reg, fbG_reg, fbG_reg); | ||
696 | spe_fm(f, term2B_reg, fbB_reg, fbB_reg); | ||
697 | break; | ||
698 | case PIPE_BLENDFACTOR_INV_DST_COLOR: | ||
699 | /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb)) | ||
700 | * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb) | ||
701 | * fnms(a,b,c,d) computes a = d - b*c | ||
702 | */ | ||
703 | spe_fnms(f, term2R_reg, fbR_reg, fbR_reg, fbR_reg); | ||
704 | spe_fnms(f, term2G_reg, fbG_reg, fbG_reg, fbG_reg); | ||
705 | spe_fnms(f, term2B_reg, fbB_reg, fbB_reg, fbB_reg); | ||
706 | break; | ||
707 | |||
708 | case PIPE_BLENDFACTOR_DST_ALPHA: | ||
709 | /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */ | ||
710 | spe_fm(f, term2R_reg, fbR_reg, fbA_reg); | ||
711 | spe_fm(f, term2G_reg, fbG_reg, fbA_reg); | ||
712 | spe_fm(f, term2B_reg, fbB_reg, fbA_reg); | ||
713 | break; | ||
714 | case PIPE_BLENDFACTOR_INV_DST_ALPHA: | ||
715 | /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb)) | ||
716 | * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb) | ||
717 | * fnms(a,b,c,d) computes a = d - b*c | ||
718 | */ | ||
719 | spe_fnms(f, term2R_reg, fbR_reg, fbA_reg, fbR_reg); | ||
720 | spe_fnms(f, term2G_reg, fbG_reg, fbA_reg, fbG_reg); | ||
721 | spe_fnms(f, term2B_reg, fbB_reg, fbA_reg, fbB_reg); | ||
722 | break; | ||
723 | case PIPE_BLENDFACTOR_CONST_COLOR: | ||
724 | /* We need the optional constant color registers */ | ||
725 | setup_const_register(f, &constR_reg, blend_color->color[0]); | ||
726 | setup_const_register(f, &constG_reg, blend_color->color[1]); | ||
727 | setup_const_register(f, &constB_reg, blend_color->color[2]); | ||
728 | /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */ | ||
729 | spe_fm(f, term2R_reg, fbR_reg, constR_reg); | ||
730 | spe_fm(f, term2G_reg, fbG_reg, constG_reg); | ||
731 | spe_fm(f, term2B_reg, fbB_reg, constB_reg); | ||
732 | break; | ||
733 | case PIPE_BLENDFACTOR_CONST_ALPHA: | ||
734 | /* we'll need the optional constant alpha register */ | ||
735 | setup_const_register(f, &constA_reg, blend_color->color[3]); | ||
736 | /* factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */ | ||
737 | spe_fm(f, term2R_reg, fbR_reg, constA_reg); | ||
738 | spe_fm(f, term2G_reg, fbG_reg, constA_reg); | ||
739 | spe_fm(f, term2B_reg, fbB_reg, constA_reg); | ||
740 | break; | ||
741 | case PIPE_BLENDFACTOR_INV_CONST_COLOR: | ||
742 | /* We need the optional constant color registers */ | ||
743 | setup_const_register(f, &constR_reg, blend_color->color[0]); | ||
744 | setup_const_register(f, &constG_reg, blend_color->color[1]); | ||
745 | setup_const_register(f, &constB_reg, blend_color->color[2]); | ||
746 | /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc)) | ||
747 | * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc) | ||
748 | * fnms(a,b,c,d) computes a = d - b*c | ||
749 | */ | ||
750 | spe_fnms(f, term2R_reg, fbR_reg, constR_reg, fbR_reg); | ||
751 | spe_fnms(f, term2G_reg, fbG_reg, constG_reg, fbG_reg); | ||
752 | spe_fnms(f, term2B_reg, fbB_reg, constB_reg, fbB_reg); | ||
753 | break; | ||
754 | case PIPE_BLENDFACTOR_INV_CONST_ALPHA: | ||
755 | /* We need the optional constant color registers */ | ||
756 | setup_const_register(f, &constR_reg, blend_color->color[0]); | ||
757 | setup_const_register(f, &constG_reg, blend_color->color[1]); | ||
758 | setup_const_register(f, &constB_reg, blend_color->color[2]); | ||
759 | /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac)) | ||
760 | * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac) | ||
761 | * fnms(a,b,c,d) computes a = d - b*c | ||
762 | */ | ||
763 | spe_fnms(f, term2R_reg, fbR_reg, constA_reg, fbR_reg); | ||
764 | spe_fnms(f, term2G_reg, fbG_reg, constA_reg, fbG_reg); | ||
765 | spe_fnms(f, term2B_reg, fbB_reg, constA_reg, fbB_reg); | ||
766 | break; | ||
767 | case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest RGB */ | ||
768 | ASSERT(0); | ||
769 | break; | ||
770 | |||
771 | /* These are special D3D cases involving a second color output | ||
772 | * from the fragment shader. I'm not sure we can support them | ||
773 | * yet... XXX | ||
774 | */ | ||
775 | case PIPE_BLENDFACTOR_SRC1_COLOR: | ||
776 | case PIPE_BLENDFACTOR_SRC1_ALPHA: | ||
777 | case PIPE_BLENDFACTOR_INV_SRC1_COLOR: | ||
778 | case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: | ||
779 | |||
780 | default: | ||
781 | ASSERT(0); | ||
782 | } | ||
783 | |||
784 | /* | ||
785 | * Compute Dest Alpha term. Like the above, we're looking for | ||
786 | * the full term Afb*factor, not just the factor itself, because | ||
787 | * in many cases we can avoid doing unnecessary multiplies. | ||
788 | */ | ||
789 | switch (blend->rt[0].alpha_dst_factor) { | ||
790 | case PIPE_BLENDFACTOR_ONE: | ||
791 | /* factor = 1, so term = Afb */ | ||
792 | spe_move(f, term2A_reg, fbA_reg); | ||
793 | break; | ||
794 | case PIPE_BLENDFACTOR_ZERO: | ||
795 | /* factor = 0, so term = 0 */ | ||
796 | spe_load_float(f, term2A_reg, 0.0f); | ||
797 | break; | ||
798 | |||
799 | case PIPE_BLENDFACTOR_SRC_ALPHA: /* fall through */ | ||
800 | case PIPE_BLENDFACTOR_SRC_COLOR: | ||
801 | /* factor = A, so term = Afb*A */ | ||
802 | spe_fm(f, term2A_reg, fbA_reg, fragA_reg); | ||
803 | break; | ||
804 | |||
805 | case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ | ||
806 | case PIPE_BLENDFACTOR_INV_SRC_COLOR: | ||
807 | /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */ | ||
808 | /* fnms(a,b,c,d) computes a = d - b*c */ | ||
809 | spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg); | ||
810 | break; | ||
811 | |||
812 | case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ | ||
813 | case PIPE_BLENDFACTOR_DST_COLOR: | ||
814 | /* factor = Afb, so term = Afb*Afb */ | ||
815 | spe_fm(f, term2A_reg, fbA_reg, fbA_reg); | ||
816 | break; | ||
817 | |||
818 | case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ | ||
819 | case PIPE_BLENDFACTOR_INV_DST_COLOR: | ||
820 | /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */ | ||
821 | /* fnms(a,b,c,d) computes a = d - b*c */ | ||
822 | spe_fnms(f, term2A_reg, fbA_reg, fbA_reg, fbA_reg); | ||
823 | break; | ||
824 | |||
825 | case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ | ||
826 | case PIPE_BLENDFACTOR_CONST_COLOR: | ||
827 | /* We need the optional constA_reg register */ | ||
828 | setup_const_register(f, &constA_reg, blend_color->color[3]); | ||
829 | /* factor = Ac, so term = Afb*Ac */ | ||
830 | spe_fm(f, term2A_reg, fbA_reg, constA_reg); | ||
831 | break; | ||
832 | |||
833 | case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ | ||
834 | case PIPE_BLENDFACTOR_INV_CONST_COLOR: | ||
835 | /* We need the optional constA_reg register */ | ||
836 | setup_const_register(f, &constA_reg, blend_color->color[3]); | ||
837 | /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */ | ||
838 | /* fnms(a,b,c,d) computes a = d - b*c */ | ||
839 | spe_fnms(f, term2A_reg, fbA_reg, constA_reg, fbA_reg); | ||
840 | break; | ||
841 | |||
842 | case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest alpha */ | ||
843 | ASSERT(0); | ||
844 | break; | ||
845 | |||
846 | /* These are special D3D cases involving a second color output | ||
847 | * from the fragment shader. I'm not sure we can support them | ||
848 | * yet... XXX | ||
849 | */ | ||
850 | case PIPE_BLENDFACTOR_SRC1_COLOR: | ||
851 | case PIPE_BLENDFACTOR_SRC1_ALPHA: | ||
852 | case PIPE_BLENDFACTOR_INV_SRC1_COLOR: | ||
853 | case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: | ||
854 | default: | ||
855 | ASSERT(0); | ||
856 | } | ||
857 | |||
858 | /* | ||
859 | * Combine Src/Dest RGB terms as per the blend equation. | ||
860 | */ | ||
861 | switch (blend->rt[0].rgb_func) { | ||
862 | case PIPE_BLEND_ADD: | ||
863 | spe_fa(f, fragR_reg, term1R_reg, term2R_reg); | ||
864 | spe_fa(f, fragG_reg, term1G_reg, term2G_reg); | ||
865 | spe_fa(f, fragB_reg, term1B_reg, term2B_reg); | ||
866 | break; | ||
867 | case PIPE_BLEND_SUBTRACT: | ||
868 | spe_fs(f, fragR_reg, term1R_reg, term2R_reg); | ||
869 | spe_fs(f, fragG_reg, term1G_reg, term2G_reg); | ||
870 | spe_fs(f, fragB_reg, term1B_reg, term2B_reg); | ||
871 | break; | ||
872 | case PIPE_BLEND_REVERSE_SUBTRACT: | ||
873 | spe_fs(f, fragR_reg, term2R_reg, term1R_reg); | ||
874 | spe_fs(f, fragG_reg, term2G_reg, term1G_reg); | ||
875 | spe_fs(f, fragB_reg, term2B_reg, term1B_reg); | ||
876 | break; | ||
877 | case PIPE_BLEND_MIN: | ||
878 | spe_float_min(f, fragR_reg, term1R_reg, term2R_reg); | ||
879 | spe_float_min(f, fragG_reg, term1G_reg, term2G_reg); | ||
880 | spe_float_min(f, fragB_reg, term1B_reg, term2B_reg); | ||
881 | break; | ||
882 | case PIPE_BLEND_MAX: | ||
883 | spe_float_max(f, fragR_reg, term1R_reg, term2R_reg); | ||
884 | spe_float_max(f, fragG_reg, term1G_reg, term2G_reg); | ||
885 | spe_float_max(f, fragB_reg, term1B_reg, term2B_reg); | ||
886 | break; | ||
887 | default: | ||
888 | ASSERT(0); | ||
889 | } | ||
890 | |||
891 | /* | ||
892 | * Combine Src/Dest A term | ||
893 | */ | ||
894 | switch (blend->rt[0].alpha_func) { | ||
895 | case PIPE_BLEND_ADD: | ||
896 | spe_fa(f, fragA_reg, term1A_reg, term2A_reg); | ||
897 | break; | ||
898 | case PIPE_BLEND_SUBTRACT: | ||
899 | spe_fs(f, fragA_reg, term1A_reg, term2A_reg); | ||
900 | break; | ||
901 | case PIPE_BLEND_REVERSE_SUBTRACT: | ||
902 | spe_fs(f, fragA_reg, term2A_reg, term1A_reg); | ||
903 | break; | ||
904 | case PIPE_BLEND_MIN: | ||
905 | spe_float_min(f, fragA_reg, term1A_reg, term2A_reg); | ||
906 | break; | ||
907 | case PIPE_BLEND_MAX: | ||
908 | spe_float_max(f, fragA_reg, term1A_reg, term2A_reg); | ||
909 | break; | ||
910 | default: | ||
911 | ASSERT(0); | ||
912 | } | ||
913 | |||
914 | spe_release_register(f, term1R_reg); | ||
915 | spe_release_register(f, term1G_reg); | ||
916 | spe_release_register(f, term1B_reg); | ||
917 | spe_release_register(f, term1A_reg); | ||
918 | |||
919 | spe_release_register(f, term2R_reg); | ||
920 | spe_release_register(f, term2G_reg); | ||
921 | spe_release_register(f, term2B_reg); | ||
922 | spe_release_register(f, term2A_reg); | ||
923 | |||
924 | spe_release_register(f, fbR_reg); | ||
925 | spe_release_register(f, fbG_reg); | ||
926 | spe_release_register(f, fbB_reg); | ||
927 | spe_release_register(f, fbA_reg); | ||
928 | |||
929 | spe_release_register(f, tmp_reg); | ||
930 | |||
931 | /* Free any optional registers that actually got used */ | ||
932 | release_const_register(f, one_reg); | ||
933 | release_const_register(f, constR_reg); | ||
934 | release_const_register(f, constG_reg); | ||
935 | release_const_register(f, constB_reg); | ||
936 | release_const_register(f, constA_reg); | ||
937 | } | ||
938 | |||
939 | |||
940 | static void | ||
941 | gen_logicop(const struct pipe_blend_state *blend, | ||
942 | struct spe_function *f, | ||
943 | int fragRGBA_reg, int fbRGBA_reg) | ||
944 | { | ||
945 | /* We've got four 32-bit RGBA packed pixels in each of | ||
946 | * fragRGBA_reg and fbRGBA_reg, not sets of floating-point | ||
947 | * reds, greens, blues, and alphas. | ||
948 | * */ | ||
949 | ASSERT(blend->logicop_enable); | ||
950 | |||
951 | switch(blend->logicop_func) { | ||
952 | case PIPE_LOGICOP_CLEAR: /* 0 */ | ||
953 | spe_zero(f, fragRGBA_reg); | ||
954 | break; | ||
955 | case PIPE_LOGICOP_NOR: /* ~(s | d) */ | ||
956 | spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); | ||
957 | break; | ||
958 | case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */ | ||
959 | /* andc R, A, B computes R = A & ~B */ | ||
960 | spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); | ||
961 | break; | ||
962 | case PIPE_LOGICOP_COPY_INVERTED: /* ~s */ | ||
963 | spe_complement(f, fragRGBA_reg, fragRGBA_reg); | ||
964 | break; | ||
965 | case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */ | ||
966 | /* andc R, A, B computes R = A & ~B */ | ||
967 | spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); | ||
968 | break; | ||
969 | case PIPE_LOGICOP_INVERT: /* ~d */ | ||
970 | /* Note that (A nor A) == ~(A|A) == ~A */ | ||
971 | spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg); | ||
972 | break; | ||
973 | case PIPE_LOGICOP_XOR: /* s ^ d */ | ||
974 | spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); | ||
975 | break; | ||
976 | case PIPE_LOGICOP_NAND: /* ~(s & d) */ | ||
977 | spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); | ||
978 | break; | ||
979 | case PIPE_LOGICOP_AND: /* s & d */ | ||
980 | spe_and(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); | ||
981 | break; | ||
982 | case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */ | ||
983 | spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); | ||
984 | spe_complement(f, fragRGBA_reg, fragRGBA_reg); | ||
985 | break; | ||
986 | case PIPE_LOGICOP_NOOP: /* d */ | ||
987 | spe_move(f, fragRGBA_reg, fbRGBA_reg); | ||
988 | break; | ||
989 | case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */ | ||
990 | /* orc R, A, B computes R = A | ~B */ | ||
991 | spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); | ||
992 | break; | ||
993 | case PIPE_LOGICOP_COPY: /* s */ | ||
994 | break; | ||
995 | case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */ | ||
996 | /* orc R, A, B computes R = A | ~B */ | ||
997 | spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); | ||
998 | break; | ||
999 | case PIPE_LOGICOP_OR: /* s | d */ | ||
1000 | spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); | ||
1001 | break; | ||
1002 | case PIPE_LOGICOP_SET: /* 1 */ | ||
1003 | spe_load_int(f, fragRGBA_reg, 0xffffffff); | ||
1004 | break; | ||
1005 | default: | ||
1006 | ASSERT(0); | ||
1007 | } | ||
1008 | } | ||
1009 | |||
1010 | |||
1011 | /** | ||
1012 | * Generate code to pack a quad of float colors into four 32-bit integers. | ||
1013 | * | ||
1014 | * \param f SPE function to append instruction onto. | ||
1015 | * \param color_format the dest color packing format | ||
1016 | * \param r_reg register containing four red values (in/clobbered) | ||
1017 | * \param g_reg register containing four green values (in/clobbered) | ||
1018 | * \param b_reg register containing four blue values (in/clobbered) | ||
1019 | * \param a_reg register containing four alpha values (in/clobbered) | ||
1020 | * \param rgba_reg register to store the packed RGBA colors (out) | ||
1021 | */ | ||
1022 | static void | ||
1023 | gen_pack_colors(struct spe_function *f, | ||
1024 | enum pipe_format color_format, | ||
1025 | int r_reg, int g_reg, int b_reg, int a_reg, | ||
1026 | int rgba_reg) | ||
1027 | { | ||
1028 | int rg_reg = spe_allocate_available_register(f); | ||
1029 | int ba_reg = spe_allocate_available_register(f); | ||
1030 | |||
1031 | /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ | ||
1032 | spe_cfltu(f, r_reg, r_reg, 32); | ||
1033 | spe_cfltu(f, g_reg, g_reg, 32); | ||
1034 | spe_cfltu(f, b_reg, b_reg, 32); | ||
1035 | spe_cfltu(f, a_reg, a_reg, 32); | ||
1036 | |||
1037 | /* Shift the most significant bytes to the least significant positions. | ||
1038 | * I.e.: reg = reg >> 24 | ||
1039 | */ | ||
1040 | spe_rotmi(f, r_reg, r_reg, -24); | ||
1041 | spe_rotmi(f, g_reg, g_reg, -24); | ||
1042 | spe_rotmi(f, b_reg, b_reg, -24); | ||
1043 | spe_rotmi(f, a_reg, a_reg, -24); | ||
1044 | |||
1045 | /* Shift the color bytes according to the surface format */ | ||
1046 | if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { | ||
1047 | spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */ | ||
1048 | spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */ | ||
1049 | spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */ | ||
1050 | } | ||
1051 | else if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { | ||
1052 | spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */ | ||
1053 | spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */ | ||
1054 | spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */ | ||
1055 | } | ||
1056 | else { | ||
1057 | ASSERT(0); | ||
1058 | } | ||
1059 | |||
1060 | /* Merge red, green, blue, alpha registers to make packed RGBA colors. | ||
1061 | * Eg: after shifting according to color_format we might have: | ||
1062 | * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000} | ||
1063 | * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600} | ||
1064 | * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099} | ||
1065 | * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000} | ||
1066 | * OR-ing all those together gives us four packed colors: | ||
1067 | * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} | ||
1068 | */ | ||
1069 | spe_or(f, rg_reg, r_reg, g_reg); | ||
1070 | spe_or(f, ba_reg, a_reg, b_reg); | ||
1071 | spe_or(f, rgba_reg, rg_reg, ba_reg); | ||
1072 | |||
1073 | spe_release_register(f, rg_reg); | ||
1074 | spe_release_register(f, ba_reg); | ||
1075 | } | ||
1076 | |||
1077 | |||
1078 | static void | ||
1079 | gen_colormask(struct spe_function *f, | ||
1080 | uint colormask, | ||
1081 | enum pipe_format color_format, | ||
1082 | int fragRGBA_reg, int fbRGBA_reg) | ||
1083 | { | ||
1084 | /* We've got four 32-bit RGBA packed pixels in each of | ||
1085 | * fragRGBA_reg and fbRGBA_reg, not sets of floating-point | ||
1086 | * reds, greens, blues, and alphas. Further, the pixels | ||
1087 | * are packed according to the given color format, not | ||
1088 | * necessarily RGBA... | ||
1089 | */ | ||
1090 | uint r_mask; | ||
1091 | uint g_mask; | ||
1092 | uint b_mask; | ||
1093 | uint a_mask; | ||
1094 | |||
1095 | /* Calculate exactly where the bits for any particular color | ||
1096 | * end up, so we can mask them correctly. | ||
1097 | */ | ||
1098 | switch(color_format) { | ||
1099 | case PIPE_FORMAT_B8G8R8A8_UNORM: | ||
1100 | /* ARGB */ | ||
1101 | a_mask = 0xff000000; | ||
1102 | r_mask = 0x00ff0000; | ||
1103 | g_mask = 0x0000ff00; | ||
1104 | b_mask = 0x000000ff; | ||
1105 | break; | ||
1106 | case PIPE_FORMAT_A8R8G8B8_UNORM: | ||
1107 | /* BGRA */ | ||
1108 | b_mask = 0xff000000; | ||
1109 | g_mask = 0x00ff0000; | ||
1110 | r_mask = 0x0000ff00; | ||
1111 | a_mask = 0x000000ff; | ||
1112 | break; | ||
1113 | default: | ||
1114 | ASSERT(0); | ||
1115 | } | ||
1116 | |||
1117 | /* For each R, G, B, and A component we're supposed to mask out, | ||
1118 | * clear its bits. Then our mask operation later will work | ||
1119 | * as expected. | ||
1120 | */ | ||
1121 | if (!(colormask & PIPE_MASK_R)) { | ||
1122 | r_mask = 0; | ||
1123 | } | ||
1124 | if (!(colormask & PIPE_MASK_G)) { | ||
1125 | g_mask = 0; | ||
1126 | } | ||
1127 | if (!(colormask & PIPE_MASK_B)) { | ||
1128 | b_mask = 0; | ||
1129 | } | ||
1130 | if (!(colormask & PIPE_MASK_A)) { | ||
1131 | a_mask = 0; | ||
1132 | } | ||
1133 | |||
1134 | /* Get a temporary register to hold the mask that will be applied | ||
1135 | * to the fragment | ||
1136 | */ | ||
1137 | int colormask_reg = spe_allocate_available_register(f); | ||
1138 | |||
1139 | /* The actual mask we're going to use is an OR of the remaining R, G, B, | ||
1140 | * and A masks. Load the result value into our temporary register. | ||
1141 | */ | ||
1142 | spe_load_uint(f, colormask_reg, r_mask | g_mask | b_mask | a_mask); | ||
1143 | |||
1144 | /* Use the mask register to select between the fragment color | ||
1145 | * values and the frame buffer color values. Wherever the | ||
1146 | * mask has a 0 bit, the current frame buffer color should override | ||
1147 | * the fragment color. Wherever the mask has a 1 bit, the | ||
1148 | * fragment color should persevere. The Select Bits (selb rt, rA, rB, rM) | ||
1149 | * instruction will select bits from its first operand rA wherever the | ||
1150 | * the mask bits rM are 0, and from its second operand rB wherever the | ||
1151 | * mask bits rM are 1. That means that the frame buffer color is the | ||
1152 | * first operand, and the fragment color the second. | ||
1153 | */ | ||
1154 | spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg); | ||
1155 | |||
1156 | /* Release the temporary register and we're done */ | ||
1157 | spe_release_register(f, colormask_reg); | ||
1158 | } | ||
1159 | |||
1160 | |||
1161 | /** | ||
1162 | * This function is annoyingly similar to gen_depth_test(), above, except | ||
1163 | * that instead of comparing two varying values (i.e. fragment and buffer), | ||
1164 | * we're comparing a varying value with a static value. As such, we have | ||
1165 | * access to the Compare Immediate instructions where we don't in | ||
1166 | * gen_depth_test(), which is what makes us very different. | ||
1167 | * | ||
1168 | * There's some added complexity if there's a non-trivial state->mask | ||
1169 | * value; then stencil and reference both must be masked | ||
1170 | * | ||
1171 | * The return value in the stencil_pass_reg is a bitmask of valid | ||
1172 | * fragments that also passed the stencil test. The bitmask of valid | ||
1173 | * fragments that failed would be found in | ||
1174 | * (fragment_mask_reg & ~stencil_pass_reg). | ||
1175 | */ | ||
1176 | static void | ||
1177 | gen_stencil_test(struct spe_function *f, | ||
1178 | const struct pipe_stencil_state *state, | ||
1179 | const unsigned ref_value, | ||
1180 | uint stencil_max_value, | ||
1181 | int fragment_mask_reg, | ||
1182 | int fbS_reg, | ||
1183 | int stencil_pass_reg) | ||
1184 | { | ||
1185 | /* Generate code that puts the set of passing fragments into the | ||
1186 | * stencil_pass_reg register, taking into account whether each fragment | ||
1187 | * was active to begin with. | ||
1188 | */ | ||
1189 | switch (state->func) { | ||
1190 | case PIPE_FUNC_EQUAL: | ||
1191 | if (state->valuemask == stencil_max_value) { | ||
1192 | /* stencil_pass = fragment_mask & (s == reference) */ | ||
1193 | spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, ref_value); | ||
1194 | spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); | ||
1195 | } | ||
1196 | else { | ||
1197 | /* stencil_pass = fragment_mask & ((s&mask) == (reference&mask)) */ | ||
1198 | uint tmp_masked_stencil = spe_allocate_available_register(f); | ||
1199 | spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); | ||
1200 | spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil, | ||
1201 | state->valuemask & ref_value); | ||
1202 | spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); | ||
1203 | spe_release_register(f, tmp_masked_stencil); | ||
1204 | } | ||
1205 | break; | ||
1206 | |||
1207 | case PIPE_FUNC_NOTEQUAL: | ||
1208 | if (state->valuemask == stencil_max_value) { | ||
1209 | /* stencil_pass = fragment_mask & ~(s == reference) */ | ||
1210 | spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, ref_value); | ||
1211 | spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); | ||
1212 | } | ||
1213 | else { | ||
1214 | /* stencil_pass = fragment_mask & ~((s&mask) == (reference&mask)) */ | ||
1215 | int tmp_masked_stencil = spe_allocate_available_register(f); | ||
1216 | spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); | ||
1217 | spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil, | ||
1218 | state->valuemask & ref_value); | ||
1219 | spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); | ||
1220 | spe_release_register(f, tmp_masked_stencil); | ||
1221 | } | ||
1222 | break; | ||
1223 | |||
1224 | case PIPE_FUNC_LESS: | ||
1225 | if (state->valuemask == stencil_max_value) { | ||
1226 | /* stencil_pass = fragment_mask & (reference < s) */ | ||
1227 | spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, ref_value); | ||
1228 | spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); | ||
1229 | } | ||
1230 | else { | ||
1231 | /* stencil_pass = fragment_mask & ((reference&mask) < (s & mask)) */ | ||
1232 | int tmp_masked_stencil = spe_allocate_available_register(f); | ||
1233 | spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); | ||
1234 | spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil, | ||
1235 | state->valuemask & ref_value); | ||
1236 | spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); | ||
1237 | spe_release_register(f, tmp_masked_stencil); | ||
1238 | } | ||
1239 | break; | ||
1240 | |||
1241 | case PIPE_FUNC_GREATER: | ||
1242 | if (state->valuemask == stencil_max_value) { | ||
1243 | /* stencil_pass = fragment_mask & (reference > s) */ | ||
1244 | /* There's no convenient Compare Less Than Immediate instruction, so | ||
1245 | * we'll have to do this one the harder way, by loading a register and | ||
1246 | * comparing directly. Compare Logical Greater Than Word (clgt) | ||
1247 | * treats its operands as unsigned - no sign extension. | ||
1248 | */ | ||
1249 | int tmp_reg = spe_allocate_available_register(f); | ||
1250 | spe_load_uint(f, tmp_reg, ref_value); | ||
1251 | spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); | ||
1252 | spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); | ||
1253 | spe_release_register(f, tmp_reg); | ||
1254 | } | ||
1255 | else { | ||
1256 | /* stencil_pass = fragment_mask & ((reference&mask) > (s&mask)) */ | ||
1257 | int tmp_reg = spe_allocate_available_register(f); | ||
1258 | int tmp_masked_stencil = spe_allocate_available_register(f); | ||
1259 | spe_load_uint(f, tmp_reg, state->valuemask & ref_value); | ||
1260 | spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); | ||
1261 | spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil); | ||
1262 | spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); | ||
1263 | spe_release_register(f, tmp_reg); | ||
1264 | spe_release_register(f, tmp_masked_stencil); | ||
1265 | } | ||
1266 | break; | ||
1267 | |||
1268 | case PIPE_FUNC_GEQUAL: | ||
1269 | if (state->valuemask == stencil_max_value) { | ||
1270 | /* stencil_pass = fragment_mask & (reference >= s) | ||
1271 | * = fragment_mask & ~(s > reference) */ | ||
1272 | spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, | ||
1273 | ref_value); | ||
1274 | spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); | ||
1275 | } | ||
1276 | else { | ||
1277 | /* stencil_pass = fragment_mask & ~((s&mask) > (reference&mask)) */ | ||
1278 | int tmp_masked_stencil = spe_allocate_available_register(f); | ||
1279 | spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); | ||
1280 | spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil, | ||
1281 | state->valuemask & ref_value); | ||
1282 | spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); | ||
1283 | spe_release_register(f, tmp_masked_stencil); | ||
1284 | } | ||
1285 | break; | ||
1286 | |||
1287 | case PIPE_FUNC_LEQUAL: | ||
1288 | if (state->valuemask == stencil_max_value) { | ||
1289 | /* stencil_pass = fragment_mask & (reference <= s) ] | ||
1290 | * = fragment_mask & ~(reference > s) */ | ||
1291 | /* As above, we have to do this by loading a register */ | ||
1292 | int tmp_reg = spe_allocate_available_register(f); | ||
1293 | spe_load_uint(f, tmp_reg, ref_value); | ||
1294 | spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); | ||
1295 | spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); | ||
1296 | spe_release_register(f, tmp_reg); | ||
1297 | } | ||
1298 | else { | ||
1299 | /* stencil_pass = fragment_mask & ~((reference&mask) > (s&mask)) */ | ||
1300 | int tmp_reg = spe_allocate_available_register(f); | ||
1301 | int tmp_masked_stencil = spe_allocate_available_register(f); | ||
1302 | spe_load_uint(f, tmp_reg, ref_value & state->valuemask); | ||
1303 | spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); | ||
1304 | spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil); | ||
1305 | spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); | ||
1306 | spe_release_register(f, tmp_reg); | ||
1307 | spe_release_register(f, tmp_masked_stencil); | ||
1308 | } | ||
1309 | break; | ||
1310 | |||
1311 | case PIPE_FUNC_NEVER: | ||
1312 | /* stencil_pass = fragment_mask & 0 = 0 */ | ||
1313 | spe_load_uint(f, stencil_pass_reg, 0); | ||
1314 | break; | ||
1315 | |||
1316 | case PIPE_FUNC_ALWAYS: | ||
1317 | /* stencil_pass = fragment_mask & 1 = fragment_mask */ | ||
1318 | spe_move(f, stencil_pass_reg, fragment_mask_reg); | ||
1319 | break; | ||
1320 | } | ||
1321 | |||
1322 | /* The fragments that passed the stencil test are now in stencil_pass_reg. | ||
1323 | * The fragments that failed would be (fragment_mask_reg & ~stencil_pass_reg). | ||
1324 | */ | ||
1325 | } | ||
1326 | |||
1327 | |||
1328 | /** | ||
1329 | * This function generates code that calculates a set of new stencil values | ||
1330 | * given the earlier values and the operation to apply. It does not | ||
1331 | * apply any tests. It is intended to be called up to 3 times | ||
1332 | * (for the stencil fail operation, for the stencil pass-z fail operation, | ||
1333 | * and for the stencil pass-z pass operation) to collect up to three | ||
1334 | * possible sets of values, and for the caller to combine them based | ||
1335 | * on the result of the tests. | ||
1336 | * | ||
1337 | * stencil_max_value should be (2^n - 1) where n is the number of bits | ||
1338 | * in the stencil buffer - in other words, it should be usable as a mask. | ||
1339 | */ | ||
1340 | static void | ||
1341 | gen_stencil_values(struct spe_function *f, | ||
1342 | uint stencil_op, | ||
1343 | uint stencil_ref_value, | ||
1344 | uint stencil_max_value, | ||
1345 | int fbS_reg, | ||
1346 | int newS_reg) | ||
1347 | { | ||
1348 | /* The code below assumes that newS_reg and fbS_reg are not the same | ||
1349 | * register; if they can be, the calculations below will have to use | ||
1350 | * an additional temporary register. For now, mark the assumption | ||
1351 | * with an assertion that will fail if they are the same. | ||
1352 | */ | ||
1353 | ASSERT(fbS_reg != newS_reg); | ||
1354 | |||
1355 | /* The code also assumes that the stencil_max_value is of the form | ||
1356 | * 2^n-1 and can therefore be used as a mask for the valid bits in | ||
1357 | * addition to a maximum. Make sure this is the case as well. | ||
1358 | * The clever math below exploits the fact that incrementing a | ||
1359 | * binary number serves to flip all the bits of a number starting at | ||
1360 | * the LSB and continuing to (and including) the first zero bit | ||
1361 | * found. That means that a number and its increment will always | ||
1362 | * have at least one bit in common (the high order bit, if nothing | ||
1363 | * else) *unless* the number is zero, *or* the number is of a form | ||
1364 | * consisting of some number of 1s in the low-order bits followed | ||
1365 | * by nothing but 0s in the high-order bits. The latter case | ||
1366 | * implies it's of the form 2^n-1. | ||
1367 | */ | ||
1368 | ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0); | ||
1369 | |||
1370 | switch(stencil_op) { | ||
1371 | case PIPE_STENCIL_OP_KEEP: | ||
1372 | /* newS = S */ | ||
1373 | spe_move(f, newS_reg, fbS_reg); | ||
1374 | break; | ||
1375 | |||
1376 | case PIPE_STENCIL_OP_ZERO: | ||
1377 | /* newS = 0 */ | ||
1378 | spe_zero(f, newS_reg); | ||
1379 | break; | ||
1380 | |||
1381 | case PIPE_STENCIL_OP_REPLACE: | ||
1382 | /* newS = stencil reference value */ | ||
1383 | spe_load_uint(f, newS_reg, stencil_ref_value); | ||
1384 | break; | ||
1385 | |||
1386 | case PIPE_STENCIL_OP_INCR: { | ||
1387 | /* newS = (s == max ? max : s + 1) */ | ||
1388 | int equals_reg = spe_allocate_available_register(f); | ||
1389 | |||
1390 | spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value); | ||
1391 | /* Add Word Immediate computes rT = rA + 10-bit signed immediate */ | ||
1392 | spe_ai(f, newS_reg, fbS_reg, 1); | ||
1393 | /* Select from the current value or the new value based on the equality test */ | ||
1394 | spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg); | ||
1395 | |||
1396 | spe_release_register(f, equals_reg); | ||
1397 | break; | ||
1398 | } | ||
1399 | case PIPE_STENCIL_OP_DECR: { | ||
1400 | /* newS = (s == 0 ? 0 : s - 1) */ | ||
1401 | int equals_reg = spe_allocate_available_register(f); | ||
1402 | |||
1403 | spe_compare_equal_uint(f, equals_reg, fbS_reg, 0); | ||
1404 | /* Add Word Immediate with a (-1) value works */ | ||
1405 | spe_ai(f, newS_reg, fbS_reg, -1); | ||
1406 | /* Select from the current value or the new value based on the equality test */ | ||
1407 | spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg); | ||
1408 | |||
1409 | spe_release_register(f, equals_reg); | ||
1410 | break; | ||
1411 | } | ||
1412 | case PIPE_STENCIL_OP_INCR_WRAP: | ||
1413 | /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can | ||
1414 | * do a normal add and mask off the correct bits | ||
1415 | */ | ||
1416 | spe_ai(f, newS_reg, fbS_reg, 1); | ||
1417 | spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); | ||
1418 | break; | ||
1419 | |||
1420 | case PIPE_STENCIL_OP_DECR_WRAP: | ||
1421 | /* newS = (s == 0 ? max : s - 1), but we'll pull the same mask trick as above */ | ||
1422 | spe_ai(f, newS_reg, fbS_reg, -1); | ||
1423 | spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); | ||
1424 | break; | ||
1425 | |||
1426 | case PIPE_STENCIL_OP_INVERT: | ||
1427 | /* newS = ~s. We take advantage of the mask/max value to invert only | ||
1428 | * the valid bits for the field so we don't have to do an extra "and". | ||
1429 | */ | ||
1430 | spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value); | ||
1431 | break; | ||
1432 | |||
1433 | default: | ||
1434 | ASSERT(0); | ||
1435 | } | ||
1436 | } | ||
1437 | |||
1438 | |||
1439 | /** | ||
1440 | * This function generates code to get all the necessary possible | ||
1441 | * stencil values. For each of the output registers (fail_reg, | ||
1442 | * zfail_reg, and zpass_reg), it either allocates a new register | ||
1443 | * and calculates a new set of values based on the stencil operation, | ||
1444 | * or it reuses a register allocation and calculation done for an | ||
1445 | * earlier (matching) operation, or it reuses the fbS_reg register | ||
1446 | * (if the stencil operation is KEEP, which doesn't change the | ||
1447 | * stencil buffer). | ||
1448 | * | ||
1449 | * Since this function allocates a variable number of registers, | ||
1450 | * to avoid incurring complex logic to free them, they should | ||
1451 | * be allocated after a spe_allocate_register_set() call | ||
1452 | * and released by the corresponding spe_release_register_set() call. | ||
1453 | */ | ||
1454 | static void | ||
1455 | gen_get_stencil_values(struct spe_function *f, | ||
1456 | const struct pipe_stencil_state *stencil, | ||
1457 | const unsigned ref_value, | ||
1458 | const uint depth_enabled, | ||
1459 | int fbS_reg, | ||
1460 | int *fail_reg, | ||
1461 | int *zfail_reg, | ||
1462 | int *zpass_reg) | ||
1463 | { | ||
1464 | uint zfail_op; | ||
1465 | |||
1466 | /* Stenciling had better be enabled here */ | ||
1467 | ASSERT(stencil->enabled); | ||
1468 | |||
1469 | /* If the depth test is not enabled, it is treated as though it always | ||
1470 | * passes, which means that the zfail_op is not considered - a | ||
1471 | * failing stencil test triggers the fail_op, and a passing one | ||
1472 | * triggers the zpass_op | ||
1473 | * | ||
1474 | * As an optimization, override calculation of the zfail_op values | ||
1475 | * if they aren't going to be used. By setting the value of | ||
1476 | * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed | ||
1477 | * to match the incoming stencil values, and no calculation will | ||
1478 | * be done. | ||
1479 | */ | ||
1480 | if (depth_enabled) { | ||
1481 | zfail_op = stencil->zfail_op; | ||
1482 | } | ||
1483 | else { | ||
1484 | zfail_op = PIPE_STENCIL_OP_KEEP; | ||
1485 | } | ||
1486 | |||
1487 | /* One-sided or front-facing stencil */ | ||
1488 | if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) { | ||
1489 | *fail_reg = fbS_reg; | ||
1490 | } | ||
1491 | else { | ||
1492 | *fail_reg = spe_allocate_available_register(f); | ||
1493 | gen_stencil_values(f, stencil->fail_op, ref_value, | ||
1494 | 0xff, fbS_reg, *fail_reg); | ||
1495 | } | ||
1496 | |||
1497 | /* Check the possibly overridden value, not the structure value */ | ||
1498 | if (zfail_op == PIPE_STENCIL_OP_KEEP) { | ||
1499 | *zfail_reg = fbS_reg; | ||
1500 | } | ||
1501 | else if (zfail_op == stencil->fail_op) { | ||
1502 | *zfail_reg = *fail_reg; | ||
1503 | } | ||
1504 | else { | ||
1505 | *zfail_reg = spe_allocate_available_register(f); | ||
1506 | gen_stencil_values(f, stencil->zfail_op, ref_value, | ||
1507 | 0xff, fbS_reg, *zfail_reg); | ||
1508 | } | ||
1509 | |||
1510 | if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) { | ||
1511 | *zpass_reg = fbS_reg; | ||
1512 | } | ||
1513 | else if (stencil->zpass_op == stencil->fail_op) { | ||
1514 | *zpass_reg = *fail_reg; | ||
1515 | } | ||
1516 | else if (stencil->zpass_op == zfail_op) { | ||
1517 | *zpass_reg = *zfail_reg; | ||
1518 | } | ||
1519 | else { | ||
1520 | *zpass_reg = spe_allocate_available_register(f); | ||
1521 | gen_stencil_values(f, stencil->zpass_op, ref_value, | ||
1522 | 0xff, fbS_reg, *zpass_reg); | ||
1523 | } | ||
1524 | } | ||
1525 | |||
1526 | /** | ||
1527 | * Note that fbZ_reg may *not* be set on entry, if in fact | ||
1528 | * the depth test is not enabled. This function must not use | ||
1529 | * the register if depth is not enabled. | ||
1530 | */ | ||
1531 | static boolean | ||
1532 | gen_stencil_depth_test(struct spe_function *f, | ||
1533 | const struct pipe_depth_stencil_alpha_state *dsa, | ||
1534 | const struct pipe_stencil_ref *stencil_ref, | ||
1535 | const uint facing, | ||
1536 | const int mask_reg, const int fragZ_reg, | ||
1537 | const int fbZ_reg, const int fbS_reg) | ||
1538 | { | ||
1539 | /* True if we've generated code that could require writeback to the | ||
1540 | * depth and/or stencil buffers | ||
1541 | */ | ||
1542 | boolean modified_buffers = FALSE; | ||
1543 | |||
1544 | boolean need_to_calculate_stencil_values; | ||
1545 | boolean need_to_writemask_stencil_values; | ||
1546 | |||
1547 | struct pipe_stencil_state *stencil; | ||
1548 | |||
1549 | /* Registers. We may or may not actually allocate these, depending | ||
1550 | * on whether the state values indicate that we need them. | ||
1551 | */ | ||
1552 | int stencil_pass_reg, stencil_fail_reg; | ||
1553 | int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values; | ||
1554 | int stencil_writemask_reg; | ||
1555 | int zmask_reg; | ||
1556 | int newS_reg; | ||
1557 | unsigned ref_value; | ||
1558 | |||
1559 | /* Stenciling is quite complex: up to six different configurable stencil | ||
1560 | * operations/calculations can be required (three each for front-facing | ||
1561 | * and back-facing fragments). Many of those operations will likely | ||
1562 | * be identical, so there's good reason to try to avoid calculating | ||
1563 | * the same values more than once (which unfortunately makes the code less | ||
1564 | * straightforward). | ||
1565 | * | ||
1566 | * To make register management easier, we start a new | ||
1567 | * register set; we can release all the registers in the set at | ||
1568 | * once, and avoid having to keep track of exactly which registers | ||
1569 | * we allocate. We can still allocate and free registers as | ||
1570 | * desired (if we know we no longer need a register), but we don't | ||
1571 | * have to spend the complexity to track the more difficult variant | ||
1572 | * register usage scenarios. | ||
1573 | */ | ||
1574 | spe_comment(f, 0, "Allocating stencil register set"); | ||
1575 | spe_allocate_register_set(f); | ||
1576 | |||
1577 | /* The facing we're given is the fragment facing; it doesn't | ||
1578 | * exactly match the stencil facing. If stencil is enabled, | ||
1579 | * but two-sided stencil is *not* enabled, we use the same | ||
1580 | * stencil settings for both front- and back-facing fragments. | ||
1581 | * We only use the "back-facing" stencil for backfacing fragments | ||
1582 | * if two-sided stenciling is enabled. | ||
1583 | */ | ||
1584 | if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) { | ||
1585 | stencil = &dsa->stencil[1]; | ||
1586 | ref_value = stencil_ref->ref_value[1]; | ||
1587 | } | ||
1588 | else { | ||
1589 | stencil = &dsa->stencil[0]; | ||
1590 | ref_value = stencil_ref->ref_value[0]; | ||
1591 | } | ||
1592 | |||
1593 | /* Calculate the writemask. If the writemask is trivial (either | ||
1594 | * all 0s, meaning that we don't need to calculate any stencil values | ||
1595 | * because they're not going to change the stencil anyway, or all 1s, | ||
1596 | * meaning that we have to calculate the stencil values but do not | ||
1597 | * need to mask them), we can avoid generating code. Don't forget | ||
1598 | * that we need to consider backfacing stencil, if enabled. | ||
1599 | * | ||
1600 | * Note that if the backface stencil is *not* enabled, the backface | ||
1601 | * stencil will have the same values as the frontface stencil. | ||
1602 | */ | ||
1603 | if (stencil->fail_op == PIPE_STENCIL_OP_KEEP && | ||
1604 | stencil->zfail_op == PIPE_STENCIL_OP_KEEP && | ||
1605 | stencil->zpass_op == PIPE_STENCIL_OP_KEEP) { | ||
1606 | need_to_calculate_stencil_values = FALSE; | ||
1607 | need_to_writemask_stencil_values = FALSE; | ||
1608 | } | ||
1609 | else if (stencil->writemask == 0x0) { | ||
1610 | /* All changes are writemasked out, so no need to calculate | ||
1611 | * what those changes might be, and no need to write anything back. | ||
1612 | */ | ||
1613 | need_to_calculate_stencil_values = FALSE; | ||
1614 | need_to_writemask_stencil_values = FALSE; | ||
1615 | } | ||
1616 | else if (stencil->writemask == 0xff) { | ||
1617 | /* Still trivial, but a little less so. We need to write the stencil | ||
1618 | * values, but we don't need to mask them. | ||
1619 | */ | ||
1620 | need_to_calculate_stencil_values = TRUE; | ||
1621 | need_to_writemask_stencil_values = FALSE; | ||
1622 | } | ||
1623 | else { | ||
1624 | /* The general case: calculate, mask, and write */ | ||
1625 | need_to_calculate_stencil_values = TRUE; | ||
1626 | need_to_writemask_stencil_values = TRUE; | ||
1627 | |||
1628 | /* While we're here, generate code that calculates what the | ||
1629 | * writemask should be. If backface stenciling is enabled, | ||
1630 | * and the backface writemask is not the same as the frontface | ||
1631 | * writemask, we'll have to generate code that merges the | ||
1632 | * two masks into a single effective mask based on fragment facing. | ||
1633 | */ | ||
1634 | spe_comment(f, 0, "Computing stencil writemask"); | ||
1635 | stencil_writemask_reg = spe_allocate_available_register(f); | ||
1636 | spe_load_uint(f, stencil_writemask_reg, dsa->stencil[facing].writemask); | ||
1637 | } | ||
1638 | |||
1639 | /* At least one-sided stenciling must be on. Generate code that | ||
1640 | * runs the stencil test on the basic/front-facing stencil, leaving | ||
1641 | * the mask of passing stencil bits in stencil_pass_reg. This mask will | ||
1642 | * be used both to mask the set of active pixels, and also to | ||
1643 | * determine how the stencil buffer changes. | ||
1644 | * | ||
1645 | * This test will *not* change the value in mask_reg (because we don't | ||
1646 | * yet know whether to apply the two-sided stencil or one-sided stencil). | ||
1647 | */ | ||
1648 | spe_comment(f, 0, "Running basic stencil test"); | ||
1649 | stencil_pass_reg = spe_allocate_available_register(f); | ||
1650 | gen_stencil_test(f, stencil, ref_value, 0xff, mask_reg, fbS_reg, stencil_pass_reg); | ||
1651 | |||
1652 | /* Generate code that, given the mask of valid fragments and the | ||
1653 | * mask of valid fragments that passed the stencil test, computes | ||
1654 | * the mask of valid fragments that failed the stencil test. We | ||
1655 | * have to do this before we run a depth test (because the | ||
1656 | * depth test should not be performed on fragments that failed the | ||
1657 | * stencil test, and because the depth test will update the | ||
1658 | * mask of valid fragments based on the results of the depth test). | ||
1659 | */ | ||
1660 | spe_comment(f, 0, "Computing stencil fail mask and updating fragment mask"); | ||
1661 | stencil_fail_reg = spe_allocate_available_register(f); | ||
1662 | spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg); | ||
1663 | /* Now remove the stenciled-out pixels from the valid fragment mask, | ||
1664 | * so we can later use the valid fragment mask in the depth test. | ||
1665 | */ | ||
1666 | spe_and(f, mask_reg, mask_reg, stencil_pass_reg); | ||
1667 | |||
1668 | /* We may not need to calculate stencil values, if the writemask is off */ | ||
1669 | if (need_to_calculate_stencil_values) { | ||
1670 | /* Generate code that calculates exactly which stencil values we need, | ||
1671 | * without calculating the same value twice (say, if two different | ||
1672 | * stencil ops have the same value). This code will work for one-sided | ||
1673 | * and two-sided stenciling (so that we take into account that operations | ||
1674 | * may match between front and back stencils), and will also take into | ||
1675 | * account whether the depth test is enabled (if the depth test is off, | ||
1676 | * we don't need any of the zfail results, because the depth test always | ||
1677 | * is considered to pass if it is disabled). Any register value that | ||
1678 | * does not need to be calculated will come back with the same value | ||
1679 | * that's in fbS_reg. | ||
1680 | * | ||
1681 | * This function will allocate a variant number of registers that | ||
1682 | * will be released as part of the register set. | ||
1683 | */ | ||
1684 | spe_comment(f, 0, facing == CELL_FACING_FRONT | ||
1685 | ? "Computing front-facing stencil values" | ||
1686 | : "Computing back-facing stencil values"); | ||
1687 | gen_get_stencil_values(f, stencil, ref_value, dsa->depth.enabled, fbS_reg, | ||
1688 | &stencil_fail_values, &stencil_pass_depth_fail_values, | ||
1689 | &stencil_pass_depth_pass_values); | ||
1690 | } | ||
1691 | |||
1692 | /* We now have all the stencil values we need. We also need | ||
1693 | * the results of the depth test to figure out which | ||
1694 | * stencil values will become the new stencil values. (Even if | ||
1695 | * we aren't actually calculating stencil values, we need to apply | ||
1696 | * the depth test if it's enabled.) | ||
1697 | * | ||
1698 | * The code generated by gen_depth_test() returns the results of the | ||
1699 | * test in the given register, but also alters the mask_reg based | ||
1700 | * on the results of the test. | ||
1701 | */ | ||
1702 | if (dsa->depth.enabled) { | ||
1703 | spe_comment(f, 0, "Running stencil depth test"); | ||
1704 | zmask_reg = spe_allocate_available_register(f); | ||
1705 | modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg, | ||
1706 | fbZ_reg, zmask_reg); | ||
1707 | } | ||
1708 | |||
1709 | if (need_to_calculate_stencil_values) { | ||
1710 | |||
1711 | /* If we need to writemask the stencil values before going into | ||
1712 | * the stencil buffer, we'll have to use a new register to | ||
1713 | * hold the new values. If not, we can just keep using the | ||
1714 | * current register. | ||
1715 | */ | ||
1716 | if (need_to_writemask_stencil_values) { | ||
1717 | newS_reg = spe_allocate_available_register(f); | ||
1718 | spe_comment(f, 0, "Saving current stencil values for writemasking"); | ||
1719 | spe_move(f, newS_reg, fbS_reg); | ||
1720 | } | ||
1721 | else { | ||
1722 | newS_reg = fbS_reg; | ||
1723 | } | ||
1724 | |||
1725 | /* Merge in the selected stencil fail values */ | ||
1726 | if (stencil_fail_values != fbS_reg) { | ||
1727 | spe_comment(f, 0, "Loading stencil fail values"); | ||
1728 | spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg); | ||
1729 | modified_buffers = TRUE; | ||
1730 | } | ||
1731 | |||
1732 | /* Same for the stencil pass/depth fail values. If this calculation | ||
1733 | * is not needed (say, if depth test is off), then the | ||
1734 | * stencil_pass_depth_fail_values register will be equal to fbS_reg | ||
1735 | * and we'll skip the calculation. | ||
1736 | */ | ||
1737 | if (stencil_pass_depth_fail_values != fbS_reg) { | ||
1738 | /* We don't actually have a stencil pass/depth fail mask yet. | ||
1739 | * Calculate it here from the stencil passing mask and the | ||
1740 | * depth passing mask. Note that zmask_reg *must* have been | ||
1741 | * set above if we're here. | ||
1742 | */ | ||
1743 | uint stencil_pass_depth_fail_mask = | ||
1744 | spe_allocate_available_register(f); | ||
1745 | |||
1746 | spe_comment(f, 0, "Loading stencil pass/depth fail values"); | ||
1747 | spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg); | ||
1748 | |||
1749 | spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values, | ||
1750 | stencil_pass_depth_fail_mask); | ||
1751 | |||
1752 | spe_release_register(f, stencil_pass_depth_fail_mask); | ||
1753 | modified_buffers = TRUE; | ||
1754 | } | ||
1755 | |||
1756 | /* Same for the stencil pass/depth pass mask. Note that we | ||
1757 | * *can* get here with zmask_reg being unset (if the depth | ||
1758 | * test is off but the stencil test is on). In this case, | ||
1759 | * we assume the depth test passes, and don't need to mask | ||
1760 | * the stencil pass mask with the Z mask. | ||
1761 | */ | ||
1762 | if (stencil_pass_depth_pass_values != fbS_reg) { | ||
1763 | if (dsa->depth.enabled) { | ||
1764 | uint stencil_pass_depth_pass_mask = spe_allocate_available_register(f); | ||
1765 | /* We'll need a separate register */ | ||
1766 | spe_comment(f, 0, "Loading stencil pass/depth pass values"); | ||
1767 | spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg); | ||
1768 | spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask); | ||
1769 | spe_release_register(f, stencil_pass_depth_pass_mask); | ||
1770 | } | ||
1771 | else { | ||
1772 | /* We can use the same stencil-pass register */ | ||
1773 | spe_comment(f, 0, "Loading stencil pass values"); | ||
1774 | spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_reg); | ||
1775 | } | ||
1776 | modified_buffers = TRUE; | ||
1777 | } | ||
1778 | |||
1779 | /* Almost done. If we need to writemask, do it now, leaving the | ||
1780 | * results in the fbS_reg register passed in. If we don't need | ||
1781 | * to writemask, then the results are *already* in the fbS_reg, | ||
1782 | * so there's nothing more to do. | ||
1783 | */ | ||
1784 | |||
1785 | if (need_to_writemask_stencil_values && modified_buffers) { | ||
1786 | /* The Select Bytes command makes a fine writemask. Where | ||
1787 | * the mask is 0, the first (original) values are retained, | ||
1788 | * effectively masking out changes. Where the mask is 1, the | ||
1789 | * second (new) values are retained, incorporating changes. | ||
1790 | */ | ||
1791 | spe_comment(f, 0, "Writemasking new stencil values"); | ||
1792 | spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg); | ||
1793 | } | ||
1794 | |||
1795 | } /* done calculating stencil values */ | ||
1796 | |||
1797 | /* The stencil and/or depth values have been applied, and the | ||
1798 | * mask_reg, fbS_reg, and fbZ_reg values have been updated. | ||
1799 | * We're all done, except that we've allocated a fair number | ||
1800 | * of registers that we didn't bother tracking. Release all | ||
1801 | * those registers as part of the register set, and go home. | ||
1802 | */ | ||
1803 | spe_comment(f, 0, "Releasing stencil register set"); | ||
1804 | spe_release_register_set(f); | ||
1805 | |||
1806 | /* Return TRUE if we could have modified the stencil and/or | ||
1807 | * depth buffers. | ||
1808 | */ | ||
1809 | return modified_buffers; | ||
1810 | } | ||
1811 | |||
1812 | |||
1813 | /** | ||
1814 | * Generate depth and/or stencil test code. | ||
1815 | * \param cell context | ||
1816 | * \param dsa depth/stencil/alpha state | ||
1817 | * \param f spe function to emit | ||
1818 | * \param facing either CELL_FACING_FRONT or CELL_FACING_BACK | ||
1819 | * \param mask_reg register containing the pixel alive/dead mask | ||
1820 | * \param depth_tile_reg register containing address of z/stencil tile | ||
1821 | * \param quad_offset_reg offset to quad from start of tile | ||
1822 | * \param fragZ_reg register containg fragment Z values | ||
1823 | */ | ||
1824 | static void | ||
1825 | gen_depth_stencil(struct cell_context *cell, | ||
1826 | const struct pipe_depth_stencil_alpha_state *dsa, | ||
1827 | const struct pipe_stencil_ref *stencil_ref, | ||
1828 | struct spe_function *f, | ||
1829 | uint facing, | ||
1830 | int mask_reg, | ||
1831 | int depth_tile_reg, | ||
1832 | int quad_offset_reg, | ||
1833 | int fragZ_reg) | ||
1834 | |||
1835 | { | ||
1836 | const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; | ||
1837 | boolean write_depth_stencil; | ||
1838 | |||
1839 | /* framebuffer's combined z/stencil values register */ | ||
1840 | int fbZS_reg = spe_allocate_available_register(f); | ||
1841 | |||
1842 | /* Framebufer Z values register */ | ||
1843 | int fbZ_reg = spe_allocate_available_register(f); | ||
1844 | |||
1845 | /* Framebuffer stencil values register (may not be used) */ | ||
1846 | int fbS_reg = spe_allocate_available_register(f); | ||
1847 | |||
1848 | /* 24-bit mask register (may not be used) */ | ||
1849 | int zmask_reg = spe_allocate_available_register(f); | ||
1850 | |||
1851 | /** | ||
1852 | * The following code: | ||
1853 | * 1. fetch quad of packed Z/S values from the framebuffer tile. | ||
1854 | * 2. extract the separate the Z and S values from packed values | ||
1855 | * 3. convert fragment Z values from float in [0,1] to 32/24/16-bit ints | ||
1856 | * | ||
1857 | * The instructions for doing this are interleaved for better performance. | ||
1858 | */ | ||
1859 | spe_comment(f, 0, "Fetch Z/stencil quad from tile"); | ||
1860 | |||
1861 | switch(zs_format) { | ||
1862 | case PIPE_FORMAT_Z24_UNORM_S8_UINT: /* fall through */ | ||
1863 | case PIPE_FORMAT_Z24X8_UNORM: | ||
1864 | /* prepare mask to extract Z vals from ZS vals */ | ||
1865 | spe_load_uint(f, zmask_reg, 0x00ffffff); | ||
1866 | |||
1867 | /* convert fragment Z from [0,1] to 32-bit ints */ | ||
1868 | spe_cfltu(f, fragZ_reg, fragZ_reg, 32); | ||
1869 | |||
1870 | /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ | ||
1871 | spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); | ||
1872 | |||
1873 | /* right shift 32-bit fragment Z to 24 bits */ | ||
1874 | spe_rotmi(f, fragZ_reg, fragZ_reg, -8); | ||
1875 | |||
1876 | /* extract 24-bit Z values from ZS values by masking */ | ||
1877 | spe_and(f, fbZ_reg, fbZS_reg, zmask_reg); | ||
1878 | |||
1879 | /* extract 8-bit stencil values by shifting */ | ||
1880 | spe_rotmi(f, fbS_reg, fbZS_reg, -24); | ||
1881 | break; | ||
1882 | |||
1883 | case PIPE_FORMAT_S8_UINT_Z24_UNORM: /* fall through */ | ||
1884 | case PIPE_FORMAT_X8Z24_UNORM: | ||
1885 | /* convert fragment Z from [0,1] to 32-bit ints */ | ||
1886 | spe_cfltu(f, fragZ_reg, fragZ_reg, 32); | ||
1887 | |||
1888 | /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ | ||
1889 | spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); | ||
1890 | |||
1891 | /* right shift 32-bit fragment Z to 24 bits */ | ||
1892 | spe_rotmi(f, fragZ_reg, fragZ_reg, -8); | ||
1893 | |||
1894 | /* extract 24-bit Z values from ZS values by shifting */ | ||
1895 | spe_rotmi(f, fbZ_reg, fbZS_reg, -8); | ||
1896 | |||
1897 | /* extract 8-bit stencil values by masking */ | ||
1898 | spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff); | ||
1899 | break; | ||
1900 | |||
1901 | case PIPE_FORMAT_Z32_UNORM: | ||
1902 | /* Load: fbZ_reg = memory[depth_tile_reg + offset_reg] */ | ||
1903 | spe_lqx(f, fbZ_reg, depth_tile_reg, quad_offset_reg); | ||
1904 | |||
1905 | /* convert fragment Z from [0,1] to 32-bit ints */ | ||
1906 | spe_cfltu(f, fragZ_reg, fragZ_reg, 32); | ||
1907 | |||
1908 | /* No stencil, so can't do anything there */ | ||
1909 | break; | ||
1910 | |||
1911 | case PIPE_FORMAT_Z16_UNORM: | ||
1912 | /* XXX This code for 16bpp Z is broken! */ | ||
1913 | |||
1914 | /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ | ||
1915 | spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); | ||
1916 | |||
1917 | /* Copy over 4 32-bit values */ | ||
1918 | spe_move(f, fbZ_reg, fbZS_reg); | ||
1919 | |||
1920 | /* convert Z from [0,1] to 16-bit ints */ | ||
1921 | spe_cfltu(f, fragZ_reg, fragZ_reg, 32); | ||
1922 | spe_rotmi(f, fragZ_reg, fragZ_reg, -16); | ||
1923 | /* No stencil */ | ||
1924 | break; | ||
1925 | |||
1926 | default: | ||
1927 | ASSERT(0); /* invalid format */ | ||
1928 | } | ||
1929 | |||
1930 | /* If stencil is enabled, use the stencil-specific code | ||
1931 | * generator to generate both the stencil and depth (if needed) | ||
1932 | * tests. Otherwise, if only depth is enabled, generate | ||
1933 | * a quick depth test. The test generators themselves will | ||
1934 | * report back whether the depth/stencil buffer has to be | ||
1935 | * written back. | ||
1936 | */ | ||
1937 | if (dsa->stencil[0].enabled) { | ||
1938 | /* This will perform the stencil and depth tests, and update | ||
1939 | * the mask_reg, fbZ_reg, and fbS_reg as required by the | ||
1940 | * tests. | ||
1941 | */ | ||
1942 | ASSERT(fbS_reg >= 0); | ||
1943 | spe_comment(f, 0, "Perform stencil test"); | ||
1944 | |||
1945 | /* Note that fbZ_reg may not be set on entry, if stenciling | ||
1946 | * is enabled but there's no Z-buffer. The | ||
1947 | * gen_stencil_depth_test() function must ignore the | ||
1948 | * fbZ_reg register if depth is not enabled. | ||
1949 | */ | ||
1950 | write_depth_stencil = gen_stencil_depth_test(f, dsa, stencil_ref, facing, | ||
1951 | mask_reg, fragZ_reg, | ||
1952 | fbZ_reg, fbS_reg); | ||
1953 | } | ||
1954 | else if (dsa->depth.enabled) { | ||
1955 | int zmask_reg = spe_allocate_available_register(f); | ||
1956 | ASSERT(fbZ_reg >= 0); | ||
1957 | spe_comment(f, 0, "Perform depth test"); | ||
1958 | write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg, | ||
1959 | fbZ_reg, zmask_reg); | ||
1960 | spe_release_register(f, zmask_reg); | ||
1961 | } | ||
1962 | else { | ||
1963 | write_depth_stencil = FALSE; | ||
1964 | } | ||
1965 | |||
1966 | if (write_depth_stencil) { | ||
1967 | /* Merge latest Z and Stencil values into fbZS_reg. | ||
1968 | * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. | ||
1969 | * fbS_reg has four 8-bit Z values in bits [7..0]. | ||
1970 | */ | ||
1971 | spe_comment(f, 0, "Store quad's depth/stencil values in tile"); | ||
1972 | if (zs_format == PIPE_FORMAT_Z24_UNORM_S8_UINT || | ||
1973 | zs_format == PIPE_FORMAT_Z24X8_UNORM) { | ||
1974 | spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ | ||
1975 | spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ | ||
1976 | } | ||
1977 | else if (zs_format == PIPE_FORMAT_S8_UINT_Z24_UNORM || | ||
1978 | zs_format == PIPE_FORMAT_X8Z24_UNORM) { | ||
1979 | spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ | ||
1980 | spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ | ||
1981 | } | ||
1982 | else if (zs_format == PIPE_FORMAT_Z32_UNORM) { | ||
1983 | spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ | ||
1984 | } | ||
1985 | else if (zs_format == PIPE_FORMAT_Z16_UNORM) { | ||
1986 | spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ | ||
1987 | } | ||
1988 | else if (zs_format == PIPE_FORMAT_S8_UINT) { | ||
1989 | ASSERT(0); /* XXX to do */ | ||
1990 | } | ||
1991 | else { | ||
1992 | ASSERT(0); /* bad zs_format */ | ||
1993 | } | ||
1994 | |||
1995 | /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */ | ||
1996 | spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); | ||
1997 | } | ||
1998 | |||
1999 | /* Don't need these any more */ | ||
2000 | spe_release_register(f, fbZS_reg); | ||
2001 | spe_release_register(f, fbZ_reg); | ||
2002 | spe_release_register(f, fbS_reg); | ||
2003 | spe_release_register(f, zmask_reg); | ||
2004 | } | ||
2005 | |||
2006 | |||
2007 | |||
2008 | /** | ||
2009 | * Generate SPE code to implement the fragment operations (alpha test, | ||
2010 | * depth test, stencil test, blending, colormask, and final | ||
2011 | * framebuffer write) as specified by the current context state. | ||
2012 | * | ||
2013 | * Logically, this code will be called after running the fragment | ||
2014 | * shader. But under some circumstances we could run some of this | ||
2015 | * code before the fragment shader to cull fragments/quads that are | ||
2016 | * totally occluded/discarded. | ||
2017 | * | ||
2018 | * XXX we only support PIPE_FORMAT_S8_UINT_Z24_UNORM z/stencil buffer right now. | ||
2019 | * | ||
2020 | * See the spu_default_fragment_ops() function to see how the per-fragment | ||
2021 | * operations would be done with ordinary C code. | ||
2022 | * The code we generate here though has no branches, is SIMD, etc and | ||
2023 | * should be much faster. | ||
2024 | * | ||
2025 | * \param cell the rendering context (in) | ||
2026 | * \param facing whether the generated code is for front-facing or | ||
2027 | * back-facing fragments | ||
2028 | * \param f the generated function (in/out); on input, the function | ||
2029 | * must already have been initialized. On exit, whatever | ||
2030 | * instructions within the generated function have had | ||
2031 | * the fragment ops appended. | ||
2032 | */ | ||
2033 | void | ||
2034 | cell_gen_fragment_function(struct cell_context *cell, | ||
2035 | const uint facing, | ||
2036 | struct spe_function *f) | ||
2037 | { | ||
2038 | const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil; | ||
2039 | const struct pipe_stencil_ref *stencil_ref = &cell->stencil_ref; | ||
2040 | const struct pipe_blend_state *blend = cell->blend; | ||
2041 | const struct pipe_blend_color *blend_color = &cell->blend_color; | ||
2042 | const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; | ||
2043 | |||
2044 | /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ | ||
2045 | const int x_reg = 3; /* uint */ | ||
2046 | const int y_reg = 4; /* uint */ | ||
2047 | const int color_tile_reg = 5; /* tile_t * */ | ||
2048 | const int depth_tile_reg = 6; /* tile_t * */ | ||
2049 | const int fragZ_reg = 7; /* vector float */ | ||
2050 | const int fragR_reg = 8; /* vector float */ | ||
2051 | const int fragG_reg = 9; /* vector float */ | ||
2052 | const int fragB_reg = 10; /* vector float */ | ||
2053 | const int fragA_reg = 11; /* vector float */ | ||
2054 | const int mask_reg = 12; /* vector uint */ | ||
2055 | |||
2056 | ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK); | ||
2057 | |||
2058 | /* offset of quad from start of tile | ||
2059 | * XXX assuming 4-byte pixels for color AND Z/stencil!!!! | ||
2060 | */ | ||
2061 | int quad_offset_reg; | ||
2062 | |||
2063 | int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ | ||
2064 | |||
2065 | if (cell->debug_flags & CELL_DEBUG_ASM) { | ||
2066 | spe_print_code(f, TRUE); | ||
2067 | spe_indent(f, 8); | ||
2068 | spe_comment(f, -4, facing == CELL_FACING_FRONT | ||
2069 | ? "Begin front-facing per-fragment ops" | ||
2070 | : "Begin back-facing per-fragment ops"); | ||
2071 | } | ||
2072 | |||
2073 | spe_allocate_register(f, x_reg); | ||
2074 | spe_allocate_register(f, y_reg); | ||
2075 | spe_allocate_register(f, color_tile_reg); | ||
2076 | spe_allocate_register(f, depth_tile_reg); | ||
2077 | spe_allocate_register(f, fragZ_reg); | ||
2078 | spe_allocate_register(f, fragR_reg); | ||
2079 | spe_allocate_register(f, fragG_reg); | ||
2080 | spe_allocate_register(f, fragB_reg); | ||
2081 | spe_allocate_register(f, fragA_reg); | ||
2082 | spe_allocate_register(f, mask_reg); | ||
2083 | |||
2084 | quad_offset_reg = spe_allocate_available_register(f); | ||
2085 | fbRGBA_reg = spe_allocate_available_register(f); | ||
2086 | |||
2087 | /* compute offset of quad from start of tile, in bytes */ | ||
2088 | { | ||
2089 | int x2_reg = spe_allocate_available_register(f); | ||
2090 | int y2_reg = spe_allocate_available_register(f); | ||
2091 | |||
2092 | ASSERT(TILE_SIZE == 32); | ||
2093 | |||
2094 | spe_comment(f, 0, "Compute quad offset within tile"); | ||
2095 | spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */ | ||
2096 | spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ | ||
2097 | spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ | ||
2098 | spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */ | ||
2099 | spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */ | ||
2100 | |||
2101 | spe_release_register(f, x2_reg); | ||
2102 | spe_release_register(f, y2_reg); | ||
2103 | } | ||
2104 | |||
2105 | /* Generate the alpha test, if needed. */ | ||
2106 | if (dsa->alpha.enabled) { | ||
2107 | gen_alpha_test(dsa, f, mask_reg, fragA_reg); | ||
2108 | } | ||
2109 | |||
2110 | /* generate depth and/or stencil test code */ | ||
2111 | if (dsa->depth.enabled || dsa->stencil[0].enabled) { | ||
2112 | gen_depth_stencil(cell, dsa, stencil_ref, f, | ||
2113 | facing, | ||
2114 | mask_reg, | ||
2115 | depth_tile_reg, | ||
2116 | quad_offset_reg, | ||
2117 | fragZ_reg); | ||
2118 | } | ||
2119 | |||
2120 | /* Get framebuffer quad/colors. We'll need these for blending, | ||
2121 | * color masking, and to obey the quad/pixel mask. | ||
2122 | * Load: fbRGBA_reg = memory[color_tile + quad_offset] | ||
2123 | * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking | ||
2124 | * we could skip this load. | ||
2125 | */ | ||
2126 | spe_comment(f, 0, "Fetch quad colors from tile"); | ||
2127 | spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); | ||
2128 | |||
2129 | if (blend->rt[0].blend_enable) { | ||
2130 | spe_comment(f, 0, "Perform blending"); | ||
2131 | gen_blend(blend, blend_color, f, color_format, | ||
2132 | fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); | ||
2133 | } | ||
2134 | |||
2135 | /* | ||
2136 | * Write fragment colors to framebuffer/tile. | ||
2137 | * This involves converting the fragment colors from float[4] to the | ||
2138 | * tile's specific format and obeying the quad/pixel mask. | ||
2139 | */ | ||
2140 | { | ||
2141 | int rgba_reg = spe_allocate_available_register(f); | ||
2142 | |||
2143 | /* Pack four float colors as four 32-bit int colors */ | ||
2144 | spe_comment(f, 0, "Convert float quad colors to packed int framebuffer colors"); | ||
2145 | gen_pack_colors(f, color_format, | ||
2146 | fragR_reg, fragG_reg, fragB_reg, fragA_reg, | ||
2147 | rgba_reg); | ||
2148 | |||
2149 | if (blend->logicop_enable) { | ||
2150 | spe_comment(f, 0, "Compute logic op"); | ||
2151 | gen_logicop(blend, f, rgba_reg, fbRGBA_reg); | ||
2152 | } | ||
2153 | |||
2154 | if (blend->rt[0].colormask != PIPE_MASK_RGBA) { | ||
2155 | spe_comment(f, 0, "Compute color mask"); | ||
2156 | gen_colormask(f, blend->rt[0].colormask, color_format, rgba_reg, fbRGBA_reg); | ||
2157 | } | ||
2158 | |||
2159 | /* Mix fragment colors with framebuffer colors using the quad/pixel mask: | ||
2160 | * if (mask[i]) | ||
2161 | * rgba[i] = rgba[i]; | ||
2162 | * else | ||
2163 | * rgba[i] = framebuffer[i]; | ||
2164 | */ | ||
2165 | spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg); | ||
2166 | |||
2167 | /* Store updated quad in tile: | ||
2168 | * memory[color_tile + quad_offset] = rgba_reg; | ||
2169 | */ | ||
2170 | spe_comment(f, 0, "Store quad colors into color tile"); | ||
2171 | spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); | ||
2172 | |||
2173 | spe_release_register(f, rgba_reg); | ||
2174 | } | ||
2175 | |||
2176 | //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); | ||
2177 | |||
2178 | spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */ | ||
2179 | |||
2180 | spe_release_register(f, fbRGBA_reg); | ||
2181 | spe_release_register(f, quad_offset_reg); | ||
2182 | |||
2183 | if (cell->debug_flags & CELL_DEBUG_ASM) { | ||
2184 | char buffer[1024]; | ||
2185 | sprintf(buffer, "End %s-facing per-fragment ops: %d instructions", | ||
2186 | facing == CELL_FACING_FRONT ? "front" : "back", f->num_inst); | ||
2187 | spe_comment(f, -4, buffer); | ||
2188 | } | ||
2189 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h deleted file mode 100644 index 21b35d1fafe..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h +++ /dev/null | |||
@@ -1,38 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | #ifndef CELL_GEN_FRAGMENT_H | ||
30 | #define CELL_GEN_FRAGMENT_H | ||
31 | |||
32 | |||
33 | extern void | ||
34 | cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f); | ||
35 | |||
36 | |||
37 | #endif /* CELL_GEN_FRAGMENT_H */ | ||
38 | |||
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c deleted file mode 100644 index 223adda48f0..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ /dev/null | |||
@@ -1,473 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | /* Authors: | ||
29 | * Keith Whitwell <keith@tungstengraphics.com> | ||
30 | * Brian Paul | ||
31 | */ | ||
32 | |||
33 | #include "util/u_memory.h" | ||
34 | #include "util/u_inlines.h" | ||
35 | #include "draw/draw_context.h" | ||
36 | #include "cell_context.h" | ||
37 | #include "cell_flush.h" | ||
38 | #include "cell_pipe_state.h" | ||
39 | #include "cell_state.h" | ||
40 | #include "cell_texture.h" | ||
41 | |||
42 | |||
43 | |||
44 | static void * | ||
45 | cell_create_blend_state(struct pipe_context *pipe, | ||
46 | const struct pipe_blend_state *blend) | ||
47 | { | ||
48 | return mem_dup(blend, sizeof(*blend)); | ||
49 | } | ||
50 | |||
51 | |||
52 | static void | ||
53 | cell_bind_blend_state(struct pipe_context *pipe, void *blend) | ||
54 | { | ||
55 | struct cell_context *cell = cell_context(pipe); | ||
56 | |||
57 | draw_flush(cell->draw); | ||
58 | |||
59 | cell->blend = (struct pipe_blend_state *) blend; | ||
60 | cell->dirty |= CELL_NEW_BLEND; | ||
61 | } | ||
62 | |||
63 | |||
64 | static void | ||
65 | cell_delete_blend_state(struct pipe_context *pipe, void *blend) | ||
66 | { | ||
67 | FREE(blend); | ||
68 | } | ||
69 | |||
70 | |||
71 | static void | ||
72 | cell_set_blend_color(struct pipe_context *pipe, | ||
73 | const struct pipe_blend_color *blend_color) | ||
74 | { | ||
75 | struct cell_context *cell = cell_context(pipe); | ||
76 | |||
77 | draw_flush(cell->draw); | ||
78 | |||
79 | cell->blend_color = *blend_color; | ||
80 | |||
81 | cell->dirty |= CELL_NEW_BLEND; | ||
82 | } | ||
83 | |||
84 | |||
85 | |||
86 | |||
87 | static void * | ||
88 | cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, | ||
89 | const struct pipe_depth_stencil_alpha_state *dsa) | ||
90 | { | ||
91 | return mem_dup(dsa, sizeof(*dsa)); | ||
92 | } | ||
93 | |||
94 | |||
95 | static void | ||
96 | cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, | ||
97 | void *dsa) | ||
98 | { | ||
99 | struct cell_context *cell = cell_context(pipe); | ||
100 | |||
101 | draw_flush(cell->draw); | ||
102 | |||
103 | cell->depth_stencil = (struct pipe_depth_stencil_alpha_state *) dsa; | ||
104 | cell->dirty |= CELL_NEW_DEPTH_STENCIL; | ||
105 | } | ||
106 | |||
107 | |||
108 | static void | ||
109 | cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa) | ||
110 | { | ||
111 | FREE(dsa); | ||
112 | } | ||
113 | |||
114 | |||
115 | static void | ||
116 | cell_set_stencil_ref(struct pipe_context *pipe, | ||
117 | const struct pipe_stencil_ref *stencil_ref) | ||
118 | { | ||
119 | struct cell_context *cell = cell_context(pipe); | ||
120 | |||
121 | draw_flush(cell->draw); | ||
122 | |||
123 | cell->stencil_ref = *stencil_ref; | ||
124 | |||
125 | cell->dirty |= CELL_NEW_DEPTH_STENCIL; | ||
126 | } | ||
127 | |||
128 | |||
129 | static void | ||
130 | cell_set_clip_state(struct pipe_context *pipe, | ||
131 | const struct pipe_clip_state *clip) | ||
132 | { | ||
133 | struct cell_context *cell = cell_context(pipe); | ||
134 | |||
135 | /* pass the clip state to the draw module */ | ||
136 | draw_set_clip_state(cell->draw, clip); | ||
137 | } | ||
138 | |||
139 | |||
140 | static void | ||
141 | cell_set_sample_mask(struct pipe_context *pipe, | ||
142 | unsigned sample_mask) | ||
143 | { | ||
144 | } | ||
145 | |||
146 | |||
147 | /* Called when driver state tracker notices changes to the viewport | ||
148 | * matrix: | ||
149 | */ | ||
150 | static void | ||
151 | cell_set_viewport_state( struct pipe_context *pipe, | ||
152 | const struct pipe_viewport_state *viewport ) | ||
153 | { | ||
154 | struct cell_context *cell = cell_context(pipe); | ||
155 | |||
156 | cell->viewport = *viewport; /* struct copy */ | ||
157 | cell->dirty |= CELL_NEW_VIEWPORT; | ||
158 | |||
159 | /* pass the viewport info to the draw module */ | ||
160 | draw_set_viewport_state(cell->draw, viewport); | ||
161 | |||
162 | /* Using tnl/ and vf/ modules is temporary while getting started. | ||
163 | * Full pipe will have vertex shader, vertex fetch of its own. | ||
164 | */ | ||
165 | } | ||
166 | |||
167 | |||
168 | static void | ||
169 | cell_set_scissor_state( struct pipe_context *pipe, | ||
170 | const struct pipe_scissor_state *scissor ) | ||
171 | { | ||
172 | struct cell_context *cell = cell_context(pipe); | ||
173 | |||
174 | memcpy( &cell->scissor, scissor, sizeof(*scissor) ); | ||
175 | cell->dirty |= CELL_NEW_SCISSOR; | ||
176 | } | ||
177 | |||
178 | |||
179 | static void | ||
180 | cell_set_polygon_stipple( struct pipe_context *pipe, | ||
181 | const struct pipe_poly_stipple *stipple ) | ||
182 | { | ||
183 | struct cell_context *cell = cell_context(pipe); | ||
184 | |||
185 | memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) ); | ||
186 | cell->dirty |= CELL_NEW_STIPPLE; | ||
187 | } | ||
188 | |||
189 | |||
190 | |||
191 | static void * | ||
192 | cell_create_rasterizer_state(struct pipe_context *pipe, | ||
193 | const struct pipe_rasterizer_state *rasterizer) | ||
194 | { | ||
195 | return mem_dup(rasterizer, sizeof(*rasterizer)); | ||
196 | } | ||
197 | |||
198 | |||
199 | static void | ||
200 | cell_bind_rasterizer_state(struct pipe_context *pipe, void *rast) | ||
201 | { | ||
202 | struct pipe_rasterizer_state *rasterizer = | ||
203 | (struct pipe_rasterizer_state *) rast; | ||
204 | struct cell_context *cell = cell_context(pipe); | ||
205 | |||
206 | /* pass-through to draw module */ | ||
207 | draw_set_rasterizer_state(cell->draw, rasterizer, rast); | ||
208 | |||
209 | cell->rasterizer = rasterizer; | ||
210 | |||
211 | cell->dirty |= CELL_NEW_RASTERIZER; | ||
212 | } | ||
213 | |||
214 | |||
215 | static void | ||
216 | cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer) | ||
217 | { | ||
218 | FREE(rasterizer); | ||
219 | } | ||
220 | |||
221 | |||
222 | |||
223 | static void * | ||
224 | cell_create_sampler_state(struct pipe_context *pipe, | ||
225 | const struct pipe_sampler_state *sampler) | ||
226 | { | ||
227 | return mem_dup(sampler, sizeof(*sampler)); | ||
228 | } | ||
229 | |||
230 | |||
231 | static void | ||
232 | cell_bind_sampler_states(struct pipe_context *pipe, | ||
233 | unsigned num, void **samplers) | ||
234 | { | ||
235 | struct cell_context *cell = cell_context(pipe); | ||
236 | uint i, changed = 0x0; | ||
237 | |||
238 | assert(num <= CELL_MAX_SAMPLERS); | ||
239 | |||
240 | draw_flush(cell->draw); | ||
241 | |||
242 | for (i = 0; i < CELL_MAX_SAMPLERS; i++) { | ||
243 | struct pipe_sampler_state *new_samp = i < num ? samplers[i] : NULL; | ||
244 | if (cell->sampler[i] != new_samp) { | ||
245 | cell->sampler[i] = new_samp; | ||
246 | changed |= (1 << i); | ||
247 | } | ||
248 | } | ||
249 | |||
250 | if (changed) { | ||
251 | cell->dirty |= CELL_NEW_SAMPLER; | ||
252 | cell->dirty_samplers |= changed; | ||
253 | } | ||
254 | } | ||
255 | |||
256 | |||
257 | static void | ||
258 | cell_delete_sampler_state(struct pipe_context *pipe, | ||
259 | void *sampler) | ||
260 | { | ||
261 | FREE( sampler ); | ||
262 | } | ||
263 | |||
264 | |||
265 | |||
266 | static void | ||
267 | cell_set_fragment_sampler_views(struct pipe_context *pipe, | ||
268 | unsigned num, | ||
269 | struct pipe_sampler_view **views) | ||
270 | { | ||
271 | struct cell_context *cell = cell_context(pipe); | ||
272 | uint i, changed = 0x0; | ||
273 | |||
274 | assert(num <= CELL_MAX_SAMPLERS); | ||
275 | |||
276 | for (i = 0; i < CELL_MAX_SAMPLERS; i++) { | ||
277 | struct pipe_sampler_view *new_view = i < num ? views[i] : NULL; | ||
278 | struct pipe_sampler_view *old_view = cell->fragment_sampler_views[i]; | ||
279 | |||
280 | if (old_view != new_view) { | ||
281 | struct pipe_resource *new_tex = new_view ? new_view->texture : NULL; | ||
282 | |||
283 | pipe_sampler_view_reference(&cell->fragment_sampler_views[i], | ||
284 | new_view); | ||
285 | pipe_resource_reference((struct pipe_resource **) &cell->texture[i], | ||
286 | (struct pipe_resource *) new_tex); | ||
287 | |||
288 | changed |= (1 << i); | ||
289 | } | ||
290 | } | ||
291 | |||
292 | cell->num_textures = num; | ||
293 | |||
294 | if (changed) { | ||
295 | cell->dirty |= CELL_NEW_TEXTURE; | ||
296 | cell->dirty_textures |= changed; | ||
297 | } | ||
298 | } | ||
299 | |||
300 | |||
301 | static struct pipe_sampler_view * | ||
302 | cell_create_sampler_view(struct pipe_context *pipe, | ||
303 | struct pipe_resource *texture, | ||
304 | const struct pipe_sampler_view *templ) | ||
305 | { | ||
306 | struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); | ||
307 | |||
308 | if (view) { | ||
309 | *view = *templ; | ||
310 | view->reference.count = 1; | ||
311 | view->texture = NULL; | ||
312 | pipe_resource_reference(&view->texture, texture); | ||
313 | view->context = pipe; | ||
314 | } | ||
315 | |||
316 | return view; | ||
317 | } | ||
318 | |||
319 | |||
320 | static void | ||
321 | cell_sampler_view_destroy(struct pipe_context *pipe, | ||
322 | struct pipe_sampler_view *view) | ||
323 | { | ||
324 | pipe_resource_reference(&view->texture, NULL); | ||
325 | FREE(view); | ||
326 | } | ||
327 | |||
328 | |||
329 | /** | ||
330 | * Map color and z/stencil framebuffer surfaces. | ||
331 | */ | ||
332 | static void | ||
333 | cell_map_surfaces(struct cell_context *cell) | ||
334 | { | ||
335 | #if 0 | ||
336 | struct pipe_screen *screen = cell->pipe.screen; | ||
337 | #endif | ||
338 | uint i; | ||
339 | |||
340 | for (i = 0; i < 1; i++) { | ||
341 | struct pipe_surface *ps = cell->framebuffer.cbufs[i]; | ||
342 | if (ps) { | ||
343 | struct cell_resource *ct = cell_resource(ps->texture); | ||
344 | #if 0 | ||
345 | cell->cbuf_map[i] = screen->buffer_map(screen, | ||
346 | ct->buffer, | ||
347 | (PIPE_BUFFER_USAGE_GPU_READ | | ||
348 | PIPE_BUFFER_USAGE_GPU_WRITE)); | ||
349 | #else | ||
350 | cell->cbuf_map[i] = ct->data; | ||
351 | #endif | ||
352 | } | ||
353 | } | ||
354 | |||
355 | { | ||
356 | struct pipe_surface *ps = cell->framebuffer.zsbuf; | ||
357 | if (ps) { | ||
358 | struct cell_resource *ct = cell_resource(ps->texture); | ||
359 | #if 0 | ||
360 | cell->zsbuf_map = screen->buffer_map(screen, | ||
361 | ct->buffer, | ||
362 | (PIPE_BUFFER_USAGE_GPU_READ | | ||
363 | PIPE_BUFFER_USAGE_GPU_WRITE)); | ||
364 | #else | ||
365 | cell->zsbuf_map = ct->data; | ||
366 | #endif | ||
367 | } | ||
368 | } | ||
369 | } | ||
370 | |||
371 | |||
372 | /** | ||
373 | * Unmap color and z/stencil framebuffer surfaces. | ||
374 | */ | ||
375 | static void | ||
376 | cell_unmap_surfaces(struct cell_context *cell) | ||
377 | { | ||
378 | /*struct pipe_screen *screen = cell->pipe.screen;*/ | ||
379 | uint i; | ||
380 | |||
381 | for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { | ||
382 | struct pipe_surface *ps = cell->framebuffer.cbufs[i]; | ||
383 | if (ps && cell->cbuf_map[i]) { | ||
384 | /*struct cell_resource *ct = cell_resource(ps->texture);*/ | ||
385 | assert(ps->texture); | ||
386 | /*assert(ct->buffer);*/ | ||
387 | |||
388 | /*screen->buffer_unmap(screen, ct->buffer);*/ | ||
389 | cell->cbuf_map[i] = NULL; | ||
390 | } | ||
391 | } | ||
392 | |||
393 | { | ||
394 | struct pipe_surface *ps = cell->framebuffer.zsbuf; | ||
395 | if (ps && cell->zsbuf_map) { | ||
396 | /*struct cell_resource *ct = cell_resource(ps->texture);*/ | ||
397 | /*screen->buffer_unmap(screen, ct->buffer);*/ | ||
398 | cell->zsbuf_map = NULL; | ||
399 | } | ||
400 | } | ||
401 | } | ||
402 | |||
403 | |||
404 | static void | ||
405 | cell_set_framebuffer_state(struct pipe_context *pipe, | ||
406 | const struct pipe_framebuffer_state *fb) | ||
407 | { | ||
408 | struct cell_context *cell = cell_context(pipe); | ||
409 | |||
410 | if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) { | ||
411 | uint i; | ||
412 | |||
413 | /* unmap old surfaces */ | ||
414 | cell_unmap_surfaces(cell); | ||
415 | |||
416 | /* Finish any pending rendering to the current surface before | ||
417 | * installing a new surface! | ||
418 | */ | ||
419 | cell_flush_int(cell, CELL_FLUSH_WAIT); | ||
420 | |||
421 | /* update my state | ||
422 | * (this is also where old surfaces will finally get freed) | ||
423 | */ | ||
424 | cell->framebuffer.width = fb->width; | ||
425 | cell->framebuffer.height = fb->height; | ||
426 | cell->framebuffer.nr_cbufs = fb->nr_cbufs; | ||
427 | for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { | ||
428 | pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]); | ||
429 | } | ||
430 | pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf); | ||
431 | |||
432 | /* map new surfaces */ | ||
433 | cell_map_surfaces(cell); | ||
434 | |||
435 | cell->dirty |= CELL_NEW_FRAMEBUFFER; | ||
436 | } | ||
437 | } | ||
438 | |||
439 | |||
440 | void | ||
441 | cell_init_state_functions(struct cell_context *cell) | ||
442 | { | ||
443 | cell->pipe.create_blend_state = cell_create_blend_state; | ||
444 | cell->pipe.bind_blend_state = cell_bind_blend_state; | ||
445 | cell->pipe.delete_blend_state = cell_delete_blend_state; | ||
446 | |||
447 | cell->pipe.create_sampler_state = cell_create_sampler_state; | ||
448 | cell->pipe.bind_fragment_sampler_states = cell_bind_sampler_states; | ||
449 | cell->pipe.delete_sampler_state = cell_delete_sampler_state; | ||
450 | |||
451 | cell->pipe.set_fragment_sampler_views = cell_set_fragment_sampler_views; | ||
452 | cell->pipe.create_sampler_view = cell_create_sampler_view; | ||
453 | cell->pipe.sampler_view_destroy = cell_sampler_view_destroy; | ||
454 | |||
455 | cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; | ||
456 | cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; | ||
457 | cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state; | ||
458 | |||
459 | cell->pipe.create_rasterizer_state = cell_create_rasterizer_state; | ||
460 | cell->pipe.bind_rasterizer_state = cell_bind_rasterizer_state; | ||
461 | cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state; | ||
462 | |||
463 | cell->pipe.set_blend_color = cell_set_blend_color; | ||
464 | cell->pipe.set_stencil_ref = cell_set_stencil_ref; | ||
465 | cell->pipe.set_clip_state = cell_set_clip_state; | ||
466 | cell->pipe.set_sample_mask = cell_set_sample_mask; | ||
467 | |||
468 | cell->pipe.set_framebuffer_state = cell_set_framebuffer_state; | ||
469 | |||
470 | cell->pipe.set_polygon_stipple = cell_set_polygon_stipple; | ||
471 | cell->pipe.set_scissor_state = cell_set_scissor_state; | ||
472 | cell->pipe.set_viewport_state = cell_set_viewport_state; | ||
473 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.h b/src/gallium/drivers/cell/ppu/cell_pipe_state.h deleted file mode 100644 index 1889bd52ff5..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.h +++ /dev/null | |||
@@ -1,39 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | #ifndef CELL_PIPE_STATE_H | ||
30 | #define CELL_PIPE_STATE_H | ||
31 | |||
32 | |||
33 | struct cell_context; | ||
34 | |||
35 | extern void | ||
36 | cell_init_state_functions(struct cell_context *cell); | ||
37 | |||
38 | |||
39 | #endif /* CELL_PIPE_STATE_H */ | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_public.h b/src/gallium/drivers/cell/ppu/cell_public.h deleted file mode 100644 index 7e2e093565d..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_public.h +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | #ifndef CELL_PUBLIC_H | ||
2 | #define CELL_PUBLIC_H | ||
3 | |||
4 | struct pipe_screen; | ||
5 | struct sw_winsys; | ||
6 | |||
7 | struct pipe_screen * | ||
8 | cell_create_screen(struct sw_winsys *winsys); | ||
9 | |||
10 | #endif | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c deleted file mode 100644 index f648482c551..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_render.c +++ /dev/null | |||
@@ -1,211 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | /** | ||
29 | * \brief Last stage of 'draw' pipeline: send tris to SPUs. | ||
30 | * \author Brian Paul | ||
31 | */ | ||
32 | |||
33 | #include "cell_context.h" | ||
34 | #include "cell_render.h" | ||
35 | #include "cell_spu.h" | ||
36 | #include "util/u_memory.h" | ||
37 | #include "draw/draw_private.h" | ||
38 | |||
39 | |||
40 | struct render_stage { | ||
41 | struct draw_stage stage; /**< This must be first (base class) */ | ||
42 | |||
43 | struct cell_context *cell; | ||
44 | }; | ||
45 | |||
46 | |||
47 | static INLINE struct render_stage * | ||
48 | render_stage(struct draw_stage *stage) | ||
49 | { | ||
50 | return (struct render_stage *) stage; | ||
51 | } | ||
52 | |||
53 | |||
54 | static void render_begin( struct draw_stage *stage ) | ||
55 | { | ||
56 | #if 0 | ||
57 | struct render_stage *render = render_stage(stage); | ||
58 | struct cell_context *sp = render->cell; | ||
59 | const struct pipe_shader_state *fs = &render->cell->fs->shader; | ||
60 | render->quad.nr_attrs = render->cell->nr_frag_attrs; | ||
61 | |||
62 | render->firstFpInput = fs->input_semantic_name[0]; | ||
63 | |||
64 | sp->quad.first->begin(sp->quad.first); | ||
65 | #endif | ||
66 | } | ||
67 | |||
68 | |||
69 | static void render_end( struct draw_stage *stage ) | ||
70 | { | ||
71 | } | ||
72 | |||
73 | |||
74 | static void reset_stipple_counter( struct draw_stage *stage ) | ||
75 | { | ||
76 | struct render_stage *render = render_stage(stage); | ||
77 | /*render->cell->line_stipple_counter = 0;*/ | ||
78 | } | ||
79 | |||
80 | |||
81 | static void | ||
82 | render_point(struct draw_stage *stage, struct prim_header *prim) | ||
83 | { | ||
84 | } | ||
85 | |||
86 | |||
87 | static void | ||
88 | render_line(struct draw_stage *stage, struct prim_header *prim) | ||
89 | { | ||
90 | } | ||
91 | |||
92 | |||
93 | /** Write a vertex into the prim buffer */ | ||
94 | static void | ||
95 | save_vertex(struct cell_prim_buffer *buf, uint pos, | ||
96 | const struct vertex_header *vert) | ||
97 | { | ||
98 | uint attr, j; | ||
99 | |||
100 | for (attr = 0; attr < 2; attr++) { | ||
101 | for (j = 0; j < 4; j++) { | ||
102 | buf->vertex[pos][attr][j] = vert->data[attr][j]; | ||
103 | } | ||
104 | } | ||
105 | |||
106 | /* update bounding box */ | ||
107 | if (vert->data[0][0] < buf->xmin) | ||
108 | buf->xmin = vert->data[0][0]; | ||
109 | if (vert->data[0][0] > buf->xmax) | ||
110 | buf->xmax = vert->data[0][0]; | ||
111 | if (vert->data[0][1] < buf->ymin) | ||
112 | buf->ymin = vert->data[0][1]; | ||
113 | if (vert->data[0][1] > buf->ymax) | ||
114 | buf->ymax = vert->data[0][1]; | ||
115 | } | ||
116 | |||
117 | |||
118 | static void | ||
119 | render_tri(struct draw_stage *stage, struct prim_header *prim) | ||
120 | { | ||
121 | struct render_stage *rs = render_stage(stage); | ||
122 | struct cell_context *cell = rs->cell; | ||
123 | struct cell_prim_buffer *buf = &cell->prim_buffer; | ||
124 | uint i; | ||
125 | |||
126 | if (buf->num_verts + 3 > CELL_MAX_VERTS) { | ||
127 | cell_flush_prim_buffer(cell); | ||
128 | } | ||
129 | |||
130 | i = buf->num_verts; | ||
131 | assert(i+2 <= CELL_MAX_VERTS); | ||
132 | save_vertex(buf, i+0, prim->v[0]); | ||
133 | save_vertex(buf, i+1, prim->v[1]); | ||
134 | save_vertex(buf, i+2, prim->v[2]); | ||
135 | buf->num_verts += 3; | ||
136 | } | ||
137 | |||
138 | |||
139 | /** | ||
140 | * Send the a RENDER command to all SPUs to have them render the prims | ||
141 | * in the current prim_buffer. | ||
142 | */ | ||
143 | void | ||
144 | cell_flush_prim_buffer(struct cell_context *cell) | ||
145 | { | ||
146 | uint i; | ||
147 | |||
148 | if (cell->prim_buffer.num_verts == 0) | ||
149 | return; | ||
150 | |||
151 | for (i = 0; i < cell->num_spus; i++) { | ||
152 | struct cell_command_render *render = &cell_global.command[i].render; | ||
153 | render->prim_type = PIPE_PRIM_TRIANGLES; | ||
154 | render->num_verts = cell->prim_buffer.num_verts; | ||
155 | render->front_ccw = cell->rasterizer->front_ccw; | ||
156 | render->vertex_size = cell->vertex_info->size * 4; | ||
157 | render->xmin = cell->prim_buffer.xmin; | ||
158 | render->ymin = cell->prim_buffer.ymin; | ||
159 | render->xmax = cell->prim_buffer.xmax; | ||
160 | render->ymax = cell->prim_buffer.ymax; | ||
161 | render->vertex_data = &cell->prim_buffer.vertex; | ||
162 | ASSERT_ALIGN16(render->vertex_data); | ||
163 | send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_RENDER); | ||
164 | } | ||
165 | |||
166 | cell->prim_buffer.num_verts = 0; | ||
167 | |||
168 | cell->prim_buffer.xmin = 1e100; | ||
169 | cell->prim_buffer.ymin = 1e100; | ||
170 | cell->prim_buffer.xmax = -1e100; | ||
171 | cell->prim_buffer.ymax = -1e100; | ||
172 | |||
173 | /* XXX temporary, need to double-buffer the prim buffer until we get | ||
174 | * a real command buffer/list system. | ||
175 | */ | ||
176 | cell_flush(&cell->pipe, 0x0); | ||
177 | } | ||
178 | |||
179 | |||
180 | |||
181 | static void render_destroy( struct draw_stage *stage ) | ||
182 | { | ||
183 | FREE( stage ); | ||
184 | } | ||
185 | |||
186 | |||
187 | /** | ||
188 | * Create a new draw/render stage. This will be plugged into the | ||
189 | * draw module as the last pipeline stage. | ||
190 | */ | ||
191 | struct draw_stage *cell_draw_render_stage( struct cell_context *cell ) | ||
192 | { | ||
193 | struct render_stage *render = CALLOC_STRUCT(render_stage); | ||
194 | |||
195 | render->cell = cell; | ||
196 | render->stage.draw = cell->draw; | ||
197 | render->stage.begin = render_begin; | ||
198 | render->stage.point = render_point; | ||
199 | render->stage.line = render_line; | ||
200 | render->stage.tri = render_tri; | ||
201 | render->stage.end = render_end; | ||
202 | render->stage.reset_stipple_counter = reset_stipple_counter; | ||
203 | render->stage.destroy = render_destroy; | ||
204 | |||
205 | /* | ||
206 | render->quad.coef = render->coef; | ||
207 | render->quad.posCoef = &render->posCoef; | ||
208 | */ | ||
209 | |||
210 | return &render->stage; | ||
211 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_render.h b/src/gallium/drivers/cell/ppu/cell_render.h deleted file mode 100644 index 826dcbafeba..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_render.h +++ /dev/null | |||
@@ -1,39 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #ifndef CELL_RENDER_H | ||
29 | #define CELL_RENDER_H | ||
30 | |||
31 | struct cell_context; | ||
32 | struct draw_stage; | ||
33 | |||
34 | extern void | ||
35 | cell_flush_prim_buffer(struct cell_context *cell); | ||
36 | |||
37 | extern struct draw_stage *cell_draw_render_stage( struct cell_context *cell ); | ||
38 | |||
39 | #endif /* CELL_RENDER_H */ | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c deleted file mode 100644 index 7ffdcc51bbd..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ /dev/null | |||
@@ -1,221 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | #include "util/u_memory.h" | ||
30 | #include "util/u_simple_screen.h" | ||
31 | #include "pipe/p_defines.h" | ||
32 | #include "pipe/p_screen.h" | ||
33 | |||
34 | #include "cell/common.h" | ||
35 | #include "cell_context.h" | ||
36 | #include "cell_screen.h" | ||
37 | #include "cell_texture.h" | ||
38 | #include "cell_public.h" | ||
39 | |||
40 | #include "state_tracker/sw_winsys.h" | ||
41 | |||
42 | |||
/**
 * Return the vendor string reported for the Cell driver.
 * The screen argument is not consulted.
 */
static const char *
cell_get_vendor(struct pipe_screen *screen)
{
   static const char vendor[] = "VMware, Inc.";

   (void) screen;
   return vendor;
}
48 | |||
49 | |||
/**
 * Return the renderer name reported for the Cell driver.
 * The screen argument is not consulted.
 */
static const char *
cell_get_name(struct pipe_screen *screen)
{
   static const char name[] = "Cell";

   (void) screen;
   return name;
}
55 | |||
56 | |||
57 | static int | ||
58 | cell_get_param(struct pipe_screen *screen, enum pipe_cap param) | ||
59 | { | ||
60 | switch (param) { | ||
61 | case PIPE_CAP_MAX_COMBINED_SAMPLERS: | ||
62 | return CELL_MAX_SAMPLERS; | ||
63 | case PIPE_CAP_NPOT_TEXTURES: | ||
64 | return 1; | ||
65 | case PIPE_CAP_TWO_SIDED_STENCIL: | ||
66 | return 1; | ||
67 | case PIPE_CAP_ANISOTROPIC_FILTER: | ||
68 | return 0; | ||
69 | case PIPE_CAP_POINT_SPRITE: | ||
70 | return 1; | ||
71 | case PIPE_CAP_MAX_RENDER_TARGETS: | ||
72 | return 1; | ||
73 | case PIPE_CAP_OCCLUSION_QUERY: | ||
74 | return 1; | ||
75 | case PIPE_CAP_TIMER_QUERY: | ||
76 | return 0; | ||
77 | case PIPE_CAP_TEXTURE_SHADOW_MAP: | ||
78 | return 10; | ||
79 | case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: | ||
80 | return CELL_MAX_TEXTURE_LEVELS; | ||
81 | case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: | ||
82 | return 8; /* max 128x128x128 */ | ||
83 | case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: | ||
84 | return CELL_MAX_TEXTURE_LEVELS; | ||
85 | case PIPE_CAP_TEXTURE_MIRROR_CLAMP: | ||
86 | return 0; /* XXX to do */ | ||
87 | case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: | ||
88 | case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: | ||
89 | return 1; | ||
90 | case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: | ||
91 | case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: | ||
92 | return 0; | ||
93 | case PIPE_CAP_BLEND_EQUATION_SEPARATE: | ||
94 | return 1; | ||
95 | default: | ||
96 | return 0; | ||
97 | } | ||
98 | } | ||
99 | |||
100 | static int | ||
101 | cell_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param) | ||
102 | { | ||
103 | switch(shader) | ||
104 | { | ||
105 | case PIPE_SHADER_FRAGMENT: | ||
106 | switch (param) { | ||
107 | case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: | ||
108 | return CELL_MAX_SAMPLERS; | ||
109 | default: | ||
110 | return tgsi_exec_get_shader_param(param); | ||
111 | } | ||
112 | case PIPE_SHADER_VERTEX: | ||
113 | case PIPE_SHADER_GEOMETRY: | ||
114 | return draw_get_shader_param(shader, param); | ||
115 | default: | ||
116 | return 0; | ||
117 | } | ||
118 | } | ||
119 | |||
120 | static float | ||
121 | cell_get_paramf(struct pipe_screen *screen, enum pipe_capf param) | ||
122 | { | ||
123 | switch (param) { | ||
124 | case PIPE_CAPF_MAX_LINE_WIDTH: | ||
125 | /* fall-through */ | ||
126 | case PIPE_CAPF_MAX_LINE_WIDTH_AA: | ||
127 | return 255.0; /* arbitrary */ | ||
128 | |||
129 | case PIPE_CAPF_MAX_POINT_WIDTH: | ||
130 | /* fall-through */ | ||
131 | case PIPE_CAPF_MAX_POINT_WIDTH_AA: | ||
132 | return 255.0; /* arbitrary */ | ||
133 | |||
134 | case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: | ||
135 | return 0.0; | ||
136 | |||
137 | case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: | ||
138 | return 16.0; /* arbitrary */ | ||
139 | |||
140 | default: | ||
141 | return 0; | ||
142 | } | ||
143 | } | ||
144 | |||
145 | |||
146 | static boolean | ||
147 | cell_is_format_supported( struct pipe_screen *screen, | ||
148 | enum pipe_format format, | ||
149 | enum pipe_texture_target target, | ||
150 | unsigned sample_count, | ||
151 | unsigned tex_usage) | ||
152 | { | ||
153 | struct sw_winsys *winsys = cell_screen(screen)->winsys; | ||
154 | |||
155 | if (sample_count > 1) | ||
156 | return FALSE; | ||
157 | |||
158 | if (tex_usage & (PIPE_BIND_DISPLAY_TARGET | | ||
159 | PIPE_BIND_SCANOUT | | ||
160 | PIPE_BIND_SHARED)) { | ||
161 | if (!winsys->is_displaytarget_format_supported(winsys, tex_usage, format)) | ||
162 | return FALSE; | ||
163 | } | ||
164 | |||
165 | /* only a few formats are known to work at this time */ | ||
166 | switch (format) { | ||
167 | case PIPE_FORMAT_Z24_UNORM_S8_UINT: | ||
168 | case PIPE_FORMAT_Z24X8_UNORM: | ||
169 | case PIPE_FORMAT_B8G8R8A8_UNORM: | ||
170 | case PIPE_FORMAT_I8_UNORM: | ||
171 | return TRUE; | ||
172 | default: | ||
173 | return FALSE; | ||
174 | } | ||
175 | } | ||
176 | |||
177 | |||
178 | static void | ||
179 | cell_destroy_screen( struct pipe_screen *screen ) | ||
180 | { | ||
181 | struct cell_screen *sp_screen = cell_screen(screen); | ||
182 | struct sw_winsys *winsys = sp_screen->winsys; | ||
183 | |||
184 | if(winsys->destroy) | ||
185 | winsys->destroy(winsys); | ||
186 | |||
187 | FREE(screen); | ||
188 | } | ||
189 | |||
190 | |||
191 | |||
192 | /** | ||
193 | * Create a new pipe_screen object | ||
194 | * Note: we're not presently subclassing pipe_screen (no cell_screen) but | ||
195 | * that would be the place to put SPU thread/context info... | ||
196 | */ | ||
197 | struct pipe_screen * | ||
198 | cell_create_screen(struct sw_winsys *winsys) | ||
199 | { | ||
200 | struct cell_screen *screen = CALLOC_STRUCT(cell_screen); | ||
201 | |||
202 | if (!screen) | ||
203 | return NULL; | ||
204 | |||
205 | screen->winsys = winsys; | ||
206 | screen->base.winsys = NULL; | ||
207 | |||
208 | screen->base.destroy = cell_destroy_screen; | ||
209 | |||
210 | screen->base.get_name = cell_get_name; | ||
211 | screen->base.get_vendor = cell_get_vendor; | ||
212 | screen->base.get_param = cell_get_param; | ||
213 | screen->base.get_shader_param = cell_get_shader_param; | ||
214 | screen->base.get_paramf = cell_get_paramf; | ||
215 | screen->base.is_format_supported = cell_is_format_supported; | ||
216 | screen->base.context_create = cell_create_context; | ||
217 | |||
218 | cell_init_screen_texture_funcs(&screen->base); | ||
219 | |||
220 | return &screen->base; | ||
221 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.h b/src/gallium/drivers/cell/ppu/cell_screen.h deleted file mode 100644 index baff9d3b7d4..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_screen.h +++ /dev/null | |||
@@ -1,55 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | #ifndef CELL_SCREEN_H | ||
30 | #define CELL_SCREEN_H | ||
31 | |||
32 | |||
33 | #include "pipe/p_screen.h" | ||
34 | |||
35 | struct sw_winsys; | ||
36 | |||
37 | struct cell_screen { /* Cell driver's extension of struct pipe_screen */ | ||
38 | struct pipe_screen base; /* kept as the first member: cell_screen() casts a pipe_screen pointer to this type */ | ||
39 | |||
40 | struct sw_winsys *winsys; /* software winsys assigned by cell_create_screen() */ | ||
41 | |||
42 | /* Increments whenever textures are modified. Contexts can track | ||
43 | * this value to notice texture changes. | ||
44 | */ | ||
45 | unsigned timestamp; | ||
46 | }; | ||
47 | |||
48 | static INLINE struct cell_screen * | ||
49 | cell_screen( struct pipe_screen *pipe ) | ||
50 | { | ||
51 | return (struct cell_screen *)pipe; | ||
52 | } | ||
53 | |||
54 | |||
55 | #endif /* CELL_SCREEN_H */ | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c deleted file mode 100644 index 39284f3a5d1..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ /dev/null | |||
@@ -1,219 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | /** | ||
30 | * Utility/wrappers for communicating with the SPUs. | ||
31 | */ | ||
32 | |||
33 | |||
34 | #include <pthread.h> | ||
35 | |||
36 | #include "cell_spu.h" | ||
37 | #include "pipe/p_format.h" | ||
38 | #include "pipe/p_state.h" | ||
39 | #include "util/u_memory.h" | ||
40 | #include "cell/common.h" | ||
41 | |||
42 | |||
43 | /* | ||
44 | helpful headers: | ||
45 | /opt/ibm/cell-sdk/prototype/src/include/ppu/cbe_mfc.h | ||
46 | */ | ||
47 | |||
48 | |||
49 | /** | ||
50 | * Cell/SPU info that's not per-context. | ||
51 | */ | ||
52 | struct cell_global_info cell_global; | ||
53 | |||
54 | |||
/**
 * Scan /proc/cpuinfo to determine the timebase for the system.
 * This is used by the SPUs to convert 'decrementer' ticks to seconds.
 * There may be a better way to get this value...
 *
 * \return the value following the ':' on the first line that starts with
 *         "timebase", or 0 if the file cannot be opened or no such line
 *         is found.
 */
static unsigned
get_timebase(void)
{
   FILE *f = fopen("/proc/cpuinfo", "r");
   unsigned timebase = 0;  /* defined result even when nothing is found */
   char line[80];

   assert(f);
   if (!f)
      return 0;  /* assert() may be compiled out; never use a NULL stream */

   /* Loop on fgets() itself so a failed read is never parsed as a line */
   while (fgets(line, sizeof(line), f)) {
      if (strncmp(line, "timebase", 8) == 0) {
         const char *colon = strchr(line, ':');
         if (colon) {
            /* atoi() skips the blank after the colon, so there is no need
             * to step two characters ahead (possibly past the terminator).
             */
            timebase = atoi(colon + 1);
            break;
         }
      }
   }
   fclose(f);

   return timebase;
}
82 | |||
83 | |||
84 | /** | ||
85 | * Write a 1-word message to the given SPE mailbox. | ||
86 | */ | ||
87 | void | ||
88 | send_mbox_message(spe_context_ptr_t ctx, unsigned int msg) | ||
89 | { | ||
90 | spe_in_mbox_write(ctx, &msg, 1, SPE_MBOX_ALL_BLOCKING); | ||
91 | } | ||
92 | |||
93 | |||
94 | /** | ||
95 | * Wait for a 1-word message to arrive in given mailbox. | ||
96 | */ | ||
97 | uint | ||
98 | wait_mbox_message(spe_context_ptr_t ctx) | ||
99 | { | ||
100 | do { | ||
101 | unsigned data; | ||
102 | int count = spe_out_mbox_read(ctx, &data, 1); | ||
103 | |||
104 | if (count == 1) { | ||
105 | return data; | ||
106 | } | ||
107 | |||
108 | if (count < 0) { | ||
109 | /* error */ ; | ||
110 | } | ||
111 | } while (1); | ||
112 | } | ||
113 | |||
114 | |||
115 | /** | ||
116 | * Called by pthread_create() to spawn an SPU thread. | ||
117 | */ | ||
118 | static void * | ||
119 | cell_thread_function(void *arg) | ||
120 | { | ||
121 | struct cell_init_info *init = (struct cell_init_info *) arg; | ||
122 | unsigned entry = SPE_DEFAULT_ENTRY; | ||
123 | |||
124 | ASSERT_ALIGN16(init); | ||
125 | |||
126 | if (spe_context_run(cell_global.spe_contexts[init->id], &entry, 0, | ||
127 | init, NULL, NULL) < 0) { | ||
128 | fprintf(stderr, "spe_context_run() failed\n"); | ||
129 | exit(1); | ||
130 | } | ||
131 | |||
132 | pthread_exit(NULL); | ||
133 | } | ||
134 | |||
135 | |||
136 | /** | ||
137 | * Create the SPU threads. This is done once during driver initialization. | ||
138 | * This involves setting the "init" message which is sent to each SPU. | ||
139 | * The init message specifies an SPU id, total number of SPUs, location | ||
140 | * and number of batch buffers, etc. | ||
141 | */ | ||
142 | void | ||
143 | cell_start_spus(struct cell_context *cell) | ||
144 | { | ||
145 | static boolean one_time_init = FALSE; | ||
146 | uint i, j; | ||
147 | uint timebase = get_timebase(); | ||
148 | |||
149 | if (one_time_init) { | ||
150 | fprintf(stderr, "PPU: Multiple rendering contexts not yet supported " | ||
151 | "on Cell.\n"); | ||
152 | abort(); | ||
153 | } | ||
154 | |||
155 | one_time_init = TRUE; | ||
156 | |||
157 | assert(cell->num_spus <= CELL_MAX_SPUS); | ||
158 | |||
159 | ASSERT_ALIGN16(&cell_global.inits[0]); | ||
160 | ASSERT_ALIGN16(&cell_global.inits[1]); | ||
161 | |||
162 | /* | ||
163 | * Initialize the global 'inits' structure for each SPU. | ||
164 | * A pointer to the init struct will be passed to each SPU. | ||
165 | * The SPUs will then each grab their init info with mfc_get(). | ||
166 | */ | ||
167 | for (i = 0; i < cell->num_spus; i++) { | ||
168 | cell_global.inits[i].id = i; | ||
169 | cell_global.inits[i].num_spus = cell->num_spus; | ||
170 | cell_global.inits[i].debug_flags = cell->debug_flags; | ||
171 | cell_global.inits[i].inv_timebase = 1000.0f / timebase; | ||
172 | |||
173 | for (j = 0; j < CELL_NUM_BUFFERS; j++) { | ||
174 | cell_global.inits[i].buffers[j] = cell->buffer[j]; | ||
175 | } | ||
176 | cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0]; | ||
177 | |||
178 | cell_global.inits[i].spu_functions = &cell->spu_functions; | ||
179 | |||
180 | cell_global.spe_contexts[i] = spe_context_create(0, NULL); | ||
181 | if (!cell_global.spe_contexts[i]) { | ||
182 | fprintf(stderr, "spe_context_create() failed\n"); | ||
183 | exit(1); | ||
184 | } | ||
185 | |||
186 | if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) { | ||
187 | fprintf(stderr, "spe_program_load() failed\n"); | ||
188 | exit(1); | ||
189 | } | ||
190 | |||
191 | pthread_create(&cell_global.spe_threads[i], /* returned thread handle */ | ||
192 | NULL, /* pthread attribs */ | ||
193 | &cell_thread_function, /* start routine */ | ||
194 | &cell_global.inits[i]); /* thread argument */ | ||
195 | } | ||
196 | } | ||
197 | |||
198 | |||
199 | /** | ||
200 | * Tell all the SPUs to stop/exit. | ||
201 | * This is done when the driver's exiting / cleaning up. | ||
202 | */ | ||
203 | void | ||
204 | cell_spu_exit(struct cell_context *cell) | ||
205 | { | ||
206 | uint i; | ||
207 | |||
208 | for (i = 0; i < cell->num_spus; i++) { | ||
209 | send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT); | ||
210 | } | ||
211 | |||
212 | /* wait for threads to exit */ | ||
213 | for (i = 0; i < cell->num_spus; i++) { | ||
214 | void *value; | ||
215 | pthread_join(cell_global.spe_threads[i], &value); | ||
216 | cell_global.spe_threads[i] = 0; | ||
217 | cell_global.spe_contexts[i] = 0; | ||
218 | } | ||
219 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h deleted file mode 100644 index c93958a9ed5..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_spu.h +++ /dev/null | |||
@@ -1,79 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #ifndef CELL_SPU | ||
29 | #define CELL_SPU | ||
30 | |||
31 | |||
32 | #include <libspe2.h> | ||
33 | #include <pthread.h> | ||
34 | #include "cell/common.h" | ||
35 | |||
36 | #include "cell_context.h" | ||
37 | |||
38 | |||
39 | /** | ||
40 | * Global (not per-context) SPU bookkeeping, for now anyway. | ||
41 | */ | ||
42 | struct cell_global_info | ||
43 | { | ||
44 | /** | ||
45 | * SPU/SPE handles, indexed by SPU id: the libspe2 context and the pthread that runs it | ||
46 | */ | ||
47 | spe_context_ptr_t spe_contexts[CELL_MAX_SPUS]; | ||
48 | pthread_t spe_threads[CELL_MAX_SPUS]; | ||
49 | |||
50 | /** | ||
51 | * Data sent to SPUs at start-up; each SPU thread is handed a pointer to its own entry | ||
52 | */ | ||
53 | struct cell_init_info inits[CELL_MAX_SPUS]; | ||
54 | }; | ||
55 | |||
56 | |||
57 | extern struct cell_global_info cell_global; | ||
58 | |||
59 | |||
60 | /** This is the handle for the actual SPE code */ | ||
61 | extern spe_program_handle_t g3d_spu; | ||
62 | |||
63 | |||
64 | extern void | ||
65 | send_mbox_message(spe_context_ptr_t ctx, unsigned int msg); | ||
66 | |||
67 | extern uint | ||
68 | wait_mbox_message(spe_context_ptr_t ctx); | ||
69 | |||
70 | |||
71 | extern void | ||
72 | cell_start_spus(struct cell_context *cell); | ||
73 | |||
74 | |||
75 | extern void | ||
76 | cell_spu_exit(struct cell_context *cell); | ||
77 | |||
78 | |||
79 | #endif /* CELL_SPU */ | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h deleted file mode 100644 index 7adedcde57c..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state.h +++ /dev/null | |||
@@ -1,65 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | #ifndef CELL_STATE_H | ||
30 | #define CELL_STATE_H | ||
31 | |||
32 | |||
33 | #define CELL_NEW_VIEWPORT 0x1 | ||
34 | #define CELL_NEW_RASTERIZER 0x2 | ||
35 | #define CELL_NEW_FS 0x4 | ||
36 | #define CELL_NEW_BLEND 0x8 | ||
37 | #define CELL_NEW_CLIP 0x10 | ||
38 | #define CELL_NEW_SCISSOR 0x20 | ||
39 | #define CELL_NEW_STIPPLE 0x40 | ||
40 | #define CELL_NEW_FRAMEBUFFER 0x80 | ||
41 | #define CELL_NEW_ALPHA_TEST 0x100 | ||
42 | #define CELL_NEW_DEPTH_STENCIL 0x200 | ||
43 | #define CELL_NEW_SAMPLER 0x400 | ||
44 | #define CELL_NEW_TEXTURE 0x800 | ||
45 | #define CELL_NEW_VERTEX 0x1000 | ||
46 | #define CELL_NEW_VS 0x2000 | ||
47 | #define CELL_NEW_VS_CONSTANTS 0x4000 | ||
48 | #define CELL_NEW_FS_CONSTANTS 0x8000 | ||
49 | #define CELL_NEW_VERTEX_INFO 0x10000 | ||
50 | |||
51 | |||
52 | extern void | ||
53 | cell_update_derived( struct cell_context *cell ); | ||
54 | |||
55 | |||
56 | extern void | ||
57 | cell_init_shader_functions(struct cell_context *cell); | ||
58 | |||
59 | |||
60 | extern void | ||
61 | cell_init_vertex_functions(struct cell_context *cell); | ||
62 | |||
63 | |||
64 | #endif /* CELL_STATE_H */ | ||
65 | |||
diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c deleted file mode 100644 index b723e794e71..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ /dev/null | |||
@@ -1,170 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #include "util/u_memory.h" | ||
29 | #include "pipe/p_shader_tokens.h" | ||
30 | #include "draw/draw_context.h" | ||
31 | #include "draw/draw_vertex.h" | ||
32 | #include "cell_context.h" | ||
33 | #include "cell_batch.h" | ||
34 | #include "cell_state.h" | ||
35 | #include "cell_state_emit.h" | ||
36 | |||
37 | |||
38 | /** | ||
39 | * Determine how to map vertex program outputs to fragment program inputs. | ||
40 | * Basically, this will be used when computing the triangle interpolation | ||
41 | * coefficients from the post-transform vertex attributes. | ||
42 | */ | ||
43 | static void | ||
44 | calculate_vertex_layout( struct cell_context *cell ) | ||
45 | { | ||
46 | const struct cell_fragment_shader_state *fs = cell->fs; | ||
47 | const enum interp_mode colorInterp | ||
48 | = cell->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; | ||
49 | struct vertex_info *vinfo = &cell->vertex_info; | ||
50 | uint i; | ||
51 | int src; | ||
52 | |||
53 | #if 0 | ||
54 | if (cell->vbuf) { | ||
55 | /* if using the post-transform vertex buffer, tell draw_vbuf to | ||
56 | * simply emit the whole post-xform vertex as-is: | ||
57 | */ | ||
58 | struct vertex_info *vinfo_vbuf = &cell->vertex_info_vbuf; | ||
59 | vinfo_vbuf->num_attribs = 0; | ||
60 | draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); | ||
61 | vinfo_vbuf->size = 4 * vs->num_outputs + sizeof(struct vertex_header)/4; | ||
62 | } | ||
63 | #endif | ||
64 | |||
65 | /* reset vinfo */ | ||
66 | vinfo->num_attribs = 0; | ||
67 | |||
68 | /* we always want to emit vertex pos */ | ||
69 | src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); | ||
70 | assert(src >= 0); | ||
71 | draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); | ||
72 | |||
73 | |||
74 | /* | ||
75 | * Loop over fragment shader inputs, searching for the matching output | ||
76 | * from the vertex shader. | ||
77 | */ | ||
78 | for (i = 0; i < fs->info.num_inputs; i++) { | ||
79 | switch (fs->info.input_semantic_name[i]) { | ||
80 | case TGSI_SEMANTIC_POSITION: | ||
81 | /* already done above */ | ||
82 | break; | ||
83 | |||
84 | case TGSI_SEMANTIC_COLOR: | ||
85 | src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_COLOR, | ||
86 | fs->info.input_semantic_index[i]); | ||
87 | assert(src >= 0); | ||
88 | draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); | ||
89 | break; | ||
90 | |||
91 | case TGSI_SEMANTIC_FOG: | ||
92 | src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_FOG, 0); | ||
93 | #if 1 | ||
94 | if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ | ||
95 | src = 0; | ||
96 | #endif | ||
97 | assert(src >= 0); | ||
98 | draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); | ||
99 | break; | ||
100 | |||
101 | case TGSI_SEMANTIC_GENERIC: | ||
102 | /* this includes texcoords and varying vars */ | ||
103 | src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_GENERIC, | ||
104 | fs->info.input_semantic_index[i]); | ||
105 | assert(src >= 0); | ||
106 | draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); | ||
107 | break; | ||
108 | |||
109 | default: | ||
110 | assert(0); | ||
111 | } | ||
112 | } | ||
113 | |||
114 | draw_compute_vertex_size(vinfo); | ||
115 | |||
116 | /* XXX only signal this if format really changes */ | ||
117 | cell->dirty |= CELL_NEW_VERTEX_INFO; | ||
118 | } | ||
119 | |||
120 | |||
121 | #if 0 | ||
122 | /** | ||
123 | * Recompute cliprect from scissor bounds, scissor enable and surface size. | ||
124 | */ | ||
125 | static void | ||
126 | compute_cliprect(struct cell_context *sp) | ||
127 | { | ||
128 | uint surfWidth = sp->framebuffer.width; | ||
129 | uint surfHeight = sp->framebuffer.height; | ||
130 | |||
131 | if (sp->rasterizer->scissor) { | ||
132 | /* clip to scissor rect */ | ||
133 | sp->cliprect.minx = MAX2(sp->scissor.minx, 0); | ||
134 | sp->cliprect.miny = MAX2(sp->scissor.miny, 0); | ||
135 | sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth); | ||
136 | sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight); | ||
137 | } | ||
138 | else { | ||
139 | /* clip to surface bounds */ | ||
140 | sp->cliprect.minx = 0; | ||
141 | sp->cliprect.miny = 0; | ||
142 | sp->cliprect.maxx = surfWidth; | ||
143 | sp->cliprect.maxy = surfHeight; | ||
144 | } | ||
145 | } | ||
146 | #endif | ||
147 | |||
148 | |||
149 | |||
150 | /** | ||
151 | * Update derived state, send current state to SPUs prior to rendering. | ||
152 | */ | ||
153 | void cell_update_derived( struct cell_context *cell ) | ||
154 | { | ||
155 | if (cell->dirty & (CELL_NEW_RASTERIZER | | ||
156 | CELL_NEW_FS | | ||
157 | CELL_NEW_VS)) | ||
158 | calculate_vertex_layout( cell ); | ||
159 | |||
160 | #if 0 | ||
161 | if (cell->dirty & (CELL_NEW_SCISSOR | | ||
162 | CELL_NEW_DEPTH_STENCIL_ALPHA | | ||
163 | CELL_NEW_FRAMEBUFFER)) | ||
164 | compute_cliprect(cell); | ||
165 | #endif | ||
166 | |||
167 | cell_emit_state(cell); | ||
168 | |||
169 | cell->dirty = 0; | ||
170 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c deleted file mode 100644 index bb11c68fa24..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ /dev/null | |||
@@ -1,343 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #include "util/u_inlines.h" | ||
29 | #include "util/u_memory.h" | ||
30 | #include "util/u_math.h" | ||
31 | #include "util/u_format.h" | ||
32 | #include "cell_context.h" | ||
33 | #include "cell_gen_fragment.h" | ||
34 | #include "cell_state.h" | ||
35 | #include "cell_state_emit.h" | ||
36 | #include "cell_batch.h" | ||
37 | #include "cell_texture.h" | ||
38 | #include "draw/draw_context.h" | ||
39 | #include "draw/draw_private.h" | ||
40 | |||
41 | |||
42 | /** | ||
43 | * Find/create a cell_command_fragment_ops object corresponding to the | ||
44 | * current blend/stencil/z/colormask/etc. state. | ||
45 | */ | ||
46 | static struct cell_command_fragment_ops * | ||
47 | lookup_fragment_ops(struct cell_context *cell) | ||
48 | { | ||
49 | struct cell_fragment_ops_key key; | ||
50 | struct cell_command_fragment_ops *ops; | ||
51 | |||
52 | /* | ||
53 | * Build key | ||
54 | */ | ||
55 | memset(&key, 0, sizeof(key)); | ||
56 | key.blend = *cell->blend; | ||
57 | key.blend_color = cell->blend_color; | ||
58 | key.dsa = *cell->depth_stencil; | ||
59 | |||
60 | if (cell->framebuffer.cbufs[0]) | ||
61 | key.color_format = cell->framebuffer.cbufs[0]->format; | ||
62 | else | ||
63 | key.color_format = PIPE_FORMAT_NONE; | ||
64 | |||
65 | if (cell->framebuffer.zsbuf) | ||
66 | key.zs_format = cell->framebuffer.zsbuf->format; | ||
67 | else | ||
68 | key.zs_format = PIPE_FORMAT_NONE; | ||
69 | |||
70 | /* | ||
71 | * Look up key in cache. | ||
72 | */ | ||
73 | ops = (struct cell_command_fragment_ops *) | ||
74 | util_keymap_lookup(cell->fragment_ops_cache, &key); | ||
75 | |||
76 | /* | ||
77 | * If not found, create/save new fragment ops command. | ||
78 | */ | ||
79 | if (!ops) { | ||
80 | struct spe_function spe_code_front, spe_code_back; | ||
81 | unsigned int facing_dependent, total_code_size; | ||
82 | |||
83 | if (0) | ||
84 | debug_printf("**** Create New Fragment Ops\n"); | ||
85 | |||
86 | /* Prepare the buffer that will hold the generated code. The | ||
87 | * "0" passed in for the size means that the SPE code will | ||
88 | * use a default size. | ||
89 | */ | ||
90 | spe_init_func(&spe_code_front, 0); | ||
91 | spe_init_func(&spe_code_back, 0); | ||
92 | |||
93 | /* Generate new code. Always generate new code for both front-facing | ||
94 | * and back-facing fragments, even if it's the same code in both | ||
95 | * cases. | ||
96 | */ | ||
97 | cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front); | ||
98 | cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back); | ||
99 | |||
100 | /* Make sure the code is a multiple of 8 bytes long; this is | ||
101 | * required to ensure that the dual pipe instruction alignment | ||
102 | * is correct. It's also important for the SPU unpacking, | ||
103 | * which assumes 8-byte boundaries. | ||
104 | */ | ||
105 | unsigned int front_code_size = spe_code_size(&spe_code_front); | ||
106 | while (front_code_size % 8 != 0) { | ||
107 | spe_lnop(&spe_code_front); | ||
108 | front_code_size = spe_code_size(&spe_code_front); | ||
109 | } | ||
110 | unsigned int back_code_size = spe_code_size(&spe_code_back); | ||
111 | while (back_code_size % 8 != 0) { | ||
112 | spe_lnop(&spe_code_back); | ||
113 | back_code_size = spe_code_size(&spe_code_back); | ||
114 | } | ||
115 | |||
116 | /* Determine whether the code we generated is facing-dependent, by | ||
117 | * determining whether the generated code is different for the front- | ||
118 | * and back-facing fragments. | ||
119 | */ | ||
120 | if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) { | ||
121 | /* Code is identical; only need one copy. */ | ||
122 | facing_dependent = 0; | ||
123 | total_code_size = front_code_size; | ||
124 | } | ||
125 | else { | ||
126 | /* Code is different for front-facing and back-facing fragments. | ||
127 | * Need to send both copies. | ||
128 | */ | ||
129 | facing_dependent = 1; | ||
130 | total_code_size = front_code_size + back_code_size; | ||
131 | } | ||
132 | |||
133 | /* alloc new fragment ops command. Note that this structure | ||
134 | * has variant length based on the total code size required. | ||
135 | */ | ||
136 | ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size); | ||
137 | /* populate the new cell_command_fragment_ops object */ | ||
138 | ops->opcode[0] = CELL_CMD_STATE_FRAGMENT_OPS; | ||
139 | ops->total_code_size = total_code_size; | ||
140 | ops->front_code_index = 0; | ||
141 | memcpy(ops->code, spe_code_front.store, front_code_size); | ||
142 | if (facing_dependent) { | ||
143 | /* We have separate front- and back-facing code. Append the | ||
144 | * back-facing code to the buffer. Be careful because the code | ||
145 | * size is in bytes, but the buffer is of unsigned elements. | ||
146 | */ | ||
147 | ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]); | ||
148 | memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size); | ||
149 | } | ||
150 | else { | ||
151 | /* Use the same code for front- and back-facing fragments */ | ||
152 | ops->back_code_index = ops->front_code_index; | ||
153 | } | ||
154 | |||
155 | /* Set the fields for the fallback case. Note that these fields | ||
156 | * (and the whole fallback case) will eventually go away. | ||
157 | */ | ||
158 | ops->dsa = *cell->depth_stencil; | ||
159 | ops->blend = *cell->blend; | ||
160 | ops->blend_color = cell->blend_color; | ||
161 | |||
162 | /* insert cell_command_fragment_ops object into keymap/cache */ | ||
163 | util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL); | ||
164 | |||
165 | /* release rtasm buffer */ | ||
166 | spe_release_func(&spe_code_front); | ||
167 | spe_release_func(&spe_code_back); | ||
168 | } | ||
169 | else { | ||
170 | if (0) | ||
171 | debug_printf("**** Re-use Fragment Ops\n"); | ||
172 | } | ||
173 | |||
174 | return ops; | ||
175 | } | ||
176 | |||
177 | |||
178 | |||
179 | static void | ||
180 | emit_state_cmd(struct cell_context *cell, uint cmd, | ||
181 | const void *state, uint state_size) | ||
182 | { | ||
183 | uint32_t *dst = (uint32_t *) | ||
184 | cell_batch_alloc16(cell, ROUNDUP16(sizeof(opcode_t) + state_size)); | ||
185 | *dst = cmd; | ||
186 | memcpy(dst + 4, state, state_size); | ||
187 | } | ||
188 | |||
189 | |||
190 | /** | ||
191 | * For state marked as 'dirty', construct a state-update command block | ||
192 | * and insert it into the current batch buffer. | ||
193 | */ | ||
194 | void | ||
195 | cell_emit_state(struct cell_context *cell) | ||
196 | { | ||
197 | if (cell->dirty & CELL_NEW_FRAMEBUFFER) { | ||
198 | struct pipe_surface *cbuf = cell->framebuffer.cbufs[0]; | ||
199 | struct pipe_surface *zbuf = cell->framebuffer.zsbuf; | ||
200 | STATIC_ASSERT(sizeof(struct cell_command_framebuffer) % 16 == 0); | ||
201 | struct cell_command_framebuffer *fb | ||
202 | = cell_batch_alloc16(cell, sizeof(*fb)); | ||
203 | fb->opcode[0] = CELL_CMD_STATE_FRAMEBUFFER; | ||
204 | fb->color_start = cell->cbuf_map[0]; | ||
205 | fb->color_format = cbuf->format; | ||
206 | fb->depth_start = cell->zsbuf_map; | ||
207 | fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE; | ||
208 | fb->width = cell->framebuffer.width; | ||
209 | fb->height = cell->framebuffer.height; | ||
210 | #if 0 | ||
211 | printf("EMIT color format %s\n", util_format_name(fb->color_format)); | ||
212 | printf("EMIT depth format %s\n", util_format_name(fb->depth_format)); | ||
213 | #endif | ||
214 | } | ||
215 | |||
216 | if (cell->dirty & (CELL_NEW_RASTERIZER)) { | ||
217 | STATIC_ASSERT(sizeof(struct cell_command_rasterizer) % 16 == 0); | ||
218 | struct cell_command_rasterizer *rast = | ||
219 | cell_batch_alloc16(cell, sizeof(*rast)); | ||
220 | rast->opcode[0] = CELL_CMD_STATE_RASTERIZER; | ||
221 | rast->rasterizer = *cell->rasterizer; | ||
222 | } | ||
223 | |||
224 | if (cell->dirty & (CELL_NEW_FS)) { | ||
225 | /* Send new fragment program to SPUs */ | ||
226 | STATIC_ASSERT(sizeof(struct cell_command_fragment_program) % 16 == 0); | ||
227 | struct cell_command_fragment_program *fp | ||
228 | = cell_batch_alloc16(cell, sizeof(*fp)); | ||
229 | fp->opcode[0] = CELL_CMD_STATE_FRAGMENT_PROGRAM; | ||
230 | fp->num_inst = cell->fs->code.num_inst; | ||
231 | memcpy(&fp->code, cell->fs->code.store, | ||
232 | SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); | ||
233 | if (0) { | ||
234 | int i; | ||
235 | printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n"); | ||
236 | for (i = 0; i < fp->num_inst; i++) { | ||
237 | printf(" %3d: 0x%08x\n", i, fp->code[i]); | ||
238 | } | ||
239 | } | ||
240 | } | ||
241 | |||
242 | if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) { | ||
243 | const uint shader = PIPE_SHADER_FRAGMENT; | ||
244 | const uint num_const = cell->constants[shader]->width0 / sizeof(float); | ||
245 | uint i, j; | ||
246 | float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float))); | ||
247 | uint32_t *ibuf = (uint32_t *) buf; | ||
248 | const float *constants = cell->mapped_constants[shader]; | ||
249 | ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS; | ||
250 | ibuf[4] = num_const; | ||
251 | j = 8; | ||
252 | for (i = 0; i < num_const; i++) { | ||
253 | buf[j++] = constants[i]; | ||
254 | } | ||
255 | } | ||
256 | |||
257 | if (cell->dirty & (CELL_NEW_FRAMEBUFFER | | ||
258 | CELL_NEW_DEPTH_STENCIL | | ||
259 | CELL_NEW_BLEND)) { | ||
260 | struct cell_command_fragment_ops *fops, *fops_cmd; | ||
261 | /* Note that cell_command_fragment_ops is a variant-sized record */ | ||
262 | fops = lookup_fragment_ops(cell); | ||
263 | fops_cmd = cell_batch_alloc16(cell, ROUNDUP16(sizeof(*fops_cmd) + fops->total_code_size)); | ||
264 | memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size); | ||
265 | } | ||
266 | |||
267 | if (cell->dirty & CELL_NEW_SAMPLER) { | ||
268 | uint i; | ||
269 | for (i = 0; i < CELL_MAX_SAMPLERS; i++) { | ||
270 | if (cell->dirty_samplers & (1 << i)) { | ||
271 | if (cell->sampler[i]) { | ||
272 | STATIC_ASSERT(sizeof(struct cell_command_sampler) % 16 == 0); | ||
273 | struct cell_command_sampler *sampler | ||
274 | = cell_batch_alloc16(cell, sizeof(*sampler)); | ||
275 | sampler->opcode[0] = CELL_CMD_STATE_SAMPLER; | ||
276 | sampler->unit = i; | ||
277 | sampler->state = *cell->sampler[i]; | ||
278 | } | ||
279 | } | ||
280 | } | ||
281 | cell->dirty_samplers = 0x0; | ||
282 | } | ||
283 | |||
284 | if (cell->dirty & CELL_NEW_TEXTURE) { | ||
285 | uint i; | ||
286 | for (i = 0;i < CELL_MAX_SAMPLERS; i++) { | ||
287 | if (cell->dirty_textures & (1 << i)) { | ||
288 | STATIC_ASSERT(sizeof(struct cell_command_texture) % 16 == 0); | ||
289 | struct cell_command_texture *texture = | ||
290 | (struct cell_command_texture *) | ||
291 | cell_batch_alloc16(cell, sizeof(*texture)); | ||
292 | |||
293 | texture->opcode[0] = CELL_CMD_STATE_TEXTURE; | ||
294 | texture->unit = i; | ||
295 | if (cell->texture[i]) { | ||
296 | struct cell_resource *ct = cell->texture[i]; | ||
297 | uint level; | ||
298 | for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { | ||
299 | texture->start[level] = (ct->mapped + | ||
300 | ct->level_offset[level]); | ||
301 | texture->width[level] = u_minify(ct->base.width0, level); | ||
302 | texture->height[level] = u_minify(ct->base.height0, level); | ||
303 | texture->depth[level] = u_minify(ct->base.depth0, level); | ||
304 | } | ||
305 | texture->target = ct->base.target; | ||
306 | } | ||
307 | else { | ||
308 | uint level; | ||
309 | for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { | ||
310 | texture->start[level] = NULL; | ||
311 | texture->width[level] = 0; | ||
312 | texture->height[level] = 0; | ||
313 | texture->depth[level] = 0; | ||
314 | } | ||
315 | texture->target = 0; | ||
316 | } | ||
317 | } | ||
318 | } | ||
319 | cell->dirty_textures = 0x0; | ||
320 | } | ||
321 | |||
322 | if (cell->dirty & CELL_NEW_VERTEX_INFO) { | ||
323 | emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO, | ||
324 | &cell->vertex_info, sizeof(struct vertex_info)); | ||
325 | } | ||
326 | |||
327 | #if 0 | ||
328 | if (cell->dirty & CELL_NEW_VS) { | ||
329 | const struct draw_context *const draw = cell->draw; | ||
330 | struct cell_shader_info info; | ||
331 | |||
332 | info.num_outputs = draw_num_shader_outputs(draw); | ||
333 | info.declarations = (uintptr_t) draw->vs.machine.Declarations; | ||
334 | info.num_declarations = draw->vs.machine.NumDeclarations; | ||
335 | info.instructions = (uintptr_t) draw->vs.machine.Instructions; | ||
336 | info.num_instructions = draw->vs.machine.NumInstructions; | ||
337 | info.immediates = (uintptr_t) draw->vs.machine.Imms; | ||
338 | info.num_immediates = draw->vs.machine.ImmLimit / 4; | ||
339 | |||
340 | emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info)); | ||
341 | } | ||
342 | #endif | ||
343 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.h b/src/gallium/drivers/cell/ppu/cell_state_emit.h deleted file mode 100644 index 59f8affe8d3..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.h +++ /dev/null | |||
@@ -1,36 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #ifndef CELL_STATE_EMIT_H | ||
29 | #define CELL_STATE_EMIT_H | ||
30 | |||
31 | |||
32 | extern void | ||
33 | cell_emit_state(struct cell_context *cell); | ||
34 | |||
35 | |||
36 | #endif /* CELL_STATE_EMIT_H */ | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c deleted file mode 100644 index dc33e7ccc2c..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ /dev/null | |||
@@ -1,1432 +0,0 @@ | |||
1 | /* | ||
2 | * (C) Copyright IBM Corporation 2008 | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
6 | * copy of this software and associated documentation files (the "Software"), | ||
7 | * to deal in the Software without restriction, including without limitation | ||
8 | * on the rights to use, copy, modify, merge, publish, distribute, sub | ||
9 | * license, and/or sell copies of the Software, and to permit persons to whom | ||
10 | * the Software is furnished to do so, subject to the following conditions: | ||
11 | * | ||
12 | * The above copyright notice and this permission notice (including the next | ||
13 | * paragraph) shall be included in all copies or substantial portions of the | ||
14 | * Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL | ||
19 | * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, | ||
20 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | ||
21 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | ||
22 | * USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | /** | ||
26 | * \file | ||
27 | * Generate code to perform all per-fragment operations. | ||
28 | * | ||
29 | * Code generated by these functions performs alpha, depth, and stencil | ||
30 | * testing as well as alpha blending. | ||
31 | * | ||
32 | * \note | ||
33 | * Occlusion query is not supported, but this is the right place to add that | ||
34 | * support. | ||
35 | * | ||
36 | * \author Ian Romanick <idr@us.ibm.com> | ||
37 | */ | ||
38 | |||
39 | #include "pipe/p_defines.h" | ||
40 | #include "pipe/p_state.h" | ||
41 | |||
42 | #include "cell_context.h" | ||
43 | |||
44 | #include "rtasm/rtasm_ppc_spe.h" | ||
45 | |||
46 | |||
47 | /** | ||
48 | * Generate code to perform alpha testing. | ||
49 | * | ||
50 | * The code generated by this function uses the register specificed by | ||
51 | * \c mask as both an input and an output. | ||
52 | * | ||
53 | * \param dsa Current alpha-test state | ||
54 | * \param f Function to which code should be appended | ||
55 | * \param mask Index of register containing active fragment mask | ||
56 | * \param alphas Index of register containing per-fragment alpha values | ||
57 | * | ||
58 | * \note Emits a maximum of 6 instructions. | ||
59 | */ | ||
60 | static void | ||
61 | emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, | ||
62 | struct spe_function *f, int mask, int alphas) | ||
63 | { | ||
64 | /* If the alpha function is either NEVER or ALWAYS, there is no need to | ||
65 | * load the reference value into a register. ALWAYS is a fairly common | ||
66 | * case, and this optimization saves 2 instructions. | ||
67 | */ | ||
68 | if (dsa->alpha.enabled | ||
69 | && (dsa->alpha.func != PIPE_FUNC_NEVER) | ||
70 | && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { | ||
71 | int ref = spe_allocate_available_register(f); | ||
72 | int tmp_a = spe_allocate_available_register(f); | ||
73 | int tmp_b = spe_allocate_available_register(f); | ||
74 | union { | ||
75 | float f; | ||
76 | unsigned u; | ||
77 | } ref_val; | ||
78 | boolean complement = FALSE; | ||
79 | |||
80 | ref_val.f = dsa->alpha.ref; | ||
81 | |||
82 | spe_il(f, ref, ref_val.u & 0x0000ffff); | ||
83 | spe_ilh(f, ref, ref_val.u >> 16); | ||
84 | |||
85 | switch (dsa->alpha.func) { | ||
86 | case PIPE_FUNC_NOTEQUAL: | ||
87 | complement = TRUE; | ||
88 | /* FALLTHROUGH */ | ||
89 | |||
90 | case PIPE_FUNC_EQUAL: | ||
91 | spe_fceq(f, tmp_a, ref, alphas); | ||
92 | break; | ||
93 | |||
94 | case PIPE_FUNC_LEQUAL: | ||
95 | complement = TRUE; | ||
96 | /* FALLTHROUGH */ | ||
97 | |||
98 | case PIPE_FUNC_GREATER: | ||
99 | spe_fcgt(f, tmp_a, ref, alphas); | ||
100 | break; | ||
101 | |||
102 | case PIPE_FUNC_LESS: | ||
103 | complement = TRUE; | ||
104 | /* FALLTHROUGH */ | ||
105 | |||
106 | case PIPE_FUNC_GEQUAL: | ||
107 | spe_fcgt(f, tmp_a, ref, alphas); | ||
108 | spe_fceq(f, tmp_b, ref, alphas); | ||
109 | spe_or(f, tmp_a, tmp_b, tmp_a); | ||
110 | break; | ||
111 | |||
112 | case PIPE_FUNC_ALWAYS: | ||
113 | case PIPE_FUNC_NEVER: | ||
114 | default: | ||
115 | assert(0); | ||
116 | break; | ||
117 | } | ||
118 | |||
119 | if (complement) { | ||
120 | spe_andc(f, mask, mask, tmp_a); | ||
121 | } else { | ||
122 | spe_and(f, mask, mask, tmp_a); | ||
123 | } | ||
124 | |||
125 | spe_release_register(f, ref); | ||
126 | spe_release_register(f, tmp_a); | ||
127 | spe_release_register(f, tmp_b); | ||
128 | } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) { | ||
129 | spe_il(f, mask, 0); | ||
130 | } | ||
131 | } | ||
132 | |||
133 | |||
134 | /** | ||
135 | * Generate code to perform Z testing. Four Z values are tested at once. | ||
136 | * \param dsa Current depth-test state | ||
137 | * \param f Function to which code should be appended | ||
138 | * \param mask Index of register to contain depth-pass mask | ||
139 | * \param stored Index of register containing values from depth buffer | ||
140 | * \param calculated Index of register containing per-fragment depth values | ||
141 | * | ||
142 | * \return | ||
143 | * If the calculated depth comparison mask is the actual mask, \c FALSE is | ||
144 | * returned. If the calculated depth comparison mask is the compliment of | ||
145 | * the actual mask, \c TRUE is returned. | ||
146 | * | ||
147 | * \note Emits a maximum of 3 instructions. | ||
148 | */ | ||
149 | static boolean | ||
150 | emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa, | ||
151 | struct spe_function *f, int mask, int stored, int calculated) | ||
152 | { | ||
153 | unsigned func = (dsa->depth.enabled) | ||
154 | ? dsa->depth.func : PIPE_FUNC_ALWAYS; | ||
155 | int tmp = spe_allocate_available_register(f); | ||
156 | boolean compliment = FALSE; | ||
157 | |||
158 | switch (func) { | ||
159 | case PIPE_FUNC_NEVER: | ||
160 | spe_il(f, mask, 0); | ||
161 | break; | ||
162 | |||
163 | case PIPE_FUNC_NOTEQUAL: | ||
164 | compliment = TRUE; | ||
165 | /* FALLTHROUGH */ | ||
166 | case PIPE_FUNC_EQUAL: | ||
167 | spe_ceq(f, mask, calculated, stored); | ||
168 | break; | ||
169 | |||
170 | case PIPE_FUNC_LEQUAL: | ||
171 | compliment = TRUE; | ||
172 | /* FALLTHROUGH */ | ||
173 | case PIPE_FUNC_GREATER: | ||
174 | spe_clgt(f, mask, calculated, stored); | ||
175 | break; | ||
176 | |||
177 | case PIPE_FUNC_LESS: | ||
178 | compliment = TRUE; | ||
179 | /* FALLTHROUGH */ | ||
180 | case PIPE_FUNC_GEQUAL: | ||
181 | spe_clgt(f, mask, calculated, stored); | ||
182 | spe_ceq(f, tmp, calculated, stored); | ||
183 | spe_or(f, mask, mask, tmp); | ||
184 | break; | ||
185 | |||
186 | case PIPE_FUNC_ALWAYS: | ||
187 | spe_il(f, mask, ~0); | ||
188 | break; | ||
189 | |||
190 | default: | ||
191 | assert(0); | ||
192 | break; | ||
193 | } | ||
194 | |||
195 | spe_release_register(f, tmp); | ||
196 | return compliment; | ||
197 | } | ||
198 | |||
199 | |||
200 | /** | ||
201 | * Generate code to apply the stencil operation (after testing). | ||
202 | * \note Emits a maximum of 5 instructions. | ||
203 | * | ||
204 | * \warning | ||
205 | * Since \c out and \c in might be the same register, this routine cannot | ||
206 | * generate code that uses \c out as a temporary. | ||
207 | */ | ||
208 | static void | ||
209 | emit_stencil_op(struct spe_function *f, | ||
210 | int out, int in, int mask, unsigned op, unsigned ref) | ||
211 | { | ||
212 | const int clamp = spe_allocate_available_register(f); | ||
213 | const int clamp_mask = spe_allocate_available_register(f); | ||
214 | const int result = spe_allocate_available_register(f); | ||
215 | |||
216 | switch(op) { | ||
217 | case PIPE_STENCIL_OP_KEEP: | ||
218 | assert(0); | ||
219 | case PIPE_STENCIL_OP_ZERO: | ||
220 | spe_il(f, result, 0); | ||
221 | break; | ||
222 | case PIPE_STENCIL_OP_REPLACE: | ||
223 | spe_il(f, result, ref); | ||
224 | break; | ||
225 | case PIPE_STENCIL_OP_INCR: | ||
226 | /* clamp = [0xff, 0xff, 0xff, 0xff] */ | ||
227 | spe_il(f, clamp, 0x0ff); | ||
228 | /* result[i] = in[i] + 1 */ | ||
229 | spe_ai(f, result, in, 1); | ||
230 | /* clamp_mask[i] = (result[i] > 0xff) */ | ||
231 | spe_clgti(f, clamp_mask, result, 0x0ff); | ||
232 | /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */ | ||
233 | spe_selb(f, result, result, clamp, clamp_mask); | ||
234 | break; | ||
235 | case PIPE_STENCIL_OP_DECR: | ||
236 | spe_il(f, clamp, 0); | ||
237 | spe_ai(f, result, in, -1); | ||
238 | |||
239 | /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned | ||
240 | * arithmetic. | ||
241 | */ | ||
242 | spe_clgti(f, clamp_mask, result, 0x0ff); | ||
243 | spe_selb(f, result, result, clamp, clamp_mask); | ||
244 | break; | ||
245 | case PIPE_STENCIL_OP_INCR_WRAP: | ||
246 | spe_ai(f, result, in, 1); | ||
247 | break; | ||
248 | case PIPE_STENCIL_OP_DECR_WRAP: | ||
249 | spe_ai(f, result, in, -1); | ||
250 | break; | ||
251 | case PIPE_STENCIL_OP_INVERT: | ||
252 | spe_nor(f, result, in, in); | ||
253 | break; | ||
254 | default: | ||
255 | assert(0); | ||
256 | } | ||
257 | |||
258 | spe_selb(f, out, in, result, mask); | ||
259 | |||
260 | spe_release_register(f, result); | ||
261 | spe_release_register(f, clamp_mask); | ||
262 | spe_release_register(f, clamp); | ||
263 | } | ||
264 | |||
265 | |||
266 | /** | ||
267 | * Generate code to do stencil test. Four pixels are tested at once. | ||
268 | * \param dsa Depth / stencil test state | ||
269 | * \param face 0 for front face, 1 for back face | ||
270 | * \param f Function to append instructions to | ||
271 | * \param mask Register containing mask of fragments passing the | ||
272 | * alpha test | ||
273 | * \param depth_mask Register containing mask of fragments passing the | ||
274 | * depth test | ||
275 | * \param depth_complement Is \c depth_mask the complement of the actual mask? | ||
276 | * \param stencil Register containing values from stencil buffer | ||
277 | * \param depth_pass Register to store mask of fragments passing stencil test | ||
278 | * and depth test | ||
279 | * | ||
280 | * \note | ||
281 | * Emits a maximum of 10 + (3 * 5) = 25 instructions. | ||
282 | */ | ||
283 | static int | ||
284 | emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, | ||
285 | struct pipe_stencil_ref *sr, | ||
286 | unsigned face, | ||
287 | struct spe_function *f, | ||
288 | int mask, | ||
289 | int depth_mask, | ||
290 | boolean depth_complement, | ||
291 | int stencil, | ||
292 | int depth_pass) | ||
293 | { | ||
294 | int stencil_fail = spe_allocate_available_register(f); | ||
295 | int depth_fail = spe_allocate_available_register(f); | ||
296 | int stencil_mask = spe_allocate_available_register(f); | ||
297 | int stencil_pass = spe_allocate_available_register(f); | ||
298 | int face_stencil = spe_allocate_available_register(f); | ||
299 | int stencil_src = stencil; | ||
300 | const unsigned ref = (sr->ref_value[face] | ||
301 | & dsa->stencil[face].valuemask); | ||
302 | boolean complement = FALSE; | ||
303 | int stored; | ||
304 | int tmp = spe_allocate_available_register(f); | ||
305 | |||
306 | |||
307 | if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) | ||
308 | && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS) | ||
309 | && (dsa->stencil[face].valuemask != 0x0ff)) { | ||
310 | stored = spe_allocate_available_register(f); | ||
311 | spe_andi(f, stored, stencil, dsa->stencil[face].valuemask); | ||
312 | } else { | ||
313 | stored = stencil; | ||
314 | } | ||
315 | |||
316 | |||
317 | switch (dsa->stencil[face].func) { | ||
318 | case PIPE_FUNC_NEVER: | ||
319 | spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */ | ||
320 | break; | ||
321 | |||
322 | case PIPE_FUNC_NOTEQUAL: | ||
323 | complement = TRUE; | ||
324 | /* FALLTHROUGH */ | ||
325 | case PIPE_FUNC_EQUAL: | ||
326 | /* stencil_mask[i] = (stored[i] == ref) */ | ||
327 | spe_ceqi(f, stencil_mask, stored, ref); | ||
328 | break; | ||
329 | |||
330 | case PIPE_FUNC_LEQUAL: | ||
331 | complement = TRUE; | ||
332 | /* FALLTHROUGH */ | ||
333 | case PIPE_FUNC_GREATER: | ||
334 | complement = TRUE; | ||
335 | /* stencil_mask[i] = (stored[i] > ref) */ | ||
336 | spe_clgti(f, stencil_mask, stored, ref); | ||
337 | break; | ||
338 | |||
339 | case PIPE_FUNC_LESS: | ||
340 | complement = TRUE; | ||
341 | /* FALLTHROUGH */ | ||
342 | case PIPE_FUNC_GEQUAL: | ||
343 | /* stencil_mask[i] = (stored[i] > ref) */ | ||
344 | spe_clgti(f, stencil_mask, stored, ref); | ||
345 | /* tmp[i] = (stored[i] == ref) */ | ||
346 | spe_ceqi(f, tmp, stored, ref); | ||
347 | /* stencil_mask[i] = stencil_mask[i] | tmp[i] */ | ||
348 | spe_or(f, stencil_mask, stencil_mask, tmp); | ||
349 | break; | ||
350 | |||
351 | case PIPE_FUNC_ALWAYS: | ||
352 | /* See comment below. */ | ||
353 | break; | ||
354 | |||
355 | default: | ||
356 | assert(0); | ||
357 | break; | ||
358 | } | ||
359 | |||
360 | if (stored != stencil) { | ||
361 | spe_release_register(f, stored); | ||
362 | } | ||
363 | spe_release_register(f, tmp); | ||
364 | |||
365 | |||
366 | /* ALWAYS is a very common stencil-test, so some effort is applied to | ||
367 | * optimize that case. The stencil-pass mask is the same as the input | ||
368 | * fragment mask. This makes the stencil-test (above) a no-op, and the | ||
369 | * input fragment mask can be "renamed" the stencil-pass mask. | ||
370 | */ | ||
371 | if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) { | ||
372 | spe_release_register(f, stencil_pass); | ||
373 | stencil_pass = mask; | ||
374 | } else { | ||
375 | if (complement) { | ||
376 | spe_andc(f, stencil_pass, mask, stencil_mask); | ||
377 | } else { | ||
378 | spe_and(f, stencil_pass, mask, stencil_mask); | ||
379 | } | ||
380 | } | ||
381 | |||
382 | if (depth_complement) { | ||
383 | spe_andc(f, depth_pass, stencil_pass, depth_mask); | ||
384 | } else { | ||
385 | spe_and(f, depth_pass, stencil_pass, depth_mask); | ||
386 | } | ||
387 | |||
388 | |||
389 | /* Conditionally emit code to update the stencil value under various | ||
390 | * conditions. Note that there is no need to generate code under the | ||
391 | * following circumstances: | ||
392 | * | ||
393 | * - Stencil write mask is zero. | ||
394 | * - For stencil-fail if the stencil test is ALWAYS | ||
395 | * - For depth-fail if the stencil test is NEVER | ||
396 | * - For depth-pass if the stencil test is NEVER | ||
397 | * - Any of the 3 conditions if the operation is KEEP | ||
398 | */ | ||
399 | if (dsa->stencil[face].writemask != 0) { | ||
400 | if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS) | ||
401 | && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) { | ||
402 | if (complement) { | ||
403 | spe_and(f, stencil_fail, mask, stencil_mask); | ||
404 | } else { | ||
405 | spe_andc(f, stencil_fail, mask, stencil_mask); | ||
406 | } | ||
407 | |||
408 | emit_stencil_op(f, face_stencil, stencil_src, stencil_fail, | ||
409 | dsa->stencil[face].fail_op, | ||
410 | sr->ref_value[face]); | ||
411 | |||
412 | stencil_src = face_stencil; | ||
413 | } | ||
414 | |||
415 | if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) | ||
416 | && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) { | ||
417 | if (depth_complement) { | ||
418 | spe_and(f, depth_fail, stencil_pass, depth_mask); | ||
419 | } else { | ||
420 | spe_andc(f, depth_fail, stencil_pass, depth_mask); | ||
421 | } | ||
422 | |||
423 | emit_stencil_op(f, face_stencil, stencil_src, depth_fail, | ||
424 | dsa->stencil[face].zfail_op, | ||
425 | sr->ref_value[face]); | ||
426 | stencil_src = face_stencil; | ||
427 | } | ||
428 | |||
429 | if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) | ||
430 | && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) { | ||
431 | emit_stencil_op(f, face_stencil, stencil_src, depth_pass, | ||
432 | dsa->stencil[face].zpass_op, | ||
433 | sr->ref_value[face]); | ||
434 | stencil_src = face_stencil; | ||
435 | } | ||
436 | } | ||
437 | |||
438 | spe_release_register(f, stencil_fail); | ||
439 | spe_release_register(f, depth_fail); | ||
440 | spe_release_register(f, stencil_mask); | ||
441 | if (stencil_pass != mask) { | ||
442 | spe_release_register(f, stencil_pass); | ||
443 | } | ||
444 | |||
445 | /* If all of the stencil operations were KEEP or the stencil write mask was | ||
446 | * zero, "stencil_src" will still be set to "stencil". In this case | ||
447 | * release the "face_stencil" register. Otherwise apply the stencil write | ||
448 | * mask to select bits from the calculated stencil value and the previous | ||
449 | * stencil value. | ||
450 | */ | ||
451 | if (stencil_src == stencil) { | ||
452 | spe_release_register(f, face_stencil); | ||
453 | } else if (dsa->stencil[face].writemask != 0x0ff) { | ||
454 | int tmp = spe_allocate_available_register(f); | ||
455 | |||
456 | spe_il(f, tmp, dsa->stencil[face].writemask); | ||
457 | spe_selb(f, stencil_src, stencil, stencil_src, tmp); | ||
458 | |||
459 | spe_release_register(f, tmp); | ||
460 | } | ||
461 | |||
462 | return stencil_src; | ||
463 | } | ||
464 | |||
465 | |||
466 | void | ||
467 | cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa, | ||
468 | struct pipe_stencil_ref *sr) | ||
469 | { | ||
470 | struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base; | ||
471 | struct spe_function *const f = &cdsa->code; | ||
472 | |||
473 | /* This code generates a maximum of 6 (alpha test) + 3 (depth test) | ||
474 | * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round | ||
475 | * up to 64 to make it a happy power-of-two. | ||
476 | */ | ||
477 | spe_init_func(f, SPE_INST_SIZE * 64); | ||
478 | |||
479 | |||
480 | /* Allocate registers for the function's input parameters. Cleverly (and | ||
481 | * clever code is usually dangerous, but I couldn't resist) the generated | ||
482 | * function returns a structure. Returned structures start with register | ||
483 | * 3, and the structure fields are ordered to match up exactly with the | ||
484 | * input parameters. | ||
485 | */ | ||
486 | int mask = spe_allocate_register(f, 3); | ||
487 | int depth = spe_allocate_register(f, 4); | ||
488 | int stencil = spe_allocate_register(f, 5); | ||
489 | int zvals = spe_allocate_register(f, 6); | ||
490 | int frag_a = spe_allocate_register(f, 7); | ||
491 | int facing = spe_allocate_register(f, 8); | ||
492 | |||
493 | int depth_mask = spe_allocate_available_register(f); | ||
494 | |||
495 | boolean depth_complement; | ||
496 | |||
497 | |||
498 | emit_alpha_test(dsa, f, mask, frag_a); | ||
499 | |||
500 | depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals); | ||
501 | |||
502 | if (dsa->stencil[0].enabled) { | ||
503 | const int front_depth_pass = spe_allocate_available_register(f); | ||
504 | int front_stencil = emit_stencil_test(dsa, sr, 0, f, mask, | ||
505 | depth_mask, depth_complement, | ||
506 | stencil, front_depth_pass); | ||
507 | |||
508 | if (dsa->stencil[1].enabled) { | ||
509 | const int back_depth_pass = spe_allocate_available_register(f); | ||
510 | int back_stencil = emit_stencil_test(dsa, sr, 1, f, mask, | ||
511 | depth_mask, depth_complement, | ||
512 | stencil, back_depth_pass); | ||
513 | |||
514 | /* If the front facing stencil value and the back facing stencil | ||
515 | * value are stored in the same register, there is no need to select | ||
516 | * a value based on the facing. This can happen if the stencil value | ||
517 | * was not modified due to the write masks being zero, the stencil | ||
518 | * operations being KEEP, etc. | ||
519 | */ | ||
520 | if (front_stencil != back_stencil) { | ||
521 | spe_selb(f, stencil, back_stencil, front_stencil, facing); | ||
522 | } | ||
523 | |||
524 | if (back_stencil != stencil) { | ||
525 | spe_release_register(f, back_stencil); | ||
526 | } | ||
527 | |||
528 | if (front_stencil != stencil) { | ||
529 | spe_release_register(f, front_stencil); | ||
530 | } | ||
531 | |||
532 | spe_selb(f, mask, back_depth_pass, front_depth_pass, facing); | ||
533 | |||
534 | spe_release_register(f, back_depth_pass); | ||
535 | } else { | ||
536 | if (front_stencil != stencil) { | ||
537 | spe_or(f, stencil, front_stencil, front_stencil); | ||
538 | spe_release_register(f, front_stencil); | ||
539 | } | ||
540 | spe_or(f, mask, front_depth_pass, front_depth_pass); | ||
541 | } | ||
542 | |||
543 | spe_release_register(f, front_depth_pass); | ||
544 | } else if (dsa->depth.enabled) { | ||
545 | if (depth_complement) { | ||
546 | spe_andc(f, mask, mask, depth_mask); | ||
547 | } else { | ||
548 | spe_and(f, mask, mask, depth_mask); | ||
549 | } | ||
550 | } | ||
551 | |||
552 | if (dsa->depth.writemask) { | ||
553 | spe_selb(f, depth, depth, zvals, mask); | ||
554 | } | ||
555 | |||
556 | spe_bi(f, 0, 0, 0); /* return from function call */ | ||
557 | |||
558 | |||
559 | #if 0 | ||
560 | { | ||
561 | const uint32_t *p = f->store; | ||
562 | unsigned i; | ||
563 | |||
564 | printf("# alpha (%sabled)\n", | ||
565 | (dsa->alpha.enabled) ? "en" : "dis"); | ||
566 | printf("# func: %u\n", dsa->alpha.func); | ||
567 | printf("# ref: %.2f\n", dsa->alpha.ref); | ||
568 | |||
569 | printf("# depth (%sabled)\n", | ||
570 | (dsa->depth.enabled) ? "en" : "dis"); | ||
571 | printf("# func: %u\n", dsa->depth.func); | ||
572 | |||
573 | for (i = 0; i < 2; i++) { | ||
574 | printf("# %s stencil (%sabled)\n", | ||
575 | (i == 0) ? "front" : "back", | ||
576 | (dsa->stencil[i].enabled) ? "en" : "dis"); | ||
577 | |||
578 | printf("# func: %u\n", dsa->stencil[i].func); | ||
579 | printf("# op (sf, zf, zp): %u %u %u\n", | ||
580 | dsa->stencil[i].fail_op, | ||
581 | dsa->stencil[i].zfail_op, | ||
582 | dsa->stencil[i].zpass_op); | ||
583 | printf("# ref value / value mask / write mask: %02x %02x %02x\n", | ||
584 | sr->ref_value[i], | ||
585 | dsa->stencil[i].valuemask, | ||
586 | dsa->stencil[i].writemask); | ||
587 | } | ||
588 | |||
589 | printf("\t.text\n"); | ||
590 | for (/* empty */; p < f->csr; p++) { | ||
591 | printf("\t.long\t0x%04x\n", *p); | ||
592 | } | ||
593 | fflush(stdout); | ||
594 | } | ||
595 | #endif | ||
596 | } | ||
597 | |||
598 | |||
599 | /** | ||
600 | * \note Emits a maximum of 3 instructions | ||
601 | */ | ||
602 | static int | ||
603 | emit_alpha_factor_calculation(struct spe_function *f, | ||
604 | unsigned factor, | ||
605 | int src_alpha, int dst_alpha, int const_alpha) | ||
606 | { | ||
607 | int factor_reg; | ||
608 | int tmp; | ||
609 | |||
610 | |||
611 | switch (factor) { | ||
612 | case PIPE_BLENDFACTOR_ONE: | ||
613 | factor_reg = -1; | ||
614 | break; | ||
615 | |||
616 | case PIPE_BLENDFACTOR_SRC_ALPHA: | ||
617 | factor_reg = spe_allocate_available_register(f); | ||
618 | |||
619 | spe_or(f, factor_reg, src_alpha, src_alpha); | ||
620 | break; | ||
621 | |||
622 | case PIPE_BLENDFACTOR_DST_ALPHA: | ||
623 | factor_reg = dst_alpha; | ||
624 | break; | ||
625 | |||
626 | case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: | ||
627 | factor_reg = -1; | ||
628 | break; | ||
629 | |||
630 | case PIPE_BLENDFACTOR_INV_CONST_ALPHA: | ||
631 | factor_reg = spe_allocate_available_register(f); | ||
632 | |||
633 | tmp = spe_allocate_available_register(f); | ||
634 | spe_il(f, tmp, 1); | ||
635 | spe_cuflt(f, tmp, tmp, 0); | ||
636 | spe_fs(f, factor_reg, tmp, const_alpha); | ||
637 | spe_release_register(f, tmp); | ||
638 | break; | ||
639 | |||
640 | case PIPE_BLENDFACTOR_CONST_ALPHA: | ||
641 | factor_reg = const_alpha; | ||
642 | break; | ||
643 | |||
644 | case PIPE_BLENDFACTOR_ZERO: | ||
645 | factor_reg = -1; | ||
646 | break; | ||
647 | |||
648 | case PIPE_BLENDFACTOR_INV_SRC_ALPHA: | ||
649 | tmp = spe_allocate_available_register(f); | ||
650 | factor_reg = spe_allocate_available_register(f); | ||
651 | |||
652 | spe_il(f, tmp, 1); | ||
653 | spe_cuflt(f, tmp, tmp, 0); | ||
654 | spe_fs(f, factor_reg, tmp, src_alpha); | ||
655 | |||
656 | spe_release_register(f, tmp); | ||
657 | break; | ||
658 | |||
659 | case PIPE_BLENDFACTOR_INV_DST_ALPHA: | ||
660 | tmp = spe_allocate_available_register(f); | ||
661 | factor_reg = spe_allocate_available_register(f); | ||
662 | |||
663 | spe_il(f, tmp, 1); | ||
664 | spe_cuflt(f, tmp, tmp, 0); | ||
665 | spe_fs(f, factor_reg, tmp, dst_alpha); | ||
666 | |||
667 | spe_release_register(f, tmp); | ||
668 | break; | ||
669 | |||
670 | case PIPE_BLENDFACTOR_SRC1_ALPHA: | ||
671 | case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: | ||
672 | default: | ||
673 | assert(0); | ||
674 | factor_reg = -1; | ||
675 | break; | ||
676 | } | ||
677 | |||
678 | return factor_reg; | ||
679 | } | ||
680 | |||
681 | |||
682 | /** | ||
683 | * \note Emits a maximum of 6 instructions | ||
684 | */ | ||
685 | static void | ||
686 | emit_color_factor_calculation(struct spe_function *f, | ||
687 | unsigned sF, unsigned mask, | ||
688 | const int *src, | ||
689 | const int *dst, | ||
690 | const int *const_color, | ||
691 | int *factor) | ||
692 | { | ||
693 | int tmp; | ||
694 | unsigned i; | ||
695 | |||
696 | |||
697 | factor[0] = -1; | ||
698 | factor[1] = -1; | ||
699 | factor[2] = -1; | ||
700 | factor[3] = -1; | ||
701 | |||
702 | switch (sF) { | ||
703 | case PIPE_BLENDFACTOR_ONE: | ||
704 | break; | ||
705 | |||
706 | case PIPE_BLENDFACTOR_SRC_COLOR: | ||
707 | for (i = 0; i < 3; ++i) { | ||
708 | if ((mask & (1U << i)) != 0) { | ||
709 | factor[i] = spe_allocate_available_register(f); | ||
710 | spe_or(f, factor[i], src[i], src[i]); | ||
711 | } | ||
712 | } | ||
713 | break; | ||
714 | |||
715 | case PIPE_BLENDFACTOR_SRC_ALPHA: | ||
716 | factor[0] = spe_allocate_available_register(f); | ||
717 | factor[1] = factor[0]; | ||
718 | factor[2] = factor[0]; | ||
719 | |||
720 | spe_or(f, factor[0], src[3], src[3]); | ||
721 | break; | ||
722 | |||
723 | case PIPE_BLENDFACTOR_DST_ALPHA: | ||
724 | factor[0] = dst[3]; | ||
725 | factor[1] = dst[3]; | ||
726 | factor[2] = dst[3]; | ||
727 | break; | ||
728 | |||
729 | case PIPE_BLENDFACTOR_DST_COLOR: | ||
730 | factor[0] = dst[0]; | ||
731 | factor[1] = dst[1]; | ||
732 | factor[2] = dst[2]; | ||
733 | break; | ||
734 | |||
735 | case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: | ||
736 | tmp = spe_allocate_available_register(f); | ||
737 | factor[0] = spe_allocate_available_register(f); | ||
738 | factor[1] = factor[0]; | ||
739 | factor[2] = factor[0]; | ||
740 | |||
741 | /* Alpha saturate means min(As, 1-Ad). | ||
742 | */ | ||
743 | spe_il(f, tmp, 1); | ||
744 | spe_cuflt(f, tmp, tmp, 0); | ||
745 | spe_fs(f, tmp, tmp, dst[3]); | ||
746 | spe_fcgt(f, factor[0], tmp, src[3]); | ||
747 | spe_selb(f, factor[0], src[3], tmp, factor[0]); | ||
748 | |||
749 | spe_release_register(f, tmp); | ||
750 | break; | ||
751 | |||
752 | case PIPE_BLENDFACTOR_INV_CONST_COLOR: | ||
753 | tmp = spe_allocate_available_register(f); | ||
754 | spe_il(f, tmp, 1); | ||
755 | spe_cuflt(f, tmp, tmp, 0); | ||
756 | |||
757 | for (i = 0; i < 3; i++) { | ||
758 | factor[i] = spe_allocate_available_register(f); | ||
759 | |||
760 | spe_fs(f, factor[i], tmp, const_color[i]); | ||
761 | } | ||
762 | spe_release_register(f, tmp); | ||
763 | break; | ||
764 | |||
765 | case PIPE_BLENDFACTOR_CONST_COLOR: | ||
766 | for (i = 0; i < 3; i++) { | ||
767 | factor[i] = const_color[i]; | ||
768 | } | ||
769 | break; | ||
770 | |||
771 | case PIPE_BLENDFACTOR_INV_CONST_ALPHA: | ||
772 | factor[0] = spe_allocate_available_register(f); | ||
773 | factor[1] = factor[0]; | ||
774 | factor[2] = factor[0]; | ||
775 | |||
776 | tmp = spe_allocate_available_register(f); | ||
777 | spe_il(f, tmp, 1); | ||
778 | spe_cuflt(f, tmp, tmp, 0); | ||
779 | spe_fs(f, factor[0], tmp, const_color[3]); | ||
780 | spe_release_register(f, tmp); | ||
781 | break; | ||
782 | |||
783 | case PIPE_BLENDFACTOR_CONST_ALPHA: | ||
784 | factor[0] = const_color[3]; | ||
785 | factor[1] = factor[0]; | ||
786 | factor[2] = factor[0]; | ||
787 | break; | ||
788 | |||
789 | case PIPE_BLENDFACTOR_ZERO: | ||
790 | break; | ||
791 | |||
792 | case PIPE_BLENDFACTOR_INV_SRC_COLOR: | ||
793 | tmp = spe_allocate_available_register(f); | ||
794 | |||
795 | spe_il(f, tmp, 1); | ||
796 | spe_cuflt(f, tmp, tmp, 0); | ||
797 | |||
798 | for (i = 0; i < 3; ++i) { | ||
799 | if ((mask & (1U << i)) != 0) { | ||
800 | factor[i] = spe_allocate_available_register(f); | ||
801 | spe_fs(f, factor[i], tmp, src[i]); | ||
802 | } | ||
803 | } | ||
804 | |||
805 | spe_release_register(f, tmp); | ||
806 | break; | ||
807 | |||
808 | case PIPE_BLENDFACTOR_INV_SRC_ALPHA: | ||
809 | tmp = spe_allocate_available_register(f); | ||
810 | factor[0] = spe_allocate_available_register(f); | ||
811 | factor[1] = factor[0]; | ||
812 | factor[2] = factor[0]; | ||
813 | |||
814 | spe_il(f, tmp, 1); | ||
815 | spe_cuflt(f, tmp, tmp, 0); | ||
816 | spe_fs(f, factor[0], tmp, src[3]); | ||
817 | |||
818 | spe_release_register(f, tmp); | ||
819 | break; | ||
820 | |||
821 | case PIPE_BLENDFACTOR_INV_DST_ALPHA: | ||
822 | tmp = spe_allocate_available_register(f); | ||
823 | factor[0] = spe_allocate_available_register(f); | ||
824 | factor[1] = factor[0]; | ||
825 | factor[2] = factor[0]; | ||
826 | |||
827 | spe_il(f, tmp, 1); | ||
828 | spe_cuflt(f, tmp, tmp, 0); | ||
829 | spe_fs(f, factor[0], tmp, dst[3]); | ||
830 | |||
831 | spe_release_register(f, tmp); | ||
832 | break; | ||
833 | |||
834 | case PIPE_BLENDFACTOR_INV_DST_COLOR: | ||
835 | tmp = spe_allocate_available_register(f); | ||
836 | |||
837 | spe_il(f, tmp, 1); | ||
838 | spe_cuflt(f, tmp, tmp, 0); | ||
839 | |||
840 | for (i = 0; i < 3; ++i) { | ||
841 | if ((mask & (1U << i)) != 0) { | ||
842 | factor[i] = spe_allocate_available_register(f); | ||
843 | spe_fs(f, factor[i], tmp, dst[i]); | ||
844 | } | ||
845 | } | ||
846 | |||
847 | spe_release_register(f, tmp); | ||
848 | break; | ||
849 | |||
850 | case PIPE_BLENDFACTOR_SRC1_COLOR: | ||
851 | case PIPE_BLENDFACTOR_SRC1_ALPHA: | ||
852 | case PIPE_BLENDFACTOR_INV_SRC1_COLOR: | ||
853 | case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: | ||
854 | default: | ||
855 | assert(0); | ||
856 | } | ||
857 | } | ||
858 | |||
859 | |||
860 | static void | ||
861 | emit_blend_calculation(struct spe_function *f, | ||
862 | unsigned func, unsigned sF, unsigned dF, | ||
863 | int src, int src_factor, int dst, int dst_factor) | ||
864 | { | ||
865 | int tmp = spe_allocate_available_register(f); | ||
866 | |||
867 | switch (func) { | ||
868 | case PIPE_BLEND_ADD: | ||
869 | if (sF == PIPE_BLENDFACTOR_ONE) { | ||
870 | if (dF == PIPE_BLENDFACTOR_ZERO) { | ||
871 | /* Do nothing. */ | ||
872 | } else if (dF == PIPE_BLENDFACTOR_ONE) { | ||
873 | spe_fa(f, src, src, dst); | ||
874 | } | ||
875 | } else if (sF == PIPE_BLENDFACTOR_ZERO) { | ||
876 | if (dF == PIPE_BLENDFACTOR_ZERO) { | ||
877 | spe_il(f, src, 0); | ||
878 | } else if (dF == PIPE_BLENDFACTOR_ONE) { | ||
879 | spe_or(f, src, dst, dst); | ||
880 | } else { | ||
881 | spe_fm(f, src, dst, dst_factor); | ||
882 | } | ||
883 | } else if (dF == PIPE_BLENDFACTOR_ZERO) { | ||
884 | spe_fm(f, src, src, src_factor); | ||
885 | } else { | ||
886 | spe_fm(f, tmp, dst, dst_factor); | ||
887 | spe_fma(f, src, src, src_factor, tmp); | ||
888 | } | ||
889 | break; | ||
890 | |||
891 | case PIPE_BLEND_SUBTRACT: | ||
892 | if (sF == PIPE_BLENDFACTOR_ONE) { | ||
893 | if (dF == PIPE_BLENDFACTOR_ZERO) { | ||
894 | /* Do nothing. */ | ||
895 | } else if (dF == PIPE_BLENDFACTOR_ONE) { | ||
896 | spe_fs(f, src, src, dst); | ||
897 | } | ||
898 | } else if (sF == PIPE_BLENDFACTOR_ZERO) { | ||
899 | if (dF == PIPE_BLENDFACTOR_ZERO) { | ||
900 | spe_il(f, src, 0); | ||
901 | } else if (dF == PIPE_BLENDFACTOR_ONE) { | ||
902 | spe_il(f, tmp, 0); | ||
903 | spe_fs(f, src, tmp, dst); | ||
904 | } else { | ||
905 | spe_fm(f, src, dst, dst_factor); | ||
906 | } | ||
907 | } else if (dF == PIPE_BLENDFACTOR_ZERO) { | ||
908 | spe_fm(f, src, src, src_factor); | ||
909 | } else { | ||
910 | spe_fm(f, tmp, dst, dst_factor); | ||
911 | spe_fms(f, src, src, src_factor, tmp); | ||
912 | } | ||
913 | break; | ||
914 | |||
915 | case PIPE_BLEND_REVERSE_SUBTRACT: | ||
916 | if (sF == PIPE_BLENDFACTOR_ONE) { | ||
917 | if (dF == PIPE_BLENDFACTOR_ZERO) { | ||
918 | spe_il(f, tmp, 0); | ||
919 | spe_fs(f, src, tmp, src); | ||
920 | } else if (dF == PIPE_BLENDFACTOR_ONE) { | ||
921 | spe_fs(f, src, dst, src); | ||
922 | } | ||
923 | } else if (sF == PIPE_BLENDFACTOR_ZERO) { | ||
924 | if (dF == PIPE_BLENDFACTOR_ZERO) { | ||
925 | spe_il(f, src, 0); | ||
926 | } else if (dF == PIPE_BLENDFACTOR_ONE) { | ||
927 | spe_or(f, src, dst, dst); | ||
928 | } else { | ||
929 | spe_fm(f, src, dst, dst_factor); | ||
930 | } | ||
931 | } else if (dF == PIPE_BLENDFACTOR_ZERO) { | ||
932 | spe_fm(f, src, src, src_factor); | ||
933 | } else { | ||
934 | spe_fm(f, tmp, src, src_factor); | ||
935 | spe_fms(f, src, src, dst_factor, tmp); | ||
936 | } | ||
937 | break; | ||
938 | |||
939 | case PIPE_BLEND_MIN: | ||
940 | spe_cgt(f, tmp, src, dst); | ||
941 | spe_selb(f, src, src, dst, tmp); | ||
942 | break; | ||
943 | |||
944 | case PIPE_BLEND_MAX: | ||
945 | spe_cgt(f, tmp, src, dst); | ||
946 | spe_selb(f, src, dst, src, tmp); | ||
947 | break; | ||
948 | |||
949 | default: | ||
950 | assert(0); | ||
951 | } | ||
952 | |||
953 | spe_release_register(f, tmp); | ||
954 | } | ||
955 | |||
956 | |||
957 | /** | ||
958 | * Generate code to perform alpha blending on the SPE | ||
959 | */ | ||
960 | void | ||
961 | cell_generate_alpha_blend(struct cell_blend_state *cb) | ||
962 | { | ||
963 | struct pipe_blend_state *const b = &cb->base; | ||
964 | struct spe_function *const f = &cb->code; | ||
965 | |||
966 | /* This code generates a maximum of 3 (source alpha factor) | ||
967 | * + 3 (destination alpha factor) + (3 * 6) (source color factor) | ||
968 | * + (3 * 6) (destination color factor) + (4 * 2) (blend equation) | ||
969 | * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to | ||
970 | * make it a happy power-of-two. | ||
971 | */ | ||
972 | spe_init_func(f, SPE_INST_SIZE * 64); | ||
973 | |||
974 | |||
975 | const int frag[4] = { | ||
976 | spe_allocate_register(f, 3), | ||
977 | spe_allocate_register(f, 4), | ||
978 | spe_allocate_register(f, 5), | ||
979 | spe_allocate_register(f, 6), | ||
980 | }; | ||
981 | const int pixel[4] = { | ||
982 | spe_allocate_register(f, 7), | ||
983 | spe_allocate_register(f, 8), | ||
984 | spe_allocate_register(f, 9), | ||
985 | spe_allocate_register(f, 10), | ||
986 | }; | ||
987 | const int const_color[4] = { | ||
988 | spe_allocate_register(f, 11), | ||
989 | spe_allocate_register(f, 12), | ||
990 | spe_allocate_register(f, 13), | ||
991 | spe_allocate_register(f, 14), | ||
992 | }; | ||
993 | unsigned func[4]; | ||
994 | unsigned sF[4]; | ||
995 | unsigned dF[4]; | ||
996 | unsigned i; | ||
997 | int src_factor[4]; | ||
998 | int dst_factor[4]; | ||
999 | |||
1000 | |||
1001 | /* Does the selected blend mode make use of the source / destination | ||
1002 | * color (RGB) blend factors? | ||
1003 | */ | ||
1004 | boolean need_color_factor = b->rt[0].blend_enable | ||
1005 | && (b->rt[0].rgb_func != PIPE_BLEND_MIN) | ||
1006 | && (b->rt[0].rgb_func != PIPE_BLEND_MAX); | ||
1007 | |||
1008 | /* Does the selected blend mode make use of the source / destination | ||
1009 | * alpha blend factors? | ||
1010 | */ | ||
1011 | boolean need_alpha_factor = b->rt[0].blend_enable | ||
1012 | && (b->rt[0].alpha_func != PIPE_BLEND_MIN) | ||
1013 | && (b->rt[0].alpha_func != PIPE_BLEND_MAX); | ||
1014 | |||
1015 | |||
1016 | if (b->rt[0].blend_enable) { | ||
1017 | sF[0] = b->rt[0].rgb_src_factor; | ||
1018 | sF[1] = sF[0]; | ||
1019 | sF[2] = sF[0]; | ||
1020 | switch (b->rt[0].alpha_src_factor & 0x0f) { | ||
1021 | case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: | ||
1022 | sF[3] = PIPE_BLENDFACTOR_ONE; | ||
1023 | break; | ||
1024 | case PIPE_BLENDFACTOR_SRC_COLOR: | ||
1025 | case PIPE_BLENDFACTOR_DST_COLOR: | ||
1026 | case PIPE_BLENDFACTOR_CONST_COLOR: | ||
1027 | case PIPE_BLENDFACTOR_SRC1_COLOR: | ||
1028 | sF[3] = b->rt[0].alpha_src_factor + 1; | ||
1029 | break; | ||
1030 | default: | ||
1031 | sF[3] = b->rt[0].alpha_src_factor; | ||
1032 | } | ||
1033 | |||
1034 | dF[0] = b->rt[0].rgb_dst_factor; | ||
1035 | dF[1] = dF[0]; | ||
1036 | dF[2] = dF[0]; | ||
1037 | switch (b->rt[0].alpha_dst_factor & 0x0f) { | ||
1038 | case PIPE_BLENDFACTOR_SRC_COLOR: | ||
1039 | case PIPE_BLENDFACTOR_DST_COLOR: | ||
1040 | case PIPE_BLENDFACTOR_CONST_COLOR: | ||
1041 | case PIPE_BLENDFACTOR_SRC1_COLOR: | ||
1042 | dF[3] = b->rt[0].alpha_dst_factor + 1; | ||
1043 | break; | ||
1044 | default: | ||
1045 | dF[3] = b->rt[0].alpha_dst_factor; | ||
1046 | } | ||
1047 | |||
1048 | func[0] = b->rt[0].rgb_func; | ||
1049 | func[1] = func[0]; | ||
1050 | func[2] = func[0]; | ||
1051 | func[3] = b->rt[0].alpha_func; | ||
1052 | } else { | ||
1053 | sF[0] = PIPE_BLENDFACTOR_ONE; | ||
1054 | sF[1] = PIPE_BLENDFACTOR_ONE; | ||
1055 | sF[2] = PIPE_BLENDFACTOR_ONE; | ||
1056 | sF[3] = PIPE_BLENDFACTOR_ONE; | ||
1057 | dF[0] = PIPE_BLENDFACTOR_ZERO; | ||
1058 | dF[1] = PIPE_BLENDFACTOR_ZERO; | ||
1059 | dF[2] = PIPE_BLENDFACTOR_ZERO; | ||
1060 | dF[3] = PIPE_BLENDFACTOR_ZERO; | ||
1061 | |||
1062 | func[0] = PIPE_BLEND_ADD; | ||
1063 | func[1] = PIPE_BLEND_ADD; | ||
1064 | func[2] = PIPE_BLEND_ADD; | ||
1065 | func[3] = PIPE_BLEND_ADD; | ||
1066 | } | ||
1067 | |||
1068 | |||
1069 | /* If alpha writing is enabled and the alpha blend mode requires use of | ||
1070 | * the alpha factor, calculate the alpha factor. | ||
1071 | */ | ||
1072 | if (((b->rt[0].colormask & 8) != 0) && need_alpha_factor) { | ||
1073 | src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3], | ||
1074 | frag[3], pixel[3]); | ||
1075 | |||
1076 | /* If the alpha destination blend factor is the same as the alpha source | ||
1077 | * blend factor, re-use the previously calculated value. | ||
1078 | */ | ||
1079 | dst_factor[3] = (dF[3] == sF[3]) | ||
1080 | ? src_factor[3] | ||
1081 | : emit_alpha_factor_calculation(f, dF[3], const_color[3], | ||
1082 | frag[3], pixel[3]); | ||
1083 | } | ||
1084 | |||
1085 | |||
1086 | if (sF[0] == sF[3]) { | ||
1087 | src_factor[0] = src_factor[3]; | ||
1088 | src_factor[1] = src_factor[3]; | ||
1089 | src_factor[2] = src_factor[3]; | ||
1090 | } else if (sF[0] == dF[3]) { | ||
1091 | src_factor[0] = dst_factor[3]; | ||
1092 | src_factor[1] = dst_factor[3]; | ||
1093 | src_factor[2] = dst_factor[3]; | ||
1094 | } else if (need_color_factor) { | ||
1095 | emit_color_factor_calculation(f, | ||
1096 | b->rt[0].rgb_src_factor, | ||
1097 | b->rt[0].colormask, | ||
1098 | frag, pixel, const_color, src_factor); | ||
1099 | } | ||
1100 | |||
1101 | |||
1102 | if (dF[0] == sF[3]) { | ||
1103 | dst_factor[0] = src_factor[3]; | ||
1104 | dst_factor[1] = src_factor[3]; | ||
1105 | dst_factor[2] = src_factor[3]; | ||
1106 | } else if (dF[0] == dF[3]) { | ||
1107 | dst_factor[0] = dst_factor[3]; | ||
1108 | dst_factor[1] = dst_factor[3]; | ||
1109 | dst_factor[2] = dst_factor[3]; | ||
1110 | } else if (dF[0] == sF[0]) { | ||
1111 | dst_factor[0] = src_factor[0]; | ||
1112 | dst_factor[1] = src_factor[1]; | ||
1113 | dst_factor[2] = src_factor[2]; | ||
1114 | } else if (need_color_factor) { | ||
1115 | emit_color_factor_calculation(f, | ||
1116 | b->rt[0].rgb_dst_factor, | ||
1117 | b->rt[0].colormask, | ||
1118 | frag, pixel, const_color, dst_factor); | ||
1119 | } | ||
1120 | |||
1121 | |||
1122 | |||
1123 | for (i = 0; i < 4; ++i) { | ||
1124 | if ((b->rt[0].colormask & (1U << i)) != 0) { | ||
1125 | emit_blend_calculation(f, | ||
1126 | func[i], sF[i], dF[i], | ||
1127 | frag[i], src_factor[i], | ||
1128 | pixel[i], dst_factor[i]); | ||
1129 | } | ||
1130 | } | ||
1131 | |||
1132 | spe_bi(f, 0, 0, 0); | ||
1133 | |||
1134 | #if 0 | ||
1135 | { | ||
1136 | const uint32_t *p = f->store; | ||
1137 | |||
1138 | printf("# %u instructions\n", f->csr - f->store); | ||
1139 | printf("# blend (%sabled)\n", | ||
1140 | (cb->base.blend_enable) ? "en" : "dis"); | ||
1141 | printf("# RGB func / sf / df: %u %u %u\n", | ||
1142 | cb->base.rgb_func, | ||
1143 | cb->base.rgb_src_factor, | ||
1144 | cb->base.rgb_dst_factor); | ||
1145 | printf("# ALP func / sf / df: %u %u %u\n", | ||
1146 | cb->base.alpha_func, | ||
1147 | cb->base.alpha_src_factor, | ||
1148 | cb->base.alpha_dst_factor); | ||
1149 | |||
1150 | printf("\t.text\n"); | ||
1151 | for (/* empty */; p < f->csr; p++) { | ||
1152 | printf("\t.long\t0x%04x\n", *p); | ||
1153 | } | ||
1154 | fflush(stdout); | ||
1155 | } | ||
1156 | #endif | ||
1157 | } | ||
1158 | |||
1159 | |||
1160 | static int | ||
1161 | PC_OFFSET(const struct spe_function *f, const void *d) | ||
1162 | { | ||
1163 | const intptr_t pc = (intptr_t) &f->store[f->num_inst]; | ||
1164 | const intptr_t ea = ~0x0f & (intptr_t) d; | ||
1165 | |||
1166 | return (ea - pc) >> 2; | ||
1167 | } | ||
1168 | |||
1169 | |||
1170 | /** | ||
1171 | * Generate code to perform color conversion and logic op | ||
1172 | * | ||
1173 | * \bug | ||
1174 | * The code generated by this function should also perform dithering. | ||
1175 | * | ||
1176 | * \bug | ||
1177 | * The code generated by this function should also perform color-write | ||
1178 | * masking. | ||
1179 | * | ||
1180 | * \bug | ||
1181 | * Only two framebuffer formats are supported at this time. | ||
1182 | */ | ||
1183 | void | ||
1184 | cell_generate_logic_op(struct spe_function *f, | ||
1185 | const struct pipe_blend_state *blend, | ||
1186 | struct pipe_surface *surf) | ||
1187 | { | ||
1188 | const unsigned logic_op = (blend->logicop_enable) | ||
1189 | ? blend->logicop_func : PIPE_LOGICOP_COPY; | ||
1190 | |||
1191 | /* This code generates a maximum of 37 instructions. An additional 32 | ||
1192 | * bytes (equiv. to 8 instructions) are needed for data storage. Round up | ||
1193 | * to 64 to make it a happy power-of-two. | ||
1194 | */ | ||
1195 | spe_init_func(f, SPE_INST_SIZE * 64); | ||
1196 | |||
1197 | |||
1198 | /* Pixel colors in framebuffer format in AoS layout. | ||
1199 | */ | ||
1200 | const int pixel[4] = { | ||
1201 | spe_allocate_register(f, 3), | ||
1202 | spe_allocate_register(f, 4), | ||
1203 | spe_allocate_register(f, 5), | ||
1204 | spe_allocate_register(f, 6), | ||
1205 | }; | ||
1206 | |||
1207 | /* Fragment colors stored as floats in SoA layout. | ||
1208 | */ | ||
1209 | const int frag[4] = { | ||
1210 | spe_allocate_register(f, 7), | ||
1211 | spe_allocate_register(f, 8), | ||
1212 | spe_allocate_register(f, 9), | ||
1213 | spe_allocate_register(f, 10), | ||
1214 | }; | ||
1215 | |||
1216 | const int mask = spe_allocate_register(f, 11); | ||
1217 | |||
1218 | |||
1219 | /* Short-circuit the noop and invert cases. | ||
1220 | */ | ||
1221 | if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->rt[0].colormask == 0)) { | ||
1222 | spe_bi(f, 0, 0, 0); | ||
1223 | return; | ||
1224 | } else if (logic_op == PIPE_LOGICOP_INVERT) { | ||
1225 | spe_nor(f, pixel[0], pixel[0], pixel[0]); | ||
1226 | spe_nor(f, pixel[1], pixel[1], pixel[1]); | ||
1227 | spe_nor(f, pixel[2], pixel[2], pixel[2]); | ||
1228 | spe_nor(f, pixel[3], pixel[3], pixel[3]); | ||
1229 | spe_bi(f, 0, 0, 0); | ||
1230 | return; | ||
1231 | } | ||
1232 | |||
1233 | |||
1234 | const int tmp[4] = { | ||
1235 | spe_allocate_available_register(f), | ||
1236 | spe_allocate_available_register(f), | ||
1237 | spe_allocate_available_register(f), | ||
1238 | spe_allocate_available_register(f), | ||
1239 | }; | ||
1240 | |||
1241 | const int shuf_xpose_hi = spe_allocate_available_register(f); | ||
1242 | const int shuf_xpose_lo = spe_allocate_available_register(f); | ||
1243 | const int shuf_color = spe_allocate_available_register(f); | ||
1244 | |||
1245 | |||
1246 | /* Pointer to the beginning of the function's private data area. | ||
1247 | */ | ||
1248 | uint32_t *const data = ((uint32_t *) f->store) + (64 - 8); | ||
1249 | |||
1250 | |||
1251 | /* Convert fragment colors to framebuffer format in AoS layout. | ||
1252 | */ | ||
1253 | switch (surf->format) { | ||
1254 | case PIPE_FORMAT_B8G8R8A8_UNORM: | ||
1255 | data[0] = 0x00010203; | ||
1256 | data[1] = 0x10111213; | ||
1257 | data[2] = 0x04050607; | ||
1258 | data[3] = 0x14151617; | ||
1259 | data[4] = 0x0c000408; | ||
1260 | data[5] = 0x80808080; | ||
1261 | data[6] = 0x80808080; | ||
1262 | data[7] = 0x80808080; | ||
1263 | break; | ||
1264 | case PIPE_FORMAT_A8R8G8B8_UNORM: | ||
1265 | data[0] = 0x03020100; | ||
1266 | data[1] = 0x13121110; | ||
1267 | data[2] = 0x07060504; | ||
1268 | data[3] = 0x17161514; | ||
1269 | data[4] = 0x0804000c; | ||
1270 | data[5] = 0x80808080; | ||
1271 | data[6] = 0x80808080; | ||
1272 | data[7] = 0x80808080; | ||
1273 | break; | ||
1274 | default: | ||
1275 | fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()"); | ||
1276 | ASSERT(0); | ||
1277 | } | ||
1278 | |||
1279 | spe_ilh(f, tmp[0], 0x0808); | ||
1280 | spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0)); | ||
1281 | spe_lqr(f, shuf_color, PC_OFFSET(f, data+4)); | ||
1282 | spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]); | ||
1283 | |||
1284 | spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi); | ||
1285 | spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo); | ||
1286 | spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi); | ||
1287 | spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo); | ||
1288 | |||
1289 | spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi); | ||
1290 | spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo); | ||
1291 | spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi); | ||
1292 | spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo); | ||
1293 | |||
1294 | spe_cfltu(f, frag[0], frag[0], 32); | ||
1295 | spe_cfltu(f, frag[1], frag[1], 32); | ||
1296 | spe_cfltu(f, frag[2], frag[2], 32); | ||
1297 | spe_cfltu(f, frag[3], frag[3], 32); | ||
1298 | |||
1299 | spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color); | ||
1300 | spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color); | ||
1301 | spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color); | ||
1302 | spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color); | ||
1303 | |||
1304 | |||
1305 | /* If logic op is enabled, perform the requested logical operation on the | ||
1306 | * converted fragment colors and the pixel colors. | ||
1307 | */ | ||
1308 | switch (logic_op) { | ||
1309 | case PIPE_LOGICOP_CLEAR: | ||
1310 | spe_il(f, frag[0], 0); | ||
1311 | spe_il(f, frag[1], 0); | ||
1312 | spe_il(f, frag[2], 0); | ||
1313 | spe_il(f, frag[3], 0); | ||
1314 | break; | ||
1315 | case PIPE_LOGICOP_NOR: | ||
1316 | spe_nor(f, frag[0], frag[0], pixel[0]); | ||
1317 | spe_nor(f, frag[1], frag[1], pixel[1]); | ||
1318 | spe_nor(f, frag[2], frag[2], pixel[2]); | ||
1319 | spe_nor(f, frag[3], frag[3], pixel[3]); | ||
1320 | break; | ||
1321 | case PIPE_LOGICOP_AND_INVERTED: | ||
1322 | spe_andc(f, frag[0], pixel[0], frag[0]); | ||
1323 | spe_andc(f, frag[1], pixel[1], frag[1]); | ||
1324 | spe_andc(f, frag[2], pixel[2], frag[2]); | ||
1325 | spe_andc(f, frag[3], pixel[3], frag[3]); | ||
1326 | break; | ||
1327 | case PIPE_LOGICOP_COPY_INVERTED: | ||
1328 | spe_nor(f, frag[0], frag[0], frag[0]); | ||
1329 | spe_nor(f, frag[1], frag[1], frag[1]); | ||
1330 | spe_nor(f, frag[2], frag[2], frag[2]); | ||
1331 | spe_nor(f, frag[3], frag[3], frag[3]); | ||
1332 | break; | ||
1333 | case PIPE_LOGICOP_AND_REVERSE: | ||
1334 | spe_andc(f, frag[0], frag[0], pixel[0]); | ||
1335 | spe_andc(f, frag[1], frag[1], pixel[1]); | ||
1336 | spe_andc(f, frag[2], frag[2], pixel[2]); | ||
1337 | spe_andc(f, frag[3], frag[3], pixel[3]); | ||
1338 | break; | ||
1339 | case PIPE_LOGICOP_XOR: | ||
1340 | spe_xor(f, frag[0], frag[0], pixel[0]); | ||
1341 | spe_xor(f, frag[1], frag[1], pixel[1]); | ||
1342 | spe_xor(f, frag[2], frag[2], pixel[2]); | ||
1343 | spe_xor(f, frag[3], frag[3], pixel[3]); | ||
1344 | break; | ||
1345 | case PIPE_LOGICOP_NAND: | ||
1346 | spe_nand(f, frag[0], frag[0], pixel[0]); | ||
1347 | spe_nand(f, frag[1], frag[1], pixel[1]); | ||
1348 | spe_nand(f, frag[2], frag[2], pixel[2]); | ||
1349 | spe_nand(f, frag[3], frag[3], pixel[3]); | ||
1350 | break; | ||
1351 | case PIPE_LOGICOP_AND: | ||
1352 | spe_and(f, frag[0], frag[0], pixel[0]); | ||
1353 | spe_and(f, frag[1], frag[1], pixel[1]); | ||
1354 | spe_and(f, frag[2], frag[2], pixel[2]); | ||
1355 | spe_and(f, frag[3], frag[3], pixel[3]); | ||
1356 | break; | ||
1357 | case PIPE_LOGICOP_EQUIV: | ||
1358 | spe_eqv(f, frag[0], frag[0], pixel[0]); | ||
1359 | spe_eqv(f, frag[1], frag[1], pixel[1]); | ||
1360 | spe_eqv(f, frag[2], frag[2], pixel[2]); | ||
1361 | spe_eqv(f, frag[3], frag[3], pixel[3]); | ||
1362 | break; | ||
1363 | case PIPE_LOGICOP_OR_INVERTED: | ||
1364 | spe_orc(f, frag[0], pixel[0], frag[0]); | ||
1365 | spe_orc(f, frag[1], pixel[1], frag[1]); | ||
1366 | spe_orc(f, frag[2], pixel[2], frag[2]); | ||
1367 | spe_orc(f, frag[3], pixel[3], frag[3]); | ||
1368 | break; | ||
1369 | case PIPE_LOGICOP_COPY: | ||
1370 | break; | ||
1371 | case PIPE_LOGICOP_OR_REVERSE: | ||
1372 | spe_orc(f, frag[0], frag[0], pixel[0]); | ||
1373 | spe_orc(f, frag[1], frag[1], pixel[1]); | ||
1374 | spe_orc(f, frag[2], frag[2], pixel[2]); | ||
1375 | spe_orc(f, frag[3], frag[3], pixel[3]); | ||
1376 | break; | ||
1377 | case PIPE_LOGICOP_OR: | ||
1378 | spe_or(f, frag[0], frag[0], pixel[0]); | ||
1379 | spe_or(f, frag[1], frag[1], pixel[1]); | ||
1380 | spe_or(f, frag[2], frag[2], pixel[2]); | ||
1381 | spe_or(f, frag[3], frag[3], pixel[3]); | ||
1382 | break; | ||
1383 | case PIPE_LOGICOP_SET: | ||
1384 | spe_il(f, frag[0], ~0); | ||
1385 | spe_il(f, frag[1], ~0); | ||
1386 | spe_il(f, frag[2], ~0); | ||
1387 | spe_il(f, frag[3], ~0); | ||
1388 | break; | ||
1389 | |||
1390 | /* These two cases are short-circuited above. | ||
1391 | */ | ||
1392 | case PIPE_LOGICOP_INVERT: | ||
1393 | case PIPE_LOGICOP_NOOP: | ||
1394 | default: | ||
1395 | assert(0); | ||
1396 | } | ||
1397 | |||
1398 | |||
1399 | /* Apply fragment mask. | ||
1400 | */ | ||
1401 | spe_ilh(f, tmp[0], 0x0000); | ||
1402 | spe_ilh(f, tmp[1], 0x0404); | ||
1403 | spe_ilh(f, tmp[2], 0x0808); | ||
1404 | spe_ilh(f, tmp[3], 0x0c0c); | ||
1405 | |||
1406 | spe_shufb(f, tmp[0], mask, mask, tmp[0]); | ||
1407 | spe_shufb(f, tmp[1], mask, mask, tmp[1]); | ||
1408 | spe_shufb(f, tmp[2], mask, mask, tmp[2]); | ||
1409 | spe_shufb(f, tmp[3], mask, mask, tmp[3]); | ||
1410 | |||
1411 | spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]); | ||
1412 | spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]); | ||
1413 | spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]); | ||
1414 | spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]); | ||
1415 | |||
1416 | spe_bi(f, 0, 0, 0); | ||
1417 | |||
1418 | #if 0 | ||
1419 | { | ||
1420 | const uint32_t *p = f->store; | ||
1421 | unsigned i; | ||
1422 | |||
1423 | printf("# %u instructions\n", f->csr - f->store); | ||
1424 | |||
1425 | printf("\t.text\n"); | ||
1426 | for (i = 0; i < 64; i++) { | ||
1427 | printf("\t.long\t0x%04x\n", p[i]); | ||
1428 | } | ||
1429 | fflush(stdout); | ||
1430 | } | ||
1431 | #endif | ||
1432 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h deleted file mode 100644 index a8267a51331..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h +++ /dev/null | |||
@@ -1,39 +0,0 @@ | |||
1 | /* | ||
2 | * (C) Copyright IBM Corporation 2008 | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
6 | * copy of this software and associated documentation files (the "Software"), | ||
7 | * to deal in the Software without restriction, including without limitation | ||
8 | * on the rights to use, copy, modify, merge, publish, distribute, sub | ||
9 | * license, and/or sell copies of the Software, and to permit persons to whom | ||
10 | * the Software is furnished to do so, subject to the following conditions: | ||
11 | * | ||
12 | * The above copyright notice and this permission notice (including the next | ||
13 | * paragraph) shall be included in all copies or substantial portions of the | ||
14 | * Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL | ||
19 | * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, | ||
20 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | ||
21 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | ||
22 | * USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
#ifndef CELL_STATE_PER_FRAGMENT_H
#define CELL_STATE_PER_FRAGMENT_H

/*
 * Forward declarations of the struct tags used in the prototypes below.
 * Without them this header only works when the defining headers happen to
 * be included first; otherwise each tag would be declared with prototype
 * scope and would not match the real type at the call site.
 */
struct cell_depth_stencil_alpha_state;
struct cell_blend_state;
struct spe_function;
struct pipe_blend_state;
struct pipe_surface;

/*
 * Defined outside this header.
 * NOTE(review): presumably generates the depth/stencil/alpha test code for
 * the given state object -- confirm against the implementation.
 */
extern void
cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa);

/*
 * Defined outside this header.
 * NOTE(review): presumably generates the alpha-blend code for the given
 * blend state -- confirm against the implementation.
 */
extern void
cell_generate_alpha_blend(struct cell_blend_state *cb);

/*
 * Defined outside this header.
 * NOTE(review): presumably emits the logic-op instruction sequence into
 * 'f' for the given blend state and surface -- confirm against the
 * implementation.
 */
extern void
cell_generate_logic_op(struct spe_function *f,
                       const struct pipe_blend_state *blend,
                       struct pipe_surface *surf);

#endif /* CELL_STATE_PER_FRAGMENT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c deleted file mode 100644 index ddf14772689..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ /dev/null | |||
@@ -1,229 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #include "pipe/p_defines.h" | ||
29 | #include "util/u_memory.h" | ||
30 | #include "util/u_inlines.h" | ||
31 | #include "draw/draw_context.h" | ||
32 | #include "tgsi/tgsi_parse.h" | ||
33 | |||
34 | #include "cell_context.h" | ||
35 | #include "cell_state.h" | ||
36 | #include "cell_gen_fp.h" | ||
37 | #include "cell_texture.h" | ||
38 | |||
39 | |||
40 | /** cast wrapper */ | ||
41 | static INLINE struct cell_fragment_shader_state * | ||
42 | cell_fragment_shader_state(void *shader) | ||
43 | { | ||
44 | return (struct cell_fragment_shader_state *) shader; | ||
45 | } | ||
46 | |||
47 | |||
48 | /** cast wrapper */ | ||
49 | static INLINE struct cell_vertex_shader_state * | ||
50 | cell_vertex_shader_state(void *shader) | ||
51 | { | ||
52 | return (struct cell_vertex_shader_state *) shader; | ||
53 | } | ||
54 | |||
55 | |||
56 | /** | ||
57 | * Create fragment shader state. | ||
58 | * Called via pipe->create_fs_state() | ||
59 | */ | ||
60 | static void * | ||
61 | cell_create_fs_state(struct pipe_context *pipe, | ||
62 | const struct pipe_shader_state *templ) | ||
63 | { | ||
64 | struct cell_context *cell = cell_context(pipe); | ||
65 | struct cell_fragment_shader_state *cfs; | ||
66 | |||
67 | cfs = CALLOC_STRUCT(cell_fragment_shader_state); | ||
68 | if (!cfs) | ||
69 | return NULL; | ||
70 | |||
71 | cfs->shader.tokens = tgsi_dup_tokens(templ->tokens); | ||
72 | if (!cfs->shader.tokens) { | ||
73 | FREE(cfs); | ||
74 | return NULL; | ||
75 | } | ||
76 | |||
77 | tgsi_scan_shader(templ->tokens, &cfs->info); | ||
78 | |||
79 | cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code); | ||
80 | |||
81 | return cfs; | ||
82 | } | ||
83 | |||
84 | |||
85 | /** | ||
86 | * Called via pipe->bind_fs_state() | ||
87 | */ | ||
88 | static void | ||
89 | cell_bind_fs_state(struct pipe_context *pipe, void *fs) | ||
90 | { | ||
91 | struct cell_context *cell = cell_context(pipe); | ||
92 | |||
93 | cell->fs = cell_fragment_shader_state(fs); | ||
94 | |||
95 | cell->dirty |= CELL_NEW_FS; | ||
96 | } | ||
97 | |||
98 | |||
99 | /** | ||
100 | * Called via pipe->delete_fs_state() | ||
101 | */ | ||
102 | static void | ||
103 | cell_delete_fs_state(struct pipe_context *pipe, void *fs) | ||
104 | { | ||
105 | struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs); | ||
106 | |||
107 | spe_release_func(&cfs->code); | ||
108 | |||
109 | FREE((void *) cfs->shader.tokens); | ||
110 | FREE(cfs); | ||
111 | } | ||
112 | |||
113 | |||
114 | /** | ||
115 | * Create vertex shader state. | ||
116 | * Called via pipe->create_vs_state() | ||
117 | */ | ||
118 | static void * | ||
119 | cell_create_vs_state(struct pipe_context *pipe, | ||
120 | const struct pipe_shader_state *templ) | ||
121 | { | ||
122 | struct cell_context *cell = cell_context(pipe); | ||
123 | struct cell_vertex_shader_state *cvs; | ||
124 | |||
125 | cvs = CALLOC_STRUCT(cell_vertex_shader_state); | ||
126 | if (!cvs) | ||
127 | return NULL; | ||
128 | |||
129 | cvs->shader.tokens = tgsi_dup_tokens(templ->tokens); | ||
130 | if (!cvs->shader.tokens) { | ||
131 | FREE(cvs); | ||
132 | return NULL; | ||
133 | } | ||
134 | |||
135 | tgsi_scan_shader(templ->tokens, &cvs->info); | ||
136 | |||
137 | cvs->draw_data = draw_create_vertex_shader(cell->draw, &cvs->shader); | ||
138 | if (cvs->draw_data == NULL) { | ||
139 | FREE( (void *) cvs->shader.tokens ); | ||
140 | FREE( cvs ); | ||
141 | return NULL; | ||
142 | } | ||
143 | |||
144 | return cvs; | ||
145 | } | ||
146 | |||
147 | |||
148 | /** | ||
149 | * Called via pipe->bind_vs_state() | ||
150 | */ | ||
151 | static void | ||
152 | cell_bind_vs_state(struct pipe_context *pipe, void *vs) | ||
153 | { | ||
154 | struct cell_context *cell = cell_context(pipe); | ||
155 | |||
156 | cell->vs = cell_vertex_shader_state(vs); | ||
157 | |||
158 | draw_bind_vertex_shader(cell->draw, | ||
159 | (cell->vs ? cell->vs->draw_data : NULL)); | ||
160 | |||
161 | cell->dirty |= CELL_NEW_VS; | ||
162 | } | ||
163 | |||
164 | |||
165 | /** | ||
166 | * Called via pipe->delete_vs_state() | ||
167 | */ | ||
168 | static void | ||
169 | cell_delete_vs_state(struct pipe_context *pipe, void *vs) | ||
170 | { | ||
171 | struct cell_context *cell = cell_context(pipe); | ||
172 | struct cell_vertex_shader_state *cvs = cell_vertex_shader_state(vs); | ||
173 | |||
174 | draw_delete_vertex_shader(cell->draw, cvs->draw_data); | ||
175 | FREE( (void *) cvs->shader.tokens ); | ||
176 | FREE( cvs ); | ||
177 | } | ||
178 | |||
179 | |||
180 | /** | ||
181 | * Called via pipe->set_constant_buffer() | ||
182 | */ | ||
183 | static void | ||
184 | cell_set_constant_buffer(struct pipe_context *pipe, | ||
185 | uint shader, uint index, | ||
186 | struct pipe_resource *constants) | ||
187 | { | ||
188 | struct cell_context *cell = cell_context(pipe); | ||
189 | unsigned size = constants ? constants->width0 : 0; | ||
190 | const void *data = constants ? cell_resource(constants)->data : NULL; | ||
191 | |||
192 | assert(shader < PIPE_SHADER_TYPES); | ||
193 | assert(index == 0); | ||
194 | |||
195 | if (cell->constants[shader] == constants) | ||
196 | return; | ||
197 | |||
198 | draw_flush(cell->draw); | ||
199 | |||
200 | /* note: reference counting */ | ||
201 | pipe_resource_reference(&cell->constants[shader], constants); | ||
202 | |||
203 | if(shader == PIPE_SHADER_VERTEX) { | ||
204 | draw_set_mapped_constant_buffer(cell->draw, PIPE_SHADER_VERTEX, 0, | ||
205 | data, size); | ||
206 | } | ||
207 | |||
208 | cell->mapped_constants[shader] = data; | ||
209 | |||
210 | if (shader == PIPE_SHADER_VERTEX) | ||
211 | cell->dirty |= CELL_NEW_VS_CONSTANTS; | ||
212 | else if (shader == PIPE_SHADER_FRAGMENT) | ||
213 | cell->dirty |= CELL_NEW_FS_CONSTANTS; | ||
214 | } | ||
215 | |||
216 | |||
217 | void | ||
218 | cell_init_shader_functions(struct cell_context *cell) | ||
219 | { | ||
220 | cell->pipe.create_fs_state = cell_create_fs_state; | ||
221 | cell->pipe.bind_fs_state = cell_bind_fs_state; | ||
222 | cell->pipe.delete_fs_state = cell_delete_fs_state; | ||
223 | |||
224 | cell->pipe.create_vs_state = cell_create_vs_state; | ||
225 | cell->pipe.bind_vs_state = cell_bind_vs_state; | ||
226 | cell->pipe.delete_vs_state = cell_delete_vs_state; | ||
227 | |||
228 | cell->pipe.set_constant_buffer = cell_set_constant_buffer; | ||
229 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c deleted file mode 100644 index 7f65b82619e..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ /dev/null | |||
@@ -1,120 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | /* Authors: Keith Whitwell <keith@tungstengraphics.com> | ||
29 | */ | ||
30 | |||
31 | |||
32 | #include "cell_context.h" | ||
33 | #include "cell_state.h" | ||
34 | |||
35 | #include "util/u_memory.h" | ||
36 | #include "util/u_transfer.h" | ||
37 | #include "draw/draw_context.h" | ||
38 | |||
39 | |||
40 | static void * | ||
41 | cell_create_vertex_elements_state(struct pipe_context *pipe, | ||
42 | unsigned count, | ||
43 | const struct pipe_vertex_element *attribs) | ||
44 | { | ||
45 | struct cell_velems_state *velems; | ||
46 | assert(count <= PIPE_MAX_ATTRIBS); | ||
47 | velems = (struct cell_velems_state *) MALLOC(sizeof(struct cell_velems_state)); | ||
48 | if (velems) { | ||
49 | velems->count = count; | ||
50 | memcpy(velems->velem, attribs, sizeof(*attribs) * count); | ||
51 | } | ||
52 | return velems; | ||
53 | } | ||
54 | |||
55 | static void | ||
56 | cell_bind_vertex_elements_state(struct pipe_context *pipe, | ||
57 | void *velems) | ||
58 | { | ||
59 | struct cell_context *cell = cell_context(pipe); | ||
60 | struct cell_velems_state *cell_velems = (struct cell_velems_state *) velems; | ||
61 | |||
62 | cell->velems = cell_velems; | ||
63 | |||
64 | cell->dirty |= CELL_NEW_VERTEX; | ||
65 | |||
66 | if (cell_velems) | ||
67 | draw_set_vertex_elements(cell->draw, cell_velems->count, cell_velems->velem); | ||
68 | } | ||
69 | |||
/**
 * Free a vertex-elements state object created by
 * cell_create_vertex_elements_state().  The context is not touched.
 */
static void
cell_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
{
   void *state = velems;

   FREE( state );
}
75 | |||
76 | |||
77 | static void | ||
78 | cell_set_vertex_buffers(struct pipe_context *pipe, | ||
79 | unsigned count, | ||
80 | const struct pipe_vertex_buffer *buffers) | ||
81 | { | ||
82 | struct cell_context *cell = cell_context(pipe); | ||
83 | |||
84 | assert(count <= PIPE_MAX_ATTRIBS); | ||
85 | |||
86 | util_copy_vertex_buffers(cell->vertex_buffer, | ||
87 | &cell->num_vertex_buffers, | ||
88 | buffers, count); | ||
89 | |||
90 | cell->dirty |= CELL_NEW_VERTEX; | ||
91 | |||
92 | draw_set_vertex_buffers(cell->draw, count, buffers); | ||
93 | } | ||
94 | |||
95 | |||
96 | static void | ||
97 | cell_set_index_buffer(struct pipe_context *pipe, | ||
98 | const struct pipe_index_buffer *ib) | ||
99 | { | ||
100 | struct cell_context *cell = cell_context(pipe); | ||
101 | |||
102 | if (ib) | ||
103 | memcpy(&cell->index_buffer, ib, sizeof(cell->index_buffer)); | ||
104 | else | ||
105 | memset(&cell->index_buffer, 0, sizeof(cell->index_buffer)); | ||
106 | |||
107 | draw_set_index_buffer(cell->draw, ib); | ||
108 | } | ||
109 | |||
110 | |||
111 | void | ||
112 | cell_init_vertex_functions(struct cell_context *cell) | ||
113 | { | ||
114 | cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; | ||
115 | cell->pipe.set_index_buffer = cell_set_index_buffer; | ||
116 | cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state; | ||
117 | cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state; | ||
118 | cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state; | ||
119 | cell->pipe.redefine_user_buffer = u_default_redefine_user_buffer; | ||
120 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c deleted file mode 100644 index 777454479b1..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ /dev/null | |||
@@ -1,37 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #include "util/u_surface.h" | ||
29 | #include "cell_context.h" | ||
30 | #include "cell_surface.h" | ||
31 | |||
32 | |||
33 | void | ||
34 | cell_init_surface_functions(struct cell_context *cell) | ||
35 | { | ||
36 | cell->pipe.resource_copy_region = util_resource_copy_region; | ||
37 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_surface.h b/src/gallium/drivers/cell/ppu/cell_surface.h deleted file mode 100644 index 9e58f329443..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_surface.h +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | /* Authors: Keith Whitwell <keith@tungstengraphics.com> | ||
29 | */ | ||
30 | |||
31 | #ifndef CELL_SURFACE_H | ||
32 | #define CELL_SURFACE_H | ||
33 | |||
34 | |||
35 | struct cell_context; | ||
36 | |||
37 | |||
38 | extern void | ||
39 | cell_init_surface_functions(struct cell_context *cell); | ||
40 | |||
41 | |||
42 | #endif /* CELL_SURFACE_H */ | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c deleted file mode 100644 index 946a7050e5f..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ /dev/null | |||
@@ -1,644 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | /* | ||
28 | * Authors: | ||
29 | * Keith Whitwell <keith@tungstengraphics.com> | ||
30 | * Michel Dänzer <michel@tungstengraphics.com> | ||
31 | * Brian Paul | ||
32 | */ | ||
33 | |||
34 | #include "pipe/p_context.h" | ||
35 | #include "pipe/p_defines.h" | ||
36 | #include "util/u_inlines.h" | ||
37 | #include "util/u_transfer.h" | ||
38 | #include "util/u_format.h" | ||
39 | #include "util/u_math.h" | ||
40 | #include "util/u_memory.h" | ||
41 | |||
42 | #include "cell_context.h" | ||
43 | #include "cell_screen.h" | ||
44 | #include "cell_state.h" | ||
45 | #include "cell_texture.h" | ||
46 | |||
47 | #include "state_tracker/sw_winsys.h" | ||
48 | |||
49 | |||
50 | |||
51 | static boolean | ||
52 | cell_resource_layout(struct pipe_screen *screen, | ||
53 | struct cell_resource *ct) | ||
54 | { | ||
55 | struct pipe_resource *pt = &ct->base; | ||
56 | unsigned level; | ||
57 | unsigned width = pt->width0; | ||
58 | unsigned height = pt->height0; | ||
59 | unsigned depth = pt->depth0; | ||
60 | |||
61 | ct->buffer_size = 0; | ||
62 | |||
63 | for (level = 0; level <= pt->last_level; level++) { | ||
64 | unsigned size; | ||
65 | unsigned w_tile, h_tile; | ||
66 | |||
67 | assert(level < CELL_MAX_TEXTURE_LEVELS); | ||
68 | |||
69 | /* width, height, rounded up to tile size */ | ||
70 | w_tile = align(width, TILE_SIZE); | ||
71 | h_tile = align(height, TILE_SIZE); | ||
72 | |||
73 | ct->stride[level] = util_format_get_stride(pt->format, w_tile); | ||
74 | |||
75 | ct->level_offset[level] = ct->buffer_size; | ||
76 | |||
77 | size = ct->stride[level] * util_format_get_nblocksy(pt->format, h_tile); | ||
78 | if (pt->target == PIPE_TEXTURE_CUBE) | ||
79 | size *= 6; | ||
80 | else | ||
81 | size *= depth; | ||
82 | |||
83 | ct->buffer_size += size; | ||
84 | |||
85 | width = u_minify(width, 1); | ||
86 | height = u_minify(height, 1); | ||
87 | depth = u_minify(depth, 1); | ||
88 | } | ||
89 | |||
90 | ct->data = align_malloc(ct->buffer_size, 16); | ||
91 | |||
92 | return ct->data != NULL; | ||
93 | } | ||
94 | |||
95 | |||
96 | /** | ||
97 | * Texture layout for simple color buffers. | ||
98 | */ | ||
99 | static boolean | ||
100 | cell_displaytarget_layout(struct pipe_screen *screen, | ||
101 | struct cell_resource * ct) | ||
102 | { | ||
103 | struct sw_winsys *winsys = cell_screen(screen)->winsys; | ||
104 | |||
105 | /* Round up the surface size to a multiple of the tile size? | ||
106 | */ | ||
107 | ct->dt = winsys->displaytarget_create(winsys, | ||
108 | ct->base.bind, | ||
109 | ct->base.format, | ||
110 | ct->base.width0, | ||
111 | ct->base.height0, | ||
112 | 16, | ||
113 | &ct->dt_stride ); | ||
114 | |||
115 | return ct->dt != NULL; | ||
116 | } | ||
117 | |||
118 | static struct pipe_resource * | ||
119 | cell_resource_create(struct pipe_screen *screen, | ||
120 | const struct pipe_resource *templat) | ||
121 | { | ||
122 | struct cell_resource *ct = CALLOC_STRUCT(cell_resource); | ||
123 | if (!ct) | ||
124 | return NULL; | ||
125 | |||
126 | ct->base = *templat; | ||
127 | pipe_reference_init(&ct->base.reference, 1); | ||
128 | ct->base.screen = screen; | ||
129 | |||
130 | /* Create both a displaytarget (linear) and regular texture | ||
131 | * (twiddled). Convert twiddled->linear at flush_frontbuffer time. | ||
132 | */ | ||
133 | if (ct->base.bind & (PIPE_BIND_DISPLAY_TARGET | | ||
134 | PIPE_BIND_SCANOUT | | ||
135 | PIPE_BIND_SHARED)) { | ||
136 | if (!cell_displaytarget_layout(screen, ct)) | ||
137 | goto fail; | ||
138 | } | ||
139 | |||
140 | if (!cell_resource_layout(screen, ct)) | ||
141 | goto fail; | ||
142 | |||
143 | return &ct->base; | ||
144 | |||
145 | fail: | ||
146 | if (ct->dt) { | ||
147 | struct sw_winsys *winsys = cell_screen(screen)->winsys; | ||
148 | winsys->displaytarget_destroy(winsys, ct->dt); | ||
149 | } | ||
150 | |||
151 | FREE(ct); | ||
152 | |||
153 | return NULL; | ||
154 | } | ||
155 | |||
156 | |||
157 | static void | ||
158 | cell_resource_destroy(struct pipe_screen *scrn, struct pipe_resource *pt) | ||
159 | { | ||
160 | struct cell_screen *screen = cell_screen(scrn); | ||
161 | struct sw_winsys *winsys = screen->winsys; | ||
162 | struct cell_resource *ct = cell_resource(pt); | ||
163 | |||
164 | if (ct->dt) { | ||
165 | /* display target */ | ||
166 | winsys->displaytarget_destroy(winsys, ct->dt); | ||
167 | } | ||
168 | else if (!ct->userBuffer) { | ||
169 | align_free(ct->data); | ||
170 | } | ||
171 | |||
172 | FREE(ct); | ||
173 | } | ||
174 | |||
175 | |||
176 | |||
177 | /** | ||
178 | * Convert image from linear layout to tiled layout. 4-byte pixels. | ||
179 | */ | ||
180 | static void | ||
181 | twiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, | ||
182 | uint src_stride, const uint *src) | ||
183 | { | ||
184 | const uint tile_size2 = tile_size * tile_size; | ||
185 | const uint h_t = (h + tile_size - 1) / tile_size; | ||
186 | const uint w_t = (w + tile_size - 1) / tile_size; | ||
187 | |||
188 | uint it, jt; /* tile counters */ | ||
189 | uint i, j; /* intra-tile counters */ | ||
190 | |||
191 | src_stride /= 4; /* convert from bytes to pixels */ | ||
192 | |||
193 | /* loop over dest tiles */ | ||
194 | for (it = 0; it < h_t; it++) { | ||
195 | for (jt = 0; jt < w_t; jt++) { | ||
196 | /* start of dest tile: */ | ||
197 | uint *tdst = dst + (it * w_t + jt) * tile_size2; | ||
198 | |||
199 | /* compute size of this tile (may be smaller than tile_size) */ | ||
200 | /* XXX note: a compiler bug was found here. That's why the code | ||
201 | * looks as it does. | ||
202 | */ | ||
203 | uint tile_width = w - jt * tile_size; | ||
204 | tile_width = MIN2(tile_width, tile_size); | ||
205 | uint tile_height = h - it * tile_size; | ||
206 | tile_height = MIN2(tile_height, tile_size); | ||
207 | |||
208 | /* loop over texels in the tile */ | ||
209 | for (i = 0; i < tile_height; i++) { | ||
210 | for (j = 0; j < tile_width; j++) { | ||
211 | const uint srci = it * tile_size + i; | ||
212 | const uint srcj = jt * tile_size + j; | ||
213 | ASSERT(srci < h); | ||
214 | ASSERT(srcj < w); | ||
215 | tdst[i * tile_size + j] = src[srci * src_stride + srcj]; | ||
216 | } | ||
217 | } | ||
218 | } | ||
219 | } | ||
220 | } | ||
221 | |||
222 | |||
223 | /** | ||
224 | * For Cell. Basically, rearrange the pixels/quads from this layout: | ||
225 | * +--+--+--+--+ | ||
226 | * |p0|p1|p2|p3|.... | ||
227 | * +--+--+--+--+ | ||
228 | * | ||
229 | * to this layout: | ||
230 | * +--+--+ | ||
231 | * |p0|p1|.... | ||
232 | * +--+--+ | ||
233 | * |p2|p3| | ||
234 | * +--+--+ | ||
235 | */ | ||
236 | static void | ||
237 | twiddle_tile(const uint *tileIn, uint *tileOut) | ||
238 | { | ||
239 | int y, x; | ||
240 | |||
241 | for (y = 0; y < TILE_SIZE; y+=2) { | ||
242 | for (x = 0; x < TILE_SIZE; x+=2) { | ||
243 | int k = 4 * (y/2 * TILE_SIZE/2 + x/2); | ||
244 | tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k]; | ||
245 | tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1]; | ||
246 | tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2]; | ||
247 | tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3]; | ||
248 | } | ||
249 | } | ||
250 | } | ||
251 | |||
252 | |||
253 | /** | ||
254 | * Convert image from tiled layout to linear layout. 4-byte pixels. | ||
255 | */ | ||
256 | static void | ||
257 | untwiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, | ||
258 | uint dst_stride, const uint *src) | ||
259 | { | ||
260 | const uint tile_size2 = tile_size * tile_size; | ||
261 | const uint h_t = (h + tile_size - 1) / tile_size; | ||
262 | const uint w_t = (w + tile_size - 1) / tile_size; | ||
263 | uint *tile_buf; | ||
264 | uint it, jt; /* tile counters */ | ||
265 | uint i, j; /* intra-tile counters */ | ||
266 | |||
267 | dst_stride /= 4; /* convert from bytes to pixels */ | ||
268 | |||
269 | tile_buf = align_malloc(tile_size * tile_size * 4, 16); | ||
270 | |||
271 | /* loop over src tiles */ | ||
272 | for (it = 0; it < h_t; it++) { | ||
273 | for (jt = 0; jt < w_t; jt++) { | ||
274 | /* start of src tile: */ | ||
275 | const uint *tsrc = src + (it * w_t + jt) * tile_size2; | ||
276 | |||
277 | twiddle_tile(tsrc, tile_buf); | ||
278 | tsrc = tile_buf; | ||
279 | |||
280 | /* compute size of this tile (may be smaller than tile_size) */ | ||
281 | /* XXX note: a compiler bug was found here. That's why the code | ||
282 | * looks as it does. | ||
283 | */ | ||
284 | uint tile_width = w - jt * tile_size; | ||
285 | tile_width = MIN2(tile_width, tile_size); | ||
286 | uint tile_height = h - it * tile_size; | ||
287 | tile_height = MIN2(tile_height, tile_size); | ||
288 | |||
289 | /* loop over texels in the tile */ | ||
290 | for (i = 0; i < tile_height; i++) { | ||
291 | for (j = 0; j < tile_width; j++) { | ||
292 | uint dsti = it * tile_size + i; | ||
293 | uint dstj = jt * tile_size + j; | ||
294 | ASSERT(dsti < h); | ||
295 | ASSERT(dstj < w); | ||
296 | dst[dsti * dst_stride + dstj] = tsrc[i * tile_size + j]; | ||
297 | } | ||
298 | } | ||
299 | } | ||
300 | } | ||
301 | |||
302 | align_free(tile_buf); | ||
303 | } | ||
304 | |||
305 | |||
306 | static struct pipe_surface * | ||
307 | cell_create_surface(struct pipe_context *ctx, | ||
308 | struct pipe_resource *pt, | ||
309 | const struct pipe_surface *surf_tmpl) | ||
310 | { | ||
311 | struct cell_resource *ct = cell_resource(pt); | ||
312 | struct pipe_surface *ps; | ||
313 | |||
314 | assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); | ||
315 | ps = CALLOC_STRUCT(pipe_surface); | ||
316 | if (ps) { | ||
317 | pipe_reference_init(&ps->reference, 1); | ||
318 | pipe_resource_reference(&ps->texture, pt); | ||
319 | ps->format = surf_tmpl->format; | ||
320 | ps->context = ctx; | ||
321 | ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level); | ||
322 | ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level); | ||
323 | /* XXX may need to override usage flags (see sp_texture.c) */ | ||
324 | ps->usage = surf_tmpl->usage; | ||
325 | ps->u.tex.level = surf_tmpl->u.tex.level; | ||
326 | ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer; | ||
327 | ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer; | ||
328 | } | ||
329 | return ps; | ||
330 | } | ||
331 | |||
332 | |||
333 | static void | ||
334 | cell_surface_destroy(struct pipe_context *ctx, struct pipe_surface *surf) | ||
335 | { | ||
336 | pipe_resource_reference(&surf->texture, NULL); | ||
337 | FREE(surf); | ||
338 | } | ||
339 | |||
340 | |||
341 | /** | ||
342 | * Create new pipe_transfer object. | ||
343 | * This is used by the user to put tex data into a texture (and get it | ||
344 | * back out for glGetTexImage). | ||
345 | */ | ||
346 | static struct pipe_transfer * | ||
347 | cell_get_transfer(struct pipe_context *ctx, | ||
348 | struct pipe_resource *resource, | ||
349 | unsigned level, | ||
350 | unsigned usage, | ||
351 | const struct pipe_box *box) | ||
352 | { | ||
353 | struct cell_resource *ct = cell_resource(resource); | ||
354 | struct cell_transfer *ctrans; | ||
355 | enum pipe_format format = resource->format; | ||
356 | |||
357 | assert(resource); | ||
358 | assert(level <= resource->last_level); | ||
359 | |||
360 | /* make sure the requested region is in the image bounds */ | ||
361 | assert(box->x + box->width <= u_minify(resource->width0, level)); | ||
362 | assert(box->y + box->height <= u_minify(resource->height0, level)); | ||
363 | assert(box->z + box->depth <= (u_minify(resource->depth0, level) + resource->array_size - 1)); | ||
364 | |||
365 | ctrans = CALLOC_STRUCT(cell_transfer); | ||
366 | if (ctrans) { | ||
367 | struct pipe_transfer *pt = &ctrans->base; | ||
368 | pipe_resource_reference(&pt->resource, resource); | ||
369 | pt->level = level; | ||
370 | pt->usage = usage; | ||
371 | pt->box = *box; | ||
372 | pt->stride = ct->stride[level]; | ||
373 | |||
374 | ctrans->offset = ct->level_offset[level]; | ||
375 | |||
376 | if (resource->target == PIPE_TEXTURE_CUBE || resource->target == PIPE_TEXTURE_3D) { | ||
377 | unsigned h_tile = align(u_minify(resource->height0, level), TILE_SIZE); | ||
378 | ctrans->offset += box->z * util_format_get_nblocksy(format, h_tile) * pt->stride; | ||
379 | } | ||
380 | else { | ||
381 | assert(box->z == 0); | ||
382 | } | ||
383 | |||
384 | return pt; | ||
385 | } | ||
386 | return NULL; | ||
387 | } | ||
388 | |||
389 | |||
390 | static void | ||
391 | cell_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *t) | ||
392 | { | ||
393 | struct cell_transfer *transfer = cell_transfer(t); | ||
394 | /* Effectively do the texture_update work here - if texture images | ||
395 | * needed post-processing to put them into hardware layout, this is | ||
396 | * where it would happen. For cell, nothing to do. | ||
397 | */ | ||
398 | assert (transfer->base.resource); | ||
399 | pipe_resource_reference(&transfer->base.resource, NULL); | ||
400 | FREE(transfer); | ||
401 | } | ||
402 | |||
403 | |||
404 | /** | ||
405 | * Return pointer to texture image data in linear layout. | ||
406 | */ | ||
407 | static void * | ||
408 | cell_transfer_map(struct pipe_context *ctx, struct pipe_transfer *transfer) | ||
409 | { | ||
410 | struct cell_transfer *ctrans = cell_transfer(transfer); | ||
411 | struct pipe_resource *pt = transfer->resource; | ||
412 | struct cell_resource *ct = cell_resource(pt); | ||
413 | |||
414 | assert(transfer->resource); | ||
415 | |||
416 | if (ct->mapped == NULL) { | ||
417 | ct->mapped = ct->data; | ||
418 | } | ||
419 | |||
420 | |||
421 | /* Better test would be resource->is_linear | ||
422 | */ | ||
423 | if (transfer->resource->target != PIPE_BUFFER) { | ||
424 | const uint level = ctrans->base.level; | ||
425 | const uint texWidth = u_minify(pt->width0, level); | ||
426 | const uint texHeight = u_minify(pt->height0, level); | ||
427 | unsigned size; | ||
428 | |||
429 | |||
430 | /* | ||
431 | * Create a buffer of ordinary memory for the linear texture. | ||
432 | * This is the memory that the user will read/write. | ||
433 | */ | ||
434 | size = (util_format_get_stride(pt->format, align(texWidth, TILE_SIZE)) * | ||
435 | util_format_get_nblocksy(pt->format, align(texHeight, TILE_SIZE))); | ||
436 | |||
437 | ctrans->map = align_malloc(size, 16); | ||
438 | if (!ctrans->map) | ||
439 | return NULL; /* out of memory */ | ||
440 | |||
441 | if (transfer->usage & PIPE_TRANSFER_READ) { | ||
442 | /* Textures always stored twiddled, need to untwiddle the | ||
443 | * texture to make a linear version. | ||
444 | */ | ||
445 | const uint bpp = util_format_get_blocksize(ct->base.format); | ||
446 | if (bpp == 4) { | ||
447 | const uint *src = (uint *) (ct->mapped + ctrans->offset); | ||
448 | uint *dst = ctrans->map; | ||
449 | untwiddle_image_uint(texWidth, texHeight, TILE_SIZE, | ||
450 | dst, transfer->stride, src); | ||
451 | } | ||
452 | else { | ||
453 | // xxx fix | ||
454 | } | ||
455 | } | ||
456 | } | ||
457 | else { | ||
458 | unsigned stride = transfer->stride; | ||
459 | enum pipe_format format = pt->format; | ||
460 | unsigned blocksize = util_format_get_blocksize(format); | ||
461 | |||
462 | ctrans->map = (ct->mapped + | ||
463 | ctrans->offset + | ||
464 | ctrans->base.box.y / util_format_get_blockheight(format) * stride + | ||
465 | ctrans->base.box.x / util_format_get_blockwidth(format) * blocksize); | ||
466 | } | ||
467 | |||
468 | |||
469 | return ctrans->map; | ||
470 | } | ||
471 | |||
472 | |||
473 | /** | ||
474 | * Called when user is done reading/writing texture data. | ||
475 | * If new data was written, this is where we convert the linear data | ||
476 | * to tiled data. | ||
477 | */ | ||
478 | static void | ||
479 | cell_transfer_unmap(struct pipe_context *ctx, | ||
480 | struct pipe_transfer *transfer) | ||
481 | { | ||
482 | struct cell_transfer *ctrans = cell_transfer(transfer); | ||
483 | struct pipe_resource *pt = transfer->resource; | ||
484 | struct cell_resource *ct = cell_resource(pt); | ||
485 | const uint level = ctrans->base.level; | ||
486 | const uint texWidth = u_minify(pt->width0, level); | ||
487 | const uint texHeight = u_minify(pt->height0, level); | ||
488 | const uint stride = ct->stride[level]; | ||
489 | |||
490 | if (!ct->mapped) { | ||
491 | assert(0); | ||
492 | return; | ||
493 | } | ||
494 | |||
495 | if (pt->target != PIPE_BUFFER) { | ||
496 | if (transfer->usage & PIPE_TRANSFER_WRITE) { | ||
497 | /* The user wrote new texture data into the mapped buffer. | ||
498 | * We need to convert the new linear data into the twiddled/tiled format. | ||
499 | */ | ||
500 | const uint bpp = util_format_get_blocksize(ct->base.format); | ||
501 | if (bpp == 4) { | ||
502 | const uint *src = ctrans->map; | ||
503 | uint *dst = (uint *) (ct->mapped + ctrans->offset); | ||
504 | twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, stride, src); | ||
505 | } | ||
506 | else { | ||
507 | // xxx fix | ||
508 | } | ||
509 | } | ||
510 | |||
511 | align_free(ctrans->map); | ||
512 | } | ||
513 | else { | ||
514 | /* nothing to do */ | ||
515 | } | ||
516 | |||
517 | ctrans->map = NULL; | ||
518 | } | ||
519 | |||
520 | |||
521 | |||
522 | /* This used to be overriden by the co-state tracker, but really needs | ||
523 | * to be active with sw_winsys. | ||
524 | * | ||
525 | * Contrasting with llvmpipe and softpipe, this is the only place | ||
526 | * where we use the ct->dt display target in any real sense. | ||
527 | * | ||
528 | * Basically just untwiddle our local data into the linear | ||
529 | * displaytarget. | ||
530 | */ | ||
531 | static void | ||
532 | cell_flush_frontbuffer(struct pipe_screen *_screen, | ||
533 | struct pipe_resource *resource, | ||
534 | unsigned level, unsigned layer, | ||
535 | void *context_private) | ||
536 | { | ||
537 | struct cell_screen *screen = cell_screen(_screen); | ||
538 | struct sw_winsys *winsys = screen->winsys; | ||
539 | struct cell_resource *ct = cell_resource(resource); | ||
540 | |||
541 | if (!ct->dt) | ||
542 | return; | ||
543 | |||
544 | /* Need to untwiddle from our internal representation here: | ||
545 | */ | ||
546 | { | ||
547 | unsigned *map = winsys->displaytarget_map(winsys, ct->dt, | ||
548 | (PIPE_TRANSFER_READ | | ||
549 | PIPE_TRANSFER_WRITE)); | ||
550 | unsigned *src = (unsigned *)(ct->data + ct->level_offset[level]); | ||
551 | |||
552 | untwiddle_image_uint(u_minify(resource->width0, level), | ||
553 | u_minify(resource->height0, level), | ||
554 | TILE_SIZE, | ||
555 | map, | ||
556 | ct->dt_stride, | ||
557 | src); | ||
558 | |||
559 | winsys->displaytarget_unmap(winsys, ct->dt); | ||
560 | } | ||
561 | |||
562 | winsys->displaytarget_display(winsys, ct->dt, context_private); | ||
563 | } | ||
564 | |||
565 | |||
566 | |||
567 | /** | ||
568 | * Create buffer which wraps user-space data. | ||
569 | */ | ||
570 | static struct pipe_resource * | ||
571 | cell_user_buffer_create(struct pipe_screen *screen, | ||
572 | void *ptr, | ||
573 | unsigned bytes, | ||
574 | unsigned bind_flags) | ||
575 | { | ||
576 | struct cell_resource *buffer; | ||
577 | |||
578 | buffer = CALLOC_STRUCT(cell_resource); | ||
579 | if(!buffer) | ||
580 | return NULL; | ||
581 | |||
582 | pipe_reference_init(&buffer->base.reference, 1); | ||
583 | buffer->base.screen = screen; | ||
584 | buffer->base.format = PIPE_FORMAT_R8_UNORM; /* ?? */ | ||
585 | buffer->base.bind = PIPE_BIND_TRANSFER_READ | bind_flags; | ||
586 | buffer->base.usage = PIPE_USAGE_IMMUTABLE; | ||
587 | buffer->base.flags = 0; | ||
588 | buffer->base.width0 = bytes; | ||
589 | buffer->base.height0 = 1; | ||
590 | buffer->base.depth0 = 1; | ||
591 | buffer->base.array_size = 1; | ||
592 | buffer->userBuffer = TRUE; | ||
593 | buffer->data = ptr; | ||
594 | |||
595 | return &buffer->base; | ||
596 | } | ||
597 | |||
598 | |||
/**
 * Create a resource from a winsys handle.
 * Not implemented: always returns NULL.
 */
static struct pipe_resource *
cell_resource_from_handle(struct pipe_screen *screen,
                          const struct pipe_resource *templat,
                          struct winsys_handle *handle)
{
   struct pipe_resource *result = NULL;   /* XXX todo */

   return result;
}
607 | |||
608 | |||
609 | static boolean | ||
610 | cell_resource_get_handle(struct pipe_screen *scree, | ||
611 | struct pipe_resource *tex, | ||
612 | struct winsys_handle *handle) | ||
613 | { | ||
614 | /* XXX todo */ | ||
615 | return FALSE; | ||
616 | } | ||
617 | |||
618 | |||
619 | void | ||
620 | cell_init_screen_texture_funcs(struct pipe_screen *screen) | ||
621 | { | ||
622 | screen->resource_create = cell_resource_create; | ||
623 | screen->resource_destroy = cell_resource_destroy; | ||
624 | screen->resource_from_handle = cell_resource_from_handle; | ||
625 | screen->resource_get_handle = cell_resource_get_handle; | ||
626 | screen->user_buffer_create = cell_user_buffer_create; | ||
627 | |||
628 | screen->flush_frontbuffer = cell_flush_frontbuffer; | ||
629 | } | ||
630 | |||
631 | void | ||
632 | cell_init_texture_transfer_funcs(struct cell_context *cell) | ||
633 | { | ||
634 | cell->pipe.get_transfer = cell_get_transfer; | ||
635 | cell->pipe.transfer_destroy = cell_transfer_destroy; | ||
636 | cell->pipe.transfer_map = cell_transfer_map; | ||
637 | cell->pipe.transfer_unmap = cell_transfer_unmap; | ||
638 | |||
639 | cell->pipe.transfer_flush_region = u_default_transfer_flush_region; | ||
640 | cell->pipe.transfer_inline_write = u_default_transfer_inline_write; | ||
641 | |||
642 | cell->pipe.create_surface = cell_create_surface; | ||
643 | cell->pipe.surface_destroy = cell_surface_destroy; | ||
644 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h deleted file mode 100644 index bd8224b3b7b..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ /dev/null | |||
@@ -1,102 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #ifndef CELL_TEXTURE_H | ||
29 | #define CELL_TEXTURE_H | ||
30 | |||
31 | #include "cell/common.h" | ||
32 | |||
33 | struct cell_context; | ||
34 | struct pipe_resource; | ||
35 | |||
36 | |||
/**
 * Subclass of pipe_resource.
 *
 * The base struct must stay the first member: cell_resource() converts a
 * pipe_resource pointer to this type with a plain cast.
 */
struct cell_resource
{
   struct pipe_resource base;

   /** Per mipmap level: offset of the level's image from the start of the data */
   unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS];
   /** Per mipmap level: row stride, in bytes */
   unsigned long stride[CELL_MAX_TEXTURE_LEVELS];

   /**
    * Display target, for textures with the PIPE_BIND_DISPLAY_TARGET
    * usage.
    */
   struct sw_displaytarget *dt;
   /** Row stride of the display target, in bytes */
   unsigned dt_stride;

   /**
    * Malloc'ed data for regular textures, or a mapping to dt above.
    */
   void *data;
   /** TRUE if data points at caller-owned memory that must not be freed */
   boolean userBuffer;

   /* Size of the linear buffer??
    */
   unsigned long buffer_size;

   /** The buffer above, mapped.  This is the memory from which the
    * SPUs will fetch texels.  This texture data is in the tiled layout.
    */
   ubyte *mapped;
};
69 | |||
70 | |||
/**
 * Subclass of pipe_transfer.
 *
 * The base struct must stay the first member: cell_transfer() converts a
 * pipe_transfer pointer to this type with a plain cast.
 */
struct cell_transfer
{
   struct pipe_transfer base;

   /** Offset of the transferred image from the start of the resource data */
   unsigned long offset;
   /** Pointer handed out by the map call while the transfer is mapped */
   void *map;
};
78 | |||
79 | |||
80 | /** cast wrapper */ | ||
81 | static INLINE struct cell_resource * | ||
82 | cell_resource(struct pipe_resource *pt) | ||
83 | { | ||
84 | return (struct cell_resource *) pt; | ||
85 | } | ||
86 | |||
87 | |||
88 | /** cast wrapper */ | ||
89 | static INLINE struct cell_transfer * | ||
90 | cell_transfer(struct pipe_transfer *pt) | ||
91 | { | ||
92 | return (struct cell_transfer *) pt; | ||
93 | } | ||
94 | |||
95 | |||
96 | extern void | ||
97 | cell_init_screen_texture_funcs(struct pipe_screen *screen); | ||
98 | |||
99 | extern void | ||
100 | cell_init_texture_transfer_funcs(struct cell_context *cell); | ||
101 | |||
102 | #endif /* CELL_TEXTURE_H */ | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c deleted file mode 100644 index 37b71956482..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ /dev/null | |||
@@ -1,332 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | /** | ||
29 | * Vertex buffer code. The draw module transforms vertices to window | ||
30 | * coords, etc. and emits the vertices into buffer supplied by this module. | ||
31 | * When a vertex buffer is full, or we flush, we'll send the vertex data | ||
32 | * to the SPUs. | ||
33 | * | ||
34 | * Authors | ||
35 | * Brian Paul | ||
36 | */ | ||
37 | |||
38 | |||
39 | #include "cell_batch.h" | ||
40 | #include "cell_context.h" | ||
41 | #include "cell_fence.h" | ||
42 | #include "cell_flush.h" | ||
43 | #include "cell_spu.h" | ||
44 | #include "cell_vbuf.h" | ||
45 | #include "draw/draw_vbuf.h" | ||
46 | #include "util/u_memory.h" | ||
47 | |||
48 | |||
49 | /** Allow vertex data to be inlined after RENDER command */ | ||
50 | #define ALLOW_INLINE_VERTS 1 | ||
51 | |||
52 | |||
/**
 * Subclass of vbuf_render because we need a cell_context pointer in
 * a few places.
 *
 * The base struct must stay the first member: cell_vbuf_render() converts
 * a vbuf_render pointer to this type with a plain cast.
 */
struct cell_vbuf_render
{
   struct vbuf_render base;
   struct cell_context *cell;  /**< owning context */
   uint prim;                  /**< PIPE_PRIM_x */
   uint vertex_size;           /**< in bytes */
   void *vertex_buffer;        /**< just for debug, really */
   uint vertex_buf;            /**< in [0, CELL_NUM_BUFFERS-1], or ~0 when none is held */
   uint vertex_buffer_size;    /**< size in bytes */
};
67 | |||
68 | |||
/** Downcast a vbuf_render pointer to the cell_vbuf_render that contains it */
static struct cell_vbuf_render *
cell_vbuf_render(struct vbuf_render *vbr)
{
   struct cell_vbuf_render *cvbr = (struct cell_vbuf_render *) vbr;

   return cvbr;
}
75 | |||
76 | |||
77 | |||
78 | static const struct vertex_info * | ||
79 | cell_vbuf_get_vertex_info(struct vbuf_render *vbr) | ||
80 | { | ||
81 | struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); | ||
82 | return &cvbr->cell->vertex_info; | ||
83 | } | ||
84 | |||
85 | |||
86 | static boolean | ||
87 | cell_vbuf_allocate_vertices(struct vbuf_render *vbr, | ||
88 | ushort vertex_size, ushort nr_vertices) | ||
89 | { | ||
90 | struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); | ||
91 | unsigned size = vertex_size * nr_vertices; | ||
92 | /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/ | ||
93 | |||
94 | assert(cvbr->vertex_buf == ~0); | ||
95 | cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell); | ||
96 | cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf]; | ||
97 | cvbr->vertex_buffer_size = size; | ||
98 | cvbr->vertex_size = vertex_size; | ||
99 | |||
100 | return cvbr->vertex_buffer != NULL; | ||
101 | } | ||
102 | |||
103 | |||
104 | static void | ||
105 | cell_vbuf_release_vertices(struct vbuf_render *vbr) | ||
106 | { | ||
107 | struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); | ||
108 | struct cell_context *cell = cvbr->cell; | ||
109 | |||
110 | /* | ||
111 | printf("%s vertex_buf = %u count = %u\n", | ||
112 | __FUNCTION__, cvbr->vertex_buf, vertices_used); | ||
113 | */ | ||
114 | |||
115 | /* Make sure texture buffers aren't released until we're done rendering | ||
116 | * with them. | ||
117 | */ | ||
118 | cell_add_fenced_textures(cell); | ||
119 | |||
120 | /* Tell SPUs they can release the vert buf */ | ||
121 | if (cvbr->vertex_buf != ~0U) { | ||
122 | STATIC_ASSERT(sizeof(struct cell_command_release_verts) % 16 == 0); | ||
123 | struct cell_command_release_verts *release | ||
124 | = (struct cell_command_release_verts *) | ||
125 | cell_batch_alloc16(cell, sizeof(struct cell_command_release_verts)); | ||
126 | release->opcode[0] = CELL_CMD_RELEASE_VERTS; | ||
127 | release->vertex_buf = cvbr->vertex_buf; | ||
128 | } | ||
129 | |||
130 | cvbr->vertex_buf = ~0; | ||
131 | cell_flush_int(cell, 0x0); | ||
132 | |||
133 | cvbr->vertex_buffer = NULL; | ||
134 | } | ||
135 | |||
136 | |||
137 | static void * | ||
138 | cell_vbuf_map_vertices(struct vbuf_render *vbr) | ||
139 | { | ||
140 | struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); | ||
141 | return cvbr->vertex_buffer; | ||
142 | } | ||
143 | |||
144 | |||
145 | static void | ||
146 | cell_vbuf_unmap_vertices(struct vbuf_render *vbr, | ||
147 | ushort min_index, | ||
148 | ushort max_index ) | ||
149 | { | ||
150 | struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); | ||
151 | assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); | ||
152 | /* do nothing */ | ||
153 | } | ||
154 | |||
155 | |||
156 | |||
157 | static boolean | ||
158 | cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) | ||
159 | { | ||
160 | struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); | ||
161 | cvbr->prim = prim; | ||
162 | /*printf("cell_set_prim %u\n", prim);*/ | ||
163 | return TRUE; | ||
164 | } | ||
165 | |||
166 | |||
167 | static void | ||
168 | cell_vbuf_draw_elements(struct vbuf_render *vbr, | ||
169 | const ushort *indices, | ||
170 | uint nr_indices) | ||
171 | { | ||
172 | struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); | ||
173 | struct cell_context *cell = cvbr->cell; | ||
174 | float xmin, ymin, xmax, ymax; | ||
175 | uint i; | ||
176 | uint nr_vertices = 0, min_index = ~0; | ||
177 | const void *vertices = cvbr->vertex_buffer; | ||
178 | const uint vertex_size = cvbr->vertex_size; | ||
179 | |||
180 | for (i = 0; i < nr_indices; i++) { | ||
181 | if (indices[i] > nr_vertices) | ||
182 | nr_vertices = indices[i]; | ||
183 | if (indices[i] < min_index) | ||
184 | min_index = indices[i]; | ||
185 | } | ||
186 | nr_vertices++; | ||
187 | |||
188 | #if 0 | ||
189 | /*if (min_index > 0)*/ | ||
190 | printf("%s min_index = %u\n", __FUNCTION__, min_index); | ||
191 | #endif | ||
192 | |||
193 | #if 0 | ||
194 | printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u\n", | ||
195 | nr_indices, nr_vertices); | ||
196 | printf(" "); | ||
197 | for (i = 0; i < nr_indices; i += 3) { | ||
198 | printf("%u %u %u, ", indices[i+0], indices[i+1], indices[i+2]); | ||
199 | } | ||
200 | printf("\n"); | ||
201 | #elif 0 | ||
202 | printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u indexes = [%u %u %u ...]\n", | ||
203 | nr_indices, nr_vertices, | ||
204 | indices[0], indices[1], indices[2]); | ||
205 | printf("ind space = %u, vert space = %u, space = %u\n", | ||
206 | nr_indices * 2, | ||
207 | nr_vertices * 4 * cell->vertex_info.size, | ||
208 | cell_batch_free_space(cell)); | ||
209 | #endif | ||
210 | |||
211 | /* compute x/y bounding box */ | ||
212 | xmin = ymin = 1e50; | ||
213 | xmax = ymax = -1e50; | ||
214 | for (i = min_index; i < nr_vertices; i++) { | ||
215 | const float *v = (float *) ((ubyte *) vertices + i * vertex_size); | ||
216 | if (v[0] < xmin) | ||
217 | xmin = v[0]; | ||
218 | if (v[0] > xmax) | ||
219 | xmax = v[0]; | ||
220 | if (v[1] < ymin) | ||
221 | ymin = v[1]; | ||
222 | if (v[1] > ymax) | ||
223 | ymax = v[1]; | ||
224 | } | ||
225 | #if 0 | ||
226 | printf("PPU Bounds %g, %g .. %g, %g\n", xmin, ymin, xmax, ymax); | ||
227 | fflush(stdout); | ||
228 | #endif | ||
229 | |||
230 | if (cvbr->prim != PIPE_PRIM_TRIANGLES) | ||
231 | return; /* only render tris for now */ | ||
232 | |||
233 | /* build/insert batch RENDER command */ | ||
234 | { | ||
235 | const uint index_bytes = ROUNDUP16(nr_indices * 2); | ||
236 | const uint vertex_bytes = ROUNDUP16(nr_vertices * 4 * cell->vertex_info.size); | ||
237 | STATIC_ASSERT(sizeof(struct cell_command_render) % 16 == 0); | ||
238 | const uint batch_size = sizeof(struct cell_command_render) + index_bytes; | ||
239 | |||
240 | struct cell_command_render *render | ||
241 | = (struct cell_command_render *) | ||
242 | cell_batch_alloc16(cell, batch_size); | ||
243 | |||
244 | render->opcode[0] = CELL_CMD_RENDER; | ||
245 | render->prim_type = cvbr->prim; | ||
246 | |||
247 | render->num_indexes = nr_indices; | ||
248 | render->min_index = min_index; | ||
249 | |||
250 | /* append indices after render command */ | ||
251 | memcpy(render + 1, indices, nr_indices * 2); | ||
252 | |||
253 | /* if there's room, append vertices after the indices, else leave | ||
254 | * vertices in the original/separate buffer. | ||
255 | */ | ||
256 | render->vertex_size = 4 * cell->vertex_info.size; | ||
257 | render->num_verts = nr_vertices; | ||
258 | if (ALLOW_INLINE_VERTS && | ||
259 | min_index == 0 && | ||
260 | vertex_bytes + 16 <= cell_batch_free_space(cell)) { | ||
261 | /* vertex data inlined, after indices, at 16-byte boundary */ | ||
262 | void *dst = cell_batch_alloc16(cell, vertex_bytes); | ||
263 | memcpy(dst, vertices, vertex_bytes); | ||
264 | render->inline_verts = TRUE; | ||
265 | render->vertex_buf = ~0; | ||
266 | } | ||
267 | else { | ||
268 | /* vertex data in separate buffer */ | ||
269 | render->inline_verts = FALSE; | ||
270 | ASSERT(cvbr->vertex_buf >= 0); | ||
271 | render->vertex_buf = cvbr->vertex_buf; | ||
272 | } | ||
273 | |||
274 | render->xmin = xmin; | ||
275 | render->ymin = ymin; | ||
276 | render->xmax = xmax; | ||
277 | render->ymax = ymax; | ||
278 | } | ||
279 | |||
280 | #if 0 | ||
281 | /* helpful for debug */ | ||
282 | cell_flush_int(cell, CELL_FLUSH_WAIT); | ||
283 | #endif | ||
284 | } | ||
285 | |||
286 | |||
287 | static void | ||
288 | cell_vbuf_destroy(struct vbuf_render *vbr) | ||
289 | { | ||
290 | struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); | ||
291 | cvbr->cell->vbuf_render = NULL; | ||
292 | FREE(cvbr); | ||
293 | } | ||
294 | |||
295 | |||
296 | /** | ||
297 | * Initialize the post-transform vertex buffer information for the given | ||
298 | * context. | ||
299 | */ | ||
300 | void | ||
301 | cell_init_vbuf(struct cell_context *cell) | ||
302 | { | ||
303 | assert(cell->draw); | ||
304 | |||
305 | cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render); | ||
306 | |||
307 | /* The max number of indexes is what can fix into a batch buffer, | ||
308 | * minus the render and release-verts commands. | ||
309 | */ | ||
310 | cell->vbuf_render->base.max_indices | ||
311 | = (CELL_BUFFER_SIZE | ||
312 | - sizeof(struct cell_command_render) | ||
313 | - sizeof(struct cell_command_release_verts)) | ||
314 | / sizeof(ushort); | ||
315 | cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE; | ||
316 | |||
317 | cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info; | ||
318 | cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices; | ||
319 | cell->vbuf_render->base.map_vertices = cell_vbuf_map_vertices; | ||
320 | cell->vbuf_render->base.unmap_vertices = cell_vbuf_unmap_vertices; | ||
321 | cell->vbuf_render->base.set_primitive = cell_vbuf_set_primitive; | ||
322 | cell->vbuf_render->base.draw_elements = cell_vbuf_draw_elements; | ||
323 | cell->vbuf_render->base.release_vertices = cell_vbuf_release_vertices; | ||
324 | cell->vbuf_render->base.destroy = cell_vbuf_destroy; | ||
325 | |||
326 | cell->vbuf_render->cell = cell; | ||
327 | #if 1 | ||
328 | cell->vbuf_render->vertex_buf = ~0; | ||
329 | #endif | ||
330 | |||
331 | cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base); | ||
332 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.h b/src/gallium/drivers/cell/ppu/cell_vbuf.h deleted file mode 100644 index d265cbf7701..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.h +++ /dev/null | |||
@@ -1,38 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #ifndef CELL_VBUF_H | ||
29 | #define CELL_VBUF_H | ||
30 | |||
31 | |||
32 | struct cell_context; | ||
33 | |||
34 | extern void | ||
35 | cell_init_vbuf(struct cell_context *cell); | ||
36 | |||
37 | |||
38 | #endif /* CELL_VBUF_H */ | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c deleted file mode 100644 index 9cba537d9eb..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ /dev/null | |||
@@ -1,346 +0,0 @@ | |||
1 | /* | ||
2 | * (C) Copyright IBM Corporation 2008 | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
6 | * copy of this software and associated documentation files (the "Software"), | ||
7 | * to deal in the Software without restriction, including without limitation | ||
8 | * on the rights to use, copy, modify, merge, publish, distribute, sub | ||
9 | * license, and/or sell copies of the Software, and to permit persons to whom | ||
10 | * the Software is furnished to do so, subject to the following conditions: | ||
11 | * | ||
12 | * The above copyright notice and this permission notice (including the next | ||
13 | * paragraph) shall be included in all copies or substantial portions of the | ||
14 | * Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL | ||
19 | * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, | ||
20 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | ||
21 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | ||
22 | * USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <inttypes.h> | ||
26 | #include "pipe/p_defines.h" | ||
27 | #include "pipe/p_context.h" | ||
28 | #include "pipe/p_format.h" | ||
29 | |||
30 | #include "../auxiliary/draw/draw_context.h" | ||
31 | #include "../auxiliary/draw/draw_private.h" | ||
32 | |||
33 | #include "cell_context.h" | ||
34 | #include "rtasm/rtasm_ppc_spe.h" | ||
35 | |||
36 | |||
37 | /** | ||
38 | * Emit a 4x4 matrix transpose operation | ||
39 | * | ||
40 | * \param p Function that the transpose operation is to be appended to | ||
41 | * \param row0 Register containing row 0 of the source matrix | ||
42 | * \param row1 Register containing row 1 of the source matrix | ||
43 | * \param row2 Register containing row 2 of the source matrix | ||
44 | * \param row3 Register containing row 3 of the source matrix | ||
45 | * \param dest_ptr Register containing the address of the destination matrix | ||
46 | * \param shuf_ptr Register containing the address of the shuffled data | ||
47 | * \param count Number of columns to actually be written to the destination | ||
48 | * | ||
49 | * \note | ||
50 | * This function assumes that the registers named by \c row0, \c row1, | ||
51 | * \c row2, and \c row3 are scratch and can be modified by the generated code. | ||
52 | * Furthermore, these registers will be released, via calls to | ||
53 | * \c release_register, by this function. | ||
54 | * | ||
55 | * \note | ||
56 | * This function requires that four temporary registers are available on entry. | ||
57 | */ | ||
58 | static void | ||
59 | emit_matrix_transpose(struct spe_function *p, | ||
60 | unsigned row0, unsigned row1, unsigned row2, | ||
61 | unsigned row3, unsigned dest_ptr, | ||
62 | unsigned shuf_ptr, unsigned count) | ||
63 | { | ||
64 | int shuf_hi = spe_allocate_available_register(p); | ||
65 | int shuf_lo = spe_allocate_available_register(p); | ||
66 | int t1 = spe_allocate_available_register(p); | ||
67 | int t2 = spe_allocate_available_register(p); | ||
68 | int t3; | ||
69 | int t4; | ||
70 | int col0; | ||
71 | int col1; | ||
72 | int col2; | ||
73 | int col3; | ||
74 | |||
75 | |||
76 | spe_lqd(p, shuf_hi, shuf_ptr, 3*16); | ||
77 | spe_lqd(p, shuf_lo, shuf_ptr, 4*16); | ||
78 | spe_shufb(p, t1, row0, row2, shuf_hi); | ||
79 | spe_shufb(p, t2, row0, row2, shuf_lo); | ||
80 | |||
81 | |||
82 | /* row0 and row2 are now no longer needed. Re-use those registers as | ||
83 | * temporaries. | ||
84 | */ | ||
85 | t3 = row0; | ||
86 | t4 = row2; | ||
87 | |||
88 | spe_shufb(p, t3, row1, row3, shuf_hi); | ||
89 | spe_shufb(p, t4, row1, row3, shuf_lo); | ||
90 | |||
91 | |||
92 | /* row1 and row3 are now no longer needed. Re-use those registers as | ||
93 | * temporaries. | ||
94 | */ | ||
95 | col0 = row1; | ||
96 | col1 = row3; | ||
97 | |||
98 | spe_shufb(p, col0, t1, t3, shuf_hi); | ||
99 | if (count > 1) { | ||
100 | spe_shufb(p, col1, t1, t3, shuf_lo); | ||
101 | } | ||
102 | |||
103 | /* t1 and t3 are now no longer needed. Re-use those registers as | ||
104 | * temporaries. | ||
105 | */ | ||
106 | col2 = t1; | ||
107 | col3 = t3; | ||
108 | |||
109 | if (count > 2) { | ||
110 | spe_shufb(p, col2, t2, t4, shuf_hi); | ||
111 | } | ||
112 | |||
113 | if (count > 3) { | ||
114 | spe_shufb(p, col3, t2, t4, shuf_lo); | ||
115 | } | ||
116 | |||
117 | |||
118 | /* Store the results. Remember that the stqd instruction is encoded using | ||
119 | * the qword offset (stand-alone assemblers do the byte-offset to | ||
120 | * qword-offset conversion for you), so the byte-offset needs to be divided by | ||
121 | * 16. | ||
122 | */ | ||
123 | switch (count) { | ||
124 | case 4: | ||
125 | spe_stqd(p, col3, dest_ptr, 3 * 16); | ||
126 | case 3: | ||
127 | spe_stqd(p, col2, dest_ptr, 2 * 16); | ||
128 | case 2: | ||
129 | spe_stqd(p, col1, dest_ptr, 1 * 16); | ||
130 | case 1: | ||
131 | spe_stqd(p, col0, dest_ptr, 0 * 16); | ||
132 | } | ||
133 | |||
134 | |||
135 | /* Release all of the temporary registers used. | ||
136 | */ | ||
137 | spe_release_register(p, col0); | ||
138 | spe_release_register(p, col1); | ||
139 | spe_release_register(p, col2); | ||
140 | spe_release_register(p, col3); | ||
141 | spe_release_register(p, shuf_hi); | ||
142 | spe_release_register(p, shuf_lo); | ||
143 | spe_release_register(p, t2); | ||
144 | spe_release_register(p, t4); | ||
145 | } | ||
146 | |||
147 | |||
148 | #if 0 | ||
149 | /* This appears to not be used currently */ | ||
150 | static void | ||
151 | emit_fetch(struct spe_function *p, | ||
152 | unsigned in_ptr, unsigned *offset, | ||
153 | unsigned out_ptr, unsigned shuf_ptr, | ||
154 | enum pipe_format format) | ||
155 | { | ||
156 | const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0) | ||
157 | + (pf_size_z(format) != 0) + (pf_size_w(format) != 0); | ||
158 | const unsigned type = pf_type(format); | ||
159 | const unsigned bytes = pf_size_x(format); | ||
160 | |||
161 | int v0 = spe_allocate_available_register(p); | ||
162 | int v1 = spe_allocate_available_register(p); | ||
163 | int v2 = spe_allocate_available_register(p); | ||
164 | int v3 = spe_allocate_available_register(p); | ||
165 | int tmp = spe_allocate_available_register(p); | ||
166 | int float_zero = -1; | ||
167 | int float_one = -1; | ||
168 | float scale_signed = 0.0; | ||
169 | float scale_unsigned = 0.0; | ||
170 | |||
171 | spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16); | ||
172 | spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16); | ||
173 | spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16); | ||
174 | spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16); | ||
175 | offset[0] += 4; | ||
176 | |||
177 | switch (bytes) { | ||
178 | case 1: | ||
179 | scale_signed = 1.0f / 127.0f; | ||
180 | scale_unsigned = 1.0f / 255.0f; | ||
181 | spe_lqd(p, tmp, shuf_ptr, 1 * 16); | ||
182 | spe_shufb(p, v0, v0, v0, tmp); | ||
183 | spe_shufb(p, v1, v1, v1, tmp); | ||
184 | spe_shufb(p, v2, v2, v2, tmp); | ||
185 | spe_shufb(p, v3, v3, v3, tmp); | ||
186 | break; | ||
187 | case 2: | ||
188 | scale_signed = 1.0f / 32767.0f; | ||
189 | scale_unsigned = 1.0f / 65535.0f; | ||
190 | spe_lqd(p, tmp, shuf_ptr, 2 * 16); | ||
191 | spe_shufb(p, v0, v0, v0, tmp); | ||
192 | spe_shufb(p, v1, v1, v1, tmp); | ||
193 | spe_shufb(p, v2, v2, v2, tmp); | ||
194 | spe_shufb(p, v3, v3, v3, tmp); | ||
195 | break; | ||
196 | case 4: | ||
197 | scale_signed = 1.0f / 2147483647.0f; | ||
198 | scale_unsigned = 1.0f / 4294967295.0f; | ||
199 | break; | ||
200 | default: | ||
201 | assert(0); | ||
202 | break; | ||
203 | } | ||
204 | |||
205 | switch (type) { | ||
206 | case PIPE_FORMAT_TYPE_FLOAT: | ||
207 | break; | ||
208 | case PIPE_FORMAT_TYPE_UNORM: | ||
209 | spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16); | ||
210 | spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff); | ||
211 | spe_cuflt(p, v0, v0, 0); | ||
212 | spe_fm(p, v0, v0, tmp); | ||
213 | break; | ||
214 | case PIPE_FORMAT_TYPE_SNORM: | ||
215 | spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16); | ||
216 | spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff); | ||
217 | spe_csflt(p, v0, v0, 0); | ||
218 | spe_fm(p, v0, v0, tmp); | ||
219 | break; | ||
220 | case PIPE_FORMAT_TYPE_USCALED: | ||
221 | spe_cuflt(p, v0, v0, 0); | ||
222 | break; | ||
223 | case PIPE_FORMAT_TYPE_SSCALED: | ||
224 | spe_csflt(p, v0, v0, 0); | ||
225 | break; | ||
226 | } | ||
227 | |||
228 | |||
229 | if (count < 4) { | ||
230 | float_one = spe_allocate_available_register(p); | ||
231 | spe_il(p, float_one, 1); | ||
232 | spe_cuflt(p, float_one, float_one, 0); | ||
233 | |||
234 | if (count < 3) { | ||
235 | float_zero = spe_allocate_available_register(p); | ||
236 | spe_il(p, float_zero, 0); | ||
237 | } | ||
238 | } | ||
239 | |||
240 | spe_release_register(p, tmp); | ||
241 | |||
242 | emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count); | ||
243 | |||
244 | switch (count) { | ||
245 | case 1: | ||
246 | spe_stqd(p, float_zero, out_ptr, 1 * 16); | ||
247 | case 2: | ||
248 | spe_stqd(p, float_zero, out_ptr, 2 * 16); | ||
249 | case 3: | ||
250 | spe_stqd(p, float_one, out_ptr, 3 * 16); | ||
251 | } | ||
252 | |||
253 | if (float_zero != -1) { | ||
254 | spe_release_register(p, float_zero); | ||
255 | } | ||
256 | |||
257 | if (float_one != -1) { | ||
258 | spe_release_register(p, float_one); | ||
259 | } | ||
260 | } | ||
261 | #endif | ||
262 | |||
263 | |||
264 | void cell_update_vertex_fetch(struct draw_context *draw) | ||
265 | { | ||
266 | #if 0 | ||
267 | struct cell_context *const cell = | ||
268 | (struct cell_context *) draw->driver_private; | ||
269 | struct spe_function *p = &cell->attrib_fetch; | ||
270 | unsigned function_index[PIPE_MAX_ATTRIBS]; | ||
271 | unsigned unique_attr_formats; | ||
272 | int out_ptr; | ||
273 | int in_ptr; | ||
274 | int shuf_ptr; | ||
275 | unsigned i; | ||
276 | unsigned j; | ||
277 | |||
278 | |||
279 | /* Determine how many unique input attribute formats there are. At the | ||
280 | * same time, store the index of the lowest numbered attribute that has | ||
281 | * the same format as any non-unique format. | ||
282 | */ | ||
283 | unique_attr_formats = 1; | ||
284 | function_index[0] = 0; | ||
285 | for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) { | ||
286 | const enum pipe_format curr_fmt = draw->vertex_element[i].src_format; | ||
287 | |||
288 | for (j = 0; j < i; j++) { | ||
289 | if (curr_fmt == draw->vertex_element[j].src_format) { | ||
290 | break; | ||
291 | } | ||
292 | } | ||
293 | |||
294 | if (j == i) { | ||
295 | unique_attr_formats++; | ||
296 | } | ||
297 | |||
298 | function_index[i] = j; | ||
299 | } | ||
300 | |||
301 | |||
302 | /* Each fetch function can be a maximum of 34 instructions (note: this is | ||
303 | * actually a slight over-estimate). | ||
304 | */ | ||
305 | spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats); | ||
306 | |||
307 | |||
308 | /* Allocate registers for the function's input parameters. | ||
309 | */ | ||
310 | out_ptr = spe_allocate_register(p, 3); | ||
311 | in_ptr = spe_allocate_register(p, 4); | ||
312 | shuf_ptr = spe_allocate_register(p, 5); | ||
313 | |||
314 | |||
315 | /* Generate code for the individual attribute fetch functions. | ||
316 | */ | ||
317 | for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { | ||
318 | unsigned offset; | ||
319 | |||
320 | if (function_index[i] == i) { | ||
321 | cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr | ||
322 | - (void *) p->store); | ||
323 | |||
324 | offset = 0; | ||
325 | emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr, | ||
326 | draw->vertex_element[i].src_format); | ||
327 | spe_bi(p, 0, 0, 0); | ||
328 | |||
329 | /* Round up to the next 16-byte boundary. | ||
330 | */ | ||
331 | if ((((unsigned) p->store) & 0x0f) != 0) { | ||
332 | const unsigned align = ((unsigned) p->store) & 0x0f; | ||
333 | p->store = (uint32_t *) (((void *) p->store) + align); | ||
334 | } | ||
335 | } else { | ||
336 | /* Use the same function entry-point as a previously seen attribute | ||
337 | * with the same format. | ||
338 | */ | ||
339 | cell->attrib_fetch_offsets[i] = | ||
340 | cell->attrib_fetch_offsets[function_index[i]]; | ||
341 | } | ||
342 | } | ||
343 | #else | ||
344 | assert(0); | ||
345 | #endif | ||
346 | } | ||
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c deleted file mode 100644 index 3d389d6ea36..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ /dev/null | |||
@@ -1,145 +0,0 @@ | |||
1 | /* | ||
2 | * (C) Copyright IBM Corporation 2008 | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
6 | * copy of this software and associated documentation files (the "Software"), | ||
7 | * to deal in the Software without restriction, including without limitation | ||
8 | * on the rights to use, copy, modify, merge, publish, distribute, sub | ||
9 | * license, and/or sell copies of the Software, and to permit persons to whom | ||
10 | * the Software is furnished to do so, subject to the following conditions: | ||
11 | * | ||
12 | * The above copyright notice and this permission notice (including the next | ||
13 | * paragraph) shall be included in all copies or substantial portions of the | ||
14 | * Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL | ||
19 | * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, | ||
20 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | ||
21 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | ||
22 | * USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | /** | ||
26 | * \file cell_vertex_shader.c | ||
27 | * Vertex shader interface routines for Cell. | ||
28 | * | ||
29 | * \author Ian Romanick <idr@us.ibm.com> | ||
30 | */ | ||
31 | |||
32 | #include "pipe/p_defines.h" | ||
33 | #include "pipe/p_context.h" | ||
34 | #include "util/u_math.h" | ||
35 | |||
36 | #include "cell_context.h" | ||
37 | #include "cell_draw_arrays.h" | ||
38 | #include "cell_flush.h" | ||
39 | #include "cell_spu.h" | ||
40 | #include "cell_batch.h" | ||
41 | |||
42 | #include "cell/common.h" | ||
43 | #include "draw/draw_context.h" | ||
44 | #include "draw/draw_private.h" | ||
45 | |||
46 | /** | ||
47 | * Run the vertex shader on all vertices in the vertex queue. | ||
48 | * Called by the draw module when the vertex cache needs to be flushed. | ||
49 | */ | ||
50 | void | ||
51 | cell_vertex_shader_queue_flush(struct draw_context *draw) | ||
52 | { | ||
53 | #if 0 | ||
54 | struct cell_context *const cell = | ||
55 | (struct cell_context *) draw->driver_private; | ||
56 | struct cell_command_vs *const vs = &cell_global.command[0].vs; | ||
57 | uint64_t *batch; | ||
58 | struct cell_array_info *array_info; | ||
59 | unsigned i, j; | ||
60 | struct cell_attribute_fetch_code *cf; | ||
61 | |||
62 | assert(draw->vs.queue_nr != 0); | ||
63 | |||
64 | /* XXX: do this on statechange: | ||
65 | */ | ||
66 | draw_update_vertex_fetch(draw); | ||
67 | cell_update_vertex_fetch(draw); | ||
68 | |||
69 | |||
70 | batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*cf)); | ||
71 | batch[0] = CELL_CMD_STATE_ATTRIB_FETCH; | ||
72 | cf = (struct cell_attribute_fetch_code *) (&batch[1]); | ||
73 | cf->base = (uint64_t) cell->attrib_fetch.store; | ||
74 | cf->size = ROUNDUP16((unsigned)((void *) cell->attrib_fetch.csr | ||
75 | - (void *) cell->attrib_fetch.store)); | ||
76 | |||
77 | |||
78 | for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { | ||
79 | const enum pipe_format format = draw->vertex_element[i].src_format; | ||
80 | const unsigned count = ((pf_size_x(format) != 0) | ||
81 | + (pf_size_y(format) != 0) | ||
82 | + (pf_size_z(format) != 0) | ||
83 | + (pf_size_w(format) != 0)); | ||
84 | const unsigned size = pf_size_x(format) * count; | ||
85 | |||
86 | batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info)); | ||
87 | |||
88 | batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO; | ||
89 | |||
90 | array_info = (struct cell_array_info *) &batch[1]; | ||
91 | assert(draw->vertex_fetch.src_ptr[i] != NULL); | ||
92 | array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i]; | ||
93 | array_info->attr = i; | ||
94 | array_info->pitch = draw->vertex_fetch.pitch[i]; | ||
95 | array_info->size = size; | ||
96 | array_info->function_offset = cell->attrib_fetch_offsets[i]; | ||
97 | } | ||
98 | |||
99 | batch = cell_batch_alloc(cell, sizeof(batch[0]) | ||
100 | + sizeof(struct pipe_viewport_state)); | ||
101 | batch[0] = CELL_CMD_STATE_VIEWPORT; | ||
102 | (void) memcpy(&batch[1], &draw->viewport, | ||
103 | sizeof(struct pipe_viewport_state)); | ||
104 | |||
105 | { | ||
106 | uint64_t uniforms = (uintptr_t) draw->user.constants; | ||
107 | |||
108 | batch = cell_batch_alloc(cell, 2 *sizeof(batch[0])); | ||
109 | batch[0] = CELL_CMD_STATE_UNIFORMS; | ||
110 | batch[1] = uniforms; | ||
111 | } | ||
112 | |||
113 | cell_batch_flush(cell); | ||
114 | |||
115 | vs->opcode = CELL_CMD_VS_EXECUTE; | ||
116 | vs->nr_attrs = draw->vertex_fetch.nr_attrs; | ||
117 | |||
118 | (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane)); | ||
119 | vs->nr_planes = draw->nr_planes; | ||
120 | |||
121 | for (i = 0; i < draw->vs.queue_nr; i += SPU_VERTS_PER_BATCH) { | ||
122 | const unsigned n = MIN2(SPU_VERTS_PER_BATCH, draw->vs.queue_nr - i); | ||
123 | |||
124 | for (j = 0; j < n; j++) { | ||
125 | vs->elts[j] = draw->vs.queue[i + j].elt; | ||
126 | vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; | ||
127 | } | ||
128 | |||
129 | for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) { | ||
130 | vs->elts[j] = vs->elts[0]; | ||
131 | vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; | ||
132 | } | ||
133 | |||
134 | vs->num_elts = n; | ||
135 | send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE); | ||
136 | |||
137 | cell_flush_int(cell, CELL_FLUSH_WAIT); | ||
138 | } | ||
139 | |||
140 | draw->vs.post_nr = draw->vs.queue_nr; | ||
141 | draw->vs.queue_nr = 0; | ||
142 | #else | ||
143 | assert(0); | ||
144 | #endif | ||
145 | } | ||
diff --git a/src/gallium/drivers/cell/spu/.gitignore b/src/gallium/drivers/cell/spu/.gitignore deleted file mode 100644 index 2be9a2d3242..00000000000 --- a/src/gallium/drivers/cell/spu/.gitignore +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | g3d_spu | ||
diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile deleted file mode 100644 index 3cc52301da2..00000000000 --- a/src/gallium/drivers/cell/spu/Makefile +++ /dev/null | |||
@@ -1,83 +0,0 @@ | |||
1 | # Gallium3D Cell driver: SPU code | ||
2 | |||
3 | # This makefile builds the g3d_spu.a file that's linked into the | ||
4 | # PPU code/library. | ||
5 | |||
6 | |||
7 | TOP = ../../../../.. | ||
8 | include $(TOP)/configs/current | ||
9 | |||
10 | |||
11 | PROG = g3d | ||
12 | |||
13 | PROG_SPU = $(PROG)_spu | ||
14 | PROG_SPU_A = $(PROG)_spu.a | ||
15 | PROG_SPU_EMBED_O = $(PROG)_spu-embed.o | ||
16 | |||
17 | |||
18 | SOURCES = \ | ||
19 | spu_command.c \ | ||
20 | spu_dcache.c \ | ||
21 | spu_funcs.c \ | ||
22 | spu_main.c \ | ||
23 | spu_per_fragment_op.c \ | ||
24 | spu_render.c \ | ||
25 | spu_texture.c \ | ||
26 | spu_tile.c \ | ||
27 | spu_tri.c | ||
28 | |||
29 | OLD_SOURCES = \ | ||
30 | spu_exec.c \ | ||
31 | spu_util.c \ | ||
32 | spu_vertex_fetch.c \ | ||
33 | spu_vertex_shader.c | ||
34 | |||
35 | |||
36 | SPU_OBJECTS = $(SOURCES:.c=.o) | ||
37 | |||
38 | SPU_ASM_OUT = $(SOURCES:.c=.s) | ||
39 | |||
40 | |||
41 | INCLUDE_DIRS = \ | ||
42 | -I$(TOP)/src/mesa \ | ||
43 | -I$(TOP)/src/gallium/include \ | ||
44 | -I$(TOP)/src/gallium/auxiliary \ | ||
45 | -I$(TOP)/src/gallium/drivers | ||
46 | |||
47 | |||
48 | .c.o: | ||
49 | $(SPU_CC) $(SPU_CFLAGS) -c $< | ||
50 | |||
51 | .c.s: | ||
52 | $(SPU_CC) $(SPU_CFLAGS) -O3 -S $< | ||
53 | |||
54 | |||
55 | # The .a file will be linked into the main/PPU executable | ||
56 | default: $(PROG_SPU_A) | ||
57 | |||
58 | $(PROG_SPU_A): $(PROG_SPU_EMBED_O) | ||
59 | $(SPU_AR) $(SPU_AR_FLAGS) $(PROG_SPU_A) $(PROG_SPU_EMBED_O) | ||
60 | |||
61 | $(PROG_SPU_EMBED_O): $(PROG_SPU) | ||
62 | $(SPU_EMBED) $(SPU_EMBED_FLAGS) $(PROG_SPU) $(PROG_SPU) $(PROG_SPU_EMBED_O) | ||
63 | |||
64 | $(PROG_SPU): $(SPU_OBJECTS) | ||
65 | $(SPU_CC) -o $(PROG_SPU) $(SPU_OBJECTS) $(SPU_LFLAGS) | ||
66 | |||
67 | |||
68 | |||
69 | asmfiles: $(SPU_ASM_OUT) | ||
70 | |||
71 | |||
72 | clean: | ||
73 | rm -f *~ *.o *.a *.d *.s $(PROG_SPU) | ||
74 | |||
75 | |||
76 | |||
77 | depend: $(SOURCES) | ||
78 | rm -f depend | ||
79 | touch depend | ||
80 | $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null | ||
81 | |||
82 | include depend | ||
83 | |||
diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h deleted file mode 100644 index d7ce0055248..00000000000 --- a/src/gallium/drivers/cell/spu/spu_colorpack.h +++ /dev/null | |||
@@ -1,145 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | |||
30 | #ifndef SPU_COLORPACK_H | ||
31 | #define SPU_COLORPACK_H | ||
32 | |||
33 | |||
34 | #include <transpose_matrix4x4.h> | ||
35 | #include <spu_intrinsics.h> | ||
36 | |||
37 | |||
38 | static INLINE unsigned int | ||
39 | spu_pack_R8G8B8A8(vector float rgba) | ||
40 | { | ||
41 | vector unsigned int out = spu_convtu(rgba, 32); | ||
42 | |||
43 | out = spu_shuffle(out, out, ((vector unsigned char) { | ||
44 | 0, 4, 8, 12, 0, 0, 0, 0, | ||
45 | 0, 0, 0, 0, 0, 0, 0, 0 }) ); | ||
46 | |||
47 | return spu_extract(out, 0); | ||
48 | } | ||
49 | |||
50 | |||
51 | static INLINE unsigned int | ||
52 | spu_pack_A8R8G8B8(vector float rgba) | ||
53 | { | ||
54 | vector unsigned int out = spu_convtu(rgba, 32); | ||
55 | out = spu_shuffle(out, out, ((vector unsigned char) { | ||
56 | 12, 0, 4, 8, 0, 0, 0, 0, | ||
57 | 0, 0, 0, 0, 0, 0, 0, 0}) ); | ||
58 | return spu_extract(out, 0); | ||
59 | } | ||
60 | |||
61 | |||
62 | static INLINE unsigned int | ||
63 | spu_pack_B8G8R8A8(vector float rgba) | ||
64 | { | ||
65 | vector unsigned int out = spu_convtu(rgba, 32); | ||
66 | out = spu_shuffle(out, out, ((vector unsigned char) { | ||
67 | 8, 4, 0, 12, 0, 0, 0, 0, | ||
68 | 0, 0, 0, 0, 0, 0, 0, 0}) ); | ||
69 | return spu_extract(out, 0); | ||
70 | } | ||
71 | |||
72 | |||
73 | static INLINE unsigned int | ||
74 | spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle) | ||
75 | { | ||
76 | vector unsigned int out = spu_convtu(rgba, 32); | ||
77 | out = spu_shuffle(out, out, shuffle); | ||
78 | return spu_extract(out, 0); | ||
79 | } | ||
80 | |||
81 | |||
82 | static INLINE vector float | ||
83 | spu_unpack_B8G8R8A8(uint color) | ||
84 | { | ||
85 | vector unsigned int color_u4 = spu_splats(color); | ||
86 | color_u4 = spu_shuffle(color_u4, color_u4, | ||
87 | ((vector unsigned char) { | ||
88 | 2, 2, 2, 2, | ||
89 | 1, 1, 1, 1, | ||
90 | 0, 0, 0, 0, | ||
91 | 3, 3, 3, 3}) ); | ||
92 | return spu_convtf(color_u4, 32); | ||
93 | } | ||
94 | |||
95 | |||
96 | static INLINE vector float | ||
97 | spu_unpack_A8R8G8B8(uint color) | ||
98 | { | ||
99 | vector unsigned int color_u4 = spu_splats(color); | ||
100 | color_u4 = spu_shuffle(color_u4, color_u4, | ||
101 | ((vector unsigned char) { | ||
102 | 1, 1, 1, 1, | ||
103 | 2, 2, 2, 2, | ||
104 | 3, 3, 3, 3, | ||
105 | 0, 0, 0, 0}) ); | ||
106 | return spu_convtf(color_u4, 32); | ||
107 | } | ||
108 | |||
109 | |||
110 | /** | ||
111 | * \param color_in - array of 32-bit packed ARGB colors | ||
112 | * \param color_out - returns float colors in RRRR, GGGG, BBBB, AAAA order | ||
113 | */ | ||
114 | static INLINE void | ||
115 | spu_unpack_A8R8G8B8_transpose4(const vector unsigned int color_in[4], | ||
116 | vector float color_out[4]) | ||
117 | { | ||
118 | vector unsigned int c0; | ||
119 | |||
120 | c0 = spu_shuffle(color_in[0], color_in[0], | ||
121 | ((vector unsigned char) { | ||
122 | 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); | ||
123 | color_out[0] = spu_convtf(c0, 32); | ||
124 | |||
125 | c0 = spu_shuffle(color_in[1], color_in[1], | ||
126 | ((vector unsigned char) { | ||
127 | 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); | ||
128 | color_out[1] = spu_convtf(c0, 32); | ||
129 | |||
130 | c0 = spu_shuffle(color_in[2], color_in[2], | ||
131 | ((vector unsigned char) { | ||
132 | 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); | ||
133 | color_out[2] = spu_convtf(c0, 32); | ||
134 | |||
135 | c0 = spu_shuffle(color_in[3], color_in[3], | ||
136 | ((vector unsigned char) { | ||
137 | 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); | ||
138 | color_out[3] = spu_convtf(c0, 32); | ||
139 | |||
140 | _transpose_matrix4x4(color_out, color_out); | ||
141 | } | ||
142 | |||
143 | |||
144 | |||
145 | #endif /* SPU_COLORPACK_H */ | ||
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c deleted file mode 100644 index 6f8ba9562d2..00000000000 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ /dev/null | |||
@@ -1,810 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | /** | ||
30 | * SPU command processing code | ||
31 | */ | ||
32 | |||
33 | |||
34 | #include <stdio.h> | ||
35 | #include <libmisc.h> | ||
36 | |||
37 | #include "pipe/p_defines.h" | ||
38 | |||
39 | #include "spu_command.h" | ||
40 | #include "spu_main.h" | ||
41 | #include "spu_render.h" | ||
42 | #include "spu_per_fragment_op.h" | ||
43 | #include "spu_texture.h" | ||
44 | #include "spu_tile.h" | ||
45 | #include "spu_vertex_shader.h" | ||
46 | #include "spu_dcache.h" | ||
47 | #include "cell/common.h" | ||
48 | |||
49 | |||
50 | struct spu_vs_context draw; | ||
51 | |||
52 | |||
53 | /** | ||
54 | * Buffers containing dynamically generated SPU code: | ||
55 | */ | ||
56 | PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]; | ||
57 | |||
58 | |||
59 | |||
/**
 * Round value up to a multiple of alignment.
 * The mask arithmetic only yields a multiple when alignment is a power
 * of two.
 */
static INLINE int
align(int value, int alignment)
{
   const int mask = alignment - 1;

   return (value + mask) & ~mask;
}
65 | |||
66 | |||
67 | |||
68 | /** | ||
69 | * Tell the PPU that this SPU has finished copying a buffer to | ||
70 | * local store and that it may be reused by the PPU. | ||
71 | * This is done by writting a 16-byte batch-buffer-status block back into | ||
72 | * main memory (in cell_context->buffer_status[]). | ||
73 | */ | ||
74 | static void | ||
75 | release_buffer(uint buffer) | ||
76 | { | ||
77 | /* Evidently, using less than a 16-byte status doesn't work reliably */ | ||
78 | static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE, | ||
79 | CELL_BUFFER_STATUS_FREE, | ||
80 | CELL_BUFFER_STATUS_FREE, | ||
81 | CELL_BUFFER_STATUS_FREE}; | ||
82 | const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); | ||
83 | uint *dst = spu.init.buffer_status + index; | ||
84 | |||
85 | ASSERT(buffer < CELL_NUM_BUFFERS); | ||
86 | |||
87 | mfc_put((void *) &status, /* src in local memory */ | ||
88 | (unsigned int) dst, /* dst in main memory */ | ||
89 | sizeof(status), /* size */ | ||
90 | TAG_MISC, /* tag is unimportant */ | ||
91 | 0, /* tid */ | ||
92 | 0 /* rid */); | ||
93 | } | ||
94 | |||
95 | |||
96 | /** | ||
97 | * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory. | ||
98 | * There's a qword of status per SPU. | ||
99 | */ | ||
100 | static void | ||
101 | cmd_fence(struct cell_command_fence *fence_cmd) | ||
102 | { | ||
103 | static const vector unsigned int status = {CELL_FENCE_SIGNALLED, | ||
104 | CELL_FENCE_SIGNALLED, | ||
105 | CELL_FENCE_SIGNALLED, | ||
106 | CELL_FENCE_SIGNALLED}; | ||
107 | uint *dst = (uint *) fence_cmd->fence; | ||
108 | dst += 4 * spu.init.id; /* main store/memory address, not local store */ | ||
109 | ASSERT_ALIGN16(dst); | ||
110 | mfc_put((void *) &status, /* src in local memory */ | ||
111 | (unsigned int) dst, /* dst in main memory */ | ||
112 | sizeof(status), /* size */ | ||
113 | TAG_FENCE, /* tag */ | ||
114 | 0, /* tid */ | ||
115 | 0 /* rid */); | ||
116 | } | ||
117 | |||
118 | |||
119 | static void | ||
120 | cmd_clear_surface(const struct cell_command_clear_surface *clear) | ||
121 | { | ||
122 | D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); | ||
123 | |||
124 | if (clear->surface == 0) { | ||
125 | spu.fb.color_clear_value = clear->value; | ||
126 | if (spu.init.debug_flags & CELL_DEBUG_CHECKER) { | ||
127 | uint x = (spu.init.id << 4) | (spu.init.id << 12) | | ||
128 | (spu.init.id << 20) | (spu.init.id << 28); | ||
129 | spu.fb.color_clear_value ^= x; | ||
130 | } | ||
131 | } | ||
132 | else { | ||
133 | spu.fb.depth_clear_value = clear->value; | ||
134 | } | ||
135 | |||
136 | #define CLEAR_OPT 1 | ||
137 | #if CLEAR_OPT | ||
138 | |||
139 | /* Simply set all tiles' status to CLEAR. | ||
140 | * When we actually begin rendering into a tile, we'll initialize it to | ||
141 | * the clear value. If any tiles go untouched during the frame, | ||
142 | * really_clear_tiles() will set them to the clear value. | ||
143 | */ | ||
144 | if (clear->surface == 0) { | ||
145 | memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); | ||
146 | } | ||
147 | else { | ||
148 | memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); | ||
149 | } | ||
150 | |||
151 | #else | ||
152 | |||
153 | /* | ||
154 | * This path clears the whole framebuffer to the clear color right now. | ||
155 | */ | ||
156 | |||
157 | /* | ||
158 | printf("SPU: %s num=%d w=%d h=%d\n", | ||
159 | __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); | ||
160 | */ | ||
161 | |||
162 | /* init a single tile to the clear value */ | ||
163 | if (clear->surface == 0) { | ||
164 | clear_c_tile(&spu.ctile); | ||
165 | } | ||
166 | else { | ||
167 | clear_z_tile(&spu.ztile); | ||
168 | } | ||
169 | |||
170 | /* walk over my tiles, writing the 'clear' tile's data */ | ||
171 | { | ||
172 | const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; | ||
173 | uint i; | ||
174 | for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { | ||
175 | uint tx = i % spu.fb.width_tiles; | ||
176 | uint ty = i / spu.fb.width_tiles; | ||
177 | if (clear->surface == 0) | ||
178 | put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); | ||
179 | else | ||
180 | put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); | ||
181 | } | ||
182 | } | ||
183 | |||
184 | if (spu.init.debug_flags & CELL_DEBUG_SYNC) { | ||
185 | wait_on_mask(1 << TAG_SURFACE_CLEAR); | ||
186 | } | ||
187 | |||
188 | #endif /* CLEAR_OPT */ | ||
189 | |||
190 | D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n"); | ||
191 | } | ||
192 | |||
193 | |||
194 | static void | ||
195 | cmd_release_verts(const struct cell_command_release_verts *release) | ||
196 | { | ||
197 | D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf); | ||
198 | ASSERT(release->vertex_buf != ~0U); | ||
199 | release_buffer(release->vertex_buf); | ||
200 | } | ||
201 | |||
202 | |||
203 | /** | ||
204 | * Process a CELL_CMD_STATE_FRAGMENT_OPS command. | ||
205 | * This involves installing new fragment ops SPU code. | ||
206 | * If this function is never called, we'll use a regular C fallback function | ||
207 | * for fragment processing. | ||
208 | */ | ||
209 | static void | ||
210 | cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) | ||
211 | { | ||
212 | D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n"); | ||
213 | |||
214 | /* Copy state info (for fallback case only - this will eventually | ||
215 | * go away when the fallback case goes away) | ||
216 | */ | ||
217 | memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); | ||
218 | memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); | ||
219 | memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color)); | ||
220 | |||
221 | /* Make sure the SPU knows which buffers it's expected to read when | ||
222 | * it's told to pull tiles. | ||
223 | */ | ||
224 | spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled); | ||
225 | |||
226 | /* If we're forcing the fallback code to be used (for debug purposes), | ||
227 | * install that. Otherwise install the incoming SPU code. | ||
228 | */ | ||
229 | if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) { | ||
230 | static unsigned int warned = 0; | ||
231 | if (!warned) { | ||
232 | fprintf(stderr, "Cell Warning: using fallback per-fragment code\n"); | ||
233 | warned = 1; | ||
234 | } | ||
235 | /* The following two lines aren't really necessary if you | ||
236 | * know the debug flags won't change during a run, and if you | ||
237 | * know that the function pointers are initialized correctly. | ||
238 | * We set them here to allow a person to change the debug | ||
239 | * flags during a run (from inside a debugger). | ||
240 | */ | ||
241 | spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; | ||
242 | spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; | ||
243 | return; | ||
244 | } | ||
245 | |||
246 | /* Make sure the SPU code buffer is large enough to hold the incoming code. | ||
247 | * Note that we *don't* use align_malloc() and align_free(), because | ||
248 | * those utility functions are *not* available in SPU code. | ||
249 | * */ | ||
250 | if (spu.fragment_ops_code_size < fops->total_code_size) { | ||
251 | if (spu.fragment_ops_code != NULL) { | ||
252 | free(spu.fragment_ops_code); | ||
253 | } | ||
254 | spu.fragment_ops_code_size = fops->total_code_size; | ||
255 | spu.fragment_ops_code = malloc(fops->total_code_size); | ||
256 | if (spu.fragment_ops_code == NULL) { | ||
257 | /* Whoops. */ | ||
258 | fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size); | ||
259 | spu.fragment_ops_code = NULL; | ||
260 | spu.fragment_ops_code_size = 0; | ||
261 | spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; | ||
262 | spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; | ||
263 | return; | ||
264 | } | ||
265 | } | ||
266 | |||
267 | /* Copy the SPU code from the command buffer to the spu buffer */ | ||
268 | memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size); | ||
269 | |||
270 | /* Set the pointers for the front-facing and back-facing fragments | ||
271 | * to the specified offsets within the code. Note that if the | ||
272 | * front-facing and back-facing code are the same, they'll have | ||
273 | * the same offset. | ||
274 | */ | ||
275 | spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index]; | ||
276 | spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index]; | ||
277 | } | ||
278 | |||
279 | static void | ||
280 | cmd_state_fragment_program(const struct cell_command_fragment_program *fp) | ||
281 | { | ||
282 | D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n"); | ||
283 | /* Copy SPU code from batch buffer to spu buffer */ | ||
284 | memcpy(spu.fragment_program_code, fp->code, | ||
285 | SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); | ||
286 | #if 01 | ||
287 | /* Point function pointer at new code */ | ||
288 | spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code; | ||
289 | #endif | ||
290 | } | ||
291 | |||
292 | |||
293 | static uint | ||
294 | cmd_state_fs_constants(const qword *buffer, uint pos) | ||
295 | { | ||
296 | const uint num_const = spu_extract((vector unsigned int)buffer[pos+1], 0); | ||
297 | const float *constants = (const float *) &buffer[pos+2]; | ||
298 | uint i; | ||
299 | |||
300 | D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const); | ||
301 | |||
302 | /* Expand each float to float[4] for SOA execution */ | ||
303 | for (i = 0; i < num_const; i++) { | ||
304 | D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", i, constants[i]); | ||
305 | spu.constants[i] = spu_splats(constants[i]); | ||
306 | } | ||
307 | |||
308 | /* return new buffer pos (in 16-byte words) */ | ||
309 | return pos + 2 + (ROUNDUP16(num_const * sizeof(float)) / 16); | ||
310 | } | ||
311 | |||
312 | |||
313 | static void | ||
314 | cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) | ||
315 | { | ||
316 | D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", | ||
317 | cmd->width, | ||
318 | cmd->height, | ||
319 | cmd->color_start, | ||
320 | cmd->color_format, | ||
321 | cmd->depth_format); | ||
322 | |||
323 | ASSERT_ALIGN16(cmd->color_start); | ||
324 | ASSERT_ALIGN16(cmd->depth_start); | ||
325 | |||
326 | spu.fb.color_start = cmd->color_start; | ||
327 | spu.fb.depth_start = cmd->depth_start; | ||
328 | spu.fb.color_format = cmd->color_format; | ||
329 | spu.fb.depth_format = cmd->depth_format; | ||
330 | spu.fb.width = cmd->width; | ||
331 | spu.fb.height = cmd->height; | ||
332 | spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; | ||
333 | spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; | ||
334 | |||
335 | switch (spu.fb.depth_format) { | ||
336 | case PIPE_FORMAT_Z32_UNORM: | ||
337 | spu.fb.zsize = 4; | ||
338 | spu.fb.zscale = (float) 0xffffffffu; | ||
339 | break; | ||
340 | case PIPE_FORMAT_S8_UINT_Z24_UNORM: | ||
341 | case PIPE_FORMAT_Z24_UNORM_S8_UINT: | ||
342 | case PIPE_FORMAT_X8Z24_UNORM: | ||
343 | case PIPE_FORMAT_Z24X8_UNORM: | ||
344 | spu.fb.zsize = 4; | ||
345 | spu.fb.zscale = (float) 0x00ffffffu; | ||
346 | break; | ||
347 | case PIPE_FORMAT_Z16_UNORM: | ||
348 | spu.fb.zsize = 2; | ||
349 | spu.fb.zscale = (float) 0xffffu; | ||
350 | break; | ||
351 | default: | ||
352 | spu.fb.zsize = 0; | ||
353 | break; | ||
354 | } | ||
355 | } | ||
356 | |||
357 | |||
358 | /** | ||
359 | * Tex texture mask_s/t and scale_s/t fields depend on the texture size and | ||
360 | * sampler wrap modes. | ||
361 | */ | ||
362 | static void | ||
363 | update_tex_masks(struct spu_texture *texture, | ||
364 | const struct pipe_sampler_state *sampler) | ||
365 | { | ||
366 | uint i; | ||
367 | |||
368 | for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { | ||
369 | int width = texture->level[i].width; | ||
370 | int height = texture->level[i].height; | ||
371 | |||
372 | if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) | ||
373 | texture->level[i].mask_s = spu_splats(width - 1); | ||
374 | else | ||
375 | texture->level[i].mask_s = spu_splats(~0); | ||
376 | |||
377 | if (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT) | ||
378 | texture->level[i].mask_t = spu_splats(height - 1); | ||
379 | else | ||
380 | texture->level[i].mask_t = spu_splats(~0); | ||
381 | |||
382 | if (sampler->normalized_coords) { | ||
383 | texture->level[i].scale_s = spu_splats((float) width); | ||
384 | texture->level[i].scale_t = spu_splats((float) height); | ||
385 | } | ||
386 | else { | ||
387 | texture->level[i].scale_s = spu_splats(1.0f); | ||
388 | texture->level[i].scale_t = spu_splats(1.0f); | ||
389 | } | ||
390 | } | ||
391 | } | ||
392 | |||
393 | |||
394 | static void | ||
395 | cmd_state_sampler(const struct cell_command_sampler *sampler) | ||
396 | { | ||
397 | uint unit = sampler->unit; | ||
398 | |||
399 | D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit); | ||
400 | |||
401 | spu.sampler[unit] = sampler->state; | ||
402 | |||
403 | switch (spu.sampler[unit].min_img_filter) { | ||
404 | case PIPE_TEX_FILTER_LINEAR: | ||
405 | spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear; | ||
406 | break; | ||
407 | case PIPE_TEX_FILTER_NEAREST: | ||
408 | spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest; | ||
409 | break; | ||
410 | default: | ||
411 | ASSERT(0); | ||
412 | } | ||
413 | |||
414 | switch (spu.sampler[sampler->unit].mag_img_filter) { | ||
415 | case PIPE_TEX_FILTER_LINEAR: | ||
416 | spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear; | ||
417 | break; | ||
418 | case PIPE_TEX_FILTER_NEAREST: | ||
419 | spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest; | ||
420 | break; | ||
421 | default: | ||
422 | ASSERT(0); | ||
423 | } | ||
424 | |||
425 | switch (spu.sampler[sampler->unit].min_mip_filter) { | ||
426 | case PIPE_TEX_MIPFILTER_NEAREST: | ||
427 | case PIPE_TEX_MIPFILTER_LINEAR: | ||
428 | spu.sample_texture_2d[unit] = sample_texture_2d_lod; | ||
429 | break; | ||
430 | case PIPE_TEX_MIPFILTER_NONE: | ||
431 | spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit]; | ||
432 | break; | ||
433 | default: | ||
434 | ASSERT(0); | ||
435 | } | ||
436 | |||
437 | update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); | ||
438 | } | ||
439 | |||
440 | |||
441 | static void | ||
442 | cmd_state_texture(const struct cell_command_texture *texture) | ||
443 | { | ||
444 | const uint unit = texture->unit; | ||
445 | uint i; | ||
446 | |||
447 | D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit); | ||
448 | |||
449 | spu.texture[unit].max_level = 0; | ||
450 | spu.texture[unit].target = texture->target; | ||
451 | |||
452 | for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { | ||
453 | uint width = texture->width[i]; | ||
454 | uint height = texture->height[i]; | ||
455 | uint depth = texture->depth[i]; | ||
456 | |||
457 | D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i, | ||
458 | texture->start[i], texture->width[i], texture->height[i]); | ||
459 | |||
460 | spu.texture[unit].level[i].start = texture->start[i]; | ||
461 | spu.texture[unit].level[i].width = width; | ||
462 | spu.texture[unit].level[i].height = height; | ||
463 | spu.texture[unit].level[i].depth = depth; | ||
464 | |||
465 | spu.texture[unit].level[i].tiles_per_row = | ||
466 | (width + TILE_SIZE - 1) / TILE_SIZE; | ||
467 | |||
468 | spu.texture[unit].level[i].bytes_per_image = | ||
469 | 4 * align(width, TILE_SIZE) * align(height, TILE_SIZE) * depth; | ||
470 | |||
471 | spu.texture[unit].level[i].max_s = spu_splats((int) width - 1); | ||
472 | spu.texture[unit].level[i].max_t = spu_splats((int) height - 1); | ||
473 | |||
474 | if (texture->start[i]) | ||
475 | spu.texture[unit].max_level = i; | ||
476 | } | ||
477 | |||
478 | update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); | ||
479 | } | ||
480 | |||
481 | |||
482 | static void | ||
483 | cmd_state_vertex_info(const struct vertex_info *vinfo) | ||
484 | { | ||
485 | D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); | ||
486 | ASSERT(vinfo->num_attribs >= 1); | ||
487 | ASSERT(vinfo->num_attribs <= 8); | ||
488 | memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); | ||
489 | } | ||
490 | |||
491 | |||
492 | static void | ||
493 | cmd_state_vs_array_info(const struct cell_array_info *vs_info) | ||
494 | { | ||
495 | const unsigned attr = vs_info->attr; | ||
496 | |||
497 | ASSERT(attr < PIPE_MAX_ATTRIBS); | ||
498 | draw.vertex_fetch.src_ptr[attr] = vs_info->base; | ||
499 | draw.vertex_fetch.pitch[attr] = vs_info->pitch; | ||
500 | draw.vertex_fetch.size[attr] = vs_info->size; | ||
501 | draw.vertex_fetch.code_offset[attr] = vs_info->function_offset; | ||
502 | draw.vertex_fetch.dirty = 1; | ||
503 | } | ||
504 | |||
505 | |||
506 | static void | ||
507 | cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) | ||
508 | { | ||
509 | mfc_get(attribute_fetch_code_buffer, | ||
510 | (unsigned int) code->base, /* src */ | ||
511 | code->size, | ||
512 | TAG_BATCH_BUFFER, | ||
513 | 0, /* tid */ | ||
514 | 0 /* rid */); | ||
515 | wait_on_mask(1 << TAG_BATCH_BUFFER); | ||
516 | |||
517 | draw.vertex_fetch.code = attribute_fetch_code_buffer; | ||
518 | } | ||
519 | |||
520 | |||
521 | static void | ||
522 | cmd_finish(void) | ||
523 | { | ||
524 | D_PRINTF(CELL_DEBUG_CMD, "FINISH\n"); | ||
525 | really_clear_tiles(0); | ||
526 | /* wait for all outstanding DMAs to finish */ | ||
527 | mfc_write_tag_mask(~0); | ||
528 | mfc_read_tag_status_all(); | ||
529 | /* send mbox message to PPU */ | ||
530 | spu_write_out_mbox(CELL_CMD_FINISH); | ||
531 | } | ||
532 | |||
533 | |||
534 | /** | ||
535 | * Execute a batch of commands which was sent to us by the PPU. | ||
536 | * See the cell_emit_state.c code to see where the commands come from. | ||
537 | * | ||
538 | * The opcode param encodes the location of the buffer and its size. | ||
539 | */ | ||
540 | static void | ||
541 | cmd_batch(uint opcode) | ||
542 | { | ||
543 | const uint buf = (opcode >> 8) & 0xff; | ||
544 | uint size = (opcode >> 16); | ||
545 | PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16]; | ||
546 | const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); | ||
547 | uint pos; | ||
548 | |||
549 | D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n", | ||
550 | buf, size, spu.init.buffers[buf]); | ||
551 | |||
552 | ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); | ||
553 | |||
554 | ASSERT_ALIGN16(spu.init.buffers[buf]); | ||
555 | |||
556 | size = ROUNDUP16(size); | ||
557 | |||
558 | ASSERT_ALIGN16(spu.init.buffers[buf]); | ||
559 | |||
560 | mfc_get(buffer, /* dest */ | ||
561 | (unsigned int) spu.init.buffers[buf], /* src */ | ||
562 | size, | ||
563 | TAG_BATCH_BUFFER, | ||
564 | 0, /* tid */ | ||
565 | 0 /* rid */); | ||
566 | wait_on_mask(1 << TAG_BATCH_BUFFER); | ||
567 | |||
568 | /* Tell PPU we're done copying the buffer to local store */ | ||
569 | D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf); | ||
570 | release_buffer(buf); | ||
571 | |||
572 | /* | ||
573 | * Loop over commands in the batch buffer | ||
574 | */ | ||
575 | for (pos = 0; pos < usize; /* no incr */) { | ||
576 | switch (si_to_uint(buffer[pos])) { | ||
577 | /* | ||
578 | * rendering commands | ||
579 | */ | ||
580 | case CELL_CMD_CLEAR_SURFACE: | ||
581 | { | ||
582 | struct cell_command_clear_surface *clr | ||
583 | = (struct cell_command_clear_surface *) &buffer[pos]; | ||
584 | cmd_clear_surface(clr); | ||
585 | pos += sizeof(*clr) / 16; | ||
586 | } | ||
587 | break; | ||
588 | case CELL_CMD_RENDER: | ||
589 | { | ||
590 | struct cell_command_render *render | ||
591 | = (struct cell_command_render *) &buffer[pos]; | ||
592 | uint pos_incr; | ||
593 | cmd_render(render, &pos_incr); | ||
594 | pos += ((pos_incr+1)&~1) / 2; // should 'fix' cmd_render return | ||
595 | } | ||
596 | break; | ||
597 | /* | ||
598 | * state-update commands | ||
599 | */ | ||
600 | case CELL_CMD_STATE_FRAMEBUFFER: | ||
601 | { | ||
602 | struct cell_command_framebuffer *fb | ||
603 | = (struct cell_command_framebuffer *) &buffer[pos]; | ||
604 | cmd_state_framebuffer(fb); | ||
605 | pos += sizeof(*fb) / 16; | ||
606 | } | ||
607 | break; | ||
608 | case CELL_CMD_STATE_FRAGMENT_OPS: | ||
609 | { | ||
610 | struct cell_command_fragment_ops *fops | ||
611 | = (struct cell_command_fragment_ops *) &buffer[pos]; | ||
612 | cmd_state_fragment_ops(fops); | ||
613 | /* This is a variant-sized command */ | ||
614 | pos += ROUNDUP16(sizeof(*fops) + fops->total_code_size) / 16; | ||
615 | } | ||
616 | break; | ||
617 | case CELL_CMD_STATE_FRAGMENT_PROGRAM: | ||
618 | { | ||
619 | struct cell_command_fragment_program *fp | ||
620 | = (struct cell_command_fragment_program *) &buffer[pos]; | ||
621 | cmd_state_fragment_program(fp); | ||
622 | pos += sizeof(*fp) / 16; | ||
623 | } | ||
624 | break; | ||
625 | case CELL_CMD_STATE_FS_CONSTANTS: | ||
626 | pos = cmd_state_fs_constants(buffer, pos); | ||
627 | break; | ||
628 | case CELL_CMD_STATE_RASTERIZER: | ||
629 | { | ||
630 | struct cell_command_rasterizer *rast = | ||
631 | (struct cell_command_rasterizer *) &buffer[pos]; | ||
632 | spu.rasterizer = rast->rasterizer; | ||
633 | pos += sizeof(*rast) / 16; | ||
634 | } | ||
635 | break; | ||
636 | case CELL_CMD_STATE_SAMPLER: | ||
637 | { | ||
638 | struct cell_command_sampler *sampler | ||
639 | = (struct cell_command_sampler *) &buffer[pos]; | ||
640 | cmd_state_sampler(sampler); | ||
641 | pos += sizeof(*sampler) / 16; | ||
642 | } | ||
643 | break; | ||
644 | case CELL_CMD_STATE_TEXTURE: | ||
645 | { | ||
646 | struct cell_command_texture *texture | ||
647 | = (struct cell_command_texture *) &buffer[pos]; | ||
648 | cmd_state_texture(texture); | ||
649 | pos += sizeof(*texture) / 16; | ||
650 | } | ||
651 | break; | ||
652 | case CELL_CMD_STATE_VERTEX_INFO: | ||
653 | cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); | ||
654 | pos += 1 + ROUNDUP16(sizeof(struct vertex_info)) / 16; | ||
655 | break; | ||
656 | case CELL_CMD_STATE_VIEWPORT: | ||
657 | (void) memcpy(& draw.viewport, &buffer[pos+1], | ||
658 | sizeof(struct pipe_viewport_state)); | ||
659 | pos += 1 + ROUNDUP16(sizeof(struct pipe_viewport_state)) / 16; | ||
660 | break; | ||
661 | case CELL_CMD_STATE_UNIFORMS: | ||
662 | draw.constants = (const float (*)[4]) (uintptr_t)spu_extract((vector unsigned int)buffer[pos+1],0); | ||
663 | pos += 2; | ||
664 | break; | ||
665 | case CELL_CMD_STATE_VS_ARRAY_INFO: | ||
666 | cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); | ||
667 | pos += 1 + ROUNDUP16(sizeof(struct cell_array_info)) / 16; | ||
668 | break; | ||
669 | case CELL_CMD_STATE_BIND_VS: | ||
670 | #if 0 | ||
671 | spu_bind_vertex_shader(&draw, | ||
672 | (struct cell_shader_info *) &buffer[pos+1]); | ||
673 | #endif | ||
674 | pos += 1 + ROUNDUP16(sizeof(struct cell_shader_info)) / 16; | ||
675 | break; | ||
676 | case CELL_CMD_STATE_ATTRIB_FETCH: | ||
677 | cmd_state_attrib_fetch((struct cell_attribute_fetch_code *) | ||
678 | &buffer[pos+1]); | ||
679 | pos += 1 + ROUNDUP16(sizeof(struct cell_attribute_fetch_code)) / 16; | ||
680 | break; | ||
681 | /* | ||
682 | * misc commands | ||
683 | */ | ||
684 | case CELL_CMD_FINISH: | ||
685 | cmd_finish(); | ||
686 | pos += 1; | ||
687 | break; | ||
688 | case CELL_CMD_FENCE: | ||
689 | { | ||
690 | struct cell_command_fence *fence_cmd = | ||
691 | (struct cell_command_fence *) &buffer[pos]; | ||
692 | cmd_fence(fence_cmd); | ||
693 | pos += sizeof(*fence_cmd) / 16; | ||
694 | } | ||
695 | break; | ||
696 | case CELL_CMD_RELEASE_VERTS: | ||
697 | { | ||
698 | struct cell_command_release_verts *release | ||
699 | = (struct cell_command_release_verts *) &buffer[pos]; | ||
700 | cmd_release_verts(release); | ||
701 | pos += sizeof(*release) / 16; | ||
702 | } | ||
703 | break; | ||
704 | case CELL_CMD_FLUSH_BUFFER_RANGE: { | ||
705 | struct cell_buffer_range *br = (struct cell_buffer_range *) | ||
706 | &buffer[pos+1]; | ||
707 | |||
708 | spu_dcache_mark_dirty((unsigned) br->base, br->size); | ||
709 | pos += 1 + ROUNDUP16(sizeof(struct cell_buffer_range)) / 16; | ||
710 | break; | ||
711 | } | ||
712 | default: | ||
713 | printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, si_to_uint(buffer[pos])); | ||
714 | ASSERT(0); | ||
715 | break; | ||
716 | } | ||
717 | } | ||
718 | |||
719 | D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n"); | ||
720 | } | ||
721 | |||
722 | |||
723 | #define PERF 0 | ||
724 | |||
725 | |||
726 | /** | ||
727 | * Main loop for SPEs: Get a command, execute it, repeat. | ||
728 | */ | ||
729 | void | ||
730 | command_loop(void) | ||
731 | { | ||
732 | int exitFlag = 0; | ||
733 | uint t0, t1; | ||
734 | |||
735 | D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n"); | ||
736 | |||
737 | while (!exitFlag) { | ||
738 | unsigned opcode; | ||
739 | |||
740 | D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n"); | ||
741 | |||
742 | if (PERF) | ||
743 | spu_write_decrementer(~0); | ||
744 | |||
745 | /* read/wait from mailbox */ | ||
746 | opcode = (unsigned int) spu_read_in_mbox(); | ||
747 | D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode); | ||
748 | |||
749 | if (PERF) | ||
750 | t0 = spu_read_decrementer(); | ||
751 | |||
752 | switch (opcode & CELL_CMD_OPCODE_MASK) { | ||
753 | case CELL_CMD_EXIT: | ||
754 | D_PRINTF(CELL_DEBUG_CMD, "EXIT\n"); | ||
755 | exitFlag = 1; | ||
756 | break; | ||
757 | case CELL_CMD_VS_EXECUTE: | ||
758 | #if 0 | ||
759 | spu_execute_vertex_shader(&draw, &cmd.vs); | ||
760 | #endif | ||
761 | break; | ||
762 | case CELL_CMD_BATCH: | ||
763 | cmd_batch(opcode); | ||
764 | break; | ||
765 | default: | ||
766 | printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); | ||
767 | } | ||
768 | |||
769 | if (PERF) { | ||
770 | t1 = spu_read_decrementer(); | ||
771 | printf("wait mbox time: %gms batch time: %gms\n", | ||
772 | (~0u - t0) * spu.init.inv_timebase, | ||
773 | (t0 - t1) * spu.init.inv_timebase); | ||
774 | } | ||
775 | } | ||
776 | |||
777 | D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n"); | ||
778 | |||
779 | if (spu.init.debug_flags & CELL_DEBUG_CACHE) | ||
780 | spu_dcache_report(); | ||
781 | } | ||
782 | |||
783 | /* Initialize this module; we manage the fragment ops buffer here. */ | ||
784 | void | ||
785 | spu_command_init(void) | ||
786 | { | ||
787 | /* Install default/fallback fragment processing function. | ||
788 | * This will normally be overriden by a code-gen'd function | ||
789 | * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set. | ||
790 | */ | ||
791 | spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; | ||
792 | spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; | ||
793 | |||
794 | /* Set up the basic empty buffer for code-gen'ed fragment ops */ | ||
795 | spu.fragment_ops_code = NULL; | ||
796 | spu.fragment_ops_code_size = 0; | ||
797 | } | ||
798 | |||
799 | void | ||
800 | spu_command_close(void) | ||
801 | { | ||
802 | /* Deallocate the code-gen buffer for fragment ops, and reset the | ||
803 | * fragment ops functions to their initial setting (just to leave | ||
804 | * things in a good state). | ||
805 | */ | ||
806 | if (spu.fragment_ops_code != NULL) { | ||
807 | free(spu.fragment_ops_code); | ||
808 | } | ||
809 | spu_command_init(); | ||
810 | } | ||
diff --git a/src/gallium/drivers/cell/spu/spu_command.h b/src/gallium/drivers/cell/spu/spu_command.h deleted file mode 100644 index 83dcdade288..00000000000 --- a/src/gallium/drivers/cell/spu/spu_command.h +++ /dev/null | |||
@@ -1,35 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
#ifndef SPU_COMMAND_H
#define SPU_COMMAND_H


/** Main loop for SPEs: get a command from the mailbox, execute it, repeat. */
extern void
command_loop(void);

/** Install the fallback fragment ops and reset the code-gen buffer state. */
extern void
spu_command_init(void);

/** Free the fragment ops code-gen buffer and re-initialize the module. */
extern void
spu_command_close(void);


#endif /* SPU_COMMAND_H */
diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c deleted file mode 100644 index a6d67634fd8..00000000000 --- a/src/gallium/drivers/cell/spu/spu_dcache.c +++ /dev/null | |||
@@ -1,145 +0,0 @@ | |||
1 | /* | ||
2 | * (C) Copyright IBM Corporation 2008 | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
6 | * copy of this software and associated documentation files (the "Software"), | ||
7 | * to deal in the Software without restriction, including without limitation | ||
8 | * on the rights to use, copy, modify, merge, publish, distribute, sub | ||
9 | * license, and/or sell copies of the Software, and to permit persons to whom | ||
10 | * the Software is furnished to do so, subject to the following conditions: | ||
11 | * | ||
12 | * The above copyright notice and this permission notice (including the next | ||
13 | * paragraph) shall be included in all copies or substantial portions of the | ||
14 | * Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL | ||
19 | * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, | ||
20 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | ||
21 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | ||
22 | * USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include "cell/common.h" | ||
26 | #include "spu_main.h" | ||
27 | #include "spu_dcache.h" | ||
28 | |||
/* Cache line geometry: 2^7 = 128-byte lines.  ALIGN_MASK rounds an address
 * down to the start of its line; spu_dcache_mark_dirty() uses it to build
 * the line-aligned range it invalidates.
 */
#define CACHELINE_LOG2SIZE 7
#define LINE_SIZE (1U << 7)
#define ALIGN_MASK (~(LINE_SIZE - 1))

/* Configuration set up before <cache-api.h> is included.
 * NOTE(review): presumably the header consumes these to instantiate a
 * read-only cache of qwords named "data" with 2^2 ways and 2^6 sets --
 * confirm against the cache-api documentation.
 * CACHE_SET_TAGID folds the set number onto four tags starting at
 * TAG_DCACHE0.  Statistics are only compiled in for DEBUG builds.
 */
#define CACHE_NAME data
#define CACHED_TYPE qword
#define CACHE_TYPE CACHE_TYPE_RO
#define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0)
#define CACHE_LOG2NNWAY 2
#define CACHE_LOG2NSETS 6
#ifdef DEBUG
#define CACHE_STATS 1
#endif
#include <cache-api.h>

/* Yes folks, this is ugly.
 *
 * The code below needs CACHE_NAME, CACHE_NWAY and CACHE_NSETS after the
 * include, so they are (re)defined by hand.  The values restate
 * 2^CACHE_LOG2NNWAY (4) and 2^CACHE_LOG2NSETS (64) and must be kept in sync
 * with the configuration above.
 */
#undef CACHE_NWAY
#undef CACHE_NSETS
#define CACHE_NAME data
#define CACHE_NWAY 4
#define CACHE_NSETS (1U << 6)
51 | |||
52 | |||
53 | /** | ||
54 | * Fetch between arbitrary number of bytes from an unaligned address | ||
55 | * | ||
56 | * \param dst Destination data buffer | ||
57 | * \param ea Main memory effective address of source data | ||
58 | * \param size Number of bytes to read | ||
59 | * | ||
60 | * \warning | ||
61 | * As is hinted by the type of the \c dst pointer, this function writes | ||
62 | * multiples of 16-bytes. | ||
63 | */ | ||
64 | void | ||
65 | spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size) | ||
66 | { | ||
67 | const int shift = ea & 0x0f; | ||
68 | const unsigned read_size = ROUNDUP16(size + shift); | ||
69 | const unsigned last_read = ROUNDUP16(ea + size); | ||
70 | const qword *const last_write = dst + (ROUNDUP16(size) / 16); | ||
71 | unsigned i; | ||
72 | |||
73 | |||
74 | if (shift == 0) { | ||
75 | /* Data is already aligned. Fetch directly into the destination buffer. | ||
76 | */ | ||
77 | for (i = 0; i < size; i += 16) { | ||
78 | *(dst++) = cache_rd(data, ea + i); | ||
79 | } | ||
80 | } else { | ||
81 | qword hi; | ||
82 | |||
83 | |||
84 | /* Please exercise extreme caution when modifying this code. This code | ||
85 | * must not read past the end of the page containing the source data, | ||
86 | * and it must not write more than ((size + 15) / 16) qwords to the | ||
87 | * destination buffer. | ||
88 | */ | ||
89 | ea &= ~0x0f; | ||
90 | hi = cache_rd(data, ea); | ||
91 | for (i = 16; i < read_size; i += 16) { | ||
92 | qword lo = cache_rd(data, ea + i); | ||
93 | |||
94 | *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), | ||
95 | (qword) spu_rlmaskqwbyte(lo, shift - 16)); | ||
96 | hi = lo; | ||
97 | } | ||
98 | |||
99 | if (dst != last_write) { | ||
100 | *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), si_il(0)); | ||
101 | } | ||
102 | } | ||
103 | |||
104 | ASSERT((ea + i) == last_read); | ||
105 | ASSERT(dst == last_write); | ||
106 | } | ||
107 | |||
108 | |||
109 | /** | ||
110 | * Notify the cache that a range of main memory may have been modified | ||
111 | */ | ||
112 | void | ||
113 | spu_dcache_mark_dirty(unsigned ea, unsigned size) | ||
114 | { | ||
115 | unsigned i; | ||
116 | const unsigned aligned_start = (ea & ALIGN_MASK); | ||
117 | const unsigned aligned_end = (ea + size + (LINE_SIZE - 1)) | ||
118 | & ALIGN_MASK; | ||
119 | |||
120 | |||
121 | for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) { | ||
122 | const unsigned entry = __cache_dir[i]; | ||
123 | const unsigned addr = entry & ~0x0f; | ||
124 | |||
125 | __cache_dir[i] = ((addr >= aligned_start) && (addr < aligned_end)) | ||
126 | ? (entry & ~CACHELINE_VALID) : entry; | ||
127 | } | ||
128 | } | ||
129 | |||
130 | |||
/**
 * Print cache utilization report
 *
 * Only does anything when CACHE_STATS is defined, and then only on the SPU
 * whose init id is 0.
 * NOTE(review): the message says "Texture cache" although the statistics
 * printed belong to the cache named "data" -- confirm whether intentional.
 */
void
spu_dcache_report(void)
{
#ifdef CACHE_STATS
   if (spu.init.id != 0) {
      return;
   }

   printf("SPU 0: Texture cache report:\n");
   cache_pr_stats(data);
#endif
}
144 | |||
145 | |||
diff --git a/src/gallium/drivers/cell/spu/spu_dcache.h b/src/gallium/drivers/cell/spu/spu_dcache.h deleted file mode 100644 index 39a19eb31b5..00000000000 --- a/src/gallium/drivers/cell/spu/spu_dcache.h +++ /dev/null | |||
@@ -1,37 +0,0 @@ | |||
1 | /* | ||
2 | * (C) Copyright IBM Corporation 2008 | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
6 | * copy of this software and associated documentation files (the "Software"), | ||
7 | * to deal in the Software without restriction, including without limitation | ||
8 | * on the rights to use, copy, modify, merge, publish, distribute, sub | ||
9 | * license, and/or sell copies of the Software, and to permit persons to whom | ||
10 | * the Software is furnished to do so, subject to the following conditions: | ||
11 | * | ||
12 | * The above copyright notice and this permission notice (including the next | ||
13 | * paragraph) shall be included in all copies or substantial portions of the | ||
14 | * Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL | ||
19 | * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, | ||
20 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | ||
21 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | ||
22 | * USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef SPU_DCACHE_H | ||
26 | #define SPU_DCACHE_H | ||
27 | |||
28 | extern void | ||
29 | spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size); | ||
30 | |||
31 | extern void | ||
32 | spu_dcache_mark_dirty(unsigned ea, unsigned size); | ||
33 | |||
34 | extern void | ||
35 | spu_dcache_report(void); | ||
36 | |||
37 | #endif /* SPU_DCACHE_H */ | ||
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c deleted file mode 100644 index e4ebeb595ce..00000000000 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ /dev/null | |||
@@ -1,1870 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | /** | ||
29 | * TGSI interpreter/executor. | ||
30 | * | ||
31 | * Flow control information: | ||
32 | * | ||
33 | * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) | ||
34 | * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special | ||
35 | * care since a condition may be true for some quad components but false | ||
36 | * for other components. | ||
37 | * | ||
38 | * We basically execute all statements (even if they're in the part of | ||
39 | * an IF/ELSE clause that's "not taken") and use a special mask to | ||
40 | * control writing to destination registers. This is the ExecMask. | ||
41 | * See store_dest(). | ||
42 | * | ||
43 | * The ExecMask is computed from the CondMask, LoopMask and ContMask | ||
44 | * (plus the FuncMask, see UPDATE_EXEC_MASK), which are controlled by the | ||
45 | * flow control instructions (namely IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). | ||
46 | * | ||
47 | * | ||
48 | * Authors: | ||
49 | * Michal Krol | ||
50 | * Brian Paul | ||
51 | */ | ||
52 | |||
53 | #include <transpose_matrix4x4.h> | ||
54 | #include <simdmath/ceilf4.h> | ||
55 | #include <simdmath/cosf4.h> | ||
56 | #include <simdmath/divf4.h> | ||
57 | #include <simdmath/floorf4.h> | ||
58 | #include <simdmath/log2f4.h> | ||
59 | #include <simdmath/powf4.h> | ||
60 | #include <simdmath/sinf4.h> | ||
61 | #include <simdmath/sqrtf4.h> | ||
62 | #include <simdmath/truncf4.h> | ||
63 | |||
64 | #include "pipe/p_compiler.h" | ||
65 | #include "pipe/p_state.h" | ||
66 | #include "pipe/p_shader_tokens.h" | ||
67 | #include "tgsi/tgsi_parse.h" | ||
68 | #include "tgsi/tgsi_util.h" | ||
69 | #include "spu_exec.h" | ||
70 | #include "spu_main.h" | ||
71 | #include "spu_vertex_shader.h" | ||
72 | #include "spu_dcache.h" | ||
73 | #include "cell/common.h" | ||
74 | |||
/*
 * Position of each pixel of a quad within a channel vector.  These are used
 * to build the shuffle tables (br_shuf, bl_shuf, tl_shuf) for
 * micro_ddx()/micro_ddy().
 */
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
#define TILE_BOTTOM_RIGHT 3

/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
 *
 * A utility value lives at mach->Temps[<name>_I].xyzw[<name>_C]; see
 * spu_exec_machine_init() for the constants that are preloaded there.
 */
#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I TGSI_EXEC_TEMP_128_I
#define TEMP_128_C TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_R0 TGSI_EXEC_TEMP_R0
106 | |||
/** Iterate CHAN over the four channels (x, y, z, w) of a register. */
#define FOR_EACH_CHANNEL(CHAN)\
   for ((CHAN) = 0; (CHAN) < 4; (CHAN)++)

/** Non-zero if channel CHAN is set in the write mask of destination 0. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))

/** Non-zero if channel CHAN is set in the write mask of destination 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))

/**
 * Loop headers: the statement that follows runs once per enabled channel.
 * They expand to "for (...) if (...)", so never follow one with an "else".
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))


/**
 * The execution mask is the intersection of the conditional, loop, continue
 * and function masks.  MACH may be any pointer expression; the macro is a
 * single parenthesized expression.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)


/** Channel indices within a register. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
134 | |||
135 | |||
136 | |||
137 | /** | ||
138 | * Initialize machine state by expanding tokens to full instructions, | ||
139 | * allocating temporary storage, setting up constants, etc. | ||
140 | * After this, we can call spu_exec_machine_run() many times. | ||
141 | */ | ||
142 | void | ||
143 | spu_exec_machine_init(struct spu_exec_machine *mach, | ||
144 | uint numSamplers, | ||
145 | struct spu_sampler *samplers, | ||
146 | unsigned processor) | ||
147 | { | ||
148 | const qword zero = si_il(0); | ||
149 | const qword not_zero = si_il(~0); | ||
150 | |||
151 | (void) numSamplers; | ||
152 | mach->Samplers = samplers; | ||
153 | mach->Processor = processor; | ||
154 | mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; | ||
155 | |||
156 | /* Setup constants. */ | ||
157 | mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; | ||
158 | mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; | ||
159 | mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1); | ||
160 | mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31); | ||
161 | |||
162 | mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f); | ||
163 | mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f); | ||
164 | mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f); | ||
165 | mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f); | ||
166 | } | ||
167 | |||
168 | |||
169 | static INLINE qword | ||
170 | micro_abs(qword src) | ||
171 | { | ||
172 | return si_rotmi(si_shli(src, 1), -1); | ||
173 | } | ||
174 | |||
175 | static INLINE qword | ||
176 | micro_ceil(qword src) | ||
177 | { | ||
178 | return (qword) _ceilf4((vec_float4) src); | ||
179 | } | ||
180 | |||
181 | static INLINE qword | ||
182 | micro_cos(qword src) | ||
183 | { | ||
184 | return (qword) _cosf4((vec_float4) src); | ||
185 | } | ||
186 | |||
187 | static const qword br_shuf = { | ||
188 | TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, | ||
189 | TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, | ||
190 | TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, | ||
191 | TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, | ||
192 | TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, | ||
193 | TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, | ||
194 | TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, | ||
195 | TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, | ||
196 | }; | ||
197 | |||
198 | static const qword bl_shuf = { | ||
199 | TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, | ||
200 | TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, | ||
201 | TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, | ||
202 | TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, | ||
203 | TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, | ||
204 | TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, | ||
205 | TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, | ||
206 | TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, | ||
207 | }; | ||
208 | |||
209 | static const qword tl_shuf = { | ||
210 | TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, | ||
211 | TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, | ||
212 | TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, | ||
213 | TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, | ||
214 | TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, | ||
215 | TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, | ||
216 | TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, | ||
217 | TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, | ||
218 | }; | ||
219 | |||
220 | static qword | ||
221 | micro_ddx(qword src) | ||
222 | { | ||
223 | qword bottom_right = si_shufb(src, src, br_shuf); | ||
224 | qword bottom_left = si_shufb(src, src, bl_shuf); | ||
225 | |||
226 | return si_fs(bottom_right, bottom_left); | ||
227 | } | ||
228 | |||
229 | static qword | ||
230 | micro_ddy(qword src) | ||
231 | { | ||
232 | qword top_left = si_shufb(src, src, tl_shuf); | ||
233 | qword bottom_left = si_shufb(src, src, bl_shuf); | ||
234 | |||
235 | return si_fs(top_left, bottom_left); | ||
236 | } | ||
237 | |||
238 | static INLINE qword | ||
239 | micro_div(qword src0, qword src1) | ||
240 | { | ||
241 | return (qword) _divf4((vec_float4) src0, (vec_float4) src1); | ||
242 | } | ||
243 | |||
244 | static qword | ||
245 | micro_flr(qword src) | ||
246 | { | ||
247 | return (qword) _floorf4((vec_float4) src); | ||
248 | } | ||
249 | |||
250 | static qword | ||
251 | micro_frc(qword src) | ||
252 | { | ||
253 | return si_fs(src, (qword) _floorf4((vec_float4) src)); | ||
254 | } | ||
255 | |||
256 | static INLINE qword | ||
257 | micro_ge(qword src0, qword src1) | ||
258 | { | ||
259 | return si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); | ||
260 | } | ||
261 | |||
262 | static qword | ||
263 | micro_lg2(qword src) | ||
264 | { | ||
265 | return (qword) _log2f4((vec_float4) src); | ||
266 | } | ||
267 | |||
268 | static INLINE qword | ||
269 | micro_lt(qword src0, qword src1) | ||
270 | { | ||
271 | const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); | ||
272 | |||
273 | return si_xori(tmp, 0xff); | ||
274 | } | ||
275 | |||
276 | static INLINE qword | ||
277 | micro_max(qword src0, qword src1) | ||
278 | { | ||
279 | return si_selb(src1, src0, si_fcgt(src0, src1)); | ||
280 | } | ||
281 | |||
282 | static INLINE qword | ||
283 | micro_min(qword src0, qword src1) | ||
284 | { | ||
285 | return si_selb(src0, src1, si_fcgt(src0, src1)); | ||
286 | } | ||
287 | |||
288 | static qword | ||
289 | micro_neg(qword src) | ||
290 | { | ||
291 | return si_xor(src, (qword) spu_splats(0x80000000)); | ||
292 | } | ||
293 | |||
294 | static qword | ||
295 | micro_set_sign(qword src) | ||
296 | { | ||
297 | return si_or(src, (qword) spu_splats(0x80000000)); | ||
298 | } | ||
299 | |||
300 | static qword | ||
301 | micro_pow(qword src0, qword src1) | ||
302 | { | ||
303 | return (qword) _powf4((vec_float4) src0, (vec_float4) src1); | ||
304 | } | ||
305 | |||
306 | static qword | ||
307 | micro_rnd(qword src) | ||
308 | { | ||
309 | const qword half = (qword) spu_splats(0.5f); | ||
310 | |||
311 | /* May be able to use _roundf4. There may be some difference, though. | ||
312 | */ | ||
313 | return (qword) _floorf4((vec_float4) si_fa(src, half)); | ||
314 | } | ||
315 | |||
316 | static INLINE qword | ||
317 | micro_ishr(qword src0, qword src1) | ||
318 | { | ||
319 | return si_rotma(src0, si_sfi(src1, 0)); | ||
320 | } | ||
321 | |||
322 | static qword | ||
323 | micro_trunc(qword src) | ||
324 | { | ||
325 | return (qword) _truncf4((vec_float4) src); | ||
326 | } | ||
327 | |||
328 | static qword | ||
329 | micro_sin(qword src) | ||
330 | { | ||
331 | return (qword) _sinf4((vec_float4) src); | ||
332 | } | ||
333 | |||
334 | static INLINE qword | ||
335 | micro_sqrt(qword src) | ||
336 | { | ||
337 | return (qword) _sqrtf4((vec_float4) src); | ||
338 | } | ||
339 | |||
340 | static void | ||
341 | fetch_src_file_channel( | ||
342 | const struct spu_exec_machine *mach, | ||
343 | const uint file, | ||
344 | const uint swizzle, | ||
345 | const union spu_exec_channel *index, | ||
346 | union spu_exec_channel *chan ) | ||
347 | { | ||
348 | switch( swizzle ) { | ||
349 | case TGSI_SWIZZLE_X: | ||
350 | case TGSI_SWIZZLE_Y: | ||
351 | case TGSI_SWIZZLE_Z: | ||
352 | case TGSI_SWIZZLE_W: | ||
353 | switch( file ) { | ||
354 | case TGSI_FILE_CONSTANT: { | ||
355 | unsigned i; | ||
356 | |||
357 | for (i = 0; i < 4; i++) { | ||
358 | const float *ptr = mach->Consts[index->i[i]]; | ||
359 | float tmp[4]; | ||
360 | |||
361 | spu_dcache_fetch_unaligned((qword *) tmp, | ||
362 | (uintptr_t)(ptr + swizzle), | ||
363 | sizeof(float)); | ||
364 | |||
365 | chan->f[i] = tmp[0]; | ||
366 | } | ||
367 | break; | ||
368 | } | ||
369 | |||
370 | case TGSI_FILE_INPUT: | ||
371 | chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; | ||
372 | chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; | ||
373 | chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; | ||
374 | chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; | ||
375 | break; | ||
376 | |||
377 | case TGSI_FILE_TEMPORARY: | ||
378 | chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; | ||
379 | chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; | ||
380 | chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; | ||
381 | chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; | ||
382 | break; | ||
383 | |||
384 | case TGSI_FILE_IMMEDIATE: | ||
385 | ASSERT( index->i[0] < (int) mach->ImmLimit ); | ||
386 | ASSERT( index->i[1] < (int) mach->ImmLimit ); | ||
387 | ASSERT( index->i[2] < (int) mach->ImmLimit ); | ||
388 | ASSERT( index->i[3] < (int) mach->ImmLimit ); | ||
389 | |||
390 | chan->f[0] = mach->Imms[index->i[0]][swizzle]; | ||
391 | chan->f[1] = mach->Imms[index->i[1]][swizzle]; | ||
392 | chan->f[2] = mach->Imms[index->i[2]][swizzle]; | ||
393 | chan->f[3] = mach->Imms[index->i[3]][swizzle]; | ||
394 | break; | ||
395 | |||
396 | case TGSI_FILE_ADDRESS: | ||
397 | chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; | ||
398 | chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; | ||
399 | chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; | ||
400 | chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; | ||
401 | break; | ||
402 | |||
403 | case TGSI_FILE_OUTPUT: | ||
404 | /* vertex/fragment output vars can be read too */ | ||
405 | chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; | ||
406 | chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; | ||
407 | chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; | ||
408 | chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; | ||
409 | break; | ||
410 | |||
411 | default: | ||
412 | ASSERT( 0 ); | ||
413 | } | ||
414 | break; | ||
415 | |||
416 | default: | ||
417 | ASSERT( 0 ); | ||
418 | } | ||
419 | } | ||
420 | |||
421 | static void | ||
422 | fetch_source( | ||
423 | const struct spu_exec_machine *mach, | ||
424 | union spu_exec_channel *chan, | ||
425 | const struct tgsi_full_src_register *reg, | ||
426 | const uint chan_index ) | ||
427 | { | ||
428 | union spu_exec_channel index; | ||
429 | uint swizzle; | ||
430 | |||
431 | index.i[0] = | ||
432 | index.i[1] = | ||
433 | index.i[2] = | ||
434 | index.i[3] = reg->Register.Index; | ||
435 | |||
436 | if (reg->Register.Indirect) { | ||
437 | union spu_exec_channel index2; | ||
438 | union spu_exec_channel indir_index; | ||
439 | |||
440 | index2.i[0] = | ||
441 | index2.i[1] = | ||
442 | index2.i[2] = | ||
443 | index2.i[3] = reg->Indirect.Index; | ||
444 | |||
445 | swizzle = tgsi_util_get_src_register_swizzle(®->Indirect, | ||
446 | CHAN_X); | ||
447 | fetch_src_file_channel( | ||
448 | mach, | ||
449 | reg->Indirect.File, | ||
450 | swizzle, | ||
451 | &index2, | ||
452 | &indir_index ); | ||
453 | |||
454 | index.q = si_a(index.q, indir_index.q); | ||
455 | } | ||
456 | |||
457 | if( reg->Register.Dimension ) { | ||
458 | switch( reg->Register.File ) { | ||
459 | case TGSI_FILE_INPUT: | ||
460 | index.q = si_mpyi(index.q, 17); | ||
461 | break; | ||
462 | case TGSI_FILE_CONSTANT: | ||
463 | index.q = si_shli(index.q, 12); | ||
464 | break; | ||
465 | default: | ||
466 | ASSERT( 0 ); | ||
467 | } | ||
468 | |||
469 | index.i[0] += reg->Dimension.Index; | ||
470 | index.i[1] += reg->Dimension.Index; | ||
471 | index.i[2] += reg->Dimension.Index; | ||
472 | index.i[3] += reg->Dimension.Index; | ||
473 | |||
474 | if (reg->Dimension.Indirect) { | ||
475 | union spu_exec_channel index2; | ||
476 | union spu_exec_channel indir_index; | ||
477 | |||
478 | index2.i[0] = | ||
479 | index2.i[1] = | ||
480 | index2.i[2] = | ||
481 | index2.i[3] = reg->DimIndirect.Index; | ||
482 | |||
483 | swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); | ||
484 | fetch_src_file_channel( | ||
485 | mach, | ||
486 | reg->DimIndirect.File, | ||
487 | swizzle, | ||
488 | &index2, | ||
489 | &indir_index ); | ||
490 | |||
491 | index.q = si_a(index.q, indir_index.q); | ||
492 | } | ||
493 | } | ||
494 | |||
495 | swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); | ||
496 | fetch_src_file_channel( | ||
497 | mach, | ||
498 | reg->Register.File, | ||
499 | swizzle, | ||
500 | &index, | ||
501 | chan ); | ||
502 | |||
503 | switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { | ||
504 | case TGSI_UTIL_SIGN_CLEAR: | ||
505 | chan->q = micro_abs(chan->q); | ||
506 | break; | ||
507 | |||
508 | case TGSI_UTIL_SIGN_SET: | ||
509 | chan->q = micro_set_sign(chan->q); | ||
510 | break; | ||
511 | |||
512 | case TGSI_UTIL_SIGN_TOGGLE: | ||
513 | chan->q = micro_neg(chan->q); | ||
514 | break; | ||
515 | |||
516 | case TGSI_UTIL_SIGN_KEEP: | ||
517 | break; | ||
518 | } | ||
519 | |||
520 | if (reg->RegisterExtMod.Complement) { | ||
521 | chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); | ||
522 | } | ||
523 | } | ||
524 | |||
525 | static void | ||
526 | store_dest( | ||
527 | struct spu_exec_machine *mach, | ||
528 | const union spu_exec_channel *chan, | ||
529 | const struct tgsi_full_dst_register *reg, | ||
530 | const struct tgsi_full_instruction *inst, | ||
531 | uint chan_index ) | ||
532 | { | ||
533 | union spu_exec_channel *dst; | ||
534 | |||
535 | switch( reg->Register.File ) { | ||
536 | case TGSI_FILE_NULL: | ||
537 | return; | ||
538 | |||
539 | case TGSI_FILE_OUTPUT: | ||
540 | dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] | ||
541 | + reg->Register.Index].xyzw[chan_index]; | ||
542 | break; | ||
543 | |||
544 | case TGSI_FILE_TEMPORARY: | ||
545 | dst = &mach->Temps[reg->Register.Index].xyzw[chan_index]; | ||
546 | break; | ||
547 | |||
548 | case TGSI_FILE_ADDRESS: | ||
549 | dst = &mach->Addrs[reg->Register.Index].xyzw[chan_index]; | ||
550 | break; | ||
551 | |||
552 | default: | ||
553 | ASSERT( 0 ); | ||
554 | return; | ||
555 | } | ||
556 | |||
557 | switch (inst->Instruction.Saturate) | ||
558 | { | ||
559 | case TGSI_SAT_NONE: | ||
560 | if (mach->ExecMask & 0x1) | ||
561 | dst->i[0] = chan->i[0]; | ||
562 | if (mach->ExecMask & 0x2) | ||
563 | dst->i[1] = chan->i[1]; | ||
564 | if (mach->ExecMask & 0x4) | ||
565 | dst->i[2] = chan->i[2]; | ||
566 | if (mach->ExecMask & 0x8) | ||
567 | dst->i[3] = chan->i[3]; | ||
568 | break; | ||
569 | |||
570 | case TGSI_SAT_ZERO_ONE: | ||
571 | /* XXX need to obey ExecMask here */ | ||
572 | dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); | ||
573 | dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q); | ||
574 | break; | ||
575 | |||
576 | case TGSI_SAT_MINUS_PLUS_ONE: | ||
577 | ASSERT( 0 ); | ||
578 | break; | ||
579 | |||
580 | default: | ||
581 | ASSERT( 0 ); | ||
582 | } | ||
583 | } | ||
584 | |||
/*
 * Shorthands for reading source operand INDEX / writing destination operand
 * INDEX of the current instruction.  Both rely on variables named `mach`
 * and `inst` being in scope at the point of use.
 */
#define FETCH(VAL,INDEX,CHAN)\
    fetch_source (mach, VAL, &inst->Src[INDEX], CHAN)

#define STORE(VAL,INDEX,CHAN)\
    store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN )
590 | |||
591 | |||
592 | /** | ||
593 | * Execute ARB-style KIL which is predicated by a src register. | ||
594 | * Kill fragment if any of the four values is less than zero. | ||
595 | */ | ||
596 | static void | ||
597 | exec_kil(struct spu_exec_machine *mach, | ||
598 | const struct tgsi_full_instruction *inst) | ||
599 | { | ||
600 | uint uniquemask; | ||
601 | uint chan_index; | ||
602 | uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ | ||
603 | union spu_exec_channel r[1]; | ||
604 | |||
605 | /* This mask stores component bits that were already tested. */ | ||
606 | uniquemask = 0; | ||
607 | |||
608 | for (chan_index = 0; chan_index < 4; chan_index++) | ||
609 | { | ||
610 | uint swizzle; | ||
611 | uint i; | ||
612 | |||
613 | /* unswizzle channel */ | ||
614 | swizzle = tgsi_util_get_full_src_register_swizzle ( | ||
615 | &inst->Src[0], | ||
616 | chan_index); | ||
617 | |||
618 | /* check if the component has not been already tested */ | ||
619 | if (uniquemask & (1 << swizzle)) | ||
620 | continue; | ||
621 | uniquemask |= 1 << swizzle; | ||
622 | |||
623 | FETCH(&r[0], 0, chan_index); | ||
624 | for (i = 0; i < 4; i++) | ||
625 | if (r[0].f[i] < 0.0f) | ||
626 | kilmask |= 1 << i; | ||
627 | } | ||
628 | |||
629 | mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; | ||
630 | } | ||
631 | |||
632 | /** | ||
633 | * Execute NVIDIA-style KIL which is predicated by a condition code. | ||
634 | * Kill fragment if the condition code is TRUE. | ||
635 | */ | ||
636 | static void | ||
637 | exec_kilp(struct spu_exec_machine *mach, | ||
638 | const struct tgsi_full_instruction *inst) | ||
639 | { | ||
640 | uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ | ||
641 | |||
642 | /* TODO: build kilmask from CC mask */ | ||
643 | |||
644 | mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; | ||
645 | } | ||
646 | |||
647 | /* | ||
648 | * Fetch a texel using STR texture coordinates. | ||
649 | */ | ||
650 | static void | ||
651 | fetch_texel( struct spu_sampler *sampler, | ||
652 | const union spu_exec_channel *s, | ||
653 | const union spu_exec_channel *t, | ||
654 | const union spu_exec_channel *p, | ||
655 | float lodbias, /* XXX should be float[4] */ | ||
656 | union spu_exec_channel *r, | ||
657 | union spu_exec_channel *g, | ||
658 | union spu_exec_channel *b, | ||
659 | union spu_exec_channel *a ) | ||
660 | { | ||
661 | qword rgba[4]; | ||
662 | qword out[4]; | ||
663 | |||
664 | sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, | ||
665 | (float (*)[4]) rgba); | ||
666 | |||
667 | _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba); | ||
668 | r->q = out[0]; | ||
669 | g->q = out[1]; | ||
670 | b->q = out[2]; | ||
671 | a->q = out[3]; | ||
672 | } | ||
673 | |||
674 | |||
675 | static void | ||
676 | exec_tex(struct spu_exec_machine *mach, | ||
677 | const struct tgsi_full_instruction *inst, | ||
678 | boolean biasLod, boolean projected) | ||
679 | { | ||
680 | const uint unit = inst->Src[1].Register.Index; | ||
681 | union spu_exec_channel r[8]; | ||
682 | uint chan_index; | ||
683 | float lodBias; | ||
684 | |||
685 | /* printf("Sampler %u unit %u\n", sampler, unit); */ | ||
686 | |||
687 | switch (inst->InstructionExtTexture.Texture) { | ||
688 | case TGSI_TEXTURE_1D: | ||
689 | |||
690 | FETCH(&r[0], 0, CHAN_X); | ||
691 | |||
692 | if (projected) { | ||
693 | FETCH(&r[1], 0, CHAN_W); | ||
694 | r[0].q = micro_div(r[0].q, r[1].q); | ||
695 | } | ||
696 | |||
697 | if (biasLod) { | ||
698 | FETCH(&r[1], 0, CHAN_W); | ||
699 | lodBias = r[2].f[0]; | ||
700 | } | ||
701 | else | ||
702 | lodBias = 0.0; | ||
703 | |||
704 | fetch_texel(&mach->Samplers[unit], | ||
705 | &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ | ||
706 | &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ | ||
707 | break; | ||
708 | |||
709 | case TGSI_TEXTURE_2D: | ||
710 | case TGSI_TEXTURE_RECT: | ||
711 | |||
712 | FETCH(&r[0], 0, CHAN_X); | ||
713 | FETCH(&r[1], 0, CHAN_Y); | ||
714 | FETCH(&r[2], 0, CHAN_Z); | ||
715 | |||
716 | if (projected) { | ||
717 | FETCH(&r[3], 0, CHAN_W); | ||
718 | r[0].q = micro_div(r[0].q, r[3].q); | ||
719 | r[1].q = micro_div(r[1].q, r[3].q); | ||
720 | r[2].q = micro_div(r[2].q, r[3].q); | ||
721 | } | ||
722 | |||
723 | if (biasLod) { | ||
724 | FETCH(&r[3], 0, CHAN_W); | ||
725 | lodBias = r[3].f[0]; | ||
726 | } | ||
727 | else | ||
728 | lodBias = 0.0; | ||
729 | |||
730 | fetch_texel(&mach->Samplers[unit], | ||
731 | &r[0], &r[1], &r[2], lodBias, /* inputs */ | ||
732 | &r[0], &r[1], &r[2], &r[3]); /* outputs */ | ||
733 | break; | ||
734 | |||
735 | case TGSI_TEXTURE_3D: | ||
736 | case TGSI_TEXTURE_CUBE: | ||
737 | |||
738 | FETCH(&r[0], 0, CHAN_X); | ||
739 | FETCH(&r[1], 0, CHAN_Y); | ||
740 | FETCH(&r[2], 0, CHAN_Z); | ||
741 | |||
742 | if (projected) { | ||
743 | FETCH(&r[3], 0, CHAN_W); | ||
744 | r[0].q = micro_div(r[0].q, r[3].q); | ||
745 | r[1].q = micro_div(r[1].q, r[3].q); | ||
746 | r[2].q = micro_div(r[2].q, r[3].q); | ||
747 | } | ||
748 | |||
749 | if (biasLod) { | ||
750 | FETCH(&r[3], 0, CHAN_W); | ||
751 | lodBias = r[3].f[0]; | ||
752 | } | ||
753 | else | ||
754 | lodBias = 0.0; | ||
755 | |||
756 | fetch_texel(&mach->Samplers[unit], | ||
757 | &r[0], &r[1], &r[2], lodBias, | ||
758 | &r[0], &r[1], &r[2], &r[3]); | ||
759 | break; | ||
760 | |||
761 | default: | ||
762 | ASSERT (0); | ||
763 | } | ||
764 | |||
765 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
766 | STORE( &r[chan_index], 0, chan_index ); | ||
767 | } | ||
768 | } | ||
769 | |||
770 | |||
771 | |||
772 | static void | ||
773 | constant_interpolation( | ||
774 | struct spu_exec_machine *mach, | ||
775 | unsigned attrib, | ||
776 | unsigned chan ) | ||
777 | { | ||
778 | unsigned i; | ||
779 | |||
780 | for( i = 0; i < QUAD_SIZE; i++ ) { | ||
781 | mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; | ||
782 | } | ||
783 | } | ||
784 | |||
785 | static void | ||
786 | linear_interpolation( | ||
787 | struct spu_exec_machine *mach, | ||
788 | unsigned attrib, | ||
789 | unsigned chan ) | ||
790 | { | ||
791 | const float x = mach->QuadPos.xyzw[0].f[0]; | ||
792 | const float y = mach->QuadPos.xyzw[1].f[0]; | ||
793 | const float dadx = mach->InterpCoefs[attrib].dadx[chan]; | ||
794 | const float dady = mach->InterpCoefs[attrib].dady[chan]; | ||
795 | const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; | ||
796 | mach->Inputs[attrib].xyzw[chan].f[0] = a0; | ||
797 | mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; | ||
798 | mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; | ||
799 | mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; | ||
800 | } | ||
801 | |||
802 | static void | ||
803 | perspective_interpolation( | ||
804 | struct spu_exec_machine *mach, | ||
805 | unsigned attrib, | ||
806 | unsigned chan ) | ||
807 | { | ||
808 | const float x = mach->QuadPos.xyzw[0].f[0]; | ||
809 | const float y = mach->QuadPos.xyzw[1].f[0]; | ||
810 | const float dadx = mach->InterpCoefs[attrib].dadx[chan]; | ||
811 | const float dady = mach->InterpCoefs[attrib].dady[chan]; | ||
812 | const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; | ||
813 | const float *w = mach->QuadPos.xyzw[3].f; | ||
814 | /* divide by W here */ | ||
815 | mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; | ||
816 | mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; | ||
817 | mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; | ||
818 | mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; | ||
819 | } | ||
820 | |||
821 | |||
/**
 * Signature shared by the interpolation routines: fill the input register
 * for attribute \p attrib, channel \p chan, from the machine's
 * interpolation coefficients.
 */
typedef void (*interpolation_func)(struct spu_exec_machine *mach,
                                   unsigned attrib,
                                   unsigned chan);
826 | |||
827 | static void | ||
828 | exec_declaration(struct spu_exec_machine *mach, | ||
829 | const struct tgsi_full_declaration *decl) | ||
830 | { | ||
831 | if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { | ||
832 | if( decl->Declaration.File == TGSI_FILE_INPUT ) { | ||
833 | unsigned first, last, mask; | ||
834 | interpolation_func interp; | ||
835 | |||
836 | first = decl->Range.First; | ||
837 | last = decl->Range.Last; | ||
838 | mask = decl->Declaration.UsageMask; | ||
839 | |||
840 | switch( decl->Declaration.Interpolate ) { | ||
841 | case TGSI_INTERPOLATE_CONSTANT: | ||
842 | interp = constant_interpolation; | ||
843 | break; | ||
844 | |||
845 | case TGSI_INTERPOLATE_LINEAR: | ||
846 | interp = linear_interpolation; | ||
847 | break; | ||
848 | |||
849 | case TGSI_INTERPOLATE_PERSPECTIVE: | ||
850 | interp = perspective_interpolation; | ||
851 | break; | ||
852 | |||
853 | default: | ||
854 | ASSERT( 0 ); | ||
855 | } | ||
856 | |||
857 | if( mask == TGSI_WRITEMASK_XYZW ) { | ||
858 | unsigned i, j; | ||
859 | |||
860 | for( i = first; i <= last; i++ ) { | ||
861 | for( j = 0; j < NUM_CHANNELS; j++ ) { | ||
862 | interp( mach, i, j ); | ||
863 | } | ||
864 | } | ||
865 | } | ||
866 | else { | ||
867 | unsigned i, j; | ||
868 | |||
869 | for( j = 0; j < NUM_CHANNELS; j++ ) { | ||
870 | if( mask & (1 << j) ) { | ||
871 | for( i = first; i <= last; i++ ) { | ||
872 | interp( mach, i, j ); | ||
873 | } | ||
874 | } | ||
875 | } | ||
876 | } | ||
877 | } | ||
878 | } | ||
879 | } | ||
880 | |||
881 | static void | ||
882 | exec_instruction( | ||
883 | struct spu_exec_machine *mach, | ||
884 | const struct tgsi_full_instruction *inst, | ||
885 | int *pc ) | ||
886 | { | ||
887 | uint chan_index; | ||
888 | union spu_exec_channel r[8]; | ||
889 | |||
890 | (*pc)++; | ||
891 | |||
892 | switch (inst->Instruction.Opcode) { | ||
893 | case TGSI_OPCODE_ARL: | ||
894 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
895 | FETCH( &r[0], 0, chan_index ); | ||
896 | r[0].q = si_cflts(r[0].q, 0); | ||
897 | STORE( &r[0], 0, chan_index ); | ||
898 | } | ||
899 | break; | ||
900 | |||
901 | case TGSI_OPCODE_MOV: | ||
902 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
903 | FETCH( &r[0], 0, chan_index ); | ||
904 | STORE( &r[0], 0, chan_index ); | ||
905 | } | ||
906 | break; | ||
907 | |||
908 | case TGSI_OPCODE_LIT: | ||
909 | if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { | ||
910 | STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); | ||
911 | } | ||
912 | |||
913 | if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { | ||
914 | FETCH( &r[0], 0, CHAN_X ); | ||
915 | if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { | ||
916 | r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); | ||
917 | STORE( &r[0], 0, CHAN_Y ); | ||
918 | } | ||
919 | |||
920 | if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { | ||
921 | FETCH( &r[1], 0, CHAN_Y ); | ||
922 | r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); | ||
923 | |||
924 | FETCH( &r[2], 0, CHAN_W ); | ||
925 | r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q); | ||
926 | r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q); | ||
927 | r[1].q = micro_pow(r[1].q, r[2].q); | ||
928 | |||
929 | /* r0 = (r0 > 0.0) ? r1 : 0.0 | ||
930 | */ | ||
931 | r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); | ||
932 | r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q, | ||
933 | r[0].q); | ||
934 | STORE( &r[0], 0, CHAN_Z ); | ||
935 | } | ||
936 | } | ||
937 | |||
938 | if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { | ||
939 | STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); | ||
940 | } | ||
941 | break; | ||
942 | |||
943 | case TGSI_OPCODE_RCP: | ||
944 | FETCH( &r[0], 0, CHAN_X ); | ||
945 | r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); | ||
946 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
947 | STORE( &r[0], 0, chan_index ); | ||
948 | } | ||
949 | break; | ||
950 | |||
951 | case TGSI_OPCODE_RSQ: | ||
952 | FETCH( &r[0], 0, CHAN_X ); | ||
953 | r[0].q = micro_sqrt(r[0].q); | ||
954 | r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); | ||
955 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
956 | STORE( &r[0], 0, chan_index ); | ||
957 | } | ||
958 | break; | ||
959 | |||
960 | case TGSI_OPCODE_EXP: | ||
961 | ASSERT (0); | ||
962 | break; | ||
963 | |||
964 | case TGSI_OPCODE_LOG: | ||
965 | ASSERT (0); | ||
966 | break; | ||
967 | |||
968 | case TGSI_OPCODE_MUL: | ||
969 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) | ||
970 | { | ||
971 | FETCH(&r[0], 0, chan_index); | ||
972 | FETCH(&r[1], 1, chan_index); | ||
973 | |||
974 | r[0].q = si_fm(r[0].q, r[1].q); | ||
975 | |||
976 | STORE(&r[0], 0, chan_index); | ||
977 | } | ||
978 | break; | ||
979 | |||
980 | case TGSI_OPCODE_ADD: | ||
981 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
982 | FETCH( &r[0], 0, chan_index ); | ||
983 | FETCH( &r[1], 1, chan_index ); | ||
984 | r[0].q = si_fa(r[0].q, r[1].q); | ||
985 | STORE( &r[0], 0, chan_index ); | ||
986 | } | ||
987 | break; | ||
988 | |||
989 | case TGSI_OPCODE_DP3: | ||
990 | /* TGSI_OPCODE_DOT3 */ | ||
991 | FETCH( &r[0], 0, CHAN_X ); | ||
992 | FETCH( &r[1], 1, CHAN_X ); | ||
993 | r[0].q = si_fm(r[0].q, r[1].q); | ||
994 | |||
995 | FETCH( &r[1], 0, CHAN_Y ); | ||
996 | FETCH( &r[2], 1, CHAN_Y ); | ||
997 | r[0].q = si_fma(r[1].q, r[2].q, r[0].q); | ||
998 | |||
999 | |||
1000 | FETCH( &r[1], 0, CHAN_Z ); | ||
1001 | FETCH( &r[2], 1, CHAN_Z ); | ||
1002 | r[0].q = si_fma(r[1].q, r[2].q, r[0].q); | ||
1003 | |||
1004 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1005 | STORE( &r[0], 0, chan_index ); | ||
1006 | } | ||
1007 | break; | ||
1008 | |||
1009 | case TGSI_OPCODE_DP4: | ||
1010 | /* TGSI_OPCODE_DOT4 */ | ||
1011 | FETCH(&r[0], 0, CHAN_X); | ||
1012 | FETCH(&r[1], 1, CHAN_X); | ||
1013 | |||
1014 | r[0].q = si_fm(r[0].q, r[1].q); | ||
1015 | |||
1016 | FETCH(&r[1], 0, CHAN_Y); | ||
1017 | FETCH(&r[2], 1, CHAN_Y); | ||
1018 | |||
1019 | r[0].q = si_fma(r[1].q, r[2].q, r[0].q); | ||
1020 | |||
1021 | FETCH(&r[1], 0, CHAN_Z); | ||
1022 | FETCH(&r[2], 1, CHAN_Z); | ||
1023 | |||
1024 | r[0].q = si_fma(r[1].q, r[2].q, r[0].q); | ||
1025 | |||
1026 | FETCH(&r[1], 0, CHAN_W); | ||
1027 | FETCH(&r[2], 1, CHAN_W); | ||
1028 | |||
1029 | r[0].q = si_fma(r[1].q, r[2].q, r[0].q); | ||
1030 | |||
1031 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1032 | STORE( &r[0], 0, chan_index ); | ||
1033 | } | ||
1034 | break; | ||
1035 | |||
1036 | case TGSI_OPCODE_DST: | ||
1037 | if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { | ||
1038 | STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); | ||
1039 | } | ||
1040 | |||
1041 | if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { | ||
1042 | FETCH( &r[0], 0, CHAN_Y ); | ||
1043 | FETCH( &r[1], 1, CHAN_Y); | ||
1044 | r[0].q = si_fm(r[0].q, r[1].q); | ||
1045 | STORE( &r[0], 0, CHAN_Y ); | ||
1046 | } | ||
1047 | |||
1048 | if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { | ||
1049 | FETCH( &r[0], 0, CHAN_Z ); | ||
1050 | STORE( &r[0], 0, CHAN_Z ); | ||
1051 | } | ||
1052 | |||
1053 | if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { | ||
1054 | FETCH( &r[0], 1, CHAN_W ); | ||
1055 | STORE( &r[0], 0, CHAN_W ); | ||
1056 | } | ||
1057 | break; | ||
1058 | |||
1059 | case TGSI_OPCODE_MIN: | ||
1060 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1061 | FETCH(&r[0], 0, chan_index); | ||
1062 | FETCH(&r[1], 1, chan_index); | ||
1063 | |||
1064 | r[0].q = micro_min(r[0].q, r[1].q); | ||
1065 | |||
1066 | STORE(&r[0], 0, chan_index); | ||
1067 | } | ||
1068 | break; | ||
1069 | |||
1070 | case TGSI_OPCODE_MAX: | ||
1071 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1072 | FETCH(&r[0], 0, chan_index); | ||
1073 | FETCH(&r[1], 1, chan_index); | ||
1074 | |||
1075 | r[0].q = micro_max(r[0].q, r[1].q); | ||
1076 | |||
1077 | STORE(&r[0], 0, chan_index ); | ||
1078 | } | ||
1079 | break; | ||
1080 | |||
1081 | case TGSI_OPCODE_SLT: | ||
1082 | /* TGSI_OPCODE_SETLT */ | ||
1083 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1084 | FETCH( &r[0], 0, chan_index ); | ||
1085 | FETCH( &r[1], 1, chan_index ); | ||
1086 | |||
1087 | r[0].q = micro_ge(r[0].q, r[1].q); | ||
1088 | r[0].q = si_xori(r[0].q, 0xff); | ||
1089 | |||
1090 | STORE( &r[0], 0, chan_index ); | ||
1091 | } | ||
1092 | break; | ||
1093 | |||
1094 | case TGSI_OPCODE_SGE: | ||
1095 | /* TGSI_OPCODE_SETGE */ | ||
1096 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1097 | FETCH( &r[0], 0, chan_index ); | ||
1098 | FETCH( &r[1], 1, chan_index ); | ||
1099 | r[0].q = micro_ge(r[0].q, r[1].q); | ||
1100 | STORE( &r[0], 0, chan_index ); | ||
1101 | } | ||
1102 | break; | ||
1103 | |||
1104 | case TGSI_OPCODE_MAD: | ||
1105 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1106 | FETCH( &r[0], 0, chan_index ); | ||
1107 | FETCH( &r[1], 1, chan_index ); | ||
1108 | FETCH( &r[2], 2, chan_index ); | ||
1109 | r[0].q = si_fma(r[0].q, r[1].q, r[2].q); | ||
1110 | STORE( &r[0], 0, chan_index ); | ||
1111 | } | ||
1112 | break; | ||
1113 | |||
1114 | case TGSI_OPCODE_SUB: | ||
1115 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1116 | FETCH(&r[0], 0, chan_index); | ||
1117 | FETCH(&r[1], 1, chan_index); | ||
1118 | |||
1119 | r[0].q = si_fs(r[0].q, r[1].q); | ||
1120 | |||
1121 | STORE(&r[0], 0, chan_index); | ||
1122 | } | ||
1123 | break; | ||
1124 | |||
1125 | case TGSI_OPCODE_LRP: | ||
1126 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1127 | FETCH(&r[0], 0, chan_index); | ||
1128 | FETCH(&r[1], 1, chan_index); | ||
1129 | FETCH(&r[2], 2, chan_index); | ||
1130 | |||
1131 | r[1].q = si_fs(r[1].q, r[2].q); | ||
1132 | r[0].q = si_fma(r[0].q, r[1].q, r[2].q); | ||
1133 | |||
1134 | STORE(&r[0], 0, chan_index); | ||
1135 | } | ||
1136 | break; | ||
1137 | |||
1138 | case TGSI_OPCODE_CND: | ||
1139 | ASSERT (0); | ||
1140 | break; | ||
1141 | |||
1142 | case TGSI_OPCODE_DP2A: | ||
1143 | ASSERT (0); | ||
1144 | break; | ||
1145 | |||
1146 | case TGSI_OPCODE_FRC: | ||
1147 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1148 | FETCH( &r[0], 0, chan_index ); | ||
1149 | r[0].q = micro_frc(r[0].q); | ||
1150 | STORE( &r[0], 0, chan_index ); | ||
1151 | } | ||
1152 | break; | ||
1153 | |||
1154 | case TGSI_OPCODE_CLAMP: | ||
1155 | ASSERT (0); | ||
1156 | break; | ||
1157 | |||
1158 | case TGSI_OPCODE_FLR: | ||
1159 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1160 | FETCH( &r[0], 0, chan_index ); | ||
1161 | r[0].q = micro_flr(r[0].q); | ||
1162 | STORE( &r[0], 0, chan_index ); | ||
1163 | } | ||
1164 | break; | ||
1165 | |||
1166 | case TGSI_OPCODE_ROUND: | ||
1167 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1168 | FETCH( &r[0], 0, chan_index ); | ||
1169 | r[0].q = micro_rnd(r[0].q); | ||
1170 | STORE( &r[0], 0, chan_index ); | ||
1171 | } | ||
1172 | break; | ||
1173 | |||
1174 | case TGSI_OPCODE_EX2: | ||
1175 | FETCH(&r[0], 0, CHAN_X); | ||
1176 | |||
1177 | r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q); | ||
1178 | |||
1179 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1180 | STORE( &r[0], 0, chan_index ); | ||
1181 | } | ||
1182 | break; | ||
1183 | |||
1184 | case TGSI_OPCODE_LG2: | ||
1185 | FETCH( &r[0], 0, CHAN_X ); | ||
1186 | r[0].q = micro_lg2(r[0].q); | ||
1187 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1188 | STORE( &r[0], 0, chan_index ); | ||
1189 | } | ||
1190 | break; | ||
1191 | |||
1192 | case TGSI_OPCODE_POW: | ||
1193 | FETCH(&r[0], 0, CHAN_X); | ||
1194 | FETCH(&r[1], 1, CHAN_X); | ||
1195 | |||
1196 | r[0].q = micro_pow(r[0].q, r[1].q); | ||
1197 | |||
1198 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1199 | STORE( &r[0], 0, chan_index ); | ||
1200 | } | ||
1201 | break; | ||
1202 | |||
1203 | case TGSI_OPCODE_XPD: | ||
1204 | /* TGSI_OPCODE_XPD */ | ||
1205 | FETCH(&r[0], 0, CHAN_Y); | ||
1206 | FETCH(&r[1], 1, CHAN_Z); | ||
1207 | FETCH(&r[3], 0, CHAN_Z); | ||
1208 | FETCH(&r[4], 1, CHAN_Y); | ||
1209 | |||
1210 | /* r2 = (r0 * r1) - (r3 * r5) | ||
1211 | */ | ||
1212 | r[2].q = si_fm(r[3].q, r[5].q); | ||
1213 | r[2].q = si_fms(r[0].q, r[1].q, r[2].q); | ||
1214 | |||
1215 | if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { | ||
1216 | STORE( &r[2], 0, CHAN_X ); | ||
1217 | } | ||
1218 | |||
1219 | FETCH(&r[2], 1, CHAN_X); | ||
1220 | FETCH(&r[5], 0, CHAN_X); | ||
1221 | |||
1222 | /* r3 = (r3 * r2) - (r1 * r5) | ||
1223 | */ | ||
1224 | r[1].q = si_fm(r[1].q, r[5].q); | ||
1225 | r[3].q = si_fms(r[3].q, r[2].q, r[1].q); | ||
1226 | |||
1227 | if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { | ||
1228 | STORE( &r[3], 0, CHAN_Y ); | ||
1229 | } | ||
1230 | |||
1231 | /* r5 = (r5 * r4) - (r0 * r2) | ||
1232 | */ | ||
1233 | r[0].q = si_fm(r[0].q, r[2].q); | ||
1234 | r[5].q = si_fms(r[5].q, r[4].q, r[0].q); | ||
1235 | |||
1236 | if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { | ||
1237 | STORE( &r[5], 0, CHAN_Z ); | ||
1238 | } | ||
1239 | |||
1240 | if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { | ||
1241 | STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); | ||
1242 | } | ||
1243 | break; | ||
1244 | |||
1245 | case TGSI_OPCODE_ABS: | ||
1246 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1247 | FETCH(&r[0], 0, chan_index); | ||
1248 | |||
1249 | r[0].q = micro_abs(r[0].q); | ||
1250 | |||
1251 | STORE(&r[0], 0, chan_index); | ||
1252 | } | ||
1253 | break; | ||
1254 | |||
1255 | case TGSI_OPCODE_RCC: | ||
1256 | ASSERT (0); | ||
1257 | break; | ||
1258 | |||
1259 | case TGSI_OPCODE_DPH: | ||
1260 | FETCH(&r[0], 0, CHAN_X); | ||
1261 | FETCH(&r[1], 1, CHAN_X); | ||
1262 | |||
1263 | r[0].q = si_fm(r[0].q, r[1].q); | ||
1264 | |||
1265 | FETCH(&r[1], 0, CHAN_Y); | ||
1266 | FETCH(&r[2], 1, CHAN_Y); | ||
1267 | |||
1268 | r[0].q = si_fma(r[1].q, r[2].q, r[0].q); | ||
1269 | |||
1270 | FETCH(&r[1], 0, CHAN_Z); | ||
1271 | FETCH(&r[2], 1, CHAN_Z); | ||
1272 | |||
1273 | r[0].q = si_fma(r[1].q, r[2].q, r[0].q); | ||
1274 | |||
1275 | FETCH(&r[1], 1, CHAN_W); | ||
1276 | |||
1277 | r[0].q = si_fa(r[0].q, r[1].q); | ||
1278 | |||
1279 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1280 | STORE( &r[0], 0, chan_index ); | ||
1281 | } | ||
1282 | break; | ||
1283 | |||
1284 | case TGSI_OPCODE_COS: | ||
1285 | FETCH(&r[0], 0, CHAN_X); | ||
1286 | |||
1287 | r[0].q = micro_cos(r[0].q); | ||
1288 | |||
1289 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1290 | STORE( &r[0], 0, chan_index ); | ||
1291 | } | ||
1292 | break; | ||
1293 | |||
1294 | case TGSI_OPCODE_DDX: | ||
1295 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1296 | FETCH( &r[0], 0, chan_index ); | ||
1297 | r[0].q = micro_ddx(r[0].q); | ||
1298 | STORE( &r[0], 0, chan_index ); | ||
1299 | } | ||
1300 | break; | ||
1301 | |||
1302 | case TGSI_OPCODE_DDY: | ||
1303 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1304 | FETCH( &r[0], 0, chan_index ); | ||
1305 | r[0].q = micro_ddy(r[0].q); | ||
1306 | STORE( &r[0], 0, chan_index ); | ||
1307 | } | ||
1308 | break; | ||
1309 | |||
1310 | case TGSI_OPCODE_KILP: | ||
1311 | exec_kilp (mach, inst); | ||
1312 | break; | ||
1313 | |||
1314 | case TGSI_OPCODE_KIL: | ||
1315 | exec_kil (mach, inst); | ||
1316 | break; | ||
1317 | |||
1318 | case TGSI_OPCODE_PK2H: | ||
1319 | ASSERT (0); | ||
1320 | break; | ||
1321 | |||
1322 | case TGSI_OPCODE_PK2US: | ||
1323 | ASSERT (0); | ||
1324 | break; | ||
1325 | |||
1326 | case TGSI_OPCODE_PK4B: | ||
1327 | ASSERT (0); | ||
1328 | break; | ||
1329 | |||
1330 | case TGSI_OPCODE_PK4UB: | ||
1331 | ASSERT (0); | ||
1332 | break; | ||
1333 | |||
1334 | case TGSI_OPCODE_RFL: | ||
1335 | ASSERT (0); | ||
1336 | break; | ||
1337 | |||
1338 | case TGSI_OPCODE_SEQ: | ||
1339 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1340 | FETCH( &r[0], 0, chan_index ); | ||
1341 | FETCH( &r[1], 1, chan_index ); | ||
1342 | |||
1343 | r[0].q = si_fceq(r[0].q, r[1].q); | ||
1344 | |||
1345 | STORE( &r[0], 0, chan_index ); | ||
1346 | } | ||
1347 | break; | ||
1348 | |||
1349 | case TGSI_OPCODE_SFL: | ||
1350 | ASSERT (0); | ||
1351 | break; | ||
1352 | |||
1353 | case TGSI_OPCODE_SGT: | ||
1354 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1355 | FETCH( &r[0], 0, chan_index ); | ||
1356 | FETCH( &r[1], 1, chan_index ); | ||
1357 | r[0].q = si_fcgt(r[0].q, r[1].q); | ||
1358 | STORE( &r[0], 0, chan_index ); | ||
1359 | } | ||
1360 | break; | ||
1361 | |||
1362 | case TGSI_OPCODE_SIN: | ||
1363 | FETCH( &r[0], 0, CHAN_X ); | ||
1364 | r[0].q = micro_sin(r[0].q); | ||
1365 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1366 | STORE( &r[0], 0, chan_index ); | ||
1367 | } | ||
1368 | break; | ||
1369 | |||
1370 | case TGSI_OPCODE_SLE: | ||
1371 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1372 | FETCH( &r[0], 0, chan_index ); | ||
1373 | FETCH( &r[1], 1, chan_index ); | ||
1374 | |||
1375 | r[0].q = si_fcgt(r[0].q, r[1].q); | ||
1376 | r[0].q = si_xori(r[0].q, 0xff); | ||
1377 | |||
1378 | STORE( &r[0], 0, chan_index ); | ||
1379 | } | ||
1380 | break; | ||
1381 | |||
1382 | case TGSI_OPCODE_SNE: | ||
1383 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1384 | FETCH( &r[0], 0, chan_index ); | ||
1385 | FETCH( &r[1], 1, chan_index ); | ||
1386 | |||
1387 | r[0].q = si_fceq(r[0].q, r[1].q); | ||
1388 | r[0].q = si_xori(r[0].q, 0xff); | ||
1389 | |||
1390 | STORE( &r[0], 0, chan_index ); | ||
1391 | } | ||
1392 | break; | ||
1393 | |||
1394 | case TGSI_OPCODE_STR: | ||
1395 | ASSERT (0); | ||
1396 | break; | ||
1397 | |||
1398 | case TGSI_OPCODE_TEX: | ||
1399 | /* simple texture lookup */ | ||
1400 | /* src[0] = texcoord */ | ||
1401 | /* src[1] = sampler unit */ | ||
1402 | exec_tex(mach, inst, FALSE, FALSE); | ||
1403 | break; | ||
1404 | |||
1405 | case TGSI_OPCODE_TXB: | ||
1406 | /* Texture lookup with lod bias */ | ||
1407 | /* src[0] = texcoord (src[0].w = load bias) */ | ||
1408 | /* src[1] = sampler unit */ | ||
1409 | exec_tex(mach, inst, TRUE, FALSE); | ||
1410 | break; | ||
1411 | |||
1412 | case TGSI_OPCODE_TXD: | ||
1413 | /* Texture lookup with explict partial derivatives */ | ||
1414 | /* src[0] = texcoord */ | ||
1415 | /* src[1] = d[strq]/dx */ | ||
1416 | /* src[2] = d[strq]/dy */ | ||
1417 | /* src[3] = sampler unit */ | ||
1418 | ASSERT (0); | ||
1419 | break; | ||
1420 | |||
1421 | case TGSI_OPCODE_TXL: | ||
1422 | /* Texture lookup with explit LOD */ | ||
1423 | /* src[0] = texcoord (src[0].w = load bias) */ | ||
1424 | /* src[1] = sampler unit */ | ||
1425 | exec_tex(mach, inst, TRUE, FALSE); | ||
1426 | break; | ||
1427 | |||
1428 | case TGSI_OPCODE_TXP: | ||
1429 | /* Texture lookup with projection */ | ||
1430 | /* src[0] = texcoord (src[0].w = projection) */ | ||
1431 | /* src[1] = sampler unit */ | ||
1432 | exec_tex(mach, inst, TRUE, TRUE); | ||
1433 | break; | ||
1434 | |||
1435 | case TGSI_OPCODE_UP2H: | ||
1436 | ASSERT (0); | ||
1437 | break; | ||
1438 | |||
1439 | case TGSI_OPCODE_UP2US: | ||
1440 | ASSERT (0); | ||
1441 | break; | ||
1442 | |||
1443 | case TGSI_OPCODE_UP4B: | ||
1444 | ASSERT (0); | ||
1445 | break; | ||
1446 | |||
1447 | case TGSI_OPCODE_UP4UB: | ||
1448 | ASSERT (0); | ||
1449 | break; | ||
1450 | |||
1451 | case TGSI_OPCODE_X2D: | ||
1452 | ASSERT (0); | ||
1453 | break; | ||
1454 | |||
1455 | case TGSI_OPCODE_ARA: | ||
1456 | ASSERT (0); | ||
1457 | break; | ||
1458 | |||
1459 | case TGSI_OPCODE_ARR: | ||
1460 | ASSERT (0); | ||
1461 | break; | ||
1462 | |||
1463 | case TGSI_OPCODE_BRA: | ||
1464 | ASSERT (0); | ||
1465 | break; | ||
1466 | |||
1467 | case TGSI_OPCODE_CAL: | ||
1468 | /* skip the call if no execution channels are enabled */ | ||
1469 | if (mach->ExecMask) { | ||
1470 | /* do the call */ | ||
1471 | |||
1472 | /* push the Cond, Loop, Cont stacks */ | ||
1473 | ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); | ||
1474 | mach->CondStack[mach->CondStackTop++] = mach->CondMask; | ||
1475 | ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); | ||
1476 | mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; | ||
1477 | ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); | ||
1478 | mach->ContStack[mach->ContStackTop++] = mach->ContMask; | ||
1479 | |||
1480 | ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); | ||
1481 | mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; | ||
1482 | |||
1483 | /* note that PC was already incremented above */ | ||
1484 | mach->CallStack[mach->CallStackTop++] = *pc; | ||
1485 | *pc = inst->InstructionExtLabel.Label; | ||
1486 | } | ||
1487 | break; | ||
1488 | |||
1489 | case TGSI_OPCODE_RET: | ||
1490 | mach->FuncMask &= ~mach->ExecMask; | ||
1491 | UPDATE_EXEC_MASK(mach); | ||
1492 | |||
1493 | if (mach->ExecMask == 0x0) { | ||
1494 | /* really return now (otherwise, keep executing */ | ||
1495 | |||
1496 | if (mach->CallStackTop == 0) { | ||
1497 | /* returning from main() */ | ||
1498 | *pc = -1; | ||
1499 | return; | ||
1500 | } | ||
1501 | *pc = mach->CallStack[--mach->CallStackTop]; | ||
1502 | |||
1503 | /* pop the Cond, Loop, Cont stacks */ | ||
1504 | ASSERT(mach->CondStackTop > 0); | ||
1505 | mach->CondMask = mach->CondStack[--mach->CondStackTop]; | ||
1506 | ASSERT(mach->LoopStackTop > 0); | ||
1507 | mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; | ||
1508 | ASSERT(mach->ContStackTop > 0); | ||
1509 | mach->ContMask = mach->ContStack[--mach->ContStackTop]; | ||
1510 | ASSERT(mach->FuncStackTop > 0); | ||
1511 | mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; | ||
1512 | |||
1513 | UPDATE_EXEC_MASK(mach); | ||
1514 | } | ||
1515 | break; | ||
1516 | |||
1517 | case TGSI_OPCODE_SSG: | ||
1518 | ASSERT (0); | ||
1519 | break; | ||
1520 | |||
1521 | case TGSI_OPCODE_CMP: | ||
1522 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1523 | FETCH(&r[0], 0, chan_index); | ||
1524 | FETCH(&r[1], 1, chan_index); | ||
1525 | FETCH(&r[2], 2, chan_index); | ||
1526 | |||
1527 | /* r0 = (r0 < 0.0) ? r1 : r2 | ||
1528 | */ | ||
1529 | r[3].q = si_xor(r[3].q, r[3].q); | ||
1530 | r[0].q = micro_lt(r[0].q, r[3].q); | ||
1531 | r[0].q = si_selb(r[1].q, r[2].q, r[0].q); | ||
1532 | |||
1533 | STORE(&r[0], 0, chan_index); | ||
1534 | } | ||
1535 | break; | ||
1536 | |||
1537 | case TGSI_OPCODE_SCS: | ||
1538 | if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { | ||
1539 | FETCH( &r[0], 0, CHAN_X ); | ||
1540 | } | ||
1541 | if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { | ||
1542 | r[1].q = micro_cos(r[0].q); | ||
1543 | STORE( &r[1], 0, CHAN_X ); | ||
1544 | } | ||
1545 | if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { | ||
1546 | r[1].q = micro_sin(r[0].q); | ||
1547 | STORE( &r[1], 0, CHAN_Y ); | ||
1548 | } | ||
1549 | if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { | ||
1550 | STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); | ||
1551 | } | ||
1552 | if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { | ||
1553 | STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); | ||
1554 | } | ||
1555 | break; | ||
1556 | |||
1557 | case TGSI_OPCODE_NRM: | ||
1558 | ASSERT (0); | ||
1559 | break; | ||
1560 | |||
1561 | case TGSI_OPCODE_DIV: | ||
1562 | ASSERT( 0 ); | ||
1563 | break; | ||
1564 | |||
1565 | case TGSI_OPCODE_DP2: | ||
1566 | FETCH( &r[0], 0, CHAN_X ); | ||
1567 | FETCH( &r[1], 1, CHAN_X ); | ||
1568 | r[0].q = si_fm(r[0].q, r[1].q); | ||
1569 | |||
1570 | FETCH( &r[1], 0, CHAN_Y ); | ||
1571 | FETCH( &r[2], 1, CHAN_Y ); | ||
1572 | r[0].q = si_fma(r[1].q, r[2].q, r[0].q); | ||
1573 | |||
1574 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1575 | STORE( &r[0], 0, chan_index ); | ||
1576 | } | ||
1577 | break; | ||
1578 | |||
1579 | case TGSI_OPCODE_IF: | ||
1580 | /* push CondMask */ | ||
1581 | ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); | ||
1582 | mach->CondStack[mach->CondStackTop++] = mach->CondMask; | ||
1583 | FETCH( &r[0], 0, CHAN_X ); | ||
1584 | /* update CondMask */ | ||
1585 | if( ! r[0].u[0] ) { | ||
1586 | mach->CondMask &= ~0x1; | ||
1587 | } | ||
1588 | if( ! r[0].u[1] ) { | ||
1589 | mach->CondMask &= ~0x2; | ||
1590 | } | ||
1591 | if( ! r[0].u[2] ) { | ||
1592 | mach->CondMask &= ~0x4; | ||
1593 | } | ||
1594 | if( ! r[0].u[3] ) { | ||
1595 | mach->CondMask &= ~0x8; | ||
1596 | } | ||
1597 | UPDATE_EXEC_MASK(mach); | ||
1598 | /* Todo: If CondMask==0, jump to ELSE */ | ||
1599 | break; | ||
1600 | |||
1601 | case TGSI_OPCODE_ELSE: | ||
1602 | /* invert CondMask wrt previous mask */ | ||
1603 | { | ||
1604 | uint prevMask; | ||
1605 | ASSERT(mach->CondStackTop > 0); | ||
1606 | prevMask = mach->CondStack[mach->CondStackTop - 1]; | ||
1607 | mach->CondMask = ~mach->CondMask & prevMask; | ||
1608 | UPDATE_EXEC_MASK(mach); | ||
1609 | /* Todo: If CondMask==0, jump to ENDIF */ | ||
1610 | } | ||
1611 | break; | ||
1612 | |||
1613 | case TGSI_OPCODE_ENDIF: | ||
1614 | /* pop CondMask */ | ||
1615 | ASSERT(mach->CondStackTop > 0); | ||
1616 | mach->CondMask = mach->CondStack[--mach->CondStackTop]; | ||
1617 | UPDATE_EXEC_MASK(mach); | ||
1618 | break; | ||
1619 | |||
1620 | case TGSI_OPCODE_END: | ||
1621 | /* halt execution */ | ||
1622 | *pc = -1; | ||
1623 | break; | ||
1624 | |||
1625 | case TGSI_OPCODE_PUSHA: | ||
1626 | ASSERT (0); | ||
1627 | break; | ||
1628 | |||
1629 | case TGSI_OPCODE_POPA: | ||
1630 | ASSERT (0); | ||
1631 | break; | ||
1632 | |||
1633 | case TGSI_OPCODE_CEIL: | ||
1634 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1635 | FETCH( &r[0], 0, chan_index ); | ||
1636 | r[0].q = micro_ceil(r[0].q); | ||
1637 | STORE( &r[0], 0, chan_index ); | ||
1638 | } | ||
1639 | break; | ||
1640 | |||
1641 | case TGSI_OPCODE_I2F: | ||
1642 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1643 | FETCH( &r[0], 0, chan_index ); | ||
1644 | r[0].q = si_csflt(r[0].q, 0); | ||
1645 | STORE( &r[0], 0, chan_index ); | ||
1646 | } | ||
1647 | break; | ||
1648 | |||
1649 | case TGSI_OPCODE_NOT: | ||
1650 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1651 | FETCH( &r[0], 0, chan_index ); | ||
1652 | r[0].q = si_xorbi(r[0].q, 0xff); | ||
1653 | STORE( &r[0], 0, chan_index ); | ||
1654 | } | ||
1655 | break; | ||
1656 | |||
1657 | case TGSI_OPCODE_TRUNC: | ||
1658 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1659 | FETCH( &r[0], 0, chan_index ); | ||
1660 | r[0].q = micro_trunc(r[0].q); | ||
1661 | STORE( &r[0], 0, chan_index ); | ||
1662 | } | ||
1663 | break; | ||
1664 | |||
1665 | case TGSI_OPCODE_SHL: | ||
1666 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1667 | FETCH( &r[0], 0, chan_index ); | ||
1668 | FETCH( &r[1], 1, chan_index ); | ||
1669 | |||
1670 | r[0].q = si_shl(r[0].q, r[1].q); | ||
1671 | |||
1672 | STORE( &r[0], 0, chan_index ); | ||
1673 | } | ||
1674 | break; | ||
1675 | |||
1676 | case TGSI_OPCODE_ISHR: | ||
1677 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1678 | FETCH( &r[0], 0, chan_index ); | ||
1679 | FETCH( &r[1], 1, chan_index ); | ||
1680 | r[0].q = micro_ishr(r[0].q, r[1].q); | ||
1681 | STORE( &r[0], 0, chan_index ); | ||
1682 | } | ||
1683 | break; | ||
1684 | |||
1685 | case TGSI_OPCODE_AND: | ||
1686 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1687 | FETCH( &r[0], 0, chan_index ); | ||
1688 | FETCH( &r[1], 1, chan_index ); | ||
1689 | r[0].q = si_and(r[0].q, r[1].q); | ||
1690 | STORE( &r[0], 0, chan_index ); | ||
1691 | } | ||
1692 | break; | ||
1693 | |||
1694 | case TGSI_OPCODE_OR: | ||
1695 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1696 | FETCH( &r[0], 0, chan_index ); | ||
1697 | FETCH( &r[1], 1, chan_index ); | ||
1698 | r[0].q = si_or(r[0].q, r[1].q); | ||
1699 | STORE( &r[0], 0, chan_index ); | ||
1700 | } | ||
1701 | break; | ||
1702 | |||
1703 | case TGSI_OPCODE_MOD: | ||
1704 | ASSERT (0); | ||
1705 | break; | ||
1706 | |||
1707 | case TGSI_OPCODE_XOR: | ||
1708 | FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { | ||
1709 | FETCH( &r[0], 0, chan_index ); | ||
1710 | FETCH( &r[1], 1, chan_index ); | ||
1711 | r[0].q = si_xor(r[0].q, r[1].q); | ||
1712 | STORE( &r[0], 0, chan_index ); | ||
1713 | } | ||
1714 | break; | ||
1715 | |||
1716 | case TGSI_OPCODE_SAD: | ||
1717 | ASSERT (0); | ||
1718 | break; | ||
1719 | |||
1720 | case TGSI_OPCODE_TXF: | ||
1721 | ASSERT (0); | ||
1722 | break; | ||
1723 | |||
1724 | case TGSI_OPCODE_TXQ: | ||
1725 | ASSERT (0); | ||
1726 | break; | ||
1727 | |||
1728 | case TGSI_OPCODE_EMIT: | ||
1729 | mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; | ||
1730 | mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; | ||
1731 | break; | ||
1732 | |||
1733 | case TGSI_OPCODE_ENDPRIM: | ||
1734 | mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; | ||
1735 | mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; | ||
1736 | break; | ||
1737 | |||
1738 | case TGSI_OPCODE_BGNLOOP: | ||
1739 | /* push LoopMask and ContMasks */ | ||
1740 | ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); | ||
1741 | mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; | ||
1742 | ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); | ||
1743 | mach->ContStack[mach->ContStackTop++] = mach->ContMask; | ||
1744 | break; | ||
1745 | |||
1746 | case TGSI_OPCODE_ENDLOOP: | ||
1747 | /* Restore ContMask, but don't pop */ | ||
1748 | ASSERT(mach->ContStackTop > 0); | ||
1749 | mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; | ||
1750 | if (mach->LoopMask) { | ||
1751 | /* repeat loop: jump to instruction just past BGNLOOP */ | ||
1752 | *pc = inst->InstructionExtLabel.Label + 1; | ||
1753 | } | ||
1754 | else { | ||
1755 | /* exit loop: pop LoopMask */ | ||
1756 | ASSERT(mach->LoopStackTop > 0); | ||
1757 | mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; | ||
1758 | /* pop ContMask */ | ||
1759 | ASSERT(mach->ContStackTop > 0); | ||
1760 | mach->ContMask = mach->ContStack[--mach->ContStackTop]; | ||
1761 | } | ||
1762 | UPDATE_EXEC_MASK(mach); | ||
1763 | break; | ||
1764 | |||
1765 | case TGSI_OPCODE_BRK: | ||
1766 | /* turn off loop channels for each enabled exec channel */ | ||
1767 | mach->LoopMask &= ~mach->ExecMask; | ||
1768 | /* Todo: if mach->LoopMask == 0, jump to end of loop */ | ||
1769 | UPDATE_EXEC_MASK(mach); | ||
1770 | break; | ||
1771 | |||
1772 | case TGSI_OPCODE_CONT: | ||
1773 | /* turn off cont channels for each enabled exec channel */ | ||
1774 | mach->ContMask &= ~mach->ExecMask; | ||
1775 | /* Todo: if mach->LoopMask == 0, jump to end of loop */ | ||
1776 | UPDATE_EXEC_MASK(mach); | ||
1777 | break; | ||
1778 | |||
1779 | case TGSI_OPCODE_BGNSUB: | ||
1780 | /* no-op */ | ||
1781 | break; | ||
1782 | |||
1783 | case TGSI_OPCODE_ENDSUB: | ||
1784 | /* no-op */ | ||
1785 | break; | ||
1786 | |||
1787 | case TGSI_OPCODE_NOP: | ||
1788 | break; | ||
1789 | |||
1790 | default: | ||
1791 | ASSERT( 0 ); | ||
1792 | } | ||
1793 | } | ||
1794 | |||
1795 | |||
1796 | /** | ||
1797 | * Run TGSI interpreter: reset the execution masks and the kill/output temps, execute every declaration (fragment programs only), then fetch and execute instructions until exec_instruction() sets pc to -1. | ||
1798 | * \return bitmask of "alive" quad components (bitwise complement of the kill-mask temp) | ||
1799 | */ | ||
1800 | uint | ||
1801 | spu_exec_machine_run( struct spu_exec_machine *mach ) | ||
1802 | { | ||
1803 | uint i; | ||
1804 | int pc = 0; | ||
1805 | |||
1806 | mach->CondMask = 0xf; | ||
1807 | mach->LoopMask = 0xf; | ||
1808 | mach->ContMask = 0xf; | ||
1809 | mach->FuncMask = 0xf; | ||
1810 | mach->ExecMask = 0xf; | ||
1811 | |||
1812 | mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */ | ||
1813 | ASSERT(mach->CondStackTop == 0); | ||
1814 | ASSERT(mach->LoopStackTop == 0); | ||
1815 | ASSERT(mach->ContStackTop == 0); | ||
1816 | ASSERT(mach->CallStackTop == 0); | ||
1817 | |||
1818 | mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; | ||
1819 | mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; | ||
1820 | |||
1821 | if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { | ||
1822 | mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; | ||
1823 | mach->Primitives[0] = 0; | ||
1824 | } | ||
1825 | |||
1826 | |||
1827 | /* execute declarations (interpolants) */ | ||
1828 | if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { | ||
1829 | for (i = 0; i < mach->NumDeclarations; i++) { | ||
1830 | PIPE_ALIGN_VAR(16) | ||
1831 | union { | ||
1832 | struct tgsi_full_declaration decl; | ||
1833 | qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; | ||
1834 | } d; | ||
1835 | unsigned ea = (unsigned) (mach->Declarations + i); /* index by the loop counter; pc stays 0 here and would refetch declaration 0 every time */ | ||
1836 | |||
1837 | spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); | ||
1838 | |||
1839 | exec_declaration( mach, &d.decl ); | ||
1840 | } | ||
1841 | } | ||
1842 | |||
1843 | /* execute instructions, until pc is set to -1 */ | ||
1844 | while (pc != -1) { | ||
1845 | PIPE_ALIGN_VAR(16) | ||
1846 | union { | ||
1847 | struct tgsi_full_instruction inst; | ||
1848 | qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; | ||
1849 | } insn; /* local copy of the current instruction; named so it no longer shadows the loop counter i */ | ||
1850 | unsigned ea = (unsigned) (mach->Instructions + pc); | ||
1851 | |||
1852 | spu_dcache_fetch_unaligned(insn.buffer, ea, sizeof(insn.inst)); | ||
1853 | exec_instruction( mach, & insn.inst, &pc ); | ||
1854 | } | ||
1855 | |||
1856 | #if 0 | ||
1857 | /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ | ||
1858 | if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { | ||
1859 | /* | ||
1860 | * Scale back depth component. | ||
1861 | */ | ||
1862 | for (i = 0; i < 4; i++) | ||
1863 | mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; | ||
1864 | } | ||
1865 | #endif | ||
1866 | |||
1867 | return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; | ||
1868 | } | ||
1869 | |||
1870 | |||
diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h deleted file mode 100644 index 68f4479e53d..00000000000 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ /dev/null | |||
@@ -1,173 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #if !defined SPU_EXEC_H | ||
29 | #define SPU_EXEC_H | ||
30 | |||
31 | #include "pipe/p_compiler.h" | ||
32 | |||
33 | #include "spu_tgsi_exec.h" | ||
34 | |||
35 | #if defined __cplusplus | ||
36 | extern "C" { | ||
37 | #endif | ||
38 | |||
39 | /** | ||
40 | * Registers may be treated as float, signed int or unsigned int, or accessed as a single qword through the q member. | ||
41 | */ | ||
42 | union spu_exec_channel | ||
43 | { | ||
44 | float f[QUAD_SIZE]; /* float view, one value per quad element */ | ||
45 | int i[QUAD_SIZE]; /* signed integer view of the same storage */ | ||
46 | unsigned u[QUAD_SIZE]; /* unsigned integer view of the same storage */ | ||
47 | qword q; /* whole-register view; assumes QUAD_SIZE 32-bit lanes fill exactly one qword -- TODO confirm */ | ||
48 | }; | ||
49 | |||
50 | /** | ||
51 | * A vector[RGBA] of channels[4 pixels]: one spu_exec_channel per component. | ||
52 | */ | ||
53 | struct spu_exec_vector | ||
54 | { | ||
55 | union spu_exec_channel xyzw[NUM_CHANNELS]; /* indexed by component (x, y, z, w) */ | ||
56 | }; | ||
57 | |||
58 | /** | ||
59 | * For fragment programs, information for computing fragment input | ||
60 | * values from plane equation of the triangle/line. | ||
61 | */ | ||
62 | struct spu_interp_coef | ||
63 | { | ||
64 | float a0[NUM_CHANNELS]; /* in an xyzw layout */ | ||
65 | float dadx[NUM_CHANNELS]; /* presumably the per-channel change per unit step in x -- confirm against the setup code */ | ||
66 | float dady[NUM_CHANNELS]; /* presumably the per-channel change per unit step in y -- confirm against the setup code */ | ||
67 | }; | ||
68 | |||
69 | |||
70 | struct softpipe_tile_cache; /**< Opaque to TGSI */ | ||
71 | |||
72 | /** | ||
73 | * Information for sampling textures, which must be implemented | ||
74 | * by code outside the TGSI executor. | ||
75 | */ | ||
76 | struct spu_sampler | ||
77 | { | ||
78 | const struct pipe_sampler_state *state; /* sampler state; read-only through this pointer */ | ||
79 | struct pipe_resource *texture; /* texture resource associated with this sampler */ | ||
80 | /** Get samples for four fragments in a quad: takes s/t/p coordinates per fragment plus one lodbias, writes rgba[channel][fragment] */ | ||
81 | void (*get_samples)(struct spu_sampler *sampler, | ||
82 | const float s[QUAD_SIZE], | ||
83 | const float t[QUAD_SIZE], | ||
84 | const float p[QUAD_SIZE], | ||
85 | float lodbias, | ||
86 | float rgba[NUM_CHANNELS][QUAD_SIZE]); | ||
87 | void *pipe; /*XXX temporary*/ | ||
88 | struct softpipe_tile_cache *cache; /* opaque to the executor (type is only forward-declared in this header) */ | ||
89 | }; | ||
90 | |||
91 | |||
92 | /** | ||
93 | * Run-time virtual machine state for executing TGSI shader. | ||
94 | */ | ||
95 | struct spu_exec_machine | ||
96 | { | ||
97 | /* | ||
98 | * 32 program temporaries | ||
99 | * 4 internal temporaries | ||
100 | * 1 address | ||
101 | */ | ||
102 | PIPE_ALIGN_VAR(16) | ||
103 | struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS | ||
104 | + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; | ||
105 | |||
106 | struct spu_exec_vector *Addrs; | ||
107 | |||
108 | struct spu_sampler *Samplers; | ||
109 | |||
110 | float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; | ||
111 | unsigned ImmLimit; | ||
112 | float (*Consts)[4]; | ||
113 | struct spu_exec_vector *Inputs; | ||
114 | struct spu_exec_vector *Outputs; | ||
115 | unsigned Processor; /* compared against TGSI_PROCESSOR_FRAGMENT / TGSI_PROCESSOR_GEOMETRY by the interpreter */ | ||
116 | |||
117 | /* GEOMETRY processor only. */ | ||
118 | unsigned *Primitives; /* per-primitive counters; entry 0 is zeroed at the start of a run */ | ||
119 | |||
120 | /* FRAGMENT processor only. */ | ||
121 | const struct spu_interp_coef *InterpCoefs; | ||
122 | struct spu_exec_vector QuadPos; | ||
123 | |||
124 | /* Conditional execution masks (all reset to 0xf at the start of spu_exec_machine_run) */ | ||
125 | uint CondMask; /**< For IF/ELSE/ENDIF */ | ||
126 | uint LoopMask; /**< For BGNLOOP/ENDLOOP */ | ||
127 | uint ContMask; /**< For loop CONT statements */ | ||
128 | uint FuncMask; /**< For function calls */ | ||
129 | uint ExecMask; /**< = CondMask & LoopMask; NOTE(review): may also fold in ContMask/FuncMask -- confirm against UPDATE_EXEC_MASK */ | ||
130 | |||
131 | /** Condition mask stack (for nested conditionals) */ | ||
132 | uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; | ||
133 | int CondStackTop; | ||
134 | |||
135 | /** Loop mask stack (for nested loops) */ | ||
136 | uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; | ||
137 | int LoopStackTop; /* index of the next free LoopStack slot */ | ||
138 | |||
139 | /** Loop continue mask stack (see comments in tgsi_exec.c) */ | ||
140 | uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; | ||
141 | int ContStackTop; /* index of the next free ContStack slot */ | ||
142 | |||
143 | /** Function execution mask stack (for executing subroutine code) */ | ||
144 | uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; | ||
145 | int FuncStackTop; | ||
146 | |||
147 | /** Function call stack for saving/restoring the program counter */ | ||
148 | uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; | ||
149 | int CallStackTop; | ||
150 | |||
151 | struct tgsi_full_instruction *Instructions; /* instruction array; the interpreter fetches entries through spu_dcache_fetch_unaligned */ | ||
152 | uint NumInstructions; | ||
153 | |||
154 | struct tgsi_full_declaration *Declarations; /* declaration array, fetched the same way for fragment programs */ | ||
155 | uint NumDeclarations; /* number of entries in Declarations */ | ||
156 | }; | ||
157 | |||
158 | |||
159 | extern void /* Initialize a machine; definition is not in this header -- see the implementing .c file for exact semantics */ | ||
160 | spu_exec_machine_init(struct spu_exec_machine *mach, | ||
161 | uint numSamplers, | ||
162 | struct spu_sampler *samplers, | ||
163 | unsigned processor); | ||
164 | |||
165 | extern uint /* Run the interpreter; returns a bitmask of "alive" quad components */ | ||
166 | spu_exec_machine_run( struct spu_exec_machine *mach ); | ||
167 | |||
168 | |||
169 | #if defined __cplusplus | ||
170 | } /* extern "C" */ | ||
171 | #endif | ||
172 | |||
173 | #endif /* SPU_EXEC_H */ | ||
diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c deleted file mode 100644 index 98919c43ffc..00000000000 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ /dev/null | |||
@@ -1,173 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | /** | ||
30 | * SPU functions accessed by shaders. | ||
31 | * | ||
32 | * Authors: Brian Paul | ||
33 | */ | ||
34 | |||
35 | |||
36 | #include <string.h> | ||
37 | #include <libmisc.h> | ||
38 | #include <math.h> | ||
39 | #include <cos14_v.h> | ||
40 | #include <sin14_v.h> | ||
41 | #include <simdmath/exp2f4.h> | ||
42 | #include <simdmath/log2f4.h> | ||
43 | #include <simdmath/powf4.h> | ||
44 | |||
45 | #include "cell/common.h" | ||
46 | #include "spu_main.h" | ||
47 | #include "spu_funcs.h" | ||
48 | #include "spu_texture.h" | ||
49 | |||
50 | |||
51 | /** For "return"-ing four vectors by value from the spu_tex_* helpers */ | ||
52 | struct vec_4x4 | ||
53 | { | ||
54 | vector float v[4]; /* passed to the samplers as their colors[4] output */ | ||
55 | }; | ||
56 | |||
57 | |||
58 | static vector float /* Cosine wrapper; its address is exported by name in return_function_info() */ | ||
59 | spu_cos(vector float x) | ||
60 | { | ||
61 | return _cos14_v(x); /* from <cos14_v.h>; accuracy is whatever that routine provides */ | ||
62 | } | ||
63 | |||
64 | static vector float /* Sine wrapper; its address is exported by name in return_function_info() */ | ||
65 | spu_sin(vector float x) | ||
66 | { | ||
67 | return _sin14_v(x); /* from <sin14_v.h>; accuracy is whatever that routine provides */ | ||
68 | } | ||
69 | |||
70 | static vector float /* Power wrapper (x, y forwarded in that order); exported by name in return_function_info() */ | ||
71 | spu_pow(vector float x, vector float y) | ||
72 | { | ||
73 | return _powf4(x, y); /* from <simdmath/powf4.h> */ | ||
74 | } | ||
75 | |||
76 | static vector float /* Base-2 exponential wrapper; exported by name in return_function_info() */ | ||
77 | spu_exp2(vector float x) | ||
78 | { | ||
79 | return _exp2f4(x); /* from <simdmath/exp2f4.h> */ | ||
80 | } | ||
81 | |||
82 | static vector float /* Base-2 logarithm wrapper; exported by name in return_function_info() */ | ||
83 | spu_log2(vector float x) | ||
84 | { | ||
85 | return _log2f4(x); /* from <simdmath/log2f4.h> */ | ||
86 | } | ||
87 | |||
88 | |||
89 | static struct vec_4x4 /* Sample a 2D texture on the given unit and return the four color vectors by value */ | ||
90 | spu_tex_2d(vector float s, vector float t, vector float r, vector float q, | ||
91 | unsigned unit) | ||
92 | { | ||
93 | struct vec_4x4 colors; | ||
94 | (void) r; /* r and q are accepted but not used for 2D lookups */ | ||
95 | (void) q; | ||
96 | spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); /* dispatch through the per-unit sampler; level 0, face 0 */ | ||
97 | return colors; | ||
98 | } | ||
99 | |||
100 | static struct vec_4x4 /* NOTE(review): body is identical to spu_tex_2d -- r is ignored, so this looks like a 2D fallback rather than true 3D sampling; confirm intent */ | ||
101 | spu_tex_3d(vector float s, vector float t, vector float r, vector float q, | ||
102 | unsigned unit) | ||
103 | { | ||
104 | struct vec_4x4 colors; | ||
105 | (void) r; /* third coordinate is currently unused */ | ||
106 | (void) q; | ||
107 | spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); /* same per-unit 2D sampler, level 0, face 0 */ | ||
108 | return colors; | ||
109 | } | ||
110 | |||
111 | static struct vec_4x4 /* Sample a cube map on the given unit and return the four color vectors by value */ | ||
112 | spu_tex_cube(vector float s, vector float t, vector float r, vector float q, | ||
113 | unsigned unit) | ||
114 | { | ||
115 | struct vec_4x4 colors; | ||
116 | (void) q; /* only s, t and r are forwarded */ | ||
117 | sample_texture_cube(s, t, r, unit, colors.v); | ||
118 | return colors; | ||
119 | } | ||
120 | |||
121 | |||
122 | /** | ||
123 | * Add named function to list of "exported" functions that will be | ||
124 | * made available to the PPU-hosted code generator. The table holds at most 16 entries and each name must be shorter than 16 characters; both limits are asserted before anything is written. | ||
125 | */ | ||
126 | static void | ||
127 | export_func(struct cell_spu_function_info *spu_functions, | ||
128 | const char *name, void *addr) | ||
129 | { | ||
130 | uint n = spu_functions->num; | ||
131 | ASSERT(n < 16); /* check capacity before touching slot n, not after it has been overwritten */ | ||
132 | ASSERT(strlen(name) < 16); | ||
133 | strcpy(spu_functions->names[n], name); | ||
134 | spu_functions->addrs[n] = (uint) addr; | ||
135 | spu_functions->num = n + 1; | ||
136 | } | ||
137 | |||
138 | |||
139 | /** | ||
140 | * Return info about the SPU's function to the PPU / main memory. | ||
141 | * The PPU needs to know the address of some SPU-side functions so | ||
142 | * that we can generate shader code with function calls. The table is built locally, then written with one DMA put to spu.init.spu_functions. | ||
143 | */ | ||
144 | void | ||
145 | return_function_info(void) | ||
146 | { | ||
147 | PIPE_ALIGN_VAR(16) struct cell_spu_function_info funcs; | ||
148 | int tag = TAG_MISC; | ||
149 | |||
150 | ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ | ||
151 | |||
152 | funcs.num = 0; /* export_func() appends at index funcs.num */ | ||
153 | export_func(&funcs, "spu_cos", &spu_cos); | ||
154 | export_func(&funcs, "spu_sin", &spu_sin); | ||
155 | export_func(&funcs, "spu_pow", &spu_pow); | ||
156 | export_func(&funcs, "spu_exp2", &spu_exp2); | ||
157 | export_func(&funcs, "spu_log2", &spu_log2); | ||
158 | export_func(&funcs, "spu_tex_2d", &spu_tex_2d); | ||
159 | export_func(&funcs, "spu_tex_3d", &spu_tex_3d); | ||
160 | export_func(&funcs, "spu_tex_cube", &spu_tex_cube); | ||
161 | |||
162 | /* Send the function info back to the PPU / main memory */ | ||
163 | mfc_put((void *) &funcs, /* src in local store */ | ||
164 | (unsigned int) spu.init.spu_functions, /* dst in main memory */ | ||
165 | sizeof(funcs), /* bytes */ | ||
166 | tag, | ||
167 | 0, /* tid */ | ||
168 | 0 /* rid */); | ||
169 | wait_on_mask(1 << tag); /* funcs is a stack variable, so wait for the put before returning */ | ||
170 | } | ||
171 | |||
172 | |||
173 | |||
diff --git a/src/gallium/drivers/cell/spu/spu_funcs.h b/src/gallium/drivers/cell/spu/spu_funcs.h deleted file mode 100644 index 3adb6ae99f9..00000000000 --- a/src/gallium/drivers/cell/spu/spu_funcs.h +++ /dev/null | |||
@@ -1,35 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #ifndef SPU_FUNCS_H | ||
29 | #define SPU_FUNCS_H | ||
30 | |||
31 | extern void /* Publish the addresses of SPU-side helper functions to the PPU (defined in spu_funcs.c) */ | ||
32 | return_function_info(void); | ||
33 | |||
34 | #endif | ||
35 | |||
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c deleted file mode 100644 index 97c86d194da..00000000000 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ /dev/null | |||
@@ -1,117 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | /* main() for Cell SPU code */ | ||
30 | |||
31 | |||
32 | #include <stdio.h> | ||
33 | #include <libmisc.h> | ||
34 | |||
35 | #include "pipe/p_defines.h" | ||
36 | |||
37 | #include "spu_funcs.h" | ||
38 | #include "spu_command.h" | ||
39 | #include "spu_main.h" | ||
40 | #include "spu_per_fragment_op.h" | ||
41 | #include "spu_texture.h" | ||
42 | //#include "spu_test.h" | ||
43 | #include "cell/common.h" | ||
44 | |||
45 | |||
46 | /* | ||
47 | helpful headers: | ||
48 | /usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h | ||
49 | /opt/cell/sdk/usr/include/libmisc.h | ||
50 | */ | ||
51 | |||
52 | struct spu_global spu; /* the single SPU-wide state object, declared extern in spu_main.h */ | ||
53 | |||
54 | |||
55 | static void /* One-time local-state setup, called from main() before the command loop */ | ||
56 | one_time_init(void) | ||
57 | { | ||
58 | memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); /* mark every color tile TILE_STATUS_DEFINED */ | ||
59 | memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); /* same for every Z tile */ | ||
60 | invalidate_tex_cache(); | ||
61 | } | ||
62 | |||
63 | /* In some versions of the SDK the SPE main takes 'unsigned long' as a | ||
64 | * parameter. In others it takes 'unsigned long long'. Use a define to | ||
65 | * select between the two: SPU_MAIN_PARAM_LONG_LONG picks the wider type. | ||
66 | */ | ||
67 | #ifdef SPU_MAIN_PARAM_LONG_LONG | ||
68 | typedef unsigned long long main_param_t; | ||
69 | #else | ||
70 | typedef unsigned long main_param_t; | ||
71 | #endif | ||
72 | |||
73 | /** | ||
74 | * SPE entrypoint: set up local state, DMA the cell_init_info block from the address in argp into spu.init, publish function addresses (SPU 0 only), then run the command loop. Always returns 0. | ||
75 | */ | ||
76 | int | ||
77 | main(main_param_t speid, main_param_t argp) | ||
78 | { | ||
79 | int tag = 0; | ||
80 | |||
81 | (void) speid; | ||
82 | |||
83 | ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); | ||
84 | ASSERT(sizeof(struct cell_command_render) % 8 == 0); | ||
85 | ASSERT(sizeof(struct cell_command_fragment_ops) % 8 == 0); | ||
86 | ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0); | ||
87 | |||
88 | one_time_init(); | ||
89 | spu_command_init(); | ||
90 | |||
91 | /* get initialization data */ | ||
92 | mfc_get(&spu.init, /* dest */ | ||
93 | (unsigned int) argp, /* src */ | ||
94 | sizeof(struct cell_init_info), /* bytes */ | ||
95 | tag, | ||
96 | 0, /* tid */ | ||
97 | 0 /* rid */); | ||
98 | wait_on_mask( 1 << tag ); | ||
99 | /* D_PRINTF tests spu.init.debug_flags and prints spu.init.id, so debug output is only meaningful once the init block has arrived */ | ||
100 | D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid); | ||
101 | D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); | ||
102 | |||
103 | if (spu.init.id == 0) { | ||
104 | return_function_info(); | ||
105 | } | ||
106 | |||
107 | #if 0 | ||
108 | if (spu.init.id==0) | ||
109 | spu_test_misc(spu.init.id); | ||
110 | #endif | ||
111 | |||
112 | command_loop(); | ||
113 | |||
114 | spu_command_close(); | ||
115 | |||
116 | return 0; | ||
117 | } | ||
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h deleted file mode 100644 index a9d72f84d56..00000000000 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ /dev/null | |||
@@ -1,269 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #ifndef SPU_MAIN_H | ||
29 | #define SPU_MAIN_H | ||
30 | |||
31 | |||
32 | #include <spu_mfcio.h> | ||
33 | |||
34 | #include "cell/common.h" | ||
35 | #include "draw/draw_vertex.h" | ||
36 | #include "pipe/p_state.h" | ||
37 | |||
38 | |||
39 | #if DEBUG | ||
40 | /* These debug macros use the unusual construction ", ##__VA_ARGS__" | ||
41 | * which expands to the expected comma + args if variadic arguments | ||
42 | * are supplied, but swallows the comma if there are no variadic | ||
43 | * arguments (which avoids syntax errors that would otherwise occur). | ||
44 | */ | ||
45 | #define D_PRINTF(flag, format,...) \ | ||
46 | if (spu.init.debug_flags & (flag)) \ | ||
47 | printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) | ||
48 | #else | ||
49 | #define D_PRINTF(...) | ||
50 | #endif | ||
51 | |||
52 | |||
53 | /** | ||
54 | * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels. | ||
55 | * The data may be addressed through several different types. | ||
56 | */ | ||
57 | typedef union { | ||
58 | ushort us[TILE_SIZE][TILE_SIZE]; /* 16-bit elements; covers only part of the union's storage */ | ||
59 | uint ui[TILE_SIZE][TILE_SIZE]; /* one 32-bit element per pixel */ | ||
60 | vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4]; /* vector view with the same element count as us */ | ||
61 | vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2]; /* vector view with the same element count as ui */ | ||
62 | } tile_t; | ||
63 | |||
64 | |||
65 | #define TILE_STATUS_CLEAR 1 /**< presumably: tile holds only the clear value -- confirm against the tile code */ | ||
66 | #define TILE_STATUS_DEFINED 2 /**< defined in FB, but not in local store */ | ||
67 | #define TILE_STATUS_CLEAN 3 /**< in local store, but not changed */ | ||
68 | #define TILE_STATUS_DIRTY 4 /**< modified locally, but not put back yet */ | ||
69 | #define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ | ||
70 | |||
71 | |||
72 | /** Function for sampling textures: takes s/t coordinates, the texture unit, mipmap level and face, and writes four color vectors */ | ||
73 | typedef void (*spu_sample_texture_2d_func)(vector float s, | ||
74 | vector float t, | ||
75 | uint unit, uint level, uint face, | ||
76 | vector float colors[4]); | ||
77 | |||
78 | |||
79 | /** Function for performing per-fragment ops on the given color and depth/stencil tiles at position (x, y), using the fragment Z, color and mask vectors */ | ||
80 | typedef void (*spu_fragment_ops_func)(uint x, uint y, | ||
81 | tile_t *colorTile, | ||
82 | tile_t *depthStencilTile, | ||
83 | vector float fragZ, | ||
84 | vector float fragRed, | ||
85 | vector float fragGreen, | ||
86 | vector float fragBlue, | ||
87 | vector float fragAlpha, | ||
88 | vector unsigned int mask); | ||
89 | |||
90 | /** Function for running fragment program: reads inputs and constants, writes outputs, and returns a vector of unsigned ints (meaning not shown here -- presumably a kill/alive mask; confirm) */ | ||
91 | typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, | ||
92 | vector float *outputs, | ||
93 | vector float *constants); | ||
94 | |||
95 | |||
96 | PIPE_ALIGN_TYPE(16, | ||
97 | struct spu_framebuffer | ||
98 | { | ||
99 | void *color_start; /**< addr of color surface in main memory */ | ||
100 | void *depth_start; /**< addr of depth surface in main memory */ | ||
101 | enum pipe_format color_format; | ||
102 | enum pipe_format depth_format; | ||
103 | uint width; /**< width in pixels */ | ||
104 | uint height; /**< height in pixels */ | ||
105 | uint width_tiles; /**< width in tiles */ | ||
106 | uint height_tiles; /**< height in tiles */ | ||
107 | |||
108 | uint color_clear_value; | ||
109 | uint depth_clear_value; | ||
110 | |||
111 | uint zsize; /**< 0, 2 or 4 bytes per Z */ | ||
112 | float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ | ||
113 | }); | ||
114 | |||
115 | |||
116 | /** per-texture level info (one entry per mipmap level in struct spu_texture) */ | ||
117 | PIPE_ALIGN_TYPE(16, | ||
118 | struct spu_texture_level | ||
119 | { | ||
120 | void *start; /* presumably the address of this level's data in main memory -- confirm */ | ||
121 | ushort width; | ||
122 | ushort height; | ||
123 | ushort depth; | ||
124 | ushort tiles_per_row; | ||
125 | uint bytes_per_image; | ||
126 | /** texcoord scale factors */ | ||
127 | vector float scale_s; | ||
128 | vector float scale_t; | ||
129 | vector float scale_r; | ||
130 | /** texcoord masks (if REPEAT then size-1, else ~0) */ | ||
131 | vector signed int mask_s; | ||
132 | vector signed int mask_t; | ||
133 | vector signed int mask_r; | ||
134 | /** texcoord clamp limits */ | ||
135 | vector signed int max_s; | ||
136 | vector signed int max_t; | ||
137 | vector signed int max_r; | ||
138 | }); | ||
139 | |||
140 | |||
141 | PIPE_ALIGN_TYPE(16, | ||
142 | struct spu_texture | ||
143 | { | ||
144 | struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; /* per-level info */ | ||
145 | uint max_level; /* presumably the highest valid index into level[] -- confirm */ | ||
146 | uint target; /**< PIPE_TEXTURE_x */ | ||
147 | }); | ||
148 | |||
149 | |||
150 | /** | ||
151 | * All SPU global/context state will be in a singleton object of this type: | ||
152 | */ | ||
153 | PIPE_ALIGN_TYPE(16, | ||
154 | struct spu_global | ||
155 | { | ||
156 | /** One-time init/constant info (filled by a DMA get in main()) */ | ||
157 | struct cell_init_info init; | ||
158 | |||
159 | /* | ||
160 | * Current state | ||
161 | */ | ||
162 | struct spu_framebuffer fb; | ||
163 | struct pipe_depth_stencil_alpha_state depth_stencil_alpha; | ||
164 | struct pipe_blend_state blend; | ||
165 | struct pipe_blend_color blend_color; | ||
166 | struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; | ||
167 | struct pipe_rasterizer_state rasterizer; | ||
168 | struct spu_texture texture[PIPE_MAX_SAMPLERS]; | ||
169 | struct vertex_info vertex_info; | ||
170 | |||
171 | /** Current color and Z tiles */ | ||
172 | PIPE_ALIGN_VAR(16) tile_t ctile; | ||
173 | PIPE_ALIGN_VAR(16) tile_t ztile; | ||
174 | |||
175 | /** Read depth/stencil tiles? */ | ||
176 | boolean read_depth_stencil; | ||
177 | |||
178 | /** Current tiles' status (a TILE_STATUS_x value) */ | ||
179 | ubyte cur_ctile_status; | ||
180 | ubyte cur_ztile_status; | ||
181 | |||
182 | /** Status of all tiles in framebuffer (TILE_STATUS_x values; set to TILE_STATUS_DEFINED by one_time_init()) */ | ||
183 | PIPE_ALIGN_VAR(16) ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; | ||
184 | PIPE_ALIGN_VAR(16) ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; | ||
185 | |||
186 | /** Current fragment ops machine code, at 8-byte boundary */ | ||
187 | uint *fragment_ops_code; | ||
188 | uint fragment_ops_code_size; | ||
189 | /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */ | ||
190 | spu_fragment_ops_func fragment_ops[2]; | ||
191 | |||
192 | /** Current fragment program machine code, at 8-byte boundary */ | ||
193 | PIPE_ALIGN_VAR(8) uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; | ||
194 | /** Current fragment program function */ | ||
195 | spu_fragment_program_func fragment_program; | ||
196 | |||
197 | /** Current texture sampler function, one per sampler unit */ | ||
198 | spu_sample_texture_2d_func sample_texture_2d[CELL_MAX_SAMPLERS]; | ||
199 | spu_sample_texture_2d_func min_sample_texture_2d[CELL_MAX_SAMPLERS]; | ||
200 | spu_sample_texture_2d_func mag_sample_texture_2d[CELL_MAX_SAMPLERS]; | ||
201 | |||
202 | /** Fragment program constants */ | ||
203 | vector float constants[4 * CELL_MAX_CONSTANTS]; | ||
204 | |||
205 | }); | ||
206 | |||
207 | |||
208 | extern struct spu_global spu; | ||
209 | |||
210 | |||
211 | |||
212 | /* DMA TAGS: tag-group ids passed to mfc_get/mfc_put; callers wait on them with wait_on_mask(1 << tag) */ | ||
213 | |||
214 | #define TAG_SURFACE_CLEAR 10 | ||
215 | #define TAG_VERTEX_BUFFER 11 | ||
216 | #define TAG_READ_TILE_COLOR 12 | ||
217 | #define TAG_READ_TILE_Z 13 | ||
218 | #define TAG_WRITE_TILE_COLOR 14 | ||
219 | #define TAG_WRITE_TILE_Z 15 | ||
220 | #define TAG_INDEX_BUFFER 16 | ||
221 | #define TAG_BATCH_BUFFER 17 | ||
222 | #define TAG_MISC 18 | ||
223 | #define TAG_DCACHE0 20 | ||
224 | #define TAG_DCACHE1 21 | ||
225 | #define TAG_DCACHE2 22 | ||
226 | #define TAG_DCACHE3 23 | ||
227 | #define TAG_FENCE 24 | ||
228 | |||
229 | |||
230 | static INLINE void /* Select the tag groups in tagMask and read the "any" tag status for them */ | ||
231 | wait_on_mask(unsigned tagMask) | ||
232 | { | ||
233 | mfc_write_tag_mask( tagMask ); | ||
234 | /* wait for completion of _any_ DMAs specified by tagMask */ | ||
235 | mfc_read_tag_status_any(); | ||
236 | } | ||
237 | |||
238 | |||
239 | static INLINE void /* Select the tag groups in tagMask and read the "all" tag status for them */ | ||
240 | wait_on_mask_all(unsigned tagMask) | ||
241 | { | ||
242 | mfc_write_tag_mask( tagMask ); | ||
243 | /* wait for completion of _all_ DMAs specified by tagMask */ | ||
244 | mfc_read_tag_status_all(); | ||
245 | } | ||
246 | |||
247 | |||
248 | |||
249 | |||
250 | |||
251 | static INLINE void /* Store value into the first count 16-bit elements starting at d */ | ||
252 | memset16(ushort *d, ushort value, uint count) | ||
253 | { | ||
254 | ushort *const end = d + count; /* one past the last element to fill */ | ||
255 | while (d != end) | ||
256 | *d++ = value; | ||
257 | } | ||
258 | |||
259 | |||
260 | static INLINE void /* Store value into the first count 32-bit elements starting at d */ | ||
261 | memset32(uint *d, uint value, uint count) | ||
262 | { | ||
263 | uint *const end = d + count; /* one past the last element to fill */ | ||
264 | while (d != end) | ||
265 | *d++ = value; | ||
266 | } | ||
267 | |||
268 | |||
269 | #endif /* SPU_MAIN_H */ | ||
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c deleted file mode 100644 index 2415226a244..00000000000 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ /dev/null | |||
@@ -1,631 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | /** | ||
29 | * \author Brian Paul | ||
30 | */ | ||
31 | |||
32 | |||
33 | #include <transpose_matrix4x4.h> | ||
34 | #include "pipe/p_format.h" | ||
35 | #include "spu_main.h" | ||
36 | #include "spu_colorpack.h" | ||
37 | #include "spu_per_fragment_op.h" | ||
38 | |||
39 | |||
40 | #define LINEAR_QUAD_LAYOUT 1 | ||
41 | |||
42 | |||
43 | static INLINE vector float | ||
44 | spu_min(vector float a, vector float b) | ||
45 | { | ||
46 | vector unsigned int m; | ||
47 | m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */ | ||
48 | return spu_sel(a, b, m); | ||
49 | } | ||
50 | |||
51 | |||
52 | static INLINE vector float | ||
53 | spu_max(vector float a, vector float b) | ||
54 | { | ||
55 | vector unsigned int m; | ||
56 | m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */ | ||
57 | return spu_sel(b, a, m); | ||
58 | } | ||
59 | |||
60 | |||
61 | /** | ||
62 | * Called by rasterizer for each quad after the shader has run. Do | ||
63 | * all the per-fragment operations including alpha test, z test, | ||
64 | * stencil test, blend, colormask and logicops. This is a | ||
65 | * fallback/debug function. In reality we'll use a generated function | ||
66 | * produced by the PPU. But this function is useful for | ||
67 | * debug/validation. | ||
68 | */ | ||
69 | void | ||
70 | spu_fallback_fragment_ops(uint x, uint y, | ||
71 | tile_t *colorTile, | ||
72 | tile_t *depthStencilTile, | ||
73 | vector float fragZ, | ||
74 | vector float fragR, | ||
75 | vector float fragG, | ||
76 | vector float fragB, | ||
77 | vector float fragA, | ||
78 | vector unsigned int mask) | ||
79 | { | ||
80 | vector float frag_aos[4]; | ||
81 | unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */ | ||
82 | unsigned int fragc0, fragc1, fragc2, fragc3; /* fragment colors */ | ||
83 | |||
84 | /* | ||
85 | * Do alpha test | ||
86 | */ | ||
87 | if (spu.depth_stencil_alpha.alpha.enabled) { | ||
88 | vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref_value); | ||
89 | vector unsigned int amask; | ||
90 | |||
91 | switch (spu.depth_stencil_alpha.alpha.func) { | ||
92 | case PIPE_FUNC_LESS: | ||
93 | amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */ | ||
94 | break; | ||
95 | case PIPE_FUNC_GREATER: | ||
96 | amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */ | ||
97 | break; | ||
98 | case PIPE_FUNC_GEQUAL: | ||
99 | amask = spu_cmpgt(ref, fragA); | ||
100 | amask = spu_nor(amask, amask); | ||
101 | break; | ||
102 | case PIPE_FUNC_LEQUAL: | ||
103 | amask = spu_cmpgt(fragA, ref); | ||
104 | amask = spu_nor(amask, amask); | ||
105 | break; | ||
106 | case PIPE_FUNC_EQUAL: | ||
107 | amask = spu_cmpeq(ref, fragA); | ||
108 | break; | ||
109 | case PIPE_FUNC_NOTEQUAL: | ||
110 | amask = spu_cmpeq(ref, fragA); | ||
111 | amask = spu_nor(amask, amask); | ||
112 | break; | ||
113 | case PIPE_FUNC_ALWAYS: | ||
114 | amask = spu_splats(0xffffffffU); | ||
115 | break; | ||
116 | case PIPE_FUNC_NEVER: | ||
117 | amask = spu_splats( 0x0U); | ||
118 | break; | ||
119 | default: | ||
120 | ; | ||
121 | } | ||
122 | |||
123 | mask = spu_and(mask, amask); | ||
124 | } | ||
125 | |||
126 | |||
127 | /* | ||
128 | * Z and/or stencil testing... | ||
129 | */ | ||
130 | if (spu.depth_stencil_alpha.depth.enabled || | ||
131 | spu.depth_stencil_alpha.stencil[0].enabled) { | ||
132 | |||
133 | /* get four Z/Stencil values from tile */ | ||
134 | vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU); | ||
135 | vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2]; | ||
136 | vector unsigned int ifbZ = spu_and(ifbZS, mask24); | ||
137 | vector unsigned int ifbS = spu_andc(ifbZS, mask24); | ||
138 | |||
139 | if (spu.depth_stencil_alpha.stencil[0].enabled) { | ||
140 | /* do stencil test */ | ||
141 | ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z24_UNORM_S8_UINT); | ||
142 | |||
143 | } | ||
144 | else if (spu.depth_stencil_alpha.depth.enabled) { | ||
145 | /* do depth test */ | ||
146 | |||
147 | ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z24_UNORM_S8_UINT || | ||
148 | spu.fb.depth_format == PIPE_FORMAT_Z24X8_UNORM); | ||
149 | |||
150 | vector unsigned int ifragZ; | ||
151 | vector unsigned int zmask; | ||
152 | |||
153 | /* convert four fragZ from float to uint */ | ||
154 | fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff)); | ||
155 | ifragZ = spu_convtu(fragZ, 0); | ||
156 | |||
157 | /* do depth comparison, setting zmask with results */ | ||
158 | switch (spu.depth_stencil_alpha.depth.func) { | ||
159 | case PIPE_FUNC_LESS: | ||
160 | zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */ | ||
161 | break; | ||
162 | case PIPE_FUNC_GREATER: | ||
163 | zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifbZ > ifragZ) */ | ||
164 | break; | ||
165 | case PIPE_FUNC_GEQUAL: | ||
166 | zmask = spu_cmpgt(ifbZ, ifragZ); | ||
167 | zmask = spu_nor(zmask, zmask); | ||
168 | break; | ||
169 | case PIPE_FUNC_LEQUAL: | ||
170 | zmask = spu_cmpgt(ifragZ, ifbZ); | ||
171 | zmask = spu_nor(zmask, zmask); | ||
172 | break; | ||
173 | case PIPE_FUNC_EQUAL: | ||
174 | zmask = spu_cmpeq(ifbZ, ifragZ); | ||
175 | break; | ||
176 | case PIPE_FUNC_NOTEQUAL: | ||
177 | zmask = spu_cmpeq(ifbZ, ifragZ); | ||
178 | zmask = spu_nor(zmask, zmask); | ||
179 | break; | ||
180 | case PIPE_FUNC_ALWAYS: | ||
181 | zmask = spu_splats(0xffffffffU); | ||
182 | break; | ||
183 | case PIPE_FUNC_NEVER: | ||
184 | zmask = spu_splats( 0x0U); | ||
185 | break; | ||
186 | default: | ||
187 | ; | ||
188 | } | ||
189 | |||
190 | mask = spu_and(mask, zmask); | ||
191 | |||
192 | /* merge framebuffer Z and fragment Z according to the mask */ | ||
193 | ifbZ = spu_or(spu_and(ifragZ, mask), | ||
194 | spu_andc(ifbZ, mask)); | ||
195 | } | ||
196 | |||
197 | if (spu_extract(spu_orx(mask), 0)) { | ||
198 | /* put new fragment Z/Stencil values back into Z/Stencil tile */ | ||
199 | depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS); | ||
200 | |||
201 | spu.cur_ztile_status = TILE_STATUS_DIRTY; | ||
202 | } | ||
203 | } | ||
204 | |||
205 | |||
206 | /* | ||
207 | * If we'll need the current framebuffer/tile colors for blending | ||
208 | * or logicop or colormask, fetch them now. | ||
209 | */ | ||
210 | if (spu.blend.rt[0].blend_enable || | ||
211 | spu.blend.logicop_enable || | ||
212 | spu.blend.rt[0].colormask != 0xf) { | ||
213 | |||
214 | #if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ | ||
215 | fbc0 = colorTile->ui[y][x*2+0]; | ||
216 | fbc1 = colorTile->ui[y][x*2+1]; | ||
217 | fbc2 = colorTile->ui[y][x*2+2]; | ||
218 | fbc3 = colorTile->ui[y][x*2+3]; | ||
219 | #else | ||
220 | fbc0 = colorTile->ui[y+0][x+0]; | ||
221 | fbc1 = colorTile->ui[y+0][x+1]; | ||
222 | fbc2 = colorTile->ui[y+1][x+0]; | ||
223 | fbc3 = colorTile->ui[y+1][x+1]; | ||
224 | #endif | ||
225 | } | ||
226 | |||
227 | |||
228 | /* | ||
229 | * Do blending | ||
230 | */ | ||
231 | if (spu.blend.rt[0].blend_enable) { | ||
232 | /* blending terms, misc regs */ | ||
233 | vector float term1r, term1g, term1b, term1a; | ||
234 | vector float term2r, term2g, term2b, term2a; | ||
235 | vector float one, tmp; | ||
236 | |||
237 | vector float fbRGBA[4]; /* current framebuffer colors */ | ||
238 | |||
239 | /* convert framebuffer colors from packed int to vector float */ | ||
240 | { | ||
241 | vector float temp[4]; /* float colors in AOS form */ | ||
242 | switch (spu.fb.color_format) { | ||
243 | case PIPE_FORMAT_A8R8G8B8_UNORM: | ||
244 | temp[0] = spu_unpack_B8G8R8A8(fbc0); | ||
245 | temp[1] = spu_unpack_B8G8R8A8(fbc1); | ||
246 | temp[2] = spu_unpack_B8G8R8A8(fbc2); | ||
247 | temp[3] = spu_unpack_B8G8R8A8(fbc3); | ||
248 | break; | ||
249 | case PIPE_FORMAT_B8G8R8A8_UNORM: | ||
250 | temp[0] = spu_unpack_A8R8G8B8(fbc0); | ||
251 | temp[1] = spu_unpack_A8R8G8B8(fbc1); | ||
252 | temp[2] = spu_unpack_A8R8G8B8(fbc2); | ||
253 | temp[3] = spu_unpack_A8R8G8B8(fbc3); | ||
254 | break; | ||
255 | default: | ||
256 | ASSERT(0); | ||
257 | } | ||
258 | _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */ | ||
259 | } | ||
260 | |||
261 | /* | ||
262 | * Compute Src RGB terms (fragment color * factor) | ||
263 | */ | ||
264 | switch (spu.blend.rt[0].rgb_src_factor) { | ||
265 | case PIPE_BLENDFACTOR_ONE: | ||
266 | term1r = fragR; | ||
267 | term1g = fragG; | ||
268 | term1b = fragB; | ||
269 | break; | ||
270 | case PIPE_BLENDFACTOR_ZERO: | ||
271 | term1r = | ||
272 | term1g = | ||
273 | term1b = spu_splats(0.0f); | ||
274 | break; | ||
275 | case PIPE_BLENDFACTOR_SRC_COLOR: | ||
276 | term1r = spu_mul(fragR, fragR); | ||
277 | term1g = spu_mul(fragG, fragG); | ||
278 | term1b = spu_mul(fragB, fragB); | ||
279 | break; | ||
280 | case PIPE_BLENDFACTOR_SRC_ALPHA: | ||
281 | term1r = spu_mul(fragR, fragA); | ||
282 | term1g = spu_mul(fragG, fragA); | ||
283 | term1b = spu_mul(fragB, fragA); | ||
284 | break; | ||
285 | case PIPE_BLENDFACTOR_DST_COLOR: | ||
286 | term1r = spu_mul(fragR, fbRGBA[0]); | ||
287 | term1g = spu_mul(fragG, fbRGBA[1]); | ||
288 | term1b = spu_mul(fragB, fbRGBA[1]); | ||
289 | break; | ||
290 | case PIPE_BLENDFACTOR_DST_ALPHA: | ||
291 | term1r = spu_mul(fragR, fbRGBA[3]); | ||
292 | term1g = spu_mul(fragG, fbRGBA[3]); | ||
293 | term1b = spu_mul(fragB, fbRGBA[3]); | ||
294 | break; | ||
295 | case PIPE_BLENDFACTOR_CONST_COLOR: | ||
296 | term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[0])); | ||
297 | term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[1])); | ||
298 | term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[2])); | ||
299 | break; | ||
300 | case PIPE_BLENDFACTOR_CONST_ALPHA: | ||
301 | term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[3])); | ||
302 | term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[3])); | ||
303 | term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[3])); | ||
304 | break; | ||
305 | /* XXX more cases */ | ||
306 | default: | ||
307 | ASSERT(0); | ||
308 | } | ||
309 | |||
310 | /* | ||
311 | * Compute Src Alpha term (fragment alpha * factor) | ||
312 | */ | ||
313 | switch (spu.blend.rt[0].alpha_src_factor) { | ||
314 | case PIPE_BLENDFACTOR_ONE: | ||
315 | term1a = fragA; | ||
316 | break; | ||
317 | case PIPE_BLENDFACTOR_SRC_COLOR: | ||
318 | term1a = spu_splats(0.0f); | ||
319 | break; | ||
320 | case PIPE_BLENDFACTOR_SRC_ALPHA: | ||
321 | term1a = spu_mul(fragA, fragA); | ||
322 | break; | ||
323 | case PIPE_BLENDFACTOR_DST_COLOR: | ||
324 | /* fall-through */ | ||
325 | case PIPE_BLENDFACTOR_DST_ALPHA: | ||
326 | term1a = spu_mul(fragA, fbRGBA[3]); | ||
327 | break; | ||
328 | case PIPE_BLENDFACTOR_CONST_COLOR: | ||
329 | /* fall-through */ | ||
330 | case PIPE_BLENDFACTOR_CONST_ALPHA: | ||
331 | term1a = spu_mul(fragR, spu_splats(spu.blend_color.color[3])); | ||
332 | break; | ||
333 | /* XXX more cases */ | ||
334 | default: | ||
335 | ASSERT(0); | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * Compute Dest RGB terms (framebuffer color * factor) | ||
340 | */ | ||
341 | switch (spu.blend.rt[0].rgb_dst_factor) { | ||
342 | case PIPE_BLENDFACTOR_ONE: | ||
343 | term2r = fbRGBA[0]; | ||
344 | term2g = fbRGBA[1]; | ||
345 | term2b = fbRGBA[2]; | ||
346 | break; | ||
347 | case PIPE_BLENDFACTOR_ZERO: | ||
348 | term2r = | ||
349 | term2g = | ||
350 | term2b = spu_splats(0.0f); | ||
351 | break; | ||
352 | case PIPE_BLENDFACTOR_SRC_COLOR: | ||
353 | term2r = spu_mul(fbRGBA[0], fragR); | ||
354 | term2g = spu_mul(fbRGBA[1], fragG); | ||
355 | term2b = spu_mul(fbRGBA[2], fragB); | ||
356 | break; | ||
357 | case PIPE_BLENDFACTOR_SRC_ALPHA: | ||
358 | term2r = spu_mul(fbRGBA[0], fragA); | ||
359 | term2g = spu_mul(fbRGBA[1], fragA); | ||
360 | term2b = spu_mul(fbRGBA[2], fragA); | ||
361 | break; | ||
362 | case PIPE_BLENDFACTOR_INV_SRC_ALPHA: | ||
363 | one = spu_splats(1.0f); | ||
364 | tmp = spu_sub(one, fragA); | ||
365 | term2r = spu_mul(fbRGBA[0], tmp); | ||
366 | term2g = spu_mul(fbRGBA[1], tmp); | ||
367 | term2b = spu_mul(fbRGBA[2], tmp); | ||
368 | break; | ||
369 | case PIPE_BLENDFACTOR_DST_COLOR: | ||
370 | term2r = spu_mul(fbRGBA[0], fbRGBA[0]); | ||
371 | term2g = spu_mul(fbRGBA[1], fbRGBA[1]); | ||
372 | term2b = spu_mul(fbRGBA[2], fbRGBA[2]); | ||
373 | break; | ||
374 | case PIPE_BLENDFACTOR_DST_ALPHA: | ||
375 | term2r = spu_mul(fbRGBA[0], fbRGBA[3]); | ||
376 | term2g = spu_mul(fbRGBA[1], fbRGBA[3]); | ||
377 | term2b = spu_mul(fbRGBA[2], fbRGBA[3]); | ||
378 | break; | ||
379 | case PIPE_BLENDFACTOR_CONST_COLOR: | ||
380 | term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[0])); | ||
381 | term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[1])); | ||
382 | term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[2])); | ||
383 | break; | ||
384 | case PIPE_BLENDFACTOR_CONST_ALPHA: | ||
385 | term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[3])); | ||
386 | term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[3])); | ||
387 | term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[3])); | ||
388 | break; | ||
389 | /* XXX more cases */ | ||
390 | default: | ||
391 | ASSERT(0); | ||
392 | } | ||
393 | |||
394 | /* | ||
395 | * Compute Dest Alpha term (framebuffer alpha * factor) | ||
396 | */ | ||
397 | switch (spu.blend.rt[0].alpha_dst_factor) { | ||
398 | case PIPE_BLENDFACTOR_ONE: | ||
399 | term2a = fbRGBA[3]; | ||
400 | break; | ||
401 | case PIPE_BLENDFACTOR_SRC_COLOR: | ||
402 | term2a = spu_splats(0.0f); | ||
403 | break; | ||
404 | case PIPE_BLENDFACTOR_SRC_ALPHA: | ||
405 | term2a = spu_mul(fbRGBA[3], fragA); | ||
406 | break; | ||
407 | case PIPE_BLENDFACTOR_INV_SRC_ALPHA: | ||
408 | one = spu_splats(1.0f); | ||
409 | tmp = spu_sub(one, fragA); | ||
410 | term2a = spu_mul(fbRGBA[3], tmp); | ||
411 | break; | ||
412 | case PIPE_BLENDFACTOR_DST_COLOR: | ||
413 | /* fall-through */ | ||
414 | case PIPE_BLENDFACTOR_DST_ALPHA: | ||
415 | term2a = spu_mul(fbRGBA[3], fbRGBA[3]); | ||
416 | break; | ||
417 | case PIPE_BLENDFACTOR_CONST_COLOR: | ||
418 | /* fall-through */ | ||
419 | case PIPE_BLENDFACTOR_CONST_ALPHA: | ||
420 | term2a = spu_mul(fbRGBA[3], spu_splats(spu.blend_color.color[3])); | ||
421 | break; | ||
422 | /* XXX more cases */ | ||
423 | default: | ||
424 | ASSERT(0); | ||
425 | } | ||
426 | |||
427 | /* | ||
428 | * Combine Src/Dest RGB terms | ||
429 | */ | ||
430 | switch (spu.blend.rt[0].rgb_func) { | ||
431 | case PIPE_BLEND_ADD: | ||
432 | fragR = spu_add(term1r, term2r); | ||
433 | fragG = spu_add(term1g, term2g); | ||
434 | fragB = spu_add(term1b, term2b); | ||
435 | break; | ||
436 | case PIPE_BLEND_SUBTRACT: | ||
437 | fragR = spu_sub(term1r, term2r); | ||
438 | fragG = spu_sub(term1g, term2g); | ||
439 | fragB = spu_sub(term1b, term2b); | ||
440 | break; | ||
441 | case PIPE_BLEND_REVERSE_SUBTRACT: | ||
442 | fragR = spu_sub(term2r, term1r); | ||
443 | fragG = spu_sub(term2g, term1g); | ||
444 | fragB = spu_sub(term2b, term1b); | ||
445 | break; | ||
446 | case PIPE_BLEND_MIN: | ||
447 | fragR = spu_min(term1r, term2r); | ||
448 | fragG = spu_min(term1g, term2g); | ||
449 | fragB = spu_min(term1b, term2b); | ||
450 | break; | ||
451 | case PIPE_BLEND_MAX: | ||
452 | fragR = spu_max(term1r, term2r); | ||
453 | fragG = spu_max(term1g, term2g); | ||
454 | fragB = spu_max(term1b, term2b); | ||
455 | break; | ||
456 | default: | ||
457 | ASSERT(0); | ||
458 | } | ||
459 | |||
460 | /* | ||
461 | * Combine Src/Dest A term | ||
462 | */ | ||
463 | switch (spu.blend.rt[0].alpha_func) { | ||
464 | case PIPE_BLEND_ADD: | ||
465 | fragA = spu_add(term1a, term2a); | ||
466 | break; | ||
467 | case PIPE_BLEND_SUBTRACT: | ||
468 | fragA = spu_sub(term1a, term2a); | ||
469 | break; | ||
470 | case PIPE_BLEND_REVERSE_SUBTRACT: | ||
471 | fragA = spu_sub(term2a, term1a); | ||
472 | break; | ||
473 | case PIPE_BLEND_MIN: | ||
474 | fragA = spu_min(term1a, term2a); | ||
475 | break; | ||
476 | case PIPE_BLEND_MAX: | ||
477 | fragA = spu_max(term1a, term2a); | ||
478 | break; | ||
479 | default: | ||
480 | ASSERT(0); | ||
481 | } | ||
482 | } | ||
483 | |||
484 | |||
485 | /* | ||
486 | * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA. | ||
487 | */ | ||
488 | #if 0 | ||
489 | /* original code */ | ||
490 | { | ||
491 | vector float frag_soa[4]; | ||
492 | frag_soa[0] = fragR; | ||
493 | frag_soa[1] = fragG; | ||
494 | frag_soa[2] = fragB; | ||
495 | frag_soa[3] = fragA; | ||
496 | _transpose_matrix4x4(frag_aos, frag_soa); | ||
497 | } | ||
498 | #else | ||
499 | /* short-cut relying on function parameter layout: */ | ||
500 | _transpose_matrix4x4(frag_aos, &fragR); | ||
501 | (void) fragG; | ||
502 | (void) fragB; | ||
503 | #endif | ||
504 | |||
505 | /* | ||
506 | * Pack fragment float colors into 32-bit RGBA words. | ||
507 | */ | ||
508 | switch (spu.fb.color_format) { | ||
509 | case PIPE_FORMAT_B8G8R8A8_UNORM: | ||
510 | fragc0 = spu_pack_A8R8G8B8(frag_aos[0]); | ||
511 | fragc1 = spu_pack_A8R8G8B8(frag_aos[1]); | ||
512 | fragc2 = spu_pack_A8R8G8B8(frag_aos[2]); | ||
513 | fragc3 = spu_pack_A8R8G8B8(frag_aos[3]); | ||
514 | break; | ||
515 | case PIPE_FORMAT_A8R8G8B8_UNORM: | ||
516 | fragc0 = spu_pack_B8G8R8A8(frag_aos[0]); | ||
517 | fragc1 = spu_pack_B8G8R8A8(frag_aos[1]); | ||
518 | fragc2 = spu_pack_B8G8R8A8(frag_aos[2]); | ||
519 | fragc3 = spu_pack_B8G8R8A8(frag_aos[3]); | ||
520 | break; | ||
521 | default: | ||
522 | fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n"); | ||
523 | ASSERT(0); | ||
524 | } | ||
525 | |||
526 | |||
527 | /* | ||
528 | * Do color masking | ||
529 | */ | ||
530 | if (spu.blend.rt[0].colormask != 0xf) { | ||
531 | uint cmask = 0x0; /* each byte corresponds to a color channel */ | ||
532 | |||
533 | /* Form bitmask depending on color buffer format and colormask bits */ | ||
534 | switch (spu.fb.color_format) { | ||
535 | case PIPE_FORMAT_B8G8R8A8_UNORM: | ||
536 | if (spu.blend.rt[0].colormask & PIPE_MASK_R) | ||
537 | cmask |= 0x00ff0000; /* red */ | ||
538 | if (spu.blend.rt[0].colormask & PIPE_MASK_G) | ||
539 | cmask |= 0x0000ff00; /* green */ | ||
540 | if (spu.blend.rt[0].colormask & PIPE_MASK_B) | ||
541 | cmask |= 0x000000ff; /* blue */ | ||
542 | if (spu.blend.rt[0].colormask & PIPE_MASK_A) | ||
543 | cmask |= 0xff000000; /* alpha */ | ||
544 | break; | ||
545 | case PIPE_FORMAT_A8R8G8B8_UNORM: | ||
546 | if (spu.blend.rt[0].colormask & PIPE_MASK_R) | ||
547 | cmask |= 0x0000ff00; /* red */ | ||
548 | if (spu.blend.rt[0].colormask & PIPE_MASK_G) | ||
549 | cmask |= 0x00ff0000; /* green */ | ||
550 | if (spu.blend.rt[0].colormask & PIPE_MASK_B) | ||
551 | cmask |= 0xff000000; /* blue */ | ||
552 | if (spu.blend.rt[0].colormask & PIPE_MASK_A) | ||
553 | cmask |= 0x000000ff; /* alpha */ | ||
554 | break; | ||
555 | default: | ||
556 | ASSERT(0); | ||
557 | } | ||
558 | |||
559 | /* | ||
560 | * Apply color mask to the 32-bit packed colors. | ||
561 | * if (cmask[i]) | ||
562 | * frag color[i] = frag color[i]; | ||
563 | * else | ||
564 | * frag color[i] = framebuffer color[i]; | ||
565 | */ | ||
566 | fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask); | ||
567 | fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask); | ||
568 | fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask); | ||
569 | fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask); | ||
570 | } | ||
571 | |||
572 | |||
573 | /* | ||
574 | * Do logic ops | ||
575 | */ | ||
576 | if (spu.blend.logicop_enable) { | ||
577 | /* XXX to do */ | ||
578 | /* apply logicop to 32-bit packed colors (fragcx and fbcx) */ | ||
579 | } | ||
580 | |||
581 | |||
582 | /* | ||
583 | * If mask is non-zero, mark tile as dirty. | ||
584 | */ | ||
585 | if (spu_extract(spu_orx(mask), 0)) { | ||
586 | spu.cur_ctile_status = TILE_STATUS_DIRTY; | ||
587 | } | ||
588 | else { | ||
589 | /* write no fragments */ | ||
590 | return; | ||
591 | } | ||
592 | |||
593 | |||
594 | /* | ||
595 | * Write new fragment/quad colors to the framebuffer/tile. | ||
596 | * Only write pixels where the corresponding mask word is set. | ||
597 | */ | ||
598 | #if LINEAR_QUAD_LAYOUT | ||
599 | /* | ||
600 | * Quad layout: | ||
601 | * +--+--+--+--+ | ||
602 | * |p0|p1|p2|p3|... | ||
603 | * +--+--+--+--+ | ||
604 | */ | ||
605 | if (spu_extract(mask, 0)) | ||
606 | colorTile->ui[y][x*2] = fragc0; | ||
607 | if (spu_extract(mask, 1)) | ||
608 | colorTile->ui[y][x*2+1] = fragc1; | ||
609 | if (spu_extract(mask, 2)) | ||
610 | colorTile->ui[y][x*2+2] = fragc2; | ||
611 | if (spu_extract(mask, 3)) | ||
612 | colorTile->ui[y][x*2+3] = fragc3; | ||
613 | #else | ||
614 | /* | ||
615 | * Quad layout: | ||
616 | * +--+--+ | ||
617 | * |p0|p1|... | ||
618 | * +--+--+ | ||
619 | * |p2|p3|... | ||
620 | * +--+--+ | ||
621 | */ | ||
622 | if (spu_extract(mask, 0)) | ||
623 | colorTile->ui[y+0][x+0] = fragc0; | ||
624 | if (spu_extract(mask, 1)) | ||
625 | colorTile->ui[y+0][x+1] = fragc1; | ||
626 | if (spu_extract(mask, 2)) | ||
627 | colorTile->ui[y+1][x+0] = fragc2; | ||
628 | if (spu_extract(mask, 3)) | ||
629 | colorTile->ui[y+1][x+1] = fragc3; | ||
630 | #endif | ||
631 | } | ||
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h deleted file mode 100644 index f817abf0463..00000000000 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h +++ /dev/null | |||
@@ -1,44 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #ifndef SPU_PER_FRAGMENT_OP | ||
29 | #define SPU_PER_FRAGMENT_OP | ||
30 | |||
31 | |||
/**
 * Fallback (non-generated) per-fragment operations for one quad:
 * alpha test, Z/stencil test, blending and color masking, followed by
 * the masked write of the resulting colors into \p colorTile.
 * The color arguments are in SOA form, one vector per channel holding
 * the four fragments of the quad; \p mask selects the live fragments.
 */
extern void
spu_fallback_fragment_ops(uint x, uint y,
tile_t *colorTile,
tile_t *depthStencilTile,
vector float fragZ,
vector float fragRed,
vector float fragGreen,
vector float fragBlue,
vector float fragAlpha,
vector unsigned int mask);
42 | |||
43 | |||
44 | #endif /* SPU_PER_FRAGMENT_OP */ | ||
diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c deleted file mode 100644 index 14987e3c3a2..00000000000 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ /dev/null | |||
@@ -1,356 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | #include <stdio.h> | ||
30 | #include <libmisc.h> | ||
31 | #include <spu_mfcio.h> | ||
32 | |||
33 | #include "spu_main.h" | ||
34 | #include "spu_render.h" | ||
35 | #include "spu_shuffle.h" | ||
36 | #include "spu_tri.h" | ||
37 | #include "spu_tile.h" | ||
38 | #include "cell/common.h" | ||
39 | #include "util/u_memory.h" | ||
40 | |||
41 | |||
42 | /** | ||
43 | * Given a rendering command's bounding box (in pixels) compute the | ||
44 | * location of the corresponding screen tile bounding box. | ||
45 | */ | ||
46 | static INLINE void | ||
47 | tile_bounding_box(const struct cell_command_render *render, | ||
48 | uint *txmin, uint *tymin, | ||
49 | uint *box_num_tiles, uint *box_width_tiles) | ||
50 | { | ||
51 | #if 0 | ||
52 | /* Debug: full-window bounding box */ | ||
53 | uint txmax = spu.fb.width_tiles - 1; | ||
54 | uint tymax = spu.fb.height_tiles - 1; | ||
55 | *txmin = 0; | ||
56 | *tymin = 0; | ||
57 | *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; | ||
58 | *box_width_tiles = spu.fb.width_tiles; | ||
59 | (void) render; | ||
60 | (void) txmax; | ||
61 | (void) tymax; | ||
62 | #else | ||
63 | uint txmax, tymax, box_height_tiles; | ||
64 | |||
65 | *txmin = (uint) render->xmin / TILE_SIZE; | ||
66 | *tymin = (uint) render->ymin / TILE_SIZE; | ||
67 | txmax = (uint) render->xmax / TILE_SIZE; | ||
68 | tymax = (uint) render->ymax / TILE_SIZE; | ||
69 | if (txmax >= spu.fb.width_tiles) | ||
70 | txmax = spu.fb.width_tiles-1; | ||
71 | if (tymax >= spu.fb.height_tiles) | ||
72 | tymax = spu.fb.height_tiles-1; | ||
73 | *box_width_tiles = txmax - *txmin + 1; | ||
74 | box_height_tiles = tymax - *tymin + 1; | ||
75 | *box_num_tiles = *box_width_tiles * box_height_tiles; | ||
76 | #endif | ||
77 | #if 0 | ||
78 | printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id, | ||
79 | render->xmin, render->ymin, render->xmax, render->ymax); | ||
80 | printf("SPU %u: tiles: %u, %u .. %u, %u\n", | ||
81 | spu.init.id, *txmin, *tymin, txmax, tymax); | ||
82 | ASSERT(render->xmin <= render->xmax); | ||
83 | ASSERT(render->ymin <= render->ymax); | ||
84 | #endif | ||
85 | } | ||
86 | |||
87 | |||
88 | /** Check if the tile at (tx,ty) belongs to this SPU */ | ||
89 | static INLINE boolean | ||
90 | my_tile(uint tx, uint ty) | ||
91 | { | ||
92 | return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id; | ||
93 | } | ||
94 | |||
95 | |||
96 | /** | ||
97 | * Start fetching non-clear color/Z tiles from main memory | ||
98 | */ | ||
99 | static INLINE void | ||
100 | get_cz_tiles(uint tx, uint ty) | ||
101 | { | ||
102 | if (spu.read_depth_stencil) { | ||
103 | if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { | ||
104 | //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); | ||
105 | get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); | ||
106 | spu.cur_ztile_status = TILE_STATUS_GETTING; | ||
107 | } | ||
108 | } | ||
109 | |||
110 | if (spu.cur_ctile_status != TILE_STATUS_CLEAR) { | ||
111 | //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty); | ||
112 | get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0); | ||
113 | spu.cur_ctile_status = TILE_STATUS_GETTING; | ||
114 | } | ||
115 | } | ||
116 | |||
117 | |||
118 | /** | ||
119 | * Start putting dirty color/Z tiles back to main memory | ||
120 | */ | ||
121 | static INLINE void | ||
122 | put_cz_tiles(uint tx, uint ty) | ||
123 | { | ||
124 | if (spu.cur_ztile_status == TILE_STATUS_DIRTY) { | ||
125 | /* tile was modified and needs to be written back */ | ||
126 | //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty); | ||
127 | put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1); | ||
128 | spu.cur_ztile_status = TILE_STATUS_DEFINED; | ||
129 | } | ||
130 | else if (spu.cur_ztile_status == TILE_STATUS_GETTING) { | ||
131 | /* tile was never used */ | ||
132 | spu.cur_ztile_status = TILE_STATUS_DEFINED; | ||
133 | //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty); | ||
134 | } | ||
135 | |||
136 | if (spu.cur_ctile_status == TILE_STATUS_DIRTY) { | ||
137 | /* tile was modified and needs to be written back */ | ||
138 | //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty); | ||
139 | put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0); | ||
140 | spu.cur_ctile_status = TILE_STATUS_DEFINED; | ||
141 | } | ||
142 | else if (spu.cur_ctile_status == TILE_STATUS_GETTING) { | ||
143 | /* tile was never used */ | ||
144 | spu.cur_ctile_status = TILE_STATUS_DEFINED; | ||
145 | //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty); | ||
146 | } | ||
147 | } | ||
148 | |||
149 | |||
150 | /** | ||
151 | * Wait for 'put' of color/z tiles to complete. | ||
152 | */ | ||
153 | static INLINE void | ||
154 | wait_put_cz_tiles(void) | ||
155 | { | ||
156 | wait_on_mask(1 << TAG_WRITE_TILE_COLOR); | ||
157 | if (spu.read_depth_stencil) { | ||
158 | wait_on_mask(1 << TAG_WRITE_TILE_Z); | ||
159 | } | ||
160 | } | ||
161 | |||
162 | |||
163 | /** | ||
164 | * Render primitives | ||
165 | * \param pos_incr returns value indicating how many words to skip after | ||
166 | * this command in the batch buffer | ||
167 | */ | ||
168 | void | ||
169 | cmd_render(const struct cell_command_render *render, uint *pos_incr) | ||
170 | { | ||
171 | /* we'll DMA into these buffers */ | ||
172 | PIPE_ALIGN_VAR(16) ubyte vertex_data[CELL_BUFFER_SIZE]; | ||
173 | const uint vertex_size = render->vertex_size; /* in bytes */ | ||
174 | /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; | ||
175 | uint index_bytes; | ||
176 | const ubyte *vertices; | ||
177 | const ushort *indexes; | ||
178 | uint i, j; | ||
179 | uint num_tiles; | ||
180 | |||
181 | D_PRINTF(CELL_DEBUG_CMD, | ||
182 | "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n", | ||
183 | render->prim_type, | ||
184 | render->num_verts, | ||
185 | render->num_indexes, | ||
186 | render->inline_verts); | ||
187 | |||
188 | ASSERT(sizeof(*render) % 4 == 0); | ||
189 | ASSERT(total_vertex_bytes % 16 == 0); | ||
190 | ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); | ||
191 | ASSERT(render->num_indexes % 3 == 0); | ||
192 | |||
193 | |||
194 | /* indexes are right after the render command in the batch buffer */ | ||
195 | indexes = (const ushort *) (render + 1); | ||
196 | index_bytes = ROUNDUP8(render->num_indexes * 2); | ||
197 | *pos_incr = index_bytes / 8 + sizeof(*render) / 8; | ||
198 | |||
199 | |||
200 | if (render->inline_verts) { | ||
201 | /* Vertices are after indexes in batch buffer at next 16-byte addr */ | ||
202 | vertices = (const ubyte *) render + (*pos_incr * 8); | ||
203 | vertices = (const ubyte *) align_pointer((void *) vertices, 16); | ||
204 | ASSERT_ALIGN16(vertices); | ||
205 | *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8; | ||
206 | } | ||
207 | else { | ||
208 | /* Begin DMA fetch of vertex buffer */ | ||
209 | ubyte *src = spu.init.buffers[render->vertex_buf]; | ||
210 | ubyte *dest = vertex_data; | ||
211 | |||
212 | /* skip vertex data we won't use */ | ||
213 | #if 01 | ||
214 | src += render->min_index * vertex_size; | ||
215 | dest += render->min_index * vertex_size; | ||
216 | total_vertex_bytes -= render->min_index * vertex_size; | ||
217 | #endif | ||
218 | ASSERT(total_vertex_bytes % 16 == 0); | ||
219 | ASSERT_ALIGN16(dest); | ||
220 | ASSERT_ALIGN16(src); | ||
221 | |||
222 | mfc_get(dest, /* in vertex_data[] array */ | ||
223 | (unsigned int) src, /* src in main memory */ | ||
224 | total_vertex_bytes, /* size */ | ||
225 | TAG_VERTEX_BUFFER, | ||
226 | 0, /* tid */ | ||
227 | 0 /* rid */); | ||
228 | |||
229 | vertices = vertex_data; | ||
230 | |||
231 | wait_on_mask(1 << TAG_VERTEX_BUFFER); | ||
232 | } | ||
233 | |||
234 | |||
235 | /** | ||
236 | ** find tiles which intersect the prim bounding box | ||
237 | **/ | ||
238 | uint txmin, tymin, box_width_tiles, box_num_tiles; | ||
239 | tile_bounding_box(render, &txmin, &tymin, | ||
240 | &box_num_tiles, &box_width_tiles); | ||
241 | |||
242 | |||
243 | /* make sure any pending clears have completed */ | ||
244 | wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ | ||
245 | |||
246 | |||
247 | num_tiles = 0; | ||
248 | |||
249 | /** | ||
250 | ** loop over tiles, rendering tris | ||
251 | **/ | ||
252 | for (i = 0; i < box_num_tiles; i++) { | ||
253 | const uint tx = txmin + i % box_width_tiles; | ||
254 | const uint ty = tymin + i / box_width_tiles; | ||
255 | |||
256 | ASSERT(tx < spu.fb.width_tiles); | ||
257 | ASSERT(ty < spu.fb.height_tiles); | ||
258 | |||
259 | if (!my_tile(tx, ty)) | ||
260 | continue; | ||
261 | |||
262 | num_tiles++; | ||
263 | |||
264 | spu.cur_ctile_status = spu.ctile_status[ty][tx]; | ||
265 | spu.cur_ztile_status = spu.ztile_status[ty][tx]; | ||
266 | |||
267 | get_cz_tiles(tx, ty); | ||
268 | |||
269 | uint drawn = 0; | ||
270 | |||
271 | const qword vertex_sizes = (qword)spu_splats(vertex_size); | ||
272 | const qword verticess = (qword)spu_splats((uint)vertices); | ||
273 | |||
274 | ASSERT_ALIGN16(&indexes[0]); | ||
275 | |||
276 | const uint num_indexes = render->num_indexes; | ||
277 | |||
278 | /* loop over tris | ||
279 | * &indexes[0] will be 16 byte aligned. This loop is heavily unrolled | ||
280 | * avoiding variable rotates when extracting vertex indices. | ||
281 | */ | ||
282 | for (j = 0; j < num_indexes; j += 24) { | ||
283 | /* Load three vectors, containing 24 ushort indices */ | ||
284 | const qword* lower_qword = (qword*)&indexes[j]; | ||
285 | const qword indices0 = lower_qword[0]; | ||
286 | const qword indices1 = lower_qword[1]; | ||
287 | const qword indices2 = lower_qword[2]; | ||
288 | |||
289 | /* stores three indices for each tri n in slots 0, 1 and 2 of vsn */ | ||
290 | /* Straightforward rotates for these */ | ||
291 | qword vs0 = indices0; | ||
292 | qword vs1 = si_shlqbyi(indices0, 6); | ||
293 | qword vs3 = si_shlqbyi(indices1, 2); | ||
294 | qword vs4 = si_shlqbyi(indices1, 8); | ||
295 | qword vs6 = si_shlqbyi(indices2, 4); | ||
296 | qword vs7 = si_shlqbyi(indices2, 10); | ||
297 | |||
298 | /* For tri 2 and 5, the three indices are split across two machine | ||
299 | * words - rotate and combine */ | ||
300 | const qword tmp2a = si_shlqbyi(indices0, 12); | ||
301 | const qword tmp2b = si_rotqmbyi(indices1, 12|16); | ||
302 | qword vs2 = si_selb(tmp2a, tmp2b, si_fsmh(si_from_uint(0x20))); | ||
303 | |||
304 | const qword tmp5a = si_shlqbyi(indices1, 14); | ||
305 | const qword tmp5b = si_rotqmbyi(indices2, 14|16); | ||
306 | qword vs5 = si_selb(tmp5a, tmp5b, si_fsmh(si_from_uint(0x60))); | ||
307 | |||
308 | /* unpack indices from halfword slots to word slots */ | ||
309 | vs0 = si_shufb(vs0, vs0, SHUFB8(0,A,0,B,0,C,0,0)); | ||
310 | vs1 = si_shufb(vs1, vs1, SHUFB8(0,A,0,B,0,C,0,0)); | ||
311 | vs2 = si_shufb(vs2, vs2, SHUFB8(0,A,0,B,0,C,0,0)); | ||
312 | vs3 = si_shufb(vs3, vs3, SHUFB8(0,A,0,B,0,C,0,0)); | ||
313 | vs4 = si_shufb(vs4, vs4, SHUFB8(0,A,0,B,0,C,0,0)); | ||
314 | vs5 = si_shufb(vs5, vs5, SHUFB8(0,A,0,B,0,C,0,0)); | ||
315 | vs6 = si_shufb(vs6, vs6, SHUFB8(0,A,0,B,0,C,0,0)); | ||
316 | vs7 = si_shufb(vs7, vs7, SHUFB8(0,A,0,B,0,C,0,0)); | ||
317 | |||
318 | /* Calculate address of vertex in vertices[] */ | ||
319 | vs0 = si_mpya(vs0, vertex_sizes, verticess); | ||
320 | vs1 = si_mpya(vs1, vertex_sizes, verticess); | ||
321 | vs2 = si_mpya(vs2, vertex_sizes, verticess); | ||
322 | vs3 = si_mpya(vs3, vertex_sizes, verticess); | ||
323 | vs4 = si_mpya(vs4, vertex_sizes, verticess); | ||
324 | vs5 = si_mpya(vs5, vertex_sizes, verticess); | ||
325 | vs6 = si_mpya(vs6, vertex_sizes, verticess); | ||
326 | vs7 = si_mpya(vs7, vertex_sizes, verticess); | ||
327 | |||
328 | /* Select the appropriate call based on the number of indices | ||
329 | * remaining */ | ||
330 | switch(num_indexes - j) { | ||
331 | default: drawn += tri_draw(vs7, tx, ty); | ||
332 | case 21: drawn += tri_draw(vs6, tx, ty); | ||
333 | case 18: drawn += tri_draw(vs5, tx, ty); | ||
334 | case 15: drawn += tri_draw(vs4, tx, ty); | ||
335 | case 12: drawn += tri_draw(vs3, tx, ty); | ||
336 | case 9: drawn += tri_draw(vs2, tx, ty); | ||
337 | case 6: drawn += tri_draw(vs1, tx, ty); | ||
338 | case 3: drawn += tri_draw(vs0, tx, ty); | ||
339 | } | ||
340 | } | ||
341 | |||
342 | //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); | ||
343 | |||
344 | /* write color/z tiles back to main framebuffer, if dirtied */ | ||
345 | put_cz_tiles(tx, ty); | ||
346 | |||
347 | wait_put_cz_tiles(); /* XXX seems unnecessary... */ | ||
348 | |||
349 | spu.ctile_status[ty][tx] = spu.cur_ctile_status; | ||
350 | spu.ztile_status[ty][tx] = spu.cur_ztile_status; | ||
351 | } | ||
352 | |||
353 | D_PRINTF(CELL_DEBUG_CMD, | ||
354 | "RENDER done (%u tiles hit)\n", | ||
355 | num_tiles); | ||
356 | } | ||
diff --git a/src/gallium/drivers/cell/spu/spu_render.h b/src/gallium/drivers/cell/spu/spu_render.h deleted file mode 100644 index 493434f0878..00000000000 --- a/src/gallium/drivers/cell/spu/spu_render.h +++ /dev/null | |||
@@ -1,38 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | #ifndef SPU_RENDER_H | ||
30 | #define SPU_RENDER_H | ||
31 | |||
32 | #include "cell/common.h" | ||
33 | |||
34 | extern void | ||
35 | cmd_render(const struct cell_command_render *render, uint *pos_incr); | ||
36 | |||
37 | #endif /* SPU_RENDER_H */ | ||
38 | |||
diff --git a/src/gallium/drivers/cell/spu/spu_shuffle.h b/src/gallium/drivers/cell/spu/spu_shuffle.h deleted file mode 100644 index 74f2a0b6d2e..00000000000 --- a/src/gallium/drivers/cell/spu/spu_shuffle.h +++ /dev/null | |||
@@ -1,186 +0,0 @@ | |||
1 | #ifndef SPU_SHUFFLE_H | ||
2 | #define SPU_SHUFFLE_H | ||
3 | |||
4 | /* | ||
5 | * Generate shuffle patterns with minimal fuss. | ||
6 | * | ||
7 | * Based on ideas from | ||
8 | * http://www.insomniacgames.com/tech/articles/0408/files/shuffles.pdf | ||
9 | * | ||
10 | * A-P indicates 0-15th position in first vector | ||
11 | * a-p indicates 0-15th position in second vector | ||
12 | * | ||
13 | * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ||
14 | * |00|01|02|03|04|05|06|07|08|09|0a|0b|0c|0d|0e|0f| | ||
15 | * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ||
16 | * | A| B| C| D| | ||
17 | * +-----+-----+-----+-----+-----+-----+-----+-----+ | ||
18 | * | A| B| C| D| E| F| G| H| | ||
19 | * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ||
20 | * | A| B| C| D| E| F| G| H| I| J| K| L| M| N| O| P| | ||
21 | * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ||
22 | * | ||
23 | * x or X indicates 0xff | ||
24 | * 8 indicates 0x80 | ||
25 | * 0 indicates 0x00 | ||
26 | * | ||
27 | * The macros SHUFFLE4(), SHUFFLE8() and SHUFFLE16() provide a const vector | ||
28 | * unsigned char literal suitable for use with spu_shuffle(). | ||
29 | * | ||
30 | * The macros SHUFB4(), SHUFB8() and SHUFB16() provide a const qword vector | ||
31 | * literal suitable for use with si_shufb(). | ||
32 | * | ||
33 | * | ||
34 | * For example : | ||
35 | * SHUFB4(A,A,A,A) | ||
36 | * expands to : | ||
37 | * ((const qword){0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3}) | ||
38 | * | ||
39 | * SHUFFLE8(A,B,a,b,C,c,8,8) | ||
40 | * expands to : | ||
41 | * ((const vector unsigned char){0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13, | ||
42 | * 0x04,0x05,0x14,0x15,0xe0,0xe0,0xe0,0xe0}) | ||
43 | * | ||
44 | */ | ||
45 | |||
46 | #include <spu_intrinsics.h> | ||
47 | |||
48 | #define SHUFFLE_PATTERN_4_A__ 0x00, 0x01, 0x02, 0x03 | ||
49 | #define SHUFFLE_PATTERN_4_B__ 0x04, 0x05, 0x06, 0x07 | ||
50 | #define SHUFFLE_PATTERN_4_C__ 0x08, 0x09, 0x0a, 0x0b | ||
51 | #define SHUFFLE_PATTERN_4_D__ 0x0c, 0x0d, 0x0e, 0x0f | ||
52 | #define SHUFFLE_PATTERN_4_a__ 0x10, 0x11, 0x12, 0x13 | ||
53 | #define SHUFFLE_PATTERN_4_b__ 0x14, 0x15, 0x16, 0x17 | ||
54 | #define SHUFFLE_PATTERN_4_c__ 0x18, 0x19, 0x1a, 0x1b | ||
55 | #define SHUFFLE_PATTERN_4_d__ 0x1c, 0x1d, 0x1e, 0x1f | ||
56 | #define SHUFFLE_PATTERN_4_X__ 0xc0, 0xc0, 0xc0, 0xc0 | ||
57 | #define SHUFFLE_PATTERN_4_x__ 0xc0, 0xc0, 0xc0, 0xc0 | ||
58 | #define SHUFFLE_PATTERN_4_0__ 0x80, 0x80, 0x80, 0x80 | ||
59 | #define SHUFFLE_PATTERN_4_8__ 0xe0, 0xe0, 0xe0, 0xe0 | ||
60 | |||
61 | #define SHUFFLE_VECTOR_4__(A, B, C, D) \ | ||
62 | SHUFFLE_PATTERN_4_##A##__, \ | ||
63 | SHUFFLE_PATTERN_4_##B##__, \ | ||
64 | SHUFFLE_PATTERN_4_##C##__, \ | ||
65 | SHUFFLE_PATTERN_4_##D##__ | ||
66 | |||
67 | #define SHUFFLE4(A, B, C, D) \ | ||
68 | ((const vector unsigned char){ \ | ||
69 | SHUFFLE_VECTOR_4__(A, B, C, D) \ | ||
70 | }) | ||
71 | |||
72 | #define SHUFB4(A, B, C, D) \ | ||
73 | ((const qword){ \ | ||
74 | SHUFFLE_VECTOR_4__(A, B, C, D) \ | ||
75 | }) | ||
76 | |||
77 | |||
78 | #define SHUFFLE_PATTERN_8_A__ 0x00, 0x01 | ||
79 | #define SHUFFLE_PATTERN_8_B__ 0x02, 0x03 | ||
80 | #define SHUFFLE_PATTERN_8_C__ 0x04, 0x05 | ||
81 | #define SHUFFLE_PATTERN_8_D__ 0x06, 0x07 | ||
82 | #define SHUFFLE_PATTERN_8_E__ 0x08, 0x09 | ||
83 | #define SHUFFLE_PATTERN_8_F__ 0x0a, 0x0b | ||
84 | #define SHUFFLE_PATTERN_8_G__ 0x0c, 0x0d | ||
85 | #define SHUFFLE_PATTERN_8_H__ 0x0e, 0x0f | ||
86 | #define SHUFFLE_PATTERN_8_a__ 0x10, 0x11 | ||
87 | #define SHUFFLE_PATTERN_8_b__ 0x12, 0x13 | ||
88 | #define SHUFFLE_PATTERN_8_c__ 0x14, 0x15 | ||
89 | #define SHUFFLE_PATTERN_8_d__ 0x16, 0x17 | ||
90 | #define SHUFFLE_PATTERN_8_e__ 0x18, 0x19 | ||
91 | #define SHUFFLE_PATTERN_8_f__ 0x1a, 0x1b | ||
92 | #define SHUFFLE_PATTERN_8_g__ 0x1c, 0x1d | ||
93 | #define SHUFFLE_PATTERN_8_h__ 0x1e, 0x1f | ||
94 | #define SHUFFLE_PATTERN_8_X__ 0xc0, 0xc0 | ||
95 | #define SHUFFLE_PATTERN_8_x__ 0xc0, 0xc0 | ||
96 | #define SHUFFLE_PATTERN_8_0__ 0x80, 0x80 | ||
97 | #define SHUFFLE_PATTERN_8_8__ 0xe0, 0xe0 | ||
98 | |||
99 | |||
100 | #define SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ | ||
101 | SHUFFLE_PATTERN_8_##A##__, \ | ||
102 | SHUFFLE_PATTERN_8_##B##__, \ | ||
103 | SHUFFLE_PATTERN_8_##C##__, \ | ||
104 | SHUFFLE_PATTERN_8_##D##__, \ | ||
105 | SHUFFLE_PATTERN_8_##E##__, \ | ||
106 | SHUFFLE_PATTERN_8_##F##__, \ | ||
107 | SHUFFLE_PATTERN_8_##G##__, \ | ||
108 | SHUFFLE_PATTERN_8_##H##__ | ||
109 | |||
110 | #define SHUFFLE8(A, B, C, D, E, F, G, H) \ | ||
111 | ((const vector unsigned char){ \ | ||
112 | SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ | ||
113 | }) | ||
114 | |||
115 | #define SHUFB8(A, B, C, D, E, F, G, H) \ | ||
116 | ((const qword){ \ | ||
117 | SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ | ||
118 | }) | ||
119 | |||
120 | |||
121 | #define SHUFFLE_PATTERN_16_A__ 0x00 | ||
122 | #define SHUFFLE_PATTERN_16_B__ 0x01 | ||
123 | #define SHUFFLE_PATTERN_16_C__ 0x02 | ||
124 | #define SHUFFLE_PATTERN_16_D__ 0x03 | ||
125 | #define SHUFFLE_PATTERN_16_E__ 0x04 | ||
126 | #define SHUFFLE_PATTERN_16_F__ 0x05 | ||
127 | #define SHUFFLE_PATTERN_16_G__ 0x06 | ||
128 | #define SHUFFLE_PATTERN_16_H__ 0x07 | ||
129 | #define SHUFFLE_PATTERN_16_I__ 0x08 | ||
130 | #define SHUFFLE_PATTERN_16_J__ 0x09 | ||
131 | #define SHUFFLE_PATTERN_16_K__ 0x0a | ||
132 | #define SHUFFLE_PATTERN_16_L__ 0x0b | ||
133 | #define SHUFFLE_PATTERN_16_M__ 0x0c | ||
134 | #define SHUFFLE_PATTERN_16_N__ 0x0d | ||
135 | #define SHUFFLE_PATTERN_16_O__ 0x0e | ||
136 | #define SHUFFLE_PATTERN_16_P__ 0x0f | ||
137 | #define SHUFFLE_PATTERN_16_a__ 0x10 | ||
138 | #define SHUFFLE_PATTERN_16_b__ 0x11 | ||
139 | #define SHUFFLE_PATTERN_16_c__ 0x12 | ||
140 | #define SHUFFLE_PATTERN_16_d__ 0x13 | ||
141 | #define SHUFFLE_PATTERN_16_e__ 0x14 | ||
142 | #define SHUFFLE_PATTERN_16_f__ 0x15 | ||
143 | #define SHUFFLE_PATTERN_16_g__ 0x16 | ||
144 | #define SHUFFLE_PATTERN_16_h__ 0x17 | ||
145 | #define SHUFFLE_PATTERN_16_i__ 0x18 | ||
146 | #define SHUFFLE_PATTERN_16_j__ 0x19 | ||
147 | #define SHUFFLE_PATTERN_16_k__ 0x1a | ||
148 | #define SHUFFLE_PATTERN_16_l__ 0x1b | ||
149 | #define SHUFFLE_PATTERN_16_m__ 0x1c | ||
150 | #define SHUFFLE_PATTERN_16_n__ 0x1d | ||
151 | #define SHUFFLE_PATTERN_16_o__ 0x1e | ||
152 | #define SHUFFLE_PATTERN_16_p__ 0x1f | ||
153 | #define SHUFFLE_PATTERN_16_X__ 0xc0 | ||
154 | #define SHUFFLE_PATTERN_16_x__ 0xc0 | ||
155 | #define SHUFFLE_PATTERN_16_0__ 0x80 | ||
156 | #define SHUFFLE_PATTERN_16_8__ 0xe0 | ||
157 | |||
158 | #define SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ | ||
159 | SHUFFLE_PATTERN_16_##A##__, \ | ||
160 | SHUFFLE_PATTERN_16_##B##__, \ | ||
161 | SHUFFLE_PATTERN_16_##C##__, \ | ||
162 | SHUFFLE_PATTERN_16_##D##__, \ | ||
163 | SHUFFLE_PATTERN_16_##E##__, \ | ||
164 | SHUFFLE_PATTERN_16_##F##__, \ | ||
165 | SHUFFLE_PATTERN_16_##G##__, \ | ||
166 | SHUFFLE_PATTERN_16_##H##__, \ | ||
167 | SHUFFLE_PATTERN_16_##I##__, \ | ||
168 | SHUFFLE_PATTERN_16_##J##__, \ | ||
169 | SHUFFLE_PATTERN_16_##K##__, \ | ||
170 | SHUFFLE_PATTERN_16_##L##__, \ | ||
171 | SHUFFLE_PATTERN_16_##M##__, \ | ||
172 | SHUFFLE_PATTERN_16_##N##__, \ | ||
173 | SHUFFLE_PATTERN_16_##O##__, \ | ||
174 | SHUFFLE_PATTERN_16_##P##__ | ||
175 | |||
176 | #define SHUFFLE16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ | ||
177 | ((const vector unsigned char){ \ | ||
178 | SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ | ||
179 | }) | ||
180 | |||
181 | #define SHUFB16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ | ||
182 | ((const qword){ \ | ||
183 | SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ | ||
184 | }) | ||
185 | |||
186 | #endif | ||
diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c deleted file mode 100644 index 69784c89788..00000000000 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ /dev/null | |||
@@ -1,641 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | #include <math.h> | ||
30 | |||
31 | #include "pipe/p_compiler.h" | ||
32 | #include "spu_main.h" | ||
33 | #include "spu_texture.h" | ||
34 | #include "spu_tile.h" | ||
35 | #include "spu_colorpack.h" | ||
36 | #include "spu_dcache.h" | ||
37 | |||
38 | |||
39 | /** | ||
40 | * Mark all tex cache entries as invalid. | ||
41 | */ | ||
42 | void | ||
43 | invalidate_tex_cache(void) | ||
44 | { | ||
45 | uint lvl; | ||
46 | for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) { | ||
47 | uint unit = 0; | ||
48 | uint bytes = 4 * spu.texture[unit].level[lvl].width | ||
49 | * spu.texture[unit].level[lvl].height; | ||
50 | |||
51 | if (spu.texture[unit].target == PIPE_TEXTURE_CUBE) | ||
52 | bytes *= 6; | ||
53 | else if (spu.texture[unit].target == PIPE_TEXTURE_3D) | ||
54 | bytes *= spu.texture[unit].level[lvl].depth; | ||
55 | |||
56 | spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes); | ||
57 | } | ||
58 | } | ||
59 | |||
60 | |||
61 | /** | ||
62 | * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ... | ||
63 | * | ||
64 | * NOTE: in the typical case of bilinear filtering, the four texels | ||
65 | * are in a 2x2 group so we could get by with just two dcache fetches | ||
66 | * (two side-by-side texels per fetch). But when bilinear filtering | ||
67 | * wraps around a texture edge, we'll probably need code like we have | ||
68 | * now. | ||
69 | * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time, | ||
70 | * it's quite likely that the four pixels in a quad will need some of the | ||
71 | * same texels. So look into doing texture fetches for four pixels at | ||
72 | * a time. | ||
73 | */ | ||
74 | static void | ||
75 | get_four_texels(const struct spu_texture_level *tlevel, uint face, | ||
76 | vec_int4 x, vec_int4 y, | ||
77 | vec_uint4 *texels) | ||
78 | { | ||
79 | unsigned texture_ea = (uintptr_t) tlevel->start; | ||
80 | const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ | ||
81 | const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ | ||
82 | const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ | ||
83 | const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */ | ||
84 | |||
85 | const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row); | ||
86 | const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t)); | ||
87 | |||
88 | qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); | ||
89 | tile_offset = si_mpy((qword) tile_offset, tile_size); | ||
90 | |||
91 | qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x); | ||
92 | texel_offset = si_mpyui(texel_offset, 4); | ||
93 | |||
94 | vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset); | ||
95 | |||
96 | texture_ea = texture_ea + face * tlevel->bytes_per_image; | ||
97 | |||
98 | spu_dcache_fetch_unaligned((qword *) & texels[0], | ||
99 | texture_ea + spu_extract(offset, 0), 4); | ||
100 | spu_dcache_fetch_unaligned((qword *) & texels[1], | ||
101 | texture_ea + spu_extract(offset, 1), 4); | ||
102 | spu_dcache_fetch_unaligned((qword *) & texels[2], | ||
103 | texture_ea + spu_extract(offset, 2), 4); | ||
104 | spu_dcache_fetch_unaligned((qword *) & texels[3], | ||
105 | texture_ea + spu_extract(offset, 3), 4); | ||
106 | } | ||
107 | |||
108 | |||
109 | /** clamp vec to [0, max] */ | ||
110 | static INLINE vector signed int | ||
111 | spu_clamp(vector signed int vec, vector signed int max) | ||
112 | { | ||
113 | static const vector signed int zero = {0,0,0,0}; | ||
114 | vector unsigned int c; | ||
115 | c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */ | ||
116 | vec = spu_sel(zero, vec, c); | ||
117 | c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */ | ||
118 | vec = spu_sel(vec, max, c); | ||
119 | return vec; | ||
120 | } | ||
121 | |||
122 | |||
123 | |||
124 | /** | ||
125 | * Do nearest texture sampling for four pixels. | ||
126 | * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). | ||
127 | */ | ||
128 | void | ||
129 | sample_texture_2d_nearest(vector float s, vector float t, | ||
130 | uint unit, uint level, uint face, | ||
131 | vector float colors[4]) | ||
132 | { | ||
133 | const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; | ||
134 | vector float ss = spu_mul(s, tlevel->scale_s); | ||
135 | vector float tt = spu_mul(t, tlevel->scale_t); | ||
136 | vector signed int is = spu_convts(ss, 0); | ||
137 | vector signed int it = spu_convts(tt, 0); | ||
138 | vec_uint4 texels[4]; | ||
139 | |||
140 | /* PIPE_TEX_WRAP_REPEAT */ | ||
141 | is = spu_and(is, tlevel->mask_s); | ||
142 | it = spu_and(it, tlevel->mask_t); | ||
143 | |||
144 | /* PIPE_TEX_WRAP_CLAMP */ | ||
145 | is = spu_clamp(is, tlevel->max_s); | ||
146 | it = spu_clamp(it, tlevel->max_t); | ||
147 | |||
148 | get_four_texels(tlevel, face, is, it, texels); | ||
149 | |||
150 | /* convert four packed ARGB pixels to float RRRR,GGGG,BBBB,AAAA */ | ||
151 | spu_unpack_A8R8G8B8_transpose4(texels, colors); | ||
152 | } | ||
153 | |||
154 | |||
155 | /** | ||
156 | * Do bilinear texture sampling for four pixels. | ||
157 | * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). | ||
158 | */ | ||
159 | void | ||
160 | sample_texture_2d_bilinear(vector float s, vector float t, | ||
161 | uint unit, uint level, uint face, | ||
162 | vector float colors[4]) | ||
163 | { | ||
164 | const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; | ||
165 | static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; | ||
166 | |||
167 | vector float ss = spu_madd(s, tlevel->scale_s, half); | ||
168 | vector float tt = spu_madd(t, tlevel->scale_t, half); | ||
169 | |||
170 | vector signed int is0 = spu_convts(ss, 0); | ||
171 | vector signed int it0 = spu_convts(tt, 0); | ||
172 | |||
173 | /* is1 = is0 + 1, it1 = it0 + 1 */ | ||
174 | vector signed int is1 = spu_add(is0, 1); | ||
175 | vector signed int it1 = spu_add(it0, 1); | ||
176 | |||
177 | /* PIPE_TEX_WRAP_REPEAT */ | ||
178 | is0 = spu_and(is0, tlevel->mask_s); | ||
179 | it0 = spu_and(it0, tlevel->mask_t); | ||
180 | is1 = spu_and(is1, tlevel->mask_s); | ||
181 | it1 = spu_and(it1, tlevel->mask_t); | ||
182 | |||
183 | /* PIPE_TEX_WRAP_CLAMP */ | ||
184 | is0 = spu_clamp(is0, tlevel->max_s); | ||
185 | it0 = spu_clamp(it0, tlevel->max_t); | ||
186 | is1 = spu_clamp(is1, tlevel->max_s); | ||
187 | it1 = spu_clamp(it1, tlevel->max_t); | ||
188 | |||
189 | /* get packed int texels */ | ||
190 | vector unsigned int texels[16]; | ||
191 | get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */ | ||
192 | get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */ | ||
193 | get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */ | ||
194 | get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */ | ||
195 | |||
196 | /* convert packed int texels to float colors */ | ||
197 | vector float ftexels[16]; | ||
198 | spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0); | ||
199 | spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4); | ||
200 | spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8); | ||
201 | spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12); | ||
202 | |||
203 | /* Compute weighting factors in [0,1] | ||
204 | * Multiply texcoord by 1024, AND with 1023, convert back to float. | ||
205 | */ | ||
206 | vector float ss1024 = spu_mul(ss, spu_splats(1024.0f)); | ||
207 | vector signed int iss1024 = spu_convts(ss1024, 0); | ||
208 | iss1024 = spu_and(iss1024, 1023); | ||
209 | vector float sWeights0 = spu_convtf(iss1024, 10); | ||
210 | |||
211 | vector float tt1024 = spu_mul(tt, spu_splats(1024.0f)); | ||
212 | vector signed int itt1024 = spu_convts(tt1024, 0); | ||
213 | itt1024 = spu_and(itt1024, 1023); | ||
214 | vector float tWeights0 = spu_convtf(itt1024, 10); | ||
215 | |||
216 | /* 1 - sWeight and 1 - tWeight */ | ||
217 | vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0); | ||
218 | vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0); | ||
219 | |||
220 | /* reds, for four pixels */ | ||
221 | ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/ | ||
222 | ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/ | ||
223 | ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/ | ||
224 | ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/ | ||
225 | colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]), | ||
226 | spu_add(ftexels[8], ftexels[12])); | ||
227 | |||
228 | /* greens, for four pixels */ | ||
229 | ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/ | ||
230 | ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/ | ||
231 | ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/ | ||
232 | ftexels[13] = spu_mul(ftexels[13], spu_mul(sWeights0, tWeights0)); /*lr*/ | ||
233 | colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]), | ||
234 | spu_add(ftexels[9], ftexels[13])); | ||
235 | |||
236 | /* blues, for four pixels */ | ||
237 | ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/ | ||
238 | ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/ | ||
239 | ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/ | ||
240 | ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/ | ||
241 | colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]), | ||
242 | spu_add(ftexels[10], ftexels[14])); | ||
243 | |||
244 | /* alphas, for four pixels */ | ||
245 | ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/ | ||
246 | ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/ | ||
247 | ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/ | ||
248 | ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/ | ||
249 | colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]), | ||
250 | spu_add(ftexels[11], ftexels[15])); | ||
251 | } | ||
252 | |||
253 | |||
254 | |||
255 | /** | ||
256 | * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h | ||
257 | */ | ||
258 | static INLINE void | ||
259 | transpose(vector unsigned int *mOut0, | ||
260 | vector unsigned int *mOut1, | ||
261 | vector unsigned int *mOut2, | ||
262 | vector unsigned int *mOut3, | ||
263 | vector unsigned int *mIn) | ||
264 | { | ||
265 | vector unsigned int abcd, efgh, ijkl, mnop; /* input vectors */ | ||
266 | vector unsigned int aeim, bfjn, cgko, dhlp; /* output vectors */ | ||
267 | vector unsigned int aibj, ckdl, emfn, gohp; /* intermediate vectors */ | ||
268 | |||
269 | vector unsigned char shufflehi = ((vector unsigned char) { | ||
270 | 0x00, 0x01, 0x02, 0x03, | ||
271 | 0x10, 0x11, 0x12, 0x13, | ||
272 | 0x04, 0x05, 0x06, 0x07, | ||
273 | 0x14, 0x15, 0x16, 0x17}); | ||
274 | vector unsigned char shufflelo = ((vector unsigned char) { | ||
275 | 0x08, 0x09, 0x0A, 0x0B, | ||
276 | 0x18, 0x19, 0x1A, 0x1B, | ||
277 | 0x0C, 0x0D, 0x0E, 0x0F, | ||
278 | 0x1C, 0x1D, 0x1E, 0x1F}); | ||
279 | abcd = *(mIn+0); | ||
280 | efgh = *(mIn+1); | ||
281 | ijkl = *(mIn+2); | ||
282 | mnop = *(mIn+3); | ||
283 | |||
284 | aibj = spu_shuffle(abcd, ijkl, shufflehi); | ||
285 | ckdl = spu_shuffle(abcd, ijkl, shufflelo); | ||
286 | emfn = spu_shuffle(efgh, mnop, shufflehi); | ||
287 | gohp = spu_shuffle(efgh, mnop, shufflelo); | ||
288 | |||
289 | aeim = spu_shuffle(aibj, emfn, shufflehi); | ||
290 | bfjn = spu_shuffle(aibj, emfn, shufflelo); | ||
291 | cgko = spu_shuffle(ckdl, gohp, shufflehi); | ||
292 | dhlp = spu_shuffle(ckdl, gohp, shufflelo); | ||
293 | |||
294 | *mOut0 = aeim; | ||
295 | *mOut1 = bfjn; | ||
296 | *mOut2 = cgko; | ||
297 | *mOut3 = dhlp; | ||
298 | } | ||
299 | |||
300 | |||
301 | /** | ||
302 | * Bilinear filtering, using int instead of float arithmetic for computing | ||
303 | * sample weights. | ||
304 | */ | ||
305 | void | ||
306 | sample_texture_2d_bilinear_int(vector float s, vector float t, | ||
307 | uint unit, uint level, uint face, | ||
308 | vector float colors[4]) | ||
309 | { | ||
310 | const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; | ||
311 | static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; | ||
312 | |||
313 | /* Scale texcoords by size of texture, and add half pixel bias */ | ||
314 | vector float ss = spu_madd(s, tlevel->scale_s, half); | ||
315 | vector float tt = spu_madd(t, tlevel->scale_t, half); | ||
316 | |||
317 | /* convert float coords to fixed-pt coords with 7 fraction bits */ | ||
318 | vector signed int is = spu_convts(ss, 7); /* XXX really need floor() here */ | ||
319 | vector signed int it = spu_convts(tt, 7); /* XXX really need floor() here */ | ||
320 | |||
321 | /* compute integer texel weights in [0, 127] */ | ||
322 | vector signed int sWeights0 = spu_and(is, 127); | ||
323 | vector signed int tWeights0 = spu_and(it, 127); | ||
324 | vector signed int sWeights1 = spu_sub(127, sWeights0); | ||
325 | vector signed int tWeights1 = spu_sub(127, tWeights0); | ||
326 | |||
327 | /* texel coords: is0 = is / 128, it0 = is / 128 */ | ||
328 | vector signed int is0 = spu_rlmask(is, -7); | ||
329 | vector signed int it0 = spu_rlmask(it, -7); | ||
330 | |||
331 | /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */ | ||
332 | vector signed int is1 = spu_add(is0, 1); | ||
333 | vector signed int it1 = spu_add(it0, 1); | ||
334 | |||
335 | /* PIPE_TEX_WRAP_REPEAT */ | ||
336 | is0 = spu_and(is0, tlevel->mask_s); | ||
337 | it0 = spu_and(it0, tlevel->mask_t); | ||
338 | is1 = spu_and(is1, tlevel->mask_s); | ||
339 | it1 = spu_and(it1, tlevel->mask_t); | ||
340 | |||
341 | /* PIPE_TEX_WRAP_CLAMP */ | ||
342 | is0 = spu_clamp(is0, tlevel->max_s); | ||
343 | it0 = spu_clamp(it0, tlevel->max_t); | ||
344 | is1 = spu_clamp(is1, tlevel->max_s); | ||
345 | it1 = spu_clamp(it1, tlevel->max_t); | ||
346 | |||
347 | /* get packed int texels */ | ||
348 | vector unsigned int texels[16]; | ||
349 | get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */ | ||
350 | get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */ | ||
351 | get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */ | ||
352 | get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */ | ||
353 | |||
354 | /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */ | ||
355 | { | ||
356 | static const unsigned char ZERO = 0x80; | ||
357 | int i; | ||
358 | for (i = 0; i < 16; i++) { | ||
359 | texels[i] = spu_shuffle(texels[i], texels[i], | ||
360 | ((vector unsigned char) { | ||
361 | ZERO, ZERO, ZERO, 1, | ||
362 | ZERO, ZERO, ZERO, 2, | ||
363 | ZERO, ZERO, ZERO, 3, | ||
364 | ZERO, ZERO, ZERO, 0})); | ||
365 | } | ||
366 | } | ||
367 | |||
368 | /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */ | ||
369 | vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7, | ||
370 | texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15; | ||
371 | transpose(&texel0, &texel1, &texel2, &texel3, texels + 0); | ||
372 | transpose(&texel4, &texel5, &texel6, &texel7, texels + 4); | ||
373 | transpose(&texel8, &texel9, &texel10, &texel11, texels + 8); | ||
374 | transpose(&texel12, &texel13, &texel14, &texel15, texels + 12); | ||
375 | |||
376 | /* computed weighted colors */ | ||
377 | vector unsigned int c0, c1, c2, c3, cSum; | ||
378 | |||
379 | /* red */ | ||
380 | c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ | ||
381 | c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ | ||
382 | c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ | ||
383 | c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ | ||
384 | cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); | ||
385 | colors[0] = spu_convtf(cSum, 22); | ||
386 | |||
387 | /* green */ | ||
388 | c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ | ||
389 | c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ | ||
390 | c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ | ||
391 | c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ | ||
392 | cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); | ||
393 | colors[1] = spu_convtf(cSum, 22); | ||
394 | |||
395 | /* blue */ | ||
396 | c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ | ||
397 | c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ | ||
398 | c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ | ||
399 | c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ | ||
400 | cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); | ||
401 | colors[2] = spu_convtf(cSum, 22); | ||
402 | |||
403 | /* alpha */ | ||
404 | c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ | ||
405 | c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ | ||
406 | c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ | ||
407 | c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ | ||
408 | cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); | ||
409 | colors[3] = spu_convtf(cSum, 22); | ||
410 | } | ||
411 | |||
412 | |||
413 | |||
414 | /** | ||
415 | * Compute level of detail factor from texcoords. | ||
416 | */ | ||
417 | static INLINE float | ||
418 | compute_lambda_2d(uint unit, vector float s, vector float t) | ||
419 | { | ||
420 | uint baseLevel = 0; | ||
421 | float width = spu.texture[unit].level[baseLevel].width; | ||
422 | float height = spu.texture[unit].level[baseLevel].width; | ||
423 | float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0)); | ||
424 | float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0)); | ||
425 | float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0)); | ||
426 | float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0)); | ||
427 | #if 0 | ||
428 | /* ideal value */ | ||
429 | float x = dsdx * dsdx + dtdx * dtdx; | ||
430 | float y = dsdy * dsdy + dtdy * dtdy; | ||
431 | float rho = x > y ? x : y; | ||
432 | rho = sqrtf(rho); | ||
433 | #else | ||
434 | /* approximation */ | ||
435 | dsdx = fabsf(dsdx); | ||
436 | dsdy = fabsf(dsdy); | ||
437 | dtdx = fabsf(dtdx); | ||
438 | dtdy = fabsf(dtdy); | ||
439 | float rho = (dsdx + dsdy + dtdx + dtdy) * 0.5; | ||
440 | #endif | ||
441 | float lambda = logf(rho) * 1.442695f; /* compute logbase2(rho) */ | ||
442 | return lambda; | ||
443 | } | ||
444 | |||
445 | |||
446 | /** | ||
447 | * Blend two sets of colors according to weight. | ||
448 | */ | ||
449 | static void | ||
450 | blend_colors(vector float c0[4], const vector float c1[4], float weight) | ||
451 | { | ||
452 | vector float t = spu_splats(weight); | ||
453 | vector float dc0 = spu_sub(c1[0], c0[0]); | ||
454 | vector float dc1 = spu_sub(c1[1], c0[1]); | ||
455 | vector float dc2 = spu_sub(c1[2], c0[2]); | ||
456 | vector float dc3 = spu_sub(c1[3], c0[3]); | ||
457 | c0[0] = spu_madd(dc0, t, c0[0]); | ||
458 | c0[1] = spu_madd(dc1, t, c0[1]); | ||
459 | c0[2] = spu_madd(dc2, t, c0[2]); | ||
460 | c0[3] = spu_madd(dc3, t, c0[3]); | ||
461 | } | ||
462 | |||
463 | |||
464 | /** | ||
465 | * Texture sampling with level of detail selection and possibly mipmap | ||
466 | * interpolation. | ||
467 | */ | ||
468 | void | ||
469 | sample_texture_2d_lod(vector float s, vector float t, | ||
470 | uint unit, uint level_ignored, uint face, | ||
471 | vector float colors[4]) | ||
472 | { | ||
473 | /* | ||
474 | * Note that we're computing a lambda/lod here that's used for all | ||
475 | * four pixels in the quad. | ||
476 | */ | ||
477 | float lambda = compute_lambda_2d(unit, s, t); | ||
478 | |||
479 | (void) face; | ||
480 | (void) level_ignored; | ||
481 | |||
482 | /* apply lod bias */ | ||
483 | lambda += spu.sampler[unit].lod_bias; | ||
484 | |||
485 | /* clamp */ | ||
486 | if (lambda < spu.sampler[unit].min_lod) | ||
487 | lambda = spu.sampler[unit].min_lod; | ||
488 | else if (lambda > spu.sampler[unit].max_lod) | ||
489 | lambda = spu.sampler[unit].max_lod; | ||
490 | |||
491 | if (lambda <= 0.0f) { | ||
492 | /* magnify */ | ||
493 | spu.mag_sample_texture_2d[unit](s, t, unit, 0, face, colors); | ||
494 | } | ||
495 | else { | ||
496 | /* minify */ | ||
497 | if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { | ||
498 | /* sample two mipmap levels and interpolate */ | ||
499 | int level = (int) lambda; | ||
500 | if (level > (int) spu.texture[unit].max_level) | ||
501 | level = spu.texture[unit].max_level; | ||
502 | spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors); | ||
503 | if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { | ||
504 | /* sample second mipmap level */ | ||
505 | float weight = lambda - (float) level; | ||
506 | level++; | ||
507 | if (level <= (int) spu.texture[unit].max_level) { | ||
508 | vector float colors2[4]; | ||
509 | spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors2); | ||
510 | blend_colors(colors, colors2, weight); | ||
511 | } | ||
512 | } | ||
513 | } | ||
514 | else { | ||
515 | /* sample one mipmap level */ | ||
516 | int level = (int) (lambda + 0.5f); | ||
517 | if (level > (int) spu.texture[unit].max_level) | ||
518 | level = spu.texture[unit].max_level; | ||
519 | spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors); | ||
520 | } | ||
521 | } | ||
522 | } | ||
523 | |||
524 | |||
525 | /** XXX need a SIMD version of this */ | ||
526 | static unsigned | ||
527 | choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) | ||
528 | { | ||
529 | /* | ||
530 | major axis | ||
531 | direction target sc tc ma | ||
532 | ---------- ------------------------------- --- --- --- | ||
533 | +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx | ||
534 | -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx | ||
535 | +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry | ||
536 | -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry | ||
537 | +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz | ||
538 | -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz | ||
539 | */ | ||
540 | const float arx = fabsf(rx); | ||
541 | const float ary = fabsf(ry); | ||
542 | const float arz = fabsf(rz); | ||
543 | unsigned face; | ||
544 | float sc, tc, ma; | ||
545 | |||
546 | if (arx > ary && arx > arz) { | ||
547 | if (rx >= 0.0F) { | ||
548 | face = PIPE_TEX_FACE_POS_X; | ||
549 | sc = -rz; | ||
550 | tc = -ry; | ||
551 | ma = arx; | ||
552 | } | ||
553 | else { | ||
554 | face = PIPE_TEX_FACE_NEG_X; | ||
555 | sc = rz; | ||
556 | tc = -ry; | ||
557 | ma = arx; | ||
558 | } | ||
559 | } | ||
560 | else if (ary > arx && ary > arz) { | ||
561 | if (ry >= 0.0F) { | ||
562 | face = PIPE_TEX_FACE_POS_Y; | ||
563 | sc = rx; | ||
564 | tc = rz; | ||
565 | ma = ary; | ||
566 | } | ||
567 | else { | ||
568 | face = PIPE_TEX_FACE_NEG_Y; | ||
569 | sc = rx; | ||
570 | tc = -rz; | ||
571 | ma = ary; | ||
572 | } | ||
573 | } | ||
574 | else { | ||
575 | if (rz > 0.0F) { | ||
576 | face = PIPE_TEX_FACE_POS_Z; | ||
577 | sc = rx; | ||
578 | tc = -ry; | ||
579 | ma = arz; | ||
580 | } | ||
581 | else { | ||
582 | face = PIPE_TEX_FACE_NEG_Z; | ||
583 | sc = -rx; | ||
584 | tc = -ry; | ||
585 | ma = arz; | ||
586 | } | ||
587 | } | ||
588 | |||
589 | *newS = (sc / ma + 1.0F) * 0.5F; | ||
590 | *newT = (tc / ma + 1.0F) * 0.5F; | ||
591 | |||
592 | return face; | ||
593 | } | ||
594 | |||
595 | |||
596 | |||
597 | void | ||
598 | sample_texture_cube(vector float s, vector float t, vector float r, | ||
599 | uint unit, vector float colors[4]) | ||
600 | { | ||
601 | uint p, faces[4], level = 0; | ||
602 | float newS[4], newT[4]; | ||
603 | |||
604 | /* Compute cube faces referenced by the four sets of texcoords. | ||
605 | * XXX we should SIMD-ize this. | ||
606 | */ | ||
607 | for (p = 0; p < 4; p++) { | ||
608 | float rx = spu_extract(s, p); | ||
609 | float ry = spu_extract(t, p); | ||
610 | float rz = spu_extract(r, p); | ||
611 | faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]); | ||
612 | } | ||
613 | |||
614 | if (faces[0] == faces[1] && | ||
615 | faces[0] == faces[2] && | ||
616 | faces[0] == faces[3]) { | ||
617 | /* GOOD! All four texcoords refer to the same cube face */ | ||
618 | s = (vector float) {newS[0], newS[1], newS[2], newS[3]}; | ||
619 | t = (vector float) {newT[0], newT[1], newT[2], newT[3]}; | ||
620 | spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors); | ||
621 | } | ||
622 | else { | ||
623 | /* BAD! The four texcoords refer to different faces */ | ||
624 | for (p = 0; p < 4; p++) { | ||
625 | vector float c[4]; | ||
626 | |||
627 | spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]), | ||
628 | unit, level, faces[p], c); | ||
629 | |||
630 | float red = spu_extract(c[0], p); | ||
631 | float green = spu_extract(c[1], p); | ||
632 | float blue = spu_extract(c[2], p); | ||
633 | float alpha = spu_extract(c[3], p); | ||
634 | |||
635 | colors[0] = spu_insert(red, colors[0], p); | ||
636 | colors[1] = spu_insert(green, colors[1], p); | ||
637 | colors[2] = spu_insert(blue, colors[2], p); | ||
638 | colors[3] = spu_insert(alpha, colors[3], p); | ||
639 | } | ||
640 | } | ||
641 | } | ||
diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h deleted file mode 100644 index 7b75b007b5a..00000000000 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ /dev/null | |||
@@ -1,67 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #ifndef SPU_TEXTURE_H | ||
29 | #define SPU_TEXTURE_H | ||
30 | |||
31 | |||
32 | #include "pipe/p_compiler.h" | ||
33 | |||
34 | |||
35 | extern void | ||
36 | invalidate_tex_cache(void); | ||
37 | |||
38 | |||
39 | extern void | ||
40 | sample_texture_2d_nearest(vector float s, vector float t, | ||
41 | uint unit, uint level, uint face, | ||
42 | vector float colors[4]); | ||
43 | |||
44 | |||
45 | extern void | ||
46 | sample_texture_2d_bilinear(vector float s, vector float t, | ||
47 | uint unit, uint level, uint face, | ||
48 | vector float colors[4]); | ||
49 | |||
50 | extern void | ||
51 | sample_texture_2d_bilinear_int(vector float s, vector float t, | ||
52 | uint unit, uint level, uint face, | ||
53 | vector float colors[4]); | ||
54 | |||
55 | |||
56 | extern void | ||
57 | sample_texture_2d_lod(vector float s, vector float t, | ||
58 | uint unit, uint level, uint face, | ||
59 | vector float colors[4]); | ||
60 | |||
61 | |||
62 | extern void | ||
63 | sample_texture_cube(vector float s, vector float t, vector float r, | ||
64 | uint unit, vector float colors[4]); | ||
65 | |||
66 | |||
67 | #endif /* SPU_TEXTURE_H */ | ||
diff --git a/src/gallium/drivers/cell/spu/spu_tgsi_exec.h b/src/gallium/drivers/cell/spu/spu_tgsi_exec.h deleted file mode 100644 index 6f2a3d30b91..00000000000 --- a/src/gallium/drivers/cell/spu/spu_tgsi_exec.h +++ /dev/null | |||
@@ -1,158 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * Copyright 2009-2010 VMware, Inc. All rights Reserved. | ||
6 | * | ||
7 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
8 | * copy of this software and associated documentation files (the | ||
9 | * "Software"), to deal in the Software without restriction, including | ||
10 | * without limitation the rights to use, copy, modify, merge, publish, | ||
11 | * distribute, sub license, and/or sell copies of the Software, and to | ||
12 | * permit persons to whom the Software is furnished to do so, subject to | ||
13 | * the following conditions: | ||
14 | * | ||
15 | * The above copyright notice and this permission notice (including the | ||
16 | * next paragraph) shall be included in all copies or substantial portions | ||
17 | * of the Software. | ||
18 | * | ||
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
22 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
23 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
24 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
25 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
26 | * | ||
27 | **************************************************************************/ | ||
28 | |||
29 | #ifndef SPU_TGSI_EXEC_H | ||
30 | #define SPU_TGSI_EXEC_H | ||
31 | |||
32 | #include "pipe/p_compiler.h" | ||
33 | #include "pipe/p_state.h" | ||
34 | |||
35 | #if defined __cplusplus | ||
36 | extern "C" { | ||
37 | #endif | ||
38 | |||
39 | |||
40 | #define NUM_CHANNELS 4 /* R,G,B,A */ | ||
41 | #define QUAD_SIZE 4 /* 4 pixel/quad */ | ||
42 | |||
43 | |||
44 | |||
45 | #define TGSI_EXEC_NUM_TEMPS 128 | ||
46 | #define TGSI_EXEC_NUM_IMMEDIATES 256 | ||
47 | |||
48 | /* | ||
49 | * Locations of various utility registers (_IDX = Index, _CHAN = Channel) | ||
50 | */ | ||
51 | #define TGSI_EXEC_TEMP_00000000_IDX (TGSI_EXEC_NUM_TEMPS + 0) | ||
52 | #define TGSI_EXEC_TEMP_00000000_CHAN 0 | ||
53 | |||
54 | #define TGSI_EXEC_TEMP_7FFFFFFF_IDX (TGSI_EXEC_NUM_TEMPS + 0) | ||
55 | #define TGSI_EXEC_TEMP_7FFFFFFF_CHAN 1 | ||
56 | |||
57 | #define TGSI_EXEC_TEMP_80000000_IDX (TGSI_EXEC_NUM_TEMPS + 0) | ||
58 | #define TGSI_EXEC_TEMP_80000000_CHAN 2 | ||
59 | |||
60 | #define TGSI_EXEC_TEMP_FFFFFFFF_IDX (TGSI_EXEC_NUM_TEMPS + 0) | ||
61 | #define TGSI_EXEC_TEMP_FFFFFFFF_CHAN 3 | ||
62 | |||
63 | #define TGSI_EXEC_TEMP_ONE_IDX (TGSI_EXEC_NUM_TEMPS + 1) | ||
64 | #define TGSI_EXEC_TEMP_ONE_CHAN 0 | ||
65 | |||
66 | #define TGSI_EXEC_TEMP_TWO_IDX (TGSI_EXEC_NUM_TEMPS + 1) | ||
67 | #define TGSI_EXEC_TEMP_TWO_CHAN 1 | ||
68 | |||
69 | #define TGSI_EXEC_TEMP_128_IDX (TGSI_EXEC_NUM_TEMPS + 1) | ||
70 | #define TGSI_EXEC_TEMP_128_CHAN 2 | ||
71 | |||
72 | #define TGSI_EXEC_TEMP_MINUS_128_IDX (TGSI_EXEC_NUM_TEMPS + 1) | ||
73 | #define TGSI_EXEC_TEMP_MINUS_128_CHAN 3 | ||
74 | |||
75 | #define TGSI_EXEC_TEMP_KILMASK_IDX (TGSI_EXEC_NUM_TEMPS + 2) | ||
76 | #define TGSI_EXEC_TEMP_KILMASK_CHAN 0 | ||
77 | |||
78 | #define TGSI_EXEC_TEMP_OUTPUT_IDX (TGSI_EXEC_NUM_TEMPS + 2) | ||
79 | #define TGSI_EXEC_TEMP_OUTPUT_CHAN 1 | ||
80 | |||
81 | #define TGSI_EXEC_TEMP_PRIMITIVE_IDX (TGSI_EXEC_NUM_TEMPS + 2) | ||
82 | #define TGSI_EXEC_TEMP_PRIMITIVE_CHAN 2 | ||
83 | |||
84 | /* NVIDIA condition code (CC) vector | ||
85 | */ | ||
86 | #define TGSI_EXEC_CC_GT 0x01 | ||
87 | #define TGSI_EXEC_CC_EQ 0x02 | ||
88 | #define TGSI_EXEC_CC_LT 0x04 | ||
89 | #define TGSI_EXEC_CC_UN 0x08 | ||
90 | |||
91 | #define TGSI_EXEC_CC_X_MASK 0x000000ff | ||
92 | #define TGSI_EXEC_CC_X_SHIFT 0 | ||
93 | #define TGSI_EXEC_CC_Y_MASK 0x0000ff00 | ||
94 | #define TGSI_EXEC_CC_Y_SHIFT 8 | ||
95 | #define TGSI_EXEC_CC_Z_MASK 0x00ff0000 | ||
96 | #define TGSI_EXEC_CC_Z_SHIFT 16 | ||
97 | #define TGSI_EXEC_CC_W_MASK 0xff000000 | ||
98 | #define TGSI_EXEC_CC_W_SHIFT 24 | ||
99 | |||
100 | #define TGSI_EXEC_TEMP_CC_IDX (TGSI_EXEC_NUM_TEMPS + 2) | ||
101 | #define TGSI_EXEC_TEMP_CC_CHAN 3 | ||
102 | |||
103 | #define TGSI_EXEC_TEMP_THREE_IDX (TGSI_EXEC_NUM_TEMPS + 3) | ||
104 | #define TGSI_EXEC_TEMP_THREE_CHAN 0 | ||
105 | |||
106 | #define TGSI_EXEC_TEMP_HALF_IDX (TGSI_EXEC_NUM_TEMPS + 3) | ||
107 | #define TGSI_EXEC_TEMP_HALF_CHAN 1 | ||
108 | |||
109 | /* execution mask, each value is either 0 or ~0 */ | ||
110 | #define TGSI_EXEC_MASK_IDX (TGSI_EXEC_NUM_TEMPS + 3) | ||
111 | #define TGSI_EXEC_MASK_CHAN 2 | ||
112 | |||
113 | /* 4 register buffer for various purposes */ | ||
114 | #define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4) | ||
115 | #define TGSI_EXEC_NUM_TEMP_R 4 | ||
116 | |||
117 | #define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 8) | ||
118 | #define TGSI_EXEC_NUM_ADDRS 1 | ||
119 | |||
120 | /* predicate register */ | ||
121 | #define TGSI_EXEC_TEMP_P0 (TGSI_EXEC_NUM_TEMPS + 9) | ||
122 | #define TGSI_EXEC_NUM_PREDS 1 | ||
123 | |||
124 | #define TGSI_EXEC_NUM_TEMP_EXTRAS 10 | ||
125 | |||
126 | |||
127 | |||
128 | #define TGSI_EXEC_MAX_NESTING 32 | ||
129 | #define TGSI_EXEC_MAX_COND_NESTING TGSI_EXEC_MAX_NESTING | ||
130 | #define TGSI_EXEC_MAX_LOOP_NESTING TGSI_EXEC_MAX_NESTING | ||
131 | #define TGSI_EXEC_MAX_SWITCH_NESTING TGSI_EXEC_MAX_NESTING | ||
132 | #define TGSI_EXEC_MAX_CALL_NESTING TGSI_EXEC_MAX_NESTING | ||
133 | |||
134 | /* The maximum number of input attributes per vertex. For 2D | ||
135 | * input register files, this is the stride between two 1D | ||
136 | * arrays. | ||
137 | */ | ||
138 | #define TGSI_EXEC_MAX_INPUT_ATTRIBS 17 | ||
139 | |||
140 | /* The maximum number of constant vectors per constant buffer. | ||
141 | */ | ||
142 | #define TGSI_EXEC_MAX_CONST_BUFFER 4096 | ||
143 | |||
144 | /* The maximum number of vertices per primitive */ | ||
145 | #define TGSI_MAX_PRIM_VERTICES 6 | ||
146 | |||
147 | /* The maximum number of primitives to be generated */ | ||
148 | #define TGSI_MAX_PRIMITIVES 64 | ||
149 | |||
150 | /* The maximum total number of vertices */ | ||
151 | #define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS) | ||
152 | |||
153 | |||
154 | #if defined __cplusplus | ||
155 | } /* extern "C" */ | ||
156 | #endif | ||
157 | |||
158 | #endif /* SPU_TGSI_EXEC_H */ | ||
diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c deleted file mode 100644 index 6905015a483..00000000000 --- a/src/gallium/drivers/cell/spu/spu_tile.c +++ /dev/null | |||
@@ -1,126 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | |||
30 | #include "spu_tile.h" | ||
31 | #include "spu_main.h" | ||
32 | |||
33 | |||
34 | /** | ||
35 | * Get tile of color or Z values from main memory, put into SPU memory. | ||
36 | */ | ||
37 | void | ||
38 | get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf) | ||
39 | { | ||
40 | const uint offset = ty * spu.fb.width_tiles + tx; | ||
41 | const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4); | ||
42 | const ubyte *src = zBuf ? spu.fb.depth_start : spu.fb.color_start; | ||
43 | |||
44 | src += offset * bytesPerTile; | ||
45 | |||
46 | ASSERT(tx < spu.fb.width_tiles); | ||
47 | ASSERT(ty < spu.fb.height_tiles); | ||
48 | ASSERT_ALIGN16(tile); | ||
49 | /* | ||
50 | printf("get_tile: dest: %p src: 0x%x size: %d\n", | ||
51 | tile, (unsigned int) src, bytesPerTile); | ||
52 | */ | ||
53 | mfc_get(tile->ui, /* dest in local memory */ | ||
54 | (unsigned int) src, /* src in main memory */ | ||
55 | bytesPerTile, | ||
56 | tag, | ||
57 | 0, /* tid */ | ||
58 | 0 /* rid */); | ||
59 | } | ||
60 | |||
61 | |||
62 | /** | ||
63 | * Move tile of color or Z values from SPU memory to main memory. | ||
64 | */ | ||
65 | void | ||
66 | put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf) | ||
67 | { | ||
68 | const uint offset = ty * spu.fb.width_tiles + tx; | ||
69 | const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4); | ||
70 | ubyte *dst = zBuf ? spu.fb.depth_start : spu.fb.color_start; | ||
71 | |||
72 | dst += offset * bytesPerTile; | ||
73 | |||
74 | ASSERT(tx < spu.fb.width_tiles); | ||
75 | ASSERT(ty < spu.fb.height_tiles); | ||
76 | ASSERT_ALIGN16(tile); | ||
77 | /* | ||
78 | printf("SPU %u: put_tile: src: %p dst: 0x%x size: %d\n", | ||
79 | spu.init.id, | ||
80 | tile, (unsigned int) dst, bytesPerTile); | ||
81 | */ | ||
82 | mfc_put((void *) tile->ui, /* src in local memory */ | ||
83 | (unsigned int) dst, /* dst in main memory */ | ||
84 | bytesPerTile, | ||
85 | tag, | ||
86 | 0, /* tid */ | ||
87 | 0 /* rid */); | ||
88 | } | ||
89 | |||
90 | |||
91 | /** | ||
92 | * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled | ||
93 | * tiles back to the main framebuffer. | ||
94 | */ | ||
95 | void | ||
96 | really_clear_tiles(uint surfaceIndex) | ||
97 | { | ||
98 | const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; | ||
99 | uint i; | ||
100 | |||
101 | if (surfaceIndex == 0) { | ||
102 | clear_c_tile(&spu.ctile); | ||
103 | |||
104 | for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { | ||
105 | uint tx = i % spu.fb.width_tiles; | ||
106 | uint ty = i / spu.fb.width_tiles; | ||
107 | if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) { | ||
108 | put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); | ||
109 | } | ||
110 | } | ||
111 | } | ||
112 | else { | ||
113 | clear_z_tile(&spu.ztile); | ||
114 | |||
115 | for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { | ||
116 | uint tx = i % spu.fb.width_tiles; | ||
117 | uint ty = i / spu.fb.width_tiles; | ||
118 | if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR) | ||
119 | put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1); | ||
120 | } | ||
121 | } | ||
122 | |||
123 | #if 0 | ||
124 | wait_on_mask(1 << TAG_SURFACE_CLEAR); | ||
125 | #endif | ||
126 | } | ||
diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h deleted file mode 100644 index 7bfb52be8f3..00000000000 --- a/src/gallium/drivers/cell/spu/spu_tile.h +++ /dev/null | |||
@@ -1,75 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | #ifndef SPU_TILE_H | ||
29 | #define SPU_TILE_H | ||
30 | |||
31 | |||
32 | #include <libmisc.h> | ||
33 | #include <spu_mfcio.h> | ||
34 | #include "spu_main.h" | ||
35 | #include "cell/common.h" | ||
36 | |||
37 | |||
38 | |||
39 | extern void | ||
40 | get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf); | ||
41 | |||
42 | extern void | ||
43 | put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf); | ||
44 | |||
45 | extern void | ||
46 | really_clear_tiles(uint surfaceIndex); | ||
47 | |||
48 | |||
49 | static INLINE void | ||
50 | clear_c_tile(tile_t *ctile) | ||
51 | { | ||
52 | memset32((uint*) ctile->ui, | ||
53 | spu.fb.color_clear_value, | ||
54 | TILE_SIZE * TILE_SIZE); | ||
55 | } | ||
56 | |||
57 | |||
58 | static INLINE void | ||
59 | clear_z_tile(tile_t *ztile) | ||
60 | { | ||
61 | if (spu.fb.zsize == 2) { | ||
62 | memset16((ushort*) ztile->us, | ||
63 | spu.fb.depth_clear_value, | ||
64 | TILE_SIZE * TILE_SIZE); | ||
65 | } | ||
66 | else { | ||
67 | ASSERT(spu.fb.zsize != 0); | ||
68 | memset32((uint*) ztile->ui, | ||
69 | spu.fb.depth_clear_value, | ||
70 | TILE_SIZE * TILE_SIZE); | ||
71 | } | ||
72 | } | ||
73 | |||
74 | |||
75 | #endif /* SPU_TILE_H */ | ||
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c deleted file mode 100644 index efeebca27bb..00000000000 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ /dev/null | |||
@@ -1,843 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | /** | ||
29 | * Triangle rendering within a tile. | ||
30 | */ | ||
31 | |||
32 | #include "pipe/p_compiler.h" | ||
33 | #include "pipe/p_format.h" | ||
34 | #include "util/u_math.h" | ||
35 | #include "spu_colorpack.h" | ||
36 | #include "spu_main.h" | ||
37 | #include "spu_shuffle.h" | ||
38 | #include "spu_texture.h" | ||
39 | #include "spu_tile.h" | ||
40 | #include "spu_tri.h" | ||
41 | |||
42 | |||
43 | /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ | ||
44 | typedef vector unsigned int mask_t; | ||
45 | |||
46 | |||
47 | |||
48 | /** | ||
49 | * Simplified types taken from other parts of Gallium | ||
50 | */ | ||
/* Minimal view of a vertex as used by this file: a run of 4-float
 * attribute vectors.
 */
struct vertex_header {
   /* Declared with a single element, but the setup code in this file
    * indexes it by attribute slot (data[slot]).  Slot 0 is read as the
    * vertex position (x, y, z, w in elements 0..3).
    */
   vector float data[1];
};
54 | |||
55 | |||
56 | |||
57 | /* XXX fix this */ | ||
58 | #undef CEILF | ||
59 | #define CEILF(X) ((float) (int) ((X) + 0.99999f)) | ||
60 | |||
61 | |||
62 | #define QUAD_TOP_LEFT 0 | ||
63 | #define QUAD_TOP_RIGHT 1 | ||
64 | #define QUAD_BOTTOM_LEFT 2 | ||
65 | #define QUAD_BOTTOM_RIGHT 3 | ||
66 | #define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT) | ||
67 | #define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT) | ||
68 | #define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT) | ||
69 | #define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) | ||
70 | #define MASK_ALL 0xf | ||
71 | |||
72 | |||
73 | #define CHAN0 0 | ||
74 | #define CHAN1 1 | ||
75 | #define CHAN2 2 | ||
76 | #define CHAN3 3 | ||
77 | |||
78 | |||
79 | #define DEBUG_VERTS 0 | ||
80 | |||
/**
 * Triangle edge info.
 * One instance exists per triangle edge (ebot, etop, emaj in
 * struct setup_stage).  The deltas are filled in by
 * setup_sort_vertices(); the stepping fields by setup_tri_edges().
 */
struct edge {
   union {
      struct {
         float dx;   /**< X(v1) - X(v0), used only during setup */
         float dy;   /**< Y(v1) - Y(v0), used only during setup */
      };
      vec_float4 ds; /**< vector accessor for dx and dy (elements 0 and 1);
                      *   written as a whole vector difference of two
                      *   vertex positions */
   };
   float dxdy;   /**< dx/dy */
   float sx, sy; /**< first sample point coord; advanced by subtriangle()
                  *   so the edge can be continued in the next half */
   int lines;    /**< number of lines on this edge */
};
96 | |||
97 | |||
/**
 * Plane-equation coefficients for one 4-component vertex attribute.
 * eval_inputs() evaluates each component as a0 + x * dadx + y * dady.
 */
struct interp_coef
{
   vector float a0;    /* constant term */
   vector float dadx;  /* change of the attribute per unit step in x */
   vector float dady;  /* change of the attribute per unit step in y */
};
104 | |||
105 | |||
/**
 * Triangle setup info (derived from draw_stage).
 * Also used for line drawing (taking some liberties).
 *
 * A single file-scope instance ("setup") holds the state of the
 * triangle currently being rasterized into the current tile.
 */
struct setup_stage {

   /* Vertices are just an array of floats making up each attribute in
    * turn.  Currently fixed at 4 floats, but should change in time.
    * Codegen will help cope with this.
    *
    * The four pointers overlay one qword so setup_sort_vertices() can
    * write all of them with a single shuffle of the incoming vector.
    */
   union {
      struct {
         const struct vertex_header *vmin;
         const struct vertex_header *vmid;
         const struct vertex_header *vmax;
         const struct vertex_header *vprovoke;
      };
      qword vertex_headers;
   };

   struct edge ebot;   /* vmin -> vmid */
   struct edge etop;   /* vmid -> vmax */
   struct edge emaj;   /* vmin -> vmax */

   float oneOverArea;   /* XXX maybe make into vector? */

   uint facing;   /* CELL_FACING_FRONT (0) or CELL_FACING_BACK (1);
                   * used to index spu.fragment_ops[] */

   uint tx, ty;   /**< position of current tile (x, y) */

   /* Clip rectangle; tri_draw() sets it to the bounds of the current
    * tile.  The four ints overlay one qword for vector comparisons.
    */
   union {
      struct {
         int cliprect_minx;
         int cliprect_miny;
         int cliprect_maxx;
         int cliprect_maxy;
      };
      qword cliprect;
   };

   struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];

   /* Pending pair of scanlines (one row of 2x2 quads) awaiting
    * flush_spans().
    */
   struct {
      vec_int4 quad;      /**< [0] = row0, [1] = row1; {left[0],left[1],right[0],right[1]} */
      int y;              /* y of the quad row, always a block() value */
      unsigned y_flags;   /* bit 0: even (top) line written,
                           * bit 1: odd (bottom) line written */
      unsigned mask;      /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
   } span;
};
155 | |||
156 | |||
157 | static struct setup_stage setup; | ||
158 | |||
159 | |||
160 | static INLINE vector float | ||
161 | splatx(vector float v) | ||
162 | { | ||
163 | return spu_splats(spu_extract(v, CHAN0)); | ||
164 | } | ||
165 | |||
166 | static INLINE vector float | ||
167 | splaty(vector float v) | ||
168 | { | ||
169 | return spu_splats(spu_extract(v, CHAN1)); | ||
170 | } | ||
171 | |||
172 | static INLINE vector float | ||
173 | splatz(vector float v) | ||
174 | { | ||
175 | return spu_splats(spu_extract(v, CHAN2)); | ||
176 | } | ||
177 | |||
178 | static INLINE vector float | ||
179 | splatw(vector float v) | ||
180 | { | ||
181 | return spu_splats(spu_extract(v, CHAN3)); | ||
182 | } | ||
183 | |||
184 | |||
185 | /** | ||
186 | * Setup fragment shader inputs by evaluating triangle's vertex | ||
187 | * attribute coefficient info. | ||
188 | * \param x quad x pos | ||
189 | * \param y quad y pos | ||
190 | * \param fragZ returns quad Z values | ||
191 | * \param fragInputs returns fragment program inputs | ||
192 | * Note: this code could be incorporated into the fragment program | ||
193 | * itself to avoid the loop and switch. | ||
194 | */ | ||
195 | static void | ||
196 | eval_inputs(float x, float y, vector float *fragZ, vector float fragInputs[]) | ||
197 | { | ||
198 | static const vector float deltaX = (const vector float) {0, 1, 0, 1}; | ||
199 | static const vector float deltaY = (const vector float) {0, 0, 1, 1}; | ||
200 | |||
201 | const uint posSlot = 0; | ||
202 | const vector float pos = setup.coef[posSlot].a0; | ||
203 | const vector float dposdx = setup.coef[posSlot].dadx; | ||
204 | const vector float dposdy = setup.coef[posSlot].dady; | ||
205 | const vector float fragX = spu_splats(x) + deltaX; | ||
206 | const vector float fragY = spu_splats(y) + deltaY; | ||
207 | vector float fragW, wInv; | ||
208 | uint i; | ||
209 | |||
210 | *fragZ = splatz(pos) + fragX * splatz(dposdx) + fragY * splatz(dposdy); | ||
211 | fragW = splatw(pos) + fragX * splatw(dposdx) + fragY * splatw(dposdy); | ||
212 | wInv = spu_re(fragW); /* 1 / w */ | ||
213 | |||
214 | /* loop over fragment program inputs */ | ||
215 | for (i = 0; i < spu.vertex_info.num_attribs; i++) { | ||
216 | uint attr = i + 1; | ||
217 | enum interp_mode interp = spu.vertex_info.attrib[attr].interp_mode; | ||
218 | |||
219 | /* constant term */ | ||
220 | vector float a0 = setup.coef[attr].a0; | ||
221 | vector float r0 = splatx(a0); | ||
222 | vector float r1 = splaty(a0); | ||
223 | vector float r2 = splatz(a0); | ||
224 | vector float r3 = splatw(a0); | ||
225 | |||
226 | if (interp == INTERP_LINEAR || interp == INTERP_PERSPECTIVE) { | ||
227 | /* linear term */ | ||
228 | vector float dadx = setup.coef[attr].dadx; | ||
229 | vector float dady = setup.coef[attr].dady; | ||
230 | /* Use SPU intrinsics here to get slightly better code. | ||
231 | * originally: r0 += fragX * splatx(dadx) + fragY * splatx(dady); | ||
232 | */ | ||
233 | r0 = spu_madd(fragX, splatx(dadx), spu_madd(fragY, splatx(dady), r0)); | ||
234 | r1 = spu_madd(fragX, splaty(dadx), spu_madd(fragY, splaty(dady), r1)); | ||
235 | r2 = spu_madd(fragX, splatz(dadx), spu_madd(fragY, splatz(dady), r2)); | ||
236 | r3 = spu_madd(fragX, splatw(dadx), spu_madd(fragY, splatw(dady), r3)); | ||
237 | if (interp == INTERP_PERSPECTIVE) { | ||
238 | /* perspective term */ | ||
239 | r0 *= wInv; | ||
240 | r1 *= wInv; | ||
241 | r2 *= wInv; | ||
242 | r3 *= wInv; | ||
243 | } | ||
244 | } | ||
245 | fragInputs[CHAN0] = r0; | ||
246 | fragInputs[CHAN1] = r1; | ||
247 | fragInputs[CHAN2] = r2; | ||
248 | fragInputs[CHAN3] = r3; | ||
249 | fragInputs += 4; | ||
250 | } | ||
251 | } | ||
252 | |||
253 | |||
254 | /** | ||
255 | * Emit a quad (pass to next stage). No clipping is done. | ||
256 | * Note: about 1/5 to 1/7 of the time, mask is zero and this function | ||
257 | * should be skipped. But adding the test for that slows things down | ||
258 | * overall. | ||
259 | */ | ||
260 | static INLINE void | ||
261 | emit_quad( int x, int y, mask_t mask) | ||
262 | { | ||
263 | /* If any bits in mask are set... */ | ||
264 | if (spu_extract(spu_orx(mask), 0)) { | ||
265 | const int ix = x - setup.cliprect_minx; | ||
266 | const int iy = y - setup.cliprect_miny; | ||
267 | |||
268 | spu.cur_ctile_status = TILE_STATUS_DIRTY; | ||
269 | spu.cur_ztile_status = TILE_STATUS_DIRTY; | ||
270 | |||
271 | { | ||
272 | /* | ||
273 | * Run fragment shader, execute per-fragment ops, update fb/tile. | ||
274 | */ | ||
275 | vector float inputs[4*4], outputs[2*4]; | ||
276 | vector unsigned int kill_mask; | ||
277 | vector float fragZ; | ||
278 | |||
279 | eval_inputs((float) x, (float) y, &fragZ, inputs); | ||
280 | |||
281 | ASSERT(spu.fragment_program); | ||
282 | ASSERT(spu.fragment_ops); | ||
283 | |||
284 | /* Execute the current fragment program */ | ||
285 | kill_mask = spu.fragment_program(inputs, outputs, spu.constants); | ||
286 | |||
287 | mask = spu_andc(mask, kill_mask); | ||
288 | |||
289 | /* Execute per-fragment/quad operations, including: | ||
290 | * alpha test, z test, stencil test, blend and framebuffer writing. | ||
291 | * Note that there are two different fragment operations functions | ||
292 | * that can be called, one for front-facing fragments, and one | ||
293 | * for back-facing fragments. (Often the two are the same; | ||
294 | * but in some cases, like two-sided stenciling, they can be | ||
295 | * very different.) So choose the correct function depending | ||
296 | * on the calculated facing. | ||
297 | */ | ||
298 | spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile, | ||
299 | fragZ, | ||
300 | outputs[0*4+0], | ||
301 | outputs[0*4+1], | ||
302 | outputs[0*4+2], | ||
303 | outputs[0*4+3], | ||
304 | mask); | ||
305 | } | ||
306 | } | ||
307 | } | ||
308 | |||
309 | |||
310 | /** | ||
311 | * Given an X or Y coordinate, return the block/quad coordinate that it | ||
312 | * belongs to. | ||
313 | */ | ||
314 | static INLINE int | ||
315 | block(int x) | ||
316 | { | ||
317 | return x & ~1; | ||
318 | } | ||
319 | |||
320 | |||
321 | /** | ||
322 | * Render a horizontal span of quads | ||
323 | */ | ||
324 | static void | ||
325 | flush_spans(void) | ||
326 | { | ||
327 | int minleft, maxright; | ||
328 | |||
329 | const int l0 = spu_extract(setup.span.quad, 0); | ||
330 | const int l1 = spu_extract(setup.span.quad, 1); | ||
331 | const int r0 = spu_extract(setup.span.quad, 2); | ||
332 | const int r1 = spu_extract(setup.span.quad, 3); | ||
333 | |||
334 | switch (setup.span.y_flags) { | ||
335 | case 0x3: | ||
336 | /* both odd and even lines written (both quad rows) */ | ||
337 | minleft = MIN2(l0, l1); | ||
338 | maxright = MAX2(r0, r1); | ||
339 | break; | ||
340 | |||
341 | case 0x1: | ||
342 | /* only even line written (quad top row) */ | ||
343 | minleft = l0; | ||
344 | maxright = r0; | ||
345 | break; | ||
346 | |||
347 | case 0x2: | ||
348 | /* only odd line written (quad bottom row) */ | ||
349 | minleft = l1; | ||
350 | maxright = r1; | ||
351 | break; | ||
352 | |||
353 | default: | ||
354 | return; | ||
355 | } | ||
356 | |||
357 | /* OK, we're very likely to need the tile data now. | ||
358 | * clear or finish waiting if needed. | ||
359 | */ | ||
360 | if (spu.cur_ctile_status == TILE_STATUS_GETTING) { | ||
361 | /* wait for mfc_get() to complete */ | ||
362 | //printf("SPU: %u: waiting for ctile\n", spu.init.id); | ||
363 | wait_on_mask(1 << TAG_READ_TILE_COLOR); | ||
364 | spu.cur_ctile_status = TILE_STATUS_CLEAN; | ||
365 | } | ||
366 | else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) { | ||
367 | //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty); | ||
368 | clear_c_tile(&spu.ctile); | ||
369 | spu.cur_ctile_status = TILE_STATUS_DIRTY; | ||
370 | } | ||
371 | ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED); | ||
372 | |||
373 | if (spu.read_depth_stencil) { | ||
374 | if (spu.cur_ztile_status == TILE_STATUS_GETTING) { | ||
375 | /* wait for mfc_get() to complete */ | ||
376 | //printf("SPU: %u: waiting for ztile\n", spu.init.id); | ||
377 | wait_on_mask(1 << TAG_READ_TILE_Z); | ||
378 | spu.cur_ztile_status = TILE_STATUS_CLEAN; | ||
379 | } | ||
380 | else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) { | ||
381 | //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty); | ||
382 | clear_z_tile(&spu.ztile); | ||
383 | spu.cur_ztile_status = TILE_STATUS_DIRTY; | ||
384 | } | ||
385 | ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED); | ||
386 | } | ||
387 | |||
388 | /* XXX this loop could be moved into the above switch cases... */ | ||
389 | |||
390 | /* Setup for mask calculation */ | ||
391 | const vec_int4 quad_LlRr = setup.span.quad; | ||
392 | const vec_int4 quad_RrLl = spu_rlqwbyte(quad_LlRr, 8); | ||
393 | const vec_int4 quad_LLll = spu_shuffle(quad_LlRr, quad_LlRr, SHUFFLE4(A,A,B,B)); | ||
394 | const vec_int4 quad_RRrr = spu_shuffle(quad_RrLl, quad_RrLl, SHUFFLE4(A,A,B,B)); | ||
395 | |||
396 | const vec_int4 twos = spu_splats(2); | ||
397 | |||
398 | const int x = block(minleft); | ||
399 | vec_int4 xs = {x, x+1, x, x+1}; | ||
400 | |||
401 | for (; spu_extract(xs, 0) <= block(maxright); xs += twos) { | ||
402 | /** | ||
403 | * Computes mask to indicate which pixels in the 2x2 quad are actually | ||
404 | * inside the triangle's bounds. | ||
405 | */ | ||
406 | |||
407 | /* Calculate ({x,x+1,x,x+1} >= {l[0],l[0],l[1],l[1]}) */ | ||
408 | const mask_t gt_LLll_xs = spu_cmpgt(quad_LLll, xs); | ||
409 | const mask_t gte_xs_LLll = spu_nand(gt_LLll_xs, gt_LLll_xs); | ||
410 | |||
411 | /* Calculate ({r[0],r[0],r[1],r[1]} > {x,x+1,x,x+1}) */ | ||
412 | const mask_t gt_RRrr_xs = spu_cmpgt(quad_RRrr, xs); | ||
413 | |||
414 | /* Combine results to create mask */ | ||
415 | const mask_t mask = spu_and(gte_xs_LLll, gt_RRrr_xs); | ||
416 | |||
417 | emit_quad(spu_extract(xs, 0), setup.span.y, mask); | ||
418 | } | ||
419 | |||
420 | setup.span.y = 0; | ||
421 | setup.span.y_flags = 0; | ||
422 | /* Zero right elements */ | ||
423 | setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0)); | ||
424 | } | ||
425 | |||
426 | |||
#if DEBUG_VERTS
/**
 * Debug helper: dump the address of a vertex and the four floats of
 * each of its first spu.vertex_info.num_attribs attributes to stderr.
 */
static void
print_vertex(const struct vertex_header *v)
{
   uint slot = 0;

   fprintf(stderr, " Vertex: (%p)\n", v);
   while (slot < spu.vertex_info.num_attribs) {
      fprintf(stderr, " %d: %f %f %f %f\n", slot,
              spu_extract(v->data[slot], 0),
              spu_extract(v->data[slot], 1),
              spu_extract(v->data[slot], 2),
              spu_extract(v->data[slot], 3));
      slot++;
   }
}
#endif
442 | |||
443 | /* Returns the minimum of each slot of two vec_float4s as qwords. | ||
444 | * i.e. return[n] = min(q0[n],q1[n]); | ||
445 | */ | ||
446 | static qword | ||
447 | minfq(qword q0, qword q1) | ||
448 | { | ||
449 | const qword q0q1m = si_fcgt(q0, q1); | ||
450 | return si_selb(q0, q1, q0q1m); | ||
451 | } | ||
452 | |||
453 | /* Returns the minimum of each slot of three vec_float4s as qwords. | ||
454 | * i.e. return[n] = min(q0[n],q1[n],q2[n]); | ||
455 | */ | ||
456 | static qword | ||
457 | min3fq(qword q0, qword q1, qword q2) | ||
458 | { | ||
459 | return minfq(minfq(q0, q1), q2); | ||
460 | } | ||
461 | |||
462 | /* Returns the maximum of each slot of two vec_float4s as qwords. | ||
463 | * i.e. return[n] = min(q0[n],q1[n],q2[n]); | ||
464 | */ | ||
465 | static qword | ||
466 | maxfq(qword q0, qword q1) { | ||
467 | const qword q0q1m = si_fcgt(q0, q1); | ||
468 | return si_selb(q1, q0, q0q1m); | ||
469 | } | ||
470 | |||
471 | /* Returns the maximum of each slot of three vec_float4s as qwords. | ||
472 | * i.e. return[n] = min(q0[n],q1[n],q2[n]); | ||
473 | */ | ||
474 | static qword | ||
475 | max3fq(qword q0, qword q1, qword q2) { | ||
476 | return maxfq(maxfq(q0, q1), q2); | ||
477 | } | ||
478 | |||
479 | /** | ||
480 | * Sort vertices from top to bottom. | ||
481 | * Compute area and determine front vs. back facing. | ||
482 | * Do coarse clip test against tile bounds | ||
483 | * \return FALSE if tri is totally outside tile, TRUE otherwise | ||
484 | */ | ||
485 | static boolean | ||
486 | setup_sort_vertices(const qword vs) | ||
487 | { | ||
488 | float area, sign; | ||
489 | |||
490 | #if DEBUG_VERTS | ||
491 | if (spu.init.id==0) { | ||
492 | fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id); | ||
493 | print_vertex(v0); | ||
494 | print_vertex(v1); | ||
495 | print_vertex(v2); | ||
496 | } | ||
497 | #endif | ||
498 | |||
499 | { | ||
500 | /* Load the float values for various processing... */ | ||
501 | const qword f0 = (qword)(((const struct vertex_header*)si_to_ptr(vs))->data[0]); | ||
502 | const qword f1 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 4)))->data[0]); | ||
503 | const qword f2 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 8)))->data[0]); | ||
504 | |||
505 | /* Check if triangle is completely outside the tile bounds | ||
506 | * Find the min and max x and y positions of the three poits */ | ||
507 | const qword minf = min3fq(f0, f1, f2); | ||
508 | const qword maxf = max3fq(f0, f1, f2); | ||
509 | |||
510 | /* Compare min and max against cliprect vals */ | ||
511 | const qword maxsmins = si_shufb(maxf, minf, SHUFB4(A,B,a,b)); | ||
512 | const qword outside = si_fcgt(maxsmins, si_csflt(setup.cliprect, 0)); | ||
513 | |||
514 | /* Use a little magic to work out of the tri is visible or not */ | ||
515 | if(si_to_uint(si_xori(si_gb(outside), 0xc))) return FALSE; | ||
516 | |||
517 | /* determine bottom to top order of vertices */ | ||
518 | /* A table of shuffle patterns for putting vertex_header pointers into | ||
519 | correct order. Quite magical. */ | ||
520 | const qword sort_order_patterns[] = { | ||
521 | SHUFB4(A,B,C,C), | ||
522 | SHUFB4(C,A,B,C), | ||
523 | SHUFB4(A,C,B,C), | ||
524 | SHUFB4(B,C,A,C), | ||
525 | SHUFB4(B,A,C,C), | ||
526 | SHUFB4(C,B,A,C) }; | ||
527 | |||
528 | /* Collate y values into two vectors for comparison. | ||
529 | Using only one shuffle constant! ;) */ | ||
530 | const qword y_02_ = si_shufb(f0, f2, SHUFB4(0,B,b,C)); | ||
531 | const qword y_10_ = si_shufb(f1, f0, SHUFB4(0,B,b,C)); | ||
532 | const qword y_012 = si_shufb(y_02_, f1, SHUFB4(0,B,b,C)); | ||
533 | const qword y_120 = si_shufb(y_10_, f2, SHUFB4(0,B,b,C)); | ||
534 | |||
535 | /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */ | ||
536 | const qword compare = si_fcgt(y_012, y_120); | ||
537 | /* Compress the result of the comparison into 4 bits */ | ||
538 | const qword gather = si_gb(compare); | ||
539 | /* Subtract one to attain the index into the LUT. Magical. */ | ||
540 | const unsigned int index = si_to_uint(gather) - 1; | ||
541 | |||
542 | /* Load the appropriate pattern and construct the desired vector. */ | ||
543 | setup.vertex_headers = si_shufb(vs, vs, sort_order_patterns[index]); | ||
544 | |||
545 | /* Using the result of the comparison, set sign. | ||
546 | Very magical. */ | ||
547 | sign = ((si_to_uint(si_cntb(gather)) == 2) ? 1.0f : -1.0f); | ||
548 | } | ||
549 | |||
550 | setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]); | ||
551 | setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]); | ||
552 | setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]); | ||
553 | |||
554 | /* | ||
555 | * Compute triangle's area. Use 1/area to compute partial | ||
556 | * derivatives of attributes later. | ||
557 | */ | ||
558 | area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy; | ||
559 | |||
560 | setup.oneOverArea = 1.0f / area; | ||
561 | |||
562 | /* The product of area * sign indicates front/back orientation (0/1). | ||
563 | * Just in case someone gets the bright idea of switching the front | ||
564 | * and back constants without noticing that we're assuming their | ||
565 | * values in this operation, also assert that the values are | ||
566 | * what we think they are. | ||
567 | */ | ||
568 | ASSERT(CELL_FACING_FRONT == 0); | ||
569 | ASSERT(CELL_FACING_BACK == 1); | ||
570 | setup.facing = (area * sign > 0.0f) | ||
571 | ^ (!spu.rasterizer.front_ccw); | ||
572 | |||
573 | return TRUE; | ||
574 | } | ||
575 | |||
576 | |||
577 | /** | ||
578 | * Compute a0 for a constant-valued coefficient (GL_FLAT shading). | ||
579 | * The value value comes from vertex->data[slot]. | ||
580 | * The result will be put into setup.coef[slot].a0. | ||
581 | * \param slot which attribute slot | ||
582 | */ | ||
583 | static INLINE void | ||
584 | const_coeff4(uint slot) | ||
585 | { | ||
586 | setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0}; | ||
587 | setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0}; | ||
588 | setup.coef[slot].a0 = setup.vprovoke->data[slot]; | ||
589 | } | ||
590 | |||
591 | |||
592 | /** | ||
593 | * As above, but interp setup all four vector components. | ||
594 | */ | ||
595 | static INLINE void | ||
596 | tri_linear_coeff4(uint slot) | ||
597 | { | ||
598 | const vector float vmin_d = setup.vmin->data[slot]; | ||
599 | const vector float vmid_d = setup.vmid->data[slot]; | ||
600 | const vector float vmax_d = setup.vmax->data[slot]; | ||
601 | const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); | ||
602 | const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); | ||
603 | |||
604 | vector float botda = vmid_d - vmin_d; | ||
605 | vector float majda = vmax_d - vmin_d; | ||
606 | |||
607 | vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), | ||
608 | spu_mul(botda, spu_splats(setup.emaj.dy))); | ||
609 | vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), | ||
610 | spu_mul(majda, spu_splats(setup.ebot.dx))); | ||
611 | |||
612 | setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); | ||
613 | setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); | ||
614 | |||
615 | vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); | ||
616 | vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); | ||
617 | |||
618 | setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); | ||
619 | } | ||
620 | |||
621 | |||
622 | /** | ||
623 | * Compute a0, dadx and dady for a perspective-corrected interpolant, | ||
624 | * for a triangle. | ||
625 | * We basically multiply the vertex value by 1/w before computing | ||
626 | * the plane coefficients (a0, dadx, dady). | ||
627 | * Later, when we compute the value at a particular fragment position we'll | ||
628 | * divide the interpolated value by the interpolated W at that fragment. | ||
629 | */ | ||
630 | static void | ||
631 | tri_persp_coeff4(uint slot) | ||
632 | { | ||
633 | const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); | ||
634 | const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); | ||
635 | |||
636 | const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3)); | ||
637 | const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3)); | ||
638 | const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3)); | ||
639 | |||
640 | vector float vmin_d = setup.vmin->data[slot]; | ||
641 | vector float vmid_d = setup.vmid->data[slot]; | ||
642 | vector float vmax_d = setup.vmax->data[slot]; | ||
643 | |||
644 | vmin_d = spu_mul(vmin_d, vmin_w); | ||
645 | vmid_d = spu_mul(vmid_d, vmid_w); | ||
646 | vmax_d = spu_mul(vmax_d, vmax_w); | ||
647 | |||
648 | vector float botda = vmid_d - vmin_d; | ||
649 | vector float majda = vmax_d - vmin_d; | ||
650 | |||
651 | vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), | ||
652 | spu_mul(botda, spu_splats(setup.emaj.dy))); | ||
653 | vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), | ||
654 | spu_mul(majda, spu_splats(setup.ebot.dx))); | ||
655 | |||
656 | setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); | ||
657 | setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); | ||
658 | |||
659 | vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); | ||
660 | vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); | ||
661 | |||
662 | setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); | ||
663 | } | ||
664 | |||
665 | |||
666 | |||
667 | /** | ||
668 | * Compute the setup.coef[] array dadx, dady, a0 values. | ||
669 | * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized. | ||
670 | */ | ||
671 | static void | ||
672 | setup_tri_coefficients(void) | ||
673 | { | ||
674 | uint i; | ||
675 | |||
676 | for (i = 0; i < spu.vertex_info.num_attribs; i++) { | ||
677 | switch (spu.vertex_info.attrib[i].interp_mode) { | ||
678 | case INTERP_NONE: | ||
679 | break; | ||
680 | case INTERP_CONSTANT: | ||
681 | const_coeff4(i); | ||
682 | break; | ||
683 | case INTERP_POS: | ||
684 | /* fall-through */ | ||
685 | case INTERP_LINEAR: | ||
686 | tri_linear_coeff4(i); | ||
687 | break; | ||
688 | case INTERP_PERSPECTIVE: | ||
689 | tri_persp_coeff4(i); | ||
690 | break; | ||
691 | default: | ||
692 | ASSERT(0); | ||
693 | } | ||
694 | } | ||
695 | } | ||
696 | |||
697 | |||
698 | static void | ||
699 | setup_tri_edges(void) | ||
700 | { | ||
701 | float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f; | ||
702 | float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f; | ||
703 | |||
704 | float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f; | ||
705 | float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f; | ||
706 | float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f; | ||
707 | |||
708 | setup.emaj.sy = CEILF(vmin_y); | ||
709 | setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy); | ||
710 | setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy; | ||
711 | setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy; | ||
712 | |||
713 | setup.etop.sy = CEILF(vmid_y); | ||
714 | setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy); | ||
715 | setup.etop.dxdy = setup.etop.dx / setup.etop.dy; | ||
716 | setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy; | ||
717 | |||
718 | setup.ebot.sy = CEILF(vmin_y); | ||
719 | setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy); | ||
720 | setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy; | ||
721 | setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy; | ||
722 | } | ||
723 | |||
724 | |||
725 | /** | ||
726 | * Render the upper or lower half of a triangle. | ||
727 | * Scissoring/cliprect is applied here too. | ||
728 | */ | ||
729 | static void | ||
730 | subtriangle(struct edge *eleft, struct edge *eright, unsigned lines) | ||
731 | { | ||
732 | const int minx = setup.cliprect_minx; | ||
733 | const int maxx = setup.cliprect_maxx; | ||
734 | const int miny = setup.cliprect_miny; | ||
735 | const int maxy = setup.cliprect_maxy; | ||
736 | int y, start_y, finish_y; | ||
737 | int sy = (int)eleft->sy; | ||
738 | |||
739 | ASSERT((int)eleft->sy == (int) eright->sy); | ||
740 | |||
741 | /* clip top/bottom */ | ||
742 | start_y = sy; | ||
743 | finish_y = sy + lines; | ||
744 | |||
745 | if (start_y < miny) | ||
746 | start_y = miny; | ||
747 | |||
748 | if (finish_y > maxy) | ||
749 | finish_y = maxy; | ||
750 | |||
751 | start_y -= sy; | ||
752 | finish_y -= sy; | ||
753 | |||
754 | /* | ||
755 | printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); | ||
756 | */ | ||
757 | |||
758 | for (y = start_y; y < finish_y; y++) { | ||
759 | |||
760 | /* avoid accumulating adds as floats don't have the precision to | ||
761 | * accurately iterate large triangle edges that way. luckily we | ||
762 | * can just multiply these days. | ||
763 | * | ||
764 | * this is all drowned out by the attribute interpolation anyway. | ||
765 | */ | ||
766 | int left = (int)(eleft->sx + y * eleft->dxdy); | ||
767 | int right = (int)(eright->sx + y * eright->dxdy); | ||
768 | |||
769 | /* clip left/right */ | ||
770 | if (left < minx) | ||
771 | left = minx; | ||
772 | if (right > maxx) | ||
773 | right = maxx; | ||
774 | |||
775 | if (left < right) { | ||
776 | int _y = sy + y; | ||
777 | if (block(_y) != setup.span.y) { | ||
778 | flush_spans(); | ||
779 | setup.span.y = block(_y); | ||
780 | } | ||
781 | |||
782 | int offset = _y&1; | ||
783 | vec_int4 quad_LlRr = {left, left, right, right}; | ||
784 | /* Store left and right in 0 or 1 row of quad based on offset */ | ||
785 | setup.span.quad = spu_sel(quad_LlRr, setup.span.quad, spu_maskw(5<<offset)); | ||
786 | setup.span.y_flags |= 1<<offset; | ||
787 | } | ||
788 | } | ||
789 | |||
790 | |||
791 | /* save the values so that emaj can be restarted: | ||
792 | */ | ||
793 | eleft->sx += lines * eleft->dxdy; | ||
794 | eright->sx += lines * eright->dxdy; | ||
795 | eleft->sy += lines; | ||
796 | eright->sy += lines; | ||
797 | } | ||
798 | |||
799 | |||
800 | /** | ||
801 | * Draw triangle into tile at (tx, ty) (tile coords) | ||
802 | * The tile data should have already been fetched. | ||
803 | */ | ||
804 | boolean | ||
805 | tri_draw(const qword vs, | ||
806 | uint tx, uint ty) | ||
807 | { | ||
808 | setup.tx = tx; | ||
809 | setup.ty = ty; | ||
810 | |||
811 | /* set clipping bounds to tile bounds */ | ||
812 | const qword clipbase = (qword)((vec_uint4){tx, ty}); | ||
813 | const qword clipmin = si_mpyui(clipbase, TILE_SIZE); | ||
814 | const qword clipmax = si_ai(clipmin, TILE_SIZE); | ||
815 | setup.cliprect = si_shufb(clipmin, clipmax, SHUFB4(A,B,a,b)); | ||
816 | |||
817 | if(!setup_sort_vertices(vs)) { | ||
818 | return FALSE; /* totally clipped */ | ||
819 | } | ||
820 | |||
821 | setup_tri_coefficients(); | ||
822 | setup_tri_edges(); | ||
823 | |||
824 | setup.span.y = 0; | ||
825 | setup.span.y_flags = 0; | ||
826 | /* Zero right elements */ | ||
827 | setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0)); | ||
828 | |||
829 | if (setup.oneOverArea < 0.0) { | ||
830 | /* emaj on left */ | ||
831 | subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines ); | ||
832 | subtriangle( &setup.emaj, &setup.etop, setup.etop.lines ); | ||
833 | } | ||
834 | else { | ||
835 | /* emaj on right */ | ||
836 | subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines ); | ||
837 | subtriangle( &setup.etop, &setup.emaj, setup.etop.lines ); | ||
838 | } | ||
839 | |||
840 | flush_spans(); | ||
841 | |||
842 | return TRUE; | ||
843 | } | ||
diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h deleted file mode 100644 index 82e3b19ad7e..00000000000 --- a/src/gallium/drivers/cell/spu/spu_tri.h +++ /dev/null | |||
@@ -1,37 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | |||
29 | #ifndef SPU_TRI_H | ||
30 | #define SPU_TRI_H | ||
31 | |||
32 | |||
33 | extern boolean | ||
34 | tri_draw(const qword vs, uint tx, uint ty); /* draw a triangle into the tile at tile coords (tx, ty) */ | ||
35 | |||
36 | |||
37 | #endif /* SPU_TRI_H */ | ||
diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c deleted file mode 100644 index 24057e29e36..00000000000 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ /dev/null | |||
@@ -1,77 +0,0 @@ | |||
1 | |||
2 | #include "cell/common.h" | ||
3 | #include "pipe/p_shader_tokens.h" | ||
4 | #include "util/u_debug.h" | ||
5 | #include "tgsi/tgsi_parse.h" | ||
6 | //#include "tgsi_build.h" | ||
7 | #include "tgsi/tgsi_util.h" | ||
8 | |||
9 | unsigned | ||
10 | tgsi_util_get_src_register_swizzle( /* Return reg's swizzle selector for 'component': 0..3 map to SwizzleX..SwizzleW. */ | ||
11 | const struct tgsi_src_register *reg, | ||
12 | unsigned component ) | ||
13 | { | ||
14 | switch( component ) { | ||
15 | case 0: | ||
16 | return reg->SwizzleX; | ||
17 | case 1: | ||
18 | return reg->SwizzleY; | ||
19 | case 2: | ||
20 | return reg->SwizzleZ; | ||
21 | case 3: | ||
22 | return reg->SwizzleW; | ||
23 | default: | ||
24 | ASSERT( 0 ); /* component outside 0..3 is a caller error */ | ||
25 | } | ||
26 | return 0; /* only reached through the default case, if ASSERT does not stop execution */ | ||
27 | } | ||
28 | |||
29 | |||
30 | unsigned | ||
31 | tgsi_util_get_full_src_register_swizzle( /* Return the swizzle selector for 'component' (0..3) of a full source register. */ | ||
32 | const struct tgsi_full_src_register *reg, | ||
33 | unsigned component ) | ||
34 | { | ||
35 | return tgsi_util_get_src_register_swizzle( | ||
36 | &reg->Register, /* the callee takes a pointer; Register is an embedded struct, so pass its address */ | ||
37 | component ); | ||
38 | } | ||
39 | |||
40 | |||
41 | unsigned | ||
42 | tgsi_util_get_full_src_register_sign_mode( /* Map reg's Absolute/Negate modifiers to one of the TGSI_UTIL_SIGN_* modes. */ | ||
43 | const struct tgsi_full_src_register *reg, | ||
44 | unsigned component ) /* 'component' is not read by this function */ | ||
45 | { | ||
46 | unsigned sign_mode; | ||
47 | |||
48 | if( reg->RegisterExtMod.Absolute ) { | ||
49 | /* Consider only the post-abs negation. */ | ||
50 | |||
51 | if( reg->RegisterExtMod.Negate ) { | ||
52 | sign_mode = TGSI_UTIL_SIGN_SET; /* abs followed by negate */ | ||
53 | } | ||
54 | else { | ||
55 | sign_mode = TGSI_UTIL_SIGN_CLEAR; /* abs only */ | ||
56 | } | ||
57 | } | ||
58 | else { | ||
59 | /* Accumulate the two negations (Register.Negate and RegisterExtMod.Negate); they cancel when both are set. */ | ||
60 | |||
61 | unsigned negate; | ||
62 | |||
63 | negate = reg->Register.Negate; | ||
64 | if( reg->RegisterExtMod.Negate ) { | ||
65 | negate = !negate; | ||
66 | } | ||
67 | |||
68 | if( negate ) { | ||
69 | sign_mode = TGSI_UTIL_SIGN_TOGGLE; | ||
70 | } | ||
71 | else { | ||
72 | sign_mode = TGSI_UTIL_SIGN_KEEP; | ||
73 | } | ||
74 | } | ||
75 | |||
76 | return sign_mode; | ||
77 | } | ||
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c deleted file mode 100644 index 087963960df..00000000000 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ /dev/null | |||
@@ -1,146 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * (C) Copyright IBM Corporation 2008 | ||
5 | * All Rights Reserved. | ||
6 | * | ||
7 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
8 | * copy of this software and associated documentation files (the | ||
9 | * "Software"), to deal in the Software without restriction, including | ||
10 | * without limitation the rights to use, copy, modify, merge, publish, | ||
11 | * distribute, sub license, and/or sell copies of the Software, and to | ||
12 | * permit persons to whom the Software is furnished to do so, subject to | ||
13 | * the following conditions: | ||
14 | * | ||
15 | * The above copyright notice and this permission notice (including the | ||
16 | * next paragraph) shall be included in all copies or substantial portions | ||
17 | * of the Software. | ||
18 | * | ||
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
22 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
23 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
24 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
25 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
26 | * | ||
27 | **************************************************************************/ | ||
28 | |||
29 | /* | ||
30 | * Authors: | ||
31 | * Keith Whitwell <keith@tungstengraphics.com> | ||
32 | * Ian Romanick <idr@us.ibm.com> | ||
33 | */ | ||
34 | |||
35 | #include "pipe/p_state.h" | ||
36 | #include "pipe/p_shader_tokens.h" | ||
37 | #include "spu_exec.h" | ||
38 | #include "spu_vertex_shader.h" | ||
39 | #include "spu_main.h" | ||
40 | #include "spu_dcache.h" | ||
41 | |||
42 | typedef void (*spu_fetch_func)(qword *out, const qword *in, /* per-attribute fetch/convert routine; invoked as fetch(out, in, fetch_shuffle_data) */ | ||
43 | const qword *shuffle_data); | ||
44 | |||
45 | |||
46 | PIPE_ALIGN_VAR(16) static const qword | ||
47 | fetch_shuffle_data[5] = { /* shuffle patterns handed to every spu_fetch_func as its third argument */ | ||
48 | /* Shuffle used by CVT_64_FLOAT | ||
49 | */ | ||
50 | { | ||
51 | 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, | ||
52 | 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, | ||
53 | }, | ||
54 | |||
55 | /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED | ||
56 | */ | ||
57 | { | ||
58 | 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80, | ||
59 | 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80, | ||
60 | }, | ||
61 | |||
62 | /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED | ||
63 | */ | ||
64 | { | ||
65 | 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80, | ||
66 | 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80, | ||
67 | }, | ||
68 | |||
69 | /* High value shuffle used by trans4x4. | ||
70 | */ | ||
71 | { | ||
72 | 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, | ||
73 | 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17 | ||
74 | }, | ||
75 | |||
76 | /* Low value shuffle used by trans4x4. | ||
77 | */ | ||
78 | { | ||
79 | 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, | ||
80 | 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F | ||
81 | } | ||
82 | }; | ||
83 | |||
84 | |||
85 | /** | ||
86 | * Fetch vertex attributes for 'count' (at most 4) vertices selected by 'elts' and convert them into machine->Inputs. | ||
87 | */ | ||
88 | static void generic_vertex_fetch(struct spu_vs_context *draw, | ||
89 | struct spu_exec_machine *machine, | ||
90 | const unsigned *elts, | ||
91 | unsigned count) | ||
92 | { | ||
93 | unsigned nr_attrs = draw->vertex_fetch.nr_attrs; | ||
94 | unsigned attr; | ||
95 | |||
96 | ASSERT(count <= 4); | ||
97 | |||
98 | #if DRAW_DBG | ||
99 | printf("SPU: %s count = %u, nr_attrs = %u\n", | ||
100 | __FUNCTION__, count, nr_attrs); | ||
101 | #endif | ||
102 | |||
103 | /* loop over vertex attributes (vertex shader inputs) | ||
104 | */ | ||
105 | for (attr = 0; attr < nr_attrs; attr++) { | ||
106 | const unsigned pitch = draw->vertex_fetch.pitch[attr]; | ||
107 | const uint64_t src = draw->vertex_fetch.src_ptr[attr]; | ||
108 | const spu_fetch_func fetch = (spu_fetch_func) | ||
109 | (draw->vertex_fetch.code + draw->vertex_fetch.code_offset[attr]); /* this attribute's conversion routine lives at code + code_offset[attr] */ | ||
110 | unsigned i; | ||
111 | unsigned idx; | ||
112 | const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; | ||
113 | const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; /* entry size rounded up to whole 16-byte units */ | ||
114 | PIPE_ALIGN_VAR(16) qword in[2 * 4]; /* 8 qwords of staging space; NOTE(review): nothing here checks that count * quads_per_entry fits */ | ||
115 | |||
116 | |||
117 | /* Fetch this attribute's raw data for each of the 'count' vertices. | ||
118 | */ | ||
119 | idx = 0; | ||
120 | for (i = 0; i < count; i++) { | ||
121 | const uint64_t addr = src + (elts[i] * pitch); /* NOTE(review): elts[i] * pitch is computed in unsigned arithmetic before the 64-bit add - confirm it cannot wrap */ | ||
122 | |||
123 | #if DRAW_DBG | ||
124 | printf("SPU: fetching = 0x%llx\n", addr); | ||
125 | #endif | ||
126 | |||
127 | spu_dcache_fetch_unaligned(& in[idx], addr, bytes_per_entry); | ||
128 | idx += quads_per_entry; | ||
129 | } | ||
130 | |||
131 | /* Be nice and zero out any missing vertices. | ||
132 | */ | ||
133 | (void) memset(& in[idx], 0, (8 - idx) * sizeof(qword)); /* 8 is the element count of in[] */ | ||
134 | |||
135 | |||
136 | /* Convert all 4 vertices to vectors of float. | ||
137 | */ | ||
138 | (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, fetch_shuffle_data); | ||
139 | } | ||
140 | } | ||
141 | |||
142 | |||
143 | void spu_update_vertex_fetch( struct spu_vs_context *draw ) /* Select the fetch routine for 'draw'; generic_vertex_fetch is the only choice made here. */ | ||
144 | { | ||
145 | draw->vertex_fetch.fetch_func = generic_vertex_fetch; | ||
146 | } | ||
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c deleted file mode 100644 index d6febd36f41..00000000000 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ /dev/null | |||
@@ -1,245 +0,0 @@ | |||
1 | /************************************************************************** | ||
2 | * | ||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the | ||
8 | * "Software"), to deal in the Software without restriction, including | ||
9 | * without limitation the rights to use, copy, modify, merge, publish, | ||
10 | * distribute, sub license, and/or sell copies of the Software, and to | ||
11 | * permit persons to whom the Software is furnished to do so, subject to | ||
12 | * the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice (including the | ||
15 | * next paragraph) shall be included in all copies or substantial portions | ||
16 | * of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. | ||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR | ||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | * | ||
26 | **************************************************************************/ | ||
27 | |||
28 | /* | ||
29 | * Authors: | ||
30 | * Keith Whitwell <keith@tungstengraphics.com> | ||
31 | * Brian Paul | ||
32 | * Ian Romanick <idr@us.ibm.com> | ||
33 | */ | ||
34 | |||
35 | #include <spu_mfcio.h> | ||
36 | |||
37 | #include "pipe/p_state.h" | ||
38 | #include "pipe/p_shader_tokens.h" | ||
39 | #include "util/u_math.h" | ||
40 | #include "draw/draw_private.h" | ||
41 | #include "draw/draw_context.h" | ||
42 | #include "cell/common.h" | ||
43 | #include "spu_vertex_shader.h" | ||
44 | #include "spu_exec.h" | ||
45 | #include "spu_main.h" | ||
46 | |||
47 | |||
48 | #define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) /* bytes for (2 + PIPE_MAX_SHADER_OUTPUTS) four-float vectors; sizes the per-vertex staging buffer */ | ||
49 | |||
50 | |||
51 | #define CLIP_RIGHT_BIT 0x01 /* bits 0..5: one per hardwired plane, in the order compute_clipmask() tests them */ | ||
52 | #define CLIP_LEFT_BIT 0x02 | ||
53 | #define CLIP_TOP_BIT 0x04 | ||
54 | #define CLIP_BOTTOM_BIT 0x08 | ||
55 | #define CLIP_FAR_BIT 0x10 | ||
56 | #define CLIP_NEAR_BIT 0x20 | ||
57 | |||
58 | |||
59 | static INLINE float | ||
60 | dot4(const float *a, const float *b) /* Four-component dot product of a[0..3] and b[0..3]. */ | ||
61 | { | ||
62 | return (a[0]*b[0] + | ||
63 | a[1]*b[1] + | ||
64 | a[2]*b[2] + | ||
65 | a[3]*b[3]); | ||
66 | } | ||
67 | |||
68 | static INLINE unsigned | ||
69 | compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) /* Return a mask with one bit set for every plane test on 'clip' that comes out negative. */ | ||
70 | { | ||
71 | unsigned mask = 0; | ||
72 | unsigned i; | ||
73 | |||
74 | /* Do the hardwired planes first (each of clip[0..2] is compared against +/- clip[3]): | ||
75 | */ | ||
76 | if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; | ||
77 | if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; | ||
78 | if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; | ||
79 | if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; | ||
80 | if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; | ||
81 | if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; | ||
82 | |||
83 | /* Followed by any remaining ones: | ||
84 | */ | ||
85 | for (i = 6; i < nr; i++) { /* plane[0..5] are never read here; plane i sets mask bit i */ | ||
86 | if (dot4(clip, plane[i]) < 0) | ||
87 | mask |= (1<<i); | ||
88 | } | ||
89 | |||
90 | return mask; | ||
91 | } | ||
92 | |||
93 | |||
94 | /** | ||
95 | * Transform vertices with the current vertex program/shader | ||
96 | * Up to four vertices can be shaded at a time. | ||
97 | * \param draw the vertex shader context (exec machine, viewport, clip planes, constants) | ||
98 | * \param elts indexes of four input vertices | ||
99 | * \param count number of vertices to shade [1..4] | ||
100 | * \param vOut array of 64-bit addresses of the output vertices, accessed with mfc_get/mfc_put | ||
101 | */ | ||
102 | static void | ||
103 | run_vertex_program(struct spu_vs_context *draw, | ||
104 | unsigned elts[4], unsigned count, | ||
105 | const uint64_t *vOut) | ||
106 | { | ||
107 | struct spu_exec_machine *machine = &draw->machine; | ||
108 | unsigned int j; | ||
109 | |||
110 | PIPE_ALIGN_VAR(16) struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS]; | ||
111 | PIPE_ALIGN_VAR(16) struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS]; /* NOTE(review): sized by PIPE_MAX_ATTRIBS but indexed up to draw->num_vs_outputs below - confirm the bound */ | ||
112 | const float *scale = draw->viewport.scale; | ||
113 | const float *trans = draw->viewport.translate; | ||
114 | |||
115 | ASSERT(count <= 4); | ||
116 | |||
117 | machine->Processor = TGSI_PROCESSOR_VERTEX; | ||
118 | |||
119 | ASSERT_ALIGN16(draw->constants); | ||
120 | machine->Consts = (float (*)[4]) draw->constants; | ||
121 | |||
122 | machine->Inputs = inputs; /* the machine reads and writes these stack arrays for the duration of this call */ | ||
123 | machine->Outputs = outputs; | ||
124 | |||
125 | spu_vertex_fetch( draw, machine, elts, count ); | ||
126 | |||
127 | /* run shader */ | ||
128 | spu_exec_machine_run( machine ); | ||
129 | |||
130 | |||
131 | /* store machine results */ | ||
132 | for (j = 0; j < count; j++) { | ||
133 | unsigned slot; | ||
134 | float x, y, z, w; | ||
135 | PIPE_ALIGN_VAR(16) | ||
136 | unsigned char buffer[sizeof(struct vertex_header) | ||
137 | + MAX_VERTEX_SIZE]; | ||
138 | struct vertex_header *const tmpOut = | ||
139 | (struct vertex_header *) buffer; | ||
140 | const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) | ||
141 | + (sizeof(float) * 4 | ||
142 | * draw->num_vs_outputs)); | ||
143 | |||
144 | mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); /* read the current contents of output vertex j into the local buffer */ | ||
145 | wait_on_mask(1 << TAG_VERTEX_BUFFER); | ||
146 | |||
147 | |||
148 | /* Handle attr[0] (position) specially: | ||
149 | * | ||
150 | * XXX: Computing the clipmask should be done in the vertex | ||
151 | * program as a set of DP4 instructions appended to the | ||
152 | * user-provided code. | ||
153 | */ | ||
154 | x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j]; | ||
155 | y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j]; | ||
156 | z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j]; | ||
157 | w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j]; | ||
158 | |||
159 | tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane, | ||
160 | draw->nr_planes); | ||
161 | tmpOut->edgeflag = 1; | ||
162 | |||
163 | /* divide by w */ | ||
164 | w = 1.0f / w; /* from here on w holds 1/w, which is what gets stored in data[0][3] */ | ||
165 | x *= w; | ||
166 | y *= w; | ||
167 | z *= w; | ||
168 | |||
169 | /* Viewport mapping */ | ||
170 | tmpOut->data[0][0] = x * scale[0] + trans[0]; | ||
171 | tmpOut->data[0][1] = y * scale[1] + trans[1]; | ||
172 | tmpOut->data[0][2] = z * scale[2] + trans[2]; | ||
173 | tmpOut->data[0][3] = w; | ||
174 | |||
175 | /* Remaining attributes are packed into sequential post-transform | ||
176 | * vertex attrib slots. | ||
177 | */ | ||
178 | for (slot = 1; slot < draw->num_vs_outputs; slot++) { | ||
179 | tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; | ||
180 | tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; | ||
181 | tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; | ||
182 | tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; | ||
183 | } | ||
184 | |||
185 | mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); /* NOTE(review): not waited on here; the only later wait on this tag follows the next iteration's mfc_get, and none follows the last vertex - confirm ordering */ | ||
186 | } /* loop over vertices */ | ||
187 | } | ||
188 | |||
189 | |||
190 | PIPE_ALIGN_VAR(16) unsigned char | ||
191 | immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]; /* local staging buffer for the immediates DMA in spu_bind_vertex_shader(); the extra 32 bytes cover the alignment slack added there */ | ||
192 | |||
193 | |||
194 | void | ||
195 | spu_bind_vertex_shader(struct spu_vs_context *draw, /* Point draw->machine at the shader described by 'vs' and copy its immediates into the machine. */ | ||
196 | struct cell_shader_info *vs) | ||
197 | { | ||
198 | const unsigned immediate_addr = vs->immediates; | ||
199 | const unsigned immediate_size = | ||
200 | ROUNDUP16((sizeof(float) * 4 * vs->num_immediates) | ||
201 | + (immediate_addr & 0x0f)); /* payload plus the leading misalignment, rounded up to a multiple of 16 */ | ||
202 | |||
203 | |||
204 | mfc_get(immediates, immediate_addr & ~0x0f, immediate_size, /* fetch from the 16-byte-aligned address at or below immediate_addr; NOTE(review): num_immediates is not checked against the buffer size */ | ||
205 | TAG_VERTEX_BUFFER, 0, 0); | ||
206 | |||
207 | draw->machine.Instructions = (struct tgsi_full_instruction *) | ||
208 | vs->instructions; | ||
209 | draw->machine.NumInstructions = vs->num_instructions; | ||
210 | |||
211 | draw->machine.Declarations = (struct tgsi_full_declaration *) | ||
212 | vs->declarations; | ||
213 | draw->machine.NumDeclarations = vs->num_declarations; | ||
214 | |||
215 | draw->num_vs_outputs = vs->num_outputs; | ||
216 | |||
217 | /* specify the shader to interpret/execute */ | ||
218 | spu_exec_machine_init(&draw->machine, | ||
219 | PIPE_MAX_SAMPLERS, | ||
220 | NULL /*samplers*/, | ||
221 | PIPE_SHADER_VERTEX); | ||
222 | |||
223 | wait_on_mask(1 << TAG_VERTEX_BUFFER); /* the immediates transfer issued above must finish before the copy below */ | ||
224 | |||
225 | (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f], /* skip the leading misalignment bytes */ | ||
226 | sizeof(float) * 4 * vs->num_immediates); | ||
227 | } | ||
228 | |||
229 | |||
230 | void | ||
231 | spu_execute_vertex_shader(struct spu_vs_context *draw, /* Shade the vs->num_elts elements of command 'vs' in batches of up to four. */ | ||
232 | const struct cell_command_vs *vs) | ||
233 | { | ||
234 | unsigned i; | ||
235 | |||
236 | (void) memcpy(draw->plane, vs->plane, sizeof(float) * 4 * vs->nr_planes); /* NOTE(review): vs->nr_planes is not checked against the capacity of draw->plane */ | ||
237 | draw->nr_planes = vs->nr_planes; | ||
238 | draw->vertex_fetch.nr_attrs = vs->nr_attrs; | ||
239 | |||
240 | for (i = 0; i < vs->num_elts; i += 4) { | ||
241 | const unsigned batch_size = MIN2(vs->num_elts - i, 4); /* the last batch may hold fewer than four elements */ | ||
242 | |||
243 | run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]); | ||
244 | } | ||
245 | } | ||
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h deleted file mode 100644 index 4c74f5e74d5..00000000000 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.h +++ /dev/null | |||
@@ -1,66 +0,0 @@ | |||
1 | #ifndef SPU_VERTEX_SHADER_H | ||
2 | #define SPU_VERTEX_SHADER_H | ||
3 | |||
4 | #include "cell/common.h" | ||
5 | #include "pipe/p_format.h" | ||
6 | #include "spu_exec.h" | ||
7 | |||
8 | struct spu_vs_context; | ||
9 | |||
10 | typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw, /* fetch routine stored in vertex_fetch.fetch_func and called by spu_vertex_fetch() */ | ||
11 | struct spu_exec_machine *machine, | ||
12 | const unsigned *elts, | ||
13 | unsigned count ); | ||
14 | |||
15 | struct spu_vs_context { /* state used by the SPU vertex shading path */ | ||
16 | struct pipe_viewport_state viewport; /* scale/translate applied to shaded positions */ | ||
17 | |||
18 | struct { | ||
19 | uint64_t src_ptr[PIPE_MAX_ATTRIBS]; /* per-attribute base address of the source data */ | ||
20 | unsigned pitch[PIPE_MAX_ATTRIBS]; /* per-attribute byte distance between consecutive elements */ | ||
21 | unsigned size[PIPE_MAX_ATTRIBS]; /* per-attribute number of bytes fetched per element */ | ||
22 | unsigned code_offset[PIPE_MAX_ATTRIBS]; /* per-attribute offset into 'code' of its fetch routine */ | ||
23 | unsigned nr_attrs; | ||
24 | boolean dirty; /* when set, spu_vertex_fetch() refreshes fetch_func before using it */ | ||
25 | |||
26 | spu_full_fetch_func fetch_func; | ||
27 | void *code; | ||
28 | } vertex_fetch; | ||
29 | |||
30 | /* Clip derived state: | ||
31 | */ | ||
32 | float plane[12][4]; | ||
33 | unsigned nr_planes; | ||
34 | |||
35 | struct spu_exec_machine machine; | ||
36 | const float (*constants)[4]; | ||
37 | |||
38 | unsigned num_vs_outputs; /* number of output slots written per shaded vertex */ | ||
39 | }; | ||
40 | |||
41 | extern void spu_update_vertex_fetch(struct spu_vs_context *draw); /* (re)selects draw->vertex_fetch.fetch_func */ | ||
42 | |||
43 | static INLINE void spu_vertex_fetch(struct spu_vs_context *draw, /* Fetch inputs for 'count' vertices through the context's current fetch function. */ | ||
44 | struct spu_exec_machine *machine, | ||
45 | const unsigned *elts, | ||
46 | unsigned count) | ||
47 | { | ||
48 | if (draw->vertex_fetch.dirty) { /* refresh the fetch function only when the fetch state was flagged dirty */ | ||
49 | spu_update_vertex_fetch(draw); | ||
50 | draw->vertex_fetch.dirty = 0; | ||
51 | } | ||
52 | |||
53 | (*draw->vertex_fetch.fetch_func)(draw, machine, elts, count); | ||
54 | } | ||
55 | |||
56 | struct cell_command_vs; | ||
57 | |||
58 | extern void | ||
59 | spu_bind_vertex_shader(struct spu_vs_context *draw, /* make 'vs' the shader that 'draw' will execute */ | ||
60 | struct cell_shader_info *vs); | ||
61 | |||
62 | extern void | ||
63 | spu_execute_vertex_shader(struct spu_vs_context *draw, /* run the bound shader over the elements listed in 'vs' */ | ||
64 | const struct cell_command_vs *vs); | ||
65 | |||
66 | #endif /* SPU_VERTEX_SHADER_H */ | ||
diff --git a/src/gallium/targets/libgl-xlib/Makefile b/src/gallium/targets/libgl-xlib/Makefile index f8f6c81b3f2..867b2da323b 100644 --- a/src/gallium/targets/libgl-xlib/Makefile +++ b/src/gallium/targets/libgl-xlib/Makefile | |||
@@ -29,7 +29,6 @@ DEFINES += \ | |||
29 | -DGALLIUM_RBUG \ | 29 | -DGALLIUM_RBUG \ |
30 | -DGALLIUM_TRACE \ | 30 | -DGALLIUM_TRACE \ |
31 | -DGALLIUM_GALAHAD | 31 | -DGALLIUM_GALAHAD |
32 | #-DGALLIUM_CELL will be defined by the config */ | ||
33 | 32 | ||
34 | XLIB_TARGET_SOURCES = \ | 33 | XLIB_TARGET_SOURCES = \ |
35 | xlib.c | 34 | xlib.c |
@@ -38,7 +37,6 @@ XLIB_TARGET_SOURCES = \ | |||
38 | XLIB_TARGET_OBJECTS = $(XLIB_TARGET_SOURCES:.c=.o) | 37 | XLIB_TARGET_OBJECTS = $(XLIB_TARGET_SOURCES:.c=.o) |
39 | 38 | ||
40 | 39 | ||
41 | # Note: CELL_SPU_LIB is only defined for cell configs | ||
42 | 40 | ||
43 | LIBS = \ | 41 | LIBS = \ |
44 | $(GALLIUM_DRIVERS) \ | 42 | $(GALLIUM_DRIVERS) \ |
@@ -50,7 +48,6 @@ LIBS = \ | |||
50 | $(TOP)/src/mapi/glapi/libglapi.a \ | 48 | $(TOP)/src/mapi/glapi/libglapi.a \ |
51 | $(TOP)/src/mesa/libmesagallium.a \ | 49 | $(TOP)/src/mesa/libmesagallium.a \ |
52 | $(GALLIUM_AUXILIARIES) \ | 50 | $(GALLIUM_AUXILIARIES) \ |
53 | $(CELL_SPU_LIB) \ | ||
54 | 51 | ||
55 | 52 | ||
56 | # LLVM | 53 | # LLVM |
diff --git a/src/gallium/targets/libgl-xlib/SConscript b/src/gallium/targets/libgl-xlib/SConscript index ad8b0992e46..25a4582d7a3 100644 --- a/src/gallium/targets/libgl-xlib/SConscript +++ b/src/gallium/targets/libgl-xlib/SConscript | |||
@@ -42,11 +42,6 @@ if True: | |||
42 | if env['llvm']: | 42 | if env['llvm']: |
43 | env.Append(CPPDEFINES = ['GALLIUM_LLVMPIPE']) | 43 | env.Append(CPPDEFINES = ['GALLIUM_LLVMPIPE']) |
44 | env.Prepend(LIBS = [llvmpipe]) | 44 | env.Prepend(LIBS = [llvmpipe]) |
45 | |||
46 | if False: | ||
47 | # TODO: Detect Cell SDK | ||
48 | env.Append(CPPDEFINES = 'GALLIUM_CELL') | ||
49 | env.Prepend(LIBS = [cell]) | ||
50 | 45 | ||
51 | # libGL.so.1.5 | 46 | # libGL.so.1.5 |
52 | libgl_1_5 = env.SharedLibrary( | 47 | libgl_1_5 = env.SharedLibrary( |
diff --git a/src/gallium/targets/libgl-xlib/xlib.c b/src/gallium/targets/libgl-xlib/xlib.c index 1a5892b94a0..0ede7e6096b 100644 --- a/src/gallium/targets/libgl-xlib/xlib.c +++ b/src/gallium/targets/libgl-xlib/xlib.c | |||
@@ -42,7 +42,7 @@ | |||
42 | 42 | ||
43 | 43 | ||
44 | /* Helper function to build a subset of a driver stack consisting of | 44 | /* Helper function to build a subset of a driver stack consisting of |
45 | * one of the software rasterizers (cell, llvmpipe, softpipe) and the | 45 | * one of the software rasterizers (llvmpipe, softpipe) and the |
46 | * xlib winsys. | 46 | * xlib winsys. |
47 | */ | 47 | */ |
48 | static struct pipe_screen * | 48 | static struct pipe_screen * |