summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKai Wasserbäch <kai@dev.carbon-project.org>2011-11-29 18:17:47 +0100
committerJosé Fonseca <jfonseca@vmware.com>2011-11-29 20:26:53 +0000
commitccd4d4367f2b4e5aebfc59b832599812a4a1c7d8 (patch)
treeb5e510235d4be8dd4644a72c721693eaae02c1b5
parent09e67706e9a74600e16fe012ecfd192b0d31960a (diff)
gallium/cell: Remove the driver.
Complicates Gallium3D development and doesn't seem to have active users. Signed-off-by: Kai Wasserbäch <kai@dev.carbon-project.org> Signed-off-by: José Fonseca <jfonseca@vmware.com>
-rw-r--r--Makefile2
-rw-r--r--common.py2
-rw-r--r--configs/linux-cell71
-rw-r--r--configs/linux-cell-debug10
-rw-r--r--docs/cell.html138
-rw-r--r--docs/contents.html3
-rw-r--r--docs/news.html2
-rw-r--r--docs/relnotes-7.12.html2
-rw-r--r--docs/relnotes-7.5.html2
-rw-r--r--docs/sourcetree.html1
-rw-r--r--doxygen/gallium.doc1
-rw-r--r--src/gallium/auxiliary/Makefile.sources1
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c1067
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h433
-rw-r--r--src/gallium/auxiliary/target-helpers/inline_sw_helper.h15
-rw-r--r--src/gallium/drivers/cell/Makefile12
-rw-r--r--src/gallium/drivers/cell/common.h377
-rw-r--r--src/gallium/drivers/cell/ppu/Makefile86
-rw-r--r--src/gallium/drivers/cell/ppu/cell_batch.c260
-rw-r--r--src/gallium/drivers/cell/ppu/cell_batch.h54
-rw-r--r--src/gallium/drivers/cell/ppu/cell_clear.c93
-rw-r--r--src/gallium/drivers/cell/ppu/cell_clear.h42
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.c190
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.h210
-rw-r--r--src/gallium/drivers/cell/ppu/cell_draw_arrays.c113
-rw-r--r--src/gallium/drivers/cell/ppu/cell_draw_arrays.h36
-rw-r--r--src/gallium/drivers/cell/ppu/cell_fence.c172
-rw-r--r--src/gallium/drivers/cell/ppu/cell_fence.h60
-rw-r--r--src/gallium/drivers/cell/ppu/cell_flush.c109
-rw-r--r--src/gallium/drivers/cell/ppu/cell_flush.h45
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fp.c2036
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fp.h42
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fragment.c2189
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fragment.h38
-rw-r--r--src/gallium/drivers/cell/ppu/cell_pipe_state.c473
-rw-r--r--src/gallium/drivers/cell/ppu/cell_pipe_state.h39
-rw-r--r--src/gallium/drivers/cell/ppu/cell_public.h10
-rw-r--r--src/gallium/drivers/cell/ppu/cell_render.c211
-rw-r--r--src/gallium/drivers/cell/ppu/cell_render.h39
-rw-r--r--src/gallium/drivers/cell/ppu/cell_screen.c221
-rw-r--r--src/gallium/drivers/cell/ppu/cell_screen.h55
-rw-r--r--src/gallium/drivers/cell/ppu/cell_spu.c219
-rw-r--r--src/gallium/drivers/cell/ppu/cell_spu.h79
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state.h65
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_derived.c170
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.c343
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.h36
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_per_fragment.c1432
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_per_fragment.h39
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_shader.c229
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_vertex.c120
-rw-r--r--src/gallium/drivers/cell/ppu/cell_surface.c37
-rw-r--r--src/gallium/drivers/cell/ppu/cell_surface.h42
-rw-r--r--src/gallium/drivers/cell/ppu/cell_texture.c644
-rw-r--r--src/gallium/drivers/cell/ppu/cell_texture.h102
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vbuf.c332
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vbuf.h38
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vertex_fetch.c346
-rw-r--r--src/gallium/drivers/cell/ppu/cell_vertex_shader.c145
-rw-r--r--src/gallium/drivers/cell/spu/.gitignore1
-rw-r--r--src/gallium/drivers/cell/spu/Makefile83
-rw-r--r--src/gallium/drivers/cell/spu/spu_colorpack.h145
-rw-r--r--src/gallium/drivers/cell/spu/spu_command.c810
-rw-r--r--src/gallium/drivers/cell/spu/spu_command.h35
-rw-r--r--src/gallium/drivers/cell/spu/spu_dcache.c145
-rw-r--r--src/gallium/drivers/cell/spu/spu_dcache.h37
-rw-r--r--src/gallium/drivers/cell/spu/spu_exec.c1870
-rw-r--r--src/gallium/drivers/cell/spu/spu_exec.h173
-rw-r--r--src/gallium/drivers/cell/spu/spu_funcs.c173
-rw-r--r--src/gallium/drivers/cell/spu/spu_funcs.h35
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.c117
-rw-r--r--src/gallium/drivers/cell/spu/spu_main.h269
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.c631
-rw-r--r--src/gallium/drivers/cell/spu/spu_per_fragment_op.h44
-rw-r--r--src/gallium/drivers/cell/spu/spu_render.c356
-rw-r--r--src/gallium/drivers/cell/spu/spu_render.h38
-rw-r--r--src/gallium/drivers/cell/spu/spu_shuffle.h186
-rw-r--r--src/gallium/drivers/cell/spu/spu_texture.c641
-rw-r--r--src/gallium/drivers/cell/spu/spu_texture.h67
-rw-r--r--src/gallium/drivers/cell/spu/spu_tgsi_exec.h158
-rw-r--r--src/gallium/drivers/cell/spu/spu_tile.c126
-rw-r--r--src/gallium/drivers/cell/spu/spu_tile.h75
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.c843
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.h37
-rw-r--r--src/gallium/drivers/cell/spu/spu_util.c77
-rw-r--r--src/gallium/drivers/cell/spu/spu_vertex_fetch.c146
-rw-r--r--src/gallium/drivers/cell/spu/spu_vertex_shader.c245
-rw-r--r--src/gallium/drivers/cell/spu/spu_vertex_shader.h66
-rw-r--r--src/gallium/targets/libgl-xlib/Makefile3
-rw-r--r--src/gallium/targets/libgl-xlib/SConscript5
-rw-r--r--src/gallium/targets/libgl-xlib/xlib.c2
91 files changed, 9 insertions, 21010 deletions
diff --git a/Makefile b/Makefile
index d65d4c4968e..cf6555c782d 100644
--- a/Makefile
+++ b/Makefile
@@ -112,8 +112,6 @@ linux \
112linux-i965 \ 112linux-i965 \
113linux-alpha \ 113linux-alpha \
114linux-alpha-static \ 114linux-alpha-static \
115linux-cell \
116linux-cell-debug \
117linux-debug \ 115linux-debug \
118linux-dri \ 116linux-dri \
119linux-dri-debug \ 117linux-dri-debug \
diff --git a/common.py b/common.py
index 5e2967fc59b..5578f72af1f 100644
--- a/common.py
+++ b/common.py
@@ -83,7 +83,7 @@ def AddOptions(opts):
83 opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine, 83 opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine,
84 allowed_values=('generic', 'ppc', 'x86', 'x86_64'))) 84 allowed_values=('generic', 'ppc', 'x86', 'x86_64')))
85 opts.Add(EnumOption('platform', 'target platform', host_platform, 85 opts.Add(EnumOption('platform', 'target platform', host_platform,
86 allowed_values=('linux', 'cell', 'windows', 'darwin', 'cygwin', 'sunos', 'freebsd8'))) 86 allowed_values=('linux', 'windows', 'darwin', 'cygwin', 'sunos', 'freebsd8')))
87 opts.Add(BoolOption('embedded', 'embedded build', 'no')) 87 opts.Add(BoolOption('embedded', 'embedded build', 'no'))
88 opts.Add('toolchain', 'compiler toolchain', default_toolchain) 88 opts.Add('toolchain', 'compiler toolchain', default_toolchain)
89 opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', 'no')) 89 opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', 'no'))
diff --git a/configs/linux-cell b/configs/linux-cell
deleted file mode 100644
index 7f38da971d1..00000000000
--- a/configs/linux-cell
+++ /dev/null
@@ -1,71 +0,0 @@
1# linux-cell (non-debug build)
2
3include $(TOP)/configs/linux
4
5CONFIG_NAME = linux-cell
6
7
8# Omiting other gallium drivers:
9GALLIUM_DRIVERS_DIRS = cell softpipe trace rbug identity
10
11
12# Compiler and flags
13CC = ppu32-gcc
14CXX = ppu32-g++
15HOST_CC = gcc
16APP_CC = gcc
17APP_CXX = g++
18
19OPT_FLAGS = -O3
20
21# Cell SDK location
22## For SDK 2.1: (plus, remove -DSPU_MAIN_PARAM_LONG_LONG below)
23#SDK = /opt/ibm/cell-sdk/prototype/sysroot/usr
24## For SDK 3.0:
25SDK = /opt/cell/sdk/usr
26
27
28
29COMMON_C_CPP_FLAGS = $(OPT_FLAGS) -Wall -Winline \
30 -fPIC -m32 -mabi=altivec -maltivec \
31 -I. -I$(SDK)/include \
32 -DGALLIUM_CELL $(DEFINES)
33
34CFLAGS = $(COMMON_C_CPP_FLAGS) -Wmissing-prototypes -std=c99
35
36CXXFLAGS = $(COMMON_C_CPP_FLAGS)
37
38
39SRC_DIRS = glsl mapi/glapi mapi/vgapi mesa \
40 gallium gallium/winsys gallium/targets glu
41
42# Build no traditional Mesa drivers:
43DRIVER_DIRS =
44
45
46MKDEP_OPTIONS = -fdepend -Y
47
48
49GL_LIB_DEPS = $(EXTRA_LIB_PATH) -lX11 -lXext -lm -lpthread \
50 -L$(SDK)/lib -m32 -Wl,-m,elf32ppc -R$(SDK)/lib -lspe2
51
52
53CELL_SPU_LIB = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a
54
55
56### SPU stuff
57
58SPU_CC = spu-gcc
59
60SPU_CFLAGS = $(OPT_FLAGS) -W -Wall -Winline -Wmissing-prototypes -Wno-main \
61 -I. -I$(SDK)/spu/include -I$(TOP)/src/mesa/ $(INCLUDE_DIRS) \
62 -DSPU_MAIN_PARAM_LONG_LONG \
63 -include spu_intrinsics.h
64
65SPU_LFLAGS = -L$(SDK)/spu/lib -Wl,-N -lmisc -lm
66
67SPU_AR = ppu-ar
68SPU_AR_FLAGS = -qcs
69
70SPU_EMBED = ppu32-embedspu
71SPU_EMBED_FLAGS = -m32
diff --git a/configs/linux-cell-debug b/configs/linux-cell-debug
deleted file mode 100644
index 42f3245edc9..00000000000
--- a/configs/linux-cell-debug
+++ /dev/null
@@ -1,10 +0,0 @@
1# linux-cell-debug
2
3include $(TOP)/configs/linux-cell
4
5# just override name and OPT_FLAGS here:
6
7CONFIG_NAME = linux-cell-debug
8
9OPT_FLAGS = -g -DDEBUG
10
diff --git a/docs/cell.html b/docs/cell.html
deleted file mode 100644
index 30626b60b42..00000000000
--- a/docs/cell.html
+++ /dev/null
@@ -1,138 +0,0 @@
1<HTML>
2
3<TITLE>Cell Driver</TITLE>
4
5<link rel="stylesheet" type="text/css" href="mesa.css"></head>
6
7<BODY>
8
9<H1>Mesa/Gallium Cell Driver</H1>
10
11<p>
12The Mesa
13<a href="http://en.wikipedia.org/wiki/Cell_%28microprocessor%29" target="_parent">Cell</a>
14driver is part of the
15<a href="http://wiki.freedesktop.org/wiki/Software/gallium" target="_parent">Gallium3D</a>
16architecture.
17Tungsten Graphics did the original implementation of the Cell driver.
18</p>
19
20
21<H2>Source Code</H2>
22
23<p>
24The latest Cell driver source code is on the master branch of the Mesa
25git repository.
26</p>
27<p>
28To build the driver you'll need the IBM Cell SDK (version 2.1 or 3.0).
29To use the driver you'll need a Cell system, such as a PS3 running Linux,
30or the Cell Simulator (untested, though).
31</p>
32
33<p>
34If using Cell SDK 2.1, see the configs/linux-cell file for some
35special changes.
36</p>
37
38<p>
39To compile the code, run <code>make linux-cell</code>.
40Or to build in debug mode, run <code>make linux-cell-debug</code>.
41</p>
42
43<p>
44To use the library, make sure your current directory is the top of the
45Mesa tree, then set <code>LD_LIBRARY_PATH</code> like this:
46<pre>
47 export LD_LIBRARY_PATH=$PWD/lib/gallium:$PWD/lib/
48</pre>
49
50<p>
51Verify that the Cell driver is being used by running
52<code>progs/xdemos/glxinfo</code> and looking for:
53<pre>
54 OpenGL renderer string: Gallium 0.3, Cell on Xlib
55</pre>
56
57
58<H2>Driver Implementation Summary</H2>
59
60<p>
61Rasterization is parallelized across the SPUs in a tiled-based manner.
62Batches of transformed triangles are sent to the SPUs (actually, pulled by from
63main memory by the SPUs).
64Each SPU loops over a set of 32x32-pixel screen tiles, rendering the triangles
65into each tile.
66Because of the limited SPU memory, framebuffer tiles are paged in/out of
67SPU local store as needed.
68Similarly, textures are tiled and brought into local store as needed.
69</p>
70
71
72<H2>Status</H2>
73
74<p>
75As of October 2008, the driver runs quite a few OpenGL demos.
76Features that work include:
77</p>
78<ul>
79<li>Point/line/triangle rendering, glDrawPixels
80<li>2D, NPOT and cube texture maps with nearest/linear/mipmap filtering
81<li>Dynamic SPU code generation for fragment shaders, but not complete
82<li>Dynamic SPU code generation for fragment ops (blend, Z-test, etc), but not complete
83<li>Dynamic PPU/PPC code generation for vertex shaders, but not complete
84</ul>
85<p>
86Performance has recently improved with the addition of PPC code generation
87for vertex shaders, but the code quality isn't too great yet.
88</p>
89<p>
90Another bottleneck is SwapBuffers. It may be the limiting factor for
91many simple GL tests.
92</p>
93
94
95
96<H2>Debug Options</H2>
97
98<p>
99The CELL_DEBUG env var can be set to a comma-separated list of one or
100more of the following debug options:
101</p>
102<ul>
103<li><b>checker</b> - use a different background clear color for each SPU.
104 This lets you see which SPU is rendering which screen tiles.
105<li><b>sync</b> - wait/synchronize after each DMA transfer
106<li><b>asm</b> - print generated SPU assembly code to stdout
107<li><b>fragops</b> - emit fragment ops debug messages
108<li><b>fragopfallback</b> - don't use codegen for fragment ops
109<li><b>cmd</b> - print SPU commands as their received
110<li><b>cache</b> - print texture cache statistics when program exits
111</ul>
112<p>
113Note that some of these options may only work for linux-cell-debug builds.
114</p>
115
116<p>
117If the GALLIUM_NOPPC env var is set, PPC code generation will not be used
118and vertex shaders will be run with the TGSI interpreter.
119</p>
120<p>
121If the GALLIUM_NOCELL env var is set, the softpipe driver will be used
122intead of the Cell driver.
123This is useful for comparison/validation.
124</p>
125
126
127
128<H2>Contributing</H2>
129
130<p>
131If you're interested in contributing to the effort, familiarize yourself
132with the code, join the <a href="lists.html">mesa3d-dev mailing list</a>,
133and describe what you'd like to do.
134</p>
135
136
137</BODY>
138</HTML>
diff --git a/docs/contents.html b/docs/contents.html
index 8882e731879..e3cea2a7ce3 100644
--- a/docs/contents.html
+++ b/docs/contents.html
@@ -78,8 +78,7 @@ a:visited {
78<li><a href="devinfo.html" target="MainFrame">Development Notes</a> 78<li><a href="devinfo.html" target="MainFrame">Development Notes</a>
79<li><a href="sourcedocs.html" target="MainFrame">Source Documentation</a> 79<li><a href="sourcedocs.html" target="MainFrame">Source Documentation</a>
80<li><a href="subset.html" target="MainFrame">Mesa Subset Driver</a> 80<li><a href="subset.html" target="MainFrame">Mesa Subset Driver</a>
81<LI><A HREF="dispatch.html" target="MainFrame">GL Dispatch</A> 81<li><a HREF="dispatch.html" target="MainFrame">GL Dispatch</a>
82<li><a href="cell.html" target="MainFrame">Cell Driver</A>
83</ul> 82</ul>
84 83
85<b>Links</b> 84<b>Links</b>
diff --git a/docs/news.html b/docs/news.html
index 91284922880..d6a2aa8e6dd 100644
--- a/docs/news.html
+++ b/docs/news.html
@@ -217,7 +217,7 @@ This is a bug-fix release.
217<h2>January 24, 2008</h2> 217<h2>January 24, 2008</h2>
218 218
219<p> 219<p>
220Added a new page describing the <a href="cell.html">Mesa Cell driver</a>. 220Added a new page describing the Mesa Cell driver.
221</p> 221</p>
222 222
223 223
diff --git a/docs/relnotes-7.12.html b/docs/relnotes-7.12.html
index 0d2211358cf..393b1124e58 100644
--- a/docs/relnotes-7.12.html
+++ b/docs/relnotes-7.12.html
@@ -74,6 +74,8 @@ tbd
74 by the gallium drivers for this hardware.</li> 74 by the gallium drivers for this hardware.</li>
75<li>Removed the i965g driver, which was broken and with nobody in sight to fix 75<li>Removed the i965g driver, which was broken and with nobody in sight to fix
76 the situation</li> 76 the situation</li>
77<li>Removed the Gallium cell driver, it was just a burden on Gallium
78 development and nobody seems to use it.</li>
77</ul> 79</ul>
78 80
79 81
diff --git a/docs/relnotes-7.5.html b/docs/relnotes-7.5.html
index 56deca6a86c..a25ca8efc11 100644
--- a/docs/relnotes-7.5.html
+++ b/docs/relnotes-7.5.html
@@ -61,7 +61,7 @@ baa7a1e850b6e39bae58868fd0684004 MesaGLUT-7.5.tar.bz2
61 <ul> 61 <ul>
62 <li>softpipe - a software/reference driver 62 <li>softpipe - a software/reference driver
63 <li>i915 - Intel 915/945 driver 63 <li>i915 - Intel 915/945 driver
64 <li><a href="cell.html">Cell</a> - IBM/Sony/Toshiba Cell processor driver 64 <li>Cell - IBM/Sony/Toshiba Cell processor driver
65 <li>nouveau (for NVIDIA GPUs) and R300 for (AMD/ATI R300). 65 <li>nouveau (for NVIDIA GPUs) and R300 for (AMD/ATI R300).
66 <b>PLEASE NOTE: these drivers are incomplete and still under development. 66 <b>PLEASE NOTE: these drivers are incomplete and still under development.
67 It's probably NOT worthwhile to report any bugs unless you have patches. 67 It's probably NOT worthwhile to report any bugs unless you have patches.
diff --git a/docs/sourcetree.html b/docs/sourcetree.html
index 3f100df49e1..e26c653abbe 100644
--- a/docs/sourcetree.html
+++ b/docs/sourcetree.html
@@ -86,7 +86,6 @@ each directory.
86 interfaces 86 interfaces
87 <li><b>drivers</b> - Gallium3D device drivers 87 <li><b>drivers</b> - Gallium3D device drivers
88 <ul> 88 <ul>
89 <li><b>cell</b> - Driver for Cell processor.
90 <li><b>i915</b> - Driver for Intel i915/i945. 89 <li><b>i915</b> - Driver for Intel i915/i945.
91 <li><b>llvmpipe</b> - Software driver using LLVM for runtime code generation. 90 <li><b>llvmpipe</b> - Software driver using LLVM for runtime code generation.
92 <li><b>nv*</b> - Drivers for NVIDIA GPUs. 91 <li><b>nv*</b> - Drivers for NVIDIA GPUs.
diff --git a/doxygen/gallium.doc b/doxygen/gallium.doc
index f0ff36075a5..e81b02e1aa6 100644
--- a/doxygen/gallium.doc
+++ b/doxygen/gallium.doc
@@ -34,7 +34,6 @@
34 - Pipe drivers: 34 - Pipe drivers:
35 - \ref softpipe 35 - \ref softpipe
36 - \ref i915g 36 - \ref i915g
37 - Cell driver (cell_context.h, cell_winsys.h)
38 - \ref failover 37 - \ref failover
39 38
40 - Winsys drivers: 39 - Winsys drivers:
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 15de20cb3a3..5c65533308c 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -63,7 +63,6 @@ C_SOURCES := \
63 rtasm/rtasm_cpu.c \ 63 rtasm/rtasm_cpu.c \
64 rtasm/rtasm_execmem.c \ 64 rtasm/rtasm_execmem.c \
65 rtasm/rtasm_ppc.c \ 65 rtasm/rtasm_ppc.c \
66 rtasm/rtasm_ppc_spe.c \
67 rtasm/rtasm_x86sse.c \ 66 rtasm/rtasm_x86sse.c \
68 tgsi/tgsi_build.c \ 67 tgsi/tgsi_build.c \
69 tgsi/tgsi_dump.c \ 68 tgsi/tgsi_dump.c \
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
deleted file mode 100644
index 53a0e722cff..00000000000
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
+++ /dev/null
@@ -1,1067 +0,0 @@
1/*
2 * (C) Copyright IBM Corporation 2008
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25/**
26 * \file
27 * Real-time assembly generation interface for Cell B.E. SPEs.
28 *
29 * \author Ian Romanick <idr@us.ibm.com>
30 * \author Brian Paul
31 */
32
33
34#include <stdio.h>
35#include "pipe/p_compiler.h"
36#include "util/u_memory.h"
37#include "rtasm_ppc_spe.h"
38
39
40#ifdef GALLIUM_CELL
41/**
42 * SPE instruction types
43 *
44 * There are 6 primary instruction encodings used on the Cell's SPEs. Each of
45 * the following unions encodes one type.
46 *
47 * \bug
48 * If, at some point, we start generating SPE code from a little-endian host
49 * these unions will not work.
50 */
51/*@{*/
52/**
53 * Encode one output register with two input registers
54 */
55union spe_inst_RR {
56 uint32_t bits;
57 struct {
58 unsigned op:11;
59 unsigned rB:7;
60 unsigned rA:7;
61 unsigned rT:7;
62 } inst;
63};
64
65
66/**
67 * Encode one output register with three input registers
68 */
69union spe_inst_RRR {
70 uint32_t bits;
71 struct {
72 unsigned op:4;
73 unsigned rT:7;
74 unsigned rB:7;
75 unsigned rA:7;
76 unsigned rC:7;
77 } inst;
78};
79
80
81/**
82 * Encode one output register with one input reg. and a 7-bit signed immed
83 */
84union spe_inst_RI7 {
85 uint32_t bits;
86 struct {
87 unsigned op:11;
88 unsigned i7:7;
89 unsigned rA:7;
90 unsigned rT:7;
91 } inst;
92};
93
94
95/**
96 * Encode one output register with one input reg. and an 8-bit signed immed
97 */
98union spe_inst_RI8 {
99 uint32_t bits;
100 struct {
101 unsigned op:10;
102 unsigned i8:8;
103 unsigned rA:7;
104 unsigned rT:7;
105 } inst;
106};
107
108
109/**
110 * Encode one output register with one input reg. and a 10-bit signed immed
111 */
112union spe_inst_RI10 {
113 uint32_t bits;
114 struct {
115 unsigned op:8;
116 unsigned i10:10;
117 unsigned rA:7;
118 unsigned rT:7;
119 } inst;
120};
121
122
123/**
124 * Encode one output register with a 16-bit signed immediate
125 */
126union spe_inst_RI16 {
127 uint32_t bits;
128 struct {
129 unsigned op:9;
130 unsigned i16:16;
131 unsigned rT:7;
132 } inst;
133};
134
135
136/**
137 * Encode one output register with a 18-bit signed immediate
138 */
139union spe_inst_RI18 {
140 uint32_t bits;
141 struct {
142 unsigned op:7;
143 unsigned i18:18;
144 unsigned rT:7;
145 } inst;
146};
147/*@}*/
148
149
150static void
151indent(const struct spe_function *p)
152{
153 int i;
154 for (i = 0; i < p->indent; i++) {
155 putchar(' ');
156 }
157}
158
159
160static const char *
161rem_prefix(const char *longname)
162{
163 return longname + 4;
164}
165
166
167static const char *
168reg_name(int reg)
169{
170 switch (reg) {
171 case SPE_REG_SP:
172 return "$sp";
173 case SPE_REG_RA:
174 return "$lr";
175 default:
176 {
177 /* cycle through four buffers to handle multiple calls per printf */
178 static char buf[4][10];
179 static int b = 0;
180 b = (b + 1) % 4;
181 sprintf(buf[b], "$%d", reg);
182 return buf[b];
183 }
184 }
185}
186
187
188static void
189emit_instruction(struct spe_function *p, uint32_t inst_bits)
190{
191 if (!p->store)
192 return; /* out of memory, drop the instruction */
193
194 if (p->num_inst == p->max_inst) {
195 /* allocate larger buffer */
196 uint32_t *newbuf;
197 p->max_inst *= 2; /* 2x larger */
198 newbuf = align_malloc(p->max_inst * SPE_INST_SIZE, 16);
199 if (newbuf) {
200 memcpy(newbuf, p->store, p->num_inst * SPE_INST_SIZE);
201 }
202 align_free(p->store);
203 p->store = newbuf;
204 if (!p->store) {
205 /* out of memory */
206 p->num_inst = 0;
207 return;
208 }
209 }
210
211 p->store[p->num_inst++] = inst_bits;
212}
213
214
215
216static void emit_RR(struct spe_function *p, unsigned op, int rT,
217 int rA, int rB, const char *name)
218{
219 union spe_inst_RR inst;
220 inst.inst.op = op;
221 inst.inst.rB = rB;
222 inst.inst.rA = rA;
223 inst.inst.rT = rT;
224 emit_instruction(p, inst.bits);
225 if (p->print) {
226 indent(p);
227 printf("%s\t%s, %s, %s\n",
228 rem_prefix(name), reg_name(rT), reg_name(rA), reg_name(rB));
229 }
230}
231
232
233static void emit_RRR(struct spe_function *p, unsigned op, int rT,
234 int rA, int rB, int rC, const char *name)
235{
236 union spe_inst_RRR inst;
237 inst.inst.op = op;
238 inst.inst.rT = rT;
239 inst.inst.rB = rB;
240 inst.inst.rA = rA;
241 inst.inst.rC = rC;
242 emit_instruction(p, inst.bits);
243 if (p->print) {
244 indent(p);
245 printf("%s\t%s, %s, %s, %s\n", rem_prefix(name), reg_name(rT),
246 reg_name(rA), reg_name(rB), reg_name(rC));
247 }
248}
249
250
251static void emit_RI7(struct spe_function *p, unsigned op, int rT,
252 int rA, int imm, const char *name)
253{
254 union spe_inst_RI7 inst;
255 inst.inst.op = op;
256 inst.inst.i7 = imm;
257 inst.inst.rA = rA;
258 inst.inst.rT = rT;
259 emit_instruction(p, inst.bits);
260 if (p->print) {
261 indent(p);
262 printf("%s\t%s, %s, 0x%x\n",
263 rem_prefix(name), reg_name(rT), reg_name(rA), imm);
264 }
265}
266
267
268
269static void emit_RI8(struct spe_function *p, unsigned op, int rT,
270 int rA, int imm, const char *name)
271{
272 union spe_inst_RI8 inst;
273 inst.inst.op = op;
274 inst.inst.i8 = imm;
275 inst.inst.rA = rA;
276 inst.inst.rT = rT;
277 emit_instruction(p, inst.bits);
278 if (p->print) {
279 indent(p);
280 printf("%s\t%s, %s, 0x%x\n",
281 rem_prefix(name), reg_name(rT), reg_name(rA), imm);
282 }
283}
284
285
286
287static void emit_RI10(struct spe_function *p, unsigned op, int rT,
288 int rA, int imm, const char *name)
289{
290 union spe_inst_RI10 inst;
291 inst.inst.op = op;
292 inst.inst.i10 = imm;
293 inst.inst.rA = rA;
294 inst.inst.rT = rT;
295 emit_instruction(p, inst.bits);
296 if (p->print) {
297 indent(p);
298 printf("%s\t%s, %s, 0x%x\n",
299 rem_prefix(name), reg_name(rT), reg_name(rA), imm);
300 }
301}
302
303
304/** As above, but do range checking on signed immediate value */
305static void emit_RI10s(struct spe_function *p, unsigned op, int rT,
306 int rA, int imm, const char *name)
307{
308 assert(imm <= 511);
309 assert(imm >= -512);
310 emit_RI10(p, op, rT, rA, imm, name);
311}
312
313
314static void emit_RI16(struct spe_function *p, unsigned op, int rT,
315 int imm, const char *name)
316{
317 union spe_inst_RI16 inst;
318 inst.inst.op = op;
319 inst.inst.i16 = imm;
320 inst.inst.rT = rT;
321 emit_instruction(p, inst.bits);
322 if (p->print) {
323 indent(p);
324 printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm);
325 }
326}
327
328
329static void emit_RI18(struct spe_function *p, unsigned op, int rT,
330 int imm, const char *name)
331{
332 union spe_inst_RI18 inst;
333 inst.inst.op = op;
334 inst.inst.i18 = imm;
335 inst.inst.rT = rT;
336 emit_instruction(p, inst.bits);
337 if (p->print) {
338 indent(p);
339 printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm);
340 }
341}
342
343
344#define EMIT(_name, _op) \
345void _name (struct spe_function *p) \
346{ \
347 emit_RR(p, _op, 0, 0, 0, __FUNCTION__); \
348}
349
350#define EMIT_(_name, _op) \
351void _name (struct spe_function *p, int rT) \
352{ \
353 emit_RR(p, _op, rT, 0, 0, __FUNCTION__); \
354}
355
356#define EMIT_R(_name, _op) \
357void _name (struct spe_function *p, int rT, int rA) \
358{ \
359 emit_RR(p, _op, rT, rA, 0, __FUNCTION__); \
360}
361
362#define EMIT_RR(_name, _op) \
363void _name (struct spe_function *p, int rT, int rA, int rB) \
364{ \
365 emit_RR(p, _op, rT, rA, rB, __FUNCTION__); \
366}
367
368#define EMIT_RRR(_name, _op) \
369void _name (struct spe_function *p, int rT, int rA, int rB, int rC) \
370{ \
371 emit_RRR(p, _op, rT, rA, rB, rC, __FUNCTION__); \
372}
373
374#define EMIT_RI7(_name, _op) \
375void _name (struct spe_function *p, int rT, int rA, int imm) \
376{ \
377 emit_RI7(p, _op, rT, rA, imm, __FUNCTION__); \
378}
379
380#define EMIT_RI8(_name, _op, bias) \
381void _name (struct spe_function *p, int rT, int rA, int imm) \
382{ \
383 emit_RI8(p, _op, rT, rA, bias - imm, __FUNCTION__); \
384}
385
386#define EMIT_RI10(_name, _op) \
387void _name (struct spe_function *p, int rT, int rA, int imm) \
388{ \
389 emit_RI10(p, _op, rT, rA, imm, __FUNCTION__); \
390}
391
392#define EMIT_RI10s(_name, _op) \
393void _name (struct spe_function *p, int rT, int rA, int imm) \
394{ \
395 emit_RI10s(p, _op, rT, rA, imm, __FUNCTION__); \
396}
397
398#define EMIT_RI16(_name, _op) \
399void _name (struct spe_function *p, int rT, int imm) \
400{ \
401 emit_RI16(p, _op, rT, imm, __FUNCTION__); \
402}
403
404#define EMIT_RI18(_name, _op) \
405void _name (struct spe_function *p, int rT, int imm) \
406{ \
407 emit_RI18(p, _op, rT, imm, __FUNCTION__); \
408}
409
410#define EMIT_I16(_name, _op) \
411void _name (struct spe_function *p, int imm) \
412{ \
413 emit_RI16(p, _op, 0, imm, __FUNCTION__); \
414}
415
416#include "rtasm_ppc_spe.h"
417
418
419
420/**
421 * Initialize an spe_function.
422 * \param code_size initial size of instruction buffer to allocate, in bytes.
423 * If zero, use a default.
424 */
425void spe_init_func(struct spe_function *p, unsigned code_size)
426{
427 uint i;
428
429 if (!code_size)
430 code_size = 64;
431
432 p->num_inst = 0;
433 p->max_inst = code_size / SPE_INST_SIZE;
434 p->store = align_malloc(code_size, 16);
435
436 p->set_count = 0;
437 memset(p->regs, 0, SPE_NUM_REGS * sizeof(p->regs[0]));
438
439 /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile.
440 */
441 p->regs[0] = p->regs[1] = p->regs[2] = 1;
442 for (i = 80; i <= 127; i++) {
443 p->regs[i] = 1;
444 }
445
446 p->print = FALSE;
447 p->indent = 0;
448}
449
450
451void spe_release_func(struct spe_function *p)
452{
453 assert(p->num_inst <= p->max_inst);
454 if (p->store != NULL) {
455 align_free(p->store);
456 }
457 p->store = NULL;
458}
459
460
461/** Return current code size in bytes. */
462unsigned spe_code_size(const struct spe_function *p)
463{
464 return p->num_inst * SPE_INST_SIZE;
465}
466
467
468/**
469 * Allocate a SPE register.
470 * \return register index or -1 if none left.
471 */
472int spe_allocate_available_register(struct spe_function *p)
473{
474 unsigned i;
475 for (i = 0; i < SPE_NUM_REGS; i++) {
476 if (p->regs[i] == 0) {
477 p->regs[i] = 1;
478 return i;
479 }
480 }
481
482 return -1;
483}
484
485
486/**
487 * Mark the given SPE register as "allocated".
488 */
489int spe_allocate_register(struct spe_function *p, int reg)
490{
491 assert(reg < SPE_NUM_REGS);
492 assert(p->regs[reg] == 0);
493 p->regs[reg] = 1;
494 return reg;
495}
496
497
498/**
499 * Mark the given SPE register as "unallocated". Note that this should
500 * only be used on registers allocated in the current register set; an
501 * assertion will fail if an attempt is made to deallocate a register
502 * allocated in an earlier register set.
503 */
504void spe_release_register(struct spe_function *p, int reg)
505{
506 assert(reg >= 0);
507 assert(reg < SPE_NUM_REGS);
508 assert(p->regs[reg] == 1);
509
510 p->regs[reg] = 0;
511}
512
513/**
514 * Start a new set of registers. This can be called if
515 * it will be difficult later to determine exactly what
516 * registers were actually allocated during a code generation
517 * sequence, and you really just want to deallocate all of them.
518 */
519void spe_allocate_register_set(struct spe_function *p)
520{
521 uint i;
522
523 /* Keep track of the set count. If it ever wraps around to 0,
524 * we're in trouble.
525 */
526 p->set_count++;
527 assert(p->set_count > 0);
528
529 /* Increment the allocation count of all registers currently
530 * allocated. Then any registers that are allocated in this set
531 * will be the only ones with a count of 1; they'll all be released
532 * when the register set is released.
533 */
534 for (i = 0; i < SPE_NUM_REGS; i++) {
535 if (p->regs[i] > 0)
536 p->regs[i]++;
537 }
538}
539
540void spe_release_register_set(struct spe_function *p)
541{
542 uint i;
543
544 /* If the set count drops below zero, we're in trouble. */
545 assert(p->set_count > 0);
546 p->set_count--;
547
548 /* Drop the allocation level of all registers. Any allocated
549 * during this register set will drop to 0 and then become
550 * available.
551 */
552 for (i = 0; i < SPE_NUM_REGS; i++) {
553 if (p->regs[i] > 0)
554 p->regs[i]--;
555 }
556}
557
558
559unsigned
560spe_get_registers_used(const struct spe_function *p, ubyte used[])
561{
562 unsigned i, num = 0;
563 /* only count registers in the range available to callers */
564 for (i = 2; i < 80; i++) {
565 if (p->regs[i]) {
566 used[num++] = i;
567 }
568 }
569 return num;
570}
571
572
573void
574spe_print_code(struct spe_function *p, boolean enable)
575{
576 p->print = enable;
577}
578
579
580void
581spe_indent(struct spe_function *p, int spaces)
582{
583 p->indent += spaces;
584}
585
586
587void
588spe_comment(struct spe_function *p, int rel_indent, const char *s)
589{
590 if (p->print) {
591 p->indent += rel_indent;
592 indent(p);
593 p->indent -= rel_indent;
594 printf("# %s\n", s);
595 }
596}
597
598
/**
 * Load quad word.
 * NOTE: offset is in bytes and the least significant 4 bits must be zero!
 */
void spe_lqd(struct spe_function *p, int rT, int rA, int offset)
{
   const boolean pSave = p->print;

   /* offset must be a multiple of 16 */
   assert(offset % 16 == 0);
   /* offset must fit in 10-bit signed int field, after shifting */
   assert((offset >> 4) <= 511);
   assert((offset >> 4) >= -512);

   /* Suppress emit_RI10's generic disassembly output: we print the
    * friendlier "lqd rT, offset(rA)" form ourselves below, with the
    * byte offset rather than the encoded (offset >> 4) immediate.
    */
   p->print = FALSE;
   emit_RI10(p, 0x034, rT, rA, offset >> 4, "spe_lqd");
   p->print = pSave;

   if (p->print) {
      indent(p);
      printf("lqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA));
   }
}
622
623
/**
 * Store quad word.
 * NOTE: offset is in bytes and the least significant 4 bits must be zero!
 */
void spe_stqd(struct spe_function *p, int rT, int rA, int offset)
{
   const boolean pSave = p->print;

   /* offset must be a multiple of 16 */
   assert(offset % 16 == 0);
   /* offset must fit in 10-bit signed int field, after shifting */
   assert((offset >> 4) <= 511);
   assert((offset >> 4) >= -512);

   /* Suppress emit_RI10's generic disassembly; print the friendlier
    * "stqd rT, offset(rA)" form (with the byte offset) ourselves.
    */
   p->print = FALSE;
   emit_RI10(p, 0x024, rT, rA, offset >> 4, "spe_stqd");
   p->print = pSave;

   if (p->print) {
      indent(p);
      printf("stqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA));
   }
}
647
648
/**
 * For branch instructions:
 * \param d if 1, disable interrupts if branch is taken
 * \param e if 1, enable interrupts if branch is taken
 * If d and e are both zero, don't change interrupt status (right?)
 */

/** Branch Indirect to address in rA */
void spe_bi(struct spe_function *p, int rA, int d, int e)
{
   /* The d/e interrupt-control bits are packed into the RI7 immediate. */
   emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4), __FUNCTION__);
}
661
/** Interrupt Return: resume at the address in rA. */
void spe_iret(struct spe_function *p, int rA, int d, int e)
{
   /* d/e interrupt-control bits packed into the RI7 immediate field */
   emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4), __FUNCTION__);
}
667
/** Branch indirect and set link on external data. Link saved in rT. */
void spe_bisled(struct spe_function *p, int rT, int rA, int d,
                int e)
{
   /* d/e interrupt-control bits packed into the RI7 immediate field */
   emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
674
/** Branch indirect and set link. Save PC in rT, jump to rA. */
void spe_bisl(struct spe_function *p, int rT, int rA, int d,
              int e)
{
   /* d/e interrupt-control bits packed into the RI7 immediate field */
   emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
681
/** Branch indirect if zero word. If rT.word[0]==0, jump to rA. */
void spe_biz(struct spe_function *p, int rT, int rA, int d, int e)
{
   /* d/e interrupt-control bits packed into the RI7 immediate field */
   emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
687
/** Branch indirect if non-zero word. If rT.word[0]!=0, jump to rA. */
void spe_binz(struct spe_function *p, int rT, int rA, int d, int e)
{
   /* d/e interrupt-control bits packed into the RI7 immediate field */
   emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
693
/** Branch indirect if zero halfword. If rT.halfword[1]==0, jump to rA. */
void spe_bihz(struct spe_function *p, int rT, int rA, int d, int e)
{
   /* d/e interrupt-control bits packed into the RI7 immediate field */
   emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
699
/** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */
void spe_bihnz(struct spe_function *p, int rT, int rA, int d, int e)
{
   /* d/e interrupt-control bits packed into the RI7 immediate field */
   emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
}
705
706
707/* Hint-for-branch instructions
708 */
709#if 0
710hbr;
711hbra;
712hbrr;
713#endif
714
715
716/* Control instructions
717 */
718#if 0
719stop;
720EMIT_RR (spe_stopd, 0x140);
721EMIT_ (spe_nop, 0x201);
722sync;
723EMIT_ (spe_dsync, 0x003);
724EMIT_R (spe_mfspr, 0x00c);
725EMIT_R (spe_mtspr, 0x10c);
726#endif
727
728
729/**
730 ** Helper / "macro" instructions.
731 ** Use somewhat verbose names as a reminder that these aren't native
732 ** SPE instructions.
733 **/
734
735
/**
 * Load/splat an immediate float into rT, using one instruction for a few
 * common constants and ilhu+iohl in the general case.
 *
 * Bug fix: the previous version compared with `x == 0.0f`, which is also
 * true for -0.0f, so -0.0f was silently loaded as +0.0 and the sign bit
 * was lost.  Dispatch on the exact IEEE-754 bit pattern instead.
 */
void
spe_load_float(struct spe_function *p, int rT, float x)
{
   union {
      float f;
      unsigned u;
   } bits;

   bits.f = x;

   switch (bits.u) {
   case 0x00000000:                      /* +0.0f */
      spe_il(p, rT, 0x0);
      break;
   case 0x3f000000:                      /* 0.5f */
      spe_ilhu(p, rT, 0x3f00);
      break;
   case 0x3f800000:                      /* 1.0f */
      spe_ilhu(p, rT, 0x3f80);
      break;
   case 0xbf800000:                      /* -1.0f */
      spe_ilhu(p, rT, 0xbf80);
      break;
   default:
      /* General case: upper halfword, then OR in the lower halfword.
       * Skip the iohl when the low 16 bits are zero (e.g. -0.0f),
       * matching spe_load_int()/spe_load_uint().
       */
      spe_ilhu(p, rT, bits.u >> 16);
      if (bits.u & 0xffff)
         spe_iohl(p, rT, bits.u & 0xffff);
      break;
   }
}
761
762
/**
 * Load/splat an immediate signed int into rT.
 * Uses a single il when the value fits in a sign-extended 16-bit
 * immediate, otherwise ilhu followed by (an optional) iohl.
 */
void
spe_load_int(struct spe_function *p, int rT, int i)
{
   if (i >= -32768 && i <= 32767) {
      /* fits in a sign-extended 16-bit immediate */
      spe_il(p, rT, i);
      return;
   }

   /* upper halfword first, then OR in the lower one if it's nonzero */
   spe_ilhu(p, rT, i >> 16);
   if (i & 0xffff)
      spe_iohl(p, rT, i & 0xffff);
}
775
776void spe_load_uint(struct spe_function *p, int rT, uint ui)
777{
778 /* If the whole value is in the lower 18 bits, use ila, which
779 * doesn't sign-extend. Otherwise, if the two halfwords of
780 * the constant are identical, use ilh. Otherwise, if every byte of
781 * the desired value is 0x00 or 0xff, we can use Form Select Mask for
782 * Bytes Immediate (fsmbi) to load the value in a single instruction.
783 * Otherwise, in the general case, we have to use ilhu followed by iohl.
784 */
785 if ((ui & 0x0003ffff) == ui) {
786 spe_ila(p, rT, ui);
787 }
788 else if ((ui >> 16) == (ui & 0xffff)) {
789 spe_ilh(p, rT, ui & 0xffff);
790 }
791 else if (
792 ((ui & 0x000000ff) == 0 || (ui & 0x000000ff) == 0x000000ff) &&
793 ((ui & 0x0000ff00) == 0 || (ui & 0x0000ff00) == 0x0000ff00) &&
794 ((ui & 0x00ff0000) == 0 || (ui & 0x00ff0000) == 0x00ff0000) &&
795 ((ui & 0xff000000) == 0 || (ui & 0xff000000) == 0xff000000)
796 ) {
797 uint mask = 0;
798 /* fsmbi duplicates each bit in the given mask eight times,
799 * using a 16-bit value to initialize a 16-byte quadword.
800 * Each 4-bit nybble of the mask corresponds to a full word
801 * of the result; look at the value and figure out the mask
802 * (replicated for each word in the quadword), and then
803 * form the "select mask" to get the value.
804 */
805 if ((ui & 0x000000ff) == 0x000000ff) mask |= 0x1111;
806 if ((ui & 0x0000ff00) == 0x0000ff00) mask |= 0x2222;
807 if ((ui & 0x00ff0000) == 0x00ff0000) mask |= 0x4444;
808 if ((ui & 0xff000000) == 0xff000000) mask |= 0x8888;
809 spe_fsmbi(p, rT, mask);
810 }
811 else {
812 /* The general case: this usually uses two instructions, but
813 * may use only one if the low-order 16 bits of each word are 0.
814 */
815 spe_ilhu(p, rT, ui >> 16);
816 if (ui & 0xffff)
817 spe_iohl(p, rT, ui & 0xffff);
818 }
819}
820
821/**
822 * This function is constructed identically to spe_xor_uint() below.
823 * Changes to one should be made in the other.
824 */
825void
826spe_and_uint(struct spe_function *p, int rT, int rA, uint ui)
827{
828 /* If we can, emit a single instruction, either And Byte Immediate
829 * (which uses the same constant across each byte), And Halfword Immediate
830 * (which sign-extends a 10-bit immediate to 16 bits and uses that
831 * across each halfword), or And Word Immediate (which sign-extends
832 * a 10-bit immediate to 32 bits).
833 *
834 * Otherwise, we'll need to use a temporary register.
835 */
836 uint tmp;
837
838 /* If the upper 23 bits are all 0s or all 1s, sign extension
839 * will work and we can use And Word Immediate
840 */
841 tmp = ui & 0xfffffe00;
842 if (tmp == 0xfffffe00 || tmp == 0) {
843 spe_andi(p, rT, rA, ui & 0x000003ff);
844 return;
845 }
846
847 /* If the ui field is symmetric along halfword boundaries and
848 * the upper 7 bits of each halfword are all 0s or 1s, we
849 * can use And Halfword Immediate
850 */
851 tmp = ui & 0xfe00fe00;
852 if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) {
853 spe_andhi(p, rT, rA, ui & 0x000003ff);
854 return;
855 }
856
857 /* If the ui field is symmetric in each byte, then we can use
858 * the And Byte Immediate instruction.
859 */
860 tmp = ui & 0x000000ff;
861 if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) {
862 spe_andbi(p, rT, rA, tmp);
863 return;
864 }
865
866 /* Otherwise, we'll have to use a temporary register. */
867 int tmp_reg = spe_allocate_available_register(p);
868 spe_load_uint(p, tmp_reg, ui);
869 spe_and(p, rT, rA, tmp_reg);
870 spe_release_register(p, tmp_reg);
871}
872
873
874/**
875 * This function is constructed identically to spe_and_uint() above.
876 * Changes to one should be made in the other.
877 */
878void
879spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui)
880{
881 /* If we can, emit a single instruction, either Exclusive Or Byte
882 * Immediate (which uses the same constant across each byte), Exclusive
883 * Or Halfword Immediate (which sign-extends a 10-bit immediate to
884 * 16 bits and uses that across each halfword), or Exclusive Or Word
885 * Immediate (which sign-extends a 10-bit immediate to 32 bits).
886 *
887 * Otherwise, we'll need to use a temporary register.
888 */
889 uint tmp;
890
891 /* If the upper 23 bits are all 0s or all 1s, sign extension
892 * will work and we can use Exclusive Or Word Immediate
893 */
894 tmp = ui & 0xfffffe00;
895 if (tmp == 0xfffffe00 || tmp == 0) {
896 spe_xori(p, rT, rA, ui & 0x000003ff);
897 return;
898 }
899
900 /* If the ui field is symmetric along halfword boundaries and
901 * the upper 7 bits of each halfword are all 0s or 1s, we
902 * can use Exclusive Or Halfword Immediate
903 */
904 tmp = ui & 0xfe00fe00;
905 if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) {
906 spe_xorhi(p, rT, rA, ui & 0x000003ff);
907 return;
908 }
909
910 /* If the ui field is symmetric in each byte, then we can use
911 * the Exclusive Or Byte Immediate instruction.
912 */
913 tmp = ui & 0x000000ff;
914 if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) {
915 spe_xorbi(p, rT, rA, tmp);
916 return;
917 }
918
919 /* Otherwise, we'll have to use a temporary register. */
920 int tmp_reg = spe_allocate_available_register(p);
921 spe_load_uint(p, tmp_reg, ui);
922 spe_xor(p, rT, rA, tmp_reg);
923 spe_release_register(p, tmp_reg);
924}
925
926void
927spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui)
928{
929 /* If the comparison value is 9 bits or less, it fits inside a
930 * Compare Equal Word Immediate instruction.
931 */
932 if ((ui & 0x000001ff) == ui) {
933 spe_ceqi(p, rT, rA, ui);
934 }
935 /* Otherwise, we're going to have to load a word first. */
936 else {
937 int tmp_reg = spe_allocate_available_register(p);
938 spe_load_uint(p, tmp_reg, ui);
939 spe_ceq(p, rT, rA, tmp_reg);
940 spe_release_register(p, tmp_reg);
941 }
942}
943
944void
945spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui)
946{
947 /* If the comparison value is 10 bits or less, it fits inside a
948 * Compare Logical Greater Than Word Immediate instruction.
949 */
950 if ((ui & 0x000003ff) == ui) {
951 spe_clgti(p, rT, rA, ui);
952 }
953 /* Otherwise, we're going to have to load a word first. */
954 else {
955 int tmp_reg = spe_allocate_available_register(p);
956 spe_load_uint(p, tmp_reg, ui);
957 spe_clgt(p, rT, rA, tmp_reg);
958 spe_release_register(p, tmp_reg);
959 }
960}
961
/**
 * Replicate word 0 of rA across all four words of rT.
 */
void
spe_splat(struct spe_function *p, int rT, int rA)
{
   /* The shuffle control word goes in a scratch register, not rT, so
    * the sequence stays correct when rT aliases rA.
    */
   const int shuf_reg = spe_allocate_available_register(p);

   /* control pattern 0x00010203: select bytes 0..3 into every word */
   spe_ila(p, shuf_reg, 0x00010203);
   spe_shufb(p, rT, rA, rA, shuf_reg);
   spe_release_register(p, shuf_reg);
}
972
973
/**
 * rT = bitwise complement of rA, implemented as nor(rA, rA).
 */
void
spe_complement(struct spe_function *p, int rT, int rA)
{
   spe_nor(p, rT, rA, rA);
}
979
980
981void
982spe_move(struct spe_function *p, int rT, int rA)
983{
984 /* Use different instructions depending on the instruction address
985 * to take advantage of the dual pipelines.
986 */
987 if (p->num_inst & 1)
988 spe_shlqbyi(p, rT, rA, 0); /* odd pipe */
989 else
990 spe_ori(p, rT, rA, 0); /* even pipe */
991}
992
993
/**
 * rT = {0,0,0,0}, via xor(rT, rT) which zeroes regardless of rT's
 * previous contents.
 */
void
spe_zero(struct spe_function *p, int rT)
{
   spe_xor(p, rT, rT, rT);
}
999
1000
/**
 * Replicate the given word (0..3) of rA across all four words of rT.
 *
 * Bug fix: the previous version used rT itself to hold the shuffle
 * control pattern, so when rT == rA the source register was clobbered
 * before it was read (in the word != 0 case the rotqbyi source was
 * destroyed, and in the word == 0 case the shufb source was).  A scratch
 * register is now used for the pattern, exactly as spe_splat() above
 * already does "just in case rT == rA".
 *
 * The constant is written 0x00010203 (select bytes 0..3) rather than
 * the old decimal 66051, matching spe_splat().
 */
void
spe_splat_word(struct spe_function *p, int rT, int rA, int word)
{
   int pattern_reg;

   assert(word >= 0);
   assert(word <= 3);

   pattern_reg = spe_allocate_available_register(p);

   if (word == 0) {
      spe_ila(p, pattern_reg, 0x00010203);
      spe_shufb(p, rT, rA, rA, pattern_reg);
   }
   else {
      /* XXX review this, we may not need the rotqbyi instruction */
      int rotated = spe_allocate_available_register(p);

      /* Rotate the desired word into position 0 before any register
       * (possibly aliasing rA) is overwritten, then splat word 0.
       */
      spe_rotqbyi(p, rotated, rA, 4 * word);
      spe_ila(p, pattern_reg, 0x00010203);
      spe_shufb(p, rT, rotated, rotated, pattern_reg);

      spe_release_register(p, rotated);
   }

   spe_release_register(p, pattern_reg);
}
1024
/**
 * For each 32-bit float element, rT = min(rA, rB).
 *
 * Float Compare Greater Than (fcgt) writes all-ones into the mask where
 * rA > rB and zeros where rA <= rB.  Select Bits (selb) then takes bits
 * from rA where the mask is 0 and from rB where it is 1 -- i.e. rA when
 * rA <= rB and rB when rA > rB, which is exactly "min".
 *
 * The mask could often live in rT itself, but not when rT aliases rA or
 * rB; since patterns like "x = min(x, a)" are common, a fresh register
 * is always allocated to be safe.
 */
void
spe_float_min(struct spe_function *p, int rT, int rA, int rB)
{
   const int gt_mask = spe_allocate_available_register(p);

   spe_fcgt(p, gt_mask, rA, rB);
   spe_selb(p, rT, rA, rB, gt_mask);
   spe_release_register(p, gt_mask);
}
1049
/**
 * For each 32-bit float element, rT = max(rA, rB).
 *
 * Same construction as spe_float_min() above, except the two source
 * operands of spe_selb() are swapped so the larger element is selected
 * instead of the smaller.
 */
void
spe_float_max(struct spe_function *p, int rT, int rA, int rB)
{
   const int gt_mask = spe_allocate_available_register(p);

   spe_fcgt(p, gt_mask, rA, rB);
   spe_selb(p, rT, rB, rA, gt_mask);
   spe_release_register(p, gt_mask);
}
1066
1067#endif /* GALLIUM_CELL */
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
deleted file mode 100644
index 65d9c774154..00000000000
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
+++ /dev/null
@@ -1,433 +0,0 @@
1/*
2 * (C) Copyright IBM Corporation 2008
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25/**
26 * \file
27 * Real-time assembly generation interface for Cell B.E. SPEs.
28 * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf
29 *
30 * \author Ian Romanick <idr@us.ibm.com>
31 * \author Brian Paul
32 */
33
34#ifndef RTASM_PPC_SPE_H
35#define RTASM_PPC_SPE_H
36
37/** 4 bytes per instruction */
38#define SPE_INST_SIZE 4
39
40/** number of general-purpose SIMD registers */
41#define SPE_NUM_REGS 128
42
43/** Return Address register (aka $lr / Link Register) */
44#define SPE_REG_RA 0
45
46/** Stack Pointer register (aka $sp) */
47#define SPE_REG_SP 1
48
49
/**
 * State for run-time generation of one SPE code sequence: the growing
 * instruction buffer plus the register-allocation bookkeeping and the
 * optional debug-print settings used by the emit functions.
 */
struct spe_function
{
   uint32_t *store;            /**< instruction buffer */
   uint num_inst;              /**< number of instructions emitted so far */
   uint max_inst;              /**< capacity of the instruction buffer */

   /**
    * The "set count" reflects the number of nested register sets
    * are allowed. In the unlikely case that we exceed the set count,
    * register allocation will start to be confused, which is critical
    * enough that we check for it.
    */
   unsigned char set_count;

   /**
    * Flags for used and unused registers. Each byte corresponds to a
    * register; a 0 in that byte means that the register is available.
    * A value of 1 means that the register was allocated in the current
    * register set. Any other value N means that the register was allocated
    * N register sets ago.
    *
    * \sa
    * spe_allocate_register, spe_allocate_available_register,
    * spe_allocate_register_set, spe_release_register_set, spe_release_register,
    */
   unsigned char regs[SPE_NUM_REGS];

   boolean print;              /**< print/dump instructions as they're emitted? */
   int indent;                 /**< number of spaces to indent */
};
80
81
82extern void spe_init_func(struct spe_function *p, uint code_size);
83extern void spe_release_func(struct spe_function *p);
84extern uint spe_code_size(const struct spe_function *p);
85
86extern int spe_allocate_available_register(struct spe_function *p);
87extern int spe_allocate_register(struct spe_function *p, int reg);
88extern void spe_release_register(struct spe_function *p, int reg);
89extern void spe_allocate_register_set(struct spe_function *p);
90extern void spe_release_register_set(struct spe_function *p);
91
92extern uint spe_get_registers_used(const struct spe_function *p, ubyte used[]);
93
94extern void spe_print_code(struct spe_function *p, boolean enable);
95extern void spe_indent(struct spe_function *p, int spaces);
96extern void spe_comment(struct spe_function *p, int rel_indent, const char *s);
97
98
99#endif /* RTASM_PPC_SPE_H */
100
101#ifndef EMIT
102#define EMIT(_name, _op) \
103 extern void _name (struct spe_function *p);
104#define EMIT_(_name, _op) \
105 extern void _name (struct spe_function *p, int rT);
106#define EMIT_R(_name, _op) \
107 extern void _name (struct spe_function *p, int rT, int rA);
108#define EMIT_RR(_name, _op) \
109 extern void _name (struct spe_function *p, int rT, int rA, int rB);
110#define EMIT_RRR(_name, _op) \
111 extern void _name (struct spe_function *p, int rT, int rA, int rB, int rC);
112#define EMIT_RI7(_name, _op) \
113 extern void _name (struct spe_function *p, int rT, int rA, int imm);
114#define EMIT_RI8(_name, _op, bias) \
115 extern void _name (struct spe_function *p, int rT, int rA, int imm);
116#define EMIT_RI10(_name, _op) \
117 extern void _name (struct spe_function *p, int rT, int rA, int imm);
118#define EMIT_RI10s(_name, _op) \
119 extern void _name (struct spe_function *p, int rT, int rA, int imm);
120#define EMIT_RI16(_name, _op) \
121 extern void _name (struct spe_function *p, int rT, int imm);
122#define EMIT_RI18(_name, _op) \
123 extern void _name (struct spe_function *p, int rT, int imm);
124#define EMIT_I16(_name, _op) \
125 extern void _name (struct spe_function *p, int imm);
126#define UNDEF_EMIT_MACROS
127#endif /* EMIT */
128
129
130/* Memory load / store instructions
131 */
132EMIT_RR (spe_lqx, 0x1c4)
133EMIT_RI16(spe_lqa, 0x061)
134EMIT_RI16(spe_lqr, 0x067)
135EMIT_RR (spe_stqx, 0x144)
136EMIT_RI16(spe_stqa, 0x041)
137EMIT_RI16(spe_stqr, 0x047)
138EMIT_RI7 (spe_cbd, 0x1f4)
139EMIT_RR (spe_cbx, 0x1d4)
140EMIT_RI7 (spe_chd, 0x1f5)
141EMIT_RI7 (spe_chx, 0x1d5)
142EMIT_RI7 (spe_cwd, 0x1f6)
143EMIT_RI7 (spe_cwx, 0x1d6)
144EMIT_RI7 (spe_cdd, 0x1f7)
145EMIT_RI7 (spe_cdx, 0x1d7)
146
147
148/* Constant formation instructions
149 */
150EMIT_RI16(spe_ilh, 0x083)
151EMIT_RI16(spe_ilhu, 0x082)
152EMIT_RI16(spe_il, 0x081)
153EMIT_RI18(spe_ila, 0x021)
154EMIT_RI16(spe_iohl, 0x0c1)
155EMIT_RI16(spe_fsmbi, 0x065)
156
157
158
159/* Integer and logical instructions
160 */
161EMIT_RR (spe_ah, 0x0c8)
162EMIT_RI10(spe_ahi, 0x01d)
163EMIT_RR (spe_a, 0x0c0)
164EMIT_RI10s(spe_ai, 0x01c)
165EMIT_RR (spe_sfh, 0x048)
166EMIT_RI10(spe_sfhi, 0x00d)
167EMIT_RR (spe_sf, 0x040)
168EMIT_RI10(spe_sfi, 0x00c)
169EMIT_RR (spe_addx, 0x340)
170EMIT_RR (spe_cg, 0x0c2)
171EMIT_RR (spe_cgx, 0x342)
172EMIT_RR (spe_sfx, 0x341)
173EMIT_RR (spe_bg, 0x042)
174EMIT_RR (spe_bgx, 0x343)
175EMIT_RR (spe_mpy, 0x3c4)
176EMIT_RR (spe_mpyu, 0x3cc)
177EMIT_RI10(spe_mpyi, 0x074)
178EMIT_RI10(spe_mpyui, 0x075)
179EMIT_RRR (spe_mpya, 0x00c)
180EMIT_RR (spe_mpyh, 0x3c5)
181EMIT_RR (spe_mpys, 0x3c7)
182EMIT_RR (spe_mpyhh, 0x3c6)
183EMIT_RR (spe_mpyhha, 0x346)
184EMIT_RR (spe_mpyhhu, 0x3ce)
185EMIT_RR (spe_mpyhhau, 0x34e)
186EMIT_R (spe_clz, 0x2a5)
187EMIT_R (spe_cntb, 0x2b4)
188EMIT_R (spe_fsmb, 0x1b6)
189EMIT_R (spe_fsmh, 0x1b5)
190EMIT_R (spe_fsm, 0x1b4)
191EMIT_R (spe_gbb, 0x1b2)
192EMIT_R (spe_gbh, 0x1b1)
193EMIT_R (spe_gb, 0x1b0)
194EMIT_RR (spe_avgb, 0x0d3)
195EMIT_RR (spe_absdb, 0x053)
196EMIT_RR (spe_sumb, 0x253)
197EMIT_R (spe_xsbh, 0x2b6)
198EMIT_R (spe_xshw, 0x2ae)
199EMIT_R (spe_xswd, 0x2a6)
200EMIT_RR (spe_and, 0x0c1)
201EMIT_RR (spe_andc, 0x2c1)
202EMIT_RI10s(spe_andbi, 0x016)
203EMIT_RI10s(spe_andhi, 0x015)
204EMIT_RI10s(spe_andi, 0x014)
205EMIT_RR (spe_or, 0x041)
206EMIT_RR (spe_orc, 0x2c9)
207EMIT_RI10s(spe_orbi, 0x006)
208EMIT_RI10s(spe_orhi, 0x005)
209EMIT_RI10s(spe_ori, 0x004)
210EMIT_R (spe_orx, 0x1f0)
211EMIT_RR (spe_xor, 0x241)
212EMIT_RI10s(spe_xorbi, 0x046)
213EMIT_RI10s(spe_xorhi, 0x045)
214EMIT_RI10s(spe_xori, 0x044)
215EMIT_RR (spe_nand, 0x0c9)
216EMIT_RR (spe_nor, 0x049)
217EMIT_RR (spe_eqv, 0x249)
218EMIT_RRR (spe_selb, 0x008)
219EMIT_RRR (spe_shufb, 0x00b)
220
221
222/* Shift and rotate instructions
223 */
224EMIT_RR (spe_shlh, 0x05f)
225EMIT_RI7 (spe_shlhi, 0x07f)
226EMIT_RR (spe_shl, 0x05b)
227EMIT_RI7 (spe_shli, 0x07b)
228EMIT_RR (spe_shlqbi, 0x1db)
229EMIT_RI7 (spe_shlqbii, 0x1fb)
230EMIT_RR (spe_shlqby, 0x1df)
231EMIT_RI7 (spe_shlqbyi, 0x1ff)
232EMIT_RR (spe_shlqbybi, 0x1cf)
233EMIT_RR (spe_roth, 0x05c)
234EMIT_RI7 (spe_rothi, 0x07c)
235EMIT_RR (spe_rot, 0x058)
236EMIT_RI7 (spe_roti, 0x078)
237EMIT_RR (spe_rotqby, 0x1dc)
238EMIT_RI7 (spe_rotqbyi, 0x1fc)
239EMIT_RR (spe_rotqbybi, 0x1cc)
240EMIT_RR (spe_rotqbi, 0x1d8)
241EMIT_RI7 (spe_rotqbii, 0x1f8)
242EMIT_RR (spe_rothm, 0x05d)
243EMIT_RI7 (spe_rothmi, 0x07d)
244EMIT_RR (spe_rotm, 0x059)
245EMIT_RI7 (spe_rotmi, 0x079)
246EMIT_RR (spe_rotqmby, 0x1dd)
247EMIT_RI7 (spe_rotqmbyi, 0x1fd)
248EMIT_RR (spe_rotqmbybi, 0x1cd)
249EMIT_RR (spe_rotqmbi, 0x1c9)
250EMIT_RI7 (spe_rotqmbii, 0x1f9)
251EMIT_RR (spe_rotmah, 0x05e)
252EMIT_RI7 (spe_rotmahi, 0x07e)
253EMIT_RR (spe_rotma, 0x05a)
254EMIT_RI7 (spe_rotmai, 0x07a)
255
256
257/* Compare, branch, and halt instructions
258 */
259EMIT_RR (spe_heq, 0x3d8)
260EMIT_RI10(spe_heqi, 0x07f)
261EMIT_RR (spe_hgt, 0x258)
262EMIT_RI10(spe_hgti, 0x04f)
263EMIT_RR (spe_hlgt, 0x2d8)
264EMIT_RI10(spe_hlgti, 0x05f)
265EMIT_RR (spe_ceqb, 0x3d0)
266EMIT_RI10(spe_ceqbi, 0x07e)
267EMIT_RR (spe_ceqh, 0x3c8)
268EMIT_RI10(spe_ceqhi, 0x07d)
269EMIT_RR (spe_ceq, 0x3c0)
270EMIT_RI10(spe_ceqi, 0x07c)
271EMIT_RR (spe_cgtb, 0x250)
272EMIT_RI10(spe_cgtbi, 0x04e)
273EMIT_RR (spe_cgth, 0x248)
274EMIT_RI10(spe_cgthi, 0x04d)
275EMIT_RR (spe_cgt, 0x240)
276EMIT_RI10(spe_cgti, 0x04c)
277EMIT_RR (spe_clgtb, 0x2d0)
278EMIT_RI10(spe_clgtbi, 0x05e)
279EMIT_RR (spe_clgth, 0x2c8)
280EMIT_RI10(spe_clgthi, 0x05d)
281EMIT_RR (spe_clgt, 0x2c0)
282EMIT_RI10(spe_clgti, 0x05c)
283EMIT_I16 (spe_br, 0x064)
284EMIT_I16 (spe_bra, 0x060)
285EMIT_RI16(spe_brsl, 0x066)
286EMIT_RI16(spe_brasl, 0x062)
287EMIT_RI16(spe_brnz, 0x042)
288EMIT_RI16(spe_brz, 0x040)
289EMIT_RI16(spe_brhnz, 0x046)
290EMIT_RI16(spe_brhz, 0x044)
291
292/* Control instructions
293 */
294EMIT (spe_lnop, 0x001)
295
296extern void
297spe_lqd(struct spe_function *p, int rT, int rA, int offset);
298
299extern void
300spe_stqd(struct spe_function *p, int rT, int rA, int offset);
301
302extern void spe_bi(struct spe_function *p, int rA, int d, int e);
303extern void spe_iret(struct spe_function *p, int rA, int d, int e);
304extern void spe_bisled(struct spe_function *p, int rT, int rA,
305 int d, int e);
306extern void spe_bisl(struct spe_function *p, int rT, int rA,
307 int d, int e);
308extern void spe_biz(struct spe_function *p, int rT, int rA,
309 int d, int e);
310extern void spe_binz(struct spe_function *p, int rT, int rA,
311 int d, int e);
312extern void spe_bihz(struct spe_function *p, int rT, int rA,
313 int d, int e);
314extern void spe_bihnz(struct spe_function *p, int rT, int rA,
315 int d, int e);
316
317
318/** Load/splat immediate float into rT. */
319extern void
320spe_load_float(struct spe_function *p, int rT, float x);
321
322/** Load/splat immediate int into rT. */
323extern void
324spe_load_int(struct spe_function *p, int rT, int i);
325
326/** Load/splat immediate unsigned int into rT. */
327extern void
328spe_load_uint(struct spe_function *p, int rT, uint ui);
329
330/** And immediate value into rT. */
331extern void
332spe_and_uint(struct spe_function *p, int rT, int rA, uint ui);
333
334/** Xor immediate value into rT. */
335extern void
336spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui);
337
338/** Compare equal with immediate value. */
339extern void
340spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui);
341
342/** Compare greater with immediate value. */
343extern void
344spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui);
345
346/** Replicate word 0 of rA across rT. */
347extern void
348spe_splat(struct spe_function *p, int rT, int rA);
349
350/** rT = complement_all_bits(rA). */
351extern void
352spe_complement(struct spe_function *p, int rT, int rA);
353
354/** rT = rA. */
355extern void
356spe_move(struct spe_function *p, int rT, int rA);
357
358/** rT = {0,0,0,0}. */
359extern void
360spe_zero(struct spe_function *p, int rT);
361
362/** rT = splat(rA, word) */
363extern void
364spe_splat_word(struct spe_function *p, int rT, int rA, int word);
365
366/** rT = float min(rA, rB) */
367extern void
368spe_float_min(struct spe_function *p, int rT, int rA, int rB);
369
370/** rT = float max(rA, rB) */
371extern void
372spe_float_max(struct spe_function *p, int rT, int rA, int rB);
373
374
375/* Floating-point instructions
376 */
377EMIT_RR (spe_fa, 0x2c4)
378EMIT_RR (spe_dfa, 0x2cc)
379EMIT_RR (spe_fs, 0x2c5)
380EMIT_RR (spe_dfs, 0x2cd)
381EMIT_RR (spe_fm, 0x2c6)
382EMIT_RR (spe_dfm, 0x2ce)
383EMIT_RRR (spe_fma, 0x00e)
384EMIT_RR (spe_dfma, 0x35c)
385EMIT_RRR (spe_fnms, 0x00d)
386EMIT_RR (spe_dfnms, 0x35e)
387EMIT_RRR (spe_fms, 0x00f)
388EMIT_RR (spe_dfms, 0x35d)
389EMIT_RR (spe_dfnma, 0x35f)
390EMIT_R (spe_frest, 0x1b8)
391EMIT_R (spe_frsqest, 0x1b9)
392EMIT_RR (spe_fi, 0x3d4)
393EMIT_RI8 (spe_csflt, 0x1da, 155)
394EMIT_RI8 (spe_cflts, 0x1d8, 173)
395EMIT_RI8 (spe_cuflt, 0x1db, 155)
396EMIT_RI8 (spe_cfltu, 0x1d9, 173)
397EMIT_R (spe_frds, 0x3b9)
398EMIT_R (spe_fesd, 0x3b8)
399EMIT_RR (spe_dfceq, 0x3c3)
400EMIT_RR (spe_dfcmeq, 0x3cb)
401EMIT_RR (spe_dfcgt, 0x2c3)
402EMIT_RR (spe_dfcmgt, 0x2cb)
403EMIT_RI7 (spe_dftsv, 0x3bf)
404EMIT_RR (spe_fceq, 0x3c2)
405EMIT_RR (spe_fcmeq, 0x3ca)
406EMIT_RR (spe_fcgt, 0x2c2)
407EMIT_RR (spe_fcmgt, 0x2ca)
408EMIT_R (spe_fscrwr, 0x3ba)
409EMIT_ (spe_fscrrd, 0x398)
410
411
412/* Channel instructions
413 */
414EMIT_R (spe_rdch, 0x00d)
415EMIT_R (spe_rdchcnt, 0x00f)
416EMIT_R (spe_wrch, 0x10d)
417
418
419#ifdef UNDEF_EMIT_MACROS
420#undef EMIT
421#undef EMIT_
422#undef EMIT_R
423#undef EMIT_RR
424#undef EMIT_RRR
425#undef EMIT_RI7
426#undef EMIT_RI8
427#undef EMIT_RI10
428#undef EMIT_RI10s
429#undef EMIT_RI16
430#undef EMIT_RI18
431#undef EMIT_I16
432#undef UNDEF_EMIT_MACROS
433#endif /* EMIT_ */
diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
index 34bfa527db0..596c691e9c1 100644
--- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
@@ -8,7 +8,7 @@
8 8
9 9
10/* Helper function to choose and instantiate one of the software rasterizers: 10/* Helper function to choose and instantiate one of the software rasterizers:
11 * cell, llvmpipe, softpipe. 11 * llvmpipe, softpipe.
12 */ 12 */
13 13
14#ifdef GALLIUM_SOFTPIPE 14#ifdef GALLIUM_SOFTPIPE
@@ -19,21 +19,12 @@
19#include "llvmpipe/lp_public.h" 19#include "llvmpipe/lp_public.h"
20#endif 20#endif
21 21
22#ifdef GALLIUM_CELL
23#include "cell/ppu/cell_public.h"
24#endif
25
26 22
27static INLINE struct pipe_screen * 23static INLINE struct pipe_screen *
28sw_screen_create_named(struct sw_winsys *winsys, const char *driver) 24sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
29{ 25{
30 struct pipe_screen *screen = NULL; 26 struct pipe_screen *screen = NULL;
31 27
32#if defined(GALLIUM_CELL)
33 if (screen == NULL && strcmp(driver, "cell") == 0)
34 screen = cell_create_screen(winsys);
35#endif
36
37#if defined(GALLIUM_LLVMPIPE) 28#if defined(GALLIUM_LLVMPIPE)
38 if (screen == NULL && strcmp(driver, "llvmpipe") == 0) 29 if (screen == NULL && strcmp(driver, "llvmpipe") == 0)
39 screen = llvmpipe_create_screen(winsys); 30 screen = llvmpipe_create_screen(winsys);
@@ -54,9 +45,7 @@ sw_screen_create(struct sw_winsys *winsys)
54 const char *default_driver; 45 const char *default_driver;
55 const char *driver; 46 const char *driver;
56 47
57#if defined(GALLIUM_CELL) 48#if defined(GALLIUM_LLVMPIPE)
58 default_driver = "cell";
59#elif defined(GALLIUM_LLVMPIPE)
60 default_driver = "llvmpipe"; 49 default_driver = "llvmpipe";
61#elif defined(GALLIUM_SOFTPIPE) 50#elif defined(GALLIUM_SOFTPIPE)
62 default_driver = "softpipe"; 51 default_driver = "softpipe";
diff --git a/src/gallium/drivers/cell/Makefile b/src/gallium/drivers/cell/Makefile
deleted file mode 100644
index 47aef7b05f6..00000000000
--- a/src/gallium/drivers/cell/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
1# Cell Gallium driver Makefile
2
3
4default:
5 ( cd spu ; make )
6 ( cd ppu ; make )
7
8
9
10clean:
11 ( cd spu ; make clean )
12 ( cd ppu ; make clean )
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
deleted file mode 100644
index a8cdde34aa7..00000000000
--- a/src/gallium/drivers/cell/common.h
+++ /dev/null
@@ -1,377 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * Types and tokens which are common to the SPU and PPU code.
30 */
31
32
33#ifndef CELL_COMMON_H
34#define CELL_COMMON_H
35
36#include "pipe/p_compiler.h"
37#include "pipe/p_format.h"
38#include "pipe/p_state.h"
39#include <stdio.h>
40
41/** The standard assert macro doesn't seem to work reliably */
42#define ASSERT(x) \
43 if (!(x)) { \
44 ubyte *p = NULL; \
45 fprintf(stderr, "%s:%d: %s(): assertion %s failed.\n", \
46 __FILE__, __LINE__, __FUNCTION__, #x); \
47 *p = 0; \
48 exit(1); \
49 }
50
51
52#define JOIN(x, y) JOIN_AGAIN(x, y)
53#define JOIN_AGAIN(x, y) x ## y
54
55#define STATIC_ASSERT(e) \
56{typedef char JOIN(assertion_failed_at_line_, __LINE__) [(e) ? 1 : -1];}
57
58
59
60/** for sanity checking */
61#define ASSERT_ALIGN16(ptr) \
62 ASSERT((((unsigned long) (ptr)) & 0xf) == 0);
63
64
65/** round up value to next multiple of 4 */
66#define ROUNDUP4(k) (((k) + 0x3) & ~0x3)
67
68/** round up value to next multiple of 8 */
69#define ROUNDUP8(k) (((k) + 0x7) & ~0x7)
70
71/** round up value to next multiple of 16 */
72#define ROUNDUP16(k) (((k) + 0xf) & ~0xf)
73
74
75#define CELL_MAX_SPUS 8
76
77#define CELL_MAX_SAMPLERS 4
78#define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */
79#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */
80#define CELL_MAX_WIDTH 1024 /**< max framebuffer width */
81#define CELL_MAX_HEIGHT 1024 /**< max framebuffer width */
82
83#define TILE_SIZE 32
84
85
86/**
87 * The low byte of a mailbox word contains the command opcode.
88 * Remaining higher bytes are command specific.
89 */
90#define CELL_CMD_OPCODE_MASK 0xff
91
92#define CELL_CMD_EXIT 1
93#define CELL_CMD_CLEAR_SURFACE 2
94#define CELL_CMD_FINISH 3
95#define CELL_CMD_RENDER 4
96#define CELL_CMD_BATCH 5
97#define CELL_CMD_RELEASE_VERTS 6
98#define CELL_CMD_STATE_FRAMEBUFFER 10
99#define CELL_CMD_STATE_FRAGMENT_OPS 11
100#define CELL_CMD_STATE_SAMPLER 12
101#define CELL_CMD_STATE_TEXTURE 13
102#define CELL_CMD_STATE_VERTEX_INFO 14
103#define CELL_CMD_STATE_VIEWPORT 15
104#define CELL_CMD_STATE_UNIFORMS 16
105#define CELL_CMD_STATE_VS_ARRAY_INFO 17
106#define CELL_CMD_STATE_BIND_VS 18
107#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19
108#define CELL_CMD_STATE_ATTRIB_FETCH 20
109#define CELL_CMD_STATE_FS_CONSTANTS 21
110#define CELL_CMD_STATE_RASTERIZER 22
111#define CELL_CMD_VS_EXECUTE 23
112#define CELL_CMD_FLUSH_BUFFER_RANGE 24
113#define CELL_CMD_FENCE 25
114
115
116/** Command/batch buffers */
117#define CELL_NUM_BUFFERS 4
118#define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */
119
120#define CELL_BUFFER_STATUS_FREE 10
121#define CELL_BUFFER_STATUS_USED 20
122
123/** Debug flags */
124#define CELL_DEBUG_CHECKER (1 << 0)
125#define CELL_DEBUG_ASM (1 << 1)
126#define CELL_DEBUG_SYNC (1 << 2)
127#define CELL_DEBUG_FRAGMENT_OPS (1 << 3)
128#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4)
129#define CELL_DEBUG_CMD (1 << 5)
130#define CELL_DEBUG_CACHE (1 << 6)
131
132#define CELL_FENCE_IDLE 0
133#define CELL_FENCE_EMITTED 1
134#define CELL_FENCE_SIGNALLED 2
135
136#define CELL_FACING_FRONT 0
137#define CELL_FACING_BACK 1
138
139struct cell_fence
140{
141 /** There's a 16-byte status qword per SPU */
142 volatile uint status[CELL_MAX_SPUS][4];
143};
144
145#ifdef __SPU__
146typedef vector unsigned int opcode_t;
147#else
148typedef unsigned int opcode_t[4];
149#endif
150
151/**
152 * Fence command sent to SPUs. In response, the SPUs will write
153 * CELL_FENCE_STATUS_SIGNALLED back to the fence status word in main memory.
154 */
155struct cell_command_fence
156{
157 opcode_t opcode; /**< CELL_CMD_FENCE */
158 struct cell_fence *fence;
159 uint32_t pad_[3];
160};
161
162
163/**
164 * Command to specify per-fragment operations state and generated code.
165 * Note that this is a variant-length structure, allocated with as
166 * much memory as needed to hold the generated code; the "code"
167 * field *must* be the last field in the structure. Also, the entire
168 * length of the structure (including the variant code field) must be
169 * a multiple of 8 bytes; we require that this structure itself be
170 * a multiple of 8 bytes, and that the generated code also be a multiple
171 * of 8 bytes.
172 *
173 * Also note that the dsa, blend, blend_color fields are really only needed
174 * for the fallback/C per-pixel code. They're not used when we generate
175 * dynamic SPU fragment code (which is the normal case), and will eventually
176 * be removed from this structure.
177 */
178struct cell_command_fragment_ops
179{
180 opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */
181
182 /* Fields for the fallback case */
183 struct pipe_depth_stencil_alpha_state dsa;
184 struct pipe_blend_state blend;
185 struct pipe_blend_color blend_color;
186
187 /* Fields for the generated SPU code */
188 unsigned total_code_size;
189 unsigned front_code_index;
190 unsigned back_code_index;
191 /* this field has variant length, and must be the last field in
192 * the structure
193 */
194 unsigned code[0];
195};
196
197
198/** Max instructions for fragment programs */
199#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 512
200
201/**
202 * Command to send a fragment program to SPUs.
203 */
204struct cell_command_fragment_program
205{
206 opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */
207 uint num_inst; /**< Number of instructions */
208 uint32_t pad[3];
209 unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
210};
211
212
213/**
214 * Tell SPUs about the framebuffer size, location
215 */
216struct cell_command_framebuffer
217{
218 opcode_t opcode; /**< CELL_CMD_STATE_FRAMEBUFFER */
219 int width, height;
220 void *color_start, *depth_start;
221 enum pipe_format color_format, depth_format;
222 uint32_t pad_[2];
223};
224
225
226/**
227 * Tell SPUs about rasterizer state.
228 */
229struct cell_command_rasterizer
230{
231 opcode_t opcode; /**< CELL_CMD_STATE_RASTERIZER */
232 struct pipe_rasterizer_state rasterizer;
233 /*uint32_t pad[1];*/
234};
235
236
237/**
238 * Clear framebuffer to the given value/color.
239 */
240struct cell_command_clear_surface
241{
242 opcode_t opcode; /**< CELL_CMD_CLEAR_SURFACE */
243 uint surface; /**< Temporary: 0=color, 1=Z */
244 uint value;
245 uint32_t pad[2];
246};
247
248
249/**
250 * Array info used by the vertex shader's vertex puller.
251 */
252struct cell_array_info
253{
254 uint64_t base; /**< Base address of the 0th element. */
255 uint attr; /**< Attribute that this state is for. */
256 uint pitch; /**< Byte pitch from one entry to the next. */
257 uint size;
258 uint function_offset;
259};
260
261
262struct cell_attribute_fetch_code
263{
264 uint64_t base;
265 uint size;
266};
267
268
269struct cell_buffer_range
270{
271 uint64_t base;
272 unsigned size;
273};
274
275
276struct cell_shader_info
277{
278 uint64_t declarations;
279 uint64_t instructions;
280 uint64_t immediates;
281
282 unsigned num_outputs;
283 unsigned num_declarations;
284 unsigned num_instructions;
285 unsigned num_immediates;
286};
287
288
289#define SPU_VERTS_PER_BATCH 64
290struct cell_command_vs
291{
292 opcode_t opcode; /**< CELL_CMD_VS_EXECUTE */
293 uint64_t vOut[SPU_VERTS_PER_BATCH];
294 unsigned num_elts;
295 unsigned elts[SPU_VERTS_PER_BATCH];
296 float plane[12][4];
297 unsigned nr_planes;
298 unsigned nr_attrs;
299};
300
301
302struct cell_command_render
303{
304 opcode_t opcode; /**< CELL_CMD_RENDER */
305 uint prim_type; /**< PIPE_PRIM_x */
306 uint num_verts;
307 uint vertex_size; /**< bytes per vertex */
308 uint num_indexes;
309 uint vertex_buf; /**< which cell->buffer[] contains the vertex data */
310 float xmin, ymin, xmax, ymax; /* XXX another dummy field */
311 uint min_index;
312 boolean inline_verts;
313 uint32_t pad_[1];
314};
315
316
317struct cell_command_release_verts
318{
319 opcode_t opcode; /**< CELL_CMD_RELEASE_VERTS */
320 uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */
321 uint32_t pad_[3];
322};
323
324
325struct cell_command_sampler
326{
327 opcode_t opcode; /**< CELL_CMD_STATE_SAMPLER */
328 uint unit;
329 struct pipe_sampler_state state;
330 uint32_t pad_[3];
331};
332
333
334struct cell_command_texture
335{
336 opcode_t opcode; /**< CELL_CMD_STATE_TEXTURE */
337 uint target; /**< PIPE_TEXTURE_x */
338 uint unit;
339 void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */
340 ushort width[CELL_MAX_TEXTURE_LEVELS];
341 ushort height[CELL_MAX_TEXTURE_LEVELS];
342 ushort depth[CELL_MAX_TEXTURE_LEVELS];
343};
344
345
346#define MAX_SPU_FUNCTIONS 12
347/**
348 * Used to tell the PPU about the address of particular functions in the
349 * SPU's address space.
350 */
351struct cell_spu_function_info
352{
353 uint num;
354 char names[MAX_SPU_FUNCTIONS][16];
355 uint addrs[MAX_SPU_FUNCTIONS];
356 char pad[12]; /**< Pad struct to multiple of 16 bytes (256 currently) */
357};
358
359
360/** This is the object passed to spe_create_thread() */
361PIPE_ALIGN_TYPE(16,
362struct cell_init_info
363{
364 unsigned id;
365 unsigned num_spus;
366 unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */
367 float inv_timebase; /**< 1.0/timebase, for perf measurement */
368
369 /** Buffers for command batches, vertex/index data */
370 ubyte *buffers[CELL_NUM_BUFFERS];
371 uint *buffer_status; /**< points at cell_context->buffer_status */
372
373 struct cell_spu_function_info *spu_functions;
374});
375
376
377#endif /* CELL_COMMON_H */
diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile
deleted file mode 100644
index c92f8e5cba2..00000000000
--- a/src/gallium/drivers/cell/ppu/Makefile
+++ /dev/null
@@ -1,86 +0,0 @@
1# Gallium3D Cell driver: PPU code
2
3# This makefile builds the libcell.a library which gets pulled into
4# the main libGL.so library
5
6
7TOP = ../../../../..
8include $(TOP)/configs/current
9
10
11# This is the "top-level" cell PPU driver code, will get pulled into libGL.so
12# by the winsys Makefile.
13CELL_LIB = ../libcell.a
14
15
16# This is the SPU code. We'd like to be able to put this into the libcell.a
17# archive with the PPU code, but nesting .a libs doesn't seem to work.
18# So, it's pulled into libGL.so in gallium/winsys/xlib/Makefile
19SPU_CODE_MODULE = ../spu/g3d_spu.a
20
21
22SOURCES = \
23 cell_batch.c \
24 cell_clear.c \
25 cell_context.c \
26 cell_draw_arrays.c \
27 cell_fence.c \
28 cell_flush.c \
29 cell_gen_fragment.c \
30 cell_gen_fp.c \
31 cell_state_derived.c \
32 cell_state_emit.c \
33 cell_state_shader.c \
34 cell_pipe_state.c \
35 cell_screen.c \
36 cell_state_vertex.c \
37 cell_spu.c \
38 cell_surface.c \
39 cell_texture.c \
40 cell_vbuf.c \
41 cell_vertex_fetch.c \
42 cell_vertex_shader.c
43
44
45OBJECTS = $(SOURCES:.c=.o) \
46
47INCLUDE_DIRS = \
48 -I$(TOP)/src/mesa \
49 -I$(TOP)/src/gallium/include \
50 -I$(TOP)/src/gallium/auxiliary \
51 -I$(TOP)/src/gallium/drivers
52
53.c.o:
54 $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
55
56
57.c.s:
58 $(CC) -S $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
59
60
61default: $(CELL_LIB)
62
63
64$(CELL_LIB): $(OBJECTS) $(SPU_CODE_MODULE)
65# ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) # doesn't work
66 ar -ru $(CELL_LIB) $(OBJECTS)
67
68#$(PROG): $(PPU_OBJECTS)
69# $(CC) -o $(PROG) $(PPU_OBJECTS) $(SPU_CODE_MODULE) $(PPU_LFLAGS)
70
71
72
73clean:
74 rm -f *.o *~ $(CELL_LIB)
75
76
77
78depend: $(SOURCES)
79 rm -f depend
80 touch depend
81 $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null
82
83include depend
84
85
86
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c
deleted file mode 100644
index fe144f8b849..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_batch.c
+++ /dev/null
@@ -1,260 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#include "cell_context.h"
30#include "cell_batch.h"
31#include "cell_fence.h"
32#include "cell_spu.h"
33
34
35
36/**
37 * Search the buffer pool for an empty/free buffer and return its index.
38 * Buffers are used for storing vertex data, state and commands which
39 * will be sent to the SPUs.
40 * If no empty buffers are available, wait for one.
41 * \return buffer index in [0, CELL_NUM_BUFFERS-1]
42 */
43uint
44cell_get_empty_buffer(struct cell_context *cell)
45{
46 static uint prev_buffer = 0;
47 uint buf = (prev_buffer + 1) % CELL_NUM_BUFFERS;
48 uint tries = 0;
49
50 /* Find a buffer that's marked as free by all SPUs */
51 while (1) {
52 uint spu, num_free = 0;
53
54 for (spu = 0; spu < cell->num_spus; spu++) {
55 if (cell->buffer_status[spu][buf][0] == CELL_BUFFER_STATUS_FREE) {
56 num_free++;
57
58 if (num_free == cell->num_spus) {
59 /* found a free buffer, now mark status as used */
60 for (spu = 0; spu < cell->num_spus; spu++) {
61 cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
62 }
63 /*
64 printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries);
65 */
66 prev_buffer = buf;
67
68 /* release tex buffer associated w/ prev use of this batch buf */
69 cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]);
70
71 return buf;
72 }
73 }
74 else {
75 break;
76 }
77 }
78
79 /* try next buf */
80 buf = (buf + 1) % CELL_NUM_BUFFERS;
81
82 tries++;
83 if (tries == 100) {
84 /*
85 printf("PPU WAITING for buffer...\n");
86 */
87 }
88 }
89}
90
91
92/**
93 * Append a fence command to the current batch buffer.
94 * Note that we're sure there's always room for this because of the
95 * adjusted size check in cell_batch_free_space().
96 */
97static void
98emit_fence(struct cell_context *cell)
99{
100 const uint batch = cell->cur_batch;
101 const uint size = cell->buffer_size[batch];
102 struct cell_command_fence *fence_cmd;
103 struct cell_fence *fence = &cell->fenced_buffers[batch].fence;
104 uint i;
105
106 /* set fence status to emitted, not yet signalled */
107 for (i = 0; i < cell->num_spus; i++) {
108 fence->status[i][0] = CELL_FENCE_EMITTED;
109 }
110
111 STATIC_ASSERT(sizeof(struct cell_command_fence) % 16 == 0);
112 ASSERT(size % 16 == 0);
113 ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE);
114
115 fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size);
116 fence_cmd->opcode[0] = CELL_CMD_FENCE;
117 fence_cmd->fence = fence;
118
119 /* update batch buffer size */
120 cell->buffer_size[batch] = size + sizeof(struct cell_command_fence);
121}
122
123
124/**
125 * Flush the current batch buffer to the SPUs.
126 * An empty buffer will be found and set as the new current batch buffer
127 * for subsequent commands/data.
128 */
129void
130cell_batch_flush(struct cell_context *cell)
131{
132 static boolean flushing = FALSE;
133 uint batch = cell->cur_batch;
134 uint size = cell->buffer_size[batch];
135 uint spu, cmd_word;
136
137 assert(!flushing);
138
139 if (size == 0)
140 return;
141
142 /* Before we use this batch buffer, make sure any fenced texture buffers
143 * are released.
144 */
145 if (cell->fenced_buffers[batch].head) {
146 emit_fence(cell);
147 size = cell->buffer_size[batch];
148 }
149
150 flushing = TRUE;
151
152 assert(batch < CELL_NUM_BUFFERS);
153
154 /*
155 printf("cell_batch_dispatch: buf %u at %p, size %u\n",
156 batch, &cell->buffer[batch][0], size);
157 */
158
159 /*
160 * Build "BATCH" command and send to all SPUs.
161 */
162 cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16);
163
164 for (spu = 0; spu < cell->num_spus; spu++) {
165 assert(cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_USED);
166 send_mbox_message(cell_global.spe_contexts[spu], cmd_word);
167 }
168
169 /* When the SPUs are done copying the buffer into their locals stores
170 * they'll write a BUFFER_STATUS_FREE message into the buffer_status[]
171 * array indicating that the PPU can re-use the buffer.
172 */
173
174 batch = cell_get_empty_buffer(cell);
175
176 cell->buffer_size[batch] = 0; /* empty */
177 cell->cur_batch = batch;
178
179 flushing = FALSE;
180}
181
182
183/**
184 * Return the number of bytes free in the current batch buffer.
185 */
186uint
187cell_batch_free_space(const struct cell_context *cell)
188{
189 uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch];
190 free -= sizeof(struct cell_command_fence);
191 return free;
192}
193
194
195/**
196 * Allocate space in the current batch buffer for 'bytes' space.
197 * Bytes must be a multiple of 16 bytes. Allocation will be 16 byte aligned.
198 * \return address in batch buffer to put data
199 */
200void *
201cell_batch_alloc16(struct cell_context *cell, uint bytes)
202{
203 void *pos;
204 uint size;
205
206 ASSERT(bytes % 16 == 0);
207 ASSERT(bytes <= CELL_BUFFER_SIZE);
208 ASSERT(cell->cur_batch >= 0);
209
210#ifdef ASSERT
211 {
212 uint spu;
213 for (spu = 0; spu < cell->num_spus; spu++) {
214 ASSERT(cell->buffer_status[spu][cell->cur_batch][0]
215 == CELL_BUFFER_STATUS_USED);
216 }
217 }
218#endif
219
220 size = cell->buffer_size[cell->cur_batch];
221
222 if (bytes > cell_batch_free_space(cell)) {
223 cell_batch_flush(cell);
224 size = 0;
225 }
226
227 ASSERT(size % 16 == 0);
228 ASSERT(size + bytes <= CELL_BUFFER_SIZE);
229
230 pos = (void *) (cell->buffer[cell->cur_batch] + size);
231
232 cell->buffer_size[cell->cur_batch] = size + bytes;
233
234 return pos;
235}
236
237
238/**
239 * One-time init of batch buffers.
240 */
241void
242cell_init_batch_buffers(struct cell_context *cell)
243{
244 uint spu, buf;
245
246 /* init command, vertex/index buffer info */
247 for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) {
248 cell->buffer_size[buf] = 0;
249
250 /* init batch buffer status values,
251 * mark 0th buffer as used, rest as free.
252 */
253 for (spu = 0; spu < cell->num_spus; spu++) {
254 if (buf == 0)
255 cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
256 else
257 cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE;
258 }
259 }
260}
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h
deleted file mode 100644
index 290136031a1..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_batch.h
+++ /dev/null
@@ -1,54 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#ifndef CELL_BATCH_H
30#define CELL_BATCH_H
31
32#include "pipe/p_compiler.h"
33
34
35struct cell_context;
36
37
38extern uint
39cell_get_empty_buffer(struct cell_context *cell);
40
41extern void
42cell_batch_flush(struct cell_context *cell);
43
44extern uint
45cell_batch_free_space(const struct cell_context *cell);
46
47extern void *
48cell_batch_alloc16(struct cell_context *cell, uint bytes);
49
50extern void
51cell_init_batch_buffers(struct cell_context *cell);
52
53
54#endif /* CELL_BATCH_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c
deleted file mode 100644
index 6a525ef4e41..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_clear.c
+++ /dev/null
@@ -1,93 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * Authors
30 * Brian Paul
31 */
32
33#include <stdio.h>
34#include <assert.h>
35#include <stdint.h>
36#include "util/u_inlines.h"
37#include "util/u_memory.h"
38#include "util/u_pack_color.h"
39#include "cell/common.h"
40#include "cell_clear.h"
41#include "cell_context.h"
42#include "cell_batch.h"
43#include "cell_flush.h"
44#include "cell_spu.h"
45#include "cell_state.h"
46
47
48/**
49 * Called via pipe->clear()
50 */
51void
52cell_clear(struct pipe_context *pipe, unsigned buffers,
53 const pipe_color_union *color,
54 double depth, unsigned stencil)
55{
56 struct cell_context *cell = cell_context(pipe);
57
58 if (cell->dirty)
59 cell_update_derived(cell);
60
61 if (buffers & PIPE_CLEAR_COLOR) {
62 uint surfIndex = 0;
63 union util_color uc;
64
65 util_pack_color(color->f, cell->framebuffer.cbufs[0]->format, &uc);
66
67 /* Build a CLEAR command and place it in the current batch buffer */
68 STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0);
69 struct cell_command_clear_surface *clr
70 = (struct cell_command_clear_surface *)
71 cell_batch_alloc16(cell, sizeof(*clr));
72 clr->opcode[0] = CELL_CMD_CLEAR_SURFACE;
73 clr->surface = surfIndex;
74 clr->value = uc.ui;
75 }
76
77 if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
78 uint surfIndex = 1;
79 uint clearValue;
80
81 clearValue = util_pack_z_stencil(cell->framebuffer.zsbuf->format,
82 depth, stencil);
83
84 /* Build a CLEAR command and place it in the current batch buffer */
85 STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0);
86 struct cell_command_clear_surface *clr
87 = (struct cell_command_clear_surface *)
88 cell_batch_alloc16(cell, sizeof(*clr));
89 clr->opcode[0] = CELL_CMD_CLEAR_SURFACE;
90 clr->surface = surfIndex;
91 clr->value = clearValue;
92 }
93}
diff --git a/src/gallium/drivers/cell/ppu/cell_clear.h b/src/gallium/drivers/cell/ppu/cell_clear.h
deleted file mode 100644
index a365feb0f00..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_clear.h
+++ /dev/null
@@ -1,42 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#ifndef CELL_CLEAR_H
30#define CELL_CLEAR_H
31
32
33struct pipe_context;
34
35
36extern void
37cell_clear(struct pipe_context *pipe, unsigned buffers,
38 const union pipe_color_union *color,
39 double depth, unsigned stencil);
40
41
42#endif /* CELL_CLEAR_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c
deleted file mode 100644
index 58e647a39fa..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_context.c
+++ /dev/null
@@ -1,190 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * Authors
30 * Brian Paul
31 */
32
33
34#include <stdio.h>
35
36#include "pipe/p_defines.h"
37#include "pipe/p_format.h"
38#include "util/u_memory.h"
39#include "pipe/p_screen.h"
40#include "util/u_inlines.h"
41
42#include "draw/draw_context.h"
43#include "draw/draw_private.h"
44
45#include "cell/common.h"
46#include "cell_batch.h"
47#include "cell_clear.h"
48#include "cell_context.h"
49#include "cell_draw_arrays.h"
50#include "cell_fence.h"
51#include "cell_flush.h"
52#include "cell_state.h"
53#include "cell_surface.h"
54#include "cell_spu.h"
55#include "cell_pipe_state.h"
56#include "cell_texture.h"
57#include "cell_vbuf.h"
58
59
60
61static void
62cell_destroy_context( struct pipe_context *pipe )
63{
64 struct cell_context *cell = cell_context(pipe);
65 unsigned i;
66
67 for (i = 0; i < cell->num_vertex_buffers; i++) {
68 pipe_resource_reference(&cell->vertex_buffer[i].buffer, NULL);
69 }
70
71 util_delete_keymap(cell->fragment_ops_cache, NULL);
72
73 cell_spu_exit(cell);
74
75 align_free(cell);
76}
77
78
79static struct draw_context *
80cell_draw_create(struct cell_context *cell)
81{
82 struct draw_context *draw = draw_create(&cell->pipe);
83
84#if 0 /* broken */
85 if (getenv("GALLIUM_CELL_VS")) {
86 /* plug in SPU-based vertex transformation code */
87 draw->shader_queue_flush = cell_vertex_shader_queue_flush;
88 draw->driver_private = cell;
89 }
90#endif
91
92 return draw;
93}
94
95
96static const struct debug_named_value cell_debug_flags[] = {
97 {"checker", CELL_DEBUG_CHECKER, NULL},/**< modulate tile clear color by SPU ID */
98 {"asm", CELL_DEBUG_ASM, NULL}, /**< dump SPU asm code */
99 {"sync", CELL_DEBUG_SYNC, NULL}, /**< SPUs do synchronous DMA */
100 {"fragops", CELL_DEBUG_FRAGMENT_OPS, NULL}, /**< SPUs emit fragment ops debug messages*/
101 {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK, NULL}, /**< SPUs use reference implementation for fragment ops*/
102 {"cmd", CELL_DEBUG_CMD, NULL}, /**< SPUs dump command buffer info */
103 {"cache", CELL_DEBUG_CACHE, NULL}, /**< report texture cache stats on exit */
104 DEBUG_NAMED_VALUE_END
105};
106
107
108struct pipe_context *
109cell_create_context(struct pipe_screen *screen,
110 void *priv )
111{
112 struct cell_context *cell;
113 uint i;
114
115 /* some fields need to be 16-byte aligned, so align the whole object */
116 cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16);
117 if (!cell)
118 return NULL;
119
120 memset(cell, 0, sizeof(*cell));
121
122 cell->winsys = NULL; /* XXX: fixme - get this from screen? */
123 cell->pipe.winsys = NULL;
124 cell->pipe.screen = screen;
125 cell->pipe.priv = priv;
126 cell->pipe.destroy = cell_destroy_context;
127
128 cell->pipe.clear = cell_clear;
129 cell->pipe.flush = cell_flush;
130
131#if 0
132 cell->pipe.begin_query = cell_begin_query;
133 cell->pipe.end_query = cell_end_query;
134 cell->pipe.wait_query = cell_wait_query;
135#endif
136
137 cell_init_draw_functions(cell);
138 cell_init_state_functions(cell);
139 cell_init_shader_functions(cell);
140 cell_init_surface_functions(cell);
141 cell_init_vertex_functions(cell);
142 cell_init_texture_transfer_funcs(cell);
143
144 cell->draw = cell_draw_create(cell);
145
146 /* Create cache of fragment ops generated code */
147 cell->fragment_ops_cache =
148 util_new_keymap(sizeof(struct cell_fragment_ops_key), ~0, NULL);
149
150 cell_init_vbuf(cell);
151
152 draw_set_rasterize_stage(cell->draw, cell->vbuf);
153
154 /* convert all points/lines to tris for the time being */
155 draw_wide_point_threshold(cell->draw, 0.0);
156 draw_wide_line_threshold(cell->draw, 0.0);
157
158 /* get env vars or read config file to get debug flags */
159 cell->debug_flags = debug_get_flags_option("CELL_DEBUG",
160 cell_debug_flags,
161 0 );
162
163 for (i = 0; i < CELL_NUM_BUFFERS; i++)
164 cell_fence_init(&cell->fenced_buffers[i].fence);
165
166
167 /*
168 * SPU stuff
169 */
170 /* This call only works with SDK 3.0. Anyone still using 2.1??? */
171 cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1);
172 cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
173 if (cell->debug_flags) {
174 printf("Cell: found %d Cell(s) with %u SPUs\n",
175 cell->num_cells, cell->num_spus);
176 }
177 if (getenv("CELL_NUM_SPUS")) {
178 cell->num_spus = atoi(getenv("CELL_NUM_SPUS"));
179 assert(cell->num_spus > 0);
180 }
181
182 cell_start_spus(cell);
183
184 cell_init_batch_buffers(cell);
185
186 /* make sure SPU initializations are done before proceeding */
187 cell_flush_int(cell, CELL_FLUSH_WAIT);
188
189 return &cell->pipe;
190}
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
deleted file mode 100644
index d1aee62ba1e..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ /dev/null
@@ -1,210 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#ifndef CELL_CONTEXT_H
30#define CELL_CONTEXT_H
31
32
33#include "pipe/p_context.h"
34#include "pipe/p_defines.h"
35#include "draw/draw_vertex.h"
36#include "draw/draw_vbuf.h"
37/*#include "cell_winsys.h"*/
38#include "cell/common.h"
39#include "rtasm/rtasm_ppc_spe.h"
40#include "tgsi/tgsi_scan.h"
41#include "util/u_keymap.h"
42
43
44struct cell_vbuf_render;
45
46
/**
 * Cell vertex shader state, subclass of pipe_shader_state.
 */
struct cell_vertex_shader_state
{
   struct pipe_shader_state shader;   /* base class -- must be first */
   struct tgsi_shader_info info;      /* results of TGSI shader scan */
   void *draw_data;                   /* handle used by the draw module -- presumably opaque to us */
};
56
57
/**
 * Cell fragment shader state, subclass of pipe_shader_state.
 */
struct cell_fragment_shader_state
{
   struct pipe_shader_state shader;   /* base class -- must be first */
   struct tgsi_shader_info info;      /* results of TGSI shader scan */
   struct spe_function code;          /* generated SPE machine code */
   void *data;
};
68
69
/**
 * Key for mapping per-fragment state to cached SPU machine code.
 * keymap(cell_fragment_ops_key) => cell_command_fragment_ops
 *
 * All state that influences the generated per-fragment-ops code must be
 * part of this key so the cache lookup is correct.
 */
struct cell_fragment_ops_key
{
   struct pipe_blend_state blend;
   struct pipe_blend_color blend_color;
   struct pipe_depth_stencil_alpha_state dsa;
   enum pipe_format color_format;   /* format of the color buffer */
   enum pipe_format zs_format;      /* format of the depth/stencil buffer */
};
82
83
84struct cell_buffer_node;
85
/**
 * Fenced buffer list.  List of buffers which can be unreferenced after
 * the fence has been executed/signalled.
 */
struct cell_buffer_list
{
   /* 16-byte alignment required: each SPU status word is DMA'd */
   PIPE_ALIGN_VAR(16) struct cell_fence fence;
   struct cell_buffer_node *head;   /* head of singly-linked node list */
};
95
/** Vertex element (vertex attribute layout) state */
struct cell_velems_state
{
   unsigned count;   /* number of active elements in velem[] */
   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
};
101
/**
 * Per-context state, subclass of pipe_context.
 */
struct cell_context
{
   struct pipe_context pipe;   /* base class -- must be first */

   struct cell_winsys *winsys;

   /* Bound constant state objects (CSOs) */
   const struct pipe_blend_state *blend;
   const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
   uint num_samplers;
   const struct pipe_depth_stencil_alpha_state *depth_stencil;
   const struct pipe_rasterizer_state *rasterizer;
   const struct cell_vertex_shader_state *vs;
   const struct cell_fragment_shader_state *fs;
   const struct cell_velems_state *velems;

   struct spe_function logic_op;

   /* Parameter-like (non-CSO) state */
   struct pipe_blend_color blend_color;
   struct pipe_stencil_ref stencil_ref;
   struct pipe_clip_state clip;
   struct pipe_resource *constants[2];
   struct pipe_framebuffer_state framebuffer;
   struct pipe_poly_stipple poly_stipple;
   struct pipe_scissor_state scissor;
   struct cell_resource *texture[PIPE_MAX_SAMPLERS];
   struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
   uint num_textures;
   struct pipe_viewport_state viewport;
   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
   uint num_vertex_buffers;
   struct pipe_index_buffer index_buffer;

   /* Mapped color / depth-stencil surfaces */
   ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS];
   ubyte *zsbuf_map;

   uint dirty;           /* dirty-state bitmask */
   uint dirty_textures;  /* bitmask of texture units */
   uint dirty_samplers;  /* bitmask of sampler units */

   /** Cache of code generated for per-fragment ops */
   struct keymap *fragment_ops_cache;

   /** The primitive drawing context */
   struct draw_context *draw;
   struct draw_stage *render_stage;

   /** For post-transformed vertex buffering: */
   struct cell_vbuf_render *vbuf_render;
   struct draw_stage *vbuf;

   struct vertex_info vertex_info;

   /** Mapped constant buffers */
   const void *mapped_constants[PIPE_SHADER_TYPES];

   PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions;

   uint num_cells, num_spus;

   /** Buffers for command batches, vertex/index data */
   uint buffer_size[CELL_NUM_BUFFERS];
   PIPE_ALIGN_VAR(16) ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE];

   int cur_batch;   /**< which buffer is being filled w/ commands */

   /** [4] to ensure 16-byte alignment for each status word */
   PIPE_ALIGN_VAR(16) uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4];


   /** Associated with each command/batch buffer is a list of pipe_buffers
    * that are fenced.  When the last command in a buffer is executed, the
    * fence will be signalled, indicating that any pipe_buffers preceding
    * that fence can be unreferenced (and probably freed).
    */
   struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS];


   struct spe_function attrib_fetch;
   unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS];

   unsigned debug_flags;   /* from the CELL_DEBUG env var */
};
187
188
189
190
/** Cast wrapper: pipe_context -> cell_context (pipe is the first member) */
static INLINE struct cell_context *
cell_context(struct pipe_context *pipe)
{
   return (struct cell_context *) pipe;
}
196
197
198struct pipe_context *
199cell_create_context(struct pipe_screen *screen,
200 void *priv );
201
202extern void
203cell_vertex_shader_queue_flush(struct draw_context *draw);
204
205
206/* XXX find a better home for this */
207extern void cell_update_vertex_fetch(struct draw_context *draw);
208
209
210#endif /* CELL_CONTEXT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
deleted file mode 100644
index a367fa3fe15..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
+++ /dev/null
@@ -1,113 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/* Author:
29 * Brian Paul
30 * Keith Whitwell
31 */
32
33
34#include "pipe/p_defines.h"
35#include "pipe/p_context.h"
36#include "util/u_inlines.h"
37
38#include "cell_context.h"
39#include "cell_draw_arrays.h"
40#include "cell_state.h"
41#include "cell_flush.h"
42#include "cell_texture.h"
43
44#include "draw/draw_context.h"
45
46
47
48
49
50
/**
 * Draw vertex arrays, with optional indexing.
 * Basically, map the vertex buffers (and drawing surfaces), then hand off
 * the drawing to the 'draw' module.
 *
 * XXX should the element buffer be specified/bound with a separate function?
 */
static void
cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
   struct cell_context *cell = cell_context(pipe);
   struct draw_context *draw = cell->draw;
   void *mapped_indices = NULL;
   unsigned i;

   /* bring derived state up to date before drawing */
   if (cell->dirty)
      cell_update_derived( cell );

#if 0
   cell_map_surfaces(cell);
#endif

   /*
    * Map vertex buffers
    */
   for (i = 0; i < cell->num_vertex_buffers; i++) {
      void *buf = cell_resource(cell->vertex_buffer[i].buffer)->data;
      draw_set_mapped_vertex_buffer(draw, i, buf);
   }
   /* Map index buffer, if present */
   if (info->indexed && cell->index_buffer.buffer)
      mapped_indices = cell_resource(cell->index_buffer.buffer)->data;

   draw_set_mapped_index_buffer(draw, mapped_indices);

   /* draw! */
   draw_vbo(draw, info);

   /*
    * unmap vertex/index buffers - will cause draw module to flush
    */
   for (i = 0; i < cell->num_vertex_buffers; i++) {
      draw_set_mapped_vertex_buffer(draw, i, NULL);
   }
   if (mapped_indices) {
      draw_set_mapped_index_buffer(draw, NULL);
   }

   /*
    * TODO: Flush only when a user vertex/index buffer is present
    * (or even better, modify draw module to do this
    * internally when this condition is seen?)
    */
   draw_flush(draw);
}
106
107
/**
 * Plug the Cell drawing function into the pipe_context vtable.
 */
void
cell_init_draw_functions(struct cell_context *cell)
{
   cell->pipe.draw_vbo = cell_draw_vbo;
}
113
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h
deleted file mode 100644
index 148873aa675..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h
+++ /dev/null
@@ -1,36 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#ifndef CELL_DRAW_ARRAYS_H
29#define CELL_DRAW_ARRAYS_H
30
31
32extern void
33cell_init_draw_functions(struct cell_context *cell);
34
35
36#endif /* CELL_DRAW_ARRAYS_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c
deleted file mode 100644
index 181fef44f45..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_fence.c
+++ /dev/null
@@ -1,172 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include <unistd.h>
29#include "util/u_memory.h"
30#include "util/u_inlines.h"
31#include "cell_context.h"
32#include "cell_batch.h"
33#include "cell_fence.h"
34#include "cell_texture.h"
35
36
37void
38cell_fence_init(struct cell_fence *fence)
39{
40 uint i;
41 ASSERT_ALIGN16(fence->status);
42 for (i = 0; i < CELL_MAX_SPUS; i++) {
43 fence->status[i][0] = CELL_FENCE_IDLE;
44 }
45}
46
47
48boolean
49cell_fence_signalled(const struct cell_context *cell,
50 const struct cell_fence *fence)
51{
52 uint i;
53 for (i = 0; i < cell->num_spus; i++) {
54 if (fence->status[i][0] != CELL_FENCE_SIGNALLED)
55 return FALSE;
56 /*assert(fence->status[i][0] == CELL_FENCE_EMITTED);*/
57 }
58 return TRUE;
59}
60
61
/**
 * Block until the fence is signalled by all SPUs, polling with short sleeps.
 * \param timeout  currently ignored -- the wait is unbounded
 * \return TRUE always (never times out)
 */
boolean
cell_fence_finish(const struct cell_context *cell,
                  const struct cell_fence *fence,
                  uint64_t timeout)
{
   /* busy-wait; usleep keeps the PPU from spinning flat-out */
   while (!cell_fence_signalled(cell, fence)) {
      usleep(10);
   }

#ifdef DEBUG
   {
      /* sanity check: every SPU really reported SIGNALLED */
      uint i;
      for (i = 0; i < cell->num_spus; i++) {
         assert(fence->status[i][0] == CELL_FENCE_SIGNALLED);
      }
   }
#endif
   return TRUE;
}
81
82
83
84
/** Singly-linked list node holding one fenced buffer reference */
struct cell_buffer_node
{
   struct pipe_resource *buffer;    /* referenced buffer */
   struct cell_buffer_node *next;   /* next node, or NULL */
};
90
91
#if 0
/* Currently disabled; kept for when fenced-buffer tracking is restored
 * (see the related disabled code in cell_add_fenced_textures).
 */
static void
cell_add_buffer_to_list(struct cell_context *cell,
                        struct cell_buffer_list *list,
                        struct pipe_resource *buffer)
{
   struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node);
   /* create new list node which references the buffer, insert at head */
   if (node) {
      pipe_resource_reference(&node->buffer, buffer);
      node->next = list->head;
      list->head = node;
   }
}
#endif
107
108
109/**
110 * Wait for completion of the given fence, then unreference any buffers
111 * on the list.
112 * This typically unrefs/frees texture buffers after any rendering which uses
113 * them has completed.
114 */
115void
116cell_free_fenced_buffers(struct cell_context *cell,
117 struct cell_buffer_list *list)
118{
119 if (list->head) {
120 /*struct pipe_screen *ps = cell->pipe.screen;*/
121 struct cell_buffer_node *node;
122
123 cell_fence_finish(cell, &list->fence);
124
125 /* traverse the list, unreferencing buffers, freeing nodes */
126 node = list->head;
127 while (node) {
128 struct cell_buffer_node *next = node->next;
129 assert(node->buffer);
130 /* XXX need this? pipe_buffer_unmap(ps, node->buffer);*/
131#if 0
132 printf("Unref buffer %p\n", node->buffer);
133 if (node->buffer->reference.count == 1)
134 printf(" Delete!\n");
135#endif
136 pipe_resource_reference(&node->buffer, NULL);
137 FREE(node);
138 node = next;
139 }
140 list->head = NULL;
141 }
142}
143
144
/**
 * This should be called for each render command.
 * Any texture buffers that are currently bound will be added to a fenced
 * list to be freed later when the fence is executed/signalled.
 *
 * NOTE: the actual list insertion is disabled (#if 00 below) and needs
 * to be fixed/restored; right now this function is effectively a no-op.
 */
void
cell_add_fenced_textures(struct cell_context *cell)
{
   /*struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch];*/
   uint i;

   for (i = 0; i < cell->num_textures; i++) {
      struct cell_resource *ct = cell->texture[i];
      if (ct) {
#if 0
         printf("Adding texture %p buffer %p to list\n",
                ct, ct->tiled_buffer[level]);
#endif
#if 00
         /* XXX this needs to be fixed/restored!
          * Maybe keep pointers to textures, not buffers.
          */
         if (ct->base.buffer)
            cell_add_buffer_to_list(cell, list, ct->buffer);
#endif
      }
   }
}
diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h
deleted file mode 100644
index 3568230b1c0..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_fence.h
+++ /dev/null
@@ -1,60 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#ifndef CELL_FENCE_H
30#define CELL_FENCE_H
31
32
33extern void
34cell_fence_init(struct cell_fence *fence);
35
36
37extern boolean
38cell_fence_signalled(const struct cell_context *cell,
39 const struct cell_fence *fence,
40 unsigned flags);
41
42
43extern boolean
44cell_fence_finish(const struct cell_context *cell,
45 const struct cell_fence *fence,
46 unsigned flags,
47 uint64_t timeout);
48
49
50
51extern void
52cell_free_fenced_buffers(struct cell_context *cell,
53 struct cell_buffer_list *list);
54
55
56extern void
57cell_add_fenced_textures(struct cell_context *cell);
58
59
60#endif /* CELL_FENCE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c
deleted file mode 100644
index 463f4d03eb9..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_flush.c
+++ /dev/null
@@ -1,109 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#include "cell_context.h"
30#include "cell_batch.h"
31#include "cell_flush.h"
32#include "cell_spu.h"
33#include "cell_render.h"
34#include "draw/draw_context.h"
35
36
37/**
38 * Called via pipe->flush()
39 */
40void
41cell_flush(struct pipe_context *pipe,
42 struct pipe_fence_handle **fence)
43{
44 struct cell_context *cell = cell_context(pipe);
45
46 if (fence) {
47 *fence = NULL;
48 }
49
50 flags |= CELL_FLUSH_WAIT;
51
52 draw_flush( cell->draw );
53 cell_flush_int(cell, flags);
54}
55
56
/**
 * Cell internal flush function.  Send the current batch buffer to all SPUs.
 * If flags & CELL_FLUSH_WAIT, do not return until the SPUs are idle.
 * \param flags  bitmask of flags CELL_FLUSH_WAIT, or zero
 *
 * NOTE: not reentrant -- the static 'flushing' flag asserts against
 * recursive calls (e.g. a flush triggered from within a flush).
 */
void
cell_flush_int(struct cell_context *cell, unsigned flags)
{
   static boolean flushing = FALSE; /* recursion catcher */
   uint i;

   ASSERT(!flushing);
   flushing = TRUE;

   if (flags & CELL_FLUSH_WAIT) {
      /* append a CELL_CMD_FINISH so each SPU will ack via its mailbox */
      STATIC_ASSERT(sizeof(opcode_t) % 16 == 0);
      opcode_t *cmd = (opcode_t*) cell_batch_alloc16(cell, sizeof(opcode_t));
      *cmd[0] = CELL_CMD_FINISH;
   }

   cell_batch_flush(cell);

#if 0
   /* Send CMD_FINISH to all SPUs */
   for (i = 0; i < cell->num_spus; i++) {
      send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH);
   }
#endif

   if (flags & CELL_FLUSH_WAIT) {
      /* Wait for ack (the echoed CELL_CMD_FINISH) from every SPU */
      for (i = 0; i < cell->num_spus; i++) {
         uint k = wait_mbox_message(cell_global.spe_contexts[i]);
         assert(k == CELL_CMD_FINISH);
      }
   }

   flushing = FALSE;
}
96
97
98void
99cell_flush_buffer_range(struct cell_context *cell, void *ptr,
100 unsigned size)
101{
102 STATIC_ASSERT((sizeof(opcode_t) + sizeof(struct cell_buffer_range)) % 16 == 0);
103 uint32_t *batch = (uint32_t*)cell_batch_alloc16(cell,
104 sizeof(opcode_t) + sizeof(struct cell_buffer_range));
105 struct cell_buffer_range *br = (struct cell_buffer_range *) &batch[4];
106 batch[0] = CELL_CMD_FLUSH_BUFFER_RANGE;
107 br->base = (uintptr_t) ptr;
108 br->size = size;
109}
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h
deleted file mode 100644
index 509ae6239ac..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_flush.h
+++ /dev/null
@@ -1,45 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#ifndef CELL_FLUSH
30#define CELL_FLUSH
31
 32#define CELL_FLUSH_WAIT 0x80000000
 33
 34/* NOTE: matches the definition in cell_flush.c, which takes no 'flags'
 35 * parameter (flags are only used by the internal cell_flush_int()). */
 36extern void
 37cell_flush(struct pipe_context *pipe,
 38           struct pipe_fence_handle **fence);
37
38extern void
39cell_flush_int(struct cell_context *cell, unsigned flags);
40
41extern void
42cell_flush_buffer_range(struct cell_context *cell, void *ptr,
43 unsigned size);
44
45#endif
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
deleted file mode 100644
index 1d8a11a4ac9..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ /dev/null
@@ -1,2036 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009 VMware, Inc. All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29
30
31/**
32 * Generate SPU fragment program/shader code.
33 *
34 * Note that we generate SOA-style code here. So each TGSI instruction
35 * operates on four pixels (and is translated into four SPU instructions,
36 * generally speaking).
37 *
38 * \author Brian Paul
39 */
40
41#include <math.h>
42#include "pipe/p_defines.h"
43#include "pipe/p_state.h"
44#include "pipe/p_shader_tokens.h"
45#include "tgsi/tgsi_parse.h"
46#include "tgsi/tgsi_util.h"
47#include "tgsi/tgsi_exec.h"
48#include "tgsi/tgsi_dump.h"
49#include "rtasm/rtasm_ppc_spe.h"
50#include "util/u_memory.h"
51#include "cell_context.h"
52#include "cell_gen_fp.h"
53
54
55#define MAX_TEMPS 16
56#define MAX_IMMED 8
57
58#define CHAN_X 0
59#define CHAN_Y 1
60#define CHAN_Z 2
61#define CHAN_W 3
62
/**
 * Context needed during code generation.
 *
 * NOTE: the various *_reg fields are lazily allocated; a value <= 0
 * means "not allocated yet" (see get_exec_mask_reg() etc.).
 */
struct codegen
{
   struct cell_context *cell;
   int inputs_reg;      /**< 1st function parameter */
   int outputs_reg;     /**< 2nd function parameter */
   int constants_reg;   /**< 3rd function parameter */
   int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */
   int imm_regs[MAX_IMMED][4];  /**< maps TGSI immediates to SPE registers */

   int num_imm;   /**< number of immediates */

   int one_reg;   /**< register containing {1.0, 1.0, 1.0, 1.0} */

   int addr_reg;  /**< address register, integer values */

   /** Per-instruction temps / intermediate temps */
   int num_itemps;
   int itemps[12];

   /** Current IF/ELSE/ENDIF nesting level */
   int if_nesting;
   /** Current BGNLOOP/ENDLOOP nesting level */
   int loop_nesting;
   /** Location of start of current loop */
   int loop_start;

   /** Index of if/conditional mask register */
   int cond_mask_reg;
   /** Index of loop mask register */
   int loop_mask_reg;

   /** Index of master execution mask register */
   int exec_mask_reg;

   /** KIL mask: indicates which fragments have been killed */
   int kill_mask_reg;

   int frame_size;  /**< Stack frame size, in words */

   struct spe_function *f;   /**< the SPE function being generated */
   boolean error;            /**< error flag -- presumably set on codegen failure */
};
108
109
110/**
111 * Allocate an intermediate temporary register.
112 */
113static int
114get_itemp(struct codegen *gen)
115{
116 int t = spe_allocate_available_register(gen->f);
117 assert(gen->num_itemps < Elements(gen->itemps));
118 gen->itemps[gen->num_itemps++] = t;
119 return t;
120}
121
122/**
123 * Free all intermediate temporary registers. To be called after each
124 * instruction has been emitted.
125 */
126static void
127free_itemps(struct codegen *gen)
128{
129 int i;
130 for (i = 0; i < gen->num_itemps; i++) {
131 spe_release_register(gen->f, gen->itemps[i]);
132 }
133 gen->num_itemps = 0;
134}
135
136
137/**
138 * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
139 * The register is allocated and initialized upon the first call.
140 */
141static int
142get_const_one_reg(struct codegen *gen)
143{
144 if (gen->one_reg <= 0) {
145 gen->one_reg = spe_allocate_available_register(gen->f);
146
147 spe_indent(gen->f, 4);
148 spe_comment(gen->f, -4, "init constant reg = 1.0:");
149
150 /* one = {1.0, 1.0, 1.0, 1.0} */
151 spe_load_float(gen->f, gen->one_reg, 1.0f);
152
153 spe_indent(gen->f, -4);
154 }
155
156 return gen->one_reg;
157}
158
159
160/**
161 * Return index of the address register.
162 * Used for indirect register loads/stores.
163 */
164static int
165get_address_reg(struct codegen *gen)
166{
167 if (gen->addr_reg <= 0) {
168 gen->addr_reg = spe_allocate_available_register(gen->f);
169
170 spe_indent(gen->f, 4);
171 spe_comment(gen->f, -4, "init address reg = 0:");
172
173 /* init addr = {0, 0, 0, 0} */
174 spe_zero(gen->f, gen->addr_reg);
175
176 spe_indent(gen->f, -4);
177 }
178
179 return gen->addr_reg;
180}
181
182
183/**
184 * Return index of the master execution mask.
185 * The register is allocated an initialized upon the first call.
186 *
187 * The master execution mask controls which pixels in a quad are
188 * modified, according to surrounding conditionals, loops, etc.
189 */
190static int
191get_exec_mask_reg(struct codegen *gen)
192{
193 if (gen->exec_mask_reg <= 0) {
194 gen->exec_mask_reg = spe_allocate_available_register(gen->f);
195
196 /* XXX this may not be needed */
197 spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0");
198 spe_load_int(gen->f, gen->exec_mask_reg, ~0);
199 }
200
201 return gen->exec_mask_reg;
202}
203
204
205/** Return index of the conditional (if/else) execution mask register */
206static int
207get_cond_mask_reg(struct codegen *gen)
208{
209 if (gen->cond_mask_reg <= 0) {
210 gen->cond_mask_reg = spe_allocate_available_register(gen->f);
211 }
212
213 return gen->cond_mask_reg;
214}
215
216
217/** Return index of the loop execution mask register */
218static int
219get_loop_mask_reg(struct codegen *gen)
220{
221 if (gen->loop_mask_reg <= 0) {
222 gen->loop_mask_reg = spe_allocate_available_register(gen->f);
223 }
224
225 return gen->loop_mask_reg;
226}
227
228
229
230static boolean
231is_register_src(struct codegen *gen, int channel,
232 const struct tgsi_full_src_register *src)
233{
234 int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel);
235 int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
236
237 if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) {
238 return FALSE;
239 }
240 if (src->Register.File == TGSI_FILE_TEMPORARY ||
241 src->Register.File == TGSI_FILE_IMMEDIATE) {
242 return TRUE;
243 }
244 return FALSE;
245}
246
247
248static boolean
249is_memory_dst(struct codegen *gen, int channel,
250 const struct tgsi_full_dst_register *dst)
251{
252 if (dst->Register.File == TGSI_FILE_OUTPUT) {
253 return TRUE;
254 }
255 else {
256 return FALSE;
257 }
258}
259
260
261/**
262 * Return the index of the SPU temporary containing the named TGSI
263 * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
264 * just return the corresponding SPE register. If the TGIS register
265 * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
266 * and emit an SPE load instruction.
267 */
268static int
269get_src_reg(struct codegen *gen,
270 int channel,
271 const struct tgsi_full_src_register *src)
272{
273 int reg = -1;
274 int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel);
275 boolean reg_is_itemp = FALSE;
276 uint sign_op;
277
278 assert(swizzle >= TGSI_SWIZZLE_X);
279 assert(swizzle <= TGSI_SWIZZLE_W);
280
281 {
282 int index = src->Register.Index;
283
284 assert(swizzle < 4);
285
286 if (src->Register.Indirect) {
287 /* XXX unfinished */
288 }
289
290 switch (src->Register.File) {
291 case TGSI_FILE_TEMPORARY:
292 reg = gen->temp_regs[index][swizzle];
293 break;
294 case TGSI_FILE_INPUT:
295 {
296 /* offset is measured in quadwords, not bytes */
297 int offset = index * 4 + swizzle;
298 reg = get_itemp(gen);
299 reg_is_itemp = TRUE;
300 /* Load: reg = memory[(machine_reg) + offset] */
301 spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16);
302 }
303 break;
304 case TGSI_FILE_IMMEDIATE:
305 reg = gen->imm_regs[index][swizzle];
306 break;
307 case TGSI_FILE_CONSTANT:
308 {
309 /* offset is measured in quadwords, not bytes */
310 int offset = index * 4 + swizzle;
311 reg = get_itemp(gen);
312 reg_is_itemp = TRUE;
313 /* Load: reg = memory[(machine_reg) + offset] */
314 spe_lqd(gen->f, reg, gen->constants_reg, offset * 16);
315 }
316 break;
317 default:
318 assert(0);
319 }
320 }
321
322 /*
323 * Handle absolute value, negate or set-negative of src register.
324 */
325 sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
326 if (sign_op != TGSI_UTIL_SIGN_KEEP) {
327 /*
328 * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
329 */
330 const int bit31mask_reg = get_itemp(gen);
331 int result_reg;
332
333 if (reg_is_itemp) {
334 /* re-use 'reg' for the result */
335 result_reg = reg;
336 }
337 else {
338 /* alloc a new reg for the result */
339 result_reg = get_itemp(gen);
340 }
341
342 /* mask with bit 31 set, the rest cleared */
343 spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
344
345 if (sign_op == TGSI_UTIL_SIGN_CLEAR) {
346 spe_andc(gen->f, result_reg, reg, bit31mask_reg);
347 }
348 else if (sign_op == TGSI_UTIL_SIGN_SET) {
349 spe_and(gen->f, result_reg, reg, bit31mask_reg);
350 }
351 else {
352 assert(sign_op == TGSI_UTIL_SIGN_TOGGLE);
353 spe_xor(gen->f, result_reg, reg, bit31mask_reg);
354 }
355
356 reg = result_reg;
357 }
358
359 return reg;
360}
361
362
363/**
364 * Return the index of an SPE register to use for the given TGSI register.
365 * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
366 * corresponding SPE register is returned. If the TGSI register is
367 * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
368 * See store_dest_reg() below...
369 */
370static int
371get_dst_reg(struct codegen *gen,
372 int channel,
373 const struct tgsi_full_dst_register *dest)
374{
375 int reg = -1;
376
377 switch (dest->Register.File) {
378 case TGSI_FILE_TEMPORARY:
379 if (gen->if_nesting > 0 || gen->loop_nesting > 0)
380 reg = get_itemp(gen);
381 else
382 reg = gen->temp_regs[dest->Register.Index][channel];
383 break;
384 case TGSI_FILE_OUTPUT:
385 reg = get_itemp(gen);
386 break;
387 default:
388 assert(0);
389 }
390
391 return reg;
392}
393
394
395/**
396 * When a TGSI instruction is writing to an output register, this
397 * function emits the SPE store instruction to store the value_reg.
398 * \param value_reg the SPE register containing the value to store.
399 * This would have been returned by get_dst_reg().
400 */
401static void
402store_dest_reg(struct codegen *gen,
403 int value_reg, int channel,
404 const struct tgsi_full_dst_register *dest)
405{
406 /*
407 * XXX need to implement dst reg clamping/saturation
408 */
409#if 0
410 switch (inst->Instruction.Saturate) {
411 case TGSI_SAT_NONE:
412 break;
413 case TGSI_SAT_ZERO_ONE:
414 break;
415 case TGSI_SAT_MINUS_PLUS_ONE:
416 break;
417 default:
418 assert( 0 );
419 }
420#endif
421
422 switch (dest->Register.File) {
423 case TGSI_FILE_TEMPORARY:
424 if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
425 int d_reg = gen->temp_regs[dest->Register.Index][channel];
426 int exec_reg = get_exec_mask_reg(gen);
427 /* Mix d with new value according to exec mask:
428 * d[i] = mask_reg[i] ? value_reg : d_reg
429 */
430 spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg);
431 }
432 else {
433 /* we're not inside a condition or loop: do nothing special */
434
435 }
436 break;
437 case TGSI_FILE_OUTPUT:
438 {
439 /* offset is measured in quadwords, not bytes */
440 int offset = dest->Register.Index * 4 + channel;
441 if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
442 int exec_reg = get_exec_mask_reg(gen);
443 int curval_reg = get_itemp(gen);
444 /* First read the current value from memory:
445 * Load: curval = memory[(machine_reg) + offset]
446 */
447 spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
448 /* Mix curval with newvalue according to exec mask:
449 * d[i] = mask_reg[i] ? value_reg : d_reg
450 */
451 spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg);
452 /* Store: memory[(machine_reg) + offset] = curval */
453 spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
454 }
455 else {
456 /* Store: memory[(machine_reg) + offset] = reg */
457 spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16);
458 }
459 }
460 break;
461 default:
462 assert(0);
463 }
464}
465
466
467
/**
 * Emit the shader function prologue: save the return address ($lr),
 * save the old stack pointer into the new frame (back-chain), and
 * decrement $sp by the frame size to allocate the new stack frame.
 */
static void
emit_prologue(struct codegen *gen)
{
   gen->frame_size = 1024; /* XXX temporary, should be dynamic */

   spe_comment(gen->f, 0, "Function prologue:");

   /* save $lr on stack     # stqd $lr,16($sp) */
   spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);

   if (gen->frame_size >= 512) {
      /* offset is too large for ai instruction */
      int offset_reg = spe_allocate_available_register(gen->f);
      int sp_reg = spe_allocate_available_register(gen->f);
      /* offset = -framesize */
      spe_load_int(gen->f, offset_reg, -gen->frame_size);
      /* sp = $sp (save old stack pointer before adjusting) */
      spe_move(gen->f, sp_reg, SPE_REG_SP);
      /* $sp = $sp + offset_reg */
      spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
      /* save old $sp at the base of the new stack frame */
      spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0);
      /* clean up */
      spe_release_register(gen->f, offset_reg);
      spe_release_register(gen->f, sp_reg);
   }
   else {
      /* save stack pointer    # stqd $sp,-frameSize($sp) */
      spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);

      /* adjust stack pointer  # ai $sp,$sp,-frameSize */
      spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
   }
}
502
503
/**
 * Emit the shader function epilogue: put the kill mask (or zero) in the
 * return-value register, pop the stack frame, restore $lr and return.
 */
static void
emit_epilogue(struct codegen *gen)
{
   const int return_reg = 3;  /* first return-value register */

   spe_comment(gen->f, 0, "Function epilogue:");

   spe_comment(gen->f, 0, "return the killed mask");
   if (gen->kill_mask_reg > 0) {
      /* shader called KIL, return the "alive" mask */
      spe_move(gen->f, return_reg, gen->kill_mask_reg);
   }
   else {
      /* return {0,0,0,0} */
      spe_load_uint(gen->f, return_reg, 0);
   }

   spe_comment(gen->f, 0, "restore stack and return");
   if (gen->frame_size >= 512) {
      /* offset is too large for ai instruction */
      int offset_reg = spe_allocate_available_register(gen->f);
      /* offset = framesize */
      spe_load_int(gen->f, offset_reg, gen->frame_size);
      /* $sp = $sp + offset */
      spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
      /* clean up */
      spe_release_register(gen->f, offset_reg);
   }
   else {
      /* restore stack pointer # ai $sp,$sp,frameSize */
      spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size);
   }

   /* restore $lr           # lqd $lr,16($sp) */
   spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);

   /* return from function call */
   spe_bi(gen->f, SPE_REG_RA, 0, 0);
}
543
544
/**
 * Iterate 'ch' over the four color channels (X/Y/Z/W), executing the
 * following statement/block only for channels enabled in the
 * instruction's dest write mask.
 * NOTE: expands to a brace-less for+if, so it binds to exactly one
 * following statement or block, like a normal loop header.
 */
#define FOR_EACH_ENABLED_CHANNEL(inst, ch) \
   for (ch = 0; ch < 4; ch++) \
      if (inst->Dst[0].Register.WriteMask & (1 << ch))
548
549
550static boolean
551emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst)
552{
553 int ch = 0, src_reg, addr_reg;
554
555 src_reg = get_src_reg(gen, ch, &inst->Src[0]);
556 addr_reg = get_address_reg(gen);
557
558 /* convert float to int */
559 spe_cflts(gen->f, addr_reg, src_reg, 0);
560
561 free_itemps(gen);
562
563 return TRUE;
564}
565
566
567static boolean
568emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
569{
570 int ch, src_reg[4], dst_reg[4];
571
572 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
573 src_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
574 dst_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
575 }
576
577 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
578 if (is_register_src(gen, ch, &inst->Src[0]) &&
579 is_memory_dst(gen, ch, &inst->Dst[0])) {
580 /* special-case: register to memory store */
581 store_dest_reg(gen, src_reg[ch], ch, &inst->Dst[0]);
582 }
583 else {
584 spe_move(gen->f, dst_reg[ch], src_reg[ch]);
585 store_dest_reg(gen, dst_reg[ch], ch, &inst->Dst[0]);
586 }
587 }
588
589 free_itemps(gen);
590
591 return TRUE;
592}
593
594/**
595 * Emit binary operation
596 */
597static boolean
598emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst)
599{
600 int ch, s1_reg[4], s2_reg[4], d_reg[4];
601
602 /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */
603 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
604 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
605 s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
606 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
607 }
608
609 /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */
610 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
611 /* Emit actual SPE instruction: d = s1 + s2 */
612 switch (inst->Instruction.Opcode) {
613 case TGSI_OPCODE_ADD:
614 spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
615 break;
616 case TGSI_OPCODE_SUB:
617 spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
618 break;
619 case TGSI_OPCODE_MUL:
620 spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
621 break;
622 default:
623 ;
624 }
625 }
626
627 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
628 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
629 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
630 }
631
632 /* Free any intermediate temps we allocated */
633 free_itemps(gen);
634
635 return TRUE;
636}
637
638
639/**
640 * Emit multiply add. See emit_ADD for comments.
641 */
642static boolean
643emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
644{
645 int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4];
646
647 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
648 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
649 s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
650 s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]);
651 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
652 }
653 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
654 spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]);
655 }
656 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
657 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
658 }
659 free_itemps(gen);
660 return TRUE;
661}
662
663
664/**
665 * Emit linear interpolate. See emit_ADD for comments.
666 */
667static boolean
668emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst)
669{
670 int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4];
671
672 /* setup/get src/dst/temp regs */
673 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
674 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
675 s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
676 s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]);
677 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
678 tmp_reg[ch] = get_itemp(gen);
679 }
680
681 /* d = s3 + s1(s2 - s3) */
682 /* do all subtracts, then all fma, then all stores to better pipeline */
683 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
684 spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]);
685 }
686 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
687 spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]);
688 }
689 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
690 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
691 }
692 free_itemps(gen);
693 return TRUE;
694}
695
696
697
698/**
699 * Emit reciprocal or recip sqrt.
700 */
701static boolean
702emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
703{
704 int ch, s1_reg[4], d_reg[4], tmp_reg[4];
705
706 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
707 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
708 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
709 tmp_reg[ch] = get_itemp(gen);
710 }
711
712 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
713 if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) {
714 /* tmp = 1/s1 */
715 spe_frest(gen->f, tmp_reg[ch], s1_reg[ch]);
716 }
717 else {
718 /* tmp = 1/sqrt(s1) */
719 spe_frsqest(gen->f, tmp_reg[ch], s1_reg[ch]);
720 }
721 }
722
723 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
724 /* d = float_interp(s1, tmp) */
725 spe_fi(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]);
726 }
727
728 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
729 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
730 }
731
732 free_itemps(gen);
733 return TRUE;
734}
735
736
737/**
738 * Emit absolute value. See emit_ADD for comments.
739 */
740static boolean
741emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
742{
743 int ch, s1_reg[4], d_reg[4];
744 const int bit31mask_reg = get_itemp(gen);
745
746 /* mask with bit 31 set, the rest cleared */
747 spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
748
749 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
750 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
751 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
752 }
753
754 /* d = sign bit cleared in s1 */
755 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
756 spe_andc(gen->f, d_reg[ch], s1_reg[ch], bit31mask_reg);
757 }
758
759 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
760 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
761 }
762
763 free_itemps(gen);
764 return TRUE;
765}
766
767/**
768 * Emit 3 component dot product. See emit_ADD for comments.
769 */
770static boolean
771emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
772{
773 int ch;
774 int s1x_reg, s1y_reg, s1z_reg;
775 int s2x_reg, s2y_reg, s2z_reg;
776 int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
777
778 s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
779 s2x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
780 s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
781 s2y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
782 s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
783 s2z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
784
785 /* t0 = x0 * x1 */
786 spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg);
787
788 /* t1 = y0 * y1 */
789 spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg);
790
791 /* t0 = z0 * z1 + t0 */
792 spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg);
793
794 /* t0 = t0 + t1 */
795 spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
796
797 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
798 int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
799 spe_move(gen->f, d_reg, t0_reg);
800 store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
801 }
802
803 free_itemps(gen);
804 return TRUE;
805}
806
807/**
808 * Emit 4 component dot product. See emit_ADD for comments.
809 */
810static boolean
811emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
812{
813 int ch;
814 int s0x_reg, s0y_reg, s0z_reg, s0w_reg;
815 int s1x_reg, s1y_reg, s1z_reg, s1w_reg;
816 int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
817
818 s0x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
819 s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
820 s0y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
821 s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
822 s0z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
823 s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
824 s0w_reg = get_src_reg(gen, CHAN_W, &inst->Src[0]);
825 s1w_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]);
826
827 /* t0 = x0 * x1 */
828 spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg);
829
830 /* t1 = y0 * y1 */
831 spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg);
832
833 /* t0 = z0 * z1 + t0 */
834 spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg);
835
836 /* t1 = w0 * w1 + t1 */
837 spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg);
838
839 /* t0 = t0 + t1 */
840 spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
841
842 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
843 int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
844 spe_move(gen->f, d_reg, t0_reg);
845 store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
846 }
847
848 free_itemps(gen);
849 return TRUE;
850}
851
852/**
853 * Emit homogeneous dot product. See emit_ADD for comments.
854 */
855static boolean
856emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
857{
858 /* XXX rewrite this function to look more like DP3/DP4 */
859 int ch;
860 int s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
861 int s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
862 int tmp_reg = get_itemp(gen);
863
864 /* t = x0 * x1 */
865 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
866
867 s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
868 s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
869 /* t = y0 * y1 + t */
870 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
871
872 s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
873 s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
874 /* t = z0 * z1 + t */
875 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
876
877 s2_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]);
878 /* t = w1 + t */
879 spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
880
881 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
882 int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
883 spe_move(gen->f, d_reg, tmp_reg);
884 store_dest_reg(gen, tmp_reg, ch, &inst->Dst[0]);
885 }
886
887 free_itemps(gen);
888 return TRUE;
889}
890
891/**
892 * Emit 3-component vector normalize.
893 */
894static boolean
895emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
896{
897 int ch;
898 int src_reg[3];
899 int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
900
901 src_reg[0] = get_src_reg(gen, CHAN_X, &inst->Src[0]);
902 src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
903 src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
904
905 /* t0 = x * x */
906 spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]);
907
908 /* t1 = y * y */
909 spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]);
910
911 /* t0 = z * z + t0 */
912 spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg);
913
914 /* t0 = t0 + t1 */
915 spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
916
917 /* t1 = 1.0 / sqrt(t0) */
918 spe_frsqest(gen->f, t1_reg, t0_reg);
919 spe_fi(gen->f, t1_reg, t0_reg, t1_reg);
920
921 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
922 int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
923 /* dst = src[ch] * t1 */
924 spe_fm(gen->f, d_reg, src_reg[ch], t1_reg);
925 store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
926 }
927
928 free_itemps(gen);
929 return TRUE;
930}
931
932
933/**
934 * Emit cross product. See emit_ADD for comments.
935 */
936static boolean
937emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
938{
939 int s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
940 int s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
941 int tmp_reg = get_itemp(gen);
942
943 /* t = z0 * y1 */
944 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
945
946 s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
947 s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
948 /* t = y0 * z1 - t */
949 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
950
951 if (inst->Dst[0].Register.WriteMask & (1 << CHAN_X)) {
952 store_dest_reg(gen, tmp_reg, CHAN_X, &inst->Dst[0]);
953 }
954
955 s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
956 s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
957 /* t = x0 * z1 */
958 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
959
960 s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
961 s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
962 /* t = z0 * x1 - t */
963 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
964
965 if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Y)) {
966 store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->Dst[0]);
967 }
968
969 s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
970 s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
971 /* t = y0 * x1 */
972 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
973
974 s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
975 s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
976 /* t = x0 * y1 - t */
977 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
978
979 if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Z)) {
980 store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->Dst[0]);
981 }
982
983 free_itemps(gen);
984 return TRUE;
985}
986
987
988/**
989 * Emit inequality instruction.
990 * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
991 * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
992 * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
993 */
994static boolean
995emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst)
996{
997 int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg;
998 boolean complement = FALSE;
999
1000 one_reg = get_const_one_reg(gen);
1001
1002 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1003 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
1004 s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
1005 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
1006 }
1007
1008 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1009 switch (inst->Instruction.Opcode) {
1010 case TGSI_OPCODE_SGT:
1011 spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
1012 break;
1013 case TGSI_OPCODE_SLT:
1014 spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]);
1015 break;
1016 case TGSI_OPCODE_SGE:
1017 spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]);
1018 complement = TRUE;
1019 break;
1020 case TGSI_OPCODE_SLE:
1021 spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
1022 complement = TRUE;
1023 break;
1024 case TGSI_OPCODE_SEQ:
1025 spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
1026 break;
1027 case TGSI_OPCODE_SNE:
1028 spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
1029 complement = TRUE;
1030 break;
1031 default:
1032 assert(0);
1033 }
1034 }
1035
1036 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1037 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1038 /* d = d & one_reg */
1039 if (complement)
1040 spe_andc(gen->f, d_reg[ch], one_reg, d_reg[ch]);
1041 else
1042 spe_and(gen->f, d_reg[ch], one_reg, d_reg[ch]);
1043 }
1044
1045 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1046 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
1047 }
1048
1049 free_itemps(gen);
1050 return TRUE;
1051}
1052
1053
1054/**
1055 * Emit compare.
1056 */
1057static boolean
1058emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
1059{
1060 int ch;
1061
1062 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1063 int s1_reg = get_src_reg(gen, ch, &inst->Src[0]);
1064 int s2_reg = get_src_reg(gen, ch, &inst->Src[1]);
1065 int s3_reg = get_src_reg(gen, ch, &inst->Src[2]);
1066 int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
1067 int zero_reg = get_itemp(gen);
1068
1069 spe_zero(gen->f, zero_reg);
1070
1071 /* d = (s1 < 0) ? s2 : s3 */
1072 spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
1073 spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
1074
1075 store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
1076 free_itemps(gen);
1077 }
1078
1079 return TRUE;
1080}
1081
1082/**
1083 * Emit trunc.
1084 * Convert float to signed int
1085 * Convert signed int to float
1086 */
1087static boolean
1088emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
1089{
1090 int ch, s1_reg[4], d_reg[4];
1091
1092 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1093 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
1094 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
1095 }
1096
1097 /* Convert float to int */
1098 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1099 spe_cflts(gen->f, d_reg[ch], s1_reg[ch], 0);
1100 }
1101
1102 /* Convert int to float */
1103 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1104 spe_csflt(gen->f, d_reg[ch], d_reg[ch], 0);
1105 }
1106
1107 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1108 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
1109 }
1110
1111 free_itemps(gen);
1112 return TRUE;
1113}
1114
1115
1116/**
1117 * Emit floor.
1118 * If negative int subtract one
1119 * Convert float to signed int
1120 * Convert signed int to float
1121 */
1122static boolean
1123emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
1124{
1125 int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg;
1126
1127 zero_reg = get_itemp(gen);
1128 spe_zero(gen->f, zero_reg);
1129 one_reg = get_const_one_reg(gen);
1130
1131 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1132 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
1133 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
1134 tmp_reg[ch] = get_itemp(gen);
1135 }
1136
1137 /* If negative, subtract 1.0 */
1138 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1139 spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]);
1140 }
1141 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1142 spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]);
1143 }
1144 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1145 spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]);
1146 }
1147
1148 /* Convert float to int */
1149 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1150 spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
1151 }
1152
1153 /* Convert int to float */
1154 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1155 spe_csflt(gen->f, d_reg[ch], tmp_reg[ch], 0);
1156 }
1157
1158 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1159 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
1160 }
1161
1162 free_itemps(gen);
1163 return TRUE;
1164}
1165
1166
1167/**
1168 * Compute frac = Input - FLR(Input)
1169 */
1170static boolean
1171emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
1172{
1173 int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg;
1174
1175 zero_reg = get_itemp(gen);
1176 spe_zero(gen->f, zero_reg);
1177 one_reg = get_const_one_reg(gen);
1178
1179 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1180 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
1181 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
1182 tmp_reg[ch] = get_itemp(gen);
1183 }
1184
1185 /* If negative, subtract 1.0 */
1186 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1187 spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]);
1188 }
1189 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1190 spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]);
1191 }
1192 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1193 spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]);
1194 }
1195
1196 /* Convert float to int */
1197 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1198 spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
1199 }
1200
1201 /* Convert int to float */
1202 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1203 spe_csflt(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
1204 }
1205
1206 /* d = s1 - FLR(s1) */
1207 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1208 spe_fs(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]);
1209 }
1210
1211 /* store result */
1212 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1213 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
1214 }
1215
1216 free_itemps(gen);
1217 return TRUE;
1218}
1219
1220
#if 0
/**
 * Debug-only helper (currently compiled out): print the name and
 * address of every SPU function known to the cell context.
 */
static void
print_functions(struct cell_context *cell)
{
   struct cell_spu_function_info *funcs = &cell->spu_functions;
   uint i;
   for (i = 0; i < funcs->num; i++) {
      printf("SPU func %u: %s at %u\n",
             i, funcs->names[i], funcs->addrs[i]);
   }
}
#endif
1233
1234
1235static uint
1236lookup_function(struct cell_context *cell, const char *funcname)
1237{
1238 const struct cell_spu_function_info *funcs = &cell->spu_functions;
1239 uint i, addr = 0;
1240 for (i = 0; i < funcs->num; i++) {
1241 if (strcmp(funcs->names[i], funcname) == 0) {
1242 addr = funcs->addrs[i];
1243 }
1244 }
1245 assert(addr && "spu function not found");
1246 return addr / 4; /* discard 2 least significant bits */
1247}
1248
1249
1250/**
1251 * Emit code to call a SPU function.
1252 * Used to implement instructions like SIN/COS/POW/TEX/etc.
1253 * If scalar, only the X components of the src regs are used, and the
1254 * result is replicated across the dest register's XYZW components.
1255 */
1256static boolean
1257emit_function_call(struct codegen *gen,
1258 const struct tgsi_full_instruction *inst,
1259 char *funcname, uint num_args, boolean scalar)
1260{
1261 const uint addr = lookup_function(gen->cell, funcname);
1262 char comment[100];
1263 int s_regs[3];
1264 int func_called = FALSE;
1265 uint a, ch;
1266 int retval_reg = -1;
1267
1268 assert(num_args <= 3);
1269
1270 snprintf(comment, sizeof(comment), "CALL %s:", funcname);
1271 spe_comment(gen->f, -4, comment);
1272
1273 if (scalar) {
1274 for (a = 0; a < num_args; a++) {
1275 s_regs[a] = get_src_reg(gen, CHAN_X, &inst->Src[a]);
1276 }
1277 /* we'll call the function, put the return value in this register,
1278 * then replicate it across all write-enabled components in d_reg.
1279 */
1280 retval_reg = spe_allocate_available_register(gen->f);
1281 }
1282
1283 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1284 int d_reg;
1285 ubyte usedRegs[SPE_NUM_REGS];
1286 uint i, numUsed;
1287
1288 if (!scalar) {
1289 for (a = 0; a < num_args; a++) {
1290 s_regs[a] = get_src_reg(gen, ch, &inst->Src[a]);
1291 }
1292 }
1293
1294 d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
1295
1296 if (!scalar || !func_called) {
1297 /* for a scalar function, we'll really only call the function once */
1298
1299 numUsed = spe_get_registers_used(gen->f, usedRegs);
1300 assert(numUsed < gen->frame_size / 16 - 2);
1301
1302 /* save registers to stack */
1303 for (i = 0; i < numUsed; i++) {
1304 uint reg = usedRegs[i];
1305 int offset = 2 + i;
1306 spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1307 }
1308
1309 /* setup function arguments */
1310 for (a = 0; a < num_args; a++) {
1311 spe_move(gen->f, 3 + a, s_regs[a]);
1312 }
1313
1314 /* branch to function, save return addr */
1315 spe_brasl(gen->f, SPE_REG_RA, addr);
1316
1317 /* save function's return value */
1318 if (scalar)
1319 spe_move(gen->f, retval_reg, 3);
1320 else
1321 spe_move(gen->f, d_reg, 3);
1322
1323 /* restore registers from stack */
1324 for (i = 0; i < numUsed; i++) {
1325 uint reg = usedRegs[i];
1326 if (reg != d_reg && reg != retval_reg) {
1327 int offset = 2 + i;
1328 spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1329 }
1330 }
1331
1332 func_called = TRUE;
1333 }
1334
1335 if (scalar) {
1336 spe_move(gen->f, d_reg, retval_reg);
1337 }
1338
1339 store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
1340 free_itemps(gen);
1341 }
1342
1343 if (scalar) {
1344 spe_release_register(gen->f, retval_reg);
1345 }
1346
1347 return TRUE;
1348}
1349
1350
/**
 * Emit a texture sample (TEX) as a call to the SPU texture function
 * for the given texture target.  The four coordinate channels are
 * passed in registers 3..6 and the sampler unit in register 7; the
 * four result colors come back in registers 3..6 (consistent with the
 * moves below).  As in emit_function_call(), all live registers are
 * spilled around the call.
 */
static boolean
emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
   const uint target = inst->Texture.Texture;
   const uint unit = inst->Src[1].Register.Index;
   uint addr;
   int ch;
   int coord_regs[4], d_regs[4];

   /* pick the SPU function matching the texture target */
   switch (target) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_2D:
      addr = lookup_function(gen->cell, "spu_tex_2d");
      break;
   case TGSI_TEXTURE_3D:
      addr = lookup_function(gen->cell, "spu_tex_3d");
      break;
   case TGSI_TEXTURE_CUBE:
      addr = lookup_function(gen->cell, "spu_tex_cube");
      break;
   default:
      ASSERT(0 && "unsupported texture target");
      return FALSE;
   }

   assert(inst->Src[1].Register.File == TGSI_FILE_SAMPLER);

   spe_comment(gen->f, -4, "CALL tex:");

   /* get src/dst reg info */
   for (ch = 0; ch < 4; ch++) {
      coord_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]);
      d_regs[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
   }

   {
      ubyte usedRegs[SPE_NUM_REGS];
      uint i, numUsed;

      numUsed = spe_get_registers_used(gen->f, usedRegs);
      /* spill area must fit within the stack frame */
      assert(numUsed < gen->frame_size / 16 - 2);

      /* save registers to stack */
      for (i = 0; i < numUsed; i++) {
         uint reg = usedRegs[i];
         int offset = 2 + i;
         spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
      }

      /* setup function arguments (XXX depends on target) */
      for (i = 0; i < 4; i++) {
         spe_move(gen->f, 3 + i, coord_regs[i]);
      }
      spe_load_uint(gen->f, 7, unit); /* sampler unit */

      /* branch to function, save return addr */
      spe_brasl(gen->f, SPE_REG_RA, addr);

      /* save function's return values (four pixel's colors) */
      for (i = 0; i < 4; i++) {
         spe_move(gen->f, d_regs[i], 3 + i);
      }

      /* restore registers from stack, but don't clobber the four
       * result registers
       */
      for (i = 0; i < numUsed; i++) {
         uint reg = usedRegs[i];
         if (reg != d_regs[0] &&
             reg != d_regs[1] &&
             reg != d_regs[2] &&
             reg != d_regs[3]) {
            int offset = 2 + i;
            spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
         }
      }
   }

   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
      store_dest_reg(gen, d_regs[ch], ch, &inst->Dst[0]);
      free_itemps(gen);
   }

   return TRUE;
}
1434
1435
/**
 * Emit code for TGSI_OPCODE_KIL: kill the fragment(s) if any of the
 * src reg values are less than zero.
 * The result is accumulated into gen->kill_mask_reg, which is lazily
 * allocated on the first KIL and stays live for the rest of the shader.
 */
static boolean
emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
   int ch;
   int s_regs[4], kil_reg = -1, cmp_reg, zero_reg;  /* kil_reg < 0 = not yet allocated */

   spe_comment(gen->f, -4, "CALL kil:");

   /* zero = {0,0,0,0} */
   zero_reg = get_itemp(gen);
   spe_zero(gen->f, zero_reg);

   cmp_reg = get_itemp(gen);

   /* get src regs */
   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
      s_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]);
   }

   /* test if any src regs are < 0, OR-ing the per-channel results */
   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
      if (kil_reg >= 0) {
         /* cmp = (0 > src) ? ~0 : 0 */
         spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]);
         /* kil = kil | cmp */
         spe_or(gen->f, kil_reg, kil_reg, cmp_reg);
      }
      else {
         /* first enabled channel: compare directly into kil_reg */
         kil_reg = get_itemp(gen);
         /* kil = (0 > src) ? ~0 : 0 */
         spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]);
      }
   }

   if (gen->if_nesting || gen->loop_nesting) {
      /* may have been a conditional kil: only kill active channels */
      spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg);
   }

   /* allocate the kill mask reg if needed */
   if (gen->kill_mask_reg <= 0) {
      gen->kill_mask_reg = spe_allocate_available_register(gen->f);
      spe_move(gen->f, gen->kill_mask_reg, kil_reg);
   }
   else {
      /* accumulate into the existing kill mask */
      spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg);
   }

   free_itemps(gen);

   return TRUE;
}
1491
1492
1493
1494/**
1495 * Emit min or max.
1496 */
1497static boolean
1498emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
1499{
1500 int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
1501
1502 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1503 s0_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
1504 s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
1505 d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
1506 tmp_reg[ch] = get_itemp(gen);
1507 }
1508
1509 /* d = (s0 > s1) ? s0 : s1 */
1510 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1511 if (inst->Instruction.Opcode == TGSI_OPCODE_MAX)
1512 spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]);
1513 else
1514 spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]);
1515 }
1516 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1517 spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
1518 }
1519
1520 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1521 store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
1522 }
1523
1524 free_itemps(gen);
1525 return TRUE;
1526}
1527
1528
1529/**
1530 * Emit code to update the execution mask.
1531 * This needs to be done whenever the execution status of a conditional
1532 * or loop is changed.
1533 */
1534static void
1535emit_update_exec_mask(struct codegen *gen)
1536{
1537 const int exec_reg = get_exec_mask_reg(gen);
1538 const int cond_reg = gen->cond_mask_reg;
1539 const int loop_reg = gen->loop_mask_reg;
1540
1541 spe_comment(gen->f, 0, "Update master execution mask");
1542
1543 if (gen->if_nesting > 0 && gen->loop_nesting > 0) {
1544 /* exec_mask = cond_mask & loop_mask */
1545 assert(cond_reg > 0);
1546 assert(loop_reg > 0);
1547 spe_and(gen->f, exec_reg, cond_reg, loop_reg);
1548 }
1549 else if (gen->if_nesting > 0) {
1550 assert(cond_reg > 0);
1551 spe_move(gen->f, exec_reg, cond_reg);
1552 }
1553 else if (gen->loop_nesting > 0) {
1554 assert(loop_reg > 0);
1555 spe_move(gen->f, exec_reg, loop_reg);
1556 }
1557 else {
1558 spe_load_int(gen->f, exec_reg, ~0x0);
1559 }
1560}
1561
1562
1563static boolean
1564emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1565{
1566 const int channel = 0;
1567 int cond_reg;
1568
1569 cond_reg = get_cond_mask_reg(gen);
1570
1571 /* XXX push cond exec mask */
1572
1573 spe_comment(gen->f, 0, "init conditional exec mask = ~0:");
1574 spe_load_int(gen->f, cond_reg, ~0);
1575
1576 /* update conditional execution mask with the predicate register */
1577 int tmp_reg = get_itemp(gen);
1578 int s1_reg = get_src_reg(gen, channel, &inst->Src[0]);
1579
1580 /* tmp = (s1_reg == 0) */
1581 spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
1582 /* tmp = !tmp */
1583 spe_complement(gen->f, tmp_reg, tmp_reg);
1584 /* cond_mask = cond_mask & tmp */
1585 spe_and(gen->f, cond_reg, cond_reg, tmp_reg);
1586
1587 gen->if_nesting++;
1588
1589 /* update the master execution mask */
1590 emit_update_exec_mask(gen);
1591
1592 free_itemps(gen);
1593
1594 return TRUE;
1595}
1596
1597
/**
 * Emit code for TGSI_OPCODE_ELSE: invert the conditional execution
 * mask so the channels that failed the IF test become the active ones,
 * then recompute the master execution mask.
 */
static boolean
emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
   const int cond_reg = get_cond_mask_reg(gen);

   spe_comment(gen->f, 0, "cond exec mask = !cond exec mask");
   spe_complement(gen->f, cond_reg, cond_reg);
   emit_update_exec_mask(gen);

   return TRUE;
}
1609
1610
/**
 * Emit code for TGSI_OPCODE_ENDIF: close the innermost conditional
 * and recompute the master execution mask.
 */
static boolean
emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
   /* XXX todo: pop cond exec mask */

   gen->if_nesting--;

   emit_update_exec_mask(gen);

   return TRUE;
}
1622
1623
1624static boolean
1625emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
1626{
1627 int exec_reg, loop_reg;
1628
1629 exec_reg = get_exec_mask_reg(gen);
1630 loop_reg = get_loop_mask_reg(gen);
1631
1632 /* XXX push loop_exec mask */
1633
1634 spe_comment(gen->f, 0*-4, "initialize loop exec mask = ~0");
1635 spe_load_int(gen->f, loop_reg, ~0x0);
1636
1637 gen->loop_nesting++;
1638 gen->loop_start = spe_code_size(gen->f); /* in bytes */
1639
1640 return TRUE;
1641}
1642
1643
/**
 * Emit code for TGSI_OPCODE_ENDLOOP: branch back to the top of the
 * loop if any channel's loop mask is still set.
 */
static boolean
emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
   const int loop_reg = get_loop_mask_reg(gen);
   const int tmp_reg = get_itemp(gen);
   int offset;  /* backward branch distance, in bytes (negative) */

   /* tmp_reg = exec[0] | exec[1] | exec[2] | exec[3] */
   spe_orx(gen->f, tmp_reg, loop_reg);

   offset = gen->loop_start - spe_code_size(gen->f); /* in bytes */

   /* branch back to top of loop if tmp_reg != 0
    * (offset is converted from bytes to 4-byte instruction words)
    */
   spe_brnz(gen->f, tmp_reg, offset / 4);

   /* XXX pop loop_exec mask */

   gen->loop_nesting--;

   emit_update_exec_mask(gen);

   return TRUE;
}
1667
1668
/**
 * Emit code for TGSI_OPCODE_BRK: break out of the loop for all
 * currently-active channels by clearing their bits in the loop mask,
 * then recompute the master execution mask.
 */
static boolean
emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
   const int exec_reg = get_exec_mask_reg(gen);
   const int loop_reg = get_loop_mask_reg(gen);

   assert(gen->loop_nesting > 0);

   spe_comment(gen->f, 0, "loop exec mask &= ~master exec mask");
   spe_andc(gen->f, loop_reg, loop_reg, exec_reg);

   emit_update_exec_mask(gen);

   return TRUE;
}
1684
1685
/**
 * Emit code for TGSI_OPCODE_CONT.
 * NOTE(review): no instructions are emitted here, so CONT currently
 * appears to be an unimplemented stub (behaves as a no-op).
 */
static boolean
emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
   assert(gen->loop_nesting > 0);

   return TRUE;
}
1693
1694
1695static boolean
1696emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
1697 boolean ddx)
1698{
1699 int ch;
1700
1701 FOR_EACH_ENABLED_CHANNEL(inst, ch) {
1702 int s_reg = get_src_reg(gen, ch, &inst->Src[0]);
1703 int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
1704
1705 int t1_reg = get_itemp(gen);
1706 int t2_reg = get_itemp(gen);
1707
1708 spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */
1709 if (ddx) {
1710 spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */
1711 }
1712 else {
1713 spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */
1714 }
1715 spe_fs(gen->f, d_reg, t2_reg, t1_reg);
1716
1717 free_itemps(gen);
1718 }
1719
1720 return TRUE;
1721}
1722
1723
1724
1725
/**
 * Emit END instruction.
 * We just return from the shader function at this point (via the
 * epilogue code emitted by emit_epilogue()).
 *
 * Note that there may be more code after this that would be
 * called by TGSI_OPCODE_CALL.
 */
static boolean
emit_END(struct codegen *gen)
{
   emit_epilogue(gen);
   return TRUE;
}
1739
1740
/**
 * Emit code for the given instruction.  Just a big switch stmt.
 * \return TRUE for success, FALSE if the opcode is unimplemented
 *         (or a specific emitter reported failure).
 */
static boolean
emit_instruction(struct codegen *gen,
                 const struct tgsi_full_instruction *inst)
{
   switch (inst->Instruction.Opcode) {
   /* moves and arithmetic */
   case TGSI_OPCODE_ARL:
      return emit_ARL(gen, inst);
   case TGSI_OPCODE_MOV:
      return emit_MOV(gen, inst);
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_SUB:
   case TGSI_OPCODE_MUL:
      return emit_binop(gen, inst);
   case TGSI_OPCODE_MAD:
      return emit_MAD(gen, inst);
   case TGSI_OPCODE_LRP:
      return emit_LRP(gen, inst);
   case TGSI_OPCODE_DP3:
      return emit_DP3(gen, inst);
   case TGSI_OPCODE_DP4:
      return emit_DP4(gen, inst);
   case TGSI_OPCODE_DPH:
      return emit_DPH(gen, inst);
   case TGSI_OPCODE_NRM:
      return emit_NRM3(gen, inst);
   case TGSI_OPCODE_XPD:
      return emit_XPD(gen, inst);
   case TGSI_OPCODE_RCP:
   case TGSI_OPCODE_RSQ:
      return emit_RCP_RSQ(gen, inst);
   case TGSI_OPCODE_ABS:
      return emit_ABS(gen, inst);
   /* comparisons and selection */
   case TGSI_OPCODE_SGT:
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_SLE:
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_SNE:
      return emit_inequality(gen, inst);
   case TGSI_OPCODE_CMP:
      return emit_CMP(gen, inst);
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_MAX:
      return emit_MIN_MAX(gen, inst);
   case TGSI_OPCODE_TRUNC:
      return emit_TRUNC(gen, inst);
   case TGSI_OPCODE_FLR:
      return emit_FLR(gen, inst);
   case TGSI_OPCODE_FRC:
      return emit_FRC(gen, inst);
   case TGSI_OPCODE_END:
      return emit_END(gen);

   /* transcendentals: implemented as calls to SPU library functions */
   case TGSI_OPCODE_COS:
      return emit_function_call(gen, inst, "spu_cos", 1, TRUE);
   case TGSI_OPCODE_SIN:
      return emit_function_call(gen, inst, "spu_sin", 1, TRUE);
   case TGSI_OPCODE_POW:
      return emit_function_call(gen, inst, "spu_pow", 2, TRUE);
   case TGSI_OPCODE_EX2:
      return emit_function_call(gen, inst, "spu_exp2", 1, TRUE);
   case TGSI_OPCODE_LG2:
      return emit_function_call(gen, inst, "spu_log2", 1, TRUE);
   /* texture sampling: all variants currently share emit_TEX() */
   case TGSI_OPCODE_TEX:
      /* fall-through for now */
   case TGSI_OPCODE_TXD:
      /* fall-through for now */
   case TGSI_OPCODE_TXB:
      /* fall-through for now */
   case TGSI_OPCODE_TXL:
      /* fall-through for now */
   case TGSI_OPCODE_TXP:
      return emit_TEX(gen, inst);
   case TGSI_OPCODE_KIL:
      return emit_KIL(gen, inst);

   /* control flow */
   case TGSI_OPCODE_IF:
      return emit_IF(gen, inst);
   case TGSI_OPCODE_ELSE:
      return emit_ELSE(gen, inst);
   case TGSI_OPCODE_ENDIF:
      return emit_ENDIF(gen, inst);

   case TGSI_OPCODE_BGNLOOP:
      return emit_BGNLOOP(gen, inst);
   case TGSI_OPCODE_ENDLOOP:
      return emit_ENDLOOP(gen, inst);
   case TGSI_OPCODE_BRK:
      return emit_BRK(gen, inst);
   case TGSI_OPCODE_CONT:
      return emit_CONT(gen, inst);

   /* derivatives */
   case TGSI_OPCODE_DDX:
      return emit_DDX_DDY(gen, inst, TRUE);
   case TGSI_OPCODE_DDY:
      return emit_DDX_DDY(gen, inst, FALSE);

   /* XXX lots more cases to do... */

   default:
      fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n",
              inst->Instruction.Opcode);
      return FALSE;
   }

   return TRUE;
}
1851
1852
1853
1854/**
1855 * Emit code for a TGSI immediate value (vector of four floats).
1856 * This involves register allocation and initialization.
1857 * XXX the initialization should be done by a "prepare" stage, not
1858 * per quad execution!
1859 */
1860static boolean
1861emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
1862{
1863 int ch;
1864
1865 assert(gen->num_imm < MAX_TEMPS);
1866
1867 for (ch = 0; ch < 4; ch++) {
1868 float val = immed->u[ch].Float;
1869
1870 if (ch > 0 && val == immed->u[ch - 1].Float) {
1871 /* re-use previous register */
1872 gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1];
1873 }
1874 else {
1875 char str[100];
1876 int reg = spe_allocate_available_register(gen->f);
1877
1878 if (reg < 0)
1879 return FALSE;
1880
1881 sprintf(str, "init $%d = %f", reg, val);
1882 spe_comment(gen->f, 0, str);
1883
1884 /* update immediate map */
1885 gen->imm_regs[gen->num_imm][ch] = reg;
1886
1887 /* emit initializer instruction */
1888 spe_load_float(gen->f, reg, val);
1889 }
1890 }
1891
1892 gen->num_imm++;
1893
1894 return TRUE;
1895}
1896
1897
1898
1899/**
1900 * Emit "code" for a TGSI declaration.
1901 * We only care about TGSI TEMPORARY register declarations at this time.
1902 * For each TGSI TEMPORARY we allocate four SPE registers.
1903 */
1904static boolean
1905emit_declaration(struct cell_context *cell,
1906 struct codegen *gen, const struct tgsi_full_declaration *decl)
1907{
1908 int i, ch;
1909
1910 switch (decl->Declaration.File) {
1911 case TGSI_FILE_TEMPORARY:
1912 for (i = decl->Range.First;
1913 i <= decl->Range.Last;
1914 i++) {
1915 assert(i < MAX_TEMPS);
1916 for (ch = 0; ch < 4; ch++) {
1917 gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
1918 if (gen->temp_regs[i][ch] < 0)
1919 return FALSE; /* out of regs */
1920 }
1921
1922 /* XXX if we run out of SPE registers, we need to spill
1923 * to SPU memory. someday...
1924 */
1925
1926 {
1927 char buf[100];
1928 sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i,
1929 gen->temp_regs[i][0], gen->temp_regs[i][1],
1930 gen->temp_regs[i][2], gen->temp_regs[i][3]);
1931 spe_comment(gen->f, 0, buf);
1932 }
1933 }
1934 break;
1935 default:
1936 ; /* ignore */
1937 }
1938
1939 return TRUE;
1940}
1941
1942
1943
1944/**
1945 * Translate TGSI shader code to SPE instructions. This is done when
1946 * the state tracker gives us a new shader (via pipe->create_fs_state()).
1947 *
1948 * \param cell the rendering context (in)
1949 * \param tokens the TGSI shader (in)
1950 * \param f the generated function (out)
1951 */
1952boolean
1953cell_gen_fragment_program(struct cell_context *cell,
1954 const struct tgsi_token *tokens,
1955 struct spe_function *f)
1956{
1957 struct tgsi_parse_context parse;
1958 struct codegen gen;
1959 uint ic = 0;
1960
1961 memset(&gen, 0, sizeof(gen));
1962 gen.cell = cell;
1963 gen.f = f;
1964
1965 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
1966 gen.inputs_reg = 3; /* pointer to inputs array */
1967 gen.outputs_reg = 4; /* pointer to outputs array */
1968 gen.constants_reg = 5; /* pointer to constants array */
1969
1970 spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
1971 spe_allocate_register(f, gen.inputs_reg);
1972 spe_allocate_register(f, gen.outputs_reg);
1973 spe_allocate_register(f, gen.constants_reg);
1974
1975 if (cell->debug_flags & CELL_DEBUG_ASM) {
1976 spe_print_code(f, TRUE);
1977 spe_indent(f, 2*8);
1978 printf("Begin %s\n", __FUNCTION__);
1979 tgsi_dump(tokens, 0);
1980 }
1981
1982 tgsi_parse_init(&parse, tokens);
1983
1984 emit_prologue(&gen);
1985
1986 while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
1987 tgsi_parse_token(&parse);
1988
1989 switch (parse.FullToken.Token.Type) {
1990 case TGSI_TOKEN_TYPE_IMMEDIATE:
1991 if (f->print) {
1992 _debug_printf(" # ");
1993 tgsi_dump_immediate(&parse.FullToken.FullImmediate);
1994 }
1995 if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
1996 gen.error = TRUE;
1997 break;
1998
1999 case TGSI_TOKEN_TYPE_DECLARATION:
2000 if (f->print) {
2001 _debug_printf(" # ");
2002 tgsi_dump_declaration(&parse.FullToken.FullDeclaration);
2003 }
2004 if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration))
2005 gen.error = TRUE;
2006 break;
2007
2008 case TGSI_TOKEN_TYPE_INSTRUCTION:
2009 if (f->print) {
2010 _debug_printf(" # ");
2011 ic++;
2012 tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic);
2013 }
2014 if (!emit_instruction(&gen, &parse.FullToken.FullInstruction))
2015 gen.error = TRUE;
2016 break;
2017
2018 default:
2019 assert(0);
2020 }
2021 }
2022
2023 if (gen.error) {
2024 /* terminate the SPE code */
2025 return emit_END(&gen);
2026 }
2027
2028 if (cell->debug_flags & CELL_DEBUG_ASM) {
2029 printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
2030 printf("End %s\n", __FUNCTION__);
2031 }
2032
2033 tgsi_parse_free( &parse );
2034
2035 return !gen.error;
2036}
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.h b/src/gallium/drivers/cell/ppu/cell_gen_fp.h
deleted file mode 100644
index 99faea70462..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.h
+++ /dev/null
@@ -1,42 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29
30#ifndef CELL_GEN_FP_H
31#define CELL_GEN_FP_H
32
33
34
35extern boolean
36cell_gen_fragment_program(struct cell_context *cell,
37 const struct tgsi_token *tokens,
38 struct spe_function *f);
39
40
41#endif /* CELL_GEN_FP_H */
42
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
deleted file mode 100644
index 76a85178007..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
+++ /dev/null
@@ -1,2189 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009 VMware, Inc. All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * Generate SPU per-fragment code (actually per-quad code).
31 * \author Brian Paul
32 * \author Bob Ellison
33 */
34
35
36#include "pipe/p_defines.h"
37#include "pipe/p_state.h"
38#include "rtasm/rtasm_ppc_spe.h"
39#include "cell_context.h"
40#include "cell_gen_fragment.h"
41
42
43
44/** Do extra optimizations? */
45#define OPTIMIZATIONS 1
46
47
48/**
49 * Generate SPE code to perform Z/depth testing.
50 *
51 * \param dsa Gallium depth/stencil/alpha state to gen code for
52 * \param f SPE function to append instruction onto.
53 * \param mask_reg register containing quad/pixel "alive" mask (in/out)
54 * \param ifragZ_reg register containing integer fragment Z values (in)
55 * \param ifbZ_reg register containing integer frame buffer Z values (in/out)
56 * \param zmask_reg register containing result of Z test/comparison (out)
57 *
58 * Returns TRUE if the Z-buffer needs to be updated.
59 */
static boolean
gen_depth_test(struct spe_function *f,
               const struct pipe_depth_stencil_alpha_state *dsa,
               int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg)
{
   /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_
    * quantities.  This only makes a difference for 32-bit Z values though.
    */
   ASSERT(dsa->depth.enabled);

   /* Each case computes zmask from fragment Z (ifragZ) vs. framebuffer Z
    * (ifbZ), then folds it into the live-pixel mask.  For the negated
    * comparisons (NOTEQUAL, LEQUAL, GEQUAL) the complementary test is
    * emitted and andc is used instead of and.
    */
   switch (dsa->depth.func) {
   case PIPE_FUNC_EQUAL:
      /* zmask = (ifragZ == ifbZ) */
      spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
      /* mask = (mask & zmask) */
      spe_and(f, mask_reg, mask_reg, zmask_reg);
      break;

   case PIPE_FUNC_NOTEQUAL:
      /* zmask = (ifragZ == ifbZ) */
      spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
      /* mask = (mask & ~zmask) */
      spe_andc(f, mask_reg, mask_reg, zmask_reg);
      break;

   case PIPE_FUNC_GREATER:
      /* zmask = (ifragZ > ifbZ) */
      spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
      /* mask = (mask & zmask) */
      spe_and(f, mask_reg, mask_reg, zmask_reg);
      break;

   case PIPE_FUNC_LESS:
      /* zmask = (ifbZ > ifragZ) */
      spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
      /* mask = (mask & zmask) */
      spe_and(f, mask_reg, mask_reg, zmask_reg);
      break;

   case PIPE_FUNC_LEQUAL:
      /* zmask = (ifragZ > ifbZ) */
      spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
      /* mask = (mask & ~zmask) */
      spe_andc(f, mask_reg, mask_reg, zmask_reg);
      break;

   case PIPE_FUNC_GEQUAL:
      /* zmask = (ifbZ > ifragZ) */
      spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
      /* mask = (mask & ~zmask) */
      spe_andc(f, mask_reg, mask_reg, zmask_reg);
      break;

   case PIPE_FUNC_NEVER:
      spe_il(f, mask_reg, 0);           /* mask = {0,0,0,0} */
      spe_move(f, zmask_reg, mask_reg); /* zmask = mask */
      break;

   case PIPE_FUNC_ALWAYS:
      /* mask unchanged */
      spe_il(f, zmask_reg, ~0);  /* zmask = {~0,~0,~0,~0} */
      break;

   default:
      ASSERT(0);
      break;
   }

   if (dsa->depth.writemask) {
      /*
       * If (ztest passed) {
       *    framebufferZ = fragmentZ;
       * }
       * OR,
       * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ;
       */
      spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg);
      return TRUE;
   }

   return FALSE;
}
142
143
144/**
145 * Generate SPE code to perform alpha testing.
146 *
147 * \param dsa Gallium depth/stencil/alpha state to gen code for
148 * \param f SPE function to append instruction onto.
149 * \param mask_reg register containing quad/pixel "alive" mask (in/out)
150 * \param fragA_reg register containing four fragment alpha values (in)
151 */
static void
gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
               struct spe_function *f, int mask_reg, int fragA_reg)
{
   /* scratch registers; released before returning */
   int ref_reg = spe_allocate_available_register(f);
   int amask_reg = spe_allocate_available_register(f);

   ASSERT(dsa->alpha.enabled);

   if ((dsa->alpha.func != PIPE_FUNC_NEVER) &&
       (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
      /* load/splat the alpha reference float value */
      spe_load_float(f, ref_reg, dsa->alpha.ref_value);
   }

   /* emit code to do the alpha comparison, updating 'mask'.
    * As in gen_depth_test(), the negated comparisons (NOTEQUAL,
    * LEQUAL, GEQUAL) emit the complementary test and use andc.
    */
   switch (dsa->alpha.func) {
   case PIPE_FUNC_EQUAL:
      /* amask = (fragA == ref) */
      spe_fceq(f, amask_reg, fragA_reg, ref_reg);
      /* mask = (mask & amask) */
      spe_and(f, mask_reg, mask_reg, amask_reg);
      break;

   case PIPE_FUNC_NOTEQUAL:
      /* amask = (fragA == ref) */
      spe_fceq(f, amask_reg, fragA_reg, ref_reg);
      /* mask = (mask & ~amask) */
      spe_andc(f, mask_reg, mask_reg, amask_reg);
      break;

   case PIPE_FUNC_GREATER:
      /* amask = (fragA > ref) */
      spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
      /* mask = (mask & amask) */
      spe_and(f, mask_reg, mask_reg, amask_reg);
      break;

   case PIPE_FUNC_LESS:
      /* amask = (ref > fragA) */
      spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
      /* mask = (mask & amask) */
      spe_and(f, mask_reg, mask_reg, amask_reg);
      break;

   case PIPE_FUNC_LEQUAL:
      /* amask = (fragA > ref) */
      spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
      /* mask = (mask & ~amask) */
      spe_andc(f, mask_reg, mask_reg, amask_reg);
      break;

   case PIPE_FUNC_GEQUAL:
      /* amask = (ref > fragA) */
      spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
      /* mask = (mask & ~amask) */
      spe_andc(f, mask_reg, mask_reg, amask_reg);
      break;

   case PIPE_FUNC_NEVER:
      spe_il(f, mask_reg, 0);  /* mask = [0,0,0,0] */
      break;

   case PIPE_FUNC_ALWAYS:
      /* no-op, mask unchanged */
      break;

   default:
      ASSERT(0);
      break;
   }

#if OPTIMIZATIONS
   /* if mask == {0,0,0,0} we're all done, return */
   {
      /* re-use amask reg here */
      int tmp_reg = amask_reg;
      /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */
      spe_orx(f, tmp_reg, mask_reg);
      /* if tmp[0] == 0 then return from function call
       * (early-out: no live pixels remain in this quad)
       */
      spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0);
   }
#endif

   spe_release_register(f, ref_reg);
   spe_release_register(f, amask_reg);
}
239
240
241/**
242 * This pair of functions is used inline to allocate and deallocate
243 * optional constant registers. Once a constant is discovered to be
244 * needed, we will likely need it again, so we don't want to deallocate
245 * it and have to allocate and load it again unnecessarily.
246 */
247static INLINE void
248setup_optional_register(struct spe_function *f,
249 int *r)
250{
251 if (*r < 0)
252 *r = spe_allocate_available_register(f);
253}
254
255static INLINE void
256release_optional_register(struct spe_function *f,
257 int r)
258{
259 if (r >= 0)
260 spe_release_register(f, r);
261}
262
263static INLINE void
264setup_const_register(struct spe_function *f,
265 int *r,
266 float value)
267{
268 if (*r >= 0)
269 return;
270 setup_optional_register(f, r);
271 spe_load_float(f, *r, value);
272}
273
274static INLINE void
275release_const_register(struct spe_function *f,
276 int r)
277{
278 release_optional_register(f, r);
279}
280
281
282
283/**
284 * Unpack/convert framebuffer colors from four 32-bit packed colors
285 * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA).
286 * Each 8-bit color component is expanded into a float in [0.0, 1.0].
287 */
static void
unpack_colors(struct spe_function *f,
              enum pipe_format color_format,
              int fbRGBA_reg,
              int fbR_reg, int fbG_reg, int fbB_reg, int fbA_reg)
{
   /* per-byte masks for isolating each 8-bit component */
   int mask0_reg = spe_allocate_available_register(f);
   int mask1_reg = spe_allocate_available_register(f);
   int mask2_reg = spe_allocate_available_register(f);
   int mask3_reg = spe_allocate_available_register(f);

   spe_load_int(f, mask0_reg, 0xff);
   spe_load_int(f, mask1_reg, 0xff00);
   spe_load_int(f, mask2_reg, 0xff0000);
   spe_load_int(f, mask3_reg, 0xff000000);

   spe_comment(f, 0, "Unpack framebuffer colors, convert to floats");

   /* For each component: mask off its byte, then rotate it down to the
    * low byte.  Since only one byte is non-zero after masking, roti by
    * a negative count acts as a logical right shift here.
    */
   switch (color_format) {
   case PIPE_FORMAT_B8G8R8A8_UNORM:
      /* fbB = fbRGBA & mask */
      spe_and(f, fbB_reg, fbRGBA_reg, mask0_reg);

      /* fbG = fbRGBA & mask */
      spe_and(f, fbG_reg, fbRGBA_reg, mask1_reg);

      /* fbR = fbRGBA & mask */
      spe_and(f, fbR_reg, fbRGBA_reg, mask2_reg);

      /* fbA = fbRGBA & mask */
      spe_and(f, fbA_reg, fbRGBA_reg, mask3_reg);

      /* fbG = fbG >> 8 */
      spe_roti(f, fbG_reg, fbG_reg, -8);

      /* fbR = fbR >> 16 */
      spe_roti(f, fbR_reg, fbR_reg, -16);

      /* fbA = fbA >> 24 */
      spe_roti(f, fbA_reg, fbA_reg, -24);
      break;

   case PIPE_FORMAT_A8R8G8B8_UNORM:
      /* fbA = fbRGBA & mask */
      spe_and(f, fbA_reg, fbRGBA_reg, mask0_reg);

      /* fbR = fbRGBA & mask */
      spe_and(f, fbR_reg, fbRGBA_reg, mask1_reg);

      /* fbG = fbRGBA & mask */
      spe_and(f, fbG_reg, fbRGBA_reg, mask2_reg);

      /* fbB = fbRGBA & mask */
      spe_and(f, fbB_reg, fbRGBA_reg, mask3_reg);

      /* fbR = fbR >> 8 */
      spe_roti(f, fbR_reg, fbR_reg, -8);

      /* fbG = fbG >> 16 */
      spe_roti(f, fbG_reg, fbG_reg, -16);

      /* fbB = fbB >> 24 */
      spe_roti(f, fbB_reg, fbB_reg, -24);
      break;

   default:
      ASSERT(0);
   }

   /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */
   spe_cuflt(f, fbR_reg, fbR_reg, 8);
   spe_cuflt(f, fbG_reg, fbG_reg, 8);
   spe_cuflt(f, fbB_reg, fbB_reg, 8);
   spe_cuflt(f, fbA_reg, fbA_reg, 8);

   spe_release_register(f, mask0_reg);
   spe_release_register(f, mask1_reg);
   spe_release_register(f, mask2_reg);
   spe_release_register(f, mask3_reg);
}
368
369
370/**
371 * Generate SPE code to implement the given blend mode for a quad of pixels.
372 * \param f SPE function to append instruction onto.
373 * \param fragR_reg register with fragment red values (float) (in/out)
374 * \param fragG_reg register with fragment green values (float) (in/out)
375 * \param fragB_reg register with fragment blue values (float) (in/out)
376 * \param fragA_reg register with fragment alpha values (float) (in/out)
377 * \param fbRGBA_reg register with packed framebuffer colors (integer) (in)
378 */
379static void
380gen_blend(const struct pipe_blend_state *blend,
381 const struct pipe_blend_color *blend_color,
382 struct spe_function *f,
383 enum pipe_format color_format,
384 int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg,
385 int fbRGBA_reg)
386{
387 int term1R_reg = spe_allocate_available_register(f);
388 int term1G_reg = spe_allocate_available_register(f);
389 int term1B_reg = spe_allocate_available_register(f);
390 int term1A_reg = spe_allocate_available_register(f);
391
392 int term2R_reg = spe_allocate_available_register(f);
393 int term2G_reg = spe_allocate_available_register(f);
394 int term2B_reg = spe_allocate_available_register(f);
395 int term2A_reg = spe_allocate_available_register(f);
396
397 int fbR_reg = spe_allocate_available_register(f);
398 int fbG_reg = spe_allocate_available_register(f);
399 int fbB_reg = spe_allocate_available_register(f);
400 int fbA_reg = spe_allocate_available_register(f);
401
402 int tmp_reg = spe_allocate_available_register(f);
403
404 /* Optional constant registers we might or might not end up using;
405 * if we do use them, make sure we only allocate them once by
406 * keeping a flag on each one.
407 */
408 int one_reg = -1;
409 int constR_reg = -1, constG_reg = -1, constB_reg = -1, constA_reg = -1;
410
411 ASSERT(blend->rt[0].blend_enable);
412
413 /* packed RGBA -> float colors */
414 unpack_colors(f, color_format, fbRGBA_reg,
415 fbR_reg, fbG_reg, fbB_reg, fbA_reg);
416
417 /*
418 * Compute Src RGB terms. We're actually looking for the value
419 * of (the appropriate RGB factors) * (the incoming source RGB color),
420 * because in some cases (like PIPE_BLENDFACTOR_ONE and
421 * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math.
422 */
423 switch (blend->rt[0].rgb_src_factor) {
424 case PIPE_BLENDFACTOR_ONE:
425 /* factors = (1,1,1), so term = (R,G,B) */
426 spe_move(f, term1R_reg, fragR_reg);
427 spe_move(f, term1G_reg, fragG_reg);
428 spe_move(f, term1B_reg, fragB_reg);
429 break;
430 case PIPE_BLENDFACTOR_ZERO:
431 /* factors = (0,0,0), so term = (0,0,0) */
432 spe_load_float(f, term1R_reg, 0.0f);
433 spe_load_float(f, term1G_reg, 0.0f);
434 spe_load_float(f, term1B_reg, 0.0f);
435 break;
436 case PIPE_BLENDFACTOR_SRC_COLOR:
437 /* factors = (R,G,B), so term = (R*R, G*G, B*B) */
438 spe_fm(f, term1R_reg, fragR_reg, fragR_reg);
439 spe_fm(f, term1G_reg, fragG_reg, fragG_reg);
440 spe_fm(f, term1B_reg, fragB_reg, fragB_reg);
441 break;
442 case PIPE_BLENDFACTOR_SRC_ALPHA:
443 /* factors = (A,A,A), so term = (R*A, G*A, B*A) */
444 spe_fm(f, term1R_reg, fragR_reg, fragA_reg);
445 spe_fm(f, term1G_reg, fragG_reg, fragA_reg);
446 spe_fm(f, term1B_reg, fragB_reg, fragA_reg);
447 break;
448 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
449 /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B))
450 * or in other words term = (R-R*R, G-G*G, B-B*B)
451 * fnms(a,b,c,d) computes a = d - b*c
452 */
453 spe_fnms(f, term1R_reg, fragR_reg, fragR_reg, fragR_reg);
454 spe_fnms(f, term1G_reg, fragG_reg, fragG_reg, fragG_reg);
455 spe_fnms(f, term1B_reg, fragB_reg, fragB_reg, fragB_reg);
456 break;
457 case PIPE_BLENDFACTOR_DST_COLOR:
458 /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */
459 spe_fm(f, term1R_reg, fragR_reg, fbR_reg);
460 spe_fm(f, term1G_reg, fragG_reg, fbG_reg);
461 spe_fm(f, term1B_reg, fragB_reg, fbB_reg);
462 break;
463 case PIPE_BLENDFACTOR_INV_DST_COLOR:
464 /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb))
465 * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb)
466 * fnms(a,b,c,d) computes a = d - b*c
467 */
468 spe_fnms(f, term1R_reg, fragR_reg, fbR_reg, fragR_reg);
469 spe_fnms(f, term1G_reg, fragG_reg, fbG_reg, fragG_reg);
470 spe_fnms(f, term1B_reg, fragB_reg, fbB_reg, fragB_reg);
471 break;
472 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
473 /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A))
474 * or term = (R-R*A,G-G*A,B-B*A)
475 * fnms(a,b,c,d) computes a = d - b*c
476 */
477 spe_fnms(f, term1R_reg, fragR_reg, fragA_reg, fragR_reg);
478 spe_fnms(f, term1G_reg, fragG_reg, fragA_reg, fragG_reg);
479 spe_fnms(f, term1B_reg, fragB_reg, fragA_reg, fragB_reg);
480 break;
481 case PIPE_BLENDFACTOR_DST_ALPHA:
482 /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */
483 spe_fm(f, term1R_reg, fragR_reg, fbA_reg);
484 spe_fm(f, term1G_reg, fragG_reg, fbA_reg);
485 spe_fm(f, term1B_reg, fragB_reg, fbA_reg);
486 break;
487 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
488 /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb))
489 * or term = (R-R*Afb,G-G*Afb,b-B*Afb)
490 * fnms(a,b,c,d) computes a = d - b*c
491 */
492 spe_fnms(f, term1R_reg, fragR_reg, fbA_reg, fragR_reg);
493 spe_fnms(f, term1G_reg, fragG_reg, fbA_reg, fragG_reg);
494 spe_fnms(f, term1B_reg, fragB_reg, fbA_reg, fragB_reg);
495 break;
496 case PIPE_BLENDFACTOR_CONST_COLOR:
497 /* We need the optional constant color registers */
498 setup_const_register(f, &constR_reg, blend_color->color[0]);
499 setup_const_register(f, &constG_reg, blend_color->color[1]);
500 setup_const_register(f, &constB_reg, blend_color->color[2]);
501 /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */
502 spe_fm(f, term1R_reg, fragR_reg, constR_reg);
503 spe_fm(f, term1G_reg, fragG_reg, constG_reg);
504 spe_fm(f, term1B_reg, fragB_reg, constB_reg);
505 break;
506 case PIPE_BLENDFACTOR_CONST_ALPHA:
507 /* we'll need the optional constant alpha register */
508 setup_const_register(f, &constA_reg, blend_color->color[3]);
509 /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */
510 spe_fm(f, term1R_reg, fragR_reg, constA_reg);
511 spe_fm(f, term1G_reg, fragG_reg, constA_reg);
512 spe_fm(f, term1B_reg, fragB_reg, constA_reg);
513 break;
514 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
515 /* We need the optional constant color registers */
516 setup_const_register(f, &constR_reg, blend_color->color[0]);
517 setup_const_register(f, &constG_reg, blend_color->color[1]);
518 setup_const_register(f, &constB_reg, blend_color->color[2]);
519 /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc))
520 * or term = (R-R*Rc, G-G*Gc, B-B*Bc)
521 * fnms(a,b,c,d) computes a = d - b*c
522 */
523 spe_fnms(f, term1R_reg, fragR_reg, constR_reg, fragR_reg);
524 spe_fnms(f, term1G_reg, fragG_reg, constG_reg, fragG_reg);
525 spe_fnms(f, term1B_reg, fragB_reg, constB_reg, fragB_reg);
526 break;
527 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
528 /* We need the optional constant color registers */
529 setup_const_register(f, &constR_reg, blend_color->color[0]);
530 setup_const_register(f, &constG_reg, blend_color->color[1]);
531 setup_const_register(f, &constB_reg, blend_color->color[2]);
532 /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac))
533 * or term = (R-R*Ac,G-G*Ac,B-B*Ac)
534 * fnms(a,b,c,d) computes a = d - b*c
535 */
536 spe_fnms(f, term1R_reg, fragR_reg, constA_reg, fragR_reg);
537 spe_fnms(f, term1G_reg, fragG_reg, constA_reg, fragG_reg);
538 spe_fnms(f, term1B_reg, fragB_reg, constA_reg, fragB_reg);
539 break;
540 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
541 /* We'll need the optional {1,1,1,1} register */
542 setup_const_register(f, &one_reg, 1.0f);
543 /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so
544 * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb))
545 * We could expand the term (as a*min(b,c) == min(a*b,a*c)
546 * as long as a is positive), but then we'd have to do three
547 * spe_float_min() functions instead of one, so this is simpler.
548 */
549 /* tmp = 1 - Afb */
550 spe_fs(f, tmp_reg, one_reg, fbA_reg);
551 /* tmp = min(A,tmp) */
552 spe_float_min(f, tmp_reg, fragA_reg, tmp_reg);
553 /* term = R*tmp */
554 spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
555 spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
556 spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
557 break;
558
559 /* These are special D3D cases involving a second color output
560 * from the fragment shader. I'm not sure we can support them
561 * yet... XXX
562 */
563 case PIPE_BLENDFACTOR_SRC1_COLOR:
564 case PIPE_BLENDFACTOR_SRC1_ALPHA:
565 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
566 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
567
568 default:
569 ASSERT(0);
570 }
571
572 /*
573 * Compute Src Alpha term. Like the above, we're looking for
574 * the full term A*factor, not just the factor itself, because
575 * in many cases we can avoid doing unnecessary multiplies.
576 */
577 switch (blend->rt[0].alpha_src_factor) {
578 case PIPE_BLENDFACTOR_ZERO:
579 /* factor = 0, so term = 0 */
580 spe_load_float(f, term1A_reg, 0.0f);
581 break;
582
583 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* fall through */
584 case PIPE_BLENDFACTOR_ONE:
585 /* factor = 1, so term = A */
586 spe_move(f, term1A_reg, fragA_reg);
587 break;
588
589 case PIPE_BLENDFACTOR_SRC_COLOR:
590 /* factor = A, so term = A*A */
591 spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
592 break;
593 case PIPE_BLENDFACTOR_SRC_ALPHA:
594 spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
595 break;
596
597 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */
598 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
599 /* factor = 1-A, so term = A*(1-A) = A-A*A */
600 /* fnms(a,b,c,d) computes a = d - b*c */
601 spe_fnms(f, term1A_reg, fragA_reg, fragA_reg, fragA_reg);
602 break;
603
604 case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */
605 case PIPE_BLENDFACTOR_DST_COLOR:
606 /* factor = Afb, so term = A*Afb */
607 spe_fm(f, term1A_reg, fragA_reg, fbA_reg);
608 break;
609
610 case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */
611 case PIPE_BLENDFACTOR_INV_DST_COLOR:
612 /* factor = 1-Afb, so term = A*(1-Afb) = A - A*Afb */
613 /* fnms(a,b,c,d) computes a = d - b*c */
614 spe_fnms(f, term1A_reg, fragA_reg, fbA_reg, fragA_reg);
615 break;
616
617 case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */
618 case PIPE_BLENDFACTOR_CONST_COLOR:
619 /* We need the optional constA_reg register */
620 setup_const_register(f, &constA_reg, blend_color->color[3]);
621 /* factor = Ac, so term = A*Ac */
622 spe_fm(f, term1A_reg, fragA_reg, constA_reg);
623 break;
624
625 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */
626 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
627 /* We need the optional constA_reg register */
628 setup_const_register(f, &constA_reg, blend_color->color[3]);
629 /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */
630 /* fnms(a,b,c,d) computes a = d - b*c */
631 spe_fnms(f, term1A_reg, fragA_reg, constA_reg, fragA_reg);
632 break;
633
634 /* These are special D3D cases involving a second color output
635 * from the fragment shader. I'm not sure we can support them
636 * yet... XXX
637 */
638 case PIPE_BLENDFACTOR_SRC1_COLOR:
639 case PIPE_BLENDFACTOR_SRC1_ALPHA:
640 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
641 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
642 default:
643 ASSERT(0);
644 }
645
646 /*
647 * Compute Dest RGB term. Like the above, we're looking for
648 * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because
649 * in many cases we can avoid doing unnecessary multiplies.
650 */
651 switch (blend->rt[0].rgb_dst_factor) {
652 case PIPE_BLENDFACTOR_ONE:
653 /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */
654 spe_move(f, term2R_reg, fbR_reg);
655 spe_move(f, term2G_reg, fbG_reg);
656 spe_move(f, term2B_reg, fbB_reg);
657 break;
658 case PIPE_BLENDFACTOR_ZERO:
659 /* factor s= (0,0,0), so term = (0,0,0) */
660 spe_load_float(f, term2R_reg, 0.0f);
661 spe_load_float(f, term2G_reg, 0.0f);
662 spe_load_float(f, term2B_reg, 0.0f);
663 break;
664 case PIPE_BLENDFACTOR_SRC_COLOR:
665 /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */
666 spe_fm(f, term2R_reg, fbR_reg, fragR_reg);
667 spe_fm(f, term2G_reg, fbG_reg, fragG_reg);
668 spe_fm(f, term2B_reg, fbB_reg, fragB_reg);
669 break;
670 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
671 /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B))
672 * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B)
673 * fnms(a,b,c,d) computes a = d - b*c
674 */
675 spe_fnms(f, term2R_reg, fragR_reg, fbR_reg, fbR_reg);
676 spe_fnms(f, term2G_reg, fragG_reg, fbG_reg, fbG_reg);
677 spe_fnms(f, term2B_reg, fragB_reg, fbB_reg, fbB_reg);
678 break;
679 case PIPE_BLENDFACTOR_SRC_ALPHA:
680 /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */
681 spe_fm(f, term2R_reg, fbR_reg, fragA_reg);
682 spe_fm(f, term2G_reg, fbG_reg, fragA_reg);
683 spe_fm(f, term2B_reg, fbB_reg, fragA_reg);
684 break;
685 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
686 /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */
687 /* fnms(a,b,c,d) computes a = d - b*c */
688 spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg);
689 spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg);
690 spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg);
691 break;
692 case PIPE_BLENDFACTOR_DST_COLOR:
693 /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */
694 spe_fm(f, term2R_reg, fbR_reg, fbR_reg);
695 spe_fm(f, term2G_reg, fbG_reg, fbG_reg);
696 spe_fm(f, term2B_reg, fbB_reg, fbB_reg);
697 break;
698 case PIPE_BLENDFACTOR_INV_DST_COLOR:
699 /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb))
700 * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb)
701 * fnms(a,b,c,d) computes a = d - b*c
702 */
703 spe_fnms(f, term2R_reg, fbR_reg, fbR_reg, fbR_reg);
704 spe_fnms(f, term2G_reg, fbG_reg, fbG_reg, fbG_reg);
705 spe_fnms(f, term2B_reg, fbB_reg, fbB_reg, fbB_reg);
706 break;
707
708 case PIPE_BLENDFACTOR_DST_ALPHA:
709 /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */
710 spe_fm(f, term2R_reg, fbR_reg, fbA_reg);
711 spe_fm(f, term2G_reg, fbG_reg, fbA_reg);
712 spe_fm(f, term2B_reg, fbB_reg, fbA_reg);
713 break;
714 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
715 /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb))
716 * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb)
717 * fnms(a,b,c,d) computes a = d - b*c
718 */
719 spe_fnms(f, term2R_reg, fbR_reg, fbA_reg, fbR_reg);
720 spe_fnms(f, term2G_reg, fbG_reg, fbA_reg, fbG_reg);
721 spe_fnms(f, term2B_reg, fbB_reg, fbA_reg, fbB_reg);
722 break;
723 case PIPE_BLENDFACTOR_CONST_COLOR:
724 /* We need the optional constant color registers */
725 setup_const_register(f, &constR_reg, blend_color->color[0]);
726 setup_const_register(f, &constG_reg, blend_color->color[1]);
727 setup_const_register(f, &constB_reg, blend_color->color[2]);
728 /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */
729 spe_fm(f, term2R_reg, fbR_reg, constR_reg);
730 spe_fm(f, term2G_reg, fbG_reg, constG_reg);
731 spe_fm(f, term2B_reg, fbB_reg, constB_reg);
732 break;
733 case PIPE_BLENDFACTOR_CONST_ALPHA:
734 /* we'll need the optional constant alpha register */
735 setup_const_register(f, &constA_reg, blend_color->color[3]);
736 /* factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */
737 spe_fm(f, term2R_reg, fbR_reg, constA_reg);
738 spe_fm(f, term2G_reg, fbG_reg, constA_reg);
739 spe_fm(f, term2B_reg, fbB_reg, constA_reg);
740 break;
741 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
742 /* We need the optional constant color registers */
743 setup_const_register(f, &constR_reg, blend_color->color[0]);
744 setup_const_register(f, &constG_reg, blend_color->color[1]);
745 setup_const_register(f, &constB_reg, blend_color->color[2]);
746 /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc))
747 * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc)
748 * fnms(a,b,c,d) computes a = d - b*c
749 */
750 spe_fnms(f, term2R_reg, fbR_reg, constR_reg, fbR_reg);
751 spe_fnms(f, term2G_reg, fbG_reg, constG_reg, fbG_reg);
752 spe_fnms(f, term2B_reg, fbB_reg, constB_reg, fbB_reg);
753 break;
754 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
755 /* We need the optional constant color registers */
756 setup_const_register(f, &constR_reg, blend_color->color[0]);
757 setup_const_register(f, &constG_reg, blend_color->color[1]);
758 setup_const_register(f, &constB_reg, blend_color->color[2]);
759 /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac))
760 * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac)
761 * fnms(a,b,c,d) computes a = d - b*c
762 */
763 spe_fnms(f, term2R_reg, fbR_reg, constA_reg, fbR_reg);
764 spe_fnms(f, term2G_reg, fbG_reg, constA_reg, fbG_reg);
765 spe_fnms(f, term2B_reg, fbB_reg, constA_reg, fbB_reg);
766 break;
767 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest RGB */
768 ASSERT(0);
769 break;
770
771 /* These are special D3D cases involving a second color output
772 * from the fragment shader. I'm not sure we can support them
773 * yet... XXX
774 */
775 case PIPE_BLENDFACTOR_SRC1_COLOR:
776 case PIPE_BLENDFACTOR_SRC1_ALPHA:
777 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
778 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
779
780 default:
781 ASSERT(0);
782 }
783
784 /*
785 * Compute Dest Alpha term. Like the above, we're looking for
786 * the full term Afb*factor, not just the factor itself, because
787 * in many cases we can avoid doing unnecessary multiplies.
788 */
789 switch (blend->rt[0].alpha_dst_factor) {
790 case PIPE_BLENDFACTOR_ONE:
791 /* factor = 1, so term = Afb */
792 spe_move(f, term2A_reg, fbA_reg);
793 break;
794 case PIPE_BLENDFACTOR_ZERO:
795 /* factor = 0, so term = 0 */
796 spe_load_float(f, term2A_reg, 0.0f);
797 break;
798
799 case PIPE_BLENDFACTOR_SRC_ALPHA: /* fall through */
800 case PIPE_BLENDFACTOR_SRC_COLOR:
801 /* factor = A, so term = Afb*A */
802 spe_fm(f, term2A_reg, fbA_reg, fragA_reg);
803 break;
804
805 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */
806 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
807 /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */
808 /* fnms(a,b,c,d) computes a = d - b*c */
809 spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg);
810 break;
811
812 case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */
813 case PIPE_BLENDFACTOR_DST_COLOR:
814 /* factor = Afb, so term = Afb*Afb */
815 spe_fm(f, term2A_reg, fbA_reg, fbA_reg);
816 break;
817
818 case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */
819 case PIPE_BLENDFACTOR_INV_DST_COLOR:
820 /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */
821 /* fnms(a,b,c,d) computes a = d - b*c */
822 spe_fnms(f, term2A_reg, fbA_reg, fbA_reg, fbA_reg);
823 break;
824
825 case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */
826 case PIPE_BLENDFACTOR_CONST_COLOR:
827 /* We need the optional constA_reg register */
828 setup_const_register(f, &constA_reg, blend_color->color[3]);
829 /* factor = Ac, so term = Afb*Ac */
830 spe_fm(f, term2A_reg, fbA_reg, constA_reg);
831 break;
832
833 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */
834 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
835 /* We need the optional constA_reg register */
836 setup_const_register(f, &constA_reg, blend_color->color[3]);
837 /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */
838 /* fnms(a,b,c,d) computes a = d - b*c */
839 spe_fnms(f, term2A_reg, fbA_reg, constA_reg, fbA_reg);
840 break;
841
842 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest alpha */
843 ASSERT(0);
844 break;
845
846 /* These are special D3D cases involving a second color output
847 * from the fragment shader. I'm not sure we can support them
848 * yet... XXX
849 */
850 case PIPE_BLENDFACTOR_SRC1_COLOR:
851 case PIPE_BLENDFACTOR_SRC1_ALPHA:
852 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
853 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
854 default:
855 ASSERT(0);
856 }
857
858 /*
859 * Combine Src/Dest RGB terms as per the blend equation.
860 */
861 switch (blend->rt[0].rgb_func) {
862 case PIPE_BLEND_ADD:
863 spe_fa(f, fragR_reg, term1R_reg, term2R_reg);
864 spe_fa(f, fragG_reg, term1G_reg, term2G_reg);
865 spe_fa(f, fragB_reg, term1B_reg, term2B_reg);
866 break;
867 case PIPE_BLEND_SUBTRACT:
868 spe_fs(f, fragR_reg, term1R_reg, term2R_reg);
869 spe_fs(f, fragG_reg, term1G_reg, term2G_reg);
870 spe_fs(f, fragB_reg, term1B_reg, term2B_reg);
871 break;
872 case PIPE_BLEND_REVERSE_SUBTRACT:
873 spe_fs(f, fragR_reg, term2R_reg, term1R_reg);
874 spe_fs(f, fragG_reg, term2G_reg, term1G_reg);
875 spe_fs(f, fragB_reg, term2B_reg, term1B_reg);
876 break;
877 case PIPE_BLEND_MIN:
878 spe_float_min(f, fragR_reg, term1R_reg, term2R_reg);
879 spe_float_min(f, fragG_reg, term1G_reg, term2G_reg);
880 spe_float_min(f, fragB_reg, term1B_reg, term2B_reg);
881 break;
882 case PIPE_BLEND_MAX:
883 spe_float_max(f, fragR_reg, term1R_reg, term2R_reg);
884 spe_float_max(f, fragG_reg, term1G_reg, term2G_reg);
885 spe_float_max(f, fragB_reg, term1B_reg, term2B_reg);
886 break;
887 default:
888 ASSERT(0);
889 }
890
891 /*
892 * Combine Src/Dest A term
893 */
894 switch (blend->rt[0].alpha_func) {
895 case PIPE_BLEND_ADD:
896 spe_fa(f, fragA_reg, term1A_reg, term2A_reg);
897 break;
898 case PIPE_BLEND_SUBTRACT:
899 spe_fs(f, fragA_reg, term1A_reg, term2A_reg);
900 break;
901 case PIPE_BLEND_REVERSE_SUBTRACT:
902 spe_fs(f, fragA_reg, term2A_reg, term1A_reg);
903 break;
904 case PIPE_BLEND_MIN:
905 spe_float_min(f, fragA_reg, term1A_reg, term2A_reg);
906 break;
907 case PIPE_BLEND_MAX:
908 spe_float_max(f, fragA_reg, term1A_reg, term2A_reg);
909 break;
910 default:
911 ASSERT(0);
912 }
913
914 spe_release_register(f, term1R_reg);
915 spe_release_register(f, term1G_reg);
916 spe_release_register(f, term1B_reg);
917 spe_release_register(f, term1A_reg);
918
919 spe_release_register(f, term2R_reg);
920 spe_release_register(f, term2G_reg);
921 spe_release_register(f, term2B_reg);
922 spe_release_register(f, term2A_reg);
923
924 spe_release_register(f, fbR_reg);
925 spe_release_register(f, fbG_reg);
926 spe_release_register(f, fbB_reg);
927 spe_release_register(f, fbA_reg);
928
929 spe_release_register(f, tmp_reg);
930
931 /* Free any optional registers that actually got used */
932 release_const_register(f, one_reg);
933 release_const_register(f, constR_reg);
934 release_const_register(f, constG_reg);
935 release_const_register(f, constB_reg);
936 release_const_register(f, constA_reg);
937}
938
939
940static void
941gen_logicop(const struct pipe_blend_state *blend,
942 struct spe_function *f,
943 int fragRGBA_reg, int fbRGBA_reg)
944{
945 /* We've got four 32-bit RGBA packed pixels in each of
946 * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
947 * reds, greens, blues, and alphas.
948 * */
949 ASSERT(blend->logicop_enable);
950
951 switch(blend->logicop_func) {
952 case PIPE_LOGICOP_CLEAR: /* 0 */
953 spe_zero(f, fragRGBA_reg);
954 break;
955 case PIPE_LOGICOP_NOR: /* ~(s | d) */
956 spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
957 break;
958 case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */
959 /* andc R, A, B computes R = A & ~B */
960 spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg);
961 break;
962 case PIPE_LOGICOP_COPY_INVERTED: /* ~s */
963 spe_complement(f, fragRGBA_reg, fragRGBA_reg);
964 break;
965 case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */
966 /* andc R, A, B computes R = A & ~B */
967 spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
968 break;
969 case PIPE_LOGICOP_INVERT: /* ~d */
970 /* Note that (A nor A) == ~(A|A) == ~A */
971 spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg);
972 break;
973 case PIPE_LOGICOP_XOR: /* s ^ d */
974 spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
975 break;
976 case PIPE_LOGICOP_NAND: /* ~(s & d) */
977 spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
978 break;
979 case PIPE_LOGICOP_AND: /* s & d */
980 spe_and(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
981 break;
982 case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */
983 spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
984 spe_complement(f, fragRGBA_reg, fragRGBA_reg);
985 break;
986 case PIPE_LOGICOP_NOOP: /* d */
987 spe_move(f, fragRGBA_reg, fbRGBA_reg);
988 break;
989 case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */
990 /* orc R, A, B computes R = A | ~B */
991 spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg);
992 break;
993 case PIPE_LOGICOP_COPY: /* s */
994 break;
995 case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */
996 /* orc R, A, B computes R = A | ~B */
997 spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
998 break;
999 case PIPE_LOGICOP_OR: /* s | d */
1000 spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
1001 break;
1002 case PIPE_LOGICOP_SET: /* 1 */
1003 spe_load_int(f, fragRGBA_reg, 0xffffffff);
1004 break;
1005 default:
1006 ASSERT(0);
1007 }
1008}
1009
1010
1011/**
1012 * Generate code to pack a quad of float colors into four 32-bit integers.
1013 *
1014 * \param f SPE function to append instruction onto.
1015 * \param color_format the dest color packing format
1016 * \param r_reg register containing four red values (in/clobbered)
1017 * \param g_reg register containing four green values (in/clobbered)
1018 * \param b_reg register containing four blue values (in/clobbered)
1019 * \param a_reg register containing four alpha values (in/clobbered)
1020 * \param rgba_reg register to store the packed RGBA colors (out)
1021 */
1022static void
1023gen_pack_colors(struct spe_function *f,
1024 enum pipe_format color_format,
1025 int r_reg, int g_reg, int b_reg, int a_reg,
1026 int rgba_reg)
1027{
1028 int rg_reg = spe_allocate_available_register(f);
1029 int ba_reg = spe_allocate_available_register(f);
1030
1031 /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */
1032 spe_cfltu(f, r_reg, r_reg, 32);
1033 spe_cfltu(f, g_reg, g_reg, 32);
1034 spe_cfltu(f, b_reg, b_reg, 32);
1035 spe_cfltu(f, a_reg, a_reg, 32);
1036
1037 /* Shift the most significant bytes to the least significant positions.
1038 * I.e.: reg = reg >> 24
1039 */
1040 spe_rotmi(f, r_reg, r_reg, -24);
1041 spe_rotmi(f, g_reg, g_reg, -24);
1042 spe_rotmi(f, b_reg, b_reg, -24);
1043 spe_rotmi(f, a_reg, a_reg, -24);
1044
1045 /* Shift the color bytes according to the surface format */
1046 if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) {
1047 spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */
1048 spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */
1049 spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */
1050 }
1051 else if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) {
1052 spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */
1053 spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */
1054 spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */
1055 }
1056 else {
1057 ASSERT(0);
1058 }
1059
1060 /* Merge red, green, blue, alpha registers to make packed RGBA colors.
1061 * Eg: after shifting according to color_format we might have:
1062 * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000}
1063 * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600}
1064 * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099}
1065 * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000}
1066 * OR-ing all those together gives us four packed colors:
1067 * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699}
1068 */
1069 spe_or(f, rg_reg, r_reg, g_reg);
1070 spe_or(f, ba_reg, a_reg, b_reg);
1071 spe_or(f, rgba_reg, rg_reg, ba_reg);
1072
1073 spe_release_register(f, rg_reg);
1074 spe_release_register(f, ba_reg);
1075}
1076
1077
1078static void
1079gen_colormask(struct spe_function *f,
1080 uint colormask,
1081 enum pipe_format color_format,
1082 int fragRGBA_reg, int fbRGBA_reg)
1083{
1084 /* We've got four 32-bit RGBA packed pixels in each of
1085 * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
1086 * reds, greens, blues, and alphas. Further, the pixels
1087 * are packed according to the given color format, not
1088 * necessarily RGBA...
1089 */
1090 uint r_mask;
1091 uint g_mask;
1092 uint b_mask;
1093 uint a_mask;
1094
1095 /* Calculate exactly where the bits for any particular color
1096 * end up, so we can mask them correctly.
1097 */
1098 switch(color_format) {
1099 case PIPE_FORMAT_B8G8R8A8_UNORM:
1100 /* ARGB */
1101 a_mask = 0xff000000;
1102 r_mask = 0x00ff0000;
1103 g_mask = 0x0000ff00;
1104 b_mask = 0x000000ff;
1105 break;
1106 case PIPE_FORMAT_A8R8G8B8_UNORM:
1107 /* BGRA */
1108 b_mask = 0xff000000;
1109 g_mask = 0x00ff0000;
1110 r_mask = 0x0000ff00;
1111 a_mask = 0x000000ff;
1112 break;
1113 default:
1114 ASSERT(0);
1115 }
1116
1117 /* For each R, G, B, and A component we're supposed to mask out,
1118 * clear its bits. Then our mask operation later will work
1119 * as expected.
1120 */
1121 if (!(colormask & PIPE_MASK_R)) {
1122 r_mask = 0;
1123 }
1124 if (!(colormask & PIPE_MASK_G)) {
1125 g_mask = 0;
1126 }
1127 if (!(colormask & PIPE_MASK_B)) {
1128 b_mask = 0;
1129 }
1130 if (!(colormask & PIPE_MASK_A)) {
1131 a_mask = 0;
1132 }
1133
1134 /* Get a temporary register to hold the mask that will be applied
1135 * to the fragment
1136 */
1137 int colormask_reg = spe_allocate_available_register(f);
1138
1139 /* The actual mask we're going to use is an OR of the remaining R, G, B,
1140 * and A masks. Load the result value into our temporary register.
1141 */
1142 spe_load_uint(f, colormask_reg, r_mask | g_mask | b_mask | a_mask);
1143
1144 /* Use the mask register to select between the fragment color
1145 * values and the frame buffer color values. Wherever the
1146 * mask has a 0 bit, the current frame buffer color should override
1147 * the fragment color. Wherever the mask has a 1 bit, the
1148 * fragment color should persevere. The Select Bits (selb rt, rA, rB, rM)
1149 * instruction will select bits from its first operand rA wherever the
1150 * the mask bits rM are 0, and from its second operand rB wherever the
1151 * mask bits rM are 1. That means that the frame buffer color is the
1152 * first operand, and the fragment color the second.
1153 */
1154 spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg);
1155
1156 /* Release the temporary register and we're done */
1157 spe_release_register(f, colormask_reg);
1158}
1159
1160
1161/**
1162 * This function is annoyingly similar to gen_depth_test(), above, except
1163 * that instead of comparing two varying values (i.e. fragment and buffer),
1164 * we're comparing a varying value with a static value. As such, we have
1165 * access to the Compare Immediate instructions where we don't in
1166 * gen_depth_test(), which is what makes us very different.
1167 *
1168 * There's some added complexity if there's a non-trivial state->mask
1169 * value; then stencil and reference both must be masked
1170 *
1171 * The return value in the stencil_pass_reg is a bitmask of valid
1172 * fragments that also passed the stencil test. The bitmask of valid
1173 * fragments that failed would be found in
1174 * (fragment_mask_reg & ~stencil_pass_reg).
1175 */
1176static void
1177gen_stencil_test(struct spe_function *f,
1178 const struct pipe_stencil_state *state,
1179 const unsigned ref_value,
1180 uint stencil_max_value,
1181 int fragment_mask_reg,
1182 int fbS_reg,
1183 int stencil_pass_reg)
1184{
1185 /* Generate code that puts the set of passing fragments into the
1186 * stencil_pass_reg register, taking into account whether each fragment
1187 * was active to begin with.
1188 */
1189 switch (state->func) {
1190 case PIPE_FUNC_EQUAL:
1191 if (state->valuemask == stencil_max_value) {
1192 /* stencil_pass = fragment_mask & (s == reference) */
1193 spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, ref_value);
1194 spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
1195 }
1196 else {
1197 /* stencil_pass = fragment_mask & ((s&mask) == (reference&mask)) */
1198 uint tmp_masked_stencil = spe_allocate_available_register(f);
1199 spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
1200 spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil,
1201 state->valuemask & ref_value);
1202 spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
1203 spe_release_register(f, tmp_masked_stencil);
1204 }
1205 break;
1206
1207 case PIPE_FUNC_NOTEQUAL:
1208 if (state->valuemask == stencil_max_value) {
1209 /* stencil_pass = fragment_mask & ~(s == reference) */
1210 spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, ref_value);
1211 spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
1212 }
1213 else {
1214 /* stencil_pass = fragment_mask & ~((s&mask) == (reference&mask)) */
1215 int tmp_masked_stencil = spe_allocate_available_register(f);
1216 spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
1217 spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil,
1218 state->valuemask & ref_value);
1219 spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
1220 spe_release_register(f, tmp_masked_stencil);
1221 }
1222 break;
1223
1224 case PIPE_FUNC_LESS:
1225 if (state->valuemask == stencil_max_value) {
1226 /* stencil_pass = fragment_mask & (reference < s) */
1227 spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, ref_value);
1228 spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
1229 }
1230 else {
1231 /* stencil_pass = fragment_mask & ((reference&mask) < (s & mask)) */
1232 int tmp_masked_stencil = spe_allocate_available_register(f);
1233 spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
1234 spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil,
1235 state->valuemask & ref_value);
1236 spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
1237 spe_release_register(f, tmp_masked_stencil);
1238 }
1239 break;
1240
1241 case PIPE_FUNC_GREATER:
1242 if (state->valuemask == stencil_max_value) {
1243 /* stencil_pass = fragment_mask & (reference > s) */
1244 /* There's no convenient Compare Less Than Immediate instruction, so
1245 * we'll have to do this one the harder way, by loading a register and
1246 * comparing directly. Compare Logical Greater Than Word (clgt)
1247 * treats its operands as unsigned - no sign extension.
1248 */
1249 int tmp_reg = spe_allocate_available_register(f);
1250 spe_load_uint(f, tmp_reg, ref_value);
1251 spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg);
1252 spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
1253 spe_release_register(f, tmp_reg);
1254 }
1255 else {
1256 /* stencil_pass = fragment_mask & ((reference&mask) > (s&mask)) */
1257 int tmp_reg = spe_allocate_available_register(f);
1258 int tmp_masked_stencil = spe_allocate_available_register(f);
1259 spe_load_uint(f, tmp_reg, state->valuemask & ref_value);
1260 spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
1261 spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil);
1262 spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
1263 spe_release_register(f, tmp_reg);
1264 spe_release_register(f, tmp_masked_stencil);
1265 }
1266 break;
1267
1268 case PIPE_FUNC_GEQUAL:
1269 if (state->valuemask == stencil_max_value) {
1270 /* stencil_pass = fragment_mask & (reference >= s)
1271 * = fragment_mask & ~(s > reference) */
1272 spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg,
1273 ref_value);
1274 spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
1275 }
1276 else {
1277 /* stencil_pass = fragment_mask & ~((s&mask) > (reference&mask)) */
1278 int tmp_masked_stencil = spe_allocate_available_register(f);
1279 spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
1280 spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil,
1281 state->valuemask & ref_value);
1282 spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
1283 spe_release_register(f, tmp_masked_stencil);
1284 }
1285 break;
1286
1287 case PIPE_FUNC_LEQUAL:
1288 if (state->valuemask == stencil_max_value) {
      /* stencil_pass = fragment_mask & (reference <= s)
1290 * = fragment_mask & ~(reference > s) */
1291 /* As above, we have to do this by loading a register */
1292 int tmp_reg = spe_allocate_available_register(f);
1293 spe_load_uint(f, tmp_reg, ref_value);
1294 spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg);
1295 spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
1296 spe_release_register(f, tmp_reg);
1297 }
1298 else {
1299 /* stencil_pass = fragment_mask & ~((reference&mask) > (s&mask)) */
1300 int tmp_reg = spe_allocate_available_register(f);
1301 int tmp_masked_stencil = spe_allocate_available_register(f);
1302 spe_load_uint(f, tmp_reg, ref_value & state->valuemask);
1303 spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
1304 spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil);
1305 spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
1306 spe_release_register(f, tmp_reg);
1307 spe_release_register(f, tmp_masked_stencil);
1308 }
1309 break;
1310
1311 case PIPE_FUNC_NEVER:
1312 /* stencil_pass = fragment_mask & 0 = 0 */
1313 spe_load_uint(f, stencil_pass_reg, 0);
1314 break;
1315
1316 case PIPE_FUNC_ALWAYS:
1317 /* stencil_pass = fragment_mask & 1 = fragment_mask */
1318 spe_move(f, stencil_pass_reg, fragment_mask_reg);
1319 break;
1320 }
1321
1322 /* The fragments that passed the stencil test are now in stencil_pass_reg.
1323 * The fragments that failed would be (fragment_mask_reg & ~stencil_pass_reg).
1324 */
1325}
1326
1327
1328/**
1329 * This function generates code that calculates a set of new stencil values
1330 * given the earlier values and the operation to apply. It does not
1331 * apply any tests. It is intended to be called up to 3 times
1332 * (for the stencil fail operation, for the stencil pass-z fail operation,
1333 * and for the stencil pass-z pass operation) to collect up to three
1334 * possible sets of values, and for the caller to combine them based
1335 * on the result of the tests.
1336 *
1337 * stencil_max_value should be (2^n - 1) where n is the number of bits
1338 * in the stencil buffer - in other words, it should be usable as a mask.
1339 */
1340static void
1341gen_stencil_values(struct spe_function *f,
1342 uint stencil_op,
1343 uint stencil_ref_value,
1344 uint stencil_max_value,
1345 int fbS_reg,
1346 int newS_reg)
1347{
1348 /* The code below assumes that newS_reg and fbS_reg are not the same
1349 * register; if they can be, the calculations below will have to use
1350 * an additional temporary register. For now, mark the assumption
1351 * with an assertion that will fail if they are the same.
1352 */
1353 ASSERT(fbS_reg != newS_reg);
1354
1355 /* The code also assumes that the stencil_max_value is of the form
1356 * 2^n-1 and can therefore be used as a mask for the valid bits in
1357 * addition to a maximum. Make sure this is the case as well.
1358 * The clever math below exploits the fact that incrementing a
1359 * binary number serves to flip all the bits of a number starting at
1360 * the LSB and continuing to (and including) the first zero bit
1361 * found. That means that a number and its increment will always
1362 * have at least one bit in common (the high order bit, if nothing
1363 * else) *unless* the number is zero, *or* the number is of a form
1364 * consisting of some number of 1s in the low-order bits followed
1365 * by nothing but 0s in the high-order bits. The latter case
1366 * implies it's of the form 2^n-1.
1367 */
1368 ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0);
1369
1370 switch(stencil_op) {
1371 case PIPE_STENCIL_OP_KEEP:
1372 /* newS = S */
1373 spe_move(f, newS_reg, fbS_reg);
1374 break;
1375
1376 case PIPE_STENCIL_OP_ZERO:
1377 /* newS = 0 */
1378 spe_zero(f, newS_reg);
1379 break;
1380
1381 case PIPE_STENCIL_OP_REPLACE:
1382 /* newS = stencil reference value */
1383 spe_load_uint(f, newS_reg, stencil_ref_value);
1384 break;
1385
1386 case PIPE_STENCIL_OP_INCR: {
1387 /* newS = (s == max ? max : s + 1) */
1388 int equals_reg = spe_allocate_available_register(f);
1389
1390 spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value);
1391 /* Add Word Immediate computes rT = rA + 10-bit signed immediate */
1392 spe_ai(f, newS_reg, fbS_reg, 1);
1393 /* Select from the current value or the new value based on the equality test */
1394 spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg);
1395
1396 spe_release_register(f, equals_reg);
1397 break;
1398 }
1399 case PIPE_STENCIL_OP_DECR: {
1400 /* newS = (s == 0 ? 0 : s - 1) */
1401 int equals_reg = spe_allocate_available_register(f);
1402
1403 spe_compare_equal_uint(f, equals_reg, fbS_reg, 0);
1404 /* Add Word Immediate with a (-1) value works */
1405 spe_ai(f, newS_reg, fbS_reg, -1);
1406 /* Select from the current value or the new value based on the equality test */
1407 spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg);
1408
1409 spe_release_register(f, equals_reg);
1410 break;
1411 }
1412 case PIPE_STENCIL_OP_INCR_WRAP:
1413 /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can
1414 * do a normal add and mask off the correct bits
1415 */
1416 spe_ai(f, newS_reg, fbS_reg, 1);
1417 spe_and_uint(f, newS_reg, newS_reg, stencil_max_value);
1418 break;
1419
1420 case PIPE_STENCIL_OP_DECR_WRAP:
1421 /* newS = (s == 0 ? max : s - 1), but we'll pull the same mask trick as above */
1422 spe_ai(f, newS_reg, fbS_reg, -1);
1423 spe_and_uint(f, newS_reg, newS_reg, stencil_max_value);
1424 break;
1425
1426 case PIPE_STENCIL_OP_INVERT:
1427 /* newS = ~s. We take advantage of the mask/max value to invert only
1428 * the valid bits for the field so we don't have to do an extra "and".
1429 */
1430 spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value);
1431 break;
1432
1433 default:
1434 ASSERT(0);
1435 }
1436}
1437
1438
1439/**
1440 * This function generates code to get all the necessary possible
1441 * stencil values. For each of the output registers (fail_reg,
1442 * zfail_reg, and zpass_reg), it either allocates a new register
1443 * and calculates a new set of values based on the stencil operation,
1444 * or it reuses a register allocation and calculation done for an
1445 * earlier (matching) operation, or it reuses the fbS_reg register
1446 * (if the stencil operation is KEEP, which doesn't change the
1447 * stencil buffer).
1448 *
1449 * Since this function allocates a variable number of registers,
1450 * to avoid incurring complex logic to free them, they should
1451 * be allocated after a spe_allocate_register_set() call
1452 * and released by the corresponding spe_release_register_set() call.
1453 */
1454static void
1455gen_get_stencil_values(struct spe_function *f,
1456 const struct pipe_stencil_state *stencil,
1457 const unsigned ref_value,
1458 const uint depth_enabled,
1459 int fbS_reg,
1460 int *fail_reg,
1461 int *zfail_reg,
1462 int *zpass_reg)
1463{
1464 uint zfail_op;
1465
1466 /* Stenciling had better be enabled here */
1467 ASSERT(stencil->enabled);
1468
1469 /* If the depth test is not enabled, it is treated as though it always
1470 * passes, which means that the zfail_op is not considered - a
1471 * failing stencil test triggers the fail_op, and a passing one
1472 * triggers the zpass_op
1473 *
1474 * As an optimization, override calculation of the zfail_op values
1475 * if they aren't going to be used. By setting the value of
1476 * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed
1477 * to match the incoming stencil values, and no calculation will
1478 * be done.
1479 */
1480 if (depth_enabled) {
1481 zfail_op = stencil->zfail_op;
1482 }
1483 else {
1484 zfail_op = PIPE_STENCIL_OP_KEEP;
1485 }
1486
1487 /* One-sided or front-facing stencil */
1488 if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) {
1489 *fail_reg = fbS_reg;
1490 }
1491 else {
1492 *fail_reg = spe_allocate_available_register(f);
1493 gen_stencil_values(f, stencil->fail_op, ref_value,
1494 0xff, fbS_reg, *fail_reg);
1495 }
1496
1497 /* Check the possibly overridden value, not the structure value */
1498 if (zfail_op == PIPE_STENCIL_OP_KEEP) {
1499 *zfail_reg = fbS_reg;
1500 }
1501 else if (zfail_op == stencil->fail_op) {
1502 *zfail_reg = *fail_reg;
1503 }
1504 else {
1505 *zfail_reg = spe_allocate_available_register(f);
1506 gen_stencil_values(f, stencil->zfail_op, ref_value,
1507 0xff, fbS_reg, *zfail_reg);
1508 }
1509
1510 if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
1511 *zpass_reg = fbS_reg;
1512 }
1513 else if (stencil->zpass_op == stencil->fail_op) {
1514 *zpass_reg = *fail_reg;
1515 }
1516 else if (stencil->zpass_op == zfail_op) {
1517 *zpass_reg = *zfail_reg;
1518 }
1519 else {
1520 *zpass_reg = spe_allocate_available_register(f);
1521 gen_stencil_values(f, stencil->zpass_op, ref_value,
1522 0xff, fbS_reg, *zpass_reg);
1523 }
1524}
1525
1526/**
1527 * Note that fbZ_reg may *not* be set on entry, if in fact
1528 * the depth test is not enabled. This function must not use
1529 * the register if depth is not enabled.
1530 */
1531static boolean
1532gen_stencil_depth_test(struct spe_function *f,
1533 const struct pipe_depth_stencil_alpha_state *dsa,
1534 const struct pipe_stencil_ref *stencil_ref,
1535 const uint facing,
1536 const int mask_reg, const int fragZ_reg,
1537 const int fbZ_reg, const int fbS_reg)
1538{
1539 /* True if we've generated code that could require writeback to the
1540 * depth and/or stencil buffers
1541 */
1542 boolean modified_buffers = FALSE;
1543
1544 boolean need_to_calculate_stencil_values;
1545 boolean need_to_writemask_stencil_values;
1546
1547 struct pipe_stencil_state *stencil;
1548
1549 /* Registers. We may or may not actually allocate these, depending
1550 * on whether the state values indicate that we need them.
1551 */
1552 int stencil_pass_reg, stencil_fail_reg;
1553 int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values;
1554 int stencil_writemask_reg;
1555 int zmask_reg;
1556 int newS_reg;
1557 unsigned ref_value;
1558
1559 /* Stenciling is quite complex: up to six different configurable stencil
1560 * operations/calculations can be required (three each for front-facing
1561 * and back-facing fragments). Many of those operations will likely
1562 * be identical, so there's good reason to try to avoid calculating
1563 * the same values more than once (which unfortunately makes the code less
1564 * straightforward).
1565 *
1566 * To make register management easier, we start a new
1567 * register set; we can release all the registers in the set at
1568 * once, and avoid having to keep track of exactly which registers
1569 * we allocate. We can still allocate and free registers as
1570 * desired (if we know we no longer need a register), but we don't
1571 * have to spend the complexity to track the more difficult variant
1572 * register usage scenarios.
1573 */
1574 spe_comment(f, 0, "Allocating stencil register set");
1575 spe_allocate_register_set(f);
1576
1577 /* The facing we're given is the fragment facing; it doesn't
1578 * exactly match the stencil facing. If stencil is enabled,
1579 * but two-sided stencil is *not* enabled, we use the same
1580 * stencil settings for both front- and back-facing fragments.
1581 * We only use the "back-facing" stencil for backfacing fragments
1582 * if two-sided stenciling is enabled.
1583 */
1584 if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) {
1585 stencil = &dsa->stencil[1];
1586 ref_value = stencil_ref->ref_value[1];
1587 }
1588 else {
1589 stencil = &dsa->stencil[0];
1590 ref_value = stencil_ref->ref_value[0];
1591 }
1592
1593 /* Calculate the writemask. If the writemask is trivial (either
1594 * all 0s, meaning that we don't need to calculate any stencil values
1595 * because they're not going to change the stencil anyway, or all 1s,
1596 * meaning that we have to calculate the stencil values but do not
1597 * need to mask them), we can avoid generating code. Don't forget
1598 * that we need to consider backfacing stencil, if enabled.
1599 *
1600 * Note that if the backface stencil is *not* enabled, the backface
1601 * stencil will have the same values as the frontface stencil.
1602 */
1603 if (stencil->fail_op == PIPE_STENCIL_OP_KEEP &&
1604 stencil->zfail_op == PIPE_STENCIL_OP_KEEP &&
1605 stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
1606 need_to_calculate_stencil_values = FALSE;
1607 need_to_writemask_stencil_values = FALSE;
1608 }
1609 else if (stencil->writemask == 0x0) {
1610 /* All changes are writemasked out, so no need to calculate
1611 * what those changes might be, and no need to write anything back.
1612 */
1613 need_to_calculate_stencil_values = FALSE;
1614 need_to_writemask_stencil_values = FALSE;
1615 }
1616 else if (stencil->writemask == 0xff) {
1617 /* Still trivial, but a little less so. We need to write the stencil
1618 * values, but we don't need to mask them.
1619 */
1620 need_to_calculate_stencil_values = TRUE;
1621 need_to_writemask_stencil_values = FALSE;
1622 }
1623 else {
1624 /* The general case: calculate, mask, and write */
1625 need_to_calculate_stencil_values = TRUE;
1626 need_to_writemask_stencil_values = TRUE;
1627
1628 /* While we're here, generate code that calculates what the
1629 * writemask should be. If backface stenciling is enabled,
1630 * and the backface writemask is not the same as the frontface
1631 * writemask, we'll have to generate code that merges the
1632 * two masks into a single effective mask based on fragment facing.
1633 */
1634 spe_comment(f, 0, "Computing stencil writemask");
1635 stencil_writemask_reg = spe_allocate_available_register(f);
1636 spe_load_uint(f, stencil_writemask_reg, dsa->stencil[facing].writemask);
1637 }
1638
1639 /* At least one-sided stenciling must be on. Generate code that
1640 * runs the stencil test on the basic/front-facing stencil, leaving
1641 * the mask of passing stencil bits in stencil_pass_reg. This mask will
1642 * be used both to mask the set of active pixels, and also to
1643 * determine how the stencil buffer changes.
1644 *
1645 * This test will *not* change the value in mask_reg (because we don't
1646 * yet know whether to apply the two-sided stencil or one-sided stencil).
1647 */
1648 spe_comment(f, 0, "Running basic stencil test");
1649 stencil_pass_reg = spe_allocate_available_register(f);
1650 gen_stencil_test(f, stencil, ref_value, 0xff, mask_reg, fbS_reg, stencil_pass_reg);
1651
1652 /* Generate code that, given the mask of valid fragments and the
1653 * mask of valid fragments that passed the stencil test, computes
1654 * the mask of valid fragments that failed the stencil test. We
1655 * have to do this before we run a depth test (because the
1656 * depth test should not be performed on fragments that failed the
1657 * stencil test, and because the depth test will update the
1658 * mask of valid fragments based on the results of the depth test).
1659 */
1660 spe_comment(f, 0, "Computing stencil fail mask and updating fragment mask");
1661 stencil_fail_reg = spe_allocate_available_register(f);
1662 spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg);
1663 /* Now remove the stenciled-out pixels from the valid fragment mask,
1664 * so we can later use the valid fragment mask in the depth test.
1665 */
1666 spe_and(f, mask_reg, mask_reg, stencil_pass_reg);
1667
1668 /* We may not need to calculate stencil values, if the writemask is off */
1669 if (need_to_calculate_stencil_values) {
1670 /* Generate code that calculates exactly which stencil values we need,
1671 * without calculating the same value twice (say, if two different
1672 * stencil ops have the same value). This code will work for one-sided
1673 * and two-sided stenciling (so that we take into account that operations
1674 * may match between front and back stencils), and will also take into
1675 * account whether the depth test is enabled (if the depth test is off,
1676 * we don't need any of the zfail results, because the depth test always
1677 * is considered to pass if it is disabled). Any register value that
1678 * does not need to be calculated will come back with the same value
1679 * that's in fbS_reg.
1680 *
1681 * This function will allocate a variant number of registers that
1682 * will be released as part of the register set.
1683 */
1684 spe_comment(f, 0, facing == CELL_FACING_FRONT
1685 ? "Computing front-facing stencil values"
1686 : "Computing back-facing stencil values");
1687 gen_get_stencil_values(f, stencil, ref_value, dsa->depth.enabled, fbS_reg,
1688 &stencil_fail_values, &stencil_pass_depth_fail_values,
1689 &stencil_pass_depth_pass_values);
1690 }
1691
1692 /* We now have all the stencil values we need. We also need
1693 * the results of the depth test to figure out which
1694 * stencil values will become the new stencil values. (Even if
1695 * we aren't actually calculating stencil values, we need to apply
1696 * the depth test if it's enabled.)
1697 *
1698 * The code generated by gen_depth_test() returns the results of the
1699 * test in the given register, but also alters the mask_reg based
1700 * on the results of the test.
1701 */
1702 if (dsa->depth.enabled) {
1703 spe_comment(f, 0, "Running stencil depth test");
1704 zmask_reg = spe_allocate_available_register(f);
1705 modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg,
1706 fbZ_reg, zmask_reg);
1707 }
1708
1709 if (need_to_calculate_stencil_values) {
1710
1711 /* If we need to writemask the stencil values before going into
1712 * the stencil buffer, we'll have to use a new register to
1713 * hold the new values. If not, we can just keep using the
1714 * current register.
1715 */
1716 if (need_to_writemask_stencil_values) {
1717 newS_reg = spe_allocate_available_register(f);
1718 spe_comment(f, 0, "Saving current stencil values for writemasking");
1719 spe_move(f, newS_reg, fbS_reg);
1720 }
1721 else {
1722 newS_reg = fbS_reg;
1723 }
1724
1725 /* Merge in the selected stencil fail values */
1726 if (stencil_fail_values != fbS_reg) {
1727 spe_comment(f, 0, "Loading stencil fail values");
1728 spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg);
1729 modified_buffers = TRUE;
1730 }
1731
1732 /* Same for the stencil pass/depth fail values. If this calculation
1733 * is not needed (say, if depth test is off), then the
1734 * stencil_pass_depth_fail_values register will be equal to fbS_reg
1735 * and we'll skip the calculation.
1736 */
1737 if (stencil_pass_depth_fail_values != fbS_reg) {
1738 /* We don't actually have a stencil pass/depth fail mask yet.
1739 * Calculate it here from the stencil passing mask and the
1740 * depth passing mask. Note that zmask_reg *must* have been
1741 * set above if we're here.
1742 */
1743 uint stencil_pass_depth_fail_mask =
1744 spe_allocate_available_register(f);
1745
1746 spe_comment(f, 0, "Loading stencil pass/depth fail values");
1747 spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg);
1748
1749 spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values,
1750 stencil_pass_depth_fail_mask);
1751
1752 spe_release_register(f, stencil_pass_depth_fail_mask);
1753 modified_buffers = TRUE;
1754 }
1755
1756 /* Same for the stencil pass/depth pass mask. Note that we
1757 * *can* get here with zmask_reg being unset (if the depth
1758 * test is off but the stencil test is on). In this case,
1759 * we assume the depth test passes, and don't need to mask
1760 * the stencil pass mask with the Z mask.
1761 */
1762 if (stencil_pass_depth_pass_values != fbS_reg) {
1763 if (dsa->depth.enabled) {
1764 uint stencil_pass_depth_pass_mask = spe_allocate_available_register(f);
1765 /* We'll need a separate register */
1766 spe_comment(f, 0, "Loading stencil pass/depth pass values");
1767 spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg);
1768 spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask);
1769 spe_release_register(f, stencil_pass_depth_pass_mask);
1770 }
1771 else {
1772 /* We can use the same stencil-pass register */
1773 spe_comment(f, 0, "Loading stencil pass values");
1774 spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_reg);
1775 }
1776 modified_buffers = TRUE;
1777 }
1778
1779 /* Almost done. If we need to writemask, do it now, leaving the
1780 * results in the fbS_reg register passed in. If we don't need
1781 * to writemask, then the results are *already* in the fbS_reg,
1782 * so there's nothing more to do.
1783 */
1784
1785 if (need_to_writemask_stencil_values && modified_buffers) {
1786 /* The Select Bytes command makes a fine writemask. Where
1787 * the mask is 0, the first (original) values are retained,
1788 * effectively masking out changes. Where the mask is 1, the
1789 * second (new) values are retained, incorporating changes.
1790 */
1791 spe_comment(f, 0, "Writemasking new stencil values");
1792 spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg);
1793 }
1794
1795 } /* done calculating stencil values */
1796
1797 /* The stencil and/or depth values have been applied, and the
1798 * mask_reg, fbS_reg, and fbZ_reg values have been updated.
1799 * We're all done, except that we've allocated a fair number
1800 * of registers that we didn't bother tracking. Release all
1801 * those registers as part of the register set, and go home.
1802 */
1803 spe_comment(f, 0, "Releasing stencil register set");
1804 spe_release_register_set(f);
1805
1806 /* Return TRUE if we could have modified the stencil and/or
1807 * depth buffers.
1808 */
1809 return modified_buffers;
1810}
1811
1812
1813/**
1814 * Generate depth and/or stencil test code.
1815 * \param cell context
 * \param dsa depth/stencil/alpha state
 * \param stencil_ref stencil reference values (front/back)
 * \param f spe function to emit
 * \param facing either CELL_FACING_FRONT or CELL_FACING_BACK
 * \param mask_reg register containing the pixel alive/dead mask
 * \param depth_tile_reg register containing address of z/stencil tile
 * \param quad_offset_reg offset to quad from start of tile
 * \param fragZ_reg register containing fragment Z values
1823 */
static void
gen_depth_stencil(struct cell_context *cell,
                  const struct pipe_depth_stencil_alpha_state *dsa,
                  const struct pipe_stencil_ref *stencil_ref,
                  struct spe_function *f,
                  uint facing,
                  int mask_reg,
                  int depth_tile_reg,
                  int quad_offset_reg,
                  int fragZ_reg)

{
   const enum pipe_format zs_format = cell->framebuffer.zsbuf->format;
   boolean write_depth_stencil;

   /* framebuffer's combined z/stencil values register */
   int fbZS_reg = spe_allocate_available_register(f);

   /* Framebuffer Z values register */
   int fbZ_reg = spe_allocate_available_register(f);

   /* Framebuffer stencil values register (may not be used) */
   int fbS_reg = spe_allocate_available_register(f);

   /* 24-bit mask register (may not be used) */
   int zmask_reg = spe_allocate_available_register(f);

   /**
    * The following code:
    * 1. fetch quad of packed Z/S values from the framebuffer tile.
    * 2. extract the separate Z and S values from the packed values
    * 3. convert fragment Z values from float in [0,1] to 32/24/16-bit ints
    *
    * The instructions for doing this are interleaved for better performance.
    */
   spe_comment(f, 0, "Fetch Z/stencil quad from tile");

   switch(zs_format) {
   case PIPE_FORMAT_Z24_UNORM_S8_UINT: /* fall through */
   case PIPE_FORMAT_Z24X8_UNORM:
      /* prepare mask to extract Z vals from ZS vals */
      spe_load_uint(f, zmask_reg, 0x00ffffff);

      /* convert fragment Z from [0,1] to 32-bit ints */
      spe_cfltu(f, fragZ_reg, fragZ_reg, 32);

      /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
      spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);

      /* right shift 32-bit fragment Z to 24 bits */
      spe_rotmi(f, fragZ_reg, fragZ_reg, -8);

      /* extract 24-bit Z values from ZS values by masking */
      spe_and(f, fbZ_reg, fbZS_reg, zmask_reg);

      /* extract 8-bit stencil values by shifting */
      spe_rotmi(f, fbS_reg, fbZS_reg, -24);
      break;

   case PIPE_FORMAT_S8_UINT_Z24_UNORM: /* fall through */
   case PIPE_FORMAT_X8Z24_UNORM:
      /* convert fragment Z from [0,1] to 32-bit ints */
      spe_cfltu(f, fragZ_reg, fragZ_reg, 32);

      /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
      spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);

      /* right shift 32-bit fragment Z to 24 bits */
      spe_rotmi(f, fragZ_reg, fragZ_reg, -8);

      /* extract 24-bit Z values from ZS values by shifting */
      spe_rotmi(f, fbZ_reg, fbZS_reg, -8);

      /* extract 8-bit stencil values by masking */
      spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff);
      break;

   case PIPE_FORMAT_Z32_UNORM:
      /* Load: fbZ_reg = memory[depth_tile_reg + offset_reg] */
      spe_lqx(f, fbZ_reg, depth_tile_reg, quad_offset_reg);

      /* convert fragment Z from [0,1] to 32-bit ints */
      spe_cfltu(f, fragZ_reg, fragZ_reg, 32);

      /* No stencil, so can't do anything there */
      break;

   case PIPE_FORMAT_Z16_UNORM:
      /* XXX This code for 16bpp Z is broken! */

      /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
      spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);

      /* Copy over 4 32-bit values */
      spe_move(f, fbZ_reg, fbZS_reg);

      /* convert Z from [0,1] to 16-bit ints */
      spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
      spe_rotmi(f, fragZ_reg, fragZ_reg, -16);
      /* No stencil */
      break;

   default:
      ASSERT(0); /* invalid format */
   }

   /* If stencil is enabled, use the stencil-specific code
    * generator to generate both the stencil and depth (if needed)
    * tests.  Otherwise, if only depth is enabled, generate
    * a quick depth test.  The test generators themselves will
    * report back whether the depth/stencil buffer has to be
    * written back.
    */
   if (dsa->stencil[0].enabled) {
      /* This will perform the stencil and depth tests, and update
       * the mask_reg, fbZ_reg, and fbS_reg as required by the
       * tests.
       */
      ASSERT(fbS_reg >= 0);
      spe_comment(f, 0, "Perform stencil test");

      /* Note that fbZ_reg may not be set on entry, if stenciling
       * is enabled but there's no Z-buffer.  The
       * gen_stencil_depth_test() function must ignore the
       * fbZ_reg register if depth is not enabled.
       */
      write_depth_stencil = gen_stencil_depth_test(f, dsa, stencil_ref, facing,
                                                   mask_reg, fragZ_reg,
                                                   fbZ_reg, fbS_reg);
   }
   else if (dsa->depth.enabled) {
      /* NOTE(review): this local shadows the zmask_reg allocated at the
       * top of the function; a second register is allocated and released
       * here even though the outer one is no longer in use by this point.
       */
      int zmask_reg = spe_allocate_available_register(f);
      ASSERT(fbZ_reg >= 0);
      spe_comment(f, 0, "Perform depth test");
      write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg,
                                           fbZ_reg, zmask_reg);
      spe_release_register(f, zmask_reg);
   }
   else {
      /* Neither depth nor stencil test emitted: nothing to write back */
      write_depth_stencil = FALSE;
   }

   if (write_depth_stencil) {
      /* Merge latest Z and Stencil values into fbZS_reg.
       * fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
       * fbS_reg has four 8-bit stencil values in bits [7..0].
       */
      spe_comment(f, 0, "Store quad's depth/stencil values in tile");
      if (zs_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
          zs_format == PIPE_FORMAT_Z24X8_UNORM) {
         spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */
         spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
      }
      else if (zs_format == PIPE_FORMAT_S8_UINT_Z24_UNORM ||
               zs_format == PIPE_FORMAT_X8Z24_UNORM) {
         spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */
         spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
      }
      else if (zs_format == PIPE_FORMAT_Z32_UNORM) {
         spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */
      }
      else if (zs_format == PIPE_FORMAT_Z16_UNORM) {
         spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */
      }
      else if (zs_format == PIPE_FORMAT_S8_UINT) {
         ASSERT(0); /* XXX to do */
      }
      else {
         ASSERT(0); /* bad zs_format */
      }

      /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
      spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
   }

   /* Don't need these any more */
   spe_release_register(f, fbZS_reg);
   spe_release_register(f, fbZ_reg);
   spe_release_register(f, fbS_reg);
   spe_release_register(f, zmask_reg);
}
2005
2006
2007
/**
 * Generate SPE code to implement the fragment operations (alpha test,
 * depth test, stencil test, blending, colormask, and final
 * framebuffer write) as specified by the current context state.
 *
 * Logically, this code will be called after running the fragment
 * shader.  But under some circumstances we could run some of this
 * code before the fragment shader to cull fragments/quads that are
 * totally occluded/discarded.
 *
 * XXX we only support PIPE_FORMAT_S8_UINT_Z24_UNORM z/stencil buffer right now.
 *
 * See the spu_default_fragment_ops() function to see how the per-fragment
 * operations would be done with ordinary C code.
 * The code we generate here though has no branches, is SIMD, etc and
 * should be much faster.
 *
 * \param cell    the rendering context (in)
 * \param facing  whether the generated code is for front-facing or
 *                back-facing fragments (CELL_FACING_FRONT/BACK)
 * \param f       the generated function (in/out); on input, the function
 *                must already have been initialized.  On exit, whatever
 *                instructions within the generated function have had
 *                the fragment ops appended.
 */
void
cell_gen_fragment_function(struct cell_context *cell,
                           const uint facing,
                           struct spe_function *f)
{
   const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil;
   const struct pipe_stencil_ref *stencil_ref = &cell->stencil_ref;
   const struct pipe_blend_state *blend = cell->blend;
   const struct pipe_blend_color *blend_color = &cell->blend_color;
   const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;

   /* For SPE function calls: reg $3 = first param, $4 = second param, etc.
    * These register numbers are fixed by the SPU ABI; the caller passes
    * the quad's position, tile pointers, fragment colors/Z and mask here.
    */
   const int x_reg = 3;             /* uint */
   const int y_reg = 4;             /* uint */
   const int color_tile_reg = 5;    /* tile_t * */
   const int depth_tile_reg = 6;    /* tile_t * */
   const int fragZ_reg = 7;         /* vector float */
   const int fragR_reg = 8;         /* vector float */
   const int fragG_reg = 9;         /* vector float */
   const int fragB_reg = 10;        /* vector float */
   const int fragA_reg = 11;        /* vector float */
   const int mask_reg = 12;         /* vector uint */

   ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK);

   /* offset of quad from start of tile
    * XXX assuming 4-byte pixels for color AND Z/stencil!!!!
    */
   int quad_offset_reg;

   int fbRGBA_reg;                  /**< framebuffer's RGBA colors for quad */

   if (cell->debug_flags & CELL_DEBUG_ASM) {
      /* emit human-readable disassembly comments into the code buffer */
      spe_print_code(f, TRUE);
      spe_indent(f, 8);
      spe_comment(f, -4, facing == CELL_FACING_FRONT
                  ? "Begin front-facing per-fragment ops"
                  : "Begin back-facing per-fragment ops");
   }

   /* Mark the incoming parameter registers as in-use so the allocator
    * won't hand them out as scratch registers below.
    */
   spe_allocate_register(f, x_reg);
   spe_allocate_register(f, y_reg);
   spe_allocate_register(f, color_tile_reg);
   spe_allocate_register(f, depth_tile_reg);
   spe_allocate_register(f, fragZ_reg);
   spe_allocate_register(f, fragR_reg);
   spe_allocate_register(f, fragG_reg);
   spe_allocate_register(f, fragB_reg);
   spe_allocate_register(f, fragA_reg);
   spe_allocate_register(f, mask_reg);

   quad_offset_reg = spe_allocate_available_register(f);
   fbRGBA_reg = spe_allocate_available_register(f);

   /* compute offset of quad from start of tile, in bytes */
   {
      int x2_reg = spe_allocate_available_register(f);
      int y2_reg = spe_allocate_available_register(f);

      ASSERT(TILE_SIZE == 32);

      /* offset = ((y/2) * (TILE_SIZE/2) + (x/2)) * 16 bytes per quad */
      spe_comment(f, 0, "Compute quad offset within tile");
      spe_rotmi(f, y2_reg, y_reg, -1);  /* y2 = y / 2 */
      spe_rotmi(f, x2_reg, x_reg, -1);  /* x2 = x / 2 */
      spe_shli(f, y2_reg, y2_reg, 4);   /* y2 *= 16 */
      spe_a(f, quad_offset_reg, y2_reg, x2_reg);  /* offset = y2 + x2 */
      spe_shli(f, quad_offset_reg, quad_offset_reg, 4);   /* offset *= 16 */

      spe_release_register(f, x2_reg);
      spe_release_register(f, y2_reg);
   }

   /* Generate the alpha test, if needed. */
   if (dsa->alpha.enabled) {
      gen_alpha_test(dsa, f, mask_reg, fragA_reg);
   }

   /* generate depth and/or stencil test code */
   if (dsa->depth.enabled || dsa->stencil[0].enabled) {
      gen_depth_stencil(cell, dsa, stencil_ref, f,
                        facing,
                        mask_reg,
                        depth_tile_reg,
                        quad_offset_reg,
                        fragZ_reg);
   }

   /* Get framebuffer quad/colors.  We'll need these for blending,
    * color masking, and to obey the quad/pixel mask.
    * Load:  fbRGBA_reg = memory[color_tile + quad_offset]
    * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
    * we could skip this load.
    */
   spe_comment(f, 0, "Fetch quad colors from tile");
   spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg);

   if (blend->rt[0].blend_enable) {
      spe_comment(f, 0, "Perform blending");
      gen_blend(blend, blend_color, f, color_format,
                fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg);
   }

   /*
    * Write fragment colors to framebuffer/tile.
    * This involves converting the fragment colors from float[4] to the
    * tile's specific format and obeying the quad/pixel mask.
    */
   {
      int rgba_reg = spe_allocate_available_register(f);

      /* Pack four float colors as four 32-bit int colors */
      spe_comment(f, 0, "Convert float quad colors to packed int framebuffer colors");
      gen_pack_colors(f, color_format,
                      fragR_reg, fragG_reg, fragB_reg, fragA_reg,
                      rgba_reg);

      if (blend->logicop_enable) {
         spe_comment(f, 0, "Compute logic op");
         gen_logicop(blend, f, rgba_reg, fbRGBA_reg);
      }

      if (blend->rt[0].colormask != PIPE_MASK_RGBA) {
         spe_comment(f, 0, "Compute color mask");
         gen_colormask(f, blend->rt[0].colormask, color_format, rgba_reg, fbRGBA_reg);
      }

      /* Mix fragment colors with framebuffer colors using the quad/pixel mask:
       * if (mask[i])
       *    rgba[i] = rgba[i];
       * else
       *    rgba[i] = framebuffer[i];
       */
      spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg);

      /* Store updated quad in tile:
       * memory[color_tile + quad_offset] = rgba_reg;
       */
      spe_comment(f, 0, "Store quad colors into color tile");
      spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg);

      spe_release_register(f, rgba_reg);
   }

   //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);

   spe_bi(f, SPE_REG_RA, 0, 0);  /* return from function call */

   spe_release_register(f, fbRGBA_reg);
   spe_release_register(f, quad_offset_reg);

   if (cell->debug_flags & CELL_DEBUG_ASM) {
      char buffer[1024];
      /* NOTE(review): sprintf is safe here only because the format output
       * is bounded well under 1024 bytes; snprintf would be more defensive.
       */
      sprintf(buffer, "End %s-facing per-fragment ops: %d instructions",
              facing == CELL_FACING_FRONT ? "front" : "back", f->num_inst);
      spe_comment(f, -4, buffer);
   }
}
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
deleted file mode 100644
index 21b35d1fafe..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
+++ /dev/null
@@ -1,38 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#ifndef CELL_GEN_FRAGMENT_H
30#define CELL_GEN_FRAGMENT_H
31
32
33extern void
34cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f);
35
36
37#endif /* CELL_GEN_FRAGMENT_H */
38
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
deleted file mode 100644
index 223adda48f0..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c
+++ /dev/null
@@ -1,473 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/* Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 * Brian Paul
31 */
32
33#include "util/u_memory.h"
34#include "util/u_inlines.h"
35#include "draw/draw_context.h"
36#include "cell_context.h"
37#include "cell_flush.h"
38#include "cell_pipe_state.h"
39#include "cell_state.h"
40#include "cell_texture.h"
41
42
43
44static void *
45cell_create_blend_state(struct pipe_context *pipe,
46 const struct pipe_blend_state *blend)
47{
48 return mem_dup(blend, sizeof(*blend));
49}
50
51
52static void
53cell_bind_blend_state(struct pipe_context *pipe, void *blend)
54{
55 struct cell_context *cell = cell_context(pipe);
56
57 draw_flush(cell->draw);
58
59 cell->blend = (struct pipe_blend_state *) blend;
60 cell->dirty |= CELL_NEW_BLEND;
61}
62
63
/**
 * Delete a blend CSO (allocated by cell_create_blend_state() via mem_dup()).
 */
static void
cell_delete_blend_state(struct pipe_context *pipe, void *blend)
{
   FREE(blend);
}
69
70
71static void
72cell_set_blend_color(struct pipe_context *pipe,
73 const struct pipe_blend_color *blend_color)
74{
75 struct cell_context *cell = cell_context(pipe);
76
77 draw_flush(cell->draw);
78
79 cell->blend_color = *blend_color;
80
81 cell->dirty |= CELL_NEW_BLEND;
82}
83
84
85
86
87static void *
88cell_create_depth_stencil_alpha_state(struct pipe_context *pipe,
89 const struct pipe_depth_stencil_alpha_state *dsa)
90{
91 return mem_dup(dsa, sizeof(*dsa));
92}
93
94
95static void
96cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe,
97 void *dsa)
98{
99 struct cell_context *cell = cell_context(pipe);
100
101 draw_flush(cell->draw);
102
103 cell->depth_stencil = (struct pipe_depth_stencil_alpha_state *) dsa;
104 cell->dirty |= CELL_NEW_DEPTH_STENCIL;
105}
106
107
/**
 * Delete a depth/stencil/alpha CSO (allocated via mem_dup()).
 */
static void
cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa)
{
   FREE(dsa);
}
113
114
115static void
116cell_set_stencil_ref(struct pipe_context *pipe,
117 const struct pipe_stencil_ref *stencil_ref)
118{
119 struct cell_context *cell = cell_context(pipe);
120
121 draw_flush(cell->draw);
122
123 cell->stencil_ref = *stencil_ref;
124
125 cell->dirty |= CELL_NEW_DEPTH_STENCIL;
126}
127
128
129static void
130cell_set_clip_state(struct pipe_context *pipe,
131 const struct pipe_clip_state *clip)
132{
133 struct cell_context *cell = cell_context(pipe);
134
135 /* pass the clip state to the draw module */
136 draw_set_clip_state(cell->draw, clip);
137}
138
139
/**
 * Intentional no-op stub: the mask is neither stored nor does it set a
 * dirty flag, so sample masking has no effect in this driver.
 */
static void
cell_set_sample_mask(struct pipe_context *pipe,
                     unsigned sample_mask)
{
}
145
146
147/* Called when driver state tracker notices changes to the viewport
148 * matrix:
149 */
150static void
151cell_set_viewport_state( struct pipe_context *pipe,
152 const struct pipe_viewport_state *viewport )
153{
154 struct cell_context *cell = cell_context(pipe);
155
156 cell->viewport = *viewport; /* struct copy */
157 cell->dirty |= CELL_NEW_VIEWPORT;
158
159 /* pass the viewport info to the draw module */
160 draw_set_viewport_state(cell->draw, viewport);
161
162 /* Using tnl/ and vf/ modules is temporary while getting started.
163 * Full pipe will have vertex shader, vertex fetch of its own.
164 */
165}
166
167
168static void
169cell_set_scissor_state( struct pipe_context *pipe,
170 const struct pipe_scissor_state *scissor )
171{
172 struct cell_context *cell = cell_context(pipe);
173
174 memcpy( &cell->scissor, scissor, sizeof(*scissor) );
175 cell->dirty |= CELL_NEW_SCISSOR;
176}
177
178
179static void
180cell_set_polygon_stipple( struct pipe_context *pipe,
181 const struct pipe_poly_stipple *stipple )
182{
183 struct cell_context *cell = cell_context(pipe);
184
185 memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) );
186 cell->dirty |= CELL_NEW_STIPPLE;
187}
188
189
190
191static void *
192cell_create_rasterizer_state(struct pipe_context *pipe,
193 const struct pipe_rasterizer_state *rasterizer)
194{
195 return mem_dup(rasterizer, sizeof(*rasterizer));
196}
197
198
199static void
200cell_bind_rasterizer_state(struct pipe_context *pipe, void *rast)
201{
202 struct pipe_rasterizer_state *rasterizer =
203 (struct pipe_rasterizer_state *) rast;
204 struct cell_context *cell = cell_context(pipe);
205
206 /* pass-through to draw module */
207 draw_set_rasterizer_state(cell->draw, rasterizer, rast);
208
209 cell->rasterizer = rasterizer;
210
211 cell->dirty |= CELL_NEW_RASTERIZER;
212}
213
214
/**
 * Delete a rasterizer CSO (allocated via mem_dup()).
 */
static void
cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer)
{
   FREE(rasterizer);
}
220
221
222
223static void *
224cell_create_sampler_state(struct pipe_context *pipe,
225 const struct pipe_sampler_state *sampler)
226{
227 return mem_dup(sampler, sizeof(*sampler));
228}
229
230
231static void
232cell_bind_sampler_states(struct pipe_context *pipe,
233 unsigned num, void **samplers)
234{
235 struct cell_context *cell = cell_context(pipe);
236 uint i, changed = 0x0;
237
238 assert(num <= CELL_MAX_SAMPLERS);
239
240 draw_flush(cell->draw);
241
242 for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
243 struct pipe_sampler_state *new_samp = i < num ? samplers[i] : NULL;
244 if (cell->sampler[i] != new_samp) {
245 cell->sampler[i] = new_samp;
246 changed |= (1 << i);
247 }
248 }
249
250 if (changed) {
251 cell->dirty |= CELL_NEW_SAMPLER;
252 cell->dirty_samplers |= changed;
253 }
254}
255
256
/**
 * Delete a sampler CSO (allocated by cell_create_sampler_state() via mem_dup()).
 */
static void
cell_delete_sampler_state(struct pipe_context *pipe,
                          void *sampler)
{
   FREE( sampler );
}
263
264
265
266static void
267cell_set_fragment_sampler_views(struct pipe_context *pipe,
268 unsigned num,
269 struct pipe_sampler_view **views)
270{
271 struct cell_context *cell = cell_context(pipe);
272 uint i, changed = 0x0;
273
274 assert(num <= CELL_MAX_SAMPLERS);
275
276 for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
277 struct pipe_sampler_view *new_view = i < num ? views[i] : NULL;
278 struct pipe_sampler_view *old_view = cell->fragment_sampler_views[i];
279
280 if (old_view != new_view) {
281 struct pipe_resource *new_tex = new_view ? new_view->texture : NULL;
282
283 pipe_sampler_view_reference(&cell->fragment_sampler_views[i],
284 new_view);
285 pipe_resource_reference((struct pipe_resource **) &cell->texture[i],
286 (struct pipe_resource *) new_tex);
287
288 changed |= (1 << i);
289 }
290 }
291
292 cell->num_textures = num;
293
294 if (changed) {
295 cell->dirty |= CELL_NEW_TEXTURE;
296 cell->dirty_textures |= changed;
297 }
298}
299
300
301static struct pipe_sampler_view *
302cell_create_sampler_view(struct pipe_context *pipe,
303 struct pipe_resource *texture,
304 const struct pipe_sampler_view *templ)
305{
306 struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
307
308 if (view) {
309 *view = *templ;
310 view->reference.count = 1;
311 view->texture = NULL;
312 pipe_resource_reference(&view->texture, texture);
313 view->context = pipe;
314 }
315
316 return view;
317}
318
319
/**
 * Destroy a sampler view: release the texture reference, then free it.
 */
static void
cell_sampler_view_destroy(struct pipe_context *pipe,
                          struct pipe_sampler_view *view)
{
   pipe_resource_reference(&view->texture, NULL);
   FREE(view);
}
327
328
/**
 * Map color and z/stencil framebuffer surfaces.
 *
 * The #if 0 branches are the old screen->buffer_map path, kept for
 * reference; the live code simply points at the resource's data.
 *
 * NOTE(review): only the first color buffer is mapped (loop bound 1),
 * while cell_unmap_surfaces() walks all PIPE_MAX_COLOR_BUFS — presumably
 * the driver supports a single render target; confirm before changing.
 */
static void
cell_map_surfaces(struct cell_context *cell)
{
#if 0
   struct pipe_screen *screen = cell->pipe.screen;
#endif
   uint i;

   for (i = 0; i < 1; i++) {
      struct pipe_surface *ps = cell->framebuffer.cbufs[i];
      if (ps) {
         struct cell_resource *ct = cell_resource(ps->texture);
#if 0
         cell->cbuf_map[i] = screen->buffer_map(screen,
                                                ct->buffer,
                                                (PIPE_BUFFER_USAGE_GPU_READ |
                                                 PIPE_BUFFER_USAGE_GPU_WRITE));
#else
         cell->cbuf_map[i] = ct->data;
#endif
      }
   }

   {
      struct pipe_surface *ps = cell->framebuffer.zsbuf;
      if (ps) {
         struct cell_resource *ct = cell_resource(ps->texture);
#if 0
         cell->zsbuf_map = screen->buffer_map(screen,
                                              ct->buffer,
                                              (PIPE_BUFFER_USAGE_GPU_READ |
                                               PIPE_BUFFER_USAGE_GPU_WRITE));
#else
         cell->zsbuf_map = ct->data;
#endif
      }
   }
}
370
371
/**
 * Unmap color and z/stencil framebuffer surfaces.
 *
 * The commented-out lines are the old screen->buffer_unmap path; with
 * direct data pointers (see cell_map_surfaces) only the cached map
 * pointers need clearing.
 */
static void
cell_unmap_surfaces(struct cell_context *cell)
{
   /*struct pipe_screen *screen = cell->pipe.screen;*/
   uint i;

   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
      struct pipe_surface *ps = cell->framebuffer.cbufs[i];
      if (ps && cell->cbuf_map[i]) {
         /*struct cell_resource *ct = cell_resource(ps->texture);*/
         assert(ps->texture);
         /*assert(ct->buffer);*/

         /*screen->buffer_unmap(screen, ct->buffer);*/
         cell->cbuf_map[i] = NULL;
      }
   }

   {
      struct pipe_surface *ps = cell->framebuffer.zsbuf;
      if (ps && cell->zsbuf_map) {
         /*struct cell_resource *ct = cell_resource(ps->texture);*/
         /*screen->buffer_unmap(screen, ct->buffer);*/
         cell->zsbuf_map = NULL;
      }
   }
}
402
403
/**
 * Set the current framebuffer (color + z/stencil surfaces).
 *
 * Order matters here: unmap the old surfaces, wait for pending rendering
 * to finish, swap surface references, then map the new surfaces.
 *
 * NOTE(review): the `if (1 ...)` always takes the branch — the memcmp
 * redundant-set check is deliberately disabled (left in the comment).
 */
static void
cell_set_framebuffer_state(struct pipe_context *pipe,
                           const struct pipe_framebuffer_state *fb)
{
   struct cell_context *cell = cell_context(pipe);

   if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) {
      uint i;

      /* unmap old surfaces */
      cell_unmap_surfaces(cell);

      /* Finish any pending rendering to the current surface before
       * installing a new surface!
       */
      cell_flush_int(cell, CELL_FLUSH_WAIT);

      /* update my state
       * (this is also where old surfaces will finally get freed)
       */
      cell->framebuffer.width = fb->width;
      cell->framebuffer.height = fb->height;
      cell->framebuffer.nr_cbufs = fb->nr_cbufs;
      for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
         pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]);
      }
      pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf);

      /* map new surfaces */
      cell_map_surfaces(cell);

      cell->dirty |= CELL_NEW_FRAMEBUFFER;
   }
}
438
439
440void
441cell_init_state_functions(struct cell_context *cell)
442{
443 cell->pipe.create_blend_state = cell_create_blend_state;
444 cell->pipe.bind_blend_state = cell_bind_blend_state;
445 cell->pipe.delete_blend_state = cell_delete_blend_state;
446
447 cell->pipe.create_sampler_state = cell_create_sampler_state;
448 cell->pipe.bind_fragment_sampler_states = cell_bind_sampler_states;
449 cell->pipe.delete_sampler_state = cell_delete_sampler_state;
450
451 cell->pipe.set_fragment_sampler_views = cell_set_fragment_sampler_views;
452 cell->pipe.create_sampler_view = cell_create_sampler_view;
453 cell->pipe.sampler_view_destroy = cell_sampler_view_destroy;
454
455 cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state;
456 cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state;
457 cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state;
458
459 cell->pipe.create_rasterizer_state = cell_create_rasterizer_state;
460 cell->pipe.bind_rasterizer_state = cell_bind_rasterizer_state;
461 cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state;
462
463 cell->pipe.set_blend_color = cell_set_blend_color;
464 cell->pipe.set_stencil_ref = cell_set_stencil_ref;
465 cell->pipe.set_clip_state = cell_set_clip_state;
466 cell->pipe.set_sample_mask = cell_set_sample_mask;
467
468 cell->pipe.set_framebuffer_state = cell_set_framebuffer_state;
469
470 cell->pipe.set_polygon_stipple = cell_set_polygon_stipple;
471 cell->pipe.set_scissor_state = cell_set_scissor_state;
472 cell->pipe.set_viewport_state = cell_set_viewport_state;
473}
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.h b/src/gallium/drivers/cell/ppu/cell_pipe_state.h
deleted file mode 100644
index 1889bd52ff5..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_pipe_state.h
+++ /dev/null
@@ -1,39 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#ifndef CELL_PIPE_STATE_H
30#define CELL_PIPE_STATE_H
31
32
33struct cell_context;
34
35extern void
36cell_init_state_functions(struct cell_context *cell);
37
38
39#endif /* CELL_PIPE_STATE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_public.h b/src/gallium/drivers/cell/ppu/cell_public.h
deleted file mode 100644
index 7e2e093565d..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_public.h
+++ /dev/null
@@ -1,10 +0,0 @@
1#ifndef CELL_PUBLIC_H
2#define CELL_PUBLIC_H
3
4struct pipe_screen;
5struct sw_winsys;
6
7struct pipe_screen *
8cell_create_screen(struct sw_winsys *winsys);
9
10#endif
diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c
deleted file mode 100644
index f648482c551..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_render.c
+++ /dev/null
@@ -1,211 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * \brief Last stage of 'draw' pipeline: send tris to SPUs.
30 * \author Brian Paul
31 */
32
33#include "cell_context.h"
34#include "cell_render.h"
35#include "cell_spu.h"
36#include "util/u_memory.h"
37#include "draw/draw_private.h"
38
39
/**
 * Subclass of draw_stage for the cell driver's final "render" stage.
 */
struct render_stage {
   struct draw_stage stage; /**< This must be first (base class) */

   struct cell_context *cell;   /**< owning rendering context */
};


/** Downcast a generic draw_stage to our render_stage subclass. */
static INLINE struct render_stage *
render_stage(struct draw_stage *stage)
{
   return (struct render_stage *) stage;
}
52
53
/** Stage begin hook — currently a no-op; old quad-pipeline setup kept in #if 0. */
static void render_begin( struct draw_stage *stage )
{
#if 0
   struct render_stage *render = render_stage(stage);
   struct cell_context *sp = render->cell;
   const struct pipe_shader_state *fs = &render->cell->fs->shader;
   render->quad.nr_attrs = render->cell->nr_frag_attrs;

   render->firstFpInput = fs->input_semantic_name[0];

   sp->quad.first->begin(sp->quad.first);
#endif
}
67
68
/** Stage end hook — intentionally a no-op. */
static void render_end( struct draw_stage *stage )
{
}
72
73
/**
 * Stage hook to reset the line-stipple counter.  Line stippling isn't
 * implemented yet, so there's nothing to reset; the original body kept
 * an unused local (`render`) that triggered a compiler warning — removed.
 */
static void reset_stipple_counter( struct draw_stage *stage )
{
   (void) stage;   /* silence unused-parameter warning */
   /*render_stage(stage)->cell->line_stipple_counter = 0;*/
}
79
80
/** Point rendering hook — intentionally a no-op (points not emitted here). */
static void
render_point(struct draw_stage *stage, struct prim_header *prim)
{
}
85
86
/** Line rendering hook — intentionally a no-op (lines not emitted here). */
static void
render_line(struct draw_stage *stage, struct prim_header *prim)
{
}
91
92
93/** Write a vertex into the prim buffer */
94static void
95save_vertex(struct cell_prim_buffer *buf, uint pos,
96 const struct vertex_header *vert)
97{
98 uint attr, j;
99
100 for (attr = 0; attr < 2; attr++) {
101 for (j = 0; j < 4; j++) {
102 buf->vertex[pos][attr][j] = vert->data[attr][j];
103 }
104 }
105
106 /* update bounding box */
107 if (vert->data[0][0] < buf->xmin)
108 buf->xmin = vert->data[0][0];
109 if (vert->data[0][0] > buf->xmax)
110 buf->xmax = vert->data[0][0];
111 if (vert->data[0][1] < buf->ymin)
112 buf->ymin = vert->data[0][1];
113 if (vert->data[0][1] > buf->ymax)
114 buf->ymax = vert->data[0][1];
115}
116
117
118static void
119render_tri(struct draw_stage *stage, struct prim_header *prim)
120{
121 struct render_stage *rs = render_stage(stage);
122 struct cell_context *cell = rs->cell;
123 struct cell_prim_buffer *buf = &cell->prim_buffer;
124 uint i;
125
126 if (buf->num_verts + 3 > CELL_MAX_VERTS) {
127 cell_flush_prim_buffer(cell);
128 }
129
130 i = buf->num_verts;
131 assert(i+2 <= CELL_MAX_VERTS);
132 save_vertex(buf, i+0, prim->v[0]);
133 save_vertex(buf, i+1, prim->v[1]);
134 save_vertex(buf, i+2, prim->v[2]);
135 buf->num_verts += 3;
136}
137
138
/**
 * Send a RENDER command to all SPUs to have them render the prims
 * in the current prim_buffer, then reset the buffer.
 *
 * No-op when the buffer is empty.  Each SPU gets an identical
 * cell_command_render describing the vertex data and its bounding box.
 */
void
cell_flush_prim_buffer(struct cell_context *cell)
{
   uint i;

   if (cell->prim_buffer.num_verts == 0)
      return;

   for (i = 0; i < cell->num_spus; i++) {
      struct cell_command_render *render = &cell_global.command[i].render;
      render->prim_type = PIPE_PRIM_TRIANGLES;
      render->num_verts = cell->prim_buffer.num_verts;
      render->front_ccw = cell->rasterizer->front_ccw;
      render->vertex_size = cell->vertex_info->size * 4;   /* bytes per vertex */
      render->xmin = cell->prim_buffer.xmin;
      render->ymin = cell->prim_buffer.ymin;
      render->xmax = cell->prim_buffer.xmax;
      render->ymax = cell->prim_buffer.ymax;
      render->vertex_data = &cell->prim_buffer.vertex;
      ASSERT_ALIGN16(render->vertex_data);
      send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_RENDER);
   }

   cell->prim_buffer.num_verts = 0;

   /* reset bounding box to sentinel "empty" extremes so the next
    * save_vertex() call re-seeds it
    */
   cell->prim_buffer.xmin = 1e100;
   cell->prim_buffer.ymin = 1e100;
   cell->prim_buffer.xmax = -1e100;
   cell->prim_buffer.ymax = -1e100;

   /* XXX temporary, need to double-buffer the prim buffer until we get
    * a real command buffer/list system.
    */
   cell_flush(&cell->pipe, 0x0);
}
178
179
180
/** Stage destroy hook: free the render_stage allocated by cell_draw_render_stage(). */
static void render_destroy( struct draw_stage *stage )
{
   FREE( stage );
}
185
186
187/**
188 * Create a new draw/render stage. This will be plugged into the
189 * draw module as the last pipeline stage.
190 */
191struct draw_stage *cell_draw_render_stage( struct cell_context *cell )
192{
193 struct render_stage *render = CALLOC_STRUCT(render_stage);
194
195 render->cell = cell;
196 render->stage.draw = cell->draw;
197 render->stage.begin = render_begin;
198 render->stage.point = render_point;
199 render->stage.line = render_line;
200 render->stage.tri = render_tri;
201 render->stage.end = render_end;
202 render->stage.reset_stipple_counter = reset_stipple_counter;
203 render->stage.destroy = render_destroy;
204
205 /*
206 render->quad.coef = render->coef;
207 render->quad.posCoef = &render->posCoef;
208 */
209
210 return &render->stage;
211}
diff --git a/src/gallium/drivers/cell/ppu/cell_render.h b/src/gallium/drivers/cell/ppu/cell_render.h
deleted file mode 100644
index 826dcbafeba..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_render.h
+++ /dev/null
@@ -1,39 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#ifndef CELL_RENDER_H
29#define CELL_RENDER_H
30
31struct cell_context;
32struct draw_stage;
33
34extern void
35cell_flush_prim_buffer(struct cell_context *cell);
36
37extern struct draw_stage *cell_draw_render_stage( struct cell_context *cell );
38
39#endif /* CELL_RENDER_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c
deleted file mode 100644
index 7ffdcc51bbd..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_screen.c
+++ /dev/null
@@ -1,221 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#include "util/u_memory.h"
30#include "util/u_simple_screen.h"
31#include "pipe/p_defines.h"
32#include "pipe/p_screen.h"
33
34#include "cell/common.h"
35#include "cell_context.h"
36#include "cell_screen.h"
37#include "cell_texture.h"
38#include "cell_public.h"
39
40#include "state_tracker/sw_winsys.h"
41
42
43static const char *
44cell_get_vendor(struct pipe_screen *screen)
45{
46 return "VMware, Inc.";
47}
48
49
50static const char *
51cell_get_name(struct pipe_screen *screen)
52{
53 return "Cell";
54}
55
56
57static int
58cell_get_param(struct pipe_screen *screen, enum pipe_cap param)
59{
60 switch (param) {
61 case PIPE_CAP_MAX_COMBINED_SAMPLERS:
62 return CELL_MAX_SAMPLERS;
63 case PIPE_CAP_NPOT_TEXTURES:
64 return 1;
65 case PIPE_CAP_TWO_SIDED_STENCIL:
66 return 1;
67 case PIPE_CAP_ANISOTROPIC_FILTER:
68 return 0;
69 case PIPE_CAP_POINT_SPRITE:
70 return 1;
71 case PIPE_CAP_MAX_RENDER_TARGETS:
72 return 1;
73 case PIPE_CAP_OCCLUSION_QUERY:
74 return 1;
75 case PIPE_CAP_TIMER_QUERY:
76 return 0;
77 case PIPE_CAP_TEXTURE_SHADOW_MAP:
78 return 10;
79 case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
80 return CELL_MAX_TEXTURE_LEVELS;
81 case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
82 return 8; /* max 128x128x128 */
83 case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
84 return CELL_MAX_TEXTURE_LEVELS;
85 case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
86 return 0; /* XXX to do */
87 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
88 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
89 return 1;
90 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
91 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
92 return 0;
93 case PIPE_CAP_BLEND_EQUATION_SEPARATE:
94 return 1;
95 default:
96 return 0;
97 }
98}
99
100static int
101cell_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param)
102{
103 switch(shader)
104 {
105 case PIPE_SHADER_FRAGMENT:
106 switch (param) {
107 case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
108 return CELL_MAX_SAMPLERS;
109 default:
110 return tgsi_exec_get_shader_param(param);
111 }
112 case PIPE_SHADER_VERTEX:
113 case PIPE_SHADER_GEOMETRY:
114 return draw_get_shader_param(shader, param);
115 default:
116 return 0;
117 }
118}
119
120static float
121cell_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
122{
123 switch (param) {
124 case PIPE_CAPF_MAX_LINE_WIDTH:
125 /* fall-through */
126 case PIPE_CAPF_MAX_LINE_WIDTH_AA:
127 return 255.0; /* arbitrary */
128
129 case PIPE_CAPF_MAX_POINT_WIDTH:
130 /* fall-through */
131 case PIPE_CAPF_MAX_POINT_WIDTH_AA:
132 return 255.0; /* arbitrary */
133
134 case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
135 return 0.0;
136
137 case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
138 return 16.0; /* arbitrary */
139
140 default:
141 return 0;
142 }
143}
144
145
146static boolean
147cell_is_format_supported( struct pipe_screen *screen,
148 enum pipe_format format,
149 enum pipe_texture_target target,
150 unsigned sample_count,
151 unsigned tex_usage)
152{
153 struct sw_winsys *winsys = cell_screen(screen)->winsys;
154
155 if (sample_count > 1)
156 return FALSE;
157
158 if (tex_usage & (PIPE_BIND_DISPLAY_TARGET |
159 PIPE_BIND_SCANOUT |
160 PIPE_BIND_SHARED)) {
161 if (!winsys->is_displaytarget_format_supported(winsys, tex_usage, format))
162 return FALSE;
163 }
164
165 /* only a few formats are known to work at this time */
166 switch (format) {
167 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
168 case PIPE_FORMAT_Z24X8_UNORM:
169 case PIPE_FORMAT_B8G8R8A8_UNORM:
170 case PIPE_FORMAT_I8_UNORM:
171 return TRUE;
172 default:
173 return FALSE;
174 }
175}
176
177
178static void
179cell_destroy_screen( struct pipe_screen *screen )
180{
181 struct cell_screen *sp_screen = cell_screen(screen);
182 struct sw_winsys *winsys = sp_screen->winsys;
183
184 if(winsys->destroy)
185 winsys->destroy(winsys);
186
187 FREE(screen);
188}
189
190
191
192/**
193 * Create a new pipe_screen object
194 * Note: we're not presently subclassing pipe_screen (no cell_screen) but
195 * that would be the place to put SPU thread/context info...
196 */
197struct pipe_screen *
198cell_create_screen(struct sw_winsys *winsys)
199{
200 struct cell_screen *screen = CALLOC_STRUCT(cell_screen);
201
202 if (!screen)
203 return NULL;
204
205 screen->winsys = winsys;
206 screen->base.winsys = NULL;
207
208 screen->base.destroy = cell_destroy_screen;
209
210 screen->base.get_name = cell_get_name;
211 screen->base.get_vendor = cell_get_vendor;
212 screen->base.get_param = cell_get_param;
213 screen->base.get_shader_param = cell_get_shader_param;
214 screen->base.get_paramf = cell_get_paramf;
215 screen->base.is_format_supported = cell_is_format_supported;
216 screen->base.context_create = cell_create_context;
217
218 cell_init_screen_texture_funcs(&screen->base);
219
220 return &screen->base;
221}
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.h b/src/gallium/drivers/cell/ppu/cell_screen.h
deleted file mode 100644
index baff9d3b7d4..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_screen.h
+++ /dev/null
@@ -1,55 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#ifndef CELL_SCREEN_H
30#define CELL_SCREEN_H
31
32
33#include "pipe/p_screen.h"
34
35struct sw_winsys;
36
37struct cell_screen {
38 struct pipe_screen base;
39
40 struct sw_winsys *winsys;
41
42 /* Increments whenever textures are modified. Contexts can track
43 * this.
44 */
45 unsigned timestamp;
46};
47
48static INLINE struct cell_screen *
49cell_screen( struct pipe_screen *pipe )
50{
51 return (struct cell_screen *)pipe;
52}
53
54
55#endif /* CELL_SCREEN_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c
deleted file mode 100644
index 39284f3a5d1..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_spu.c
+++ /dev/null
@@ -1,219 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29/**
30 * Utility/wrappers for communicating with the SPUs.
31 */
32
33
34#include <pthread.h>
35
36#include "cell_spu.h"
37#include "pipe/p_format.h"
38#include "pipe/p_state.h"
39#include "util/u_memory.h"
40#include "cell/common.h"
41
42
43/*
44helpful headers:
45/opt/ibm/cell-sdk/prototype/src/include/ppu/cbe_mfc.h
46*/
47
48
49/**
50 * Cell/SPU info that's not per-context.
51 */
52struct cell_global_info cell_global;
53
54
55/**
56 * Scan /proc/cpuinfo to determine the timebase for the system.
57 * This is used by the SPUs to convert 'decrementer' ticks to seconds.
58 * There may be a better way to get this value...
59 */
60static unsigned
61get_timebase(void)
62{
63 FILE *f = fopen("/proc/cpuinfo", "r");
64 unsigned timebase;
65
66 assert(f);
67 while (!feof(f)) {
68 char line[80];
69 fgets(line, sizeof(line), f);
70 if (strncmp(line, "timebase", 8) == 0) {
71 char *colon = strchr(line, ':');
72 if (colon) {
73 timebase = atoi(colon + 2);
74 break;
75 }
76 }
77 }
78 fclose(f);
79
80 return timebase;
81}
82
83
84/**
85 * Write a 1-word message to the given SPE mailbox.
86 */
87void
88send_mbox_message(spe_context_ptr_t ctx, unsigned int msg)
89{
90 spe_in_mbox_write(ctx, &msg, 1, SPE_MBOX_ALL_BLOCKING);
91}
92
93
94/**
95 * Wait for a 1-word message to arrive in given mailbox.
96 */
97uint
98wait_mbox_message(spe_context_ptr_t ctx)
99{
100 do {
101 unsigned data;
102 int count = spe_out_mbox_read(ctx, &data, 1);
103
104 if (count == 1) {
105 return data;
106 }
107
108 if (count < 0) {
109 /* error */ ;
110 }
111 } while (1);
112}
113
114
115/**
116 * Called by pthread_create() to spawn an SPU thread.
117 */
118static void *
119cell_thread_function(void *arg)
120{
121 struct cell_init_info *init = (struct cell_init_info *) arg;
122 unsigned entry = SPE_DEFAULT_ENTRY;
123
124 ASSERT_ALIGN16(init);
125
126 if (spe_context_run(cell_global.spe_contexts[init->id], &entry, 0,
127 init, NULL, NULL) < 0) {
128 fprintf(stderr, "spe_context_run() failed\n");
129 exit(1);
130 }
131
132 pthread_exit(NULL);
133}
134
135
136/**
137 * Create the SPU threads. This is done once during driver initialization.
138 * This involves setting the "init" message which is sent to each SPU.
139 * The init message specifies an SPU id, total number of SPUs, location
140 * and number of batch buffers, etc.
141 */
142void
143cell_start_spus(struct cell_context *cell)
144{
145 static boolean one_time_init = FALSE;
146 uint i, j;
147 uint timebase = get_timebase();
148
149 if (one_time_init) {
150 fprintf(stderr, "PPU: Multiple rendering contexts not yet supported "
151 "on Cell.\n");
152 abort();
153 }
154
155 one_time_init = TRUE;
156
157 assert(cell->num_spus <= CELL_MAX_SPUS);
158
159 ASSERT_ALIGN16(&cell_global.inits[0]);
160 ASSERT_ALIGN16(&cell_global.inits[1]);
161
162 /*
163 * Initialize the global 'inits' structure for each SPU.
164 * A pointer to the init struct will be passed to each SPU.
165 * The SPUs will then each grab their init info with mfc_get().
166 */
167 for (i = 0; i < cell->num_spus; i++) {
168 cell_global.inits[i].id = i;
169 cell_global.inits[i].num_spus = cell->num_spus;
170 cell_global.inits[i].debug_flags = cell->debug_flags;
171 cell_global.inits[i].inv_timebase = 1000.0f / timebase;
172
173 for (j = 0; j < CELL_NUM_BUFFERS; j++) {
174 cell_global.inits[i].buffers[j] = cell->buffer[j];
175 }
176 cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0];
177
178 cell_global.inits[i].spu_functions = &cell->spu_functions;
179
180 cell_global.spe_contexts[i] = spe_context_create(0, NULL);
181 if (!cell_global.spe_contexts[i]) {
182 fprintf(stderr, "spe_context_create() failed\n");
183 exit(1);
184 }
185
186 if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) {
187 fprintf(stderr, "spe_program_load() failed\n");
188 exit(1);
189 }
190
191 pthread_create(&cell_global.spe_threads[i], /* returned thread handle */
192 NULL, /* pthread attribs */
193 &cell_thread_function, /* start routine */
194 &cell_global.inits[i]); /* thread argument */
195 }
196}
197
198
199/**
200 * Tell all the SPUs to stop/exit.
201 * This is done when the driver's exiting / cleaning up.
202 */
203void
204cell_spu_exit(struct cell_context *cell)
205{
206 uint i;
207
208 for (i = 0; i < cell->num_spus; i++) {
209 send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT);
210 }
211
212 /* wait for threads to exit */
213 for (i = 0; i < cell->num_spus; i++) {
214 void *value;
215 pthread_join(cell_global.spe_threads[i], &value);
216 cell_global.spe_threads[i] = 0;
217 cell_global.spe_contexts[i] = 0;
218 }
219}
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h
deleted file mode 100644
index c93958a9ed5..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_spu.h
+++ /dev/null
@@ -1,79 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#ifndef CELL_SPU
29#define CELL_SPU
30
31
32#include <libspe2.h>
33#include <pthread.h>
34#include "cell/common.h"
35
36#include "cell_context.h"
37
38
39/**
40 * Global vars, for now anyway.
41 */
42struct cell_global_info
43{
44 /**
45 * SPU/SPE handles, etc
46 */
47 spe_context_ptr_t spe_contexts[CELL_MAX_SPUS];
48 pthread_t spe_threads[CELL_MAX_SPUS];
49
50 /**
51 * Data sent to SPUs at start-up
52 */
53 struct cell_init_info inits[CELL_MAX_SPUS];
54};
55
56
57extern struct cell_global_info cell_global;
58
59
60/** This is the handle for the actual SPE code */
61extern spe_program_handle_t g3d_spu;
62
63
64extern void
65send_mbox_message(spe_context_ptr_t ctx, unsigned int msg);
66
67extern uint
68wait_mbox_message(spe_context_ptr_t ctx);
69
70
71extern void
72cell_start_spus(struct cell_context *cell);
73
74
75extern void
76cell_spu_exit(struct cell_context *cell);
77
78
79#endif /* CELL_SPU */
diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h
deleted file mode 100644
index 7adedcde57c..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_state.h
+++ /dev/null
@@ -1,65 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#ifndef CELL_STATE_H
30#define CELL_STATE_H
31
32
33#define CELL_NEW_VIEWPORT 0x1
34#define CELL_NEW_RASTERIZER 0x2
35#define CELL_NEW_FS 0x4
36#define CELL_NEW_BLEND 0x8
37#define CELL_NEW_CLIP 0x10
38#define CELL_NEW_SCISSOR 0x20
39#define CELL_NEW_STIPPLE 0x40
40#define CELL_NEW_FRAMEBUFFER 0x80
41#define CELL_NEW_ALPHA_TEST 0x100
42#define CELL_NEW_DEPTH_STENCIL 0x200
43#define CELL_NEW_SAMPLER 0x400
44#define CELL_NEW_TEXTURE 0x800
45#define CELL_NEW_VERTEX 0x1000
46#define CELL_NEW_VS 0x2000
47#define CELL_NEW_VS_CONSTANTS 0x4000
48#define CELL_NEW_FS_CONSTANTS 0x8000
49#define CELL_NEW_VERTEX_INFO 0x10000
50
51
52extern void
53cell_update_derived( struct cell_context *cell );
54
55
56extern void
57cell_init_shader_functions(struct cell_context *cell);
58
59
60extern void
61cell_init_vertex_functions(struct cell_context *cell);
62
63
64#endif /* CELL_STATE_H */
65
diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c
deleted file mode 100644
index b723e794e71..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_state_derived.c
+++ /dev/null
@@ -1,170 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "util/u_memory.h"
29#include "pipe/p_shader_tokens.h"
30#include "draw/draw_context.h"
31#include "draw/draw_vertex.h"
32#include "cell_context.h"
33#include "cell_batch.h"
34#include "cell_state.h"
35#include "cell_state_emit.h"
36
37
38/**
39 * Determine how to map vertex program outputs to fragment program inputs.
40 * Basically, this will be used when computing the triangle interpolation
41 * coefficients from the post-transform vertex attributes.
42 */
43static void
44calculate_vertex_layout( struct cell_context *cell )
45{
46 const struct cell_fragment_shader_state *fs = cell->fs;
47 const enum interp_mode colorInterp
48 = cell->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
49 struct vertex_info *vinfo = &cell->vertex_info;
50 uint i;
51 int src;
52
53#if 0
54 if (cell->vbuf) {
55 /* if using the post-transform vertex buffer, tell draw_vbuf to
56 * simply emit the whole post-xform vertex as-is:
57 */
58 struct vertex_info *vinfo_vbuf = &cell->vertex_info_vbuf;
59 vinfo_vbuf->num_attribs = 0;
60 draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0);
61 vinfo_vbuf->size = 4 * vs->num_outputs + sizeof(struct vertex_header)/4;
62 }
63#endif
64
65 /* reset vinfo */
66 vinfo->num_attribs = 0;
67
68 /* we always want to emit vertex pos */
69 src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_POSITION, 0);
70 assert(src >= 0);
71 draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
72
73
74 /*
75 * Loop over fragment shader inputs, searching for the matching output
76 * from the vertex shader.
77 */
78 for (i = 0; i < fs->info.num_inputs; i++) {
79 switch (fs->info.input_semantic_name[i]) {
80 case TGSI_SEMANTIC_POSITION:
81 /* already done above */
82 break;
83
84 case TGSI_SEMANTIC_COLOR:
85 src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_COLOR,
86 fs->info.input_semantic_index[i]);
87 assert(src >= 0);
88 draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
89 break;
90
91 case TGSI_SEMANTIC_FOG:
92 src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_FOG, 0);
93#if 1
94 if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */
95 src = 0;
96#endif
97 assert(src >= 0);
98 draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
99 break;
100
101 case TGSI_SEMANTIC_GENERIC:
102 /* this includes texcoords and varying vars */
103 src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_GENERIC,
104 fs->info.input_semantic_index[i]);
105 assert(src >= 0);
106 draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
107 break;
108
109 default:
110 assert(0);
111 }
112 }
113
114 draw_compute_vertex_size(vinfo);
115
116 /* XXX only signal this if format really changes */
117 cell->dirty |= CELL_NEW_VERTEX_INFO;
118}
119
120
121#if 0
122/**
123 * Recompute cliprect from scissor bounds, scissor enable and surface size.
124 */
125static void
126compute_cliprect(struct cell_context *sp)
127{
128 uint surfWidth = sp->framebuffer.width;
129 uint surfHeight = sp->framebuffer.height;
130
131 if (sp->rasterizer->scissor) {
132 /* clip to scissor rect */
133 sp->cliprect.minx = MAX2(sp->scissor.minx, 0);
134 sp->cliprect.miny = MAX2(sp->scissor.miny, 0);
135 sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth);
136 sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight);
137 }
138 else {
139 /* clip to surface bounds */
140 sp->cliprect.minx = 0;
141 sp->cliprect.miny = 0;
142 sp->cliprect.maxx = surfWidth;
143 sp->cliprect.maxy = surfHeight;
144 }
145}
146#endif
147
148
149
150/**
151 * Update derived state, send current state to SPUs prior to rendering.
152 */
153void cell_update_derived( struct cell_context *cell )
154{
155 if (cell->dirty & (CELL_NEW_RASTERIZER |
156 CELL_NEW_FS |
157 CELL_NEW_VS))
158 calculate_vertex_layout( cell );
159
160#if 0
161 if (cell->dirty & (CELL_NEW_SCISSOR |
162 CELL_NEW_DEPTH_STENCIL_ALPHA |
163 CELL_NEW_FRAMEBUFFER))
164 compute_cliprect(cell);
165#endif
166
167 cell_emit_state(cell);
168
169 cell->dirty = 0;
170}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
deleted file mode 100644
index bb11c68fa24..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ /dev/null
@@ -1,343 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "util/u_inlines.h"
29#include "util/u_memory.h"
30#include "util/u_math.h"
31#include "util/u_format.h"
32#include "cell_context.h"
33#include "cell_gen_fragment.h"
34#include "cell_state.h"
35#include "cell_state_emit.h"
36#include "cell_batch.h"
37#include "cell_texture.h"
38#include "draw/draw_context.h"
39#include "draw/draw_private.h"
40
41
42/**
43 * Find/create a cell_command_fragment_ops object corresponding to the
44 * current blend/stencil/z/colormask/etc. state.
45 */
46static struct cell_command_fragment_ops *
47lookup_fragment_ops(struct cell_context *cell)
48{
49 struct cell_fragment_ops_key key;
50 struct cell_command_fragment_ops *ops;
51
52 /*
53 * Build key
54 */
55 memset(&key, 0, sizeof(key));
56 key.blend = *cell->blend;
57 key.blend_color = cell->blend_color;
58 key.dsa = *cell->depth_stencil;
59
60 if (cell->framebuffer.cbufs[0])
61 key.color_format = cell->framebuffer.cbufs[0]->format;
62 else
63 key.color_format = PIPE_FORMAT_NONE;
64
65 if (cell->framebuffer.zsbuf)
66 key.zs_format = cell->framebuffer.zsbuf->format;
67 else
68 key.zs_format = PIPE_FORMAT_NONE;
69
70 /*
71 * Look up key in cache.
72 */
73 ops = (struct cell_command_fragment_ops *)
74 util_keymap_lookup(cell->fragment_ops_cache, &key);
75
76 /*
77 * If not found, create/save new fragment ops command.
78 */
79 if (!ops) {
80 struct spe_function spe_code_front, spe_code_back;
81 unsigned int facing_dependent, total_code_size;
82
83 if (0)
84 debug_printf("**** Create New Fragment Ops\n");
85
86 /* Prepare the buffer that will hold the generated code. The
87 * "0" passed in for the size means that the SPE code will
88 * use a default size.
89 */
90 spe_init_func(&spe_code_front, 0);
91 spe_init_func(&spe_code_back, 0);
92
93 /* Generate new code. Always generate new code for both front-facing
94 * and back-facing fragments, even if it's the same code in both
95 * cases.
96 */
97 cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front);
98 cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back);
99
100 /* Make sure the code is a multiple of 8 bytes long; this is
101 * required to ensure that the dual pipe instruction alignment
102 * is correct. It's also important for the SPU unpacking,
103 * which assumes 8-byte boundaries.
104 */
105 unsigned int front_code_size = spe_code_size(&spe_code_front);
106 while (front_code_size % 8 != 0) {
107 spe_lnop(&spe_code_front);
108 front_code_size = spe_code_size(&spe_code_front);
109 }
110 unsigned int back_code_size = spe_code_size(&spe_code_back);
111 while (back_code_size % 8 != 0) {
112 spe_lnop(&spe_code_back);
113 back_code_size = spe_code_size(&spe_code_back);
114 }
115
116 /* Determine whether the code we generated is facing-dependent, by
117 * determining whether the generated code is different for the front-
118 * and back-facing fragments.
119 */
120 if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) {
121 /* Code is identical; only need one copy. */
122 facing_dependent = 0;
123 total_code_size = front_code_size;
124 }
125 else {
126 /* Code is different for front-facing and back-facing fragments.
127 * Need to send both copies.
128 */
129 facing_dependent = 1;
130 total_code_size = front_code_size + back_code_size;
131 }
132
133 /* alloc new fragment ops command. Note that this structure
134 * has variant length based on the total code size required.
135 */
136 ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size);
137 /* populate the new cell_command_fragment_ops object */
138 ops->opcode[0] = CELL_CMD_STATE_FRAGMENT_OPS;
139 ops->total_code_size = total_code_size;
140 ops->front_code_index = 0;
141 memcpy(ops->code, spe_code_front.store, front_code_size);
142 if (facing_dependent) {
143 /* We have separate front- and back-facing code. Append the
144 * back-facing code to the buffer. Be careful because the code
145 * size is in bytes, but the buffer is of unsigned elements.
146 */
147 ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]);
148 memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size);
149 }
150 else {
151 /* Use the same code for front- and back-facing fragments */
152 ops->back_code_index = ops->front_code_index;
153 }
154
155 /* Set the fields for the fallback case. Note that these fields
156 * (and the whole fallback case) will eventually go away.
157 */
158 ops->dsa = *cell->depth_stencil;
159 ops->blend = *cell->blend;
160 ops->blend_color = cell->blend_color;
161
162 /* insert cell_command_fragment_ops object into keymap/cache */
163 util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL);
164
165 /* release rtasm buffer */
166 spe_release_func(&spe_code_front);
167 spe_release_func(&spe_code_back);
168 }
169 else {
170 if (0)
171 debug_printf("**** Re-use Fragment Ops\n");
172 }
173
174 return ops;
175}
176
177
178
179static void
180emit_state_cmd(struct cell_context *cell, uint cmd,
181 const void *state, uint state_size)
182{
183 uint32_t *dst = (uint32_t *)
184 cell_batch_alloc16(cell, ROUNDUP16(sizeof(opcode_t) + state_size));
185 *dst = cmd;
186 memcpy(dst + 4, state, state_size);
187}
188
189
/**
 * For state marked as 'dirty', construct a state-update command block
 * and insert it into the current batch buffer.
 *
 * Each command is a 16-byte-aligned record beginning with a
 * CELL_CMD_STATE_x opcode word; the batch is later consumed by the SPUs.
 */
void
cell_emit_state(struct cell_context *cell)
{
   if (cell->dirty & CELL_NEW_FRAMEBUFFER) {
      struct pipe_surface *cbuf = cell->framebuffer.cbufs[0];
      struct pipe_surface *zbuf = cell->framebuffer.zsbuf;
      /* command records must be a multiple of 16 bytes */
      STATIC_ASSERT(sizeof(struct cell_command_framebuffer) % 16 == 0);
      struct cell_command_framebuffer *fb
         = cell_batch_alloc16(cell, sizeof(*fb));
      fb->opcode[0] = CELL_CMD_STATE_FRAMEBUFFER;
      fb->color_start = cell->cbuf_map[0];
      fb->color_format = cbuf->format;
      fb->depth_start = cell->zsbuf_map;
      /* PIPE_FORMAT_NONE signals "no depth/stencil buffer" */
      fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE;
      fb->width = cell->framebuffer.width;
      fb->height = cell->framebuffer.height;
#if 0
      printf("EMIT color format %s\n", util_format_name(fb->color_format));
      printf("EMIT depth format %s\n", util_format_name(fb->depth_format));
#endif
   }

   if (cell->dirty & (CELL_NEW_RASTERIZER)) {
      STATIC_ASSERT(sizeof(struct cell_command_rasterizer) % 16 == 0);
      struct cell_command_rasterizer *rast =
         cell_batch_alloc16(cell, sizeof(*rast));
      rast->opcode[0] = CELL_CMD_STATE_RASTERIZER;
      /* send the whole pipe_rasterizer_state by value */
      rast->rasterizer = *cell->rasterizer;
   }

   if (cell->dirty & (CELL_NEW_FS)) {
      /* Send new fragment program to SPUs */
      STATIC_ASSERT(sizeof(struct cell_command_fragment_program) % 16 == 0);
      struct cell_command_fragment_program *fp
         = cell_batch_alloc16(cell, sizeof(*fp));
      fp->opcode[0] = CELL_CMD_STATE_FRAGMENT_PROGRAM;
      fp->num_inst = cell->fs->code.num_inst;
      /* note: copies the full maximum-size instruction buffer, not just
       * num_inst instructions
       */
      memcpy(&fp->code, cell->fs->code.store,
             SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
      if (0) {
         /* debug dump of the generated SPE instruction words */
         int i;
         printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n");
         for (i = 0; i < fp->num_inst; i++) {
            printf(" %3d: 0x%08x\n", i, fp->code[i]);
         }
      }
   }

   if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) {
      const uint shader = PIPE_SHADER_FRAGMENT;
      const uint num_const = cell->constants[shader]->width0 / sizeof(float);
      uint i, j;
      /* 32-byte header (opcode word + count word) followed by the floats */
      float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float)));
      uint32_t *ibuf = (uint32_t *) buf;
      const float *constants = cell->mapped_constants[shader];
      ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS;
      /* constant count lives in the second 16-byte slot (word 4) */
      ibuf[4] = num_const;
      j = 8;  /* float payload starts at byte offset 32 */
      for (i = 0; i < num_const; i++) {
         buf[j++] = constants[i];
      }
   }

   if (cell->dirty & (CELL_NEW_FRAMEBUFFER |
                      CELL_NEW_DEPTH_STENCIL |
                      CELL_NEW_BLEND)) {
      struct cell_command_fragment_ops *fops, *fops_cmd;
      /* Note that cell_command_fragment_ops is a variant-sized record */
      fops = lookup_fragment_ops(cell);
      fops_cmd = cell_batch_alloc16(cell, ROUNDUP16(sizeof(*fops_cmd) + fops->total_code_size));
      /* copy the cached record, including its trailing generated code */
      memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size);
   }

   if (cell->dirty & CELL_NEW_SAMPLER) {
      uint i;
      for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
         /* only emit the sampler units that actually changed */
         if (cell->dirty_samplers & (1 << i)) {
            if (cell->sampler[i]) {
               STATIC_ASSERT(sizeof(struct cell_command_sampler) % 16 == 0);
               struct cell_command_sampler *sampler
                  = cell_batch_alloc16(cell, sizeof(*sampler));
               sampler->opcode[0] = CELL_CMD_STATE_SAMPLER;
               sampler->unit = i;
               sampler->state = *cell->sampler[i];
            }
         }
      }
      cell->dirty_samplers = 0x0;
   }

   if (cell->dirty & CELL_NEW_TEXTURE) {
      uint i;
      for (i = 0;i < CELL_MAX_SAMPLERS; i++) {
         if (cell->dirty_textures & (1 << i)) {
            STATIC_ASSERT(sizeof(struct cell_command_texture) % 16 == 0);
            struct cell_command_texture *texture =
               (struct cell_command_texture *)
               cell_batch_alloc16(cell, sizeof(*texture));

            texture->opcode[0] = CELL_CMD_STATE_TEXTURE;
            texture->unit = i;
            if (cell->texture[i]) {
               /* send the per-mipmap-level start pointers and sizes */
               struct cell_resource *ct = cell->texture[i];
               uint level;
               for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
                  texture->start[level] = (ct->mapped +
                                           ct->level_offset[level]);
                  texture->width[level] = u_minify(ct->base.width0, level);
                  texture->height[level] = u_minify(ct->base.height0, level);
                  texture->depth[level] = u_minify(ct->base.depth0, level);
               }
               texture->target = ct->base.target;
            }
            else {
               /* texture unbound from this unit: clear all levels */
               uint level;
               for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
                  texture->start[level] = NULL;
                  texture->width[level] = 0;
                  texture->height[level] = 0;
                  texture->depth[level] = 0;
               }
               texture->target = 0;
            }
         }
      }
      cell->dirty_textures = 0x0;
   }

   if (cell->dirty & CELL_NEW_VERTEX_INFO) {
      emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO,
                     &cell->vertex_info, sizeof(struct vertex_info));
   }

#if 0
   if (cell->dirty & CELL_NEW_VS) {
      const struct draw_context *const draw = cell->draw;
      struct cell_shader_info info;

      info.num_outputs = draw_num_shader_outputs(draw);
      info.declarations = (uintptr_t) draw->vs.machine.Declarations;
      info.num_declarations = draw->vs.machine.NumDeclarations;
      info.instructions = (uintptr_t) draw->vs.machine.Instructions;
      info.num_instructions = draw->vs.machine.NumInstructions;
      info.immediates = (uintptr_t) draw->vs.machine.Imms;
      info.num_immediates = draw->vs.machine.ImmLimit / 4;

      emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info));
   }
#endif
}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.h b/src/gallium/drivers/cell/ppu/cell_state_emit.h
deleted file mode 100644
index 59f8affe8d3..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.h
+++ /dev/null
@@ -1,36 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#ifndef CELL_STATE_EMIT_H
29#define CELL_STATE_EMIT_H
30
31
32extern void
33cell_emit_state(struct cell_context *cell);
34
35
36#endif /* CELL_STATE_EMIT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
deleted file mode 100644
index dc33e7ccc2c..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
+++ /dev/null
@@ -1,1432 +0,0 @@
1/*
2 * (C) Copyright IBM Corporation 2008
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25/**
26 * \file
27 * Generate code to perform all per-fragment operations.
28 *
29 * Code generated by these functions perform both alpha, depth, and stencil
30 * testing as well as alpha blending.
31 *
32 * \note
33 * Occlusion query is not supported, but this is the right place to add that
34 * support.
35 *
36 * \author Ian Romanick <idr@us.ibm.com>
37 */
38
39#include "pipe/p_defines.h"
40#include "pipe/p_state.h"
41
42#include "cell_context.h"
43
44#include "rtasm/rtasm_ppc_spe.h"
45
46
47/**
48 * Generate code to perform alpha testing.
49 *
50 * The code generated by this function uses the register specificed by
51 * \c mask as both an input and an output.
52 *
53 * \param dsa Current alpha-test state
54 * \param f Function to which code should be appended
55 * \param mask Index of register containing active fragment mask
56 * \param alphas Index of register containing per-fragment alpha values
57 *
58 * \note Emits a maximum of 6 instructions.
59 */
60static void
61emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
62 struct spe_function *f, int mask, int alphas)
63{
64 /* If the alpha function is either NEVER or ALWAYS, there is no need to
65 * load the reference value into a register. ALWAYS is a fairly common
66 * case, and this optimization saves 2 instructions.
67 */
68 if (dsa->alpha.enabled
69 && (dsa->alpha.func != PIPE_FUNC_NEVER)
70 && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
71 int ref = spe_allocate_available_register(f);
72 int tmp_a = spe_allocate_available_register(f);
73 int tmp_b = spe_allocate_available_register(f);
74 union {
75 float f;
76 unsigned u;
77 } ref_val;
78 boolean complement = FALSE;
79
80 ref_val.f = dsa->alpha.ref;
81
82 spe_il(f, ref, ref_val.u & 0x0000ffff);
83 spe_ilh(f, ref, ref_val.u >> 16);
84
85 switch (dsa->alpha.func) {
86 case PIPE_FUNC_NOTEQUAL:
87 complement = TRUE;
88 /* FALLTHROUGH */
89
90 case PIPE_FUNC_EQUAL:
91 spe_fceq(f, tmp_a, ref, alphas);
92 break;
93
94 case PIPE_FUNC_LEQUAL:
95 complement = TRUE;
96 /* FALLTHROUGH */
97
98 case PIPE_FUNC_GREATER:
99 spe_fcgt(f, tmp_a, ref, alphas);
100 break;
101
102 case PIPE_FUNC_LESS:
103 complement = TRUE;
104 /* FALLTHROUGH */
105
106 case PIPE_FUNC_GEQUAL:
107 spe_fcgt(f, tmp_a, ref, alphas);
108 spe_fceq(f, tmp_b, ref, alphas);
109 spe_or(f, tmp_a, tmp_b, tmp_a);
110 break;
111
112 case PIPE_FUNC_ALWAYS:
113 case PIPE_FUNC_NEVER:
114 default:
115 assert(0);
116 break;
117 }
118
119 if (complement) {
120 spe_andc(f, mask, mask, tmp_a);
121 } else {
122 spe_and(f, mask, mask, tmp_a);
123 }
124
125 spe_release_register(f, ref);
126 spe_release_register(f, tmp_a);
127 spe_release_register(f, tmp_b);
128 } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) {
129 spe_il(f, mask, 0);
130 }
131}
132
133
134/**
135 * Generate code to perform Z testing. Four Z values are tested at once.
136 * \param dsa Current depth-test state
137 * \param f Function to which code should be appended
138 * \param mask Index of register to contain depth-pass mask
139 * \param stored Index of register containing values from depth buffer
140 * \param calculated Index of register containing per-fragment depth values
141 *
142 * \return
143 * If the calculated depth comparison mask is the actual mask, \c FALSE is
144 * returned. If the calculated depth comparison mask is the compliment of
145 * the actual mask, \c TRUE is returned.
146 *
147 * \note Emits a maximum of 3 instructions.
148 */
149static boolean
150emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
151 struct spe_function *f, int mask, int stored, int calculated)
152{
153 unsigned func = (dsa->depth.enabled)
154 ? dsa->depth.func : PIPE_FUNC_ALWAYS;
155 int tmp = spe_allocate_available_register(f);
156 boolean compliment = FALSE;
157
158 switch (func) {
159 case PIPE_FUNC_NEVER:
160 spe_il(f, mask, 0);
161 break;
162
163 case PIPE_FUNC_NOTEQUAL:
164 compliment = TRUE;
165 /* FALLTHROUGH */
166 case PIPE_FUNC_EQUAL:
167 spe_ceq(f, mask, calculated, stored);
168 break;
169
170 case PIPE_FUNC_LEQUAL:
171 compliment = TRUE;
172 /* FALLTHROUGH */
173 case PIPE_FUNC_GREATER:
174 spe_clgt(f, mask, calculated, stored);
175 break;
176
177 case PIPE_FUNC_LESS:
178 compliment = TRUE;
179 /* FALLTHROUGH */
180 case PIPE_FUNC_GEQUAL:
181 spe_clgt(f, mask, calculated, stored);
182 spe_ceq(f, tmp, calculated, stored);
183 spe_or(f, mask, mask, tmp);
184 break;
185
186 case PIPE_FUNC_ALWAYS:
187 spe_il(f, mask, ~0);
188 break;
189
190 default:
191 assert(0);
192 break;
193 }
194
195 spe_release_register(f, tmp);
196 return compliment;
197}
198
199
200/**
201 * Generate code to apply the stencil operation (after testing).
202 * \note Emits a maximum of 5 instructions.
203 *
204 * \warning
205 * Since \c out and \c in might be the same register, this routine cannot
206 * generate code that uses \c out as a temporary.
207 */
208static void
209emit_stencil_op(struct spe_function *f,
210 int out, int in, int mask, unsigned op, unsigned ref)
211{
212 const int clamp = spe_allocate_available_register(f);
213 const int clamp_mask = spe_allocate_available_register(f);
214 const int result = spe_allocate_available_register(f);
215
216 switch(op) {
217 case PIPE_STENCIL_OP_KEEP:
218 assert(0);
219 case PIPE_STENCIL_OP_ZERO:
220 spe_il(f, result, 0);
221 break;
222 case PIPE_STENCIL_OP_REPLACE:
223 spe_il(f, result, ref);
224 break;
225 case PIPE_STENCIL_OP_INCR:
226 /* clamp = [0xff, 0xff, 0xff, 0xff] */
227 spe_il(f, clamp, 0x0ff);
228 /* result[i] = in[i] + 1 */
229 spe_ai(f, result, in, 1);
230 /* clamp_mask[i] = (result[i] > 0xff) */
231 spe_clgti(f, clamp_mask, result, 0x0ff);
232 /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */
233 spe_selb(f, result, result, clamp, clamp_mask);
234 break;
235 case PIPE_STENCIL_OP_DECR:
236 spe_il(f, clamp, 0);
237 spe_ai(f, result, in, -1);
238
239 /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned
240 * arithmetic.
241 */
242 spe_clgti(f, clamp_mask, result, 0x0ff);
243 spe_selb(f, result, result, clamp, clamp_mask);
244 break;
245 case PIPE_STENCIL_OP_INCR_WRAP:
246 spe_ai(f, result, in, 1);
247 break;
248 case PIPE_STENCIL_OP_DECR_WRAP:
249 spe_ai(f, result, in, -1);
250 break;
251 case PIPE_STENCIL_OP_INVERT:
252 spe_nor(f, result, in, in);
253 break;
254 default:
255 assert(0);
256 }
257
258 spe_selb(f, out, in, result, mask);
259
260 spe_release_register(f, result);
261 spe_release_register(f, clamp_mask);
262 spe_release_register(f, clamp);
263}
264
265
266/**
267 * Generate code to do stencil test. Four pixels are tested at once.
268 * \param dsa Depth / stencil test state
269 * \param face 0 for front face, 1 for back face
270 * \param f Function to append instructions to
271 * \param mask Register containing mask of fragments passing the
272 * alpha test
273 * \param depth_mask Register containing mask of fragments passing the
274 * depth test
275 * \param depth_compliment Is \c depth_mask the compliment of the actual mask?
276 * \param stencil Register containing values from stencil buffer
277 * \param depth_pass Register to store mask of fragments passing stencil test
278 * and depth test
279 *
280 * \note
281 * Emits a maximum of 10 + (3 * 5) = 25 instructions.
282 */
283static int
284emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
285 struct pipe_stencil_ref *sr,
286 unsigned face,
287 struct spe_function *f,
288 int mask,
289 int depth_mask,
290 boolean depth_complement,
291 int stencil,
292 int depth_pass)
293{
294 int stencil_fail = spe_allocate_available_register(f);
295 int depth_fail = spe_allocate_available_register(f);
296 int stencil_mask = spe_allocate_available_register(f);
297 int stencil_pass = spe_allocate_available_register(f);
298 int face_stencil = spe_allocate_available_register(f);
299 int stencil_src = stencil;
300 const unsigned ref = (sr->ref_value[face]
301 & dsa->stencil[face].valuemask);
302 boolean complement = FALSE;
303 int stored;
304 int tmp = spe_allocate_available_register(f);
305
306
307 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
308 && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
309 && (dsa->stencil[face].valuemask != 0x0ff)) {
310 stored = spe_allocate_available_register(f);
311 spe_andi(f, stored, stencil, dsa->stencil[face].valuemask);
312 } else {
313 stored = stencil;
314 }
315
316
317 switch (dsa->stencil[face].func) {
318 case PIPE_FUNC_NEVER:
319 spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */
320 break;
321
322 case PIPE_FUNC_NOTEQUAL:
323 complement = TRUE;
324 /* FALLTHROUGH */
325 case PIPE_FUNC_EQUAL:
326 /* stencil_mask[i] = (stored[i] == ref) */
327 spe_ceqi(f, stencil_mask, stored, ref);
328 break;
329
330 case PIPE_FUNC_LEQUAL:
331 complement = TRUE;
332 /* FALLTHROUGH */
333 case PIPE_FUNC_GREATER:
334 complement = TRUE;
335 /* stencil_mask[i] = (stored[i] > ref) */
336 spe_clgti(f, stencil_mask, stored, ref);
337 break;
338
339 case PIPE_FUNC_LESS:
340 complement = TRUE;
341 /* FALLTHROUGH */
342 case PIPE_FUNC_GEQUAL:
343 /* stencil_mask[i] = (stored[i] > ref) */
344 spe_clgti(f, stencil_mask, stored, ref);
345 /* tmp[i] = (stored[i] == ref) */
346 spe_ceqi(f, tmp, stored, ref);
347 /* stencil_mask[i] = stencil_mask[i] | tmp[i] */
348 spe_or(f, stencil_mask, stencil_mask, tmp);
349 break;
350
351 case PIPE_FUNC_ALWAYS:
352 /* See comment below. */
353 break;
354
355 default:
356 assert(0);
357 break;
358 }
359
360 if (stored != stencil) {
361 spe_release_register(f, stored);
362 }
363 spe_release_register(f, tmp);
364
365
366 /* ALWAYS is a very common stencil-test, so some effort is applied to
367 * optimize that case. The stencil-pass mask is the same as the input
368 * fragment mask. This makes the stencil-test (above) a no-op, and the
369 * input fragment mask can be "renamed" the stencil-pass mask.
370 */
371 if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) {
372 spe_release_register(f, stencil_pass);
373 stencil_pass = mask;
374 } else {
375 if (complement) {
376 spe_andc(f, stencil_pass, mask, stencil_mask);
377 } else {
378 spe_and(f, stencil_pass, mask, stencil_mask);
379 }
380 }
381
382 if (depth_complement) {
383 spe_andc(f, depth_pass, stencil_pass, depth_mask);
384 } else {
385 spe_and(f, depth_pass, stencil_pass, depth_mask);
386 }
387
388
389 /* Conditionally emit code to update the stencil value under various
390 * condititons. Note that there is no need to generate code under the
391 * following circumstances:
392 *
393 * - Stencil write mask is zero.
394 * - For stencil-fail if the stencil test is ALWAYS
395 * - For depth-fail if the stencil test is NEVER
396 * - For depth-pass if the stencil test is NEVER
397 * - Any of the 3 conditions if the operation is KEEP
398 */
399 if (dsa->stencil[face].writemask != 0) {
400 if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
401 && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) {
402 if (complement) {
403 spe_and(f, stencil_fail, mask, stencil_mask);
404 } else {
405 spe_andc(f, stencil_fail, mask, stencil_mask);
406 }
407
408 emit_stencil_op(f, face_stencil, stencil_src, stencil_fail,
409 dsa->stencil[face].fail_op,
410 sr->ref_value[face]);
411
412 stencil_src = face_stencil;
413 }
414
415 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
416 && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) {
417 if (depth_complement) {
418 spe_and(f, depth_fail, stencil_pass, depth_mask);
419 } else {
420 spe_andc(f, depth_fail, stencil_pass, depth_mask);
421 }
422
423 emit_stencil_op(f, face_stencil, stencil_src, depth_fail,
424 dsa->stencil[face].zfail_op,
425 sr->ref_value[face]);
426 stencil_src = face_stencil;
427 }
428
429 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
430 && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) {
431 emit_stencil_op(f, face_stencil, stencil_src, depth_pass,
432 dsa->stencil[face].zpass_op,
433 sr->ref_value[face]);
434 stencil_src = face_stencil;
435 }
436 }
437
438 spe_release_register(f, stencil_fail);
439 spe_release_register(f, depth_fail);
440 spe_release_register(f, stencil_mask);
441 if (stencil_pass != mask) {
442 spe_release_register(f, stencil_pass);
443 }
444
445 /* If all of the stencil operations were KEEP or the stencil write mask was
446 * zero, "stencil_src" will still be set to "stencil". In this case
447 * release the "face_stencil" register. Otherwise apply the stencil write
448 * mask to select bits from the calculated stencil value and the previous
449 * stencil value.
450 */
451 if (stencil_src == stencil) {
452 spe_release_register(f, face_stencil);
453 } else if (dsa->stencil[face].writemask != 0x0ff) {
454 int tmp = spe_allocate_available_register(f);
455
456 spe_il(f, tmp, dsa->stencil[face].writemask);
457 spe_selb(f, stencil_src, stencil, stencil_src, tmp);
458
459 spe_release_register(f, tmp);
460 }
461
462 return stencil_src;
463}
464
465
/**
 * Generate the SPE function that performs alpha, depth and stencil testing
 * (and the corresponding depth/stencil buffer updates) for the given state.
 *
 * The generated code is stored in \c cdsa->code.
 *
 * \param cdsa  Depth/stencil/alpha state object; receives the code
 * \param sr    Current stencil reference values
 */
void
cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa,
                                 struct pipe_stencil_ref *sr)
{
   struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base;
   struct spe_function *const f = &cdsa->code;

   /* This code generates a maximum of 6 (alpha test) + 3 (depth test)
    * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions.  Round
    * up to 64 to make it a happy power-of-two.
    */
   spe_init_func(f, SPE_INST_SIZE * 64);


   /* Allocate registers for the function's input parameters.  Cleverly (and
    * clever code is usually dangerous, but I couldn't resist) the generated
    * function returns a structure.  Returned structures start with register
    * 3, and the structure fields are ordered to match up exactly with the
    * input parameters.
    */
   int mask = spe_allocate_register(f, 3);
   int depth = spe_allocate_register(f, 4);
   int stencil = spe_allocate_register(f, 5);
   int zvals = spe_allocate_register(f, 6);
   int frag_a = spe_allocate_register(f, 7);
   int facing = spe_allocate_register(f, 8);

   int depth_mask = spe_allocate_available_register(f);

   /* TRUE when depth_mask holds the inverse of the depth-pass mask */
   boolean depth_complement;


   /* alpha test narrows the live-fragment mask in place */
   emit_alpha_test(dsa, f, mask, frag_a);

   depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals);

   if (dsa->stencil[0].enabled) {
      const int front_depth_pass = spe_allocate_available_register(f);
      int front_stencil = emit_stencil_test(dsa, sr, 0, f, mask,
                                            depth_mask, depth_complement,
                                            stencil, front_depth_pass);

      if (dsa->stencil[1].enabled) {
         /* two-sided stencil: generate the back-face path too, then
          * select between the two results based on the facing register
          */
         const int back_depth_pass = spe_allocate_available_register(f);
         int back_stencil = emit_stencil_test(dsa, sr, 1, f, mask,
                                              depth_mask, depth_complement,
                                              stencil, back_depth_pass);

         /* If the front facing stencil value and the back facing stencil
          * value are stored in the same register, there is no need to select
          * a value based on the facing.  This can happen if the stencil value
          * was not modified due to the write masks being zero, the stencil
          * operations being KEEP, etc.
          */
         if (front_stencil != back_stencil) {
            spe_selb(f, stencil, back_stencil, front_stencil, facing);
         }

         if (back_stencil != stencil) {
            spe_release_register(f, back_stencil);
         }

         if (front_stencil != stencil) {
            spe_release_register(f, front_stencil);
         }

         spe_selb(f, mask, back_depth_pass, front_depth_pass, facing);

         spe_release_register(f, back_depth_pass);
      } else {
         /* single-sided stencil: copy results into the output registers */
         if (front_stencil != stencil) {
            spe_or(f, stencil, front_stencil, front_stencil);
            spe_release_register(f, front_stencil);
         }
         spe_or(f, mask, front_depth_pass, front_depth_pass);
      }

      spe_release_register(f, front_depth_pass);
   } else if (dsa->depth.enabled) {
      /* no stencil: fold the depth-pass mask directly into the fragment
       * mask, honoring the complement flag
       */
      if (depth_complement) {
         spe_andc(f, mask, mask, depth_mask);
      } else {
         spe_and(f, mask, mask, depth_mask);
      }
   }

   if (dsa->depth.writemask) {
      /* write new Z values only for fragments that passed all tests */
      spe_selb(f, depth, depth, zvals, mask);
   }

   spe_bi(f, 0, 0, 0);  /* return from function call */


#if 0
   {
      const uint32_t *p = f->store;
      unsigned i;

      printf("# alpha (%sabled)\n",
             (dsa->alpha.enabled) ? "en" : "dis");
      printf("#    func: %u\n", dsa->alpha.func);
      printf("#    ref: %.2f\n", dsa->alpha.ref);

      printf("# depth (%sabled)\n",
             (dsa->depth.enabled) ? "en" : "dis");
      printf("#    func: %u\n", dsa->depth.func);

      for (i = 0; i < 2; i++) {
         printf("# %s stencil (%sabled)\n",
                (i == 0) ? "front" : "back",
                (dsa->stencil[i].enabled) ? "en" : "dis");

         printf("#    func: %u\n", dsa->stencil[i].func);
         printf("#    op (sf, zf, zp): %u %u %u\n",
                dsa->stencil[i].fail_op,
                dsa->stencil[i].zfail_op,
                dsa->stencil[i].zpass_op);
         printf("#    ref value / value mask / write mask: %02x %02x %02x\n",
                sr->ref_value[i],
                dsa->stencil[i].valuemask,
                dsa->stencil[i].writemask);
      }

      printf("\t.text\n");
      for (/* empty */; p < f->csr; p++) {
         printf("\t.long\t0x%04x\n", *p);
      }
      fflush(stdout);
   }
#endif
}
597
598
599/**
600 * \note Emits a maximum of 3 instructions
601 */
602static int
603emit_alpha_factor_calculation(struct spe_function *f,
604 unsigned factor,
605 int src_alpha, int dst_alpha, int const_alpha)
606{
607 int factor_reg;
608 int tmp;
609
610
611 switch (factor) {
612 case PIPE_BLENDFACTOR_ONE:
613 factor_reg = -1;
614 break;
615
616 case PIPE_BLENDFACTOR_SRC_ALPHA:
617 factor_reg = spe_allocate_available_register(f);
618
619 spe_or(f, factor_reg, src_alpha, src_alpha);
620 break;
621
622 case PIPE_BLENDFACTOR_DST_ALPHA:
623 factor_reg = dst_alpha;
624 break;
625
626 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
627 factor_reg = -1;
628 break;
629
630 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
631 factor_reg = spe_allocate_available_register(f);
632
633 tmp = spe_allocate_available_register(f);
634 spe_il(f, tmp, 1);
635 spe_cuflt(f, tmp, tmp, 0);
636 spe_fs(f, factor_reg, tmp, const_alpha);
637 spe_release_register(f, tmp);
638 break;
639
640 case PIPE_BLENDFACTOR_CONST_ALPHA:
641 factor_reg = const_alpha;
642 break;
643
644 case PIPE_BLENDFACTOR_ZERO:
645 factor_reg = -1;
646 break;
647
648 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
649 tmp = spe_allocate_available_register(f);
650 factor_reg = spe_allocate_available_register(f);
651
652 spe_il(f, tmp, 1);
653 spe_cuflt(f, tmp, tmp, 0);
654 spe_fs(f, factor_reg, tmp, src_alpha);
655
656 spe_release_register(f, tmp);
657 break;
658
659 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
660 tmp = spe_allocate_available_register(f);
661 factor_reg = spe_allocate_available_register(f);
662
663 spe_il(f, tmp, 1);
664 spe_cuflt(f, tmp, tmp, 0);
665 spe_fs(f, factor_reg, tmp, dst_alpha);
666
667 spe_release_register(f, tmp);
668 break;
669
670 case PIPE_BLENDFACTOR_SRC1_ALPHA:
671 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
672 default:
673 assert(0);
674 factor_reg = -1;
675 break;
676 }
677
678 return factor_reg;
679}
680
681
682/**
683 * \note Emits a maximum of 6 instructions
684 */
685static void
686emit_color_factor_calculation(struct spe_function *f,
687 unsigned sF, unsigned mask,
688 const int *src,
689 const int *dst,
690 const int *const_color,
691 int *factor)
692{
693 int tmp;
694 unsigned i;
695
696
697 factor[0] = -1;
698 factor[1] = -1;
699 factor[2] = -1;
700 factor[3] = -1;
701
702 switch (sF) {
703 case PIPE_BLENDFACTOR_ONE:
704 break;
705
706 case PIPE_BLENDFACTOR_SRC_COLOR:
707 for (i = 0; i < 3; ++i) {
708 if ((mask & (1U << i)) != 0) {
709 factor[i] = spe_allocate_available_register(f);
710 spe_or(f, factor[i], src[i], src[i]);
711 }
712 }
713 break;
714
715 case PIPE_BLENDFACTOR_SRC_ALPHA:
716 factor[0] = spe_allocate_available_register(f);
717 factor[1] = factor[0];
718 factor[2] = factor[0];
719
720 spe_or(f, factor[0], src[3], src[3]);
721 break;
722
723 case PIPE_BLENDFACTOR_DST_ALPHA:
724 factor[0] = dst[3];
725 factor[1] = dst[3];
726 factor[2] = dst[3];
727 break;
728
729 case PIPE_BLENDFACTOR_DST_COLOR:
730 factor[0] = dst[0];
731 factor[1] = dst[1];
732 factor[2] = dst[2];
733 break;
734
735 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
736 tmp = spe_allocate_available_register(f);
737 factor[0] = spe_allocate_available_register(f);
738 factor[1] = factor[0];
739 factor[2] = factor[0];
740
741 /* Alpha saturate means min(As, 1-Ad).
742 */
743 spe_il(f, tmp, 1);
744 spe_cuflt(f, tmp, tmp, 0);
745 spe_fs(f, tmp, tmp, dst[3]);
746 spe_fcgt(f, factor[0], tmp, src[3]);
747 spe_selb(f, factor[0], src[3], tmp, factor[0]);
748
749 spe_release_register(f, tmp);
750 break;
751
752 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
753 tmp = spe_allocate_available_register(f);
754 spe_il(f, tmp, 1);
755 spe_cuflt(f, tmp, tmp, 0);
756
757 for (i = 0; i < 3; i++) {
758 factor[i] = spe_allocate_available_register(f);
759
760 spe_fs(f, factor[i], tmp, const_color[i]);
761 }
762 spe_release_register(f, tmp);
763 break;
764
765 case PIPE_BLENDFACTOR_CONST_COLOR:
766 for (i = 0; i < 3; i++) {
767 factor[i] = const_color[i];
768 }
769 break;
770
771 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
772 factor[0] = spe_allocate_available_register(f);
773 factor[1] = factor[0];
774 factor[2] = factor[0];
775
776 tmp = spe_allocate_available_register(f);
777 spe_il(f, tmp, 1);
778 spe_cuflt(f, tmp, tmp, 0);
779 spe_fs(f, factor[0], tmp, const_color[3]);
780 spe_release_register(f, tmp);
781 break;
782
783 case PIPE_BLENDFACTOR_CONST_ALPHA:
784 factor[0] = const_color[3];
785 factor[1] = factor[0];
786 factor[2] = factor[0];
787 break;
788
789 case PIPE_BLENDFACTOR_ZERO:
790 break;
791
792 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
793 tmp = spe_allocate_available_register(f);
794
795 spe_il(f, tmp, 1);
796 spe_cuflt(f, tmp, tmp, 0);
797
798 for (i = 0; i < 3; ++i) {
799 if ((mask & (1U << i)) != 0) {
800 factor[i] = spe_allocate_available_register(f);
801 spe_fs(f, factor[i], tmp, src[i]);
802 }
803 }
804
805 spe_release_register(f, tmp);
806 break;
807
808 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
809 tmp = spe_allocate_available_register(f);
810 factor[0] = spe_allocate_available_register(f);
811 factor[1] = factor[0];
812 factor[2] = factor[0];
813
814 spe_il(f, tmp, 1);
815 spe_cuflt(f, tmp, tmp, 0);
816 spe_fs(f, factor[0], tmp, src[3]);
817
818 spe_release_register(f, tmp);
819 break;
820
821 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
822 tmp = spe_allocate_available_register(f);
823 factor[0] = spe_allocate_available_register(f);
824 factor[1] = factor[0];
825 factor[2] = factor[0];
826
827 spe_il(f, tmp, 1);
828 spe_cuflt(f, tmp, tmp, 0);
829 spe_fs(f, factor[0], tmp, dst[3]);
830
831 spe_release_register(f, tmp);
832 break;
833
834 case PIPE_BLENDFACTOR_INV_DST_COLOR:
835 tmp = spe_allocate_available_register(f);
836
837 spe_il(f, tmp, 1);
838 spe_cuflt(f, tmp, tmp, 0);
839
840 for (i = 0; i < 3; ++i) {
841 if ((mask & (1U << i)) != 0) {
842 factor[i] = spe_allocate_available_register(f);
843 spe_fs(f, factor[i], tmp, dst[i]);
844 }
845 }
846
847 spe_release_register(f, tmp);
848 break;
849
850 case PIPE_BLENDFACTOR_SRC1_COLOR:
851 case PIPE_BLENDFACTOR_SRC1_ALPHA:
852 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
853 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
854 default:
855 assert(0);
856 }
857}
858
859
860static void
861emit_blend_calculation(struct spe_function *f,
862 unsigned func, unsigned sF, unsigned dF,
863 int src, int src_factor, int dst, int dst_factor)
864{
865 int tmp = spe_allocate_available_register(f);
866
867 switch (func) {
868 case PIPE_BLEND_ADD:
869 if (sF == PIPE_BLENDFACTOR_ONE) {
870 if (dF == PIPE_BLENDFACTOR_ZERO) {
871 /* Do nothing. */
872 } else if (dF == PIPE_BLENDFACTOR_ONE) {
873 spe_fa(f, src, src, dst);
874 }
875 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
876 if (dF == PIPE_BLENDFACTOR_ZERO) {
877 spe_il(f, src, 0);
878 } else if (dF == PIPE_BLENDFACTOR_ONE) {
879 spe_or(f, src, dst, dst);
880 } else {
881 spe_fm(f, src, dst, dst_factor);
882 }
883 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
884 spe_fm(f, src, src, src_factor);
885 } else {
886 spe_fm(f, tmp, dst, dst_factor);
887 spe_fma(f, src, src, src_factor, tmp);
888 }
889 break;
890
891 case PIPE_BLEND_SUBTRACT:
892 if (sF == PIPE_BLENDFACTOR_ONE) {
893 if (dF == PIPE_BLENDFACTOR_ZERO) {
894 /* Do nothing. */
895 } else if (dF == PIPE_BLENDFACTOR_ONE) {
896 spe_fs(f, src, src, dst);
897 }
898 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
899 if (dF == PIPE_BLENDFACTOR_ZERO) {
900 spe_il(f, src, 0);
901 } else if (dF == PIPE_BLENDFACTOR_ONE) {
902 spe_il(f, tmp, 0);
903 spe_fs(f, src, tmp, dst);
904 } else {
905 spe_fm(f, src, dst, dst_factor);
906 }
907 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
908 spe_fm(f, src, src, src_factor);
909 } else {
910 spe_fm(f, tmp, dst, dst_factor);
911 spe_fms(f, src, src, src_factor, tmp);
912 }
913 break;
914
915 case PIPE_BLEND_REVERSE_SUBTRACT:
916 if (sF == PIPE_BLENDFACTOR_ONE) {
917 if (dF == PIPE_BLENDFACTOR_ZERO) {
918 spe_il(f, tmp, 0);
919 spe_fs(f, src, tmp, src);
920 } else if (dF == PIPE_BLENDFACTOR_ONE) {
921 spe_fs(f, src, dst, src);
922 }
923 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
924 if (dF == PIPE_BLENDFACTOR_ZERO) {
925 spe_il(f, src, 0);
926 } else if (dF == PIPE_BLENDFACTOR_ONE) {
927 spe_or(f, src, dst, dst);
928 } else {
929 spe_fm(f, src, dst, dst_factor);
930 }
931 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
932 spe_fm(f, src, src, src_factor);
933 } else {
934 spe_fm(f, tmp, src, src_factor);
935 spe_fms(f, src, src, dst_factor, tmp);
936 }
937 break;
938
939 case PIPE_BLEND_MIN:
940 spe_cgt(f, tmp, src, dst);
941 spe_selb(f, src, src, dst, tmp);
942 break;
943
944 case PIPE_BLEND_MAX:
945 spe_cgt(f, tmp, src, dst);
946 spe_selb(f, src, dst, src, tmp);
947 break;
948
949 default:
950 assert(0);
951 }
952
953 spe_release_register(f, tmp);
954}
955
956
957/**
958 * Generate code to perform alpha blending on the SPE
959 */
960void
961cell_generate_alpha_blend(struct cell_blend_state *cb)
962{
963 struct pipe_blend_state *const b = &cb->base;
964 struct spe_function *const f = &cb->code;
965
966 /* This code generates a maximum of 3 (source alpha factor)
967 * + 3 (destination alpha factor) + (3 * 6) (source color factor)
968 * + (3 * 6) (destination color factor) + (4 * 2) (blend equation)
969 * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to
970 * make it a happy power-of-two.
971 */
972 spe_init_func(f, SPE_INST_SIZE * 64);
973
974
975 const int frag[4] = {
976 spe_allocate_register(f, 3),
977 spe_allocate_register(f, 4),
978 spe_allocate_register(f, 5),
979 spe_allocate_register(f, 6),
980 };
981 const int pixel[4] = {
982 spe_allocate_register(f, 7),
983 spe_allocate_register(f, 8),
984 spe_allocate_register(f, 9),
985 spe_allocate_register(f, 10),
986 };
987 const int const_color[4] = {
988 spe_allocate_register(f, 11),
989 spe_allocate_register(f, 12),
990 spe_allocate_register(f, 13),
991 spe_allocate_register(f, 14),
992 };
993 unsigned func[4];
994 unsigned sF[4];
995 unsigned dF[4];
996 unsigned i;
997 int src_factor[4];
998 int dst_factor[4];
999
1000
1001 /* Does the selected blend mode make use of the source / destination
1002 * color (RGB) blend factors?
1003 */
1004 boolean need_color_factor = b->rt[0].blend_enable
1005 && (b->rt[0].rgb_func != PIPE_BLEND_MIN)
1006 && (b->rt[0].rgb_func != PIPE_BLEND_MAX);
1007
1008 /* Does the selected blend mode make use of the source / destination
1009 * alpha blend factors?
1010 */
1011 boolean need_alpha_factor = b->rt[0].blend_enable
1012 && (b->rt[0].alpha_func != PIPE_BLEND_MIN)
1013 && (b->rt[0].alpha_func != PIPE_BLEND_MAX);
1014
1015
1016 if (b->rt[0].blend_enable) {
1017 sF[0] = b->rt[0].rgb_src_factor;
1018 sF[1] = sF[0];
1019 sF[2] = sF[0];
1020 switch (b->rt[0].alpha_src_factor & 0x0f) {
1021 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
1022 sF[3] = PIPE_BLENDFACTOR_ONE;
1023 break;
1024 case PIPE_BLENDFACTOR_SRC_COLOR:
1025 case PIPE_BLENDFACTOR_DST_COLOR:
1026 case PIPE_BLENDFACTOR_CONST_COLOR:
1027 case PIPE_BLENDFACTOR_SRC1_COLOR:
1028 sF[3] = b->rt[0].alpha_src_factor + 1;
1029 break;
1030 default:
1031 sF[3] = b->rt[0].alpha_src_factor;
1032 }
1033
1034 dF[0] = b->rt[0].rgb_dst_factor;
1035 dF[1] = dF[0];
1036 dF[2] = dF[0];
1037 switch (b->rt[0].alpha_dst_factor & 0x0f) {
1038 case PIPE_BLENDFACTOR_SRC_COLOR:
1039 case PIPE_BLENDFACTOR_DST_COLOR:
1040 case PIPE_BLENDFACTOR_CONST_COLOR:
1041 case PIPE_BLENDFACTOR_SRC1_COLOR:
1042 dF[3] = b->rt[0].alpha_dst_factor + 1;
1043 break;
1044 default:
1045 dF[3] = b->rt[0].alpha_dst_factor;
1046 }
1047
1048 func[0] = b->rt[0].rgb_func;
1049 func[1] = func[0];
1050 func[2] = func[0];
1051 func[3] = b->rt[0].alpha_func;
1052 } else {
1053 sF[0] = PIPE_BLENDFACTOR_ONE;
1054 sF[1] = PIPE_BLENDFACTOR_ONE;
1055 sF[2] = PIPE_BLENDFACTOR_ONE;
1056 sF[3] = PIPE_BLENDFACTOR_ONE;
1057 dF[0] = PIPE_BLENDFACTOR_ZERO;
1058 dF[1] = PIPE_BLENDFACTOR_ZERO;
1059 dF[2] = PIPE_BLENDFACTOR_ZERO;
1060 dF[3] = PIPE_BLENDFACTOR_ZERO;
1061
1062 func[0] = PIPE_BLEND_ADD;
1063 func[1] = PIPE_BLEND_ADD;
1064 func[2] = PIPE_BLEND_ADD;
1065 func[3] = PIPE_BLEND_ADD;
1066 }
1067
1068
1069 /* If alpha writing is enabled and the alpha blend mode requires use of
1070 * the alpha factor, calculate the alpha factor.
1071 */
1072 if (((b->rt[0].colormask & 8) != 0) && need_alpha_factor) {
1073 src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3],
1074 frag[3], pixel[3]);
1075
1076 /* If the alpha destination blend factor is the same as the alpha source
1077 * blend factor, re-use the previously calculated value.
1078 */
1079 dst_factor[3] = (dF[3] == sF[3])
1080 ? src_factor[3]
1081 : emit_alpha_factor_calculation(f, dF[3], const_color[3],
1082 frag[3], pixel[3]);
1083 }
1084
1085
1086 if (sF[0] == sF[3]) {
1087 src_factor[0] = src_factor[3];
1088 src_factor[1] = src_factor[3];
1089 src_factor[2] = src_factor[3];
1090 } else if (sF[0] == dF[3]) {
1091 src_factor[0] = dst_factor[3];
1092 src_factor[1] = dst_factor[3];
1093 src_factor[2] = dst_factor[3];
1094 } else if (need_color_factor) {
1095 emit_color_factor_calculation(f,
1096 b->rt[0].rgb_src_factor,
1097 b->rt[0].colormask,
1098 frag, pixel, const_color, src_factor);
1099 }
1100
1101
1102 if (dF[0] == sF[3]) {
1103 dst_factor[0] = src_factor[3];
1104 dst_factor[1] = src_factor[3];
1105 dst_factor[2] = src_factor[3];
1106 } else if (dF[0] == dF[3]) {
1107 dst_factor[0] = dst_factor[3];
1108 dst_factor[1] = dst_factor[3];
1109 dst_factor[2] = dst_factor[3];
1110 } else if (dF[0] == sF[0]) {
1111 dst_factor[0] = src_factor[0];
1112 dst_factor[1] = src_factor[1];
1113 dst_factor[2] = src_factor[2];
1114 } else if (need_color_factor) {
1115 emit_color_factor_calculation(f,
1116 b->rt[0].rgb_dst_factor,
1117 b->rt[0].colormask,
1118 frag, pixel, const_color, dst_factor);
1119 }
1120
1121
1122
1123 for (i = 0; i < 4; ++i) {
1124 if ((b->rt[0].colormask & (1U << i)) != 0) {
1125 emit_blend_calculation(f,
1126 func[i], sF[i], dF[i],
1127 frag[i], src_factor[i],
1128 pixel[i], dst_factor[i]);
1129 }
1130 }
1131
1132 spe_bi(f, 0, 0, 0);
1133
1134#if 0
1135 {
1136 const uint32_t *p = f->store;
1137
1138 printf("# %u instructions\n", f->csr - f->store);
1139 printf("# blend (%sabled)\n",
1140 (cb->base.blend_enable) ? "en" : "dis");
1141 printf("# RGB func / sf / df: %u %u %u\n",
1142 cb->base.rgb_func,
1143 cb->base.rgb_src_factor,
1144 cb->base.rgb_dst_factor);
1145 printf("# ALP func / sf / df: %u %u %u\n",
1146 cb->base.alpha_func,
1147 cb->base.alpha_src_factor,
1148 cb->base.alpha_dst_factor);
1149
1150 printf("\t.text\n");
1151 for (/* empty */; p < f->csr; p++) {
1152 printf("\t.long\t0x%04x\n", *p);
1153 }
1154 fflush(stdout);
1155 }
1156#endif
1157}
1158
1159
1160static int
1161PC_OFFSET(const struct spe_function *f, const void *d)
1162{
1163 const intptr_t pc = (intptr_t) &f->store[f->num_inst];
1164 const intptr_t ea = ~0x0f & (intptr_t) d;
1165
1166 return (ea - pc) >> 2;
1167}
1168
1169
1170/**
1171 * Generate code to perform color conversion and logic op
1172 *
1173 * \bug
1174 * The code generated by this function should also perform dithering.
1175 *
1176 * \bug
1177 * The code generated by this function should also perform color-write
1178 * masking.
1179 *
1180 * \bug
1181 * Only two framebuffer formats are supported at this time.
1182 */
1183void
1184cell_generate_logic_op(struct spe_function *f,
1185 const struct pipe_blend_state *blend,
1186 struct pipe_surface *surf)
1187{
1188 const unsigned logic_op = (blend->logicop_enable)
1189 ? blend->logicop_func : PIPE_LOGICOP_COPY;
1190
1191 /* This code generates a maximum of 37 instructions. An additional 32
1192 * bytes (equiv. to 8 instructions) are needed for data storage. Round up
1193 * to 64 to make it a happy power-of-two.
1194 */
1195 spe_init_func(f, SPE_INST_SIZE * 64);
1196
1197
1198 /* Pixel colors in framebuffer format in AoS layout.
1199 */
1200 const int pixel[4] = {
1201 spe_allocate_register(f, 3),
1202 spe_allocate_register(f, 4),
1203 spe_allocate_register(f, 5),
1204 spe_allocate_register(f, 6),
1205 };
1206
1207 /* Fragment colors stored as floats in SoA layout.
1208 */
1209 const int frag[4] = {
1210 spe_allocate_register(f, 7),
1211 spe_allocate_register(f, 8),
1212 spe_allocate_register(f, 9),
1213 spe_allocate_register(f, 10),
1214 };
1215
1216 const int mask = spe_allocate_register(f, 11);
1217
1218
1219 /* Short-circuit the noop and invert cases.
1220 */
1221 if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->rt[0].colormask == 0)) {
1222 spe_bi(f, 0, 0, 0);
1223 return;
1224 } else if (logic_op == PIPE_LOGICOP_INVERT) {
1225 spe_nor(f, pixel[0], pixel[0], pixel[0]);
1226 spe_nor(f, pixel[1], pixel[1], pixel[1]);
1227 spe_nor(f, pixel[2], pixel[2], pixel[2]);
1228 spe_nor(f, pixel[3], pixel[3], pixel[3]);
1229 spe_bi(f, 0, 0, 0);
1230 return;
1231 }
1232
1233
1234 const int tmp[4] = {
1235 spe_allocate_available_register(f),
1236 spe_allocate_available_register(f),
1237 spe_allocate_available_register(f),
1238 spe_allocate_available_register(f),
1239 };
1240
1241 const int shuf_xpose_hi = spe_allocate_available_register(f);
1242 const int shuf_xpose_lo = spe_allocate_available_register(f);
1243 const int shuf_color = spe_allocate_available_register(f);
1244
1245
1246 /* Pointer to the begining of the function's private data area.
1247 */
1248 uint32_t *const data = ((uint32_t *) f->store) + (64 - 8);
1249
1250
1251 /* Convert fragment colors to framebuffer format in AoS layout.
1252 */
1253 switch (surf->format) {
1254 case PIPE_FORMAT_B8G8R8A8_UNORM:
1255 data[0] = 0x00010203;
1256 data[1] = 0x10111213;
1257 data[2] = 0x04050607;
1258 data[3] = 0x14151617;
1259 data[4] = 0x0c000408;
1260 data[5] = 0x80808080;
1261 data[6] = 0x80808080;
1262 data[7] = 0x80808080;
1263 break;
1264 case PIPE_FORMAT_A8R8G8B8_UNORM:
1265 data[0] = 0x03020100;
1266 data[1] = 0x13121110;
1267 data[2] = 0x07060504;
1268 data[3] = 0x17161514;
1269 data[4] = 0x0804000c;
1270 data[5] = 0x80808080;
1271 data[6] = 0x80808080;
1272 data[7] = 0x80808080;
1273 break;
1274 default:
1275 fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()");
1276 ASSERT(0);
1277 }
1278
1279 spe_ilh(f, tmp[0], 0x0808);
1280 spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0));
1281 spe_lqr(f, shuf_color, PC_OFFSET(f, data+4));
1282 spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]);
1283
1284 spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi);
1285 spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo);
1286 spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi);
1287 spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo);
1288
1289 spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi);
1290 spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo);
1291 spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi);
1292 spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo);
1293
1294 spe_cfltu(f, frag[0], frag[0], 32);
1295 spe_cfltu(f, frag[1], frag[1], 32);
1296 spe_cfltu(f, frag[2], frag[2], 32);
1297 spe_cfltu(f, frag[3], frag[3], 32);
1298
1299 spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color);
1300 spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color);
1301 spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color);
1302 spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color);
1303
1304
1305 /* If logic op is enabled, perform the requested logical operation on the
1306 * converted fragment colors and the pixel colors.
1307 */
1308 switch (logic_op) {
1309 case PIPE_LOGICOP_CLEAR:
1310 spe_il(f, frag[0], 0);
1311 spe_il(f, frag[1], 0);
1312 spe_il(f, frag[2], 0);
1313 spe_il(f, frag[3], 0);
1314 break;
1315 case PIPE_LOGICOP_NOR:
1316 spe_nor(f, frag[0], frag[0], pixel[0]);
1317 spe_nor(f, frag[1], frag[1], pixel[1]);
1318 spe_nor(f, frag[2], frag[2], pixel[2]);
1319 spe_nor(f, frag[3], frag[3], pixel[3]);
1320 break;
1321 case PIPE_LOGICOP_AND_INVERTED:
1322 spe_andc(f, frag[0], pixel[0], frag[0]);
1323 spe_andc(f, frag[1], pixel[1], frag[1]);
1324 spe_andc(f, frag[2], pixel[2], frag[2]);
1325 spe_andc(f, frag[3], pixel[3], frag[3]);
1326 break;
1327 case PIPE_LOGICOP_COPY_INVERTED:
1328 spe_nor(f, frag[0], frag[0], frag[0]);
1329 spe_nor(f, frag[1], frag[1], frag[1]);
1330 spe_nor(f, frag[2], frag[2], frag[2]);
1331 spe_nor(f, frag[3], frag[3], frag[3]);
1332 break;
1333 case PIPE_LOGICOP_AND_REVERSE:
1334 spe_andc(f, frag[0], frag[0], pixel[0]);
1335 spe_andc(f, frag[1], frag[1], pixel[1]);
1336 spe_andc(f, frag[2], frag[2], pixel[2]);
1337 spe_andc(f, frag[3], frag[3], pixel[3]);
1338 break;
1339 case PIPE_LOGICOP_XOR:
1340 spe_xor(f, frag[0], frag[0], pixel[0]);
1341 spe_xor(f, frag[1], frag[1], pixel[1]);
1342 spe_xor(f, frag[2], frag[2], pixel[2]);
1343 spe_xor(f, frag[3], frag[3], pixel[3]);
1344 break;
1345 case PIPE_LOGICOP_NAND:
1346 spe_nand(f, frag[0], frag[0], pixel[0]);
1347 spe_nand(f, frag[1], frag[1], pixel[1]);
1348 spe_nand(f, frag[2], frag[2], pixel[2]);
1349 spe_nand(f, frag[3], frag[3], pixel[3]);
1350 break;
1351 case PIPE_LOGICOP_AND:
1352 spe_and(f, frag[0], frag[0], pixel[0]);
1353 spe_and(f, frag[1], frag[1], pixel[1]);
1354 spe_and(f, frag[2], frag[2], pixel[2]);
1355 spe_and(f, frag[3], frag[3], pixel[3]);
1356 break;
1357 case PIPE_LOGICOP_EQUIV:
1358 spe_eqv(f, frag[0], frag[0], pixel[0]);
1359 spe_eqv(f, frag[1], frag[1], pixel[1]);
1360 spe_eqv(f, frag[2], frag[2], pixel[2]);
1361 spe_eqv(f, frag[3], frag[3], pixel[3]);
1362 break;
1363 case PIPE_LOGICOP_OR_INVERTED:
1364 spe_orc(f, frag[0], pixel[0], frag[0]);
1365 spe_orc(f, frag[1], pixel[1], frag[1]);
1366 spe_orc(f, frag[2], pixel[2], frag[2]);
1367 spe_orc(f, frag[3], pixel[3], frag[3]);
1368 break;
1369 case PIPE_LOGICOP_COPY:
1370 break;
1371 case PIPE_LOGICOP_OR_REVERSE:
1372 spe_orc(f, frag[0], frag[0], pixel[0]);
1373 spe_orc(f, frag[1], frag[1], pixel[1]);
1374 spe_orc(f, frag[2], frag[2], pixel[2]);
1375 spe_orc(f, frag[3], frag[3], pixel[3]);
1376 break;
1377 case PIPE_LOGICOP_OR:
1378 spe_or(f, frag[0], frag[0], pixel[0]);
1379 spe_or(f, frag[1], frag[1], pixel[1]);
1380 spe_or(f, frag[2], frag[2], pixel[2]);
1381 spe_or(f, frag[3], frag[3], pixel[3]);
1382 break;
1383 case PIPE_LOGICOP_SET:
1384 spe_il(f, frag[0], ~0);
1385 spe_il(f, frag[1], ~0);
1386 spe_il(f, frag[2], ~0);
1387 spe_il(f, frag[3], ~0);
1388 break;
1389
1390 /* These two cases are short-circuited above.
1391 */
1392 case PIPE_LOGICOP_INVERT:
1393 case PIPE_LOGICOP_NOOP:
1394 default:
1395 assert(0);
1396 }
1397
1398
1399 /* Apply fragment mask.
1400 */
1401 spe_ilh(f, tmp[0], 0x0000);
1402 spe_ilh(f, tmp[1], 0x0404);
1403 spe_ilh(f, tmp[2], 0x0808);
1404 spe_ilh(f, tmp[3], 0x0c0c);
1405
1406 spe_shufb(f, tmp[0], mask, mask, tmp[0]);
1407 spe_shufb(f, tmp[1], mask, mask, tmp[1]);
1408 spe_shufb(f, tmp[2], mask, mask, tmp[2]);
1409 spe_shufb(f, tmp[3], mask, mask, tmp[3]);
1410
1411 spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]);
1412 spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]);
1413 spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]);
1414 spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]);
1415
1416 spe_bi(f, 0, 0, 0);
1417
1418#if 0
1419 {
1420 const uint32_t *p = f->store;
1421 unsigned i;
1422
1423 printf("# %u instructions\n", f->csr - f->store);
1424
1425 printf("\t.text\n");
1426 for (i = 0; i < 64; i++) {
1427 printf("\t.long\t0x%04x\n", p[i]);
1428 }
1429 fflush(stdout);
1430 }
1431#endif
1432}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h
deleted file mode 100644
index a8267a51331..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h
+++ /dev/null
@@ -1,39 +0,0 @@
1/*
2 * (C) Copyright IBM Corporation 2008
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
#ifndef CELL_STATE_PER_FRAGMENT_H
#define CELL_STATE_PER_FRAGMENT_H

/* Generate SPE code for the depth/stencil/alpha tests of \c cdsa. */
extern void
cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa);

/* Generate SPE code for alpha blending into cb->code. */
extern void
cell_generate_alpha_blend(struct cell_blend_state *cb);

/* Generate SPE code for color conversion plus logic op for \c surf. */
extern void
cell_generate_logic_op(struct spe_function *f,
                       const struct pipe_blend_state *blend,
                       struct pipe_surface *surf);

#endif /* CELL_STATE_PER_FRAGMENT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c
deleted file mode 100644
index ddf14772689..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_state_shader.c
+++ /dev/null
@@ -1,229 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "pipe/p_defines.h"
29#include "util/u_memory.h"
30#include "util/u_inlines.h"
31#include "draw/draw_context.h"
32#include "tgsi/tgsi_parse.h"
33
34#include "cell_context.h"
35#include "cell_state.h"
36#include "cell_gen_fp.h"
37#include "cell_texture.h"
38
39
/** Cast wrapper: opaque pipe fs handle -> cell fragment shader state. */
static INLINE struct cell_fragment_shader_state *
cell_fragment_shader_state(void *shader)
{
   return (struct cell_fragment_shader_state *) shader;
}
46
47
/** Cast wrapper: opaque pipe vs handle -> cell vertex shader state. */
static INLINE struct cell_vertex_shader_state *
cell_vertex_shader_state(void *shader)
{
   return (struct cell_vertex_shader_state *) shader;
}
54
55
/**
 * Create fragment shader state.
 * Called via pipe->create_fs_state()
 *
 * Duplicates the TGSI tokens, scans them, and compiles the shader to SPE
 * machine code up front.  Returns NULL on allocation failure.
 */
static void *
cell_create_fs_state(struct pipe_context *pipe,
                     const struct pipe_shader_state *templ)
{
   struct cell_context *cell = cell_context(pipe);
   struct cell_fragment_shader_state *cfs;

   cfs = CALLOC_STRUCT(cell_fragment_shader_state);
   if (!cfs)
      return NULL;

   /* Own a private copy of the tokens; freed in cell_delete_fs_state(). */
   cfs->shader.tokens = tgsi_dup_tokens(templ->tokens);
   if (!cfs->shader.tokens) {
      FREE(cfs);
      return NULL;
   }

   tgsi_scan_shader(templ->tokens, &cfs->info);

   cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code);

   return cfs;
}
83
84
85/**
86 * Called via pipe->bind_fs_state()
87 */
88static void
89cell_bind_fs_state(struct pipe_context *pipe, void *fs)
90{
91 struct cell_context *cell = cell_context(pipe);
92
93 cell->fs = cell_fragment_shader_state(fs);
94
95 cell->dirty |= CELL_NEW_FS;
96}
97
98
/**
 * Called via pipe->delete_fs_state()
 *
 * Releases the compiled SPE code, the duplicated tokens, and the state
 * object itself.
 */
static void
cell_delete_fs_state(struct pipe_context *pipe, void *fs)
{
   struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs);

   spe_release_func(&cfs->code);

   FREE((void *) cfs->shader.tokens);
   FREE(cfs);
}
112
113
/**
 * Create vertex shader state.
 * Called via pipe->create_vs_state()
 *
 * Vertex shading runs on the PPU through the draw module, so this only
 * duplicates/scans the tokens and registers the shader with draw.
 * Returns NULL on allocation failure.
 */
static void *
cell_create_vs_state(struct pipe_context *pipe,
                     const struct pipe_shader_state *templ)
{
   struct cell_context *cell = cell_context(pipe);
   struct cell_vertex_shader_state *cvs;

   cvs = CALLOC_STRUCT(cell_vertex_shader_state);
   if (!cvs)
      return NULL;

   /* Own a private copy of the tokens; freed in cell_delete_vs_state(). */
   cvs->shader.tokens = tgsi_dup_tokens(templ->tokens);
   if (!cvs->shader.tokens) {
      FREE(cvs);
      return NULL;
   }

   tgsi_scan_shader(templ->tokens, &cvs->info);

   cvs->draw_data = draw_create_vertex_shader(cell->draw, &cvs->shader);
   if (cvs->draw_data == NULL) {
      FREE( (void *) cvs->shader.tokens );
      FREE( cvs );
      return NULL;
   }

   return cvs;
}
146
147
148/**
149 * Called via pipe->bind_vs_state()
150 */
151static void
152cell_bind_vs_state(struct pipe_context *pipe, void *vs)
153{
154 struct cell_context *cell = cell_context(pipe);
155
156 cell->vs = cell_vertex_shader_state(vs);
157
158 draw_bind_vertex_shader(cell->draw,
159 (cell->vs ? cell->vs->draw_data : NULL));
160
161 cell->dirty |= CELL_NEW_VS;
162}
163
164
/**
 * Called via pipe->delete_vs_state()
 *
 * Unregisters the shader from the draw module, then frees the duplicated
 * tokens and the state object.
 */
static void
cell_delete_vs_state(struct pipe_context *pipe, void *vs)
{
   struct cell_context *cell = cell_context(pipe);
   struct cell_vertex_shader_state *cvs = cell_vertex_shader_state(vs);

   draw_delete_vertex_shader(cell->draw, cvs->draw_data);
   FREE( (void *) cvs->shader.tokens );
   FREE( cvs );
}
178
179
/**
 * Called via pipe->set_constant_buffer()
 *
 * Stores a reference to the buffer, maps its data for later SPE upload,
 * and forwards vertex-shader constants to the draw module.
 */
static void
cell_set_constant_buffer(struct pipe_context *pipe,
                         uint shader, uint index,
                         struct pipe_resource *constants)
{
   struct cell_context *cell = cell_context(pipe);
   unsigned size = constants ? constants->width0 : 0;
   const void *data = constants ? cell_resource(constants)->data : NULL;

   assert(shader < PIPE_SHADER_TYPES);
   assert(index == 0);

   /* NOTE(review): pointer-equality early-out; presumably callers never
    * rewrite a buffer's contents in place without rebinding — confirm.
    */
   if (cell->constants[shader] == constants)
      return;

   draw_flush(cell->draw);

   /* note: reference counting */
   pipe_resource_reference(&cell->constants[shader], constants);

   if(shader == PIPE_SHADER_VERTEX) {
      draw_set_mapped_constant_buffer(cell->draw, PIPE_SHADER_VERTEX, 0,
                                      data, size);
   }

   cell->mapped_constants[shader] = data;

   if (shader == PIPE_SHADER_VERTEX)
      cell->dirty |= CELL_NEW_VS_CONSTANTS;
   else if (shader == PIPE_SHADER_FRAGMENT)
      cell->dirty |= CELL_NEW_FS_CONSTANTS;
}
215
216
/* Plug the shader-related entry points into the cell context's pipe vtable. */
void
cell_init_shader_functions(struct cell_context *cell)
{
   cell->pipe.create_fs_state = cell_create_fs_state;
   cell->pipe.bind_fs_state = cell_bind_fs_state;
   cell->pipe.delete_fs_state = cell_delete_fs_state;

   cell->pipe.create_vs_state = cell_create_vs_state;
   cell->pipe.bind_vs_state = cell_bind_vs_state;
   cell->pipe.delete_vs_state = cell_delete_vs_state;

   cell->pipe.set_constant_buffer = cell_set_constant_buffer;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
deleted file mode 100644
index 7f65b82619e..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c
+++ /dev/null
@@ -1,120 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/* Authors: Keith Whitwell <keith@tungstengraphics.com>
29 */
30
31
32#include "cell_context.h"
33#include "cell_state.h"
34
35#include "util/u_memory.h"
36#include "util/u_transfer.h"
37#include "draw/draw_context.h"
38
39
/**
 * Called via pipe->create_vertex_elements_state()
 *
 * Copies the attribute array into a newly MALLOC'd state object.
 * Returns NULL on allocation failure.
 */
static void *
cell_create_vertex_elements_state(struct pipe_context *pipe,
                                  unsigned count,
                                  const struct pipe_vertex_element *attribs)
{
   struct cell_velems_state *velems;
   assert(count <= PIPE_MAX_ATTRIBS);
   velems = (struct cell_velems_state *) MALLOC(sizeof(struct cell_velems_state));
   if (velems) {
      velems->count = count;
      memcpy(velems->velem, attribs, sizeof(*attribs) * count);
   }
   return velems;
}
54
/**
 * Called via pipe->bind_vertex_elements_state()
 *
 * Makes the given vertex-element state current and forwards it to the
 * draw module (a NULL binding only clears the local pointer).
 */
static void
cell_bind_vertex_elements_state(struct pipe_context *pipe,
                                void *velems)
{
   struct cell_context *cell = cell_context(pipe);
   struct cell_velems_state *cell_velems = (struct cell_velems_state *) velems;

   cell->velems = cell_velems;

   cell->dirty |= CELL_NEW_VERTEX;

   if (cell_velems)
      draw_set_vertex_elements(cell->draw, cell_velems->count, cell_velems->velem);
}
69
/* Called via pipe->delete_vertex_elements_state(); state was MALLOC'd in
 * the create hook, so a plain FREE suffices.
 */
static void
cell_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
{
   FREE( velems );
}
75
76
/**
 * Called via pipe->set_vertex_buffers()
 *
 * Keeps a reference-counted local copy of the bindings and forwards them
 * to the draw module.
 */
static void
cell_set_vertex_buffers(struct pipe_context *pipe,
                        unsigned count,
                        const struct pipe_vertex_buffer *buffers)
{
   struct cell_context *cell = cell_context(pipe);

   assert(count <= PIPE_MAX_ATTRIBS);

   util_copy_vertex_buffers(cell->vertex_buffer,
                            &cell->num_vertex_buffers,
                            buffers, count);

   cell->dirty |= CELL_NEW_VERTEX;

   draw_set_vertex_buffers(cell->draw, count, buffers);
}
94
95
96static void
97cell_set_index_buffer(struct pipe_context *pipe,
98 const struct pipe_index_buffer *ib)
99{
100 struct cell_context *cell = cell_context(pipe);
101
102 if (ib)
103 memcpy(&cell->index_buffer, ib, sizeof(cell->index_buffer));
104 else
105 memset(&cell->index_buffer, 0, sizeof(cell->index_buffer));
106
107 draw_set_index_buffer(cell->draw, ib);
108}
109
110
/* Plug the vertex-state entry points into the cell context's pipe vtable. */
void
cell_init_vertex_functions(struct cell_context *cell)
{
   cell->pipe.set_vertex_buffers = cell_set_vertex_buffers;
   cell->pipe.set_index_buffer = cell_set_index_buffer;
   cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state;
   cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state;
   cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state;
   cell->pipe.redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c
deleted file mode 100644
index 777454479b1..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_surface.c
+++ /dev/null
@@ -1,37 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "util/u_surface.h"
29#include "cell_context.h"
30#include "cell_surface.h"
31
32
/* Plug the surface entry points into the cell context's pipe vtable;
 * region copies are delegated to the shared util implementation.
 */
void
cell_init_surface_functions(struct cell_context *cell)
{
   cell->pipe.resource_copy_region = util_resource_copy_region;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_surface.h b/src/gallium/drivers/cell/ppu/cell_surface.h
deleted file mode 100644
index 9e58f329443..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_surface.h
+++ /dev/null
@@ -1,42 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/* Authors: Keith Whitwell <keith@tungstengraphics.com>
29 */
30
31#ifndef CELL_SURFACE_H
32#define CELL_SURFACE_H
33
34
35struct cell_context;
36
37
38extern void
39cell_init_surface_functions(struct cell_context *cell);
40
41
42#endif /* SP_SURFACE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
deleted file mode 100644
index 946a7050e5f..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ /dev/null
@@ -1,644 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 * Michel Dänzer <michel@tungstengraphics.com>
31 * Brian Paul
32 */
33
34#include "pipe/p_context.h"
35#include "pipe/p_defines.h"
36#include "util/u_inlines.h"
37#include "util/u_transfer.h"
38#include "util/u_format.h"
39#include "util/u_math.h"
40#include "util/u_memory.h"
41
42#include "cell_context.h"
43#include "cell_screen.h"
44#include "cell_state.h"
45#include "cell_texture.h"
46
47#include "state_tracker/sw_winsys.h"
48
49
50
51static boolean
52cell_resource_layout(struct pipe_screen *screen,
53 struct cell_resource *ct)
54{
55 struct pipe_resource *pt = &ct->base;
56 unsigned level;
57 unsigned width = pt->width0;
58 unsigned height = pt->height0;
59 unsigned depth = pt->depth0;
60
61 ct->buffer_size = 0;
62
63 for (level = 0; level <= pt->last_level; level++) {
64 unsigned size;
65 unsigned w_tile, h_tile;
66
67 assert(level < CELL_MAX_TEXTURE_LEVELS);
68
69 /* width, height, rounded up to tile size */
70 w_tile = align(width, TILE_SIZE);
71 h_tile = align(height, TILE_SIZE);
72
73 ct->stride[level] = util_format_get_stride(pt->format, w_tile);
74
75 ct->level_offset[level] = ct->buffer_size;
76
77 size = ct->stride[level] * util_format_get_nblocksy(pt->format, h_tile);
78 if (pt->target == PIPE_TEXTURE_CUBE)
79 size *= 6;
80 else
81 size *= depth;
82
83 ct->buffer_size += size;
84
85 width = u_minify(width, 1);
86 height = u_minify(height, 1);
87 depth = u_minify(depth, 1);
88 }
89
90 ct->data = align_malloc(ct->buffer_size, 16);
91
92 return ct->data != NULL;
93}
94
95
96/**
97 * Texture layout for simple color buffers.
98 */
99static boolean
100cell_displaytarget_layout(struct pipe_screen *screen,
101 struct cell_resource * ct)
102{
103 struct sw_winsys *winsys = cell_screen(screen)->winsys;
104
105 /* Round up the surface size to a multiple of the tile size?
106 */
107 ct->dt = winsys->displaytarget_create(winsys,
108 ct->base.bind,
109 ct->base.format,
110 ct->base.width0,
111 ct->base.height0,
112 16,
113 &ct->dt_stride );
114
115 return ct->dt != NULL;
116}
117
118static struct pipe_resource *
119cell_resource_create(struct pipe_screen *screen,
120 const struct pipe_resource *templat)
121{
122 struct cell_resource *ct = CALLOC_STRUCT(cell_resource);
123 if (!ct)
124 return NULL;
125
126 ct->base = *templat;
127 pipe_reference_init(&ct->base.reference, 1);
128 ct->base.screen = screen;
129
130 /* Create both a displaytarget (linear) and regular texture
131 * (twiddled). Convert twiddled->linear at flush_frontbuffer time.
132 */
133 if (ct->base.bind & (PIPE_BIND_DISPLAY_TARGET |
134 PIPE_BIND_SCANOUT |
135 PIPE_BIND_SHARED)) {
136 if (!cell_displaytarget_layout(screen, ct))
137 goto fail;
138 }
139
140 if (!cell_resource_layout(screen, ct))
141 goto fail;
142
143 return &ct->base;
144
145fail:
146 if (ct->dt) {
147 struct sw_winsys *winsys = cell_screen(screen)->winsys;
148 winsys->displaytarget_destroy(winsys, ct->dt);
149 }
150
151 FREE(ct);
152
153 return NULL;
154}
155
156
157static void
158cell_resource_destroy(struct pipe_screen *scrn, struct pipe_resource *pt)
159{
160 struct cell_screen *screen = cell_screen(scrn);
161 struct sw_winsys *winsys = screen->winsys;
162 struct cell_resource *ct = cell_resource(pt);
163
164 if (ct->dt) {
165 /* display target */
166 winsys->displaytarget_destroy(winsys, ct->dt);
167 }
168 else if (!ct->userBuffer) {
169 align_free(ct->data);
170 }
171
172 FREE(ct);
173}
174
175
176
177/**
178 * Convert image from linear layout to tiled layout. 4-byte pixels.
179 */
180static void
181twiddle_image_uint(uint w, uint h, uint tile_size, uint *dst,
182 uint src_stride, const uint *src)
183{
184 const uint tile_size2 = tile_size * tile_size;
185 const uint h_t = (h + tile_size - 1) / tile_size;
186 const uint w_t = (w + tile_size - 1) / tile_size;
187
188 uint it, jt; /* tile counters */
189 uint i, j; /* intra-tile counters */
190
191 src_stride /= 4; /* convert from bytes to pixels */
192
193 /* loop over dest tiles */
194 for (it = 0; it < h_t; it++) {
195 for (jt = 0; jt < w_t; jt++) {
196 /* start of dest tile: */
197 uint *tdst = dst + (it * w_t + jt) * tile_size2;
198
199 /* compute size of this tile (may be smaller than tile_size) */
200 /* XXX note: a compiler bug was found here. That's why the code
201 * looks as it does.
202 */
203 uint tile_width = w - jt * tile_size;
204 tile_width = MIN2(tile_width, tile_size);
205 uint tile_height = h - it * tile_size;
206 tile_height = MIN2(tile_height, tile_size);
207
208 /* loop over texels in the tile */
209 for (i = 0; i < tile_height; i++) {
210 for (j = 0; j < tile_width; j++) {
211 const uint srci = it * tile_size + i;
212 const uint srcj = jt * tile_size + j;
213 ASSERT(srci < h);
214 ASSERT(srcj < w);
215 tdst[i * tile_size + j] = src[srci * src_stride + srcj];
216 }
217 }
218 }
219 }
220}
221
222
223/**
224 * For Cell. Basically, rearrange the pixels/quads from this layout:
225 * +--+--+--+--+
226 * |p0|p1|p2|p3|....
227 * +--+--+--+--+
228 *
229 * to this layout:
230 * +--+--+
231 * |p0|p1|....
232 * +--+--+
233 * |p2|p3|
234 * +--+--+
235 */
236static void
237twiddle_tile(const uint *tileIn, uint *tileOut)
238{
239 int y, x;
240
241 for (y = 0; y < TILE_SIZE; y+=2) {
242 for (x = 0; x < TILE_SIZE; x+=2) {
243 int k = 4 * (y/2 * TILE_SIZE/2 + x/2);
244 tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k];
245 tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1];
246 tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2];
247 tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3];
248 }
249 }
250}
251
252
253/**
254 * Convert image from tiled layout to linear layout. 4-byte pixels.
255 */
256static void
257untwiddle_image_uint(uint w, uint h, uint tile_size, uint *dst,
258 uint dst_stride, const uint *src)
259{
260 const uint tile_size2 = tile_size * tile_size;
261 const uint h_t = (h + tile_size - 1) / tile_size;
262 const uint w_t = (w + tile_size - 1) / tile_size;
263 uint *tile_buf;
264 uint it, jt; /* tile counters */
265 uint i, j; /* intra-tile counters */
266
267 dst_stride /= 4; /* convert from bytes to pixels */
268
269 tile_buf = align_malloc(tile_size * tile_size * 4, 16);
270
271 /* loop over src tiles */
272 for (it = 0; it < h_t; it++) {
273 for (jt = 0; jt < w_t; jt++) {
274 /* start of src tile: */
275 const uint *tsrc = src + (it * w_t + jt) * tile_size2;
276
277 twiddle_tile(tsrc, tile_buf);
278 tsrc = tile_buf;
279
280 /* compute size of this tile (may be smaller than tile_size) */
281 /* XXX note: a compiler bug was found here. That's why the code
282 * looks as it does.
283 */
284 uint tile_width = w - jt * tile_size;
285 tile_width = MIN2(tile_width, tile_size);
286 uint tile_height = h - it * tile_size;
287 tile_height = MIN2(tile_height, tile_size);
288
289 /* loop over texels in the tile */
290 for (i = 0; i < tile_height; i++) {
291 for (j = 0; j < tile_width; j++) {
292 uint dsti = it * tile_size + i;
293 uint dstj = jt * tile_size + j;
294 ASSERT(dsti < h);
295 ASSERT(dstj < w);
296 dst[dsti * dst_stride + dstj] = tsrc[i * tile_size + j];
297 }
298 }
299 }
300 }
301
302 align_free(tile_buf);
303}
304
305
306static struct pipe_surface *
307cell_create_surface(struct pipe_context *ctx,
308 struct pipe_resource *pt,
309 const struct pipe_surface *surf_tmpl)
310{
311 struct cell_resource *ct = cell_resource(pt);
312 struct pipe_surface *ps;
313
314 assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
315 ps = CALLOC_STRUCT(pipe_surface);
316 if (ps) {
317 pipe_reference_init(&ps->reference, 1);
318 pipe_resource_reference(&ps->texture, pt);
319 ps->format = surf_tmpl->format;
320 ps->context = ctx;
321 ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level);
322 ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level);
323 /* XXX may need to override usage flags (see sp_texture.c) */
324 ps->usage = surf_tmpl->usage;
325 ps->u.tex.level = surf_tmpl->u.tex.level;
326 ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
327 ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
328 }
329 return ps;
330}
331
332
333static void
334cell_surface_destroy(struct pipe_context *ctx, struct pipe_surface *surf)
335{
336 pipe_resource_reference(&surf->texture, NULL);
337 FREE(surf);
338}
339
340
341/**
342 * Create new pipe_transfer object.
343 * This is used by the user to put tex data into a texture (and get it
344 * back out for glGetTexImage).
345 */
346static struct pipe_transfer *
347cell_get_transfer(struct pipe_context *ctx,
348 struct pipe_resource *resource,
349 unsigned level,
350 unsigned usage,
351 const struct pipe_box *box)
352{
353 struct cell_resource *ct = cell_resource(resource);
354 struct cell_transfer *ctrans;
355 enum pipe_format format = resource->format;
356
357 assert(resource);
358 assert(level <= resource->last_level);
359
360 /* make sure the requested region is in the image bounds */
361 assert(box->x + box->width <= u_minify(resource->width0, level));
362 assert(box->y + box->height <= u_minify(resource->height0, level));
363 assert(box->z + box->depth <= (u_minify(resource->depth0, level) + resource->array_size - 1));
364
365 ctrans = CALLOC_STRUCT(cell_transfer);
366 if (ctrans) {
367 struct pipe_transfer *pt = &ctrans->base;
368 pipe_resource_reference(&pt->resource, resource);
369 pt->level = level;
370 pt->usage = usage;
371 pt->box = *box;
372 pt->stride = ct->stride[level];
373
374 ctrans->offset = ct->level_offset[level];
375
376 if (resource->target == PIPE_TEXTURE_CUBE || resource->target == PIPE_TEXTURE_3D) {
377 unsigned h_tile = align(u_minify(resource->height0, level), TILE_SIZE);
378 ctrans->offset += box->z * util_format_get_nblocksy(format, h_tile) * pt->stride;
379 }
380 else {
381 assert(box->z == 0);
382 }
383
384 return pt;
385 }
386 return NULL;
387}
388
389
390static void
391cell_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *t)
392{
393 struct cell_transfer *transfer = cell_transfer(t);
394 /* Effectively do the texture_update work here - if texture images
395 * needed post-processing to put them into hardware layout, this is
396 * where it would happen. For cell, nothing to do.
397 */
398 assert (transfer->base.resource);
399 pipe_resource_reference(&transfer->base.resource, NULL);
400 FREE(transfer);
401}
402
403
404/**
405 * Return pointer to texture image data in linear layout.
406 */
407static void *
408cell_transfer_map(struct pipe_context *ctx, struct pipe_transfer *transfer)
409{
410 struct cell_transfer *ctrans = cell_transfer(transfer);
411 struct pipe_resource *pt = transfer->resource;
412 struct cell_resource *ct = cell_resource(pt);
413
414 assert(transfer->resource);
415
416 if (ct->mapped == NULL) {
417 ct->mapped = ct->data;
418 }
419
420
421 /* Better test would be resource->is_linear
422 */
423 if (transfer->resource->target != PIPE_BUFFER) {
424 const uint level = ctrans->base.level;
425 const uint texWidth = u_minify(pt->width0, level);
426 const uint texHeight = u_minify(pt->height0, level);
427 unsigned size;
428
429
430 /*
431 * Create a buffer of ordinary memory for the linear texture.
432 * This is the memory that the user will read/write.
433 */
434 size = (util_format_get_stride(pt->format, align(texWidth, TILE_SIZE)) *
435 util_format_get_nblocksy(pt->format, align(texHeight, TILE_SIZE)));
436
437 ctrans->map = align_malloc(size, 16);
438 if (!ctrans->map)
439 return NULL; /* out of memory */
440
441 if (transfer->usage & PIPE_TRANSFER_READ) {
442 /* Textures always stored twiddled, need to untwiddle the
443 * texture to make a linear version.
444 */
445 const uint bpp = util_format_get_blocksize(ct->base.format);
446 if (bpp == 4) {
447 const uint *src = (uint *) (ct->mapped + ctrans->offset);
448 uint *dst = ctrans->map;
449 untwiddle_image_uint(texWidth, texHeight, TILE_SIZE,
450 dst, transfer->stride, src);
451 }
452 else {
453 // xxx fix
454 }
455 }
456 }
457 else {
458 unsigned stride = transfer->stride;
459 enum pipe_format format = pt->format;
460 unsigned blocksize = util_format_get_blocksize(format);
461
462 ctrans->map = (ct->mapped +
463 ctrans->offset +
464 ctrans->base.box.y / util_format_get_blockheight(format) * stride +
465 ctrans->base.box.x / util_format_get_blockwidth(format) * blocksize);
466 }
467
468
469 return ctrans->map;
470}
471
472
473/**
474 * Called when user is done reading/writing texture data.
475 * If new data was written, this is where we convert the linear data
476 * to tiled data.
477 */
478static void
479cell_transfer_unmap(struct pipe_context *ctx,
480 struct pipe_transfer *transfer)
481{
482 struct cell_transfer *ctrans = cell_transfer(transfer);
483 struct pipe_resource *pt = transfer->resource;
484 struct cell_resource *ct = cell_resource(pt);
485 const uint level = ctrans->base.level;
486 const uint texWidth = u_minify(pt->width0, level);
487 const uint texHeight = u_minify(pt->height0, level);
488 const uint stride = ct->stride[level];
489
490 if (!ct->mapped) {
491 assert(0);
492 return;
493 }
494
495 if (pt->target != PIPE_BUFFER) {
496 if (transfer->usage & PIPE_TRANSFER_WRITE) {
497 /* The user wrote new texture data into the mapped buffer.
498 * We need to convert the new linear data into the twiddled/tiled format.
499 */
500 const uint bpp = util_format_get_blocksize(ct->base.format);
501 if (bpp == 4) {
502 const uint *src = ctrans->map;
503 uint *dst = (uint *) (ct->mapped + ctrans->offset);
504 twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, stride, src);
505 }
506 else {
507 // xxx fix
508 }
509 }
510
511 align_free(ctrans->map);
512 }
513 else {
514 /* nothing to do */
515 }
516
517 ctrans->map = NULL;
518}
519
520
521
522/* This used to be overriden by the co-state tracker, but really needs
523 * to be active with sw_winsys.
524 *
525 * Contrasting with llvmpipe and softpipe, this is the only place
526 * where we use the ct->dt display target in any real sense.
527 *
528 * Basically just untwiddle our local data into the linear
529 * displaytarget.
530 */
531static void
532cell_flush_frontbuffer(struct pipe_screen *_screen,
533 struct pipe_resource *resource,
534 unsigned level, unsigned layer,
535 void *context_private)
536{
537 struct cell_screen *screen = cell_screen(_screen);
538 struct sw_winsys *winsys = screen->winsys;
539 struct cell_resource *ct = cell_resource(resource);
540
541 if (!ct->dt)
542 return;
543
544 /* Need to untwiddle from our internal representation here:
545 */
546 {
547 unsigned *map = winsys->displaytarget_map(winsys, ct->dt,
548 (PIPE_TRANSFER_READ |
549 PIPE_TRANSFER_WRITE));
550 unsigned *src = (unsigned *)(ct->data + ct->level_offset[level]);
551
552 untwiddle_image_uint(u_minify(resource->width0, level),
553 u_minify(resource->height0, level),
554 TILE_SIZE,
555 map,
556 ct->dt_stride,
557 src);
558
559 winsys->displaytarget_unmap(winsys, ct->dt);
560 }
561
562 winsys->displaytarget_display(winsys, ct->dt, context_private);
563}
564
565
566
567/**
568 * Create buffer which wraps user-space data.
569 */
570static struct pipe_resource *
571cell_user_buffer_create(struct pipe_screen *screen,
572 void *ptr,
573 unsigned bytes,
574 unsigned bind_flags)
575{
576 struct cell_resource *buffer;
577
578 buffer = CALLOC_STRUCT(cell_resource);
579 if(!buffer)
580 return NULL;
581
582 pipe_reference_init(&buffer->base.reference, 1);
583 buffer->base.screen = screen;
584 buffer->base.format = PIPE_FORMAT_R8_UNORM; /* ?? */
585 buffer->base.bind = PIPE_BIND_TRANSFER_READ | bind_flags;
586 buffer->base.usage = PIPE_USAGE_IMMUTABLE;
587 buffer->base.flags = 0;
588 buffer->base.width0 = bytes;
589 buffer->base.height0 = 1;
590 buffer->base.depth0 = 1;
591 buffer->base.array_size = 1;
592 buffer->userBuffer = TRUE;
593 buffer->data = ptr;
594
595 return &buffer->base;
596}
597
598
599static struct pipe_resource *
600cell_resource_from_handle(struct pipe_screen *screen,
601 const struct pipe_resource *templat,
602 struct winsys_handle *handle)
603{
604 /* XXX todo */
605 return NULL;
606}
607
608
609static boolean
610cell_resource_get_handle(struct pipe_screen *scree,
611 struct pipe_resource *tex,
612 struct winsys_handle *handle)
613{
614 /* XXX todo */
615 return FALSE;
616}
617
618
619void
620cell_init_screen_texture_funcs(struct pipe_screen *screen)
621{
622 screen->resource_create = cell_resource_create;
623 screen->resource_destroy = cell_resource_destroy;
624 screen->resource_from_handle = cell_resource_from_handle;
625 screen->resource_get_handle = cell_resource_get_handle;
626 screen->user_buffer_create = cell_user_buffer_create;
627
628 screen->flush_frontbuffer = cell_flush_frontbuffer;
629}
630
631void
632cell_init_texture_transfer_funcs(struct cell_context *cell)
633{
634 cell->pipe.get_transfer = cell_get_transfer;
635 cell->pipe.transfer_destroy = cell_transfer_destroy;
636 cell->pipe.transfer_map = cell_transfer_map;
637 cell->pipe.transfer_unmap = cell_transfer_unmap;
638
639 cell->pipe.transfer_flush_region = u_default_transfer_flush_region;
640 cell->pipe.transfer_inline_write = u_default_transfer_inline_write;
641
642 cell->pipe.create_surface = cell_create_surface;
643 cell->pipe.surface_destroy = cell_surface_destroy;
644}
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h
deleted file mode 100644
index bd8224b3b7b..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_texture.h
+++ /dev/null
@@ -1,102 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#ifndef CELL_TEXTURE_H
29#define CELL_TEXTURE_H
30
31#include "cell/common.h"
32
33struct cell_context;
34struct pipe_resource;
35
36
37/**
38 * Subclass of pipe_resource
39 */
40struct cell_resource
41{
42 struct pipe_resource base;
43
44 unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS];
45 unsigned long stride[CELL_MAX_TEXTURE_LEVELS];
46
47 /**
48 * Display target, for textures with the PIPE_BIND_DISPLAY_TARGET
49 * usage.
50 */
51 struct sw_displaytarget *dt;
52 unsigned dt_stride;
53
54 /**
55 * Malloc'ed data for regular textures, or a mapping to dt above.
56 */
57 void *data;
58 boolean userBuffer;
59
60 /* Size of the linear buffer??
61 */
62 unsigned long buffer_size;
63
64 /** The buffer above, mapped. This is the memory from which the
65 * SPUs will fetch texels. This texture data is in the tiled layout.
66 */
67 ubyte *mapped;
68};
69
70
71struct cell_transfer
72{
73 struct pipe_transfer base;
74
75 unsigned long offset;
76 void *map;
77};
78
79
80/** cast wrapper */
81static INLINE struct cell_resource *
82cell_resource(struct pipe_resource *pt)
83{
84 return (struct cell_resource *) pt;
85}
86
87
88/** cast wrapper */
89static INLINE struct cell_transfer *
90cell_transfer(struct pipe_transfer *pt)
91{
92 return (struct cell_transfer *) pt;
93}
94
95
96extern void
97cell_init_screen_texture_funcs(struct pipe_screen *screen);
98
99extern void
100cell_init_texture_transfer_funcs(struct cell_context *cell);
101
102#endif /* CELL_TEXTURE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c
deleted file mode 100644
index 37b71956482..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_vbuf.c
+++ /dev/null
@@ -1,332 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * Vertex buffer code. The draw module transforms vertices to window
30 * coords, etc. and emits the vertices into buffer supplied by this module.
31 * When a vertex buffer is full, or we flush, we'll send the vertex data
32 * to the SPUs.
33 *
34 * Authors
35 * Brian Paul
36 */
37
38
39#include "cell_batch.h"
40#include "cell_context.h"
41#include "cell_fence.h"
42#include "cell_flush.h"
43#include "cell_spu.h"
44#include "cell_vbuf.h"
45#include "draw/draw_vbuf.h"
46#include "util/u_memory.h"
47
48
49/** Allow vertex data to be inlined after RENDER command */
50#define ALLOW_INLINE_VERTS 1
51
52
53/**
54 * Subclass of vbuf_render because we need a cell_context pointer in
55 * a few places.
56 */
57struct cell_vbuf_render
58{
59 struct vbuf_render base;
60 struct cell_context *cell;
61 uint prim; /**< PIPE_PRIM_x */
62 uint vertex_size; /**< in bytes */
63 void *vertex_buffer; /**< just for debug, really */
64 uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */
65 uint vertex_buffer_size; /**< size in bytes */
66};
67
68
69/** cast wrapper */
70static struct cell_vbuf_render *
71cell_vbuf_render(struct vbuf_render *vbr)
72{
73 return (struct cell_vbuf_render *) vbr;
74}
75
76
77
78static const struct vertex_info *
79cell_vbuf_get_vertex_info(struct vbuf_render *vbr)
80{
81 struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
82 return &cvbr->cell->vertex_info;
83}
84
85
86static boolean
87cell_vbuf_allocate_vertices(struct vbuf_render *vbr,
88 ushort vertex_size, ushort nr_vertices)
89{
90 struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
91 unsigned size = vertex_size * nr_vertices;
92 /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/
93
94 assert(cvbr->vertex_buf == ~0);
95 cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell);
96 cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf];
97 cvbr->vertex_buffer_size = size;
98 cvbr->vertex_size = vertex_size;
99
100 return cvbr->vertex_buffer != NULL;
101}
102
103
104static void
105cell_vbuf_release_vertices(struct vbuf_render *vbr)
106{
107 struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
108 struct cell_context *cell = cvbr->cell;
109
110 /*
111 printf("%s vertex_buf = %u count = %u\n",
112 __FUNCTION__, cvbr->vertex_buf, vertices_used);
113 */
114
115 /* Make sure texture buffers aren't released until we're done rendering
116 * with them.
117 */
118 cell_add_fenced_textures(cell);
119
120 /* Tell SPUs they can release the vert buf */
121 if (cvbr->vertex_buf != ~0U) {
122 STATIC_ASSERT(sizeof(struct cell_command_release_verts) % 16 == 0);
123 struct cell_command_release_verts *release
124 = (struct cell_command_release_verts *)
125 cell_batch_alloc16(cell, sizeof(struct cell_command_release_verts));
126 release->opcode[0] = CELL_CMD_RELEASE_VERTS;
127 release->vertex_buf = cvbr->vertex_buf;
128 }
129
130 cvbr->vertex_buf = ~0;
131 cell_flush_int(cell, 0x0);
132
133 cvbr->vertex_buffer = NULL;
134}
135
136
137static void *
138cell_vbuf_map_vertices(struct vbuf_render *vbr)
139{
140 struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
141 return cvbr->vertex_buffer;
142}
143
144
145static void
146cell_vbuf_unmap_vertices(struct vbuf_render *vbr,
147 ushort min_index,
148 ushort max_index )
149{
150 struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
151 assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size );
152 /* do nothing */
153}
154
155
156
157static boolean
158cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
159{
160 struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
161 cvbr->prim = prim;
162 /*printf("cell_set_prim %u\n", prim);*/
163 return TRUE;
164}
165
166
167static void
168cell_vbuf_draw_elements(struct vbuf_render *vbr,
169 const ushort *indices,
170 uint nr_indices)
171{
172 struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
173 struct cell_context *cell = cvbr->cell;
174 float xmin, ymin, xmax, ymax;
175 uint i;
176 uint nr_vertices = 0, min_index = ~0;
177 const void *vertices = cvbr->vertex_buffer;
178 const uint vertex_size = cvbr->vertex_size;
179
180 for (i = 0; i < nr_indices; i++) {
181 if (indices[i] > nr_vertices)
182 nr_vertices = indices[i];
183 if (indices[i] < min_index)
184 min_index = indices[i];
185 }
186 nr_vertices++;
187
188#if 0
189 /*if (min_index > 0)*/
190 printf("%s min_index = %u\n", __FUNCTION__, min_index);
191#endif
192
193#if 0
194 printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u\n",
195 nr_indices, nr_vertices);
196 printf(" ");
197 for (i = 0; i < nr_indices; i += 3) {
198 printf("%u %u %u, ", indices[i+0], indices[i+1], indices[i+2]);
199 }
200 printf("\n");
201#elif 0
202 printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u indexes = [%u %u %u ...]\n",
203 nr_indices, nr_vertices,
204 indices[0], indices[1], indices[2]);
205 printf("ind space = %u, vert space = %u, space = %u\n",
206 nr_indices * 2,
207 nr_vertices * 4 * cell->vertex_info.size,
208 cell_batch_free_space(cell));
209#endif
210
211 /* compute x/y bounding box */
212 xmin = ymin = 1e50;
213 xmax = ymax = -1e50;
214 for (i = min_index; i < nr_vertices; i++) {
215 const float *v = (float *) ((ubyte *) vertices + i * vertex_size);
216 if (v[0] < xmin)
217 xmin = v[0];
218 if (v[0] > xmax)
219 xmax = v[0];
220 if (v[1] < ymin)
221 ymin = v[1];
222 if (v[1] > ymax)
223 ymax = v[1];
224 }
225#if 0
226 printf("PPU Bounds %g, %g .. %g, %g\n", xmin, ymin, xmax, ymax);
227 fflush(stdout);
228#endif
229
230 if (cvbr->prim != PIPE_PRIM_TRIANGLES)
231 return; /* only render tris for now */
232
233 /* build/insert batch RENDER command */
234 {
235 const uint index_bytes = ROUNDUP16(nr_indices * 2);
236 const uint vertex_bytes = ROUNDUP16(nr_vertices * 4 * cell->vertex_info.size);
237 STATIC_ASSERT(sizeof(struct cell_command_render) % 16 == 0);
238 const uint batch_size = sizeof(struct cell_command_render) + index_bytes;
239
240 struct cell_command_render *render
241 = (struct cell_command_render *)
242 cell_batch_alloc16(cell, batch_size);
243
244 render->opcode[0] = CELL_CMD_RENDER;
245 render->prim_type = cvbr->prim;
246
247 render->num_indexes = nr_indices;
248 render->min_index = min_index;
249
250 /* append indices after render command */
251 memcpy(render + 1, indices, nr_indices * 2);
252
253 /* if there's room, append vertices after the indices, else leave
254 * vertices in the original/separate buffer.
255 */
256 render->vertex_size = 4 * cell->vertex_info.size;
257 render->num_verts = nr_vertices;
258 if (ALLOW_INLINE_VERTS &&
259 min_index == 0 &&
260 vertex_bytes + 16 <= cell_batch_free_space(cell)) {
261 /* vertex data inlined, after indices, at 16-byte boundary */
262 void *dst = cell_batch_alloc16(cell, vertex_bytes);
263 memcpy(dst, vertices, vertex_bytes);
264 render->inline_verts = TRUE;
265 render->vertex_buf = ~0;
266 }
267 else {
268 /* vertex data in separate buffer */
269 render->inline_verts = FALSE;
270 ASSERT(cvbr->vertex_buf >= 0);
271 render->vertex_buf = cvbr->vertex_buf;
272 }
273
274 render->xmin = xmin;
275 render->ymin = ymin;
276 render->xmax = xmax;
277 render->ymax = ymax;
278 }
279
280#if 0
281 /* helpful for debug */
282 cell_flush_int(cell, CELL_FLUSH_WAIT);
283#endif
284}
285
286
287static void
288cell_vbuf_destroy(struct vbuf_render *vbr)
289{
290 struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
291 cvbr->cell->vbuf_render = NULL;
292 FREE(cvbr);
293}
294
295
296/**
297 * Initialize the post-transform vertex buffer information for the given
298 * context.
299 */
300void
301cell_init_vbuf(struct cell_context *cell)
302{
303 assert(cell->draw);
304
305 cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render);
306
307 /* The max number of indexes is what can fix into a batch buffer,
308 * minus the render and release-verts commands.
309 */
310 cell->vbuf_render->base.max_indices
311 = (CELL_BUFFER_SIZE
312 - sizeof(struct cell_command_render)
313 - sizeof(struct cell_command_release_verts))
314 / sizeof(ushort);
315 cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE;
316
317 cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info;
318 cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices;
319 cell->vbuf_render->base.map_vertices = cell_vbuf_map_vertices;
320 cell->vbuf_render->base.unmap_vertices = cell_vbuf_unmap_vertices;
321 cell->vbuf_render->base.set_primitive = cell_vbuf_set_primitive;
322 cell->vbuf_render->base.draw_elements = cell_vbuf_draw_elements;
323 cell->vbuf_render->base.release_vertices = cell_vbuf_release_vertices;
324 cell->vbuf_render->base.destroy = cell_vbuf_destroy;
325
326 cell->vbuf_render->cell = cell;
327#if 1
328 cell->vbuf_render->vertex_buf = ~0;
329#endif
330
331 cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base);
332}
diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.h b/src/gallium/drivers/cell/ppu/cell_vbuf.h
deleted file mode 100644
index d265cbf7701..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_vbuf.h
+++ /dev/null
@@ -1,38 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#ifndef CELL_VBUF_H
29#define CELL_VBUF_H
30
31
32struct cell_context;
33
34extern void
35cell_init_vbuf(struct cell_context *cell);
36
37
38#endif /* CELL_VBUF_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
deleted file mode 100644
index 9cba537d9eb..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
+++ /dev/null
@@ -1,346 +0,0 @@
1/*
2 * (C) Copyright IBM Corporation 2008
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25#include <inttypes.h>
26#include "pipe/p_defines.h"
27#include "pipe/p_context.h"
28#include "pipe/p_format.h"
29
30#include "../auxiliary/draw/draw_context.h"
31#include "../auxiliary/draw/draw_private.h"
32
33#include "cell_context.h"
34#include "rtasm/rtasm_ppc_spe.h"
35
36
37/**
38 * Emit a 4x4 matrix transpose operation
39 *
40 * \param p Function that the transpose operation is to be appended to
41 * \param row0 Register containing row 0 of the source matrix
42 * \param row1 Register containing row 1 of the source matrix
43 * \param row2 Register containing row 2 of the source matrix
44 * \param row3 Register containing row 3 of the source matrix
45 * \param dest_ptr Register containing the address of the destination matrix
46 * \param shuf_ptr Register containing the address of the shuffled data
47 * \param count Number of colums to actually be written to the destination
48 *
49 * \note
50 * This function assumes that the registers named by \c row0, \c row1,
51 * \c row2, and \c row3 are scratch and can be modified by the generated code.
52 * Furthermore, these registers will be released, via calls to
53 * \c release_register, by this function.
54 *
55 * \note
56 * This function requires that four temporary are available on entry.
57 */
58static void
59emit_matrix_transpose(struct spe_function *p,
60 unsigned row0, unsigned row1, unsigned row2,
61 unsigned row3, unsigned dest_ptr,
62 unsigned shuf_ptr, unsigned count)
63{
64 int shuf_hi = spe_allocate_available_register(p);
65 int shuf_lo = spe_allocate_available_register(p);
66 int t1 = spe_allocate_available_register(p);
67 int t2 = spe_allocate_available_register(p);
68 int t3;
69 int t4;
70 int col0;
71 int col1;
72 int col2;
73 int col3;
74
75
76 spe_lqd(p, shuf_hi, shuf_ptr, 3*16);
77 spe_lqd(p, shuf_lo, shuf_ptr, 4*16);
78 spe_shufb(p, t1, row0, row2, shuf_hi);
79 spe_shufb(p, t2, row0, row2, shuf_lo);
80
81
82 /* row0 and row2 are now no longer needed. Re-use those registers as
83 * temporaries.
84 */
85 t3 = row0;
86 t4 = row2;
87
88 spe_shufb(p, t3, row1, row3, shuf_hi);
89 spe_shufb(p, t4, row1, row3, shuf_lo);
90
91
92 /* row1 and row3 are now no longer needed. Re-use those registers as
93 * temporaries.
94 */
95 col0 = row1;
96 col1 = row3;
97
98 spe_shufb(p, col0, t1, t3, shuf_hi);
99 if (count > 1) {
100 spe_shufb(p, col1, t1, t3, shuf_lo);
101 }
102
103 /* t1 and t3 are now no longer needed. Re-use those registers as
104 * temporaries.
105 */
106 col2 = t1;
107 col3 = t3;
108
109 if (count > 2) {
110 spe_shufb(p, col2, t2, t4, shuf_hi);
111 }
112
113 if (count > 3) {
114 spe_shufb(p, col3, t2, t4, shuf_lo);
115 }
116
117
118 /* Store the results. Remember that the stqd instruction is encoded using
119 * the qword offset (stand-alone assemblers to the byte-offset to
120 * qword-offset conversion for you), so the byte-offset needs be divided by
121 * 16.
122 */
123 switch (count) {
124 case 4:
125 spe_stqd(p, col3, dest_ptr, 3 * 16);
126 case 3:
127 spe_stqd(p, col2, dest_ptr, 2 * 16);
128 case 2:
129 spe_stqd(p, col1, dest_ptr, 1 * 16);
130 case 1:
131 spe_stqd(p, col0, dest_ptr, 0 * 16);
132 }
133
134
135 /* Release all of the temporary registers used.
136 */
137 spe_release_register(p, col0);
138 spe_release_register(p, col1);
139 spe_release_register(p, col2);
140 spe_release_register(p, col3);
141 spe_release_register(p, shuf_hi);
142 spe_release_register(p, shuf_lo);
143 spe_release_register(p, t2);
144 spe_release_register(p, t4);
145}
146
147
148#if 0
149/* This appears to not be used currently */
150static void
151emit_fetch(struct spe_function *p,
152 unsigned in_ptr, unsigned *offset,
153 unsigned out_ptr, unsigned shuf_ptr,
154 enum pipe_format format)
155{
156 const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0)
157 + (pf_size_z(format) != 0) + (pf_size_w(format) != 0);
158 const unsigned type = pf_type(format);
159 const unsigned bytes = pf_size_x(format);
160
161 int v0 = spe_allocate_available_register(p);
162 int v1 = spe_allocate_available_register(p);
163 int v2 = spe_allocate_available_register(p);
164 int v3 = spe_allocate_available_register(p);
165 int tmp = spe_allocate_available_register(p);
166 int float_zero = -1;
167 int float_one = -1;
168 float scale_signed = 0.0;
169 float scale_unsigned = 0.0;
170
171 spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16);
172 spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16);
173 spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16);
174 spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16);
175 offset[0] += 4;
176
177 switch (bytes) {
178 case 1:
179 scale_signed = 1.0f / 127.0f;
180 scale_unsigned = 1.0f / 255.0f;
181 spe_lqd(p, tmp, shuf_ptr, 1 * 16);
182 spe_shufb(p, v0, v0, v0, tmp);
183 spe_shufb(p, v1, v1, v1, tmp);
184 spe_shufb(p, v2, v2, v2, tmp);
185 spe_shufb(p, v3, v3, v3, tmp);
186 break;
187 case 2:
188 scale_signed = 1.0f / 32767.0f;
189 scale_unsigned = 1.0f / 65535.0f;
190 spe_lqd(p, tmp, shuf_ptr, 2 * 16);
191 spe_shufb(p, v0, v0, v0, tmp);
192 spe_shufb(p, v1, v1, v1, tmp);
193 spe_shufb(p, v2, v2, v2, tmp);
194 spe_shufb(p, v3, v3, v3, tmp);
195 break;
196 case 4:
197 scale_signed = 1.0f / 2147483647.0f;
198 scale_unsigned = 1.0f / 4294967295.0f;
199 break;
200 default:
201 assert(0);
202 break;
203 }
204
205 switch (type) {
206 case PIPE_FORMAT_TYPE_FLOAT:
207 break;
208 case PIPE_FORMAT_TYPE_UNORM:
209 spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16);
210 spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff);
211 spe_cuflt(p, v0, v0, 0);
212 spe_fm(p, v0, v0, tmp);
213 break;
214 case PIPE_FORMAT_TYPE_SNORM:
215 spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16);
216 spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff);
217 spe_csflt(p, v0, v0, 0);
218 spe_fm(p, v0, v0, tmp);
219 break;
220 case PIPE_FORMAT_TYPE_USCALED:
221 spe_cuflt(p, v0, v0, 0);
222 break;
223 case PIPE_FORMAT_TYPE_SSCALED:
224 spe_csflt(p, v0, v0, 0);
225 break;
226 }
227
228
229 if (count < 4) {
230 float_one = spe_allocate_available_register(p);
231 spe_il(p, float_one, 1);
232 spe_cuflt(p, float_one, float_one, 0);
233
234 if (count < 3) {
235 float_zero = spe_allocate_available_register(p);
236 spe_il(p, float_zero, 0);
237 }
238 }
239
240 spe_release_register(p, tmp);
241
242 emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count);
243
244 switch (count) {
245 case 1:
246 spe_stqd(p, float_zero, out_ptr, 1 * 16);
247 case 2:
248 spe_stqd(p, float_zero, out_ptr, 2 * 16);
249 case 3:
250 spe_stqd(p, float_one, out_ptr, 3 * 16);
251 }
252
253 if (float_zero != -1) {
254 spe_release_register(p, float_zero);
255 }
256
257 if (float_one != -1) {
258 spe_release_register(p, float_one);
259 }
260}
261#endif
262
263
264void cell_update_vertex_fetch(struct draw_context *draw)
265{
266#if 0
267 struct cell_context *const cell =
268 (struct cell_context *) draw->driver_private;
269 struct spe_function *p = &cell->attrib_fetch;
270 unsigned function_index[PIPE_MAX_ATTRIBS];
271 unsigned unique_attr_formats;
272 int out_ptr;
273 int in_ptr;
274 int shuf_ptr;
275 unsigned i;
276 unsigned j;
277
278
279 /* Determine how many unique input attribute formats there are. At the
280 * same time, store the index of the lowest numbered attribute that has
281 * the same format as any non-unique format.
282 */
283 unique_attr_formats = 1;
284 function_index[0] = 0;
285 for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) {
286 const enum pipe_format curr_fmt = draw->vertex_element[i].src_format;
287
288 for (j = 0; j < i; j++) {
289 if (curr_fmt == draw->vertex_element[j].src_format) {
290 break;
291 }
292 }
293
294 if (j == i) {
295 unique_attr_formats++;
296 }
297
298 function_index[i] = j;
299 }
300
301
302 /* Each fetch function can be a maximum of 34 instructions (note: this is
303 * actually a slight over-estimate).
304 */
305 spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats);
306
307
308 /* Allocate registers for the function's input parameters.
309 */
310 out_ptr = spe_allocate_register(p, 3);
311 in_ptr = spe_allocate_register(p, 4);
312 shuf_ptr = spe_allocate_register(p, 5);
313
314
315 /* Generate code for the individual attribute fetch functions.
316 */
317 for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) {
318 unsigned offset;
319
320 if (function_index[i] == i) {
321 cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr
322 - (void *) p->store);
323
324 offset = 0;
325 emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr,
326 draw->vertex_element[i].src_format);
327 spe_bi(p, 0, 0, 0);
328
329 /* Round up to the next 16-byte boundary.
330 */
331 if ((((unsigned) p->store) & 0x0f) != 0) {
332 const unsigned align = ((unsigned) p->store) & 0x0f;
333 p->store = (uint32_t *) (((void *) p->store) + align);
334 }
335 } else {
336 /* Use the same function entry-point as a previously seen attribute
337 * with the same format.
338 */
339 cell->attrib_fetch_offsets[i] =
340 cell->attrib_fetch_offsets[function_index[i]];
341 }
342 }
343#else
344 assert(0);
345#endif
346}
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
deleted file mode 100644
index 3d389d6ea36..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
+++ /dev/null
@@ -1,145 +0,0 @@
1/*
2 * (C) Copyright IBM Corporation 2008
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25/**
26 * \file cell_vertex_shader.c
27 * Vertex shader interface routines for Cell.
28 *
29 * \author Ian Romanick <idr@us.ibm.com>
30 */
31
32#include "pipe/p_defines.h"
33#include "pipe/p_context.h"
34#include "util/u_math.h"
35
36#include "cell_context.h"
37#include "cell_draw_arrays.h"
38#include "cell_flush.h"
39#include "cell_spu.h"
40#include "cell_batch.h"
41
42#include "cell/common.h"
43#include "draw/draw_context.h"
44#include "draw/draw_private.h"
45
46/**
47 * Run the vertex shader on all vertices in the vertex queue.
48 * Called by the draw module when the vertx cache needs to be flushed.
49 */
50void
51cell_vertex_shader_queue_flush(struct draw_context *draw)
52{
53#if 0
54 struct cell_context *const cell =
55 (struct cell_context *) draw->driver_private;
56 struct cell_command_vs *const vs = &cell_global.command[0].vs;
57 uint64_t *batch;
58 struct cell_array_info *array_info;
59 unsigned i, j;
60 struct cell_attribute_fetch_code *cf;
61
62 assert(draw->vs.queue_nr != 0);
63
64 /* XXX: do this on statechange:
65 */
66 draw_update_vertex_fetch(draw);
67 cell_update_vertex_fetch(draw);
68
69
70 batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*cf));
71 batch[0] = CELL_CMD_STATE_ATTRIB_FETCH;
72 cf = (struct cell_attribute_fetch_code *) (&batch[1]);
73 cf->base = (uint64_t) cell->attrib_fetch.store;
74 cf->size = ROUNDUP16((unsigned)((void *) cell->attrib_fetch.csr
75 - (void *) cell->attrib_fetch.store));
76
77
78 for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) {
79 const enum pipe_format format = draw->vertex_element[i].src_format;
80 const unsigned count = ((pf_size_x(format) != 0)
81 + (pf_size_y(format) != 0)
82 + (pf_size_z(format) != 0)
83 + (pf_size_w(format) != 0));
84 const unsigned size = pf_size_x(format) * count;
85
86 batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info));
87
88 batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO;
89
90 array_info = (struct cell_array_info *) &batch[1];
91 assert(draw->vertex_fetch.src_ptr[i] != NULL);
92 array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i];
93 array_info->attr = i;
94 array_info->pitch = draw->vertex_fetch.pitch[i];
95 array_info->size = size;
96 array_info->function_offset = cell->attrib_fetch_offsets[i];
97 }
98
99 batch = cell_batch_alloc(cell, sizeof(batch[0])
100 + sizeof(struct pipe_viewport_state));
101 batch[0] = CELL_CMD_STATE_VIEWPORT;
102 (void) memcpy(&batch[1], &draw->viewport,
103 sizeof(struct pipe_viewport_state));
104
105 {
106 uint64_t uniforms = (uintptr_t) draw->user.constants;
107
108 batch = cell_batch_alloc(cell, 2 *sizeof(batch[0]));
109 batch[0] = CELL_CMD_STATE_UNIFORMS;
110 batch[1] = uniforms;
111 }
112
113 cell_batch_flush(cell);
114
115 vs->opcode = CELL_CMD_VS_EXECUTE;
116 vs->nr_attrs = draw->vertex_fetch.nr_attrs;
117
118 (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane));
119 vs->nr_planes = draw->nr_planes;
120
121 for (i = 0; i < draw->vs.queue_nr; i += SPU_VERTS_PER_BATCH) {
122 const unsigned n = MIN2(SPU_VERTS_PER_BATCH, draw->vs.queue_nr - i);
123
124 for (j = 0; j < n; j++) {
125 vs->elts[j] = draw->vs.queue[i + j].elt;
126 vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex;
127 }
128
129 for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) {
130 vs->elts[j] = vs->elts[0];
131 vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex;
132 }
133
134 vs->num_elts = n;
135 send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE);
136
137 cell_flush_int(cell, CELL_FLUSH_WAIT);
138 }
139
140 draw->vs.post_nr = draw->vs.queue_nr;
141 draw->vs.queue_nr = 0;
142#else
143 assert(0);
144#endif
145}
diff --git a/src/gallium/drivers/cell/spu/.gitignore b/src/gallium/drivers/cell/spu/.gitignore
deleted file mode 100644
index 2be9a2d3242..00000000000
--- a/src/gallium/drivers/cell/spu/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
1g3d_spu
diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile
deleted file mode 100644
index 3cc52301da2..00000000000
--- a/src/gallium/drivers/cell/spu/Makefile
+++ /dev/null
@@ -1,83 +0,0 @@
1# Gallium3D Cell driver: SPU code
2
3# This makefile builds the g3d_spu.a file that's linked into the
4# PPU code/library.
5
6
7TOP = ../../../../..
8include $(TOP)/configs/current
9
10
11PROG = g3d
12
13PROG_SPU = $(PROG)_spu
14PROG_SPU_A = $(PROG)_spu.a
15PROG_SPU_EMBED_O = $(PROG)_spu-embed.o
16
17
18SOURCES = \
19 spu_command.c \
20 spu_dcache.c \
21 spu_funcs.c \
22 spu_main.c \
23 spu_per_fragment_op.c \
24 spu_render.c \
25 spu_texture.c \
26 spu_tile.c \
27 spu_tri.c
28
29OLD_SOURCES = \
30 spu_exec.c \
31 spu_util.c \
32 spu_vertex_fetch.c \
33 spu_vertex_shader.c
34
35
36SPU_OBJECTS = $(SOURCES:.c=.o)
37
38SPU_ASM_OUT = $(SOURCES:.c=.s)
39
40
41INCLUDE_DIRS = \
42 -I$(TOP)/src/mesa \
43 -I$(TOP)/src/gallium/include \
44 -I$(TOP)/src/gallium/auxiliary \
45 -I$(TOP)/src/gallium/drivers
46
47
48.c.o:
49 $(SPU_CC) $(SPU_CFLAGS) -c $<
50
51.c.s:
52 $(SPU_CC) $(SPU_CFLAGS) -O3 -S $<
53
54
55# The .a file will be linked into the main/PPU executable
56default: $(PROG_SPU_A)
57
58$(PROG_SPU_A): $(PROG_SPU_EMBED_O)
59 $(SPU_AR) $(SPU_AR_FLAGS) $(PROG_SPU_A) $(PROG_SPU_EMBED_O)
60
61$(PROG_SPU_EMBED_O): $(PROG_SPU)
62 $(SPU_EMBED) $(SPU_EMBED_FLAGS) $(PROG_SPU) $(PROG_SPU) $(PROG_SPU_EMBED_O)
63
64$(PROG_SPU): $(SPU_OBJECTS)
65 $(SPU_CC) -o $(PROG_SPU) $(SPU_OBJECTS) $(SPU_LFLAGS)
66
67
68
69asmfiles: $(SPU_ASM_OUT)
70
71
72clean:
73 rm -f *~ *.o *.a *.d *.s $(PROG_SPU)
74
75
76
77depend: $(SOURCES)
78 rm -f depend
79 touch depend
80 $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null
81
82include depend
83
diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h
deleted file mode 100644
index d7ce0055248..00000000000
--- a/src/gallium/drivers/cell/spu/spu_colorpack.h
+++ /dev/null
@@ -1,145 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29
30#ifndef SPU_COLORPACK_H
31#define SPU_COLORPACK_H
32
33
34#include <transpose_matrix4x4.h>
35#include <spu_intrinsics.h>
36
37
38static INLINE unsigned int
39spu_pack_R8G8B8A8(vector float rgba)
40{
41 vector unsigned int out = spu_convtu(rgba, 32);
42
43 out = spu_shuffle(out, out, ((vector unsigned char) {
44 0, 4, 8, 12, 0, 0, 0, 0,
45 0, 0, 0, 0, 0, 0, 0, 0 }) );
46
47 return spu_extract(out, 0);
48}
49
50
51static INLINE unsigned int
52spu_pack_A8R8G8B8(vector float rgba)
53{
54 vector unsigned int out = spu_convtu(rgba, 32);
55 out = spu_shuffle(out, out, ((vector unsigned char) {
56 12, 0, 4, 8, 0, 0, 0, 0,
57 0, 0, 0, 0, 0, 0, 0, 0}) );
58 return spu_extract(out, 0);
59}
60
61
62static INLINE unsigned int
63spu_pack_B8G8R8A8(vector float rgba)
64{
65 vector unsigned int out = spu_convtu(rgba, 32);
66 out = spu_shuffle(out, out, ((vector unsigned char) {
67 8, 4, 0, 12, 0, 0, 0, 0,
68 0, 0, 0, 0, 0, 0, 0, 0}) );
69 return spu_extract(out, 0);
70}
71
72
73static INLINE unsigned int
74spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle)
75{
76 vector unsigned int out = spu_convtu(rgba, 32);
77 out = spu_shuffle(out, out, shuffle);
78 return spu_extract(out, 0);
79}
80
81
82static INLINE vector float
83spu_unpack_B8G8R8A8(uint color)
84{
85 vector unsigned int color_u4 = spu_splats(color);
86 color_u4 = spu_shuffle(color_u4, color_u4,
87 ((vector unsigned char) {
88 2, 2, 2, 2,
89 1, 1, 1, 1,
90 0, 0, 0, 0,
91 3, 3, 3, 3}) );
92 return spu_convtf(color_u4, 32);
93}
94
95
96static INLINE vector float
97spu_unpack_A8R8G8B8(uint color)
98{
99 vector unsigned int color_u4 = spu_splats(color);
100 color_u4 = spu_shuffle(color_u4, color_u4,
101 ((vector unsigned char) {
102 1, 1, 1, 1,
103 2, 2, 2, 2,
104 3, 3, 3, 3,
105 0, 0, 0, 0}) );
106 return spu_convtf(color_u4, 32);
107}
108
109
110/**
111 * \param color_in - array of 32-bit packed ARGB colors
112 * \param color_out - returns float colors in RRRR, GGGG, BBBB, AAAA order
113 */
114static INLINE void
115spu_unpack_A8R8G8B8_transpose4(const vector unsigned int color_in[4],
116 vector float color_out[4])
117{
118 vector unsigned int c0;
119
120 c0 = spu_shuffle(color_in[0], color_in[0],
121 ((vector unsigned char) {
122 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) );
123 color_out[0] = spu_convtf(c0, 32);
124
125 c0 = spu_shuffle(color_in[1], color_in[1],
126 ((vector unsigned char) {
127 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) );
128 color_out[1] = spu_convtf(c0, 32);
129
130 c0 = spu_shuffle(color_in[2], color_in[2],
131 ((vector unsigned char) {
132 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) );
133 color_out[2] = spu_convtf(c0, 32);
134
135 c0 = spu_shuffle(color_in[3], color_in[3],
136 ((vector unsigned char) {
137 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) );
138 color_out[3] = spu_convtf(c0, 32);
139
140 _transpose_matrix4x4(color_out, color_out);
141}
142
143
144
145#endif /* SPU_COLORPACK_H */
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c
deleted file mode 100644
index 6f8ba9562d2..00000000000
--- a/src/gallium/drivers/cell/spu/spu_command.c
+++ /dev/null
@@ -1,810 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29/**
30 * SPU command processing code
31 */
32
33
34#include <stdio.h>
35#include <libmisc.h>
36
37#include "pipe/p_defines.h"
38
39#include "spu_command.h"
40#include "spu_main.h"
41#include "spu_render.h"
42#include "spu_per_fragment_op.h"
43#include "spu_texture.h"
44#include "spu_tile.h"
45#include "spu_vertex_shader.h"
46#include "spu_dcache.h"
47#include "cell/common.h"
48
49
50struct spu_vs_context draw;
51
52
53/**
54 * Buffers containing dynamically generated SPU code:
55 */
56PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS];
57
58
59
60static INLINE int
61align(int value, int alignment)
62{
63 return (value + alignment - 1) & ~(alignment - 1);
64}
65
66
67
68/**
69 * Tell the PPU that this SPU has finished copying a buffer to
70 * local store and that it may be reused by the PPU.
71 * This is done by writting a 16-byte batch-buffer-status block back into
72 * main memory (in cell_context->buffer_status[]).
73 */
74static void
75release_buffer(uint buffer)
76{
77 /* Evidently, using less than a 16-byte status doesn't work reliably */
78 static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE,
79 CELL_BUFFER_STATUS_FREE,
80 CELL_BUFFER_STATUS_FREE,
81 CELL_BUFFER_STATUS_FREE};
82 const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer);
83 uint *dst = spu.init.buffer_status + index;
84
85 ASSERT(buffer < CELL_NUM_BUFFERS);
86
87 mfc_put((void *) &status, /* src in local memory */
88 (unsigned int) dst, /* dst in main memory */
89 sizeof(status), /* size */
90 TAG_MISC, /* tag is unimportant */
91 0, /* tid */
92 0 /* rid */);
93}
94
95
96/**
97 * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory.
98 * There's a qword of status per SPU.
99 */
100static void
101cmd_fence(struct cell_command_fence *fence_cmd)
102{
103 static const vector unsigned int status = {CELL_FENCE_SIGNALLED,
104 CELL_FENCE_SIGNALLED,
105 CELL_FENCE_SIGNALLED,
106 CELL_FENCE_SIGNALLED};
107 uint *dst = (uint *) fence_cmd->fence;
108 dst += 4 * spu.init.id; /* main store/memory address, not local store */
109 ASSERT_ALIGN16(dst);
110 mfc_put((void *) &status, /* src in local memory */
111 (unsigned int) dst, /* dst in main memory */
112 sizeof(status), /* size */
113 TAG_FENCE, /* tag */
114 0, /* tid */
115 0 /* rid */);
116}
117
118
119static void
120cmd_clear_surface(const struct cell_command_clear_surface *clear)
121{
122 D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value);
123
124 if (clear->surface == 0) {
125 spu.fb.color_clear_value = clear->value;
126 if (spu.init.debug_flags & CELL_DEBUG_CHECKER) {
127 uint x = (spu.init.id << 4) | (spu.init.id << 12) |
128 (spu.init.id << 20) | (spu.init.id << 28);
129 spu.fb.color_clear_value ^= x;
130 }
131 }
132 else {
133 spu.fb.depth_clear_value = clear->value;
134 }
135
136#define CLEAR_OPT 1
137#if CLEAR_OPT
138
139 /* Simply set all tiles' status to CLEAR.
140 * When we actually begin rendering into a tile, we'll initialize it to
141 * the clear value. If any tiles go untouched during the frame,
142 * really_clear_tiles() will set them to the clear value.
143 */
144 if (clear->surface == 0) {
145 memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
146 }
147 else {
148 memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
149 }
150
151#else
152
153 /*
154 * This path clears the whole framebuffer to the clear color right now.
155 */
156
157 /*
158 printf("SPU: %s num=%d w=%d h=%d\n",
159 __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
160 */
161
162 /* init a single tile to the clear value */
163 if (clear->surface == 0) {
164 clear_c_tile(&spu.ctile);
165 }
166 else {
167 clear_z_tile(&spu.ztile);
168 }
169
170 /* walk over my tiles, writing the 'clear' tile's data */
171 {
172 const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
173 uint i;
174 for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
175 uint tx = i % spu.fb.width_tiles;
176 uint ty = i / spu.fb.width_tiles;
177 if (clear->surface == 0)
178 put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
179 else
180 put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
181 }
182 }
183
184 if (spu.init.debug_flags & CELL_DEBUG_SYNC) {
185 wait_on_mask(1 << TAG_SURFACE_CLEAR);
186 }
187
188#endif /* CLEAR_OPT */
189
190 D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n");
191}
192
193
194static void
195cmd_release_verts(const struct cell_command_release_verts *release)
196{
197 D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf);
198 ASSERT(release->vertex_buf != ~0U);
199 release_buffer(release->vertex_buf);
200}
201
202
/**
 * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
 * This involves installing new fragment ops SPU code.
 * If this function is never called, we'll use a regular C fallback function
 * for fragment processing.
 *
 * \param fops  variant-sized command containing the per-fragment state
 *              structs plus the code-gen'd SPU machine code for both the
 *              front-facing and back-facing paths.
 */
static void
cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
{
   D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n");

   /* Copy state info (for fallback case only - this will eventually
    * go away when the fallback case goes away)
    */
   memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
   memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
   memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color));

   /* Make sure the SPU knows which buffers it's expected to read when
    * it's told to pull tiles.
    */
   spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled);

   /* If we're forcing the fallback code to be used (for debug purposes),
    * install that.  Otherwise install the incoming SPU code.
    */
   if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) {
      /* warn only once per SPU program run */
      static unsigned int warned = 0;
      if (!warned) {
         fprintf(stderr, "Cell Warning: using fallback per-fragment code\n");
         warned = 1;
      }
      /* The following two lines aren't really necessary if you
       * know the debug flags won't change during a run, and if you
       * know that the function pointers are initialized correctly.
       * We set them here to allow a person to change the debug
       * flags during a run (from inside a debugger).
       */
      spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
      spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
      return;
   }

   /* Make sure the SPU code buffer is large enough to hold the incoming code.
    * Note that we *don't* use align_malloc() and align_free(), because
    * those utility functions are *not* available in SPU code.
    * */
   if (spu.fragment_ops_code_size < fops->total_code_size) {
      /* Grow (or first-time allocate) the code buffer. */
      if (spu.fragment_ops_code != NULL) {
         free(spu.fragment_ops_code);
      }
      spu.fragment_ops_code_size = fops->total_code_size;
      spu.fragment_ops_code = malloc(fops->total_code_size);
      if (spu.fragment_ops_code == NULL) {
         /* Whoops. */
         fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size);
         /* Reset to the empty-buffer state and use the C fallback path. */
         spu.fragment_ops_code = NULL;
         spu.fragment_ops_code_size = 0;
         spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
         spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
         return;
      }
   }

   /* Copy the SPU code from the command buffer to the spu buffer */
   memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size);

   /* Set the pointers for the front-facing and back-facing fragments
    * to the specified offsets within the code.  Note that if the
    * front-facing and back-facing code are the same, they'll have
    * the same offset.
    */
   spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index];
   spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index];
}
278
279static void
280cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
281{
282 D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n");
283 /* Copy SPU code from batch buffer to spu buffer */
284 memcpy(spu.fragment_program_code, fp->code,
285 SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
286#if 01
287 /* Point function pointer at new code */
288 spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
289#endif
290}
291
292
293static uint
294cmd_state_fs_constants(const qword *buffer, uint pos)
295{
296 const uint num_const = spu_extract((vector unsigned int)buffer[pos+1], 0);
297 const float *constants = (const float *) &buffer[pos+2];
298 uint i;
299
300 D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const);
301
302 /* Expand each float to float[4] for SOA execution */
303 for (i = 0; i < num_const; i++) {
304 D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", i, constants[i]);
305 spu.constants[i] = spu_splats(constants[i]);
306 }
307
308 /* return new buffer pos (in 16-byte words) */
309 return pos + 2 + (ROUNDUP16(num_const * sizeof(float)) / 16);
310}
311
312
313static void
314cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
315{
316 D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
317 cmd->width,
318 cmd->height,
319 cmd->color_start,
320 cmd->color_format,
321 cmd->depth_format);
322
323 ASSERT_ALIGN16(cmd->color_start);
324 ASSERT_ALIGN16(cmd->depth_start);
325
326 spu.fb.color_start = cmd->color_start;
327 spu.fb.depth_start = cmd->depth_start;
328 spu.fb.color_format = cmd->color_format;
329 spu.fb.depth_format = cmd->depth_format;
330 spu.fb.width = cmd->width;
331 spu.fb.height = cmd->height;
332 spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE;
333 spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE;
334
335 switch (spu.fb.depth_format) {
336 case PIPE_FORMAT_Z32_UNORM:
337 spu.fb.zsize = 4;
338 spu.fb.zscale = (float) 0xffffffffu;
339 break;
340 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
341 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
342 case PIPE_FORMAT_X8Z24_UNORM:
343 case PIPE_FORMAT_Z24X8_UNORM:
344 spu.fb.zsize = 4;
345 spu.fb.zscale = (float) 0x00ffffffu;
346 break;
347 case PIPE_FORMAT_Z16_UNORM:
348 spu.fb.zsize = 2;
349 spu.fb.zscale = (float) 0xffffu;
350 break;
351 default:
352 spu.fb.zsize = 0;
353 break;
354 }
355}
356
357
358/**
359 * Tex texture mask_s/t and scale_s/t fields depend on the texture size and
360 * sampler wrap modes.
361 */
362static void
363update_tex_masks(struct spu_texture *texture,
364 const struct pipe_sampler_state *sampler)
365{
366 uint i;
367
368 for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
369 int width = texture->level[i].width;
370 int height = texture->level[i].height;
371
372 if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT)
373 texture->level[i].mask_s = spu_splats(width - 1);
374 else
375 texture->level[i].mask_s = spu_splats(~0);
376
377 if (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT)
378 texture->level[i].mask_t = spu_splats(height - 1);
379 else
380 texture->level[i].mask_t = spu_splats(~0);
381
382 if (sampler->normalized_coords) {
383 texture->level[i].scale_s = spu_splats((float) width);
384 texture->level[i].scale_t = spu_splats((float) height);
385 }
386 else {
387 texture->level[i].scale_s = spu_splats(1.0f);
388 texture->level[i].scale_t = spu_splats(1.0f);
389 }
390 }
391}
392
393
394static void
395cmd_state_sampler(const struct cell_command_sampler *sampler)
396{
397 uint unit = sampler->unit;
398
399 D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit);
400
401 spu.sampler[unit] = sampler->state;
402
403 switch (spu.sampler[unit].min_img_filter) {
404 case PIPE_TEX_FILTER_LINEAR:
405 spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear;
406 break;
407 case PIPE_TEX_FILTER_NEAREST:
408 spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest;
409 break;
410 default:
411 ASSERT(0);
412 }
413
414 switch (spu.sampler[sampler->unit].mag_img_filter) {
415 case PIPE_TEX_FILTER_LINEAR:
416 spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear;
417 break;
418 case PIPE_TEX_FILTER_NEAREST:
419 spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest;
420 break;
421 default:
422 ASSERT(0);
423 }
424
425 switch (spu.sampler[sampler->unit].min_mip_filter) {
426 case PIPE_TEX_MIPFILTER_NEAREST:
427 case PIPE_TEX_MIPFILTER_LINEAR:
428 spu.sample_texture_2d[unit] = sample_texture_2d_lod;
429 break;
430 case PIPE_TEX_MIPFILTER_NONE:
431 spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit];
432 break;
433 default:
434 ASSERT(0);
435 }
436
437 update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
438}
439
440
441static void
442cmd_state_texture(const struct cell_command_texture *texture)
443{
444 const uint unit = texture->unit;
445 uint i;
446
447 D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit);
448
449 spu.texture[unit].max_level = 0;
450 spu.texture[unit].target = texture->target;
451
452 for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
453 uint width = texture->width[i];
454 uint height = texture->height[i];
455 uint depth = texture->depth[i];
456
457 D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i,
458 texture->start[i], texture->width[i], texture->height[i]);
459
460 spu.texture[unit].level[i].start = texture->start[i];
461 spu.texture[unit].level[i].width = width;
462 spu.texture[unit].level[i].height = height;
463 spu.texture[unit].level[i].depth = depth;
464
465 spu.texture[unit].level[i].tiles_per_row =
466 (width + TILE_SIZE - 1) / TILE_SIZE;
467
468 spu.texture[unit].level[i].bytes_per_image =
469 4 * align(width, TILE_SIZE) * align(height, TILE_SIZE) * depth;
470
471 spu.texture[unit].level[i].max_s = spu_splats((int) width - 1);
472 spu.texture[unit].level[i].max_t = spu_splats((int) height - 1);
473
474 if (texture->start[i])
475 spu.texture[unit].max_level = i;
476 }
477
478 update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
479}
480
481
482static void
483cmd_state_vertex_info(const struct vertex_info *vinfo)
484{
485 D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs);
486 ASSERT(vinfo->num_attribs >= 1);
487 ASSERT(vinfo->num_attribs <= 8);
488 memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo));
489}
490
491
492static void
493cmd_state_vs_array_info(const struct cell_array_info *vs_info)
494{
495 const unsigned attr = vs_info->attr;
496
497 ASSERT(attr < PIPE_MAX_ATTRIBS);
498 draw.vertex_fetch.src_ptr[attr] = vs_info->base;
499 draw.vertex_fetch.pitch[attr] = vs_info->pitch;
500 draw.vertex_fetch.size[attr] = vs_info->size;
501 draw.vertex_fetch.code_offset[attr] = vs_info->function_offset;
502 draw.vertex_fetch.dirty = 1;
503}
504
505
/**
 * Process a CELL_CMD_STATE_ATTRIB_FETCH command: DMA the generated
 * attribute-fetch code from main memory into the local-store buffer,
 * then point the vertex fetcher at it.
 */
static void
cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
{
   /* Pull code->size bytes from main memory into local store. */
   mfc_get(attribute_fetch_code_buffer,
           (unsigned int) code->base,  /* src */
           code->size,
           TAG_BATCH_BUFFER,
           0, /* tid */
           0 /* rid */);
   /* Block until the transfer tagged TAG_BATCH_BUFFER completes; the
    * code must be fully resident before it can be executed.
    */
   wait_on_mask(1 << TAG_BATCH_BUFFER);

   draw.vertex_fetch.code = attribute_fetch_code_buffer;
}
519
520
/**
 * Process a CELL_CMD_FINISH command: flush all pending work on this SPU,
 * then signal completion back to the PPU via the outbound mailbox.
 */
static void
cmd_finish(void)
{
   D_PRINTF(CELL_DEBUG_CMD, "FINISH\n");
   /* presumably flushes any deferred tile clears — see really_clear_tiles() */
   really_clear_tiles(0);
   /* wait for all outstanding DMAs to finish */
   mfc_write_tag_mask(~0);
   mfc_read_tag_status_all();
   /* send mbox message to PPU */
   spu_write_out_mbox(CELL_CMD_FINISH);
}
532
533
534/**
535 * Execute a batch of commands which was sent to us by the PPU.
536 * See the cell_emit_state.c code to see where the commands come from.
537 *
538 * The opcode param encodes the location of the buffer and its size.
539 */
540static void
541cmd_batch(uint opcode)
542{
543 const uint buf = (opcode >> 8) & 0xff;
544 uint size = (opcode >> 16);
545 PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16];
546 const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]);
547 uint pos;
548
549 D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n",
550 buf, size, spu.init.buffers[buf]);
551
552 ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);
553
554 ASSERT_ALIGN16(spu.init.buffers[buf]);
555
556 size = ROUNDUP16(size);
557
558 ASSERT_ALIGN16(spu.init.buffers[buf]);
559
560 mfc_get(buffer, /* dest */
561 (unsigned int) spu.init.buffers[buf], /* src */
562 size,
563 TAG_BATCH_BUFFER,
564 0, /* tid */
565 0 /* rid */);
566 wait_on_mask(1 << TAG_BATCH_BUFFER);
567
568 /* Tell PPU we're done copying the buffer to local store */
569 D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf);
570 release_buffer(buf);
571
572 /*
573 * Loop over commands in the batch buffer
574 */
575 for (pos = 0; pos < usize; /* no incr */) {
576 switch (si_to_uint(buffer[pos])) {
577 /*
578 * rendering commands
579 */
580 case CELL_CMD_CLEAR_SURFACE:
581 {
582 struct cell_command_clear_surface *clr
583 = (struct cell_command_clear_surface *) &buffer[pos];
584 cmd_clear_surface(clr);
585 pos += sizeof(*clr) / 16;
586 }
587 break;
588 case CELL_CMD_RENDER:
589 {
590 struct cell_command_render *render
591 = (struct cell_command_render *) &buffer[pos];
592 uint pos_incr;
593 cmd_render(render, &pos_incr);
594 pos += ((pos_incr+1)&~1) / 2; // should 'fix' cmd_render return
595 }
596 break;
597 /*
598 * state-update commands
599 */
600 case CELL_CMD_STATE_FRAMEBUFFER:
601 {
602 struct cell_command_framebuffer *fb
603 = (struct cell_command_framebuffer *) &buffer[pos];
604 cmd_state_framebuffer(fb);
605 pos += sizeof(*fb) / 16;
606 }
607 break;
608 case CELL_CMD_STATE_FRAGMENT_OPS:
609 {
610 struct cell_command_fragment_ops *fops
611 = (struct cell_command_fragment_ops *) &buffer[pos];
612 cmd_state_fragment_ops(fops);
613 /* This is a variant-sized command */
614 pos += ROUNDUP16(sizeof(*fops) + fops->total_code_size) / 16;
615 }
616 break;
617 case CELL_CMD_STATE_FRAGMENT_PROGRAM:
618 {
619 struct cell_command_fragment_program *fp
620 = (struct cell_command_fragment_program *) &buffer[pos];
621 cmd_state_fragment_program(fp);
622 pos += sizeof(*fp) / 16;
623 }
624 break;
625 case CELL_CMD_STATE_FS_CONSTANTS:
626 pos = cmd_state_fs_constants(buffer, pos);
627 break;
628 case CELL_CMD_STATE_RASTERIZER:
629 {
630 struct cell_command_rasterizer *rast =
631 (struct cell_command_rasterizer *) &buffer[pos];
632 spu.rasterizer = rast->rasterizer;
633 pos += sizeof(*rast) / 16;
634 }
635 break;
636 case CELL_CMD_STATE_SAMPLER:
637 {
638 struct cell_command_sampler *sampler
639 = (struct cell_command_sampler *) &buffer[pos];
640 cmd_state_sampler(sampler);
641 pos += sizeof(*sampler) / 16;
642 }
643 break;
644 case CELL_CMD_STATE_TEXTURE:
645 {
646 struct cell_command_texture *texture
647 = (struct cell_command_texture *) &buffer[pos];
648 cmd_state_texture(texture);
649 pos += sizeof(*texture) / 16;
650 }
651 break;
652 case CELL_CMD_STATE_VERTEX_INFO:
653 cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]);
654 pos += 1 + ROUNDUP16(sizeof(struct vertex_info)) / 16;
655 break;
656 case CELL_CMD_STATE_VIEWPORT:
657 (void) memcpy(& draw.viewport, &buffer[pos+1],
658 sizeof(struct pipe_viewport_state));
659 pos += 1 + ROUNDUP16(sizeof(struct pipe_viewport_state)) / 16;
660 break;
661 case CELL_CMD_STATE_UNIFORMS:
662 draw.constants = (const float (*)[4]) (uintptr_t)spu_extract((vector unsigned int)buffer[pos+1],0);
663 pos += 2;
664 break;
665 case CELL_CMD_STATE_VS_ARRAY_INFO:
666 cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]);
667 pos += 1 + ROUNDUP16(sizeof(struct cell_array_info)) / 16;
668 break;
669 case CELL_CMD_STATE_BIND_VS:
670#if 0
671 spu_bind_vertex_shader(&draw,
672 (struct cell_shader_info *) &buffer[pos+1]);
673#endif
674 pos += 1 + ROUNDUP16(sizeof(struct cell_shader_info)) / 16;
675 break;
676 case CELL_CMD_STATE_ATTRIB_FETCH:
677 cmd_state_attrib_fetch((struct cell_attribute_fetch_code *)
678 &buffer[pos+1]);
679 pos += 1 + ROUNDUP16(sizeof(struct cell_attribute_fetch_code)) / 16;
680 break;
681 /*
682 * misc commands
683 */
684 case CELL_CMD_FINISH:
685 cmd_finish();
686 pos += 1;
687 break;
688 case CELL_CMD_FENCE:
689 {
690 struct cell_command_fence *fence_cmd =
691 (struct cell_command_fence *) &buffer[pos];
692 cmd_fence(fence_cmd);
693 pos += sizeof(*fence_cmd) / 16;
694 }
695 break;
696 case CELL_CMD_RELEASE_VERTS:
697 {
698 struct cell_command_release_verts *release
699 = (struct cell_command_release_verts *) &buffer[pos];
700 cmd_release_verts(release);
701 pos += sizeof(*release) / 16;
702 }
703 break;
704 case CELL_CMD_FLUSH_BUFFER_RANGE: {
705 struct cell_buffer_range *br = (struct cell_buffer_range *)
706 &buffer[pos+1];
707
708 spu_dcache_mark_dirty((unsigned) br->base, br->size);
709 pos += 1 + ROUNDUP16(sizeof(struct cell_buffer_range)) / 16;
710 break;
711 }
712 default:
713 printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, si_to_uint(buffer[pos]));
714 ASSERT(0);
715 break;
716 }
717 }
718
719 D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n");
720}
721
722
723#define PERF 0
724
725
726/**
727 * Main loop for SPEs: Get a command, execute it, repeat.
728 */
729void
730command_loop(void)
731{
732 int exitFlag = 0;
733 uint t0, t1;
734
735 D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n");
736
737 while (!exitFlag) {
738 unsigned opcode;
739
740 D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n");
741
742 if (PERF)
743 spu_write_decrementer(~0);
744
745 /* read/wait from mailbox */
746 opcode = (unsigned int) spu_read_in_mbox();
747 D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode);
748
749 if (PERF)
750 t0 = spu_read_decrementer();
751
752 switch (opcode & CELL_CMD_OPCODE_MASK) {
753 case CELL_CMD_EXIT:
754 D_PRINTF(CELL_DEBUG_CMD, "EXIT\n");
755 exitFlag = 1;
756 break;
757 case CELL_CMD_VS_EXECUTE:
758#if 0
759 spu_execute_vertex_shader(&draw, &cmd.vs);
760#endif
761 break;
762 case CELL_CMD_BATCH:
763 cmd_batch(opcode);
764 break;
765 default:
766 printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK);
767 }
768
769 if (PERF) {
770 t1 = spu_read_decrementer();
771 printf("wait mbox time: %gms batch time: %gms\n",
772 (~0u - t0) * spu.init.inv_timebase,
773 (t0 - t1) * spu.init.inv_timebase);
774 }
775 }
776
777 D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n");
778
779 if (spu.init.debug_flags & CELL_DEBUG_CACHE)
780 spu_dcache_report();
781}
782
783/* Initialize this module; we manage the fragment ops buffer here. */
784void
785spu_command_init(void)
786{
787 /* Install default/fallback fragment processing function.
788 * This will normally be overriden by a code-gen'd function
789 * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set.
790 */
791 spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
792 spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
793
794 /* Set up the basic empty buffer for code-gen'ed fragment ops */
795 spu.fragment_ops_code = NULL;
796 spu.fragment_ops_code_size = 0;
797}
798
799void
800spu_command_close(void)
801{
802 /* Deallocate the code-gen buffer for fragment ops, and reset the
803 * fragment ops functions to their initial setting (just to leave
804 * things in a good state).
805 */
806 if (spu.fragment_ops_code != NULL) {
807 free(spu.fragment_ops_code);
808 }
809 spu_command_init();
810}
diff --git a/src/gallium/drivers/cell/spu/spu_command.h b/src/gallium/drivers/cell/spu/spu_command.h
deleted file mode 100644
index 83dcdade288..00000000000
--- a/src/gallium/drivers/cell/spu/spu_command.h
+++ /dev/null
@@ -1,35 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
/* Main SPU loop: fetch commands from the PPU mailbox and execute them. */
extern void
command_loop(void);

/* One-time module setup; manages the fragment-ops code buffer. */
extern void
spu_command_init(void);

/* Module teardown; frees the fragment-ops code buffer. */
extern void
spu_command_close(void);
diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c
deleted file mode 100644
index a6d67634fd8..00000000000
--- a/src/gallium/drivers/cell/spu/spu_dcache.c
+++ /dev/null
@@ -1,145 +0,0 @@
1/*
2 * (C) Copyright IBM Corporation 2008
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25#include "cell/common.h"
26#include "spu_main.h"
27#include "spu_dcache.h"
28
29#define CACHELINE_LOG2SIZE 7
30#define LINE_SIZE (1U << 7)
31#define ALIGN_MASK (~(LINE_SIZE - 1))
32
33#define CACHE_NAME data
34#define CACHED_TYPE qword
35#define CACHE_TYPE CACHE_TYPE_RO
36#define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0)
37#define CACHE_LOG2NNWAY 2
38#define CACHE_LOG2NSETS 6
39#ifdef DEBUG
40#define CACHE_STATS 1
41#endif
42#include <cache-api.h>
43
44/* Yes folks, this is ugly.
45 */
46#undef CACHE_NWAY
47#undef CACHE_NSETS
48#define CACHE_NAME data
49#define CACHE_NWAY 4
50#define CACHE_NSETS (1U << 6)
51
52
/**
 * Fetch between arbitrary number of bytes from an unaligned address
 *
 * \param dst   Destination data buffer
 * \param ea    Main memory effective address of source data
 * \param size  Number of bytes to read
 *
 * \warning
 * As is hinted by the type of the \c dst pointer, this function writes
 * multiples of 16-bytes.
 */
void
spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size)
{
   /* byte offset of 'ea' within its 16-byte qword */
   const int shift = ea & 0x0f;
   const unsigned read_size = ROUNDUP16(size + shift);
   const unsigned last_read = ROUNDUP16(ea + size);
   /* one past the final qword that will be written to 'dst' */
   const qword *const last_write = dst + (ROUNDUP16(size) / 16);
   unsigned i;


   if (shift == 0) {
      /* Data is already aligned.  Fetch directly into the destination buffer.
       */
      for (i = 0; i < size; i += 16) {
         *(dst++) = cache_rd(data, ea + i);
      }
   } else {
      qword hi;


      /* Please exercise extreme caution when modifying this code.  This code
       * must not read past the end of the page containing the source data,
       * and it must not write more than ((size + 15) / 16) qwords to the
       * destination buffer.
       */
      /* Align 'ea' down, then merge each adjacent pair of source qwords
       * (hi, lo) into one aligned destination qword by shifting hi left
       * by 'shift' bytes and rotating in the top bytes of lo.
       */
      ea &= ~0x0f;
      hi = cache_rd(data, ea);
      for (i = 16; i < read_size; i += 16) {
         qword lo = cache_rd(data, ea + i);

         *(dst++) = si_or((qword) spu_slqwbyte(hi, shift),
                          (qword) spu_rlmaskqwbyte(lo, shift - 16));
         hi = lo;
      }

      /* Emit the final partial qword (no further source read needed). */
      if (dst != last_write) {
         *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), si_il(0));
      }
   }

   /* Post-conditions: exactly the expected span was read and written. */
   ASSERT((ea + i) == last_read);
   ASSERT(dst == last_write);
}
107
108
109/**
110 * Notify the cache that a range of main memory may have been modified
111 */
112void
113spu_dcache_mark_dirty(unsigned ea, unsigned size)
114{
115 unsigned i;
116 const unsigned aligned_start = (ea & ALIGN_MASK);
117 const unsigned aligned_end = (ea + size + (LINE_SIZE - 1))
118 & ALIGN_MASK;
119
120
121 for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) {
122 const unsigned entry = __cache_dir[i];
123 const unsigned addr = entry & ~0x0f;
124
125 __cache_dir[i] = ((addr >= aligned_start) && (addr < aligned_end))
126 ? (entry & ~CACHELINE_VALID) : entry;
127 }
128}
129
130
/**
 * Print cache utilization report
 *
 * Compiled to a no-op unless CACHE_STATS is defined (it is defined only
 * for DEBUG builds — see the cache configuration defines above).
 */
void
spu_dcache_report(void)
{
#ifdef CACHE_STATS
   /* Only SPU 0 reports, to avoid duplicated output from every SPU. */
   if (spu.init.id == 0) {
      printf("SPU 0: Texture cache report:\n");
      cache_pr_stats(data);
   }
#endif
}
144
145
diff --git a/src/gallium/drivers/cell/spu/spu_dcache.h b/src/gallium/drivers/cell/spu/spu_dcache.h
deleted file mode 100644
index 39a19eb31b5..00000000000
--- a/src/gallium/drivers/cell/spu/spu_dcache.h
+++ /dev/null
@@ -1,37 +0,0 @@
1/*
2 * (C) Copyright IBM Corporation 2008
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
#ifndef SPU_DCACHE_H
#define SPU_DCACHE_H

/* Read 'size' bytes from (possibly unaligned) effective address 'ea'
 * into 'dst'; writes whole 16-byte qwords.
 */
extern void
spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size);

/* Invalidate any cached lines overlapping [ea, ea + size). */
extern void
spu_dcache_mark_dirty(unsigned ea, unsigned size);

/* Print cache hit/miss statistics (debug builds only). */
extern void
spu_dcache_report(void);

#endif /* SPU_DCACHE_H */
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
deleted file mode 100644
index e4ebeb595ce..00000000000
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ /dev/null
@@ -1,1870 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * TGSI interpretor/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers. This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from three other masks (CondMask, LoopMask and
44 * ContMask) which are controlled by the flow control instructions (namely:
45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 * Michal Krol
50 * Brian Paul
51 */
52
53#include <transpose_matrix4x4.h>
54#include <simdmath/ceilf4.h>
55#include <simdmath/cosf4.h>
56#include <simdmath/divf4.h>
57#include <simdmath/floorf4.h>
58#include <simdmath/log2f4.h>
59#include <simdmath/powf4.h>
60#include <simdmath/sinf4.h>
61#include <simdmath/sqrtf4.h>
62#include <simdmath/truncf4.h>
63
64#include "pipe/p_compiler.h"
65#include "pipe/p_state.h"
66#include "pipe/p_shader_tokens.h"
67#include "tgsi/tgsi_parse.h"
68#include "tgsi/tgsi_util.h"
69#include "spu_exec.h"
70#include "spu_main.h"
71#include "spu_vertex_shader.h"
72#include "spu_dcache.h"
73#include "cell/common.h"
74
75#define TILE_TOP_LEFT 0
76#define TILE_TOP_RIGHT 1
77#define TILE_BOTTOM_LEFT 2
78#define TILE_BOTTOM_RIGHT 3
79
80/*
81 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
82 */
83#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
84#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
85#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
86#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
87#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
88#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
89#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
90#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
91#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
92#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
93#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
94#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
95#define TEMP_128_I TGSI_EXEC_TEMP_128_I
96#define TEMP_128_C TGSI_EXEC_TEMP_128_C
97#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
98#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
99#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
100#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
101#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
102#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
103#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
104#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
105#define TEMP_R0 TGSI_EXEC_TEMP_R0
106
107#define FOR_EACH_CHANNEL(CHAN)\
108 for (CHAN = 0; CHAN < 4; CHAN++)
109
110#define IS_CHANNEL_ENABLED(INST, CHAN)\
111 ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))
112
113#define IS_CHANNEL_ENABLED2(INST, CHAN)\
114 ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))
115
116#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
117 FOR_EACH_CHANNEL( CHAN )\
118 if (IS_CHANNEL_ENABLED( INST, CHAN ))
119
120#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
121 FOR_EACH_CHANNEL( CHAN )\
122 if (IS_CHANNEL_ENABLED2( INST, CHAN ))
123
124
125/** The execution mask depends on the conditional mask and the loop mask */
126#define UPDATE_EXEC_MASK(MACH) \
127 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
128
129
130#define CHAN_X 0
131#define CHAN_Y 1
132#define CHAN_Z 2
133#define CHAN_W 3
134
135
136
137/**
138 * Initialize machine state by expanding tokens to full instructions,
139 * allocating temporary storage, setting up constants, etc.
140 * After this, we can call spu_exec_machine_run() many times.
141 */
142void
143spu_exec_machine_init(struct spu_exec_machine *mach,
144 uint numSamplers,
145 struct spu_sampler *samplers,
146 unsigned processor)
147{
148 const qword zero = si_il(0);
149 const qword not_zero = si_il(~0);
150
151 (void) numSamplers;
152 mach->Samplers = samplers;
153 mach->Processor = processor;
154 mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
155
156 /* Setup constants. */
157 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero;
158 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero;
159 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1);
160 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31);
161
162 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f);
163 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f);
164 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f);
165 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f);
166}
167
168
169static INLINE qword
170micro_abs(qword src)
171{
172 return si_rotmi(si_shli(src, 1), -1);
173}
174
175static INLINE qword
176micro_ceil(qword src)
177{
178 return (qword) _ceilf4((vec_float4) src);
179}
180
181static INLINE qword
182micro_cos(qword src)
183{
184 return (qword) _cosf4((vec_float4) src);
185}
186
187static const qword br_shuf = {
188 TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
189 TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
190 TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
191 TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
192 TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
193 TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
194 TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
195 TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
196};
197
198static const qword bl_shuf = {
199 TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
200 TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
201 TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
202 TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
203 TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
204 TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
205 TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
206 TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
207};
208
209static const qword tl_shuf = {
210 TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
211 TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
212 TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
213 TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
214 TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
215 TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
216 TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
217 TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
218};
219
220static qword
221micro_ddx(qword src)
222{
223 qword bottom_right = si_shufb(src, src, br_shuf);
224 qword bottom_left = si_shufb(src, src, bl_shuf);
225
226 return si_fs(bottom_right, bottom_left);
227}
228
229static qword
230micro_ddy(qword src)
231{
232 qword top_left = si_shufb(src, src, tl_shuf);
233 qword bottom_left = si_shufb(src, src, bl_shuf);
234
235 return si_fs(top_left, bottom_left);
236}
237
238static INLINE qword
239micro_div(qword src0, qword src1)
240{
241 return (qword) _divf4((vec_float4) src0, (vec_float4) src1);
242}
243
244static qword
245micro_flr(qword src)
246{
247 return (qword) _floorf4((vec_float4) src);
248}
249
250static qword
251micro_frc(qword src)
252{
253 return si_fs(src, (qword) _floorf4((vec_float4) src));
254}
255
256static INLINE qword
257micro_ge(qword src0, qword src1)
258{
259 return si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
260}
261
262static qword
263micro_lg2(qword src)
264{
265 return (qword) _log2f4((vec_float4) src);
266}
267
268static INLINE qword
269micro_lt(qword src0, qword src1)
270{
271 const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
272
273 return si_xori(tmp, 0xff);
274}
275
276static INLINE qword
277micro_max(qword src0, qword src1)
278{
279 return si_selb(src1, src0, si_fcgt(src0, src1));
280}
281
282static INLINE qword
283micro_min(qword src0, qword src1)
284{
285 return si_selb(src0, src1, si_fcgt(src0, src1));
286}
287
288static qword
289micro_neg(qword src)
290{
291 return si_xor(src, (qword) spu_splats(0x80000000));
292}
293
294static qword
295micro_set_sign(qword src)
296{
297 return si_or(src, (qword) spu_splats(0x80000000));
298}
299
300static qword
301micro_pow(qword src0, qword src1)
302{
303 return (qword) _powf4((vec_float4) src0, (vec_float4) src1);
304}
305
306static qword
307micro_rnd(qword src)
308{
309 const qword half = (qword) spu_splats(0.5f);
310
311 /* May be able to use _roundf4. There may be some difference, though.
312 */
313 return (qword) _floorf4((vec_float4) si_fa(src, half));
314}
315
316static INLINE qword
317micro_ishr(qword src0, qword src1)
318{
319 return si_rotma(src0, si_sfi(src1, 0));
320}
321
322static qword
323micro_trunc(qword src)
324{
325 return (qword) _truncf4((vec_float4) src);
326}
327
328static qword
329micro_sin(qword src)
330{
331 return (qword) _sinf4((vec_float4) src);
332}
333
334static INLINE qword
335micro_sqrt(qword src)
336{
337 return (qword) _sqrtf4((vec_float4) src);
338}
339
340static void
341fetch_src_file_channel(
342 const struct spu_exec_machine *mach,
343 const uint file,
344 const uint swizzle,
345 const union spu_exec_channel *index,
346 union spu_exec_channel *chan )
347{
348 switch( swizzle ) {
349 case TGSI_SWIZZLE_X:
350 case TGSI_SWIZZLE_Y:
351 case TGSI_SWIZZLE_Z:
352 case TGSI_SWIZZLE_W:
353 switch( file ) {
354 case TGSI_FILE_CONSTANT: {
355 unsigned i;
356
357 for (i = 0; i < 4; i++) {
358 const float *ptr = mach->Consts[index->i[i]];
359 float tmp[4];
360
361 spu_dcache_fetch_unaligned((qword *) tmp,
362 (uintptr_t)(ptr + swizzle),
363 sizeof(float));
364
365 chan->f[i] = tmp[0];
366 }
367 break;
368 }
369
370 case TGSI_FILE_INPUT:
371 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
372 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
373 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
374 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
375 break;
376
377 case TGSI_FILE_TEMPORARY:
378 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
379 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
380 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
381 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
382 break;
383
384 case TGSI_FILE_IMMEDIATE:
385 ASSERT( index->i[0] < (int) mach->ImmLimit );
386 ASSERT( index->i[1] < (int) mach->ImmLimit );
387 ASSERT( index->i[2] < (int) mach->ImmLimit );
388 ASSERT( index->i[3] < (int) mach->ImmLimit );
389
390 chan->f[0] = mach->Imms[index->i[0]][swizzle];
391 chan->f[1] = mach->Imms[index->i[1]][swizzle];
392 chan->f[2] = mach->Imms[index->i[2]][swizzle];
393 chan->f[3] = mach->Imms[index->i[3]][swizzle];
394 break;
395
396 case TGSI_FILE_ADDRESS:
397 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
398 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
399 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
400 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
401 break;
402
403 case TGSI_FILE_OUTPUT:
404 /* vertex/fragment output vars can be read too */
405 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
406 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
407 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
408 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
409 break;
410
411 default:
412 ASSERT( 0 );
413 }
414 break;
415
416 default:
417 ASSERT( 0 );
418 }
419}
420
421static void
422fetch_source(
423 const struct spu_exec_machine *mach,
424 union spu_exec_channel *chan,
425 const struct tgsi_full_src_register *reg,
426 const uint chan_index )
427{
428 union spu_exec_channel index;
429 uint swizzle;
430
431 index.i[0] =
432 index.i[1] =
433 index.i[2] =
434 index.i[3] = reg->Register.Index;
435
436 if (reg->Register.Indirect) {
437 union spu_exec_channel index2;
438 union spu_exec_channel indir_index;
439
440 index2.i[0] =
441 index2.i[1] =
442 index2.i[2] =
443 index2.i[3] = reg->Indirect.Index;
444
445 swizzle = tgsi_util_get_src_register_swizzle(&reg->Indirect,
446 CHAN_X);
447 fetch_src_file_channel(
448 mach,
449 reg->Indirect.File,
450 swizzle,
451 &index2,
452 &indir_index );
453
454 index.q = si_a(index.q, indir_index.q);
455 }
456
457 if( reg->Register.Dimension ) {
458 switch( reg->Register.File ) {
459 case TGSI_FILE_INPUT:
460 index.q = si_mpyi(index.q, 17);
461 break;
462 case TGSI_FILE_CONSTANT:
463 index.q = si_shli(index.q, 12);
464 break;
465 default:
466 ASSERT( 0 );
467 }
468
469 index.i[0] += reg->Dimension.Index;
470 index.i[1] += reg->Dimension.Index;
471 index.i[2] += reg->Dimension.Index;
472 index.i[3] += reg->Dimension.Index;
473
474 if (reg->Dimension.Indirect) {
475 union spu_exec_channel index2;
476 union spu_exec_channel indir_index;
477
478 index2.i[0] =
479 index2.i[1] =
480 index2.i[2] =
481 index2.i[3] = reg->DimIndirect.Index;
482
483 swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
484 fetch_src_file_channel(
485 mach,
486 reg->DimIndirect.File,
487 swizzle,
488 &index2,
489 &indir_index );
490
491 index.q = si_a(index.q, indir_index.q);
492 }
493 }
494
495 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
496 fetch_src_file_channel(
497 mach,
498 reg->Register.File,
499 swizzle,
500 &index,
501 chan );
502
503 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
504 case TGSI_UTIL_SIGN_CLEAR:
505 chan->q = micro_abs(chan->q);
506 break;
507
508 case TGSI_UTIL_SIGN_SET:
509 chan->q = micro_set_sign(chan->q);
510 break;
511
512 case TGSI_UTIL_SIGN_TOGGLE:
513 chan->q = micro_neg(chan->q);
514 break;
515
516 case TGSI_UTIL_SIGN_KEEP:
517 break;
518 }
519
520 if (reg->RegisterExtMod.Complement) {
521 chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q);
522 }
523}
524
525static void
526store_dest(
527 struct spu_exec_machine *mach,
528 const union spu_exec_channel *chan,
529 const struct tgsi_full_dst_register *reg,
530 const struct tgsi_full_instruction *inst,
531 uint chan_index )
532{
533 union spu_exec_channel *dst;
534
535 switch( reg->Register.File ) {
536 case TGSI_FILE_NULL:
537 return;
538
539 case TGSI_FILE_OUTPUT:
540 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
541 + reg->Register.Index].xyzw[chan_index];
542 break;
543
544 case TGSI_FILE_TEMPORARY:
545 dst = &mach->Temps[reg->Register.Index].xyzw[chan_index];
546 break;
547
548 case TGSI_FILE_ADDRESS:
549 dst = &mach->Addrs[reg->Register.Index].xyzw[chan_index];
550 break;
551
552 default:
553 ASSERT( 0 );
554 return;
555 }
556
557 switch (inst->Instruction.Saturate)
558 {
559 case TGSI_SAT_NONE:
560 if (mach->ExecMask & 0x1)
561 dst->i[0] = chan->i[0];
562 if (mach->ExecMask & 0x2)
563 dst->i[1] = chan->i[1];
564 if (mach->ExecMask & 0x4)
565 dst->i[2] = chan->i[2];
566 if (mach->ExecMask & 0x8)
567 dst->i[3] = chan->i[3];
568 break;
569
570 case TGSI_SAT_ZERO_ONE:
571 /* XXX need to obey ExecMask here */
572 dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
573 dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q);
574 break;
575
576 case TGSI_SAT_MINUS_PLUS_ONE:
577 ASSERT( 0 );
578 break;
579
580 default:
581 ASSERT( 0 );
582 }
583}
584
585#define FETCH(VAL,INDEX,CHAN)\
586 fetch_source (mach, VAL, &inst->Src[INDEX], CHAN)
587
588#define STORE(VAL,INDEX,CHAN)\
589 store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN )
590
591
592/**
593 * Execute ARB-style KIL which is predicated by a src register.
594 * Kill fragment if any of the four values is less than zero.
595 */
596static void
597exec_kil(struct spu_exec_machine *mach,
598 const struct tgsi_full_instruction *inst)
599{
600 uint uniquemask;
601 uint chan_index;
602 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
603 union spu_exec_channel r[1];
604
605 /* This mask stores component bits that were already tested. */
606 uniquemask = 0;
607
608 for (chan_index = 0; chan_index < 4; chan_index++)
609 {
610 uint swizzle;
611 uint i;
612
613 /* unswizzle channel */
614 swizzle = tgsi_util_get_full_src_register_swizzle (
615 &inst->Src[0],
616 chan_index);
617
618 /* check if the component has not been already tested */
619 if (uniquemask & (1 << swizzle))
620 continue;
621 uniquemask |= 1 << swizzle;
622
623 FETCH(&r[0], 0, chan_index);
624 for (i = 0; i < 4; i++)
625 if (r[0].f[i] < 0.0f)
626 kilmask |= 1 << i;
627 }
628
629 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
630}
631
632/**
633 * Execute NVIDIA-style KIL which is predicated by a condition code.
634 * Kill fragment if the condition code is TRUE.
635 */
636static void
637exec_kilp(struct spu_exec_machine *mach,
638 const struct tgsi_full_instruction *inst)
639{
640 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
641
642 /* TODO: build kilmask from CC mask */
643
644 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
645}
646
647/*
648 * Fetch a texel using STR texture coordinates.
649 */
650static void
651fetch_texel( struct spu_sampler *sampler,
652 const union spu_exec_channel *s,
653 const union spu_exec_channel *t,
654 const union spu_exec_channel *p,
655 float lodbias, /* XXX should be float[4] */
656 union spu_exec_channel *r,
657 union spu_exec_channel *g,
658 union spu_exec_channel *b,
659 union spu_exec_channel *a )
660{
661 qword rgba[4];
662 qword out[4];
663
664 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias,
665 (float (*)[4]) rgba);
666
667 _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba);
668 r->q = out[0];
669 g->q = out[1];
670 b->q = out[2];
671 a->q = out[3];
672}
673
674
675static void
676exec_tex(struct spu_exec_machine *mach,
677 const struct tgsi_full_instruction *inst,
678 boolean biasLod, boolean projected)
679{
680 const uint unit = inst->Src[1].Register.Index;
681 union spu_exec_channel r[8];
682 uint chan_index;
683 float lodBias;
684
685 /* printf("Sampler %u unit %u\n", sampler, unit); */
686
687 switch (inst->InstructionExtTexture.Texture) {
688 case TGSI_TEXTURE_1D:
689
690 FETCH(&r[0], 0, CHAN_X);
691
692 if (projected) {
693 FETCH(&r[1], 0, CHAN_W);
694 r[0].q = micro_div(r[0].q, r[1].q);
695 }
696
697 if (biasLod) {
698 FETCH(&r[1], 0, CHAN_W);
699 lodBias = r[2].f[0];
700 }
701 else
702 lodBias = 0.0;
703
704 fetch_texel(&mach->Samplers[unit],
705 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
706 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
707 break;
708
709 case TGSI_TEXTURE_2D:
710 case TGSI_TEXTURE_RECT:
711
712 FETCH(&r[0], 0, CHAN_X);
713 FETCH(&r[1], 0, CHAN_Y);
714 FETCH(&r[2], 0, CHAN_Z);
715
716 if (projected) {
717 FETCH(&r[3], 0, CHAN_W);
718 r[0].q = micro_div(r[0].q, r[3].q);
719 r[1].q = micro_div(r[1].q, r[3].q);
720 r[2].q = micro_div(r[2].q, r[3].q);
721 }
722
723 if (biasLod) {
724 FETCH(&r[3], 0, CHAN_W);
725 lodBias = r[3].f[0];
726 }
727 else
728 lodBias = 0.0;
729
730 fetch_texel(&mach->Samplers[unit],
731 &r[0], &r[1], &r[2], lodBias, /* inputs */
732 &r[0], &r[1], &r[2], &r[3]); /* outputs */
733 break;
734
735 case TGSI_TEXTURE_3D:
736 case TGSI_TEXTURE_CUBE:
737
738 FETCH(&r[0], 0, CHAN_X);
739 FETCH(&r[1], 0, CHAN_Y);
740 FETCH(&r[2], 0, CHAN_Z);
741
742 if (projected) {
743 FETCH(&r[3], 0, CHAN_W);
744 r[0].q = micro_div(r[0].q, r[3].q);
745 r[1].q = micro_div(r[1].q, r[3].q);
746 r[2].q = micro_div(r[2].q, r[3].q);
747 }
748
749 if (biasLod) {
750 FETCH(&r[3], 0, CHAN_W);
751 lodBias = r[3].f[0];
752 }
753 else
754 lodBias = 0.0;
755
756 fetch_texel(&mach->Samplers[unit],
757 &r[0], &r[1], &r[2], lodBias,
758 &r[0], &r[1], &r[2], &r[3]);
759 break;
760
761 default:
762 ASSERT (0);
763 }
764
765 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
766 STORE( &r[chan_index], 0, chan_index );
767 }
768}
769
770
771
772static void
773constant_interpolation(
774 struct spu_exec_machine *mach,
775 unsigned attrib,
776 unsigned chan )
777{
778 unsigned i;
779
780 for( i = 0; i < QUAD_SIZE; i++ ) {
781 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
782 }
783}
784
785static void
786linear_interpolation(
787 struct spu_exec_machine *mach,
788 unsigned attrib,
789 unsigned chan )
790{
791 const float x = mach->QuadPos.xyzw[0].f[0];
792 const float y = mach->QuadPos.xyzw[1].f[0];
793 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
794 const float dady = mach->InterpCoefs[attrib].dady[chan];
795 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
796 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
797 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
798 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
799 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
800}
801
802static void
803perspective_interpolation(
804 struct spu_exec_machine *mach,
805 unsigned attrib,
806 unsigned chan )
807{
808 const float x = mach->QuadPos.xyzw[0].f[0];
809 const float y = mach->QuadPos.xyzw[1].f[0];
810 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
811 const float dady = mach->InterpCoefs[attrib].dady[chan];
812 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
813 const float *w = mach->QuadPos.xyzw[3].f;
814 /* divide by W here */
815 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
816 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
817 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
818 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
819}
820
821
822typedef void (* interpolation_func)(
823 struct spu_exec_machine *mach,
824 unsigned attrib,
825 unsigned chan );
826
827static void
828exec_declaration(struct spu_exec_machine *mach,
829 const struct tgsi_full_declaration *decl)
830{
831 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
832 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
833 unsigned first, last, mask;
834 interpolation_func interp;
835
836 first = decl->Range.First;
837 last = decl->Range.Last;
838 mask = decl->Declaration.UsageMask;
839
840 switch( decl->Declaration.Interpolate ) {
841 case TGSI_INTERPOLATE_CONSTANT:
842 interp = constant_interpolation;
843 break;
844
845 case TGSI_INTERPOLATE_LINEAR:
846 interp = linear_interpolation;
847 break;
848
849 case TGSI_INTERPOLATE_PERSPECTIVE:
850 interp = perspective_interpolation;
851 break;
852
853 default:
854 ASSERT( 0 );
855 }
856
857 if( mask == TGSI_WRITEMASK_XYZW ) {
858 unsigned i, j;
859
860 for( i = first; i <= last; i++ ) {
861 for( j = 0; j < NUM_CHANNELS; j++ ) {
862 interp( mach, i, j );
863 }
864 }
865 }
866 else {
867 unsigned i, j;
868
869 for( j = 0; j < NUM_CHANNELS; j++ ) {
870 if( mask & (1 << j) ) {
871 for( i = first; i <= last; i++ ) {
872 interp( mach, i, j );
873 }
874 }
875 }
876 }
877 }
878 }
879}
880
881static void
882exec_instruction(
883 struct spu_exec_machine *mach,
884 const struct tgsi_full_instruction *inst,
885 int *pc )
886{
887 uint chan_index;
888 union spu_exec_channel r[8];
889
890 (*pc)++;
891
892 switch (inst->Instruction.Opcode) {
893 case TGSI_OPCODE_ARL:
894 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
895 FETCH( &r[0], 0, chan_index );
896 r[0].q = si_cflts(r[0].q, 0);
897 STORE( &r[0], 0, chan_index );
898 }
899 break;
900
901 case TGSI_OPCODE_MOV:
902 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
903 FETCH( &r[0], 0, chan_index );
904 STORE( &r[0], 0, chan_index );
905 }
906 break;
907
908 case TGSI_OPCODE_LIT:
909 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
910 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
911 }
912
913 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
914 FETCH( &r[0], 0, CHAN_X );
915 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
916 r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
917 STORE( &r[0], 0, CHAN_Y );
918 }
919
920 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
921 FETCH( &r[1], 0, CHAN_Y );
922 r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
923
924 FETCH( &r[2], 0, CHAN_W );
925 r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q);
926 r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q);
927 r[1].q = micro_pow(r[1].q, r[2].q);
928
929 /* r0 = (r0 > 0.0) ? r1 : 0.0
930 */
931 r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
932 r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q,
933 r[0].q);
934 STORE( &r[0], 0, CHAN_Z );
935 }
936 }
937
938 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
939 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
940 }
941 break;
942
943 case TGSI_OPCODE_RCP:
944 FETCH( &r[0], 0, CHAN_X );
945 r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
946 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
947 STORE( &r[0], 0, chan_index );
948 }
949 break;
950
951 case TGSI_OPCODE_RSQ:
952 FETCH( &r[0], 0, CHAN_X );
953 r[0].q = micro_sqrt(r[0].q);
954 r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
955 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
956 STORE( &r[0], 0, chan_index );
957 }
958 break;
959
960 case TGSI_OPCODE_EXP:
961 ASSERT (0);
962 break;
963
964 case TGSI_OPCODE_LOG:
965 ASSERT (0);
966 break;
967
968 case TGSI_OPCODE_MUL:
969 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
970 {
971 FETCH(&r[0], 0, chan_index);
972 FETCH(&r[1], 1, chan_index);
973
974 r[0].q = si_fm(r[0].q, r[1].q);
975
976 STORE(&r[0], 0, chan_index);
977 }
978 break;
979
980 case TGSI_OPCODE_ADD:
981 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
982 FETCH( &r[0], 0, chan_index );
983 FETCH( &r[1], 1, chan_index );
984 r[0].q = si_fa(r[0].q, r[1].q);
985 STORE( &r[0], 0, chan_index );
986 }
987 break;
988
989 case TGSI_OPCODE_DP3:
990 /* TGSI_OPCODE_DOT3 */
991 FETCH( &r[0], 0, CHAN_X );
992 FETCH( &r[1], 1, CHAN_X );
993 r[0].q = si_fm(r[0].q, r[1].q);
994
995 FETCH( &r[1], 0, CHAN_Y );
996 FETCH( &r[2], 1, CHAN_Y );
997 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
998
999
1000 FETCH( &r[1], 0, CHAN_Z );
1001 FETCH( &r[2], 1, CHAN_Z );
1002 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1003
1004 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1005 STORE( &r[0], 0, chan_index );
1006 }
1007 break;
1008
1009 case TGSI_OPCODE_DP4:
1010 /* TGSI_OPCODE_DOT4 */
1011 FETCH(&r[0], 0, CHAN_X);
1012 FETCH(&r[1], 1, CHAN_X);
1013
1014 r[0].q = si_fm(r[0].q, r[1].q);
1015
1016 FETCH(&r[1], 0, CHAN_Y);
1017 FETCH(&r[2], 1, CHAN_Y);
1018
1019 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1020
1021 FETCH(&r[1], 0, CHAN_Z);
1022 FETCH(&r[2], 1, CHAN_Z);
1023
1024 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1025
1026 FETCH(&r[1], 0, CHAN_W);
1027 FETCH(&r[2], 1, CHAN_W);
1028
1029 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1030
1031 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1032 STORE( &r[0], 0, chan_index );
1033 }
1034 break;
1035
1036 case TGSI_OPCODE_DST:
1037 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1038 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1039 }
1040
1041 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1042 FETCH( &r[0], 0, CHAN_Y );
1043 FETCH( &r[1], 1, CHAN_Y);
1044 r[0].q = si_fm(r[0].q, r[1].q);
1045 STORE( &r[0], 0, CHAN_Y );
1046 }
1047
1048 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1049 FETCH( &r[0], 0, CHAN_Z );
1050 STORE( &r[0], 0, CHAN_Z );
1051 }
1052
1053 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1054 FETCH( &r[0], 1, CHAN_W );
1055 STORE( &r[0], 0, CHAN_W );
1056 }
1057 break;
1058
1059 case TGSI_OPCODE_MIN:
1060 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1061 FETCH(&r[0], 0, chan_index);
1062 FETCH(&r[1], 1, chan_index);
1063
1064 r[0].q = micro_min(r[0].q, r[1].q);
1065
1066 STORE(&r[0], 0, chan_index);
1067 }
1068 break;
1069
1070 case TGSI_OPCODE_MAX:
1071 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1072 FETCH(&r[0], 0, chan_index);
1073 FETCH(&r[1], 1, chan_index);
1074
1075 r[0].q = micro_max(r[0].q, r[1].q);
1076
1077 STORE(&r[0], 0, chan_index );
1078 }
1079 break;
1080
1081 case TGSI_OPCODE_SLT:
1082 /* TGSI_OPCODE_SETLT */
1083 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1084 FETCH( &r[0], 0, chan_index );
1085 FETCH( &r[1], 1, chan_index );
1086
1087 r[0].q = micro_ge(r[0].q, r[1].q);
1088 r[0].q = si_xori(r[0].q, 0xff);
1089
1090 STORE( &r[0], 0, chan_index );
1091 }
1092 break;
1093
1094 case TGSI_OPCODE_SGE:
1095 /* TGSI_OPCODE_SETGE */
1096 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1097 FETCH( &r[0], 0, chan_index );
1098 FETCH( &r[1], 1, chan_index );
1099 r[0].q = micro_ge(r[0].q, r[1].q);
1100 STORE( &r[0], 0, chan_index );
1101 }
1102 break;
1103
1104 case TGSI_OPCODE_MAD:
1105 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1106 FETCH( &r[0], 0, chan_index );
1107 FETCH( &r[1], 1, chan_index );
1108 FETCH( &r[2], 2, chan_index );
1109 r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
1110 STORE( &r[0], 0, chan_index );
1111 }
1112 break;
1113
1114 case TGSI_OPCODE_SUB:
1115 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1116 FETCH(&r[0], 0, chan_index);
1117 FETCH(&r[1], 1, chan_index);
1118
1119 r[0].q = si_fs(r[0].q, r[1].q);
1120
1121 STORE(&r[0], 0, chan_index);
1122 }
1123 break;
1124
1125 case TGSI_OPCODE_LRP:
1126 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1127 FETCH(&r[0], 0, chan_index);
1128 FETCH(&r[1], 1, chan_index);
1129 FETCH(&r[2], 2, chan_index);
1130
1131 r[1].q = si_fs(r[1].q, r[2].q);
1132 r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
1133
1134 STORE(&r[0], 0, chan_index);
1135 }
1136 break;
1137
1138 case TGSI_OPCODE_CND:
1139 ASSERT (0);
1140 break;
1141
1142 case TGSI_OPCODE_DP2A:
1143 ASSERT (0);
1144 break;
1145
1146 case TGSI_OPCODE_FRC:
1147 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1148 FETCH( &r[0], 0, chan_index );
1149 r[0].q = micro_frc(r[0].q);
1150 STORE( &r[0], 0, chan_index );
1151 }
1152 break;
1153
1154 case TGSI_OPCODE_CLAMP:
1155 ASSERT (0);
1156 break;
1157
1158 case TGSI_OPCODE_FLR:
1159 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1160 FETCH( &r[0], 0, chan_index );
1161 r[0].q = micro_flr(r[0].q);
1162 STORE( &r[0], 0, chan_index );
1163 }
1164 break;
1165
1166 case TGSI_OPCODE_ROUND:
1167 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1168 FETCH( &r[0], 0, chan_index );
1169 r[0].q = micro_rnd(r[0].q);
1170 STORE( &r[0], 0, chan_index );
1171 }
1172 break;
1173
1174 case TGSI_OPCODE_EX2:
1175 FETCH(&r[0], 0, CHAN_X);
1176
1177 r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q);
1178
1179 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1180 STORE( &r[0], 0, chan_index );
1181 }
1182 break;
1183
1184 case TGSI_OPCODE_LG2:
1185 FETCH( &r[0], 0, CHAN_X );
1186 r[0].q = micro_lg2(r[0].q);
1187 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1188 STORE( &r[0], 0, chan_index );
1189 }
1190 break;
1191
1192 case TGSI_OPCODE_POW:
1193 FETCH(&r[0], 0, CHAN_X);
1194 FETCH(&r[1], 1, CHAN_X);
1195
1196 r[0].q = micro_pow(r[0].q, r[1].q);
1197
1198 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1199 STORE( &r[0], 0, chan_index );
1200 }
1201 break;
1202
1203 case TGSI_OPCODE_XPD:
1204 /* TGSI_OPCODE_XPD */
1205 FETCH(&r[0], 0, CHAN_Y);
1206 FETCH(&r[1], 1, CHAN_Z);
1207 FETCH(&r[3], 0, CHAN_Z);
1208 FETCH(&r[4], 1, CHAN_Y);
1209
1210 /* r2 = (r0 * r1) - (r3 * r5)
1211 */
1212 r[2].q = si_fm(r[3].q, r[5].q);
1213 r[2].q = si_fms(r[0].q, r[1].q, r[2].q);
1214
1215 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1216 STORE( &r[2], 0, CHAN_X );
1217 }
1218
1219 FETCH(&r[2], 1, CHAN_X);
1220 FETCH(&r[5], 0, CHAN_X);
1221
1222 /* r3 = (r3 * r2) - (r1 * r5)
1223 */
1224 r[1].q = si_fm(r[1].q, r[5].q);
1225 r[3].q = si_fms(r[3].q, r[2].q, r[1].q);
1226
1227 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1228 STORE( &r[3], 0, CHAN_Y );
1229 }
1230
1231 /* r5 = (r5 * r4) - (r0 * r2)
1232 */
1233 r[0].q = si_fm(r[0].q, r[2].q);
1234 r[5].q = si_fms(r[5].q, r[4].q, r[0].q);
1235
1236 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1237 STORE( &r[5], 0, CHAN_Z );
1238 }
1239
1240 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1241 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1242 }
1243 break;
1244
1245 case TGSI_OPCODE_ABS:
1246 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1247 FETCH(&r[0], 0, chan_index);
1248
1249 r[0].q = micro_abs(r[0].q);
1250
1251 STORE(&r[0], 0, chan_index);
1252 }
1253 break;
1254
1255 case TGSI_OPCODE_RCC:
1256 ASSERT (0);
1257 break;
1258
1259 case TGSI_OPCODE_DPH:
1260 FETCH(&r[0], 0, CHAN_X);
1261 FETCH(&r[1], 1, CHAN_X);
1262
1263 r[0].q = si_fm(r[0].q, r[1].q);
1264
1265 FETCH(&r[1], 0, CHAN_Y);
1266 FETCH(&r[2], 1, CHAN_Y);
1267
1268 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1269
1270 FETCH(&r[1], 0, CHAN_Z);
1271 FETCH(&r[2], 1, CHAN_Z);
1272
1273 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1274
1275 FETCH(&r[1], 1, CHAN_W);
1276
1277 r[0].q = si_fa(r[0].q, r[1].q);
1278
1279 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1280 STORE( &r[0], 0, chan_index );
1281 }
1282 break;
1283
1284 case TGSI_OPCODE_COS:
1285 FETCH(&r[0], 0, CHAN_X);
1286
1287 r[0].q = micro_cos(r[0].q);
1288
1289 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1290 STORE( &r[0], 0, chan_index );
1291 }
1292 break;
1293
1294 case TGSI_OPCODE_DDX:
1295 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1296 FETCH( &r[0], 0, chan_index );
1297 r[0].q = micro_ddx(r[0].q);
1298 STORE( &r[0], 0, chan_index );
1299 }
1300 break;
1301
1302 case TGSI_OPCODE_DDY:
1303 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1304 FETCH( &r[0], 0, chan_index );
1305 r[0].q = micro_ddy(r[0].q);
1306 STORE( &r[0], 0, chan_index );
1307 }
1308 break;
1309
1310 case TGSI_OPCODE_KILP:
1311 exec_kilp (mach, inst);
1312 break;
1313
1314 case TGSI_OPCODE_KIL:
1315 exec_kil (mach, inst);
1316 break;
1317
1318 case TGSI_OPCODE_PK2H:
1319 ASSERT (0);
1320 break;
1321
1322 case TGSI_OPCODE_PK2US:
1323 ASSERT (0);
1324 break;
1325
1326 case TGSI_OPCODE_PK4B:
1327 ASSERT (0);
1328 break;
1329
1330 case TGSI_OPCODE_PK4UB:
1331 ASSERT (0);
1332 break;
1333
1334 case TGSI_OPCODE_RFL:
1335 ASSERT (0);
1336 break;
1337
1338 case TGSI_OPCODE_SEQ:
1339 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1340 FETCH( &r[0], 0, chan_index );
1341 FETCH( &r[1], 1, chan_index );
1342
1343 r[0].q = si_fceq(r[0].q, r[1].q);
1344
1345 STORE( &r[0], 0, chan_index );
1346 }
1347 break;
1348
1349 case TGSI_OPCODE_SFL:
1350 ASSERT (0);
1351 break;
1352
1353 case TGSI_OPCODE_SGT:
1354 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1355 FETCH( &r[0], 0, chan_index );
1356 FETCH( &r[1], 1, chan_index );
1357 r[0].q = si_fcgt(r[0].q, r[1].q);
1358 STORE( &r[0], 0, chan_index );
1359 }
1360 break;
1361
1362 case TGSI_OPCODE_SIN:
1363 FETCH( &r[0], 0, CHAN_X );
1364 r[0].q = micro_sin(r[0].q);
1365 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1366 STORE( &r[0], 0, chan_index );
1367 }
1368 break;
1369
1370 case TGSI_OPCODE_SLE:
1371 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1372 FETCH( &r[0], 0, chan_index );
1373 FETCH( &r[1], 1, chan_index );
1374
1375 r[0].q = si_fcgt(r[0].q, r[1].q);
1376 r[0].q = si_xori(r[0].q, 0xff);
1377
1378 STORE( &r[0], 0, chan_index );
1379 }
1380 break;
1381
1382 case TGSI_OPCODE_SNE:
1383 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1384 FETCH( &r[0], 0, chan_index );
1385 FETCH( &r[1], 1, chan_index );
1386
1387 r[0].q = si_fceq(r[0].q, r[1].q);
1388 r[0].q = si_xori(r[0].q, 0xff);
1389
1390 STORE( &r[0], 0, chan_index );
1391 }
1392 break;
1393
1394 case TGSI_OPCODE_STR:
1395 ASSERT (0);
1396 break;
1397
1398 case TGSI_OPCODE_TEX:
1399 /* simple texture lookup */
1400 /* src[0] = texcoord */
1401 /* src[1] = sampler unit */
1402 exec_tex(mach, inst, FALSE, FALSE);
1403 break;
1404
1405 case TGSI_OPCODE_TXB:
1406 /* Texture lookup with lod bias */
1407   /* src[0] = texcoord (src[0].w = lod bias) */
1408 /* src[1] = sampler unit */
1409 exec_tex(mach, inst, TRUE, FALSE);
1410 break;
1411
1412 case TGSI_OPCODE_TXD:
1413   /* Texture lookup with explicit partial derivatives */
1414 /* src[0] = texcoord */
1415 /* src[1] = d[strq]/dx */
1416 /* src[2] = d[strq]/dy */
1417 /* src[3] = sampler unit */
1418 ASSERT (0);
1419 break;
1420
1421 case TGSI_OPCODE_TXL:
1422   /* Texture lookup with explicit LOD */
1423   /* src[0] = texcoord (src[0].w = lod) */
1424 /* src[1] = sampler unit */
1425 exec_tex(mach, inst, TRUE, FALSE);
1426 break;
1427
1428 case TGSI_OPCODE_TXP:
1429 /* Texture lookup with projection */
1430 /* src[0] = texcoord (src[0].w = projection) */
1431 /* src[1] = sampler unit */
1432 exec_tex(mach, inst, TRUE, TRUE);
1433 break;
1434
1435 case TGSI_OPCODE_UP2H:
1436 ASSERT (0);
1437 break;
1438
1439 case TGSI_OPCODE_UP2US:
1440 ASSERT (0);
1441 break;
1442
1443 case TGSI_OPCODE_UP4B:
1444 ASSERT (0);
1445 break;
1446
1447 case TGSI_OPCODE_UP4UB:
1448 ASSERT (0);
1449 break;
1450
1451 case TGSI_OPCODE_X2D:
1452 ASSERT (0);
1453 break;
1454
1455 case TGSI_OPCODE_ARA:
1456 ASSERT (0);
1457 break;
1458
1459 case TGSI_OPCODE_ARR:
1460 ASSERT (0);
1461 break;
1462
1463 case TGSI_OPCODE_BRA:
1464 ASSERT (0);
1465 break;
1466
1467 case TGSI_OPCODE_CAL:
1468 /* skip the call if no execution channels are enabled */
1469 if (mach->ExecMask) {
1470 /* do the call */
1471
1472 /* push the Cond, Loop, Cont stacks */
1473 ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
1474 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
1475 ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
1476 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
1477 ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
1478 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
1479
1480 ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
1481 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
1482
1483 /* note that PC was already incremented above */
1484 mach->CallStack[mach->CallStackTop++] = *pc;
1485 *pc = inst->InstructionExtLabel.Label;
1486 }
1487 break;
1488
1489 case TGSI_OPCODE_RET:
1490 mach->FuncMask &= ~mach->ExecMask;
1491 UPDATE_EXEC_MASK(mach);
1492
1493 if (mach->ExecMask == 0x0) {
1494         /* really return now (otherwise, keep executing) */
1495
1496 if (mach->CallStackTop == 0) {
1497 /* returning from main() */
1498 *pc = -1;
1499 return;
1500 }
1501 *pc = mach->CallStack[--mach->CallStackTop];
1502
1503 /* pop the Cond, Loop, Cont stacks */
1504 ASSERT(mach->CondStackTop > 0);
1505 mach->CondMask = mach->CondStack[--mach->CondStackTop];
1506 ASSERT(mach->LoopStackTop > 0);
1507 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
1508 ASSERT(mach->ContStackTop > 0);
1509 mach->ContMask = mach->ContStack[--mach->ContStackTop];
1510 ASSERT(mach->FuncStackTop > 0);
1511 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
1512
1513 UPDATE_EXEC_MASK(mach);
1514 }
1515 break;
1516
1517 case TGSI_OPCODE_SSG:
1518 ASSERT (0);
1519 break;
1520
1521 case TGSI_OPCODE_CMP:
1522 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1523 FETCH(&r[0], 0, chan_index);
1524 FETCH(&r[1], 1, chan_index);
1525 FETCH(&r[2], 2, chan_index);
1526
1527 /* r0 = (r0 < 0.0) ? r1 : r2
1528 */
1529 r[3].q = si_xor(r[3].q, r[3].q);
1530 r[0].q = micro_lt(r[0].q, r[3].q);
1531 r[0].q = si_selb(r[1].q, r[2].q, r[0].q);
1532
1533 STORE(&r[0], 0, chan_index);
1534 }
1535 break;
1536
1537 case TGSI_OPCODE_SCS:
1538 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
1539 FETCH( &r[0], 0, CHAN_X );
1540 }
1541 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
1542 r[1].q = micro_cos(r[0].q);
1543 STORE( &r[1], 0, CHAN_X );
1544 }
1545 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
1546 r[1].q = micro_sin(r[0].q);
1547 STORE( &r[1], 0, CHAN_Y );
1548 }
1549 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
1550 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
1551 }
1552 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
1553 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1554 }
1555 break;
1556
1557 case TGSI_OPCODE_NRM:
1558 ASSERT (0);
1559 break;
1560
1561 case TGSI_OPCODE_DIV:
1562 ASSERT( 0 );
1563 break;
1564
1565 case TGSI_OPCODE_DP2:
1566 FETCH( &r[0], 0, CHAN_X );
1567 FETCH( &r[1], 1, CHAN_X );
1568 r[0].q = si_fm(r[0].q, r[1].q);
1569
1570 FETCH( &r[1], 0, CHAN_Y );
1571 FETCH( &r[2], 1, CHAN_Y );
1572 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
1573
1574 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1575 STORE( &r[0], 0, chan_index );
1576 }
1577 break;
1578
1579 case TGSI_OPCODE_IF:
1580 /* push CondMask */
1581 ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
1582 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
1583 FETCH( &r[0], 0, CHAN_X );
1584 /* update CondMask */
1585 if( ! r[0].u[0] ) {
1586 mach->CondMask &= ~0x1;
1587 }
1588 if( ! r[0].u[1] ) {
1589 mach->CondMask &= ~0x2;
1590 }
1591 if( ! r[0].u[2] ) {
1592 mach->CondMask &= ~0x4;
1593 }
1594 if( ! r[0].u[3] ) {
1595 mach->CondMask &= ~0x8;
1596 }
1597 UPDATE_EXEC_MASK(mach);
1598 /* Todo: If CondMask==0, jump to ELSE */
1599 break;
1600
1601 case TGSI_OPCODE_ELSE:
1602 /* invert CondMask wrt previous mask */
1603 {
1604 uint prevMask;
1605 ASSERT(mach->CondStackTop > 0);
1606 prevMask = mach->CondStack[mach->CondStackTop - 1];
1607 mach->CondMask = ~mach->CondMask & prevMask;
1608 UPDATE_EXEC_MASK(mach);
1609 /* Todo: If CondMask==0, jump to ENDIF */
1610 }
1611 break;
1612
1613 case TGSI_OPCODE_ENDIF:
1614 /* pop CondMask */
1615 ASSERT(mach->CondStackTop > 0);
1616 mach->CondMask = mach->CondStack[--mach->CondStackTop];
1617 UPDATE_EXEC_MASK(mach);
1618 break;
1619
1620 case TGSI_OPCODE_END:
1621 /* halt execution */
1622 *pc = -1;
1623 break;
1624
1625 case TGSI_OPCODE_PUSHA:
1626 ASSERT (0);
1627 break;
1628
1629 case TGSI_OPCODE_POPA:
1630 ASSERT (0);
1631 break;
1632
1633 case TGSI_OPCODE_CEIL:
1634 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1635 FETCH( &r[0], 0, chan_index );
1636 r[0].q = micro_ceil(r[0].q);
1637 STORE( &r[0], 0, chan_index );
1638 }
1639 break;
1640
1641 case TGSI_OPCODE_I2F:
1642 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1643 FETCH( &r[0], 0, chan_index );
1644 r[0].q = si_csflt(r[0].q, 0);
1645 STORE( &r[0], 0, chan_index );
1646 }
1647 break;
1648
1649 case TGSI_OPCODE_NOT:
1650 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1651 FETCH( &r[0], 0, chan_index );
1652 r[0].q = si_xorbi(r[0].q, 0xff);
1653 STORE( &r[0], 0, chan_index );
1654 }
1655 break;
1656
1657 case TGSI_OPCODE_TRUNC:
1658 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1659 FETCH( &r[0], 0, chan_index );
1660 r[0].q = micro_trunc(r[0].q);
1661 STORE( &r[0], 0, chan_index );
1662 }
1663 break;
1664
1665 case TGSI_OPCODE_SHL:
1666 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1667 FETCH( &r[0], 0, chan_index );
1668 FETCH( &r[1], 1, chan_index );
1669
1670 r[0].q = si_shl(r[0].q, r[1].q);
1671
1672 STORE( &r[0], 0, chan_index );
1673 }
1674 break;
1675
1676 case TGSI_OPCODE_ISHR:
1677 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1678 FETCH( &r[0], 0, chan_index );
1679 FETCH( &r[1], 1, chan_index );
1680 r[0].q = micro_ishr(r[0].q, r[1].q);
1681 STORE( &r[0], 0, chan_index );
1682 }
1683 break;
1684
1685 case TGSI_OPCODE_AND:
1686 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1687 FETCH( &r[0], 0, chan_index );
1688 FETCH( &r[1], 1, chan_index );
1689 r[0].q = si_and(r[0].q, r[1].q);
1690 STORE( &r[0], 0, chan_index );
1691 }
1692 break;
1693
1694 case TGSI_OPCODE_OR:
1695 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1696 FETCH( &r[0], 0, chan_index );
1697 FETCH( &r[1], 1, chan_index );
1698 r[0].q = si_or(r[0].q, r[1].q);
1699 STORE( &r[0], 0, chan_index );
1700 }
1701 break;
1702
1703 case TGSI_OPCODE_MOD:
1704 ASSERT (0);
1705 break;
1706
1707 case TGSI_OPCODE_XOR:
1708 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1709 FETCH( &r[0], 0, chan_index );
1710 FETCH( &r[1], 1, chan_index );
1711 r[0].q = si_xor(r[0].q, r[1].q);
1712 STORE( &r[0], 0, chan_index );
1713 }
1714 break;
1715
1716 case TGSI_OPCODE_SAD:
1717 ASSERT (0);
1718 break;
1719
1720 case TGSI_OPCODE_TXF:
1721 ASSERT (0);
1722 break;
1723
1724 case TGSI_OPCODE_TXQ:
1725 ASSERT (0);
1726 break;
1727
1728 case TGSI_OPCODE_EMIT:
1729 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
1730 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1731 break;
1732
1733 case TGSI_OPCODE_ENDPRIM:
1734 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
1735 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
1736 break;
1737
1738 case TGSI_OPCODE_BGNLOOP:
1739 /* push LoopMask and ContMasks */
1740 ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
1741 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
1742 ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
1743 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
1744 break;
1745
1746 case TGSI_OPCODE_ENDLOOP:
1747 /* Restore ContMask, but don't pop */
1748 ASSERT(mach->ContStackTop > 0);
1749 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
1750 if (mach->LoopMask) {
1751 /* repeat loop: jump to instruction just past BGNLOOP */
1752 *pc = inst->InstructionExtLabel.Label + 1;
1753 }
1754 else {
1755 /* exit loop: pop LoopMask */
1756 ASSERT(mach->LoopStackTop > 0);
1757 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
1758 /* pop ContMask */
1759 ASSERT(mach->ContStackTop > 0);
1760 mach->ContMask = mach->ContStack[--mach->ContStackTop];
1761 }
1762 UPDATE_EXEC_MASK(mach);
1763 break;
1764
1765 case TGSI_OPCODE_BRK:
1766 /* turn off loop channels for each enabled exec channel */
1767 mach->LoopMask &= ~mach->ExecMask;
1768 /* Todo: if mach->LoopMask == 0, jump to end of loop */
1769 UPDATE_EXEC_MASK(mach);
1770 break;
1771
1772 case TGSI_OPCODE_CONT:
1773 /* turn off cont channels for each enabled exec channel */
1774 mach->ContMask &= ~mach->ExecMask;
1775 /* Todo: if mach->LoopMask == 0, jump to end of loop */
1776 UPDATE_EXEC_MASK(mach);
1777 break;
1778
1779 case TGSI_OPCODE_BGNSUB:
1780 /* no-op */
1781 break;
1782
1783 case TGSI_OPCODE_ENDSUB:
1784 /* no-op */
1785 break;
1786
1787 case TGSI_OPCODE_NOP:
1788 break;
1789
1790 default:
1791 ASSERT( 0 );
1792 }
1793}
1794
1795
1796/**
1797 * Run TGSI interpreter.
1798 * \return bitmask of "alive" quad components
1799 */
1800uint
1801spu_exec_machine_run( struct spu_exec_machine *mach )
1802{
1803 uint i;
1804 int pc = 0;
1805
1806 mach->CondMask = 0xf;
1807 mach->LoopMask = 0xf;
1808 mach->ContMask = 0xf;
1809 mach->FuncMask = 0xf;
1810 mach->ExecMask = 0xf;
1811
1812 mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */
1813 ASSERT(mach->CondStackTop == 0);
1814 ASSERT(mach->LoopStackTop == 0);
1815 ASSERT(mach->ContStackTop == 0);
1816 ASSERT(mach->CallStackTop == 0);
1817
1818 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
1819 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
1820
1821 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
1822 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
1823 mach->Primitives[0] = 0;
1824 }
1825
1826
1827 /* execute declarations (interpolants) */
1828 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1829 for (i = 0; i < mach->NumDeclarations; i++) {
1830 PIPE_ALIGN_VAR(16)
1831 union {
1832 struct tgsi_full_declaration decl;
1833 qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16];
1834 } d;
1835 unsigned ea = (unsigned) (mach->Declarations + pc);
1836
1837 spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl));
1838
1839 exec_declaration( mach, &d.decl );
1840 }
1841 }
1842
1843 /* execute instructions, until pc is set to -1 */
1844 while (pc != -1) {
1845 PIPE_ALIGN_VAR(16)
1846 union {
1847 struct tgsi_full_instruction inst;
1848 qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16];
1849 } i;
1850 unsigned ea = (unsigned) (mach->Instructions + pc);
1851
1852 spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst));
1853 exec_instruction( mach, & i.inst, &pc );
1854 }
1855
1856#if 0
1857 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
1858 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
1859 /*
1860 * Scale back depth component.
1861 */
1862 for (i = 0; i < 4; i++)
1863 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
1864 }
1865#endif
1866
1867 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
1868}
1869
1870
diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h
deleted file mode 100644
index 68f4479e53d..00000000000
--- a/src/gallium/drivers/cell/spu/spu_exec.h
+++ /dev/null
@@ -1,173 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#if !defined SPU_EXEC_H
29#define SPU_EXEC_H
30
31#include "pipe/p_compiler.h"
32
33#include "spu_tgsi_exec.h"
34
35#if defined __cplusplus
36extern "C" {
37#endif
38
39/**
40 * Registers may be treated as float, signed int or unsigned int.
41 */
42union spu_exec_channel
43{
44   float f[QUAD_SIZE];     /* channel viewed as floats, one per quad pixel */
45   int i[QUAD_SIZE];       /* same storage viewed as signed ints */
46   unsigned u[QUAD_SIZE];  /* same storage viewed as unsigned ints */
47   qword q;                /* whole channel as a single 16-byte SPU qword */
48};
49
50/**
51 * A vector[RGBA] of channels[4 pixels]
52 */
53struct spu_exec_vector
54{
55   union spu_exec_channel xyzw[NUM_CHANNELS];  /* one channel each for x, y, z, w */
56};
57
58/**
59 * For fragment programs, information for computing fragment input
60 * values from plane equation of the triangle/line.
61 */
62struct spu_interp_coef
63{
64   float a0[NUM_CHANNELS]; /* in an xyzw layout */
65   float dadx[NUM_CHANNELS];   /* per-channel rate of change in window x */
66   float dady[NUM_CHANNELS];   /* per-channel rate of change in window y */
67};
68
69
70struct softpipe_tile_cache; /**< Opaque to TGSI */
71
72/**
73 * Information for sampling textures, which must be implemented
74 * by code outside the TGSI executor.
75 */
76struct spu_sampler
77{
78   const struct pipe_sampler_state *state;   /* bound sampler state (filter/wrap) */
79   struct pipe_resource *texture;            /* texture being sampled */
80   /** Get samples for four fragments in a quad */
81   void (*get_samples)(struct spu_sampler *sampler,
82                       const float s[QUAD_SIZE],
83                       const float t[QUAD_SIZE],
84                       const float p[QUAD_SIZE],
85                       float lodbias,
86                       float rgba[NUM_CHANNELS][QUAD_SIZE]);
87   void *pipe; /*XXX temporary*/
88   struct softpipe_tile_cache *cache;        /* opaque tile cache used by the sampling code */
89};
90
91
92/**
93 * Run-time virtual machine state for executing TGSI shader.
94 */
95struct spu_exec_machine
96{
97   /*
98    * 32 program temporaries
99    * 4 internal temporaries
100    * 1 address
101    */
102   PIPE_ALIGN_VAR(16)
103   struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS
104                                + TGSI_EXEC_NUM_TEMP_EXTRAS + 1];
105
106   struct spu_exec_vector *Addrs;   /* address register(s) -- the "+1" above */
107
108   struct spu_sampler *Samplers;    /* texture samplers, presumably indexed by unit -- confirm */
109
110   float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];   /* immediate constants from the shader */
111   unsigned ImmLimit;               /* number of Imms entries in use -- presumably; confirm */
112   float (*Consts)[4];              /* constant buffer, array of float[4] */
113   struct spu_exec_vector *Inputs;
114   struct spu_exec_vector *Outputs;
115   unsigned Processor;              /* TGSI_PROCESSOR_x (FRAGMENT/GEOMETRY tested at run time) */
116
117   /* GEOMETRY processor only. */
118   unsigned *Primitives;
119
120   /* FRAGMENT processor only. */
121   const struct spu_interp_coef *InterpCoefs;
122   struct spu_exec_vector QuadPos;
123
124   /* Conditional execution masks */
125   uint CondMask;  /**< For IF/ELSE/ENDIF */
126   uint LoopMask;  /**< For BGNLOOP/ENDLOOP */
127   uint ContMask;  /**< For loop CONT statements */
128   uint FuncMask;  /**< For function calls */
129   uint ExecMask;  /**< = CondMask & LoopMask */
130
131   /** Condition mask stack (for nested conditionals) */
132   uint CondStack[TGSI_EXEC_MAX_COND_NESTING];
133   int CondStackTop;
134
135   /** Loop mask stack (for nested loops) */
136   uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING];
137   int LoopStackTop;
138
139   /** Loop continue mask stack (see comments in tgsi_exec.c) */
140   uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
141   int ContStackTop;
142
143   /** Function execution mask stack (for executing subroutine code) */
144   uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING];
145   int FuncStackTop;
146
147   /** Function call stack for saving/restoring the program counter */
148   uint CallStack[TGSI_EXEC_MAX_CALL_NESTING];
149   int CallStackTop;
150
151   struct tgsi_full_instruction *Instructions;   /* main-memory address; fetched via DMA */
152   uint NumInstructions;
153
154   struct tgsi_full_declaration *Declarations;   /* main-memory address; fetched via DMA */
155   uint NumDeclarations;
156};
157
158
159extern void
160spu_exec_machine_init(struct spu_exec_machine *mach,
161 uint numSamplers,
162 struct spu_sampler *samplers,
163 unsigned processor);
164
165extern uint
166spu_exec_machine_run( struct spu_exec_machine *mach );
167
168
169#if defined __cplusplus
170} /* extern "C" */
171#endif
172
173#endif /* SPU_EXEC_H */
diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c
deleted file mode 100644
index 98919c43ffc..00000000000
--- a/src/gallium/drivers/cell/spu/spu_funcs.c
+++ /dev/null
@@ -1,173 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29/**
30 * SPU functions accessed by shaders.
31 *
32 * Authors: Brian Paul
33 */
34
35
36#include <string.h>
37#include <libmisc.h>
38#include <math.h>
39#include <cos14_v.h>
40#include <sin14_v.h>
41#include <simdmath/exp2f4.h>
42#include <simdmath/log2f4.h>
43#include <simdmath/powf4.h>
44
45#include "cell/common.h"
46#include "spu_main.h"
47#include "spu_funcs.h"
48#include "spu_texture.h"
49
50
51/** For "return"-ing four vectors */
52struct vec_4x4
53{
54   vector float v[4];   /* e.g. four RGBA color vectors from a texture sample */
55};
56
57
/** Vector cosine via the SDK's _cos14_v (the "14" presumably denotes bits of accuracy). */
58static vector float
59spu_cos(vector float x)
60{
61   return _cos14_v(x);
62}
63
/** Vector sine via the SDK's _sin14_v (the "14" presumably denotes bits of accuracy). */
64static vector float
65spu_sin(vector float x)
66{
67   return _sin14_v(x);
68}
69
/** Element-wise x raised to the power y, via SIMD-math _powf4. */
70static vector float
71spu_pow(vector float x, vector float y)
72{
73   return _powf4(x, y);
74}
75
/** Element-wise 2^x, via SIMD-math _exp2f4. */
76static vector float
77spu_exp2(vector float x)
78{
79   return _exp2f4(x);
80}
81
/** Element-wise log2(x), via SIMD-math _log2f4. */
82static vector float
83spu_log2(vector float x)
84{
85   return _log2f4(x);
86}
87
88
/**
 * 2D texture lookup for one quad: sample texture unit 'unit' at (s,t).
 * r and q are accepted for signature uniformity with the other lookup
 * functions but are unused for plain 2D sampling.
 * NOTE(review): the two literal 0 arguments to sample_texture_2d are
 * undocumented here -- confirm their meaning against the definition of
 * the sample_texture_2d function pointer.
 */
89static struct vec_4x4
90spu_tex_2d(vector float s, vector float t, vector float r, vector float q,
91           unsigned unit)
92{
93   struct vec_4x4 colors;
94   (void) r;
95   (void) q;
96   spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v);
97   return colors;
98}
99
/**
 * Nominal 3D texture lookup.
 * NOTE(review): the body is identical to spu_tex_2d -- the third
 * coordinate r is discarded and the 2D sampling path is used, so true
 * 3D textures do not appear to be supported here; confirm intent.
 */
100static struct vec_4x4
101spu_tex_3d(vector float s, vector float t, vector float r, vector float q,
102           unsigned unit)
103{
104   struct vec_4x4 colors;
105   (void) r;
106   (void) q;
107   spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v);
108   return colors;
109}
110
/**
 * Cube-map texture lookup: sample unit 'unit' with direction (s,t,r).
 * q is unused for cube sampling.
 */
111static struct vec_4x4
112spu_tex_cube(vector float s, vector float t, vector float r, vector float q,
113             unsigned unit)
114{
115   struct vec_4x4 colors;
116   (void) q;
117   sample_texture_cube(s, t, r, unit, colors.v);
118   return colors;
119}
120
121
122/**
123 * Add named function to list of "exported" functions that will be
124 * made available to the PPU-hosted code generator.
125 */
126static void
127export_func(struct cell_spu_function_info *spu_functions,
128 const char *name, void *addr)
129{
130 uint n = spu_functions->num;
131 ASSERT(strlen(name) < 16);
132 strcpy(spu_functions->names[n], name);
133 spu_functions->addrs[n] = (uint) addr;
134 spu_functions->num++;
135 ASSERT(spu_functions->num <= 16);
136}
137
138
139/**
140 * Return info about the SPU's function to the PPU / main memory.
141 * The PPU needs to know the address of some SPU-side functions so
142 * that we can generate shader code with function calls.
143 */
144void
145return_function_info(void)
146{
   /* must be 16-byte aligned (and a 16-byte-multiple size) for MFC DMA */
147   PIPE_ALIGN_VAR(16) struct cell_spu_function_info funcs;
148   int tag = TAG_MISC;
149
150   ASSERT(sizeof(funcs) == 256); /* DMA size must be a multiple of 16 bytes */
151
152   funcs.num = 0;
153   export_func(&funcs, "spu_cos", &spu_cos);
154   export_func(&funcs, "spu_sin", &spu_sin);
155   export_func(&funcs, "spu_pow", &spu_pow);
156   export_func(&funcs, "spu_exp2", &spu_exp2);
157   export_func(&funcs, "spu_log2", &spu_log2);
158   export_func(&funcs, "spu_tex_2d", &spu_tex_2d);
159   export_func(&funcs, "spu_tex_3d", &spu_tex_3d);
160   export_func(&funcs, "spu_tex_cube", &spu_tex_cube);
161
162   /* Send the function info back to the PPU / main memory */
163   mfc_put((void *) &funcs, /* src in local store */
164           (unsigned int) spu.init.spu_functions, /* dst in main memory */
165           sizeof(funcs), /* bytes */
166           tag,
167           0, /* tid */
168           0 /* rid */);
   /* block until the outgoing DMA has completed; funcs is about to go
    * out of scope */
169   wait_on_mask(1 << tag);
170}
171
172
173
diff --git a/src/gallium/drivers/cell/spu/spu_funcs.h b/src/gallium/drivers/cell/spu/spu_funcs.h
deleted file mode 100644
index 3adb6ae99f9..00000000000
--- a/src/gallium/drivers/cell/spu/spu_funcs.h
+++ /dev/null
@@ -1,35 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#ifndef SPU_FUNCS_H
29#define SPU_FUNCS_H
30
31extern void
32return_function_info(void);
33
34#endif
35
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
deleted file mode 100644
index 97c86d194da..00000000000
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ /dev/null
@@ -1,117 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29/* main() for Cell SPU code */
30
31
32#include <stdio.h>
33#include <libmisc.h>
34
35#include "pipe/p_defines.h"
36
37#include "spu_funcs.h"
38#include "spu_command.h"
39#include "spu_main.h"
40#include "spu_per_fragment_op.h"
41#include "spu_texture.h"
42//#include "spu_test.h"
43#include "cell/common.h"
44
45
46/*
47helpful headers:
48/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h
49/opt/cell/sdk/usr/include/libmisc.h
50*/
51
/* Definition of the single global SPU-side context (tile status arrays,
 * texture cache, init info from the PPU, ...). */
52struct spu_global spu;
53
54
55static void
56one_time_init(void)
57{
58 memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status));
59 memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status));
60 invalidate_tex_cache();
61}
62
63/* In some versions of the SDK the SPE main takes 'unsigned long' as a
64 * parameter. In others it takes 'unsigned long long'. Use a define to
65 * select between the two.
66 */
67#ifdef SPU_MAIN_PARAM_LONG_LONG
68typedef unsigned long long main_param_t;
69#else
70typedef unsigned long main_param_t;
71#endif
72
73/**
74 * SPE entrypoint.
 * \param speid  this SPE's id (only used for debug output here)
 * \param argp   main-memory address of the struct cell_init_info to fetch
 * \return 0 on normal termination of the command loop
75 */
76int
77main(main_param_t speid, main_param_t argp)
78{
79   int tag = 0;
80
81   (void) speid;
82
   /* sanity-check the size/alignment assumptions the DMA code relies on
    * (transfers here are done in 8-byte-multiple units) */
83   ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4);
84   ASSERT(sizeof(struct cell_command_render) % 8 == 0);
85   ASSERT(sizeof(struct cell_command_fragment_ops) % 8 == 0);
86   ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0);
87
88   one_time_init();
89   spu_command_init();
90
91   D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid);
92   D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n");
93
94   /* get initialization data */
95   mfc_get(&spu.init, /* dest */
96           (unsigned int) argp, /* src */
97           sizeof(struct cell_init_info), /* bytes */
98           tag,
99           0, /* tid */
100           0 /* rid */);
101   wait_on_mask( 1 << tag );
102
   /* only SPU 0 reports the exported-function table back to the PPU */
103   if (spu.init.id == 0) {
104      return_function_info();
105   }
106
107#if 0
108   if (spu.init.id==0)
109      spu_test_misc(spu.init.id);
110#endif
111
   /* process commands from the PPU until told to exit */
112   command_loop();
113
114   spu_command_close();
115
116   return 0;
117}
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
deleted file mode 100644
index a9d72f84d56..00000000000
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ /dev/null
@@ -1,269 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#ifndef SPU_MAIN_H
29#define SPU_MAIN_H
30
31
32#include <spu_mfcio.h>
33
34#include "cell/common.h"
35#include "draw/draw_vertex.h"
36#include "pipe/p_state.h"
37
38
#if DEBUG
/* These debug macros use the unusual construction ", ##__VA_ARGS__"
 * which expands to the expected comma + args if variadic arguments
 * are supplied, but swallows the comma if there are no variadic
 * arguments (which avoids syntax errors that would otherwise occur).
 *
 * The do { } while (0) wrapper makes the macro a single statement so
 * it is safe to use in unbraced if/else constructs (the bare `if`
 * version would bind a following `else` to the macro's own `if`).
 */
#define D_PRINTF(flag, format,...) \
   do { \
      if (spu.init.debug_flags & (flag)) \
         printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__); \
   } while (0)
#else
#define D_PRINTF(...) do { } while (0)
#endif
51
52
/**
 * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels.
 * The data may be addressed through several different types (scalar
 * ushort/uint views and SIMD vector views alias the same storage).
 */
typedef union {
   ushort us[TILE_SIZE][TILE_SIZE];
   uint ui[TILE_SIZE][TILE_SIZE];
   vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4];
   vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2];
} tile_t;


/* Per-tile status codes (stored in spu.ctile_status / spu.ztile_status) */
#define TILE_STATUS_CLEAR   1
#define TILE_STATUS_DEFINED 2  /**< defined in FB, but not in local store */
#define TILE_STATUS_CLEAN   3  /**< in local store, but not changed */
#define TILE_STATUS_DIRTY   4  /**< modified locally, but not put back yet */
#define TILE_STATUS_GETTING 5  /**< mfc_get() called but not yet arrived */
70
71
/** Function for sampling textures */
typedef void (*spu_sample_texture_2d_func)(vector float s,
                                           vector float t,
                                           uint unit, uint level, uint face,
                                           vector float colors[4]);


/** Function for performing per-fragment ops (alpha/Z/stencil test,
 *  blend, colormask, logicop) on one quad of fragments.
 */
typedef void (*spu_fragment_ops_func)(uint x, uint y,
                                      tile_t *colorTile,
                                      tile_t *depthStencilTile,
                                      vector float fragZ,
                                      vector float fragRed,
                                      vector float fragGreen,
                                      vector float fragBlue,
                                      vector float fragAlpha,
                                      vector unsigned int mask);

/** Function for running fragment program */
typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs,
                                                         vector float *outputs,
                                                         vector float *constants);
94
95
/** Current framebuffer description (surfaces live in main memory) */
PIPE_ALIGN_TYPE(16,
struct spu_framebuffer
{
   void *color_start;              /**< addr of color surface in main memory */
   void *depth_start;              /**< addr of depth surface in main memory */
   enum pipe_format color_format;
   enum pipe_format depth_format;
   uint width;                     /**< width in pixels */
   uint height;                    /**< height in pixels */
   uint width_tiles;               /**< width in tiles */
   uint height_tiles;              /**< height in tiles */

   uint color_clear_value;
   uint depth_clear_value;

   uint zsize;                     /**< 0, 2 or 4 bytes per Z */
   float zscale;                   /**< 65535.0, 2^24-1 or 2^32-1 */
});
114
115
/** per-texture level info */
PIPE_ALIGN_TYPE(16,
struct spu_texture_level
{
   void *start;               /* addr of this mipmap level in main memory */
   ushort width;
   ushort height;
   ushort depth;
   ushort tiles_per_row;
   uint bytes_per_image;
   /** texcoord scale factors */
   vector float scale_s;
   vector float scale_t;
   vector float scale_r;
   /** texcoord masks (if REPEAT then size-1, else ~0) */
   vector signed int mask_s;
   vector signed int mask_t;
   vector signed int mask_r;
   /** texcoord clamp limits */
   vector signed int max_s;
   vector signed int max_t;
   vector signed int max_r;
});
139
140
/** A complete texture: per-level info plus mipmap range and target */
PIPE_ALIGN_TYPE(16,
struct spu_texture
{
   struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS];
   uint max_level;   /* index of last valid mipmap level */
   uint target;      /**< PIPE_TEXTURE_x */
});
148
149
/**
 * All SPU global/context state will be in a singleton object of this type:
 */
PIPE_ALIGN_TYPE(16,
struct spu_global
{
   /** One-time init/constant info (DMA'd from the PPU at startup) */
   struct cell_init_info init;

   /*
    * Current state
    */
   struct spu_framebuffer fb;
   struct pipe_depth_stencil_alpha_state depth_stencil_alpha;
   struct pipe_blend_state blend;
   struct pipe_blend_color blend_color;
   struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
   struct pipe_rasterizer_state rasterizer;
   struct spu_texture texture[PIPE_MAX_SAMPLERS];
   struct vertex_info vertex_info;

   /** Current color and Z tiles */
   PIPE_ALIGN_VAR(16) tile_t ctile;
   PIPE_ALIGN_VAR(16) tile_t ztile;

   /** Read depth/stencil tiles? */
   boolean read_depth_stencil;

   /** Current tiles' status (TILE_STATUS_x) */
   ubyte cur_ctile_status;
   ubyte cur_ztile_status;

   /** Status of all tiles in framebuffer (TILE_STATUS_x) */
   PIPE_ALIGN_VAR(16) ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE];
   PIPE_ALIGN_VAR(16) ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE];

   /** Current fragment ops machine code, at 8-byte boundary */
   uint *fragment_ops_code;
   uint fragment_ops_code_size;
   /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */
   spu_fragment_ops_func fragment_ops[2];

   /** Current fragment program machine code, at 8-byte boundary */
   PIPE_ALIGN_VAR(8) uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
   /** Current fragment program function */
   spu_fragment_program_func fragment_program;

   /** Current texture sampler functions (per unit: default/minify/magnify) */
   spu_sample_texture_2d_func sample_texture_2d[CELL_MAX_SAMPLERS];
   spu_sample_texture_2d_func min_sample_texture_2d[CELL_MAX_SAMPLERS];
   spu_sample_texture_2d_func mag_sample_texture_2d[CELL_MAX_SAMPLERS];

   /** Fragment program constants */
   vector float constants[4 * CELL_MAX_CONSTANTS];

});
206
207
/** The singleton SPU state object (defined in spu_main.c) */
extern struct spu_global spu;



/* DMA TAGS
 * Distinct MFC tag ids so independent DMA transfers can be waited on
 * separately via wait_on_mask() / wait_on_mask_all().
 */

#define TAG_SURFACE_CLEAR     10
#define TAG_VERTEX_BUFFER     11
#define TAG_READ_TILE_COLOR   12
#define TAG_READ_TILE_Z       13
#define TAG_WRITE_TILE_COLOR  14
#define TAG_WRITE_TILE_Z      15
#define TAG_INDEX_BUFFER      16
#define TAG_BATCH_BUFFER      17
#define TAG_MISC              18
#define TAG_DCACHE0           20
#define TAG_DCACHE1           21
#define TAG_DCACHE2           22
#define TAG_DCACHE3           23
#define TAG_FENCE             24
228
229
/** Block until at least one DMA selected by \p tagMask has completed. */
static INLINE void
wait_on_mask(unsigned tagMask)
{
   mfc_write_tag_mask( tagMask );
   /* wait for completion of _any_ DMAs specified by tagMask */
   mfc_read_tag_status_any();
}
237
238
/** Block until every DMA selected by \p tagMask has completed. */
static INLINE void
wait_on_mask_all(unsigned tagMask)
{
   mfc_write_tag_mask( tagMask );
   /* wait for completion of _all_ DMAs specified by tagMask */
   mfc_read_tag_status_all();
}
246
247
248
249
250
251static INLINE void
252memset16(ushort *d, ushort value, uint count)
253{
254 uint i;
255 for (i = 0; i < count; i++)
256 d[i] = value;
257}
258
259
260static INLINE void
261memset32(uint *d, uint value, uint count)
262{
263 uint i;
264 for (i = 0; i < count; i++)
265 d[i] = value;
266}
267
268
269#endif /* SPU_MAIN_H */
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
deleted file mode 100644
index 2415226a244..00000000000
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
+++ /dev/null
@@ -1,631 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * \author Brian Paul
30 */
31
32
33#include <transpose_matrix4x4.h>
34#include "pipe/p_format.h"
35#include "spu_main.h"
36#include "spu_colorpack.h"
37#include "spu_per_fragment_op.h"
38
39
40#define LINEAR_QUAD_LAYOUT 1
41
42
43static INLINE vector float
44spu_min(vector float a, vector float b)
45{
46 vector unsigned int m;
47 m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */
48 return spu_sel(a, b, m);
49}
50
51
52static INLINE vector float
53spu_max(vector float a, vector float b)
54{
55 vector unsigned int m;
56 m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */
57 return spu_sel(b, a, m);
58}
59
60
61/**
62 * Called by rasterizer for each quad after the shader has run. Do
63 * all the per-fragment operations including alpha test, z test,
64 * stencil test, blend, colormask and logicops. This is a
65 * fallback/debug function. In reality we'll use a generated function
66 * produced by the PPU. But this function is useful for
67 * debug/validation.
68 */
69void
70spu_fallback_fragment_ops(uint x, uint y,
71 tile_t *colorTile,
72 tile_t *depthStencilTile,
73 vector float fragZ,
74 vector float fragR,
75 vector float fragG,
76 vector float fragB,
77 vector float fragA,
78 vector unsigned int mask)
79{
80 vector float frag_aos[4];
81 unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */
82 unsigned int fragc0, fragc1, fragc2, fragc3; /* fragment colors */
83
84 /*
85 * Do alpha test
86 */
87 if (spu.depth_stencil_alpha.alpha.enabled) {
88 vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref_value);
89 vector unsigned int amask;
90
91 switch (spu.depth_stencil_alpha.alpha.func) {
92 case PIPE_FUNC_LESS:
93 amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */
94 break;
95 case PIPE_FUNC_GREATER:
96 amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */
97 break;
98 case PIPE_FUNC_GEQUAL:
99 amask = spu_cmpgt(ref, fragA);
100 amask = spu_nor(amask, amask);
101 break;
102 case PIPE_FUNC_LEQUAL:
103 amask = spu_cmpgt(fragA, ref);
104 amask = spu_nor(amask, amask);
105 break;
106 case PIPE_FUNC_EQUAL:
107 amask = spu_cmpeq(ref, fragA);
108 break;
109 case PIPE_FUNC_NOTEQUAL:
110 amask = spu_cmpeq(ref, fragA);
111 amask = spu_nor(amask, amask);
112 break;
113 case PIPE_FUNC_ALWAYS:
114 amask = spu_splats(0xffffffffU);
115 break;
116 case PIPE_FUNC_NEVER:
117 amask = spu_splats( 0x0U);
118 break;
119 default:
120 ;
121 }
122
123 mask = spu_and(mask, amask);
124 }
125
126
127 /*
128 * Z and/or stencil testing...
129 */
130 if (spu.depth_stencil_alpha.depth.enabled ||
131 spu.depth_stencil_alpha.stencil[0].enabled) {
132
133 /* get four Z/Stencil values from tile */
134 vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU);
135 vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2];
136 vector unsigned int ifbZ = spu_and(ifbZS, mask24);
137 vector unsigned int ifbS = spu_andc(ifbZS, mask24);
138
139 if (spu.depth_stencil_alpha.stencil[0].enabled) {
140 /* do stencil test */
141 ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z24_UNORM_S8_UINT);
142
143 }
144 else if (spu.depth_stencil_alpha.depth.enabled) {
145 /* do depth test */
146
147 ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
148 spu.fb.depth_format == PIPE_FORMAT_Z24X8_UNORM);
149
150 vector unsigned int ifragZ;
151 vector unsigned int zmask;
152
153 /* convert four fragZ from float to uint */
154 fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff));
155 ifragZ = spu_convtu(fragZ, 0);
156
157 /* do depth comparison, setting zmask with results */
158 switch (spu.depth_stencil_alpha.depth.func) {
159 case PIPE_FUNC_LESS:
160 zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */
161 break;
162 case PIPE_FUNC_GREATER:
163 zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifbZ > ifragZ) */
164 break;
165 case PIPE_FUNC_GEQUAL:
166 zmask = spu_cmpgt(ifbZ, ifragZ);
167 zmask = spu_nor(zmask, zmask);
168 break;
169 case PIPE_FUNC_LEQUAL:
170 zmask = spu_cmpgt(ifragZ, ifbZ);
171 zmask = spu_nor(zmask, zmask);
172 break;
173 case PIPE_FUNC_EQUAL:
174 zmask = spu_cmpeq(ifbZ, ifragZ);
175 break;
176 case PIPE_FUNC_NOTEQUAL:
177 zmask = spu_cmpeq(ifbZ, ifragZ);
178 zmask = spu_nor(zmask, zmask);
179 break;
180 case PIPE_FUNC_ALWAYS:
181 zmask = spu_splats(0xffffffffU);
182 break;
183 case PIPE_FUNC_NEVER:
184 zmask = spu_splats( 0x0U);
185 break;
186 default:
187 ;
188 }
189
190 mask = spu_and(mask, zmask);
191
192 /* merge framebuffer Z and fragment Z according to the mask */
193 ifbZ = spu_or(spu_and(ifragZ, mask),
194 spu_andc(ifbZ, mask));
195 }
196
197 if (spu_extract(spu_orx(mask), 0)) {
198 /* put new fragment Z/Stencil values back into Z/Stencil tile */
199 depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS);
200
201 spu.cur_ztile_status = TILE_STATUS_DIRTY;
202 }
203 }
204
205
206 /*
207 * If we'll need the current framebuffer/tile colors for blending
208 * or logicop or colormask, fetch them now.
209 */
210 if (spu.blend.rt[0].blend_enable ||
211 spu.blend.logicop_enable ||
212 spu.blend.rt[0].colormask != 0xf) {
213
214#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
215 fbc0 = colorTile->ui[y][x*2+0];
216 fbc1 = colorTile->ui[y][x*2+1];
217 fbc2 = colorTile->ui[y][x*2+2];
218 fbc3 = colorTile->ui[y][x*2+3];
219#else
220 fbc0 = colorTile->ui[y+0][x+0];
221 fbc1 = colorTile->ui[y+0][x+1];
222 fbc2 = colorTile->ui[y+1][x+0];
223 fbc3 = colorTile->ui[y+1][x+1];
224#endif
225 }
226
227
228 /*
229 * Do blending
230 */
231 if (spu.blend.rt[0].blend_enable) {
232 /* blending terms, misc regs */
233 vector float term1r, term1g, term1b, term1a;
234 vector float term2r, term2g, term2b, term2a;
235 vector float one, tmp;
236
237 vector float fbRGBA[4]; /* current framebuffer colors */
238
239 /* convert framebuffer colors from packed int to vector float */
240 {
241 vector float temp[4]; /* float colors in AOS form */
242 switch (spu.fb.color_format) {
243 case PIPE_FORMAT_A8R8G8B8_UNORM:
244 temp[0] = spu_unpack_B8G8R8A8(fbc0);
245 temp[1] = spu_unpack_B8G8R8A8(fbc1);
246 temp[2] = spu_unpack_B8G8R8A8(fbc2);
247 temp[3] = spu_unpack_B8G8R8A8(fbc3);
248 break;
249 case PIPE_FORMAT_B8G8R8A8_UNORM:
250 temp[0] = spu_unpack_A8R8G8B8(fbc0);
251 temp[1] = spu_unpack_A8R8G8B8(fbc1);
252 temp[2] = spu_unpack_A8R8G8B8(fbc2);
253 temp[3] = spu_unpack_A8R8G8B8(fbc3);
254 break;
255 default:
256 ASSERT(0);
257 }
258 _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */
259 }
260
261 /*
262 * Compute Src RGB terms (fragment color * factor)
263 */
264 switch (spu.blend.rt[0].rgb_src_factor) {
265 case PIPE_BLENDFACTOR_ONE:
266 term1r = fragR;
267 term1g = fragG;
268 term1b = fragB;
269 break;
270 case PIPE_BLENDFACTOR_ZERO:
271 term1r =
272 term1g =
273 term1b = spu_splats(0.0f);
274 break;
275 case PIPE_BLENDFACTOR_SRC_COLOR:
276 term1r = spu_mul(fragR, fragR);
277 term1g = spu_mul(fragG, fragG);
278 term1b = spu_mul(fragB, fragB);
279 break;
280 case PIPE_BLENDFACTOR_SRC_ALPHA:
281 term1r = spu_mul(fragR, fragA);
282 term1g = spu_mul(fragG, fragA);
283 term1b = spu_mul(fragB, fragA);
284 break;
285 case PIPE_BLENDFACTOR_DST_COLOR:
286 term1r = spu_mul(fragR, fbRGBA[0]);
287 term1g = spu_mul(fragG, fbRGBA[1]);
288 term1b = spu_mul(fragB, fbRGBA[1]);
289 break;
290 case PIPE_BLENDFACTOR_DST_ALPHA:
291 term1r = spu_mul(fragR, fbRGBA[3]);
292 term1g = spu_mul(fragG, fbRGBA[3]);
293 term1b = spu_mul(fragB, fbRGBA[3]);
294 break;
295 case PIPE_BLENDFACTOR_CONST_COLOR:
296 term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[0]));
297 term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[1]));
298 term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[2]));
299 break;
300 case PIPE_BLENDFACTOR_CONST_ALPHA:
301 term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[3]));
302 term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[3]));
303 term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[3]));
304 break;
305 /* XXX more cases */
306 default:
307 ASSERT(0);
308 }
309
310 /*
311 * Compute Src Alpha term (fragment alpha * factor)
312 */
313 switch (spu.blend.rt[0].alpha_src_factor) {
314 case PIPE_BLENDFACTOR_ONE:
315 term1a = fragA;
316 break;
317 case PIPE_BLENDFACTOR_SRC_COLOR:
318 term1a = spu_splats(0.0f);
319 break;
320 case PIPE_BLENDFACTOR_SRC_ALPHA:
321 term1a = spu_mul(fragA, fragA);
322 break;
323 case PIPE_BLENDFACTOR_DST_COLOR:
324 /* fall-through */
325 case PIPE_BLENDFACTOR_DST_ALPHA:
326 term1a = spu_mul(fragA, fbRGBA[3]);
327 break;
328 case PIPE_BLENDFACTOR_CONST_COLOR:
329 /* fall-through */
330 case PIPE_BLENDFACTOR_CONST_ALPHA:
331 term1a = spu_mul(fragR, spu_splats(spu.blend_color.color[3]));
332 break;
333 /* XXX more cases */
334 default:
335 ASSERT(0);
336 }
337
338 /*
339 * Compute Dest RGB terms (framebuffer color * factor)
340 */
341 switch (spu.blend.rt[0].rgb_dst_factor) {
342 case PIPE_BLENDFACTOR_ONE:
343 term2r = fbRGBA[0];
344 term2g = fbRGBA[1];
345 term2b = fbRGBA[2];
346 break;
347 case PIPE_BLENDFACTOR_ZERO:
348 term2r =
349 term2g =
350 term2b = spu_splats(0.0f);
351 break;
352 case PIPE_BLENDFACTOR_SRC_COLOR:
353 term2r = spu_mul(fbRGBA[0], fragR);
354 term2g = spu_mul(fbRGBA[1], fragG);
355 term2b = spu_mul(fbRGBA[2], fragB);
356 break;
357 case PIPE_BLENDFACTOR_SRC_ALPHA:
358 term2r = spu_mul(fbRGBA[0], fragA);
359 term2g = spu_mul(fbRGBA[1], fragA);
360 term2b = spu_mul(fbRGBA[2], fragA);
361 break;
362 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
363 one = spu_splats(1.0f);
364 tmp = spu_sub(one, fragA);
365 term2r = spu_mul(fbRGBA[0], tmp);
366 term2g = spu_mul(fbRGBA[1], tmp);
367 term2b = spu_mul(fbRGBA[2], tmp);
368 break;
369 case PIPE_BLENDFACTOR_DST_COLOR:
370 term2r = spu_mul(fbRGBA[0], fbRGBA[0]);
371 term2g = spu_mul(fbRGBA[1], fbRGBA[1]);
372 term2b = spu_mul(fbRGBA[2], fbRGBA[2]);
373 break;
374 case PIPE_BLENDFACTOR_DST_ALPHA:
375 term2r = spu_mul(fbRGBA[0], fbRGBA[3]);
376 term2g = spu_mul(fbRGBA[1], fbRGBA[3]);
377 term2b = spu_mul(fbRGBA[2], fbRGBA[3]);
378 break;
379 case PIPE_BLENDFACTOR_CONST_COLOR:
380 term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[0]));
381 term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[1]));
382 term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[2]));
383 break;
384 case PIPE_BLENDFACTOR_CONST_ALPHA:
385 term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[3]));
386 term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[3]));
387 term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[3]));
388 break;
389 /* XXX more cases */
390 default:
391 ASSERT(0);
392 }
393
394 /*
395 * Compute Dest Alpha term (framebuffer alpha * factor)
396 */
397 switch (spu.blend.rt[0].alpha_dst_factor) {
398 case PIPE_BLENDFACTOR_ONE:
399 term2a = fbRGBA[3];
400 break;
401 case PIPE_BLENDFACTOR_SRC_COLOR:
402 term2a = spu_splats(0.0f);
403 break;
404 case PIPE_BLENDFACTOR_SRC_ALPHA:
405 term2a = spu_mul(fbRGBA[3], fragA);
406 break;
407 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
408 one = spu_splats(1.0f);
409 tmp = spu_sub(one, fragA);
410 term2a = spu_mul(fbRGBA[3], tmp);
411 break;
412 case PIPE_BLENDFACTOR_DST_COLOR:
413 /* fall-through */
414 case PIPE_BLENDFACTOR_DST_ALPHA:
415 term2a = spu_mul(fbRGBA[3], fbRGBA[3]);
416 break;
417 case PIPE_BLENDFACTOR_CONST_COLOR:
418 /* fall-through */
419 case PIPE_BLENDFACTOR_CONST_ALPHA:
420 term2a = spu_mul(fbRGBA[3], spu_splats(spu.blend_color.color[3]));
421 break;
422 /* XXX more cases */
423 default:
424 ASSERT(0);
425 }
426
427 /*
428 * Combine Src/Dest RGB terms
429 */
430 switch (spu.blend.rt[0].rgb_func) {
431 case PIPE_BLEND_ADD:
432 fragR = spu_add(term1r, term2r);
433 fragG = spu_add(term1g, term2g);
434 fragB = spu_add(term1b, term2b);
435 break;
436 case PIPE_BLEND_SUBTRACT:
437 fragR = spu_sub(term1r, term2r);
438 fragG = spu_sub(term1g, term2g);
439 fragB = spu_sub(term1b, term2b);
440 break;
441 case PIPE_BLEND_REVERSE_SUBTRACT:
442 fragR = spu_sub(term2r, term1r);
443 fragG = spu_sub(term2g, term1g);
444 fragB = spu_sub(term2b, term1b);
445 break;
446 case PIPE_BLEND_MIN:
447 fragR = spu_min(term1r, term2r);
448 fragG = spu_min(term1g, term2g);
449 fragB = spu_min(term1b, term2b);
450 break;
451 case PIPE_BLEND_MAX:
452 fragR = spu_max(term1r, term2r);
453 fragG = spu_max(term1g, term2g);
454 fragB = spu_max(term1b, term2b);
455 break;
456 default:
457 ASSERT(0);
458 }
459
460 /*
461 * Combine Src/Dest A term
462 */
463 switch (spu.blend.rt[0].alpha_func) {
464 case PIPE_BLEND_ADD:
465 fragA = spu_add(term1a, term2a);
466 break;
467 case PIPE_BLEND_SUBTRACT:
468 fragA = spu_sub(term1a, term2a);
469 break;
470 case PIPE_BLEND_REVERSE_SUBTRACT:
471 fragA = spu_sub(term2a, term1a);
472 break;
473 case PIPE_BLEND_MIN:
474 fragA = spu_min(term1a, term2a);
475 break;
476 case PIPE_BLEND_MAX:
477 fragA = spu_max(term1a, term2a);
478 break;
479 default:
480 ASSERT(0);
481 }
482 }
483
484
485 /*
486 * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA.
487 */
488#if 0
489 /* original code */
490 {
491 vector float frag_soa[4];
492 frag_soa[0] = fragR;
493 frag_soa[1] = fragG;
494 frag_soa[2] = fragB;
495 frag_soa[3] = fragA;
496 _transpose_matrix4x4(frag_aos, frag_soa);
497 }
498#else
499 /* short-cut relying on function parameter layout: */
500 _transpose_matrix4x4(frag_aos, &fragR);
501 (void) fragG;
502 (void) fragB;
503#endif
504
505 /*
506 * Pack fragment float colors into 32-bit RGBA words.
507 */
508 switch (spu.fb.color_format) {
509 case PIPE_FORMAT_B8G8R8A8_UNORM:
510 fragc0 = spu_pack_A8R8G8B8(frag_aos[0]);
511 fragc1 = spu_pack_A8R8G8B8(frag_aos[1]);
512 fragc2 = spu_pack_A8R8G8B8(frag_aos[2]);
513 fragc3 = spu_pack_A8R8G8B8(frag_aos[3]);
514 break;
515 case PIPE_FORMAT_A8R8G8B8_UNORM:
516 fragc0 = spu_pack_B8G8R8A8(frag_aos[0]);
517 fragc1 = spu_pack_B8G8R8A8(frag_aos[1]);
518 fragc2 = spu_pack_B8G8R8A8(frag_aos[2]);
519 fragc3 = spu_pack_B8G8R8A8(frag_aos[3]);
520 break;
521 default:
522 fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n");
523 ASSERT(0);
524 }
525
526
527 /*
528 * Do color masking
529 */
530 if (spu.blend.rt[0].colormask != 0xf) {
531 uint cmask = 0x0; /* each byte corresponds to a color channel */
532
533 /* Form bitmask depending on color buffer format and colormask bits */
534 switch (spu.fb.color_format) {
535 case PIPE_FORMAT_B8G8R8A8_UNORM:
536 if (spu.blend.rt[0].colormask & PIPE_MASK_R)
537 cmask |= 0x00ff0000; /* red */
538 if (spu.blend.rt[0].colormask & PIPE_MASK_G)
539 cmask |= 0x0000ff00; /* green */
540 if (spu.blend.rt[0].colormask & PIPE_MASK_B)
541 cmask |= 0x000000ff; /* blue */
542 if (spu.blend.rt[0].colormask & PIPE_MASK_A)
543 cmask |= 0xff000000; /* alpha */
544 break;
545 case PIPE_FORMAT_A8R8G8B8_UNORM:
546 if (spu.blend.rt[0].colormask & PIPE_MASK_R)
547 cmask |= 0x0000ff00; /* red */
548 if (spu.blend.rt[0].colormask & PIPE_MASK_G)
549 cmask |= 0x00ff0000; /* green */
550 if (spu.blend.rt[0].colormask & PIPE_MASK_B)
551 cmask |= 0xff000000; /* blue */
552 if (spu.blend.rt[0].colormask & PIPE_MASK_A)
553 cmask |= 0x000000ff; /* alpha */
554 break;
555 default:
556 ASSERT(0);
557 }
558
559 /*
560 * Apply color mask to the 32-bit packed colors.
561 * if (cmask[i])
562 * frag color[i] = frag color[i];
563 * else
564 * frag color[i] = framebuffer color[i];
565 */
566 fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask);
567 fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask);
568 fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask);
569 fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask);
570 }
571
572
573 /*
574 * Do logic ops
575 */
576 if (spu.blend.logicop_enable) {
577 /* XXX to do */
578 /* apply logicop to 32-bit packed colors (fragcx and fbcx) */
579 }
580
581
582 /*
583 * If mask is non-zero, mark tile as dirty.
584 */
585 if (spu_extract(spu_orx(mask), 0)) {
586 spu.cur_ctile_status = TILE_STATUS_DIRTY;
587 }
588 else {
589 /* write no fragments */
590 return;
591 }
592
593
594 /*
595 * Write new fragment/quad colors to the framebuffer/tile.
596 * Only write pixels where the corresponding mask word is set.
597 */
598#if LINEAR_QUAD_LAYOUT
599 /*
600 * Quad layout:
601 * +--+--+--+--+
602 * |p0|p1|p2|p3|...
603 * +--+--+--+--+
604 */
605 if (spu_extract(mask, 0))
606 colorTile->ui[y][x*2] = fragc0;
607 if (spu_extract(mask, 1))
608 colorTile->ui[y][x*2+1] = fragc1;
609 if (spu_extract(mask, 2))
610 colorTile->ui[y][x*2+2] = fragc2;
611 if (spu_extract(mask, 3))
612 colorTile->ui[y][x*2+3] = fragc3;
613#else
614 /*
615 * Quad layout:
616 * +--+--+
617 * |p0|p1|...
618 * +--+--+
619 * |p2|p3|...
620 * +--+--+
621 */
622 if (spu_extract(mask, 0))
623 colorTile->ui[y+0][x+0] = fragc0;
624 if (spu_extract(mask, 1))
625 colorTile->ui[y+0][x+1] = fragc1;
626 if (spu_extract(mask, 2))
627 colorTile->ui[y+1][x+0] = fragc2;
628 if (spu_extract(mask, 3))
629 colorTile->ui[y+1][x+1] = fragc3;
630#endif
631}
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
deleted file mode 100644
index f817abf0463..00000000000
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
+++ /dev/null
@@ -1,44 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
#ifndef SPU_PER_FRAGMENT_OP
#define SPU_PER_FRAGMENT_OP


/**
 * Fallback/debug per-fragment pipeline for one quad: alpha test,
 * Z/stencil test, blend, colormask and logicop.  Normally a
 * PPU-generated code path is used instead of this function.
 */
extern void
spu_fallback_fragment_ops(uint x, uint y,
                          tile_t *colorTile,
                          tile_t *depthStencilTile,
                          vector float fragZ,
                          vector float fragRed,
                          vector float fragGreen,
                          vector float fragBlue,
                          vector float fragAlpha,
                          vector unsigned int mask);


#endif /* SPU_PER_FRAGMENT_OP */
diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c
deleted file mode 100644
index 14987e3c3a2..00000000000
--- a/src/gallium/drivers/cell/spu/spu_render.c
+++ /dev/null
@@ -1,356 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#include <stdio.h>
30#include <libmisc.h>
31#include <spu_mfcio.h>
32
33#include "spu_main.h"
34#include "spu_render.h"
35#include "spu_shuffle.h"
36#include "spu_tri.h"
37#include "spu_tile.h"
38#include "cell/common.h"
39#include "util/u_memory.h"
40
41
42/**
43 * Given a rendering command's bounding box (in pixels) compute the
44 * location of the corresponding screen tile bounding box.
45 */
46static INLINE void
47tile_bounding_box(const struct cell_command_render *render,
48 uint *txmin, uint *tymin,
49 uint *box_num_tiles, uint *box_width_tiles)
50{
51#if 0
52 /* Debug: full-window bounding box */
53 uint txmax = spu.fb.width_tiles - 1;
54 uint tymax = spu.fb.height_tiles - 1;
55 *txmin = 0;
56 *tymin = 0;
57 *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
58 *box_width_tiles = spu.fb.width_tiles;
59 (void) render;
60 (void) txmax;
61 (void) tymax;
62#else
63 uint txmax, tymax, box_height_tiles;
64
65 *txmin = (uint) render->xmin / TILE_SIZE;
66 *tymin = (uint) render->ymin / TILE_SIZE;
67 txmax = (uint) render->xmax / TILE_SIZE;
68 tymax = (uint) render->ymax / TILE_SIZE;
69 if (txmax >= spu.fb.width_tiles)
70 txmax = spu.fb.width_tiles-1;
71 if (tymax >= spu.fb.height_tiles)
72 tymax = spu.fb.height_tiles-1;
73 *box_width_tiles = txmax - *txmin + 1;
74 box_height_tiles = tymax - *tymin + 1;
75 *box_num_tiles = *box_width_tiles * box_height_tiles;
76#endif
77#if 0
78 printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id,
79 render->xmin, render->ymin, render->xmax, render->ymax);
80 printf("SPU %u: tiles: %u, %u .. %u, %u\n",
81 spu.init.id, *txmin, *tymin, txmax, tymax);
82 ASSERT(render->xmin <= render->xmax);
83 ASSERT(render->ymin <= render->ymax);
84#endif
85}
86
87
88/** Check if the tile at (tx,ty) belongs to this SPU */
89static INLINE boolean
90my_tile(uint tx, uint ty)
91{
92 return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id;
93}
94
95
96/**
97 * Start fetching non-clear color/Z tiles from main memory
98 */
99static INLINE void
100get_cz_tiles(uint tx, uint ty)
101{
102 if (spu.read_depth_stencil) {
103 if (spu.cur_ztile_status != TILE_STATUS_CLEAR) {
104 //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty);
105 get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1);
106 spu.cur_ztile_status = TILE_STATUS_GETTING;
107 }
108 }
109
110 if (spu.cur_ctile_status != TILE_STATUS_CLEAR) {
111 //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty);
112 get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0);
113 spu.cur_ctile_status = TILE_STATUS_GETTING;
114 }
115}
116
117
118/**
119 * Start putting dirty color/Z tiles back to main memory
120 */
121static INLINE void
122put_cz_tiles(uint tx, uint ty)
123{
124 if (spu.cur_ztile_status == TILE_STATUS_DIRTY) {
125 /* tile was modified and needs to be written back */
126 //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty);
127 put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1);
128 spu.cur_ztile_status = TILE_STATUS_DEFINED;
129 }
130 else if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
131 /* tile was never used */
132 spu.cur_ztile_status = TILE_STATUS_DEFINED;
133 //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty);
134 }
135
136 if (spu.cur_ctile_status == TILE_STATUS_DIRTY) {
137 /* tile was modified and needs to be written back */
138 //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty);
139 put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0);
140 spu.cur_ctile_status = TILE_STATUS_DEFINED;
141 }
142 else if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
143 /* tile was never used */
144 spu.cur_ctile_status = TILE_STATUS_DEFINED;
145 //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty);
146 }
147}
148
149
150/**
151 * Wait for 'put' of color/z tiles to complete.
152 */
153static INLINE void
154wait_put_cz_tiles(void)
155{
156 wait_on_mask(1 << TAG_WRITE_TILE_COLOR);
157 if (spu.read_depth_stencil) {
158 wait_on_mask(1 << TAG_WRITE_TILE_Z);
159 }
160}
161
162
163/**
164 * Render primitives
165 * \param pos_incr returns value indicating how may words to skip after
166 * this command in the batch buffer
167 */
168void
169cmd_render(const struct cell_command_render *render, uint *pos_incr)
170{
171 /* we'll DMA into these buffers */
172 PIPE_ALIGN_VAR(16) ubyte vertex_data[CELL_BUFFER_SIZE];
173 const uint vertex_size = render->vertex_size; /* in bytes */
174 /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size;
175 uint index_bytes;
176 const ubyte *vertices;
177 const ushort *indexes;
178 uint i, j;
179 uint num_tiles;
180
181 D_PRINTF(CELL_DEBUG_CMD,
182 "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n",
183 render->prim_type,
184 render->num_verts,
185 render->num_indexes,
186 render->inline_verts);
187
188 ASSERT(sizeof(*render) % 4 == 0);
189 ASSERT(total_vertex_bytes % 16 == 0);
190 ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES);
191 ASSERT(render->num_indexes % 3 == 0);
192
193
194 /* indexes are right after the render command in the batch buffer */
195 indexes = (const ushort *) (render + 1);
196 index_bytes = ROUNDUP8(render->num_indexes * 2);
197 *pos_incr = index_bytes / 8 + sizeof(*render) / 8;
198
199
200 if (render->inline_verts) {
201 /* Vertices are after indexes in batch buffer at next 16-byte addr */
202 vertices = (const ubyte *) render + (*pos_incr * 8);
203 vertices = (const ubyte *) align_pointer((void *) vertices, 16);
204 ASSERT_ALIGN16(vertices);
205 *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8;
206 }
207 else {
208 /* Begin DMA fetch of vertex buffer */
209 ubyte *src = spu.init.buffers[render->vertex_buf];
210 ubyte *dest = vertex_data;
211
212 /* skip vertex data we won't use */
213#if 01
214 src += render->min_index * vertex_size;
215 dest += render->min_index * vertex_size;
216 total_vertex_bytes -= render->min_index * vertex_size;
217#endif
218 ASSERT(total_vertex_bytes % 16 == 0);
219 ASSERT_ALIGN16(dest);
220 ASSERT_ALIGN16(src);
221
222 mfc_get(dest, /* in vertex_data[] array */
223 (unsigned int) src, /* src in main memory */
224 total_vertex_bytes, /* size */
225 TAG_VERTEX_BUFFER,
226 0, /* tid */
227 0 /* rid */);
228
229 vertices = vertex_data;
230
231 wait_on_mask(1 << TAG_VERTEX_BUFFER);
232 }
233
234
235 /**
236 ** find tiles which intersect the prim bounding box
237 **/
238 uint txmin, tymin, box_width_tiles, box_num_tiles;
239 tile_bounding_box(render, &txmin, &tymin,
240 &box_num_tiles, &box_width_tiles);
241
242
243 /* make sure any pending clears have completed */
244 wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */
245
246
247 num_tiles = 0;
248
249 /**
250 ** loop over tiles, rendering tris
251 **/
252 for (i = 0; i < box_num_tiles; i++) {
253 const uint tx = txmin + i % box_width_tiles;
254 const uint ty = tymin + i / box_width_tiles;
255
256 ASSERT(tx < spu.fb.width_tiles);
257 ASSERT(ty < spu.fb.height_tiles);
258
259 if (!my_tile(tx, ty))
260 continue;
261
262 num_tiles++;
263
264 spu.cur_ctile_status = spu.ctile_status[ty][tx];
265 spu.cur_ztile_status = spu.ztile_status[ty][tx];
266
267 get_cz_tiles(tx, ty);
268
269 uint drawn = 0;
270
271 const qword vertex_sizes = (qword)spu_splats(vertex_size);
272 const qword verticess = (qword)spu_splats((uint)vertices);
273
274 ASSERT_ALIGN16(&indexes[0]);
275
276 const uint num_indexes = render->num_indexes;
277
278 /* loop over tris
279 * &indexes[0] will be 16 byte aligned. This loop is heavily unrolled
280 * avoiding variable rotates when extracting vertex indices.
281 */
282 for (j = 0; j < num_indexes; j += 24) {
283 /* Load three vectors, containing 24 ushort indices */
284 const qword* lower_qword = (qword*)&indexes[j];
285 const qword indices0 = lower_qword[0];
286 const qword indices1 = lower_qword[1];
287 const qword indices2 = lower_qword[2];
288
289 /* stores three indices for each tri n in slots 0, 1 and 2 of vsn */
290 /* Straightforward rotates for these */
291 qword vs0 = indices0;
292 qword vs1 = si_shlqbyi(indices0, 6);
293 qword vs3 = si_shlqbyi(indices1, 2);
294 qword vs4 = si_shlqbyi(indices1, 8);
295 qword vs6 = si_shlqbyi(indices2, 4);
296 qword vs7 = si_shlqbyi(indices2, 10);
297
298 /* For tri 2 and 5, the three indices are split across two machine
299 * words - rotate and combine */
300 const qword tmp2a = si_shlqbyi(indices0, 12);
301 const qword tmp2b = si_rotqmbyi(indices1, 12|16);
302 qword vs2 = si_selb(tmp2a, tmp2b, si_fsmh(si_from_uint(0x20)));
303
304 const qword tmp5a = si_shlqbyi(indices1, 14);
305 const qword tmp5b = si_rotqmbyi(indices2, 14|16);
306 qword vs5 = si_selb(tmp5a, tmp5b, si_fsmh(si_from_uint(0x60)));
307
308 /* unpack indices from halfword slots to word slots */
309 vs0 = si_shufb(vs0, vs0, SHUFB8(0,A,0,B,0,C,0,0));
310 vs1 = si_shufb(vs1, vs1, SHUFB8(0,A,0,B,0,C,0,0));
311 vs2 = si_shufb(vs2, vs2, SHUFB8(0,A,0,B,0,C,0,0));
312 vs3 = si_shufb(vs3, vs3, SHUFB8(0,A,0,B,0,C,0,0));
313 vs4 = si_shufb(vs4, vs4, SHUFB8(0,A,0,B,0,C,0,0));
314 vs5 = si_shufb(vs5, vs5, SHUFB8(0,A,0,B,0,C,0,0));
315 vs6 = si_shufb(vs6, vs6, SHUFB8(0,A,0,B,0,C,0,0));
316 vs7 = si_shufb(vs7, vs7, SHUFB8(0,A,0,B,0,C,0,0));
317
318 /* Calculate address of vertex in vertices[] */
319 vs0 = si_mpya(vs0, vertex_sizes, verticess);
320 vs1 = si_mpya(vs1, vertex_sizes, verticess);
321 vs2 = si_mpya(vs2, vertex_sizes, verticess);
322 vs3 = si_mpya(vs3, vertex_sizes, verticess);
323 vs4 = si_mpya(vs4, vertex_sizes, verticess);
324 vs5 = si_mpya(vs5, vertex_sizes, verticess);
325 vs6 = si_mpya(vs6, vertex_sizes, verticess);
326 vs7 = si_mpya(vs7, vertex_sizes, verticess);
327
328 /* Select the appropriate call based on the number of vertices
329 * remaining */
330 switch(num_indexes - j) {
331 default: drawn += tri_draw(vs7, tx, ty);
332 case 21: drawn += tri_draw(vs6, tx, ty);
333 case 18: drawn += tri_draw(vs5, tx, ty);
334 case 15: drawn += tri_draw(vs4, tx, ty);
335 case 12: drawn += tri_draw(vs3, tx, ty);
336 case 9: drawn += tri_draw(vs2, tx, ty);
337 case 6: drawn += tri_draw(vs1, tx, ty);
338 case 3: drawn += tri_draw(vs0, tx, ty);
339 }
340 }
341
342 //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3);
343
344 /* write color/z tiles back to main framebuffer, if dirtied */
345 put_cz_tiles(tx, ty);
346
347 wait_put_cz_tiles(); /* XXX seems unnecessary... */
348
349 spu.ctile_status[ty][tx] = spu.cur_ctile_status;
350 spu.ztile_status[ty][tx] = spu.cur_ztile_status;
351 }
352
353 D_PRINTF(CELL_DEBUG_CMD,
354 "RENDER done (%u tiles hit)\n",
355 num_tiles);
356}
diff --git a/src/gallium/drivers/cell/spu/spu_render.h b/src/gallium/drivers/cell/spu/spu_render.h
deleted file mode 100644
index 493434f0878..00000000000
--- a/src/gallium/drivers/cell/spu/spu_render.h
+++ /dev/null
@@ -1,38 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#ifndef SPU_RENDER_H
30#define SPU_RENDER_H
31
32#include "cell/common.h"
33
34extern void
35cmd_render(const struct cell_command_render *render, uint *pos_incr);
36
37#endif /* SPU_RENDER_H */
38
diff --git a/src/gallium/drivers/cell/spu/spu_shuffle.h b/src/gallium/drivers/cell/spu/spu_shuffle.h
deleted file mode 100644
index 74f2a0b6d2e..00000000000
--- a/src/gallium/drivers/cell/spu/spu_shuffle.h
+++ /dev/null
@@ -1,186 +0,0 @@
1#ifndef SPU_SHUFFLE_H
2#define SPU_SHUFFLE_H
3
4/*
5 * Generate shuffle patterns with minimal fuss.
6 *
7 * Based on ideas from
8 * http://www.insomniacgames.com/tech/articles/0408/files/shuffles.pdf
9 *
10 * A-P indicates 0-15th position in first vector
11 * a-p indicates 0-15th position in second vector
12 *
13 * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
14 * |00|01|02|03|04|05|06|07|08|09|0a|0b|0c|0d|0e|0f|
15 * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
16 * | A| B| C| D|
17 * +-----+-----+-----+-----+-----+-----+-----+-----+
18 * | A| B| C| D| E| F| G| H|
19 * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
20 * | A| B| C| D| E| F| G| H| I| J| K| L| M| N| O| P|
21 * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
22 *
23 * x or X indicates 0xff
24 * 8 indicates 0x80
25 * 0 indicates 0x00
26 *
27 * The macros SHUFFLE4() SHUFFLE8() and SHUFFLE16() provide a const vector
28 * unsigned char literal suitable for use with spu_shuffle().
29 *
30 * The macros SHUFB4() SHUFB8() and SHUFB16() provide a const qword vector
31 * literal suitable for use with si_shufb().
32 *
33 *
34 * For example :
35 * SHUFB4(A,A,A,A)
36 * expands to :
37 * ((const qword){0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3})
38 *
39 * SHUFFLE8(A,B,a,b,C,c,8,8)
40 * expands to :
41 * ((const vector unsigned char){0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13,
42 * 0x04,0x05,0x14,0x15,0xe0,0xe0,0xe0,0xe0})
43 *
44 */
45
46#include <spu_intrinsics.h>
47
48#define SHUFFLE_PATTERN_4_A__ 0x00, 0x01, 0x02, 0x03
49#define SHUFFLE_PATTERN_4_B__ 0x04, 0x05, 0x06, 0x07
50#define SHUFFLE_PATTERN_4_C__ 0x08, 0x09, 0x0a, 0x0b
51#define SHUFFLE_PATTERN_4_D__ 0x0c, 0x0d, 0x0e, 0x0f
52#define SHUFFLE_PATTERN_4_a__ 0x10, 0x11, 0x12, 0x13
53#define SHUFFLE_PATTERN_4_b__ 0x14, 0x15, 0x16, 0x17
54#define SHUFFLE_PATTERN_4_c__ 0x18, 0x19, 0x1a, 0x1b
55#define SHUFFLE_PATTERN_4_d__ 0x1c, 0x1d, 0x1e, 0x1f
56#define SHUFFLE_PATTERN_4_X__ 0xc0, 0xc0, 0xc0, 0xc0
57#define SHUFFLE_PATTERN_4_x__ 0xc0, 0xc0, 0xc0, 0xc0
58#define SHUFFLE_PATTERN_4_0__ 0x80, 0x80, 0x80, 0x80
59#define SHUFFLE_PATTERN_4_8__ 0xe0, 0xe0, 0xe0, 0xe0
60
61#define SHUFFLE_VECTOR_4__(A, B, C, D) \
62 SHUFFLE_PATTERN_4_##A##__, \
63 SHUFFLE_PATTERN_4_##B##__, \
64 SHUFFLE_PATTERN_4_##C##__, \
65 SHUFFLE_PATTERN_4_##D##__
66
67#define SHUFFLE4(A, B, C, D) \
68 ((const vector unsigned char){ \
69 SHUFFLE_VECTOR_4__(A, B, C, D) \
70 })
71
72#define SHUFB4(A, B, C, D) \
73 ((const qword){ \
74 SHUFFLE_VECTOR_4__(A, B, C, D) \
75 })
76
77
78#define SHUFFLE_PATTERN_8_A__ 0x00, 0x01
79#define SHUFFLE_PATTERN_8_B__ 0x02, 0x03
80#define SHUFFLE_PATTERN_8_C__ 0x04, 0x05
81#define SHUFFLE_PATTERN_8_D__ 0x06, 0x07
82#define SHUFFLE_PATTERN_8_E__ 0x08, 0x09
83#define SHUFFLE_PATTERN_8_F__ 0x0a, 0x0b
84#define SHUFFLE_PATTERN_8_G__ 0x0c, 0x0d
85#define SHUFFLE_PATTERN_8_H__ 0x0e, 0x0f
86#define SHUFFLE_PATTERN_8_a__ 0x10, 0x11
87#define SHUFFLE_PATTERN_8_b__ 0x12, 0x13
88#define SHUFFLE_PATTERN_8_c__ 0x14, 0x15
89#define SHUFFLE_PATTERN_8_d__ 0x16, 0x17
90#define SHUFFLE_PATTERN_8_e__ 0x18, 0x19
91#define SHUFFLE_PATTERN_8_f__ 0x1a, 0x1b
92#define SHUFFLE_PATTERN_8_g__ 0x1c, 0x1d
93#define SHUFFLE_PATTERN_8_h__ 0x1e, 0x1f
94#define SHUFFLE_PATTERN_8_X__ 0xc0, 0xc0
95#define SHUFFLE_PATTERN_8_x__ 0xc0, 0xc0
96#define SHUFFLE_PATTERN_8_0__ 0x80, 0x80
97#define SHUFFLE_PATTERN_8_8__ 0xe0, 0xe0
98
99
100#define SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
101 SHUFFLE_PATTERN_8_##A##__, \
102 SHUFFLE_PATTERN_8_##B##__, \
103 SHUFFLE_PATTERN_8_##C##__, \
104 SHUFFLE_PATTERN_8_##D##__, \
105 SHUFFLE_PATTERN_8_##E##__, \
106 SHUFFLE_PATTERN_8_##F##__, \
107 SHUFFLE_PATTERN_8_##G##__, \
108 SHUFFLE_PATTERN_8_##H##__
109
110#define SHUFFLE8(A, B, C, D, E, F, G, H) \
111 ((const vector unsigned char){ \
112 SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
113 })
114
115#define SHUFB8(A, B, C, D, E, F, G, H) \
116 ((const qword){ \
117 SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
118 })
119
120
121#define SHUFFLE_PATTERN_16_A__ 0x00
122#define SHUFFLE_PATTERN_16_B__ 0x01
123#define SHUFFLE_PATTERN_16_C__ 0x02
124#define SHUFFLE_PATTERN_16_D__ 0x03
125#define SHUFFLE_PATTERN_16_E__ 0x04
126#define SHUFFLE_PATTERN_16_F__ 0x05
127#define SHUFFLE_PATTERN_16_G__ 0x06
128#define SHUFFLE_PATTERN_16_H__ 0x07
129#define SHUFFLE_PATTERN_16_I__ 0x08
130#define SHUFFLE_PATTERN_16_J__ 0x09
131#define SHUFFLE_PATTERN_16_K__ 0x0a
132#define SHUFFLE_PATTERN_16_L__ 0x0b
133#define SHUFFLE_PATTERN_16_M__ 0x0c
134#define SHUFFLE_PATTERN_16_N__ 0x0d
135#define SHUFFLE_PATTERN_16_O__ 0x0e
136#define SHUFFLE_PATTERN_16_P__ 0x0f
137#define SHUFFLE_PATTERN_16_a__ 0x10
138#define SHUFFLE_PATTERN_16_b__ 0x11
139#define SHUFFLE_PATTERN_16_c__ 0x12
140#define SHUFFLE_PATTERN_16_d__ 0x13
141#define SHUFFLE_PATTERN_16_e__ 0x14
142#define SHUFFLE_PATTERN_16_f__ 0x15
143#define SHUFFLE_PATTERN_16_g__ 0x16
144#define SHUFFLE_PATTERN_16_h__ 0x17
145#define SHUFFLE_PATTERN_16_i__ 0x18
146#define SHUFFLE_PATTERN_16_j__ 0x19
147#define SHUFFLE_PATTERN_16_k__ 0x1a
148#define SHUFFLE_PATTERN_16_l__ 0x1b
149#define SHUFFLE_PATTERN_16_m__ 0x1c
150#define SHUFFLE_PATTERN_16_n__ 0x1d
151#define SHUFFLE_PATTERN_16_o__ 0x1e
152#define SHUFFLE_PATTERN_16_p__ 0x1f
153#define SHUFFLE_PATTERN_16_X__ 0xc0
154#define SHUFFLE_PATTERN_16_x__ 0xc0
155#define SHUFFLE_PATTERN_16_0__ 0x80
156#define SHUFFLE_PATTERN_16_8__ 0xe0
157
158#define SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
159 SHUFFLE_PATTERN_16_##A##__, \
160 SHUFFLE_PATTERN_16_##B##__, \
161 SHUFFLE_PATTERN_16_##C##__, \
162 SHUFFLE_PATTERN_16_##D##__, \
163 SHUFFLE_PATTERN_16_##E##__, \
164 SHUFFLE_PATTERN_16_##F##__, \
165 SHUFFLE_PATTERN_16_##G##__, \
166 SHUFFLE_PATTERN_16_##H##__, \
167 SHUFFLE_PATTERN_16_##I##__, \
168 SHUFFLE_PATTERN_16_##J##__, \
169 SHUFFLE_PATTERN_16_##K##__, \
170 SHUFFLE_PATTERN_16_##L##__, \
171 SHUFFLE_PATTERN_16_##M##__, \
172 SHUFFLE_PATTERN_16_##N##__, \
173 SHUFFLE_PATTERN_16_##O##__, \
174 SHUFFLE_PATTERN_16_##P##__
175
176#define SHUFFLE16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
177 ((const vector unsigned char){ \
178 SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
179 })
180
181#define SHUFB16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
182 ((const qword){ \
183 SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
184 })
185
186#endif
diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c
deleted file mode 100644
index 69784c89788..00000000000
--- a/src/gallium/drivers/cell/spu/spu_texture.c
+++ /dev/null
@@ -1,641 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#include <math.h>
30
31#include "pipe/p_compiler.h"
32#include "spu_main.h"
33#include "spu_texture.h"
34#include "spu_tile.h"
35#include "spu_colorpack.h"
36#include "spu_dcache.h"
37
38
39/**
40 * Mark all tex cache entries as invalid.
41 */
42void
43invalidate_tex_cache(void)
44{
45 uint lvl;
46 for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) {
47 uint unit = 0;
48 uint bytes = 4 * spu.texture[unit].level[lvl].width
49 * spu.texture[unit].level[lvl].height;
50
51 if (spu.texture[unit].target == PIPE_TEXTURE_CUBE)
52 bytes *= 6;
53 else if (spu.texture[unit].target == PIPE_TEXTURE_3D)
54 bytes *= spu.texture[unit].level[lvl].depth;
55
56 spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes);
57 }
58}
59
60
61/**
62 * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ...
63 *
64 * NOTE: in the typical case of bilinear filtering, the four texels
65 * are in a 2x2 group so we could get by with just two dcache fetches
66 * (two side-by-side texels per fetch). But when bilinear filtering
67 * wraps around a texture edge, we'll probably need code like we have
68 * now.
69 * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time,
70 * it's quite likely that the four pixels in a quad will need some of the
71 * same texels. So look into doing texture fetches for four pixels at
72 * a time.
73 */
74static void
75get_four_texels(const struct spu_texture_level *tlevel, uint face,
76 vec_int4 x, vec_int4 y,
77 vec_uint4 *texels)
78{
79 unsigned texture_ea = (uintptr_t) tlevel->start;
80 const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */
81 const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */
82 const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */
83 const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */
84
85 const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row);
86 const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t));
87
88 qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
89 tile_offset = si_mpy((qword) tile_offset, tile_size);
90
91 qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x);
92 texel_offset = si_mpyui(texel_offset, 4);
93
94 vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset);
95
96 texture_ea = texture_ea + face * tlevel->bytes_per_image;
97
98 spu_dcache_fetch_unaligned((qword *) & texels[0],
99 texture_ea + spu_extract(offset, 0), 4);
100 spu_dcache_fetch_unaligned((qword *) & texels[1],
101 texture_ea + spu_extract(offset, 1), 4);
102 spu_dcache_fetch_unaligned((qword *) & texels[2],
103 texture_ea + spu_extract(offset, 2), 4);
104 spu_dcache_fetch_unaligned((qword *) & texels[3],
105 texture_ea + spu_extract(offset, 3), 4);
106}
107
108
109/** clamp vec to [0, max] */
110static INLINE vector signed int
111spu_clamp(vector signed int vec, vector signed int max)
112{
113 static const vector signed int zero = {0,0,0,0};
114 vector unsigned int c;
115 c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */
116 vec = spu_sel(zero, vec, c);
117 c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */
118 vec = spu_sel(vec, max, c);
119 return vec;
120}
121
122
123
124/**
125 * Do nearest texture sampling for four pixels.
126 * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
127 */
128void
129sample_texture_2d_nearest(vector float s, vector float t,
130 uint unit, uint level, uint face,
131 vector float colors[4])
132{
133 const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
134 vector float ss = spu_mul(s, tlevel->scale_s);
135 vector float tt = spu_mul(t, tlevel->scale_t);
136 vector signed int is = spu_convts(ss, 0);
137 vector signed int it = spu_convts(tt, 0);
138 vec_uint4 texels[4];
139
140 /* PIPE_TEX_WRAP_REPEAT */
141 is = spu_and(is, tlevel->mask_s);
142 it = spu_and(it, tlevel->mask_t);
143
144 /* PIPE_TEX_WRAP_CLAMP */
145 is = spu_clamp(is, tlevel->max_s);
146 it = spu_clamp(it, tlevel->max_t);
147
148 get_four_texels(tlevel, face, is, it, texels);
149
150 /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */
151 spu_unpack_A8R8G8B8_transpose4(texels, colors);
152}
153
154
155/**
156 * Do bilinear texture sampling for four pixels.
157 * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
158 */
159void
160sample_texture_2d_bilinear(vector float s, vector float t,
161 uint unit, uint level, uint face,
162 vector float colors[4])
163{
164 const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
165 static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
166
167 vector float ss = spu_madd(s, tlevel->scale_s, half);
168 vector float tt = spu_madd(t, tlevel->scale_t, half);
169
170 vector signed int is0 = spu_convts(ss, 0);
171 vector signed int it0 = spu_convts(tt, 0);
172
173 /* is + 1, it + 1 */
174 vector signed int is1 = spu_add(is0, 1);
175 vector signed int it1 = spu_add(it0, 1);
176
177 /* PIPE_TEX_WRAP_REPEAT */
178 is0 = spu_and(is0, tlevel->mask_s);
179 it0 = spu_and(it0, tlevel->mask_t);
180 is1 = spu_and(is1, tlevel->mask_s);
181 it1 = spu_and(it1, tlevel->mask_t);
182
183 /* PIPE_TEX_WRAP_CLAMP */
184 is0 = spu_clamp(is0, tlevel->max_s);
185 it0 = spu_clamp(it0, tlevel->max_t);
186 is1 = spu_clamp(is1, tlevel->max_s);
187 it1 = spu_clamp(it1, tlevel->max_t);
188
189 /* get packed int texels */
190 vector unsigned int texels[16];
191 get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */
192 get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */
193 get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */
194 get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
195
196 /* convert packed int texels to float colors */
197 vector float ftexels[16];
198 spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0);
199 spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4);
200 spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8);
201 spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12);
202
203 /* Compute weighting factors in [0,1]
204 * Multiply texcoord by 1024, AND with 1023, convert back to float.
205 */
206 vector float ss1024 = spu_mul(ss, spu_splats(1024.0f));
207 vector signed int iss1024 = spu_convts(ss1024, 0);
208 iss1024 = spu_and(iss1024, 1023);
209 vector float sWeights0 = spu_convtf(iss1024, 10);
210
211 vector float tt1024 = spu_mul(tt, spu_splats(1024.0f));
212 vector signed int itt1024 = spu_convts(tt1024, 0);
213 itt1024 = spu_and(itt1024, 1023);
214 vector float tWeights0 = spu_convtf(itt1024, 10);
215
216 /* 1 - sWeight and 1 - tWeight */
217 vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0);
218 vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0);
219
220 /* reds, for four pixels */
221 ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/
222 ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/
223 ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/
224 ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/
225 colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]),
226 spu_add(ftexels[8], ftexels[12]));
227
228 /* greens, for four pixels */
229 ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/
230 ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/
231 ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/
232 ftexels[13] = spu_mul(ftexels[13], spu_mul(sWeights0, tWeights0)); /*lr*/
233 colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]),
234 spu_add(ftexels[9], ftexels[13]));
235
236 /* blues, for four pixels */
237 ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/
238 ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/
239 ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/
240 ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/
241 colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]),
242 spu_add(ftexels[10], ftexels[14]));
243
244 /* alphas, for four pixels */
245 ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/
246 ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/
247 ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/
248 ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/
249 colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]),
250 spu_add(ftexels[11], ftexels[15]));
251}
252
253
254
255/**
256 * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h
257 */
258static INLINE void
259transpose(vector unsigned int *mOut0,
260 vector unsigned int *mOut1,
261 vector unsigned int *mOut2,
262 vector unsigned int *mOut3,
263 vector unsigned int *mIn)
264{
265 vector unsigned int abcd, efgh, ijkl, mnop; /* input vectors */
266 vector unsigned int aeim, bfjn, cgko, dhlp; /* output vectors */
267 vector unsigned int aibj, ckdl, emfn, gohp; /* intermediate vectors */
268
269 vector unsigned char shufflehi = ((vector unsigned char) {
270 0x00, 0x01, 0x02, 0x03,
271 0x10, 0x11, 0x12, 0x13,
272 0x04, 0x05, 0x06, 0x07,
273 0x14, 0x15, 0x16, 0x17});
274 vector unsigned char shufflelo = ((vector unsigned char) {
275 0x08, 0x09, 0x0A, 0x0B,
276 0x18, 0x19, 0x1A, 0x1B,
277 0x0C, 0x0D, 0x0E, 0x0F,
278 0x1C, 0x1D, 0x1E, 0x1F});
279 abcd = *(mIn+0);
280 efgh = *(mIn+1);
281 ijkl = *(mIn+2);
282 mnop = *(mIn+3);
283
284 aibj = spu_shuffle(abcd, ijkl, shufflehi);
285 ckdl = spu_shuffle(abcd, ijkl, shufflelo);
286 emfn = spu_shuffle(efgh, mnop, shufflehi);
287 gohp = spu_shuffle(efgh, mnop, shufflelo);
288
289 aeim = spu_shuffle(aibj, emfn, shufflehi);
290 bfjn = spu_shuffle(aibj, emfn, shufflelo);
291 cgko = spu_shuffle(ckdl, gohp, shufflehi);
292 dhlp = spu_shuffle(ckdl, gohp, shufflelo);
293
294 *mOut0 = aeim;
295 *mOut1 = bfjn;
296 *mOut2 = cgko;
297 *mOut3 = dhlp;
298}
299
300
/**
 * Bilinear filtering, using int instead of float arithmetic for computing
 * sample weights.
 *
 * Fetches the four texels surrounding each of the quad's four (s,t)
 * coordinates and blends them with 7-bit fixed-point weights using the
 * SI multiply intrinsic.
 *
 * \param s, t    texcoords for the four pixels of a quad
 * \param unit    texture/sampler unit
 * \param level   mipmap level to sample
 * \param face    cube face (0 for 2D textures)
 * \param colors  returns filtered colors as {RRRR, GGGG, BBBB, AAAA}
 */
void
sample_texture_2d_bilinear_int(vector float s, vector float t,
                               uint unit, uint level, uint face,
                               vector float colors[4])
{
   const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
   static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};

   /* Scale texcoords by size of texture, and add half pixel bias */
   vector float ss = spu_madd(s, tlevel->scale_s, half);
   vector float tt = spu_madd(t, tlevel->scale_t, half);

   /* convert float coords to fixed-pt coords with 7 fraction bits */
   vector signed int is = spu_convts(ss, 7);  /* XXX really need floor() here */
   vector signed int it = spu_convts(tt, 7);  /* XXX really need floor() here */

   /* compute integer texel weights in [0, 127] (7-bit fractional part) */
   vector signed int sWeights0 = spu_and(is, 127);
   vector signed int tWeights0 = spu_and(it, 127);
   vector signed int sWeights1 = spu_sub(127, sWeights0);
   vector signed int tWeights1 = spu_sub(127, tWeights0);

   /* texel coords: is0 = is / 128, it0 = it / 128 (arithmetic shift right 7) */
   vector signed int is0 = spu_rlmask(is, -7);
   vector signed int it0 = spu_rlmask(it, -7);

   /* texel coords: is1 = is0 + 1, it1 = it0 + 1 (neighbor texels) */
   vector signed int is1 = spu_add(is0, 1);
   vector signed int it1 = spu_add(it0, 1);

   /* PIPE_TEX_WRAP_REPEAT: wrap coords with power-of-two size mask */
   is0 = spu_and(is0, tlevel->mask_s);
   it0 = spu_and(it0, tlevel->mask_t);
   is1 = spu_and(is1, tlevel->mask_s);
   it1 = spu_and(it1, tlevel->mask_t);

   /* PIPE_TEX_WRAP_CLAMP: clamp coords to max texel index */
   is0 = spu_clamp(is0, tlevel->max_s);
   it0 = spu_clamp(it0, tlevel->max_t);
   is1 = spu_clamp(is1, tlevel->max_s);
   it1 = spu_clamp(it1, tlevel->max_t);

   /* get packed int texels: 4 texels (ul/ur/ll/lr) for each of 4 pixels */
   vector unsigned int texels[16];
   get_four_texels(tlevel, face, is0, it0, texels + 0);  /* upper-left */
   get_four_texels(tlevel, face, is1, it0, texels + 4);  /* upper-right */
   get_four_texels(tlevel, face, is0, it1, texels + 8);  /* lower-left */
   get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */

   /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */
   {
      /* 0x80 in a shuffle pattern selects a zero byte */
      static const unsigned char ZERO = 0x80;
      int i;
      for (i = 0; i < 16; i++) {
         texels[i] = spu_shuffle(texels[i], texels[i],
                                 ((vector unsigned char) {
                                    ZERO, ZERO, ZERO, 1,
                                    ZERO, ZERO, ZERO, 2,
                                    ZERO, ZERO, ZERO, 3,
                                    ZERO, ZERO, ZERO, 0}));
      }
   }

   /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */
   vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7,
      texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15;
   transpose(&texel0, &texel1, &texel2, &texel3, texels + 0);
   transpose(&texel4, &texel5, &texel6, &texel7, texels + 4);
   transpose(&texel8, &texel9, &texel10, &texel11, texels + 8);
   transpose(&texel12, &texel13, &texel14, &texel15, texels + 12);

   /* computed weighted colors */
   vector unsigned int c0, c1, c2, c3, cSum;

   /* red: weight each corner texel by the product of its s/t weights.
    * Weight products are 14-bit; texel channels are 8-bit; the sum fits in
    * 8+14 = 22 bits, hence the spu_convtf(cSum, 22) scale below.
    */
   c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1));  /*ul*/
   c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1));  /*ur*/
   c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0));  /*ll*/
   c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
   cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
   colors[0] = spu_convtf(cSum, 22);

   /* green */
   c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1));  /*ul*/
   c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) sWeights0, (qword) tWeights1));  /*ur*/
   c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0));  /*ll*/
   c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
   cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
   colors[1] = spu_convtf(cSum, 22);

   /* blue */
   c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) sWeights1, (qword) tWeights1));  /*ul*/
   c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1));  /*ur*/
   c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
   c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
   cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
   colors[2] = spu_convtf(cSum, 22);

   /* alpha */
   c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1));  /*ul*/
   c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1));  /*ur*/
   c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
   c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
   cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
   colors[3] = spu_convtf(cSum, 22);
}
411
412
413
414/**
415 * Compute level of detail factor from texcoords.
416 */
417static INLINE float
418compute_lambda_2d(uint unit, vector float s, vector float t)
419{
420 uint baseLevel = 0;
421 float width = spu.texture[unit].level[baseLevel].width;
422 float height = spu.texture[unit].level[baseLevel].width;
423 float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0));
424 float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0));
425 float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0));
426 float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0));
427#if 0
428 /* ideal value */
429 float x = dsdx * dsdx + dtdx * dtdx;
430 float y = dsdy * dsdy + dtdy * dtdy;
431 float rho = x > y ? x : y;
432 rho = sqrtf(rho);
433#else
434 /* approximation */
435 dsdx = fabsf(dsdx);
436 dsdy = fabsf(dsdy);
437 dtdx = fabsf(dtdx);
438 dtdy = fabsf(dtdy);
439 float rho = (dsdx + dsdy + dtdx + dtdy) * 0.5;
440#endif
441 float lambda = logf(rho) * 1.442695f; /* compute logbase2(rho) */
442 return lambda;
443}
444
445
446/**
447 * Blend two sets of colors according to weight.
448 */
449static void
450blend_colors(vector float c0[4], const vector float c1[4], float weight)
451{
452 vector float t = spu_splats(weight);
453 vector float dc0 = spu_sub(c1[0], c0[0]);
454 vector float dc1 = spu_sub(c1[1], c0[1]);
455 vector float dc2 = spu_sub(c1[2], c0[2]);
456 vector float dc3 = spu_sub(c1[3], c0[3]);
457 c0[0] = spu_madd(dc0, t, c0[0]);
458 c0[1] = spu_madd(dc1, t, c0[1]);
459 c0[2] = spu_madd(dc2, t, c0[2]);
460 c0[3] = spu_madd(dc3, t, c0[3]);
461}
462
463
464/**
465 * Texture sampling with level of detail selection and possibly mipmap
466 * interpolation.
467 */
468void
469sample_texture_2d_lod(vector float s, vector float t,
470 uint unit, uint level_ignored, uint face,
471 vector float colors[4])
472{
473 /*
474 * Note that we're computing a lambda/lod here that's used for all
475 * four pixels in the quad.
476 */
477 float lambda = compute_lambda_2d(unit, s, t);
478
479 (void) face;
480 (void) level_ignored;
481
482 /* apply lod bias */
483 lambda += spu.sampler[unit].lod_bias;
484
485 /* clamp */
486 if (lambda < spu.sampler[unit].min_lod)
487 lambda = spu.sampler[unit].min_lod;
488 else if (lambda > spu.sampler[unit].max_lod)
489 lambda = spu.sampler[unit].max_lod;
490
491 if (lambda <= 0.0f) {
492 /* magnify */
493 spu.mag_sample_texture_2d[unit](s, t, unit, 0, face, colors);
494 }
495 else {
496 /* minify */
497 if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
498 /* sample two mipmap levels and interpolate */
499 int level = (int) lambda;
500 if (level > (int) spu.texture[unit].max_level)
501 level = spu.texture[unit].max_level;
502 spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
503 if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
504 /* sample second mipmap level */
505 float weight = lambda - (float) level;
506 level++;
507 if (level <= (int) spu.texture[unit].max_level) {
508 vector float colors2[4];
509 spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors2);
510 blend_colors(colors, colors2, weight);
511 }
512 }
513 }
514 else {
515 /* sample one mipmap level */
516 int level = (int) (lambda + 0.5f);
517 if (level > (int) spu.texture[unit].max_level)
518 level = spu.texture[unit].max_level;
519 spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
520 }
521 }
522}
523
524
525/** XXX need a SIMD version of this */
526static unsigned
527choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
528{
529 /*
530 major axis
531 direction target sc tc ma
532 ---------- ------------------------------- --- --- ---
533 +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
534 -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
535 +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
536 -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
537 +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
538 -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
539 */
540 const float arx = fabsf(rx);
541 const float ary = fabsf(ry);
542 const float arz = fabsf(rz);
543 unsigned face;
544 float sc, tc, ma;
545
546 if (arx > ary && arx > arz) {
547 if (rx >= 0.0F) {
548 face = PIPE_TEX_FACE_POS_X;
549 sc = -rz;
550 tc = -ry;
551 ma = arx;
552 }
553 else {
554 face = PIPE_TEX_FACE_NEG_X;
555 sc = rz;
556 tc = -ry;
557 ma = arx;
558 }
559 }
560 else if (ary > arx && ary > arz) {
561 if (ry >= 0.0F) {
562 face = PIPE_TEX_FACE_POS_Y;
563 sc = rx;
564 tc = rz;
565 ma = ary;
566 }
567 else {
568 face = PIPE_TEX_FACE_NEG_Y;
569 sc = rx;
570 tc = -rz;
571 ma = ary;
572 }
573 }
574 else {
575 if (rz > 0.0F) {
576 face = PIPE_TEX_FACE_POS_Z;
577 sc = rx;
578 tc = -ry;
579 ma = arz;
580 }
581 else {
582 face = PIPE_TEX_FACE_NEG_Z;
583 sc = -rx;
584 tc = -ry;
585 ma = arz;
586 }
587 }
588
589 *newS = (sc / ma + 1.0F) * 0.5F;
590 *newT = (tc / ma + 1.0F) * 0.5F;
591
592 return face;
593}
594
595
596
597void
598sample_texture_cube(vector float s, vector float t, vector float r,
599 uint unit, vector float colors[4])
600{
601 uint p, faces[4], level = 0;
602 float newS[4], newT[4];
603
604 /* Compute cube faces referenced by the four sets of texcoords.
605 * XXX we should SIMD-ize this.
606 */
607 for (p = 0; p < 4; p++) {
608 float rx = spu_extract(s, p);
609 float ry = spu_extract(t, p);
610 float rz = spu_extract(r, p);
611 faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]);
612 }
613
614 if (faces[0] == faces[1] &&
615 faces[0] == faces[2] &&
616 faces[0] == faces[3]) {
617 /* GOOD! All four texcoords refer to the same cube face */
618 s = (vector float) {newS[0], newS[1], newS[2], newS[3]};
619 t = (vector float) {newT[0], newT[1], newT[2], newT[3]};
620 spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors);
621 }
622 else {
623 /* BAD! The four texcoords refer to different faces */
624 for (p = 0; p < 4; p++) {
625 vector float c[4];
626
627 spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]),
628 unit, level, faces[p], c);
629
630 float red = spu_extract(c[0], p);
631 float green = spu_extract(c[1], p);
632 float blue = spu_extract(c[2], p);
633 float alpha = spu_extract(c[3], p);
634
635 colors[0] = spu_insert(red, colors[0], p);
636 colors[1] = spu_insert(green, colors[1], p);
637 colors[2] = spu_insert(blue, colors[2], p);
638 colors[3] = spu_insert(alpha, colors[3], p);
639 }
640 }
641}
diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h
deleted file mode 100644
index 7b75b007b5a..00000000000
--- a/src/gallium/drivers/cell/spu/spu_texture.h
+++ /dev/null
@@ -1,67 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#ifndef SPU_TEXTURE_H
29#define SPU_TEXTURE_H
30
31
32#include "pipe/p_compiler.h"
33
34
35extern void
36invalidate_tex_cache(void);
37
38
39extern void
40sample_texture_2d_nearest(vector float s, vector float t,
41 uint unit, uint level, uint face,
42 vector float colors[4]);
43
44
45extern void
46sample_texture_2d_bilinear(vector float s, vector float t,
47 uint unit, uint level, uint face,
48 vector float colors[4]);
49
50extern void
51sample_texture_2d_bilinear_int(vector float s, vector float t,
52 uint unit, uint level, uint face,
53 vector float colors[4]);
54
55
56extern void
57sample_texture_2d_lod(vector float s, vector float t,
58 uint unit, uint level, uint face,
59 vector float colors[4]);
60
61
62extern void
63sample_texture_cube(vector float s, vector float t, vector float r,
64 uint unit, vector float colors[4]);
65
66
67#endif /* SPU_TEXTURE_H */
diff --git a/src/gallium/drivers/cell/spu/spu_tgsi_exec.h b/src/gallium/drivers/cell/spu/spu_tgsi_exec.h
deleted file mode 100644
index 6f2a3d30b91..00000000000
--- a/src/gallium/drivers/cell/spu/spu_tgsi_exec.h
+++ /dev/null
@@ -1,158 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29#ifndef SPU_TGSI_EXEC_H
30#define SPU_TGSI_EXEC_H
31
32#include "pipe/p_compiler.h"
33#include "pipe/p_state.h"
34
35#if defined __cplusplus
36extern "C" {
37#endif
38
39
40#define NUM_CHANNELS 4 /* R,G,B,A */
41#define QUAD_SIZE 4 /* 4 pixel/quad */
42
43
44
45#define TGSI_EXEC_NUM_TEMPS 128
46#define TGSI_EXEC_NUM_IMMEDIATES 256
47
48/*
49 * Locations of various utility registers (_I = Index, _C = Channel)
50 */
51#define TGSI_EXEC_TEMP_00000000_IDX (TGSI_EXEC_NUM_TEMPS + 0)
52#define TGSI_EXEC_TEMP_00000000_CHAN 0
53
54#define TGSI_EXEC_TEMP_7FFFFFFF_IDX (TGSI_EXEC_NUM_TEMPS + 0)
55#define TGSI_EXEC_TEMP_7FFFFFFF_CHAN 1
56
57#define TGSI_EXEC_TEMP_80000000_IDX (TGSI_EXEC_NUM_TEMPS + 0)
58#define TGSI_EXEC_TEMP_80000000_CHAN 2
59
60#define TGSI_EXEC_TEMP_FFFFFFFF_IDX (TGSI_EXEC_NUM_TEMPS + 0)
61#define TGSI_EXEC_TEMP_FFFFFFFF_CHAN 3
62
63#define TGSI_EXEC_TEMP_ONE_IDX (TGSI_EXEC_NUM_TEMPS + 1)
64#define TGSI_EXEC_TEMP_ONE_CHAN 0
65
66#define TGSI_EXEC_TEMP_TWO_IDX (TGSI_EXEC_NUM_TEMPS + 1)
67#define TGSI_EXEC_TEMP_TWO_CHAN 1
68
69#define TGSI_EXEC_TEMP_128_IDX (TGSI_EXEC_NUM_TEMPS + 1)
70#define TGSI_EXEC_TEMP_128_CHAN 2
71
72#define TGSI_EXEC_TEMP_MINUS_128_IDX (TGSI_EXEC_NUM_TEMPS + 1)
73#define TGSI_EXEC_TEMP_MINUS_128_CHAN 3
74
75#define TGSI_EXEC_TEMP_KILMASK_IDX (TGSI_EXEC_NUM_TEMPS + 2)
76#define TGSI_EXEC_TEMP_KILMASK_CHAN 0
77
78#define TGSI_EXEC_TEMP_OUTPUT_IDX (TGSI_EXEC_NUM_TEMPS + 2)
79#define TGSI_EXEC_TEMP_OUTPUT_CHAN 1
80
81#define TGSI_EXEC_TEMP_PRIMITIVE_IDX (TGSI_EXEC_NUM_TEMPS + 2)
82#define TGSI_EXEC_TEMP_PRIMITIVE_CHAN 2
83
84/* NVIDIA condition code (CC) vector
85 */
86#define TGSI_EXEC_CC_GT 0x01
87#define TGSI_EXEC_CC_EQ 0x02
88#define TGSI_EXEC_CC_LT 0x04
89#define TGSI_EXEC_CC_UN 0x08
90
91#define TGSI_EXEC_CC_X_MASK 0x000000ff
92#define TGSI_EXEC_CC_X_SHIFT 0
93#define TGSI_EXEC_CC_Y_MASK 0x0000ff00
94#define TGSI_EXEC_CC_Y_SHIFT 8
95#define TGSI_EXEC_CC_Z_MASK 0x00ff0000
96#define TGSI_EXEC_CC_Z_SHIFT 16
97#define TGSI_EXEC_CC_W_MASK 0xff000000
98#define TGSI_EXEC_CC_W_SHIFT 24
99
100#define TGSI_EXEC_TEMP_CC_IDX (TGSI_EXEC_NUM_TEMPS + 2)
101#define TGSI_EXEC_TEMP_CC_CHAN 3
102
103#define TGSI_EXEC_TEMP_THREE_IDX (TGSI_EXEC_NUM_TEMPS + 3)
104#define TGSI_EXEC_TEMP_THREE_CHAN 0
105
106#define TGSI_EXEC_TEMP_HALF_IDX (TGSI_EXEC_NUM_TEMPS + 3)
107#define TGSI_EXEC_TEMP_HALF_CHAN 1
108
109/* execution mask, each value is either 0 or ~0 */
110#define TGSI_EXEC_MASK_IDX (TGSI_EXEC_NUM_TEMPS + 3)
111#define TGSI_EXEC_MASK_CHAN 2
112
113/* 4 register buffer for various purposes */
114#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4)
115#define TGSI_EXEC_NUM_TEMP_R 4
116
117#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 8)
118#define TGSI_EXEC_NUM_ADDRS 1
119
120/* predicate register */
121#define TGSI_EXEC_TEMP_P0 (TGSI_EXEC_NUM_TEMPS + 9)
122#define TGSI_EXEC_NUM_PREDS 1
123
124#define TGSI_EXEC_NUM_TEMP_EXTRAS 10
125
126
127
128#define TGSI_EXEC_MAX_NESTING 32
129#define TGSI_EXEC_MAX_COND_NESTING TGSI_EXEC_MAX_NESTING
130#define TGSI_EXEC_MAX_LOOP_NESTING TGSI_EXEC_MAX_NESTING
131#define TGSI_EXEC_MAX_SWITCH_NESTING TGSI_EXEC_MAX_NESTING
132#define TGSI_EXEC_MAX_CALL_NESTING TGSI_EXEC_MAX_NESTING
133
134/* The maximum number of input attributes per vertex. For 2D
135 * input register files, this is the stride between two 1D
136 * arrays.
137 */
138#define TGSI_EXEC_MAX_INPUT_ATTRIBS 17
139
140/* The maximum number of constant vectors per constant buffer.
141 */
142#define TGSI_EXEC_MAX_CONST_BUFFER 4096
143
144/* The maximum number of vertices per primitive */
145#define TGSI_MAX_PRIM_VERTICES 6
146
147/* The maximum number of primitives to be generated */
148#define TGSI_MAX_PRIMITIVES 64
149
150/* The maximum total number of vertices */
151#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS)
152
153
154#if defined __cplusplus
155} /* extern "C" */
156#endif
157
158#endif /* SPU_TGSI_EXEC_H */
diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c
deleted file mode 100644
index 6905015a483..00000000000
--- a/src/gallium/drivers/cell/spu/spu_tile.c
+++ /dev/null
@@ -1,126 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29
30#include "spu_tile.h"
31#include "spu_main.h"
32
33
34/**
35 * Get tile of color or Z values from main memory, put into SPU memory.
36 */
37void
38get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf)
39{
40 const uint offset = ty * spu.fb.width_tiles + tx;
41 const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4);
42 const ubyte *src = zBuf ? spu.fb.depth_start : spu.fb.color_start;
43
44 src += offset * bytesPerTile;
45
46 ASSERT(tx < spu.fb.width_tiles);
47 ASSERT(ty < spu.fb.height_tiles);
48 ASSERT_ALIGN16(tile);
49 /*
50 printf("get_tile: dest: %p src: 0x%x size: %d\n",
51 tile, (unsigned int) src, bytesPerTile);
52 */
53 mfc_get(tile->ui, /* dest in local memory */
54 (unsigned int) src, /* src in main memory */
55 bytesPerTile,
56 tag,
57 0, /* tid */
58 0 /* rid */);
59}
60
61
62/**
63 * Move tile of color or Z values from SPU memory to main memory.
64 */
65void
66put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf)
67{
68 const uint offset = ty * spu.fb.width_tiles + tx;
69 const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4);
70 ubyte *dst = zBuf ? spu.fb.depth_start : spu.fb.color_start;
71
72 dst += offset * bytesPerTile;
73
74 ASSERT(tx < spu.fb.width_tiles);
75 ASSERT(ty < spu.fb.height_tiles);
76 ASSERT_ALIGN16(tile);
77 /*
78 printf("SPU %u: put_tile: src: %p dst: 0x%x size: %d\n",
79 spu.init.id,
80 tile, (unsigned int) dst, bytesPerTile);
81 */
82 mfc_put((void *) tile->ui, /* src in local memory */
83 (unsigned int) dst, /* dst in main memory */
84 bytesPerTile,
85 tag,
86 0, /* tid */
87 0 /* rid */);
88}
89
90
91/**
92 * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled
93 * tiles back to the main framebuffer.
94 */
95void
96really_clear_tiles(uint surfaceIndex)
97{
98 const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
99 uint i;
100
101 if (surfaceIndex == 0) {
102 clear_c_tile(&spu.ctile);
103
104 for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
105 uint tx = i % spu.fb.width_tiles;
106 uint ty = i / spu.fb.width_tiles;
107 if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) {
108 put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
109 }
110 }
111 }
112 else {
113 clear_z_tile(&spu.ztile);
114
115 for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
116 uint tx = i % spu.fb.width_tiles;
117 uint ty = i / spu.fb.width_tiles;
118 if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR)
119 put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1);
120 }
121 }
122
123#if 0
124 wait_on_mask(1 << TAG_SURFACE_CLEAR);
125#endif
126}
diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h
deleted file mode 100644
index 7bfb52be8f3..00000000000
--- a/src/gallium/drivers/cell/spu/spu_tile.h
+++ /dev/null
@@ -1,75 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#ifndef SPU_TILE_H
29#define SPU_TILE_H
30
31
32#include <libmisc.h>
33#include <spu_mfcio.h>
34#include "spu_main.h"
35#include "cell/common.h"
36
37
38
39extern void
40get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf);
41
42extern void
43put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf);
44
45extern void
46really_clear_tiles(uint surfaceIndex);
47
48
49static INLINE void
50clear_c_tile(tile_t *ctile)
51{
52 memset32((uint*) ctile->ui,
53 spu.fb.color_clear_value,
54 TILE_SIZE * TILE_SIZE);
55}
56
57
58static INLINE void
59clear_z_tile(tile_t *ztile)
60{
61 if (spu.fb.zsize == 2) {
62 memset16((ushort*) ztile->us,
63 spu.fb.depth_clear_value,
64 TILE_SIZE * TILE_SIZE);
65 }
66 else {
67 ASSERT(spu.fb.zsize != 0);
68 memset32((uint*) ztile->ui,
69 spu.fb.depth_clear_value,
70 TILE_SIZE * TILE_SIZE);
71 }
72}
73
74
75#endif /* SPU_TILE_H */
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
deleted file mode 100644
index efeebca27bb..00000000000
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ /dev/null
@@ -1,843 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * Triangle rendering within a tile.
30 */
31
32#include "pipe/p_compiler.h"
33#include "pipe/p_format.h"
34#include "util/u_math.h"
35#include "spu_colorpack.h"
36#include "spu_main.h"
37#include "spu_shuffle.h"
38#include "spu_texture.h"
39#include "spu_tile.h"
40#include "spu_tri.h"
41
42
43/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
44typedef vector unsigned int mask_t;
45
46
47
/**
 * Simplified types taken from other parts of Gallium
 */
struct vertex_header {
   /* Declared as data[1] but indexed up to spu.vertex_info.num_attribs
    * (variable-length trailing array idiom); data[0] is the window
    * position (x, y, z, w). */
   vector float data[1];
};
54
55
56
57/* XXX fix this */
58#undef CEILF
59#define CEILF(X) ((float) (int) ((X) + 0.99999f))
60
61
62#define QUAD_TOP_LEFT 0
63#define QUAD_TOP_RIGHT 1
64#define QUAD_BOTTOM_LEFT 2
65#define QUAD_BOTTOM_RIGHT 3
66#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT)
67#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT)
68#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT)
69#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
70#define MASK_ALL 0xf
71
72
73#define CHAN0 0
74#define CHAN1 1
75#define CHAN2 2
76#define CHAN3 3
77
78
79#define DEBUG_VERTS 0
80
/**
 * Triangle edge info, filled in by setup_sort_vertices() (dx/dy) and
 * setup_tri_edges() (the rest).
 */
struct edge {
   union {
      struct {
         float dx;		/**< X(v1) - X(v0), used only during setup */
         float dy;		/**< Y(v1) - Y(v0), used only during setup */
      };
      vec_float4 ds;		/**< vector accessor for dx and dy */
   };
   float dxdy;		/**< dx/dy */
   float sx, sy;	/**< first sample point coord */
   int lines;		/**< number of lines on this edge */
};
96
97
/** Plane-equation coefficients for interpolating one vertex attribute */
struct interp_coef
{
   vector float a0;     /**< constant term */
   vector float dadx;   /**< change per unit step in window x */
   vector float dady;   /**< change per unit step in window y */
};
104
105
/**
 * Triangle setup info (derived from draw_stage).
 * Also used for line drawing (taking some liberties).
 */
struct setup_stage {

   /* Vertices are just an array of floats making up each attribute in
    * turn. Currently fixed at 4 floats, but should change in time.
    * Codegen will help cope with this.
    */
   union {
      struct {
         const struct vertex_header *vmin;      /**< vertex with lowest y */
         const struct vertex_header *vmid;      /**< vertex with middle y */
         const struct vertex_header *vmax;      /**< vertex with highest y */
         const struct vertex_header *vprovoke;  /**< provoking vertex (flat shading) */
      };
      qword vertex_headers;  /**< the four pointers as one quadword */
   };

   /* the three triangle edges; see struct edge above */
   struct edge ebot;
   struct edge etop;
   struct edge emaj;

   float oneOverArea; /* XXX maybe make into vector? */

   uint facing;   /**< CELL_FACING_FRONT or CELL_FACING_BACK */

   uint tx, ty;   /**< position of current tile (x, y) */

   /* clip bounds, derived from the current tile bounds in tri_draw() */
   union {
      struct {
         int cliprect_minx;
         int cliprect_miny;
         int cliprect_maxx;
         int cliprect_maxy;
      };
      qword cliprect;
   };

   struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];

   /* current horizontal span of quads being accumulated */
   struct {
      vec_int4 quad; /**< [0] = row0, [1] = row1; {left[0],left[1],right[0],right[1]} */
      int y;         /**< quad-aligned y of the span */
      unsigned y_flags;  /**< bit 0 = even row written, bit 1 = odd row written */
      unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
   } span;
};


/* single, global setup state for this SPU */
static struct setup_stage setup;
158
159
/** Return a vector with all four elements set to v's x (element 0). */
static INLINE vector float
splatx(vector float v)
{
   return spu_splats(spu_extract(v, CHAN0));
}

/** Return a vector with all four elements set to v's y (element 1). */
static INLINE vector float
splaty(vector float v)
{
   return spu_splats(spu_extract(v, CHAN1));
}

/** Return a vector with all four elements set to v's z (element 2). */
static INLINE vector float
splatz(vector float v)
{
   return spu_splats(spu_extract(v, CHAN2));
}

/** Return a vector with all four elements set to v's w (element 3). */
static INLINE vector float
splatw(vector float v)
{
   return spu_splats(spu_extract(v, CHAN3));
}
183
184
/**
 * Setup fragment shader inputs by evaluating triangle's vertex
 * attribute coefficient info.
 * \param x quad x pos
 * \param y quad y pos
 * \param fragZ returns quad Z values
 * \param fragInputs returns fragment program inputs (4 vectors per input)
 * Note: this code could be incorporated into the fragment program
 * itself to avoid the loop and switch.
 */
static void
eval_inputs(float x, float y, vector float *fragZ, vector float fragInputs[])
{
   /* offsets of the four fragments within the 2x2 quad */
   static const vector float deltaX = (const vector float) {0, 1, 0, 1};
   static const vector float deltaY = (const vector float) {0, 0, 1, 1};

   /* slot 0 holds the interpolated window position (x,y,z,w) */
   const uint posSlot = 0;
   const vector float pos = setup.coef[posSlot].a0;
   const vector float dposdx = setup.coef[posSlot].dadx;
   const vector float dposdy = setup.coef[posSlot].dady;
   const vector float fragX = spu_splats(x) + deltaX;
   const vector float fragY = spu_splats(y) + deltaY;
   vector float fragW, wInv;
   uint i;

   *fragZ = splatz(pos) + fragX * splatz(dposdx) + fragY * splatz(dposdy);
   fragW = splatw(pos) + fragX * splatw(dposdx) + fragY * splatw(dposdy);
   wInv = spu_re(fragW);  /* 1 / w  (spu_re is a reciprocal estimate) */

   /* loop over fragment program inputs */
   for (i = 0; i < spu.vertex_info.num_attribs; i++) {
      uint attr = i + 1;  /* attribute 0 is the position, handled above */
      enum interp_mode interp = spu.vertex_info.attrib[attr].interp_mode;

      /* constant term */
      vector float a0 = setup.coef[attr].a0;
      vector float r0 = splatx(a0);
      vector float r1 = splaty(a0);
      vector float r2 = splatz(a0);
      vector float r3 = splatw(a0);

      if (interp == INTERP_LINEAR || interp == INTERP_PERSPECTIVE) {
         /* linear term */
         vector float dadx = setup.coef[attr].dadx;
         vector float dady = setup.coef[attr].dady;
         /* Use SPU intrinsics here to get slightly better code.
          * originally: r0 += fragX * splatx(dadx) + fragY * splatx(dady);
          */
         r0 = spu_madd(fragX, splatx(dadx), spu_madd(fragY, splatx(dady), r0));
         r1 = spu_madd(fragX, splaty(dadx), spu_madd(fragY, splaty(dady), r1));
         r2 = spu_madd(fragX, splatz(dadx), spu_madd(fragY, splatz(dady), r2));
         r3 = spu_madd(fragX, splatw(dadx), spu_madd(fragY, splatw(dady), r3));
         if (interp == INTERP_PERSPECTIVE) {
            /* perspective term: divide by interpolated W */
            r0 *= wInv;
            r1 *= wInv;
            r2 *= wInv;
            r3 *= wInv;
         }
      }
      /* write one quad's worth (4 vectors) per input register */
      fragInputs[CHAN0] = r0;
      fragInputs[CHAN1] = r1;
      fragInputs[CHAN2] = r2;
      fragInputs[CHAN3] = r3;
      fragInputs += 4;
   }
}
252
253
/**
 * Emit a quad (pass to next stage).  No clipping is done.
 * Runs the fragment program on the 2x2 quad at (x, y), then applies
 * per-fragment operations which update the current color/Z tiles.
 * Note: about 1/5 to 1/7 of the time, mask is zero and this function
 * should be skipped.  But adding the test for that slows things down
 * overall.
 * \param x, y  quad position (window coords)
 * \param mask  per-pixel coverage mask (0 or ~0 per element)
 */
static INLINE void
emit_quad( int x, int y, mask_t mask)
{
   /* If any bits in mask are set... */
   if (spu_extract(spu_orx(mask), 0)) {
      /* quad position relative to the tile's cliprect origin */
      const int ix = x - setup.cliprect_minx;
      const int iy = y - setup.cliprect_miny;

      /* we're about to write to the tiles */
      spu.cur_ctile_status = TILE_STATUS_DIRTY;
      spu.cur_ztile_status = TILE_STATUS_DIRTY;

      {
         /*
          * Run fragment shader, execute per-fragment ops, update fb/tile.
          */
         vector float inputs[4*4], outputs[2*4];
         vector unsigned int kill_mask;
         vector float fragZ;

         eval_inputs((float) x, (float) y, &fragZ, inputs);

         ASSERT(spu.fragment_program);
         ASSERT(spu.fragment_ops);

         /* Execute the current fragment program */
         kill_mask = spu.fragment_program(inputs, outputs, spu.constants);

         /* remove killed fragments from the coverage mask */
         mask = spu_andc(mask, kill_mask);

         /* Execute per-fragment/quad operations, including:
          * alpha test, z test, stencil test, blend and framebuffer writing.
          * Note that there are two different fragment operations functions
          * that can be called, one for front-facing fragments, and one
          * for back-facing fragments.  (Often the two are the same;
          * but in some cases, like two-sided stenciling, they can be
          * very different.)  So choose the correct function depending
          * on the calculated facing.
          */
         spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile,
                          fragZ,
                          outputs[0*4+0],
                          outputs[0*4+1],
                          outputs[0*4+2],
                          outputs[0*4+3],
                          mask);
      }
   }
}
308
309
310/**
311 * Given an X or Y coordinate, return the block/quad coordinate that it
312 * belongs to.
313 */
314static INLINE int
315block(int x)
316{
317 return x & ~1;
318}
319
320
/**
 * Render the horizontal span of quads accumulated in setup.span:
 * compute the span extent from the left/right bounds of each quad row,
 * make sure the color/Z tiles are resident, emit each 2x2 quad, then
 * reset the span state for the next row of quads.
 */
static void
flush_spans(void)
{
   int minleft, maxright;

   /* left/right bounds for the even (0) and odd (1) rows of the quads */
   const int l0 = spu_extract(setup.span.quad, 0);
   const int l1 = spu_extract(setup.span.quad, 1);
   const int r0 = spu_extract(setup.span.quad, 2);
   const int r1 = spu_extract(setup.span.quad, 3);

   switch (setup.span.y_flags) {
   case 0x3:
      /* both odd and even lines written (both quad rows) */
      minleft = MIN2(l0, l1);
      maxright = MAX2(r0, r1);
      break;

   case 0x1:
      /* only even line written (quad top row) */
      minleft = l0;
      maxright = r0;
      break;

   case 0x2:
      /* only odd line written (quad bottom row) */
      minleft = l1;
      maxright = r1;
      break;

   default:
      /* nothing accumulated */
      return;
   }

   /* OK, we're very likely to need the tile data now.
    * clear or finish waiting if needed.
    */
   if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
      /* wait for mfc_get() to complete */
      //printf("SPU: %u: waiting for ctile\n", spu.init.id);
      wait_on_mask(1 << TAG_READ_TILE_COLOR);
      spu.cur_ctile_status = TILE_STATUS_CLEAN;
   }
   else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) {
      //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
      clear_c_tile(&spu.ctile);
      spu.cur_ctile_status = TILE_STATUS_DIRTY;
   }
   ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);

   if (spu.read_depth_stencil) {
      if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
         /* wait for mfc_get() to complete */
         //printf("SPU: %u: waiting for ztile\n", spu.init.id);
         wait_on_mask(1 << TAG_READ_TILE_Z);
         spu.cur_ztile_status = TILE_STATUS_CLEAN;
      }
      else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) {
         //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
         clear_z_tile(&spu.ztile);
         spu.cur_ztile_status = TILE_STATUS_DIRTY;
      }
      ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
   }

   /* XXX this loop could be moved into the above switch cases... */

   /* Setup for mask calculation */
   const vec_int4 quad_LlRr = setup.span.quad;
   const vec_int4 quad_RrLl = spu_rlqwbyte(quad_LlRr, 8);
   const vec_int4 quad_LLll = spu_shuffle(quad_LlRr, quad_LlRr, SHUFFLE4(A,A,B,B));
   const vec_int4 quad_RRrr = spu_shuffle(quad_RrLl, quad_RrLl, SHUFFLE4(A,A,B,B));

   const vec_int4 twos = spu_splats(2);

   /* walk the span two pixels (one quad) at a time */
   const int x = block(minleft);
   vec_int4 xs = {x, x+1, x, x+1};

   for (; spu_extract(xs, 0) <= block(maxright); xs += twos) {
      /**
       * Computes mask to indicate which pixels in the 2x2 quad are actually
       * inside the triangle's bounds.
       */

      /* Calculate ({x,x+1,x,x+1} >= {l[0],l[0],l[1],l[1]}) */
      const mask_t gt_LLll_xs = spu_cmpgt(quad_LLll, xs);
      const mask_t gte_xs_LLll = spu_nand(gt_LLll_xs, gt_LLll_xs);

      /* Calculate ({r[0],r[0],r[1],r[1]} > {x,x+1,x,x+1}) */
      const mask_t gt_RRrr_xs = spu_cmpgt(quad_RRrr, xs);

      /* Combine results to create mask */
      const mask_t mask = spu_and(gte_xs_LLll, gt_RRrr_xs);

      emit_quad(spu_extract(xs, 0), setup.span.y, mask);
   }

   /* reset span state for the next row of quads */
   setup.span.y = 0;
   setup.span.y_flags = 0;
   /* Zero right elements */
   setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));
}
425
426
#if DEBUG_VERTS
/**
 * Debug helper: print a vertex's attribute values to stderr.
 * Only compiled when DEBUG_VERTS is enabled.
 */
static void
print_vertex(const struct vertex_header *v)
{
   uint i;
   fprintf(stderr, "  Vertex: (%p)\n", v);
   for (i = 0; i < spu.vertex_info.num_attribs; i++) {
      fprintf(stderr, "     %d: %f %f %f %f\n",  i,
              spu_extract(v->data[i], 0),
              spu_extract(v->data[i], 1),
              spu_extract(v->data[i], 2),
              spu_extract(v->data[i], 3));
   }
}
#endif
442
443/* Returns the minimum of each slot of two vec_float4s as qwords.
444 * i.e. return[n] = min(q0[n],q1[n]);
445 */
446static qword
447minfq(qword q0, qword q1)
448{
449 const qword q0q1m = si_fcgt(q0, q1);
450 return si_selb(q0, q1, q0q1m);
451}
452
453/* Returns the minimum of each slot of three vec_float4s as qwords.
454 * i.e. return[n] = min(q0[n],q1[n],q2[n]);
455 */
456static qword
457min3fq(qword q0, qword q1, qword q2)
458{
459 return minfq(minfq(q0, q1), q2);
460}
461
462/* Returns the maximum of each slot of two vec_float4s as qwords.
463 * i.e. return[n] = min(q0[n],q1[n],q2[n]);
464 */
465static qword
466maxfq(qword q0, qword q1) {
467 const qword q0q1m = si_fcgt(q0, q1);
468 return si_selb(q1, q0, q0q1m);
469}
470
471/* Returns the maximum of each slot of three vec_float4s as qwords.
472 * i.e. return[n] = min(q0[n],q1[n],q2[n]);
473 */
474static qword
475max3fq(qword q0, qword q1, qword q2) {
476 return maxfq(maxfq(q0, q1), q2);
477}
478
/**
 * Sort vertices from top to bottom.
 * Compute area and determine front vs. back facing.
 * Do coarse clip test against tile bounds
 * \param vs  quadword holding the three vertex_header pointers
 * \return FALSE if tri is totally outside tile, TRUE otherwise
 */
static boolean
setup_sort_vertices(const qword vs)
{
   float area, sign;

#if DEBUG_VERTS
   /* NOTE(review): v0/v1/v2 are not declared in this function (vertices
    * now arrive packed in 'vs') -- this debug code appears to have
    * bit-rotted; confirm before enabling DEBUG_VERTS. */
   if (spu.init.id==0) {
      fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id);
      print_vertex(v0);
      print_vertex(v1);
      print_vertex(v2);
   }
#endif

   {
      /* Load the float values for various processing... */
      const qword f0 = (qword)(((const struct vertex_header*)si_to_ptr(vs))->data[0]);
      const qword f1 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 4)))->data[0]);
      const qword f2 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 8)))->data[0]);

      /* Check if triangle is completely outside the tile bounds
       * Find the min and max x and y positions of the three poits */
      const qword minf = min3fq(f0, f1, f2);
      const qword maxf = max3fq(f0, f1, f2);

      /* Compare min and max against cliprect vals */
      const qword maxsmins = si_shufb(maxf, minf, SHUFB4(A,B,a,b));
      const qword outside = si_fcgt(maxsmins, si_csflt(setup.cliprect, 0));

      /* Use a little magic to work out of the tri is visible or not */
      if(si_to_uint(si_xori(si_gb(outside), 0xc))) return FALSE;

      /* determine bottom to top order of vertices */
      /* A table of shuffle patterns for putting vertex_header pointers into
         correct order.  Quite magical. */
      const qword sort_order_patterns[] = {
         SHUFB4(A,B,C,C),
         SHUFB4(C,A,B,C),
         SHUFB4(A,C,B,C),
         SHUFB4(B,C,A,C),
         SHUFB4(B,A,C,C),
         SHUFB4(C,B,A,C) };

      /* Collate y values into two vectors for comparison.
         Using only one shuffle constant! ;) */
      const qword y_02_ = si_shufb(f0, f2, SHUFB4(0,B,b,C));
      const qword y_10_ = si_shufb(f1, f0, SHUFB4(0,B,b,C));
      const qword y_012 = si_shufb(y_02_, f1, SHUFB4(0,B,b,C));
      const qword y_120 = si_shufb(y_10_, f2, SHUFB4(0,B,b,C));

      /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */
      const qword compare = si_fcgt(y_012, y_120);
      /* Compress the result of the comparison into 4 bits */
      const qword gather = si_gb(compare);
      /* Subtract one to attain the index into the LUT.  Magical. */
      const unsigned int index = si_to_uint(gather) - 1;

      /* Load the appropriate pattern and construct the desired vector. */
      setup.vertex_headers = si_shufb(vs, vs, sort_order_patterns[index]);

      /* Using the result of the comparison, set sign.
         Very magical. */
      sign = ((si_to_uint(si_cntb(gather)) == 2) ? 1.0f : -1.0f);
   }

   /* edge vectors: bottom (vmin->vmid), major (vmin->vmax), top (vmid->vmax) */
   setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]);
   setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]);
   setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]);

   /*
    * Compute triangle's area.  Use 1/area to compute partial
    * derivatives of attributes later.
    */
   area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy;

   setup.oneOverArea = 1.0f / area;

   /* The product of area * sign indicates front/back orientation (0/1).
    * Just in case someone gets the bright idea of switching the front
    * and back constants without noticing that we're assuming their
    * values in this operation, also assert that the values are
    * what we think they are.
    */
   ASSERT(CELL_FACING_FRONT == 0);
   ASSERT(CELL_FACING_BACK == 1);
   setup.facing = (area * sign > 0.0f)
      ^ (!spu.rasterizer.front_ccw);

   return TRUE;
}
575
576
/**
 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
 * The value comes from the provoking vertex's data[slot].
 * The result will be put into setup.coef[slot].a0.
 * \param slot  which attribute slot
 */
static INLINE void
const_coeff4(uint slot)
{
   /* no variation across the triangle */
   setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0};
   setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0};
   setup.coef[slot].a0 = setup.vprovoke->data[slot];
}
590
591
/**
 * Compute a0, dadx, dady for a linearly-interpolated attribute.
 * All four vector components are set up at once.
 * The result is put into setup.coef[slot].
 * \param slot  which attribute slot
 */
static INLINE void
tri_linear_coeff4(uint slot)
{
   const vector float vmin_d = setup.vmin->data[slot];
   const vector float vmid_d = setup.vmid->data[slot];
   const vector float vmax_d = setup.vmax->data[slot];
   /* vmin's window x/y, adjusted by half a pixel */
   const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
   const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);

   /* attribute deltas along the bottom and major edges */
   vector float botda = vmid_d - vmin_d;
   vector float majda = vmax_d - vmin_d;

   /* plane-equation solve using the edge vectors */
   vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
                            spu_mul(botda, spu_splats(setup.emaj.dy)));
   vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
                            spu_mul(majda, spu_splats(setup.ebot.dx)));

   setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
   setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));

   vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
   vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);

   /* back-project vmin's value to the origin to get the constant term */
   setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
}
620
621
/**
 * Compute a0, dadx and dady for a perspective-corrected interpolant,
 * for a triangle.
 * We basically multiply the vertex value by 1/w before computing
 * the plane coefficients (a0, dadx, dady).
 * Later, when we compute the value at a particular fragment position we'll
 * divide the interpolated value by the interpolated W at that fragment.
 * \param slot  which attribute slot
 */
static void
tri_persp_coeff4(uint slot)
{
   /* vmin's window x/y, adjusted by half a pixel */
   const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
   const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);

   /* NOTE(review): data[0] element 3 is used as 1/w here -- presumably
    * the w component already holds the reciprocal; confirm upstream. */
   const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3));
   const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3));
   const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3));

   vector float vmin_d = setup.vmin->data[slot];
   vector float vmid_d = setup.vmid->data[slot];
   vector float vmax_d = setup.vmax->data[slot];

   /* scale attribute values by the vertices' w factors */
   vmin_d = spu_mul(vmin_d, vmin_w);
   vmid_d = spu_mul(vmid_d, vmid_w);
   vmax_d = spu_mul(vmax_d, vmax_w);

   /* attribute deltas along the bottom and major edges */
   vector float botda = vmid_d - vmin_d;
   vector float majda = vmax_d - vmin_d;

   /* plane-equation solve using the edge vectors */
   vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
                            spu_mul(botda, spu_splats(setup.emaj.dy)));
   vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
                            spu_mul(majda, spu_splats(setup.ebot.dx)));

   setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
   setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));

   vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
   vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);

   /* back-project vmin's value to the origin to get the constant term */
   setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
}
664
665
666
667/**
668 * Compute the setup.coef[] array dadx, dady, a0 values.
669 * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
670 */
671static void
672setup_tri_coefficients(void)
673{
674 uint i;
675
676 for (i = 0; i < spu.vertex_info.num_attribs; i++) {
677 switch (spu.vertex_info.attrib[i].interp_mode) {
678 case INTERP_NONE:
679 break;
680 case INTERP_CONSTANT:
681 const_coeff4(i);
682 break;
683 case INTERP_POS:
684 /* fall-through */
685 case INTERP_LINEAR:
686 tri_linear_coeff4(i);
687 break;
688 case INTERP_PERSPECTIVE:
689 tri_persp_coeff4(i);
690 break;
691 default:
692 ASSERT(0);
693 }
694 }
695}
696
697
/**
 * Compute the sy/lines/dxdy/sx fields of setup.emaj/etop/ebot from the
 * sorted vertices.  setup_sort_vertices() must have been called first
 * (it fills in each edge's dx/dy).
 */
static void
setup_tri_edges(void)
{
   /* vertex coords adjusted by half-pixel offsets */
   float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f;
   float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f;

   float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
   float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f;
   float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f;

   /* major edge: vmin -> vmax (spans the full height) */
   setup.emaj.sy = CEILF(vmin_y);
   setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy);
   setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy;
   setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy;

   /* top edge: vmid -> vmax */
   setup.etop.sy = CEILF(vmid_y);
   setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy);
   setup.etop.dxdy = setup.etop.dx / setup.etop.dy;
   setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy;

   /* bottom edge: vmin -> vmid */
   setup.ebot.sy = CEILF(vmin_y);
   setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy);
   setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy;
   setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy;
}
723
724
/**
 * Render the upper or lower half of a triangle.
 * Scissoring/cliprect is applied here too.
 * \param eleft   the triangle edge on the left of this sub-triangle
 * \param eright  the triangle edge on the right
 * \param lines   number of scanlines to rasterize
 */
static void
subtriangle(struct edge *eleft, struct edge *eright, unsigned lines)
{
   const int minx = setup.cliprect_minx;
   const int maxx = setup.cliprect_maxx;
   const int miny = setup.cliprect_miny;
   const int maxy = setup.cliprect_maxy;
   int y, start_y, finish_y;
   int sy = (int)eleft->sy;

   ASSERT((int)eleft->sy == (int) eright->sy);

   /* clip top/bottom */
   start_y = sy;
   finish_y = sy + lines;

   if (start_y < miny)
      start_y = miny;

   if (finish_y > maxy)
      finish_y = maxy;

   /* make the loop bounds relative to sy */
   start_y -= sy;
   finish_y -= sy;

   /*
   printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
   */

   for (y = start_y; y < finish_y; y++) {

      /* avoid accumulating adds as floats don't have the precision to
       * accurately iterate large triangle edges that way.  luckily we
       * can just multiply these days.
       *
       * this is all drowned out by the attribute interpolation anyway.
       */
      int left = (int)(eleft->sx + y * eleft->dxdy);
      int right = (int)(eright->sx + y * eright->dxdy);

      /* clip left/right */
      if (left < minx)
         left = minx;
      if (right > maxx)
         right = maxx;

      if (left < right) {
         int _y = sy + y;
         /* moved into a new row of quads: flush what we have */
         if (block(_y) != setup.span.y) {
            flush_spans();
            setup.span.y = block(_y);
         }

         /* offset: 0 = even (top) quad row, 1 = odd (bottom) quad row */
         int offset = _y&1;
         vec_int4 quad_LlRr = {left, left, right, right};
         /* Store left and right in 0 or 1 row of quad based on offset */
         setup.span.quad = spu_sel(quad_LlRr, setup.span.quad, spu_maskw(5<<offset));
         setup.span.y_flags |= 1<<offset;
      }
   }


   /* save the values so that emaj can be restarted:
    */
   eleft->sx += lines * eleft->dxdy;
   eright->sx += lines * eright->dxdy;
   eleft->sy += lines;
   eright->sy += lines;
}
798
799
/**
 * Draw triangle into tile at (tx, ty) (tile coords)
 * The tile data should have already been fetched.
 * \param vs  quadword containing the three vertex_header pointers
 * \param tx, ty  position of the current tile, in tile coords
 * \return FALSE if the triangle is entirely clipped, TRUE otherwise
 */
boolean
tri_draw(const qword vs,
         uint tx, uint ty)
{
   setup.tx = tx;
   setup.ty = ty;

   /* set clipping bounds to tile bounds */
   const qword clipbase = (qword)((vec_uint4){tx, ty});
   const qword clipmin = si_mpyui(clipbase, TILE_SIZE);
   const qword clipmax = si_ai(clipmin, TILE_SIZE);
   /* pack as {minx, miny, maxx, maxy} */
   setup.cliprect = si_shufb(clipmin, clipmax, SHUFB4(A,B,a,b));

   if(!setup_sort_vertices(vs)) {
      return FALSE;   /* totally clipped */
   }

   setup_tri_coefficients();
   setup_tri_edges();

   /* reset span accumulation state */
   setup.span.y = 0;
   setup.span.y_flags = 0;
   /* Zero right elements */
   setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));

   /* the sign of 1/area tells us which side the major edge is on */
   if (setup.oneOverArea < 0.0) {
      /* emaj on left */
      subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines );
      subtriangle( &setup.emaj, &setup.etop, setup.etop.lines );
   }
   else {
      /* emaj on right */
      subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines );
      subtriangle( &setup.etop, &setup.emaj, setup.etop.lines );
   }

   /* emit any remaining accumulated quads */
   flush_spans();

   return TRUE;
}
diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h
deleted file mode 100644
index 82e3b19ad7e..00000000000
--- a/src/gallium/drivers/cell/spu/spu_tri.h
+++ /dev/null
@@ -1,37 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#ifndef SPU_TRI_H
30#define SPU_TRI_H
31
32
33extern boolean
34tri_draw(const qword vs, uint tx, uint ty);
35
36
37#endif /* SPU_TRI_H */
diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c
deleted file mode 100644
index 24057e29e36..00000000000
--- a/src/gallium/drivers/cell/spu/spu_util.c
+++ /dev/null
@@ -1,77 +0,0 @@
1
2#include "cell/common.h"
3#include "pipe/p_shader_tokens.h"
4#include "util/u_debug.h"
5#include "tgsi/tgsi_parse.h"
6//#include "tgsi_build.h"
7#include "tgsi/tgsi_util.h"
8
9unsigned
10tgsi_util_get_src_register_swizzle(
11 const struct tgsi_src_register *reg,
12 unsigned component )
13{
14 switch( component ) {
15 case 0:
16 return reg->SwizzleX;
17 case 1:
18 return reg->SwizzleY;
19 case 2:
20 return reg->SwizzleZ;
21 case 3:
22 return reg->SwizzleW;
23 default:
24 ASSERT( 0 );
25 }
26 return 0;
27}
28
29
/**
 * Return the swizzle selector for one component of a full source register.
 * Thin wrapper around tgsi_util_get_src_register_swizzle().
 *
 * NOTE(review): this passes reg->Register where the helper takes a
 * pointer parameter -- if Register is an embedded struct this would need
 * '&'; confirm against the tgsi headers of this Mesa version.
 */
unsigned
tgsi_util_get_full_src_register_swizzle(
   const struct tgsi_full_src_register *reg,
   unsigned component )
{
   return tgsi_util_get_src_register_swizzle(
      reg->Register,
      component );
}
39
40
41unsigned
42tgsi_util_get_full_src_register_sign_mode(
43 const struct tgsi_full_src_register *reg,
44 unsigned component )
45{
46 unsigned sign_mode;
47
48 if( reg->RegisterExtMod.Absolute ) {
49 /* Consider only the post-abs negation. */
50
51 if( reg->RegisterExtMod.Negate ) {
52 sign_mode = TGSI_UTIL_SIGN_SET;
53 }
54 else {
55 sign_mode = TGSI_UTIL_SIGN_CLEAR;
56 }
57 }
58 else {
59 /* Accumulate the three negations. */
60
61 unsigned negate;
62
63 negate = reg->Register.Negate;
64 if( reg->RegisterExtMod.Negate ) {
65 negate = !negate;
66 }
67
68 if( negate ) {
69 sign_mode = TGSI_UTIL_SIGN_TOGGLE;
70 }
71 else {
72 sign_mode = TGSI_UTIL_SIGN_KEEP;
73 }
74 }
75
76 return sign_mode;
77}
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
deleted file mode 100644
index 087963960df..00000000000
--- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
+++ /dev/null
@@ -1,146 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * (C) Copyright IBM Corporation 2008
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /*
30 * Authors:
31 * Keith Whitwell <keith@tungstengraphics.com>
32 * Ian Romanick <idr@us.ibm.com>
33 */
34
35#include "pipe/p_state.h"
36#include "pipe/p_shader_tokens.h"
37#include "spu_exec.h"
38#include "spu_vertex_shader.h"
39#include "spu_main.h"
40#include "spu_dcache.h"
41
42typedef void (*spu_fetch_func)(qword *out, const qword *in,
43 const qword *shuffle_data);
44
45
46PIPE_ALIGN_VAR(16) static const qword
47fetch_shuffle_data[5] = {
48 /* Shuffle used by CVT_64_FLOAT
49 */
50 {
51 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
52 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
53 },
54
55 /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED
56 */
57 {
58 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80,
59 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80,
60 },
61
62 /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED
63 */
64 {
65 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80,
66 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80,
67 },
68
69 /* High value shuffle used by trans4x4.
70 */
71 {
72 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
73 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17
74 },
75
76 /* Low value shuffle used by trans4x4.
77 */
78 {
79 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
80 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F
81 }
82};
83
84
85/**
86 * Fetch vertex attributes for 'count' vertices.
87 */
88static void generic_vertex_fetch(struct spu_vs_context *draw,
89 struct spu_exec_machine *machine,
90 const unsigned *elts,
91 unsigned count)
92{
93 unsigned nr_attrs = draw->vertex_fetch.nr_attrs;
94 unsigned attr;
95
96 ASSERT(count <= 4);
97
98#if DRAW_DBG
99 printf("SPU: %s count = %u, nr_attrs = %u\n",
100 __FUNCTION__, count, nr_attrs);
101#endif
102
103 /* loop over vertex attributes (vertex shader inputs)
104 */
105 for (attr = 0; attr < nr_attrs; attr++) {
106 const unsigned pitch = draw->vertex_fetch.pitch[attr];
107 const uint64_t src = draw->vertex_fetch.src_ptr[attr];
108 const spu_fetch_func fetch = (spu_fetch_func)
109 (draw->vertex_fetch.code + draw->vertex_fetch.code_offset[attr]);
110 unsigned i;
111 unsigned idx;
112 const unsigned bytes_per_entry = draw->vertex_fetch.size[attr];
113 const unsigned quads_per_entry = (bytes_per_entry + 15) / 16;
114 PIPE_ALIGN_VAR(16) qword in[2 * 4];
115
116
117 /* Fetch four attributes for four vertices.
118 */
119 idx = 0;
120 for (i = 0; i < count; i++) {
121 const uint64_t addr = src + (elts[i] * pitch);
122
123#if DRAW_DBG
124 printf("SPU: fetching = 0x%llx\n", addr);
125#endif
126
127 spu_dcache_fetch_unaligned(& in[idx], addr, bytes_per_entry);
128 idx += quads_per_entry;
129 }
130
131 /* Be nice and zero out any missing vertices.
132 */
133 (void) memset(& in[idx], 0, (8 - idx) * sizeof(qword));
134
135
136 /* Convert all 4 vertices to vectors of float.
137 */
138 (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, fetch_shuffle_data);
139 }
140}
141
142
143void spu_update_vertex_fetch( struct spu_vs_context *draw )
144{
145 draw->vertex_fetch.fetch_func = generic_vertex_fetch;
146}
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c
deleted file mode 100644
index d6febd36f41..00000000000
--- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c
+++ /dev/null
@@ -1,245 +0,0 @@
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Keith Whitwell <keith@tungstengraphics.com>
31 * Brian Paul
32 * Ian Romanick <idr@us.ibm.com>
33 */
34
35#include <spu_mfcio.h>
36
37#include "pipe/p_state.h"
38#include "pipe/p_shader_tokens.h"
39#include "util/u_math.h"
40#include "draw/draw_private.h"
41#include "draw/draw_context.h"
42#include "cell/common.h"
43#include "spu_vertex_shader.h"
44#include "spu_exec.h"
45#include "spu_main.h"
46
47
48#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float))
49
50
51#define CLIP_RIGHT_BIT 0x01
52#define CLIP_LEFT_BIT 0x02
53#define CLIP_TOP_BIT 0x04
54#define CLIP_BOTTOM_BIT 0x08
55#define CLIP_FAR_BIT 0x10
56#define CLIP_NEAR_BIT 0x20
57
58
59static INLINE float
60dot4(const float *a, const float *b)
61{
62 return (a[0]*b[0] +
63 a[1]*b[1] +
64 a[2]*b[2] +
65 a[3]*b[3]);
66}
67
68static INLINE unsigned
69compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
70{
71 unsigned mask = 0;
72 unsigned i;
73
74 /* Do the hardwired planes first:
75 */
76 if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
77 if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
78 if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
79 if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
80 if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
81 if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
82
83 /* Followed by any remaining ones:
84 */
85 for (i = 6; i < nr; i++) {
86 if (dot4(clip, plane[i]) < 0)
87 mask |= (1<<i);
88 }
89
90 return mask;
91}
92
93
94/**
95 * Transform vertices with the current vertex program/shader
96 * Up to four vertices can be shaded at a time.
97 * \param vbuffer the input vertex data
98 * \param elts indexes of four input vertices
99 * \param count number of vertices to shade [1..4]
100 * \param vOut array of pointers to four output vertices
101 */
102static void
103run_vertex_program(struct spu_vs_context *draw,
104 unsigned elts[4], unsigned count,
105 const uint64_t *vOut)
106{
107 struct spu_exec_machine *machine = &draw->machine;
108 unsigned int j;
109
110 PIPE_ALIGN_VAR(16) struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS];
111 PIPE_ALIGN_VAR(16) struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS];
112 const float *scale = draw->viewport.scale;
113 const float *trans = draw->viewport.translate;
114
115 ASSERT(count <= 4);
116
117 machine->Processor = TGSI_PROCESSOR_VERTEX;
118
119 ASSERT_ALIGN16(draw->constants);
120 machine->Consts = (float (*)[4]) draw->constants;
121
122 machine->Inputs = inputs;
123 machine->Outputs = outputs;
124
125 spu_vertex_fetch( draw, machine, elts, count );
126
127 /* run shader */
128 spu_exec_machine_run( machine );
129
130
131 /* store machine results */
132 for (j = 0; j < count; j++) {
133 unsigned slot;
134 float x, y, z, w;
135 PIPE_ALIGN_VAR(16)
136 unsigned char buffer[sizeof(struct vertex_header)
137 + MAX_VERTEX_SIZE];
138 struct vertex_header *const tmpOut =
139 (struct vertex_header *) buffer;
140 const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header)
141 + (sizeof(float) * 4
142 * draw->num_vs_outputs));
143
144 mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0);
145 wait_on_mask(1 << TAG_VERTEX_BUFFER);
146
147
148 /* Handle attr[0] (position) specially:
149 *
150 * XXX: Computing the clipmask should be done in the vertex
151 * program as a set of DP4 instructions appended to the
152 * user-provided code.
153 */
154 x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j];
155 y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j];
156 z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j];
157 w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j];
158
159 tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane,
160 draw->nr_planes);
161 tmpOut->edgeflag = 1;
162
163 /* divide by w */
164 w = 1.0f / w;
165 x *= w;
166 y *= w;
167 z *= w;
168
169 /* Viewport mapping */
170 tmpOut->data[0][0] = x * scale[0] + trans[0];
171 tmpOut->data[0][1] = y * scale[1] + trans[1];
172 tmpOut->data[0][2] = z * scale[2] + trans[2];
173 tmpOut->data[0][3] = w;
174
175 /* Remaining attributes are packed into sequential post-transform
176 * vertex attrib slots.
177 */
178 for (slot = 1; slot < draw->num_vs_outputs; slot++) {
179 tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
180 tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
181 tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
182 tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
183 }
184
185 mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0);
186 } /* loop over vertices */
187}
188
189
190PIPE_ALIGN_VAR(16) unsigned char
191immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32];
192
193
194void
195spu_bind_vertex_shader(struct spu_vs_context *draw,
196 struct cell_shader_info *vs)
197{
198 const unsigned immediate_addr = vs->immediates;
199 const unsigned immediate_size =
200 ROUNDUP16((sizeof(float) * 4 * vs->num_immediates)
201 + (immediate_addr & 0x0f));
202
203
204 mfc_get(immediates, immediate_addr & ~0x0f, immediate_size,
205 TAG_VERTEX_BUFFER, 0, 0);
206
207 draw->machine.Instructions = (struct tgsi_full_instruction *)
208 vs->instructions;
209 draw->machine.NumInstructions = vs->num_instructions;
210
211 draw->machine.Declarations = (struct tgsi_full_declaration *)
212 vs->declarations;
213 draw->machine.NumDeclarations = vs->num_declarations;
214
215 draw->num_vs_outputs = vs->num_outputs;
216
217 /* specify the shader to interpret/execute */
218 spu_exec_machine_init(&draw->machine,
219 PIPE_MAX_SAMPLERS,
220 NULL /*samplers*/,
221 PIPE_SHADER_VERTEX);
222
223 wait_on_mask(1 << TAG_VERTEX_BUFFER);
224
225 (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f],
226 sizeof(float) * 4 * vs->num_immediates);
227}
228
229
230void
231spu_execute_vertex_shader(struct spu_vs_context *draw,
232 const struct cell_command_vs *vs)
233{
234 unsigned i;
235
236 (void) memcpy(draw->plane, vs->plane, sizeof(float) * 4 * vs->nr_planes);
237 draw->nr_planes = vs->nr_planes;
238 draw->vertex_fetch.nr_attrs = vs->nr_attrs;
239
240 for (i = 0; i < vs->num_elts; i += 4) {
241 const unsigned batch_size = MIN2(vs->num_elts - i, 4);
242
243 run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]);
244 }
245}
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h
deleted file mode 100644
index 4c74f5e74d5..00000000000
--- a/src/gallium/drivers/cell/spu/spu_vertex_shader.h
+++ /dev/null
@@ -1,66 +0,0 @@
1#ifndef SPU_VERTEX_SHADER_H
2#define SPU_VERTEX_SHADER_H
3
4#include "cell/common.h"
5#include "pipe/p_format.h"
6#include "spu_exec.h"
7
8struct spu_vs_context;
9
10typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw,
11 struct spu_exec_machine *machine,
12 const unsigned *elts,
13 unsigned count );
14
15struct spu_vs_context {
16 struct pipe_viewport_state viewport;
17
18 struct {
19 uint64_t src_ptr[PIPE_MAX_ATTRIBS];
20 unsigned pitch[PIPE_MAX_ATTRIBS];
21 unsigned size[PIPE_MAX_ATTRIBS];
22 unsigned code_offset[PIPE_MAX_ATTRIBS];
23 unsigned nr_attrs;
24 boolean dirty;
25
26 spu_full_fetch_func fetch_func;
27 void *code;
28 } vertex_fetch;
29
30 /* Clip derived state:
31 */
32 float plane[12][4];
33 unsigned nr_planes;
34
35 struct spu_exec_machine machine;
36 const float (*constants)[4];
37
38 unsigned num_vs_outputs;
39};
40
41extern void spu_update_vertex_fetch(struct spu_vs_context *draw);
42
43static INLINE void spu_vertex_fetch(struct spu_vs_context *draw,
44 struct spu_exec_machine *machine,
45 const unsigned *elts,
46 unsigned count)
47{
48 if (draw->vertex_fetch.dirty) {
49 spu_update_vertex_fetch(draw);
50 draw->vertex_fetch.dirty = 0;
51 }
52
53 (*draw->vertex_fetch.fetch_func)(draw, machine, elts, count);
54}
55
56struct cell_command_vs;
57
58extern void
59spu_bind_vertex_shader(struct spu_vs_context *draw,
60 struct cell_shader_info *vs);
61
62extern void
63spu_execute_vertex_shader(struct spu_vs_context *draw,
64 const struct cell_command_vs *vs);
65
66#endif /* SPU_VERTEX_SHADER_H */
diff --git a/src/gallium/targets/libgl-xlib/Makefile b/src/gallium/targets/libgl-xlib/Makefile
index f8f6c81b3f2..867b2da323b 100644
--- a/src/gallium/targets/libgl-xlib/Makefile
+++ b/src/gallium/targets/libgl-xlib/Makefile
@@ -29,7 +29,6 @@ DEFINES += \
29 -DGALLIUM_RBUG \ 29 -DGALLIUM_RBUG \
30 -DGALLIUM_TRACE \ 30 -DGALLIUM_TRACE \
31 -DGALLIUM_GALAHAD 31 -DGALLIUM_GALAHAD
32#-DGALLIUM_CELL will be defined by the config */
33 32
34XLIB_TARGET_SOURCES = \ 33XLIB_TARGET_SOURCES = \
35 xlib.c 34 xlib.c
@@ -38,7 +37,6 @@ XLIB_TARGET_SOURCES = \
38XLIB_TARGET_OBJECTS = $(XLIB_TARGET_SOURCES:.c=.o) 37XLIB_TARGET_OBJECTS = $(XLIB_TARGET_SOURCES:.c=.o)
39 38
40 39
41# Note: CELL_SPU_LIB is only defined for cell configs
42 40
43LIBS = \ 41LIBS = \
44 $(GALLIUM_DRIVERS) \ 42 $(GALLIUM_DRIVERS) \
@@ -50,7 +48,6 @@ LIBS = \
50 $(TOP)/src/mapi/glapi/libglapi.a \ 48 $(TOP)/src/mapi/glapi/libglapi.a \
51 $(TOP)/src/mesa/libmesagallium.a \ 49 $(TOP)/src/mesa/libmesagallium.a \
52 $(GALLIUM_AUXILIARIES) \ 50 $(GALLIUM_AUXILIARIES) \
53 $(CELL_SPU_LIB) \
54 51
55 52
56# LLVM 53# LLVM
diff --git a/src/gallium/targets/libgl-xlib/SConscript b/src/gallium/targets/libgl-xlib/SConscript
index ad8b0992e46..25a4582d7a3 100644
--- a/src/gallium/targets/libgl-xlib/SConscript
+++ b/src/gallium/targets/libgl-xlib/SConscript
@@ -42,11 +42,6 @@ if True:
42if env['llvm']: 42if env['llvm']:
43 env.Append(CPPDEFINES = ['GALLIUM_LLVMPIPE']) 43 env.Append(CPPDEFINES = ['GALLIUM_LLVMPIPE'])
44 env.Prepend(LIBS = [llvmpipe]) 44 env.Prepend(LIBS = [llvmpipe])
45
46if False:
47 # TODO: Detect Cell SDK
48 env.Append(CPPDEFINES = 'GALLIUM_CELL')
49 env.Prepend(LIBS = [cell])
50 45
51# libGL.so.1.5 46# libGL.so.1.5
52libgl_1_5 = env.SharedLibrary( 47libgl_1_5 = env.SharedLibrary(
diff --git a/src/gallium/targets/libgl-xlib/xlib.c b/src/gallium/targets/libgl-xlib/xlib.c
index 1a5892b94a0..0ede7e6096b 100644
--- a/src/gallium/targets/libgl-xlib/xlib.c
+++ b/src/gallium/targets/libgl-xlib/xlib.c
@@ -42,7 +42,7 @@
42 42
43 43
44/* Helper function to build a subset of a driver stack consisting of 44/* Helper function to build a subset of a driver stack consisting of
45 * one of the software rasterizers (cell, llvmpipe, softpipe) and the 45 * one of the software rasterizers (llvmpipe, softpipe) and the
46 * xlib winsys. 46 * xlib winsys.
47 */ 47 */
48static struct pipe_screen * 48static struct pipe_screen *