summaryrefslogtreecommitdiff
path: root/src/sna/kgem_debug_gen3.c
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2011-04-08 07:17:14 +0100
committerChris Wilson <chris@chris-wilson.co.uk>2011-06-04 09:19:46 +0100
commitbcef98af561939aa48d9236b2dfa2c5626adf4cb (patch)
tree9d05558947a97595a6fdece968b50eeae45bbfb1 /src/sna/kgem_debug_gen3.c
parent340cfb7f5271fd1df4c8948e5c9336f5b69a6e6c (diff)
sna: Introduce a new acceleration model.
The premise is that switching between rings (i.e. the BLT and RENDER rings) on SandyBridge imposes a large latency overhead whilst rendering. The cause is that in order to switch rings, we need to split the batch earlier than is desired and to add serialisation between the rings. Both of which incur large overhead. By switching to using a pure 3D blit engine (ok, not so pure as the BLT engine still has uses for the core drawing model which can not be easily represented without a combinatorial explosion of shaders) we can take advantage of additional efficiencies, such as relative relocations, that have been incorporated into recent hardware advances. However, even older hardware performs better from avoiding the implicit context switches and from the batching efficiency of the 3D pipeline... But this is X, and PolyGlyphBlt still exists and remains in use. So for the operations that are not worth accelerating in hardware, we introduce a shadow buffer mechanism through out and reintroduce pixmap migration. Doing this efficiently is the cornerstone of ensuring that we do exploit the increased potential of recent hardware for running old applications and environments (i.e. so that the latest and greatest chip is actually faster than gen2!) For the curious, sna is SandyBridge's New Acceleration. If you are running older chipsets and welcome the performance increase offered by this patch, then you may choose to call it Snazzy instead. Speedups ======== gen3 firefox-fishtank 1203584.56 (1203842.75 0.01%) -> 85561.71 (125146.44 14.87%): 14.07x speedup gen5 grads-heat-map 3385.42 (3489.73 1.44%) -> 350.29 (350.75 0.18%): 9.66x speedup gen3 xfce4-terminal-a1 4179.02 (4180.09 0.06%) -> 503.90 (531.88 4.48%): 8.29x speedup gen4 grads-heat-map 2458.66 (2826.34 4.64%) -> 348.82 (349.20 0.29%): 7.05x speedup gen3 grads-heat-map 1443.33 (1445.32 0.09%) -> 298.55 (298.76 0.05%): 4.83x speedup gen3 swfdec-youtube 3836.14 (3894.14 0.95%) -> 889.84 (979.56 5.99%): 4.31x speedup gen6 grads-heat-map 742.11 (744.44 0.15%) -> 172.51 (172.93 0.20%): 4.30x speedup gen3 firefox-talos-svg 71740.44 (72370.13 0.59%) -> 21959.29 (21995.09 0.68%): 3.27x speedup gen5 gvim 8045.51 (8071.47 0.17%) -> 2589.38 (3246.78 10.74%): 3.11x speedup gen6 poppler 3800.78 (3817.92 0.24%) -> 1227.36 (1230.12 0.30%): 3.10x speedup gen6 gnome-terminal-vim 9106.84 (9111.56 0.03%) -> 3459.49 (3478.52 0.25%): 2.63x speedup gen5 midori-zoomed 9564.53 (9586.58 0.17%) -> 3677.73 (3837.02 2.02%): 2.60x speedup gen5 gnome-terminal-vim 38167.25 (38215.82 0.08%) -> 14901.09 (14902.28 0.01%): 2.56x speedup gen5 poppler 13575.66 (13605.04 0.16%) -> 5554.27 (5555.84 0.01%): 2.44x speedup gen5 swfdec-giant-steps 8941.61 (8988.72 0.52%) -> 3851.98 (3871.01 0.93%): 2.32x speedup gen5 xfce4-terminal-a1 18956.60 (18986.90 0.07%) -> 8362.75 (8365.70 0.01%): 2.27x speedup gen5 firefox-fishtank 88750.31 (88858.23 0.14%) -> 39164.57 (39835.54 0.80%): 2.27x speedup gen3 midori-zoomed 2392.13 (2397.82 0.14%) -> 1109.96 (1303.10 30.35%): 2.16x speedup gen6 gvim 2510.34 (2513.34 0.20%) -> 1200.76 (1204.30 0.22%): 2.09x speedup gen5 firefox-planet-gnome 40478.16 (40565.68 0.09%) -> 19606.22 (19648.79 0.16%): 2.06x speedup gen5 gnome-system-monitor 10344.47 (10385.62 0.29%) -> 5136.69 (5256.85 1.15%): 2.01x speedup gen3 poppler 2595.23 (2603.10 0.17%) -> 1297.56 (1302.42 0.61%): 2.00x speedup gen6 firefox-talos-gfx 7184.03 (7194.97 0.13%) -> 3806.31 (3811.66 0.06%): 1.89x speedup gen5 evolution 8739.25 (8766.12 0.27%) -> 4817.54 (5050.96 1.54%): 1.81x speedup gen3 evolution 1684.06 (1696.88 0.35%) -> 1004.99 (1008.55 0.85%): 1.68x speedup gen3 gnome-terminal-vim 4285.13 (4287.68 0.04%) -> 2715.97 (3202.17 13.52%): 1.58x speedup gen5 swfdec-youtube 5843.94 (5951.07 0.91%) -> 3810.86 (3826.04 1.32%): 1.53x speedup gen4 poppler 7496.72 (7558.83 0.58%) -> 5125.08 (5247.65 1.44%): 1.46x speedup gen4 gnome-terminal-vim 21126.24 (21292.08 0.85%) -> 14590.25 (15066.33 1.80%): 1.45x speedup gen5 firefox-talos-svg 99873.69 (100300.95 0.37%) -> 70745.66 (70818.86 0.05%): 1.41x speedup gen4 firefox-planet-gnome 28205.10 (28304.45 0.27%) -> 19996.11 (20081.44 0.56%): 1.41x speedup gen5 firefox-talos-gfx 93070.85 (93194.72 0.10%) -> 67687.93 (70374.37 1.30%): 1.37x speedup gen4 evolution 6696.25 (6854.14 0.85%) -> 4958.62 (5027.73 0.85%): 1.35x speedup gen3 swfdec-giant-steps 2538.03 (2539.30 0.04%) -> 1895.71 (2050.62 62.43%): 1.34x speedup gen4 gvim 4356.18 (4422.78 0.70%) -> 3276.31 (3281.69 0.13%): 1.33x speedup gen6 evolution 1242.13 (1245.44 0.72%) -> 953.76 (954.54 0.07%): 1.30x speedup gen6 firefox-planet-gnome 4554.23 (4560.69 0.08%) -> 3758.76 (3768.97 0.28%): 1.21x speedup gen3 firefox-talos-gfx 6264.13 (6284.65 0.30%) -> 5261.56 (5370.87 1.28%): 1.19x speedup gen4 midori-zoomed 4771.13 (4809.90 0.73%) -> 4037.03 (4118.93 0.85%): 1.18x speedup gen6 swfdec-giant-steps 1557.06 (1560.13 0.12%) -> 1336.34 (1341.29 0.32%): 1.17x speedup gen4 firefox-talos-gfx 80767.28 (80986.31 0.17%) -> 69629.08 (69721.71 0.06%): 1.16x speedup gen6 midori-zoomed 1463.70 (1463.76 0.08%) -> 1331.45 (1336.56 0.22%): 1.10x speedup Slowdowns ========= gen6 xfce4-terminal-a1 2030.25 (2036.23 0.25%) -> 2144.60 (2240.31 4.29%): 1.06x slowdown gen4 swfdec-youtube 3580.00 (3597.23 3.92%) -> 3826.90 (3862.24 0.91%): 1.07x slowdown gen4 firefox-talos-svg 66112.25 (66256.51 0.11%) -> 71433.40 (71584.31 0.14%): 1.08x slowdown gen4 gnome-system-monitor 5691.60 (5724.03 0.56%) -> 6707.56 (6747.83 0.33%): 1.18x slowdown gen3 ocitysmap 3494.05 (3502.44 0.20%) -> 4321.99 (4524.42 2.78%): 1.24x slowdown gen4 ocitysmap 3628.42 (3641.66 9.37%) -> 5177.16 (5828.74 8.38%): 1.43x slowdown gen5 ocitysmap 4027.77 (4068.11 0.80%) -> 5748.26 (6282.25 7.38%): 1.43x slowdown gen6 ocitysmap 1401.61 (1402.24 0.40%) -> 2365.74 (2379.14 4.12%): 1.69x slowdown [Note the performance regression for ocitysmap comes from that we now attempt to support rendering to and (more importantly) from large surfaces. By enabling such operations is the only way to one day be faster than purely using the CPU, in the meantime we suffer regression due to the increased migration and aperture thrashing. The other couple of regressions will be eliminated with improved span and shader support, now that the framework for such is in place.] The performance increase for Cairo completely overlooks the other critical aspects of the architecture: World of Padman: gen3 (800x600): 57.5 -> 96.2 gen4 (800x600): 47.8 -> 74.6 gen6 (1366x768): 100.4 -> 140.3 [F15] 144.3 -> 146.4 [drm-intel-next] x11perf (gen6); aa10text: 3.47 -> 14.3 Mglyphs/s [unthrottled!] copywinwin10: 1.66 -> 1.99 Mops/s copywinpix10: 2.28 -> 2.98 Mops/s And we do not have a good measure for how much improvement the reworking of the fallback paths give, except that xterm is now over 4x faster... PS: This depends upon the Xorg patchset "Remove the cacheing of the last scratch PixmapRec" for correct invalidations of scratch Pixmaps (used by the dix to implement SHM operations, used by chromium and gtk+ pixbufs. PPS: ./configure --enable-sna Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src/sna/kgem_debug_gen3.c')
-rw-r--r--src/sna/kgem_debug_gen3.c1615
1 files changed, 1615 insertions, 0 deletions
diff --git a/src/sna/kgem_debug_gen3.c b/src/sna/kgem_debug_gen3.c
new file mode 100644
index 00000000..da1d9fc9
--- /dev/null
+++ b/src/sna/kgem_debug_gen3.c
@@ -0,0 +1,1615 @@
+/*
+ * Copyright © 2007-2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/mman.h>
+#include <assert.h>
+
+#include "sna.h"
+#include "sna_reg.h"
+
+#include "gen3_render.h"
+
+#include "kgem_debug.h"
+
+enum type {
+ T_FLOAT32,
+ T_FLOAT16,
+};
+
+static struct state {
+ struct vertex_buffer {
+ int handle;
+ void *base;
+ const char *ptr;
+ int pitch;
+
+ struct kgem_bo *current;
+ } vb;
+ struct vertex_elements {
+ int offset;
+ bool valid;
+ enum type type;
+ int size;
+ uint8_t swizzle[4];
+ } ve[33];
+ int num_ve;
+} state;
+
+static float int_as_float(int i)
+{
+ union {
+ float f;
+ int i;
+ } x;
+ x.i = i;
+ return x.f;
+}
+
+static void gen3_update_vertex_buffer_addr(struct kgem *kgem,
+ uint32_t offset)
+{
+ uint32_t handle;
+ struct kgem_bo *bo = NULL;
+ void *base, *ptr;
+ int i;
+
+ offset *= sizeof(uint32_t);
+
+ for (i = 0; i < kgem->nreloc; i++)
+ if (kgem->reloc[i].offset == offset)
+ break;
+ assert(i < kgem->nreloc);
+ handle = kgem->reloc[i].target_handle;
+
+ if (handle == 0) {
+ base = kgem->batch;
+ } else {
+ list_for_each_entry(bo, &kgem->next_request->buffers, request)
+ if (bo->handle == handle)
+ break;
+ assert(&bo->request != &kgem->next_request->buffers);
+ base = kgem_bo_map(kgem, bo, PROT_READ);
+ }
+ ptr = (char *)base + kgem->reloc[i].delta;
+
+ if (state.vb.current)
+ munmap(state.vb.base, state.vb.current->size);
+
+ state.vb.current = bo;
+ state.vb.base = base;
+ state.vb.ptr = ptr;
+}
+
+static void gen3_update_vertex_buffer_pitch(struct kgem *kgem,
+ uint32_t offset)
+{
+ state.vb.pitch = kgem->batch[offset] >> 16 & 0x3f;
+ state.vb.pitch *= sizeof(uint32_t);
+}
+
+static void gen3_update_vertex_elements(struct kgem *kgem, uint32_t data)
+{
+ state.ve[1].valid = 1;
+
+ switch ((data >> 6) & 7) {
+ case 1:
+ state.ve[1].type = T_FLOAT32;
+ state.ve[1].size = 3;
+ state.ve[1].swizzle[0] = 1;
+ state.ve[1].swizzle[1] = 1;
+ state.ve[1].swizzle[2] = 1;
+ state.ve[1].swizzle[3] = 3;
+ break;
+ case 2:
+ state.ve[1].type = T_FLOAT32;
+ state.ve[1].size = 4;
+ state.ve[1].swizzle[0] = 1;
+ state.ve[1].swizzle[1] = 1;
+ state.ve[1].swizzle[2] = 1;
+ state.ve[1].swizzle[3] = 1;
+ break;
+ case 3:
+ state.ve[1].type = T_FLOAT32;
+ state.ve[1].size = 2;
+ state.ve[1].swizzle[0] = 1;
+ state.ve[1].swizzle[1] = 1;
+ state.ve[1].swizzle[2] = 2;
+ state.ve[1].swizzle[3] = 3;
+ break;
+ case 4:
+ state.ve[1].type = T_FLOAT32;
+ state.ve[1].size = 3;
+ state.ve[1].swizzle[0] = 1;
+ state.ve[1].swizzle[1] = 1;
+ state.ve[1].swizzle[2] = 3;
+ state.ve[1].swizzle[3] = 1;
+ break;
+ }
+
+ state.ve[2].valid = 0;
+ state.ve[3].valid = 0;
+}
+
+static void gen3_update_vertex_texcoords(struct kgem *kgem, uint32_t data)
+{
+ int id;
+ for (id = 0; id < 8; id++) {
+ uint32_t fmt = (data >> (id*4)) & 0xf;
+ int width;
+
+ state.ve[id+4].valid = fmt != 0xf;
+
+ width = 0;
+ switch (fmt) {
+ case 0:
+ state.ve[id+4].type = T_FLOAT32;
+ width = state.ve[id+4].size = 2;
+ break;
+ case 1:
+ state.ve[id+4].type = T_FLOAT32;
+ width = state.ve[id+4].size = 3;
+ break;
+ case 2:
+ state.ve[id+4].type = T_FLOAT32;
+ width = state.ve[id+4].size = 4;
+ break;
+ case 3:
+ state.ve[id+4].type = T_FLOAT32;
+ width = state.ve[id+4].size = 1;
+ break;
+ case 4:
+ state.ve[id+4].type = T_FLOAT16;
+ width = state.ve[id+4].size = 2;
+ break;
+ case 5:
+ state.ve[id+4].type = T_FLOAT16;
+ width = state.ve[id+4].size = 4;
+ break;
+ }
+
+ state.ve[id+4].swizzle[0] = width > 0 ? 1 : 2;
+ state.ve[id+4].swizzle[1] = width > 1 ? 1 : 2;
+ state.ve[id+4].swizzle[2] = width > 2 ? 1 : 2;
+ state.ve[id+4].swizzle[3] = width > 3 ? 1 : 2;
+ }
+}
+
+static void gen3_update_vertex_elements_offsets(struct kgem *kgem)
+{
+ int i, offset;
+
+ for (i = offset = 0; i < ARRAY_SIZE(state.ve); i++) {
+ int size;
+
+ if (!state.ve[i].valid)
+ continue;
+
+ size = 0;
+ switch (state.ve[i].type) {
+ case T_FLOAT16:
+ size = 4;
+ break;
+ case T_FLOAT32:
+ size = 4;
+ break;
+ }
+ state.ve[i].offset = offset;
+ offset += size * state.ve[i].size;
+ state.num_ve = i;
+ }
+}
+
+static void vertices_float32_out(const struct vertex_elements *ve, const float *f, int max)
+{
+ int c;
+
+ ErrorF("(");
+ for (c = 0; c < max; c++) {
+ switch (ve->swizzle[c]) {
+ case 0: ErrorF("#"); break;
+ case 1: ErrorF("%f", f[c]); break;
+ case 2: ErrorF("0.0"); break;
+ case 3: ErrorF("1.0"); break;
+ case 4: ErrorF("0x1"); break;
+ case 5: break;
+ default: ErrorF("?");
+ }
+ if (c < max-1)
+ ErrorF(", ");
+ }
+ ErrorF(")");
+}
+
+static void ve_out(const struct vertex_elements *ve, const void *ptr)
+{
+ switch (ve->type) {
+ case T_FLOAT32:
+ vertices_float32_out(ve, ptr, ve->size);
+ break;
+ case T_FLOAT16:
+ //vertices_float16_out(ve, ptr, ve->size);
+ break;
+ }
+}
+
+static void indirect_vertex_out(struct kgem *kgem, uint32_t v)
+{
+ const struct vertex_buffer *vb = &state.vb;
+ int i = 1;
+
+ do {
+ const struct vertex_elements *ve = &state.ve[i];
+ const void *ptr = vb->ptr + v * vb->pitch + ve->offset;
+
+ if (!ve->valid)
+ continue;
+
+ ve_out(ve, ptr);
+
+ while (++i <= state.num_ve && !state.ve[i].valid)
+ ;
+
+ if (i <= state.num_ve)
+ ErrorF(", ");
+ } while (i <= state.num_ve);
+}
+
+static int inline_vertex_out(struct kgem *kgem, void *base)
+{
+ const struct vertex_buffer *vb = &state.vb;
+ int i = 1;
+
+ do {
+ const struct vertex_elements *ve = &state.ve[i];
+ const void *ptr = (char *)base + ve->offset;
+
+ if (!ve->valid)
+ continue;
+
+ ve_out(ve, ptr);
+
+ while (++i <= state.num_ve && !state.ve[i].valid)
+ ;
+
+ if (i <= state.num_ve)
+ ErrorF(", ");
+ } while (i <= state.num_ve);
+
+ return vb->pitch;
+}
+
+static int
+gen3_decode_3d_1c(struct kgem *kgem, uint32_t offset)
+{
+ uint32_t *data = kgem->batch + offset;
+ uint32_t opcode;
+
+ opcode = (data[0] & 0x00f80000) >> 19;
+
+ switch (opcode) {
+ case 0x11:
+ kgem_debug_print(data, offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n");
+ return 1;
+ case 0x10:
+ kgem_debug_print(data, offset, 0, "3DSTATE_SCISSOR_ENABLE %s\n",
+ data[0]&1?"enabled":"disabled");
+ return 1;
+ case 0x01:
+ kgem_debug_print(data, offset, 0, "3DSTATE_MAP_COORD_SET_I830\n");
+ return 1;
+ case 0x0a:
+ kgem_debug_print(data, offset, 0, "3DSTATE_MAP_CUBE_I830\n");
+ return 1;
+ case 0x05:
+ kgem_debug_print(data, offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n");
+ return 1;
+ }
+
+ kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n",
+ opcode);
+ assert(0);
+ return 1;
+}
+
+/** Sets the string dstname to describe the destination of the PS instruction */
+static void
+gen3_get_instruction_dst(uint32_t *data, int i, char *dstname, int do_mask)
+{
+ uint32_t a0 = data[i];
+ int dst_nr = (a0 >> 14) & 0xf;
+ char dstmask[8];
+ char *sat;
+
+ if (do_mask) {
+ if (((a0 >> 10) & 0xf) == 0xf) {
+ dstmask[0] = 0;
+ } else {
+ int dstmask_index = 0;
+
+ dstmask[dstmask_index++] = '.';
+ if (a0 & (1 << 10))
+ dstmask[dstmask_index++] = 'x';
+ if (a0 & (1 << 11))
+ dstmask[dstmask_index++] = 'y';
+ if (a0 & (1 << 12))
+ dstmask[dstmask_index++] = 'z';
+ if (a0 & (1 << 13))
+ dstmask[dstmask_index++] = 'w';
+ dstmask[dstmask_index++] = 0;
+ }
+
+ if (a0 & (1 << 22))
+ sat = ".sat";
+ else
+ sat = "";
+ } else {
+ dstmask[0] = 0;
+ sat = "";
+ }
+
+ switch ((a0 >> 19) & 0x7) {
+ case 0:
+ assert(dst_nr <= 15);
+ sprintf(dstname, "R%d%s%s", dst_nr, dstmask, sat);
+ break;
+ case 4:
+ assert(dst_nr == 0);
+ sprintf(dstname, "oC%s%s", dstmask, sat);
+ break;
+ case 5:
+ assert(dst_nr == 0);
+ sprintf(dstname, "oD%s%s", dstmask, sat);
+ break;
+ case 6:
+ assert(dst_nr <= 3);
+ sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat);
+ break;
+ default:
+ sprintf(dstname, "RESERVED");
+ break;
+ }
+}
+
+static char *
+gen3_get_channel_swizzle(uint32_t select)
+{
+ switch (select & 0x7) {
+ case 0:
+ return (select & 8) ? "-x" : "x";
+ case 1:
+ return (select & 8) ? "-y" : "y";
+ case 2:
+ return (select & 8) ? "-z" : "z";
+ case 3:
+ return (select & 8) ? "-w" : "w";
+ case 4:
+ return (select & 8) ? "-0" : "0";
+ case 5:
+ return (select & 8) ? "-1" : "1";
+ default:
+ return (select & 8) ? "-bad" : "bad";
+ }
+}
+
+static void
+gen3_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name)
+{
+ switch (src_type) {
+ case 0:
+ sprintf(name, "R%d", src_nr);
+ assert(src_nr <= 15);
+ break;
+ case 1:
+ if (src_nr < 8)
+ sprintf(name, "T%d", src_nr);
+ else if (src_nr == 8)
+ sprintf(name, "DIFFUSE");
+ else if (src_nr == 9)
+ sprintf(name, "SPECULAR");
+ else if (src_nr == 10)
+ sprintf(name, "FOG");
+ else {
+ assert(0);
+ sprintf(name, "RESERVED");
+ }
+ break;
+ case 2:
+ sprintf(name, "C%d", src_nr);
+ assert(src_nr <= 31);
+ break;
+ case 4:
+ sprintf(name, "oC");
+ assert(src_nr == 0);
+ break;
+ case 5:
+ sprintf(name, "oD");
+ assert(src_nr == 0);
+ break;
+ case 6:
+ sprintf(name, "U%d", src_nr);
+ assert(src_nr <= 3);
+ break;
+ default:
+ sprintf(name, "RESERVED");
+ assert(0);
+ break;
+ }
+}
+
+static void
+gen3_get_instruction_src0(uint32_t *data, int i, char *srcname)
+{
+ uint32_t a0 = data[i];
+ uint32_t a1 = data[i + 1];
+ int src_nr = (a0 >> 2) & 0x1f;
+ char *swizzle_x = gen3_get_channel_swizzle((a1 >> 28) & 0xf);
+ char *swizzle_y = gen3_get_channel_swizzle((a1 >> 24) & 0xf);
+ char *swizzle_z = gen3_get_channel_swizzle((a1 >> 20) & 0xf);
+ char *swizzle_w = gen3_get_channel_swizzle((a1 >> 16) & 0xf);
+ char swizzle[100];
+
+ gen3_get_instruction_src_name((a0 >> 7) & 0x7, src_nr, srcname);
+ sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
+ if (strcmp(swizzle, ".xyzw") != 0)
+ strcat(srcname, swizzle);
+}
+
+static void
+gen3_get_instruction_src1(uint32_t *data, int i, char *srcname)
+{
+ uint32_t a1 = data[i + 1];
+ uint32_t a2 = data[i + 2];
+ int src_nr = (a1 >> 8) & 0x1f;
+ char *swizzle_x = gen3_get_channel_swizzle((a1 >> 4) & 0xf);
+ char *swizzle_y = gen3_get_channel_swizzle((a1 >> 0) & 0xf);
+ char *swizzle_z = gen3_get_channel_swizzle((a2 >> 28) & 0xf);
+ char *swizzle_w = gen3_get_channel_swizzle((a2 >> 24) & 0xf);
+ char swizzle[100];
+
+ gen3_get_instruction_src_name((a1 >> 13) & 0x7, src_nr, srcname);
+ sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
+ if (strcmp(swizzle, ".xyzw") != 0)
+ strcat(srcname, swizzle);
+}
+
+static void
+gen3_get_instruction_src2(uint32_t *data, int i, char *srcname)
+{
+ uint32_t a2 = data[i + 2];
+ int src_nr = (a2 >> 16) & 0x1f;
+ char *swizzle_x = gen3_get_channel_swizzle((a2 >> 12) & 0xf);
+ char *swizzle_y = gen3_get_channel_swizzle((a2 >> 8) & 0xf);
+ char *swizzle_z = gen3_get_channel_swizzle((a2 >> 4) & 0xf);
+ char *swizzle_w = gen3_get_channel_swizzle((a2 >> 0) & 0xf);
+ char swizzle[100];
+
+ gen3_get_instruction_src_name((a2 >> 21) & 0x7, src_nr, srcname);
+ sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
+ if (strcmp(swizzle, ".xyzw") != 0)
+ strcat(srcname, swizzle);
+}
+
+static void
+gen3_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name)
+{
+ switch (src_type) {
+ case 0:
+ sprintf(name, "R%d", src_nr);
+ assert(src_nr <= 15);
+ break;
+ case 1:
+ if (src_nr < 8)
+ sprintf(name, "T%d", src_nr);
+ else if (src_nr == 8)
+ sprintf(name, "DIFFUSE");
+ else if (src_nr == 9)
+ sprintf(name, "SPECULAR");
+ else if (src_nr == 10)
+ sprintf(name, "FOG");
+ else {
+ assert(0);
+ sprintf(name, "RESERVED");
+ }
+ break;
+ case 4:
+ sprintf(name, "oC");
+ assert(src_nr == 0);
+ break;
+ case 5:
+ sprintf(name, "oD");
+ assert(src_nr == 0);
+ break;
+ default:
+ assert(0);
+ sprintf(name, "RESERVED");
+ break;
+ }
+}
+
+static void
+gen3_decode_alu1(uint32_t *data, uint32_t offset,
+ int i, char *instr_prefix, char *op_name)
+{
+ char dst[100], src0[100];
+
+ gen3_get_instruction_dst(data, i, dst, 1);
+ gen3_get_instruction_src0(data, i, src0);
+
+ kgem_debug_print(data, offset, i++, "%s: %s %s, %s\n", instr_prefix,
+ op_name, dst, src0);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+gen3_decode_alu2(uint32_t *data, uint32_t offset,
+ int i, char *instr_prefix, char *op_name)
+{
+ char dst[100], src0[100], src1[100];
+
+ gen3_get_instruction_dst(data, i, dst, 1);
+ gen3_get_instruction_src0(data, i, src0);
+ gen3_get_instruction_src1(data, i, src1);
+
+ kgem_debug_print(data, offset, i++, "%s: %s %s, %s, %s\n", instr_prefix,
+ op_name, dst, src0, src1);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+gen3_decode_alu3(uint32_t *data, uint32_t offset,
+ int i, char *instr_prefix, char *op_name)
+{
+ char dst[100], src0[100], src1[100], src2[100];
+
+ gen3_get_instruction_dst(data, i, dst, 1);
+ gen3_get_instruction_src0(data, i, src0);
+ gen3_get_instruction_src1(data, i, src1);
+ gen3_get_instruction_src2(data, i, src2);
+
+ kgem_debug_print(data, offset, i++, "%s: %s %s, %s, %s, %s\n", instr_prefix,
+ op_name, dst, src0, src1, src2);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+gen3_decode_tex(uint32_t *data, uint32_t offset, int i, char *instr_prefix,
+ char *tex_name)
+{
+ uint32_t t0 = data[i];
+ uint32_t t1 = data[i + 1];
+ char dst_name[100];
+ char addr_name[100];
+ int sampler_nr;
+
+ gen3_get_instruction_dst(data, i, dst_name, 0);
+ gen3_get_instruction_addr((t1 >> 24) & 0x7,
+ (t1 >> 17) & 0xf,
+ addr_name);
+ sampler_nr = t0 & 0xf;
+
+ kgem_debug_print(data, offset, i++, "%s: %s %s, S%d, %s\n", instr_prefix,
+ tex_name, dst_name, sampler_nr, addr_name);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+gen3_decode_dcl(uint32_t *data, uint32_t offset, int i, char *instr_prefix)
+{
+ uint32_t d0 = data[i];
+ char *sampletype;
+ int dcl_nr = (d0 >> 14) & 0xf;
+ char *dcl_x = d0 & (1 << 10) ? "x" : "";
+ char *dcl_y = d0 & (1 << 11) ? "y" : "";
+ char *dcl_z = d0 & (1 << 12) ? "z" : "";
+ char *dcl_w = d0 & (1 << 13) ? "w" : "";
+ char dcl_mask[10];
+
+ switch ((d0 >> 19) & 0x3) {
+ case 1:
+ sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w);
+ assert (strcmp(dcl_mask, "."));
+
+ assert(dcl_nr <= 10);
+ if (dcl_nr < 8) {
+ if (strcmp(dcl_mask, ".x") != 0 &&
+ strcmp(dcl_mask, ".xy") != 0 &&
+ strcmp(dcl_mask, ".xz") != 0 &&
+ strcmp(dcl_mask, ".w") != 0 &&
+ strcmp(dcl_mask, ".xyzw") != 0) {
+ assert(0);
+ }
+ kgem_debug_print(data, offset, i++, "%s: DCL T%d%s\n", instr_prefix,
+ dcl_nr, dcl_mask);
+ } else {
+ if (strcmp(dcl_mask, ".xz") == 0)
+ assert(0);
+ else if (strcmp(dcl_mask, ".xw") == 0)
+ assert(0);
+ else if (strcmp(dcl_mask, ".xzw") == 0)
+ assert(0);
+
+ if (dcl_nr == 8) {
+ kgem_debug_print(data, offset, i++, "%s: DCL DIFFUSE%s\n", instr_prefix,
+ dcl_mask);
+ } else if (dcl_nr == 9) {
+ kgem_debug_print(data, offset, i++, "%s: DCL SPECULAR%s\n", instr_prefix,
+ dcl_mask);
+ } else if (dcl_nr == 10) {
+ kgem_debug_print(data, offset, i++, "%s: DCL FOG%s\n", instr_prefix,
+ dcl_mask);
+ }
+ }
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ break;
+ case 3:
+ switch ((d0 >> 22) & 0x3) {
+ case 0:
+ sampletype = "2D";
+ break;
+ case 1:
+ sampletype = "CUBE";
+ break;
+ case 2:
+ sampletype = "3D";
+ break;
+ default:
+ sampletype = "RESERVED";
+ break;
+ }
+ assert(dcl_nr <= 15);
+ kgem_debug_print(data, offset, i++, "%s: DCL S%d %s\n", instr_prefix,
+ dcl_nr, sampletype);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ break;
+ default:
+ kgem_debug_print(data, offset, i++, "%s: DCL RESERVED%d\n", instr_prefix, dcl_nr);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ }
+}
+
+static void
+gen3_decode_instruction(uint32_t *data, uint32_t offset,
+ int i, char *instr_prefix)
+{
+ switch ((data[i] >> 24) & 0x1f) {
+ case 0x0:
+ kgem_debug_print(data, offset, i++, "%s: NOP\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ break;
+ case 0x01:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "ADD");
+ break;
+ case 0x02:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "MOV");
+ break;
+ case 0x03:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "MUL");
+ break;
+ case 0x04:
+ gen3_decode_alu3(data, offset, i, instr_prefix, "MAD");
+ break;
+ case 0x05:
+ gen3_decode_alu3(data, offset, i, instr_prefix, "DP2ADD");
+ break;
+ case 0x06:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "DP3");
+ break;
+ case 0x07:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "DP4");
+ break;
+ case 0x08:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "FRC");
+ break;
+ case 0x09:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "RCP");
+ break;
+ case 0x0a:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "RSQ");
+ break;
+ case 0x0b:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "EXP");
+ break;
+ case 0x0c:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "LOG");
+ break;
+ case 0x0d:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "CMP");
+ break;
+ case 0x0e:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "MIN");
+ break;
+ case 0x0f:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "MAX");
+ break;
+ case 0x10:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "FLR");
+ break;
+ case 0x11:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "MOD");
+ break;
+ case 0x12:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "TRC");
+ break;
+ case 0x13:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "SGE");
+ break;
+ case 0x14:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "SLT");
+ break;
+ case 0x15:
+ gen3_decode_tex(data, offset, i, instr_prefix, "TEXLD");
+ break;
+ case 0x16:
+ gen3_decode_tex(data, offset, i, instr_prefix, "TEXLDP");
+ break;
+ case 0x17:
+ gen3_decode_tex(data, offset, i, instr_prefix, "TEXLDB");
+ break;
+ case 0x19:
+ gen3_decode_dcl(data, offset, i, instr_prefix);
+ break;
+ default:
+ kgem_debug_print(data, offset, i++, "%s: unknown\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ break;
+ }
+}
+
+static char *
+gen3_decode_compare_func(uint32_t op)
+{
+ switch (op&0x7) {
+ case 0: return "always";
+ case 1: return "never";
+ case 2: return "less";
+ case 3: return "equal";
+ case 4: return "lequal";
+ case 5: return "greater";
+ case 6: return "notequal";
+ case 7: return "gequal";
+ }
+ return "";
+}
+
+static char *
+gen3_decode_stencil_op(uint32_t op)
+{
+ switch (op&0x7) {
+ case 0: return "keep";
+ case 1: return "zero";
+ case 2: return "replace";
+ case 3: return "incr_sat";
+ case 4: return "decr_sat";
+ case 5: return "greater";
+ case 6: return "incr";
+ case 7: return "decr";
+ }
+ return "";
+}
+
+#if 0
+/* part of MODES_4 */
+static char *
+gen3_decode_logic_op(uint32_t op)
+{
+ switch (op&0xf) {
+ case 0: return "clear";
+ case 1: return "nor";
+ case 2: return "and_inv";
+ case 3: return "copy_inv";
+ case 4: return "and_rvrse";
+ case 5: return "inv";
+ case 6: return "xor";
+ case 7: return "nand";
+ case 8: return "and";
+ case 9: return "equiv";
+ case 10: return "noop";
+ case 11: return "or_inv";
+ case 12: return "copy";
+ case 13: return "or_rvrse";
+ case 14: return "or";
+ case 15: return "set";
+ }
+ return "";
+}
+#endif
+
+static char *
+gen3_decode_blend_fact(uint32_t op)
+{
+ switch (op&0xf) {
+ case 1: return "zero";
+ case 2: return "one";
+ case 3: return "src_colr";
+ case 4: return "inv_src_colr";
+ case 5: return "src_alpha";
+ case 6: return "inv_src_alpha";
+ case 7: return "dst_alpha";
+ case 8: return "inv_dst_alpha";
+ case 9: return "dst_colr";
+ case 10: return "inv_dst_colr";
+ case 11: return "src_alpha_sat";
+ case 12: return "cnst_colr";
+ case 13: return "inv_cnst_colr";
+ case 14: return "cnst_alpha";
+ case 15: return "inv_const_alpha";
+ }
+ return "";
+}
+
+static char *
+decode_tex_coord_mode(uint32_t mode)
+{
+ switch (mode&0x7) {
+ case 0: return "wrap";
+ case 1: return "mirror";
+ case 2: return "clamp_edge";
+ case 3: return "cube";
+ case 4: return "clamp_border";
+ case 5: return "mirror_once";
+ }
+ return "";
+}
+
+static char *
+gen3_decode_sample_filter(uint32_t mode)
+{
+ switch (mode&0x7) {
+ case 0: return "nearest";
+ case 1: return "linear";
+ case 2: return "anisotropic";
+ case 3: return "4x4_1";
+ case 4: return "4x4_2";
+ case 5: return "4x4_flat";
+ case 6: return "6x5_mono";
+ }
+ return "";
+}
+
+static int
+gen3_decode_load_state_immediate_1(struct kgem *kgem, uint32_t offset)
+{
+ const uint32_t *data = kgem->batch + offset;
+ int len, i, word;
+
+ kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
+ len = (data[0] & 0x0000000f) + 2;
+ i = 1;
+ for (word = 0; word <= 8; word++) {
+ if (data[0] & (1 << (4 + word))) {
+ switch (word) {
+ case 0:
+ kgem_debug_print(data, offset, i, "S0: vbo offset: 0x%08x%s\n",
+ data[i]&(~1),data[i]&1?", auto cache invalidate disabled":"");
+ gen3_update_vertex_buffer_addr(kgem, offset + i);
+ break;
+ case 1:
+ kgem_debug_print(data, offset, i, "S1: vertex width: %i, vertex pitch: %i\n",
+ (data[i]>>24)&0x3f,(data[i]>>16)&0x3f);
+ gen3_update_vertex_buffer_pitch(kgem, offset + i);
+ break;
+ case 2:
+ {
+ char buf[200];
+ int len = 0;
+ int tex_num;
+ for (tex_num = 0; tex_num < 8; tex_num++) {
+ switch((data[i]>>tex_num*4)&0xf) {
+ case 0: len += sprintf(buf + len, "%i=2D ", tex_num); break;
+ case 1: len += sprintf(buf + len, "%i=3D ", tex_num); break;
+ case 2: len += sprintf(buf + len, "%i=4D ", tex_num); break;
+ case 3: len += sprintf(buf + len, "%i=1D ", tex_num); break;
+ case 4: len += sprintf(buf + len, "%i=2D_16 ", tex_num); break;
+ case 5: len += sprintf(buf + len, "%i=4D_16 ", tex_num); break;
+ case 0xf: len += sprintf(buf + len, "%i=NP ", tex_num); break;
+ }
+ }
+ kgem_debug_print(data, offset, i, "S2: texcoord formats: %s\n", buf);
+ gen3_update_vertex_texcoords(kgem, data[i]);
+ }
+
+ break;
+ case 3:
+ kgem_debug_print(data, offset, i, "S3: not documented\n", word);
+ break;
+ case 4:
+ {
+ char *cullmode = "";
+ char *vfmt_xyzw = "";
+ switch((data[i]>>13)&0x3) {
+ case 0: cullmode = "both"; break;
+ case 1: cullmode = "none"; break;
+ case 2: cullmode = "cw"; break;
+ case 3: cullmode = "ccw"; break;
+ }
+ switch(data[i] & (7<<6 | 1<<2)) {
+ case 1<<6: vfmt_xyzw = "XYZ,"; break;
+ case 2<<6: vfmt_xyzw = "XYZW,"; break;
+ case 3<<6: vfmt_xyzw = "XY,"; break;
+ case 4<<6: vfmt_xyzw = "XYW,"; break;
+ case 1<<6 | 1<<2: vfmt_xyzw = "XYZF,"; break;
+ case 2<<6 | 1<<2: vfmt_xyzw = "XYZWF,"; break;
+ case 3<<6 | 1<<2: vfmt_xyzw = "XYF,"; break;
+ case 4<<6 | 1<<2: vfmt_xyzw = "XYWF,"; break;
+ }
+ kgem_debug_print(data, offset, i, "S4: point_width=%i, line_width=%.1f,"
+ "%s%s%s%s%s cullmode=%s, vfmt=%s%s%s%s%s%s "
+ "%s%s\n",
+ (data[i]>>23)&0x1ff,
+ ((data[i]>>19)&0xf) / 2.0,
+ data[i]&(0xf<<15)?" flatshade=":"",
+ data[i]&(1<<18)?"Alpha,":"",
+ data[i]&(1<<17)?"Fog,":"",
+ data[i]&(1<<16)?"Specular,":"",
+ data[i]&(1<<15)?"Color,":"",
+ cullmode,
+ data[i]&(1<<12)?"PointWidth,":"",
+ data[i]&(1<<11)?"SpecFog,":"",
+ data[i]&(1<<10)?"Color,":"",
+ data[i]&(1<<9)?"DepthOfs,":"",
+ vfmt_xyzw,
+ data[i]&(1<<9)?"FogParam,":"",
+ data[i]&(1<<5)?"force default diffuse, ":"",
+ data[i]&(1<<4)?"force default specular, ":"",
+ data[i]&(1<<3)?"local depth ofs enable, ":"",
+ data[i]&(1<<1)?"point sprite enable, ":"",
+ data[i]&(1<<0)?"line AA enable, ":"");
+ gen3_update_vertex_elements(kgem, data[i]);
+ break;
+ }
+ case 5:
+ {
+ kgem_debug_print(data, offset, i, "S5:%s%s%s%s%s"
+ "%s%s%s%s stencil_ref=0x%x, stencil_test=%s, "
+ "stencil_fail=%s, stencil_pass_z_fail=%s, "
+ "stencil_pass_z_pass=%s, %s%s%s%s\n",
+ data[i]&(0xf<<28)?" write_disable=":"",
+ data[i]&(1<<31)?"Alpha,":"",
+ data[i]&(1<<30)?"Red,":"",
+ data[i]&(1<<29)?"Green,":"",
+ data[i]&(1<<28)?"Blue,":"",
+ data[i]&(1<<27)?" force default point size,":"",
+ data[i]&(1<<26)?" last pixel enable,":"",
+ data[i]&(1<<25)?" global depth ofs enable,":"",
+ data[i]&(1<<24)?" fog enable,":"",
+ (data[i]>>16)&0xff,
+ gen3_decode_compare_func(data[i]>>13),
+ gen3_decode_stencil_op(data[i]>>10),
+ gen3_decode_stencil_op(data[i]>>7),
+ gen3_decode_stencil_op(data[i]>>4),
+ data[i]&(1<<3)?"stencil write enable, ":"",
+ data[i]&(1<<2)?"stencil test enable, ":"",
+ data[i]&(1<<1)?"color dither enable, ":"",
+ data[i]&(1<<0)?"logicop enable, ":"");
+ }
+ break;
+ case 6:
+ kgem_debug_print(data, offset, i, "S6: %salpha_test=%s, alpha_ref=0x%x, "
+ "depth_test=%s, %ssrc_blnd_fct=%s, dst_blnd_fct=%s, "
+ "%s%stristrip_provoking_vertex=%i\n",
+ data[i]&(1<<31)?"alpha test enable, ":"",
+ gen3_decode_compare_func(data[i]>>28),
+ data[i]&(0xff<<20),
+ gen3_decode_compare_func(data[i]>>16),
+ data[i]&(1<<15)?"cbuf blend enable, ":"",
+ gen3_decode_blend_fact(data[i]>>8),
+ gen3_decode_blend_fact(data[i]>>4),
+ data[i]&(1<<3)?"depth write enable, ":"",
+ data[i]&(1<<2)?"cbuf write enable, ":"",
+ data[i]&(0x3));
+ break;
+ case 7:
+ kgem_debug_print(data, offset, i, "S7: depth offset constant: 0x%08x\n", data[i]);
+ break;
+ }
+ i++;
+ }
+ }
+
+ assert(len == i);
+ return len;
+}
+
+static int
+gen3_decode_3d_1d(struct kgem *kgem, uint32_t offset)
+{
+ uint32_t *data = kgem->batch + offset;
+ unsigned int len, i, c, idx, word, map, sampler, instr;
+ char *format, *zformat, *type;
+ uint32_t opcode;
+ const struct {
+ uint32_t opcode;
+ int min_len;
+ int max_len;
+ char *name;
+ } opcodes_3d_1d[] = {
+ { 0x86, 4, 4, "3DSTATE_CHROMA_KEY" },
+ { 0x88, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
+ { 0x99, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
+ { 0x9a, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
+ { 0x98, 2, 2, "3DSTATE_DEFAULT_Z" },
+ { 0x97, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
+ { 0x9d, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
+ { 0x9e, 4, 4, "3DSTATE_MONO_FILTER" },
+ { 0x89, 4, 4, "3DSTATE_FOG_MODE" },
+ { 0x8f, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" },
+ { 0x83, 2, 2, "3DSTATE_SPAN_STIPPLE" },
+ }, *opcode_3d_1d;
+
+ opcode = (data[0] & 0x00ff0000) >> 16;
+
+ switch (opcode) {
+ case 0x07:
+ /* This instruction is unusual. A 0 length means just 1 DWORD instead of
+ * 2. The 0 length is specified in one place to be unsupported, but
+ * stated to be required in another, and 0 length LOAD_INDIRECTs appear
+ * to cause no harm at least.
+ */
+ kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_INDIRECT\n");
+ len = (data[0] & 0x000000ff) + 1;
+ i = 1;
+ if (data[0] & (0x01 << 8)) {
+ kgem_debug_print(data, offset, i++, "SIS.0\n");
+ kgem_debug_print(data, offset, i++, "SIS.1\n");
+ }
+ if (data[0] & (0x02 << 8)) {
+ kgem_debug_print(data, offset, i++, "DIS.0\n");
+ }
+ if (data[0] & (0x04 << 8)) {
+ kgem_debug_print(data, offset, i++, "SSB.0\n");
+ kgem_debug_print(data, offset, i++, "SSB.1\n");
+ }
+ if (data[0] & (0x08 << 8)) {
+ kgem_debug_print(data, offset, i++, "MSB.0\n");
+ kgem_debug_print(data, offset, i++, "MSB.1\n");
+ }
+ if (data[0] & (0x10 << 8)) {
+ kgem_debug_print(data, offset, i++, "PSP.0\n");
+ kgem_debug_print(data, offset, i++, "PSP.1\n");
+ }
+ if (data[0] & (0x20 << 8)) {
+ kgem_debug_print(data, offset, i++, "PSC.0\n");
+ kgem_debug_print(data, offset, i++, "PSC.1\n");
+ }
+ assert(len == i);
+ return len;
+ case 0x04:
+ return gen3_decode_load_state_immediate_1(kgem, offset);
+ case 0x03:
+ kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n");
+ len = (data[0] & 0x0000000f) + 2;
+ i = 1;
+ for (word = 6; word <= 14; word++) {
+ if (data[0] & (1 << word)) {
+ if (word == 6)
+ kgem_debug_print(data, offset, i++, "TBCF\n");
+ else if (word >= 7 && word <= 10) {
+ kgem_debug_print(data, offset, i++, "TB%dC\n", word - 7);
+ kgem_debug_print(data, offset, i++, "TB%dA\n", word - 7);
+ } else if (word >= 11 && word <= 14) {
+ kgem_debug_print(data, offset, i, "TM%dS0: offset=0x%08x, %s\n",
+ word - 11,
+ data[i]&0xfffffffe,
+ data[i]&1?"use fence":"");
+ i++;
+ kgem_debug_print(data, offset, i, "TM%dS1: height=%i, width=%i, %s\n",
+ word - 11,
+ data[i]>>21, (data[i]>>10)&0x3ff,
+ data[i]&2?(data[i]&1?"y-tiled":"x-tiled"):"");
+ i++;
+ kgem_debug_print(data, offset, i, "TM%dS2: pitch=%i, \n",
+ word - 11,
+ ((data[i]>>21) + 1)*4);
+ i++;
+ kgem_debug_print(data, offset, i++, "TM%dS3\n", word - 11);
+ kgem_debug_print(data, offset, i++, "TM%dS4: dflt color\n", word - 11);
+ }
+ }
+ }
+ assert(len == i);
+ return len;
+ case 0x00:
+ kgem_debug_print(data, offset, 0, "3DSTATE_MAP_STATE\n");
+ len = (data[0] & 0x0000003f) + 2;
+ kgem_debug_print(data, offset, 1, "mask\n");
+
+ i = 2;
+ for (map = 0; map <= 15; map++) {
+ if (data[1] & (1 << map)) {
+ int width, height, pitch, dword;
+ const char *tiling;
+
+ dword = data[i];
+ kgem_debug_print(data, offset, i++, "map %d MS2 %s%s%s\n", map,
+ dword&(1<<31)?"untrusted surface, ":"",
+ dword&(1<<1)?"vertical line stride enable, ":"",
+ dword&(1<<0)?"vertical ofs enable, ":"");
+
+ dword = data[i];
+ width = ((dword >> 10) & ((1 << 11) - 1))+1;
+ height = ((dword >> 21) & ((1 << 11) - 1))+1;
+
+ tiling = "none";
+ if (dword & (1 << 2))
+ tiling = "fenced";
+ else if (dword & (1 << 1))
+ tiling = dword & (1 << 0) ? "Y" : "X";
+ type = " BAD";
+ format = " (invalid)";
+ switch ((dword>>7) & 0x7) {
+ case 1:
+ type = "8";
+ switch ((dword>>3) & 0xf) {
+ case 0: format = "I"; break;
+ case 1: format = "L"; break;
+ case 4: format = "A"; break;
+ case 5: format = " mono"; break;
+ }
+ break;
+ case 2:
+ type = "16";
+ switch ((dword>>3) & 0xf) {
+ case 0: format = " rgb565"; break;
+ case 1: format = " argb1555"; break;
+ case 2: format = " argb4444"; break;
+ case 3: format = " ay88"; break;
+ case 5: format = " 88dvdu"; break;
+ case 6: format = " bump655"; break;
+ case 7: format = "I"; break;
+ case 8: format = "L"; break;
+ case 9: format = "A"; break;
+ }
+ break;
+ case 3:
+ type = "32";
+ switch ((dword>>3) & 0xf) {
+ case 0: format = " argb8888"; break;
+ case 1: format = " abgr8888"; break;
+ case 2: format = " xrgb8888"; break;
+ case 3: format = " xbgr8888"; break;
+ case 4: format = " qwvu8888"; break;
+ case 5: format = " axvu8888"; break;
+ case 6: format = " lxvu8888"; break;
+ case 7: format = " xlvu8888"; break;
+ case 8: format = " argb2101010"; break;
+ case 9: format = " abgr2101010"; break;
+ case 10: format = " awvu2101010"; break;
+ case 11: format = " gr1616"; break;
+ case 12: format = " vu1616"; break;
+ case 13: format = " xI824"; break;
+ case 14: format = " xA824"; break;
+ case 15: format = " xL824"; break;
+ }
+ break;
+ case 5:
+ type = "422";
+ switch ((dword>>3) & 0xf) {
+ case 0: format = " yuv_swapy"; break;
+ case 1: format = " yuv"; break;
+ case 2: format = " yuv_swapuv"; break;
+ case 3: format = " yuv_swapuvy"; break;
+ }
+ break;
+ case 6:
+ type = "compressed";
+ switch ((dword>>3) & 0x7) {
+ case 0: format = " dxt1"; break;
+ case 1: format = " dxt2_3"; break;
+ case 2: format = " dxt4_5"; break;
+ case 3: format = " fxt1"; break;
+ case 4: format = " dxt1_rb"; break;
+ }
+ break;
+ case 7:
+ type = "4b indexed";
+ switch ((dword>>3) & 0xf) {
+ case 7: format = " argb8888"; break;
+ }
+ break;
+ default:
+ format = "BAD";
+ break;
+ }
+ dword = data[i];
+ kgem_debug_print(data, offset, i++, "map %d MS3 [width=%d, height=%d, format=%s%s, tiling=%s%s]\n",
+ map, width, height, type, format, tiling,
+ dword&(1<<9)?" palette select":"");
+
+ dword = data[i];
+ pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1);
+ kgem_debug_print(data, offset, i++, "map %d MS4 [pitch=%d, max_lod=%i, vol_depth=%i, cube_face_ena=%x, %s]\n",
+ map, pitch,
+ (dword>>9)&0x3f, dword&0xff, (dword>>15)&0x3f,
+ dword&(1<<8)?"miplayout legacy":"miplayout right");
+ }
+ }
+ assert(len == i);
+ return len;
+ case 0x06:
+ kgem_debug_print(data, offset, 0, "3DSTATE_PIXEL_SHADER_CONSTANTS\n");
+ len = (data[0] & 0x000000ff) + 2;
+
+ i = 2;
+ for (c = 0; c <= 31; c++) {
+ if (data[1] & (1 << c)) {
+ kgem_debug_print(data, offset, i, "C%d.X = %f\n",
+ c, int_as_float(data[i]));
+ i++;
+ kgem_debug_print(data, offset, i, "C%d.Y = %f\n",
+ c, int_as_float(data[i]));
+ i++;
+ kgem_debug_print(data, offset, i, "C%d.Z = %f\n",
+ c, int_as_float(data[i]));
+ i++;
+ kgem_debug_print(data, offset, i, "C%d.W = %f\n",
+ c, int_as_float(data[i]));
+ i++;
+ }
+ }
+ assert(len == i);
+ return len;
+ case 0x05:
+ kgem_debug_print(data, offset, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n");
+ len = (data[0] & 0x000000ff) + 2;
+ assert(((len-1) % 3) == 0);
+ assert(len <= 370);
+ i = 1;
+ for (instr = 0; instr < (len - 1) / 3; instr++) {
+ char instr_prefix[10];
+
+ sprintf(instr_prefix, "PS%03d", instr);
+ gen3_decode_instruction(data, offset, i, instr_prefix);
+ i += 3;
+ }
+ return len;
+ case 0x01:
+ kgem_debug_print(data, offset, 0, "3DSTATE_SAMPLER_STATE\n");
+ kgem_debug_print(data, offset, 1, "mask\n");
+ len = (data[0] & 0x0000003f) + 2;
+ i = 2;
+ for (sampler = 0; sampler <= 15; sampler++) {
+ if (data[1] & (1 << sampler)) {
+ uint32_t dword;
+ char *mip_filter = "";
+ dword = data[i];
+ switch ((dword>>20)&0x3) {
+ case 0: mip_filter = "none"; break;
+ case 1: mip_filter = "nearest"; break;
+ case 3: mip_filter = "linear"; break;
+ }
+ kgem_debug_print(data, offset, i++, "sampler %d SS2:%s%s%s "
+ "base_mip_level=%i, mip_filter=%s, mag_filter=%s, min_filter=%s "
+ "lod_bias=%.2f,%s max_aniso=%i, shadow_func=%s\n", sampler,
+ dword&(1<<31)?" reverse gamma,":"",
+ dword&(1<<30)?" packed2planar,":"",
+ dword&(1<<29)?" colorspace conversion,":"",
+ (dword>>22)&0x1f,
+ mip_filter,
+ gen3_decode_sample_filter(dword>>17),
+ gen3_decode_sample_filter(dword>>14),
+ ((dword>>5)&0x1ff)/(0x10*1.0),
+ dword&(1<<4)?" shadow,":"",
+ dword&(1<<3)?4:2,
+ gen3_decode_compare_func(dword));
+ dword = data[i];
+ kgem_debug_print(data, offset, i++, "sampler %d SS3: min_lod=%.2f,%s "
+ "tcmode_x=%s, tcmode_y=%s, tcmode_z=%s,%s texmap_idx=%i,%s\n",
+ sampler, ((dword>>24)&0xff)/(0x10*1.0),
+ dword&(1<<17)?" kill pixel enable,":"",
+ decode_tex_coord_mode(dword>>12),
+ decode_tex_coord_mode(dword>>9),
+ decode_tex_coord_mode(dword>>6),
+ dword&(1<<5)?" normalized coords,":"",
+ (dword>>1)&0xf,
+ dword&(1<<0)?" deinterlacer,":"");
+ dword = data[i];
+ kgem_debug_print(data, offset, i++, "sampler %d SS4: border color\n",
+ sampler, ((dword>>24)&0xff)/(0x10*1.0),
+ dword);
+ }
+ }
+ assert(len == i);
+ return len;
+ case 0x85:
+ len = (data[0] & 0x0000000f) + 2;
+ assert(len == 2);
+
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_DEST_BUFFER_VARIABLES\n");
+
+ switch ((data[1] >> 8) & 0xf) {
+ case 0x0: format = "g8"; break;
+ case 0x1: format = "x1r5g5b5"; break;
+ case 0x2: format = "r5g6b5"; break;
+ case 0x3: format = "a8r8g8b8"; break;
+ case 0x4: format = "ycrcb_swapy"; break;
+ case 0x5: format = "ycrcb_normal"; break;
+ case 0x6: format = "ycrcb_swapuv"; break;
+ case 0x7: format = "ycrcb_swapuvy"; break;
+ case 0x8: format = "a4r4g4b4"; break;
+ case 0x9: format = "a1r5g5b5"; break;
+ case 0xa: format = "a2r10g10b10"; break;
+ default: format = "BAD"; break;
+ }
+ switch ((data[1] >> 2) & 0x3) {
+ case 0x0: zformat = "u16"; break;
+ case 0x1: zformat = "f16"; break;
+ case 0x2: zformat = "u24x8"; break;
+ default: zformat = "BAD"; break;
+ }
+ kgem_debug_print(data, offset, 1, "%s format, %s depth format, early Z %sabled\n",
+ format, zformat,
+ (data[1] & (1 << 31)) ? "en" : "dis");
+ return len;
+
+ case 0x8e:
+ {
+ const char *name, *tiling;
+
+ len = (data[0] & 0x0000000f) + 2;
+ assert(len == 3);
+
+ switch((data[1] >> 24) & 0x7) {
+ case 0x3: name = "color"; break;
+ case 0x7: name = "depth"; break;
+ default: name = "unknown"; break;
+ }
+
+ tiling = "none";
+ if (data[1] & (1 << 23))
+ tiling = "fenced";
+ else if (data[1] & (1 << 22))
+ tiling = data[1] & (1 << 21) ? "Y" : "X";
+
+ kgem_debug_print(data, offset, 0, "3DSTATE_BUFFER_INFO\n");
+ kgem_debug_print(data, offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff);
+
+ kgem_debug_print(data, offset, 2, "address\n");
+ return len;
+ }
+ case 0x81:
+ len = (data[0] & 0x0000000f) + 2;
+ assert(len == 3);
+
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_SCISSOR_RECTANGLE\n");
+ kgem_debug_print(data, offset, 1, "(%d,%d)\n",
+ data[1] & 0xffff, data[1] >> 16);
+ kgem_debug_print(data, offset, 2, "(%d,%d)\n",
+ data[2] & 0xffff, data[2] >> 16);
+
+ return len;
+ case 0x80:
+ len = (data[0] & 0x0000000f) + 2;
+ assert(len == 5);
+
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_DRAWING_RECTANGLE\n");
+ kgem_debug_print(data, offset, 1, "%s\n",
+ data[1]&(1<<30)?"depth ofs disabled ":"");
+ kgem_debug_print(data, offset, 2, "(%d,%d)\n",
+ data[2] & 0xffff, data[2] >> 16);
+ kgem_debug_print(data, offset, 3, "(%d,%d)\n",
+ data[3] & 0xffff, data[3] >> 16);
+ kgem_debug_print(data, offset, 4, "(%d,%d)\n",
+ (int16_t)(data[4] & 0xffff),
+ (int16_t)(data[4] >> 16));
+
+ return len;
+ case 0x9c:
+ len = (data[0] & 0x0000000f) + 2;
+ assert(len == 7);
+
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_CLEAR_PARAMETERS\n");
+ kgem_debug_print(data, offset, 1, "prim_type=%s, clear=%s%s%s\n",
+ data[1]&(1<<16)?"CLEAR_RECT":"ZONE_INIT",
+ data[1]&(1<<2)?"color,":"",
+ data[1]&(1<<1)?"depth,":"",
+ data[1]&(1<<0)?"stencil,":"");
+ kgem_debug_print(data, offset, 2, "clear color\n");
+ kgem_debug_print(data, offset, 3, "clear depth/stencil\n");
+ kgem_debug_print(data, offset, 4, "color value (rgba8888)\n");
+ kgem_debug_print(data, offset, 5, "depth value %f\n",
+ int_as_float(data[5]));
+ kgem_debug_print(data, offset, 6, "clear stencil\n");
+ return len;
+ }
+
+ for (idx = 0; idx < ARRAY_SIZE(opcodes_3d_1d); idx++) {
+ opcode_3d_1d = &opcodes_3d_1d[idx];
+ if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) {
+ len = (data[0] & 0xf) + 2;
+ kgem_debug_print(data, offset, 0, "%s\n", opcode_3d_1d->name);
+ for (i = 1; i < len; i++)
+ kgem_debug_print(data, offset, i, "dword %d\n", i);
+
+ return len;
+ }
+ }
+
+ kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode);
+ assert(0);
+ return 1;
+}
+
+#define VERTEX_OUT(fmt, ...) do { \
+ kgem_debug_print(data, offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \
+ i++; \
+} while (0)
+
+static int
+gen3_decode_3d_primitive(struct kgem *kgem, uint32_t offset)
+{
+ uint32_t *data = kgem->batch + offset;
+ char immediate = (data[0] & (1 << 23)) == 0;
+ unsigned int len, i, ret;
+ char *primtype;
+ unsigned int vertex = 0;
+
+ switch ((data[0] >> 18) & 0xf) {
+ case 0x0: primtype = "TRILIST"; break;
+ case 0x1: primtype = "TRISTRIP"; break;
+ case 0x2: primtype = "TRISTRIP_REVERSE"; break;
+ case 0x3: primtype = "TRIFAN"; break;
+ case 0x4: primtype = "POLYGON"; break;
+ case 0x5: primtype = "LINELIST"; break;
+ case 0x6: primtype = "LINESTRIP"; break;
+ case 0x7: primtype = "RECTLIST"; break;
+ case 0x8: primtype = "POINTLIST"; break;
+ case 0x9: primtype = "DIB"; break;
+ case 0xa: primtype = "CLEAR_RECT"; assert(0); break;
+ default: primtype = "unknown"; break;
+ }
+
+ gen3_update_vertex_elements_offsets(kgem);
+
+ /* XXX: 3DPRIM_DIB not supported */
+ if (immediate) {
+ len = (data[0] & 0x0003ffff) + 2;
+ kgem_debug_print(data, offset, 0, "3DPRIMITIVE inline %s\n", primtype);
+ for (i = 1; i < len; ) {
+ ErrorF(" [%d]: ", vertex);
+ i += inline_vertex_out(kgem, data + i) / sizeof(uint32_t);
+ ErrorF("\n");
+ vertex++;
+ }
+
+ ret = len;
+ } else {
+ /* indirect vertices */
+ len = data[0] & 0x0000ffff; /* index count */
+ if (data[0] & (1 << 17)) {
+ /* random vertex access */
+ kgem_debug_print(data, offset, 0,
+ "3DPRIMITIVE random indirect %s (%d)\n", primtype, len);
+ assert(0);
+ if (len == 0) {
+ /* vertex indices continue until 0xffff is found */
+ } else {
+ /* fixed size vertex index buffer */
+ }
+ ret = (len + 1) / 2 + 1;
+ goto out;
+ } else {
+ /* sequential vertex access */
+ vertex = data[1] & 0xffff;
+ kgem_debug_print(data, offset, 0,
+ "3DPRIMITIVE sequential indirect %s, %d starting from "
+ "%d\n", primtype, len, vertex);
+ kgem_debug_print(data, offset, 1, " start\n");
+ for (i = 0; i < len; i++) {
+ ErrorF(" [%d]: ", vertex);
+ indirect_vertex_out(kgem, vertex++);
+ ErrorF("\n");
+ }
+ ret = 2;
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
+int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset)
+{
+ uint32_t opcode;
+ unsigned int idx;
+
+ struct {
+ uint32_t opcode;
+ int min_len;
+ int max_len;
+ char *name;
+ } opcodes[] = {
+ { 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" },
+ { 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" },
+ { 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" },
+ { 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" },
+ { 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
+ { 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" },
+ { 0x0d, 1, 1, "3DSTATE_MODES_4" },
+ { 0x0c, 1, 1, "3DSTATE_MODES_5" },
+ { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
+ };
+ uint32_t *data = kgem->batch + offset;
+
+ opcode = (data[0] & 0x1f000000) >> 24;
+
+ switch (opcode) {
+ case 0x1f:
+ return gen3_decode_3d_primitive(kgem, offset);
+ case 0x1d:
+ return gen3_decode_3d_1d(kgem, offset);
+ case 0x1c:
+ return gen3_decode_3d_1c(kgem, offset);
+ }
+
+ for (idx = 0; idx < ARRAY_SIZE(opcodes); idx++) {
+ if (opcode == opcodes[idx].opcode) {
+ unsigned int len = 1, i;
+
+ kgem_debug_print(data, offset, 0, "%s\n", opcodes[idx].name);
+ if (opcodes[idx].max_len > 1) {
+ len = (data[0] & 0xff) + 2;
+ assert(len >= opcodes[idx].min_len ||
+ len <= opcodes[idx].max_len);
+ }
+
+ for (i = 1; i < len; i++)
+ kgem_debug_print(data, offset, i, "dword %d\n", i);
+ return len;
+ }
+ }
+
+ kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode);
+ return 1;
+}
+
+
+void kgem_gen3_finish_state(struct kgem *kgem)
+{
+ if (state.vb.current)
+ munmap(state.vb.base, state.vb.current->size);
+
+ memset(&state, 0, sizeof(state));
+}