summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ben Skeggs <bskeggs@redhat.com> 2020-06-07 09:52:49 +1000
committer Marge Bot <eric+marge@anholt.net> 2020-06-10 22:52:42 +0000
commit af3c2f3cfd81186b0041e5297db5225fc788b04e (patch)
tree 998fffd76c56b0f22cd80687d5ab3d472dbbf2d3
parent 268dc60d3a091bc563e319c38e74cc10e544aa8a (diff)
nvc0: initial support for tu1xx
v2:
- add proper method definitions

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Acked-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5377>
-rw-r--r-- src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h 4
-rw-r--r-- src/gallium/drivers/nouveau/nv_object.xml.h 2
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h 904
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h 9
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c 54
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_program.c 37
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_program.h 6
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 83
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_surface.c 17
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c 35
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c 12
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nve4_compute.c 3
-rw-r--r-- src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c 1
13 files changed, 1126 insertions, 41 deletions
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
index 899d73d7398..31e7cf82233 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
@@ -218,9 +218,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_2D_PATTERN_SELECT_BITMAP_1X64 0x00000002
#define NV50_2D_PATTERN_SELECT_COLOR 0x00000003
-#define NVC0_2D_UNK02B8(i0) (0x000002b8 + 0x4*(i0))
-#define NVC0_2D_UNK02B8__ESIZE 0x00000004
-#define NVC0_2D_UNK02B8__LEN 0x00000009
+#define NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE 0x000002b8
#define NVC0_2D_UNK2DC 0x000002dc
diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h
index 0c1337028f3..fac195d4846 100644
--- a/src/gallium/drivers/nouveau/nv_object.xml.h
+++ b/src/gallium/drivers/nouveau/nv_object.xml.h
@@ -196,6 +196,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define GP100_3D_CLASS 0x0000c097
#define GP102_3D_CLASS 0x0000c197
#define GV100_3D_CLASS 0x0000c397
+#define TU102_3D_CLASS 0x0000c597
#define NV50_2D_CLASS 0x0000502d
#define NVC0_2D_CLASS 0x0000902d
#define NV50_COMPUTE_CLASS 0x000050c0
@@ -209,6 +210,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define GP100_COMPUTE_CLASS 0x0000c0c0
#define GP104_COMPUTE_CLASS 0x0000c1c0
#define GV100_COMPUTE_CLASS 0x0000c3c0
+#define TU102_COMPUTE_CLASS 0x0000c5c0
#define NV84_CRYPT_CLASS 0x000074c1
#define BLOB_NVC0_PCOPY1_CLASS 0x000090b8
#define BLOB_NVC0_PCOPY0_CLASS 0x000090b5
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h
new file mode 100644
index 00000000000..390741cbd04
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h
@@ -0,0 +1,904 @@
+#define NV_MME_PRED_MODE_UUUU 0
+#define NV_MME_PRED_MODE_TTTT 1
+#define NV_MME_PRED_MODE_FFFF 2
+#define NV_MME_PRED_MODE_TTUU 3
+#define NV_MME_PRED_MODE_FFUU 4
+#define NV_MME_PRED_MODE_TFUU 5
+#define NV_MME_PRED_MODE_TUUU 6
+#define NV_MME_PRED_MODE_FUUU 7
+#define NV_MME_PRED_MODE_UUTT 8
+#define NV_MME_PRED_MODE_UUTF 9
+#define NV_MME_PRED_MODE_UUTU 10
+#define NV_MME_PRED_MODE_UUFT 11
+#define NV_MME_PRED_MODE_UUFF 12
+#define NV_MME_PRED_MODE_UUFU 13
+#define NV_MME_PRED_MODE_UUUT 14
+#define NV_MME_PRED_MODE_UUUF 15
+
+#define NV_MME_REG_R0 0
+#define NV_MME_REG_R1 1
+#define NV_MME_REG_R2 2
+#define NV_MME_REG_R3 3
+#define NV_MME_REG_R4 4
+#define NV_MME_REG_R5 5
+#define NV_MME_REG_R6 6
+#define NV_MME_REG_R7 7
+#define NV_MME_REG_R8 8
+#define NV_MME_REG_R9 9
+#define NV_MME_REG_R10 10
+#define NV_MME_REG_R11 11
+#define NV_MME_REG_R12 12
+#define NV_MME_REG_R13 13
+#define NV_MME_REG_R14 14
+#define NV_MME_REG_R15 15
+#define NV_MME_REG_R16 16
+#define NV_MME_REG_R17 17
+#define NV_MME_REG_R18 18
+#define NV_MME_REG_R19 19
+#define NV_MME_REG_R20 20
+#define NV_MME_REG_R21 21
+#define NV_MME_REG_R22 22
+#define NV_MME_REG_R23 23
+#define NV_MME_REG_ZERO 24
+#define NV_MME_REG_IMMED 25
+#define NV_MME_REG_IMMEDPAIR 26
+#define NV_MME_REG_IMMED32 27
+#define NV_MME_REG_LOAD0 28
+#define NV_MME_REG_LOAD1 29
+
+#define NV_MME_ALU_ADD 0
+#define NV_MME_ALU_ADDC 1
+#define NV_MME_ALU_SUB 2
+#define NV_MME_ALU_SUBB 3
+#define NV_MME_ALU_MUL 4
+#define NV_MME_ALU_MULH 5
+#define NV_MME_ALU_MULU 6
+#define NV_MME_ALU_EXTENDED 7
+#define NV_MME_ALU_CLZ 8
+#define NV_MME_ALU_SLL 9
+#define NV_MME_ALU_SRL 10
+#define NV_MME_ALU_SRA 11
+#define NV_MME_ALU_AND 12
+#define NV_MME_ALU_NAND 13
+#define NV_MME_ALU_OR 14
+#define NV_MME_ALU_XOR 15
+#define NV_MME_ALU_MERGE 16
+#define NV_MME_ALU_SLT 17
+#define NV_MME_ALU_SLTU 18
+#define NV_MME_ALU_SLE 19
+#define NV_MME_ALU_SLEU 20
+#define NV_MME_ALU_SEQ 21
+#define NV_MME_ALU_STATE 22
+#define NV_MME_ALU_LOOP 23
+#define NV_MME_ALU_JAL 24
+#define NV_MME_ALU_BLT 25
+#define NV_MME_ALU_BLTU 26
+#define NV_MME_ALU_BLE 27
+#define NV_MME_ALU_BLEU 28
+#define NV_MME_ALU_BEQ 29
+#define NV_MME_ALU_DREAD 30
+#define NV_MME_ALU_DWRITE 31
+
+#define NV_MME_OUT_NONE 0
+#define NV_MME_OUT_ALU0 1
+#define NV_MME_OUT_ALU1 2
+#define NV_MME_OUT_LOAD0 3
+#define NV_MME_OUT_LOAD1 4
+#define NV_MME_OUT_IMMED0 5
+#define NV_MME_OUT_IMMED1 6
+#define NV_MME_OUT_RESERVED 7
+#define NV_MME_OUT_IMMEDHIGH0 8
+#define NV_MME_OUT_IMMEDHIGH1 9
+#define NV_MME_OUT_IMMED32_0 10
+
+#define MME_BITS(en,pm,pr,o0,d0,a0,b0,i0,o1,d1,a1,b1,i1,m0,e0,m1,e1) \
+ ((e1) << (92 - 64) | (m1) << (89 - 64) | \
+ (e0) << (85 - 64) | (m0) << (82 - 64) | \
+ (i1) << (66 - 64) | (b1) >> (64 - 61)), \
+ (((b1) & 7) << (61 - 32) | (a1) << (56 - 32) | \
+ (d1) << (51 - 32) | (o1) << (46 - 32) | \
+ (i0) >> (32 - 30)), \
+ (((i0) & 3) << 30 | (b0) << 25 | (a0) << 20 | (d0) << 15 | (o0) << 10 | \
+ (pr) << 5 | (pm) << 1 | (en))
+
+#define MME_INSN(en,o0,d0,a0,b0,i0,m0,e0,o1,d1,a1,b1,i1,m1,e1) \
+ MME_BITS((en), NV_MME_PRED_MODE_UUUU, NV_MME_REG_ZERO, \
+ NV_MME_ALU_##o0, NV_MME_REG_##d0, \
+ NV_MME_REG_##a0, NV_MME_REG_##b0, (i0), \
+ NV_MME_ALU_##o1, NV_MME_REG_##d1, \
+ NV_MME_REG_##a1, NV_MME_REG_##b1, (i1), \
+ NV_MME_OUT_##m0, NV_MME_OUT_##e0, \
+ NV_MME_OUT_##m1, NV_MME_OUT_##e1)
+
+uint32_t mmec597_per_instance_bf[] = {
+// r1 = load(); // count
+// r3 = load(); // mask
+// mthd(0x1880, 1); // VERTEX_ARRAY_PER_INSTANCE[0]
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (1<<12)|0x1880/4, IMMED0, NONE,
+ ADD, R3, LOAD1, ZERO, 0, NONE, NONE),
+// while (HW_LOOP_COUNT < r1) {
+// send(r3 & 1);
+// r3 >>= 1;
+// }
+ MME_INSN(0, LOOP, ZERO, R1, ZERO, 0x0003, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, AND, ZERO, R3, IMMED, 1, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, SRL, R3, R3, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_vertex_array_select[] = {
+// r1 = load(); // array
+// r2 = load(); // limit hi
+// r3 = load(); // limit lo
+// r4 = load(); // start hi
+// r5 = load(); // start lo
+// r6 = (r1 & 0x1f) << 2;
+// r7 = (r1 & 0x1f) << 1;
+// mthd(0x1c04 + r6, 1); // VERTEX_ARRAY_START_HIGH[]
+// send(r4);
+// send(r5);
+// mthd(0x0600 + r7, 1); // VERTEX_ARRAY_LIMIT_HIGH[]
+// send(r2);
+// send(r3);
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R2, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R4, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R5, LOAD0, ZERO, 0, NONE, NONE,
+ MERGE, R6, ZERO, R1, (2<<10)|(5<<5)|0, NONE, NONE),
+ MME_INSN(0, MERGE, R7, ZERO, R1, (1<<10)|(5<<5)|0, ALU1, NONE,
+ ADD, ZERO, R6, IMMED, (1<<12)|0x1c04/4, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, R5, ZERO, 0, NONE, ALU1),
+ MME_INSN(1, ADD, ZERO, R7, IMMED, (1<<12)|0x0600/4, ALU0, ALU1,
+ ADD, ZERO, R2, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R3, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_blend_enables[] = {
+// r1 = load(); // enable mask
+// mthd(0x1360, 1); // NVC0_3D_BLEND_ENABLE[]
+// send((r1 >> 0) & 1);
+// send((r1 >> 1) & 1);
+// send((r1 >> 2) & 1);
+// send((r1 >> 3) & 1);
+// send((r1 >> 4) & 1);
+// send((r1 >> 5) & 1);
+// send((r1 >> 6) & 1);
+// send((r1 >> 7) & 1);
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, IMMED1, NONE,
+ ADD, ZERO, ZERO, ZERO, (1<<12)|0x1360/4, NONE, NONE),
+ MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|0, NONE, ALU0,
+ MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|1, NONE, ALU1),
+ MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|2, NONE, ALU0,
+ MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|3, NONE, ALU1),
+ MME_INSN(1, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|4, NONE, ALU0,
+ MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|5, NONE, ALU1),
+ MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|6, NONE, ALU0,
+ MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|7, NONE, ALU1),
+};
+
+uint32_t mmec597_poly_mode_front[] = {
+// r1 = load();
+// mthd(0x0dac,0); // POLYGON_MODE_FRONT
+// send(r1);
+// r2 = read(0x0db0); // POLYGON_MODE_BACK
+// r3 = read(0x20c0); // SP_SELECT[3]
+// r7 = r1 | r2;
+// r4 = read(0x2100); // SP_SELECT[4]
+// r6 = 0x60;
+// r7 = r7 & 1;
+// if (r7 != 0)
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x0dac/4, IMMED0, ALU0,
+ STATE, R2, IMMED, ZERO, 0x0db0/4, NONE, NONE),
+ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x20c0/4, NONE, NONE,
+ OR, R7, R1, R2, 0, NONE, NONE),
+ MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE,
+ ADD, R6, IMMED, ZERO, 0x60, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = 0x200;
+ MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r7 = r3 | r4;
+// r7 = r7 & 1;
+// if (r7 != 0)
+ MME_INSN(0, OR, R7, R3, R4, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = 0;
+ MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x02ec, 0);
+// send(r6);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_poly_mode_back[] = {
+// r1 = load();
+// mthd(0x0db0,0); // POLYGON_MODE_BACK
+// send(r1);
+// r2 = read(0x0dac); // POLYGON_MODE_FRONT
+// r3 = read(0x20c0); // SP_SELECT[3]
+// r7 = r1 | r2;
+// r4 = read(0x2100); // SP_SELECT[4]
+// r6 = 0x60;
+// r7 = r7 & 1;
+// if (r7 != 0)
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x0db0/4, IMMED0, ALU0,
+ STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE),
+ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x20c0/4, NONE, NONE,
+ OR, R7, R1, R2, 0, NONE, NONE),
+ MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE,
+ ADD, R6, IMMED, ZERO, 0x60, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = 0x200;
+ MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r7 = r3 | r4;
+// r7 = r7 & 1;
+// if (r7 != 0)
+ MME_INSN(0, OR, R7, R3, R4, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = 0;
+ MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x02ec, 0);
+// send(r6);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_gp_select[] = {
+// r1 = load();
+// mthd(0x2100,0); // SP_SELECT[4]
+// send(r1);
+// r2 = read(0x0dac); // POLYGON_MODE_FRONT
+// r3 = read(0x0db0); // POLYGON_MODE_BACK
+// r7 = r2 | r3;
+// r4 = read(0x20c0); // SP_SELECT[3]
+// r6 = 0x60;
+// r7 = r7 & 1;
+// if (r7 != 0)
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x2100/4, IMMED0, ALU0,
+ STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE),
+ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x0db0/4, NONE, NONE,
+ OR, R7, R2, R3, 0, NONE, NONE),
+ MME_INSN(0, STATE, R4, IMMED, ZERO, 0x20c0/4, NONE, NONE,
+ ADD, R6, IMMED, ZERO, 0x60, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = 0x200;
+ MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r7 = r1 | r4;
+// r7 = r7 & 1;
+// if (r7 != 0)
+ MME_INSN(0, OR, R7, R1, R4, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = 0;
+ MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x02ec, 0);
+// send(r6);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_tep_select[] = {
+// r1 = load();
+// mthd(0x20c0,0); // SP_SELECT[3]
+// send(r1);
+// r2 = read(0x0dac); // POLYGON_MODE_FRONT
+// r3 = read(0x0db0); // POLYGON_MODE_BACK
+// r7 = r2 | r3;
+// r4 = read(0x2100); // SP_SELECT[4]
+// r6 = 0x60;
+// r7 = r7 & 1;
+// if (r7 != 0)
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x20c0/4, IMMED0, ALU0,
+ STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE),
+ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x0db0/4, NONE, NONE,
+ OR, R7, R2, R3, 0, NONE, NONE),
+ MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE,
+ ADD, R6, IMMED, ZERO, 0x60, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = 0x200;
+ MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r7 = r1 | r4;
+// r7 = r7 & 1;
+// if (r7 != 0)
+ MME_INSN(0, OR, R7, R1, R4, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = 0;
+ MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x02ec, 0);
+// send(r6);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_draw_arrays_indirect[] = {
+// r1 = load(); // mode
+// r5 = read(0x1438); // VB_INSTANCE_BASE
+// r6 = load(); // start_drawid
+// r7 = load(); // numparams
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R6, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE,
+ STATE, R5, IMMED, ZERO, 0x1438/4, NONE, NONE),
+// while (HW_LOOP_COUNT < r7) {
+// r2 = load(); // count
+// r3 = load(); // instance_count
+// mthd(0x0d74, 0); // VERTEX_BUFFER_FIRST
+// send(load()); // start
+// r4 = load(); // start_instance
+// if (r3) {
+ MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x000c, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R2, LOAD0, ZERO, 0x0d74/4, IMMED0, NONE,
+ ADD, R3, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0,
+ ADD, R4, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R3, ZERO, (2<<14)|0x0008, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x238c, 1); // CB_POS
+// send(256 + 160);
+// send(0); // base_vertex
+// send(r4); // start_instance
+// send(r6); // draw id
+// mthd(0x1438, 0); // VB_INSTANCE_BASE
+// send(r4);
+// r1 = r1 & ~(1<<26); // clear INSTANCE_NEXT
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1,
+ ADD, ZERO, ZERO, ZERO, 256 + 160, NONE, ALU0),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, R6, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1438/4, IMMED0, ALU0,
+ MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE),
+// do {
+// mthd(0x1618, 0); // VERTEX_BEGIN_GL
+// send(r1); // mode
+// mthd(0x0d78, 0); // VERTEX_BUFFER_COUNT
+// send(r2); // count
+// mthd(0x1614, 0); // VERTEX_END_GL
+// send(0);
+// r1 |= (1<<26); // set INSTANCE_NEXT
+// } while(--r3);
+// }
+ MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0,
+ ADD, ZERO, R2, ZERO, 0x0d78/4, IMMED1, ALU1),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0,
+ ADD, R4, IMMED, ZERO, 1, NONE, NONE),
+ MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE,
+ SUB, R3, R3, IMMED, 1, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x3ffd, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = r6 + 1;
+// };
+ MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x1438, 0); // restore VB_INSTANCE_BASE
+// send(r5);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0x1438/4, IMMED0, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R5, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_draw_elts_indirect[] = {
+// r1 = load(); // mode
+// r8 = read(0x1434); // VB_ELEMENT_BASE
+// r9 = read(0x1438); // VB_INSTANCE_BASE
+// r6 = load(); // start_drawid
+// r7 = load(); // numparams
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ STATE, R8, IMMED, ZERO, 0x1434/4, NONE, NONE),
+ MME_INSN(0, STATE, R9, IMMED, ZERO, 0x1438/4, NONE, NONE,
+ ADD, R6, LOAD0, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// while (HW_LOOP_COUNT < r7) {
+// r3 = load(); // count
+// r2 = load(); // instance_count
+// mthd(0x17dc, 0); // INDEX_BATCH_FIRST
+// send(load()); // start
+// r4 = load(); // index_bias
+// mthd(0x238c, 1); // CB_POS
+// send(256 + 160);
+// send(r4); // index_bias
+// r5 = load(); // start_instance
+// if (r2) {
+ MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x000d, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0x17dc/4, IMMED0, NONE,
+ ADD, R2, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0,
+ ADD, R4, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1,
+ ADD, ZERO, R4, ZERO, 256 + 160, NONE, ALU1),
+ MME_INSN(0, BEQ, ZERO, R2, ZERO, (2<<14)|0x0008, NONE, NONE,
+ ADD, R5, LOAD0, ZERO, 0, NONE, NONE),
+// send(r5); // start_instance
+// send(r6); // draw_id
+// mthd(0x1434, 1); // VB_ELEMENT_BASE
+// send(r4); // index_bias
+// send(r5); // start_instance
+// mthd(0x1118, 0); // VERTEX_ID_BASE
+// send(r4); // index_bias
+// r1 &= ~(1 << 26); // clear INSTANCE_NEXT
+ MME_INSN(0, ADD, ZERO, R5, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, R6, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0,
+ ADD, ZERO, R5, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1118/4, IMMED0, ALU0,
+ MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE),
+// do {
+// mthd(0x1618, 0); // VERTEX_BEGIN_GL
+// send(r1); // mode
+// mthd(0x17e0, 0); // INDEX_BATCH_COUNT
+// send(r3); // count
+// mthd(0x1614, 0); // VERTEX_END_GL
+// send(0);
+// r1 |= (1 << 26); // set INSTANCE_NEXT
+// } while (--r2);
+// }
+ MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0,
+ ADD, ZERO, R3, ZERO, 0x17e0/4, IMMED1, ALU1),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0,
+ ADD, R4, IMMED, ZERO, 1, NONE, NONE),
+ MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE,
+ SUB, R2, R2, IMMED, 1, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R2, ZERO, (1<<14)|0x3ffd, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = r6 + 1;
+// };
+ MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x1434, 1);
+// send(r8); // restore VB_ELEMENT_BASE
+// send(r9); // restore VB_INSTANCE_BASE
+// mthd(0x1118, 0);
+// send(r8); // restore VERTEX_ID_BASE
+ MME_INSN(1, ADD, ZERO, R8, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0,
+ ADD, ZERO, R9, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R8, ZERO, 0x1118/4, IMMED0, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_draw_arrays_indirect_count[] = {
+// r1 = load(); // mode
+// r6 = load(); // start_drawid
+// r7 = load(); // numparams
+// r5 = load(); // totaldraws
+// r8 = read(0x1438); // VB_INSTANCE_BASE
+// r5 = r5 - r6; // remaining draws
+// if (r5 > r7)
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R6, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R5, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, STATE, R8, IMMED, ZERO, 0x1438/4, NONE, NONE,
+ SUB, R5, R5, R6, 0, NONE, NONE),
+ MME_INSN(0, BLE, ZERO, R5, R7, (2<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r5 = r7;
+ MME_INSN(0, ADD, R5, R7, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// if (r5 >= 0) {
+ MME_INSN(0, BLT, ZERO, R5, ZERO, (2<<14)|0x000e, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// while (HW_LOOP_COUNT < r5) {
+// r2 = load(); // count
+// r3 = load(); // instance_count
+// mthd(0x0d74, 0); // VERTEX_BUFFER_FIRST
+// send(load()); // start
+// r4 = load(); // start_instance
+// if (r3) {
+ MME_INSN(0, LOOP, ZERO, R5, ZERO, 0x000c, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R2, LOAD0, ZERO, 0x0d74/4, IMMED0, NONE,
+ ADD, R3, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0,
+ ADD, R4, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R3, ZERO, (2<<14)|0x0008, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x238c, 1); // CB_POS
+// send(256 + 160);
+// send(0); // base_vertex
+// send(r4); // start_instance
+// send(r6); // draw_id
+// mthd(0x1438, 0); // VB_INSTANCE_BASE
+// send(r4);
+// r1 &= ~(1 << 26); // clear INSTANCE_NEXT
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1,
+ ADD, ZERO, ZERO, ZERO, 256+160, NONE, ALU0),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, R6, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1438/4, IMMED0, ALU0,
+ MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE),
+// do {
+// mthd(0x1618, 0); // VERTEX_BEGIN_GL
+// send(r1); // mode
+// mthd(0x0d78, 0); // VERTEX_BUFFER_COUNT
+// send(r2);
+// mthd(0x1614, 0); // VERTEX_END_GL
+// send(0);
+// r1 |= (1 << 26); // set INSTANCE_NEXT
+// } while (--r3);
+// }
+ MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0,
+ ADD, ZERO, R2, ZERO, 0x0d78/4, IMMED1, ALU1),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0,
+ ADD, R4, IMMED, ZERO, 1, NONE, NONE),
+ MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE,
+ SUB, R3, R3, IMMED, 1, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x3ffd, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = r6 + 1; // draw_id++
+// }
+ MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r7 = r7 - r5; // unneeded params
+// }
+ MME_INSN(0, SUB, R7, R7, R5, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// while (HW_LOOP_COUNT < r7) {
+// load();
+// load();
+// load();
+// load();
+// }
+ MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x0003, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE),
+// exit mthd(0x1438, 0); // VB_INSTANCE_BASE
+// send(r8);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0x1438/4, IMMED0, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R8, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_draw_elts_indirect_count[] = {
+// r8 = read(0x1434);
+// r1 = load();
+// r9 = read(0x1438);
+// r6 = load();
+// r7 = load();
+// r5 = load();
+// r5 = r5 - r6;
+// if (r5 > r7)
+ MME_INSN(0, STATE, R8, IMMED, ZERO, 0x1434/4, NONE, NONE,
+ ADD, R1, LOAD0, ZERO, 0, NONE, NONE),
+ MME_INSN(0, STATE, R9, IMMED, ZERO, 0x1438/4, NONE, NONE,
+ ADD, R6, LOAD0, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R5, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, SUB, R5, R5, R6, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BLE, ZERO, R5, R7, (2<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r5 = r7;
+ MME_INSN(0, ADD, R5, R7, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// if (r5 >= 0) {
+ MME_INSN(0, BLT, ZERO, R5, ZERO, (2<<14)|0x000f, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// while (HW_LOOP_COUNT < r5) {
+// r3 = load();
+// r2 = load();
+// mthd(0x17dc, 0);
+// send(load());
+// r4 = load();
+// mthd(0x238c, 1);
+// send(256 + 160);
+// send(r4);
+// r10 = load();
+// if (r2) {
+ MME_INSN(0, LOOP, ZERO, R5, ZERO, 0x000d, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, (0<<12)|0x17dc/4, IMMED0, NONE,
+ ADD, R2, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, (1<<12)|0x238c/4, NONE, ALU0,
+ ADD, R4, LOAD1, ZERO, 256 + 160, IMMED0, IMMED1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0,
+ ADD, R10, LOAD0, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R2, ZERO, (2<<14)|0x0008, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// send(r10);
+// send(r6);
+// mthd(0x1434, 1);
+// send(r4);
+// send(r10);
+// mthd(0x1118, 0);
+// send(r4);
+// r1 &= ~(1 << 26);
+ MME_INSN(0, ADD, ZERO, R10, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, R6, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0,
+ ADD, ZERO, R10, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, (0<<12)|0x1118/4, IMMED0, ALU0,
+ MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE),
+// do {
+// mthd(0x1618, 0);
+// send(r1);
+// mthd(0x17e0, 0);
+// send(r3);
+// mthd(0x1614, 0);
+// send(0);
+// r1 |= (1 << 26);
+// } while (--r2);
+// }
+ MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0,
+ ADD, ZERO, R3, ZERO, 0x17e0/4, IMMED1, ALU1),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0,
+ ADD, R4, IMMED, ZERO, 1, NONE, NONE),
+ MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE,
+ SUB, R2, R2, IMMED, 1, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R2, ZERO, (1<<14)|0x3ffd, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = r6 + 1;
+// }
+ MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r7 = r7 - r5; // unneeded params
+// }
+ MME_INSN(0, SUB, R7, R7, R5, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// while (HW_LOOP_COUNT < r7) {
+// r2 = load();
+// r2 = load();
+// r2 = load();
+// r2 = load();
+// r2 = load();
+// }
+ MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x0004, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x1434, 1);
+// send(r8);
+// send(r9);
+// exit mthd(0x1118, 0);
+// send(r8);
+ MME_INSN(1, ADD, ZERO, R8, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0,
+ ADD, ZERO, R9, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R8, ZERO, (0<<12)|0x1118/4, IMMED0, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_query_buffer_write[] = {
+// r1 = load(); // clamp value
+// r2 = load(); // end value (lo)
+// r3 = load(); // end value (hi)
+// r4 = load(); // start value (lo)
+// r5 = load(); // start value (hi)
+// r8 = load(); // desired sequence
+// r9 = load(); // actual sequence
+// r7 = load(); // query address (hi)
+// r6 = load(); // query address (lo)
+// if (r9 >= r8) {
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R2, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R4, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R5, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R8, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R9, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R7, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R6, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BLT, ZERO, R9, R8, (2<<14)|0x000e, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// [r3,r2] = [r3,r2] - [r5,r4];
+// if (r1) {
+ MME_INSN(0, SUB, R2, R2, R4, 0, NONE, NONE,
+ SUBB, R3, R3, R5, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R1, ZERO, (2<<14)|0x0004, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// if (r3 != 0 || r1 < r2)
+// r2 = r1;
+// }
+ MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BLTU, ZERO, R1, R2, (1<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R2, R1, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x1b00, 1);
+// send(r7);
+// send(r6);
+// send(r2)
+// send(0x10000000);
+// if (!r1) {
+ MME_INSN(0, ADD, ZERO, R7, ZERO, (1<<12)|0x1b00/4, IMMED0, ALU0,
+ ADD, ZERO, R6, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R2, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R1, ZERO, (1<<14)|0x0004, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// [r7,r6] = [r7,r6] + 4;
+// mthd(0x1b00, 1);
+// send(r7);
+// send(r6);
+// send(r3);
+// send(0x10000000);
+// }
+ MME_INSN(0, ADD, ZERO, R6, IMMED, 4, IMMED1, ALU1,
+ ADDC, ZERO, R7, ZERO, (1<<12)|0x1b00/4, NONE, ALU0),
+ MME_INSN(0, ADD, ZERO, R3, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+// mthd(0x0110, 0);
+// send(0);
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (0<<12)|0x0110/4, IMMED0, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// }
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_conservative_raster_state[] = {
+// r1 = load();
+// mthd(0x3400, 1);
+// send(0);
+// send(((r1 >> 8) & 7) << 23);
+// send(0x03800000);
+// mthd(0x2310, 1);
+// send(0x00418800);
+// r2 = r1 & 0xf;
+// r3 = 16;
+// r2 = r2 | (((r1 >> 4) & 0xf) << 8);
+// mthd(0x0a1c, 8);
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (1<<12)|0x3400/4, IMMED0, IMMED1,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, MERGE, ZERO, ZERO, R1, (23<<10)|(3<<5)|8, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x0380, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x2310/4, IMMED0, NONE,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x0041, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x8800, NONE, NONE),
+ MME_INSN(0, AND, R2, R1, IMMED, 0xf, NONE, NONE,
+ ADD, R3, ZERO, IMMED, 16, NONE, NONE),
+ MME_INSN(0, MERGE, R2, R2, R1, (8<<10)|(4<<5)|4, IMMED1, NONE,
+ ADD, ZERO, ZERO, ZERO, (8<<12)|0x0a1c/4, NONE, NONE),
+// while (HW_LOOP_COUNT < r3)
+// send(r2);
+ MME_INSN(0, LOOP, ZERO, R3, ZERO, 0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R2, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x1148, 0);
+// send(1); // NOTE(review): the emit below selects IMMED1, but the 1 is in ALU0's immediate slot and ALU1's is 0 - confirm 1 is really sent
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x1148/4, IMMED0, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 1, NONE, IMMED1,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_compute_counter[] = {
+// r0 = load();
+// r1 = 1;
+// r2 = 0;
+// while (HW_LOOP_COUNT < r0) {
+ MME_INSN(0, ADD, R0, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R1, IMMED, ZERO, 1, NONE, NONE),
+ MME_INSN(0, LOOP, ZERO, R0, ZERO, 0x0003, NONE, NONE,
+ ADD, R2, ZERO, ZERO, 0, NONE, NONE),
+// r3 = load();
+// [r2,r1] = r1 * r3; // MULU writes r1, MULH writes r2
+// }
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, MULU, R1, R1, R3, 0, NONE, NONE,
+ MULH, R2, ZERO, ZERO, 0, NONE, NONE),
+// r3 = read(0x3410); // NOTE(review): the STATE op below uses ZERO for dst and src, not "R3, IMMED" as other reads in this file do - confirm
+// r4 = read(0x3414); // NOTE(review): likewise, the STATE op below targets ZERO rather than R4 - confirm
+// [r4,r3] += [r2,r1];
+// mthd(0x3410, 1);
+// send(r3);
+// send(r4);
+ MME_INSN(0, STATE, ZERO, ZERO, ZERO, 0x3410/4, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(1, STATE, ZERO, ZERO, ZERO, 0x3414/4, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, R3, R1, (1<<12)|0x3410/4, IMMED0, ALU0,
+ ADDC, R4, R4, R2, 0, NONE, ALU1),
+};
+
+uint32_t mmec597_compute_counter_to_query[] = {
+// r1 = load();
+// r3 = read(0x3410);
+// r2 = load();
+// r4 = read(0x3414);
+// [r2,r1] = [r2,r1] + [r4,r3];
+// mthd(0x1b00, 1);
+// r3 = load();
+// send(r3);
+// r4 = load();
+// send(r4);
+// send(r1);
+// send(0x10000000);
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ STATE, R3, IMMED, ZERO, 0x3410/4, NONE, NONE),
+ MME_INSN(0, ADD, R2, LOAD0, ZERO, 0, NONE, NONE,
+ STATE, R4, IMMED, ZERO, 0x3414/4, NONE, NONE),
+ MME_INSN(0, ADD, R1, R1, R3, (1<<12)|0x1b00/4, IMMED0, NONE,
+ ADDC, R2, R2, R4, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, ALU0,
+ ADD, R4, LOAD1, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R1, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+// [r3,r4] = [r3,r4] + 4;
+// mthd(0x1b00, 1);
+// send(r3);
+// send(r4);
+// send(r2);
+// send(0x10000000);
+ MME_INSN(0, ADD, ZERO, R4, IMMED, 4, IMMED1, ALU1,
+ ADDC, ZERO, R3, ZERO, (1<<12)|0x1b00/4, NONE, ALU0),
+ MME_INSN(1, ADD, ZERO, R2, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
index 1c5a8dc0b1f..539bdc75022 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
@@ -157,6 +157,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_UNK0220__ESIZE 0x00000004
#define NVC0_3D_UNK0220__LEN 0x00000028
+#define TU102_3D_INDEX_ARRAY_LIMIT_HIGH 0x00000238
+
+#define TU102_3D_INDEX_ARRAY_LIMIT_LOW 0x0000023c
+
+#define TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE 0x000002b8
+
#define NVC0_3D_UNK02C0 0x000002c0
#define NVC0_3D_UNK02C4 0x000002c4
@@ -278,6 +284,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_UNK0400__ESIZE 0x00000004
#define NVC0_3D_UNK0400__LEN 0x000000c0
+#define TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00000600 + 0x8*(i0))
+#define TU102_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00000604 + 0x8*(i0))
+
#define NVC0_3D_TFB_STREAM(i0) (0x00000700 + 0x10*(i0))
#define NVC0_3D_TFB_STREAM__ESIZE 0x00000010
#define NVC0_3D_TFB_STREAM__LEN 0x00000004
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
index 578335d7001..a095515e48d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
@@ -38,6 +38,55 @@ nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, bool is_3d)
}
static uint32_t
+tu102_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
+{ /*
+ * Choose the memory "kind" value for a miptree; nvc0_miptree_create()
+ * stores the result in bo_config.nvc0.memtype when dev->chipset >= 0x160.
+ *
+ * mt:         miptree whose bind flags, resource flags and format are read.
+ * compressed: selects the *_COMPRESSIBLE_DISABLE_PLC kind for the
+ *             depth/stencil formats listed below instead of the plain one.
+ *
+ * Returns 0 for cursor-bound or linear-flagged resources, otherwise the
+ * kind chosen by format.
+ */
+ uint32_t kind;
+
+ if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR)) /* cursor: kind 0 */
+ return 0;
+ if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR)) /* linear: kind 0 */
+ return 0;
+
+ switch (mt->base.base.format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ if (compressed)
+ kind = 0x0b; // NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC
+ else
+ kind = 0x01; // NV_MMU_PTE_KIND_Z16
+ break;
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8X24_UINT:
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ if (compressed)
+ kind = 0x0e; // NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC
+ else
+ kind = 0x05; // NV_MMU_PTE_KIND_Z24S8
+ break;
+ case PIPE_FORMAT_X24S8_UINT:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ if (compressed)
+ kind = 0x0c; // NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC
+ else
+ kind = 0x03; // NV_MMU_PTE_KIND_S8Z24
+ break;
+ case PIPE_FORMAT_X32_S8X24_UINT:
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ if (compressed)
+ kind = 0x0d; // NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC
+ else
+ kind = 0x04; // NV_MMU_PTE_KIND_ZF32_X24S8
+ break;
+ case PIPE_FORMAT_Z32_FLOAT: /* shares the default kind; "compressed" is ignored here */
+ default:
+ kind = 0x06; /* NOTE(review): unnamed here, presumably a generic kind -- confirm */
+ break;
+ }
+
+ return kind;
+}
+
+static uint32_t
nvc0_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
{
const unsigned ms = util_logbase2(mt->base.base.nr_samples);
@@ -357,7 +406,10 @@ nvc0_miptree_create(struct pipe_screen *pscreen,
if (pt->bind & PIPE_BIND_LINEAR)
pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR;
- bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed);
+ if (dev->chipset < 0x160)
+ bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed);
+ else
+ bo_config.nvc0.memtype = tu102_mt_choose_storage_type(mt, compressed);
if (!nvc0_miptree_init_ms_mode(mt)) {
FREE(mt);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index b9fff341f28..d2b2de47c8d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -737,7 +737,14 @@ nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
struct nvc0_screen *screen = nvc0->screen;
const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
int ret;
- uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
+ uint32_t size = prog->code_size;
+
+ if (!is_cp) {
+ if (screen->eng3d->oclass < TU102_3D_CLASS)
+ size += GF100_SHADER_HEADER_SIZE;
+ else
+ size += TU102_SHADER_HEADER_SIZE;
+ }
/* On Fermi, SP_START_ID must be aligned to 0x40.
* On Kepler, the first instruction must be aligned to 0x80 because
@@ -753,7 +760,8 @@ nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
prog->code_base = prog->mem->start;
if (!is_cp) {
- if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ if (screen->base.class_3d >= NVE4_3D_CLASS &&
+ screen->base.class_3d < TU102_3D_CLASS) {
switch (prog->mem->start & 0xff) {
case 0x40: prog->code_base += 0x70; break;
case 0x80: prog->code_base += 0x30; break;
@@ -780,7 +788,16 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
struct nvc0_screen *screen = nvc0->screen;
const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
- uint32_t code_pos = prog->code_base + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
+ uint32_t code_pos = prog->code_base;
+ uint32_t size_sph = 0;
+
+ if (!is_cp) {
+ if (screen->eng3d->oclass < TU102_3D_CLASS)
+ size_sph = GF100_SHADER_HEADER_SIZE;
+ else
+ size_sph = TU102_SHADER_HEADER_SIZE;
+ }
+ code_pos += size_sph;
if (prog->relocs)
nv50_ir_relocate_code(prog->relocs, prog->code, code_pos,
@@ -806,8 +823,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
if (!is_cp)
nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
- NV_VRAM_DOMAIN(&screen->base),
- NVC0_SHADER_HEADER_SIZE, prog->hdr);
+ NV_VRAM_DOMAIN(&screen->base), size_sph, prog->hdr);
nvc0->base.push_data(&nvc0->base, screen->text, code_pos,
NV_VRAM_DOMAIN(&screen->base), prog->code_size,
@@ -820,7 +836,14 @@ nvc0_program_upload(struct nvc0_context *nvc0, struct nvc0_program *prog)
struct nvc0_screen *screen = nvc0->screen;
const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
int ret;
- uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
+ uint32_t size = prog->code_size;
+
+ if (!is_cp) {
+ if (screen->eng3d->oclass < TU102_3D_CLASS)
+ size += GF100_SHADER_HEADER_SIZE;
+ else
+ size += TU102_SHADER_HEADER_SIZE;
+ }
ret = nvc0_program_alloc_code(nvc0, prog);
if (ret) {
@@ -955,7 +978,7 @@ nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label)
unsigned base = 0;
unsigned i;
if (prog->type != PIPE_SHADER_COMPUTE)
- base = NVC0_SHADER_HEADER_SIZE;
+ base = GF100_SHADER_HEADER_SIZE;
for (i = 0; i < prog->cp.num_syms; ++i)
if (syms[i].label == label)
return prog->code_base + base + syms[i].offset;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
index 6e965ae9d9e..2c465b342e9 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -15,7 +15,9 @@ struct nvc0_transform_feedback_state {
};
-#define NVC0_SHADER_HEADER_SIZE (20 * 4)
+#define GF100_SHADER_HEADER_SIZE (20 * 4) /* byte size of 20 32-bit words; used when the 3D class is below TU102_3D_CLASS */
+#define TU102_SHADER_HEADER_SIZE (32 * 4) /* byte size of 32 32-bit words; used for TU102_3D_CLASS and newer */
+#define NVC0_MAX_SHADER_HEADER_SIZE TU102_SHADER_HEADER_SIZE /* sizes nvc0_program::hdr[] (divided by 4) */
struct nvc0_program {
struct pipe_shader_state pipe;
@@ -30,7 +32,7 @@ struct nvc0_program {
unsigned code_size;
unsigned parm_size; /* size of non-bindable uniforms (c0[]) */
- uint32_t hdr[20];
+ uint32_t hdr[NVC0_MAX_SHADER_HEADER_SIZE/4];
uint32_t flags[2];
struct {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 2807b59a4fd..073b44dc79f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -37,6 +37,7 @@
#include "nvc0/mme/com9097.mme.h"
#include "nvc0/mme/com90c0.mme.h"
+#include "nvc0/mme/comc597.mme.h"
#include "nv50/g80_texture.xml.h"
@@ -728,6 +729,26 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
return pos + size;
}
+static int
+tu102_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
+ unsigned size, const uint32_t *data)
+{ /*
+ * Emit the pushbuf commands that upload one macro program.
+ *
+ * screen: supplies the pushbuf (screen->base.pushbuf) written to.
+ * m:      macro method; (m - 0x3800) / 8 is sent as the first
+ *         NVC0_GRAPH_MACRO_ID data word.
+ * pos:    upload position, sent both as the second NVC0_GRAPH_MACRO_ID
+ *         word and as the first NVC0_GRAPH_MACRO_UPLOAD_POS word.
+ * size:   size of data in bytes (the MK_MACRO caller passes sizeof(array)).
+ * data:   the macro code words.
+ *
+ * Returns the position the MK_MACRO caller passes as pos for the next macro.
+ */
+ struct nouveau_pushbuf *push = screen->base.pushbuf;
+
+ size /= 4; /* bytes -> number of 32-bit words */
+
+ assert((pos + size) <= 0x800);
+
+ BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2);
+ PUSH_DATA (push, (m - 0x3800) / 8);
+ PUSH_DATA (push, pos);
+ BEGIN_1IC0(push, SUBC_3D(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1); /* pos word + size code words */
+ PUSH_DATA (push, pos);
+ PUSH_DATAp(push, data, size);
+
+ return pos + (size / 3); /* NOTE(review): advances by words/3 (presumably three words per instruction) while the assert above adds the raw word count -- confirm units */
+}
+
static void
nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
{
@@ -838,6 +859,7 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
case 0x120:
case 0x130:
case 0x140:
+ case 0x160:
return nve4_screen_compute_setup(screen, screen->base.pushbuf);
default:
return -1;
@@ -997,6 +1019,7 @@ nvc0_screen_create(struct nouveau_device *dev)
case 0x120:
case 0x130:
case 0x140:
+ case 0x160:
break;
default:
return NULL;
@@ -1074,6 +1097,7 @@ nvc0_screen_create(struct nouveau_device *dev)
}
switch (dev->chipset & ~0xf) {
+ case 0x160:
case 0x140:
case 0x130:
case 0x120:
@@ -1128,6 +1152,9 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, screen->fence.bo->offset + 16);
switch (dev->chipset & ~0xf) {
+ case 0x160:
+ obj_class = TU102_3D_CLASS;
+ break;
case 0x140:
obj_class = GV100_3D_CLASS;
break;
@@ -1378,25 +1405,47 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, 16384 << 16);
}
+ if (screen->eng3d->oclass < TU102_3D_CLASS) {
#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n);
- i = 0;
- MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf);
- MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables);
- MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select);
- MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select);
- MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select);
- MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front);
- MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back);
- MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect);
- MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
- MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
- MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
- MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
- MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state);
- MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter);
- MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query);
- MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
+ i = 0;
+ MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf);
+ MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables);
+ MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select);
+ MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select);
+ MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select);
+ MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front);
+ MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
+ MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
+ MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state);
+ MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter);
+ MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query);
+ MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
+ } else {
+#undef MK_MACRO
+#define MK_MACRO(m, n) i = tu102_graph_set_macro(screen, m, i, sizeof(n), n);
+
+ i = 0;
+ MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mmec597_per_instance_bf);
+ MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mmec597_blend_enables);
+ MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mmec597_vertex_array_select);
+ MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mmec597_tep_select);
+ MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mmec597_gp_select);
+ MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mmec597_poly_mode_front);
+ MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mmec597_poly_mode_back);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mmec597_draw_arrays_indirect);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mmec597_draw_elts_indirect);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mmec597_draw_arrays_indirect_count);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mmec597_draw_elts_indirect_count);
+ MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mmec597_query_buffer_write);
+ MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mmec597_conservative_raster_state);
+ MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mmec597_compute_counter);
+ MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mmec597_compute_counter_to_query);
+ }
BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
PUSH_DATA (push, 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 0bba3ada8da..731b0b5dbf8 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -140,6 +140,11 @@ nvc0_2d_texture_set(struct nouveau_pushbuf *push, bool dst,
PUSH_DATA (push, bo->offset + offset);
}
+ if (dst) {
+ IMMED_NVC0(push, SUBC_2D(NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE),
+ util_format_is_depth_or_stencil(pformat));
+ }
+
#if 0
if (dst) {
BEGIN_NVC0(push, SUBC_2D(NVC0_2D_CLIP_X), 4);
@@ -1233,6 +1238,11 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
}
}
+ if (screen->eng3d->oclass >= TU102_3D_CLASS) {
+ IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE),
+ util_format_is_depth_or_stencil(info->dst.format));
+ }
+
IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0);
IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 |
NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1);
@@ -1293,7 +1303,10 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
PUSH_DATAh(push, vtxbuf);
PUSH_DATA (push, vtxbuf);
PUSH_DATA (push, 0);
- BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+ if (screen->eng3d->oclass < TU102_3D_CLASS)
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+ else
+ BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
PUSH_DATAh(push, vtxbuf + length - 1);
PUSH_DATA (push, vtxbuf + length - 1);
@@ -1370,6 +1383,8 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
/* restore viewport transform */
IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1);
+ if (screen->eng3d->oclass >= TU102_3D_CLASS)
+ IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE), 0);
}
static void
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 92bd7eb5b8e..8287d8431b1 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -360,7 +360,11 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
PUSH_DATAh(push, res->address + offset);
PUSH_DATA (push, res->address + offset);
}
- BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
+
+ if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
+ else
+ BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
PUSH_DATAh(push, res->address + limit);
PUSH_DATA (push, res->address + limit);
@@ -406,7 +410,11 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
PUSH_DATAh(push, buf->address + offset);
PUSH_DATA (push, buf->address + offset);
- BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
+
+ if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
+ else
+ BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
PUSH_DATAh(push, buf->address + limit);
PUSH_DATA (push, buf->address + limit);
@@ -961,12 +969,23 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
assert(nouveau_resource_mapped_by_gpu(&buf->base));
PUSH_SPACE(push, 6);
- BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5);
- PUSH_DATAh(push, buf->address);
- PUSH_DATA (push, buf->address);
- PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
- PUSH_DATA (push, buf->address + buf->base.width0 - 1);
- PUSH_DATA (push, info->index_size >> 1);
+ if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) {
+ BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5);
+ PUSH_DATAh(push, buf->address);
+ PUSH_DATA (push, buf->address);
+ PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
+ PUSH_DATA (push, buf->address + buf->base.width0 - 1);
+ PUSH_DATA (push, info->index_size >> 1);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 2);
+ PUSH_DATAh(push, buf->address);
+ PUSH_DATA (push, buf->address);
+ BEGIN_NVC0(push, SUBC_3D(TU102_3D_INDEX_ARRAY_LIMIT_HIGH), 2);
+ PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
+ PUSH_DATA (push, buf->address + buf->base.width0 - 1);
+ BEGIN_NVC0(push, NVC0_3D(INDEX_FORMAT), 1);
+ PUSH_DATA (push, info->index_size >> 1);
+ }
BCTX_REFN(nvc0->bufctx_3d, 3D_IDX, buf, RD);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
index 8aa7088dfec..d49a5dfd2cf 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
@@ -228,7 +228,11 @@ nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count)
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_START_HIGH(0)), 2);
PUSH_DATAh(push, va);
PUSH_DATA (push, va);
- BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+
+ if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+ else
+ BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
PUSH_DATAh(push, va + size - 1);
PUSH_DATA (push, va + size - 1);
@@ -771,7 +775,11 @@ nvc0_push_upload_vertex_ids(struct push_context *ctx,
PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size);
PUSH_DATAh(push, va);
PUSH_DATA (push, va);
- BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
+
+ if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
+ else
+ BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
PUSH_DATAh(push, va + info->count * index_size - 1);
PUSH_DATA (push, va + info->count * index_size - 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 3a3f0a926de..d4687b652ba 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -52,6 +52,9 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
uint64_t address;
switch (dev->chipset & ~0xf) {
+ case 0x160:
+ obj_class = TU102_COMPUTE_CLASS;
+ break;
case 0x140:
obj_class = GV100_COMPUTE_CLASS;
break;
diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
index 2dbe7be0211..d123c8a1c17 100644
--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
@@ -105,6 +105,7 @@ nouveau_drm_screen_create(int fd)
case 0x120:
case 0x130:
case 0x140:
+ case 0x160:
init = nvc0_screen_create;
break;
default: