summary refs log tree commit diff
diff options
context:
space:
mode:
author Christoph Bumiller <e0425955@student.tuwien.ac.at> 2012-04-14 23:56:56 +0200
committer Christoph Bumiller <e0425955@student.tuwien.ac.at> 2012-04-15 00:08:51 +0200
commit e44089b2f79aa2dcaacf348911433d1e21235c0c (patch)
tree 955d621392f0068ef8e3c98dc46195ff3916525e
parent 69a921892d2303f1400576aa73980c28880f8654 (diff)
nvc0: add initial support for nve4+ (Kepler) chipsets
Most things that work on Fermi should work on Kepler too. There are a few performance optimizations left to do, like better placement of texture barriers and adding scheduling data to the shader instructions (without them, a thread group will be masked for 32 cycles after each single instruction issue).
-rw-r--r-- src/gallium/drivers/nouveau/nouveau_screen.h 2
-rw-r--r-- src/gallium/drivers/nouveau/nv_object.xml.h 3
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir.h 2
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_driver.h 2
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp 6
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h 1
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp 1
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp 3
-rw-r--r-- src/gallium/drivers/nv50/nv50_screen.c 1
-rw-r--r-- src/gallium/drivers/nv50/nv50_state.c 7
-rw-r--r-- src/gallium/drivers/nv50/nv50_texture.xml.h 12
-rw-r--r-- src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp 12
-rw-r--r-- src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp 74
-rw-r--r-- src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp 49
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_3d.xml.h 13
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_context.c 8
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_context.h 25
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_program.c 61
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_screen.c 174
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_screen.h 8
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_shader_state.c 3
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_state_validate.c 38
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_surface.c 2
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_tex.c 165
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_transfer.c 159
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_winsys.h 19
-rw-r--r-- src/gallium/drivers/nvc0/nve4_p2mf.xml.h 107
-rw-r--r-- src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c 1
28 files changed, 799 insertions, 159 deletions
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index a2784773143..4ca286bfe8d 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -19,6 +19,8 @@ struct nouveau_screen {
19 19
20 unsigned sysmem_bindings; 20 unsigned sysmem_bindings;
21 21
22 uint16_t class_3d;
23
22 struct { 24 struct {
23 struct nouveau_fence *head; 25 struct nouveau_fence *head;
24 struct nouveau_fence *tail; 26 struct nouveau_fence *tail;
diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h
index d87d7139bf3..66ba61b4622 100644
--- a/src/gallium/drivers/nouveau/nv_object.xml.h
+++ b/src/gallium/drivers/nouveau/nv_object.xml.h
@@ -188,15 +188,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
188#define NVC0_3D_CLASS 0x00009097 188#define NVC0_3D_CLASS 0x00009097
189#define NVC1_3D_CLASS 0x00009197 189#define NVC1_3D_CLASS 0x00009197
190#define NVC8_3D_CLASS 0x00009297 190#define NVC8_3D_CLASS 0x00009297
191#define NVE4_3D_CLASS 0x0000a097
191#define NV50_2D_CLASS 0x0000502d 192#define NV50_2D_CLASS 0x0000502d
192#define NVC0_2D_CLASS 0x0000902d 193#define NVC0_2D_CLASS 0x0000902d
193#define NV50_COMPUTE_CLASS 0x000050c0 194#define NV50_COMPUTE_CLASS 0x000050c0
194#define NVA3_COMPUTE_CLASS 0x000085c0 195#define NVA3_COMPUTE_CLASS 0x000085c0
195#define NVC0_COMPUTE_CLASS 0x000090c0 196#define NVC0_COMPUTE_CLASS 0x000090c0
196#define NVC8_COMPUTE_CLASS 0x000092c0 197#define NVC8_COMPUTE_CLASS 0x000092c0
198#define NVE4_COMPUTE_CLASS 0x0000a0c0
197#define NV84_CRYPT_CLASS 0x000074c1 199#define NV84_CRYPT_CLASS 0x000074c1
198#define BLOB_NVC0_PCOPY1_CLASS 0x000090b8 200#define BLOB_NVC0_PCOPY1_CLASS 0x000090b8
199#define BLOB_NVC0_PCOPY0_CLASS 0x000090b5 201#define BLOB_NVC0_PCOPY0_CLASS 0x000090b5
202#define NVE4_P2MF_CLASS 0x0000a040
200#define NV31_MPEG_CLASS 0x00003174 203#define NV31_MPEG_CLASS 0x00003174
201#define NV84_MPEG_CLASS 0x00008274 204#define NV84_MPEG_CLASS 0x00008274
202 205
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h
index 6ec4fc95441..c299cab3f52 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h
@@ -131,6 +131,7 @@ enum operation
131 OP_POPCNT, // bitcount(src0 & src1) 131 OP_POPCNT, // bitcount(src0 & src1)
132 OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7] 132 OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
133 OP_EXTBF, 133 OP_EXTBF,
134 OP_TEXBAR,
134 OP_LAST 135 OP_LAST
135}; 136};
136 137
@@ -141,6 +142,7 @@ enum operation
141#define NV50_IR_SUBOP_LDC_ISL 3 142#define NV50_IR_SUBOP_LDC_ISL 3
142#define NV50_IR_SUBOP_SHIFT_WRAP 1 143#define NV50_IR_SUBOP_SHIFT_WRAP 1
143#define NV50_IR_SUBOP_EMU_PRERET 1 144#define NV50_IR_SUBOP_EMU_PRERET 1
145#define NV50_IR_SUBOP_TEXBAR(n) n
144 146
145enum DataType 147enum DataType
146{ 148{
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
index e734c5b03bd..9632986fe40 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
@@ -163,6 +163,8 @@ struct nv50_ir_prog_info
163 uint8_t clipDistanceMask; /* mask of clip distances defined */ 163 uint8_t clipDistanceMask; /* mask of clip distances defined */
164 uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */ 164 uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */
165 int8_t genUserClip; /* request user clip planes for ClipVertex */ 165 int8_t genUserClip; /* request user clip planes for ClipVertex */
166 uint16_t ucpBase; /* base address for UCPs */
167 uint8_t ucpBinding; /* constant buffer index of UCP data */
166 uint8_t pointSize; /* output index for PointSize */ 168 uint8_t pointSize; /* output index for PointSize */
167 uint8_t instanceId; /* system value index of InstanceID */ 169 uint8_t instanceId; /* system value index of InstanceID */
168 uint8_t vertexId; /* system value index of VertexID */ 170 uint8_t vertexId; /* system value index of VertexID */
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
index 4530dc23715..8bd784fa47d 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
@@ -2250,9 +2250,9 @@ Converter::handleUserClipPlanes()
2250 2250
2251 for (c = 0; c < 4; ++c) { 2251 for (c = 0; c < 4; ++c) {
2252 for (i = 0; i < info->io.genUserClip; ++i) { 2252 for (i = 0; i < info->io.genUserClip; ++i) {
2253 Value *ucp; 2253 Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpBinding,
2254 ucp = mkLoad(TYPE_F32, mkSymbol(FILE_MEMORY_CONST, 15, TYPE_F32, 2254 TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
2255 i * 16 + c * 4), NULL); 2255 Value *ucp = mkLoad(TYPE_F32, sym, NULL);
2256 if (c == 0) 2256 if (c == 0)
2257 res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp); 2257 res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
2258 else 2258 else
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
index 4ce9deb131f..93e502ea609 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
@@ -40,6 +40,7 @@ static inline bool isMemoryFile(DataFile f)
40 return (f >= FILE_MEMORY_CONST && f <= FILE_MEMORY_LOCAL); 40 return (f >= FILE_MEMORY_CONST && f <= FILE_MEMORY_LOCAL);
41} 41}
42 42
43// contrary to asTex(), this will never include SULD/SUST
43static inline bool isTextureOp(operation op) 44static inline bool isTextureOp(operation op)
44{ 45{
45 return (op >= OP_TEX && op <= OP_TEXCSAA); 46 return (op >= OP_TEX && op <= OP_TEXCSAA);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
index 45e61c5e58a..4652bb95f69 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
@@ -147,6 +147,7 @@ const char *operationStr[OP_LAST + 1] =
147 "popcnt", 147 "popcnt",
148 "insbf", 148 "insbf",
149 "extbf", 149 "extbf",
150 "texbar",
150 "(invalid)" 151 "(invalid)"
151}; 152};
152 153
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
index 27b9610ed52..e3eae69554c 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
@@ -48,7 +48,7 @@ const uint8_t Target::operationSrcNr[OP_LAST + 1] =
48 1, 2, // SULD, SUST 48 1, 2, // SULD, SUST
49 1, 1, // DFDX, DFDY 49 1, 1, // DFDX, DFDY
50 1, 2, 2, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP 50 1, 2, 2, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
51 2, 3, 2, // POPCNT, INSBF, EXTBF 51 2, 3, 2, 0, // POPCNT, INSBF, EXTBF, TEXBAR
52 0 52 0
53}; 53};
54 54
@@ -61,6 +61,7 @@ Target *Target::create(unsigned int chipset)
61 switch (chipset & 0xf0) { 61 switch (chipset & 0xf0) {
62 case 0xc0: 62 case 0xc0:
63 case 0xd0: 63 case 0xd0:
64 case 0xe0:
64 return getTargetNVC0(chipset); 65 return getTargetNVC0(chipset);
65 case 0x50: 66 case 0x50:
66 case 0x80: 67 case 0x80:
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 4bcd2049099..e8118d70ca7 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -594,6 +594,7 @@ nv50_screen_create(struct nouveau_device *dev)
594 FAIL_SCREEN_INIT("Not a known NV50 chipset: NV%02x\n", dev->chipset); 594 FAIL_SCREEN_INIT("Not a known NV50 chipset: NV%02x\n", dev->chipset);
595 break; 595 break;
596 } 596 }
597 screen->base.class_3d = tesla_class;
597 598
598 ret = nouveau_object_new(chan, 0xbeef5097, tesla_class, 599 ret = nouveau_object_new(chan, 0xbeef5097, tesla_class,
599 NULL, 0, &screen->tesla); 600 NULL, 0, &screen->tesla);
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index bf554427ca0..5b783da7ad7 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -465,6 +465,13 @@ nv50_sampler_state_create(struct pipe_context *pipe,
465 (nv50_tsc_wrap_mode(cso->wrap_t) << 3) | 465 (nv50_tsc_wrap_mode(cso->wrap_t) << 3) |
466 (nv50_tsc_wrap_mode(cso->wrap_r) << 6)); 466 (nv50_tsc_wrap_mode(cso->wrap_r) << 6));
467 467
468 if (nouveau_screen(pipe->screen)->class_3d >= NVE4_3D_CLASS) {
469 if (cso->seamless_cube_map)
470 so->tsc[1] |= NVE4_TSC_1_CUBE_SEAMLESS;
471 if (!cso->normalized_coords)
472 so->tsc[1] |= NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS;
473 }
474
468 switch (cso->mag_img_filter) { 475 switch (cso->mag_img_filter) {
469 case PIPE_TEX_FILTER_LINEAR: 476 case PIPE_TEX_FILTER_LINEAR:
470 so->tsc[1] |= NV50_TSC_1_MAGF_LINEAR; 477 so->tsc[1] |= NV50_TSC_1_MAGF_LINEAR;
diff --git a/src/gallium/drivers/nv50/nv50_texture.xml.h b/src/gallium/drivers/nv50/nv50_texture.xml.h
index 08f6efdd7bf..2b140be8d80 100644
--- a/src/gallium/drivers/nv50/nv50_texture.xml.h
+++ b/src/gallium/drivers/nv50/nv50_texture.xml.h
@@ -8,12 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
8git clone git://0x04.net/rules-ng-ng 8git clone git://0x04.net/rules-ng-ng
9 9
10The rules-ng-ng source files this header was generated from are: 10The rules-ng-ng source files this header was generated from are:
11- rnndb/nv50_texture.xml ( 7947 bytes, from 2011-07-09 13:43:58) 11- rnndb/nv50_texture.xml ( 8111 bytes, from 2012-03-31 16:47:45)
12- ./rnndb/copyright.xml ( 6452 bytes, from 2011-07-09 13:43:58) 12- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
13- ./rnndb/nvchipsets.xml ( 3617 bytes, from 2011-07-09 13:43:58) 13- ./rnndb/nvchipsets.xml ( 3701 bytes, from 2012-03-22 20:40:59)
14- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-07-09 13:43:58) 14- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12)
15 15
16Copyright (C) 2006-2011 by the following authors: 16Copyright (C) 2006-2012 by the following authors:
17- Artur Huillet <arthur.huillet@free.fr> (ahuillet) 17- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
18- Ben Skeggs (darktama, darktama_) 18- Ben Skeggs (darktama, darktama_)
19- B. R. <koala_br@users.sourceforge.net> (koala_br) 19- B. R. <koala_br@users.sourceforge.net> (koala_br)
@@ -265,8 +265,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
265#define NV50_TSC_1_MIPF_NONE 0x00000040 265#define NV50_TSC_1_MIPF_NONE 0x00000040
266#define NV50_TSC_1_MIPF_NEAREST 0x00000080 266#define NV50_TSC_1_MIPF_NEAREST 0x00000080
267#define NV50_TSC_1_MIPF_LINEAR 0x000000c0 267#define NV50_TSC_1_MIPF_LINEAR 0x000000c0
268#define NVE4_TSC_1_CUBE_SEAMLESS 0x00000200
268#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000 269#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000
269#define NV50_TSC_1_LOD_BIAS__SHIFT 12 270#define NV50_TSC_1_LOD_BIAS__SHIFT 12
271#define NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS 0x02000000
270 272
271#define NV50_TSC_2 0x00000008 273#define NV50_TSC_2 0x00000008
272#define NV50_TSC_2_MIN_LOD__MASK 0x00000fff 274#define NV50_TSC_2_MIN_LOD__MASK 0x00000fff
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
index d4fd4da07e7..912540d0c40 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
@@ -102,6 +102,7 @@ private:
102 void emitSLCT(const CmpInstruction *); 102 void emitSLCT(const CmpInstruction *);
103 void emitSELP(const Instruction *); 103 void emitSELP(const Instruction *);
104 104
105 void emitTEXBAR(const Instruction *);
105 void emitTEX(const TexInstruction *); 106 void emitTEX(const TexInstruction *);
106 void emitTEXCSAA(const TexInstruction *); 107 void emitTEXCSAA(const TexInstruction *);
107 void emitTXQ(const TexInstruction *); 108 void emitTXQ(const TexInstruction *);
@@ -938,6 +939,14 @@ void CodeEmitterNVC0::emitSELP(const Instruction *i)
938 code[1] |= 1 << 20; 939 code[1] |= 1 << 20;
939} 940}
940 941
942void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
943{
944 code[0] = 0x00000006 | (i->subOp << 26);
945 code[1] = 0xf0000000;
946 emitPredicate(i);
947 emitCondCode(i->predSrc >= 0 ? i->cc : CC_ALWAYS, 5);
948}
949
941void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i) 950void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
942{ 951{
943 code[0] = 0x00000086; 952 code[0] = 0x00000086;
@@ -1630,6 +1639,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
1630 case OP_TXQ: 1639 case OP_TXQ:
1631 emitTXQ(insn->asTex()); 1640 emitTXQ(insn->asTex());
1632 break; 1641 break;
1642 case OP_TEXBAR:
1643 emitTEXBAR(insn);
1644 break;
1633 case OP_BRA: 1645 case OP_BRA:
1634 case OP_CALL: 1646 case OP_CALL:
1635 case OP_PRERET: 1647 case OP_PRERET:
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
index bd33fbfac5c..318d345efdb 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
@@ -117,6 +117,9 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
117 117
118class NVC0LegalizePostRA : public Pass 118class NVC0LegalizePostRA : public Pass
119{ 119{
120public:
121 NVC0LegalizePostRA(const Program *);
122
120private: 123private:
121 virtual bool visit(Function *); 124 virtual bool visit(Function *);
122 virtual bool visit(BasicBlock *); 125 virtual bool visit(BasicBlock *);
@@ -127,8 +130,15 @@ private:
127 void propagateJoin(BasicBlock *); 130 void propagateJoin(BasicBlock *);
128 131
129 LValue *r63; 132 LValue *r63;
133
134 const bool needTexBar;
130}; 135};
131 136
137NVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog)
138 : needTexBar(prog->getTarget()->getChipset() >= 0xe0)
139{
140}
141
132bool 142bool
133NVC0LegalizePostRA::visit(Function *fn) 143NVC0LegalizePostRA::visit(Function *fn)
134{ 144{
@@ -225,6 +235,12 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
225 } else 235 } else
226 if (i->isNop()) { 236 if (i->isNop()) {
227 bb->remove(i); 237 bb->remove(i);
238 } else
239 if (needTexBar && isTextureOp(i->op)) {
240 Instruction *bar = new_Instruction(func, OP_TEXBAR, TYPE_NONE);
241 bar->fixed = 1;
242 bar->subOp = 0;
243 bb->insertAfter(i, bar);
228 } else { 244 } else {
229 if (i->op != OP_MOV && i->op != OP_PFETCH) 245 if (i->op != OP_MOV && i->op != OP_PFETCH)
230 replaceZero(i); 246 replaceZero(i);
@@ -310,7 +326,61 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
310 const int dim = i->tex.target.getDim() + i->tex.target.isCube(); 326 const int dim = i->tex.target.getDim() + i->tex.target.isCube();
311 const int arg = i->tex.target.getArgCount(); 327 const int arg = i->tex.target.getArgCount();
312 328
313 // generate and move the tsc/tic/array source to the front 329 if (prog->getTarget()->getChipset() >= 0xe0) {
330 if (i->tex.r == i->tex.s) {
331 i->tex.r += 8; // NOTE: offset should probably be a driver option
332 i->tex.s = 0; // only a single cX[] value possible here
333 } else {
334 // TODO: extract handles and use register to select TIC/TSC entries
335 }
336 if (i->tex.target.isArray()) {
337 LValue *layer = new_LValue(func, FILE_GPR);
338 Value *src = i->getSrc(arg - 1);
339 const int sat = (i->op == OP_TXF) ? 1 : 0;
340 DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
341 bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat;
342 for (int s = dim; s >= 1; --s)
343 i->setSrc(s, i->getSrc(s - 1));
344 i->setSrc(0, layer);
345 }
346 if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
347 Value *tmp[2];
348 Symbol *bind;
349 Value *rRel = i->getIndirectR();
350 Value *sRel = i->getIndirectS();
351 Value *shCnt = bld.loadImm(NULL, 2);
352
353 if (rRel) {
354 tmp[0] = bld.getScratch();
355 bind = bld.mkSymbol(FILE_MEMORY_CONST, 15, TYPE_U32, i->tex.r * 4);
356 bld.mkOp2(OP_SHL, TYPE_U32, tmp[0], rRel, shCnt);
357 tmp[1] = bld.mkLoad(TYPE_U32, bind, tmp[0]);
358 bld.mkOp2(OP_AND, TYPE_U32, tmp[0], tmp[1],
359 bld.loadImm(tmp[0], 0x00ffffffu));
360 rRel = tmp[0];
361 i->setSrc(i->tex.rIndirectSrc, NULL);
362 }
363 if (sRel) {
364 tmp[0] = bld.getScratch();
365 bind = bld.mkSymbol(FILE_MEMORY_CONST, 15, TYPE_U32, i->tex.s * 4);
366 bld.mkOp2(OP_SHL, TYPE_U32, tmp[0], sRel, shCnt);
367 tmp[1] = bld.mkLoad(TYPE_U32, bind, tmp[0]);
368 bld.mkOp2(OP_AND, TYPE_U32, tmp[0], tmp[1],
369 bld.loadImm(tmp[0], 0xff000000u));
370 sRel = tmp[0];
371 i->setSrc(i->tex.sIndirectSrc, NULL);
372 }
373 bld.mkOp2(OP_OR, TYPE_U32, rRel, rRel, sRel);
374
375 int min = i->tex.rIndirectSrc;
376 if (min < 0 || min > i->tex.sIndirectSrc)
377 min = i->tex.sIndirectSrc;
378 for (int s = min; s >= 1; --s)
379 i->setSrc(s, i->getSrc(s - 1));
380 i->setSrc(0, rRel);
381 }
382 } else
383 // (nvc0) generate and move the tsc/tic/array source to the front
314 if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { 384 if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
315 LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa 385 LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa
316 386
@@ -717,7 +787,7 @@ TargetNVC0::runLegalizePass(Program *prog, CGStage stage) const
717 return pass.run(prog, false, true); 787 return pass.run(prog, false, true);
718 } else 788 } else
719 if (stage == CG_STAGE_POST_RA) { 789 if (stage == CG_STAGE_POST_RA) {
720 NVC0LegalizePostRA pass; 790 NVC0LegalizePostRA pass(prog);
721 return pass.run(prog, false, true); 791 return pass.run(prog, false, true);
722 } else 792 } else
723 if (stage == CG_STAGE_SSA) { 793 if (stage == CG_STAGE_SSA) {
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
index 04425623bdb..2aa20053c14 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
@@ -42,6 +42,7 @@ TargetNVC0::TargetNVC0(unsigned int card)
42// Will probably make this nicer once we support subroutines properly, 42// Will probably make this nicer once we support subroutines properly,
43// i.e. when we have an input IR that provides function declarations. 43// i.e. when we have an input IR that provides function declarations.
44 44
45// TODO: separate version for nve4+ which doesn't like the 4-byte insn formats
45static const uint32_t nvc0_builtin_code[] = 46static const uint32_t nvc0_builtin_code[] =
46{ 47{
47// DIV U32: slow unsigned integer division 48// DIV U32: slow unsigned integer division
@@ -57,11 +58,11 @@ static const uint32_t nvc0_builtin_code[] =
57// 58//
58#if 1 59#if 1
59 0x04009c03, 0x78000000, 60 0x04009c03, 0x78000000,
60 0x7c209cdd, 61 0x7c209c82, 0x38000000, // 0x7c209cdd,
61 0x0010dd18, 62 0x0400dde2, 0x18000000, // 0x0010dd18,
62 0x08309c03, 0x60000000, 63 0x08309c03, 0x60000000,
63 0x05605c18, 64 0x05205d04, 0x1c000000, // 0x05605c18,
64 0x0810dc2a, 65 0x0810dc03, 0x50000000, // 0x0810dc2a,
65 0x0c209c43, 0x20040000, 66 0x0c209c43, 0x20040000,
66 0x0810dc03, 0x50000000, 67 0x0810dc03, 0x50000000,
67 0x0c209c43, 0x20040000, 68 0x0c209c43, 0x20040000,
@@ -73,15 +74,15 @@ static const uint32_t nvc0_builtin_code[] =
73 0x0c209c43, 0x20040000, 74 0x0c209c43, 0x20040000,
74 0x0000dde4, 0x28000000, 75 0x0000dde4, 0x28000000,
75 0x08001c43, 0x50000000, 76 0x08001c43, 0x50000000,
76 0x05609c18, 77 0x05209d04, 0x1c000000, // 0x05609c18,
77 0x0010430d, 78 0x00105c03, 0x20060000, // 0x0010430d,
78 0x0811dc03, 0x1b0e0000, 79 0x0811dc03, 0x1b0e0000,
79 0x08104103, 0x48000000, 80 0x08104103, 0x48000000,
80 0x04000002, 0x08000000, 81 0x04000002, 0x08000000,
81 0x0811c003, 0x1b0e0000, 82 0x0811c003, 0x1b0e0000,
82 0x08104103, 0x48000000, 83 0x08104103, 0x48000000,
83 0x040000ac, 84 0x04000002, 0x08000000, // 0x040000ac,
84 0x90001dff, 85 0x00001de7, 0x90000000, // 0x90001dff,
85#else 86#else
86 0x0401dc03, 0x1b0e0000, 87 0x0401dc03, 0x1b0e0000,
87 0x00008003, 0x78000000, 88 0x00008003, 0x78000000,
@@ -111,27 +112,27 @@ static const uint32_t nvc0_builtin_code[] =
111// 112//
112 0xfc05dc23, 0x188e0000, 113 0xfc05dc23, 0x188e0000,
113 0xfc17dc23, 0x18c40000, 114 0xfc17dc23, 0x18c40000,
114 0x03301e18, 115 0x01201ec4, 0x1c000000, // 0x03301e18,
115 0x07305e18, 116 0x05205ec4, 0x1c000000, // 0x07305e18,
116 0x0401dc03, 0x1b0e0000, 117 0x0401dc03, 0x1b0e0000,
117 0x00008003, 0x78000000, 118 0x00008003, 0x78000000,
118 0x0400c003, 0x78000000, 119 0x0400c003, 0x78000000,
119 0x0c20c103, 0x48000000, 120 0x0c20c103, 0x48000000,
120 0x0c108003, 0x60000000, 121 0x0c108003, 0x60000000,
121 0x00005c28, 122 0x00005de4, 0x28000000, // 0x00005c28,
122 0x00001d18, 123 0x00001de2, 0x18000000, // 0x00001d18,
123 0x0031c023, 0x1b0ec000, 124 0x0031c023, 0x1b0ec000,
124 0xb000a1e7, 0x40000000, 125 0xe000a1e7, 0x40000000, // 0xb000a1e7, 0x40000000,
125 0x04000003, 0x6000c000, 126 0x04000003, 0x6000c000,
126 0x0813dc03, 0x1b000000, 127 0x0813dc03, 0x1b000000,
127 0x0420446c, 128 0x04204603, 0x48000000, // 0x0420446c,
128 0x040004bd, 129 0x04000442, 0x38000000, // 0x040004bd,
129 0x04208003, 0x5800c000, 130 0x04208003, 0x5800c000,
130 0x0430c103, 0x4800c000, 131 0x0430c103, 0x4800c000,
131 0x0ffc5dff, 132 0xe0001de7, 0x4003fffe, // 0x0ffc5dff,
132 0x01700e18, 133 0x01200f84, 0x1c000000, // 0x01700e18,
133 0x05704a18, 134 0x05204b84, 0x1c000000, // 0x05704a18,
134 0x90001dff, 135 0x00001de7, 0x90000000, // 0x90001dff,
135 136
136// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i) 137// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
137// 138//
@@ -180,9 +181,9 @@ static const uint32_t nvc0_builtin_code[] =
180static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] = 181static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] =
181{ 182{
182 0, 183 0,
183 8 * (22), 184 8 * (26),
184 8 * (22 + 18), 185 8 * (26 + 23),
185 8 * (22 + 18 + 9) 186 8 * (26 + 23 + 9)
186}; 187};
187 188
188void 189void
@@ -270,7 +271,7 @@ void TargetNVC0::initOpInfo()
270 OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT, 271 OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT,
271 OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET, 272 OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET,
272 OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART, 273 OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART,
273 OP_QUADON, OP_QUADPOP 274 OP_QUADON, OP_QUADPOP, OP_TEXBAR
274 }; 275 };
275 276
276 joinAnterior = false; 277 joinAnterior = false;
@@ -445,6 +446,8 @@ TargetNVC0::isAccessSupported(DataFile file, DataType ty) const
445{ 446{
446 if (ty == TYPE_NONE) 447 if (ty == TYPE_NONE)
447 return false; 448 return false;
449 if (file == FILE_MEMORY_CONST && getChipset() >= 0xe0) // wrong encoding ?
450 return typeSizeof(ty) <= 4;
448 if (ty == TYPE_B96) 451 if (ty == TYPE_B96)
449 return (file == FILE_SHADER_INPUT) || (file == FILE_SHADER_OUTPUT); 452 return (file == FILE_SHADER_INPUT) || (file == FILE_SHADER_OUTPUT);
450 return true; 453 return true;
diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
index 71fa1516e16..1cf1f96569f 100644
--- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h
+++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
@@ -94,6 +94,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
94#define NVC0_3D_MEM_BARRIER_UNK8 0x00000100 94#define NVC0_3D_MEM_BARRIER_UNK8 0x00000100
95#define NVC0_3D_MEM_BARRIER_UNK12 0x00001000 95#define NVC0_3D_MEM_BARRIER_UNK12 0x00001000
96 96
97#define NVC0_3D_CACHE_SPLIT 0x00000308
98#define NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1 0x00000001
99#define NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1 0x00000002
100#define NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1 0x00000003
101
97#define NVC0_3D_TESS_MODE 0x00000320 102#define NVC0_3D_TESS_MODE 0x00000320
98#define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f 103#define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f
99#define NVC0_3D_TESS_MODE_PRIM__SHIFT 0 104#define NVC0_3D_TESS_MODE_PRIM__SHIFT 0
@@ -289,6 +294,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
289#define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000 294#define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000
290#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16 295#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16
291 296
297#define NVC0_3D_CALL_LIMIT_LOG 0x00000d64
298
292#define NVC0_3D_COUNTER_ENABLE 0x00000d68 299#define NVC0_3D_COUNTER_ENABLE 0x00000d68
293#define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001 300#define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001
294#define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002 301#define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002
@@ -727,6 +734,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
727 734
728#define NVC0_3D_POINT_SIZE 0x00001518 735#define NVC0_3D_POINT_SIZE 0x00001518
729 736
737#define NVC0_3D_ZCULL_STATCTRS_ENABLE 0x0000151c
738
730#define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520 739#define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520
731 740
732#define NVC0_3D_COUNTER_RESET 0x00001530 741#define NVC0_3D_COUNTER_RESET 0x00001530
@@ -1303,6 +1312,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1303 1312
1304#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600 1313#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600
1305 1314
1315#define NVE4_3D_TEX_CB_INDEX 0x00002608
1316#define NVE4_3D_TEX_CB_INDEX__MIN 0x00000000
1317#define NVE4_3D_TEX_CB_INDEX__MAX 0x00000010
1318
1306#define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1)) 1319#define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1))
1307#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004 1320#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004
1308#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020 1321#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
index 461ceb14c45..8abac09ffd5 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -133,10 +133,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
133 goto out_err; 133 goto out_err;
134 134
135 nvc0->screen = screen; 135 nvc0->screen = screen;
136 nvc0->base.screen = &screen->base; 136 nvc0->base.screen = &screen->base;
137 nvc0->base.copy_data = nvc0_m2mf_copy_linear;
138 nvc0->base.push_data = nvc0_m2mf_push_linear;
139 nvc0->base.push_cb = nvc0_cb_push;
140 137
141 pipe->screen = pscreen; 138 pipe->screen = pscreen;
142 pipe->priv = priv; 139 pipe->priv = priv;
@@ -158,6 +155,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
158 nvc0_init_query_functions(nvc0); 155 nvc0_init_query_functions(nvc0);
159 nvc0_init_surface_functions(nvc0); 156 nvc0_init_surface_functions(nvc0);
160 nvc0_init_state_functions(nvc0); 157 nvc0_init_state_functions(nvc0);
158 nvc0_init_transfer_functions(nvc0);
161 nvc0_init_resource_functions(pipe); 159 nvc0_init_resource_functions(pipe);
162 160
163 nvc0->draw = draw_create(pipe); 161 nvc0->draw = draw_create(pipe);
@@ -174,7 +172,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
174 flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; 172 flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
175 173
176 BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text); 174 BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text);
177 BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniforms); 175 BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo);
178 BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc); 176 BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc);
179 177
180 flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR; 178 flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
index 7072b5918fa..140ce1ac7ef 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -27,7 +27,9 @@
27#include "nvc0_3d.xml.h" 27#include "nvc0_3d.xml.h"
28#include "nvc0_2d.xml.h" 28#include "nvc0_2d.xml.h"
29#include "nvc0_m2mf.xml.h" 29#include "nvc0_m2mf.xml.h"
30#include "nve4_p2mf.xml.h"
30 31
32/* NOTE: must keep NVC0_NEW_...PROG in consecutive bits in this order */
31#define NVC0_NEW_BLEND (1 << 0) 33#define NVC0_NEW_BLEND (1 << 0)
32#define NVC0_NEW_RASTERIZER (1 << 1) 34#define NVC0_NEW_RASTERIZER (1 << 1)
33#define NVC0_NEW_ZSA (1 << 2) 35#define NVC0_NEW_ZSA (1 << 2)
@@ -75,6 +77,11 @@ struct nvc0_context {
75 77
76 struct nvc0_screen *screen; 78 struct nvc0_screen *screen;
77 79
80 void (*m2mf_copy_rect)(struct nvc0_context *,
81 const struct nv50_m2mf_rect *dst,
82 const struct nv50_m2mf_rect *src,
83 uint32_t nblocksx, uint32_t nblocksy);
84
78 uint32_t dirty; 85 uint32_t dirty;
79 86
80 struct { 87 struct {
@@ -130,6 +137,8 @@ struct nvc0_context {
130 unsigned num_samplers[5]; 137 unsigned num_samplers[5];
131 uint16_t samplers_dirty[5]; 138 uint16_t samplers_dirty[5];
132 139
140 uint32_t tex_handles[5][PIPE_MAX_SAMPLERS]; /* for nve4 */
141
133 struct pipe_framebuffer_state framebuffer; 142 struct pipe_framebuffer_state framebuffer;
134 struct pipe_blend_color blend_colour; 143 struct pipe_blend_color blend_colour;
135 struct pipe_stencil_ref stencil_ref; 144 struct pipe_stencil_ref stencil_ref;
@@ -165,7 +174,7 @@ void nvc0_default_kick_notify(struct nouveau_pushbuf *);
165extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *); 174extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
166 175
167/* nvc0_program.c */ 176/* nvc0_program.c */
168boolean nvc0_program_translate(struct nvc0_program *); 177boolean nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
169boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *); 178boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
170void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *); 179void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
171void nvc0_program_library_upload(struct nvc0_context *); 180void nvc0_program_library_upload(struct nvc0_context *);
@@ -206,6 +215,7 @@ extern void nvc0_init_surface_functions(struct nvc0_context *);
206/* nvc0_tex.c */ 215/* nvc0_tex.c */
207void nvc0_validate_textures(struct nvc0_context *); 216void nvc0_validate_textures(struct nvc0_context *);
208void nvc0_validate_samplers(struct nvc0_context *); 217void nvc0_validate_samplers(struct nvc0_context *);
218void nve4_set_tex_handles(struct nvc0_context *);
209 219
210struct pipe_sampler_view * 220struct pipe_sampler_view *
211nvc0_create_sampler_view(struct pipe_context *, 221nvc0_create_sampler_view(struct pipe_context *,
@@ -214,19 +224,16 @@ nvc0_create_sampler_view(struct pipe_context *,
214 224
215/* nvc0_transfer.c */ 225/* nvc0_transfer.c */
216void 226void
217nvc0_m2mf_transfer_rect(struct nvc0_context *, 227nvc0_init_transfer_functions(struct nvc0_context *);
218 const struct nv50_m2mf_rect *dst, 228
219 const struct nv50_m2mf_rect *src,
220 uint32_t nblocksx, uint32_t nblocksy);
221void 229void
222nvc0_m2mf_push_linear(struct nouveau_context *nv, 230nvc0_m2mf_push_linear(struct nouveau_context *nv,
223 struct nouveau_bo *dst, unsigned offset, unsigned domain, 231 struct nouveau_bo *dst, unsigned offset, unsigned domain,
224 unsigned size, const void *data); 232 unsigned size, const void *data);
225void 233void
226nvc0_m2mf_copy_linear(struct nouveau_context *nv, 234nve4_p2mf_push_linear(struct nouveau_context *nv,
227 struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, 235 struct nouveau_bo *dst, unsigned offset, unsigned domain,
228 struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, 236 unsigned size, const void *data);
229 unsigned size);
230void 237void
231nvc0_cb_push(struct nouveau_context *, 238nvc0_cb_push(struct nouveau_context *,
232 struct nouveau_bo *bo, unsigned domain, 239 struct nouveau_bo *bo, unsigned domain,
diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
index 50a853abed9..f228d07bf6b 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nvc0/nvc0_program.c
@@ -152,7 +152,7 @@ nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info)
152static int 152static int
153nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) 153nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info)
154{ 154{
155 unsigned last = info->prop.fp.numColourResults * 4; 155 unsigned count = info->prop.fp.numColourResults * 4;
156 unsigned i, c; 156 unsigned i, c;
157 157
158 for (i = 0; i < info->numOutputs; ++i) 158 for (i = 0; i < info->numOutputs; ++i)
@@ -161,10 +161,13 @@ nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info)
161 info->out[i].slot[c] = info->out[i].si * 4 + c; 161 info->out[i].slot[c] = info->out[i].si * 4 + c;
162 162
163 if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) 163 if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
164 info->out[info->io.sampleMask].slot[0] = last++; 164 info->out[info->io.sampleMask].slot[0] = count++;
165 else
166 if (info->target >= 0xe0)
167 count++; /* on Kepler, depth is always last colour reg + 2 */
165 168
166 if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS) 169 if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
167 info->out[info->io.fragDepth].slot[2] = last; 170 info->out[info->io.fragDepth].slot[2] = count;
168 171
169 return 0; 172 return 0;
170} 173}
@@ -278,7 +281,7 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
278 vp->vp.clip_mode |= 1 << (i * 4); 281 vp->vp.clip_mode |= 1 << (i * 4);
279 282
280 if (info->io.genUserClip < 0) 283 if (info->io.genUserClip < 0)
281 vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES; /* prevent rebuilding */ 284 vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */
282 285
283 return 0; 286 return 0;
284} 287}
@@ -434,6 +437,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
434{ 437{
435 unsigned i, c, a, m; 438 unsigned i, c, a, m;
436 439
440 /* just 00062 on Kepler */
437 fp->hdr[0] = 0x20062 | (5 << 10); 441 fp->hdr[0] = 0x20062 | (5 << 10);
438 fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ 442 fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
439 443
@@ -538,7 +542,7 @@ nvc0_program_dump(struct nvc0_program *prog)
538#endif 542#endif
539 543
540boolean 544boolean
541nvc0_program_translate(struct nvc0_program *prog) 545nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
542{ 546{
543 struct nv50_ir_prog_info *info; 547 struct nv50_ir_prog_info *info;
544 int ret; 548 int ret;
@@ -548,11 +552,13 @@ nvc0_program_translate(struct nvc0_program *prog)
548 return FALSE; 552 return FALSE;
549 553
550 info->type = prog->type; 554 info->type = prog->type;
551 info->target = 0xc0; 555 info->target = chipset;
552 info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; 556 info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
553 info->bin.source = (void *)prog->pipe.tokens; 557 info->bin.source = (void *)prog->pipe.tokens;
554 558
555 info->io.genUserClip = prog->vp.num_ucps; 559 info->io.genUserClip = prog->vp.num_ucps;
560 info->io.ucpBase = 256;
561 info->io.ucpBinding = 15;
556 562
557 info->assignSlots = nvc0_program_assign_varying_slots; 563 info->assignSlots = nvc0_program_assign_varying_slots;
558 564
@@ -655,7 +661,13 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
655 size = align(size, 0x40); 661 size = align(size, 0x40);
656 size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */ 662 size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */
657 } 663 }
658 size = align(size, 0x40); /* required by SP_START_ID */ 664 /* On Fermi, SP_START_ID must be aligned to 0x40.
665 * On Kepler, the first instruction must be aligned to 0x80 because
666 * latency information is expected only at certain positions.
667 */
668 if (screen->base.class_3d >= NVE4_3D_CLASS)
669 size = size + 0x70;
670 size = align(size, 0x40);
659 671
660 ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem); 672 ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem);
661 if (ret) { 673 if (ret) {
@@ -667,6 +679,17 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
667 assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <= 679 assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <=
668 prog->mem->start + prog->mem->size)); 680 prog->mem->start + prog->mem->size));
669 681
682 if (screen->base.class_3d >= NVE4_3D_CLASS) {
683 switch (prog->mem->start & 0xff) {
684 case 0x40: prog->code_base += 0x70; break;
685 case 0x80: prog->code_base += 0x30; break;
686 case 0xc0: prog->code_base += 0x70; break;
687 default:
688 prog->code_base += 0x30;
689 assert((prog->mem->start & 0xff) == 0x00);
690 break;
691 }
692 }
670 code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE; 693 code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE;
671 694
672 if (prog->relocs) 695 if (prog->relocs)
@@ -677,18 +700,18 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
677 nvc0_program_dump(prog); 700 nvc0_program_dump(prog);
678#endif 701#endif
679 702
680 nvc0_m2mf_push_linear(&nvc0->base, screen->text, prog->code_base, 703 nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
681 NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr); 704 NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
682 nvc0_m2mf_push_linear(&nvc0->base, screen->text, 705 nvc0->base.push_data(&nvc0->base, screen->text,
683 prog->code_base + NVC0_SHADER_HEADER_SIZE, 706 prog->code_base + NVC0_SHADER_HEADER_SIZE,
684 NOUVEAU_BO_VRAM, prog->code_size, prog->code); 707 NOUVEAU_BO_VRAM, prog->code_size, prog->code);
685 if (prog->immd_size) 708 if (prog->immd_size)
686 nvc0_m2mf_push_linear(&nvc0->base, 709 nvc0->base.push_data(&nvc0->base,
687 screen->text, prog->immd_base, NOUVEAU_BO_VRAM, 710 screen->text, prog->immd_base, NOUVEAU_BO_VRAM,
688 prog->immd_size, prog->immd_data); 711 prog->immd_size, prog->immd_data);
689 712
690 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1); 713 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1);
691 PUSH_DATA (nvc0->base.pushbuf, 0x1111); 714 PUSH_DATA (nvc0->base.pushbuf, 0x1011);
692 715
693 return TRUE; 716 return TRUE;
694} 717}
@@ -714,9 +737,9 @@ nvc0_program_library_upload(struct nvc0_context *nvc0)
714 if (ret) 737 if (ret)
715 return; 738 return;
716 739
717 nvc0_m2mf_push_linear(&nvc0->base, 740 nvc0->base.push_data(&nvc0->base,
718 screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM, 741 screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM,
719 size, code); 742 size, code);
720 /* no need for a memory barrier, will be emitted with first program */ 743 /* no need for a memory barrier, will be emitted with first program */
721} 744}
722 745
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index bad06c3f009..eb8a9c5a0e0 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -30,7 +30,6 @@
30#include "nvc0_context.h" 30#include "nvc0_context.h"
31#include "nvc0_screen.h" 31#include "nvc0_screen.h"
32 32
33#include "nouveau/nv_object.xml.h"
34#include "nvc0_graph_macros.h" 33#include "nvc0_graph_macros.h"
35 34
36static boolean 35static boolean
@@ -67,6 +66,8 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
67static int 66static int
68nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) 67nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
69{ 68{
69 const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
70
70 switch (param) { 71 switch (param) {
71 case PIPE_CAP_MAX_COMBINED_SAMPLERS: 72 case PIPE_CAP_MAX_COMBINED_SAMPLERS:
72 return 16 * PIPE_SHADER_TYPES; /* NOTE: should not count COMPUTE */ 73 return 16 * PIPE_SHADER_TYPES; /* NOTE: should not count COMPUTE */
@@ -89,7 +90,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
89 case PIPE_CAP_SEAMLESS_CUBE_MAP: 90 case PIPE_CAP_SEAMLESS_CUBE_MAP:
90 return 1; 91 return 1;
91 case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: 92 case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
92 return 0; 93 return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
93 case PIPE_CAP_TWO_SIDED_STENCIL: 94 case PIPE_CAP_TWO_SIDED_STENCIL:
94 case PIPE_CAP_DEPTH_CLIP_DISABLE: 95 case PIPE_CAP_DEPTH_CLIP_DISABLE:
95 case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: 96 case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
@@ -247,10 +248,11 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
247 FREE(screen->blitctx); 248 FREE(screen->blitctx);
248 249
249 nouveau_bo_ref(NULL, &screen->text); 250 nouveau_bo_ref(NULL, &screen->text);
251 nouveau_bo_ref(NULL, &screen->uniform_bo);
250 nouveau_bo_ref(NULL, &screen->tls); 252 nouveau_bo_ref(NULL, &screen->tls);
251 nouveau_bo_ref(NULL, &screen->txc); 253 nouveau_bo_ref(NULL, &screen->txc);
252 nouveau_bo_ref(NULL, &screen->fence.bo); 254 nouveau_bo_ref(NULL, &screen->fence.bo);
253 nouveau_bo_ref(NULL, &screen->vfetch_cache); 255 nouveau_bo_ref(NULL, &screen->poly_cache);
254 256
255 nouveau_heap_destroy(&screen->lib_code); 257 nouveau_heap_destroy(&screen->lib_code);
256 nouveau_heap_destroy(&screen->text_heap); 258 nouveau_heap_destroy(&screen->text_heap);
@@ -260,7 +262,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
260 262
261 nouveau_mm_destroy(screen->mm_VRAM_fe0); 263 nouveau_mm_destroy(screen->mm_VRAM_fe0);
262 264
263 nouveau_object_del(&screen->fermi); 265 nouveau_object_del(&screen->eng3d);
264 nouveau_object_del(&screen->eng2d); 266 nouveau_object_del(&screen->eng2d);
265 nouveau_object_del(&screen->m2mf); 267 nouveau_object_del(&screen->m2mf);
266 268
@@ -288,16 +290,16 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
288} 290}
289 291
290static void 292static void
291nvc0_magic_3d_init(struct nouveau_pushbuf *push) 293nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
292{ 294{
293 BEGIN_NVC0(push, SUBC_3D(0x10cc), 1); 295 BEGIN_NVC0(push, SUBC_3D(0x10cc), 1);
294 PUSH_DATA (push, 0xff); 296 PUSH_DATA (push, 0xff);
295 BEGIN_NVC0(push, SUBC_3D(0x10e0), 2); 297 BEGIN_NVC0(push, SUBC_3D(0x10e0), 2);
296 PUSH_DATA(push, 0xff); 298 PUSH_DATA (push, 0xff);
297 PUSH_DATA(push, 0xff); 299 PUSH_DATA (push, 0xff);
298 BEGIN_NVC0(push, SUBC_3D(0x10ec), 2); 300 BEGIN_NVC0(push, SUBC_3D(0x10ec), 2);
299 PUSH_DATA(push, 0xff); 301 PUSH_DATA (push, 0xff);
300 PUSH_DATA(push, 0xff); 302 PUSH_DATA (push, 0xff);
301 BEGIN_NVC0(push, SUBC_3D(0x074c), 1); 303 BEGIN_NVC0(push, SUBC_3D(0x074c), 1);
302 PUSH_DATA (push, 0x3f); 304 PUSH_DATA (push, 0x3f);
303 305
@@ -308,11 +310,6 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push)
308 BEGIN_NVC0(push, SUBC_3D(0x0de8), 1); 310 BEGIN_NVC0(push, SUBC_3D(0x0de8), 1);
309 PUSH_DATA (push, 1); 311 PUSH_DATA (push, 1);
310 312
311#if 0 /* software method */
312 BEGIN_NVC0(push, SUBC_3D(0x1528), 1); /* MP poke */
313 PUSH_DATA (push, 0);
314#endif
315
316 BEGIN_NVC0(push, SUBC_3D(0x12ac), 1); 313 BEGIN_NVC0(push, SUBC_3D(0x12ac), 1);
317 PUSH_DATA (push, 0); 314 PUSH_DATA (push, 0);
318 BEGIN_NVC0(push, SUBC_3D(0x0218), 1); 315 BEGIN_NVC0(push, SUBC_3D(0x0218), 1);
@@ -324,8 +321,6 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push)
324 BEGIN_NVC0(push, SUBC_3D(0x12d8), 2); 321 BEGIN_NVC0(push, SUBC_3D(0x12d8), 2);
325 PUSH_DATA (push, 0x10); 322 PUSH_DATA (push, 0x10);
326 PUSH_DATA (push, 0x10); 323 PUSH_DATA (push, 0x10);
327 BEGIN_NVC0(push, SUBC_3D(0x06d4), 1);
328 PUSH_DATA (push, 8);
329 BEGIN_NVC0(push, SUBC_3D(0x1140), 1); 324 BEGIN_NVC0(push, SUBC_3D(0x1140), 1);
330 PUSH_DATA (push, 0x10); 325 PUSH_DATA (push, 0x10);
331 BEGIN_NVC0(push, SUBC_3D(0x1610), 1); 326 BEGIN_NVC0(push, SUBC_3D(0x1610), 1);
@@ -333,24 +328,27 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push)
333 328
334 BEGIN_NVC0(push, SUBC_3D(0x164c), 1); 329 BEGIN_NVC0(push, SUBC_3D(0x164c), 1);
335 PUSH_DATA (push, 1 << 12); 330 PUSH_DATA (push, 1 << 12);
336 BEGIN_NVC0(push, SUBC_3D(0x151c), 1);
337 PUSH_DATA (push, 1);
338 BEGIN_NVC0(push, SUBC_3D(0x030c), 1); 331 BEGIN_NVC0(push, SUBC_3D(0x030c), 1);
339 PUSH_DATA (push, 0); 332 PUSH_DATA (push, 0);
340 BEGIN_NVC0(push, SUBC_3D(0x0300), 1); 333 BEGIN_NVC0(push, SUBC_3D(0x0300), 1);
341 PUSH_DATA (push, 3); 334 PUSH_DATA (push, 3);
342#if 0 /* software method */ 335
343 BEGIN_NVC0(push, SUBC_3D(0x1280), 1); /* PGRAPH poke */
344 PUSH_DATA (push, 0);
345#endif
346 BEGIN_NVC0(push, SUBC_3D(0x02d0), 1); 336 BEGIN_NVC0(push, SUBC_3D(0x02d0), 1);
347 PUSH_DATA (push, 0x1f40); 337 PUSH_DATA (push, 0x3fffff);
348 BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1); 338 BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1);
349 PUSH_DATA (push, 1); 339 PUSH_DATA (push, 1);
350 BEGIN_NVC0(push, SUBC_3D(0x19c0), 1); 340 BEGIN_NVC0(push, SUBC_3D(0x19c0), 1);
351 PUSH_DATA (push, 1); 341 PUSH_DATA (push, 1);
352 BEGIN_NVC0(push, SUBC_3D(0x075c), 1); 342 BEGIN_NVC0(push, SUBC_3D(0x075c), 1);
353 PUSH_DATA (push, 3); 343 PUSH_DATA (push, 3);
344
345 if (obj_class >= NVE4_3D_CLASS) {
346 BEGIN_NVC0(push, SUBC_3D(0x07fc), 1);
347 PUSH_DATA (push, 1);
348 }
349
350 /* TODO: find out what software methods 0x1528, 0x1280 and (on nve4) 0x02dc
351 * are supposed to do */
354} 352}
355 353
356static void 354static void
@@ -391,10 +389,20 @@ nvc0_screen_create(struct nouveau_device *dev)
391 struct pipe_screen *pscreen; 389 struct pipe_screen *pscreen;
392 struct nouveau_object *chan; 390 struct nouveau_object *chan;
393 struct nouveau_pushbuf *push; 391 struct nouveau_pushbuf *push;
392 uint32_t obj_class;
394 int ret; 393 int ret;
395 unsigned i; 394 unsigned i;
396 union nouveau_bo_config mm_config; 395 union nouveau_bo_config mm_config;
397 396
397 switch (dev->chipset & ~0xf) {
398 case 0xc0:
399 case 0xd0:
400 case 0xe0:
401 break;
402 default:
403 return NULL;
404 }
405
398 screen = CALLOC_STRUCT(nvc0_screen); 406 screen = CALLOC_STRUCT(nvc0_screen);
399 if (!screen) 407 if (!screen)
400 return NULL; 408 return NULL;
@@ -431,17 +439,25 @@ nvc0_screen_create(struct nouveau_device *dev)
431 screen->base.fence.emit = nvc0_screen_fence_emit; 439 screen->base.fence.emit = nvc0_screen_fence_emit;
432 screen->base.fence.update = nvc0_screen_fence_update; 440 screen->base.fence.update = nvc0_screen_fence_update;
433 441
434 ret = nouveau_object_new(chan, 0xbeef9039, NVC0_M2MF_CLASS, NULL, 0, 442 switch (dev->chipset & 0xf0) {
443 case 0xe0:
444 obj_class = NVE4_P2MF_CLASS;
445 break;
446 default:
447 obj_class = NVC0_M2MF_CLASS;
448 break;
449 }
450 ret = nouveau_object_new(chan, 0xbeef323f, obj_class, NULL, 0,
435 &screen->m2mf); 451 &screen->m2mf);
436 if (ret) 452 if (ret)
437 FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret); 453 FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret);
438 454
439 BEGIN_NVC0(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1); 455 BEGIN_NVC0(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1);
440 PUSH_DATA (push, screen->m2mf->oclass); 456 PUSH_DATA (push, screen->m2mf->oclass);
441 BEGIN_NVC0(push, NVC0_M2MF(NOTIFY_ADDRESS_HIGH), 3); 457 if (screen->m2mf->oclass == NVE4_P2MF_CLASS) {
442 PUSH_DATAh(push, screen->fence.bo->offset + 16); 458 BEGIN_NVC0(push, SUBC_COPY(NV01_SUBCHAN_OBJECT), 1);
443 PUSH_DATA (push, screen->fence.bo->offset + 16); 459 PUSH_DATA (push, 0xa0b5);
444 PUSH_DATA (push, 0); 460 }
445 461
446 ret = nouveau_object_new(chan, 0xbeef902d, NVC0_2D_CLASS, NULL, 0, 462 ret = nouveau_object_new(chan, 0xbeef902d, NVC0_2D_CLASS, NULL, 0,
447 &screen->eng2d); 463 &screen->eng2d);
@@ -461,17 +477,39 @@ nvc0_screen_create(struct nouveau_device *dev)
461 BEGIN_NVC0(push, SUBC_2D(0x0888), 1); 477 BEGIN_NVC0(push, SUBC_2D(0x0888), 1);
462 PUSH_DATA (push, 1); 478 PUSH_DATA (push, 1);
463 479
464 ret = nouveau_object_new(chan, 0xbeef9097, NVC0_3D_CLASS, NULL, 0, 480 BEGIN_NVC0(push, SUBC_2D(NVC0_GRAPH_NOTIFY_ADDRESS_HIGH), 2);
465 &screen->fermi); 481 PUSH_DATAh(push, screen->fence.bo->offset + 16);
482 PUSH_DATA (push, screen->fence.bo->offset + 16);
483
484 switch (dev->chipset & 0xf0) {
485 case 0xe0:
486 obj_class = NVE4_3D_CLASS;
487 break;
488 case 0xd0:
489 case 0xc0:
490 default:
491 switch (dev->chipset) {
492 case 0xd9:
493 case 0xc8:
494 obj_class = NVC8_3D_CLASS;
495 break;
496 case 0xc1:
497 obj_class = NVC1_3D_CLASS;
498 break;
499 default:
500 obj_class = NVC0_3D_CLASS;
501 break;
502 }
503 break;
504 }
505 ret = nouveau_object_new(chan, 0xbeef003d, obj_class, NULL, 0,
506 &screen->eng3d);
466 if (ret) 507 if (ret)
467 FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret); 508 FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret);
509 screen->base.class_3d = obj_class;
468 510
469 BEGIN_NVC0(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1); 511 BEGIN_NVC0(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
470 PUSH_DATA (push, screen->fermi->oclass); 512 PUSH_DATA (push, screen->eng3d->oclass);
471 BEGIN_NVC0(push, NVC0_3D(NOTIFY_ADDRESS_HIGH), 3);
472 PUSH_DATAh(push, screen->fence.bo->offset + 32);
473 PUSH_DATA (push, screen->fence.bo->offset + 32);
474 PUSH_DATA (push, 0);
475 513
476 BEGIN_NVC0(push, NVC0_3D(COND_MODE), 1); 514 BEGIN_NVC0(push, NVC0_3D(COND_MODE), 1);
477 PUSH_DATA (push, NVC0_3D_COND_MODE_ALWAYS); 515 PUSH_DATA (push, NVC0_3D_COND_MODE_ALWAYS);
@@ -501,10 +539,23 @@ nvc0_screen_create(struct nouveau_device *dev)
501 PUSH_DATA (push, 1); 539 PUSH_DATA (push, 1);
502 BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1); 540 BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1);
503 PUSH_DATA (push, 0); 541 PUSH_DATA (push, 0);
504 BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1); 542 if (screen->eng3d->oclass < NVE4_3D_CLASS) {
505 PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP); 543 BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1);
544 PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
545 } else {
546 BEGIN_NVC0(push, NVE4_3D(TEX_CB_INDEX), 1);
547 PUSH_DATA (push, 15);
548 }
549 BEGIN_NVC0(push, NVC0_3D(CALL_LIMIT_LOG), 1);
550 PUSH_DATA (push, 8); /* 128 */
551 BEGIN_NVC0(push, NVC0_3D(ZCULL_STATCTRS_ENABLE), 1);
552 PUSH_DATA (push, 1);
553 if (screen->eng3d->oclass >= NVC1_3D_CLASS) {
554 BEGIN_NVC0(push, NVC0_3D(CACHE_SPLIT), 1);
555 PUSH_DATA (push, NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1);
556 }
506 557
507 nvc0_magic_3d_init(push); 558 nvc0_magic_3d_init(push, screen->eng3d->oclass);
508 559
509 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL, 560 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL,
510 &screen->text); 561 &screen->text);
@@ -517,21 +568,41 @@ nvc0_screen_create(struct nouveau_device *dev)
517 nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100); 568 nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100);
518 569
519 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, NULL, 570 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, NULL,
520 &screen->uniforms); 571 &screen->uniform_bo);
521 if (ret) 572 if (ret)
522 goto fail; 573 goto fail;
523 574
524 /* auxiliary constants (6 user clip planes, base instance id) */
525 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
526 PUSH_DATA (push, 256);
527 PUSH_DATAh(push, screen->uniforms->offset + (5 << 16));
528 PUSH_DATA (push, screen->uniforms->offset + (5 << 16));
529 for (i = 0; i < 5; ++i) { 575 for (i = 0; i < 5; ++i) {
576 /* TIC and TSC entries for each unit (nve4+ only) */
577 /* auxiliary constants (6 user clip planes, base instance id */
578 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
579 PUSH_DATA (push, 512);
580 PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
581 PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
530 BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); 582 BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
531 PUSH_DATA (push, (15 << 4) | 1); 583 PUSH_DATA (push, (15 << 4) | 1);
584 if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
585 unsigned j;
586 BEGIN_1IC0(push, NVC0_3D(CB_POS), 9);
587 PUSH_DATA (push, 0);
588 for (j = 0; j < 8; ++j)
589 PUSH_DATA(push, j);
590 } else {
591 BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1);
592 PUSH_DATA (push, 0x54);
593 }
532 } 594 }
595 BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
596 PUSH_DATA (push, 0);
597
598 /* max MPs * max warps per MP (TODO: ask kernel) */
599 if (screen->eng3d->oclass >= NVE4_3D_CLASS)
600 screen->tls_size = 8 * 64;
601 else
602 screen->tls_size = 16 * 48;
603 screen->tls_size *= NVC0_CAP_MAX_PROGRAM_TEMPS * 16;
604 screen->tls_size = align(screen->tls_size, 1 << 17);
533 605
534 screen->tls_size = (16 * 32) * (NVC0_CAP_MAX_PROGRAM_TEMPS * 16);
535 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 606 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17,
536 screen->tls_size, NULL, &screen->tls); 607 screen->tls_size, NULL, &screen->tls);
537 if (ret) 608 if (ret)
@@ -550,21 +621,14 @@ nvc0_screen_create(struct nouveau_device *dev)
550 BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1); 621 BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1);
551 PUSH_DATA (push, 0); 622 PUSH_DATA (push, 0);
552 623
553 for (i = 0; i < 5; ++i) {
554 BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1);
555 PUSH_DATA (push, 0x54);
556 }
557 BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
558 PUSH_DATA (push, 0);
559
560 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL, 624 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL,
561 &screen->vfetch_cache); 625 &screen->poly_cache);
562 if (ret) 626 if (ret)
563 goto fail; 627 goto fail;
564 628
565 BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3); 629 BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3);
566 PUSH_DATAh(push, screen->vfetch_cache->offset); 630 PUSH_DATAh(push, screen->poly_cache->offset);
567 PUSH_DATA (push, screen->vfetch_cache->offset); 631 PUSH_DATA (push, screen->poly_cache->offset);
568 PUSH_DATA (push, 3); 632 PUSH_DATA (push, 3);
569 633
570 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, NULL, 634 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, NULL,
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
index e0f5e5ec246..8bcc1470593 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -6,6 +6,8 @@
6#include "nouveau/nouveau_fence.h" 6#include "nouveau/nouveau_fence.h"
7#include "nouveau/nouveau_heap.h" 7#include "nouveau/nouveau_heap.h"
8 8
9#include "nouveau/nv_object.xml.h"
10
9#include "nvc0_winsys.h" 11#include "nvc0_winsys.h"
10#include "nvc0_stateobj.h" 12#include "nvc0_stateobj.h"
11 13
@@ -24,10 +26,10 @@ struct nvc0_screen {
24 int num_occlusion_queries_active; 26 int num_occlusion_queries_active;
25 27
26 struct nouveau_bo *text; 28 struct nouveau_bo *text;
27 struct nouveau_bo *uniforms; 29 struct nouveau_bo *uniform_bo;
28 struct nouveau_bo *tls; 30 struct nouveau_bo *tls;
29 struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ 31 struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
30 struct nouveau_bo *vfetch_cache; 32 struct nouveau_bo *poly_cache;
31 33
32 uint64_t tls_size; 34 uint64_t tls_size;
33 35
@@ -55,7 +57,7 @@ struct nvc0_screen {
55 57
56 struct nouveau_mman *mm_VRAM_fe0; 58 struct nouveau_mman *mm_VRAM_fe0;
57 59
58 struct nouveau_object *fermi; 60 struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
59 struct nouveau_object *eng2d; 61 struct nouveau_object *eng2d;
60 struct nouveau_object *m2mf; 62 struct nouveau_object *m2mf;
61 struct nouveau_object *dijkstra; 63 struct nouveau_object *dijkstra;
diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c
index 54dfd8d1a1a..786889f8b57 100644
--- a/src/gallium/drivers/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c
@@ -70,7 +70,8 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
70 return TRUE; 70 return TRUE;
71 71
72 if (!prog->translated) { 72 if (!prog->translated) {
73 prog->translated = nvc0_program_translate(prog); 73 prog->translated = nvc0_program_translate(
74 prog, nvc0->screen->base.device->chipset);
74 if (!prog->translated) 75 if (!prog->translated)
75 return FALSE; 76 return FALSE;
76 } 77 }
diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c
index 3533a5e1ba4..5d34f2b0bcc 100644
--- a/src/gallium/drivers/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c
@@ -250,17 +250,17 @@ nvc0_validate_viewport(struct nvc0_context *nvc0)
250} 250}
251 251
252static INLINE void 252static INLINE void
253nvc0_upload_uclip_planes(struct nvc0_context *nvc0) 253nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
254{ 254{
255 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 255 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
256 struct nouveau_bo *bo = nvc0->screen->uniforms; 256 struct nouveau_bo *bo = nvc0->screen->uniform_bo;
257 257
258 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); 258 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
259 PUSH_DATA (push, 256); 259 PUSH_DATA (push, 512);
260 PUSH_DATAh(push, bo->offset + (5 << 16)); 260 PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 9));
261 PUSH_DATA (push, bo->offset + (5 << 16)); 261 PUSH_DATA (push, bo->offset + (5 << 16) + (s << 9));
262 BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1); 262 BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
263 PUSH_DATA (push, 0); 263 PUSH_DATA (push, 256);
264 PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4); 264 PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
265} 265}
266 266
@@ -289,21 +289,28 @@ nvc0_validate_clip(struct nvc0_context *nvc0)
289{ 289{
290 struct nouveau_pushbuf *push = nvc0->base.pushbuf; 290 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
291 struct nvc0_program *vp; 291 struct nvc0_program *vp;
292 unsigned stage;
292 uint8_t clip_enable = nvc0->rast->pipe.clip_plane_enable; 293 uint8_t clip_enable = nvc0->rast->pipe.clip_plane_enable;
293 294
294 if (nvc0->dirty & NVC0_NEW_CLIP) 295 if (nvc0->gmtyprog) {
295 nvc0_upload_uclip_planes(nvc0); 296 stage = 3;
296 297 vp = nvc0->gmtyprog;
297 vp = nvc0->gmtyprog; 298 } else
298 if (!vp) { 299 if (nvc0->tevlprog) {
300 stage = 2;
299 vp = nvc0->tevlprog; 301 vp = nvc0->tevlprog;
300 if (!vp) 302 } else {
301 vp = nvc0->vertprog; 303 stage = 0;
304 vp = nvc0->vertprog;
302 } 305 }
303 306
304 if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES) 307 if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES)
305 nvc0_check_program_ucps(nvc0, vp, clip_enable); 308 nvc0_check_program_ucps(nvc0, vp, clip_enable);
306 309
310 if (nvc0->dirty & (NVC0_NEW_CLIP | (NVC0_NEW_VERTPROG << stage)))
311 if (vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES)
312 nvc0_upload_uclip_planes(nvc0, stage);
313
307 clip_enable &= vp->vp.clip_enable; 314 clip_enable &= vp->vp.clip_enable;
308 315
309 if (nvc0->state.clip_enable != clip_enable) { 316 if (nvc0->state.clip_enable != clip_enable) {
@@ -375,7 +382,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
375 if (!nouveau_resource_mapped_by_gpu(&res->base)) { 382 if (!nouveau_resource_mapped_by_gpu(&res->base)) {
376 if (i == 0 && (res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY)) { 383 if (i == 0 && (res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY)) {
377 base = s << 16; 384 base = s << 16;
378 bo = nvc0->screen->uniforms; 385 bo = nvc0->screen->uniform_bo;
379 386
380 if (nvc0->state.uniform_buffer_bound[s] >= res->base.width0) 387 if (nvc0->state.uniform_buffer_bound[s] >= res->base.width0)
381 rebind = FALSE; 388 rebind = FALSE;
@@ -396,7 +403,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
396 nvc0->state.uniform_buffer_bound[s] = 0; 403 nvc0->state.uniform_buffer_bound[s] = 0;
397 } 404 }
398 405
399 if (bo != nvc0->screen->uniforms) 406 if (bo != nvc0->screen->uniform_bo)
400 BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD); 407 BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD);
401 408
402 if (rebind) { 409 if (rebind) {
@@ -517,6 +524,7 @@ static struct state_validate {
517 { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, 524 { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF },
518 { nvc0_validate_textures, NVC0_NEW_TEXTURES }, 525 { nvc0_validate_textures, NVC0_NEW_TEXTURES },
519 { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, 526 { nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
527 { nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS },
520 { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }, 528 { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
521 { nvc0_idxbuf_validate, NVC0_NEW_IDXBUF }, 529 { nvc0_idxbuf_validate, NVC0_NEW_IDXBUF },
522 { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG } 530 { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG }
diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c
index 3378b513936..2b47c04056e 100644
--- a/src/gallium/drivers/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nvc0/nvc0_surface.c
@@ -233,7 +233,7 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
233 src_box->x, src_box->y, src_box->z); 233 src_box->x, src_box->y, src_box->z);
234 234
235 for (i = 0; i < src_box->depth; ++i) { 235 for (i = 0; i < src_box->depth; ++i) {
236 nvc0_m2mf_transfer_rect(nvc0, &drect, &srect, nx, ny); 236 nvc0->m2mf_copy_rect(nvc0, &drect, &srect, nx, ny);
237 237
238 if (nv50_miptree(dst)->layout_3d) 238 if (nv50_miptree(dst)->layout_3d)
239 drect.z++; 239 drect.z++;
diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c
index f6c4ab39bd9..8dd7185bcdf 100644
--- a/src/gallium/drivers/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nvc0/nvc0_tex.c
@@ -26,6 +26,9 @@
26 26
27#include "util/u_format.h" 27#include "util/u_format.h"
28 28
29#define NVE4_TIC_ENTRY_INVALID 0x000fffff
30#define NVE4_TSC_ENTRY_INVALID 0xfff00000
31
29#define NV50_TIC_0_SWIZZLE__MASK \ 32#define NV50_TIC_0_SWIZZLE__MASK \
30 (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \ 33 (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
31 NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK) 34 NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
@@ -271,13 +274,76 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
271 return need_flush; 274 return need_flush;
272} 275}
273 276
277static boolean
278nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
279{
280 struct nouveau_bo *txc = nvc0->screen->txc;
281 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
282 unsigned i;
283 boolean need_flush = FALSE;
284
285 for (i = 0; i < nvc0->num_textures[s]; ++i) {
286 struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
287 struct nv04_resource *res;
288 const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
289
290 if (!tic) {
291 nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
292 continue;
293 }
294 res = nv04_resource(tic->pipe.texture);
295
296 if (tic->id < 0) {
297 tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
298
299 PUSH_SPACE(push, 16);
300 BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
301 PUSH_DATAh(push, txc->offset + (tic->id * 32));
302 PUSH_DATA (push, txc->offset + (tic->id * 32));
303 BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
304 PUSH_DATA (push, 32);
305 PUSH_DATA (push, 1);
306 BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9);
307 PUSH_DATA (push, 0x1001);
308 PUSH_DATAp(push, &tic->tic[0], 8);
309
310 need_flush = TRUE;
311 } else
312 if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
313 BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
314 PUSH_DATA (push, (tic->id << 4) | 1);
315 }
316 nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
317
318 res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
319 res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
320
321 nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
322 nvc0->tex_handles[s][i] |= tic->id;
323 if (dirty)
324 BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD);
325 }
326 for (; i < nvc0->state.num_textures[s]; ++i)
327 nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
328
329 nvc0->state.num_textures[s] = nvc0->num_textures[s];
330
331 return need_flush;
332}
333
274void nvc0_validate_textures(struct nvc0_context *nvc0) 334void nvc0_validate_textures(struct nvc0_context *nvc0)
275{ 335{
276 boolean need_flush; 336 boolean need_flush;
277 337
278 need_flush = nvc0_validate_tic(nvc0, 0); 338 if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
279 need_flush |= nvc0_validate_tic(nvc0, 3); 339 need_flush = nve4_validate_tic(nvc0, 0);
280 need_flush |= nvc0_validate_tic(nvc0, 4); 340 need_flush |= nve4_validate_tic(nvc0, 3);
341 need_flush |= nve4_validate_tic(nvc0, 4);
342 } else {
343 need_flush = nvc0_validate_tic(nvc0, 0);
344 need_flush |= nvc0_validate_tic(nvc0, 3);
345 need_flush |= nvc0_validate_tic(nvc0, 4);
346 }
281 347
282 if (need_flush) { 348 if (need_flush) {
283 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1); 349 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1);
@@ -329,16 +395,103 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
329 return need_flush; 395 return need_flush;
330} 396}
331 397
398static boolean
399nve4_validate_tsc(struct nvc0_context *nvc0, int s)
400{
401 struct nouveau_bo *txc = nvc0->screen->txc;
402 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
403 unsigned i;
404 boolean need_flush = FALSE;
405
406 for (i = 0; i < nvc0->num_samplers[s]; ++i) {
407 struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
408
409 if (!tsc) {
410 nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
411 continue;
412 }
413 if (tsc->id < 0) {
414 tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
415
416 PUSH_SPACE(push, 16);
417 BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
418 PUSH_DATAh(push, txc->offset + 65536 + (tsc->id * 32));
419 PUSH_DATA (push, txc->offset + 65536 + (tsc->id * 32));
420 BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
421 PUSH_DATA (push, 32);
422 PUSH_DATA (push, 1);
423 BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9);
424 PUSH_DATA (push, 0x1001);
425 PUSH_DATAp(push, &tsc->tsc[0], 8);
426
427 need_flush = TRUE;
428 }
429 nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
430
431 nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID;
432 nvc0->tex_handles[s][i] |= tsc->id << 20;
433 }
434 for (; i < nvc0->state.num_samplers[s]; ++i)
435 nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
436
437 nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
438
439 return need_flush;
440}
441
332void nvc0_validate_samplers(struct nvc0_context *nvc0) 442void nvc0_validate_samplers(struct nvc0_context *nvc0)
333{ 443{
334 boolean need_flush; 444 boolean need_flush;
335 445
336 need_flush = nvc0_validate_tsc(nvc0, 0); 446 if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
337 need_flush |= nvc0_validate_tsc(nvc0, 3); 447 need_flush = nve4_validate_tsc(nvc0, 0);
338 need_flush |= nvc0_validate_tsc(nvc0, 4); 448 need_flush |= nve4_validate_tsc(nvc0, 3);
449 need_flush |= nve4_validate_tsc(nvc0, 4);
450 } else {
451 need_flush = nvc0_validate_tsc(nvc0, 0);
452 need_flush |= nvc0_validate_tsc(nvc0, 3);
453 need_flush |= nvc0_validate_tsc(nvc0, 4);
454 }
339 455
340 if (need_flush) { 456 if (need_flush) {
341 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1); 457 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1);
342 PUSH_DATA (nvc0->base.pushbuf, 0); 458 PUSH_DATA (nvc0->base.pushbuf, 0);
343 } 459 }
344} 460}
461
462/* Upload the "diagonal" entries for the possible texture sources ($t == $s).
463 * At some point we might want to get a list of the combinations used by a
464 * shader and fill in those entries instead of having it extract the handles.
465 */
466void
467nve4_set_tex_handles(struct nvc0_context *nvc0)
468{
469 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
470 uint64_t address;
471 unsigned s;
472
473 if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
474 return;
475 address = nvc0->screen->uniform_bo->offset + (5 << 16);
476
477 for (s = 0; s < 5; ++s, address += (1 << 9)) {
478 uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
479 if (!dirty)
480 continue;
481 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
482 PUSH_DATA (push, 512);
483 PUSH_DATAh(push, address);
484 PUSH_DATA (push, address);
485 do {
486 int i = ffs(dirty) - 1;
487 dirty &= ~(1 << i);
488
489 BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
490 PUSH_DATA (push, (8 + i) * 4);
491 PUSH_DATA (push, nvc0->tex_handles[s][i]);
492 } while (dirty);
493
494 nvc0->textures_dirty[s] = 0;
495 nvc0->samplers_dirty[s] = 0;
496 }
497}
diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c
index 774793d8d02..fb44190574e 100644
--- a/src/gallium/drivers/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nvc0/nvc0_transfer.c
@@ -13,7 +13,7 @@ struct nvc0_transfer {
13 uint16_t nlayers; 13 uint16_t nlayers;
14}; 14};
15 15
16void 16static void
17nvc0_m2mf_transfer_rect(struct nvc0_context *nvc0, 17nvc0_m2mf_transfer_rect(struct nvc0_context *nvc0,
18 const struct nv50_m2mf_rect *dst, 18 const struct nv50_m2mf_rect *dst,
19 const struct nv50_m2mf_rect *src, 19 const struct nv50_m2mf_rect *src,
@@ -108,6 +108,71 @@ nvc0_m2mf_transfer_rect(struct nvc0_context *nvc0,
108 nouveau_bufctx_reset(bctx, 0); 108 nouveau_bufctx_reset(bctx, 0);
109} 109}
110 110
111static void
112nve4_m2mf_transfer_rect(struct nvc0_context *nvc0,
113 const struct nv50_m2mf_rect *dst,
114 const struct nv50_m2mf_rect *src,
115 uint32_t nblocksx, uint32_t nblocksy)
116{
117 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
118 struct nouveau_bufctx *bctx = nvc0->bufctx;
119 uint32_t exec;
120 uint32_t src_base = src->base;
121 uint32_t dst_base = dst->base;
122 const int cpp = dst->cpp;
123
124 assert(dst->cpp == src->cpp);
125
126 nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR);
127 nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD);
128 nouveau_pushbuf_bufctx(push, bctx);
129 nouveau_pushbuf_validate(push);
130
131 exec = 0x200 /* 2D_ENABLE */ | 0x6 /* UNK */;
132
133 if (!nouveau_bo_memtype(dst->bo)) {
134 assert(!dst->z);
135 dst_base += dst->y * dst->pitch + dst->x * cpp;
136 exec |= 0x100; /* DST_MODE_2D_LINEAR */
137 }
138 if (!nouveau_bo_memtype(src->bo)) {
139 assert(!src->z);
140 src_base += src->y * src->pitch + src->x * cpp;
141 exec |= 0x080; /* SRC_MODE_2D_LINEAR */
142 }
143
144 BEGIN_NVC0(push, SUBC_COPY(0x070c), 6);
145 PUSH_DATA (push, 0x1000 | dst->tile_mode);
146 PUSH_DATA (push, dst->pitch);
147 PUSH_DATA (push, dst->height);
148 PUSH_DATA (push, dst->depth);
149 PUSH_DATA (push, dst->z);
150 PUSH_DATA (push, (dst->y << 16) | (dst->x * cpp));
151
152 BEGIN_NVC0(push, SUBC_COPY(0x0728), 6);
153 PUSH_DATA (push, 0x1000 | src->tile_mode);
154 PUSH_DATA (push, src->pitch);
155 PUSH_DATA (push, src->height);
156 PUSH_DATA (push, src->depth);
157 PUSH_DATA (push, src->z);
158 PUSH_DATA (push, (src->y << 16) | (src->x * cpp));
159
160 BEGIN_NVC0(push, SUBC_COPY(0x0400), 8);
161 PUSH_DATAh(push, src->bo->offset + src_base);
162 PUSH_DATA (push, src->bo->offset + src_base);
163 PUSH_DATAh(push, dst->bo->offset + dst_base);
164 PUSH_DATA (push, dst->bo->offset + dst_base);
165 PUSH_DATA (push, src->pitch);
166 PUSH_DATA (push, dst->pitch);
167 PUSH_DATA (push, nblocksx * cpp);
168 PUSH_DATA (push, nblocksy);
169
170 BEGIN_NVC0(push, SUBC_COPY(0x0300), 1);
171 PUSH_DATA (push, exec);
172
173 nouveau_bufctx_reset(bctx, 0);
174}
175
111void 176void
112nvc0_m2mf_push_linear(struct nouveau_context *nv, 177nvc0_m2mf_push_linear(struct nouveau_context *nv,
113 struct nouveau_bo *dst, unsigned offset, unsigned domain, 178 struct nouveau_bo *dst, unsigned offset, unsigned domain,
@@ -154,6 +219,49 @@ nvc0_m2mf_push_linear(struct nouveau_context *nv,
154} 219}
155 220
156void 221void
222nve4_p2mf_push_linear(struct nouveau_context *nv,
223 struct nouveau_bo *dst, unsigned offset, unsigned domain,
224 unsigned size, const void *data)
225{
226 struct nvc0_context *nvc0 = nvc0_context(&nv->pipe);
227 struct nouveau_pushbuf *push = nv->pushbuf;
228 uint32_t *src = (uint32_t *)data;
229 unsigned count = (size + 3) / 4;
230
231 nouveau_bufctx_refn(nvc0->bufctx, 0, dst, domain | NOUVEAU_BO_WR);
232 nouveau_pushbuf_bufctx(push, nvc0->bufctx);
233 nouveau_pushbuf_validate(push);
234
235 while (count) {
236 unsigned nr;
237
238 if (!PUSH_SPACE(push, 16))
239 break;
240 nr = PUSH_AVAIL(push);
241 assert(nr >= 16);
242 nr = MIN2(count, nr - 8);
243 nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1));
244
245 BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
246 PUSH_DATAh(push, dst->offset + offset);
247 PUSH_DATA (push, dst->offset + offset);
248 BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
249 PUSH_DATA (push, nr * 4);
250 PUSH_DATA (push, 1);
251 /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
252 BEGIN_1IC0(push, NVE4_P2MF(EXEC), nr + 1);
253 PUSH_DATA (push, 0x1001);
254 PUSH_DATAp(push, src, nr);
255
256 count -= nr;
257 src += nr;
258 offset += nr * 4;
259 }
260
261 nouveau_bufctx_reset(nvc0->bufctx, 0);
262}
263
264static void
157nvc0_m2mf_copy_linear(struct nouveau_context *nv, 265nvc0_m2mf_copy_linear(struct nouveau_context *nv,
158 struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, 266 struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
159 struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, 267 struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
@@ -191,6 +299,32 @@ nvc0_m2mf_copy_linear(struct nouveau_context *nv,
191 nouveau_bufctx_reset(bctx, 0); 299 nouveau_bufctx_reset(bctx, 0);
192} 300}
193 301
302static void
303nve4_m2mf_copy_linear(struct nouveau_context *nv,
304 struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
305 struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
306 unsigned size)
307{
308 struct nouveau_pushbuf *push = nv->pushbuf;
309 struct nouveau_bufctx *bctx = nvc0_context(&nv->pipe)->bufctx;
310
311 nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD);
312 nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR);
313 nouveau_pushbuf_bufctx(push, bctx);
314 nouveau_pushbuf_validate(push);
315
316 BEGIN_NVC0(push, SUBC_COPY(0x0400), 4);
317 PUSH_DATAh(push, src->offset + srcoff);
318 PUSH_DATA (push, src->offset + srcoff);
319 PUSH_DATAh(push, dst->offset + dstoff);
320 PUSH_DATA (push, dst->offset + dstoff);
321 BEGIN_NVC0(push, SUBC_COPY(0x0418), 1);
322 PUSH_DATA (push, size);
323 IMMED_NVC0(push, SUBC_COPY(0x0300), 0x6);
324
325 nouveau_bufctx_reset(bctx, 0);
326}
327
194struct pipe_transfer * 328struct pipe_transfer *
195nvc0_miptree_transfer_new(struct pipe_context *pctx, 329nvc0_miptree_transfer_new(struct pipe_context *pctx,
196 struct pipe_resource *res, 330 struct pipe_resource *res,
@@ -253,8 +387,8 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
253 unsigned z = tx->rect[0].z; 387 unsigned z = tx->rect[0].z;
254 unsigned i; 388 unsigned i;
255 for (i = 0; i < tx->nlayers; ++i) { 389 for (i = 0; i < tx->nlayers; ++i) {
256 nvc0_m2mf_transfer_rect(nvc0, &tx->rect[1], &tx->rect[0], 390 nvc0->m2mf_copy_rect(nvc0, &tx->rect[1], &tx->rect[0],
257 tx->nblocksx, tx->nblocksy); 391 tx->nblocksx, tx->nblocksy);
258 if (mt->layout_3d) 392 if (mt->layout_3d)
259 tx->rect[0].z++; 393 tx->rect[0].z++;
260 else 394 else
@@ -280,8 +414,8 @@ nvc0_miptree_transfer_del(struct pipe_context *pctx,
280 414
281 if (tx->base.usage & PIPE_TRANSFER_WRITE) { 415 if (tx->base.usage & PIPE_TRANSFER_WRITE) {
282 for (i = 0; i < tx->nlayers; ++i) { 416 for (i = 0; i < tx->nlayers; ++i) {
283 nvc0_m2mf_transfer_rect(nvc0, &tx->rect[0], &tx->rect[1], 417 nvc0->m2mf_copy_rect(nvc0, &tx->rect[0], &tx->rect[1],
284 tx->nblocksx, tx->nblocksy); 418 tx->nblocksx, tx->nblocksy);
285 if (mt->layout_3d) 419 if (mt->layout_3d)
286 tx->rect[0].z++; 420 tx->rect[0].z++;
287 else 421 else
@@ -362,3 +496,18 @@ nvc0_cb_push(struct nouveau_context *nv,
362 496
363 nouveau_bufctx_reset(bctx, 0); 497 nouveau_bufctx_reset(bctx, 0);
364} 498}
499
500void
501nvc0_init_transfer_functions(struct nvc0_context *nvc0)
502{
503 if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
504 nvc0->m2mf_copy_rect = nve4_m2mf_transfer_rect;
505 nvc0->base.copy_data = nve4_m2mf_copy_linear;
506 nvc0->base.push_data = nve4_p2mf_push_linear;
507 } else {
508 nvc0->m2mf_copy_rect = nvc0_m2mf_transfer_rect;
509 nvc0->base.copy_data = nvc0_m2mf_copy_linear;
510 nvc0->base.push_data = nvc0_m2mf_push_linear;
511 }
512 nvc0->base.push_cb = nvc0_cb_push;
513}
diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h
index c3ab1c93644..c13ebd5fb58 100644
--- a/src/gallium/drivers/nvc0/nvc0_winsys.h
+++ b/src/gallium/drivers/nvc0/nvc0_winsys.h
@@ -46,17 +46,24 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
46} 46}
47 47
48 48
49#define SUBC_3D(m) 1, (m) 49#define SUBC_3D(m) 0, (m)
50#define NVC0_3D(n) SUBC_3D(NVC0_3D_##n) 50#define NVC0_3D(n) SUBC_3D(NVC0_3D_##n)
51#define NVE4_3D(n) SUBC_3D(NVE4_3D_##n)
51 52
52#define SUBC_2D(m) 2, (m) 53#define SUBC_COMPUTE(m) 1, (m)
53#define NVC0_2D(n) SUBC_2D(NVC0_2D_##n) 54#define NVC0_COMPUTE(n) SUBC_COMPUTE(NVC0_COMPUTE_##n)
55#define NVE4_COMPUTE(n) SUBC_COMPUTE(NVE4_COMPUTE_##n)
54 56
55#define SUBC_M2MF(m) 3, (m) 57#define SUBC_M2MF(m) 2, (m)
58#define SUBC_P2MF(m) 2, (m)
56#define NVC0_M2MF(n) SUBC_M2MF(NVC0_M2MF_##n) 59#define NVC0_M2MF(n) SUBC_M2MF(NVC0_M2MF_##n)
60#define NVE4_P2MF(n) SUBC_P2MF(NVE4_P2MF_##n)
57 61
58#define SUBC_COMPUTE(m) 4, (m) 62#define SUBC_2D(m) 3, (m)
59#define NVC0_COMPUTE(n) SUBC_COMPUTE(NVC0_COMPUTE_##n) 63#define NVC0_2D(n) SUBC_2D(NVC0_2D_##n)
64
/* Copy engine: subchannel 4. NVE4_COPY(n) expands to the subchannel and the
 * method constant NVE4_COPY_<n>. The macro parameter must be named n to
 * match the token pasted in the body; with a parameter named m every use
 * expanded to the literal identifier NVE4_COPY_n.
 */
#define SUBC_COPY(m) 4, (m)
#define NVE4_COPY(n) SUBC_COPY(NVE4_COPY_##n)
60 67
61static INLINE uint32_t 68static INLINE uint32_t
62NVC0_FIFO_PKHDR_SQ(int subc, int mthd, unsigned size) 69NVC0_FIFO_PKHDR_SQ(int subc, int mthd, unsigned size)
diff --git a/src/gallium/drivers/nvc0/nve4_p2mf.xml.h b/src/gallium/drivers/nvc0/nve4_p2mf.xml.h
new file mode 100644
index 00000000000..68a742fadfe
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nve4_p2mf.xml.h
@@ -0,0 +1,107 @@
1#ifndef RNNDB_NVE4_P2MF_XML
2#define RNNDB_NVE4_P2MF_XML
3
4/* Autogenerated file, DO NOT EDIT manually!
5
6This file was generated by the rules-ng-ng headergen tool in this git repository:
7http://0x04.net/cgit/index.cgi/rules-ng-ng
8git clone git://0x04.net/rules-ng-ng
9
10The rules-ng-ng source files this header was generated from are:
11- rnndb/nve4_p2mf.xml ( 1400 bytes, from 2012-04-14 21:29:11)
12- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
13- ./rnndb/nv_object.xml ( 12736 bytes, from 2012-04-14 21:30:24)
14- ./rnndb/nvchipsets.xml ( 3701 bytes, from 2012-03-22 20:40:59)
15- ./rnndb/nv_defs.xml ( 4437 bytes, from 2011-08-11 18:25:12)
16- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12)
17
18Copyright (C) 2006-2012 by the following authors:
19- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
20- Ben Skeggs (darktama, darktama_)
21- B. R. <koala_br@users.sourceforge.net> (koala_br)
22- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
23- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
24- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
25- Dmitry Baryshkov
26- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
27- EdB <edb_@users.sf.net> (edb_)
28- Erik Waling <erikwailing@users.sf.net> (erikwaling)
29- Francisco Jerez <currojerez@riseup.net> (curro)
30- imirkin <imirkin@users.sf.net> (imirkin)
31- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
32- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
33- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
34- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
35- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
36- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
37- Mark Carey <mark.carey@gmail.com> (careym)
38- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
39- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
40- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
41- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
42- Peter Popov <ironpeter@users.sf.net> (ironpeter)
43- Richard Hughes <hughsient@users.sf.net> (hughsient)
44- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
45- Serge Martin
46- Simon Raffeiner
47- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
48- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
49- sturmflut <sturmflut@users.sf.net> (sturmflut)
50- Sylvain Munaut <tnt@246tNt.com>
51- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
52- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
53- Younes Manton <younes.m@gmail.com> (ymanton)
54
55Permission is hereby granted, free of charge, to any person obtaining
56a copy of this software and associated documentation files (the
57"Software"), to deal in the Software without restriction, including
58without limitation the rights to use, copy, modify, merge, publish,
59distribute, sublicense, and/or sell copies of the Software, and to
60permit persons to whom the Software is furnished to do so, subject to
61the following conditions:
62
63The above copyright notice and this permission notice (including the
64next paragraph) shall be included in all copies or substantial
65portions of the Software.
66
67THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
68EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
69MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
70IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
71LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
72OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
73WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
74*/
75
76
77
/* Transfer size: length of one line and number of lines. */
#define NVE4_P2MF_LINE_LENGTH_IN                        0x00000180
#define NVE4_P2MF_LINE_COUNT                            0x00000184

/* Destination address (high and low method). */
#define NVE4_P2MF_DST_ADDRESS_HIGH                      0x00000188
#define NVE4_P2MF_DST_ADDRESS_LOW                       0x0000018c

/* Destination surface layout and position. */
#define NVE4_P2MF_DST_TILE_MODE                         0x00000194
#define NVE4_P2MF_DST_PITCH                             0x00000198
#define NVE4_P2MF_DST_HEIGHT                            0x0000019c
#define NVE4_P2MF_DST_DEPTH                             0x000001a0
#define NVE4_P2MF_DST_Z                                 0x000001a4
#define NVE4_P2MF_DST_X                                 0x000001a8
#define NVE4_P2MF_DST_Y                                 0x000001ac

/* EXEC method and its flags (LINEAR | UNK12 == 0x1001). */
#define NVE4_P2MF_EXEC                                  0x000001b0
#define NVE4_P2MF_EXEC_LINEAR                           0x00000001
#define NVE4_P2MF_EXEC_UNK12                            0x00001000

/* Inline data method. */
#define NVE4_P2MF_DATA                                  0x000001b4

105
106
107#endif /* RNNDB_NVE4_P2MF_XML */
diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
index 91d51c08ed5..bf990147764 100644
--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
@@ -34,6 +34,7 @@ nouveau_drm_screen_create(int fd)
34 break; 34 break;
35 case 0xc0: 35 case 0xc0:
36 case 0xd0: 36 case 0xd0:
37 case 0xe0:
37 init = nvc0_screen_create; 38 init = nvc0_screen_create;
38 break; 39 break;
39 default: 40 default: